extractSingleFractionParts($text);
        // Anything that is not one single \frac{..}{..} / \dfrac{..}{..} is returned untouched.
        if ($parts === null) {
            return $option;
        }

        [$num, $den] = $parts;

        // Both parts may only contain signs, digits, ASCII letters, the radical sign,
        // backslashes and braces; relations and binary operators disqualify the rewrite.
        $compactPart = '/^[\-+0-9a-zA-Z\x{221A}\\\\{}]+$/u';
        if (
            preg_match($compactPart, $num) !== 1
            || preg_match($compactPart, $den) !== 1
            || preg_match('/[=<>]/u', $num.$den) === 1
            || $this->hasBinaryOperator($num)
            || $this->hasBinaryOperator($den)
        ) {
            return $option;
        }

        // Replace the stacked fraction with its inline "numerator/denominator" form.
        return str_replace($text, $num.'/'.$den, $trimmed);
    }

    /**
     * Choose the option grid (1, 2 or 4 columns) for a set of answer options.
     *
     * Each option is analysed, eligibility for the 4- and 2-column grids is derived from
     * length/width statistics, every eligible grid is scored with layoutScore(), and the
     * candidate with the lowest score wins (ties go to the candidate registered first:
     * 4 columns, then 2, then 1).
     *
     * @param array<array-key, mixed> $options Option texts; every element is cast to string before analysis.
     * @param string $context 'grading' selects the tighter threshold set; any other value uses the defaults.
     * @return array{class:string,layout:string,max_length:int,max_width_units:float,avg_width_units:float,width_std_dev:float,has_complex_formula:bool,opt_count:int}
     */
    public function decide(array $options, string $context = 'exam'): array
    {
        $total = count($options);
        $longest = 0;
        $widest = 0.0;
        $widths = [];
        $widthSum = 0.0;
        $compactMathHits = 0;
        $plainCompactHits = 0;
        $metas = [];
        $anyComplex = false;

        foreach ($options as $rawOption) {
            $meta = $this->analyzeOption((string) $rawOption);
            $metas[] = $meta;

            $widths[] = $meta['width_units'];
            $widthSum += $meta['width_units'];
            $longest = max($longest, $meta['effective_length']);
            $widest = max($widest, $meta['width_units']);

            $compactMathHits += $meta['is_compact_math'] ? 1 : 0;
            $plainCompactHits += $meta['is_plain_compact'] ? 1 : 0;
            if ($meta['is_complex_formula']) {
                $anyComplex = true;
            }
        }

        [$len4Max, $len2Max, $cap4, $cap2, $cap1] = $this->thresholdsFor($context);

        $meanWidth = $total > 0 ? ($widthSum / $total) : 0.0;
        $widthStdDev = sqrt($this->variance($widths, $meanWidth));
        $peakToMean = $meanWidth > 0 ? ($widest / $meanWidth) : 0.0;

        // Four columns are only enabled when the options are compact overall and their widths
        // are evenly distributed, so that a single long option cannot push option D onto a new line.
        $evenAndNarrow = $widest <= ($cap4 * 0.92)
            && $peakToMean <= 1.42
            && $widthStdDev <= 1.65;

        $isFourOptions = $total === 4;
        $allCompactMath = $total > 0 && $compactMathHits === $total;
        $allPlainCompact = $plainCompactHits === $total;

        $grid4ViaCompactMath = $isFourOptions
            && $allCompactMath
            && $widest <= ($cap4 * 1.04)
            && $widthStdDev <= 2.1;
        $grid4ViaPlainCompact = $isFourOptions
            && $allPlainCompact
            && $longest <= 9
            && $widest <= ($cap4 * 1.20)
            && $widthStdDev <= 2.4;
        $grid4ViaPlainShort = $isFourOptions
            && $allPlainCompact
            && $longest <= 8
            && ! $anyComplex;

        $grid4Eligible = $total <= 4
            && ! $anyComplex
            && (
                ($longest <= $len4Max && $evenAndNarrow)
                || $grid4ViaCompactMath
                || $grid4ViaPlainCompact
                || $grid4ViaPlainShort
            );
        $grid2Eligible = $longest <= $len2Max && $widest <= ($cap2 * 1.18);

        // Registration order matters: on equal scores the earlier candidate is kept.
        $candidates = [];
        if ($grid4Eligible) {
            $candidates['options-grid-4'] = $this->layoutScore($metas, 4, $cap4);
        }
        if ($grid2Eligible) {
            $candidates['options-grid-2'] = $this->layoutScore($metas, 2, $cap2);
        }
        $candidates['options-grid-1'] = $this->layoutScore($metas, 1, $cap1);

        // If two columns are feasible, avoid degrading to one column unless one column is
        // significantly better. (The single-column candidate is always present.)
        if (isset($candidates['options-grid-2'])) {
            $twoColScore = $candidates['options-grid-2']['score'];
            $oneColScore = $candidates['options-grid-1']['score'];
            if ($twoColScore <= ($oneColScore + 1.6)) {
                $candidates['options-grid-2']['score'] -= 0.35;
            }
        }

        $selectedClass = 'options-grid-1';
        $lowestScore = PHP_FLOAT_MAX;
        foreach ($candidates as $candidateClass => $candidate) {
            if ($candidate['score'] < $lowestScore) {
                $selectedClass = $candidateClass;
                $lowestScore = $candidate['score'];
            }
        }

        $layoutLabel = match ($selectedClass) {
            'options-grid-4' => '4列布局',
            'options-grid-2' => '2列布局',
            default => '1列布局',
        };

        return [
            'class' => $selectedClass,
            'layout' => $layoutLabel,
            'max_length' => $longest,
            'max_width_units' => $widest,
            'avg_width_units' => $meanWidth,
            'width_std_dev' => $widthStdDev,
            'has_complex_formula' => $anyComplex,
            'opt_count' => $total,
        ];
    }

    /**
     * Derive the length, width, height and classification flags of one option.
     *
     * Tags are stripped, HTML entities decoded and all whitespace removed before measuring.
     * An option classified as a complex formula gets 6 added to its effective length.
     *
     * @return array{effective_length:int,raw_length:int,width_units:float,height_units:float,is_complex_formula:bool,is_compact_math:bool,is_plain_compact:bool,has_stacked_fraction:bool}
     */
    private function analyzeOption(string $option): array
    {
        $plain = html_entity_decode(strip_tags($option), ENT_QUOTES | ENT_HTML5, 'UTF-8');
        $plain = preg_replace('/\s+/u', '', $plain) ?? '';
        // Same text with one pair of enclosing "$...$" delimiters removed, if present.
        $bare = preg_replace('/^\$(.*)\$$/u', '$1', $plain) ?? $plain;

        $rawLength = mb_strlen($plain, 'UTF-8');

        $isSimpleCompactMath = preg_match(
            '/^-?[0-9a-zA-Z\x{221A}]+(?:\/[0-9a-zA-Z\x{221A}]+)?$/u',
            $bare
        ) === 1;
        $isCompactLatexFraction = preg_match(
            '/^\\\\d?frac\{[-+0-9a-zA-Z\\\\\x{221A}\^\(\)]+\}\{[-+0-9a-zA-Z\\\\\x{221A}\^\(\)]+\}$/u',
            $bare
        ) === 1;
        $isCompactLatexDegree = preg_match(
            '/^-?[0-9]+(?:\.[0-9]+)?(?:\^\{?\\\\circ\}?|°)$/u',
            $bare
        ) === 1;
        $isCompactMath = $isSimpleCompactMath || $isCompactLatexFraction || $isCompactLatexDegree;

        $isSimpleSymbolLatex = preg_match('/^\\\\(pm|mp)\s*[0-9]+$/u', $bare) === 1;
        $lacksLatexCommand = preg_match('/\\\\[a-zA-Z]+/u', $bare) !== 1;
        $isPlainCompact = ($lacksLatexCommand || $isSimpleSymbolLatex)
            && mb_strlen($bare, 'UTF-8') <= 10;

        $hasLatexCmd = preg_match(
            '/\\\\(frac|dfrac|sqrt|log|sin|cos|tan|cdot|times|left|right|begin|end)/u',
            $plain
        ) === 1;

        // Short expressions that can safely be turned into inline fractions do not incur
        // the stacked-fraction height penalty.
        $hasStackedFraction = ! $isCompactLatexFraction
            && preg_match('/\\\\d?frac\{[^{}]+\}\{[^{}]+\}/u', $bare) === 1;

        $sqrtCount = preg_match_all('/\\\\sqrt|√/u', $plain);
        $supCount = preg_match_all('/\^/u', $plain);
        $operatorCount = preg_match_all('/[=<>+\-*\/\^_]/u', $plain);
        $hasBrackets = preg_match('/[()\(\)\[\]\{\}]/u', $plain) === 1;

        $isComplexFormula = ! $isCompactMath
            && ($hasLatexCmd || $operatorCount >= 2 || ($hasBrackets && $rawLength >= 8));
        $effectiveLength = $isComplexFormula ? $rawLength + 6 : $rawLength;

        // Base height of one line plus capped surcharges for stacked fractions, roots and superscripts.
        $heightUnits = 1.0 + ($hasStackedFraction ? 0.72 : 0.0);
        $heightUnits += min(0.36, ((int) $sqrtCount) * 0.08);
        $heightUnits += min(0.28, ((int) $supCount) * 0.07);

        return [
            'effective_length' => $effectiveLength,
            'raw_length' => $rawLength,
            'width_units' => $this->estimateWidthUnits($plain),
            'height_units' => $heightUnits,
            'is_complex_formula' => $isComplexFormula,
            'is_compact_math' => $isCompactMath,
            'is_plain_compact' => $isPlainCompact,
            'has_stacked_fraction' => $hasStackedFraction,
        ];
    }

    /**
     * Threshold set for a rendering context.
     *
     * On the grading page the stem + answer area is more compact, so the thresholds
     * are more conservative there. The tuple is consumed by decide() as
     * [4-col max length, 2-col max length, 4-col width cap, 2-col width cap, 1-col width cap].
     *
     * @return array{0:int,1:int,2:float,3:float,4:float}
     */
    private function thresholdsFor(string $context): array
    {
        return match ($context) {
            'grading' => [10, 24, 10.2, 21.0, 44.0],
            default => [12, 28, 11.5, 23.5, 48.0],
        };
    }

    /**
     * Estimate the rendered width of an option as a weighted character count.
     *
     * Returns 0.0 for an empty string; otherwise the per-character weights plus a fixed 2.2.
     */
    private function estimateWidthUnits(string $text): float
    {
        if ($text === '') {
            return 0.0;
        }

        // Simple TeX normalisation: keep command names from distorting the width estimate.
        $rewrites = [
            '/\\\\(left|right|displaystyle)/u' => '',
            '/\\\\(frac|dfrac)\{([^{}]+)\}\{([^{}]+)\}/u' => '($2/$3)',
            '/\\\\sqrt\{([^{}]+)\}/u' => '√($1)',
        ];
        $normalized = $text;
        foreach ($rewrites as $pattern => $replacement) {
            // A failed replacement (null) keeps the text from the previous step.
            $normalized = preg_replace($pattern, $replacement, $normalized) ?? $normalized;
        }

        $glyphs = preg_split('//u', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        $units = 0.0;
        foreach ($glyphs as $glyph) {
            // Arms are tested top to bottom; the first matching class decides the weight.
            $units += match (true) {
                preg_match('/[\x{4e00}-\x{9fff}]/u', $glyph) === 1 => 1.0,
                preg_match('/[A-Za-z]/u', $glyph) === 1 => 0.62,
                preg_match('/[0-9]/u', $glyph) === 1 => 0.58,
                preg_match('/[=<>+\-*\/\^_]/u', $glyph) === 1 => 0.45,
                preg_match('/[()\(\)\[\]\{\}]/u', $glyph) === 1 => 0.35,
                $glyph === '√' => 0.55,
                default => 0.5,
            };
        }

        // Compensation for the option label ("A.") and the left-hand spacing.
        return $units + 2.2;
    }

    /**
     * Score one column layout for the analysed options; a lower score is better.
     *
     * The score combines the share of options wider than the column cap, the average number
     * of extra wrapped lines, the variation of row heights, the share of stacked fractions
     * and a fixed per-layout penalty (2.4 for one column, 0.7 for two, 0.0 otherwise).
     *
     * @param list<array<string, mixed>> $optionMetas Rows as produced by analyzeOption(); reads
     *                                                width_units, height_units and has_stacked_fraction.
     * @return array{score:float}
     */
    private function layoutScore(array $optionMetas, int $cols, float $colCap): array
    {
        $count = count($optionMetas);
        if ($count === 0) {
            return ['score' => 0.0];
        }

        $isFourCols = $cols === 4;
        $wrapDivisor = max(1.0, $colCap);

        $overflowing = 0;
        $extraLines = 0.0;
        $stacked = 0;
        $rowHeights = [];

        foreach ($optionMetas as $position => $meta) {
            // Stacked fractions are charged extra width, more so in the 4-column grid.
            $fractionPadding = $meta['has_stacked_fraction'] ? ($isFourCols ? 1.6 : 0.6) : 0.0;
            $width = $meta['width_units'] + $fractionPadding;

            if ($width > $colCap) {
                $overflowing++;
            }

            $lines = max(1.0, ceil($width / $wrapDivisor));
            $extraLines += max(0.0, $lines - 1.0);

            // A row is as tall as its tallest cell.
            $row = intdiv($position, $cols);
            $rowHeights[$row] = max($rowHeights[$row] ?? 0.0, $meta['height_units'] * $lines);

            if ($meta['has_stacked_fraction']) {
                $stacked++;
            }
        }

        $overflowRate = $overflowing / $count;
        $lineWrapRate = $extraLines / $count;
        $stackedRate = $stacked / $count;

        // Coefficient of variation of the row heights.
        $rowMean = array_sum($rowHeights) / max(1, count($rowHeights));
        $rowStd = sqrt($this->variance(array_values($rowHeights), $rowMean));
        $rowCv = $rowMean > 0 ? ($rowStd / $rowMean) : 0.0;

        $whitespacePenalty = match ($cols) {
            1 => 2.4,
            2 => 0.7,
            default => 0.0,
        };

        $score = ($overflowRate * 120.0)
            + ($lineWrapRate * 10.0)
            + ($rowCv * 5.0)
            + ($stackedRate * ($isFourCols ? 2.6 : 0.8))
            + $whitespacePenalty;

        return ['score' => $score];
    }

    /**
     * Population variance of $values around the supplied mean; 0.0 for an empty array.
     *
     * @param array<array-key, int|float> $values
     */
    private function variance(array $values, float $avg): float
    {
        if ($values === []) {
            return 0.0;
        }

        $squaredSum = array_reduce(
            $values,
            static function (float $carry, $value) use ($avg): float {
                $delta = $value - $avg;

                return $carry + ($delta * $delta);
            },
            0.0
        );

        return $squaredSum / count($values);
    }

    /**
     * Split a string consisting of exactly one \frac{..}{..} or \dfrac{..}{..} into its parts.
     *
     * @return array{0:string,1:string}|null [numerator, denominator], or null when the text is
     *                                        not a single fraction spanning the whole string.
     */
    private function extractSingleFractionParts(string $text): ?array
    {
        if (preg_match('/^\\\\d?frac/u', $text) !== 1) {
            return null;
        }

        // Length of the command name: "\dfrac" is 6 characters, "\frac" is 5.
        $cursor = preg_match('/^\\\\dfrac/u', $text) === 1 ? 6 : 5;
        $length = mb_strlen($text, 'UTF-8');
        if ($cursor >= $length || mb_substr($text, $cursor, 1, 'UTF-8') !== '{') {
            return null;
        }

        [$numerator, $afterNumerator] = $this->readBalancedBraces($text, $cursor);
        if (
            $numerator === null
            || $afterNumerator >= $length
            || mb_substr($text, $afterNumerator, 1, 'UTF-8') !== '{'
        ) {
            return null;
        }

        [$denominator, $afterDenominator] = $this->readBalancedBraces($text, $afterNumerator);

        // The fraction must end exactly at the end of the string, so that input such as
        // "\frac{a}{b}\text{cm}" is not mangled.
        if ($denominator === null || $afterDenominator !== $length) {
            return null;
        }

        return [$numerator, $denominator];
    }

    // NOTE(review): the source text is damaged from here. Everything between the start of the
    // regex in hasBinaryOperator() and the bounds check of the brace-reading helper (called as
    // readBalancedBraces() above) appears to be missing, presumably stripped together with a
    // "<...>" span. The tokens below are kept exactly as found; restore the pattern, the end of
    // hasBinaryOperator() and the helper's signature from version control before relying on
    // this section.
    private function hasBinaryOperator(string $expr): bool
    {
        return preg_match('/(?= $len || mb_substr($text, $startOffset, 1, 'UTF-8') !== '{') {
            return [null, $startOffset];
        }

        $depth = 0;
        $buffer = '';
        for ($i = $startOffset; $i < $len; $i++) {
            $ch = mb_substr($text, $i, 1, 'UTF-8');
            if ($ch === '{') {
                $depth++;
                // The outermost opening brace is not copied into the buffer.
                if ($depth > 1) {
                    $buffer .= $ch;
                }
                continue;
            }
            if ($ch === '}') {
                $depth--;
                // Matching close of the outermost brace: return the content and the next offset.
                if ($depth === 0) {
                    return [$buffer, $i + 1];
                }
                if ($depth < 0) {
                    return [null, $i + 1];
                }
                $buffer .= $ch;
                continue;
            }
            $buffer .= $ch;
        }

        // Ran off the end without closing the outermost brace.
        return [null, $len];
    }
}