| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428 |
- <?php
- namespace App\Support;
/**
 * Decides how the options of a multiple-choice question should be laid out
 * (4, 2 or 1 column) from rough text metrics: character length, an estimated
 * rendered width, and whether an option looks like a complex formula.
 *
 * All measurements are heuristics; the numeric constants below are tuning
 * values, not exact typographic widths.
 */
class OptionLayoutDecider
{
    /** Human-readable layout label for each CSS class returned by decide(). */
    private const LAYOUT_LABELS = [
        'options-grid-4' => '4列布局',
        'options-grid-2' => '2列布局',
        'options-grid-1' => '1列布局',
    ];

    /** Characters counted as math operators (shared by analysis and width estimation). */
    private const OPERATOR_PATTERN = '/[=<>+\-*\/\^_]/u';

    /** Characters counted as brackets (shared by analysis and width estimation). */
    private const BRACKET_PATTERN = '/[()\(\)\[\]\{\}]/u';

    /**
     * Rewrites an option that consists of exactly one simple LaTeX fraction
     * (`\frac{a}{b}` / `\dfrac{a}{b}`, optionally wrapped in `$...$`) into the
     * inline form `a/b`, keeping the `$` delimiters.
     *
     * The option is returned unchanged when it is blank, is not a single
     * fraction, or when numerator/denominator contain anything outside a small
     * whitelist (sign, digits, letters, the radical sign, backslash, braces) or a
     * binary `+`, `-`, `*` operator.
     *
     * @param string $option raw option text
     * @return string the inline form (trimmed) or the original `$option`
     */
    public function normalizeCompactMathForDisplay(string $option): string
    {
        $trimmed = trim($option);
        if ($trimmed === '') {
            return $option;
        }

        // Drop one pair of surrounding `$` math delimiters, if present.
        $text = preg_replace('/^\$(.*)\$$/u', '$1', $trimmed) ?? $trimmed;

        $parts = $this->extractSingleFractionParts($text);
        if ($parts === null) {
            return $option;
        }

        [$num, $den] = $parts;

        // `D` makes `$` match only at the very end, so a part ending in a
        // newline is rejected instead of being inlined with the newline intact.
        $compactPart = '/^[\-+0-9a-zA-Z\x{221A}\\\\{}]+$/Du';
        foreach ([$num, $den] as $part) {
            if (preg_match($compactPart, $part) !== 1 || $this->hasBinaryOperator($part)) {
                return $option;
            }
        }

        // Defensive: relational operators must never end up in an inline fraction.
        if (preg_match('/[=<>]/u', $num.$den) === 1) {
            return $option;
        }

        // Replace only the fraction itself so surrounding `$` delimiters survive.
        return str_replace($text, $num.'/'.$den, $trimmed);
    }

    /**
     * Picks the grid layout for a list of options.
     *
     * Each option is analysed, candidate layouts (4/2/1 columns) are scored
     * with layoutScore(), and the candidate with the lowest score wins. The
     * 1-column layout is always a candidate; 4- and 2-column layouts are only
     * considered when the length/width gates below allow them.
     *
     * @param array<int|string, mixed> $options option texts; each value is cast to string
     * @param string $context 'grading' selects tighter thresholds, anything else the default ones
     * @return array{class:string,layout:string,max_length:int,max_width_units:float,avg_width_units:float,width_std_dev:float,has_complex_formula:bool,opt_count:int}
     */
    public function decide(array $options, string $context = 'exam'): array
    {
        $optCount = count($options);
        $optionMetas = [];
        $widthUnitsList = [];
        $maxOptionLength = 0;
        $maxWidthUnits = 0.0;
        $sumWidthUnits = 0.0;
        $compactMathCount = 0;
        $plainCompactCount = 0;
        $hasComplexFormulaOption = false;

        foreach ($options as $option) {
            $meta = $this->analyzeOption((string) $option);
            $optionMetas[] = $meta;
            $widthUnitsList[] = $meta['width_units'];
            $sumWidthUnits += $meta['width_units'];
            $maxOptionLength = max($maxOptionLength, $meta['effective_length']);
            $maxWidthUnits = max($maxWidthUnits, $meta['width_units']);
            if ($meta['is_compact_math']) {
                $compactMathCount++;
            }
            if ($meta['is_plain_compact']) {
                $plainCompactCount++;
            }
            $hasComplexFormulaOption = $hasComplexFormulaOption || $meta['is_complex_formula'];
        }

        [$grid4Threshold, $grid2Threshold, $grid4WidthCap, $grid2WidthCap, $grid1WidthCap] = $this->thresholdsFor($context);

        $avgWidthUnits = $optCount > 0 ? ($sumWidthUnits / $optCount) : 0.0;
        $widthStdDev = sqrt($this->variance($widthUnitsList, $avgWidthUnits));
        $maxAvgRatio = $avgWidthUnits > 0 ? ($maxWidthUnits / $avgWidthUnits) : 0.0;

        // Four columns are only enabled when the options are compact overall and
        // their widths are evenly distributed, so that a single long option
        // cannot push option D onto a new line.
        $allowGrid4ByWidth = $maxWidthUnits <= ($grid4WidthCap * 0.92)
            && $maxAvgRatio <= 1.42
            && $widthStdDev <= 1.65;

        $allCompactMath = $optCount > 0 && $compactMathCount === $optCount;
        $allPlainCompact = $plainCompactCount === $optCount;

        $allowGrid4ByCompactMath = $optCount === 4
            && $allCompactMath
            && $maxWidthUnits <= ($grid4WidthCap * 1.04)
            && $widthStdDev <= 2.1;

        $allowGrid4ByPlainCompact = $optCount === 4
            && $allPlainCompact
            && $maxOptionLength <= 9
            && $maxWidthUnits <= ($grid4WidthCap * 1.20)
            && $widthStdDev <= 2.4;

        $forceGrid4ByPlainShort = $optCount === 4
            && $allPlainCompact
            && $maxOptionLength <= 8
            && ! $hasComplexFormulaOption;

        $canTryGrid4 = $optCount <= 4
            && ! $hasComplexFormulaOption
            && (
                ($maxOptionLength <= $grid4Threshold && $allowGrid4ByWidth)
                || $allowGrid4ByCompactMath
                || $allowGrid4ByPlainCompact
                || $forceGrid4ByPlainShort
            );

        $canTryGrid2 = $maxOptionLength <= $grid2Threshold
            && $maxWidthUnits <= ($grid2WidthCap * 1.18);

        // Insertion order matters: on equal scores the earlier candidate wins.
        $layoutCandidates = [];
        if ($canTryGrid4) {
            $layoutCandidates['options-grid-4'] = $this->layoutScore($optionMetas, 4, $grid4WidthCap);
        }
        if ($canTryGrid2) {
            $layoutCandidates['options-grid-2'] = $this->layoutScore($optionMetas, 2, $grid2WidthCap);
        }
        $layoutCandidates['options-grid-1'] = $this->layoutScore($optionMetas, 1, $grid1WidthCap);

        // If two columns are feasible, avoid degrading to one column unless
        // one column is significantly better.
        if (isset($layoutCandidates['options-grid-2'])) {
            $twoColScore = $layoutCandidates['options-grid-2']['score'];
            $oneColScore = $layoutCandidates['options-grid-1']['score'];
            if ($twoColScore <= ($oneColScore + 1.6)) {
                $layoutCandidates['options-grid-2']['score'] -= 0.35;
            }
        }

        $selectedClass = 'options-grid-1';
        $selectedScore = PHP_FLOAT_MAX;
        foreach ($layoutCandidates as $class => $candidate) {
            if ($candidate['score'] < $selectedScore) {
                $selectedClass = $class;
                $selectedScore = $candidate['score'];
            }
        }

        return [
            'class' => $selectedClass,
            'layout' => self::LAYOUT_LABELS[$selectedClass],
            'max_length' => $maxOptionLength,
            'max_width_units' => $maxWidthUnits,
            'avg_width_units' => $avgWidthUnits,
            'width_std_dev' => $widthStdDev,
            'has_complex_formula' => $hasComplexFormulaOption,
            'opt_count' => $optCount,
        ];
    }

    /**
     * Computes the per-option metrics used by decide() and layoutScore().
     *
     * HTML tags are removed, entities decoded and all whitespace dropped before
     * measuring. Options classified as complex formulas get 6 added to their
     * effective length.
     *
     * @return array{effective_length:int,raw_length:int,width_units:float,height_units:float,is_complex_formula:bool,is_compact_math:bool,is_plain_compact:bool,has_stacked_fraction:bool}
     */
    private function analyzeOption(string $option): array
    {
        $optionText = html_entity_decode($this->stripHtmlTags($option), ENT_QUOTES | ENT_HTML5, 'UTF-8');
        $optionText = preg_replace('/\s+/u', '', $optionText) ?? '';
        $optionTextNoDollar = preg_replace('/^\$(.*)\$$/u', '$1', $optionText) ?? $optionText;

        $rawLength = mb_strlen($optionText, 'UTF-8');
        $effectiveLength = $rawLength;

        $isSimpleCompactMath = preg_match(
            '/^-?[0-9a-zA-Z\x{221A}]+(?:\/[0-9a-zA-Z\x{221A}]+)?$/u',
            $optionTextNoDollar
        ) === 1;
        $isCompactLatexFraction = preg_match(
            '/^\\\\d?frac\{[-+0-9a-zA-Z\\\\\x{221A}\^\(\)]+\}\{[-+0-9a-zA-Z\\\\\x{221A}\^\(\)]+\}$/u',
            $optionTextNoDollar
        ) === 1;
        $isCompactLatexDegree = preg_match(
            '/^-?[0-9]+(?:\.[0-9]+)?(?:\^\{?\\\\circ\}?|°)$/u',
            $optionTextNoDollar
        ) === 1;
        $isCompactMath = $isSimpleCompactMath || $isCompactLatexFraction || $isCompactLatexDegree;

        // "Plain compact": at most 10 characters and either no LaTeX command at
        // all or just a \pm / \mp followed by digits.
        $isSimpleSymbolLatex = preg_match('/^\\\\(pm|mp)\s*[0-9]+$/u', $optionTextNoDollar) === 1;
        $hasAnyLatexCommand = preg_match('/\\\\[a-zA-Z]+/u', $optionTextNoDollar) === 1;
        $isPlainCompact = (! $hasAnyLatexCommand || $isSimpleSymbolLatex)
            && mb_strlen($optionTextNoDollar, 'UTF-8') <= 10;

        $hasLatexCmd = preg_match(
            '/\\\\(frac|dfrac|sqrt|log|sin|cos|tan|cdot|times|left|right|begin|end)/u',
            $optionText
        ) === 1;

        // Short expressions that can safely be turned into an inline fraction
        // do not incur the stacked-fraction height penalty.
        $hasStackedFraction = ! $isCompactLatexFraction
            && preg_match('/\\\\d?frac\{[^{}]+\}\{[^{}]+\}/u', $optionTextNoDollar) === 1;

        $sqrtCount = (int) preg_match_all('/\\\\sqrt|√/u', $optionText);
        $supCount = (int) preg_match_all('/\^/u', $optionText);
        $operatorCount = (int) preg_match_all(self::OPERATOR_PATTERN, $optionText);
        $hasBrackets = preg_match(self::BRACKET_PATTERN, $optionText) === 1;

        $isComplexFormula = ! $isCompactMath
            && ($hasLatexCmd || $operatorCount >= 2 || ($hasBrackets && $rawLength >= 8));
        if ($isComplexFormula) {
            $effectiveLength += 6;
        }

        $heightUnits = 1.0
            + ($hasStackedFraction ? 0.72 : 0.0)
            + min(0.36, $sqrtCount * 0.08)
            + min(0.28, $supCount * 0.07);

        return [
            'effective_length' => $effectiveLength,
            'raw_length' => $rawLength,
            'width_units' => $this->estimateWidthUnits($optionText),
            'height_units' => $heightUnits,
            'is_complex_formula' => $isComplexFormula,
            'is_compact_math' => $isCompactMath,
            'is_plain_compact' => $isPlainCompact,
            'has_stacked_fraction' => $hasStackedFraction,
        ];
    }

    /**
     * Removes HTML comments and well-formed HTML tags from a string.
     *
     * Unlike strip_tags(), a bare `<` that does not start a well-formed tag
     * (as in `x<3` or `1<x<5`) is kept, so inequality options are measured in
     * full instead of being truncated at the `<`.
     */
    private function stripHtmlTags(string $html): string
    {
        $pattern = '/<!--.*?-->|<\/?[a-zA-Z][a-zA-Z0-9:-]*(?:\s[^<>]*)?\/?>/su';

        // preg_replace() yields null on invalid UTF-8; fall back to strip_tags().
        return preg_replace($pattern, '', $html) ?? strip_tags($html);
    }

    /**
     * Returns the length/width thresholds for a rendering context.
     *
     * On the grading page the stem + answer area is more compact, so the
     * thresholds are more conservative there.
     *
     * @return array{0:int,1:int,2:float,3:float,4:float} [grid4 max length, grid2 max length, grid4 width cap, grid2 width cap, grid1 width cap]
     */
    private function thresholdsFor(string $context): array
    {
        return match ($context) {
            'grading' => [10, 24, 10.2, 21.0, 44.0],
            default => [12, 28, 11.5, 23.5, 48.0],
        };
    }

    /**
     * Estimates the rendered width of an option in abstract "width units".
     *
     * Each character contributes a weight depending on its class, and a fixed
     * 2.2 is added for the option label and left padding. An empty string
     * yields 0.0 (without that fixed allowance).
     */
    private function estimateWidthUnits(string $text): float
    {
        if ($text === '') {
            return 0.0;
        }

        // Light TeX normalisation: reduce the influence of command names on
        // the width estimate.
        $normalized = preg_replace('/\\\\(left|right|displaystyle)/u', '', $text) ?? $text;
        $normalized = preg_replace('/\\\\(frac|dfrac)\{([^{}]+)\}\{([^{}]+)\}/u', '($2/$3)', $normalized) ?? $normalized;
        $normalized = preg_replace('/\\\\sqrt\{([^{}]+)\}/u', '√($1)', $normalized) ?? $normalized;

        $chars = preg_split('//u', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        $units = 0.0;
        foreach ($chars as $ch) {
            // First matching class wins, in this order.
            $units += match (true) {
                preg_match('/[\x{4e00}-\x{9fff}]/u', $ch) === 1 => 1.0,
                preg_match('/[A-Za-z]/u', $ch) === 1 => 0.62,
                preg_match('/[0-9]/u', $ch) === 1 => 0.58,
                preg_match(self::OPERATOR_PATTERN, $ch) === 1 => 0.45,
                preg_match(self::BRACKET_PATTERN, $ch) === 1 => 0.35,
                $ch === '√' => 0.55,
                default => 0.5,
            };
        }

        // Compensation for the option label ("A.") and the left padding.
        return $units + 2.2;
    }

    /**
     * Scores a candidate layout; lower is better.
     *
     * The score combines: share of options wider than the column cap, extra
     * wrapped lines per option, variation of row heights, share of stacked
     * fractions (penalised more in 4 columns) and a fixed whitespace penalty
     * for 1- and 2-column layouts.
     *
     * @param array<int,array{width_units:float,height_units:float,has_stacked_fraction:bool}> $optionMetas
     * @param int $cols number of columns of the candidate layout
     * @param float $colCap width cap of one column, in width units
     * @return array{score:float}
     */
    private function layoutScore(array $optionMetas, int $cols, float $colCap): array
    {
        $count = count($optionMetas);
        if ($count === 0) {
            return ['score' => 0.0];
        }

        $stackedExtraWidth = $cols === 4 ? 1.6 : 0.6;
        $overflowCount = 0;
        $stackedFractionCount = 0;
        $lineWrapPenalty = 0.0;
        $rowHeights = [];

        foreach ($optionMetas as $idx => $meta) {
            $isStacked = $meta['has_stacked_fraction'];
            $effectiveWidth = $meta['width_units'] + ($isStacked ? $stackedExtraWidth : 0.0);

            if ($effectiveWidth > $colCap) {
                $overflowCount++;
            }
            if ($isStacked) {
                $stackedFractionCount++;
            }

            $lines = max(1.0, ceil($effectiveWidth / max(1.0, $colCap)));
            $lineWrapPenalty += max(0.0, $lines - 1.0);

            // A row is as tall as its tallest item.
            $rowIndex = intdiv($idx, $cols);
            $rowHeights[$rowIndex] = max($rowHeights[$rowIndex] ?? 0.0, $meta['height_units'] * $lines);
        }

        $overflowRate = $overflowCount / $count;
        $lineWrapRate = $lineWrapPenalty / $count;
        $stackedRate = $stackedFractionCount / $count;

        $rowAvg = array_sum($rowHeights) / max(1, count($rowHeights));
        $rowStd = sqrt($this->variance(array_values($rowHeights), $rowAvg));
        $rowCv = $rowAvg > 0 ? ($rowStd / $rowAvg) : 0.0;

        $whitespacePenalty = match ($cols) {
            1 => 2.4,
            2 => 0.7,
            default => 0.0,
        };

        $score = ($overflowRate * 120.0)
            + ($lineWrapRate * 10.0)
            + ($rowCv * 5.0)
            + ($stackedRate * ($cols === 4 ? 2.6 : 0.8))
            + $whitespacePenalty;

        return ['score' => $score];
    }

    /**
     * Population variance of `$values` around the supplied mean `$avg`.
     * Returns 0.0 for an empty list.
     *
     * @param array<int,float> $values
     */
    private function variance(array $values, float $avg): float
    {
        if ($values === []) {
            return 0.0;
        }

        $sumOfSquares = 0.0;
        foreach ($values as $value) {
            $delta = $value - $avg;
            $sumOfSquares += ($delta * $delta);
        }

        return $sumOfSquares / count($values);
    }

    /**
     * Splits a string that is exactly one `\frac{...}{...}` / `\dfrac{...}{...}`
     * into numerator and denominator (nested braces allowed).
     *
     * @return array{0:string,1:string}|null null when `$text` is not exactly one fraction
     */
    private function extractSingleFractionParts(string $text): ?array
    {
        if (preg_match('/^\\\\d?frac/u', $text, $prefix) !== 1) {
            return null;
        }

        // readBalancedBraces() itself rejects offsets that do not point at `{`.
        [$num, $next] = $this->readBalancedBraces($text, mb_strlen($prefix[0], 'UTF-8'));
        if ($num === null) {
            return null;
        }

        [$den, $end] = $this->readBalancedBraces($text, $next);
        if ($den === null) {
            return null;
        }

        // The fraction must run exactly to the end of the string, so that
        // input such as "\frac{a}{b}\text{cm}" is not mangled.
        if ($end !== mb_strlen($text, 'UTF-8')) {
            return null;
        }

        return [$num, $den];
    }

    /**
     * Tells whether `$expr` contains `+`, `-` or `*` anywhere except as its
     * first character (a leading sign is not a binary operator).
     */
    private function hasBinaryOperator(string $expr): bool
    {
        return preg_match('/(?<!^)[+\-*]/u', $expr) === 1;
    }

    /**
     * Reads one balanced `{...}` group starting at character offset
     * `$startOffset`.
     *
     * @param string $text UTF-8 text
     * @param int $startOffset character (not byte) offset that must point at `{`
     * @return array{0:string|null,1:int} [content without the outer braces, offset just past the
     *                                    closing brace]; content is null when there is no `{` at
     *                                    the offset or the group is never closed
     */
    private function readBalancedBraces(string $text, int $startOffset): array
    {
        // Split once so that each character access is O(1).
        $chars = mb_str_split($text, 1, 'UTF-8');
        $len = count($chars);

        if (($chars[$startOffset] ?? null) !== '{') {
            return [null, $startOffset];
        }

        $depth = 0;
        $buffer = '';
        for ($i = $startOffset; $i < $len; $i++) {
            $ch = $chars[$i];

            if ($ch === '{') {
                $depth++;
                if ($depth === 1) {
                    // Outermost opening brace is not part of the content.
                    continue;
                }
            } elseif ($ch === '}') {
                $depth--;
                if ($depth === 0) {
                    return [$buffer, $i + 1];
                }
            }

            $buffer .= $ch;
        }

        // Ran off the end without closing the group.
        return [null, $len];
    }
}
|