OptionLayoutDecider.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. <?php
  2. namespace App\Support;
  3. class OptionLayoutDecider
  4. {
  5. public function normalizeCompactMathForDisplay(string $option): string
  6. {
  7. $trimmed = trim($option);
  8. if ($trimmed === '') {
  9. return $option;
  10. }
  11. $text = preg_replace('/^\$(.*)\$$/u', '$1', $trimmed) ?? $trimmed;
  12. $parts = $this->extractSingleFractionParts($text);
  13. if ($parts === null) {
  14. return $option;
  15. }
  16. [$num, $den] = $parts;
  17. $compactPart = '/^[\-+0-9a-zA-Z\x{221A}\\\\{}]+$/u';
  18. if (
  19. preg_match($compactPart, $num) !== 1
  20. || preg_match($compactPart, $den) !== 1
  21. || preg_match('/[=<>]/u', $num.$den) === 1
  22. || $this->hasBinaryOperator($num)
  23. || $this->hasBinaryOperator($den)
  24. ) {
  25. return $option;
  26. }
  27. return str_replace($text, $num.'/'.$den, $trimmed);
  28. }
  29. /**
  30. * @param array<int|string, mixed> $options
  31. * @return array{class:string,layout:string,max_length:int,max_width_units:float,avg_width_units:float,width_std_dev:float,has_complex_formula:bool,opt_count:int}
  32. */
  33. public function decide(array $options, string $context = 'exam'): array
  34. {
  35. $optCount = count($options);
  36. $maxOptionLength = 0;
  37. $maxWidthUnits = 0.0;
  38. $widthUnitsList = [];
  39. $sumWidthUnits = 0.0;
  40. $compactMathCount = 0;
  41. $plainCompactCount = 0;
  42. $optionMetas = [];
  43. $hasComplexFormulaOption = false;
  44. foreach ($options as $option) {
  45. $optionMeta = $this->analyzeOption((string) $option);
  46. $maxOptionLength = max($maxOptionLength, $optionMeta['effective_length']);
  47. $maxWidthUnits = max($maxWidthUnits, $optionMeta['width_units']);
  48. $widthUnitsList[] = $optionMeta['width_units'];
  49. $sumWidthUnits += $optionMeta['width_units'];
  50. if ($optionMeta['is_compact_math']) {
  51. $compactMathCount++;
  52. }
  53. if ($optionMeta['is_plain_compact']) {
  54. $plainCompactCount++;
  55. }
  56. $optionMetas[] = $optionMeta;
  57. $hasComplexFormulaOption = $hasComplexFormulaOption || $optionMeta['is_complex_formula'];
  58. }
  59. [$grid4Threshold, $grid2Threshold, $grid4WidthCap, $grid2WidthCap, $grid1WidthCap] = $this->thresholdsFor($context);
  60. $avgWidthUnits = $optCount > 0 ? ($sumWidthUnits / $optCount) : 0.0;
  61. $widthVariance = $this->variance($widthUnitsList, $avgWidthUnits);
  62. $widthStdDev = sqrt($widthVariance);
  63. $maxAvgRatio = $avgWidthUnits > 0 ? ($maxWidthUnits / $avgWidthUnits) : 0.0;
  64. // 4列仅在“整体紧凑 + 选项宽度分布均匀”时启用,避免单个长选项把 D 挤换行
  65. $allowGrid4ByWidth = $maxWidthUnits <= ($grid4WidthCap * 0.92)
  66. && $maxAvgRatio <= 1.42
  67. && $widthStdDev <= 1.65;
  68. $allCompactMath = $optCount > 0 && $compactMathCount === $optCount;
  69. $allowGrid4ByCompactMath = $optCount === 4
  70. && $allCompactMath
  71. && $maxWidthUnits <= ($grid4WidthCap * 1.04)
  72. && $widthStdDev <= 2.1;
  73. $allowGrid4ByPlainCompact = $optCount === 4
  74. && $plainCompactCount === $optCount
  75. && $maxOptionLength <= 9
  76. && $maxWidthUnits <= ($grid4WidthCap * 1.20)
  77. && $widthStdDev <= 2.4;
  78. $forceGrid4ByPlainShort = $optCount === 4
  79. && $plainCompactCount === $optCount
  80. && $maxOptionLength <= 8
  81. && ! $hasComplexFormulaOption;
  82. $canTryGrid4 = $optCount <= 4
  83. && ! $hasComplexFormulaOption
  84. && (
  85. (
  86. $maxOptionLength <= $grid4Threshold
  87. && $allowGrid4ByWidth
  88. )
  89. || $allowGrid4ByCompactMath
  90. || $allowGrid4ByPlainCompact
  91. || $forceGrid4ByPlainShort
  92. );
  93. $canTryGrid2 = $maxOptionLength <= $grid2Threshold && $maxWidthUnits <= ($grid2WidthCap * 1.18);
  94. $layoutCandidates = [];
  95. if ($canTryGrid4) {
  96. $layoutCandidates['options-grid-4'] = $this->layoutScore($optionMetas, 4, $grid4WidthCap);
  97. }
  98. if ($canTryGrid2) {
  99. $layoutCandidates['options-grid-2'] = $this->layoutScore($optionMetas, 2, $grid2WidthCap);
  100. }
  101. $layoutCandidates['options-grid-1'] = $this->layoutScore($optionMetas, 1, $grid1WidthCap);
  102. // 如果2列可行,尽量避免退化到1列(除非1列显著更优)
  103. if (isset($layoutCandidates['options-grid-2'], $layoutCandidates['options-grid-1'])) {
  104. $s2 = $layoutCandidates['options-grid-2']['score'];
  105. $s1 = $layoutCandidates['options-grid-1']['score'];
  106. if ($s2 <= ($s1 + 1.6)) {
  107. $layoutCandidates['options-grid-2']['score'] -= 0.35;
  108. }
  109. }
  110. $selectedClass = 'options-grid-1';
  111. $selectedScore = PHP_FLOAT_MAX;
  112. foreach ($layoutCandidates as $class => $meta) {
  113. if ($meta['score'] < $selectedScore) {
  114. $selectedClass = $class;
  115. $selectedScore = $meta['score'];
  116. }
  117. }
  118. if ($selectedClass === 'options-grid-4') {
  119. return [
  120. 'class' => 'options-grid-4',
  121. 'layout' => '4列布局',
  122. 'max_length' => $maxOptionLength,
  123. 'max_width_units' => $maxWidthUnits,
  124. 'avg_width_units' => $avgWidthUnits,
  125. 'width_std_dev' => $widthStdDev,
  126. 'has_complex_formula' => $hasComplexFormulaOption,
  127. 'opt_count' => $optCount,
  128. ];
  129. }
  130. if ($selectedClass === 'options-grid-2') {
  131. return [
  132. 'class' => 'options-grid-2',
  133. 'layout' => '2列布局',
  134. 'max_length' => $maxOptionLength,
  135. 'max_width_units' => $maxWidthUnits,
  136. 'avg_width_units' => $avgWidthUnits,
  137. 'width_std_dev' => $widthStdDev,
  138. 'has_complex_formula' => $hasComplexFormulaOption,
  139. 'opt_count' => $optCount,
  140. ];
  141. }
  142. return [
  143. 'class' => 'options-grid-1',
  144. 'layout' => '1列布局',
  145. 'max_length' => $maxOptionLength,
  146. 'max_width_units' => $maxWidthUnits,
  147. 'avg_width_units' => $avgWidthUnits,
  148. 'width_std_dev' => $widthStdDev,
  149. 'has_complex_formula' => $hasComplexFormulaOption,
  150. 'opt_count' => $optCount,
  151. ];
  152. }
  153. /**
  154. * @return array{effective_length:int,raw_length:int,width_units:float,height_units:float,is_complex_formula:bool,is_compact_math:bool,is_plain_compact:bool,has_stacked_fraction:bool}
  155. */
  156. private function analyzeOption(string $option): array
  157. {
  158. $optionText = html_entity_decode(strip_tags($option), ENT_QUOTES | ENT_HTML5, 'UTF-8');
  159. $optionText = preg_replace('/\s+/u', '', $optionText) ?? '';
  160. $optionTextNoDollar = preg_replace('/^\$(.*)\$$/u', '$1', $optionText) ?? $optionText;
  161. $rawLength = mb_strlen($optionText, 'UTF-8');
  162. $optionLength = $rawLength;
  163. $isSimpleCompactMath = preg_match('/^-?[0-9a-zA-Z\x{221A}]+(?:\/[0-9a-zA-Z\x{221A}]+)?$/u', $optionTextNoDollar) === 1;
  164. $isCompactLatexFraction = preg_match(
  165. '/^\\\\d?frac\{[-+0-9a-zA-Z\\\\\x{221A}\^\(\)]+\}\{[-+0-9a-zA-Z\\\\\x{221A}\^\(\)]+\}$/u',
  166. $optionTextNoDollar
  167. ) === 1;
  168. $isCompactLatexDegree = preg_match(
  169. '/^-?[0-9]+(?:\.[0-9]+)?(?:\^\{?\\\\circ\}?|°)$/u',
  170. $optionTextNoDollar
  171. ) === 1;
  172. $isCompactMath = $isSimpleCompactMath || $isCompactLatexFraction || $isCompactLatexDegree;
  173. $isSimpleSymbolLatex = preg_match('/^\\\\(pm|mp)\s*[0-9]+$/u', $optionTextNoDollar) === 1;
  174. $isPlainCompact = (
  175. preg_match('/\\\\[a-zA-Z]+/u', $optionTextNoDollar) !== 1
  176. || $isSimpleSymbolLatex
  177. ) && mb_strlen($optionTextNoDollar, 'UTF-8') <= 10;
  178. $hasLatexCmd = preg_match('/\\\\(frac|dfrac|sqrt|log|sin|cos|tan|cdot|times|left|right|begin|end)/u', $optionText) === 1;
  179. $hasStackedFraction = preg_match('/\\\\d?frac\{[^{}]+\}\{[^{}]+\}/u', $optionTextNoDollar) === 1;
  180. // 对可安全转为行内分式的短表达,不计入“堆叠分式”高度惩罚
  181. if ($isCompactLatexFraction) {
  182. $hasStackedFraction = false;
  183. }
  184. $sqrtCount = preg_match_all('/\\\\sqrt|√/u', $optionText);
  185. $supCount = preg_match_all('/\^/u', $optionText);
  186. $operatorCount = preg_match_all('/[=<>+\-*\/\^_]/u', $optionText);
  187. $hasBrackets = preg_match('/[()\(\)\[\]\{\}]/u', $optionText) === 1;
  188. $isComplexFormula = ! $isCompactMath
  189. && ($hasLatexCmd || $operatorCount >= 2 || ($hasBrackets && $optionLength >= 8));
  190. if ($isComplexFormula) {
  191. $optionLength += 6;
  192. }
  193. return [
  194. 'effective_length' => $optionLength,
  195. 'raw_length' => $rawLength,
  196. 'width_units' => $this->estimateWidthUnits($optionText),
  197. 'height_units' => 1.0
  198. + ($hasStackedFraction ? 0.72 : 0.0)
  199. + min(0.36, ((int) $sqrtCount) * 0.08)
  200. + min(0.28, ((int) $supCount) * 0.07),
  201. 'is_complex_formula' => $isComplexFormula,
  202. 'is_compact_math' => $isCompactMath,
  203. 'is_plain_compact' => $isPlainCompact,
  204. 'has_stacked_fraction' => $hasStackedFraction,
  205. ];
  206. }
  207. /**
  208. * 判卷页面中题干+答案区域更紧凑,阈值应更保守。
  209. *
  210. * @return array{0:int,1:int,2:float,3:float,4:float}
  211. */
  212. private function thresholdsFor(string $context): array
  213. {
  214. if ($context === 'grading') {
  215. return [10, 24, 10.2, 21.0, 44.0];
  216. }
  217. return [12, 28, 11.5, 23.5, 48.0];
  218. }
  219. private function estimateWidthUnits(string $text): float
  220. {
  221. if ($text === '') {
  222. return 0.0;
  223. }
  224. // 简单TeX归一:减少命令名对宽度估计的干扰
  225. $normalized = preg_replace('/\\\\(left|right|displaystyle)/u', '', $text) ?? $text;
  226. $normalized = preg_replace('/\\\\(frac|dfrac)\{([^{}]+)\}\{([^{}]+)\}/u', '($2/$3)', $normalized) ?? $normalized;
  227. $normalized = preg_replace('/\\\\sqrt\{([^{}]+)\}/u', '√($1)', $normalized) ?? $normalized;
  228. $chars = preg_split('//u', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];
  229. $units = 0.0;
  230. foreach ($chars as $ch) {
  231. if (preg_match('/[\x{4e00}-\x{9fff}]/u', $ch)) {
  232. $units += 1.0;
  233. } elseif (preg_match('/[A-Za-z]/u', $ch)) {
  234. $units += 0.62;
  235. } elseif (preg_match('/[0-9]/u', $ch)) {
  236. $units += 0.58;
  237. } elseif (preg_match('/[=<>+\-*\/\^_]/u', $ch)) {
  238. $units += 0.45;
  239. } elseif (preg_match('/[()\(\)\[\]\{\}]/u', $ch)) {
  240. $units += 0.35;
  241. } elseif ($ch === '√') {
  242. $units += 0.55;
  243. } else {
  244. $units += 0.5;
  245. }
  246. }
  247. // 选项标签(A.)和左侧间距补偿
  248. return $units + 2.2;
  249. }
  250. /**
  251. * @param array<int,array{width_units:float,height_units:float,has_stacked_fraction:bool}> $optionMetas
  252. * @return array{score:float}
  253. */
  254. private function layoutScore(array $optionMetas, int $cols, float $colCap): array
  255. {
  256. $count = count($optionMetas);
  257. if ($count === 0) {
  258. return ['score' => 0.0];
  259. }
  260. $overflowCount = 0;
  261. $lineWrapPenalty = 0.0;
  262. $rowHeights = [];
  263. $stackedFractionCount = 0;
  264. foreach ($optionMetas as $idx => $meta) {
  265. $effectiveWidth = $meta['width_units'] + ($meta['has_stacked_fraction'] ? ($cols === 4 ? 1.6 : 0.6) : 0.0);
  266. if ($effectiveWidth > $colCap) {
  267. $overflowCount++;
  268. }
  269. $lines = max(1.0, ceil($effectiveWidth / max(1.0, $colCap)));
  270. $lineWrapPenalty += max(0.0, $lines - 1.0);
  271. $itemHeight = $meta['height_units'] * $lines;
  272. $rowIndex = intdiv($idx, $cols);
  273. if (! isset($rowHeights[$rowIndex])) {
  274. $rowHeights[$rowIndex] = 0.0;
  275. }
  276. $rowHeights[$rowIndex] = max($rowHeights[$rowIndex], $itemHeight);
  277. if ($meta['has_stacked_fraction']) {
  278. $stackedFractionCount++;
  279. }
  280. }
  281. $overflowRate = $overflowCount / $count;
  282. $lineWrapRate = $lineWrapPenalty / $count;
  283. $rowAvg = array_sum($rowHeights) / max(1, count($rowHeights));
  284. $rowStd = sqrt($this->variance(array_values($rowHeights), $rowAvg));
  285. $rowCv = $rowAvg > 0 ? ($rowStd / $rowAvg) : 0.0;
  286. $stackedRate = $stackedFractionCount / $count;
  287. $whitespacePenalty = match ($cols) {
  288. 1 => 2.4,
  289. 2 => 0.7,
  290. default => 0.0,
  291. };
  292. $score = ($overflowRate * 120.0)
  293. + ($lineWrapRate * 10.0)
  294. + ($rowCv * 5.0)
  295. + ($stackedRate * ($cols === 4 ? 2.6 : 0.8))
  296. + $whitespacePenalty;
  297. return ['score' => $score];
  298. }
  299. /**
  300. * @param array<int,float> $values
  301. */
  302. private function variance(array $values, float $avg): float
  303. {
  304. if (empty($values)) {
  305. return 0.0;
  306. }
  307. $sum = 0.0;
  308. foreach ($values as $v) {
  309. $d = $v - $avg;
  310. $sum += ($d * $d);
  311. }
  312. return $sum / count($values);
  313. }
  314. /**
  315. * @return array{0:string,1:string}|null
  316. */
  317. private function extractSingleFractionParts(string $text): ?array
  318. {
  319. if (! preg_match('/^\\\\d?frac/u', $text)) {
  320. return null;
  321. }
  322. $offset = preg_match('/^\\\\dfrac/u', $text) ? 6 : 5; // \dfrac or \frac
  323. $len = mb_strlen($text, 'UTF-8');
  324. if ($offset >= $len || mb_substr($text, $offset, 1, 'UTF-8') !== '{') {
  325. return null;
  326. }
  327. [$num, $next] = $this->readBalancedBraces($text, $offset);
  328. if ($num === null || $next >= $len || mb_substr($text, $next, 1, 'UTF-8') !== '{') {
  329. return null;
  330. }
  331. [$den, $end] = $this->readBalancedBraces($text, $next);
  332. if ($den === null) {
  333. return null;
  334. }
  335. // 必须刚好到结尾,避免把 "\frac{a}{b}\text{cm}" 这类误改坏
  336. if ($end !== $len) {
  337. return null;
  338. }
  339. return [$num, $den];
  340. }
  341. private function hasBinaryOperator(string $expr): bool
  342. {
  343. return preg_match('/(?<!^)[+\-*]/u', $expr) === 1;
  344. }
  345. /**
  346. * @return array{0:string|null,1:int}
  347. */
  348. private function readBalancedBraces(string $text, int $startOffset): array
  349. {
  350. $len = mb_strlen($text, 'UTF-8');
  351. if ($startOffset >= $len || mb_substr($text, $startOffset, 1, 'UTF-8') !== '{') {
  352. return [null, $startOffset];
  353. }
  354. $depth = 0;
  355. $buffer = '';
  356. for ($i = $startOffset; $i < $len; $i++) {
  357. $ch = mb_substr($text, $i, 1, 'UTF-8');
  358. if ($ch === '{') {
  359. $depth++;
  360. if ($depth > 1) {
  361. $buffer .= $ch;
  362. }
  363. continue;
  364. }
  365. if ($ch === '}') {
  366. $depth--;
  367. if ($depth === 0) {
  368. return [$buffer, $i + 1];
  369. }
  370. if ($depth < 0) {
  371. return [null, $i + 1];
  372. }
  373. $buffer .= $ch;
  374. continue;
  375. }
  376. $buffer .= $ch;
  377. }
  378. return [null, $len];
  379. }
  380. }