OptionLayoutDecider.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. <?php
  2. namespace App\Support;
  3. class OptionLayoutDecider
  4. {
  5. public function normalizeCompactMathForDisplay(string $option): string
  6. {
  7. // 展示层保持数学标准分式(分子/分母上下结构),不做 \frac -> a/b 的文本替换
  8. return $option;
  9. }
  10. /**
  11. * @param array<int|string, mixed> $options
  12. * @return array{class:string,layout:string,max_length:int,max_width_units:float,avg_width_units:float,width_std_dev:float,has_complex_formula:bool,opt_count:int}
  13. */
  14. public function decide(array $options, string $context = 'exam'): array
  15. {
  16. $optCount = count($options);
  17. $maxOptionLength = 0;
  18. $maxWidthUnits = 0.0;
  19. $widthUnitsList = [];
  20. $sumWidthUnits = 0.0;
  21. $compactMathCount = 0;
  22. $plainCompactCount = 0;
  23. $optionMetas = [];
  24. $hasComplexFormulaOption = false;
  25. foreach ($options as $option) {
  26. $optionMeta = $this->analyzeOption((string) $option);
  27. $maxOptionLength = max($maxOptionLength, $optionMeta['effective_length']);
  28. $maxWidthUnits = max($maxWidthUnits, $optionMeta['width_units']);
  29. $widthUnitsList[] = $optionMeta['width_units'];
  30. $sumWidthUnits += $optionMeta['width_units'];
  31. if ($optionMeta['is_compact_math']) {
  32. $compactMathCount++;
  33. }
  34. if ($optionMeta['is_plain_compact']) {
  35. $plainCompactCount++;
  36. }
  37. $optionMetas[] = $optionMeta;
  38. $hasComplexFormulaOption = $hasComplexFormulaOption || $optionMeta['is_complex_formula'];
  39. }
  40. [$grid4Threshold, $grid2Threshold, $grid4WidthCap, $grid2WidthCap, $grid1WidthCap] = $this->thresholdsFor($context);
  41. $avgWidthUnits = $optCount > 0 ? ($sumWidthUnits / $optCount) : 0.0;
  42. $widthVariance = $this->variance($widthUnitsList, $avgWidthUnits);
  43. $widthStdDev = sqrt($widthVariance);
  44. $maxAvgRatio = $avgWidthUnits > 0 ? ($maxWidthUnits / $avgWidthUnits) : 0.0;
  45. // 4列仅在“整体紧凑 + 选项宽度分布均匀”时启用,避免单个长选项把 D 挤换行
  46. $allowGrid4ByWidth = $maxWidthUnits <= ($grid4WidthCap * 0.92)
  47. && $maxAvgRatio <= 1.42
  48. && $widthStdDev <= 1.65;
  49. $allCompactMath = $optCount > 0 && $compactMathCount === $optCount;
  50. $allowGrid4ByCompactMath = $optCount === 4
  51. && $allCompactMath
  52. && $maxWidthUnits <= ($grid4WidthCap * 1.04)
  53. && $widthStdDev <= 2.1;
  54. $allowGrid4ByPlainCompact = $optCount === 4
  55. && $plainCompactCount === $optCount
  56. && $maxOptionLength <= 9
  57. && $maxWidthUnits <= ($grid4WidthCap * 1.20)
  58. && $widthStdDev <= 2.4;
  59. $forceGrid4ByPlainShort = $optCount === 4
  60. && $plainCompactCount === $optCount
  61. && $maxOptionLength <= 8
  62. && ! $hasComplexFormulaOption;
  63. $canTryGrid4 = $optCount <= 4
  64. && ! $hasComplexFormulaOption
  65. && (
  66. (
  67. $maxOptionLength <= $grid4Threshold
  68. && $allowGrid4ByWidth
  69. )
  70. || $allowGrid4ByCompactMath
  71. || $allowGrid4ByPlainCompact
  72. || $forceGrid4ByPlainShort
  73. );
  74. $canTryGrid2 = $maxOptionLength <= $grid2Threshold && $maxWidthUnits <= ($grid2WidthCap * 1.18);
  75. $layoutCandidates = [];
  76. if ($canTryGrid4) {
  77. $layoutCandidates['options-grid-4'] = $this->layoutScore($optionMetas, 4, $grid4WidthCap);
  78. }
  79. if ($canTryGrid2) {
  80. $layoutCandidates['options-grid-2'] = $this->layoutScore($optionMetas, 2, $grid2WidthCap);
  81. }
  82. $layoutCandidates['options-grid-1'] = $this->layoutScore($optionMetas, 1, $grid1WidthCap);
  83. // 如果2列可行,尽量避免退化到1列(除非1列显著更优)
  84. if (isset($layoutCandidates['options-grid-2'], $layoutCandidates['options-grid-1'])) {
  85. $s2 = $layoutCandidates['options-grid-2']['score'];
  86. $s1 = $layoutCandidates['options-grid-1']['score'];
  87. if ($s2 <= ($s1 + 1.6)) {
  88. $layoutCandidates['options-grid-2']['score'] -= 0.35;
  89. }
  90. }
  91. $selectedClass = 'options-grid-1';
  92. $selectedScore = PHP_FLOAT_MAX;
  93. foreach ($layoutCandidates as $class => $meta) {
  94. if ($meta['score'] < $selectedScore) {
  95. $selectedClass = $class;
  96. $selectedScore = $meta['score'];
  97. }
  98. }
  99. if ($selectedClass === 'options-grid-4') {
  100. return [
  101. 'class' => 'options-grid-4',
  102. 'layout' => '4列布局',
  103. 'max_length' => $maxOptionLength,
  104. 'max_width_units' => $maxWidthUnits,
  105. 'avg_width_units' => $avgWidthUnits,
  106. 'width_std_dev' => $widthStdDev,
  107. 'has_complex_formula' => $hasComplexFormulaOption,
  108. 'opt_count' => $optCount,
  109. ];
  110. }
  111. if ($selectedClass === 'options-grid-2') {
  112. return [
  113. 'class' => 'options-grid-2',
  114. 'layout' => '2列布局',
  115. 'max_length' => $maxOptionLength,
  116. 'max_width_units' => $maxWidthUnits,
  117. 'avg_width_units' => $avgWidthUnits,
  118. 'width_std_dev' => $widthStdDev,
  119. 'has_complex_formula' => $hasComplexFormulaOption,
  120. 'opt_count' => $optCount,
  121. ];
  122. }
  123. return [
  124. 'class' => 'options-grid-1',
  125. 'layout' => '1列布局',
  126. 'max_length' => $maxOptionLength,
  127. 'max_width_units' => $maxWidthUnits,
  128. 'avg_width_units' => $avgWidthUnits,
  129. 'width_std_dev' => $widthStdDev,
  130. 'has_complex_formula' => $hasComplexFormulaOption,
  131. 'opt_count' => $optCount,
  132. ];
  133. }
  134. /**
  135. * @return array{effective_length:int,raw_length:int,width_units:float,height_units:float,is_complex_formula:bool,is_compact_math:bool,is_plain_compact:bool,has_stacked_fraction:bool}
  136. */
  137. private function analyzeOption(string $option): array
  138. {
  139. $optionText = html_entity_decode(strip_tags($option), ENT_QUOTES | ENT_HTML5, 'UTF-8');
  140. $optionText = preg_replace('/\s+/u', '', $optionText) ?? '';
  141. $optionTextNoDollar = preg_replace('/^\$(.*)\$$/u', '$1', $optionText) ?? $optionText;
  142. $rawLength = mb_strlen($optionText, 'UTF-8');
  143. $optionLength = $rawLength;
  144. $isSimpleCompactMath = preg_match('/^-?[0-9a-zA-Z\x{221A}]+(?:\/[0-9a-zA-Z\x{221A}]+)?$/u', $optionTextNoDollar) === 1;
  145. $isCompactLatexFraction = preg_match(
  146. '/^[+\-]?\\\\d?frac\{[-+0-9a-zA-Z\\\\\x{221A}\^\(\)]+\}\{[-+0-9a-zA-Z\\\\\x{221A}\^\(\)]+\}$/u',
  147. $optionTextNoDollar
  148. ) === 1;
  149. $isCompactLatexDegree = preg_match(
  150. '/^-?[0-9]+(?:\.[0-9]+)?(?:\^\{?\\\\circ\}?|°)$/u',
  151. $optionTextNoDollar
  152. ) === 1;
  153. $isCompactMath = $isSimpleCompactMath || $isCompactLatexFraction || $isCompactLatexDegree;
  154. $isSimpleSymbolLatex = preg_match('/^\\\\(pm|mp)\s*[0-9]+$/u', $optionTextNoDollar) === 1;
  155. $isPlainCompact = (
  156. preg_match('/\\\\[a-zA-Z]+/u', $optionTextNoDollar) !== 1
  157. || $isSimpleSymbolLatex
  158. ) && mb_strlen($optionTextNoDollar, 'UTF-8') <= 10;
  159. $hasLatexCmd = preg_match('/\\\\(frac|dfrac|sqrt|log|sin|cos|tan|cdot|times|left|right|begin|end)/u', $optionText) === 1;
  160. $hasStackedFraction = preg_match('/\\\\d?frac\{[^{}]+\}\{[^{}]+\}/u', $optionTextNoDollar) === 1;
  161. // 对可安全转为行内分式的短表达,不计入“堆叠分式”高度惩罚
  162. if ($isCompactLatexFraction) {
  163. $hasStackedFraction = false;
  164. }
  165. $sqrtCount = preg_match_all('/\\\\sqrt|√/u', $optionText);
  166. $supCount = preg_match_all('/\^/u', $optionText);
  167. $operatorCount = preg_match_all('/[=<>+\-*\/\^_]/u', $optionText);
  168. $hasBrackets = preg_match('/[()\(\)\[\]\{\}]/u', $optionText) === 1;
  169. $isComplexFormula = ! $isCompactMath
  170. && ($hasLatexCmd || $operatorCount >= 2 || ($hasBrackets && $optionLength >= 8));
  171. if ($isComplexFormula) {
  172. $optionLength += 6;
  173. }
  174. return [
  175. 'effective_length' => $optionLength,
  176. 'raw_length' => $rawLength,
  177. 'width_units' => $this->estimateWidthUnits($optionText),
  178. 'height_units' => 1.0
  179. + ($hasStackedFraction ? 0.72 : 0.0)
  180. + min(0.36, ((int) $sqrtCount) * 0.08)
  181. + min(0.28, ((int) $supCount) * 0.07),
  182. 'is_complex_formula' => $isComplexFormula,
  183. 'is_compact_math' => $isCompactMath,
  184. 'is_plain_compact' => $isPlainCompact,
  185. 'has_stacked_fraction' => $hasStackedFraction,
  186. ];
  187. }
  188. /**
  189. * 判卷页面中题干+答案区域更紧凑,阈值应更保守。
  190. *
  191. * @return array{0:int,1:int,2:float,3:float,4:float}
  192. */
  193. private function thresholdsFor(string $context): array
  194. {
  195. if ($context === 'grading') {
  196. return [10, 24, 10.2, 21.0, 44.0];
  197. }
  198. return [12, 28, 11.5, 23.5, 48.0];
  199. }
  200. private function estimateWidthUnits(string $text): float
  201. {
  202. if ($text === '') {
  203. return 0.0;
  204. }
  205. // 简单TeX归一:减少命令名对宽度估计的干扰
  206. $normalized = preg_replace('/\\\\(left|right|displaystyle)/u', '', $text) ?? $text;
  207. $normalized = preg_replace('/\\\\(frac|dfrac)\{([^{}]+)\}\{([^{}]+)\}/u', '($2/$3)', $normalized) ?? $normalized;
  208. $normalized = preg_replace('/\\\\sqrt\{([^{}]+)\}/u', '√($1)', $normalized) ?? $normalized;
  209. $chars = preg_split('//u', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];
  210. $units = 0.0;
  211. foreach ($chars as $ch) {
  212. if (preg_match('/[\x{4e00}-\x{9fff}]/u', $ch)) {
  213. $units += 1.0;
  214. } elseif (preg_match('/[A-Za-z]/u', $ch)) {
  215. $units += 0.62;
  216. } elseif (preg_match('/[0-9]/u', $ch)) {
  217. $units += 0.58;
  218. } elseif (preg_match('/[=<>+\-*\/\^_]/u', $ch)) {
  219. $units += 0.45;
  220. } elseif (preg_match('/[()\(\)\[\]\{\}]/u', $ch)) {
  221. $units += 0.35;
  222. } elseif ($ch === '√') {
  223. $units += 0.55;
  224. } else {
  225. $units += 0.5;
  226. }
  227. }
  228. // 选项标签(A.)和左侧间距补偿
  229. return $units + 2.2;
  230. }
  231. /**
  232. * @param array<int,array{width_units:float,height_units:float,has_stacked_fraction:bool}> $optionMetas
  233. * @return array{score:float}
  234. */
  235. private function layoutScore(array $optionMetas, int $cols, float $colCap): array
  236. {
  237. $count = count($optionMetas);
  238. if ($count === 0) {
  239. return ['score' => 0.0];
  240. }
  241. $overflowCount = 0;
  242. $lineWrapPenalty = 0.0;
  243. $rowHeights = [];
  244. $stackedFractionCount = 0;
  245. foreach ($optionMetas as $idx => $meta) {
  246. $effectiveWidth = $meta['width_units'] + ($meta['has_stacked_fraction'] ? ($cols === 4 ? 1.6 : 0.6) : 0.0);
  247. if ($effectiveWidth > $colCap) {
  248. $overflowCount++;
  249. }
  250. $lines = max(1.0, ceil($effectiveWidth / max(1.0, $colCap)));
  251. $lineWrapPenalty += max(0.0, $lines - 1.0);
  252. $itemHeight = $meta['height_units'] * $lines;
  253. $rowIndex = intdiv($idx, $cols);
  254. if (! isset($rowHeights[$rowIndex])) {
  255. $rowHeights[$rowIndex] = 0.0;
  256. }
  257. $rowHeights[$rowIndex] = max($rowHeights[$rowIndex], $itemHeight);
  258. if ($meta['has_stacked_fraction']) {
  259. $stackedFractionCount++;
  260. }
  261. }
  262. $overflowRate = $overflowCount / $count;
  263. $lineWrapRate = $lineWrapPenalty / $count;
  264. $rowAvg = array_sum($rowHeights) / max(1, count($rowHeights));
  265. $rowStd = sqrt($this->variance(array_values($rowHeights), $rowAvg));
  266. $rowCv = $rowAvg > 0 ? ($rowStd / $rowAvg) : 0.0;
  267. $stackedRate = $stackedFractionCount / $count;
  268. $whitespacePenalty = match ($cols) {
  269. 1 => 2.4,
  270. 2 => 0.7,
  271. default => 0.0,
  272. };
  273. $score = ($overflowRate * 120.0)
  274. + ($lineWrapRate * 10.0)
  275. + ($rowCv * 5.0)
  276. + ($stackedRate * ($cols === 4 ? 2.6 : 0.8))
  277. + $whitespacePenalty;
  278. return ['score' => $score];
  279. }
  280. /**
  281. * @param array<int,float> $values
  282. */
  283. private function variance(array $values, float $avg): float
  284. {
  285. if (empty($values)) {
  286. return 0.0;
  287. }
  288. $sum = 0.0;
  289. foreach ($values as $v) {
  290. $d = $v - $avg;
  291. $sum += ($d * $d);
  292. }
  293. return $sum / count($values);
  294. }
  295. /**
  296. * @return array{0:string,1:string,2:string}|null
  297. */
  298. private function extractSingleFractionParts(string $text): ?array
  299. {
  300. if (! preg_match('/^([+\-]?)(\\\\d?frac)/u', $text, $m)) {
  301. return null;
  302. }
  303. $prefix = (string) ($m[1] ?? '');
  304. $fracCmd = (string) ($m[2] ?? '\\frac');
  305. $offset = mb_strlen($prefix.$fracCmd, 'UTF-8');
  306. $len = mb_strlen($text, 'UTF-8');
  307. if ($offset >= $len || mb_substr($text, $offset, 1, 'UTF-8') !== '{') {
  308. return null;
  309. }
  310. [$num, $next] = $this->readBalancedBraces($text, $offset);
  311. if ($num === null || $next >= $len || mb_substr($text, $next, 1, 'UTF-8') !== '{') {
  312. return null;
  313. }
  314. [$den, $end] = $this->readBalancedBraces($text, $next);
  315. if ($den === null) {
  316. return null;
  317. }
  318. // 必须刚好到结尾,避免把 "\frac{a}{b}\text{cm}" 这类误改坏
  319. if ($end !== $len) {
  320. return null;
  321. }
  322. return [$prefix, $num, $den];
  323. }
  324. private function hasBinaryOperator(string $expr): bool
  325. {
  326. return preg_match('/(?<!^)[+\-*]/u', $expr) === 1;
  327. }
  328. /**
  329. * @return array{0:string|null,1:int}
  330. */
  331. private function readBalancedBraces(string $text, int $startOffset): array
  332. {
  333. $len = mb_strlen($text, 'UTF-8');
  334. if ($startOffset >= $len || mb_substr($text, $startOffset, 1, 'UTF-8') !== '{') {
  335. return [null, $startOffset];
  336. }
  337. $depth = 0;
  338. $buffer = '';
  339. for ($i = $startOffset; $i < $len; $i++) {
  340. $ch = mb_substr($text, $i, 1, 'UTF-8');
  341. if ($ch === '{') {
  342. $depth++;
  343. if ($depth > 1) {
  344. $buffer .= $ch;
  345. }
  346. continue;
  347. }
  348. if ($ch === '}') {
  349. $depth--;
  350. if ($depth === 0) {
  351. return [$buffer, $i + 1];
  352. }
  353. if ($depth < 0) {
  354. return [null, $i + 1];
  355. }
  356. $buffer .= $ch;
  357. continue;
  358. }
  359. $buffer .= $ch;
  360. }
  361. return [null, $len];
  362. }
  363. }