AnswerSolutionStepMarkerInjector.php 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. <?php
  2. namespace App\Support;
  3. /**
  4. * 待入库解答题:若解析中尚无「步骤 n / 第 n 步」标记,但存在按顺序出现的小问 (1)→(2)→(3),
  5. * 则在合法边界处插入「步骤一:」「步骤二:」…(与判卷 {@see resources/views/components/exam/paper-body.blade.php} 中加方框的规则同源)。
  6. *
  7. * 只对「从 (1) 起顺序递增」的第一条锚点链加前缀,避免文中再次出现 (1) 时被当成新步骤导致「步骤三:(1)」等错乱。
  8. */
  9. final class AnswerSolutionStepMarkerInjector
  10. {
  11. /** 与 paper-body 解答题 $stepPattern 对齐:已有则不再注入 */
  12. private const STEP_HEAD_RE = '/步骤\s*[0-9一二三四五六七八九十百零两]+\s*[::..]?|第\s*[0-9一二三四五六七八九十百零两]+\s*步\s*[::..]?/u';
  13. private const MAX_ORDERED_SUBQUESTIONS = 5;
  14. /**
  15. * @param mixed $rawQuestionType questions_tem.question_type / tags 等
  16. */
  17. public static function enrichIfNeeded(string $solution, mixed $rawQuestionType): string
  18. {
  19. $solution = trim($solution);
  20. if ($solution === '') {
  21. return $solution;
  22. }
  23. $t = strtolower(trim((string) $rawQuestionType));
  24. if ($t !== '' && (str_contains($t, 'choice') || str_contains($t, '选择'))) {
  25. return $solution;
  26. }
  27. if ($t !== '' && (str_contains($t, 'fill') || str_contains($t, 'blank') || str_contains($t, '填空'))) {
  28. return $solution;
  29. }
  30. if (preg_match(self::STEP_HEAD_RE, $solution)) {
  31. return $solution;
  32. }
  33. return self::injectOrderedSubQuestionAnchors($solution);
  34. }
  35. private static function injectOrderedSubQuestionAnchors(string $solution): string
  36. {
  37. $offsets = self::collectOrderedSubQuestionByteOffsets($solution, self::MAX_ORDERED_SUBQUESTIONS);
  38. if (count($offsets) < 2) {
  39. return $solution;
  40. }
  41. $insertions = [];
  42. foreach ($offsets as $i => $bytePos) {
  43. $insertions[] = [$bytePos, '步骤'.self::chineseOrdinal($i + 1).':'];
  44. }
  45. usort($insertions, static fn (array $a, array $b): int => $b[0] <=> $a[0]);
  46. $out = $solution;
  47. foreach ($insertions as [$pos, $label]) {
  48. $out = substr($out, 0, $pos).$label.substr($out, $pos);
  49. }
  50. return $out;
  51. }
  52. /**
  53. * 严格按 1、2、3… 顺序在字符串中找第一条 (n) 或 (中文 n),且该位置须处于小问边界(段首或句末标点后)。
  54. *
  55. * @return list<int> UTF-8 字节偏移
  56. */
  57. private static function collectOrderedSubQuestionByteOffsets(string $s, int $maxN): array
  58. {
  59. $offsets = [];
  60. $searchFrom = 0;
  61. for ($n = 1; $n <= $maxN; $n++) {
  62. $hit = self::findNextAnchoredSubQuestion($s, $searchFrom, $n);
  63. if ($hit === null) {
  64. break;
  65. }
  66. [$byteStart, $matchLen] = $hit;
  67. $offsets[] = $byteStart;
  68. $searchFrom = $byteStart + $matchLen;
  69. }
  70. return $offsets;
  71. }
  72. /**
  73. * @return ?array{0: int, 1: int} [byteStart, matchByteLength]
  74. */
  75. private static function findNextAnchoredSubQuestion(string $s, int $searchFrom, int $n): ?array
  76. {
  77. $cn = self::chineseOrdinal($n);
  78. // 全角括号 + 阿拉伯数字(1)(2)在解析/OCR 中极常见;原先仅支持(一)(二)会漏检整条小问链
  79. $pattern = '/(?<![A-Za-z\'\x{2019}\x{2032}])(\(\s*'.$n.'\s*\)|(\s*'.$n.'\s*)|(\s*'.preg_quote($cn, '/').'\s*))\s*[、,;::..]?/u';
  80. $len = strlen($s);
  81. $pos = $searchFrom;
  82. for ($guard = 0; $guard < 8000 && $pos < $len; $guard++) {
  83. if (! preg_match($pattern, $s, $m, PREG_OFFSET_CAPTURE, $pos)) {
  84. return null;
  85. }
  86. $byteStart = (int) ($m[0][1] ?? -1);
  87. if ($byteStart < 0) {
  88. return null;
  89. }
  90. $matched = (string) ($m[0][0] ?? '');
  91. $mLen = strlen($matched);
  92. if ($mLen < 1) {
  93. $pos = $byteStart + 1;
  94. continue;
  95. }
  96. if (self::isSubQuestionAnchorContext($s, $byteStart)) {
  97. return [$byteStart, $mLen];
  98. }
  99. $pos = $byteStart + $mLen;
  100. }
  101. return null;
  102. }
  103. /**
  104. * 小问编号须在段首、换行后或句末标点后,避免正文中的数值括号被当成小问。
  105. */
  106. private static function isSubQuestionAnchorContext(string $s, int $bytePos): bool
  107. {
  108. if ($bytePos <= 0) {
  109. return true;
  110. }
  111. $before = substr($s, 0, $bytePos);
  112. $before = preg_replace('/[ \t\x{3000}]+$/u', '', $before) ?? $before;
  113. if ($before === '') {
  114. return true;
  115. }
  116. if (preg_match('/\R\z/u', $before)) {
  117. return true;
  118. }
  119. $last = mb_substr($before, mb_strlen($before, 'UTF-8') - 1, 1, 'UTF-8');
  120. return $last !== '' && (bool) preg_match('/[。!?;:·….、,,\]\}】〉』」)]/u', $last);
  121. }
  122. private static function chineseOrdinal(int $n): string
  123. {
  124. static $map = [
  125. 1 => '一', 2 => '二', 3 => '三', 4 => '四', 5 => '五',
  126. 6 => '六', 7 => '七', 8 => '八', 9 => '九', 10 => '十',
  127. ];
  128. if (isset($map[$n])) {
  129. return $map[$n];
  130. }
  131. if ($n > 10 && $n <= 19) {
  132. return '十'.$map[$n - 10];
  133. }
  134. return (string) $n;
  135. }
  136. }