| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- <?php
- namespace App\Support;
/**
 * Adds explicit step markers to the worked solution of a free-response question
 * before it is stored.
 *
 * When the solution text has no "步骤 n" / "第 n 步" marker yet, but contains
 * sub-question numbers appearing in order, (1) -> (2) -> (3), a label
 * "步骤一:", "步骤二:", ... is inserted in front of each of them, provided the
 * number sits at a legitimate boundary (start of text, after a line break, or
 * after closing punctuation).
 *
 * Only the first chain that starts at (1) and increases one by one is labelled.
 * A later "(1)" in the text is therefore never mistaken for a new step, which
 * would otherwise produce output such as "步骤三:(1)".
 *
 * According to the original author's note, the marker rules mirror the ones the
 * grading view resources/views/components/exam/paper-body.blade.php uses to box
 * steps; that file is not visible from here.
 */
final class AnswerSolutionStepMarkerInjector
{
    /**
     * Detects an existing step heading ("步骤 3", "第三步", ...). When it matches,
     * nothing is injected. The optional trailing separator accepts both ASCII
     * and fullwidth colon / full stop (written as escapes so that the pattern
     * survives encoding normalisation).
     */
    private const STEP_HEAD_RE = '/步骤\s*[0-9一二三四五六七八九十百零两]+\s*[:.\x{FF1A}\x{FF0E}]?|第\s*[0-9一二三四五六七八九十百零两]+\s*步\s*[:.\x{FF1A}\x{FF0E}]?/u';

    /**
     * Matches a single character that may directly precede a sub-question
     * number: any line break, or closing / separating punctuation in either
     * its ASCII or its fullwidth form.
     */
    private const ANCHOR_PRECEDING_RE = '/(?:\R|[。!?;:·….、,\]\}】〉』」)\x{FF01}\x{FF1F}\x{FF1B}\x{FF1A}\x{FF0E}\x{FF0C}\x{FF09}])\z/u';

    /** Highest sub-question number that is searched for. */
    private const MAX_ORDERED_SUBQUESTIONS = 5;

    /** Upper bound on rejected candidates inspected per sub-question number. */
    private const MAX_CANDIDATE_SCANS = 8000;

    /** Question-type keywords (lower case) for which no markers are injected. */
    private const SKIPPED_TYPE_KEYWORDS = ['choice', '选择', 'fill', 'blank', '填空'];

    /**
     * Returns the trimmed solution, with step labels injected when applicable.
     *
     * The text is returned without injection when it is empty, when the question
     * type denotes a choice or fill-in-the-blank question, or when it already
     * contains a step heading.
     *
     * @param string $solution        Worked solution text (UTF-8).
     * @param mixed  $rawQuestionType Question type or tags; scalars, Stringable
     *                                objects and flat arrays of those are
     *                                inspected, anything else counts as unknown.
     *
     * @return string The trimmed solution, possibly with step labels inserted.
     */
    public static function enrichIfNeeded(string $solution, mixed $rawQuestionType): string
    {
        $solution = trim($solution);
        if ($solution === '') {
            return $solution;
        }

        $type = self::normalizeQuestionType($rawQuestionType);
        foreach (self::SKIPPED_TYPE_KEYWORDS as $keyword) {
            if (str_contains($type, $keyword)) {
                return $solution;
            }
        }

        if (preg_match(self::STEP_HEAD_RE, $solution) === 1) {
            return $solution;
        }

        return self::injectOrderedSubQuestionAnchors($solution);
    }

    /**
     * Turns the loosely typed question-type value into a lower-case string.
     *
     * Arrays are flattened one level and joined with spaces; values that cannot
     * be converted to a string (nested arrays, plain objects, resources) are
     * ignored instead of raising a conversion warning or error.
     */
    private static function normalizeQuestionType(mixed $raw): string
    {
        if (is_array($raw)) {
            $parts = [];
            foreach ($raw as $item) {
                if (is_scalar($item) || $item instanceof \Stringable) {
                    $parts[] = (string) $item;
                }
            }
            $raw = implode(' ', $parts);
        } elseif (! is_scalar($raw) && ! $raw instanceof \Stringable) {
            return '';
        }

        // mb_strtolower is locale-independent, so multi-byte keywords stay intact.
        return mb_strtolower(trim((string) $raw), 'UTF-8');
    }

    /**
     * Inserts "步骤一:", "步骤二:", ... in front of the ordered sub-question
     * numbers. The text is returned unchanged when fewer than two are found.
     */
    private static function injectOrderedSubQuestionAnchors(string $solution): string
    {
        $offsets = self::collectOrderedSubQuestionByteOffsets($solution, self::MAX_ORDERED_SUBQUESTIONS);
        if (count($offsets) < 2) {
            return $solution;
        }

        // Offsets are ascending; inserting from the back keeps the earlier
        // byte offsets valid while the string grows.
        $out = $solution;
        for ($i = count($offsets) - 1; $i >= 0; $i--) {
            $label = '步骤'.self::chineseOrdinal($i + 1).':';
            $out = substr_replace($out, $label, $offsets[$i], 0);
        }

        return $out;
    }

    /**
     * Walks the text once, looking for (1), then (2) after it, then (3), ...
     * and stops at the first number that cannot be found at a valid boundary.
     *
     * @return list<int> Ascending UTF-8 byte offsets of the accepted numbers.
     */
    private static function collectOrderedSubQuestionByteOffsets(string $s, int $maxN): array
    {
        $offsets = [];
        $searchFrom = 0;

        for ($n = 1; $n <= $maxN; $n++) {
            $hit = self::findNextAnchoredSubQuestion($s, $searchFrom, $n);
            if ($hit === null) {
                break;
            }

            [$byteStart, $matchLen] = $hit;
            $offsets[] = $byteStart;
            $searchFrom = $byteStart + $matchLen;
        }

        return $offsets;
    }

    /**
     * Finds the next occurrence of sub-question number $n at or after byte
     * offset $searchFrom that sits at a valid boundary.
     *
     * Accepted spellings are "(n)" with ASCII parentheses and "(n)" / "(中文 n)"
     * with fullwidth parentheses, each optionally followed by whitespace and one
     * separator. A number directly preceded by a Latin letter or a prime mark is
     * rejected so that function applications such as f(1) or f'(1) are skipped.
     *
     * @return ?array{0: int, 1: int} [byteStart, matchByteLength], or null.
     */
    private static function findNextAnchoredSubQuestion(string $s, int $searchFrom, int $n): ?array
    {
        $digits = (string) $n;
        $numeral = preg_quote(self::chineseOrdinal($n), '/');

        // The fullwidth parentheses are spelled as \x{FF08} / \x{FF09}: a bare
        // group without literal parentheses would match any lone digit.
        $pattern = '/(?<![A-Za-z\'\x{2019}\x{2032}])'
            .'(?:\(\s*'.$digits.'\s*\)'
            .'|\x{FF08}\s*(?:'.$digits.'|'.$numeral.')\s*\x{FF09})'
            .'\s*[、,;:.\x{FF0C}\x{FF1B}\x{FF1A}\x{FF0E}]?/u';

        $len = strlen($s);
        $pos = $searchFrom;

        for ($guard = 0; $guard < self::MAX_CANDIDATE_SCANS && $pos < $len; $guard++) {
            // No match, or a PCRE error such as invalid UTF-8: give up.
            if (preg_match($pattern, $s, $m, PREG_OFFSET_CAPTURE, $pos) !== 1) {
                return null;
            }

            $byteStart = (int) $m[0][1];
            $matchLen = strlen((string) $m[0][0]);
            if ($byteStart < 0 || $matchLen < 1) {
                return null;
            }

            if (self::isSubQuestionAnchorContext($s, $byteStart)) {
                return [$byteStart, $matchLen];
            }

            // Rejected candidate: continue right after it.
            $pos = $byteStart + $matchLen;
        }

        return null;
    }

    /**
     * Tells whether a sub-question number starting at $bytePos is at a boundary:
     * the start of the text, right after a line break, or right after closing /
     * separating punctuation. Spaces, tabs and ideographic spaces in between are
     * ignored. This keeps parenthesised values inside a sentence from being
     * treated as sub-question numbers.
     */
    private static function isSubQuestionAnchorContext(string $s, int $bytePos): bool
    {
        if ($bytePos <= 0) {
            return true;
        }

        $before = substr($s, 0, $bytePos);
        $before = preg_replace('/[ \t\x{3000}]+$/u', '', $before) ?? $before;
        if ($before === '') {
            return true;
        }

        $last = mb_substr($before, -1, 1, 'UTF-8');

        return $last !== '' && preg_match(self::ANCHOR_PRECEDING_RE, $last) === 1;
    }

    /**
     * Chinese numeral for 1..19; any other value is returned as decimal digits.
     */
    private static function chineseOrdinal(int $n): string
    {
        static $map = [
            1 => '一', 2 => '二', 3 => '三', 4 => '四', 5 => '五',
            6 => '六', 7 => '七', 8 => '八', 9 => '九', 10 => '十',
        ];

        if (isset($map[$n])) {
            return $map[$n];
        }

        if ($n > 10 && $n <= 19) {
            return '十'.$map[$n - 10];
        }

        return (string) $n;
    }
}
|