gwd
/
math_cms


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
							<?php

namespace App\Support;

/**
 * Injects "步骤一：", "步骤二：", … step labels into the solution text of pending
 * free-response questions.
 *
 * When a solution contains no "步骤 n" / "第 n 步" heading yet, but does contain
 * sub-question anchors that appear in order — (1) → (2) → (3) — a step label is
 * inserted in front of every anchor that sits on a valid boundary. According to the
 * original author's note, the boundary rule shares its origin with the rule that draws
 * step boxes in {@see resources/views/components/exam/paper-body.blade.php}.
 *
 * Only the first chain that starts at (1) and increases by one is labelled, so a later
 * "(1)" in the text can never be turned into something like "步骤三：(1)".
 */
final class AnswerSolutionStepMarkerInjector
{
    /** Existing step headings (kept aligned with paper-body's $stepPattern); a match disables injection. */
    private const STEP_HEAD_RE = '/步骤\s*[0-9一二三四五六七八九十百零两]+\s*[：:.．]?|第\s*[0-9一二三四五六七八九十百零两]+\s*步\s*[：:.．]?/u';

    /** Upper bound on how many ordered sub-questions receive a label. */
    private const MAX_ORDERED_SUBQUESTIONS = 5;

    /** Safety cap on how many rejected candidates are examined while looking for one anchor. */
    private const MAX_CANDIDATES_PER_ANCHOR = 8000;

    /** Substrings of the lower-cased question type that identify choice / fill-in-the-blank questions. */
    private const SKIPPED_TYPE_KEYWORDS = ['choice', '选择', 'fill', 'blank', '填空'];

    /** Chinese numerals for 1..10; 11..19 are composed from these. */
    private const CHINESE_DIGITS = [
        1 => '一', 2 => '二', 3 => '三', 4 => '四', 5 => '五',
        6 => '六', 7 => '七', 8 => '八', 9 => '九', 10 => '十',
    ];

    /**
     * Returns the trimmed solution, with step labels added when the question type and
     * the text qualify.
     *
     * The text is returned unchanged (apart from trimming) when it is empty, when the
     * question type mentions a choice or fill-in-the-blank keyword, when a step heading
     * is already present, or when fewer than two ordered sub-question anchors are found.
     *
     * @param  string  $solution  solution / explanation text
     * @param  mixed  $rawQuestionType  questions_tem.question_type, tags, etc.; strings,
     *                                  scalars, Stringable objects, backed enums and
     *                                  (nested) arrays of those are inspected, anything
     *                                  else is treated as "unknown type"
     */
    public static function enrichIfNeeded(string $solution, mixed $rawQuestionType): string
    {
        $solution = trim($solution);
        if ($solution === '') {
            return $solution;
        }

        // mb_strtolower keeps the CJK keywords intact regardless of the process locale.
        $type = mb_strtolower(trim(self::stringifyQuestionType($rawQuestionType)), 'UTF-8');
        foreach (self::SKIPPED_TYPE_KEYWORDS as $keyword) {
            if (str_contains($type, $keyword)) {
                return $solution;
            }
        }

        if (preg_match(self::STEP_HEAD_RE, $solution) === 1) {
            return $solution;
        }

        return self::injectOrderedSubQuestionAnchors($solution);
    }

    /**
     * Flattens an arbitrary question-type value into one searchable string.
     *
     * A plain (string) cast is not safe here: an array triggers PHP's
     * "Array to string conversion" warning and becomes the literal "Array" (so a tag
     * list such as ['选择题'] would not be recognised), and a non-Stringable object or
     * an enum throws an Error.
     */
    private static function stringifyQuestionType(mixed $raw): string
    {
        if (is_array($raw)) {
            $parts = [];
            foreach ($raw as $item) {
                $piece = self::stringifyQuestionType($item);
                if ($piece !== '') {
                    $parts[] = $piece;
                }
            }

            return implode(' ', $parts);
        }

        if ($raw instanceof \BackedEnum) {
            return (string) $raw->value;
        }

        if (is_scalar($raw) || $raw instanceof \Stringable) {
            return (string) $raw;
        }

        // null, resources, plain objects: nothing usable to inspect.
        return '';
    }

    /**
     * Prefixes each anchor of the ordered sub-question chain with its step label.
     * Needs at least two anchors; otherwise the text is returned untouched.
     */
    private static function injectOrderedSubQuestionAnchors(string $solution): string
    {
        $offsets = self::collectOrderedSubQuestionByteOffsets($solution, self::MAX_ORDERED_SUBQUESTIONS);
        if (count($offsets) < 2) {
            return $solution;
        }

        // Offsets are strictly increasing, so inserting from the last one backwards
        // keeps every not-yet-used byte offset valid.
        for ($i = count($offsets) - 1; $i >= 0; $i--) {
            $label = '步骤'.self::chineseOrdinal($i + 1).'：';
            $solution = substr_replace($solution, $label, $offsets[$i], 0);
        }

        return $solution;
    }

    /**
     * Walks 1, 2, 3, … and, for each number, finds the first "(n)" / "（n）" /
     * "（Chinese n）" located after the previous hit and on a sub-question boundary.
     * Stops at the first number that cannot be found.
     *
     * @return list<int> UTF-8 byte offsets of the anchors, strictly increasing
     */
    private static function collectOrderedSubQuestionByteOffsets(string $s, int $maxN): array
    {
        $offsets = [];
        $searchFrom = 0;
        for ($n = 1; $n <= $maxN; $n++) {
            $hit = self::findNextAnchoredSubQuestion($s, $searchFrom, $n);
            if ($hit === null) {
                break;
            }
            [$byteStart, $matchLen] = $hit;
            $offsets[] = $byteStart;
            $searchFrom = $byteStart + $matchLen;
        }

        return $offsets;
    }

    /**
     * Finds the first occurrence of sub-question number $n at or after byte $searchFrom
     * that is in an anchor context; occurrences in other contexts are skipped.
     *
     * @return ?array{0: int, 1: int} [byteStart, matchByteLength], or null when none qualifies
     */
    private static function findNextAnchoredSubQuestion(string $s, int $searchFrom, int $n): ?array
    {
        $cn = self::chineseOrdinal($n);
        // Full-width brackets around Arabic digits — （1）（2） — are very common in
        // solutions and OCR output; supporting only （一）（二） would miss whole chains.
        // The look-behind rejects function-call style text such as f(1) or f'(1).
        $pattern = '/(?<![A-Za-z\'\x{2019}\x{2032}])(\(\s*'.$n.'\s*\)|（\s*'.$n.'\s*）|（\s*'.preg_quote($cn, '/').'\s*）)\s*[、，；:：．.]?/u';

        $len = strlen($s);
        $pos = $searchFrom;
        for ($guard = 0; $guard < self::MAX_CANDIDATES_PER_ANCHOR && $pos < $len; $guard++) {
            // 0 = no further candidate, false = PCRE error (e.g. invalid UTF-8): give up either way.
            if (preg_match($pattern, $s, $m, PREG_OFFSET_CAPTURE, $pos) !== 1) {
                return null;
            }

            // The pattern always consumes at least "(", a digit and ")", so the match is never empty.
            [$matched, $byteStart] = $m[0];
            $matchLen = strlen($matched);

            if (self::isSubQuestionAnchorContext($s, $byteStart)) {
                return [$byteStart, $matchLen];
            }

            $pos = $byteStart + $matchLen;
        }

        return null;
    }

    /**
     * Tells whether byte position $bytePos is a sub-question boundary: the start of the
     * text, right after a line break, or right after sentence / clause punctuation
     * (ignoring spaces, tabs and ideographic spaces in between). This keeps bracketed
     * numbers inside running text from being treated as sub-questions.
     */
    private static function isSubQuestionAnchorContext(string $s, int $bytePos): bool
    {
        if ($bytePos <= 0) {
            return true;
        }

        $before = substr($s, 0, $bytePos);
        // A regex is used instead of rtrim(): U+3000 is multi-byte and rtrim() works per byte.
        $before = preg_replace('/[ \t\x{3000}]+$/u', '', $before) ?? $before;
        if ($before === '') {
            return true;
        }

        if (preg_match('/\R\z/u', $before)) {
            return true;
        }

        $last = mb_substr($before, -1, 1, 'UTF-8');

        return $last !== '' && (bool) preg_match('/[。！？；：·…．、，,\]\}】〉』」）]/u', $last);
    }

    /**
     * Chinese numeral for 1..19 ("一" … "十九"); any other value is returned as its
     * decimal string.
     */
    private static function chineseOrdinal(int $n): string
    {
        if (isset(self::CHINESE_DIGITS[$n])) {
            return self::CHINESE_DIGITS[$n];
        }
        if ($n > 10 && $n <= 19) {
            return '十'.self::CHINESE_DIGITS[$n - 10];
        }

        return (string) $n;
    }
}