';

    /**
     * Matches only "blank placeholder" underlines, i.e. \underline{...} whose
     * argument consists solely of spacing material (\quad, \qquad, \hspace{..},
     * \hphantom{..}, runs of underscores, whitespace, "&nbsp;" or "\ ").
     * An underline around real content such as \underline{\frac{...}} does not match.
     *
     * Whitespace is expressed as a single character class so that the repeated
     * group has no overlapping alternatives (overlap under "*" backtracks badly).
     */
    private const BLANK_UNDERLINE_PATTERN = '/\\\\+underline\{\s*(?:(?:\\\\+qquad+|\\\\+quad+|\\\\+hspace\{[^{}]*\}|\\\\+hphantom\{\s*(?:(?:\\\\+qquad+|\\\\+quad+|\\\\+hspace\{[^{}]*\}|_{2,}|[\s\x{00A0}\x{3000}]|&nbsp;|\\\\+\s+)*)\s*\}|_{2,}|[\s\x{00A0}\x{3000}]|&nbsp;|\\\\+\s+)*)\s*\}/u';

    /**
     * Replaces empty brackets, underline runs and a few malformed placeholders in
     * a question stem with the standard blank markup.
     *
     * Processing order:
     *  1. Every `$...$` segment is either rewritten (when it contains blank
     *     placeholders) or swapped for a unique opaque token so the text-level
     *     patterns cannot touch it.
     *  2. Text-level placeholders are replaced by `$blankSpan`.
     *  3. Optionally, adjacent blanks are collapsed into one.
     *  4. A stray `$` directly after a blank at the very end is dropped.
     *  5. Protected formulas are restored, HTML-escaped.
     *  6. Optionally, the terminal period is normalised to the Chinese full stop.
     *
     * @param string      $content                        Raw question text.
     * @param string|null $blankSpan                      Markup used for a blank; null or an
     *                                                    empty/falsy string selects the default.
     * @param bool        $collapseAdjacentBlanks         Merge runs of blanks separated only by whitespace.
     * @param bool        $normalizeChineseTerminalPeriod Run the terminal-period normalisation.
     *
     * @return array{0:string,1:bool} [renderedContent, changed]. The flag is computed as
     *                                "rendered !== original", so it is also true when only
     *                                escaping or punctuation changed.
     */
    public static function replaceToBlankSpan(
        string $content,
        ?string $blankSpan = null,
        bool $collapseAdjacentBlanks = false,
        bool $normalizeChineseTerminalPeriod = true
    ): array {
        // `?:` is deliberate: an empty string must also fall back to the default.
        $blankSpan = $blankSpan ?: self::DEFAULT_BLANK_SPAN;
        $quotedBlankSpan = preg_quote($blankSpan, '/');
        // Insert the span through a callback so "$1" / "\1" inside a custom span
        // is never interpreted as a back-reference.
        $insertBlank = static fn (array $m): string => $blankSpan;

        $latexPlaceholders = [];
        $counter = 0;

        $protected = preg_replace_callback(
            '/\$(?:[^\$]|\\\\.)*\$/u',
            static function (array $matches) use (&$latexPlaceholders, &$counter, $blankSpan): string {
                $latexContent = $matches[0];
                $inner = mb_substr($latexContent, 1, mb_strlen($latexContent) - 2);

                // Math segments can carry blanks too, e.g. $\underline{\qquad}$
                // or $\angle A=\underline{\quad}$.
                $blankToken = '<<<BLANK_TOKEN>>>';
                $blankCount = 0;
                $innerWithBlanks = preg_replace(
                    [
                        self::BLANK_UNDERLINE_PATTERN,
                        '/\\\\+qquad+/u',
                        '/\\\\+quad+/u',
                        '/[(\x{FF08}](?:[\s\x{00A0}\x{3000}]|&nbsp;)*[)\x{FF09}]/u',
                        '/_{2,}/u',
                    ],
                    $blankToken,
                    $inner,
                    -1,
                    $blankCount
                );

                if ($innerWithBlanks !== null && $blankCount > 0) {
                    $parts = explode($blankToken, $innerWithBlanks);
                    $lastIndex = count($parts) - 1;
                    $rebuilt = '';

                    foreach ($parts as $index => $part) {
                        if ($part !== '') {
                            // A lone period is emitted as plain text so the output
                            // never ends in something like "$.$".
                            if (preg_match('/^[.\x{FF0E}\x{3002}]$/u', $part)) {
                                $rebuilt .= $part;
                            } else {
                                $rebuilt .= htmlspecialchars('$'.$part.'$', ENT_QUOTES | ENT_HTML5, 'UTF-8');
                            }
                        }

                        if ($index < $lastIndex) {
                            $rebuilt .= $blankSpan;
                        }
                    }

                    return $rebuilt === '' ? $blankSpan : $rebuilt;
                }

                // No blanks inside: hide the formula behind a token that is unique
                // per formula, so each one is restored to its own source text.
                $placeholder = '<<<LATEX_PLACEHOLDER_'.$counter.'>>>';
                $latexPlaceholders[$placeholder] = $latexContent;
                $counter++;

                return $placeholder;
            },
            $content
        );
        // preg_* returns null on PCRE errors (bad UTF-8, backtrack limit); keep the input then.
        $renderedContent = $protected ?? $content;

        // Common blank notations: \underline{...}, \qquad, empty brackets
        // (ASCII or full-width, possibly holding only whitespace/&nbsp;),
        // underscore runs, and a trailing "\$".
        $patterns = [
            self::BLANK_UNDERLINE_PATTERN,
            '/\\\\+qquad+/u',
            '/[(\x{FF08}](?:[\s\x{00A0}\x{3000}]|&nbsp;)*[)\x{FF09}]/u',
            '/_{2,}/u',
            '/\\\\+\$(?=\s*$)/u',
        ];
        $renderedContent = preg_replace_callback($patterns, $insertBlank, $renderedContent) ?? $renderedContent;

        if ($collapseAdjacentBlanks) {
            $renderedContent = preg_replace_callback(
                '/(?:'.$quotedBlankSpan.'(?:[\s\x{00A0}\x{3000}]|&nbsp;)*){2,}/u',
                $insertBlank,
                $renderedContent
            ) ?? $renderedContent;
        }

        // Dirty data: a blank followed by an orphan "$" at the very end
        // (e.g. "...=____$."). Only "blank + end of text" is targeted, so regular
        // math delimiters are left alone.
        $renderedContent = preg_replace(
            '/('.$quotedBlankSpan.')\s*\$(?=\s*[.\x{FF0E}\x{3002}]?(?:\s*(?:(?:<\/[^>]+>|<[^>]+\/>)\s*)*)$)/u',
            '$1',
            $renderedContent
        ) ?? $renderedContent;

        foreach ($latexPlaceholders as $placeholder => $latexContent) {
            $replacement = htmlspecialchars($latexContent, ENT_QUOTES | ENT_HTML5, 'UTF-8');

            // A formula that ends in backslash(es) right before the closing "$" is
            // treated as a blank when it is empty or ends with "=" / ":".
            if (preg_match('/^\$(.*?)(\\\\+)\$$/u', $latexContent, $match)) {
                $inner = rtrim($match[1]);

                if ($inner === '') {
                    $replacement = $blankSpan;
                } elseif (preg_match('/[=:\x{FF1A}]\s*$/u', $inner)) {
                    $replacement = htmlspecialchars('$'.$inner.'$', ENT_QUOTES | ENT_HTML5, 'UTF-8').' '.$blankSpan;
                }
            }

            $renderedContent = str_replace($placeholder, $replacement, $renderedContent);
        }

        if ($normalizeChineseTerminalPeriod) {
            $renderedContent = self::normalizeChineseTerminalPeriod($renderedContent);
        }

        return [$renderedContent, $renderedContent !== $content];
    }

    /**
     * Returns the markup used for a blank when the caller supplies none.
     */
    public static function defaultBlankSpan(): string
    {
        return self::DEFAULT_BLANK_SPAN;
    }

    /**
     * Normalises the terminal period of a text. Only the end of the text is
     * touched; decimals and expressions in the middle are not affected.
     *
     * $mode:
     *  - remove: drop the terminal period
     *  - dot:    use the ASCII period "."
     *  - cn:     use the Chinese full stop "。"
     * Any other mode returns $content unchanged.
     */
    public static function normalizeTerminalPunctuation(string $content, string $mode): string
    {
        $replacement = match ($mode) {
            'remove' => '',
            'dot' => '.',
            'cn' => '。',
            default => null,
        };

        if ($replacement === null) {
            return $content;
        }

        // Only the last mark is handled; trailing closing or self-closing HTML
        // tags (e.g. </p>) are allowed after it and are preserved.
        // 1) A period wrapped in its own math segment ("$.$", "$．$", "$。$").
        if (preg_match('/^(.*)\$\s*[.\x{FF0E}\x{3002}]\s*\$(\s*(?:(?:<\/[^>]+>|<[^>]+\/>)\s*)*)$/us', $content, $m)) {
            return $m[1].$replacement.$m[2];
        }

        // 2) A plain terminal period (ASCII, full-width or Chinese).
        if (preg_match('/^(.*?)([.\x{FF0E}\x{3002}])(\s*(?:(?:<\/[^>]+>|<[^>]+\/>)\s*)*)$/us', $content, $m)) {
            return $m[1].$replacement.$m[3];
        }

        return $content;
    }

    /**
     * Appends $punctuation only when the text does not already end with a
     * terminal mark. Existing marks are never replaced and the middle of the
     * text is never touched. An empty $punctuation returns $content unchanged.
     */
    public static function appendTerminalPunctuationIfMissing(string $content, string $punctuation): string
    {
        if ($punctuation === '') {
            return $content;
        }

        // Terminal marks: period, exclamation mark, question mark, semicolon and
        // colon, each in ASCII and full-width form, plus the Chinese full stop.
        // Trailing closing/self-closing HTML tags may follow the mark.
        $terminalPattern = '/[.\x{FF0E}\x{3002}!\x{FF01}?\x{FF1F};\x{FF1B}:\x{FF1A}]'
            .'(\s*(?:(?:<\/[^>]+>|<[^>]+\/>)\s*)*)$/us';

        if (preg_match($terminalPattern, $content)) {
            return $content;
        }

        return rtrim($content).$punctuation;
    }

    /**
     * Converts the terminal period to "。" when the text contains at least one
     * Han character; text without Han characters is returned unchanged.
     */
    private static function normalizeChineseTerminalPeriod(string $content): string
    {
        if (! preg_match('/\p{Han}/u', $content)) {
            return $content;
        }

        return self::normalizeTerminalPunctuation($content, 'cn');
    }
}