|
@@ -0,0 +1,188 @@
|
|
|
|
|
+<?php
|
|
|
|
|
+
|
|
|
|
|
+namespace App\Support;
|
|
|
|
|
+
|
|
|
|
|
/**
 * Replaces "blank to fill in" notations found in question text (empty brackets,
 * underscore runs, blank \underline{...}, \qquad, dangling "\$") with one
 * standard HTML blank, and offers helpers for terminal punctuation.
 *
 * All patterns run in UTF-8 mode. Full-width characters are written as
 * \x{....} escapes so the patterns survive re-encoding of this file.
 */
class BlankPlaceholderRenderer
{
    /** HTML emitted for one blank when the caller does not supply its own. */
    private const DEFAULT_BLANK_SPAN = '<span style="display:inline-block; min-width:80px; border-bottom:1.2px dashed #444; vertical-align:bottom;"> </span>';

    /**
     * Matches only "blank placeholder" underlines (content made of spacing
     * commands, underscores and whitespace). An underline that wraps real
     * content such as \underline{\frac{...}} is deliberately not matched.
     *
     * Whitespace is matched through a single `\s` alternative (plus the literal
     * `&nbsp;` entity): several alternatives matching the same space character
     * inside a `*` loop cause exponential backtracking on non-matching input.
     */
    private const BLANK_UNDERLINE_PATTERN = '/\\\\+underline\{\s*(?:(?:\\\\+qquad+|\\\\+quad+|\\\\+hspace\{[^{}]*\}|\\\\+hphantom\{\s*(?:(?:\\\\+qquad+|\\\\+quad+|\\\\+hspace\{[^{}]*\}|_{2,}|&nbsp;|\s|\\\\+\s+)*)\s*\}|_{2,}|&nbsp;|\s|\\\\+\s+)*)\s*\}/u';

    /** Empty brackets, ASCII or full-width, containing only whitespace / &nbsp;. */
    private const EMPTY_BRACKETS_PATTERN = '/[(\x{FF08}](?:\s|&nbsp;)*[)\x{FF09}]/u';

    /** Character-class body: ASCII full stop, full-width full stop, ideographic full stop. */
    private const TERMINAL_PERIODS = '.\x{FF0E}\x{3002}';

    /** Regex fragment: optional whitespace and closing / self-closing tags up to the end. */
    private const TRAILING_TAGS = '\s*(?:(?:<\/[^>]+>|<[^>]+\/>)\s*)*';

    /**
     * Replaces empty brackets, underscore runs and a few malformed placeholders
     * in the question text with the standard blank markup.
     *
     * Math segments ($...$) are handled separately: blanks inside them are
     * replaced and the remaining pieces are HTML-encoded; segments without
     * blanks are protected from the text-level patterns and HTML-encoded when
     * they are put back.
     *
     * If a regular expression fails (for example on invalid UTF-8 or when the
     * backtrack limit is hit) the affected step is skipped and the text is kept
     * as it was, instead of being replaced by an empty string.
     *
     * @param string      $content                        Raw question text.
     * @param string|null $blankSpan                      Markup for one blank; null or an empty
     *                                                    value selects the default span.
     * @param bool        $collapseAdjacentBlanks         Merge runs of adjacent blanks into one.
     * @param bool        $normalizeChineseTerminalPeriod Turn the terminal period into "。" when
     *                                                    the text contains Han characters.
     *
     * @return array{0:string,1:bool} [renderedContent, changed] - the flag is true whenever the
     *                                rendered text differs from the input, which includes HTML
     *                                encoding of math and period normalization.
     */
    public static function replaceToBlankSpan(
        string $content,
        ?string $blankSpan = null,
        bool $collapseAdjacentBlanks = false,
        bool $normalizeChineseTerminalPeriod = true
    ): array {
        // `?:` on purpose: an empty string also falls back to the default span.
        $blankSpan = $blankSpan ?: self::DEFAULT_BLANK_SPAN;
        $quotedBlankSpan = preg_quote($blankSpan, '/');

        $latexPlaceholders = [];
        $counter = 0;

        // Step 1: walk over every $...$ segment.
        $renderedContent = preg_replace_callback(
            '/\$(?:[^\$]|\\\\.)*\$/u',
            static function (array $matches) use (&$latexPlaceholders, &$counter, $blankSpan): string {
                $latexContent = $matches[0];
                $inner = mb_substr($latexContent, 1, mb_strlen($latexContent) - 2);

                // Math may itself contain blanks, e.g. $\underline{\qquad}$ or $\angle A=\underline{\quad}$.
                $rebuilt = self::renderBlanksInsideMath($inner, '<<<BLANK_IN_MATH_'.$counter.'>>>', $blankSpan);
                if ($rebuilt !== null) {
                    return $rebuilt;
                }

                // No blank inside: park the segment so the text-level patterns cannot touch it.
                $placeholder = '<<<LATEX_BLANK_'.$counter.'>>>';
                $latexPlaceholders[$placeholder] = $latexContent;
                $counter++;

                return $placeholder;
            },
            $content
        ) ?? $content;

        // Step 2: common blank notations in plain text - blank \underline{...}, \qquad,
        // empty brackets, underscore runs, and a trailing "\$".
        $renderedContent = preg_replace(
            [
                self::BLANK_UNDERLINE_PATTERN,
                '/\\\\+qquad+/u',
                self::EMPTY_BRACKETS_PATTERN,
                '/_{2,}/u',
                '/\\\\+\$(?=\s*$)/u',
            ],
            $blankSpan,
            $renderedContent
        ) ?? $renderedContent;

        if ($collapseAdjacentBlanks) {
            $renderedContent = preg_replace(
                '/(?:'.$quotedBlankSpan.'(?:\s|&nbsp;)*){2,}/u',
                $blankSpan,
                $renderedContent
            ) ?? $renderedContent;
        }

        // Step 3 (dirty data): a blank followed by a stray "$" at the very end of the
        // sentence, e.g. "...=____$." - drop that "$". Restricted to "blank + end of
        // sentence" so regular math delimiters are left alone.
        $renderedContent = preg_replace(
            '/('.$quotedBlankSpan.')\s*\$(?=\s*['.self::TERMINAL_PERIODS.']?'.self::TRAILING_TAGS.'$)/u',
            '$1',
            $renderedContent
        ) ?? $renderedContent;

        // Step 4: put the parked math segments back.
        foreach ($latexPlaceholders as $placeholder => $latexContent) {
            $renderedContent = str_replace(
                $placeholder,
                self::renderStoredMath($latexContent, $blankSpan),
                $renderedContent
            );
        }

        if ($normalizeChineseTerminalPeriod) {
            $renderedContent = self::normalizeChineseTerminalPeriod($renderedContent);
        }

        return [$renderedContent, $renderedContent !== $content];
    }

    /**
     * Returns the default blank markup.
     */
    public static function defaultBlankSpan(): string
    {
        return self::DEFAULT_BLANK_SPAN;
    }

    /**
     * Normalizes the terminal period of a text. Only the last period at the end
     * of the text is touched (closing or self-closing tags may follow it);
     * decimals and expressions in the middle are not affected.
     *
     * @param string $content Text to normalize.
     * @param string $mode    One of:
     *                        - "remove": drop the terminal period
     *                        - "dot":    use the ASCII full stop "."
     *                        - "cn":     use the ideographic full stop "。"
     *                        Any other value returns the text unchanged.
     */
    public static function normalizeTerminalPunctuation(string $content, string $mode): string
    {
        $replacement = match ($mode) {
            'remove' => '',
            'dot' => '.',
            'cn' => '。',
            default => null,
        };
        if ($replacement === null) {
            return $content;
        }

        $periods = '['.self::TERMINAL_PERIODS.']';

        // 1) A period wrapped in its own math segment at the end, e.g. "$.$" or "$。$":
        //    the whole segment is replaced by the target punctuation.
        if (preg_match('/^(.*)\$\s*'.$periods.'\s*\$('.self::TRAILING_TAGS.')$/us', $content, $m) === 1) {
            return $m[1].$replacement.$m[2];
        }

        // 2) A plain terminal period: replace only that last one.
        if (preg_match('/^(.*?)('.$periods.')('.self::TRAILING_TAGS.')$/us', $content, $m) === 1) {
            return $m[1].$replacement.$m[3];
        }

        return $content;
    }

    /**
     * Appends the given punctuation only when the text does not already end
     * with a terminator. Existing terminal punctuation is never overwritten and
     * the middle of the text is not inspected.
     *
     * @param string $content     Text to complete.
     * @param string $punctuation Punctuation to append; an empty string is a no-op.
     */
    public static function appendTerminalPunctuationIfMissing(string $content, string $punctuation): string
    {
        if ($punctuation === '') {
            return $content;
        }

        // Terminators, ASCII and full-width: period, exclamation mark, question mark,
        // semicolon, colon. Closing / self-closing tags may follow the terminator.
        $terminators = '['.self::TERMINAL_PERIODS.'!\x{FF01}?\x{FF1F};\x{FF1B}:\x{FF1A}]';
        if (preg_match('/'.$terminators.'('.self::TRAILING_TAGS.')$/us', $content) === 1) {
            return $content;
        }

        return rtrim($content).$punctuation;
    }

    /**
     * Converts the terminal period to "。", but only for text that contains at
     * least one Han character.
     */
    private static function normalizeChineseTerminalPeriod(string $content): string
    {
        // A failed match (false, e.g. invalid UTF-8) is treated like "no Han character".
        if (! preg_match('/\p{Han}/u', $content)) {
            return $content;
        }

        return self::normalizeTerminalPunctuation($content, 'cn');
    }

    /**
     * Replaces the blanks found inside one math segment.
     *
     * @param string $inner      Math source without the surrounding "$".
     * @param string $blankToken Temporary marker used to split the source at each blank.
     * @param string $blankSpan  Markup for one blank.
     *
     * @return string|null Rebuilt markup (encoded math pieces joined by blanks), or null
     *                     when the segment holds no blank or the replacement failed.
     */
    private static function renderBlanksInsideMath(string $inner, string $blankToken, string $blankSpan): ?string
    {
        $blankCount = 0;
        $innerWithBlanks = preg_replace(
            [
                self::BLANK_UNDERLINE_PATTERN,
                '/\\\\+qquad+/u',
                '/\\\\+quad+/u',
                self::EMPTY_BRACKETS_PATTERN,
                '/_{2,}/u',
            ],
            $blankToken,
            $inner,
            -1,
            $blankCount
        );
        if ($innerWithBlanks === null || $blankCount === 0) {
            return null;
        }

        $pieces = [];
        foreach (explode($blankToken, $innerWithBlanks) as $part) {
            if ($part === '') {
                $pieces[] = '';
            } elseif (preg_match('/^['.self::TERMINAL_PERIODS.']$/u', $part) === 1) {
                // A lone period is not wrapped in math again, which would yield a trailing "$.$".
                $pieces[] = $part;
            } else {
                $pieces[] = htmlspecialchars('$'.$part.'$', ENT_QUOTES | ENT_HTML5, 'UTF-8');
            }
        }

        // One blank between every two consecutive pieces.
        $rebuilt = implode($blankSpan, $pieces);

        return $rebuilt === '' ? $blankSpan : $rebuilt;
    }

    /**
     * Renders a parked math segment when it is put back into the text.
     *
     * A segment that ends with backslashes right before the closing "$" is
     * treated as a blank when nothing precedes the backslashes, and as
     * "expression followed by a blank" when the expression ends with "=" or a
     * colon. Every other segment is returned HTML-encoded.
     *
     * @param string $latexContent Math segment including the surrounding "$".
     * @param string $blankSpan    Markup for one blank.
     */
    private static function renderStoredMath(string $latexContent, string $blankSpan): string
    {
        if (preg_match('/^\$(.*?)(\\\\+)\$$/u', $latexContent, $match) === 1) {
            $inner = rtrim($match[1]);
            if ($inner === '') {
                return $blankSpan;
            }
            if (preg_match('/[=:\x{FF1A}]\s*$/u', $inner) === 1) {
                return htmlspecialchars('$'.$inner.'$', ENT_QUOTES | ENT_HTML5, 'UTF-8').' '.$blankSpan;
            }
        }

        return htmlspecialchars($latexContent, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }
}
|