BlankPlaceholderRenderer.php 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. <?php
  2. namespace App\Support;
  3. class BlankPlaceholderRenderer
  4. {
  5. private const DEFAULT_BLANK_SPAN = '<span style="display:inline-block; min-width:80px; border-bottom:1.2px dashed #444; vertical-align:bottom;">&nbsp;</span>';
  6. /**
  7. * 将题干中的空括号/下划线/部分异常占位符统一替换为标准空位样式。
  8. *
  9. * @return array{0:string,1:bool} [renderedContent, replacedAnyPlaceholder]
  10. */
  11. public static function replaceToBlankSpan(
  12. string $content,
  13. ?string $blankSpan = null,
  14. bool $collapseAdjacentBlanks = false,
  15. bool $normalizeChineseTerminalPeriod = true
  16. ): array
  17. {
  18. $blankSpan = $blankSpan ?: self::DEFAULT_BLANK_SPAN;
  19. $renderedContent = $content;
  20. $latexPlaceholders = [];
  21. $counter = 0;
  22. $renderedContent = preg_replace_callback('/\$(?:[^\$]|\\\\.)*\$/u', function ($matches) use (&$latexPlaceholders, &$counter, $blankSpan) {
  23. $latexContent = $matches[0];
  24. $inner = mb_substr($latexContent, 1, mb_strlen($latexContent) - 2);
  25. // 数学环境内也可能包含填空占位符(如 $\\underline{\\qquad}$ / $\\angle A=\\underline{\\quad}$)
  26. $blankToken = '<<<BLANK_IN_MATH_'.$counter.'>>>';
  27. $innerWithBlanks = preg_replace(
  28. [
  29. '/\\\\underline\{[^}]*\}/u',
  30. '/\\\\qquad+/u',
  31. '/\\\\quad+/u',
  32. '/[((](?:\s|&nbsp;|&#160;| )*[))]/u',
  33. '/_{2,}/u',
  34. ],
  35. $blankToken,
  36. $inner,
  37. -1,
  38. $blankCount
  39. );
  40. if ($blankCount > 0) {
  41. $parts = explode($blankToken, $innerWithBlanks);
  42. $rebuilt = '';
  43. $lastIndex = count($parts) - 1;
  44. foreach ($parts as $index => $part) {
  45. if ($part !== '') {
  46. // 纯标点不再包进数学环境,避免生成 "$.$" 这类尾部格式。
  47. if (preg_match('/^[\..。]$/u', $part)) {
  48. $rebuilt .= $part;
  49. } else {
  50. $rebuilt .= htmlspecialchars('$'.$part.'$', ENT_QUOTES | ENT_HTML5, 'UTF-8');
  51. }
  52. }
  53. if ($index < $lastIndex) {
  54. $rebuilt .= $blankSpan;
  55. }
  56. }
  57. return $rebuilt === '' ? $blankSpan : $rebuilt;
  58. }
  59. $placeholder = '<<<LATEX_BLANK_'.$counter.'>>>';
  60. $latexPlaceholders[$placeholder] = $latexContent;
  61. $counter++;
  62. return $placeholder;
  63. }, $renderedContent);
  64. // 兼容常见空位写法:\underline{...}、\qquad、空括号(含 nbsp 等空白)、连续下划线、尾部 \\$
  65. $patterns = [
  66. '/\\\underline\{[^}]*\}/u',
  67. '/\\\qquad+/u',
  68. '/[((](?:\s|&nbsp;|&#160;| )*[))]/u',
  69. '/_{2,}/u',
  70. '/\\\\+\$(?=\s*$)/u',
  71. ];
  72. $renderedContent = preg_replace($patterns, $blankSpan, $renderedContent);
  73. if ($collapseAdjacentBlanks) {
  74. $quotedBlankSpan = preg_quote($blankSpan, '/');
  75. $renderedContent = preg_replace('/(?:'.$quotedBlankSpan.'(?:\s|&nbsp;|&#160;| )*){2,}/u', $blankSpan, $renderedContent);
  76. }
  77. foreach ($latexPlaceholders as $placeholder => $latexContent) {
  78. if (preg_match('/^\$(.*?)(\\\\+)\$$/u', $latexContent, $match)) {
  79. $inner = rtrim($match[1]);
  80. if ($inner === '' || preg_match('/[=::]\s*$/u', $inner)) {
  81. if ($inner === '') {
  82. $replacement = $blankSpan;
  83. } else {
  84. $replacement = htmlspecialchars('$'.$inner.'$', ENT_QUOTES | ENT_HTML5, 'UTF-8').' '.$blankSpan;
  85. }
  86. $renderedContent = str_replace($placeholder, $replacement, $renderedContent);
  87. continue;
  88. }
  89. }
  90. $encodedLatex = htmlspecialchars($latexContent, ENT_QUOTES | ENT_HTML5, 'UTF-8');
  91. $renderedContent = str_replace($placeholder, $encodedLatex, $renderedContent);
  92. }
  93. if ($normalizeChineseTerminalPeriod) {
  94. $renderedContent = self::normalizeChineseTerminalPeriod($renderedContent);
  95. }
  96. return [$renderedContent, $renderedContent !== $content];
  97. }
  98. public static function defaultBlankSpan(): string
  99. {
  100. return self::DEFAULT_BLANK_SPAN;
  101. }
  102. /**
  103. * 统一句尾标点(仅处理句尾,不影响中间小数/表达式)
  104. *
  105. * $mode:
  106. * - remove: 去掉句尾句号
  107. * - dot: 句尾统一为英文实心点 "."
  108. * - cn: 句尾统一为中文句号 "。"
  109. */
  110. public static function normalizeTerminalPunctuation(string $content, string $mode): string
  111. {
  112. $replacement = match ($mode) {
  113. 'remove' => '',
  114. 'dot' => '.',
  115. 'cn' => '。',
  116. default => null,
  117. };
  118. if ($replacement === null) {
  119. return $content;
  120. }
  121. // 先处理数学片段尾点(如 "$.$" / "$。$" / "$.$")。
  122. $content = preg_replace('/\$\s*[\..。]\s*\$(?=\s*(?:(?:<\/[^>]+>|<[^>]+\/>)\s*)*$)/u', $replacement, $content);
  123. // 再处理普通句尾点。
  124. return preg_replace('/[\..。](?=\s*(?:(?:<\/[^>]+>|<[^>]+\/>)\s*)*$)/u', $replacement, $content);
  125. }
  126. private static function normalizeChineseTerminalPeriod(string $content): string
  127. {
  128. // 仅在存在中文语境时,把句末英文句号统一为中文句号。
  129. if (! preg_match('/\p{Han}/u', $content)) {
  130. return $content;
  131. }
  132. return self::normalizeTerminalPunctuation($content, 'cn');
  133. }
  134. }