Ver código fonte

fix(pdf): normalize terminal periods to Chinese full stop in Chinese stems

yemeishu 3 semanas atrás
pai
commit
d7c5b3709f
1 arquivo alterado com 20 adições e 1 exclusão
  1. 20 1
      app/Support/BlankPlaceholderRenderer.php

+ 20 - 1
app/Support/BlankPlaceholderRenderer.php

@@ -11,7 +11,12 @@ class BlankPlaceholderRenderer
      *
      * @return array{0:string,1:bool} [renderedContent, replacedAnyPlaceholder]
      */
-    public static function replaceToBlankSpan(string $content, ?string $blankSpan = null, bool $collapseAdjacentBlanks = false): array
+    public static function replaceToBlankSpan(
+        string $content,
+        ?string $blankSpan = null,
+        bool $collapseAdjacentBlanks = false,
+        bool $normalizeChineseTerminalPeriod = true
+    ): array
     {
         $blankSpan = $blankSpan ?: self::DEFAULT_BLANK_SPAN;
         $renderedContent = $content;
@@ -92,6 +97,10 @@ class BlankPlaceholderRenderer
             $renderedContent = str_replace($placeholder, $encodedLatex, $renderedContent);
         }
 
+        if ($normalizeChineseTerminalPeriod) {
+            $renderedContent = self::normalizeChineseTerminalPeriod($renderedContent);
+        }
+
         return [$renderedContent, $renderedContent !== $content];
     }
 
@@ -99,4 +108,14 @@ class BlankPlaceholderRenderer
     {
         return self::DEFAULT_BLANK_SPAN;
     }
+
+    /**
+     * Replace a sentence-final ASCII period with the Chinese full stop "。".
+     *
+     * Content without any Han character is returned untouched. Otherwise a
+     * period is rewritten only when nothing but whitespace and closing tags
+     * (e.g. "</p>") follows it up to the end of the string.
+     *
+     * @param string $content Rendered text or markup to normalize.
+     *
+     * @return string The normalized content, or the input unchanged when it has
+     *                no Han characters or the replacement could not be performed.
+     */
+    private static function normalizeChineseTerminalPeriod(string $content): string
+    {
+        // Only normalize in a Chinese context. preg_match() yields 0 (no match)
+        // or false (e.g. malformed UTF-8 under /u); both bail out here.
+        if (! preg_match('/\p{Han}/u', $content)) {
+            return $content;
+        }
+
+        // NOTE(review): the character class lists "." twice; the second entry may
+        // originally have been a full-width period (U+FF0E) — confirm.
+        $normalized = preg_replace('/[\..](?=\s*(?:<\/[^>]+>\s*)*$)/u', '。', $content);
+
+        // preg_replace() returns null on a PCRE failure (e.g. backtrack limit),
+        // which would violate the declared string return type. Normalization is
+        // best-effort, so fall back to the original content instead.
+        return $normalized ?? $content;
+    }
 }