Bladeren bron

fix(blank): LaTeX $ 配对与填空横线占位渲染

- BlankPlaceholderRenderer: PCRE 行尾 $ 字面量修正;closeMissing 仅在偶数位 $ 锚点补闭合并避免误用上一段收尾 $。
- ExamPdfExportService: generateQuestionCheckPdf 支持可选本地路径直写,跳过 CDN。
- 题库占位符审计脚本、本地分批 PDF 导出脚本、抽样 PDF 脚本;BlankPlaceholderRenderer 单元测试。

默认审计不测 $ 奇偶;输出 priority_issues 明细。

Made-with: Cursor
yemeishu 3 weken geleden
bovenliggende
commit
d618df47b3

+ 43 - 5
app/Services/ExamPdfExportService.php

@@ -161,8 +161,8 @@ class ExamPdfExportService
                 Log::info('generateUnifiedPdf: 开始获取知识点讲解HTML', ['paper_id' => $paperId]);
                 $kpExplainHtml = $this->fetchKnowledgeExplanationHtml($paperId);
                 if ($kpExplainHtml) {
-                    // 对知识点讲解HTML进行内联资源处理(与服务端公式渲染)
-                    $kpExplainHtml = $this->inlineExternalResources($kpExplainHtml);
+                    // 统一在 mergeHtmlWithPageBreak()->ensureUtf8Html() 阶段处理内联与公式预渲染,
+                    // 避免在此处重复处理导致额外耗时。
                     Log::info('generateUnifiedPdf: 知识点讲解HTML获取并处理成功', [
                         'paper_id' => $paperId,
                         'length' => strlen($kpExplainHtml),
@@ -1531,7 +1531,19 @@ class ExamPdfExportService
                 return null;
             }
 
+            $socketTimeoutBackup = null;
+            $imageProbeTimeout = max(1, (int) config('pdf.image_probe_timeout_seconds', 2));
+            if (str_starts_with($src, 'http://') || str_starts_with($src, 'https://')) {
+                // 远程图片探测加硬超时,避免首次命中慢图源拖慢整份 PDF 生成。
+                $socketTimeoutBackup = ini_get('default_socket_timeout');
+                @ini_set('default_socket_timeout', (string) $imageProbeTimeout);
+            }
+
             $size = @getimagesize($src);
+
+            if ($socketTimeoutBackup !== null && $socketTimeoutBackup !== false) {
+                @ini_set('default_socket_timeout', (string) $socketTimeoutBackup);
+            }
             if (is_array($size) && count($size) >= 2) {
                 $data = ['w' => (int) $size[0], 'h' => (int) $size[1]];
                 $this->persistPdfImageMetrics($src, $data);
@@ -1725,7 +1737,7 @@ class ExamPdfExportService
 
             // 轮询检测PDF是否生成
             $pollStart = microtime(true);
-            $maxPollSeconds = 80; // 【修复】增加轮询超时到80秒
+            $maxPollSeconds = max(10, (int) config('pdf.chrome_poll_timeout_seconds', 40));
             while ($process->isRunning() && (microtime(true) - $pollStart) < $maxPollSeconds) {
                 if (file_exists($tmpPdf) && filesize($tmpPdf) > 0) {
                     $pdfGenerated = true;
@@ -3312,13 +3324,14 @@ class ExamPdfExportService
      * 题目质检专用 PDF:固定使用判题卡体系模板(答案详解 + 判题卡)。
      * 不进入正常组卷流程,仅用于检查题干、答案、解题思路渲染效果。
      *
-     * @return array{pdf_url?: string}
+     * @return array{pdf_url?: string,grading_pdf_url?: string,local_path?: string}
      */
     public function generateQuestionCheckPdf(
         object $paper,
         array $groupedQuestions,
         array $student = [],
-        array $teacher = []
+        array $teacher = [],
+        ?string $localOutputPath = null
     ): array {
         Log::info('generateQuestionCheckPdf 开始', [
             'paper_id' => $paper->paper_id ?? null,
@@ -3372,6 +3385,31 @@ class ExamPdfExportService
                 return [];
             }
 
+            if ($localOutputPath !== null && $localOutputPath !== '') {
+                $dir = dirname($localOutputPath);
+                if ($dir !== '.' && $dir !== '' && ! is_dir($dir)) {
+                    @mkdir($dir, 0775, true);
+                }
+                if (file_put_contents($localOutputPath, $pdfBinary) === false) {
+                    Log::error('generateQuestionCheckPdf: 本地写入失败', [
+                        'paper_id' => $paper->paper_id ?? null,
+                        'local_path' => $localOutputPath,
+                    ]);
+
+                    return [];
+                }
+
+                Log::info('generateQuestionCheckPdf: 已写入本地', [
+                    'paper_id' => $paper->paper_id ?? null,
+                    'local_path' => $localOutputPath,
+                    'bytes' => strlen($pdfBinary),
+                ]);
+
+                return [
+                    'local_path' => $localOutputPath,
+                ];
+            }
+
             $path = 'custom_exams/'.($paper->paper_id ?? ('custom_'.time())).'.pdf';
             $url = $this->pdfStorageService->put($path, $pdfBinary);
             if (! $url) {

+ 135 - 18
app/Support/BlankPlaceholderRenderer.php

@@ -8,6 +8,132 @@ class BlankPlaceholderRenderer
     // 仅匹配“空白占位”型 underline,不匹配 \underline{\frac{...}} 这类有内容公式下划线
     private const BLANK_UNDERLINE_PATTERN = '/\\\\+underline\{\s*(?:(?:\\\\+qquad+|\\\\+quad+|\\\\+hspace\{[^{}]*\}|\\\\+hphantom\{\s*(?:(?:\\\\+qquad+|\\\\+quad+|\\\\+hspace\{[^{}]*\}|_{2,}|&nbsp;|&#160;|\s| |\\\\+\s+)*)\s*\}|_{2,}|&nbsp;|&#160;|\s| |\\\\+\s+)*)\s*\}/u';
 
+    /**
+     * Temporarily swaps every `\left(\quad\right)` / `\left(\qquad\right)` in a math fragment for an opaque token.
+     *
+     * Inside such a pair the \quad is legitimate spacing, not a fill-in-the-blank marker, so it is masked
+     * before the blank replacement runs and must be restored afterwards.
+     *
+     * @return array{0:string,1:array<string,string>} the masked fragment and a token => original-text map
+     */
+    private static function protectLeftRightQuadPairs(string $inner): array
+    {
+        $tokenToOriginal = [];
+
+        // \x5C spells a literal backslash, so PCRE never reads the \l of \left or the \q of \quad as an invalid escape.
+        $pattern = '/\x5Cleft\s*\(\s*(?:\x5Cquad|\x5Cqquad)\s*\x5Cright\s*\)/u';
+
+        $masked = preg_replace_callback(
+            $pattern,
+            static function (array $match) use (&$tokenToOriginal): string {
+                // Every token is unique, so the current map size is always the next free index.
+                $token = '<<<LR_PAIR_'.count($tokenToOriginal).'>>>';
+                $tokenToOriginal[$token] = $match[0];
+
+                return $token;
+            },
+            $inner
+        );
+
+        // preg_replace_callback() yields null on a PCRE error; fall back to the untouched input in that case.
+        if ($masked === null) {
+            return [$inner, $tokenToOriginal];
+        }
+
+        return [$masked, $tokenToOriginal];
+    }
+
+    /**
+     * Puts the original `\left(\quad\right)` text back in place of each protection token.
+     *
+     * @param  array<string,string>  $restoreMap  token => original LaTeX text
+     */
+    private static function restoreProtectedLeftRightQuadPairs(string $inner, array $restoreMap): string
+    {
+        // An empty map means nothing was masked, so the fragment is returned untouched.
+        return $restoreMap === []
+            ? $inner
+            : str_replace(array_keys($restoreMap), array_values($restoreMap), $inner);
+    }
+
+    /**
+     * Re-assembles a math fragment that was split at blank placeholders.
+     *
+     * Each non-empty piece is emitted as its own escaped `$...$` segment and a blank span is placed between
+     * consecutive pieces. Empty leading/trailing pieces are skipped so no dangling `$` is produced and the
+     * HTML spans stay aligned with the pieces around them.
+     *
+     * @param  array<int,string>  $parts  pieces in original order
+     */
+    private static function rebuildMathSegmentsWithBlankSpans(array $parts, string $blankSpan): string
+    {
+        $lastIndex = count($parts) - 1;
+        $output = '';
+
+        foreach ($parts as $position => $segment) {
+            if ($segment !== '') {
+                // A lone full stop is emitted as plain text; wrapping it would yield a stray "$.$".
+                $isLonePunctuation = preg_match('/^[\..。]$/u', $segment) === 1;
+                $output .= $isLonePunctuation
+                    ? $segment
+                    : htmlspecialchars('$'.$segment.'$', ENT_QUOTES | ENT_HTML5, 'UTF-8');
+            }
+
+            // One blank goes between every pair of neighbouring pieces, never after the last one.
+            if ($position < $lastIndex) {
+                $output .= $blankSpan;
+            }
+        }
+
+        // Nothing at all was emitted: the fragment is represented by a single blank.
+        return $output !== '' ? $output : $blankSpan;
+    }
+
+    /**
+     * Repairs a common dirty-data pattern: `$a=______` directly followed by Chinese text with the closing `$`
+     * missing, which would otherwise let the next `$...$` swallow everything in between.
+     *
+     * When a run of two or more underscores is immediately followed by a Han character or one of the
+     * punctuation marks in the lookahead class, and no other `$` lies between the anchoring `$` and the
+     * underscores, a closing `$` is inserted right after the underscores.
+     *
+     * In the well-formed stem「$a$=__________时」the `$` in front of the blank is the closing `$` of `$a$` and is
+     * directly followed by `=`; no `$` may be inserted there (it would produce `...=____$时`). The `(?!=)`
+     * lookahead therefore refuses to anchor on a `$` that is immediately followed by `=`.
+     *
+     * @param  string  $content  raw stem text
+     * @return string the stem with missing closing `$` characters added; unchanged when nothing matches
+     */
+    private static function closeMissingDollarAfterUnderscoreBlank(string $content): string
+    {
+        // Never anchor on the closing `$` of an already paired `$…$`: otherwise the trailing `$` of
+        // `$40^{\circ}$` would be taken as the start of a new segment, run on to the following `______度`,
+        // and wrongly produce `______$度` (see questions.id=332). The parity check in the loop enforces this.
+        if (! preg_match_all('/\$(?!=)([^$]*_{2,})(?=[\p{Han},。;])/u', $content, $matches, PREG_OFFSET_CAPTURE)) {
+            return $content;
+        }
+
+        $out = $content;
+        // Walk the matches back to front so an insertion never shifts the byte offsets still to be processed.
+        foreach (array_reverse($matches[0]) as [$text, $byteOffset]) {
+            $before = substr($out, 0, $byteOffset);
+            // An odd number of `$` before the anchor means the anchor itself closes an open segment: skip it.
+            if ((substr_count($before, '$') % 2) === 1) {
+                continue;
+            }
+            $len = strlen($text);
+            // Re-emit the matched text followed by the missing closing `$`.
+            $out = substr_replace($out, $text.'$', $byteOffset, $len);
+        }
+
+        return $out;
+    }
+
+    /**
+     * Rewrites a math segment that ends in `=\left(\quad\right)` / `=\left(\qquad\right)` into an answer blank.
+     *
+     * Choice questions often write the answer slot as `$...=\left(\quad\right)$`, meaning an answer line rather
+     * than LaTeX bracket spacing. Such a segment becomes `$...=$` followed by ten underscores placed outside
+     * the math segment (per the original note, the underscore rule later turns them into the standard blank).
+     * Segments without that suffix are returned unchanged.
+     */
+    private static function moveTrailingLeftQuadRightAnswerBlankToUnderscores(string $content): string
+    {
+        $answerSuffixes = ['=\\left(\\quad\\right)', '=\\left(\\qquad\\right)'];
+
+        $rewritten = preg_replace_callback(
+            '/\$(?:[^\$]|\\\\.)*?\\$/u',
+            static function (array $match) use ($answerSuffixes): string {
+                $segment = $match[0];
+                // Strip the opening and closing `$` to get the formula body.
+                $body = mb_substr($segment, 1, mb_strlen($segment) - 2);
+
+                foreach ($answerSuffixes as $suffix) {
+                    if (! str_ends_with($body, $suffix)) {
+                        continue;
+                    }
+
+                    // Keep everything in front of the suffix, close the formula right after `=`.
+                    $kept = mb_substr($body, 0, mb_strlen($body) - mb_strlen($suffix));
+
+                    return '$'.$kept.'=$'.'__________';
+                }
+
+                return $segment;
+            },
+            $content
+        );
+
+        // A PCRE failure yields null; fall back to the untouched input.
+        return $rewritten === null ? $content : $rewritten;
+    }
+
     /**
      * 将题干中的空括号/下划线/部分异常占位符统一替换为标准空位样式。
      *
@@ -21,14 +147,19 @@ class BlankPlaceholderRenderer
     ): array
     {
         $blankSpan = $blankSpan ?: self::DEFAULT_BLANK_SPAN;
-        $renderedContent = $content;
+        $renderedContent = self::closeMissingDollarAfterUnderscoreBlank($content);
+        $renderedContent = self::moveTrailingLeftQuadRightAnswerBlankToUnderscores($renderedContent);
 
         $latexPlaceholders = [];
         $counter = 0;
-        $renderedContent = preg_replace_callback('/\$(?:[^\$]|\\\\.)*\$/u', function ($matches) use (&$latexPlaceholders, &$counter, $blankSpan) {
+        // 非贪婪:遇到第一个闭合 $ 即结束;避免紧邻多段 "$...$…$…$" 时被吞成一段(混入中文标点,破坏公式边界)。
+        $renderedContent = preg_replace_callback('/\$(?:[^\$]|\\\\.)*?\\$/u', function ($matches) use (&$latexPlaceholders, &$counter, $blankSpan) {
             $latexContent = $matches[0];
             $inner = mb_substr($latexContent, 1, mb_strlen($latexContent) - 2);
 
+            // \left(\quad\right) 先保护,避免下方 \quad 替换误伤(见选择题题干中的合法间距)。
+            [$inner, $lrQuadRestore] = self::protectLeftRightQuadPairs($inner);
+
             // 数学环境内也可能包含填空占位符(如 $\\underline{\\qquad}$ / $\\angle A=\\underline{\\quad}$)
             $blankToken = '<<<BLANK_IN_MATH_'.$counter.'>>>';
             $innerWithBlanks = preg_replace(
@@ -44,25 +175,11 @@ class BlankPlaceholderRenderer
                 -1,
                 $blankCount
             );
+            $innerWithBlanks = self::restoreProtectedLeftRightQuadPairs($innerWithBlanks, $lrQuadRestore);
             if ($blankCount > 0) {
                 $parts = explode($blankToken, $innerWithBlanks);
-                $rebuilt = '';
-                $lastIndex = count($parts) - 1;
-                foreach ($parts as $index => $part) {
-                    if ($part !== '') {
-                        // 纯标点不再包进数学环境,避免生成 "$.$" 这类尾部格式。
-                        if (preg_match('/^[\..。]$/u', $part)) {
-                            $rebuilt .= $part;
-                        } else {
-                            $rebuilt .= htmlspecialchars('$'.$part.'$', ENT_QUOTES | ENT_HTML5, 'UTF-8');
-                        }
-                    }
-                    if ($index < $lastIndex) {
-                        $rebuilt .= $blankSpan;
-                    }
-                }
 
-                return $rebuilt === '' ? $blankSpan : $rebuilt;
+                return self::rebuildMathSegmentsWithBlankSpans($parts, $blankSpan);
             }
 
             $placeholder = '<<<LATEX_BLANK_'.$counter.'>>>';

+ 220 - 0
scripts/audit_rendered_placeholder_integrity.php

@@ -0,0 +1,220 @@
+<?php
+
+/**
+ * 全库题干「下划线占位 + 句点小黑点」流水线校验(与 paper-body 选择/填空口径对齐)。
+ *
+ * 用法:
+ *   php scripts/audit_rendered_placeholder_integrity.php [--connection mysql] [--table questions]
+ *       [--chunk 2000] [--out-dir storage/app/audit_placeholder]
+ *       [--types choice,fill]
+ *       [--check-unbalanced-dollars]
+ *
+ * 默认:仅扫描 choice + fill;输出 summary JSON + ndjson 明细。
+ * 「$ 个数奇偶」默认不测(题库脏数据多时可加 --check-unbalanced-dollars)。
+ * 重点排查项单独写入 *priority_issues*.ndjson(空位夹在双 $…$ 段之间、公式段以运算符结尾紧邻空位)。
+ */
+
+declare(strict_types=1);
+
+require __DIR__.'/../vendor/autoload.php';
+$app = require __DIR__.'/../bootstrap/app.php';
+$kernel = $app->make(Illuminate\Contracts\Console\Kernel::class);
+$kernel->bootstrap();
+
+use App\Support\BlankPlaceholderRenderer;
+use Illuminate\Support\Facades\DB;
+
+// Value-carrying options are declared with a single colon (value required) so that both the documented
+// `--table questions` spelling and `--table=questions` work. With the optional-value form (`::`) getopt()
+// does not accept a space-separated value: the option is reported as `false`, the value is taken for a
+// non-option argument, and every option after it is silently dropped.
+// `check-unbalanced-dollars` stays a flag with an optional value; only its presence is inspected.
+$options = getopt('', [
+    'table:',
+    'connection:',
+    'chunk:',
+    'out-dir:',
+    'types:',
+    'check-unbalanced-dollars::',
+]) ?: [];
+
+$checkUnbalancedDollars = array_key_exists('check-unbalanced-dollars', $options);
+
+$table = isset($options['table']) ? trim((string) $options['table']) : 'questions';
+$connection = isset($options['connection']) ? trim((string) $options['connection']) : config('database.default');
+// The chunk size is clamped to at least 100 rows.
+$chunk = isset($options['chunk']) ? max(100, (int) $options['chunk']) : 2000;
+$defaultOut = dirname(__DIR__).'/storage/app/audit_placeholder';
+$outDir = isset($options['out-dir']) ? rtrim((string) $options['out-dir'], '/') : $defaultOut;
+// Only choice and fill questions are scanned by default; `--types=all` (or an empty value) disables the filter.
+$typeFilter = isset($options['types']) ? trim((string) $options['types']) : 'choice,fill';
+$types = [];
+if (strtolower($typeFilter) !== 'all' && $typeFilter !== '') {
+    // Comma-separated list; surrounding whitespace and empty entries are discarded.
+    $types = array_values(array_filter(array_map('trim', explode(',', $typeFilter)), static fn($v) => $v !== ''));
+}
+
+// Best-effort creation of the output directory; a failure surfaces below when the detail file cannot be opened.
+@mkdir($outDir, 0777, true);
+// One timestamp is shared by all three output files of this run.
+$stamp = date('Ymd_His');
+$summaryPath = "{$outDir}/rendered_placeholder_audit_summary_{$stamp}.json";
+$detailPath = "{$outDir}/rendered_placeholder_audit_details_{$stamp}.ndjson";
+$priorityDetailPath = "{$outDir}/rendered_placeholder_audit_priority_issues_{$stamp}.ndjson";
+
+// Every recorded issue is appended to this ndjson file, one JSON object per line.
+$detailFp = fopen($detailPath, 'wb');
+if ($detailFp === false) {
+    fwrite(STDERR, "Failed to open detail file: {$detailPath}\n");
+    exit(1);
+}
+
+// Issue types that are additionally copied into the separate priority ndjson file.
+$priorityIssueTypes = [
+    'blank_between_math_segments',
+    'math_ends_with_operator_before_blank',
+];
+
+$priorityFp = fopen($priorityDetailPath, 'wb');
+if ($priorityFp === false) {
+    fwrite(STDERR, "Failed to open priority detail file: {$priorityDetailPath}\n");
+    exit(1);
+}
+
+// Per-issue-type occurrence counters and the capped example lists shown in the summary.
+$issues = [];
+$examples = [];
+
+// Number of rows processed so far, and the start time used for the elapsed-seconds figure.
+$scanned = 0;
+$startedAt = microtime(true);
+
+// Records one finding: bumps the per-type counter, appends an ndjson line to the detail file (and to the
+// priority file for priority types), and keeps at most 20 example entries per type for the summary.
+$recordIssue = static function (string $type, object $row, string $reason, string $rendered) use (&$issues, &$examples, $detailFp, $priorityFp, $priorityIssueTypes): void {
+    // The counter and the example list for a type are always created together.
+    $issues[$type] ??= 0;
+    $examples[$type] ??= [];
+    $issues[$type]++;
+
+    $questionId = (int) $row->id;
+    $questionType = (string) ($row->question_type ?? '');
+
+    // Previews are truncated so the detail files stay readable.
+    $ndjsonLine = json_encode([
+        'issue' => $type,
+        'id' => $questionId,
+        'question_type' => $questionType,
+        'reason' => $reason,
+        'stem_preview' => mb_substr((string) $row->stem, 0, 220),
+        'rendered_preview' => mb_substr($rendered, 0, 260),
+    ], JSON_UNESCAPED_UNICODE)."\n";
+
+    fwrite($detailFp, $ndjsonLine);
+    if (in_array($type, $priorityIssueTypes, true)) {
+        fwrite($priorityFp, $ndjsonLine);
+    }
+
+    // The example list is capped at 20 entries per issue type.
+    if (count($examples[$type]) >= 20) {
+        return;
+    }
+
+    $examples[$type][] = [
+        'id' => $questionId,
+        'question_type' => $questionType,
+        'reason' => $reason,
+    ];
+};
+
+// Blank markup handed to the renderer and appended to fill questions without placeholders.
+// NOTE(review): presumably the renderer's standard blank <span>; defaultBlankSpan() is not visible here.
+$blankSpan = BlankPlaceholderRenderer::defaultBlankSpan();
+// Only the columns the audit needs, ordered by id, skipping rows without a stem.
+$query = DB::connection($connection)
+    ->table($table)
+    ->select('id', 'question_type', 'stem')
+    ->whereNotNull('stem')
+    ->orderBy('id');
+
+// An empty type list (e.g. --types=all) means no question_type filter is applied.
+if ($types !== []) {
+    $query->whereIn('question_type', $types);
+}
+
+// Walk the table in id-ordered chunks, render every stem the way paper-body does, and run the structural
+// checks below on the rendered HTML. Each hit is reported through $recordIssue.
+$query->chunkById($chunk, function ($rows) use (&$scanned, $recordIssue, $blankSpan, $checkUnbalancedDollars): void {
+    foreach ($rows as $row) {
+        $stem = (string) $row->stem;
+        $type = strtolower(trim((string) ($row->question_type ?? '')));
+
+        [$rendered, $hasPlaceholders] = BlankPlaceholderRenderer::replaceToBlankSpan($stem, $blankSpan, false, false);
+
+        // Mirror the current paper-body rendering rules (only choice / fill are covered).
+        if ($type === 'choice') {
+            $rendered = BlankPlaceholderRenderer::normalizeTerminalPunctuation($rendered, 'remove');
+        } elseif ($type === 'fill') {
+            // A fill question without any placeholder gets one blank appended.
+            if (! $hasPlaceholders) {
+                $rendered .= ' '.$blankSpan;
+            }
+            $rendered = BlankPlaceholderRenderer::normalizeTerminalPunctuation($rendered, 'dot');
+            $rendered = BlankPlaceholderRenderer::normalizePeriodBeforeTrailingParentheticalNote($rendered, '.');
+            $rendered = BlankPlaceholderRenderer::appendTerminalPunctuationIfMissing($rendered, '.');
+        }
+
+        // 1) Pattern of case 30949 (presumably a question id): \left( + blank + \right) split into
+        //    several separate math segments.
+        if (preg_match('/\$\\s*\\\\left[\\(\\[]\\s*\$\\s*<span[^>]*>.*?<\\/span>\\s*\$\\s*\\\\right[\\)\\]]\\s*\$/u', $rendered)) {
+            $recordIssue('broken_left_right_split', $row, 'left/right wrapped blank split into separate math segments', $rendered);
+        }
+
+        // 2) A blank sandwiched between two math segments (high-risk structure that often breaks the
+        //    meaning of the formula).
+        if (preg_match('/\$[^$]*\$\\s*<span[^>]*>.*?<\\/span>\\s*\$[^$]*\$/u', $rendered)) {
+            $recordIssue('blank_between_math_segments', $row, 'blank span inserted between two $...$ segments', $rendered);
+        }
+
+        // 3) Odd number of `$` in the visible text after rendering. Skipped by default because the raw
+        //    stems contain a lot of dirty data; enable with --check-unbalanced-dollars.
+        if ($checkUnbalancedDollars) {
+            $visibleForDollar = html_entity_decode(strip_tags($rendered), ENT_QUOTES | ENT_HTML5, 'UTF-8');
+            if ((substr_count($visibleForDollar, '$') % 2) !== 0) {
+                $recordIssue('unbalanced_dollar_after_render', $row, 'odd number of $ in visible text after rendering', $rendered);
+            }
+        }
+
+        // 4) A math segment that ends with an operator right before a blank (possibly incomplete meaning).
+        //    `\cdot` is matched as a whole command in its own alternative: inside a character class it
+        //    would expand to the single letters c, d, o, t plus the backslash, flagging any segment that
+        //    merely ends in one of those letters (e.g. `$d$`).
+        if (preg_match('/\$[^$]*(?:[=+\-×÷*]|\\\\cdot)\\s*\$\\s*<span[^>]*>.*?<\\/span>/u', $rendered)) {
+            $recordIssue('math_ends_with_operator_before_blank', $row, 'math segment ends with operator right before blank span', $rendered);
+        }
+
+        // 5) Regression of case 2562 (presumably a question id): a blank span directly followed by a
+        //    stray `$` and a Han character, i.e. a wrongly inserted `$`.
+        if (preg_match('/<\\/span>\s*\$\s*[\p{Han}]/u', $rendered)) {
+            $recordIssue('span_then_dollar_before_han', $row, 'blank span followed by stray $ before Chinese (formula boundary break)', $rendered);
+        }
+
+        // 6) Leaked internal placeholder tokens; these must never reach the final HTML.
+        if (preg_match('/<<<|BLANK_IN_MATH|LATEX_BLANK|LR_PAIR_/u', $rendered)) {
+            $recordIssue('internal_placeholder_token_leak', $row, 'placeholder token not restored in output', $rendered);
+        }
+
+        $scanned++;
+        // Progress heartbeat on STDERR every 5000 rows.
+        if (($scanned % 5000) === 0) {
+            fwrite(STDERR, "scanned={$scanned}\n");
+        }
+    }
+}, 'id');
+
+// All findings have been written; close both ndjson files before the summary is produced.
+fclose($detailFp);
+fclose($priorityFp);
+
+$elapsed = round(microtime(true) - $startedAt, 3);
+
+// Counts for the two priority rules, reported even when they are zero.
+$investigationFocus = [
+    'rules' => $priorityIssueTypes,
+    'issue_counts' => [
+        'blank_between_math_segments' => $issues['blank_between_math_segments'] ?? 0,
+        'math_ends_with_operator_before_blank' => $issues['math_ends_with_operator_before_blank'] ?? 0,
+    ],
+];
+
+$summary = [
+    'table' => $table,
+    'connection' => $connection,
+    'chunk' => $chunk,
+    'types_filter' => $types,
+    'scanned_rows' => $scanned,
+    'investigation_focus' => $investigationFocus,
+    // Lists the dollar-parity check only when it was not enabled for this run.
+    'checks_disabled_by_default' => array_values(array_filter([
+        $checkUnbalancedDollars ? null : 'unbalanced_dollar_after_render ($ odd/even in visible text)',
+    ])),
+    'issue_counts' => $issues,
+    // Per issue type, only the ids of the collected examples are exported.
+    'example_ids' => array_map(static fn(array $list) => array_column($list, 'id'), array_filter($examples, 'is_array')),
+    'elapsed_seconds' => $elapsed,
+    'generated_at' => date('c'),
+    'detail_path' => $detailPath,
+    'priority_issues_detail_path' => $priorityDetailPath,
+];
+
+// Persist the summary next to the detail files.
+file_put_contents($summaryPath, json_encode($summary, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
+
+// Echo the output paths plus the full summary on STDOUT for the caller.
+echo json_encode([
+    'summary_path' => $summaryPath,
+    'detail_path' => $detailPath,
+    'priority_issues_detail_path' => $priorityDetailPath,
+    'summary' => $summary,
+], JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT)."\n";

+ 276 - 0
scripts/dump_priority_issue_pdfs_local.php

@@ -0,0 +1,276 @@
+<?php
+
+/**
+ * 将审计 priority 明细里的题目分批导出为本地 PDF(题目质检模板),不上传 CDN。
+ *
+ * 用法:
+ *   php scripts/dump_priority_issue_pdfs_local.php [--per-pdf 150]
+ *       [--file storage/app/audit_placeholder/rendered_placeholder_audit_priority_issues_*.ndjson]
+ *       [--out-dir storage/app/audit_placeholder/local_priority_pdfs]
+ *       [--connection mysql] [--table questions]
+ *
+ * 不指定 --file 时,取 storage/app/audit_placeholder 下最新的 priority_issues ndjson。
+ * 每份 PDF 内题量由 --per-pdf 控制(默认 150,可改 50/200 等)。
+ */
+
+declare(strict_types=1);
+
+require __DIR__.'/../vendor/autoload.php';
+$app = require __DIR__.'/../bootstrap/app.php';
+$kernel = $app->make(Illuminate\Contracts\Console\Kernel::class);
+$kernel->bootstrap();
+
+use App\Services\ExamPdfExportService;
+use App\Support\PaperNaming;
+use Illuminate\Support\Facades\DB;
+
+// Every option carries a value, so each is declared with a single colon (value required). This makes both
+// the documented `--per-pdf 150` spelling and `--per-pdf=150` work. With the optional-value form (`::`)
+// getopt() does not accept a space-separated value: the option is reported as `false`, the value is taken
+// for a non-option argument, and all options after it are silently dropped.
+$options = getopt('', [
+    'per-pdf:',
+    'file:',
+    'out-dir:',
+    'connection:',
+    'table:',
+]) ?: [];
+
+// At least one question per PDF.
+$perPdf = isset($options['per-pdf']) ? max(1, (int) $options['per-pdf']) : 150;
+$connection = isset($options['connection']) ? trim((string) $options['connection']) : config('database.default');
+$table = isset($options['table']) ? trim((string) $options['table']) : 'questions';
+
+$defaultOut = dirname(__DIR__).'/storage/app/audit_placeholder/local_priority_pdfs';
+$outDir = isset($options['out-dir']) ? rtrim((string) $options['out-dir'], '/') : $defaultOut;
+// Relative --out-dir values are resolved against the project root. str_starts_with() replaces $outDir[0]
+// because the string can be empty after rtrim(), and reading offset 0 of an empty string raises a warning.
+if (! str_starts_with($outDir, '/')) {
+    $outDir = dirname(__DIR__).'/'.$outDir;
+}
+
+// Resolve the priority ndjson to read: the --file value, or the newest audit output when none is given.
+$ndjsonPath = isset($options['file']) ? trim((string) $options['file']) : '';
+if ($ndjsonPath === '') {
+    $auditDir = dirname(__DIR__).'/storage/app/audit_placeholder';
+    $glob = glob($auditDir.'/rendered_placeholder_audit_priority_issues_*.ndjson') ?: [];
+    if ($glob === []) {
+        fwrite(STDERR, "No priority ndjson under {$auditDir}. Run audit script first.\n");
+        exit(1);
+    }
+    // Descending name order; assumes the file names end in a sortable timestamp, so index 0 is the newest.
+    usort($glob, static fn(string $a, string $b): int => strcmp($b, $a));
+    $ndjsonPath = $glob[0];
+}
+
+if (! is_readable($ndjsonPath)) {
+    fwrite(STDERR, "Cannot read: {$ndjsonPath}\n");
+    exit(1);
+}
+
+// Collect question ids in file order, keeping only the first occurrence of each id.
+$orderedUniqueIds = [];
+$seen = [];
+$fh = fopen($ndjsonPath, 'rb');
+if ($fh === false) {
+    fwrite(STDERR, "Failed to open {$ndjsonPath}\n");
+    exit(1);
+}
+while (($line = fgets($fh)) !== false) {
+    $line = trim($line);
+    if ($line === '') {
+        continue;
+    }
+    // Lines that are not a JSON object with an "id" key are skipped silently.
+    $row = json_decode($line, true);
+    if (! is_array($row) || ! isset($row['id'])) {
+        continue;
+    }
+    $id = (int) $row['id'];
+    // Ignore non-positive ids and ids that were already collected.
+    if ($id <= 0 || isset($seen[$id])) {
+        continue;
+    }
+    $seen[$id] = true;
+    $orderedUniqueIds[] = $id;
+}
+fclose($fh);
+
+if ($orderedUniqueIds === []) {
+    fwrite(STDERR, "No question ids in {$ndjsonPath}\n");
+    exit(1);
+}
+
+// Split the ids into batches of --per-pdf questions; each batch becomes one PDF inside a timestamped run directory.
+$batches = array_chunk($orderedUniqueIds, $perPdf);
+$stamp = date('Ymd_His');
+$runDir = $outDir.'/'.$stamp;
+// mkdir() may fail simply because the directory already exists, hence the second is_dir() check.
+if (! @mkdir($runDir, 0775, true) && ! is_dir($runDir)) {
+    fwrite(STDERR, "Cannot mkdir {$runDir}\n");
+    exit(1);
+}
+
+/** @var ExamPdfExportService $pdfService */
+$pdfService = $app->make(ExamPdfExportService::class);
+
+// One manifest entry per PDF that was written successfully.
+$written = [];
+$batchIndex = 0;
+foreach ($batches as $chunk) {
+    $batchIndex++;
+    $questions = DB::connection($connection)
+        ->table($table)
+        ->whereIn('id', $chunk)
+        ->get();
+
+    // Index the fetched rows by id so the ndjson order can be restored when grouping.
+    $questionMap = [];
+    foreach ($questions as $q) {
+        $questionMap[(int) $q->id] = $q;
+    }
+
+    $groupedQuestions = groupQuestionsByType($questionMap, $chunk);
+    $firstId = $chunk[0];
+    $lastId = $chunk[count($chunk) - 1];
+    // The file name encodes the batch number, the question count and the first/last id of the batch.
+    $safeTitle = PaperNaming::toSafeFilename(
+        'priority_issues_batch_'.$batchIndex.'_'.count($chunk).'qs_'.$firstId.'-'.$lastId
+    );
+    $filename = $safeTitle.'.pdf';
+    $localPath = $runDir.'/'.$filename;
+
+    $paper = buildVirtualPaper(
+        '重点排查 PDF 第 '.$batchIndex.'/'.count($batches).' 批('.count($chunk).' 题)',
+        'priority_local_'.$stamp.'_b'.$batchIndex,
+        $groupedQuestions
+    );
+
+    // Passing $localPath asks the service to write the PDF to that path; the result is checked for 'local_path' below.
+    $result = $pdfService->generateQuestionCheckPdf(
+        $paper,
+        $groupedQuestions,
+        ['name' => '本地导出', 'grade' => '________'],
+        ['name' => '________'],
+        $localPath
+    );
+
+    // A result without 'local_path' is treated as a failed batch; the whole run is aborted.
+    if (($result['local_path'] ?? '') === '') {
+        fwrite(STDERR, "FAILED batch {$batchIndex}: ".json_encode($result, JSON_UNESCAPED_UNICODE)."\n");
+        exit(1);
+    }
+
+    $written[] = [
+        'batch' => $batchIndex,
+        'count' => count($chunk),
+        'ids_range' => [$firstId, $lastId],
+        'local_path' => $localPath,
+        'bytes' => @filesize($localPath) ?: 0,
+    ];
+
+    fwrite(STDERR, "wrote batch {$batchIndex}/".count($batches)." → {$filename}\n");
+}
+
+// The manifest describes the whole run and is stored next to the PDFs.
+$manifestPath = $runDir.'/manifest.json';
+file_put_contents($manifestPath, json_encode([
+    'generated_at' => date('c'),
+    'source_ndjson' => realpath($ndjsonPath) ?: $ndjsonPath,
+    'connection' => $connection,
+    'table' => $table,
+    'per_pdf' => $perPdf,
+    'total_questions' => count($orderedUniqueIds),
+    'batch_count' => count($batches),
+    'output_directory' => $runDir,
+    'batches' => $written,
+], JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
+
+// Short machine-readable report on STDOUT.
+echo json_encode([
+    'output_directory' => $runDir,
+    'manifest_path' => $manifestPath,
+    'batch_count' => count($written),
+    'total_questions' => count($orderedUniqueIds),
+    'batches' => $written,
+], JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT)."\n";
+
+/**
+ * Sorts the fetched question rows into choice / fill / answer buckets, following the order of $originalOrder.
+ *
+ * Ids without a fetched row are skipped. Question numbers are assigned consecutively (starting at 1) across
+ * all buckets, in the order the questions are visited.
+ *
+ * @param  array<int, object>  $questionMap  fetched rows keyed by question id
+ * @param  array<int, int>  $originalOrder  question ids in the order they should be numbered
+ * @return array<string, array<int, object>>
+ */
+function groupQuestionsByType(array $questionMap, array $originalOrder): array
+{
+    $buckets = ['choice' => [], 'fill' => [], 'answer' => []];
+    $nextNumber = 1;
+
+    foreach ($originalOrder as $questionId) {
+        $record = $questionMap[$questionId] ?? null;
+        if ($record === null) {
+            continue;
+        }
+
+        $bucket = normalizeQuestionType($record->question_type ?? null);
+
+        $buckets[$bucket][] = (object) [
+            'id' => $record->id,
+            'question_number' => $nextNumber++,
+            'content' => $record->stem,
+            // Options stored as a JSON string are decoded; anything else is passed through (null becomes []).
+            'options' => is_string($record->options) ? json_decode($record->options, true) : ($record->options ?? []),
+            'answer' => $record->answer,
+            'solution' => $record->solution,
+            'score' => getDefaultScore($bucket),
+            'difficulty' => $record->difficulty,
+            'kp_code' => $record->kp_code,
+        ];
+    }
+
+    return $buckets;
+}
+
+/**
+ * Maps a raw question_type value onto one of the three buckets: 'choice', 'fill' or 'answer'.
+ *
+ * The value is trimmed and lower-cased before lookup. Missing, empty, "0" and unknown values all fall
+ * back to 'answer'.
+ */
+function normalizeQuestionType(?string $type): string
+{
+    // Same values the loose falsy check covers for a ?string: null, '' and '0'.
+    if ($type === null || $type === '' || $type === '0') {
+        return 'answer';
+    }
+
+    return match (strtolower(trim($type))) {
+        'choice', '选择题', 'single_choice', 'multiple_choice' => 'choice',
+        'fill', '填空题', 'blank' => 'fill',
+        'answer', '解答题', 'subjective', 'calculation', 'proof' => 'answer',
+        default => 'answer',
+    };
+}
+
+/**
+ * Default score per question bucket: 10 points for 'answer', 5 points for everything else.
+ */
+function getDefaultScore(string $type): int
+{
+    return $type === 'answer' ? 10 : 5;
+}
+
+/**
+ * Builds an in-memory paper object for a batch of grouped questions.
+ *
+ * The total score and the question count are summed over all buckets; the paper id is $studentId plus a
+ * uniqid() suffix.
+ *
+ * @param  array<string, array<int, object>>  $groupedQuestions
+ */
+function buildVirtualPaper(string $paperName, string $studentId, array $groupedQuestions): object
+{
+    $scoreSum = 0;
+    $questionCount = 0;
+
+    foreach ($groupedQuestions as $bucket) {
+        foreach ($bucket as $question) {
+            $questionCount++;
+            $scoreSum += $question->score;
+        }
+    }
+
+    return (object) [
+        'paper_id' => $studentId.'_'.uniqid(),
+        'paper_name' => $paperName,
+        'total_score' => $scoreSum,
+        'total_questions' => $questionCount,
+        'created_at' => now()->toDateTimeString(),
+    ];
+}

+ 283 - 0
scripts/generate_sample_placeholder_audit_pdf.php

@@ -0,0 +1,283 @@
+<?php
+
+/**
+ * 从占位符审计明细(ndjson)中按类型轮询抽样 N 道题,生成「题目质检」PDF(与 POST /api/questions/pdf 同源逻辑)。
+ *
+ * 用法:
+ *   php scripts/generate_sample_placeholder_audit_pdf.php [--count 30]
+ *       [--detail storage/app/audit_placeholder/rendered_placeholder_audit_details_*.ndjson]
+ *       [--connection mysql] [--table questions]
+ *
+ * 若不指定 --detail,则自动选用 storage/app/audit_placeholder 下最新的 rendered_placeholder_audit_details_*.ndjson。
+ *
+ * 抽样优先级(轮询):
+ *   internal_placeholder_token_leak → broken_left_right_split → span_then_dollar_before_han
+ *   → blank_between_math_segments → unbalanced_dollar_after_render → math_ends_with_operator_before_blank
+ */
+
+declare(strict_types=1);
+
+require __DIR__.'/../vendor/autoload.php';
+$app = require __DIR__.'/../bootstrap/app.php';
+$kernel = $app->make(Illuminate\Contracts\Console\Kernel::class);
+$kernel->bootstrap();
+
+use App\Services\ExamPdfExportService;
+use Illuminate\Support\Facades\DB;
+
+// Parse CLI options. Long options are declared with a required value (":"), which
+// accepts both "--name value" and "--name=value". The previous optional-value form
+// ("::") only accepts "--name=value" and makes getopt() stop at the first bare
+// argument, so the documented "--count 30" usage was parsed as count=false and every
+// following option was dropped.
+$options = getopt('', [
+    'count:',
+    'detail:',
+    'connection:',
+    'table:',
+]);
+
+// Number of questions to sample, clamped to the range 1..100 (default 30).
+$count = isset($options['count']) ? max(1, min(100, (int) $options['count'])) : 30;
+// Database connection name; defaults to the configured default connection.
+$connection = isset($options['connection']) ? trim((string) $options['connection']) : config('database.default');
+// Table that holds the question rows (default "questions").
+$table = isset($options['table']) ? trim((string) $options['table']) : 'questions';
+
+// Resolve the audit detail file: an explicit --detail value wins; otherwise pick the
+// rendered_placeholder_audit_details_*.ndjson file whose name sorts last.
+$detailPath = trim((string) ($options['detail'] ?? ''));
+if ($detailPath === '') {
+    $auditDir = dirname(__DIR__).'/storage/app/audit_placeholder';
+    $candidates = glob($auditDir.'/rendered_placeholder_audit_details_*.ndjson');
+    if ($candidates === false || $candidates === []) {
+        fwrite(STDERR, "No detail ndjson found under {$auditDir}. Run scripts/audit_rendered_placeholder_integrity.php first.\n");
+        exit(1);
+    }
+    // Descending byte-wise string sort puts the greatest file name at index 0.
+    rsort($candidates, SORT_STRING);
+    $detailPath = $candidates[0];
+}
+
+// Abort early when the chosen file cannot be read.
+if (! is_readable($detailPath)) {
+    fwrite(STDERR, "Cannot read detail file: {$detailPath}\n");
+    exit(1);
+}
+
+// Issue types eligible for sampling, in round-robin priority order.
+$priorityTypes = [
+    'internal_placeholder_token_leak',
+    'broken_left_right_split',
+    'span_then_dollar_before_han',
+    'blank_between_math_segments',
+    'unbalanced_dollar_after_render',
+    'math_ends_with_operator_before_blank',
+];
+
+// One (initially empty) id list per issue type.
+$buckets = [];
+foreach ($priorityTypes as $issueType) {
+    $buckets[$issueType] = [];
+}
+
+$fh = fopen($detailPath, 'rb');
+if ($fh === false) {
+    fwrite(STDERR, "Failed to open {$detailPath}\n");
+    exit(1);
+}
+
+// Each non-empty line is one JSON object; only rows with both `issue` and `id`,
+// a positive id and a known issue type are kept.
+for ($line = fgets($fh); $line !== false; $line = fgets($fh)) {
+    $line = trim($line);
+    if ($line === '') {
+        continue;
+    }
+
+    $row = json_decode($line, true);
+    if (! is_array($row) || ! isset($row['issue'], $row['id'])) {
+        continue;
+    }
+
+    $issue = (string) $row['issue'];
+    $id = (int) $row['id'];
+    if ($id > 0 && isset($buckets[$issue])) {
+        $buckets[$issue][] = $id;
+    }
+}
+fclose($fh);
+
+// Drop repeated ids inside each bucket, keeping first-seen order and a 0-based list.
+$buckets = array_map(
+    static fn (array $ids): array => array_values(array_unique($ids)),
+    $buckets
+);
+
+// Round-robin sampling: each pass takes at most one not-yet-selected id from every
+// issue type (in priority order) until $count ids are chosen or a full pass adds nothing.
+$selectedOrder = [];
+$selectedSet = [];
+
+do {
+    $progress = false;
+
+    foreach ($priorityTypes as $type) {
+        if (count($selectedOrder) >= $count) {
+            break 2;
+        }
+
+        // Consume the bucket from the front until an id that is not selected yet turns up;
+        // array_shift() returns null once the bucket is empty.
+        while (($candidate = array_shift($buckets[$type])) !== null) {
+            if (isset($selectedSet[$candidate])) {
+                continue;
+            }
+
+            $selectedSet[$candidate] = true;
+            $selectedOrder[] = $candidate;
+            $progress = true;
+            break;
+        }
+    }
+} while ($progress && count($selectedOrder) < $count);
+
+if ($selectedOrder === []) {
+    fwrite(STDERR, "No issue rows parsed from {$detailPath}; nothing to sample.\n");
+    exit(1);
+}
+
+// Fewer ids than requested is not fatal; report it and continue with what was found.
+if (count($selectedOrder) < $count) {
+    fwrite(STDERR, 'warning: only '.count($selectedOrder)." unique ids available (requested {$count}).\n");
+}
+
+// Load the sampled rows and index them by integer id.
+$questionMap = [];
+$fetchedRows = DB::connection($connection)
+    ->table($table)
+    ->whereIn('id', $selectedOrder)
+    ->get();
+foreach ($fetchedRows as $fetchedRow) {
+    $questionMap[(int) $fetchedRow->id] = $fetchedRow;
+}
+
+// Sampled ids with no matching row are reported but do not stop the run.
+$missing = array_values(array_diff($selectedOrder, array_keys($questionMap)));
+if ($missing !== []) {
+    fwrite(STDERR, 'warning: ids not found in '.$table.' ('.$connection.'): '.implode(',', $missing)."\n");
+}
+
+// Group the rows by question type (in sampling order) and wrap them in a virtual paper.
+$groupedQuestions = groupQuestionsByType($questionMap, $selectedOrder);
+$paperName = '占位符抽样质检_'.count($selectedOrder).'题';
+$paper = buildVirtualPaper($paperName, 'placeholder_audit_sample', $groupedQuestions);
+
+/** @var ExamPdfExportService $pdf */
+$pdf = $app->make(ExamPdfExportService::class);
+$result = $pdf->generateQuestionCheckPdf(
+    $paper,
+    $groupedQuestions,
+    ['name' => '质检抽样', 'grade' => '________'],
+    ['name' => '________']
+);
+
+// Persist a manifest describing this sampling run and echo a summary to stdout.
+$manifestPath = dirname(__DIR__).'/storage/app/audit_placeholder/sample_pdf_manifest_'.date('Ymd_His').'.json';
+$manifest = [
+    'generated_at' => date('c'),
+    'detail_source' => $detailPath,
+    'connection' => $connection,
+    'table' => $table,
+    'requested_count' => $count,
+    'sampled_question_ids' => $selectedOrder,
+    'pdf_result' => $result,
+];
+
+// json_encode() returns false when the payload cannot be encoded (e.g. invalid UTF-8
+// inside the PDF result). Previously that wrote an empty manifest and printed a blank
+// line. Substitute/partial-output flags keep the output usable, and any encoding
+// problem is reported on STDERR instead of being swallowed.
+$jsonFlags = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR;
+$encodeJson = static function (array $payload, string $label) use ($jsonFlags): string {
+    $json = json_encode($payload, $jsonFlags);
+    if ($json === false || json_last_error() !== JSON_ERROR_NONE) {
+        fwrite(STDERR, "warning: {$label} could not be fully JSON-encoded: ".json_last_error_msg()."\n");
+    }
+
+    return $json === false ? '' : $json;
+};
+
+// Best-effort directory creation; a real failure shows up when the write below fails.
+@mkdir(dirname($manifestPath), 0777, true);
+file_put_contents($manifestPath, $encodeJson($manifest, 'manifest'));
+
+echo $encodeJson([
+    'detail_source' => $detailPath,
+    'manifest_path' => $manifestPath,
+    'sampled_ids' => $selectedOrder,
+    'pdf' => $result,
+], 'summary')."\n";
+
+/**
+ * Convert fetched question rows into per-type lists of question objects.
+ *
+ * Ids are visited in $originalOrder. Ids without a row in $questionMap are skipped and
+ * do not consume a question number, so numbering stays contiguous starting at 1.
+ * Each row's type is normalised with normalizeQuestionType() and its score comes from
+ * getDefaultScore().
+ *
+ * @param  array<int, object>  $questionMap  Question rows keyed by id.
+ * @param  array<int, int>  $originalOrder  Ids in the order questions should be numbered.
+ * @return array<string, array<int, object>>  Question lists keyed by 'choice', 'fill', 'answer'.
+ */
+function groupQuestionsByType(array $questionMap, array $originalOrder): array
+{
+    $grouped = [
+        'choice' => [],
+        'fill' => [],
+        'answer' => [],
+    ];
+
+    $questionNumber = 1;
+
+    foreach ($originalOrder as $id) {
+        if (! isset($questionMap[$id])) {
+            continue;
+        }
+
+        $q = $questionMap[$id];
+        $type = normalizeQuestionType($q->question_type ?? null);
+
+        // A string value is treated as JSON. Empty or undecodable JSON yields null, which
+        // previously leaked through as `options => null`; it now gets the same [] fallback
+        // as a null column value.
+        $options = is_string($q->options) ? json_decode($q->options, true) : $q->options;
+
+        $questionObj = (object) [
+            'id' => $q->id,
+            'question_number' => $questionNumber++,
+            'content' => $q->stem,
+            'options' => $options ?? [],
+            'answer' => $q->answer,
+            'solution' => $q->solution,
+            'score' => getDefaultScore($type),
+            'difficulty' => $q->difficulty,
+            'kp_code' => $q->kp_code,
+        ];
+
+        $grouped[$type][] = $questionObj;
+    }
+
+    return $grouped;
+}
+
+/**
+ * Map a raw question-type label onto one of three canonical buckets.
+ *
+ * The label is trimmed and lower-cased before matching. A null/empty label,
+ * or any label that is not a known alias, falls back to 'answer'.
+ *
+ * @param  string|null  $type  Raw label (English key or Chinese name).
+ * @return string  'choice', 'fill' or 'answer'.
+ */
+function normalizeQuestionType(?string $type): string
+{
+    $label = $type ? strtolower(trim($type)) : '';
+
+    // Canonical bucket => every spelling that lands in it.
+    $aliasesByBucket = [
+        'choice' => ['choice', '选择题', 'single_choice', 'multiple_choice'],
+        'fill' => ['fill', '填空题', 'blank'],
+        'answer' => ['answer', '解答题', 'subjective', 'calculation', 'proof'],
+    ];
+
+    foreach ($aliasesByBucket as $bucket => $aliases) {
+        if (in_array($label, $aliases, true)) {
+            return $bucket;
+        }
+    }
+
+    return 'answer';
+}
+
+/**
+ * Default score for a canonical question type.
+ *
+ * @param  string  $type  Canonical type such as 'choice', 'fill' or 'answer'.
+ * @return int  10 for 'answer', 5 for every other value.
+ */
+function getDefaultScore(string $type): int
+{
+    return $type === 'answer' ? 10 : 5;
+}
+
+/**
+ * Build an in-memory summary object describing a paper made of the given questions.
+ *
+ * The total score is the sum of every question's `score` property and the question
+ * count is the number of questions across all groups. The paper id is the student id,
+ * the current Unix timestamp and a uniqid() value, joined with underscores.
+ *
+ * @param  string  $paperName  Stored as `paper_name`.
+ * @param  string  $studentId  Prefix of the generated `paper_id`.
+ * @param  array<string, array<int, object>>  $groupedQuestions  Question lists keyed by type.
+ * @return object  stdClass with paper_id, paper_name, total_score, total_questions, created_at.
+ */
+function buildVirtualPaper(string $paperName, string $studentId, array $groupedQuestions): object
+{
+    $scoreSum = 0;
+    $questionCount = 0;
+
+    foreach ($groupedQuestions as $group) {
+        foreach ($group as $question) {
+            $questionCount++;
+            $scoreSum += $question->score;
+        }
+    }
+
+    $paper = new \stdClass();
+    $paper->paper_id = $studentId.'_'.time().'_'.uniqid();
+    $paper->paper_name = $paperName;
+    $paper->total_score = $scoreSum;
+    $paper->total_questions = $questionCount;
+    $paper->created_at = now()->toDateTimeString();
+
+    return $paper;
+}

+ 111 - 0
tests/Unit/BlankPlaceholderRendererTest.php

@@ -0,0 +1,111 @@
+<?php
+
+namespace Tests\Unit;
+
+use App\Support\BlankPlaceholderRenderer;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Tests for BlankPlaceholderRenderer::replaceToBlankSpan() on question stems that mix
+ * inline LaTeX ($...$) with blank placeholders.
+ */
+class BlankPlaceholderRendererTest extends TestCase
+{
+    /**
+     * Call replaceToBlankSpan() with the default blank span and `false` for the third
+     * and fourth arguments, as every test in this class does.
+     *
+     * @return mixed  The renderer's return value; the tests destructure it as [rendered, changed].
+     */
+    private function renderStem(string $stem)
+    {
+        return BlankPlaceholderRenderer::replaceToBlankSpan(
+            $stem,
+            BlankPlaceholderRenderer::defaultBlankSpan(),
+            false,
+            false
+        );
+    }
+
+    public function test_fill_stem_canonical_2562_dollar_around_a_only(): void
+    {
+        // Source data of questions.id=2562: `$a$` and the blank are separate segments, with
+        // the blank outside `$…$`; a stray `$` such as `</span>$时` must not be inserted.
+        [$rendered, $changed] = $this->renderStem(
+            '当$a$=__________时,$2(2a-3)$的值比$3(a+1)$的值大$1$.'
+        );
+
+        self::assertTrue($changed);
+        self::assertStringNotContainsString('</span>$时', $rendered);
+        self::assertMatchesRegularExpression('/\$2\(2a-3\)\$/u', $rendered);
+        self::assertMatchesRegularExpression('/当\$a\$=/u', $rendered);
+    }
+
+    public function test_fill_stem_malformed_missing_close_dollar_before_han_still_repaired(): void
+    {
+        // Dirty data: `$a=__________` (the `$a` and the blank share one math segment and the
+        // closing `$` is missing). The closing `$` is supplied so the next segment is not swallowed.
+        [$rendered, $changed] = $this->renderStem(
+            '当$a=__________时,$2(2a-3)$的值比$3(a+1)$的值大$1$.'
+        );
+
+        self::assertTrue($changed);
+        self::assertStringNotContainsString('</span>$时', $rendered);
+        self::assertMatchesRegularExpression('/\$2\(2a-3\)\$/u', $rendered);
+    }
+
+    public function test_inline_math_with_normal_parentheses_not_turned_to_blank(): void
+    {
+        // Ordinary parentheses inside formulas stay untouched; only the underscore run becomes a blank.
+        [$rendered] = $this->renderStem('设$f(x)=\sin(x)$,则$g(2)$的值是__________。');
+
+        self::assertStringContainsString('sin(x)', $rendered);
+        self::assertStringContainsString('g(2)', $rendered);
+        self::assertStringContainsString('border-bottom:1.2px dashed', $rendered);
+    }
+
+    public function test_left_quad_right_not_at_suffix_stays_in_formula(): void
+    {
+        // When the segment does not end with the full `…=\left(\quad\right)` suffix it is not
+        // converted to an underline (avoids damaging notation in the middle of a segment).
+        [$rendered] = $this->renderStem('若$x=\left(\quad\right)+1$,则$y=__________。');
+
+        self::assertStringContainsString('\\left(\\quad\\right)', $rendered);
+    }
+
+    public function test_fill_degree_after_plain_blank_not_prefixed_with_stray_dollar(): void
+    {
+        // questions.id=332: after the inline `$40^{\circ}$` comes a plain-text blank followed by
+        // `度`, with no `$` in between. closeMissing must not reuse the previous formula's closing
+        // `$`, otherwise the output would contain `</span>$度`.
+        [$rendered] = $this->renderStem(
+            '一辆汽车在笔直的公路上行驶,第一次向左拐 $40^{\circ}$,若经第二次拐弯后,运动路线与原来的路线平行,则第二次拐弯是否向右拐或向左拐______度。'
+        );
+
+        self::assertStringContainsString('</span>度', $rendered);
+        self::assertStringNotContainsString('</span>$度', $rendered);
+    }
+
+    public function test_choice_stem_trailing_left_quad_right_becomes_blank_line(): void
+    {
+        // questions.id=30949: a trailing `$=\left(\quad\right)$` is meant as an answer line; it is
+        // turned into an underline outside the math segment and then rendered as the standard blank.
+        [$rendered] = $this->renderStem(
+            '在三角形$ABC$中,$AC=3,AB=4,\angle CAB=120^{\circ}$,则$\left(\overrightarrow{AB}+\overrightarrow{AC}\right)\cdot\overrightarrow{AB}=\left(\quad\right)$'
+        );
+
+        self::assertStringContainsString('border-bottom:1.2px dashed', $rendered);
+        self::assertStringContainsString('min-width:80px', $rendered);
+        self::assertStringNotContainsString('\\left(\\quad\\right)', $rendered);
+        self::assertStringNotContainsString('TOKEN', $rendered);
+        self::assertStringContainsString('\\overrightarrow{AB}', $rendered);
+    }
+}