Bladeren bron

fix(queue): 组卷任务重回 pdf 队列;接入题干质检脚本

Made-with: Cursor
yemeishu 3 weken geleden
bovenliggende
commit
84337f4f57
2 gewijzigde bestanden met toevoegingen van 90 en 2 verwijderingen
  1. 2 2
      app/Jobs/AssembleExamTaskJob.php
  2. 88 0
      scripts/audit_question_stem_quality.php

+ 2 - 2
app/Jobs/AssembleExamTaskJob.php

@@ -27,8 +27,8 @@ class AssembleExamTaskJob implements ShouldQueue
     public function __construct(string $taskId)
     {
         $this->taskId = $taskId;
-        // 组卷任务放到默认队列,与 PDF 队列解耦,降低排队等待
-        $this->onQueue('default');
+        // Reuse the existing pdf queue so the job stays consistent with the historical deployment and its consumers
+        $this->onQueue('pdf');
         $this->afterCommit();
     }
 

+ 88 - 0
scripts/audit_question_stem_quality.php

@@ -0,0 +1,88 @@
+<?php
+
+declare(strict_types=1);
+
+require __DIR__.'/../vendor/autoload.php'; // autoloader from the vendor directory, one level above this script's directory
+$app = require __DIR__.'/../bootstrap/app.php'; // bootstrap/app.php returns the application container used below
+$kernel = $app->make(Illuminate\Contracts\Console\Kernel::class); // resolve the console kernel from the container
+$kernel->bootstrap(); // presumably required before the DB facade works in this standalone script — confirm
+
+use Illuminate\Support\Facades\DB;
+
+// CLI arguments (all optional):
+//   $argv[1]  maximum number of rows to scan; clamped to at least 1, default 50000
+//   $argv[2]  output directory, trailing slashes stripped; default /tmp
+//   $argv[3]  table to scan, surrounding whitespace trimmed; default "questions"
+$limit = isset($argv[1]) ? max(1, (int) $argv[1]) : 50000;
+$outDir = isset($argv[2]) ? rtrim($argv[2], '/') : '/tmp';
+$table = isset($argv[3]) ? trim((string) $argv[3]) : 'questions';
+
+// Create the output directory up front and fail loudly when that is impossible, instead of
+// suppressing the error and only discovering it when the report files are written.
+// An $outDir of '' means the argument was "/" (or empty): report paths then resolve to the filesystem root.
+$reportDir = $outDir === '' ? '/' : $outDir;
+if (!is_dir($reportDir) && !mkdir($reportDir, 0777, true) && !is_dir($reportDir)) {
+    fwrite(STDERR, "Unable to create output directory: {$reportDir}\n");
+    exit(1);
+}
+
+// Load the newest rows (highest id first) that have a non-null stem, capped at $limit,
+// from the "remote_mysql" connection; only the three columns read below are selected.
+$stemQuery = DB::connection('remote_mysql')->table($table);
+$stemQuery = $stemQuery->select('id', 'question_type', 'stem')->whereNotNull('stem');
+$rows = $stemQuery->orderByDesc('id')->limit($limit)->get();
+
+// One bucket per detector, keyed by issue name; every bucket starts out empty.
+$issues = array_fill_keys([
+    'unbalanced_dollar',
+    'suspicious_latex_env',
+    'mixed_placeholder_inside_math',
+    'compare_blank_between_math_tokens',
+    'tail_backslash_dollar_marker',
+], []);
+
+// Appends a finding for $row to the given bucket (passed by reference).
+// A bucket holds at most 2000 findings; anything beyond that is dropped without notice.
+// Each finding records the row id, its question type, the reason and the first 220 characters of the stem.
+$push = static function(array &$bucket, object $row, string $reason): void {
+    $hasRoom = count($bucket) < 2000;
+    if ($hasRoom) {
+        $preview = mb_substr((string) $row->stem, 0, 220);
+        $bucket[] = [
+            'id' => (int) $row->id,
+            'question_type' => (string) $row->question_type,
+            'reason' => $reason,
+            'stem_preview' => $preview,
+        ];
+    }
+};
+
+// Evaluates one detector pattern against a stem.
+// preg_match() returns false (not 0) when PCRE itself fails, e.g. on malformed UTF-8 under the /u
+// modifier or when the backtrack limit is hit; a plain truthiness check reads that as "no issue".
+// Failures are reported on STDERR (stdout carries the JSON result) and the row counts as not matching.
+$detect = static function (string $pattern, string $stem, int $rowId): bool {
+    $result = preg_match($pattern, $stem);
+    if ($result === false) {
+        fwrite(
+            STDERR,
+            sprintf("row %d: pattern could not be evaluated (PCRE error code %d)\n", $rowId, preg_last_error())
+        );
+
+        return false;
+    }
+
+    return $result === 1;
+};
+
+// Run every detector over every fetched row; a row may land in several buckets.
+// NOTE(review): the classes [((] / [))] list the same character twice and the alternation ends in a plain
+// space that \s already covers — possibly full-width/non-breaking variants were intended; confirm.
+foreach ($rows as $row) {
+    $stem = (string) $row->stem;
+    $rowId = (int) $row->id;
+
+    // An odd number of "$" characters means at least one math delimiter is unpaired.
+    $dollarCount = substr_count($stem, '$');
+    if (($dollarCount % 2) !== 0) {
+        $push($issues['unbalanced_dollar'], $row, 'odd number of $ delimiters');
+    }
+
+    // A \begin{ or \end{ whose brace is never closed before the end of the stem.
+    if ($detect('/\\\\begin\{[^}]*$/u', $stem, $rowId) || $detect('/\\\\end\{[^}]*$/u', $stem, $rowId)) {
+        $push($issues['suspicious_latex_env'], $row, 'truncated \\begin/\\end block');
+    }
+
+    // A blank placeholder (\underline{...}, a run of underscores, or empty parentheses) inside $...$.
+    if ($detect('/\$(?:[^$]|\\\\.)*(?:\\\\underline\{[^}]*\}|_{2,}|[((](?:\s|&nbsp;|&#160;| )*[))])(?:[^$]|\\\\.)*\$/u', $stem, $rowId)) {
+        $push($issues['mixed_placeholder_inside_math'], $row, 'placeholder token appears inside $...$');
+    }
+
+    // A blank (underscores or empty parentheses) sitting directly between two $...$ segments.
+    if ($detect('/\$[^$]*\$\s*_{2,}\s*\$[^$]*\$/u', $stem, $rowId) || $detect('/\$[^$]*\$\s*[((](?:\s|&nbsp;|&#160;| )*[))]\s*\$[^$]*\$/u', $stem, $rowId)) {
+        $push($issues['compare_blank_between_math_tokens'], $row, 'blank token inserted between two math segments');
+    }
+
+    // One or more backslashes followed by "$" at the very end of the stem, ignoring trailing tags/whitespace.
+    if ($detect('/\\\\+\$(?=\s*(?:<[^>]+>\s*)*$)/u', $stem, $rowId)) {
+        $push($issues['tail_backslash_dollar_marker'], $row, 'tail backslash-dollar marker used as blank placeholder');
+    }
+}
+
+// Aggregate figures for this run: scanned table, requested limit, number of rows actually returned,
+// number of findings per bucket, and an ISO 8601 timestamp.
+$summary = [
+    'table' => $table,
+    'scan_limit' => $limit,
+    'scanned_rows' => count($rows),
+    'counts' => array_map('count', $issues),
+    'generated_at' => date('c'),
+];
+
+// JSON_INVALID_UTF8_SUBSTITUTE: stem previews are arbitrary stored text; without this flag a single
+// malformed byte sequence makes json_encode() return false and an empty report file is written.
+// JSON_THROW_ON_ERROR: any remaining encoding failure aborts the script instead of being written as "".
+$jsonFlags = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE | JSON_THROW_ON_ERROR;
+
+// Both report files share one timestamp so a summary can be paired with its details.
+$stamp = date('Ymd_His');
+$summaryPath = "$outDir/question_stem_quality_summary_$stamp.json";
+$detailPath = "$outDir/question_stem_quality_details_$stamp.json";
+
+foreach ([$summaryPath => $summary, $detailPath => $issues] as $path => $payload) {
+    // file_put_contents() returns false on failure; stop rather than announce a path that was never written.
+    if (file_put_contents($path, json_encode($payload, $jsonFlags)) === false) {
+        fwrite(STDERR, "Failed to write report file: {$path}\n");
+        exit(1);
+    }
+}
+
+// Machine-readable result on stdout: where the reports were written, plus the summary itself.
+echo json_encode([
+    'summary_path' => $summaryPath,
+    'detail_path' => $detailPath,
+    'summary' => $summary,
+], $jsonFlags), "\n";