ProcessMarkdownCandidateBatch.php 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. <?php
  2. namespace App\Jobs;
  3. use App\Models\MarkdownImport;
  4. use App\Models\PreQuestionCandidate;
  5. use App\Services\MarkdownQuestionParser;
  6. use Illuminate\Bus\Queueable;
  7. use Illuminate\Contracts\Queue\ShouldQueue;
  8. use Illuminate\Foundation\Bus\Dispatchable;
  9. use Illuminate\Queue\InteractsWithQueue;
  10. use Illuminate\Queue\SerializesModels;
  11. use Illuminate\Support\Facades\DB;
  12. use Illuminate\Support\Facades\Log;
  13. class ProcessMarkdownCandidateBatch implements ShouldQueue
  14. {
  15. use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
  16. public int $timeout = 300; // 5分钟超时
  17. public int $tries = 3;
  18. public function __construct(
  19. public int $markdownImportId,
  20. public int $sequenceStart,
  21. public int $sequenceEnd
  22. ) {
  23. //
  24. }
  25. public function handle(MarkdownQuestionParser $parser): void
  26. {
  27. $import = MarkdownImport::find($this->markdownImportId);
  28. if (!$import) {
  29. Log::error('MarkdownImport not found for batch', [
  30. 'id' => $this->markdownImportId,
  31. 'sequence_start' => $this->sequenceStart,
  32. 'sequence_end' => $this->sequenceEnd,
  33. ]);
  34. return;
  35. }
  36. Log::info('Markdown batch started', [
  37. 'import_id' => $this->markdownImportId,
  38. 'sequence_start' => $this->sequenceStart,
  39. 'sequence_end' => $this->sequenceEnd,
  40. ]);
  41. $records = PreQuestionCandidate::query()
  42. ->where('import_id', $this->markdownImportId)
  43. ->whereBetween('sequence', [$this->sequenceStart, $this->sequenceEnd])
  44. ->orderBy('sequence')
  45. ->get();
  46. $processed = 0;
  47. $failed = 0;
  48. foreach ($records as $record) {
  49. try {
  50. $existingConfidence = $record->confidence ?? $record->ai_confidence;
  51. // 置信度高(>=0.85)时跳过再次 AI 解析,直接计入进度
  52. if ($existingConfidence !== null && (float) $existingConfidence >= 0.85) {
  53. $processed++;
  54. continue;
  55. }
  56. // 快速过滤卷子/区块标题,避免误判为题目再次走 AI
  57. if (!$this->isLikelyQuestion((string) $record->raw_markdown)) {
  58. $record->update([
  59. 'is_question_candidate' => false,
  60. 'ai_confidence' => 0.0,
  61. 'confidence' => 0.0,
  62. 'is_valid_question' => false,
  63. 'status' => 'rejected',
  64. ]);
  65. $processed++;
  66. continue;
  67. }
  68. // 已经处理过的不重复处理
  69. if (in_array($record->status, ['pending', 'reviewed', 'accepted', 'rejected'], true) && $record->stem !== null) {
  70. continue;
  71. }
  72. $parsed = $parser->parseRawMarkdown((string) $record->raw_markdown, (int) $record->index);
  73. $record->update([
  74. 'stem' => $parsed['stem'] ?? null,
  75. 'options' => $parsed['options'] ?? null,
  76. 'images' => $parsed['images'] ?? [],
  77. 'tables' => $parsed['tables'] ?? [],
  78. 'is_question_candidate' => (bool) ($parsed['is_question_candidate'] ?? false),
  79. 'ai_confidence' => $parsed['ai_confidence'] ?? null,
  80. 'status' => 'pending',
  81. ]);
  82. $processed++;
  83. } catch (\Throwable $e) {
  84. $failed++;
  85. Log::warning('Markdown batch item failed', [
  86. 'import_id' => $this->markdownImportId,
  87. 'candidate_id' => $record->id,
  88. 'sequence' => $record->sequence,
  89. 'index' => $record->index,
  90. 'error' => $e->getMessage(),
  91. ]);
  92. }
  93. }
  94. if ($processed > 0) {
  95. DB::table('markdown_imports')
  96. ->where('id', $this->markdownImportId)
  97. ->update([
  98. 'progress_current' => DB::raw('progress_current + ' . (int) $processed),
  99. 'progress_updated_at' => now(),
  100. 'progress_stage' => MarkdownImport::STAGE_AI_PARSING,
  101. 'progress_message' => 'AI 解析中…',
  102. ]);
  103. }
  104. Log::info('Markdown batch finished', [
  105. 'import_id' => $this->markdownImportId,
  106. 'sequence_start' => $this->sequenceStart,
  107. 'sequence_end' => $this->sequenceEnd,
  108. 'processed' => $processed,
  109. 'failed' => $failed,
  110. ]);
  111. $this->finalizeIfDone();
  112. }
  113. private function finalizeIfDone(): void
  114. {
  115. $import = MarkdownImport::find($this->markdownImportId);
  116. if (!$import) {
  117. return;
  118. }
  119. $total = (int) ($import->progress_total ?? 0);
  120. $current = (int) ($import->progress_current ?? 0);
  121. if ($total <= 0 || $current < $total) {
  122. return;
  123. }
  124. // 只要有一个 batch 到达“完成条件”,就尝试做一次幂等的最终状态更新
  125. $updated = DB::table('markdown_imports')
  126. ->where('id', $this->markdownImportId)
  127. ->where('status', 'processing')
  128. ->update([
  129. 'status' => 'parsed',
  130. 'progress_stage' => MarkdownImport::STAGE_PARSED,
  131. 'progress_message' => '解析完成,等待人工校对',
  132. 'progress_updated_at' => now(),
  133. 'processing_finished_at' => now(),
  134. ]);
  135. if ($updated) {
  136. Log::info('Markdown import finalized', [
  137. 'import_id' => $this->markdownImportId,
  138. 'progress_total' => $total,
  139. 'progress_current' => $current,
  140. ]);
  141. }
  142. }
  143. /**
  144. * 轻量启发式判断是否像一道题目,过滤卷子/部分标题和说明文字。
  145. */
  146. private function isLikelyQuestion(string $raw): bool
  147. {
  148. $text = trim(strip_tags($raw));
  149. $length = mb_strlen($text);
  150. // Markdown 标题或“卷/部分/说明”且文本很短,视为非题
  151. if (preg_match('/^#+\\s+/m', $raw)) {
  152. return false;
  153. }
  154. if (preg_match('/(第[一二三四五六七八九十IVX]+[卷部分]|题型|说明|试卷)/u', $text) && $length <= 80) {
  155. return false;
  156. }
  157. // 过短且无问句/命令词/选项特征
  158. if ($length < 25 && !preg_match('/[\\??求解求证计算]|[A-D]\\.|(本小题满分/u', $text)) {
  159. return false;
  160. }
  161. return true;
  162. }
  163. }