|
|
@@ -75,8 +75,10 @@ class ProcessMarkdownCandidateBatch implements ShouldQueue
|
|
|
|
|
|
// 快速过滤卷子/区块标题,避免误判为题目再次走 AI
|
|
|
if (!$this->isLikelyQuestion((string) $record->raw_markdown)) {
|
|
|
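+ // Flag the record as filtered out (a heading, not a question). NOTE(review): ai_parsed is still set to true just below — confirm that is intended.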
|
|
|
$meta['ai_parsed'] = true;
|
|
|
$meta['ai_parsed_at'] = now()->toDateTimeString();
|
|
|
+ $meta['filtered_out'] = true; // 添加过滤标记
|
|
|
|
|
|
$record->update([
|
|
|
'is_question_candidate' => false,
|
|
|
@@ -157,52 +159,104 @@ class ProcessMarkdownCandidateBatch implements ShouldQueue
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- $total = (int) ($import->progress_total ?? 0);
|
|
|
- $current = (int) ($import->progress_current ?? 0);
|
|
|
+ // 重新计算真实的解析进度
|
|
|
+ $total = PreQuestionCandidate::query()
|
|
|
+ ->where('import_id', $this->markdownImportId)
|
|
|
+ ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
|
|
|
+ ->count();
|
|
|
|
|
|
- if ($total <= 0 || $current < $total) {
|
|
|
- return;
|
|
|
- }
|
|
|
+ // 真正完成AI解析的记录数
|
|
|
+ $parsed = PreQuestionCandidate::query()
|
|
|
+ ->where('import_id', $this->markdownImportId)
|
|
|
+ ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
|
|
|
+ ->where(function ($query) {
|
|
|
+ $query->whereNotNull('stem')
|
|
|
+ ->where('stem', '!=', '')
|
|
|
+ ->orWhere(function ($q) {
|
|
|
+ $q->whereNotNull('ai_confidence')
|
|
|
+ ->where('ai_confidence', '>', 0);
|
|
|
+ });
|
|
|
+ })
|
|
|
+ ->count();
|
|
|
|
|
|
- // 只要有一个 batch 到达“完成条件”,就尝试做一次幂等的最终状态更新
|
|
|
- $updated = DB::table('markdown_imports')
|
|
|
- ->where('id', $this->markdownImportId)
|
|
|
- ->where('status', 'processing')
|
|
|
- ->update([
|
|
|
- 'status' => 'parsed',
|
|
|
- 'progress_stage' => MarkdownImport::STAGE_PARSED,
|
|
|
- 'progress_message' => '解析完成,等待人工校对',
|
|
|
- 'progress_updated_at' => now(),
|
|
|
- 'processing_finished_at' => now(),
|
|
|
- ]);
|
|
|
+ // 如果所有候选题都已解析完成,更新状态
|
|
|
+ if ($total > 0 && $parsed >= $total) {
|
|
|
+ $updated = DB::table('markdown_imports')
|
|
|
+ ->where('id', $this->markdownImportId)
|
|
|
+ ->where('status', 'processing')
|
|
|
+ ->update([
|
|
|
+ 'status' => 'parsed',
|
|
|
+ 'progress_stage' => MarkdownImport::STAGE_PARSED,
|
|
|
+ 'progress_message' => "解析完成,等待人工校对 ({$parsed}/{$total})",
|
|
|
+ 'progress_total' => $total,
|
|
|
+ 'progress_current' => $parsed,
|
|
|
+ 'progress_updated_at' => now(),
|
|
|
+ 'processing_finished_at' => now(),
|
|
|
+ ]);
|
|
|
|
|
|
- if ($updated) {
|
|
|
- Log::info('Markdown import finalized', [
|
|
|
- 'import_id' => $this->markdownImportId,
|
|
|
- 'progress_total' => $total,
|
|
|
- 'progress_current' => $current,
|
|
|
- ]);
|
|
|
+ if ($updated) {
|
|
|
+ Log::info('Markdown import finalized', [
|
|
|
+ 'import_id' => $this->markdownImportId,
|
|
|
+ 'total_candidates' => $total,
|
|
|
+ 'parsed_candidates' => $parsed,
|
|
|
+ ]);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
 * Recompute the AI-parsing progress of the current import and persist it on
 * its markdown_imports row.
 *
 * All counts are restricted to this import's candidates whose status is not
 * STATUS_SUPERSEDED:
 *  - total:     candidates not flagged meta.filtered_out
 *  - parsed:    of those, rows with a non-empty stem or ai_confidence > 0
 *  - stem-only: of those, rows with a non-empty stem but ai_confidence NULL/0
 *  - filtered:  candidates flagged meta.filtered_out = true
 */
private function refreshProgress(): void
{
    // Shared scope for every count below; cloned so the filters never leak
    // from one count into the next.
    $candidates = PreQuestionCandidate::query()
        ->where('import_id', $this->markdownImportId)
        ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED);

    // JSON_EXTRACT yields SQL NULL when meta is NULL or has no filtered_out
    // key, and "NULL != 'true'" is NULL (not TRUE), which would silently drop
    // every row whose meta holds other keys (e.g. ai_parsed). COALESCE makes
    // a missing flag count as "not filtered".
    $notFilteredSql = "COALESCE(JSON_UNQUOTE(JSON_EXTRACT(meta, '$.filtered_out')), 'false') != 'true'";
    $filteredSql = "JSON_UNQUOTE(JSON_EXTRACT(meta, '$.filtered_out')) = 'true'";

    $active = (clone $candidates)->whereRaw($notFilteredSql);

    // Candidates that still count towards progress.
    $total = (clone $active)->count();

    // Parsed = has a non-empty stem OR a positive ai_confidence.
    $parsed = (clone $active)
        ->where(static function ($query): void {
            $query
                ->where(static function ($q): void {
                    $q->whereNotNull('stem')
                        ->where('stem', '!=', '');
                })
                ->orWhere(static function ($q): void {
                    $q->whereNotNull('ai_confidence')
                        ->where('ai_confidence', '>', 0);
                });
        })
        ->count();

    // Rows with a stem but no positive confidence; reported in the message as
    // still needing screening.
    $stemOnlyCount = (clone $active)
        ->whereNotNull('stem')
        ->where('stem', '!=', '')
        ->where(static function ($query): void {
            $query->whereNull('ai_confidence')
                ->orWhere('ai_confidence', '=', 0);
        })
        ->count();

    // Rows explicitly flagged as filtered out.
    $filteredCount = (clone $candidates)
        ->whereRaw($filteredSql)
        ->count();

    $message = "AI 解析中… {$parsed}/{$total}";
    if ($stemOnlyCount > 0) {
        $message .= " (含{$stemOnlyCount}个待筛选)";
    }
    if ($filteredCount > 0) {
        $message .= " (已过滤{$filteredCount}个非题目)";
    }

    DB::table('markdown_imports')
        ->where('id', $this->markdownImportId)
        ->update([
            // NOTE(review): the array entry preceding progress_current was not
            // visible in the reviewed excerpt; progress_total is assumed — confirm.
            'progress_total' => $total,
            // Clamp so the stored progress never exceeds the total.
            'progress_current' => min($parsed, $total),
            'progress_updated_at' => now(),
            'progress_stage' => MarkdownImport::STAGE_AI_PARSING,
            'progress_message' => $message,
        ]);
}
|
|
|
|
|
|
/**
 * Stamp meta.ai_parsed / meta.ai_parsed_at on a candidate, but only when the
 * record actually carries a parse result.
 *
 * A record counts as parsed when it has a non-empty stem or an ai_confidence
 * greater than zero. Records already stamped are left untouched, so
 * ai_parsed_at keeps the time of the first stamp.
 *
 * @param PreQuestionCandidate $record Candidate to stamp; updated in place via update().
 */
private function markParsed(PreQuestionCandidate $record): void
{
    // Compare against '' rather than using empty(): empty() would treat the
    // stem "0" as missing, while a plain non-empty-string check does not.
    $hasStem = (string) ($record->stem ?? '') !== '';
    $hasConfidence = (float) ($record->ai_confidence ?? 0) > 0;

    if (!$hasStem && !$hasConfidence) {
        return;
    }

    $meta = $record->meta ?? [];
    if (!empty($meta['ai_parsed'])) {
        // Already stamped; do not overwrite the original timestamp.
        return;
    }

    $meta['ai_parsed'] = true;
    $meta['ai_parsed_at'] = now()->toDateTimeString();
    $record->update(['meta' => $meta]);
}
|
|
|
|
|
|
/**
|