markdownImportId);

        // Without the import row there is nothing to report progress against: log and stop.
        if (!$import) {
            Log::error('MarkdownImport not found for batch', [
                'id' => $this->markdownImportId,
                'sequence_start' => $this->sequenceStart,
                'sequence_end' => $this->sequenceEnd,
            ]);

            return;
        }

        Log::info('Markdown batch started', [
            'import_id' => $this->markdownImportId,
            'sequence_start' => $this->sequenceStart,
            'sequence_end' => $this->sequenceEnd,
        ]);

        // This batch's slice of candidates: inclusive sequence range, ascending order.
        $records = PreQuestionCandidate::query()
            ->where('import_id', $this->markdownImportId)
            ->whereBetween('sequence', [$this->sequenceStart, $this->sequenceEnd])
            ->orderBy('sequence')
            ->get();

        // Counters are only used for the "batch finished" log entry below.
        $processed = 0;
        $failed = 0;

        foreach ($records as $record) {
            try {
                $meta = $record->meta ?? [];

                // Already flagged as AI-parsed: count it and move on.
                if (!empty($meta['ai_parsed'])) {
                    $processed++;
                    continue;
                }

                $existingConfidence = $record->confidence ?? $record->ai_confidence;

                // High confidence (>= 0.85): skip another AI pass and count it towards progress.
                if ($existingConfidence !== null && (float) $existingConfidence >= 0.85) {
                    $this->markParsed($record);
                    $processed++;
                    continue;
                }

                // Cheaply filter out paper/section headings so they are not sent to the AI as questions.
                if (!$this->isLikelyQuestion((string) $record->raw_markdown)) {
                    $meta['ai_parsed'] = true;
                    $meta['ai_parsed_at'] = now()->toDateTimeString();

                    $record->update([
                        'is_question_candidate' => false,
                        'ai_confidence' => 0.0,
                        'confidence' => 0.0,
                        'is_valid_question' => false,
                        'status' => 'rejected',
                        'meta' => $meta,
                    ]);

                    $processed++;
                    continue;
                }

                // Records that already carry a stem in one of these statuses are not parsed again.
                if (in_array($record->status, ['pending', 'reviewed', 'accepted', 'rejected'], true) && $record->stem !== null) {
                    $this->markParsed($record);
                    // Counted like every other skip branch so the log totals add up.
                    $processed++;
                    continue;
                }

                $parsed = $parser->parseRawMarkdown((string) $record->raw_markdown, (int) $record->index);

                $meta = $record->meta ?? [];
                $meta['ai_parsed'] = true;
                $meta['ai_parsed_at'] = now()->toDateTimeString();

                // Re-host the parsed images right after structuring.
                // $parsedImages is also the fallback stored when nothing was uploaded.
                $parsedImages = $parsed['images'] ?? [];
                $uploadedImages = [];

                if (!empty($parsedImages)) {
                    foreach ($parsedImages as $idx => $imgUrl) {
                        $path = "imports/images/{$record->id}_{$idx}.jpg";

                        // Best effort: a failed or empty download must not upload an empty file,
                        // so the original URL is kept instead.
                        $contents = @file_get_contents($imgUrl);
                        $stored = ($contents === false || $contents === '')
                            ? false
                            : $uploader->put($path, $contents);

                        // NOTE(review): assumes put() returns the stored location on success — confirm.
                        $uploadedImages[] = $stored ?: $imgUrl;
                    }
                }

                $meta['images_uploaded'] = !empty($uploadedImages);

                $record->update([
                    'stem' => $parsed['stem'] ?? null,
                    'options' => $parsed['options'] ?? null,
                    'images' => !empty($uploadedImages) ? $uploadedImages : $parsedImages,
                    'tables' => $parsed['tables'] ?? [],
                    'is_question_candidate' => (bool) ($parsed['is_question_candidate'] ?? false),
                    'ai_confidence' => $parsed['ai_confidence'] ?? null,
                    'status' => 'pending',
                    'meta' => $meta,
                ]);

                $processed++;
            } catch (\Throwable $e) {
                // One bad record must not abort the batch: record the failure and keep going.
                $failed++;

                Log::warning('Markdown batch item failed', [
                    'import_id' => $this->markdownImportId,
                    'candidate_id' => $record->id,
                    'sequence' => $record->sequence,
                    'index' => $record->index,
                    'error' => $e->getMessage(),
                ]);
            }
        }

        $this->refreshProgress();

        Log::info('Markdown batch finished', [
            'import_id' => $this->markdownImportId,
            'sequence_start' => $this->sequenceStart,
            'sequence_end' => $this->sequenceEnd,
            'processed' => $processed,
            'failed' => $failed,
        ]);

        $this->finalizeIfDone();
    }

    /**
     * Mark the import as parsed once the stored progress counter has reached the total.
     *
     * Does nothing when the import row is missing, the total is not positive, or the
     * current counter is still below the total.
     */
    private function finalizeIfDone(): void
    {
        $import = MarkdownImport::find($this->markdownImportId);

        if (!$import) {
            return;
        }

        $total = (int) ($import->progress_total ?? 0);
        $current = (int) ($import->progress_current ?? 0);

        if ($total <= 0 || $current < $total) {
            return;
        }

        // Any batch that sees the completion condition attempts the final update; the
        // status = 'processing' filter means it only matches while the import is still processing.
        $updated = DB::table('markdown_imports')
            ->where('id', $this->markdownImportId)
            ->where('status', 'processing')
            ->update([
                'status' => 'parsed',
                'progress_stage' => MarkdownImport::STAGE_PARSED,
                'progress_message' => '解析完成,等待人工校对',
                'progress_updated_at' => now(),
                'processing_finished_at' => now(),
            ]);

        if ($updated) {
            Log::info('Markdown import finalized', [
                'import_id' => $this->markdownImportId,
                'progress_total' => $total,
                'progress_current' => $current,
            ]);
        }
    }

    /**
     * Recount this import's candidates and store the progress counters on the import row.
     *
     * Superseded candidates are excluded from both counts. A candidate counts as parsed
     * when its meta has ai_parsed = true or when stem, ai_confidence or confidence is set.
     */
    private function refreshProgress(): void
    {
        $total = PreQuestionCandidate::query()
            ->where('import_id', $this->markdownImportId)
            ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
            ->count();

        $parsed = PreQuestionCandidate::query()
            ->where('import_id', $this->markdownImportId)
            ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
            ->where(function ($query) {
                $query->whereRaw("JSON_UNQUOTE(JSON_EXTRACT(meta, '$.ai_parsed')) = 'true'")
                    ->orWhereNotNull('stem')
                    ->orWhereNotNull('ai_confidence')
                    ->orWhereNotNull('confidence');
            })
            ->count();

        // The counters are always refreshed; the current value is capped at the total.
        DB::table('markdown_imports')
            ->where('id', $this->markdownImportId)
            ->update([
                'progress_total' => $total,
                'progress_current' => min($parsed, $total),
                'progress_updated_at' => now(),
            ]);

        // The "AI parsing" stage and message are written only while the import is still
        // processing. Otherwise a batch that refreshes after another batch has finalized
        // the import would overwrite the finalized stage, and finalizeIfDone() could not
        // restore it because its own update requires status = 'processing'.
        DB::table('markdown_imports')
            ->where('id', $this->markdownImportId)
            ->where('status', 'processing')
            ->update([
                'progress_stage' => MarkdownImport::STAGE_AI_PARSING,
                'progress_message' => 'AI 解析中…',
            ]);
    }

    /**
     * Set the ai_parsed flag and timestamp in the record's meta, unless already set.
     */
    private function markParsed(PreQuestionCandidate $record): void
    {
        $meta = $record->meta ?? [];

        if (!empty($meta['ai_parsed'])) {
            return;
        }

        $meta['ai_parsed'] = true;
        $meta['ai_parsed_at'] = now()->toDateTimeString();

        $record->update(['meta' => $meta]);
    }

    /**
     * Lightweight heuristic: does the raw text look like a question, as opposed to a
     * paper/section heading or an instruction blurb?
     *
     * Returns false when any line of the raw input starts with a Markdown heading marker,
     * when the tag-stripped text is at most 80 characters and contains a paper/section
     * keyword, or when it is shorter than 25 characters and shows no question mark,
     * command character, option label or score marker. Returns true otherwise.
     *
     * NOTE(review): the heading check applies regardless of text length — confirm that
     * long candidates containing a heading line are meant to be rejected.
     */
    private function isLikelyQuestion(string $raw): bool
    {
        $text = trim(strip_tags($raw));
        $length = mb_strlen($text);

        // A Markdown heading line anywhere in the raw input is treated as a non-question.
        if (preg_match('/^#+\\s+/m', $raw)) {
            return false;
        }

        // Short text that mentions a paper/part/question-type/instructions keyword.
        if (preg_match('/(第[一二三四五六七八九十IVX]+[卷部分]|题型|说明|试卷)/u', $text) && $length <= 80) {
            return false;
        }

        // Too short and without any question mark, command character or option marker.
        if ($length < 25 && !preg_match('/[\\??求解求证计算]|[A-D]\\.|(本小题满分/u', $text)) {
            return false;
        }

        return true;
    }
}