yemeishu 4 päivää sitten
vanhempi
commit
d9ef37e22f

+ 120 - 0
app/Filament/Resources/MarkdownImportResource.php

@@ -28,6 +28,7 @@ use Illuminate\Database\Eloquent\Builder;
 use Illuminate\Database\Eloquent\Model;
 use Illuminate\Support\Facades\Storage;
 use Illuminate\Support\Facades\DB;
+use Illuminate\Support\Collection;
 use UnitEnum;
 use Livewire\Features\SupportFileUploads\TemporaryUploadedFile;
 use App\Support\TextEncoding;
@@ -442,6 +443,22 @@ class MarkdownImportResource extends Resource
                         }
                     }),
 
+                Action::make('ai_parse')
+                    ->label('AI 解析')
+                    ->icon('heroicon-o-sparkles')
+                    ->color('warning')
+                    ->visible(fn (?Model $record): bool => in_array($record?->status, ['pending', 'processing', 'parsed', 'failed']))
+                    ->requiresConfirmation()
+                    ->modalHeading('重新执行 AI 解析')
+                    ->modalDescription('将对所有候选题重新进行 AI 结构化解析,清除之前的解析标记。此操作不会重新拆分题目。')
+                    ->action(function (?Model $record) {
+                        if (!$record) {
+                            return;
+                        }
+
+                        static::triggerAiParsing($record);
+                    }),
+
                 Action::make('review')
                     ->label('进入校对')
                     ->icon('heroicon-o-clipboard-document-list')
@@ -489,6 +506,19 @@ class MarkdownImportResource extends Resource
             ->bulkActions([
                 BulkActionGroup::make([
                     DeleteBulkAction::make(),
+
+                    Tables\Actions\BulkAction::make('bulk_ai_parse')
+                        ->label('批量 AI 解析')
+                        ->icon('heroicon-o-sparkles')
+                        ->color('warning')
+                        ->requiresConfirmation()
+                        ->modalHeading('批量执行 AI 解析')
+                        ->modalDescription('将对选中的所有记录重新执行 AI 结构化解析,清除之前的解析标记。')
+                        ->action(function (Collection $records) {
+                            foreach ($records as $record) {
+                                static::triggerAiParsing($record);
+                            }
+                        }),
                 ]),
             ])
             ->recordClasses(fn (Model $record) => $record->status === 'failed' ? 'bg-rose-50/60' : null)
@@ -584,4 +614,94 @@ class MarkdownImportResource extends Resource
                 ->send();
         }
     }
+
+    /**
+     * Re-run AI structured parsing for every active candidate of an import.
+     *
+     * Removes queued jobs that still reference the import, resets the parse
+     * output of each non-superseded candidate, switches the import back to
+     * the "processing" state and dispatches one queue job per batch of
+     * candidates. The outcome is reported to the user through a Filament
+     * notification; exceptions are logged and reported, never rethrown.
+     *
+     * @param Model $record The import record whose candidates are re-parsed.
+     */
+    public static function triggerAiParsing(Model $record): void
+    {
+        try {
+            // Load the active candidates once so the count used for progress
+            // and batching always matches the rows that are actually reset.
+            $candidates = \App\Models\PreQuestionCandidate::where('import_id', $record->id)
+                ->where('status', '!=', 'superseded')
+                ->get();
+            $candidateCount = $candidates->count();
+
+            if ($candidateCount === 0) {
+                Notification::make()
+                    ->title('没有找到候选题,无法执行 AI 解析')
+                    ->warning()
+                    ->send();
+                return;
+            }
+
+            // Queue cleanup, candidate reset and import status change are done
+            // atomically so a failure cannot leave half-reset data behind.
+            \Illuminate\Support\Facades\DB::transaction(static function () use ($record, $candidates, $candidateCount): void {
+                $importId = $record->id;
+
+                // Remove stale queued jobs for this import. Every pattern ends
+                // with a delimiter after the id so that import 5 does not also
+                // match (and delete) the jobs of imports 50, 512, ...
+                // The conditions are grouped so the OR can never escape into
+                // constraints added to this query later.
+                // NOTE(review): the serialized command is embedded in the JSON
+                // payload, where its quotes may be backslash-escaped; confirm
+                // these patterns match what the queue driver really stores.
+                \Illuminate\Support\Facades\DB::table('jobs')
+                    ->where(static function ($query) use ($importId): void {
+                        $query->where('payload', 'like', '%"markdownImportId":' . $importId . ',%')
+                            ->orWhere('payload', 'like', '%"markdownImportId":' . $importId . '}%')
+                            ->orWhere('payload', 'like', '%"markdownImportId";i:' . $importId . ';%');
+                    })
+                    ->delete();
+
+                // Clear the previous AI output and parse markers of every candidate.
+                foreach ($candidates as $candidate) {
+                    $meta = $candidate->meta ?? [];
+                    unset($meta['ai_parsed'], $meta['ai_parsed_at']);
+                    $candidate->update([
+                        'stem' => null,
+                        'options' => null,
+                        'images' => null,
+                        'tables' => null,
+                        'ai_confidence' => null,
+                        'confidence' => null,
+                        'status' => 'pending',
+                        'meta' => $meta,
+                    ]);
+                }
+
+                // Put the import back into the AI parsing stage with fresh progress counters.
+                $record->update([
+                    'status' => 'processing',
+                    'progress_stage' => \App\Models\MarkdownImport::STAGE_AI_PARSING,
+                    'progress_message' => 'AI 解析中…',
+                    'progress_current' => 0,
+                    'progress_total' => $candidateCount,
+                    'progress_updated_at' => now(),
+                    'processing_started_at' => now(),
+                    'processing_finished_at' => null,
+                    'error_message' => null,
+                ]);
+            });
+
+            // Dispatch one job per batch, only after the reset has been committed.
+            // NOTE(review): the ranges assume candidate sequence numbers run
+            // contiguously from 1 to $candidateCount - confirm against the job.
+            $batchSize = 10;
+            $batches = (int) ceil($candidateCount / $batchSize);
+
+            for ($b = 0; $b < $batches; $b++) {
+                $startSeq = ($b * $batchSize) + 1;
+                $endSeq = min(($b + 1) * $batchSize, $candidateCount);
+
+                \App\Jobs\ProcessMarkdownCandidateBatch::dispatch($record->id, $startSeq, $endSeq);
+            }
+
+            \Illuminate\Support\Facades\Log::info('AI parsing batches dispatched', [
+                'import_id' => $record->id,
+                'total_candidates' => $candidateCount,
+                'batch_size' => $batchSize,
+                'batches' => $batches,
+            ]);
+
+            Notification::make()
+                ->title('已提交 AI 解析任务')
+                ->body("共 {$candidateCount} 个候选题,已分为 {$batches} 个批次并发处理")
+                ->success()
+                ->send();
+
+        } catch (\Exception $e) {
+            // Keep a server-side trace; the notification alone is lost once dismissed.
+            \Illuminate\Support\Facades\Log::error('AI parsing trigger failed', [
+                'import_id' => $record->id,
+                'exception' => $e,
+            ]);
+
+            Notification::make()
+                ->title('AI 解析失败:' . $e->getMessage())
+                ->danger()
+                ->send();
+        }
+    }
 }

+ 91 - 34
app/Jobs/ProcessMarkdownCandidateBatch.php

@@ -75,8 +75,10 @@ class ProcessMarkdownCandidateBatch implements ShouldQueue
 
                 // 快速过滤卷子/区块标题,避免误判为题目再次走 AI
                 if (!$this->isLikelyQuestion((string) $record->raw_markdown)) {
+                    // 标记为已过滤,但不标记为已解析
                     $meta['ai_parsed'] = true;
                     $meta['ai_parsed_at'] = now()->toDateTimeString();
+                    $meta['filtered_out'] = true; // 添加过滤标记
 
                     $record->update([
                         'is_question_candidate' => false,
@@ -157,52 +159,104 @@ class ProcessMarkdownCandidateBatch implements ShouldQueue
             return;
         }
 
-        $total = (int) ($import->progress_total ?? 0);
-        $current = (int) ($import->progress_current ?? 0);
+        // 重新计算真实的解析进度
+        $total = PreQuestionCandidate::query()
+            ->where('import_id', $this->markdownImportId)
+            ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
+            ->count();
 
-        if ($total <= 0 || $current < $total) {
-            return;
-        }
+        // 真正完成AI解析的记录数
+        $parsed = PreQuestionCandidate::query()
+            ->where('import_id', $this->markdownImportId)
+            ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
+            ->where(function ($query) {
+                $query->whereNotNull('stem')
+                    ->where('stem', '!=', '')
+                    ->orWhere(function ($q) {
+                        $q->whereNotNull('ai_confidence')
+                          ->where('ai_confidence', '>', 0);
+                    });
+            })
+            ->count();
 
-        // 只要有一个 batch 到达“完成条件”,就尝试做一次幂等的最终状态更新
-        $updated = DB::table('markdown_imports')
-            ->where('id', $this->markdownImportId)
-            ->where('status', 'processing')
-            ->update([
-                'status' => 'parsed',
-                'progress_stage' => MarkdownImport::STAGE_PARSED,
-                'progress_message' => '解析完成,等待人工校对',
-                'progress_updated_at' => now(),
-                'processing_finished_at' => now(),
-            ]);
+        // 如果所有候选题都已解析完成,更新状态
+        if ($total > 0 && $parsed >= $total) {
+            $updated = DB::table('markdown_imports')
+                ->where('id', $this->markdownImportId)
+                ->where('status', 'processing')
+                ->update([
+                    'status' => 'parsed',
+                    'progress_stage' => MarkdownImport::STAGE_PARSED,
+                    'progress_message' => "解析完成,等待人工校对 ({$parsed}/{$total})",
+                    'progress_total' => $total,
+                    'progress_current' => $parsed,
+                    'progress_updated_at' => now(),
+                    'processing_finished_at' => now(),
+                ]);
 
-        if ($updated) {
-            Log::info('Markdown import finalized', [
-                'import_id' => $this->markdownImportId,
-                'progress_total' => $total,
-                'progress_current' => $current,
-            ]);
+            if ($updated) {
+                Log::info('Markdown import finalized', [
+                    'import_id' => $this->markdownImportId,
+                    'total_candidates' => $total,
+                    'parsed_candidates' => $parsed,
+                ]);
+            }
         }
     }
 
     private function refreshProgress(): void
     {
+        // 总候选题数(排除被过滤的)
         $total = PreQuestionCandidate::query()
             ->where('import_id', $this->markdownImportId)
             ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
+            ->where(function ($query) {
+                $query->whereNull('meta')
+                    ->orWhereRaw("JSON_UNQUOTE(JSON_EXTRACT(meta, '$.filtered_out')) != 'true'");
+            })
             ->count();
 
+        // 真正完成AI解析的判断:有stem字段或有有效的ai_confidence
         $parsed = PreQuestionCandidate::query()
             ->where('import_id', $this->markdownImportId)
             ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
             ->where(function ($query) {
-                $query->whereRaw("JSON_UNQUOTE(JSON_EXTRACT(meta, '$.ai_parsed')) = 'true'")
-                    ->orWhereNotNull('stem')
-                    ->orWhereNotNull('ai_confidence')
-                    ->orWhereNotNull('confidence');
+                $query->whereNull('meta')
+                    ->orWhereRaw("JSON_UNQUOTE(JSON_EXTRACT(meta, '$.filtered_out')) != 'true'");
+            })
+            ->where(function ($query) {
+                $query->whereNotNull('stem')
+                    ->where('stem', '!=', '')
+                    ->orWhere(function ($q) {
+                        $q->whereNotNull('ai_confidence')
+                          ->where('ai_confidence', '>', 0);
+                    });
             })
             ->count();
 
+        // 计算有stem但AI置信度为0的数量(可能是非题目被错误解析)
+        $stemOnlyCount = PreQuestionCandidate::query()
+            ->where('import_id', $this->markdownImportId)
+            ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
+            ->where(function ($query) {
+                $query->whereNull('meta')
+                    ->orWhereRaw("JSON_UNQUOTE(JSON_EXTRACT(meta, '$.filtered_out')) != 'true'");
+            })
+            ->whereNotNull('stem')
+            ->where('stem', '!=', '')
+            ->where(function ($query) {
+                $query->whereNull('ai_confidence')
+                    ->orWhere('ai_confidence', '=', 0);
+            })
+            ->count();
+
+        // 被过滤的记录数
+        $filteredCount = PreQuestionCandidate::query()
+            ->where('import_id', $this->markdownImportId)
+            ->where('status', '!=', PreQuestionCandidate::STATUS_SUPERSEDED)
+            ->whereRaw("JSON_UNQUOTE(JSON_EXTRACT(meta, '$.filtered_out')) = 'true'")
+            ->count();
+
         DB::table('markdown_imports')
             ->where('id', $this->markdownImportId)
             ->update([
@@ -210,20 +264,23 @@ class ProcessMarkdownCandidateBatch implements ShouldQueue
                 'progress_current' => min($parsed, $total),
                 'progress_updated_at' => now(),
                 'progress_stage' => MarkdownImport::STAGE_AI_PARSING,
-                'progress_message' => 'AI 解析中…',
+                'progress_message' => "AI 解析中… {$parsed}/{$total}" .
+                    ($stemOnlyCount > 0 ? " (含{$stemOnlyCount}个待筛选)" : '') .
+                    ($filteredCount > 0 ? " (已过滤{$filteredCount}个非题目)" : ''),
             ]);
     }
 
     private function markParsed(PreQuestionCandidate $record): void
     {
-        $meta = $record->meta ?? [];
-        if (!empty($meta['ai_parsed'])) {
-            return;
+        // Only flag the record as AI-parsed when it really carries a parse result (non-empty stem or positive ai_confidence)
+        if (!empty($record->stem) || (!empty($record->ai_confidence) && $record->ai_confidence > 0)) {
+            $meta = $record->meta ?? [];
+            if (empty($meta['ai_parsed'])) {
+                $meta['ai_parsed'] = true;
+                $meta['ai_parsed_at'] = now()->toDateTimeString();
+                $record->update(['meta' => $meta]);
+            }
         }
-
-        $meta['ai_parsed'] = true;
-        $meta['ai_parsed_at'] = now()->toDateTimeString();
-        $record->update(['meta' => $meta]);
     }
 
     /**