aiDriver = config('ai.driver', env('AI_DRIVER', 'deepseek'));
        // NOTE(review): the beginning of this constructor (and the class header) is outside
        // this excerpt; the remaining assignments are kept exactly as they were.
        $this->deepseekBaseUrl = rtrim((string) config('ai.deepseek.base_url', 'https://api.deepseek.com/v1'), '/');
        $this->deepseekModel = (string) config('ai.deepseek.model', 'deepseek-chat');
        $this->deepseekTimeout = (int) config('ai.deepseek.timeout', 30);
        $this->openAiBaseUrl = rtrim((string) config('ai.openai.base_url', 'https://api.openai.com/v1'), '/');
        $this->openAiModel = (string) config('ai.openai.model', 'gpt-3.5-turbo');
        $this->openAiTimeout = (int) config('ai.openai.timeout', 30);
    }

    /**
     * Parse a Markdown document and return one candidate question per block.
     *
     * The document is split with AsyncMarkdownSplitter. A failed split validation is only
     * logged as a warning; parsing continues on a best-effort basis.
     *
     * @return array<int, array<string, mixed>> One candidate structure per split block.
     */
    public function parse(string $markdown): array
    {
        $splitter = app(AsyncMarkdownSplitter::class);
        $blocks = $splitter->split($markdown);

        if (!$splitter->validate($blocks)) {
            Log::warning('Markdown split validation failed; continue with best-effort parsing', [
                'blocks_count' => count($blocks),
            ]);
        }

        $candidates = [];
        foreach ($blocks as $block) {
            $candidates[] = $this->parseRawMarkdown(
                (string) ($block['raw_markdown'] ?? ''),
                (int) ($block['index'] ?? 0),
            );
        }

        return $candidates;
    }

    /**
     * Parse the raw Markdown of a single question and return its candidate structure.
     *
     * The heuristic extraction always runs first. If the AI structured parse succeeds, its
     * fields are merged over the heuristic ones; otherwise the heuristic candidate is kept and
     * only the "is this a question" verdict is requested from the AI.
     *
     * @return array<string, mixed>
     */
    public function parseRawMarkdown(string $rawMarkdown, int $index): array
    {
        Log::debug('Parse raw_markdown start', [
            'index' => $index,
            'raw_len' => strlen($rawMarkdown),
            'raw_sha1' => LogContext::sha1($rawMarkdown),
            'raw_excerpt' => LogContext::excerpt($rawMarkdown),
        ]);

        $candidate = $this->parseBlock($rawMarkdown, $index);

        // AI structured parse; on failure fall back to heuristic extraction + AI detection.
        $aiStructured = $this->parseWithAi($candidate['raw_markdown'], $candidate['index']);
        if ($aiStructured !== null) {
            Log::debug('Parse raw_markdown done (ai_structured)', [
                'index' => $index,
                'keys' => array_keys($aiStructured),
                'is_question_candidate' => $aiStructured['is_question_candidate'] ?? null,
                'ai_confidence' => $aiStructured['ai_confidence'] ?? null,
                'options_count' => is_array($aiStructured['options'] ?? null) ? count($aiStructured['options']) : 0,
                'images_count' => is_array($aiStructured['images'] ?? null) ? count($aiStructured['images']) : 0,
                'tables_count' => is_array($aiStructured['tables'] ?? null) ? count($aiStructured['tables']) : 0,
            ]);

            // Keys returned by the AI win over the heuristic values (raw_markdown is untouched
            // because the normalized AI result never contains it).
            return array_merge($candidate, $aiStructured);
        }

        $this->enhanceWithAi($candidate);

        Log::debug('Parse raw_markdown done (heuristic+detect)', [
            'index' => $index,
            'is_question_candidate' => $candidate['is_question_candidate'] ?? null,
            'ai_confidence' => $candidate['ai_confidence'] ?? null,
        ]);

        return $candidate;
    }

    /**
     * Build the heuristic candidate for a single question block.
     *
     * @return array<string, mixed>
     */
    private function parseBlock(string $block, int $index): array
    {
        return [
            'index' => $index,
            'raw_markdown' => $block,
            // Step 2: stem extraction
            'stem' => $this->extractStem($block),
            // Step 3: option detection
            'options' => $this->extractOptions($block),
            // Step 4: image detection
            'images' => $this->extractImages($block),
            // Step 5: table detection
            'tables' => $this->extractTables($block),
            'is_question_candidate' => false,
            'ai_confidence' => null,
        ];
    }

    /**
     * AI structured parse: returns an array matching the candidate fields, or null on failure.
     *
     * Returns null without calling the AI when `ai.question_parse_prompt` is empty. Any
     * throwable raised while calling or normalizing is logged as a warning and also yields null.
     *
     * @return array{
     *   index:int,
     *   stem:?string,
     *   options:?array,
     *   images:array,
     *   tables:array,
     *   is_question_candidate:bool,
     *   ai_confidence:?float
     * }|null
     */
    private function parseWithAi(string $rawMarkdown, int $index): ?array
    {
        $template = (string) config('ai.question_parse_prompt');
        if (trim($template) === '') {
            return null;
        }

        $prompt = str_replace(['{index}', '{content}'], [(string) $index, $rawMarkdown], $template);

        try {
            Log::debug('AI structured parse request', [
                'driver' => $this->aiDriver,
                'index' => $index,
                'prompt_len' => strlen($prompt),
                'raw_sha1' => LogContext::sha1($rawMarkdown),
            ]);

            $result = $this->callAiApi($prompt);

            // Both the long and the short key spellings are accepted from the model.
            $confidence = $result['ai_confidence'] ?? $result['confidence'] ?? null;

            $normalized = [
                'index' => (int) ($result['index'] ?? $index),
                'stem' => isset($result['stem']) ? (string) $result['stem'] : null,
                'options' => is_array($result['options'] ?? null) ? $result['options'] : null,
                'images' => is_array($result['images'] ?? null) ? $result['images'] : [],
                'tables' => is_array($result['tables'] ?? null) ? $result['tables'] : [],
                'is_question_candidate' => (bool) ($result['is_question_candidate'] ?? $result['is_question'] ?? false),
                'ai_confidence' => $confidence !== null ? (float) $confidence : null,
            ];

            Log::debug('AI structured parse response', [
                'driver' => $this->aiDriver,
                'index' => $index,
                'response_keys' => array_keys($result),
                'normalized' => [
                    'index' => $normalized['index'],
                    'is_question_candidate' => $normalized['is_question_candidate'],
                    'ai_confidence' => $normalized['ai_confidence'],
                    'options_count' => is_array($normalized['options']) ? count($normalized['options']) : 0,
                    'images_count' => is_array($normalized['images']) ? count($normalized['images']) : 0,
                    'tables_count' => is_array($normalized['tables']) ? count($normalized['tables']) : 0,
                ],
            ]);

            return $normalized;
        } catch (\Throwable $e) {
            Log::warning('AI structured parse failed, fallback to heuristic', [
                'index' => $index,
                'error' => $e->getMessage(),
                'raw_sha1' => LogContext::sha1($rawMarkdown),
            ]);

            return null;
        }
    }

    /**
     * Extract the question stem: the lines that precede the first option line.
     *
     * NOTE(review): the tail of this method was lost in the excerpt (text between "<" and ">"
     * was stripped). The image-line pattern, the line collection and the return expression
     * are reconstructed and must be confirmed against the real file.
     */
    private function extractStem(string $block): ?string
    {
        $stemLines = [];

        foreach (explode("\n", $block) as $line) {
            $line = trim($line);

            // Stop at the first option line ("A. ...").
            if (preg_match('/^[A-D]\.\s+/', $line)) {
                break;
            }

            // Skip blank lines and image lines.
            if (empty($line) || preg_match('/^<img\b/i', $line)) {
                continue;
            }

            $stemLines[] = $line;
        }

        return $stemLines === [] ? null : implode("\n", $stemLines);
    }

    /**
     * Extract the answer options ("A. ..." through "D. ...").
     *
     * NOTE(review): this method is called by parseBlock() but its body is entirely missing
     * from the excerpt. This is a minimal reconstruction; the return shape (option letter =>
     * option text, null when no option is found) is an assumption to be confirmed.
     */
    private function extractOptions(string $block): ?array
    {
        $options = [];

        foreach (explode("\n", $block) as $line) {
            if (preg_match('/^([A-D])\.\s+(.*)$/', trim($line), $parts)) {
                $options[$parts[1]] = $parts[2];
            }
        }

        return $options === [] ? null : $options;
    }

    /**
     * Extract the `src` value of every HTML <img> tag in the block.
     *
     * NOTE(review): the head of this method was lost in the excerpt; the leading "<img[^>"
     * of the pattern is restored from the surviving remainder of the regex.
     *
     * @return array<int, string>
     */
    private function extractImages(string $block): array
    {
        preg_match_all('/<img[^>]+src=["\']([^"\']+)["\'][^>]*>/i', $block, $matches);

        return $matches[1] ?? [];
    }

    /**
     * Extract every HTML table (full <table>...</table> markup) found in the block.
     *
     * NOTE(review): the leading "<table[^>" of the pattern was stripped in the excerpt and is
     * restored here.
     *
     * @return array<int, string>
     */
    private function extractTables(string $block): array
    {
        // Simple match on HTML table tags; /s lets "." span line breaks inside the table.
        preg_match_all('/<table[^>]*>.*?<\/table>/s', $block, $matches);

        return $matches[0] ?? [];
    }

    /**
     * AI enhancement: ask the model whether the block is a question and store the verdict.
     *
     * On any failure the candidate is marked as "not a question" with confidence 0.0. The
     * catch covers \Throwable so that a TypeError raised on a malformed API payload cannot
     * escape this fallback path.
     */
    private function enhanceWithAi(array &$candidate): void
    {
        $prompt = $this->buildQuestionDetectionPrompt($candidate['raw_markdown']);

        try {
            $result = $this->callAiApi($prompt);

            $verdict = $result['is_question'] ?? $result['is_question_candidate'] ?? null;
            if ($verdict !== null) {
                $confidence = $result['confidence'] ?? $result['ai_confidence'] ?? null;

                // Same casts as parseWithAi() so both paths produce the same field types.
                $candidate['is_question_candidate'] = (bool) $verdict;
                $candidate['ai_confidence'] = $confidence !== null ? (float) $confidence : null;
            }
        } catch (\Throwable $e) {
            Log::error('AI question detection failed', [
                'error' => $e->getMessage(),
                // Character-based cut: a byte-based substr() could split a multibyte character.
                'block' => mb_substr((string) $candidate['raw_markdown'], 0, 200),
            ]);

            // Default: not a question.
            $candidate['is_question_candidate'] = false;
            $candidate['ai_confidence'] = 0.0;
        }
    }

    /**
     * Build the question-detection prompt from `ai.question_detection_prompt`, falling back
     * to a built-in template when that config value is empty.
     */
    private function buildQuestionDetectionPrompt(string $rawMarkdown): string
    {
        $template = (string) config('ai.question_detection_prompt');
        if (trim($template) === '') {
            $template = "请判断下面这段 Markdown 是否是一道数学题目。\n\n题目内容:\n{content}\n\n请输出 JSON:{\"is_question\":true|false,\"confidence\":0~1}";
        }

        return str_replace('{content}', $rawMarkdown, $template);
    }

    /**
     * Dispatch the prompt to the configured AI driver.
     *
     * @throws \Exception When the driver is unsupported or the provider call fails.
     */
    private function callAiApi(string $prompt): array
    {
        if ($this->aiDriver === 'deepseek') {
            return $this->callDeepSeek($prompt);
        }

        if ($this->aiDriver === 'openai') {
            return $this->callOpenAI($prompt);
        }

        throw new \Exception("Unsupported AI driver: {$this->aiDriver}");
    }

    /**
     * Call the DeepSeek chat-completions endpoint and return the decoded JSON answer.
     *
     * @throws \Exception On a non-successful response or an unusable payload.
     */
    private function callDeepSeek(string $prompt): array
    {
        return $this->requestChatCompletion(
            'DeepSeek',
            $this->deepseekBaseUrl,
            $this->deepseekModel,
            $this->deepseekTimeout,
            (string) config('ai.deepseek.api_key', env('DEEPSEEK_API_KEY')),
            $prompt,
        );
    }

    /**
     * Call the OpenAI chat-completions endpoint and return the decoded JSON answer.
     *
     * @throws \Exception On a non-successful response or an unusable payload.
     */
    private function callOpenAI(string $prompt): array
    {
        return $this->requestChatCompletion(
            'OpenAI',
            $this->openAiBaseUrl,
            $this->openAiModel,
            $this->openAiTimeout,
            (string) config('ai.openai.api_key', env('OPENAI_API_KEY')),
            $prompt,
        );
    }

    /**
     * Shared chat-completions request used by both providers.
     *
     * Sends the prompt as a single user message with temperature 0.1 and decodes the JSON
     * found in the first choice's message content.
     *
     * @param string $provider Provider label used in error messages ("DeepSeek" / "OpenAI").
     *
     * @throws \Exception On a non-successful response, missing content or undecodable JSON.
     */
    private function requestChatCompletion(
        string $provider,
        string $baseUrl,
        string $model,
        int $timeout,
        string $apiKey,
        string $prompt
    ): array {
        $response = Http::withHeaders([
            'Authorization' => "Bearer {$apiKey}",
            'Content-Type' => 'application/json',
        ])->timeout($timeout)->post($baseUrl . '/chat/completions', [
            'model' => $model,
            'messages' => [
                ['role' => 'user', 'content' => $prompt],
            ],
            'temperature' => 0.1,
        ]);

        if (!$response->successful()) {
            throw new \Exception("{$provider} API error: " . $response->body());
        }

        $content = $response->json('choices.0.message.content');
        if (!is_string($content)) {
            // Without this guard a missing content would hit parseJsonResponse(string) as
            // null and raise a TypeError instead of an ordinary exception.
            throw new \Exception("{$provider} API returned no message content");
        }

        return $this->parseJsonResponse($content);
    }

    /**
     * Decode the JSON object contained in an AI reply.
     *
     * A fenced code block holding an object is tried first, then the span from the first "{"
     * to the last "}". The fenced attempt matters when the surrounding prose itself contains
     * braces (e.g. LaTeX), which makes the wide span invalid JSON.
     *
     * @throws \Exception When no JSON object is present or none of the candidates decodes.
     */
    private function parseJsonResponse(string $content): array
    {
        $candidates = [];

        if (preg_match('/```(?:json)?\s*(\{.*?\})\s*```/is', $content, $fenced) === 1) {
            $candidates[] = $fenced[1];
        }

        if (preg_match('/\{.*\}/s', $content, $matches) === 1) {
            $candidates[] = $matches[0];
        }

        if ($candidates === []) {
            throw new \Exception('No JSON found in response');
        }

        foreach ($candidates as $candidate) {
            $decoded = json_decode($candidate, true);
            if (is_array($decoded)) {
                return $decoded;
            }
        }

        // json_last_error_msg() describes the last attempt, i.e. the widest brace span.
        throw new \Exception('Invalid JSON: ' . json_last_error_msg());
    }
}