ocrDriver = \App\Services\OCR\OCRFactory::create();
        $this->learningAnalyticsService = $learningAnalyticsService;
        $this->imageProcessingService = $imageProcessingService;
    }

    /**
     * Store an uploaded exam-paper photo, create its OCR record and run OCR on it.
     *
     * OCR is dispatched synchronously, and dispatchToOcrService() records failures on the
     * record instead of throwing, so the returned record may already be `completed` or `failed`.
     *
     * @param UploadedFile $image     The uploaded photo.
     * @param string       $studentId Stored as both `user_id` and `student_id` on the record.
     *
     * @throws \Exception When the upload does not pass validateImage().
     */
    public function uploadExamPaper(UploadedFile $image, string $studentId): OCRRecord
    {
        $this->validateImage($image);

        // The stored file name is derived from a fresh UUID so uploads never collide.
        $recordId = 'ocr_' . Str::uuid()->toString();
        $extension = $image->getClientOriginalExtension();
        $imagePath = 'uploads/ocr/' . $recordId . '.' . $extension;

        Storage::disk($this->getDisk())->put($imagePath, file_get_contents($image->getPathName()));

        $ocrRecord = OCRRecord::create([
            'user_id' => $studentId,
            'student_id' => $studentId,
            'file_path' => $imagePath,
            'paper_title' => $image->getClientOriginalName(),
            'status' => 'pending',
        ]);

        $this->dispatchToOcrService($ocrRecord);

        return $ocrRecord;
    }

    /**
     * Validate an uploaded image against the configured size limit and extension allow-list.
     *
     * Limits come from `ocr.upload.max_size` (default 10 MiB) and `ocr.upload.allowed_types`
     * (default jpg, jpeg, png, webp). The extension check uses the client-supplied file name.
     *
     * @throws \Exception When the upload is invalid, too large, or has a disallowed extension.
     */
    protected function validateImage(UploadedFile $image): void
    {
        $maxSize = config('ocr.upload.max_size', 10 * 1024 * 1024);
        $allowedTypes = config('ocr.upload.allowed_types', ['jpg', 'jpeg', 'png', 'webp']);

        if (!$image->isValid()) {
            throw new \Exception('文件上传失败');
        }

        if ($image->getSize() > $maxSize) {
            throw new \Exception('文件大小超出限制(' . ($maxSize / 1024 / 1024) . 'MB)');
        }

        $extension = strtolower($image->getClientOriginalExtension());
        if (!in_array($extension, $allowedTypes, true)) {
            throw new \Exception('不支持的文件类型,仅支持:' . implode(', ', $allowedTypes));
        }
    }

    /**
     * Run the OCR driver on a record's image and persist the recognised questions.
     *
     * Flow: resolve the image on disk, mark the record `processing`, call the driver once with
     * `cutType: answer`, then build the question list with the structured parser. If the
     * structured path throws, the simple "trailing A-D letter" parse is used instead.
     * Any exception is logged and stored on the record (`status = failed`); nothing is rethrown.
     */
    protected function dispatchToOcrService(OCRRecord $ocrRecord): void
    {
        try {
            // uploadExamPaper() stores the location under `file_path`, while this method used to
            // read only `image_path`. Accept either so freshly uploaded records are processable.
            $relativePath = $ocrRecord->image_path ?: $ocrRecord->file_path;
            if (empty($relativePath)) {
                throw new \Exception('OCR记录缺少图片路径,record_id: ' . $ocrRecord->id);
            }

            $imagePath = Storage::disk($this->getDisk())->path($relativePath);
            if (empty($imagePath)) {
                throw new \Exception('无法获取图片路径: ' . $relativePath);
            }
            if (!file_exists($imagePath)) {
                throw new \Exception('图片文件不存在: ' . $imagePath);
            }

            $ocrRecord->update(['status' => 'processing']);

            // Single API call with cutType: answer (returns both question and answer).
            \Log::info('OCR: Extracting questions and answers', ['record_id' => $ocrRecord->id]);
            $result = $this->ocrDriver->recognize($imagePath, [
                'cutType' => 'answer',
                'subject' => 'Math',
                'ocr_record_id' => $ocrRecord->id,
            ]);

            $items = $result['questions'] ?? [];
            \Log::info('OCR extraction complete', ['item_count' => count($items)]);

            // Simple parse, kept as the fallback result for when the structured parser fails.
            \Log::info('Parsing student answers from question text');
            $parsedQuestions = $this->extractTrailingChoiceAnswers($items);

            try {
                $finalQuestions = $this->buildStructuredQuestions($ocrRecord, $result);

                $this->processOcrResult($ocrRecord, [
                    'questions' => $finalQuestions,
                    'raw' => $result,
                ]);
            } catch (\Exception $e) {
                \Log::warning('OCR: 解析失败,回退到原有逻辑', [
                    'record_id' => $ocrRecord->id,
                    'error' => $e->getMessage(),
                ]);

                // processOcrResult() may already have inserted some question rows before it
                // failed; remove them so the fallback run does not produce duplicates.
                OCRQuestionResult::where('ocr_record_id', $ocrRecord->id)->delete();

                $this->processOcrResult($ocrRecord, [
                    'questions' => $parsedQuestions,
                    'raw' => $result,
                ]);
            }
        } catch (\Exception $e) {
            \Log::error('OCR服务调用失败', [
                'record_id' => $ocrRecord->id,
                'error' => $e->getMessage(),
            ]);

            $ocrRecord->update([
                'status' => 'failed',
                'error_message' => 'OCR服务调用失败:' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Simple parse: treat a trailing single letter A-D in each item's text as the student's answer.
     *
     * @param array $items Entries of the driver result's `questions` list.
     *
     * @return array List of question arrays (question_number, content, student_answer, confidence, raw_data).
     */
    private function extractTrailingChoiceAnswers(array $items): array
    {
        $parsedQuestions = [];

        foreach ($items as $item) {
            $questionNumber = $item['question_number'] ?? null;
            $fullText = $item['content'] ?? '';
            $questionText = $fullText;
            $studentAnswer = '';

            // Pattern: "question text ... option D[student answer]" - the answer is the last A-D letter.
            if (preg_match('/([A-D])\s*$/u', $fullText, $matches)) {
                $studentAnswer = $matches[1];
                // Same `u` flag as the match above so multi-byte text is treated as UTF-8.
                $questionText = preg_replace('/\s*[A-D]\s*$/u', '', $fullText) ?? $fullText;

                \Log::info('Extracted student answer', [
                    'question_number' => $questionNumber,
                    'answer' => $studentAnswer,
                    'original_text_length' => mb_strlen($fullText),
                    'cleaned_text_length' => mb_strlen($questionText),
                ]);
            }

            $parsedQuestions[] = [
                'question_number' => $questionNumber,
                'content' => trim($questionText),
                'student_answer' => $studentAnswer,
                'confidence' => $item['confidence'] ?? 0.0,
                'raw_data' => $item['raw_data'] ?? null,
            ];
        }

        return $parsedQuestions;
    }

    /**
     * Build the final question list with OCRDataParser.
     *
     * When the record's `analysis_id` resolves to a Paper of type `auto_generated`, the result
     * is matched against that paper's questions via performEnhancedMatching(). Otherwise the
     * parser's structured questions are used, with the paper's questions (if any) passed as context.
     *
     * @param array $result Raw driver response.
     *
     * @return array List of question arrays in the shape processOcrResult() expects.
     */
    private function buildStructuredQuestions(OCRRecord $ocrRecord, array $result): array
    {
        $paper = null;
        if ($ocrRecord->analysis_id) {
            $paper = \App\Models\Paper::where('paper_id', $ocrRecord->analysis_id)->first();
        }

        if ($paper && $paper->paper_type === 'auto_generated') {
            $paperQuestions = \App\Models\PaperQuestion::where('paper_id', $paper->paper_id)
                ->orderBy('question_number')
                ->get();

            return $this->performEnhancedMatching($ocrRecord, $result, $paperQuestions);
        }

        $paperInfo = null;
        if ($paper) {
            $paperQuestionsArr = \App\Models\PaperQuestion::where('paper_id', $paper->paper_id)
                ->get()
                ->map(function ($q) {
                    return [
                        'question_number' => $q->question_number,
                        'question_type' => $q->question_type,
                        'correct_answer' => $q->correct_answer,
                        'content' => $q->question_text,
                    ];
                })
                ->toArray();
            $paperInfo = ['questions' => $paperQuestionsArr];
        }

        $parser = new \App\Services\OCRDataParser();
        $structuredQuestions = $parser->parseStructuredQuestions($result, $paperInfo);

        $finalQuestions = [];
        foreach ($structuredQuestions as $q) {
            $finalQuestions[] = [
                'question_number' => $q['question_number'],
                'content' => $q['content'],
                'student_answer' => $q['answer'],
                'confidence' => $q['confidence'],
                'raw_data' => [
                    'options' => $q['options'] ?? [],
                    'blocks' => $q['blocks'] ?? [],
                ],
            ];
        }

        return $finalQuestions;
    }

    /**
     * Match answers to questions by question number.
     *
     * Answers whose `question_number` is missing or falsy are ignored; questions without a
     * matching answer get an empty `student_answer`.
     *
     * @return array One entry per input question, in input order.
     */
    protected function matchAnswersToQuestions(array $questions, array $answers): array
    {
        // Index answer text by question number.
        $answersByNumber = [];
        foreach ($answers as $answer) {
            $number = $answer['question_number'] ?? null;
            if (!$number) {
                continue;
            }
            $answersByNumber[$number] = $answer['content'] ?? '';
        }

        $matched = [];
        foreach ($questions as $question) {
            $number = $question['question_number'];
            $matched[] = [
                'question_number' => $number,
                'content' => $question['content'],
                'student_answer' => $answersByNumber[$number] ?? '',
                'confidence' => $question['confidence'] ?? 0.0,
                'raw_data' => $question['raw_data'] ?? null,
            ];
        }

        return $matched;
    }

    /**
     * Persist an OCR result: debug dumps, raw-data row, per-question rows and record status.
     *
     * Steps, in order:
     *  1. Dump the whole $result to a JSON file under storage/logs.
     *  2. Upsert a row in `ocr_raw_data` (failures here are logged and do not abort processing).
     *  3. Dump the question list to a second JSON file (only when non-empty).
     *  4. Clean LaTeX in each question and create one OCRQuestionResult per question.
     *  5. Mark the record `completed` with counts and average confidence.
     *
     * Analysis is intentionally NOT submitted automatically any more: the user first generates
     * question-bank items on the ocr-record-view page and then submits the analysis manually.
     *
     * @param array $result Expected keys: `questions` (list of question arrays) and `raw`
     *                      (the driver response) - this is what the visible callers pass.
     */
    protected function processOcrResult(OCRRecord $ocrRecord, array $result): void
    {
        // 1. Full payload dump for debugging.
        $logFile = storage_path("logs/ocr_raw_data_{$ocrRecord->id}_" . date('Y-m-d_H-i-s') . ".json");
        file_put_contents($logFile, json_encode([
            'timestamp' => now()->toISOString(),
            'record_id' => $ocrRecord->id,
            'paper_title' => $ocrRecord->paper_title,
            'student_id' => $ocrRecord->student_id,
            'file_path' => $ocrRecord->file_path,
            'aliyun_response' => $result,
        ], JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE));

        \Log::info('OCR: 完整API数据已写入文件', [
            'record_id' => $ocrRecord->id,
            'log_file' => basename($logFile),
        ]);

        // 2. Raw response row. Best-effort: a failure here must not block question processing.
        try {
            // Callers in this class wrap the driver response under `raw`, so the top-level keys
            // are never present for them; fall back to the wrapped response.
            // NOTE(review): assumes the driver response carries `requestId` / `data.algo_version`
            // at its top level - confirm against the OCR driver.
            $rawResponse = $result['raw'] ?? null;
            $requestId = $result['requestId'] ?? $rawResponse['requestId'] ?? null;
            $algoVersion = $result['data']['algo_version'] ?? $rawResponse['data']['algo_version'] ?? null;

            \Illuminate\Support\Facades\DB::table('ocr_raw_data')->updateOrInsert(
                ['ocr_record_id' => $ocrRecord->id],
                [
                    'raw_response' => json_encode($result, JSON_UNESCAPED_UNICODE),
                    'api_request_id' => $requestId,
                    'algo_version' => $algoVersion,
                    'total_blocks' => count($result['questions'] ?? []),
                    'metadata' => json_encode([
                        'saved_at' => now()->toISOString(),
                        'source' => 'OCRService',
                    ]),
                    'created_at' => now(),
                    'updated_at' => now(),
                ]
            );

            \Log::info('OCR: 原始数据已保存到数据库', ['record_id' => $ocrRecord->id]);
        } catch (\Exception $e) {
            \Log::error('OCR: 保存原始数据到数据库失败', [
                'record_id' => $ocrRecord->id,
                'error' => $e->getMessage(),
            ]);
        }

        $questions = $result['questions'] ?? [];

        // 3. Question list dump.
        if (!empty($questions)) {
            $questionsLogFile = storage_path("logs/ocr_questions_{$ocrRecord->id}_" . date('Y-m-d_H-i-s') . ".json");
            file_put_contents($questionsLogFile, json_encode([
                'timestamp' => now()->toISOString(),
                'record_id' => $ocrRecord->id,
                'paper_title' => $ocrRecord->paper_title,
                'total_questions' => count($questions),
                'questions' => $questions,
            ], JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE));

            \Log::info('OCR: 题目列表已写入文件', [
                'record_id' => $ocrRecord->id,
                'questions_count' => count($questions),
                'log_file' => basename($questionsLogFile),
            ]);
        }

        // 4. Pre-clean every formula with the LaTeX cleaner, then store each question.
        $latexCleaner = app(\App\Services\LatexCleanerService::class);
        $questions = $latexCleaner->cleanArray($questions, ['content', 'student_answer']);

        \Log::info('LaTeX formulas cleaned', ['question_count' => count($questions)]);

        $processedCount = 0;
        foreach ($questions as $question) {
            // Cleaned a second time on purpose, as a safety net.
            $questionText = $latexCleaner->clean($question['content'] ?? '');
            $studentAnswer = $latexCleaner->clean($question['student_answer'] ?? '');

            $validation = $latexCleaner->validate($questionText);
            if (!$validation['valid']) {
                \Log::warning('LaTeX validation warnings', [
                    'question_number' => $question['question_number'],
                    'errors' => $validation['errors'],
                ]);
            }

            OCRQuestionResult::create([
                'ocr_record_id' => $ocrRecord->id,
                'question_number' => $question['question_number'],
                'question_text' => $questionText,
                'student_answer' => $studentAnswer,
                'score_value' => 0, // Will be filled by AI grading
                'mark_detected' => null,
                'score_confidence' => $question['confidence'] ?? 0,
            ]);

            $processedCount++;
        }

        // 5. Final record state.
        $ocrRecord->update([
            'status' => 'completed',
            'processed_at' => now(),
            'total_questions' => $processedCount,
            'processed_questions' => $processedCount,
            'confidence_avg' => collect($questions)->avg('confidence') ?? 0,
        ]);

        \Log::info('OCR processing complete', [
            'record_id' => $ocrRecord->id,
            'questions_processed' => $processedCount,
        ]);
    }

    /**
     * Submit a record's questions to the learning-analytics service.
     *
     * A manually corrected answer (`manual_answer`) takes precedence over the OCR answer.
     * On a successful response the record's `ai_analyzed_at` / `ai_analysis_count` are updated.
     * Exceptions are logged and swallowed so they cannot affect the OCR completion status.
     *
     * @param array $questions List of question arrays (question_number, content, student_answer, ...).
     */
    protected function submitToAnalysis(OCRRecord $ocrRecord, array $questions): void
    {
        try {
            $questionPayload = array_map(static function ($q) {
                $studentAnswer = !empty($q['manual_answer'])
                    ? $q['manual_answer']
                    : ($q['student_answer'] ?? '');

                return [
                    'question_id' => $q['question_number'], // question number doubles as a temporary ID
                    'question_number' => (string)$q['question_number'],
                    'kp_code' => $q['kp_code'] ?? null,
                    'score_value' => $q['score_value'] ?? 0,
                    'student_answer' => $studentAnswer,
                    'ocr_confidence' => $q['confidence'] ?? 0,
                    'question_text' => $q['content'] ?? '', // question text for the AI analysis
                    'teacher_validated' => $q['answer_verified'] ?? false,
                ];
            }, $questions);

            $analysisData = [
                // Falls back to an ID derived from the OCR record when no exam ID is set.
                'exam_id' => $ocrRecord->exam_id ?? ('ocr_' . $ocrRecord->id),
                'student_id' => $ocrRecord->student_id,
                'ocr_record_id' => $ocrRecord->id,
                'teacher_name' => 'System',
                'analysis_type' => 'mastery',
                'questions' => $questionPayload,
            ];

            $result = $this->learningAnalyticsService->submitOCRAnalysis($analysisData);

            if (isset($result['success']) && $result['success']) {
                $ocrRecord->update([
                    'ai_analyzed_at' => now(),
                    'ai_analysis_count' => ($ocrRecord->ai_analysis_count ?? 0) + 1,
                ]);
            }
        } catch (\Exception $e) {
            \Log::error('Failed to submit to analysis service', [
                'record_id' => $ocrRecord->id,
                'error' => $e->getMessage(),
            ]);
        }
    }

    /**
     * Reset an OCR record, delete its stored question results and run OCR again.
     *
     * @return bool Always true; OCR failures are recorded on the record's `status` /
     *              `error_message` by dispatchToOcrService() rather than reported here.
     */
    public function reprocess(OCRRecord $ocrRecord): bool
    {
        $ocrRecord->update([
            'status' => 'pending',
            'error_message' => null,
            'processed_at' => null,
            'total_questions' => 0,
            'processed_questions' => 0,
            'confidence_avg' => null,
        ]);

        OCRQuestionResult::where('ocr_record_id', $ocrRecord->id)->delete();

        $this->dispatchToOcrService($ocrRecord);

        return true;
    }

    /**
     * Count OCR records in total and per status.
     *
     * @return array{total:int, pending:int, processing:int, completed:int, failed:int}
     */
    public function getStatistics(): array
    {
        $statistics = ['total' => OCRRecord::count()];

        foreach (['pending', 'processing', 'completed', 'failed'] as $status) {
            $statistics[$status] = OCRRecord::where('status', $status)->count();
        }

        return $statistics;
    }

    /**
     * Perform enhanced matching with a system paper, including ROI cropping and secondary OCR.
     *
     * Each match returned by OCRDataParser::matchWithSystemPaper() that carries y-coordinates
     * is cropped out of the original image and recognised again (handwriting recognition when
     * the driver offers `recognizeHandwriting`, standard recognition otherwise). A failed
     * secondary pass keeps the initially matched answer. Every answer is finally passed through
     * cleanHandwritingAnswer() and the LaTeX cleaner.
     *
     * @param array $ocrResult      Raw driver response of the full-page OCR call.
     * @param mixed $paperQuestions The paper's questions, passed through to the parser unchanged.
     *
     * @return array List of question arrays (question_number, content, student_answer,
     *               confidence, student_answer_bbox, raw_data).
     */
    public function performEnhancedMatching(OCRRecord $ocrRecord, array $ocrResult, $paperQuestions): array
    {
        $parser = new \App\Services\OCRDataParser();
        $latexCleaner = app(\App\Services\LatexCleanerService::class);

        $matchedResults = $parser->matchWithSystemPaper($ocrResult, $paperQuestions);

        $disk = Storage::disk($this->getDisk());
        // Records created by uploadExamPaper() carry the location in `file_path`.
        $imagePath = $disk->path((string) ($ocrRecord->image_path ?: $ocrRecord->file_path));

        $cropDirectory = 'uploads/ocr/crops';
        $cropDirectoryReady = false;
        $finalQuestions = [];

        foreach ($matchedResults as $qNum => $match) {
            $questionText = (string) ($match['question_text'] ?? '');
            $secondaryAnswer = $match['student_answer'] ?? ''; // default to the initial match
            $coordinates = $match['coordinates'] ?? null;

            // Both bounds are required for cropping; skip the secondary pass otherwise.
            if (isset($coordinates['y_min'], $coordinates['y_max'])) {
                if (!$cropDirectoryReady) {
                    // Creates the directory on the disk if needed; safe when it already exists.
                    $disk->makeDirectory($cropDirectory);
                    $cropDirectoryReady = true;
                }

                $cropPath = $cropDirectory . '/' . $ocrRecord->id . "_q{$qNum}.jpg";
                $absoluteCropPath = $disk->path($cropPath);

                $cropped = $this->imageProcessingService->cropImage(
                    $imagePath,
                    $coordinates['y_min'],
                    $coordinates['y_max'],
                    $absoluteCropPath
                );

                if ($cropped) {
                    try {
                        \Log::info("Secondary OCR for Q{$qNum} (Handwriting)", ['crop_path' => $cropPath]);

                        if (method_exists($this->ocrDriver, 'recognizeHandwriting')) {
                            $handwritingResult = $this->ocrDriver->recognizeHandwriting($absoluteCropPath, [
                                'subject' => 'Math',
                                'ocr_record_id' => $ocrRecord->id,
                            ]);

                            if (!empty($handwritingResult['texts'])) {
                                // Wrap the handwriting texts in the structure extractAnswerFromCrop() is given.
                                $combinedText = implode(' ', array_column($handwritingResult['texts'], 'text'));
                                $cropResult = [
                                    'questions' => [
                                        [
                                            'question_number' => $qNum,
                                            'content' => $combinedText,
                                            'student_answer' => $combinedText, // for now, treat full text as answer
                                            'confidence' => 1, // assume high confidence for handwriting
                                            'bounding_box' => [ // placeholder bbox for the whole crop
                                                'x_min' => 0,
                                                'y_min' => 0,
                                                'x_max' => 1,
                                                'y_max' => 1,
                                            ],
                                        ],
                                    ],
                                ];

                                $secondaryAnswer = $parser->extractAnswerFromCrop($cropResult, $questionText);

                                \Log::info("Handwriting OCR Result for Q{$qNum}", [
                                    'raw_answer' => $secondaryAnswer,
                                    'texts_count' => count($handwritingResult['texts']),
                                ]);
                            } else {
                                \Log::info("No handwriting detected for Q{$qNum}, using original answer");
                            }
                        } else {
                            // Driver has no handwriting support: re-run the standard recognition on the crop.
                            \Log::warning("Handwriting recognition not supported, using standard OCR");
                            $cropResult = $this->ocrDriver->recognize($absoluteCropPath, [
                                'cutType' => 'answer',
                                'subject' => 'Math',
                                'ocr_record_id' => $ocrRecord->id,
                            ]);

                            if (!empty($cropResult['questions'])) {
                                $secondaryAnswer = $parser->extractAnswerFromCrop($cropResult, $questionText);
                                \Log::info("Standard OCR Result for Q{$qNum}: {$secondaryAnswer}");
                            }
                        }
                    } catch (\Exception $e) {
                        \Log::warning("Secondary OCR failed for Q{$qNum}: " . $e->getMessage());
                    }
                }
            }

            // Strip residual question text / noise so only the handwritten answer remains.
            // The cast guards against a null answer, which would otherwise raise a TypeError
            // (not caught by the callers' `catch (\Exception)` blocks).
            $secondaryAnswer = $this->cleanHandwritingAnswer((string) $secondaryAnswer, $questionText);
            $secondaryAnswer = $latexCleaner->clean($secondaryAnswer);

            $finalQuestions[] = [
                'question_number' => $qNum,
                'content' => '系统题目',
                'student_answer' => $secondaryAnswer,
                'confidence' => $match['confidence'] ?? 0.0,
                'student_answer_bbox' => $match['coordinates'] ?? null,
                'raw_data' => $match['debug_info'] ?? [],
            ];
        }

        \Log::info('OCR: 使用增强匹配完成 (含手写识别)', [
            'record_id' => $ocrRecord->id,
            'matched_count' => count($finalQuestions),
        ]);

        return $finalQuestions;
    }

    /**
     * Name of the storage disk that holds OCR uploads and crops.
     */
    protected function getDisk(): string
    {
        return 'public';
    }

    /**
     * Reduce a handwriting-recognition result to the student's answer.
     *
     * Heuristics, applied in order: strip a leading item number and "解/答/答案" prefixes,
     * drop text that is mostly the question stem, cut at stem anchors, keep the part after
     * "得" or a trailing "=", and finally keep the last short `;`-separated segment.
     *
     * @param string $rawAnswer    Full text returned by handwriting recognition.
     * @param string $questionText Stem of the corresponding question (may be empty).
     *
     * @return string The cleaned answer, or '' when the text looks like the stem itself.
     */
    private function cleanHandwritingAnswer(string $rawAnswer, string $questionText = ''): string
    {
        // Collapse all whitespace runs to single spaces.
        $answer = trim(preg_replace('/\s+/', ' ', $rawAnswer) ?? $rawAnswer);
        if ($answer === '') {
            return '';
        }

        // Leading item number ("1.", "2)", "3、", "4 "). A separator or whitespace is required
        // and decimals are excluded, so numeric answers such as "42", "3.14" or "2x+1" survive.
        // The possessive \d++ stops the engine from backtracking into "12.5" and stripping "1".
        $withoutNumbering = preg_replace(
            '/^[O0〇]?\s*\d++(?![\\..]\d)(?:[\\..、\\))]\s*|\s+)/u',
            '',
            $answer
        );
        // Keep the original when stripping would erase everything (e.g. the answer "5.").
        if (is_string($withoutNumbering) && $withoutNumbering !== '') {
            $answer = $withoutNumbering;
        }

        // "答案" must come first in the alternation, otherwise "答" matches and leaves "案" behind.
        $answer = preg_replace('/^(答案|解|答)[::]?\s*/u', '', $answer) ?? $answer;

        // Normalised form (no tags, whitespace or punctuation; lower-cased) for similarity checks.
        $normalize = static function (string $text): string {
            $text = strip_tags($text);
            $text = preg_replace('/\s+/', '', $text) ?? $text;
            $text = preg_replace('/[[:punct:]]/u', '', $text) ?? $text;

            return mb_strtolower($text);
        };

        $normAnswer = $normalize($answer);
        $normQuestion = $normalize($questionText);

        // Text that is essentially the stem itself counts as "no answer".
        if ($normQuestion !== '') {
            similar_text($normAnswer, $normQuestion, $similarity);
            if ($similarity >= 70 && mb_strlen($normAnswer) <= mb_strlen($normQuestion) * 1.2) {
                return '';
            }
        }

        if ($questionText !== '') {
            // Cut everything up to and including the tail of the stem, if it appears verbatim.
            $anchor = mb_substr($questionText, -12);
            if ($anchor !== '') {
                $pos = mb_stripos($answer, $anchor);
                if ($pos !== false) {
                    $answer = trim(mb_substr($answer, $pos + mb_strlen($anchor)));
                    $normAnswer = $normalize($answer);
                }
            }

            // Still starts with the stem: crudely drop as many characters as the stem is long.
            if ($normQuestion !== '' && str_starts_with($normAnswer, $normQuestion)) {
                $answer = trim(mb_substr($answer, mb_strlen($questionText)));
                $normAnswer = $normalize($answer);
            }

            // Cut once more using the head of the stem (works better for short questions).
            $prefix = mb_substr($questionText, 0, 18);
            if ($prefix !== '') {
                $pos = mb_stripos($answer, $prefix);
                if ($pos !== false && $pos + mb_strlen($prefix) <= mb_strlen($answer)) {
                    $answer = trim(mb_substr($answer, $pos + mb_strlen($prefix)));
                    $normAnswer = $normalize($answer);
                }
            }
        }

        // Long text: prefer what follows "得"; otherwise (and for short text) what follows a trailing "=".
        // The explicit !== '' comparison keeps the answer "0", which empty() would reject.
        if (
            mb_strlen($answer) > 40
            && preg_match('/得[::]?\s*([^,。;]*)/u', $answer, $matches)
            && trim($matches[1]) !== ''
        ) {
            $answer = trim($matches[1]);
        } elseif (preg_match('/=\s*([^\s,。;]+)\s*$/u', $answer, $matches)) {
            $answer = trim($matches[1]);
        }

        // Final similarity check so leftover stem text is not reported as the answer.
        $normAnswer = $normalize($answer);
        if ($normQuestion !== '') {
            similar_text($normAnswer, $normQuestion, $finalSim);
            if ($finalSim >= 65 && mb_strlen($normAnswer) > 0) {
                return '';
            }
        }

        // Several segments: prefer the last non-empty one when it is short.
        // Trim before filtering so a whitespace-only segment cannot become the candidate.
        $parts = preg_split('/[\\n;]/u', $answer);
        if (is_array($parts)) {
            $parts = array_values(array_filter(
                array_map('trim', $parts),
                static fn (string $part): bool => $part !== ''
            ));
            if ($parts !== []) {
                $candidate = end($parts);
                if (mb_strlen($candidate) <= 50) {
                    $answer = $candidate;
                }
            }
        }

        return trim($answer);
    }
}