| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
- <?php
- namespace App\Services;
- use App\Models\OCRRecord;
- use App\Models\OCRQuestionResult;
- use Illuminate\Http\UploadedFile;
- use Illuminate\Support\Facades\Http;
- use Illuminate\Support\Facades\Storage;
- use Illuminate\Support\Str;
/**
 * Stores uploaded exam-paper images, runs them through the configured OCR
 * driver and persists the recognised questions as OCRQuestionResult rows.
 *
 * OCR is executed synchronously inside uploadExamPaper() and reprocess();
 * OCR failures are recorded on the OCRRecord (status "failed") instead of
 * being thrown to the caller.
 */
class OCRService
{
    /**
     * OCR driver. Only its recognize($imagePath, $options) method is used
     * here; the result is read as an array with a "questions" key.
     *
     * @var mixed
     */
    protected $ocrDriver;

    /**
     * @param mixed|null $ocrDriver Optional driver to use (handy for tests).
     *                              When omitted the driver is built by OCRFactory,
     *                              exactly as before.
     */
    public function __construct($ocrDriver = null)
    {
        $this->ocrDriver = $ocrDriver ?? \App\Services\OCR\OCRFactory::create();
    }

    /**
     * Upload an exam-paper photo, create its OCR record and run OCR on it.
     *
     * @param UploadedFile $image     The uploaded photo.
     * @param string       $studentId Identifier stored on the record.
     *
     * @return OCRRecord The created record; its status reflects the OCR outcome.
     *
     * @throws \Exception When validation fails or the image cannot be stored.
     */
    public function uploadExamPaper(UploadedFile $image, string $studentId): OCRRecord
    {
        $this->validateImage($image);

        // Generate the identifiers for this upload.
        $recordId = 'ocr_' . Str::uuid()->toString();
        $examId = 'exam_' . now()->format('YmdHis') . '_' . Str::random(8);

        // getimagesize() yields false when it cannot parse the file; the
        // dimensions then fall back to 0.
        $imageInfo = getimagesize($image->getPathname());
        $imageWidth = $imageInfo[0] ?? 0;
        $imageHeight = $imageInfo[1] ?? 0;
        $imageSize = $image->getSize();

        // Store the file on the same disk dispatchToOcrService() reads from,
        // streaming it instead of loading the whole upload into memory.
        $extension = strtolower($image->getClientOriginalExtension());
        $filename = $recordId . '.' . $extension;
        $imagePath = 'uploads/ocr/' . $filename;
        $stored = Storage::disk($this->getDisk())->putFileAs('uploads/ocr', $image, $filename);
        if ($stored === false) {
            // Do not create a record that points at a file which was never written.
            throw new \Exception('图片保存失败');
        }

        $ocrRecord = OCRRecord::create([
            'id' => $recordId,
            'exam_id' => $examId,
            'student_id' => $studentId,
            'image_path' => $imagePath,
            'image_filename' => $image->getClientOriginalName(),
            'image_size' => $imageSize,
            'image_width' => $imageWidth,
            'image_height' => $imageHeight,
            'status' => 'pending',
        ]);

        // Run OCR right away; failures are captured on the record itself.
        $this->dispatchToOcrService($ocrRecord);

        return $ocrRecord;
    }

    /**
     * Validate an uploaded image against the configured size and type limits.
     *
     * NOTE(review): only the client-supplied file extension is checked, not
     * the actual file content — consider verifying the real MIME type.
     *
     * @throws \Exception When the upload is invalid, too large or of a
     *                    disallowed extension.
     */
    protected function validateImage(UploadedFile $image): void
    {
        $maxSize = config('ocr.upload.max_size', 10 * 1024 * 1024);
        $allowedTypes = config('ocr.upload.allowed_types', ['jpg', 'jpeg', 'png', 'webp']);

        if (!$image->isValid()) {
            throw new \Exception('文件上传失败');
        }

        if ($image->getSize() > $maxSize) {
            throw new \Exception('文件大小超出限制(' . ($maxSize / 1024 / 1024) . 'MB)');
        }

        $extension = strtolower($image->getClientOriginalExtension());
        if (!in_array($extension, $allowedTypes, true)) {
            throw new \Exception('不支持的文件类型,仅支持:' . implode(', ', $allowedTypes));
        }
    }

    /**
     * Run the OCR driver on the record's image and persist the parsed result.
     *
     * Any failure (including PHP errors such as TypeError) is logged and
     * stored on the record as status "failed"; nothing is rethrown, so the
     * record never stays stuck in "processing".
     */
    protected function dispatchToOcrService(OCRRecord $ocrRecord): void
    {
        try {
            // Resolve the stored image to an absolute path for the driver.
            $imagePath = Storage::disk($this->getDisk())->path($ocrRecord->image_path);
            if (!file_exists($imagePath)) {
                throw new \Exception('图片文件不存在: ' . $imagePath);
            }

            $ocrRecord->update(['status' => 'processing']);

            // Single driver call; with cutType "answer" each returned item
            // carries the question text together with the student's answer.
            // NOTE(review): the subject is fixed to Math here.
            \Log::info('OCR: Extracting questions and answers', ['record_id' => $ocrRecord->id]);
            $result = $this->ocrDriver->recognize($imagePath, [
                'cutType' => 'answer',
                'subject' => 'Math',
            ]);

            $items = $result['questions'] ?? [];
            \Log::info('OCR extraction complete', ['item_count' => count($items)]);

            \Log::info('Parsing student answers from question text');

            $parsedQuestions = [];
            foreach ($items as $item) {
                $questionNumber = $item['question_number'];
                $fullText = $item['content'] ?? '';

                [$questionText, $studentAnswer] = $this->splitTrailingAnswer($fullText);

                if ($studentAnswer !== '') {
                    \Log::info('Extracted student answer', [
                        'question_number' => $questionNumber,
                        'answer' => $studentAnswer,
                        'original_text_length' => mb_strlen($fullText),
                        'cleaned_text_length' => mb_strlen($questionText),
                    ]);
                }

                $parsedQuestions[] = [
                    'question_number' => $questionNumber,
                    'content' => trim($questionText),
                    'student_answer' => $studentAnswer,
                    'confidence' => $item['confidence'] ?? 0.0,
                    'raw_data' => $item['raw_data'] ?? null,
                ];
            }

            $this->processOcrResult($ocrRecord, [
                'questions' => $parsedQuestions,
                'raw' => $result,
            ]);
        } catch (\Throwable $e) {
            \Log::error('OCR服务调用失败', [
                'record_id' => $ocrRecord->id,
                'error' => $e->getMessage(),
            ]);

            // Mark the record as failed so it can be inspected or reprocessed.
            $ocrRecord->update([
                'status' => 'failed',
                'error_message' => 'OCR服务调用失败:' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Split a recognised item text into question text and student answer.
     *
     * Heuristic: when the text ends with a single letter A-D (optionally
     * surrounded by whitespace) that letter is taken as the student's answer
     * and stripped, together with the whitespace around it, from the text.
     *
     * @return array{0: string, 1: string} [question text, answer letter or '']
     */
    protected function splitTrailingAnswer(string $fullText): array
    {
        // One pattern serves both detection and removal, so the two can never
        // disagree. Offsets are byte offsets, hence the byte-wise substr().
        if (preg_match('/\s*([A-D])\s*$/u', $fullText, $matches, PREG_OFFSET_CAPTURE) !== 1) {
            return [$fullText, ''];
        }

        return [substr($fullText, 0, $matches[0][1]), $matches[1][0]];
    }

    /**
     * Match answers to questions by question number.
     *
     * @param array $questions Items with "question_number" and "content"
     *                         (optionally "confidence" and "raw_data").
     * @param array $answers   Items with "question_number" and "content".
     *
     * @return array One entry per question, with "student_answer" set to the
     *               matching answer content or '' when there is none.
     */
    protected function matchAnswersToQuestions(array $questions, array $answers): array
    {
        // Index the answers by question number.
        $answerMap = [];
        foreach ($answers as $answer) {
            $questionNumber = $answer['question_number'] ?? null;
            // Explicit comparison: a plain truthiness test would also drop
            // question number 0 / '0'.
            if ($questionNumber !== null && $questionNumber !== '') {
                $answerMap[$questionNumber] = $answer['content'] ?? '';
            }
        }

        $matched = [];
        foreach ($questions as $question) {
            $questionNumber = $question['question_number'];
            $matched[] = [
                'question_number' => $questionNumber,
                'content' => $question['content'],
                'student_answer' => $answerMap[$questionNumber] ?? '',
                'confidence' => $question['confidence'] ?? 0.0,
                'raw_data' => $question['raw_data'] ?? null,
            ];
        }

        return $matched;
    }

    /**
     * Persist the parsed questions and mark the record as completed.
     *
     * @param array $result Expected to hold the parsed items under "questions";
     *                      each item provides "question_number" and optionally
     *                      "content", "student_answer" and "confidence".
     */
    protected function processOcrResult(OCRRecord $ocrRecord, array $result): void
    {
        $questions = $result['questions'] ?? [];

        \Log::info('OCR Result received', ['question_count' => count($questions)]);

        $processedCount = 0;
        foreach ($questions as $question) {
            OCRQuestionResult::create([
                'ocr_record_id' => $ocrRecord->id,
                'question_number' => $question['question_number'],
                'question_text' => $question['content'] ?? '',
                'student_answer' => $question['student_answer'] ?? '',
                'score_value' => 0, // Will be filled by AI grading
                'mark_detected' => null,
                'score_confidence' => $question['confidence'] ?? 0,
            ]);
            $processedCount++;
        }

        $ocrRecord->update([
            'status' => 'completed',
            'processed_at' => now(),
            'total_questions' => $processedCount,
            'processed_questions' => $processedCount,
            // avg() yields null for an empty list; store 0 in that case.
            'confidence_avg' => collect($questions)->avg('confidence') ?? 0,
        ]);

        \Log::info('OCR processing complete', [
            'record_id' => $ocrRecord->id,
            'questions_processed' => $processedCount,
        ]);
    }

    /**
     * Reset an OCR record, drop its previous question results and run OCR again.
     *
     * @return bool True when the record ended up "completed", false when the
     *              new OCR run failed (the error is stored on the record).
     */
    public function reprocess(OCRRecord $ocrRecord): bool
    {
        // Reset the processing state.
        $ocrRecord->update([
            'status' => 'pending',
            'error_message' => null,
            'processed_at' => null,
            'total_questions' => 0,
            'processed_questions' => 0,
            'confidence_avg' => null,
        ]);

        // Remove the results of the previous run.
        OCRQuestionResult::where('ocr_record_id', $ocrRecord->id)->delete();

        $this->dispatchToOcrService($ocrRecord);

        // dispatchToOcrService() never throws; the outcome is the status it
        // saved on this same model instance.
        return $ocrRecord->getRawOriginal('status') === 'completed';
    }

    /**
     * Count OCR records overall and per processing status.
     *
     * @return array{total: int, pending: int, processing: int, completed: int, failed: int}
     */
    public function getStatistics(): array
    {
        // One grouped query instead of five separate COUNTs; all numbers come
        // from the same snapshot.
        $counts = OCRRecord::query()
            ->toBase()
            ->selectRaw('status, COUNT(*) as aggregate')
            ->groupBy('status')
            ->pluck('aggregate', 'status');

        return [
            // The total covers every record, whatever its status.
            'total' => (int) $counts->sum(),
            'pending' => (int) $counts->get('pending', 0),
            'processing' => (int) $counts->get('processing', 0),
            'completed' => (int) $counts->get('completed', 0),
            'failed' => (int) $counts->get('failed', 0),
        ];
    }

    /**
     * Name of the storage disk that holds OCR uploads.
     */
    protected function getDisk(): string
    {
        return 'public'; // OCR uploads are stored in public disk
    }
}
|