<?php

namespace App\Services;

use App\Models\OCRRecord;
use App\Models\OCRQuestionResult;
use App\Services\ImageProcessingService;
use Illuminate\Http\UploadedFile;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;

class OCRService
{
    protected $ocrDriver;
    protected $learningAnalyticsService;
    protected $imageProcessingService;

    /**
     * Wire up the collaborators used by this service.
     *
     * The two services are injected and stored as-is; the OCR driver is not
     * injected but obtained from OCRFactory::create().
     *
     * @param LearningAnalyticsService $learningAnalyticsService Used by submitToAnalysis().
     * @param ImageProcessingService   $imageProcessingService   Used by performEnhancedMatching() to crop images.
     */
    public function __construct(
        LearningAnalyticsService $learningAnalyticsService,
        ImageProcessingService $imageProcessingService
    ) {
        $this->imageProcessingService = $imageProcessingService;
        $this->learningAnalyticsService = $learningAnalyticsService;
        $this->ocrDriver = \App\Services\OCR\OCRFactory::create();
    }

    /**
     * Store an uploaded exam-paper photo, create its OCR record and run OCR on it.
     *
     * The image is validated, written to the disk returned by getDisk() as
     * "uploads/ocr/ocr_<uuid>.<client extension>", and an OCRRecord in status
     * "pending" is created for it. dispatchToOcrService() is then called
     * synchronously; it records its own failures on the record instead of
     * throwing, so the returned record may already be "completed" or "failed".
     *
     * @param UploadedFile $image     The uploaded photo.
     * @param string       $studentId Stored as both user_id and student_id on the record.
     *
     * @return OCRRecord The newly created record.
     *
     * @throws \Exception When validation fails or the image cannot be stored.
     */
    public function uploadExamPaper(UploadedFile $image, string $studentId): OCRRecord
    {
        $this->validateImage($image);

        $recordId = 'ocr_' . Str::uuid()->toString();
        $directory = 'uploads/ocr';
        $filename = $recordId . '.' . $image->getClientOriginalExtension();
        $imagePath = $directory . '/' . $filename;

        // putFileAs() streams the upload instead of loading it fully into memory,
        // and its return value tells us whether the write actually happened.
        $stored = Storage::disk($this->getDisk())->putFileAs($directory, $image, $filename);
        if ($stored === false) {
            throw new \RuntimeException('图片保存失败: ' . $imagePath);
        }

        $ocrRecord = OCRRecord::create([
            'user_id' => $studentId,
            'student_id' => $studentId,  // student_id mirrors user_id
            'file_path' => $imagePath,
            'paper_title' => $image->getClientOriginalName(),
            'status' => 'pending',
        ]);

        $this->dispatchToOcrService($ocrRecord);

        return $ocrRecord;
    }

    /**
     * Reject uploads that failed in transit, are too large, or have a disallowed extension.
     *
     * The size limit and extension whitelist come from the "ocr.upload.max_size"
     * and "ocr.upload.allowed_types" config keys (defaults: 10 MiB and
     * jpg/jpeg/png/webp). Only the client-supplied extension is inspected.
     *
     * @param UploadedFile $image The upload to check.
     *
     * @throws \Exception With a user-facing message describing the first failed check.
     */
    protected function validateImage(UploadedFile $image): void
    {
        if (!$image->isValid()) {
            throw new \Exception('文件上传失败');
        }

        $sizeLimit = config('ocr.upload.max_size', 10 * 1024 * 1024);
        if ($image->getSize() > $sizeLimit) {
            $limitInMb = $sizeLimit / 1024 / 1024;
            throw new \Exception('文件大小超出限制（' . $limitInMb . 'MB）');
        }

        $permitted = config('ocr.upload.allowed_types', ['jpg', 'jpeg', 'png', 'webp']);
        $ext = strtolower($image->getClientOriginalExtension());
        if (in_array($ext, $permitted)) {
            return;
        }

        throw new \Exception('不支持的文件类型，仅支持：' . implode(', ', $permitted));
    }

    /**
     * Run OCR on a record's stored image and persist the recognised questions.
     *
     * Flow:
     *  1. Resolve the stored image on the disk returned by getDisk() and mark
     *     the record "processing".
     *  2. Call the OCR driver once with cutType "answer".
     *  3. Build question rows via performEnhancedMatching() when the linked
     *     paper has paper_type "auto_generated", otherwise via
     *     OCRDataParser::parseStructuredQuestions(), and pass them to
     *     processOcrResult().
     *  4. If step 3 throws, discard any rows it already wrote and retry with
     *     the trailing-letter heuristic in parseTrailingChoiceAnswers().
     *
     * Nothing is thrown to the caller: any failure is logged and the record is
     * marked "failed" with the error message.
     *
     * @param OCRRecord $ocrRecord Record whose image should be recognised.
     */
    protected function dispatchToOcrService(OCRRecord $ocrRecord): void
    {
        try {
            // uploadExamPaper() stores the path under file_path; image_path is
            // still read first in case the model exposes it.
            $relativePath = $ocrRecord->image_path ?: $ocrRecord->file_path;
            if (empty($relativePath)) {
                throw new \Exception('OCR记录缺少图片路径，record_id: ' . $ocrRecord->id);
            }

            $imagePath = Storage::disk($this->getDisk())->path($relativePath);

            if (empty($imagePath)) {
                throw new \Exception('无法获取图片路径: ' . $relativePath);
            }

            if (!file_exists($imagePath)) {
                throw new \Exception('图片文件不存在: ' . $imagePath);
            }

            $ocrRecord->update(['status' => 'processing']);

            // Single API call with cutType: answer (returns both question and answer)
            \Log::info('OCR: Extracting questions and answers', ['record_id' => $ocrRecord->id]);
            $result = $this->ocrDriver->recognize($imagePath, [
                'cutType' => 'answer',
                'subject' => 'Math',
                'ocr_record_id' => $ocrRecord->id
            ]);

            $items = $result['questions'] ?? [];
            \Log::info('OCR extraction complete', ['item_count' => count($items)]);

            try {
                $finalQuestions = [];
                $paper = null;

                // Look up the paper this record was linked to, if any.
                if ($ocrRecord->analysis_id) {
                    $paper = \App\Models\Paper::where('paper_id', $ocrRecord->analysis_id)->first();
                }

                if ($paper && $paper->paper_type === 'auto_generated') {
                    // System-generated paper: match against its known questions.
                    $paperQuestions = \App\Models\PaperQuestion::where('paper_id', $paper->paper_id)
                        ->orderBy('question_number')
                        ->get();

                    $finalQuestions = $this->performEnhancedMatching($ocrRecord, $result, $paperQuestions);
                } else {
                    // Any other paper (or none): structured parsing, optionally
                    // guided by the paper's questions.
                    $paperInfo = null;
                    if ($paper) {
                        $paperQuestionsArr = \App\Models\PaperQuestion::where('paper_id', $paper->paper_id)
                            ->get()
                            ->map(function ($q) {
                                return [
                                    'question_number' => $q->question_number,
                                    'question_type' => $q->question_type,
                                    'correct_answer' => $q->correct_answer,
                                    'content' => $q->question_text
                                ];
                            })
                            ->toArray();
                        $paperInfo = ['questions' => $paperQuestionsArr];
                    }

                    $parser = new \App\Services\OCRDataParser();
                    $structuredQuestions = $parser->parseStructuredQuestions($result, $paperInfo);

                    foreach ($structuredQuestions as $q) {
                        $finalQuestions[] = [
                            'question_number' => $q['question_number'],
                            'content' => $q['content'],
                            'student_answer' => $q['answer'],
                            'confidence' => $q['confidence'],
                            'raw_data' => [
                                'options' => $q['options'] ?? [],
                                'blocks' => $q['blocks'] ?? []
                            ]
                        ];
                    }
                }

                $this->processOcrResult($ocrRecord, [
                    'questions' => $finalQuestions,
                    'raw' => $result
                ]);
            } catch (\Exception $e) {
                \Log::warning('OCR: 解析失败，回退到原有逻辑', [
                    'record_id' => $ocrRecord->id,
                    'error' => $e->getMessage()
                ]);

                // The failed attempt may already have inserted some question
                // rows; remove them so the fallback does not create duplicates.
                OCRQuestionResult::where('ocr_record_id', $ocrRecord->id)->delete();

                $this->processOcrResult($ocrRecord, [
                    'questions' => $this->parseTrailingChoiceAnswers($items),
                    'raw' => $result
                ]);
            }
        } catch (\Throwable $e) {
            // \Throwable rather than \Exception: a TypeError must not leave the
            // record stuck in "processing".
            \Log::error('OCR服务调用失败', [
                'record_id' => $ocrRecord->id,
                'error' => $e->getMessage(),
            ]);

            $ocrRecord->update([
                'status' => 'failed',
                'error_message' => 'OCR服务调用失败：' . $e->getMessage(),
            ]);
        }
    }

    /**
     * Fallback parser: treat a trailing A-D letter in each item's text as the student's answer.
     *
     * When an item's content ends with A, B, C or D (optionally followed by
     * whitespace), that letter becomes student_answer and is removed from the
     * question text; otherwise student_answer is an empty string.
     *
     * @param array $items Driver items; question_number, content, confidence and raw_data are read.
     *
     * @return array List of rows with question_number, content, student_answer, confidence, raw_data.
     */
    private function parseTrailingChoiceAnswers(array $items): array
    {
        \Log::info('Parsing student answers from question text');

        $parsed = [];

        foreach ($items as $item) {
            $questionNumber = $item['question_number'] ?? null;
            $fullText = $item['content'] ?? '';
            $questionText = $fullText;
            $studentAnswer = '';

            if (preg_match('/([A-D])\s*$/u', $fullText, $matches)) {
                $studentAnswer = $matches[1];
                // Same "u" flag as the match above so both treat the text as UTF-8.
                $questionText = preg_replace('/\s*[A-D]\s*$/u', '', $fullText) ?? $fullText;

                \Log::info('Extracted student answer', [
                    'question_number' => $questionNumber,
                    'answer' => $studentAnswer,
                    'original_text_length' => mb_strlen($fullText),
                    'cleaned_text_length' => mb_strlen($questionText)
                ]);
            }

            $parsed[] = [
                'question_number' => $questionNumber,
                'content' => trim($questionText),
                'student_answer' => $studentAnswer,
                'confidence' => $item['confidence'] ?? 0.0,
                'raw_data' => $item['raw_data'] ?? null
            ];
        }

        return $parsed;
    }

    /**
     * Attach each question's answer by looking it up under the same question number.
     *
     * Answers without a truthy question_number are ignored; when several
     * answers share a number the last one wins. Questions with no matching
     * answer get an empty string.
     *
     * @param array $questions Rows with question_number, content and optionally confidence / raw_data.
     * @param array $answers   Rows with question_number and optionally content.
     *
     * @return array List of rows: question_number, content, student_answer, confidence, raw_data.
     */
    protected function matchAnswersToQuestions(array $questions, array $answers): array
    {
        $answersByNumber = [];
        foreach ($answers as $entry) {
            $number = $entry['question_number'] ?? null;
            if (!$number) {
                continue;
            }
            $answersByNumber[$number] = $entry['content'] ?? '';
        }

        // array_values() keeps the result a plain list regardless of input keys.
        return array_values(array_map(
            fn ($q) => [
                'question_number' => $q['question_number'],
                'content' => $q['content'],
                'student_answer' => $answersByNumber[$q['question_number']] ?? '',
                'confidence' => $q['confidence'] ?? 0.0,
                'raw_data' => $q['raw_data'] ?? null
            ],
            $questions
        ));
    }

    /**
     * Persist one OCR run: debug dumps, raw response row, question rows and record status.
     *
     * Steps, in order:
     *  - write the full payload to storage/logs as a JSON debug dump (best-effort);
     *  - insert or update the record's row in the ocr_raw_data table (best-effort);
     *  - write the question list to a second JSON debug dump (best-effort);
     *  - clean content / student_answer with LatexCleanerService and create
     *    one OCRQuestionResult per question;
     *  - mark the record "completed" with counts and average confidence.
     *
     * Analysis is not submitted here; submitToAnalysis() is no longer called
     * automatically so users can generate question-bank items from the OCR
     * detail page first.
     *
     * @param OCRRecord $ocrRecord Record being processed.
     * @param array     $result    Payload; its "questions" list is persisted, the rest is only dumped.
     */
    protected function processOcrResult(OCRRecord $ocrRecord, array $result): void
    {
        $rawDumpName = $this->writeOcrDebugDump("ocr_raw_data_{$ocrRecord->id}", [
            'timestamp' => now()->toISOString(),
            'record_id' => $ocrRecord->id,
            'paper_title' => $ocrRecord->paper_title,
            'student_id' => $ocrRecord->student_id,
            'file_path' => $ocrRecord->file_path,
            'aliyun_response' => $result
        ]);

        if ($rawDumpName !== null) {
            \Log::info('OCR: 完整API数据已写入文件', [
                'record_id' => $ocrRecord->id,
                'log_file' => $rawDumpName
            ]);
        }

        // Persist the raw response; failure here must not block question persistence.
        try {
            $values = [
                'raw_response' => json_encode($result, JSON_UNESCAPED_UNICODE),
                'api_request_id' => $result['requestId'] ?? null,
                'algo_version' => $result['data']['algo_version'] ?? null,
                'total_blocks' => count($result['questions'] ?? []),
                'metadata' => json_encode([
                    'saved_at' => now()->toISOString(),
                    'source' => 'OCRService'
                ]),
                'updated_at' => now(),
            ];

            $rowExists = \Illuminate\Support\Facades\DB::table('ocr_raw_data')
                ->where('ocr_record_id', $ocrRecord->id)
                ->exists();

            if ($rowExists) {
                // created_at is deliberately left out so a re-run keeps the
                // original creation time.
                \Illuminate\Support\Facades\DB::table('ocr_raw_data')
                    ->where('ocr_record_id', $ocrRecord->id)
                    ->limit(1)
                    ->update($values);
            } else {
                \Illuminate\Support\Facades\DB::table('ocr_raw_data')->insert(
                    ['ocr_record_id' => $ocrRecord->id] + $values + ['created_at' => now()]
                );
            }

            \Log::info('OCR: 原始数据已保存到数据库', ['record_id' => $ocrRecord->id]);
        } catch (\Exception $e) {
            \Log::error('OCR: 保存原始数据到数据库失败', [
                'record_id' => $ocrRecord->id,
                'error' => $e->getMessage()
            ]);
        }

        $questions = $result['questions'] ?? [];

        if (!empty($questions)) {
            $questionsDumpName = $this->writeOcrDebugDump("ocr_questions_{$ocrRecord->id}", [
                'timestamp' => now()->toISOString(),
                'record_id' => $ocrRecord->id,
                'paper_title' => $ocrRecord->paper_title,
                'total_questions' => count($questions),
                'questions' => $questions
            ]);

            if ($questionsDumpName !== null) {
                \Log::info('OCR: 题目列表已写入文件', [
                    'record_id' => $ocrRecord->id,
                    'questions_count' => count($questions),
                    'log_file' => $questionsDumpName
                ]);
            }
        }

        // Pre-clean every formula with the LaTeX cleaner service.
        $latexCleaner = app(\App\Services\LatexCleanerService::class);
        $questions = $latexCleaner->cleanArray($questions, ['content', 'student_answer']);

        \Log::info('LaTeX formulas cleaned', ['question_count' => count($questions)]);

        $processedCount = 0;

        foreach ($questions as $question) {
            // Cleaned a second time on purpose, as a safety net.
            $questionText = $latexCleaner->clean($question['content'] ?? '');
            $studentAnswer = $latexCleaner->clean($question['student_answer'] ?? '');

            // Validation problems are only logged; the row is stored regardless.
            $validation = $latexCleaner->validate($questionText);
            if (!$validation['valid']) {
                \Log::warning('LaTeX validation warnings', [
                    'question_number' => $question['question_number'],
                    'errors' => $validation['errors']
                ]);
            }

            OCRQuestionResult::create([
                'ocr_record_id' => $ocrRecord->id,
                'question_number' => $question['question_number'],
                'question_text' => $questionText,
                'student_answer' => $studentAnswer,
                'score_value' => 0, // Will be filled by AI grading
                'mark_detected' => null,
                'score_confidence' => $question['confidence'] ?? 0,
            ]);
            $processedCount++;
        }

        $ocrRecord->update([
            'status' => 'completed',
            'processed_at' => now(),
            'total_questions' => $processedCount,
            'processed_questions' => $processedCount,
            'confidence_avg' => collect($questions)->avg('confidence') ?? 0,
        ]);

        \Log::info('OCR processing complete', [
            'record_id' => $ocrRecord->id,
            'questions_processed' => $processedCount
        ]);
    }

    /**
     * Write a pretty-printed JSON debug dump under storage/logs, never throwing.
     *
     * @param string $prefix  File name prefix; a timestamp and ".json" are appended.
     * @param array  $payload Data to encode.
     *
     * @return string|null Base name of the written file, or null when the dump could not be written.
     */
    private function writeOcrDebugDump(string $prefix, array $payload): ?string
    {
        $path = storage_path("logs/{$prefix}_" . date('Y-m-d_H-i-s') . '.json');

        try {
            $json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
            if ($json === false) {
                throw new \RuntimeException('json_encode failed: ' . json_last_error_msg());
            }
            if (file_put_contents($path, $json) === false) {
                throw new \RuntimeException('file_put_contents failed');
            }

            return basename($path);
        } catch (\Throwable $e) {
            // The dump is a debugging aid only; losing it must not abort OCR persistence.
            \Log::warning('OCR: debug dump could not be written', [
                'log_file' => basename($path),
                'error' => $e->getMessage()
            ]);

            return null;
        }
    }

    /**
     * Send a record's questions to the learning-analytics service for mastery analysis.
     *
     * Each question is mapped to the payload shape passed to
     * LearningAnalyticsService::submitOCRAnalysis(). A manually corrected
     * answer (manual_answer) takes precedence over the OCR answer. On a
     * successful response the record's ai_analyzed_at / ai_analysis_count are
     * updated. Exceptions are logged and swallowed so a failed submission does
     * not affect the OCR record's completion state.
     *
     * @param OCRRecord $ocrRecord Record the questions belong to.
     * @param array     $questions Question rows (question_number, content, student_answer, ...).
     */
    protected function submitToAnalysis(OCRRecord $ocrRecord, array $questions): void
    {
        try {
            $analysisData = [
                // Falls back to an id derived from the OCR record when no exam_id is set.
                'exam_id' => $ocrRecord->exam_id ?? ('ocr_' . $ocrRecord->id),
                'student_id' => $ocrRecord->student_id,
                'ocr_record_id' => $ocrRecord->id,
                'teacher_name' => 'System', // could instead be the uploader's name
                'analysis_type' => 'mastery',
                'questions' => array_map(static function ($q) {
                    $studentAnswer = $q['student_answer'] ?? '';

                    // empty() alone would also discard the legitimate manual
                    // answers "0" and 0, which are valid results in maths.
                    $manual = $q['manual_answer'] ?? null;
                    $hasManual = !empty($manual) || $manual === '0' || $manual === 0 || $manual === 0.0;
                    if ($hasManual) {
                        $studentAnswer = $manual;
                    }

                    return [
                        'question_id' => $q['question_number'], // question number doubles as a temporary id
                        'question_number' => (string)$q['question_number'],
                        'kp_code' => $q['kp_code'] ?? null,
                        'score_value' => $q['score_value'] ?? 0,
                        'student_answer' => $studentAnswer,
                        'ocr_confidence' => $q['confidence'] ?? 0,
                        'question_text' => $q['content'] ?? '', // question text is passed along for the AI analysis
                        'teacher_validated' => $q['answer_verified'] ?? false,
                    ];
                }, $questions)
            ];

            $result = $this->learningAnalyticsService->submitOCRAnalysis($analysisData);

            if (isset($result['success']) && $result['success']) {
                $ocrRecord->update([
                    'ai_analyzed_at' => now(),
                    'ai_analysis_count' => ($ocrRecord->ai_analysis_count ?? 0) + 1
                ]);
            }
        } catch (\Exception $e) {
            \Log::error('Failed to submit to analysis service', [
                'record_id' => $ocrRecord->id,
                'error' => $e->getMessage()
            ]);
            // Deliberately not rethrown, so the OCR flow's completion state is unaffected.
        }
    }

    /**
     * Run OCR again for an existing record.
     *
     * The record is reset to "pending" with its result fields cleared, its
     * previous question rows are deleted, and dispatchToOcrService() is called.
     * Because that method records failures on the record instead of throwing,
     * this method returns true whenever it runs to completion.
     *
     * @param OCRRecord $ocrRecord Record to process again.
     *
     * @return bool Always true.
     */
    public function reprocess(OCRRecord $ocrRecord): bool
    {
        $pristineState = [
            'status' => 'pending',
            'error_message' => null,
            'processed_at' => null,
            'total_questions' => 0,
            'processed_questions' => 0,
            'confidence_avg' => null,
        ];
        $ocrRecord->update($pristineState);

        // Drop results from the previous run before producing new ones.
        $previousResults = OCRQuestionResult::where('ocr_record_id', $ocrRecord->id);
        $previousResults->delete();

        $this->dispatchToOcrService($ocrRecord);

        return true;
    }

    /**
     * Count OCR records overall and per processing status.
     *
     * All counts come from a single query using conditional aggregation, so
     * they describe one consistent snapshot of the table.
     *
     * @return array{total:int, pending:int, processing:int, completed:int, failed:int}
     */
    public function getStatistics(): array
    {
        $statuses = ['pending', 'processing', 'completed', 'failed'];

        $columns = ['COUNT(*) AS total_count'];
        foreach ($statuses as $status) {
            // The status value is bound, not interpolated; only the alias is built from it.
            $columns[] = "SUM(CASE WHEN status = ? THEN 1 ELSE 0 END) AS {$status}_count";
        }

        $row = OCRRecord::query()
            ->toBase()
            ->selectRaw(implode(', ', $columns), $statuses)
            ->first();

        // SUM() over zero rows yields NULL, and some drivers return numeric
        // strings, hence the casts.
        $stats = ['total' => (int) ($row->total_count ?? 0)];
        foreach ($statuses as $status) {
            $stats[$status] = (int) ($row->{$status . '_count'} ?? 0);
        }

        return $stats;
    }

    /**
     * Match OCR output against a system paper, re-reading each answer region with a second OCR pass.
     *
     * OCRDataParser::matchWithSystemPaper() provides the initial match per
     * question. For every match that carries y_min / y_max coordinates, that
     * band of the original image is cropped to
     * "uploads/ocr/crops/<record id>_q<number>.jpg" and recognised again: with
     * the driver's recognizeHandwriting() when it exists, otherwise with the
     * standard recognize(). A failed or empty second pass keeps the initial
     * answer. Every answer is finally passed through cleanHandwritingAnswer()
     * and the LaTeX cleaner.
     *
     * @param OCRRecord $ocrRecord      Record whose image is being processed.
     * @param array     $ocrResult      Driver output of the first OCR pass.
     * @param mixed     $paperQuestions The paper's questions, as accepted by matchWithSystemPaper().
     *
     * @return array List of rows: question_number, content, student_answer, confidence,
     *               student_answer_bbox, raw_data.
     */
    public function performEnhancedMatching(OCRRecord $ocrRecord, array $ocrResult, $paperQuestions): array
    {
        $parser = new \App\Services\OCRDataParser();
        $latexCleaner = app(\App\Services\LatexCleanerService::class);
        $matchedResults = $parser->matchWithSystemPaper($ocrResult, $paperQuestions);
        $finalQuestions = [];

        $disk = Storage::disk($this->getDisk());
        // uploadExamPaper() stores the path under file_path; image_path is
        // still read first in case the model exposes it.
        $imagePath = $disk->path((string) ($ocrRecord->image_path ?: $ocrRecord->file_path));
        $supportsHandwriting = method_exists($this->ocrDriver, 'recognizeHandwriting');

        // cleanHandwritingAnswer() only accepts strings; anything that is not
        // text or a number is treated as "no answer" instead of raising a TypeError.
        $toText = static fn ($value): string => is_string($value)
            ? $value
            : (is_numeric($value) ? (string) $value : '');

        // Secondary OCR loop: crop each answer region and recognise it again.
        foreach ($matchedResults as $qNum => $match) {
            $secondaryAnswer = $toText($match['student_answer'] ?? ''); // default to the initial match
            $questionText = $toText($match['question_text'] ?? '');

            if (isset($match['coordinates']['y_min'], $match['coordinates']['y_max'])) {
                $yMin = $match['coordinates']['y_min'];
                $yMax = $match['coordinates']['y_max'];
                $cropPath = 'uploads/ocr/crops/' . $ocrRecord->id . "_q{$qNum}.jpg";
                $absoluteCropPath = $disk->path($cropPath);

                // Let the disk create the directory so its configured
                // permissions apply, instead of a world-writable mkdir().
                $disk->makeDirectory(dirname($cropPath));

                if ($this->imageProcessingService->cropImage($imagePath, $yMin, $yMax, $absoluteCropPath)) {
                    try {
                        \Log::info("Secondary OCR for Q{$qNum} (Handwriting)", ['crop_path' => $cropPath]);

                        if ($supportsHandwriting) {
                            $handwritingResult = $this->ocrDriver->recognizeHandwriting($absoluteCropPath, [
                                'subject' => 'Math',
                                'ocr_record_id' => $ocrRecord->id
                            ]);

                            if (!empty($handwritingResult['texts'])) {
                                // Wrap the handwriting text in the structure
                                // extractAnswerFromCrop() is given for standard OCR results.
                                $combinedText = implode(' ', array_column($handwritingResult['texts'], 'text'));
                                $cropResult = [
                                    'questions' => [
                                        [
                                            'question_number' => $qNum,
                                            'content' => $combinedText,
                                            'student_answer' => $combinedText, // for now the full text is the answer
                                            'confidence' => 1, // assumed; the handwriting result's own score is not used
                                            'bounding_box' => [ // placeholder covering the whole crop
                                                'x_min' => 0, 'y_min' => 0, 'x_max' => 1, 'y_max' => 1
                                            ]
                                        ]
                                    ]
                                ];
                                $secondaryAnswer = $toText($parser->extractAnswerFromCrop($cropResult, $questionText));
                                \Log::info("Handwriting OCR Result for Q{$qNum}", [
                                    'raw_answer' => $secondaryAnswer,
                                    'texts_count' => count($handwritingResult['texts'])
                                ]);
                            } else {
                                \Log::info("No handwriting detected for Q{$qNum}, using original answer");
                            }
                        } else {
                            // Driver has no handwriting support: use the standard recogniser on the crop.
                            \Log::warning("Handwriting recognition not supported, using standard OCR");
                            $cropResult = $this->ocrDriver->recognize($absoluteCropPath, [
                                'cutType' => 'answer',
                                'subject' => 'Math',
                                'ocr_record_id' => $ocrRecord->id
                            ]);

                            if (!empty($cropResult['questions'])) {
                                $secondaryAnswer = $toText($parser->extractAnswerFromCrop($cropResult, $questionText));
                                \Log::info("Standard OCR Result for Q{$qNum}: {$secondaryAnswer}");
                            }
                        }
                    } catch (\Exception $e) {
                        // Best-effort: keep the initial answer when the second pass fails.
                        \Log::warning("Secondary OCR failed for Q{$qNum}: " . $e->getMessage());
                    }
                }
            }

            // Strip residual question text / noise so only the handwritten answer remains.
            $secondaryAnswer = $this->cleanHandwritingAnswer($secondaryAnswer, $questionText);
            $secondaryAnswer = $latexCleaner->clean($secondaryAnswer);

            $finalQuestions[] = [
                'question_number' => $qNum,
                'content' => '系统题目', // placeholder; could be taken from PaperQuestion instead
                'student_answer' => $secondaryAnswer,
                'confidence' => $match['confidence'] ?? 0.0,
                'student_answer_bbox' => $match['coordinates'] ?? null,
                'raw_data' => $match['debug_info'] ?? []
            ];
        }

        \Log::info('OCR: 使用增强匹配完成 (含手写识别)', [
            'record_id' => $ocrRecord->id,
            'matched_count' => count($finalQuestions)
        ]);

        return $finalQuestions;
    }

    /**
     * Name of the storage disk used for OCR images.
     *
     * @return string Always "public".
     */
    protected function getDisk(): string
    {
        return 'public'; // OCR uploads are stored in public disk
    }

    /**
     * Reduce raw handwriting-OCR text to the student's answer.
     *
     * Heuristics, applied in order:
     *  1. collapse whitespace;
     *  2. strip a leading question-number label ("3.", "3、", "3)", "O3 ") and
     *     a leading "解" / "答" / "答案" marker;
     *  3. return '' when the text is essentially the question stem itself;
     *  4. cut away the stem when the text contains its tail or its head;
     *  5. keep what follows "得" (long text) or the last "=";
     *  6. return '' when the remainder still resembles the stem;
     *  7. keep the last non-empty ";"-separated segment if it is short.
     *
     * A number is only treated as a label when a separator or whitespace
     * follows it and more text comes after it, so bare numeric answers ("42"),
     * decimals ("3.14") and fractions ("1/2") are left intact.
     *
     * @param string $rawAnswer    Full text returned by handwriting recognition.
     * @param string $questionText Question stem for this answer (may be empty).
     *
     * @return string The cleaned answer, or '' when no answer could be isolated.
     */
    private function cleanHandwritingAnswer(string $rawAnswer, string $questionText = ''): string
    {
        // preg_replace() returns null on failure (e.g. invalid UTF-8 with the
        // "u" flag); fall back to the unmodified text in that case.
        $answer = trim(preg_replace('/\s+/', ' ', $rawAnswer) ?? $rawAnswer);
        if ($answer === '') {
            return '';
        }

        // Leading question-number label. The possessive \d++ prevents
        // backtracking from splitting a number ("42" -> "4" + "2"), and a dot
        // followed by a digit is a decimal point, not a separator.
        $answer = preg_replace(
            '/^[O0〇]?\s*\d++(?:\s*(?:[、\)）]|[.．](?!\d))\s*|\s+)(?=\S)/u',
            '',
            $answer
        ) ?? $answer;

        // "答案" must be tried before "答", otherwise only the first character is removed.
        $answer = preg_replace('/^(?:答案|解|答)[:：]?\s*/u', '', $answer) ?? $answer;

        // Normalised form used only for similarity comparisons.
        $normalize = static function (string $text): string {
            $text = strip_tags($text);
            $text = preg_replace('/\s+/', '', $text) ?? $text;
            $text = preg_replace('/[[:punct:]]/u', '', $text) ?? $text;
            return mb_strtolower($text);
        };
        $normAnswer = $normalize($answer);
        $normQuestion = $normalize($questionText);

        // The whole text is (nearly) the stem: nothing was written.
        if ($normQuestion !== '') {
            similar_text($normAnswer, $normQuestion, $similarity);
            if ($similarity >= 70 && mb_strlen($normAnswer) <= mb_strlen($normQuestion) * 1.2) {
                return '';
            }
        }

        if ($questionText !== '') {
            // Cut everything up to and including the stem's tail.
            $anchor = mb_substr($questionText, -12);
            $pos = mb_stripos($answer, $anchor);
            if ($pos !== false) {
                $answer = trim(mb_substr($answer, $pos + mb_strlen($anchor)));
                $normAnswer = $normalize($answer);
            }

            // Still starts with the stem: drop as many characters as the stem is long.
            if ($normQuestion !== '' && str_starts_with($normAnswer, $normQuestion)) {
                $answer = trim(mb_substr($answer, mb_strlen($questionText)));
            }

            // Cut once more using the stem's head (helps with short stems).
            $prefix = mb_substr($questionText, 0, 18);
            $pos = mb_stripos($answer, $prefix);
            if ($pos !== false) {
                $answer = trim(mb_substr($answer, $pos + mb_strlen($prefix)));
            }
        }

        // Long text: prefer what follows "得". Otherwise, and for short text,
        // take the token after the last "=".
        $isLong = mb_strlen($answer) > 40;
        if (
            $isLong
            && preg_match('/得[:：]?\s*([^，。；]*)/u', $answer, $matches)
            && trim($matches[1]) !== '' // not empty(): "0" is a valid result
        ) {
            $answer = trim($matches[1]);
        } elseif (preg_match('/=\s*([^\s，。；]+)\s*$/u', $answer, $matches)) {
            $answer = trim($matches[1]);
        }

        // Final guard against returning leftover stem text as the answer.
        $normAnswer = $normalize($answer);
        if ($normQuestion !== '') {
            similar_text($normAnswer, $normQuestion, $finalSim);
            if ($finalSim >= 65 && mb_strlen($normAnswer) > 0) {
                return '';
            }
        }

        // Several segments: prefer the last non-empty one when it is short.
        $parts = preg_split('/[\\n;]/u', $answer);
        if (is_array($parts)) {
            // Trim before filtering so whitespace-only segments are dropped too.
            $parts = array_filter(array_map('trim', $parts), fn ($p) => $p !== '');
            if (!empty($parts)) {
                $candidate = end($parts);
                if (mb_strlen($candidate) <= 50) {
                    $answer = $candidate;
                }
            }
        }

        return trim($answer);
    }
}