| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252 |
- <?php
- namespace App\Services;
- use Illuminate\Support\Facades\Http;
- use Illuminate\Support\Facades\Log;
- use Illuminate\Support\Facades\Storage;
/**
 * Recognises answers on an answer-sheet image through the configured OCR
 * provider and normalises the provider output into a uniform answer list.
 *
 * Both provider integrations are currently stubs that return mock answers;
 * the text-extraction helpers at the bottom of the class are not yet called
 * from within this class.
 */
class OCRProcessingService
{
    /** Upper bound for a reported confidence value. */
    private const MAX_CONFIDENCE = 1.0;

    /** Number of mock questions generated when no standard questions are given. */
    private const DEFAULT_QUESTION_COUNT = 4;

    /** Active OCR provider: "aliyun" or "baidu". */
    protected string $driver;

    /** Credentials and endpoint for the Aliyun OCR API. */
    protected array $aliyunConfig;

    /** Credentials for the Baidu OCR API. */
    protected array $baiduConfig;

    /**
     * Reads the provider selection and credentials from the environment.
     *
     * NOTE(review): Laravel recommends calling env() only from config files,
     * because values from .env are not loaded once the configuration is
     * cached. Consider moving these settings into a config file and reading
     * them with config().
     */
    public function __construct()
    {
        $this->driver = env('OCR_DRIVER', 'aliyun');

        $this->aliyunConfig = [
            'access_key_id' => env('ALIYUN_ACCESS_KEY_ID'),
            'access_key_secret' => env('ALIYUN_ACCESS_KEY_SECRET'),
            'endpoint' => env('ALIYUN_OCR_ENDPOINT', 'ocr-api.cn-hangzhou.aliyuncs.com'),
        ];

        $this->baiduConfig = [
            'app_id' => env('BAIDU_MATH_APP_ID'),
            'app_key' => env('BAIDU_MATH_APP_KEY'),
            'secret_key' => env('BAIDU_MATH_SECRET_KEY'),
        ];
    }

    /**
     * Runs OCR recognition on an image and returns the normalised answers.
     *
     * @param string      $imagePath Absolute URL or storage-relative path of the image.
     * @param string|null $paperId   Optional paper id; when given, the paper's standard
     *                               questions are loaded and passed to the provider.
     *
     * @return array{answers: array<int, array<string, mixed>>}
     *
     * @throws \Exception When the provider call fails.
     */
    public function processImage(string $imagePath, ?string $paperId = null): array
    {
        $imageUrl = $this->getImageUrl($imagePath);

        Log::info('开始OCR识别', [
            'image_url' => $imageUrl,
            'driver' => $this->driver,
            'paper_id' => $paperId,
        ]);

        // Load the standard questions only when a paper was selected.
        $standardQuestions = [];
        if ($paperId) {
            $questionBankService = app(\App\Services\QuestionBankService::class);
            $standardQuestions = $questionBankService->getPaperQuestions($paperId);

            Log::info('获取标准题目', [
                'paper_id' => $paperId,
                'question_count' => count($standardQuestions),
            ]);
        }

        // Any driver other than "aliyun" is routed to Baidu.
        $results = $this->driver === 'aliyun'
            ? $this->processWithAliyun($imageUrl, $standardQuestions)
            : $this->processWithBaidu($imageUrl, $standardQuestions);

        return $this->formatResults($results);
    }

    /**
     * Aliyun OCR recognition (stub: returns mock answers).
     *
     * @param string $imageUrl          Image URL to recognise (unused until the real call exists).
     * @param array  $standardQuestions Standard questions of the paper, if any.
     *
     * @return array{answers: array<int, array<string, mixed>>}
     *
     * @throws \Exception Wrapping any failure, with the original exception as `previous`.
     */
    protected function processWithAliyun(string $imageUrl, array $standardQuestions = []): array
    {
        try {
            Log::info('调用阿里云OCR API', ['standard_questions_count' => count($standardQuestions)]);

            // TODO: implement the real Aliyun OCR call via the Aliyun OCR SDK.
            // See: https://help.aliyun.com/document_detail/306402.html
            // The number of standard questions can later be used to validate the OCR result.
            Log::info('预期题目数量', ['expected' => count($standardQuestions)]);

            return ['answers' => $this->buildMockAnswers($standardQuestions, 0.90)];
        } catch (\Exception $e) {
            Log::error('阿里云OCR调用失败', ['error' => $e->getMessage()]);

            // Chain the original exception so the stack trace is not lost.
            throw new \Exception('OCR识别失败:' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Baidu OCR recognition (stub: returns mock answers).
     *
     * @param string $imageUrl          Image URL to recognise (unused until the real call exists).
     * @param array  $standardQuestions Standard questions of the paper, if any.
     *
     * @return array{answers: array<int, array<string, mixed>>}
     *
     * @throws \Exception Wrapping any failure, with the original exception as `previous`.
     */
    protected function processWithBaidu(string $imageUrl, array $standardQuestions = []): array
    {
        try {
            Log::info('调用百度OCR API', ['standard_questions_count' => count($standardQuestions)]);

            // TODO: implement the real Baidu OCR call via the Baidu OCR API.
            // See: https://ai.baidu.com/ai-doc/REFERENCE/Ck3dwjhhu
            // The number of standard questions can later be used to validate the OCR result.
            Log::info('预期题目数量', ['expected' => count($standardQuestions)]);

            return ['answers' => $this->buildMockAnswers($standardQuestions, 0.88)];
        } catch (\Exception $e) {
            Log::error('百度OCR调用失败', ['error' => $e->getMessage()]);

            // Chain the original exception so the stack trace is not lost.
            throw new \Exception('OCR识别失败:' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Builds the placeholder answer list shared by both provider stubs.
     *
     * One answer is produced per standard question (or DEFAULT_QUESTION_COUNT
     * answers when none are given). The question type is taken from the
     * standard question's `question_type` or `type` key, defaulting to "choice".
     *
     * @param array $standardQuestions Standard questions; re-indexed so keyed arrays line up.
     * @param float $baseConfidence    Starting confidence; grows by 0.02 per question.
     *
     * @return array<int, array{q: int, type: mixed, value: string, confidence: float}>
     */
    private function buildMockAnswers(array $standardQuestions, float $baseConfidence): array
    {
        // Positional lookup below requires a 0-based list.
        $standardQuestions = array_values($standardQuestions);
        $questionCount = count($standardQuestions) ?: self::DEFAULT_QUESTION_COUNT;

        $answers = [];
        for ($number = 1; $number <= $questionCount; $number++) {
            $stdQuestion = $standardQuestions[$number - 1] ?? null;
            $questionType = $stdQuestion['question_type'] ?? $stdQuestion['type'] ?? 'choice';

            $answers[] = [
                'q' => $number,
                'type' => $questionType,
                'value' => $this->generateMockAnswer($questionType),
                // Cap the simulated confidence: it must never exceed 1.0 on long papers.
                'confidence' => min(self::MAX_CONFIDENCE, $baseConfidence + ($number * 0.02)),
            ];
        }

        return $answers;
    }

    /**
     * Generates a placeholder answer for the given question type.
     *
     * @param string $type "choice", "fill" or "solve"; anything else yields a fixed fallback.
     */
    protected function generateMockAnswer(string $type): string
    {
        $choices = ['A', 'B', 'C', 'D'];

        return match ($type) {
            'choice' => $choices[array_rand($choices)],
            'fill' => (string) rand(1, 100),
            'solve' => '解答步骤和答案',
            default => '未知答案',
        };
    }

    /**
     * Resolves an image path to a URL.
     *
     * Absolute URLs are returned unchanged; everything else is treated as a
     * path below the public `storage/` directory.
     *
     * @param string $imagePath Absolute URL or storage-relative path.
     */
    protected function getImageUrl(string $imagePath): string
    {
        if (filter_var($imagePath, FILTER_VALIDATE_URL) !== false) {
            return $imagePath;
        }

        // FILTER_VALIDATE_URL only accepts ASCII URLs, so keep anything with an
        // explicit http(s) scheme. Requiring "://" stops relative paths that
        // merely start with "http" from being mistaken for URLs.
        if (preg_match('#^https?://#i', $imagePath) === 1) {
            return $imagePath;
        }

        return asset('storage/' . ltrim($imagePath, '/'));
    }

    /**
     * Normalises raw provider output into the uniform answer structure.
     *
     * The answer text is taken from `value`, falling back to `steps`; array
     * values are joined with "|". Confidence defaults to 0.9 and the active
     * driver is recorded as `provider`.
     *
     * @param array $rawResults Provider output with an `answers` list.
     *
     * @return array{answers: array<int, array<string, mixed>>}
     */
    protected function formatResults(array $rawResults): array
    {
        $answers = [];

        foreach ($rawResults['answers'] ?? [] as $result) {
            // Resolve the source once so a scalar `steps` is not dropped.
            $rawValue = $result['value'] ?? $result['steps'] ?? '';

            $answers[] = [
                'q' => $result['q'],
                'type' => $result['type'],
                'value' => is_array($rawValue) ? implode('|', $rawValue) : $rawValue,
                'confidence' => $result['confidence'] ?? 0.9,
                'provider' => $this->driver,
            ];
        }

        return ['answers' => $answers];
    }

    /**
     * Extracts multiple-choice answers such as "1. A" or "2、B" from OCR text.
     *
     * @param string $text Recognised text.
     *
     * @return array<int, string> Answer letter keyed by question number; the
     *                            first occurrence of a number wins.
     */
    protected function extractChoiceAnswers(string $text): array
    {
        // `u` is required so the multi-byte "、" is matched as one character;
        // [0-9] keeps the number parseable by the integer cast under `u`.
        if (!preg_match_all('/([0-9]+)[.、]\s*([A-D])/u', $text, $matches, PREG_SET_ORDER)) {
            return [];
        }

        $answers = [];
        foreach ($matches as [, $number, $letter]) {
            // Capture the letter in the same match so "1." cannot pick up the answer of "11.".
            $answers[(int) $number] ??= $letter;
        }

        return $answers;
    }

    /**
     * Extracts fill-in-the-blank answers such as "1.: 42" from OCR text.
     *
     * @param string $text Recognised text.
     *
     * @return array<int, string> Answer token keyed by question number; the
     *                            last occurrence of a number wins.
     */
    protected function extractFillAnswers(string $text): array
    {
        // Accepts both the ASCII and the full-width colon after the question marker.
        if (!preg_match_all('/([0-9]+)[.、]\s*[::]\s*(\S+)/u', $text, $matches, PREG_SET_ORDER)) {
            return [];
        }

        $answers = [];
        foreach ($matches as [, $number, $value]) {
            $answers[(int) $number] = $value;
        }

        return $answers;
    }

    /**
     * Extracts free-form solution text per question from OCR text.
     *
     * Simplified parsing: each answer runs from its "N." / "N、" marker up to
     * the next such marker or the end of the text, so a decimal number inside
     * an answer is mistaken for a new question marker.
     *
     * @param string $text Recognised text.
     *
     * @return array<int, string> Trimmed answer text keyed by question number;
     *                            the last occurrence of a number wins.
     */
    protected function extractSolveAnswers(string $text): array
    {
        // The look-ahead accepts the same separators as the opening marker.
        $pattern = '/([0-9]+)[.、](.+?)(?=[0-9]+[.、]|$)/su';
        if (!preg_match_all($pattern, $text, $matches, PREG_SET_ORDER)) {
            return [];
        }

        $answers = [];
        foreach ($matches as [, $number, $body]) {
            $answers[(int) $number] = trim($body);
        }

        return $answers;
    }
}
|