OCRProcessingService.php 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. <?php
  2. namespace App\Services;
  3. use Illuminate\Support\Facades\Http;
  4. use Illuminate\Support\Facades\Log;
  5. use Illuminate\Support\Facades\Storage;
  /**
   * Recognises the answers on a scanned answer sheet through a configurable OCR
   * provider and returns them in one normalised shape.
   *
   * Both provider integrations are still placeholders: they log the call and
   * return generated mock answers instead of contacting the remote API.
   *
   * NOTE(review): settings are read with env() at construction time. Laravel
   * recommends calling env() only from config files, because the .env file is
   * not loaded once the configuration is cached. Consider moving these keys to
   * a config file (that needs a change outside this class).
   */
  class OCRProcessingService
  {
      /** Confidence increment added per question by the mock answer generator. */
      private const MOCK_CONFIDENCE_STEP = 0.02;

      /** Number of mock answers produced when no standard questions are known. */
      private const DEFAULT_MOCK_QUESTION_COUNT = 4;

      /** Active OCR provider: "aliyun" or "baidu". */
      protected string $driver;

      /** Aliyun access key pair and OCR endpoint. */
      protected array $aliyunConfig;

      /** Baidu application id and key pair. */
      protected array $baiduConfig;

      /**
       * Load the provider selection and credentials from the environment.
       */
      public function __construct()
      {
          $this->driver = env('OCR_DRIVER', 'aliyun');

          $this->aliyunConfig = [
              'access_key_id' => env('ALIYUN_ACCESS_KEY_ID'),
              'access_key_secret' => env('ALIYUN_ACCESS_KEY_SECRET'),
              'endpoint' => env('ALIYUN_OCR_ENDPOINT', 'ocr-api.cn-hangzhou.aliyuncs.com'),
          ];

          $this->baiduConfig = [
              'app_id' => env('BAIDU_MATH_APP_ID'),
              'app_key' => env('BAIDU_MATH_APP_KEY'),
              'secret_key' => env('BAIDU_MATH_SECRET_KEY'),
          ];
      }

      /**
       * Run OCR recognition on an image.
       *
       * @param string      $imagePath Absolute URL, or a path relative to the public storage directory.
       * @param string|null $paperId   Optional paper id; when given, that paper's standard questions
       *                               are loaded and handed to the provider.
       *
       * @return array{answers: list<array<string, mixed>>} Normalised answers, see formatResults().
       *
       * @throws \Exception When the selected provider fails.
       */
      public function processImage(string $imagePath, ?string $paperId = null): array
      {
          $imageUrl = $this->getImageUrl($imagePath);

          Log::info('开始OCR识别', [
              'image_url' => $imageUrl,
              'driver' => $this->driver,
              'paper_id' => $paperId,
          ]);

          // Load the standard questions when a paper was selected. The check is
          // explicit so that a paper id of "0" is not treated as "no paper".
          $standardQuestions = [];
          if ($paperId !== null && $paperId !== '') {
              $questionBankService = app(\App\Services\QuestionBankService::class);
              $standardQuestions = $questionBankService->getPaperQuestions($paperId);

              Log::info('获取标准题目', [
                  'paper_id' => $paperId,
                  'question_count' => count($standardQuestions),
              ]);
          }

          // Pick the provider from configuration. Any driver value other than
          // "aliyun" falls through to Baidu.
          $results = $this->driver === 'aliyun'
              ? $this->processWithAliyun($imageUrl, $standardQuestions)
              : $this->processWithBaidu($imageUrl, $standardQuestions);

          // If several providers are used in the future, their results can be merged here.
          return $this->formatResults($results);
      }

      /**
       * Aliyun OCR recognition (placeholder: returns mock answers).
       *
       * @param string $imageUrl          Public URL of the image to recognise.
       * @param array  $standardQuestions Standard questions of the paper, if any.
       *
       * @return array{answers: list<array<string, mixed>>}
       *
       * @throws \Exception When recognition fails; the original exception is chained as "previous".
       */
      protected function processWithAliyun(string $imageUrl, array $standardQuestions = []): array
      {
          try {
              Log::info('调用阿里云OCR API', ['standard_questions_count' => count($standardQuestions)]);

              // TODO: implement the real Aliyun OCR call using the Aliyun OCR SDK.
              // Reference: https://help.aliyun.com/document_detail/306402.html

              // When standard questions are available, their count can be used to validate the OCR result.
              Log::info('预期题目数量', ['expected' => count($standardQuestions)]);

              // Mock data until the real call is implemented.
              return ['answers' => $this->buildMockAnswers($standardQuestions, 0.90)];
          } catch (\Exception $e) {
              Log::error('阿里云OCR调用失败', ['error' => $e->getMessage()]);

              throw new \Exception('OCR识别失败:' . $e->getMessage(), previous: $e);
          }
      }

      /**
       * Baidu OCR recognition (placeholder: returns mock answers).
       *
       * @param string $imageUrl          Public URL of the image to recognise.
       * @param array  $standardQuestions Standard questions of the paper, if any.
       *
       * @return array{answers: list<array<string, mixed>>}
       *
       * @throws \Exception When recognition fails; the original exception is chained as "previous".
       */
      protected function processWithBaidu(string $imageUrl, array $standardQuestions = []): array
      {
          try {
              Log::info('调用百度OCR API', ['standard_questions_count' => count($standardQuestions)]);

              // TODO: implement the real Baidu OCR call using the Baidu OCR API.
              // Reference: https://ai.baidu.com/ai-doc/REFERENCE/Ck3dwjhhu

              // When standard questions are available, their count can be used to validate the OCR result.
              Log::info('预期题目数量', ['expected' => count($standardQuestions)]);

              // Mock data until the real call is implemented.
              return ['answers' => $this->buildMockAnswers($standardQuestions, 0.88)];
          } catch (\Exception $e) {
              Log::error('百度OCR调用失败', ['error' => $e->getMessage()]);

              throw new \Exception('OCR识别失败:' . $e->getMessage(), previous: $e);
          }
      }

      /**
       * Build the placeholder answer list shared by both provider stubs.
       *
       * One answer is produced per standard question, or a fixed default number
       * when none are given. The question type is taken from the matching
       * standard question ("question_type", then "type"), defaulting to "choice".
       *
       * @param array $standardQuestions Standard questions of the paper, if any.
       * @param float $baseConfidence    Confidence the per-question increment is added to.
       *
       * @return list<array{q: int, type: string, value: string, confidence: float}>
       */
      private function buildMockAnswers(array $standardQuestions, float $baseConfidence): array
      {
          // Re-index so the positional lookup below also works for keyed arrays.
          $standardQuestions = array_values($standardQuestions);

          $questionCount = count($standardQuestions) > 0
              ? count($standardQuestions)
              : self::DEFAULT_MOCK_QUESTION_COUNT;

          $answers = [];
          for ($i = 1; $i <= $questionCount; $i++) {
              $stdQuestion = $standardQuestions[$i - 1] ?? null;
              $questionType = $stdQuestion['question_type'] ?? $stdQuestion['type'] ?? 'choice';

              $answers[] = [
                  'q' => $i,
                  'type' => $questionType,
                  'value' => $this->generateMockAnswer($questionType),
                  // Simulate a different confidence per question, but never above 1.0:
                  // the unclamped value would exceed 1.0 from the sixth question on.
                  'confidence' => min(1.0, $baseConfidence + ($i * self::MOCK_CONFIDENCE_STEP)),
              ];
          }

          return $answers;
      }

      /**
       * Generate a mock answer for the given question type.
       *
       * @param string $type "choice", "fill" or "solve"; anything else yields a fixed "unknown" answer.
       */
      protected function generateMockAnswer(string $type): string
      {
          $choices = ['A', 'B', 'C', 'D'];

          return match ($type) {
              'choice' => $choices[array_rand($choices)],
              'fill' => (string) rand(1, 100),
              'solve' => '解答步骤和答案',
              default => '未知答案',
          };
      }

      /**
       * Resolve an image path to a URL.
       *
       * @param string $imagePath Absolute URL, or a path relative to the public storage directory.
       */
      protected function getImageUrl(string $imagePath): string
      {
          // Already a complete URL: return it unchanged.
          if (filter_var($imagePath, FILTER_VALIDATE_URL)) {
              return $imagePath;
          }

          // FILTER_VALIDATE_URL only accepts ASCII URLs, so also pass through
          // anything with an explicit http(s) scheme. The scheme separator is
          // required so that a relative path that merely starts with "http"
          // (e.g. "http_scans/1.png") is still treated as a relative path.
          if (preg_match('#^https?://#i', $imagePath) === 1) {
              return $imagePath;
          }

          // Relative path: build the public storage URL (without a doubled slash).
          return asset('storage/' . ltrim($imagePath, '/'));
      }

      /**
       * Normalise raw provider results.
       *
       * The answer text is taken from "value", falling back to "steps"; an array
       * is joined with "|". A missing confidence defaults to 0.9 and every answer
       * is tagged with the active driver.
       *
       * @param array $rawResults Provider output with an "answers" list; a missing list is treated as empty.
       *
       * @return array{answers: list<array<string, mixed>>}
       */
      protected function formatResults(array $rawResults): array
      {
          $answers = [];

          foreach ($rawResults['answers'] ?? [] as $result) {
              // Resolve the text once so that a string "steps" entry is kept
              // when "value" is absent, instead of being replaced by ''.
              $rawValue = $result['value'] ?? $result['steps'] ?? '';

              $answers[] = [
                  'q' => $result['q'],
                  'type' => $result['type'],
                  'value' => is_array($rawValue) ? implode('|', $rawValue) : $rawValue,
                  'confidence' => $result['confidence'] ?? 0.9,
                  'provider' => $this->driver,
              ];
          }

          return ['answers' => $answers];
      }

      /**
       * Extract multiple-choice answers such as "1. A" or "2、C" from OCR text.
       *
       * When a question number occurs more than once, the first occurrence wins.
       * Text that is not valid UTF-8 yields an empty result.
       *
       * @return array<int, string> Answer letter (A-D) keyed by question number.
       */
      protected function extractChoiceAnswers(string $text): array
      {
          // The "u" modifier is required for the multi-byte "、" (U+3001) to be
          // matched as one character. [0-9] is used instead of \d because \d also
          // matches non-ASCII digits in Unicode mode, which intval() cannot parse.
          $found = preg_match_all('/([0-9]+)[.\x{3001}]\s*([A-D])/u', $text, $matches, PREG_SET_ORDER);
          if ($found === false) {
              return [];
          }

          // Number and letter are captured together in a single pass, so looking
          // up question 1 can no longer match inside "11. B".
          $answers = [];
          foreach ($matches as [, $number, $letter]) {
              $answers[(int) $number] ??= $letter;
          }

          return $answers;
      }

      /**
       * Extract fill-in-the-blank answers written as "<number>. : <answer>".
       *
       * The separator after the number may be "." or "、"; the colon may be the
       * ASCII ":" or the full-width ":" (U+FF1A). The answer is the following run
       * of non-whitespace characters. For a repeated question number the last
       * occurrence wins. Text that is not valid UTF-8 yields an empty result.
       *
       * @return array<int, string> Answer text keyed by question number.
       */
      protected function extractFillAnswers(string $text): array
      {
          $found = preg_match_all(
              '/([0-9]+)[.\x{3001}]\s*[:\x{FF1A}]\s*(\S+)/u',
              $text,
              $matches,
              PREG_SET_ORDER
          );
          if ($found === false) {
              return [];
          }

          $answers = [];
          foreach ($matches as [, $number, $answer]) {
              $answers[(int) $number] = $answer;
          }

          return $answers;
      }

      /**
       * Extract free-form (worked solution) answers from OCR text.
       *
       * Simplified parsing: each answer runs from its "<number>." / "<number>、"
       * marker up to the next such marker or the end of the text, so a decimal
       * such as "3.14" inside an answer is still mistaken for a marker. For a
       * repeated question number the last occurrence wins. Text that is not
       * valid UTF-8 yields an empty result.
       *
       * @return array<int, string> Trimmed answer text keyed by question number.
       */
      protected function extractSolveAnswers(string $text): array
      {
          // The lookahead accepts the same separators as the marker itself, so
          // answers numbered with "、" are split correctly too.
          $found = preg_match_all(
              '/([0-9]+)[.\x{3001}]([\s\S]+?)(?=[0-9]+[.\x{3001}]|$)/u',
              $text,
              $matches,
              PREG_SET_ORDER
          );
          if ($found === false) {
              return [];
          }

          $answers = [];
          foreach ($matches as [, $number, $body]) {
              $answers[(int) $number] = trim($body);
          }

          return $answers;
      }
  }