<?php

declare(strict_types=1);

/**
 * fetch_ocr_raw_data.php
 *
 * One-off CLI script that re-fetches the raw Alibaba Cloud OCR result
 * (RecognizeEduPaperCut) for a stored `ocr_records` row and persists it:
 *
 *   1. boots Laravel so facades and helpers are usable,
 *   2. loads the OCR record and resolves its image under storage/app/public,
 *   3. sends the image to the OCR API,
 *   4. flattens the recognised text blocks,
 *   5. inserts one row into `ocr_raw_data`,
 *   6. dumps the full response to /tmp/ocr_api_response_id<ID>.json.
 *
 * Usage: php fetch_ocr_raw_data.php [ocr_record_id]   (record id defaults to 3)
 *
 * Exit status is 0 on success and 1 on any failure.
 */

use AlibabaCloud\Client\Config\Config;
use AlibabaCloud\SDK\Ocrapi\V20210707\Models\RecognizeEduPaperCutRequest;
use AlibabaCloud\SDK\Ocrapi\V20210707\Models\RecognizeEduPaperCutResponse;
use AlibabaCloud\SDK\Ocrapi\V20210707\Ocrapi;
use AlibabaCloud\Tea\Utils\Utils\RuntimeOptions;
use Darabonba\OpenApi\Models\Config as OpenApiConfig;
use Darabonba\OpenApi\Util\Util as OpenApiUtil;
use GuzzleHttp\Psr7\Utils;
use Illuminate\Contracts\Console\Kernel;
use Illuminate\Support\Facades\DB;

require __DIR__ . '/vendor/autoload.php';

// Boot Laravel. Creating the application is not enough: the console kernel
// must be bootstrapped before facades (DB) and env()/config are available.
$app = require_once __DIR__ . '/bootstrap/app.php';
$app->make(Kernel::class)->bootstrap();

echo "=== 重新获取OCR原始数据 ===\n\n";

// Record to process; optional first CLI argument, defaults to 3.
$ocrRecordId = (int) ($_SERVER['argv'][1] ?? 3);
if ($ocrRecordId <= 0) {
    echo "无效的OCR记录ID\n";
    exit(1);
}

$ocrRecord = DB::table('ocr_records')->find($ocrRecordId);
if (!$ocrRecord) {
    echo "未找到OCR记录ID={$ocrRecordId}\n";
    exit(1);
}

$imagePath = storage_path('app/public/' . $ocrRecord->file_path);
if (!file_exists($imagePath)) {
    echo "图片文件不存在: {$imagePath}\n";
    exit(1);
}

echo "使用图片: {$imagePath}\n";

// Encodes a value for a JSON column / JSON file, failing loudly instead of
// silently storing `false`.
$toJson = static function ($value, int $flags = 0): string {
    $json = json_encode($value, $flags | JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        throw new RuntimeException('JSON编码失败: ' . json_last_error_msg());
    }

    return $json;
};

$exitCode = 0;
$fileStream = null;

try {
    // Configure the Alibaba Cloud client. The Darabonba-generated Ocrapi client
    // expects the OpenAPI config model, not AlibabaCloud\Client\Config\Config.
    // NOTE(review): env() returns null when the Laravel config is cached;
    // consider moving these keys into config/services.php.
    $accessKeyId = env('ALIYUN_ACCESS_KEY_ID');
    $accessKeySecret = env('ALIYUN_ACCESS_KEY_SECRET');
    if (empty($accessKeyId) || empty($accessKeySecret)) {
        throw new RuntimeException('缺少 ALIYUN_ACCESS_KEY_ID / ALIYUN_ACCESS_KEY_SECRET 配置');
    }

    $config = new OpenApiConfig([
        'accessKeyId' => $accessKeyId,
        'accessKeySecret' => $accessKeySecret,
        'regionId' => 'cn-shanghai',
        'endpoint' => 'ocr-api.cn-shanghai.aliyuncs.com',
    ]);
    $client = new Ocrapi($config);

    // Build the request from the image file.
    $fileStream = fopen($imagePath, 'rb');
    if ($fileStream === false) {
        throw new RuntimeException("无法打开图片文件: {$imagePath}");
    }

    $request = new RecognizeEduPaperCutRequest([
        'body' => Utils::streamFor($fileStream),
        'cutType' => 'answer',      // questions together with answers
        'imageType' => 'photo',
        'subject' => 'Math',
        'outputOricoord' => true,   // include original-image coordinates
    ]);

    echo "正在调用阿里云OCR API...\n";
    $response = $client->recognizeEduPaperCutWithOptions($request, new RuntimeOptions([]));

    // Convert the response model into a plain nested array.
    $body = json_decode($toJson($response->body), true);
    if (!is_array($body)) {
        throw new RuntimeException('OCR API返回了无法解析的响应');
    }

    // NOTE(review): the OCR API appears to deliver `data` as a JSON-encoded
    // string rather than a nested object; decode it so the lookups below work.
    // An already-decoded array is left untouched. Confirm against the API docs.
    if (isset($body['data']) && is_string($body['data'])) {
        $decodedData = json_decode($body['data'], true);
        if (is_array($decodedData)) {
            $body['data'] = $decodedData;
        }
    }

    $requestId = $body['requestId'] ?? null;
    $algoVersion = $body['data']['algo_version'] ?? null;

    echo "API调用成功!\n";
    echo "- RequestID: " . ($requestId ?? 'N/A') . "\n";
    echo "- 算法版本: " . ($algoVersion ?? 'N/A') . "\n";

    // Flatten page_list -> answer_list -> content_list_info into text blocks.
    $blocks = [];
    foreach ($body['data']['page_list'] ?? [] as $page) {
        foreach ($page['answer_list'] ?? [] as $item) {
            foreach ($item['content_list_info'] ?? [] as $content) {
                $rawText = $content['text'] ?? null;
                if (!is_scalar($rawText)) {
                    continue;
                }

                // Compare against '' explicitly: empty() would also discard
                // the perfectly valid answer "0".
                $text = trim((string) $rawText);
                if ($text === '') {
                    continue;
                }

                $blocks[] = [
                    'text' => $text,
                    'position' => $content['pos'] ?? null,
                    'confidence' => $content['confidence'] ?? null,
                    'doc_index' => $content['doc_index'] ?? null,
                    'type' => null,
                ];
            }
        }
    }

    // The query builder cannot bind PHP arrays, so every structured column is
    // JSON-encoded before insertion.
    $rawData = [
        'ocr_record_id' => $ocrRecordId,
        'raw_response' => $toJson($body),
        'api_request_id' => $requestId,
        'algo_version' => $algoVersion,
        'total_blocks' => count($blocks),
        'metadata' => $toJson([
            'saved_at' => now()->toISOString(),
            'retrieved_at' => date('Y-m-d H:i:s'),
        ]),
        'parsed_blocks' => $toJson($blocks),
    ];

    DB::table('ocr_raw_data')->insert($rawData);

    echo "\n原始数据已保存到ocr_raw_data表\n";
    echo "- 文本块总数: " . count($blocks) . "\n";
    echo "- 请求ID: " . ($requestId ?? 'N/A') . "\n";

    // Preview the first five blocks.
    echo "\n=== 前5个文本块示例 ===\n";
    foreach (array_slice($blocks, 0, 5) as $index => $block) {
        // Multibyte-safe truncation: byte-wise substr() can split a CJK character.
        $preview = mb_substr($block['text'], 0, 80);
        if (mb_strlen($block['text']) > 80) {
            $preview .= '...';
        }
        echo "块" . ($index + 1) . ": " . $preview . "\n";

        if ($block['position']) {
            $x = $block['position'][0]['x'] ?? 'N/A';
            $y = $block['position'][0]['y'] ?? 'N/A';
            echo " 位置: ({$x}, {$y})\n";
        }
    }

    // Keep the complete response on disk for later inspection.
    $dumpPath = "/tmp/ocr_api_response_id{$ocrRecordId}.json";
    if (file_put_contents($dumpPath, $toJson($body, JSON_PRETTY_PRINT)) === false) {
        throw new RuntimeException("无法写入文件: {$dumpPath}");
    }
    echo "\n完整API响应已保存到: {$dumpPath}\n";
} catch (Exception $e) {
    echo "错误: " . $e->getMessage() . "\n";
    echo "请检查阿里云配置和API密钥\n";
    $exitCode = 1;
} finally {
    // Always release the image handle, including when the API call throws.
    if (is_resource($fileStream)) {
        fclose($fileStream);
    }
}

if ($exitCode !== 0) {
    exit($exitCode);
}

echo "\n完成!\n";