| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
<?php
/**
 * One-off maintenance script: re-runs the Alibaba Cloud "RecognizeEduPaperCut"
 * OCR call for a stored image and saves the raw response.
 *
 * Steps:
 *   1. Boot Laravel (autoloader + console kernel) so facades, helpers and
 *      .env values are available.
 *   2. Load the `ocr_records` row and resolve its image under storage/app/public.
 *   3. Send the image to the OCR API.
 *   4. Flatten every non-empty text item found under
 *      data.page_list[].answer_list[].content_list_info[] into "blocks".
 *   5. Insert one row into `ocr_raw_data` and dump the full response to /tmp.
 *
 * Usage: php <this-script> [ocr_record_id]   (the ID defaults to 3)
 *
 * Exits with status 1 on any failure so callers and shells can detect it.
 */

// NOTE: Config, RecognizeEduPaperCutResponse and OpenApiUtil are currently
// unused; they are kept so that no existing import is dropped.
use AlibabaCloud\Client\Config\Config;
use AlibabaCloud\SDK\Ocrapi\V20210707\Models\RecognizeEduPaperCutRequest;
use AlibabaCloud\SDK\Ocrapi\V20210707\Models\RecognizeEduPaperCutResponse;
use AlibabaCloud\SDK\Ocrapi\V20210707\Ocrapi;
use AlibabaCloud\Tea\Utils\Utils\RuntimeOptions;
use Darabonba\OpenApi\Models\Config as OpenApiConfig;
use Darabonba\OpenApi\Util\Util as OpenApiUtil;
use GuzzleHttp\Psr7\Utils;
use Illuminate\Contracts\Console\Kernel;
use Illuminate\Support\Facades\DB;

require __DIR__.'/vendor/autoload.php';

// Boot Laravel. Requiring bootstrap/app.php only builds the container; the
// console kernel must be bootstrapped before facades (DB) and env() work.
$app = require_once __DIR__.'/bootstrap/app.php';
$app->make(Kernel::class)->bootstrap();

// Record to re-process: first CLI argument, falling back to the original ID 3.
$recordId = isset($_SERVER['argv'][1]) ? (int) $_SERVER['argv'][1] : 3;
if ($recordId < 1) {
    echo "无效的OCR记录ID\n";
    exit(1);
}

echo "=== 重新获取OCR原始数据 ===\n\n";

$ocrRecord = DB::table('ocr_records')->find($recordId);
if (!$ocrRecord) {
    echo "未找到OCR记录ID={$recordId}\n";
    exit(1);
}

$imagePath = storage_path('app/public/' . $ocrRecord->file_path);
if (!file_exists($imagePath)) {
    echo "图片文件不存在: {$imagePath}\n";
    exit(1);
}
echo "使用图片: {$imagePath}\n";

// Encodes a value for a JSON/text column; the query builder does not cast
// PHP arrays, so they must be serialised before insert().
$toJson = static function ($value) {
    $json = json_encode($value, JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        throw new RuntimeException('JSON encoding failed: ' . json_last_error_msg());
    }

    return $json;
};

try {
    // The OpenAPI-based Ocrapi client expects Darabonba's Config model.
    $config = new OpenApiConfig([
        'accessKeyId' => env('ALIYUN_ACCESS_KEY_ID'),
        'accessKeySecret' => env('ALIYUN_ACCESS_KEY_SECRET'),
        'regionId' => 'cn-shanghai',
        'endpoint' => 'ocr-api.cn-shanghai.aliyuncs.com',
    ]);
    $client = new Ocrapi($config);

    $fileStream = fopen($imagePath, 'rb');
    if ($fileStream === false) {
        throw new RuntimeException("Unable to open image file: {$imagePath}");
    }

    try {
        $request = new RecognizeEduPaperCutRequest([
            'body' => Utils::streamFor($fileStream),
            'cutType' => 'answer',     // return questions together with answers
            'imageType' => 'photo',
            'subject' => 'Math',
            'outputOricoord' => true,  // include coordinates in the output
        ]);

        echo "正在调用阿里云OCR API...\n";
        $response = $client->recognizeEduPaperCutWithOptions($request, new RuntimeOptions([]));
    } finally {
        // Release the handle even when the API call throws.
        if (is_resource($fileStream)) {
            fclose($fileStream);
        }
    }

    // Convert the SDK response model into a plain associative array.
    $body = json_decode(json_encode($response->body), true);
    if (!is_array($body)) {
        $body = [];
    }
    // NOTE(review): the API appears to return `data` as a JSON-encoded string;
    // decode it when that is the case so the nested lookups below can see it.
    if (isset($body['data']) && is_string($body['data'])) {
        $decodedData = json_decode($body['data'], true);
        if (is_array($decodedData)) {
            $body['data'] = $decodedData;
        }
    }

    $requestId = $body['requestId'] ?? null;
    $algoVersion = $body['data']['algo_version'] ?? null;

    echo "API调用成功!\n";
    echo "- RequestID: " . ($requestId ?? 'N/A') . "\n";
    echo "- 算法版本: " . ($algoVersion ?? 'N/A') . "\n";

    // Flatten page -> answer -> content items into a list of text blocks.
    $blocks = [];
    foreach ($body['data']['page_list'] ?? [] as $page) {
        foreach ($page['answer_list'] ?? [] as $item) {
            foreach ($item['content_list_info'] ?? [] as $content) {
                $text = isset($content['text']) ? trim((string) $content['text']) : '';
                // Compare against '' explicitly: empty() would also discard "0",
                // which is a legitimate answer on a maths paper.
                if ($text === '') {
                    continue;
                }

                $blocks[] = [
                    'text' => $text,
                    'position' => $content['pos'] ?? null,
                    'confidence' => $content['confidence'] ?? null,
                    'doc_index' => $content['doc_index'] ?? null,
                    'type' => null,
                ];
            }
        }
    }

    // Persist to ocr_raw_data; array-valued columns are stored as JSON text.
    $rawData = [
        'ocr_record_id' => $recordId,
        'raw_response' => $toJson($body),
        'api_request_id' => $requestId,
        'algo_version' => $algoVersion,
        'total_blocks' => count($blocks),
        'parsed_blocks' => $toJson($blocks),
        'metadata' => $toJson([
            'saved_at' => now()->toISOString(),
            'retrieved_at' => date('Y-m-d H:i:s'),
        ]),
    ];
    DB::table('ocr_raw_data')->insert($rawData);

    echo "\n原始数据已保存到ocr_raw_data表\n";
    echo "- 文本块总数: " . count($blocks) . "\n";
    echo "- 请求ID: " . $rawData['api_request_id'] . "\n";

    // Preview the first five blocks. mb_* keeps multi-byte characters intact,
    // and the ellipsis is only added when the text was actually truncated.
    echo "\n=== 前5个文本块示例 ===\n";
    foreach (array_slice($blocks, 0, 5) as $i => $block) {
        $preview = mb_substr($block['text'], 0, 80);
        if (mb_strlen($block['text']) > 80) {
            $preview .= '...';
        }
        echo "块" . ($i + 1) . ": " . $preview . "\n";

        if ($block['position']) {
            echo " 位置: (" . ($block['position'][0]['x'] ?? 'N/A') . ", " . ($block['position'][0]['y'] ?? 'N/A') . ")\n";
        }
    }

    // Dump the complete response next to the DB row for manual inspection.
    $dumpPath = '/tmp/ocr_api_response_id' . $recordId . '.json';
    $written = file_put_contents($dumpPath, json_encode($body, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE));
    if ($written === false) {
        echo "\n警告: 无法写入 {$dumpPath}\n";
    } else {
        echo "\n完整API响应已保存到: {$dumpPath}\n";
    }
} catch (Throwable $e) {
    // Top-level handler: report the failure and exit non-zero. Throwable is
    // caught so that Error subclasses (e.g. TypeError) are reported as well.
    echo "错误: " . $e->getMessage() . "\n";
    echo "请检查阿里云配置和API密钥\n";
    exit(1);
}

echo "\n完成!\n";
|