appId = $config['app_id'] ?? '';
            $this->apiKey = $config['api_key'] ?? '';
            $this->secretKey = $config['secret_key'] ?? '';
            $this->aesKey = $config['aes_key'] ?? '';

            return;
        }

        $this->appId = $config['app_id'] ?? '';
        $this->apiKey = $config['api_key'] ?? '';
        $this->secretKey = $config['secret_key'] ?? '';
        $this->aesKey = $config['aes_key'] ?? '';
    }

    /**
     * Send a local image to Baidu's OCR endpoint and normalise the response into a flat question list.
     *
     * Every entry of the returned 'questions' list has the keys 'question_number', 'content',
     * 'cut_type', 'confidence' and 'raw_data' (the untouched source item).
     *
     * @param string $imagePath Path of the image file to upload.
     * @param array  $options   Optional settings: 'cutType' (default 'question') and 'subject' (default 'Math').
     *
     * @return array{raw: mixed, questions: array<int, array<string, mixed>>, cut_type: mixed}
     *
     * @throws \Exception When the image is missing or unreadable, the access token cannot be
     *                    obtained, or the OCR HTTP request fails. The exception is logged before
     *                    being rethrown.
     */
    public function recognize(string $imagePath, array $options = []): array
    {
        try {
            if (!file_exists($imagePath)) {
                throw new \Exception("Image file not found: {$imagePath}");
            }

            $cutType = $options['cutType'] ?? 'question';
            $subject = $options['subject'] ?? 'Math';

            $accessToken = $this->getAccessToken();

            // file_get_contents() returns false on failure; fail loudly instead of uploading an empty image.
            $imageBytes = file_get_contents($imagePath);
            if ($imageBytes === false) {
                throw new \Exception("Unable to read image file: {$imagePath}");
            }
            $imageData = base64_encode($imageBytes);

            $endpoint = 'https://aip.baidubce.com/rest/2.0/ocr/v1/edu_paper?'
                . http_build_query(['access_token' => $accessToken]);

            // Laravel sends arrays as JSON by default; Baidu's OCR endpoints are documented as
            // application/x-www-form-urlencoded, so the form encoding is requested explicitly.
            $response = Http::asForm()->post($endpoint, [
                'image' => $imageData,
                'cut_type' => $cutType,
                'subject' => $subject,
            ]);

            if ($response->failed()) {
                throw new \Exception('Baidu OCR API failed: ' . $response->body());
            }

            $body = $response->json();
            $errorCode = $body['error_code'] ?? null;

            Log::info('Baidu OCR Full Response', [
                'cutType' => $cutType,
                'has_data' => isset($body['data']),
                'request_id' => $body['request_id'] ?? null,
                'error_code' => $errorCode,
                'error_msg' => $body['error_msg'] ?? null,
                // Guard against a non-array JSON body (e.g. a bare scalar), which array_keys() rejects.
                'body_keys' => is_array($body) ? array_keys($body) : [],
            ]);

            // Per Baidu's error-code table, 110 and 111 mean the access token is invalid or expired.
            // Drop the cached token so the next call requests a fresh one instead of failing
            // until the cache entry expires on its own.
            if (is_numeric($errorCode) && in_array((int) $errorCode, [110, 111], true)) {
                cache()->forget($this->accessTokenCacheKey());
            }

            $questions = [];
            if (isset($body['data'])) {
                // mb_substr keeps the preview from being cut in the middle of a multibyte character.
                $dataPreview = is_string($body['data'])
                    ? mb_substr($body['data'], 0, 500)
                    : json_encode($body['data']);
                Log::info('Baidu OCR Data Preview', ['data' => $dataPreview]);

                $questions = $this->parseQuestions($body['data'], $cutType);
            }

            return [
                'raw' => $body,
                'questions' => $questions,
                'cut_type' => $cutType,
            ];
        } catch (\Exception $e) {
            Log::error('Baidu OCR Error', [
                'message' => $e->getMessage(),
                'trace' => $e->getTraceAsString(),
            ]);

            throw $e;
        }
    }

    /**
     * Fetch the Baidu OCR access token, reusing a cached copy when one exists.
     *
     * @return string The access token.
     *
     * @throws \Exception When the token request fails or the response carries no 'access_token'.
     */
    protected function getAccessToken(): string
    {
        // Cache the token to avoid requesting a new one on every call.
        $cacheKey = $this->accessTokenCacheKey();
        $cachedToken = cache($cacheKey);

        if ($cachedToken) {
            return $cachedToken;
        }

        // Request a new token. Baidu's official sample posts these fields form-encoded,
        // so the form encoding is requested explicitly instead of Laravel's JSON default.
        $response = Http::asForm()->post('https://aip.baidubce.com/oauth/2.0/token', [
            'grant_type' => 'client_credentials',
            'client_id' => $this->apiKey,
            'client_secret' => $this->secretKey,
        ]);

        if ($response->failed()) {
            throw new \Exception('Failed to get Baidu OCR access token: ' . $response->body());
        }

        $data = $response->json();

        if (!isset($data['access_token'])) {
            throw new \Exception('Invalid response from Baidu OCR token API');
        }

        $accessToken = $data['access_token'];

        // Cache the token until five minutes before it expires ('expires_in' is in seconds and
        // defaults to 3600 when absent). Whole seconds avoid fractional-minute arithmetic.
        // A token that lives less than five minutes is not cached at all.
        $ttlSeconds = (int) ($data['expires_in'] ?? 3600) - 300;
        if ($ttlSeconds > 0) {
            cache([$cacheKey => $accessToken], now()->addSeconds($ttlSeconds));
        }

        return $accessToken;
    }

    /**
     * Cache key under which the access token is stored.
     */
    private function accessTokenCacheKey(): string
    {
        return 'baidu_ocr_access_token';
    }

    /**
     * Flatten the 'data' payload (page_list -> subject_list / answer_list) into question entries.
     *
     * @param mixed $rawData The 'data' value of the OCR response: a JSON string or an already decoded array.
     * @param mixed $cutType Requested cut type; 'answer' prefers each page's 'answer_list'.
     *
     * @return array<int, array<string, mixed>>
     */
    private function parseQuestions($rawData, $cutType): array
    {
        $data = is_string($rawData) ? json_decode($rawData, true) : $rawData;

        if (!is_array($data) || !isset($data['page_list']) || !is_array($data['page_list'])) {
            return [];
        }

        $questions = [];

        foreach ($data['page_list'] as $page) {
            // Pick the list matching the cut type, falling back to 'subject_list'.
            $itemList = null;
            if ($cutType === 'answer' && isset($page['answer_list'])) {
                $itemList = $page['answer_list'];
            } elseif (isset($page['subject_list'])) {
                $itemList = $page['subject_list'];
            }

            if (!$itemList || !is_array($itemList)) {
                continue;
            }

            foreach ($itemList as $item) {
                $questions[] = [
                    // Falls back to the running 1-based position across all pages.
                    'question_number' => $this->resolveQuestionNumber($item, count($questions) + 1),
                    'content' => $this->extractText($item),
                    'cut_type' => $cutType,
                    'confidence' => $this->extractConfidence($item),
                    'raw_data' => $item,
                ];
            }
        }

        return $questions;
    }

    /**
     * Determine the question number of an OCR item.
     *
     * The number may live in 'question_id', 'id' or 'index' (checked in that order); it is only
     * used when it is numeric, otherwise the fallback is returned.
     *
     * @param mixed $item     One entry of a subject/answer list.
     * @param int   $fallback Number to use when the item carries no numeric identifier.
     */
    private function resolveQuestionNumber($item, int $fallback): int
    {
        $idValue = $item['question_id'] ?? $item['id'] ?? $item['index'] ?? null;

        return is_numeric($idValue) ? (int) $idValue : $fallback;
    }

    /**
     * Extract the text of an OCR item, preferring 'text' and falling back to 'words'.
     *
     * @param mixed $item One entry of a subject/answer list.
     *
     * @return mixed The item's 'text' value when non-empty, otherwise a string built from 'words'
     *               (or the empty/absent 'text' default when there is no 'words' field).
     */
    private function extractText($item)
    {
        $text = $item['text'] ?? '';

        if (!empty($text) || !isset($item['words'])) {
            return $text;
        }

        // 'words' is either a list of entries each holding a 'word' fragment, or a plain value.
        if (is_array($item['words'])) {
            return implode('', array_column($item['words'], 'word'));
        }

        return (string) $item['words'];
    }

    /**
     * Compute the confidence of an OCR item.
     *
     * Uses the item's own 'confidence' when present; otherwise averages the numeric 'confidence'
     * values of its 'words' entries and divides the average by 100.
     *
     * NOTE(review): the item-level value is returned as-is while the word average is scaled by
     * 1/100 — confirm both end up on the same scale.
     *
     * @param mixed $item One entry of a subject/answer list.
     */
    private function extractConfidence($item): float
    {
        if (isset($item['confidence'])) {
            return (float) $item['confidence'];
        }

        if (!isset($item['words']) || !is_array($item['words'])) {
            return 0.0;
        }

        $total = 0.0;
        $count = 0;

        foreach ($item['words'] as $word) {
            // Skip entries without a usable numeric confidence so the arithmetic cannot fail.
            if (isset($word['confidence']) && is_numeric($word['confidence'])) {
                $total += (float) $word['confidence'];
                $count++;
            }
        }

        return $count > 0 ? ($total / $count) / 100 : 0.0;
    }
}