"""Try OCR prompts for genealogy page images against the Doubao (Ark) API.

The module offers helpers to normalise an image into a JPEG data URL, send
it together with a prompt to the Ark ``responses`` endpoint, pull the answer
text out of the reply and reduce it to bare Chinese characters.  The
statements at the bottom print the candidate prompts and a manually
transcribed reference result.
"""

import base64
import io
import json
import os
import re

import requests
from PIL import Image

# Content-part types that carry answer text.  The ``responses`` endpoint
# labels message text as ``output_text``; ``text`` is kept for replies that
# use the older label.
_TEXT_PART_TYPES = ('text', 'output_text')

# Instruction/explanation fragments that ``clean_text`` removes from a reply.
# NOTE(review): these are removed wherever they occur, so a genuine page that
# contains e.g. '分析' or '原文' loses those characters too.
_UNWANTED_PATTERNS = (
    '请分析', '要求', '提取', '转换', '繁体', '简体',
    'genealogy', 'traditional', 'simplified',
    '原始', '原文', 'JSON', '格式', '输出',
    'reasoning', 'thinking', '思考', '分析',
    '我现在需要', '首先', '然后', '接下来',
    '根据图片', '图片中', '识别', 'OCR',
)


def get_normalized_base64_image(image_url: str) -> str:
    """Download an image and return it as a JPEG ``data:`` URL.

    The image is converted to RGB, scaled down so that its longer side is at
    most 2000 pixels, and re-encoded as JPEG (quality 85).

    Args:
        image_url: HTTP(S) URL of the source image.

    Returns:
        A ``data:image/jpeg;base64,...`` string, or ``image_url`` unchanged
        when downloading or decoding fails (best effort; the error is
        printed).
    """
    try:
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        with Image.open(io.BytesIO(response.content)) as img:
            if img.mode != 'RGB':
                img = img.convert('RGB')
            max_dim = 2000
            longest = max(img.width, img.height)
            if longest > max_dim:
                ratio = max_dim / longest
                # Clamp to 1 px so an extreme aspect ratio cannot produce a
                # zero-sized target, which Pillow rejects.
                new_size = (
                    max(1, int(img.width * ratio)),
                    max(1, int(img.height * ratio)),
                )
                img = img.resize(new_size, Image.Resampling.LANCZOS)
            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=85)
            b64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
            return f'data:image/jpeg;base64,{b64_str}'
    except Exception as e:
        # Deliberate best effort: fall back to the raw URL.
        print(f'Error normalizing image: {e}')
        return image_url


def call_doubao_image_api(image_url: str, prompt: str):
    """Send one image plus a text prompt to the Doubao ``responses`` API.

    Args:
        image_url: URL of the image; it is normalised to a JPEG data URL
            first (see ``get_normalized_base64_image``).
        prompt: Instruction text sent alongside the image.

    Returns:
        The decoded JSON reply on HTTP 200, otherwise ``None`` (non-200
        status or any exception; the problem is printed).
    """
    # SECURITY: a credential committed to source control should be treated as
    # leaked.  Prefer the ARK_API_KEY environment variable; the literal is
    # only kept as a fallback so existing runs keep working - rotate it.
    api_key = (
        os.environ.get('ARK_API_KEY')
        or 'a1800657-9212-4afe-9b7c-b49f015c54d3'
    )
    api_url = 'https://ark.cn-beijing.volces.com/api/v3/responses'

    ai_payload_url = get_normalized_base64_image(image_url)

    payload = {
        'model': 'doubao-seed-1-8-251228',
        'stream': False,
        'input': [
            {
                'role': 'user',
                'content': [
                    {'type': 'input_image', 'image_url': ai_payload_url},
                    {'type': 'input_text', 'text': prompt},
                ],
            }
        ],
    }

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }

    try:
        response = requests.post(
            api_url,
            json=payload,
            headers=headers,
            timeout=120,
            # SECURITY(review): certificate verification is switched off, so
            # the bearer token travels over an unauthenticated TLS channel.
            # Kept because it appears deliberate; re-enable when possible.
            verify=False,
            # Bypass any proxy configured through the environment.
            proxies={'http': None, 'https': None},
        )
        if response.status_code == 200:
            return response.json()
        print(f'API Error: {response.status_code}')
        return None
    except Exception as e:
        print(f'Exception: {e}')
        return None


def extract_text_from_response(response):
    """Extract the answer text from an API reply.

    Two reply layouts are tried in order:

    1. ``response['output']``: the first item that is not of type
       ``reasoning`` and has string or list ``content`` decides the result.
       List content is joined from bare strings and from parts whose type is
       ``text`` or ``output_text``.
    2. ``response['choices'][0]['message']['content']``.

    Args:
        response: Decoded JSON reply (a dict), or a falsy value.

    Returns:
        The extracted text; ``''`` for a falsy reply; ``str(response)`` when
        neither layout matches.
    """
    if not response:
        return ''

    if 'output' in response:
        for item in response['output']:
            # Skip the model's reasoning trace.
            if item.get('type') == 'reasoning':
                continue

            content = item.get('content')
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                text_parts = []
                for part in content:
                    if isinstance(part, dict):
                        # Parts of any other type (e.g. reasoning) carry no
                        # answer text and are ignored.
                        if part.get('type') in _TEXT_PART_TYPES:
                            text_parts.append(part.get('text', ''))
                    elif isinstance(part, str):
                        text_parts.append(part)
                return ''.join(text_parts)

    if 'choices' in response and len(response['choices']) > 0:
        message = response['choices'][0].get('message', {})
        return message.get('content', '')

    return str(response)


def clean_text(text):
    """Reduce a model reply to the bare Chinese text it contains.

    Steps: strip a surrounding Markdown code fence; if the remainder is a
    JSON object, take the first known result field; remove instruction and
    explanation fragments; remove quote characters and leftover JSON
    structure; finally, if any CJK characters (U+4E00-U+9FFF) remain, keep
    only those.

    Args:
        text: Raw reply text; may be empty or ``None``.

    Returns:
        The cleaned text, or ``''`` for empty input.
    """
    if not text:
        return ''

    text = text.strip()

    # Strip Markdown code-fence markers.
    if text.startswith('```json'):
        text = text[7:]
    if text.startswith('```'):
        text = text[3:]
    if text.endswith('```'):
        text = text[:-3]
    text = text.strip()

    # If the reply is a JSON object, pick the first known result field.
    try:
        result = json.loads(text)
        if isinstance(result, dict):
            for key in ['genealogy_traditional', 'traditional', 'text',
                        'content', 'result']:
                if key in result:
                    text = str(result[key])
                    break
    except json.JSONDecodeError:
        # Not JSON: keep treating the reply as plain text.
        pass

    # Remove explanatory wording.
    for pattern in _UNWANTED_PATTERNS:
        text = text.replace(pattern, '')

    # Remove leftover JSON structure and quote characters.
    text = re.sub(r'["\']text["\']\s*[,:]\s*["\']', '', text)
    text = re.sub(r'["\']', '', text)

    # Keep only the Chinese characters when there are any.
    chinese_text = re.findall(r'[\u4e00-\u9fff]+', text)
    if chinese_text:
        text = ''.join(chinese_text)

    return text.strip()


# Candidate prompts to compare.
prompts = [
    '提取图片中的繁体中文文字,直接输出,不要解释。',
    '识别图片中的竖排繁体中文,按阅读顺序输出原文。',
    'OCR识别图片文字,只输出结果。',
    '读取图片中的族谱文字,直接返回。',
    '分析图片,提取所有中文文字,不要分析。'
]

print('=== 测试不同Prompt效果 ===')
for i, prompt in enumerate(prompts):
    print(f'\nPrompt {i+1}: {prompt}')
    print('-' * 50)

    # A real image URL is required to exercise the API here; in this test
    # mode the prompt is only printed for reference.
    print('(需要实际图片URL进行测试)')

# Manual reference sample, transcribed by hand from the user-supplied image.
print('\n=== 预期提取结果(根据图片手动识别)===')
print('因公图片原文(竖排繁体):')
print('因公')
print('字廷大授南州刺史上距陽公三十五世後漢延康元年二月初六日渡')
print('婺州之金華縣長樂鄉 娶林氏生三子 塟藤就村見有石柱石人華表')