| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185 |
- import requests
- import json
- import re
- import io
- import base64
- from PIL import Image
def get_normalized_base64_image(image_url):
    """Download an image and return it as a base64 JPEG ``data:`` URI.

    The image is fetched over HTTP, converted to RGB when it is in any
    other mode, scaled down so that its longer side is at most 2000 pixels,
    and re-encoded as JPEG at quality 85.

    Args:
        image_url: URL of the image to download.

    Returns:
        A ``data:image/jpeg;base64,...`` string on success. If any step
        raises, the error is printed and ``image_url`` is returned
        unchanged so the caller can still pass the plain URL along.
    """
    longest_allowed = 2000
    try:
        reply = requests.get(image_url, timeout=30)
        reply.raise_for_status()

        with Image.open(io.BytesIO(reply.content)) as picture:
            # JPEG cannot store alpha/palette data, so force plain RGB.
            if picture.mode != 'RGB':
                picture = picture.convert('RGB')

            longest_side = max(picture.width, picture.height)
            if longest_side > longest_allowed:
                scale = longest_allowed / longest_side
                picture = picture.resize(
                    (int(picture.width * scale), int(picture.height * scale)),
                    Image.Resampling.LANCZOS,
                )

            encoded = io.BytesIO()
            picture.save(encoded, format='JPEG', quality=85)
            payload = base64.b64encode(encoded.getvalue()).decode('utf-8')
    except Exception as e:
        # Best-effort: fall back to the original URL rather than failing.
        print(f'Error normalizing image: {e}')
        return image_url
    return f'data:image/jpeg;base64,{payload}'
def call_doubao_image_api(image_url, prompt, *, api_key=None, verify=False):
    """Send one image plus a text prompt to the Doubao model and return the JSON reply.

    The image is first passed through ``get_normalized_base64_image`` and
    the result (a data URI, or the original URL if normalisation failed) is
    posted together with ``prompt`` as a single user message.

    Args:
        image_url: URL of the image to analyse.
        prompt: Instruction text sent alongside the image.
        api_key: Optional bearer token. When omitted, the ``ARK_API_KEY``
            environment variable is used, and only if that is unset does
            the function fall back to the legacy key embedded below.
        verify: Forwarded to ``requests.post(verify=...)``. Defaults to
            ``False`` to preserve the existing behaviour; pass ``True`` (or
            a CA bundle path) to enable TLS certificate verification.

    Returns:
        The decoded JSON body when the HTTP status is 200; ``None`` on any
        other status or when the request/decoding raises (the problem is
        printed in both cases).
    """
    import os  # function-scope import keeps this block self-contained

    api_url = 'https://ark.cn-beijing.volces.com/api/v3/responses'

    if api_key is None:
        # SECURITY NOTE(review): a credential committed to source control
        # should be treated as leaked. The embedded key is kept only so
        # existing callers keep working; rotate it and supply the real key
        # through ARK_API_KEY or the ``api_key`` argument instead.
        api_key = (
            os.environ.get('ARK_API_KEY')
            or 'a1800657-9212-4afe-9b7c-b49f015c54d3'
        )

    ai_payload_url = get_normalized_base64_image(image_url)

    payload = {
        'model': 'doubao-seed-1-8-251228',
        'stream': False,
        'input': [
            {
                'role': 'user',
                'content': [
                    {'type': 'input_image', 'image_url': ai_payload_url},
                    {'type': 'input_text', 'text': prompt},
                ],
            }
        ],
    }

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }

    try:
        response = requests.post(
            api_url,
            json=payload,
            headers=headers,
            timeout=120,
            # SECURITY NOTE(review): verify=False sends the bearer token
            # over a connection whose certificate is not checked. The
            # default is unchanged for compatibility; prefer verify=True.
            verify=verify,
            # Presumably meant to bypass proxies configured in the
            # environment -- confirm before removing.
            proxies={'http': None, 'https': None},
        )

        if response.status_code == 200:
            return response.json()

        print(f'API Error: {response.status_code}')
        return None
    except Exception as e:
        # Best-effort contract: report the failure and signal it with None.
        print(f'Exception: {e}')
        return None
def _join_text_parts(parts):
    """Concatenate the text carried by a list of message content parts."""
    pieces = []
    for part in parts:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and part.get('type') in ('text', 'output_text'):
            # 'output_text' is the part type used by Responses-style
            # replies; plain 'text' is kept for backward compatibility.
            pieces.append(part.get('text') or '')
    return ''.join(pieces)


def extract_text_from_response(response):
    """Extract the generated text from an API response.

    Two response layouts are understood:

    * ``{'output': [...]}`` -- each item may carry ``content`` as a string
      or as a list of parts. Items and parts typed ``'reasoning'`` are
      ignored. The first item that yields non-empty text wins.
    * ``{'choices': [{'message': {'content': ...}}]}`` -- the content of
      the first choice is returned.

    Args:
        response: Decoded JSON response (normally a dict), or a falsy value.

    Returns:
        The extracted text. ``''`` when ``response`` is falsy or when the
        ``output`` items carried content but no text. ``str(response)``
        when neither layout is recognised.
    """
    if not response:
        return ''
    if not isinstance(response, dict):
        return str(response)

    if 'output' in response:
        saw_content = False
        # ``or ()`` tolerates an explicit ``"output": null``.
        for item in response['output'] or ():
            if not isinstance(item, dict) or item.get('type') == 'reasoning':
                continue

            content = item.get('content')
            if isinstance(content, str):
                text = content
            elif isinstance(content, list):
                text = _join_text_parts(content)
            else:
                continue

            if text:
                return text
            # Remember that a content-bearing item existed, but keep
            # looking: a later item may hold the actual answer.
            saw_content = True

        if saw_content:
            return ''

    choices = response.get('choices')
    if choices:
        message = choices[0].get('message') or {}
        return message.get('content') or ''

    return str(response)
def clean_text(text):
    """Reduce a raw model reply to the bare recognised text.

    Steps, in order:

    1. Strip surrounding whitespace and Markdown code-fence markers.
    2. If what remains parses as a JSON object, replace it with the value
       of the first known field that is present.
    3. Delete a fixed list of instruction/explanation phrases.
    4. Drop leftover ``"text":`` fragments and all quote characters.
    5. If any ideographs in U+4E00..U+9FFF are present, keep only those.

    Args:
        text: Raw reply text; falsy values yield ``''``.

    Returns:
        The cleaned string, stripped of surrounding whitespace.
    """
    if not text:
        return ''

    cleaned = text.strip()

    # Remove code-fence markers (opening with or without a language tag).
    for fence in ('```json', '```'):
        if cleaned.startswith(fence):
            cleaned = cleaned[len(fence):]
    if cleaned.endswith('```'):
        cleaned = cleaned[:-3]
    cleaned = cleaned.strip()

    # Try to interpret the reply as JSON and pull out the text field.
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        candidate_fields = (
            'genealogy_traditional', 'traditional', 'text', 'content', 'result',
        )
        chosen = next((name for name in candidate_fields if name in parsed), None)
        if chosen is not None:
            cleaned = str(parsed[chosen])

    # Strip explanatory wording. Replacements are applied one after another
    # in this exact order, so the sequence must not be changed.
    noise_phrases = (
        '请分析', '要求', '提取', '转换', '繁体', '简体',
        'genealogy', 'traditional', 'simplified',
        '原始', '原文', 'JSON', '格式', '输出',
        'reasoning', 'thinking', '思考', '分析',
        '我现在需要', '首先', '然后', '接下来',
        '根据图片', '图片中', '识别', 'OCR',
    )
    for phrase in noise_phrases:
        cleaned = cleaned.replace(phrase, '')

    # Remove residual JSON structure, then every remaining quote character.
    cleaned = re.sub(r'["\']text["\']\s*[,:]\s*["\']', '', cleaned)
    cleaned = re.sub(r'["\']', '', cleaned)

    # Keep only the Chinese characters when there are any.
    ideograph_runs = re.findall(r'[\u4e00-\u9fff]+', cleaned)
    if ideograph_runs:
        cleaned = ''.join(ideograph_runs)

    return cleaned.strip()
# Candidate prompts to compare against each other.
prompts = [
    '提取图片中的繁体中文文字,直接输出,不要解释。',
    '识别图片中的竖排繁体中文,按阅读顺序输出原文。',
    'OCR识别图片文字,只输出结果。',
    '读取图片中的族谱文字,直接返回。',
    '分析图片,提取所有中文文字,不要分析。',
]

print('=== 测试不同Prompt效果 ===')
for i, prompt in enumerate(prompts):
    # A real image URL is needed to actually exercise the API; in this
    # test mode each prompt is only printed for reference.
    print(
        f'\nPrompt {i+1}: {prompt}',
        '-' * 50,
        '(需要实际图片URL进行测试)',
        sep='\n',
    )

# Hand-transcribed sample: the text a correct extraction is expected to
# produce for the reference image.
print('\n=== 预期提取结果(根据图片手动识别)===')
print(
    '因公图片原文(竖排繁体):',
    '因公',
    '字廷大授南州刺史上距陽公三十五世後漢延康元年二月初六日渡',
    '婺州之金華縣長樂鄉 娶林氏生三子 塟藤就村見有石柱石人華表',
    sep='\n',
)
|