linh
/
genealogy-app


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
							import requests
import json
import re
import io
import base64
from PIL import Image

def get_normalized_base64_image(image_url, max_dim=2000, quality=85):
    """Download an image and return it as a normalized JPEG data URL.

    The image is fetched over HTTP, converted to RGB when it is in any other
    mode, scaled down (aspect ratio kept) so that its longer side is at most
    ``max_dim`` pixels, and re-encoded as JPEG.

    Args:
        image_url: URL of the image to download.
        max_dim: Maximum allowed length, in pixels, of the longer side.
        quality: JPEG quality handed to the encoder.

    Returns:
        A ``data:image/jpeg;base64,...`` string on success. If any step
        fails, the error is printed and ``image_url`` is returned unchanged
        (deliberate best-effort fallback).
    """
    try:
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()

        with Image.open(io.BytesIO(response.content)) as source:
            img = source if source.mode == 'RGB' else source.convert('RGB')

            longest_side = max(img.width, img.height)
            if longest_side > max_dim:
                ratio = max_dim / longest_side
                # Clamp to 1 px: for a very elongated image the short side
                # would otherwise truncate to 0 and make resize() fail.
                new_size = (
                    max(1, int(img.width * ratio)),
                    max(1, int(img.height * ratio)),
                )
                img = img.resize(new_size, Image.Resampling.LANCZOS)

            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=quality)

            b64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
            return f'data:image/jpeg;base64,{b64_str}'
    except Exception as e:
        # Best-effort: any download/decode/encode failure falls back to the
        # original URL so the caller can still try to use it.
        print(f'Error normalizing image: {e}')
        return image_url

def call_doubao_image_api(image_url, prompt, api_key=None, verify=True):
    """Send an image and a text prompt to the Doubao model and return the reply.

    The image is first passed through ``get_normalized_base64_image`` and the
    result is posted, together with ``prompt``, as a single user message.

    Args:
        image_url: URL of the image to analyse.
        prompt: Instruction text sent alongside the image.
        api_key: Bearer token for the API. When omitted, the ``ARK_API_KEY``
            environment variable is used, then the legacy embedded key.
        verify: TLS verification setting forwarded to ``requests.post``.
            Defaults to ``True`` so the bearer token is never sent over an
            unauthenticated connection; pass ``False`` (or a CA bundle path)
            explicitly if the local network setup requires it.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (the status
        code or exception is printed).
    """
    import os  # local import so this function does not depend on file-level edits

    # NOTE(review): a credential committed to source control should be
    # treated as leaked. Rotate this key and drop the literal once every
    # deployment provides ARK_API_KEY (or passes api_key explicitly).
    legacy_api_key = 'a1800657-9212-4afe-9b7c-b49f015c54d3'
    api_key = api_key or os.environ.get('ARK_API_KEY') or legacy_api_key
    api_url = 'https://ark.cn-beijing.volces.com/api/v3/responses'

    ai_payload_url = get_normalized_base64_image(image_url)

    payload = {
        'model': 'doubao-seed-1-8-251228',
        'stream': False,
        'input': [
            {
                'role': 'user',
                'content': [
                    {'type': 'input_image', 'image_url': ai_payload_url},
                    {'type': 'input_text', 'text': prompt},
                ],
            }
        ],
    }

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }

    try:
        response = requests.post(
            api_url,
            json=payload,
            headers=headers,
            timeout=120,
            verify=verify,
            # Explicit None per scheme; presumably meant to bypass any proxy
            # configured in the environment for this request.
            proxies={'http': None, 'https': None},
        )

        if response.status_code == 200:
            return response.json()

        print(f'API Error: {response.status_code}')
        return None
    except Exception as e:
        # Deliberately broad: network errors and undecodable bodies are all
        # reported the same way and signalled to the caller as None.
        print(f'Exception: {e}')
        return None

def extract_text_from_response(response):
    """Extract the reply text from a parsed API response.

    Two response layouts are recognised:

    * ``{'output': [...]}``: the first non-reasoning item whose ``content``
      is a string or a list is used. For a list, bare strings and parts
      typed ``'output_text'`` or ``'text'`` are concatenated; every other
      part (e.g. reasoning) is ignored.
    * ``{'choices': [{'message': {'content': ...}}]}``: the first choice's
      message content is returned.

    Args:
        response: Decoded JSON response (a dict), or a falsy value.

    Returns:
        The extracted text; ``''`` for a falsy response; ``str(response)``
        when neither layout yields anything.
    """
    if not response:
        return ''

    # Message parts are typed 'output_text' by the Responses-style endpoint;
    # plain 'text' is kept for backward compatibility.
    text_part_types = ('output_text', 'text')

    if 'output' in response:
        for item in response['output']:
            # Skip malformed entries and the model's reasoning trace.
            if not isinstance(item, dict) or item.get('type') == 'reasoning':
                continue

            content = item.get('content')
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                text_parts = []
                for part in content:
                    if isinstance(part, str):
                        text_parts.append(part)
                    elif (isinstance(part, dict)
                          and part.get('type') in text_part_types):
                        # `or ''` guards against an explicit null text field.
                        text_parts.append(part.get('text') or '')
                return ''.join(text_parts)

    if 'choices' in response and len(response['choices']) > 0:
        message = response['choices'][0].get('message', {})
        return message.get('content') or ''

    # Unknown layout: surface the raw payload rather than losing it.
    return str(response)

def clean_text(text):
    """Reduce a raw model reply to its bare Chinese text.

    Steps, in order: trim whitespace, peel off a Markdown code fence, unwrap
    a JSON object by taking the first known field, delete a fixed list of
    explanatory words, drop quote characters and leftover ``"text":``
    fragments, and finally keep only runs of characters in U+4E00-U+9FFF
    (if there are none, the text is left as it is at that point).

    Returns ``''`` for a falsy input.
    """
    if not text:
        return ''

    cleaned = text.strip()

    # Peel off code-fence markers.
    for opener in ('```json', '```'):
        if cleaned.startswith(opener):
            cleaned = cleaned[len(opener):]
    if cleaned.endswith('```'):
        cleaned = cleaned[:-3]
    cleaned = cleaned.strip()

    # If the reply is a JSON object, take the first recognised field.
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        candidate_keys = ('genealogy_traditional', 'traditional', 'text', 'content', 'result')
        chosen = next((k for k in candidate_keys if k in parsed), None)
        if chosen is not None:
            cleaned = str(parsed[chosen])

    # Delete explanatory words. Replacements are applied one after another,
    # in this exact order, because an earlier removal can expose a later match.
    noise_words = (
        '请分析', '要求', '提取', '转换', '繁体', '简体',
        'genealogy', 'traditional', 'simplified',
        '原始', '原文', 'JSON', '格式', '输出',
        'reasoning', 'thinking', '思考', '分析',
        '我现在需要', '首先', '然后', '接下来',
        '根据图片', '图片中', '识别', 'OCR',
    )
    for word in noise_words:
        cleaned = cleaned.replace(word, '')

    # Remove leftover JSON scaffolding and every quote character.
    cleaned = re.sub(r'["\']text["\']\s*[,:]\s*["\']', '', cleaned)
    cleaned = re.sub(r'["\']', '', cleaned)

    # Keep only the Chinese runs, when there are any.
    hanzi = ''.join(re.findall(r'[\u4e00-\u9fff]+', cleaned))
    if hanzi:
        cleaned = hanzi

    return cleaned.strip()

# Candidate prompts to compare against each other.
prompts = [
    '提取图片中的繁体中文文字，直接输出，不要解释。',
    '识别图片中的竖排繁体中文，按阅读顺序输出原文。',
    'OCR识别图片文字，只输出结果。',
    '读取图片中的族谱文字，直接返回。',
    '分析图片，提取所有中文文字，不要分析。',
]

print('=== 测试不同Prompt效果 ===')
for i, prompt in enumerate(prompts):
    # A real image URL is required to call the API, so this run only lists
    # each prompt for reference.
    print(
        f'\nPrompt {i+1}: {prompt}',
        '-' * 50,
        '(需要实际图片URL进行测试)',
        sep='\n',
    )

# Hand-transcribed sample: the text a correct extraction should produce for
# the image supplied by the user.
print(
    '\n=== 预期提取结果（根据图片手动识别）===',
    '因公图片原文（竖排繁体）：',
    '因公',
    '字廷大授南州刺史上距陽公三十五世後漢延康元年二月初六日渡',
    '婺州之金華縣長樂鄉 娶林氏生三子 塟藤就村見有石柱石人華表',
    sep='\n',
)