瀏覽代碼

commit 提取族谱原文

林海 2 天之前
父節點
當前提交
d524dda987
共有 5 個文件被更改,包括 1943 次插入和 0 次刪除
  1. 1412 0
      app.py
  2. 1 0
      requirements.txt
  3. 342 0
      templates/batch_genealogy.html
  4. 3 0
      templates/layout.html
  5. 185 0
      test_extract.py

+ 1412 - 0
app.py

@@ -151,6 +151,16 @@ def manual_simplify(text):
         result += mapping.get(char, char)
     return result
 
+def convert_to_simplified(text):
+    """Convert Traditional Chinese text to Simplified Chinese.
+
+    Falsy input (``None`` or ``""``) is returned unchanged. ``zhconv`` is
+    tried first; if importing it or converting with it raises, the text is
+    handed to ``manual_simplify`` instead.
+    """
+    if text:
+        try:
+            import zhconv
+            converted = zhconv.convert(text, 'zh-hans')
+        except Exception:
+            # zhconv is missing or failed: use the hand-written mapping.
+            converted = manual_simplify(text)
+        return converted
+    return text
+
 def _build_reverse_simplify_map():
     """
     Build a reverse map from simplified char -> list of traditional chars
@@ -1114,6 +1124,12 @@ def members():
     
     return render_template('members.html', members=members, search_name=search_name, page=page, total_pages=total_pages, total=total)
 
+@app.route('/manager/batch_genealogy')
+def batch_genealogy():
+    """Render the batch genealogy page; visitors without a session go to login."""
+    logged_in = 'user_id' in session
+    if logged_in:
+        return render_template('batch_genealogy.html')
+    return redirect(url_for('login'))
+
 @app.route('/manager/suspected_errors')
 def suspected_errors():
     if 'user_id' not in session:
@@ -1597,6 +1613,319 @@ def get_members():
     finally:
         conn.close()
 
+def call_doubao_api(prompt, image_url=None):
+    """Send a text prompt to the Doubao responses endpoint and return the reply.
+
+    Args:
+        prompt: Text sent as the single user message.
+        image_url: Accepted for interface compatibility; not used here.
+
+    Returns:
+        The reply text when a known response shape is recognised, the
+        ``str()`` of the decoded JSON when none is, or ``None`` on a
+        non-200 status or any exception.
+    """
+    import os
+
+    # SECURITY: this key was committed to version control and should be
+    # rotated. ARK_API_KEY takes precedence so it can be replaced without a
+    # code change; the literal stays only as a backward-compatible fallback.
+    api_key = os.environ.get("ARK_API_KEY") or "a1800657-9212-4afe-9b7c-b49f015c54d3"
+    api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"
+    # Responses-style APIs label reply text parts "output_text"; "text" is
+    # kept for the shape this parser originally expected.
+    text_part_types = ('text', 'output_text')
+
+    payload = {
+        "model": "doubao-seed-1-8-251228",
+        "stream": False,
+        "input": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": prompt}
+                ]
+            }
+        ]
+    }
+
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+
+    try:
+        # NOTE(review): verify=False disables TLS certificate checking for a
+        # request that carries the API key. Left as-is because the deployment
+        # may depend on it; confirm and remove if possible.
+        response = requests.post(
+            api_url,
+            json=payload,
+            headers=headers,
+            timeout=120,
+            verify=False,
+            proxies={"http": None, "https": None}
+        )
+
+        if response.status_code != 200:
+            print(f"[AI API] Error: {response.status_code} - {response.text}")
+            return None
+
+        result = response.json()
+        print(f"[AI API] Raw response: {result}")
+
+        # Responses format: first usable text of a "message" output item.
+        if 'output' in result:
+            for item in result['output']:
+                if item.get('type') != 'message':
+                    continue
+                content = item.get('content')
+                if isinstance(content, str):
+                    return content
+                if isinstance(content, list):
+                    for part in content:
+                        if isinstance(part, dict) and part.get('type') in text_part_types:
+                            return part.get('text', '')
+                elif isinstance(content, dict) and 'text' in content:
+                    return content.get('text', '')
+        # Chat-completions format.
+        if 'choices' in result and len(result['choices']) > 0:
+            message = result['choices'][0].get('message', {})
+            return message.get('content', '')
+        # Top-level text field.
+        if 'text' in result:
+            return result['text']
+        # Top-level message, either a string or a dict with "content".
+        if 'message' in result:
+            msg = result['message']
+            if isinstance(msg, str):
+                return msg
+            elif isinstance(msg, dict) and 'content' in msg:
+                return msg['content']
+        # Unknown shape: hand back the whole payload as text.
+        return str(result)
+    except Exception as e:
+        print(f"[AI API] Exception: {e}")
+        return None
+
+def parse_ai_response(ai_response):
+    """Extract the traditional and simplified genealogy text from an AI reply.
+
+    Args:
+        ai_response: Reply text, expected to be a JSON object with the keys
+            ``genealogy_traditional`` and ``genealogy_simplified``, optionally
+            wrapped in a Markdown code fence.
+
+    Returns:
+        ``(None, None)`` for a falsy reply, otherwise a
+        ``(traditional, simplified)`` pair of strings. Either string is
+        empty when the corresponding field could not be found.
+    """
+    import re
+
+    if not ai_response:
+        return None, None
+    if not isinstance(ai_response, str):
+        # Defensive: the upstream parser can hand back non-string content.
+        ai_response = str(ai_response)
+
+    # Strip an opening fence (with or without a language tag) and a closing one.
+    text = re.sub(r'^```[A-Za-z]*\s*', '', ai_response.strip())
+    text = re.sub(r'\s*```$', '', text).strip()
+
+    try:
+        result = json.loads(text)
+    except json.JSONDecodeError:
+        print(f"[AI Parse] JSON decode error: {ai_response[:200]}")
+        result = None
+
+    # Valid JSON is not necessarily an object (it may be a list or scalar).
+    if isinstance(result, dict):
+        # "or ''" turns JSON null into an empty string so callers can use len().
+        traditional = result.get('genealogy_traditional') or ''
+        simplified = result.get('genealogy_simplified') or ''
+        if traditional or simplified:
+            return str(traditional), str(simplified)
+
+    # Fallback: pull the two quoted values straight out of the raw text.
+    traditional_match = re.search(r'genealogy_traditional["\']?\s*[,:]\s*["\']([^"\']+)["\']', ai_response)
+    simplified_match = re.search(r'genealogy_simplified["\']?\s*[,:]\s*["\']([^"\']+)["\']', ai_response)
+
+    traditional = traditional_match.group(1) if traditional_match else ''
+    simplified = simplified_match.group(1) if simplified_match else ''
+
+    return traditional, simplified
+
+@app.route('/manager/api/members/empty_genealogy', methods=['GET'])
+def get_members_empty_genealogy():
+    """Return one page of members whose genealogy text is empty in both scripts.
+
+    Query parameters:
+        page: 1-based page number (default 1; values below 1 are treated as 1).
+        per_page: Page size (default 20; values below 1 are treated as 1).
+
+    Returns:
+        JSON with ``members`` (each enriched with father/mother names) and
+        ``total``; 401 without a session, 400 for non-numeric paging
+        parameters, 500 on a database error.
+    """
+    if 'user_id' not in session:
+        return jsonify({"success": False, "message": "Unauthorized"}), 401
+
+    try:
+        # Clamp to 1 so a zero or negative value cannot yield a negative OFFSET.
+        page = max(int(request.args.get('page', 1)), 1)
+        per_page = max(int(request.args.get('per_page', 20)), 1)
+    except (TypeError, ValueError):
+        return jsonify({"success": False, "message": "分页参数无效"}), 400
+    offset = (page - 1) * per_page
+
+    member_fields = ('id', 'name', 'simplified_name', 'name_word_generation',
+                     'sex', 'occupation', 'notes', 'birth_place')
+
+    conn = get_db_connection()
+    try:
+        with conn.cursor() as cursor:
+            def fetch_parent(child_id, relation_type):
+                """Return the first parent row of the given relation type, or None."""
+                cursor.execute("""
+                    SELECT p.name, p.simplified_name, p.name_word_generation
+                    FROM family_relation_info r
+                    JOIN family_member_info p ON r.parent_mid = p.id
+                    WHERE r.child_mid = %s AND r.relation_type = %s
+                    LIMIT 1
+                """, (child_id, relation_type))
+                return cursor.fetchone()
+
+            # Count total
+            cursor.execute("""
+                SELECT COUNT(*) as total 
+                FROM family_member_info 
+                WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
+                  AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
+            """)
+            total_result = cursor.fetchone()
+            total = total_result['total'] if total_result else 0
+
+            # ORDER BY keeps pages stable; LIMIT/OFFSET alone has no defined order.
+            cursor.execute("""
+                SELECT id, name, simplified_name, name_word_generation, sex, occupation, notes, birth_place
+                FROM family_member_info 
+                WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
+                  AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
+                ORDER BY id
+                LIMIT %s OFFSET %s
+            """, (per_page, offset))
+            members = cursor.fetchall()
+
+            member_list = []
+            for member in members:
+                # relation_type 1 is queried for the father, 2 for the mother.
+                father = fetch_parent(member['id'], 1)
+                mother = fetch_parent(member['id'], 2)
+
+                entry = {field: member[field] for field in member_fields}
+                entry.update({
+                    'father_name': father['name'] if father else None,
+                    'father_simplified_name': father['simplified_name'] if father else None,
+                    'father_generation': father['name_word_generation'] if father else None,
+                    'mother_name': mother['name'] if mother else None,
+                    'mother_simplified_name': mother['simplified_name'] if mother else None
+                })
+                member_list.append(entry)
+
+            return jsonify({"success": True, "members": member_list, "total": total})
+    except Exception as e:
+        return jsonify({"success": False, "message": f"获取成员失败: {e}"}), 500
+    finally:
+        conn.close()
+
+@app.route('/manager/api/members/batch_process_genealogy', methods=['POST'])
+def batch_process_genealogy():
+    """Generate and store genealogy text for 1-10 members, synchronously.
+
+    Expects a JSON body ``{"member_ids": [...]}``. For each id the member and
+    its parents are loaded, a prompt is sent to ``call_doubao_api``, and the
+    parsed result is written to ``family_member_info``.
+
+    Returns:
+        JSON with one entry per member in ``results``; 401 without a session,
+        400 for a missing, malformed or oversized id list, 500 on an
+        unexpected error.
+    """
+    if 'user_id' not in session:
+        return jsonify({"success": False, "message": "Unauthorized"}), 401
+
+    # silent=True: a missing or non-JSON body becomes {} instead of raising.
+    data = request.get_json(silent=True) or {}
+    member_ids = data.get('member_ids', []) if isinstance(data, dict) else []
+
+    if not isinstance(member_ids, list) or not member_ids or len(member_ids) > 10:
+        return jsonify({"success": False, "message": "请选择1-10个成员进行处理"}), 400
+
+    conn = get_db_connection()
+    results = []
+
+    try:
+        for member_id in member_ids:
+            with conn.cursor() as cursor:
+                cursor.execute("""
+                    SELECT id, name, simplified_name, name_word_generation, 
+                           birth_place, occupation, notes, sex
+                    FROM family_member_info WHERE id = %s
+                """, (member_id,))
+                member = cursor.fetchone()
+
+                # Check before enriching: assigning keys on None would raise
+                # and abort the whole batch for one unknown id.
+                if member:
+                    # Father (relation_type = 1)
+                    cursor.execute("""
+                        SELECT p.name, p.simplified_name
+                        FROM family_relation_info r
+                        JOIN family_member_info p ON r.parent_mid = p.id
+                        WHERE r.child_mid = %s AND r.relation_type = 1
+                        LIMIT 1
+                    """, (member_id,))
+                    father = cursor.fetchone()
+
+                    # Mother (relation_type = 2)
+                    cursor.execute("""
+                        SELECT p.name, p.simplified_name
+                        FROM family_relation_info r
+                        JOIN family_member_info p ON r.parent_mid = p.id
+                        WHERE r.child_mid = %s AND r.relation_type = 2
+                        LIMIT 1
+                    """, (member_id,))
+                    mother = cursor.fetchone()
+
+                    member['father_name'] = father['name'] if father else None
+                    member['father_simplified_name'] = father['simplified_name'] if father else None
+                    member['mother_name'] = mother['name'] if mother else None
+                    member['mother_simplified_name'] = mother['simplified_name'] if mother else None
+
+            if not member:
+                results.append({"member_id": member_id, "success": False, "message": "成员不存在"})
+                continue
+
+            # Build the AI prompt
+            member_info = f"""
+            姓名(繁体):{member['name']}
+            姓名(简体):{member['simplified_name'] or '未知'}
+            世系世代:{member['name_word_generation'] or '未知'}
+            父亲姓名:{member['father_name'] or '未知'}
+            母亲姓名:{member['mother_name'] or '未知'}
+            出生地:{member['birth_place'] or '未知'}
+            职业:{member['occupation'] or '未知'}
+            备注:{member['notes'] or '无'}
+            """
+
+            prompt = f"""
+            请根据以下人员信息,模拟生成该人员的族谱原文:
+            
+            {member_info}
+            
+            请输出两个字段:
+            1. genealogy_traditional: 族谱原文(繁体中文,模仿传统族谱格式)
+            2. genealogy_simplified: 族谱原文(简体中文,将繁体转换为简体)
+            
+            请严格按照JSON格式输出,不要包含任何额外解释:
+            {{
+                "genealogy_traditional": "繁体族谱原文内容",
+                "genealogy_simplified": "简体族谱原文内容"
+            }}
+            """
+
+            ai_response = call_doubao_api(prompt)
+            print(f"[AI Response] Member {member_id}: {ai_response}")
+
+            if not ai_response:
+                results.append({
+                    "member_id": member_id,
+                    "name": member['name'],
+                    "success": False,
+                    "message": "AI调用失败"
+                })
+                continue
+
+            traditional, simplified = parse_ai_response(ai_response)
+            # Normalise so the preview slicing below never sees None.
+            traditional = traditional or ''
+            simplified = simplified or ''
+
+            if not (traditional or simplified):
+                results.append({
+                    "member_id": member_id,
+                    "name": member['name'],
+                    "success": False,
+                    "message": "AI未返回有效数据"
+                })
+                continue
+
+            with conn.cursor() as cursor:
+                cursor.execute("""
+                    UPDATE family_member_info 
+                    SET genealogy_original_traditional = %s, 
+                        genealogy_original_simplified = %s
+                    WHERE id = %s
+                """, (traditional, simplified, member_id))
+            conn.commit()
+
+            # The response carries only a 100-character preview of each text.
+            results.append({
+                "member_id": member_id,
+                "name": member['name'],
+                "success": True,
+                "traditional": traditional[:100] + "..." if len(traditional) > 100 else traditional,
+                "simplified": simplified[:100] + "..." if len(simplified) > 100 else simplified
+            })
+
+        return jsonify({"success": True, "results": results})
+    except Exception as e:
+        print(f"[Batch Process] Exception: {e}")
+        return jsonify({"success": False, "message": f"批量处理失败: {e}"}), 500
+    finally:
+        conn.close()
+
 @app.route('/manager/api/member/<int:member_id>')
 def get_member(member_id):
     if 'user_id' not in session:
@@ -3050,5 +3379,1088 @@ def delete_settlement(id):
     finally:
         conn.close()
 
+# 异步批量处理族谱原文功能
+import uuid
+
+def init_batch_task_table():
+    """Create ``batch_genealogy_task`` if needed and ensure ``last_processed_id`` exists.
+
+    Failures are printed rather than raised; the connection is always closed.
+    """
+    create_table_sql = """
+                CREATE TABLE IF NOT EXISTS batch_genealogy_task (
+                    id INT AUTO_INCREMENT PRIMARY KEY,
+                    task_id VARCHAR(64) UNIQUE NOT NULL,
+                    user_id INT NOT NULL,
+                    status VARCHAR(20) DEFAULT 'pending',
+                    total_count INT DEFAULT 0,
+                    completed_count INT DEFAULT 0,
+                    failed_count INT DEFAULT 0,
+                    last_processed_id INT DEFAULT 0,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+                    results TEXT
+                );
+            """
+    connection = get_db_connection()
+    try:
+        with connection.cursor() as cur:
+            cur.execute(create_table_sql)
+            # A table created before this column was introduced lacks it; add it.
+            cur.execute("SHOW COLUMNS FROM batch_genealogy_task LIKE 'last_processed_id'")
+            column_row = cur.fetchone()
+            if not column_row:
+                cur.execute("ALTER TABLE batch_genealogy_task ADD COLUMN last_processed_id INT DEFAULT 0")
+        connection.commit()
+        print("[Database] batch_genealogy_task table initialized")
+    except Exception as err:
+        print(f"[Database] Error creating batch_genealogy_task table: {err}")
+    finally:
+        connection.close()
+
+# Run the table initialisation once, when this module is imported.
+init_batch_task_table()
+
+def async_process_genealogy_task(task_id, member_ids, user_id):
+    """Process a batch genealogy task; intended to run in a background thread.
+
+    Marks the task ``processing``, generates and stores genealogy text for
+    each member, then records the final status, counters and per-member
+    results on the ``batch_genealogy_task`` row.
+
+    Args:
+        task_id: Value of ``batch_genealogy_task.task_id`` to update.
+        member_ids: Ids of the ``family_member_info`` rows to process.
+        user_id: Id of the requesting user; not used by this function.
+    """
+    results = []
+
+    conn = get_db_connection()
+    try:
+        # Mark the task as in progress.
+        with conn.cursor() as cursor:
+            cursor.execute("""
+                UPDATE batch_genealogy_task 
+                SET status = 'processing', total_count = %s 
+                WHERE task_id = %s
+            """, (len(member_ids), task_id))
+        conn.commit()
+
+        completed_count = 0
+        failed_count = 0
+
+        for member_id in member_ids:
+            try:
+                with conn.cursor() as cursor:
+                    cursor.execute("""
+                        SELECT id, name, simplified_name, name_word_generation, 
+                               birth_place, occupation, notes, sex
+                        FROM family_member_info WHERE id = %s
+                    """, (member_id,))
+                    member = cursor.fetchone()
+
+                    # Enrich only an existing row. Assigning keys on None used
+                    # to raise here, so a missing member was reported as a
+                    # fetch failure instead of "成员不存在".
+                    if member:
+                        # Father (relation_type = 1)
+                        cursor.execute("""
+                            SELECT p.name, p.simplified_name
+                            FROM family_relation_info r
+                            JOIN family_member_info p ON r.parent_mid = p.id
+                            WHERE r.child_mid = %s AND r.relation_type = 1
+                            LIMIT 1
+                        """, (member_id,))
+                        father = cursor.fetchone()
+
+                        # Mother (relation_type = 2)
+                        cursor.execute("""
+                            SELECT p.name, p.simplified_name
+                            FROM family_relation_info r
+                            JOIN family_member_info p ON r.parent_mid = p.id
+                            WHERE r.child_mid = %s AND r.relation_type = 2
+                            LIMIT 1
+                        """, (member_id,))
+                        mother = cursor.fetchone()
+
+                        member['father_name'] = father['name'] if father else None
+                        member['father_simplified_name'] = father['simplified_name'] if father else None
+                        member['mother_name'] = mother['name'] if mother else None
+                        member['mother_simplified_name'] = mother['simplified_name'] if mother else None
+
+            except Exception as e:
+                print(f"[Async Process] Error getting member {member_id}: {e}")
+                results.append({
+                    "member_id": member_id,
+                    "name": "未知",
+                    "success": False,
+                    "message": f"获取成员信息失败: {e}"
+                })
+                failed_count += 1
+                continue
+
+            if not member:
+                results.append({
+                    "member_id": member_id,
+                    "name": "未知",
+                    "success": False,
+                    "message": "成员不存在"
+                })
+                failed_count += 1
+                continue
+
+            # Build the AI prompt
+            member_info = f"""
+            姓名(繁体):{member['name']}
+            姓名(简体):{member['simplified_name'] or '未知'}
+            世系世代:{member['name_word_generation'] or '未知'}
+            父亲姓名:{member['father_name'] or '未知'}
+            母亲姓名:{member['mother_name'] or '未知'}
+            出生地:{member['birth_place'] or '未知'}
+            职业:{member['occupation'] or '未知'}
+            备注:{member['notes'] or '无'}
+            """
+
+            prompt = f"""
+            请根据以下人员信息,模拟生成该人员的族谱原文:
+            
+            {member_info}
+            
+            请输出两个字段:
+            1. genealogy_traditional: 族谱原文(繁体中文,模仿传统族谱格式)
+            2. genealogy_simplified: 族谱原文(简体中文,将繁体转换为简体)
+            
+            请严格按照JSON格式输出,不要包含任何额外解释:
+            {{
+                "genealogy_traditional": "繁体族谱原文内容",
+                "genealogy_simplified": "简体族谱原文内容"
+            }}
+            """
+
+            ai_response = call_doubao_api(prompt)
+
+            if not ai_response:
+                results.append({
+                    "member_id": member_id,
+                    "name": member['name'],
+                    "success": False,
+                    "message": "AI调用失败"
+                })
+                failed_count += 1
+                continue
+
+            traditional, simplified = parse_ai_response(ai_response)
+            # Normalise so the preview slicing below never sees None.
+            traditional = traditional or ''
+            simplified = simplified or ''
+
+            if not (traditional or simplified):
+                results.append({
+                    "member_id": member_id,
+                    "name": member['name'],
+                    "success": False,
+                    "message": "AI未返回有效数据"
+                })
+                failed_count += 1
+                continue
+
+            try:
+                with conn.cursor() as cursor:
+                    cursor.execute("""
+                        UPDATE family_member_info 
+                        SET genealogy_original_traditional = %s, 
+                            genealogy_original_simplified = %s
+                        WHERE id = %s
+                    """, (traditional, simplified, member_id))
+                conn.commit()
+
+                # Only a 100-character preview of each text is kept in results.
+                results.append({
+                    "member_id": member_id,
+                    "name": member['name'],
+                    "success": True,
+                    "traditional": traditional[:100] + "..." if len(traditional) > 100 else traditional,
+                    "simplified": simplified[:100] + "..." if len(simplified) > 100 else simplified
+                })
+                completed_count += 1
+            except Exception as e:
+                print(f"[Async Process] Error updating member {member_id}: {e}")
+                results.append({
+                    "member_id": member_id,
+                    "name": member['name'],
+                    "success": False,
+                    "message": f"保存失败: {e}"
+                })
+                failed_count += 1
+
+        # Record the final status and results.
+        status = 'completed' if failed_count == 0 else 'completed_with_errors'
+        with conn.cursor() as cursor:
+            cursor.execute("""
+                UPDATE batch_genealogy_task 
+                SET status = %s, completed_count = %s, failed_count = %s, results = %s
+                WHERE task_id = %s
+            """, (status, completed_count, failed_count, json.dumps(results, ensure_ascii=False), task_id))
+        conn.commit()
+
+        print(f"[Async Process] Task {task_id} completed: {completed_count} success, {failed_count} failed")
+
+    except Exception as e:
+        print(f"[Async Process] Error in task {task_id}: {e}")
+        # The connection may be what failed, so guard this write too;
+        # otherwise the exception escapes the thread.
+        try:
+            with conn.cursor() as cursor:
+                cursor.execute("""
+                    UPDATE batch_genealogy_task 
+                    SET status = 'failed', results = %s
+                    WHERE task_id = %s
+                """, (json.dumps({"error": str(e)}, ensure_ascii=False), task_id))
+            conn.commit()
+        except Exception as update_error:
+            print(f"[Async Process] Could not mark task {task_id} as failed: {update_error}")
+    finally:
+        conn.close()
+
+@app.route('/manager/api/members/batch_process_genealogy_async', methods=['POST'])
+def batch_process_genealogy_async():
+    """Create a batch genealogy task and start processing it in a thread.
+
+    Expects a JSON body ``{"member_ids": [...]}``. A ``pending`` row is
+    inserted into ``batch_genealogy_task`` and
+    ``async_process_genealogy_task`` is started on a new thread.
+
+    Returns:
+        JSON containing the new ``task_id``; 401 without a session, 400 when
+        no usable id list is supplied.
+    """
+    if 'user_id' not in session:
+        return jsonify({"success": False, "message": "Unauthorized"}), 401
+
+    # silent=True: a missing or non-JSON body becomes {} instead of raising.
+    data = request.get_json(silent=True) or {}
+    member_ids = data.get('member_ids', []) if isinstance(data, dict) else []
+
+    if not isinstance(member_ids, list) or not member_ids:
+        return jsonify({"success": False, "message": "请选择成员进行处理"}), 400
+
+    # Read the user id here; the worker thread must not touch the session.
+    user_id = session['user_id']
+    task_id = str(uuid.uuid4())
+
+    # Persist the task before starting the worker that updates it.
+    conn = get_db_connection()
+    try:
+        with conn.cursor() as cursor:
+            cursor.execute("""
+                INSERT INTO batch_genealogy_task (task_id, user_id, status, total_count)
+                VALUES (%s, %s, 'pending', %s)
+            """, (task_id, user_id, len(member_ids)))
+        conn.commit()
+    finally:
+        conn.close()
+
+    threading.Thread(target=async_process_genealogy_task, args=(task_id, member_ids, user_id)).start()
+
+    return jsonify({
+        "success": True,
+        "task_id": task_id,
+        "message": "任务已创建,正在后台处理中"
+    })
+
+@app.route('/manager/api/members/batch_task_status/<task_id>', methods=['GET'])
+def get_batch_task_status(task_id):
+    """Return status, counters and results for one of the current user's tasks.
+
+    Returns:
+        JSON with the task under ``task``; 401 without a session, 404 when
+        no task with this id belongs to the session user.
+    """
+    if 'user_id' not in session:
+        return jsonify({"success": False, "message": "Unauthorized"}), 401
+
+    conn = get_db_connection()
+    try:
+        with conn.cursor() as cursor:
+            cursor.execute("""
+                SELECT task_id, status, total_count, completed_count, failed_count, 
+                       created_at, updated_at, results
+                FROM batch_genealogy_task 
+                WHERE task_id = %s AND user_id = %s
+            """, (task_id, session['user_id']))
+            task = cursor.fetchone()
+
+            if not task:
+                return jsonify({"success": False, "message": "任务不存在或无权访问"}), 404
+
+            result = {
+                "task_id": task['task_id'],
+                "status": task['status'],
+                "total_count": task['total_count'],
+                "completed_count": task['completed_count'],
+                "failed_count": task['failed_count'],
+                "created_at": task['created_at'].isoformat() if task['created_at'] else None,
+                "updated_at": task['updated_at'].isoformat() if task['updated_at'] else None
+            }
+            if task['results']:
+                try:
+                    result['results'] = json.loads(task['results'])
+                except (TypeError, ValueError):
+                    # Not valid JSON: return the stored value unchanged.
+                    # A bare except here would also swallow KeyboardInterrupt
+                    # and SystemExit.
+                    result['results'] = task['results']
+            return jsonify({"success": True, "task": result})
+    finally:
+        conn.close()
+
+@app.route('/manager/api/members/batch_tasks', methods=['GET'])
+def get_batch_tasks():
+    """List the session user's 20 most recently created batch tasks."""
+    if 'user_id' not in session:
+        return jsonify({"success": False, "message": "Unauthorized"}), 401
+
+    def iso_or_none(moment):
+        """Format a timestamp as ISO 8601, passing falsy values through as None."""
+        return moment.isoformat() if moment else None
+
+    connection = get_db_connection()
+    try:
+        with connection.cursor() as cur:
+            cur.execute("""
+                SELECT task_id, status, total_count, completed_count, failed_count, 
+                       last_processed_id, created_at, updated_at
+                FROM batch_genealogy_task 
+                WHERE user_id = %s
+                ORDER BY created_at DESC
+                LIMIT 20
+            """, (session['user_id'],))
+            rows = cur.fetchall()
+
+            summaries = [
+                {
+                    "task_id": row['task_id'],
+                    "status": row['status'],
+                    "total_count": row['total_count'],
+                    "completed_count": row['completed_count'],
+                    "failed_count": row['failed_count'],
+                    "last_processed_id": row['last_processed_id'],
+                    "created_at": iso_or_none(row['created_at']),
+                    "updated_at": iso_or_none(row['updated_at'])
+                }
+                for row in rows
+            ]
+
+            return jsonify({"success": True, "tasks": summaries})
+    finally:
+        connection.close()
+
+def call_doubao_image_api(image_url, prompt):
+    """Send an image plus a text prompt to the Doubao responses endpoint.
+
+    Args:
+        image_url: Image reference, passed through
+            ``get_normalized_base64_image`` before being sent.
+        prompt: Instruction text sent alongside the image.
+
+    Returns:
+        The decoded JSON response on HTTP 200, otherwise ``None`` (non-200
+        status or any exception raised by the request).
+    """
+    import os
+
+    # SECURITY: this key was committed to version control and should be
+    # rotated. ARK_API_KEY takes precedence so it can be replaced without a
+    # code change; the literal stays only as a backward-compatible fallback.
+    api_key = os.environ.get("ARK_API_KEY") or "a1800657-9212-4afe-9b7c-b49f015c54d3"
+    api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"
+
+    ai_payload_url = get_normalized_base64_image(image_url)
+
+    payload = {
+        "model": "doubao-seed-1-8-251228",
+        "stream": False,
+        "input": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_image", "image_url": ai_payload_url},
+                    {"type": "input_text", "text": prompt}
+                ]
+            }
+        ]
+    }
+
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+
+    try:
+        # NOTE(review): verify=False disables TLS certificate checking for a
+        # request that carries the API key. Left as-is because the deployment
+        # may depend on it; confirm and remove if possible.
+        response = requests.post(
+            api_url,
+            json=payload,
+            headers=headers,
+            timeout=120,
+            verify=False,
+            proxies={"http": None, "https": None}
+        )
+
+        if response.status_code == 200:
+            return response.json()
+        print(f"[Image AI API] Error: {response.status_code} - {response.text}")
+        return None
+    except Exception as e:
+        print(f"[Image AI API] Exception: {e}")
+        return None
+
+def extract_pure_text(response):
+    """Extract plain text from an API response, preferring the final answer.
+
+    Lookup order: ``message`` items in ``output``, then ``reasoning`` items
+    (summary plus content), then any ``output`` item whose content is a
+    non-empty string, then ``choices[0].message.content``.
+
+    Args:
+        response: Decoded JSON response, normally a dict.
+
+    Returns:
+        The extracted text; ``''`` for a falsy response; ``str(response)``
+        when nothing matches or the response is not a dict.
+    """
+    if not response:
+        return ''
+    if not isinstance(response, dict):
+        # Key lookups below only make sense on a dict.
+        return str(response)
+
+    # Responses-style APIs label reply text parts "output_text"; "text" is
+    # kept for the shape this parser originally expected.
+    text_part_types = ('text', 'output_text')
+
+    def join_parts(parts):
+        """Concatenate the text of recognised dict parts and bare strings."""
+        pieces = []
+        for part in parts:
+            if isinstance(part, dict):
+                if part.get('type') in text_part_types:
+                    pieces.append(part.get('text', ''))
+            elif isinstance(part, str):
+                pieces.append(part)
+        return ''.join(pieces)
+
+    output = response.get('output')
+    if 'output' in response:
+        items = [item for item in (output or []) if isinstance(item, dict)]
+
+        # Pass 1: message items carry the final answer.
+        for item in items:
+            if item.get('type') == 'message':
+                content = item.get('content')
+                if isinstance(content, str):
+                    return content
+                elif isinstance(content, list):
+                    result = join_parts(content)
+                    if result:
+                        return result
+
+        # Pass 2: with no usable message, fall back to reasoning text.
+        for item in items:
+            if item.get('type') == 'reasoning':
+                content = item.get('content')
+                all_text = ''
+                for part in item.get('summary', []) or []:
+                    if isinstance(part, dict):
+                        if part.get('type') in ('summary_text', 'text'):
+                            all_text += part.get('text', '')
+                    elif isinstance(part, str):
+                        all_text += part
+                if isinstance(content, str):
+                    all_text += content
+                elif isinstance(content, list):
+                    all_text += join_parts(content)
+                if all_text:
+                    return all_text
+
+        # Pass 3: any item whose content is a non-empty string.
+        for item in items:
+            content = item.get('content')
+            if isinstance(content, str) and content:
+                return content
+
+    # Chat-completions (OpenAI-compatible) format.
+    if 'choices' in response and len(response['choices']) > 0:
+        message = response['choices'][0].get('message', {})
+        return message.get('content', '')
+
+    return str(response)
+
+def build_genealogy_prompt(member_name):
+    """
+    Build the OCR extraction prompt for a vertically typeset Traditional
+    Chinese genealogy image.
+
+    The prompt describes the reading order (top to bottom, right to left)
+    and the fields a person's entry usually holds (name, courtesy name,
+    birth order, relation to father, spouse, birth/death dates, burial
+    place, children), then asks for the verbatim entry of ``member_name``.
+    """
+    prompt_text = f"""这是一张竖排繁体中文家谱图片。图片文字采用竖排格式,从上到下、从右到左逐列阅读。
+
+每位人物的记录通常包含以下内容(不一定全有):
+- 辈字加名讳(如:公諱光元)
+- 字号(如:字維亮)
+- 行次(如:行仁一)
+- 与父亲的关系(如:某某公長子、次子、三子)
+- 配偶(如:配李氏、娶王氏)
+- 生卒年月(如:生於某年某月、卒於某年某月)
+- 葬地(如:葬祖山某向、塟於某地)
+- 子嗣(如:子二:長某某、次某某)
+
+任务:找到人物「{member_name}」在图片中的完整记录,将其繁体原文逐字准确复制输出。
+
+要求:
+1. 只输出「{member_name}」这一个人物的记录,不包含其他人的内容
+2. 保持繁体字原貌,不要转换为简体
+3. 保留原文中的标点符号
+4. 不要添加任何解释、标注、序号或额外说明
+5. 直接输出原文内容"""
+    return prompt_text
+
+
+def _extract_from_thinking_output(text):
+    """
+    Pull the final answer out of a reasoning model's chain-of-thought text.
+
+    Text is treated as chain-of-thought only when it contains one of a few
+    marker phrases; otherwise it is returned untouched. For chain-of-thought
+    text two strategies are tried in order:
+      1. Locate the answer-introducing phrase (phrase + colon) that ends
+         latest in the text and take what follows it, cut at the earliest
+         stop marker (a retraction/confirmation phrase or a blank line).
+      2. Otherwise, for the first occurrence of a closing phrase, take the
+         last line of the text preceding it (or its final 400 characters
+         when that text contains no line break).
+    Candidates shorter than 5 characters are rejected; when nothing
+    qualifies, the input is returned unchanged.
+    """
+    # Phrases whose presence marks the text as chain-of-thought output.
+    thinking_markers = ('不对,', '现在确认', '准确复制', '准确的原文是', '正确的输出是', '所以输出这个内容')
+    if all(marker not in text for marker in thinking_markers):
+        return text  # ordinary output, nothing to extract
+
+    print("[CleanText] Detected thinking-model output, extracting final answer")
+
+    # ---- Strategy 1: text after the latest-ending answer-intro phrase ----
+    intro_patterns = (
+        r'准确的原文是[::]\s*',
+        r'正确的输出是[::]\s*',
+        r'现在准确复制[::]\s*',
+        r'准确复制[::]\s*',
+        r'应该是[::]\s*',
+        r'因此输出[::]\s*',
+        r'所以正确.*?是[::]\s*',
+        r'原文是[::]\s*',
+        r'输出[::]\s*',
+    )
+    intro_end = max(
+        (found.end() for pattern in intro_patterns for found in re.finditer(pattern, text)),
+        default=-1,
+    )
+
+    if intro_end >= 0:
+        tail = text[intro_end:]
+        stop_markers = ('不对', '现在确认', '但是', '然而', '\n\n')
+        # A marker at offset 0 (or absent, -1) does not cut the answer.
+        cut_points = [pos for pos in (tail.find(marker) for marker in stop_markers) if pos > 0]
+        answer = tail[:min(cut_points + [len(tail)])].strip()
+        if len(answer) >= 5:
+            print(f"[CleanText] Extracted via answer-intro pattern: '{answer[:80]}'")
+            return answer
+
+    # ---- Strategy 2: last line before a closing/confirmation phrase ----
+    for closing_phrase in ('现在确认', '所以输出这个内容', '这就是.*?的完整记录'):
+        hit = re.search(closing_phrase, text)
+        if hit is None:
+            continue
+        preceding = text[:hit.start()].rstrip()
+        if '\n' in preceding:
+            answer = preceding.rsplit('\n', 1)[1].strip()
+        else:
+            answer = preceding[-400:].strip()
+        if len(answer) >= 5:
+            print(f"[CleanText] Extracted before confirmation phrase: '{answer[:80]}'")
+            return answer
+
+    return text  # both strategies failed: hand the input back unchanged
+
+
+def _apply_char_whitelist(text):
+    """
+    Delete every character outside the whitelist, then trim the result.
+
+    Kept characters are the ranges U+4E00-U+9FFF, U+3400-U+4DBF,
+    U+3000-U+303F and U+FF00-U+FFEF, plus the punctuation marks listed
+    literally in the pattern below.
+    """
+    disallowed = re.compile(
+        r'[^\u4e00-\u9fff\u3400-\u4dbf\u3000-\u303f\uff00-\uffef,。;:、()【】「」『』〔〕·~—…《》]'
+    )
+    filtered = disallowed.sub('', text)
+    return filtered.strip()
+
+
+def clean_genealogy_text(text):
+    """
+    Normalise the genealogy text extracted from an AI response.
+
+    Steps, in order:
+      - return '' for empty input;
+      - strip a leading/trailing Markdown code fence;
+      - if the text parses as a JSON object, take the first of a few known
+        keys as the text;
+      - pass the text through the chain-of-thought extractor (done before
+        line filtering because that step relies on the line structure);
+      - drop blank lines, lines without any non-ASCII character, and lines
+        starting with a note/explanation prefix, then join the rest;
+      - apply the character whitelist.
+    """
+    if not text:
+        return ''
+
+    # Remove an opening ```lang fence and a closing ``` fence, if present.
+    body = re.sub(r'^```[a-z]*\n?', '', text.strip())
+    body = re.sub(r'\n?```$', '', body).strip()
+
+    # A JSON object wrapping the answer: unwrap the first recognised field.
+    try:
+        parsed = json.loads(body)
+    except (json.JSONDecodeError, ValueError):
+        parsed = None
+    if isinstance(parsed, dict):
+        known_keys = ('text', 'content', 'result', 'traditional', 'genealogy_traditional')
+        present = next((key for key in known_keys if key in parsed), None)
+        if present is not None:
+            body = str(parsed[present])
+
+    body = _extract_from_thinking_output(body)
+
+    note_prefix = r'^(注[::]|说明[::]|Note[::]|备注[::])'
+    stripped_lines = (raw.strip() for raw in body.splitlines())
+    kept = [
+        line for line in stripped_lines
+        if line
+        and any(ord(ch) > 127 for ch in line)
+        and not re.match(note_prefix, line)
+    ]
+
+    return _apply_char_whitelist(''.join(kept))
+
+def async_process_all_empty_genealogy(task_id, user_id):
+    """
+    Background worker: fill in genealogy text for every member whose text is empty.
+
+    Members are visited one at a time in ascending id order, starting after
+    the task's stored ``last_processed_id``. For each member the text comes
+    from the first source that yields something:
+      1. OCR of the linked record image via ``call_doubao_image_api``;
+      2. a sentence assembled from the record's stored AI content (JSON);
+      3. a sentence assembled from the member's father/spouse relations.
+    Progress counters and the checkpoint are written to
+    ``batch_genealogy_task`` after every member, and a final status plus the
+    per-member results are written when no pending member remains.
+
+    Connection discipline: a DB connection is held only for each short
+    read/write and never while the AI call is in flight.
+
+    Args:
+        task_id: Key of the ``batch_genealogy_task`` row that holds the
+            checkpoint and receives progress updates.
+        user_id: Accepted from the caller; not used by this function.
+
+    Returns:
+        None.
+    """
+    import time
+
+    # -- 1. Load the checkpoint and stored counters, then release the connection --
+    conn = get_db_connection()
+    try:
+        with conn.cursor() as cursor:
+            cursor.execute(
+                "SELECT last_processed_id, completed_count, failed_count "
+                "FROM batch_genealogy_task WHERE task_id = %s",
+                (task_id,)
+            )
+            task = cursor.fetchone()
+    finally:
+        conn.close()
+
+    # Continue from the stored counters so a resumed run does not overwrite
+    # earlier progress with zeros; a missing row or NULL column counts as 0.
+    last_processed_id = (task['last_processed_id'] or 0) if task else 0
+    completed_count = (task['completed_count'] or 0) if task else 0
+    failed_count = (task['failed_count'] or 0) if task else 0
+    results = []  # per-member outcomes of this run only
+
+    while True:
+        # -- 2. Fetch the next pending member (connection released right after) --
+        conn = get_db_connection()
+        try:
+            with conn.cursor() as cursor:
+                cursor.execute("""
+                    SELECT m.id, m.name, m.name_word_generation, m.source_record_id,
+                           r.oss_url AS image_url, r.ai_content AS record_ai_content
+                    FROM family_member_info m
+                    LEFT JOIN genealogy_records r ON m.source_record_id = r.id
+                    WHERE (m.genealogy_original_traditional IS NULL
+                           OR m.genealogy_original_traditional = ''
+                           OR m.genealogy_original_traditional = 'None')
+                      AND (m.genealogy_original_simplified IS NULL
+                           OR m.genealogy_original_simplified = ''
+                           OR m.genealogy_original_simplified = 'None')
+                      AND m.id > %s
+                    ORDER BY m.id ASC
+                    LIMIT 1
+                """, (last_processed_id,))
+                member = cursor.fetchone()
+        finally:
+            conn.close()
+
+        if not member:
+            break
+
+        member_id = member['id']
+        member_name = member['name']
+        image_url = member['image_url']
+        record_ai_content = member['record_ai_content']
+
+        print(f"[Batch Process] Processing member {member_id}: {member_name}")
+
+        traditional = ""
+        simplified = ""
+        extract_source = "basic_info"
+
+        try:
+            # -- 3. AI extraction from the image (no DB connection is open here) --
+            if image_url:
+                print(f"[Batch Process] Extracting from image: {image_url}")
+                prompt = build_genealogy_prompt(member_name)
+                ai_response = call_doubao_image_api(image_url, prompt)
+                print(f"[Batch Process] AI response for {member_id}: {str(ai_response)[:300]}")
+
+                if ai_response:
+                    raw_text = extract_pure_text(ai_response)
+                    candidate = clean_genealogy_text(raw_text)
+                    print(f"[Batch Process] Cleaned traditional: {candidate[:100]}")
+
+                    # Sanity check: the text must contain at least one CJK
+                    # character of the member's name.
+                    name_chars = [c for c in member_name if '\u4e00' <= c <= '\u9fff']
+                    name_found = any(c in candidate for c in name_chars)
+
+                    if candidate and len(candidate) >= 5 and name_found:
+                        traditional = candidate
+                        simplified = convert_to_simplified(traditional)
+                        extract_source = "image"
+                        print(f"[Batch Process] Image extract OK - trad: {traditional[:80]}")
+                    else:
+                        # Log the rejected text's real length (the candidate is
+                        # discarded simply by not assigning it).
+                        print(f"[Batch Process] Image extract invalid "
+                              f"(name_found={name_found}, len={len(candidate)}), resetting")
+
+            # -- 4. Fallback: assemble a sentence from the record's AI content (in memory) --
+            if not (traditional and simplified) and record_ai_content:
+                print(f"[Batch Process] Fallback: trying record AI content")
+                try:
+                    ai_content = json.loads(record_ai_content)
+                    if isinstance(ai_content, list):
+                        current_person = None
+                        for person in ai_content:
+                            person_name = person.get('original_name', person.get('name', '')).strip()
+                            # Match when either name contains the other.
+                            if person_name and (
+                                member_name in person_name or person_name in member_name
+                            ):
+                                current_person = person
+                                break
+
+                        if current_person:
+                            name = current_person.get('original_name',
+                                                      current_person.get('name', member_name))
+                            father_name = current_person.get('father_name', '')
+                            spouse_name = current_person.get('spouse_name', '')
+                            generation = current_person.get('generation',
+                                                            member['name_word_generation'])
+
+                            traditional = f"{name},{father_name}之子" if father_name else name
+                            if spouse_name:
+                                traditional += f",配{spouse_name}"
+                            if generation:
+                                traditional = f"第{generation}世 " + traditional
+
+                            simplified = convert_to_simplified(traditional)
+                            extract_source = "ai_content"
+                            print(f"[Batch Process] AI content fallback: {traditional[:80]}")
+                        else:
+                            print(f"[Batch Process] No matching person for '{member_name}' in AI content")
+                except Exception as e:
+                    # Best effort: malformed AI content just moves on to the next fallback.
+                    print(f"[Batch Process] Failed to parse record AI content: {e}")
+
+            # -- 5. Last fallback: father/spouse from the relation table (short DB use) --
+            if not (traditional and simplified):
+                print(f"[Batch Process] Fallback: basic info from DB")
+                conn = get_db_connection()
+                try:
+                    with conn.cursor() as cursor:
+                        cursor.execute("""
+                            SELECT p.name FROM family_relation_info r
+                            JOIN family_member_info p ON r.parent_mid = p.id
+                            WHERE r.child_mid = %s AND r.relation_type = 1 LIMIT 1
+                        """, (member_id,))
+                        father = cursor.fetchone()
+
+                        cursor.execute("""
+                            SELECT p.name FROM family_relation_info r
+                            JOIN family_member_info p ON r.parent_mid = p.id
+                            WHERE r.child_mid = %s AND r.relation_type = 2 LIMIT 1
+                        """, (member_id,))
+                        spouse = cursor.fetchone()
+                finally:
+                    conn.close()
+
+                father_name = father['name'] if father else ''
+                spouse_name = spouse['name'] if spouse else ''
+                generation = member['name_word_generation']
+
+                traditional = f"{member_name},{father_name}之子" if father_name else member_name
+                if spouse_name:
+                    traditional += f",配{spouse_name}"
+                if generation:
+                    traditional = f"第{generation}世 " + traditional
+
+                simplified = convert_to_simplified(traditional)
+                extract_source = "basic_info"
+                print(f"[Batch Process] Basic info fallback: {traditional[:80]}")
+
+        except Exception as extract_err:
+            # Any failure while extracting marks this member as failed; the batch goes on.
+            print(f"[Batch Process] Extraction error for member {member_id}: {extract_err}")
+            traditional = ""
+            simplified = ""
+
+        # -- 6. Persist the outcome and the checkpoint (short DB use) --
+        last_processed_id = member_id
+        has_text = bool(traditional and simplified)
+        # Counters are committed to the locals only after the DB commit
+        # succeeds, so a failed save is never counted as both completed and failed.
+        next_completed = completed_count + (1 if has_text else 0)
+        next_failed = failed_count + (0 if has_text else 1)
+
+        conn = get_db_connection()
+        try:
+            with conn.cursor() as cursor:
+                if has_text:
+                    cursor.execute("""
+                        UPDATE family_member_info
+                        SET genealogy_original_traditional = %s,
+                            genealogy_original_simplified  = %s
+                        WHERE id = %s
+                    """, (traditional, simplified, member_id))
+                cursor.execute("""
+                    UPDATE batch_genealogy_task
+                    SET completed_count  = %s,
+                        failed_count     = %s,
+                        last_processed_id = %s,
+                        status           = 'processing'
+                    WHERE task_id = %s
+                """, (next_completed, next_failed, last_processed_id, task_id))
+            conn.commit()
+        except Exception as db_err:
+            print(f"[Batch Process] DB save error for member {member_id}: {db_err}")
+            failed_count += 1
+            results.append({
+                "member_id": member_id,
+                "name":      member_name,
+                "success":   False,
+                "message":   f"数据库写入失败: {db_err}",
+            })
+        else:
+            completed_count, failed_count = next_completed, next_failed
+            if has_text:
+                results.append({
+                    "member_id":         member_id,
+                    "name":              member_name,
+                    "success":           True,
+                    "source":            extract_source,
+                    "traditional_length": len(traditional),
+                    "simplified_length":  len(simplified),
+                })
+                print(f"[Batch Process] Saved member {member_id} (source={extract_source})")
+            else:
+                results.append({
+                    "member_id": member_id,
+                    "name":      member_name,
+                    "success":   False,
+                    "message":   "无法提取或生成族谱原文",
+                })
+                print(f"[Batch Process] Skipped member {member_id}: no valid text extracted")
+        finally:
+            conn.close()
+
+        # Brief pause after each member to ease load on the AI API and the server.
+        time.sleep(0.5)
+
+    # -- 7. No pending member left: write the final status and the results --
+    conn = get_db_connection()
+    try:
+        status = 'completed' if failed_count == 0 else 'completed_with_errors'
+        with conn.cursor() as cursor:
+            cursor.execute("""
+                UPDATE batch_genealogy_task
+                SET status          = %s,
+                    completed_count = %s,
+                    failed_count    = %s,
+                    results         = %s
+                WHERE task_id = %s
+            """, (status, completed_count, failed_count,
+                  json.dumps(results, ensure_ascii=False), task_id))
+        conn.commit()
+        print(f"[Batch Process] Task {task_id} done: "
+              f"{completed_count} success, {failed_count} failed")
+    except Exception as e:
+        print(f"[Batch Process] Error writing final status for {task_id}: {e}")
+    finally:
+        conn.close()
+
+@app.route('/manager/api/members/extract_genealogy/<int:member_id>', methods=['GET'])
+def extract_single_genealogy(member_id):
+    """
+    Extract (or synthesise) the genealogy text of one member and store it.
+
+    Sources are tried in the same order as the batch worker:
+      1. OCR of the linked record image via ``call_doubao_image_api``;
+      2. a sentence assembled from the record's stored AI content (JSON);
+      3. a sentence assembled from the member's father/spouse relations.
+    The resulting traditional text and its simplified conversion are written
+    to ``family_member_info``.
+
+    Args:
+        member_id: Primary key of the member, taken from the URL.
+
+    Returns:
+        A JSON response. 401 when no user is in the session, 404 when the
+        member does not exist; otherwise a payload with ``success`` and, on
+        success, the two text variants and the ``source`` that produced them.
+        Unexpected errors are reported as ``success: False`` with the
+        exception message.
+    """
+    if 'user_id' not in session:
+        return jsonify({"success": False, "message": "Unauthorized"}), 401
+
+    def _row_name(row):
+        # Rows may be dicts or tuples depending on the cursor class; `name`
+        # is the first selected column of both relation queries below.
+        if not row:
+            return ''
+        value = row['name'] if isinstance(row, dict) else row[0]
+        return value or ''
+
+    # NOTE(review): unlike the batch worker, this connection stays open while
+    # the AI call runs — consider releasing it around the call.
+    conn = get_db_connection()
+    try:
+        # Load the member together with its source record.
+        with conn.cursor() as cursor:
+            cursor.execute("""
+                SELECT 
+                    m.id, m.name, m.name_word_generation,
+                    m.source_record_id, r.oss_url as image_url,
+                    r.ai_content AS record_ai_content
+                FROM family_member_info m
+                LEFT JOIN genealogy_records r ON m.source_record_id = r.id
+                WHERE m.id = %s
+            """, (member_id,))
+            row = cursor.fetchone()
+
+        if not row:
+            return jsonify({"success": False, "message": "未找到成员"}), 404
+
+        # Accept both dict rows and tuple rows.
+        if isinstance(row, dict):
+            member = row
+        else:
+            member = {
+                'id': row[0],
+                'name': row[1],
+                'name_word_generation': row[2],
+                'source_record_id': row[3],
+                'image_url': row[4],
+                'record_ai_content': row[5]
+            }
+
+        # Debug: dump the query result.
+        print(f"[Single Extract] Query result - id: {member['id']}, name: '{member['name']}', name_word_generation: '{member['name_word_generation']}', source_record_id: {member['source_record_id']}, image_url: '{member['image_url']}', record_ai_content: '{member['record_ai_content'][:50] if member['record_ai_content'] else None}'")
+
+        traditional = ""
+        simplified = ""
+        source = "basic_info"
+        member_name = member['name']
+        image_url = member['image_url']
+        record_ai_content = member['record_ai_content']
+
+        print(f"[Single Extract] Processing member {member_id}: {member['name']}")
+
+        # First choice: OCR the linked image.
+        if image_url:
+            print(f"[Single Extract] Extracting from image: {image_url}")
+
+            prompt = build_genealogy_prompt(member_name)
+            ai_response = call_doubao_image_api(image_url, prompt)
+
+            print(f"[Single Extract] AI response: {str(ai_response)[:500]}")
+
+            if ai_response:
+                raw_text = extract_pure_text(ai_response)
+                print(f"[Single Extract] Raw text from response: '{raw_text[:300]}'")
+
+                candidate = clean_genealogy_text(raw_text)
+                print(f"[Single Extract] Cleaned traditional: '{candidate[:200]}', length: {len(candidate)}")
+
+                # Sanity check: the text must contain at least one CJK
+                # character of the member's name.
+                name_chars = [c for c in member_name if '\u4e00' <= c <= '\u9fff']
+                name_found = any(c in candidate for c in name_chars)
+
+                if candidate and len(candidate) >= 5 and name_found:
+                    traditional = candidate
+                    simplified = convert_to_simplified(traditional)
+                    source = "image"
+                    print(f"[Single Extract] Extracted from image - traditional: {traditional[:100]}, simplified: {simplified[:100]}")
+                elif not name_found:
+                    print(f"[Single Extract] Extracted text does not contain name '{member_name}', resetting")
+                else:
+                    # Report the rejected text's real length.
+                    print(f"[Single Extract] Image extraction too short ({len(candidate)} chars), resetting")
+            else:
+                print(f"[Single Extract] AI response is None or empty")
+        else:
+            print(f"[Single Extract] No image URL found for member {member_id}")
+
+        # Second choice: assemble a sentence from the record's AI content.
+        if not (traditional and simplified) and record_ai_content:
+            print(f"[Single Extract] Trying to extract from record AI content")
+            try:
+                ai_content = json.loads(record_ai_content)
+                if isinstance(ai_content, list) and len(ai_content) > 0:
+                    current_person = None
+
+                    for person in ai_content:
+                        person_name = person.get('original_name', person.get('name', '')).strip()
+                        # Match when either name contains the other.
+                        if person_name and (member_name in person_name or person_name in member_name):
+                            current_person = person
+                            break
+
+                    if current_person:
+                        name = current_person.get('original_name', current_person.get('name', member['name']))
+                        father_name = current_person.get('father_name', '')
+                        spouse_name = current_person.get('spouse_name', '')
+                        generation = current_person.get('generation', member['name_word_generation'])
+
+                        # Only mention the father when one is known, matching
+                        # the batch worker and the basic-info fallback.
+                        traditional = f"{name},{father_name}之子" if father_name else name
+                        if spouse_name:
+                            traditional += f",配{spouse_name}"
+                        if generation:
+                            traditional = f"第{generation}世 " + traditional
+
+                        simplified = convert_to_simplified(traditional)
+                        source = "ai_content"
+                        print(f"[Single Extract] Generated from AI content: {traditional}")
+            except Exception as e:
+                # Best effort: malformed AI content just moves on to the next fallback.
+                print(f"[Single Extract] Failed to parse record AI content: {e}")
+
+        # Last choice: build the sentence from relation data (source = basic_info).
+        if not (traditional and simplified):
+            print(f"[Single Extract] Generating from basic info")
+            with conn.cursor() as cursor:
+                cursor.execute("""
+                    SELECT p.name, p.simplified_name
+                    FROM family_relation_info r
+                    JOIN family_member_info p ON r.parent_mid = p.id
+                    WHERE r.child_mid = %s AND r.relation_type = 1
+                    LIMIT 1
+                """, (member_id,))
+                father_name = _row_name(cursor.fetchone())
+
+                cursor.execute("""
+                    SELECT p.name, p.simplified_name
+                    FROM family_relation_info r
+                    JOIN family_member_info p ON r.parent_mid = p.id
+                    WHERE r.child_mid = %s AND r.relation_type = 2
+                    LIMIT 1
+                """, (member_id,))
+                spouse_name = _row_name(cursor.fetchone())
+
+            generation = member['name_word_generation']
+            name = member['name']
+
+            traditional = f"{name},{father_name}之子" if father_name else name
+            if spouse_name:
+                traditional += f",配{spouse_name}"
+            if generation:
+                traditional = f"第{generation}世 " + traditional
+
+            simplified = convert_to_simplified(traditional)
+            source = "basic_info"
+            print(f"[Single Extract] Generated from basic info: {traditional}")
+
+        # Debug: dump the final result.
+        print(f"[Single Extract] Final result - traditional: '{traditional}', simplified: '{simplified}'")
+
+        # Persist the text.
+        if traditional and simplified:
+            with conn.cursor() as cursor:
+                cursor.execute("""
+                    UPDATE family_member_info
+                    SET genealogy_original_traditional = %s,
+                        genealogy_original_simplified = %s
+                    WHERE id = %s
+                """, (traditional, simplified, member_id))
+            conn.commit()
+            print(f"[Single Extract] Successfully saved to database")
+
+            return jsonify({
+                "success": True,
+                "member_id": member_id,
+                "name": member['name'],
+                "genealogy_traditional": traditional,
+                "genealogy_simplified": simplified,
+                "source": source
+            })
+        else:
+            return jsonify({
+                "success": False,
+                "member_id": member_id,
+                "message": "无法提取或生成族谱原文"
+            })
+    except Exception as e:
+        import traceback
+        print(f"[Single Extract] Error: {e}")
+        print(f"[Single Extract] Traceback: {traceback.format_exc()}")
+        return jsonify({
+            "success": False,
+            "member_id": member_id,
+            "message": str(e),
+            "error_type": type(e).__name__
+        })
+    finally:
+        conn.close()
+
+@app.route('/manager/api/members/batch_process_all_empty', methods=['GET'])
+def batch_process_all_empty():
+    """
+    Start a background task that processes every member with empty genealogy text.
+
+    Counts the members whose traditional and simplified text are both empty,
+    refuses to start when the current user already has a task in 'pending'
+    or 'processing' state (that task's progress is returned instead), and
+    otherwise inserts a new task row and launches the worker in a daemon thread.
+
+    Returns:
+        A JSON response: 401 when no user is in the session; otherwise
+        ``success`` plus either the running task's details or the new
+        task's id and the number of members to process.
+    """
+    if 'user_id' not in session:
+        return jsonify({"success": False, "message": "Unauthorized"}), 401
+
+    current_user = session['user_id']
+    conn = get_db_connection()
+    try:
+        with conn.cursor() as cursor:
+            # How many members still need text.
+            cursor.execute("""
+                SELECT COUNT(*) as count 
+                FROM family_member_info 
+                WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
+                  AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
+            """)
+            count_row = cursor.fetchone()
+            total_empty = count_row['count'] if count_row else 0
+
+            # Most recent unfinished task of this user, if any.
+            cursor.execute("""
+                SELECT task_id, status, last_processed_id, total_count, completed_count, failed_count
+                FROM batch_genealogy_task 
+                WHERE user_id = %s AND status IN ('pending', 'processing')
+                ORDER BY created_at DESC
+                LIMIT 1
+            """, (current_user,))
+            active_task = cursor.fetchone()
+
+        if active_task:
+            return jsonify({
+                "success": False,
+                "message": "存在正在进行的任务",
+                "task_id": active_task['task_id'],
+                "status": active_task['status'],
+                "last_processed_id": active_task['last_processed_id'],
+                "completed_count": active_task['completed_count'],
+                "total_count": active_task['total_count']
+            })
+
+        task_id = str(uuid.uuid4())
+
+        with conn.cursor() as cursor:
+            cursor.execute("""
+                INSERT INTO batch_genealogy_task (task_id, user_id, status, total_count, last_processed_id)
+                VALUES (%s, %s, 'processing', %s, 0)
+            """, (task_id, current_user, total_empty))
+        conn.commit()
+
+        # Run the batch in a daemon thread so this request returns immediately.
+        worker = threading.Thread(
+            target=async_process_all_empty_genealogy,
+            args=(task_id, current_user),
+            daemon=True
+        )
+        worker.start()
+
+        return jsonify({
+            "success": True,
+            "task_id": task_id,
+            "message": f"任务已创建,将处理 {total_empty} 个族谱原文为空的成员",
+            "total_count": total_empty
+        })
+    finally:
+        conn.close()
+
+# Script entry point: when this module is run directly, start the app on port 5001 with debug mode off.
 if __name__ == '__main__':
     app.run(debug=False, port=5001)

+ 1 - 0
requirements.txt

@@ -4,3 +4,4 @@ requests==2.26.0
 Pillow==8.3.1
 pytesseract==0.3.8
 Werkzeug==2.0.1
+zhconv==1.4.3

+ 342 - 0
templates/batch_genealogy.html

@@ -0,0 +1,342 @@
+{% extends "layout.html" %}
+
+{% block title %}批量处理族谱原文 - 家谱管理系统{% endblock %}
+
+{% block extra_css %}
+<style>
+    .batch-container {
+        max-width: 1200px;
+        margin: 0 auto;
+    }
+    .member-card {
+        border: 2px solid #e9ecef;
+        border-radius: 8px;
+        padding: 15px;
+        cursor: pointer;
+        transition: all 0.2s;
+    }
+    .member-card:hover {
+        border-color: #0d6efd;
+        box-shadow: 0 2px 8px rgba(13, 110, 253, 0.15);
+    }
+    .member-card.selected {
+        border-color: #0d6efd;
+        background: #e7f3ff;
+    }
+    .batch-actions {
+        position: sticky;
+        top: 10px;
+        z-index: 100;
+    }
+    .result-modal-body {
+        max-height: 60vh;
+        overflow-y: auto;
+    }
+    .success-badge {
+        background-color: #d1fae5;
+        color: #065f46;
+    }
+    .error-badge {
+        background-color: #fee2e2;
+        color: #991b1b;
+    }
+</style>
+{% endblock %}
+
+{% block content %}
+<div class="batch-container">
+    <div class="d-flex justify-content-between align-items-center mb-4">
+        <h2><i class="bi bi-file-text"></i> 批量处理族谱原文</h2>
+        <div class="btn-group batch-actions">
+            <button id="selectAllBtn" class="btn btn-outline-primary">全选</button>
+            <button id="clearSelectionBtn" class="btn btn-outline-secondary">清除选择</button>
+            <button id="processBtn" class="btn btn-primary" disabled>
+                <i class="bi bi-cpu"></i> 批量生成族谱原文 (0/10)
+            </button>
+        </div>
+    </div>
+
+    <!-- 处理结果弹窗 -->
+    <div class="modal fade" id="resultModal" tabindex="-1" aria-labelledby="resultModalLabel" aria-hidden="true">
+        <div class="modal-dialog modal-lg">
+            <div class="modal-content">
+                <div class="modal-header">
+                    <h5 class="modal-title" id="resultModalLabel">处理结果</h5>
+                    <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+                </div>
+                <div class="modal-body result-modal-body" id="resultContent">
+                    <div class="text-center py-4">
+                        <div class="spinner-border text-primary" role="status">
+                            <span class="visually-hidden">处理中...</span>
+                        </div>
+                        <p class="mt-2">正在处理,请稍候...</p>
+                    </div>
+                </div>
+                <div class="modal-footer">
+                    <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">关闭</button>
+                    <button type="button" class="btn btn-primary" id="refreshBtn">刷新列表</button>
+                </div>
+            </div>
+        </div>
+    </div>
+
+    <!-- 成员列表 -->
+    <div class="row g-3" id="membersContainer">
+        <div class="col-12 text-center py-8">
+            <div class="spinner-border text-primary" role="status">
+                <span class="visually-hidden">加载中...</span>
+            </div>
+            <p class="mt-2">加载成员列表中...</p>
+        </div>
+    </div>
+
+    <!-- 分页 -->
+    <nav class="mt-4" id="paginationContainer">
+    </nav>
+</div>
+{% endblock %}
+
+{% block extra_js %}
+<script>
+let selectedIds = new Set();
+let currentPage = 1;
+let totalPages = 1;
+
+document.addEventListener('DOMContentLoaded', function() {
+    loadMembers(currentPage);
+    
+    document.getElementById('selectAllBtn').addEventListener('click', selectAll);
+    document.getElementById('clearSelectionBtn').addEventListener('click', clearSelection);
+    document.getElementById('processBtn').addEventListener('click', processSelected);
+    document.getElementById('refreshBtn').addEventListener('click', function() {
+        clearSelection();
+        loadMembers(1);
+        const modal = bootstrap.Modal.getInstance(document.getElementById('resultModal'));
+        if (modal) modal.hide();
+    });
+});
+
+function loadMembers(page) {
+    fetch(`/manager/api/members/empty_genealogy?page=${page}&per_page=20`)
+        .then(response => response.json())
+        .then(data => {
+            if (data.success) {
+                renderMembers(data.members);
+                renderPagination(data.total, page);
+                currentPage = page;
+            } else {
+                document.getElementById('membersContainer').innerHTML = 
+                    '<div class="col-12 text-center py-8"><p class="text-danger">加载失败: ' + data.message + '</p></div>';
+            }
+        })
+        .catch(error => {
+            document.getElementById('membersContainer').innerHTML = 
+                '<div class="col-12 text-center py-8"><p class="text-danger">加载失败: ' + error.message + '</p></div>';
+        });
+}
+
+function renderMembers(members) {
+    if (members.length === 0) {
+        document.getElementById('membersContainer').innerHTML = 
+            '<div class="col-12 text-center py-8"><i class="bi bi-check-circle text-success" style="font-size: 48px;"></i><p class="mt-3 text-success">所有成员的族谱原文都已填写完毕!</p></div>';
+        return;
+    }
+    
+    let html = '';
+    members.forEach(member => {
+        const isSelected = selectedIds.has(member.id);
+        html += `
+            <div class="col-md-6 col-lg-4">
+                <div class="member-card ${isSelected ? 'selected' : ''}" data-id="${member.id}" onclick="toggleSelect(${member.id})">
+                    <div class="d-flex justify-content-between align-items-start">
+                        <div>
+                            <h5 class="mb-1">${member.name}</h5>
+                            ${member.simplified_name && member.simplified_name !== member.name ? `<p class="text-sm text-muted">(${member.simplified_name})</p>` : ''}
+                        </div>
+                        <div class="form-check" onclick="event.stopPropagation()">
+                            <input type="checkbox" class="form-check-input" ${isSelected ? 'checked' : ''} onchange="toggleSelect(${member.id})" style="transform: scale(1.2);">
+                        </div>
+                    </div>
+                    <hr class="my-2">
+                    <div class="text-sm text-muted">
+                        ${member.name_word_generation ? `<div><i class="bi bi-tree"></i> ${member.name_word_generation}</div>` : ''}
+                        ${member.father_name ? `<div><i class="bi bi-user"></i> 父:${member.father_name}</div>` : ''}
+                        ${member.mother_name ? `<div><i class="bi bi-user"></i> 母:${member.mother_name}</div>` : ''}
+                    </div>
+                </div>
+            </div>
+        `;
+    });
+    document.getElementById('membersContainer').innerHTML = html;
+}
+
+function renderPagination(total, page) {
+    const perPage = 20;
+    totalPages = Math.ceil(total / perPage);
+    
+    if (totalPages <= 1) {
+        document.getElementById('paginationContainer').innerHTML = '';
+        return;
+    }
+    
+    let html = `<ul class="pagination justify-content-center">`;
+    
+    // 上一页
+    html += `<li class="page-item ${page === 1 ? 'disabled' : ''}">
+        <a class="page-link" href="#" onclick="loadMembers(${page - 1})">上一页</a>
+    </li>`;
+    
+    // 页码
+    for (let i = 1; i <= totalPages; i++) {
+        html += `<li class="page-item ${i === page ? 'active' : ''}">
+            <a class="page-link" href="#" onclick="loadMembers(${i})">${i}</a>
+        </li>`;
+    }
+    
+    // 下一页
+    html += `<li class="page-item ${page === totalPages ? 'disabled' : ''}">
+        <a class="page-link" href="#" onclick="loadMembers(${page + 1})">下一页</a>
+    </li>`;
+    
+    html += `</ul>`;
+    document.getElementById('paginationContainer').innerHTML = html;
+}
+
+function toggleSelect(id) {
+    if (selectedIds.has(id)) {
+        selectedIds.delete(id);
+    } else {
+        if (selectedIds.size >= 10) {
+            alert('最多选择10个成员进行处理');
+            return;
+        }
+        selectedIds.add(id);
+    }
+    updateSelection();
+}
+
+function selectAll() {
+    const cards = document.querySelectorAll('.member-card');
+    const notSelected = [];
+    
+    cards.forEach(card => {
+        const id = parseInt(card.dataset.id);
+        if (!selectedIds.has(id)) {
+            notSelected.push(id);
+        }
+    });
+    
+    // 最多选择10个,从未选择的中选
+    notSelected.slice(0, 10 - selectedIds.size).forEach(id => {
+        selectedIds.add(id);
+    });
+    
+    updateSelection();
+}
+
+// Empty the selection set and refresh the cards and the action button.
+function clearSelection() {
+    selectedIds.clear();
+    updateSelection();
+}
+
+function updateSelection() {
+    // 更新卡片样式
+    document.querySelectorAll('.member-card').forEach(card => {
+        const id = parseInt(card.dataset.id);
+        card.classList.toggle('selected', selectedIds.has(id));
+        const checkbox = card.querySelector('.form-check-input');
+        if (checkbox) checkbox.checked = selectedIds.has(id);
+    });
+    
+    // 更新按钮状态
+    const btn = document.getElementById('processBtn');
+    if (selectedIds.size > 0) {
+        btn.disabled = false;
+        btn.innerHTML = `<i class="bi bi-cpu"></i> 批量生成族谱原文 (${selectedIds.size}/10)`;
+    } else {
+        btn.disabled = true;
+        btn.innerHTML = '<i class="bi bi-cpu"></i> 批量生成族谱原文 (0/10)';
+    }
+}
+
+function processSelected() {
+    const ids = Array.from(selectedIds);
+    console.log('processSelected called, selected ids:', ids);
+    
+    if (ids.length === 0) {
+        alert('请先选择成员');
+        return;
+    }
+    
+    document.getElementById('resultContent').innerHTML = `
+        <div class="text-center py-4">
+            <div class="spinner-border text-primary" role="status">
+                <span class="visually-hidden">处理中...</span>
+            </div>
+            <p class="mt-2">正在处理 ${ids.length} 个成员,请稍候...</p>
+        </div>
+    `;
+    
+    const modal = new bootstrap.Modal(document.getElementById('resultModal'));
+    modal.show();
+    console.log('Modal shown');
+    
+    fetch('/manager/api/members/batch_process_genealogy', {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json'
+        },
+        body: JSON.stringify({ member_ids: ids })
+    })
+    .then(response => response.json())
+    .then(data => {
+        if (data.success) {
+            let html = '<div class="mb-4"><h6 class="text-primary">处理完成</h6></div>';
+            html += '<div class="list-group">';
+            
+            data.results.forEach(result => {
+                html += `
+                    <div class="list-group-item">
+                        <div class="d-flex justify-content-between align-items-start">
+                            <div>
+                                <strong>${result.name}</strong>
+                                ${result.success ? '' : `<span class="text-muted"> (ID: ${result.member_id})</span>`}
+                            </div>
+                            <span class="badge ${result.success ? 'success-badge' : 'error-badge'}">
+                                ${result.success ? '成功' : '失败'}
+                            </span>
+                        </div>
+                        ${result.success ? `
+                            <div class="mt-2 text-sm">
+                                <div class="mb-1">
+                                    <span class="text-muted">繁体:</span>
+                                    <span>${result.traditional}</span>
+                                </div>
+                                <div>
+                                    <span class="text-muted">简体:</span>
+                                    <span>${result.simplified}</span>
+                                </div>
+                            </div>
+                        ` : `
+                            <div class="mt-2 text-sm text-danger">
+                                原因:${result.message}
+                            </div>
+                        `}
+                    </div>
+                `;
+            });
+            
+            html += '</div>';
+            document.getElementById('resultContent').innerHTML = html;
+        } else {
+            document.getElementById('resultContent').innerHTML = 
+                `<div class="text-center py-4"><p class="text-danger">处理失败: ${data.message}</p></div>`;
+        }
+    })
+    .catch(error => {
+        document.getElementById('resultContent').innerHTML = 
+            `<div class="text-center py-4"><p class="text-danger">请求失败: ${error.message}</p></div>`;
+    });
+}
+</script>
+{% endblock %}

+ 3 - 0
templates/layout.html

@@ -77,6 +77,9 @@
                     <a href="{{ url_for('members') }}" class="{% if request.endpoint == 'members' %}active{% endif %}">
                         <i class="bi bi-people me-2"></i> 成员列表
                     </a>
+                    <a href="{{ url_for('batch_genealogy') }}" class="{% if request.endpoint == 'batch_genealogy' %}active{% endif %}">
+                        <i class="bi bi-file-text me-2"></i> 批量处理族谱原文
+                    </a>
                     <a href="{{ url_for('tree') }}" class="{% if request.endpoint == 'tree' %}active{% endif %}">
                         <i class="bi bi-diagram-3 me-2"></i> 家谱世系树状图
                     </a>

+ 185 - 0
test_extract.py

@@ -0,0 +1,185 @@
+import requests
+import json
+import re
+import io
+import base64
+from PIL import Image
+
+def get_normalized_base64_image(image_url):
+    try:
+        response = requests.get(image_url, timeout=30)
+        response.raise_for_status()
+        
+        with Image.open(io.BytesIO(response.content)) as img:
+            if img.mode != 'RGB':
+                img = img.convert('RGB')
+            
+            max_dim = 2000
+            if max(img.width, img.height) > max_dim:
+                ratio = max_dim / max(img.width, img.height)
+                new_size = (int(img.width * ratio), int(img.height * ratio))
+                img = img.resize(new_size, Image.Resampling.LANCZOS)
+            
+            buffer = io.BytesIO()
+            img.save(buffer, format='JPEG', quality=85)
+            
+            b64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
+            return f'data:image/jpeg;base64,{b64_str}'
+    except Exception as e:
+        print(f'Error normalizing image: {e}')
+        return image_url
+
+def call_doubao_image_api(image_url, prompt):
+    api_key = 'a1800657-9212-4afe-9b7c-b49f015c54d3'
+    api_url = 'https://ark.cn-beijing.volces.com/api/v3/responses'
+    
+    ai_payload_url = get_normalized_base64_image(image_url)
+    
+    payload = {
+        'model': 'doubao-seed-1-8-251228',
+        'stream': False,
+        'input': [
+            {
+                'role': 'user',
+                'content': [
+                    {'type': 'input_image', 'image_url': ai_payload_url},
+                    {'type': 'input_text', 'text': prompt}
+                ]
+            }
+        ]
+    }
+    
+    headers = {
+        'Authorization': f'Bearer {api_key}',
+        'Content-Type': 'application/json'
+    }
+    
+    try:
+        response = requests.post(
+            api_url, 
+            json=payload, 
+            headers=headers, 
+            timeout=120,
+            verify=False,
+            proxies={'http': None, 'https': None}
+        )
+        
+        if response.status_code == 200:
+            return response.json()
+        else:
+            print(f'API Error: {response.status_code}')
+            return None
+    except Exception as e:
+        print(f'Exception: {e}')
+        return None
+
+def extract_text_from_response(response):
+    """从API响应中提取文本内容"""
+    if not response:
+        return ''
+    
+    # 尝试多种响应格式
+    if 'output' in response:
+        for item in response['output']:
+            # 跳过reasoning类型
+            if item.get('type') == 'reasoning':
+                continue
+            
+            content = item.get('content')
+            if isinstance(content, str):
+                return content
+            elif isinstance(content, list):
+                text_parts = []
+                for part in content:
+                    if isinstance(part, dict):
+                        if part.get('type') == 'text':
+                            text_parts.append(part.get('text', ''))
+                        elif part.get('type') == 'reasoning':
+                            continue
+                    elif isinstance(part, str):
+                        text_parts.append(part)
+                return ''.join(text_parts)
+    
+    if 'choices' in response and len(response['choices']) > 0:
+        message = response['choices'][0].get('message', {})
+        return message.get('content', '')
+    
+    return str(response)
+
+def clean_text(text):
+    """清理文本,去除多余内容"""
+    if not text:
+        return ''
+    
+    text = text.strip()
+    
+    # 去除代码块标记
+    if text.startswith('```json'):
+        text = text[7:]
+    if text.startswith('```'):
+        text = text[3:]
+    if text.endswith('```'):
+        text = text[:-3]
+    
+    text = text.strip()
+    
+    # 尝试解析JSON
+    try:
+        result = json.loads(text)
+        if isinstance(result, dict):
+            # 尝试多种可能的字段名
+            for key in ['genealogy_traditional', 'traditional', 'text', 'content', 'result']:
+                if key in result:
+                    text = str(result[key])
+                    break
+    except json.JSONDecodeError:
+        pass
+    
+    # 去除解释性文字
+    unwanted_patterns = [
+        '请分析', '要求', '提取', '转换', '繁体', '简体',
+        'genealogy', 'traditional', 'simplified',
+        '原始', '原文', 'JSON', '格式', '输出',
+        'reasoning', 'thinking', '思考', '分析',
+        '我现在需要', '首先', '然后', '接下来',
+        '根据图片', '图片中', '识别', 'OCR'
+    ]
+    
+    for pattern in unwanted_patterns:
+        text = text.replace(pattern, '')
+    
+    # 去除JSON结构残留
+    text = re.sub(r'["\']text["\']\s*[,:]\s*["\']', '', text)
+    text = re.sub(r'["\']', '', text)
+    
+    # 提取纯中文
+    chinese_text = re.findall(r'[\u4e00-\u9fff]+', text)
+    if chinese_text:
+        text = ''.join(chinese_text)
+    
+    return text.strip()
+
+# Candidate prompts to compare
+prompts = [
+    '提取图片中的繁体中文文字,直接输出,不要解释。',
+    '识别图片中的竖排繁体中文,按阅读顺序输出原文。',
+    'OCR识别图片文字,只输出结果。',
+    '读取图片中的族谱文字,直接返回。',
+    '分析图片,提取所有中文文字,不要分析。'
+]
+
+print('=== 测试不同Prompt效果 ===')
+for i, prompt in enumerate(prompts):
+    print(f'\nPrompt {i+1}: {prompt}')
+    print('-' * 50)
+    
+    # A real image URL is needed to actually run the test.
+    # Test mode: only print the prompt for reference; no API call is made here.
+    print('(需要实际图片URL进行测试)')
+
+# Manual sample - expected text, transcribed by hand from the image the user provided
+print('\n=== 预期提取结果(根据图片手动识别)===')
+print('因公图片原文(竖排繁体):')
+print('因公')
+print('字廷大授南州刺史上距陽公三十五世後漢延康元年二月初六日渡')
+print('婺州之金華縣長樂鄉 娶林氏生三子 塟藤就村見有石柱石人華表')