|
|
@@ -151,6 +151,16 @@ def manual_simplify(text):
|
|
|
result += mapping.get(char, char)
|
|
|
return result
|
|
|
|
|
|
def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    The ``zhconv`` library is tried first. If importing it or running the
    conversion raises any exception, the text is converted with
    ``manual_simplify`` instead. Falsy input is returned unchanged.
    """
    if text:
        try:
            from zhconv import convert as zh_convert
            return zh_convert(text, 'zh-hans')
        except Exception:
            # Best-effort fallback: a missing or failing zhconv must not
            # break the caller.
            return manual_simplify(text)
    return text
|
|
|
+
|
|
|
def _build_reverse_simplify_map():
|
|
|
"""
|
|
|
Build a reverse map from simplified char -> list of traditional chars
|
|
|
@@ -1114,6 +1124,12 @@ def members():
|
|
|
|
|
|
return render_template('members.html', members=members, search_name=search_name, page=page, total_pages=total_pages, total=total)
|
|
|
|
|
|
@app.route('/manager/batch_genealogy')
def batch_genealogy():
    """Render the batch genealogy page, or redirect to login when no user is in the session."""
    is_logged_in = 'user_id' in session
    if is_logged_in:
        return render_template('batch_genealogy.html')
    return redirect(url_for('login'))
|
|
|
+
|
|
|
@app.route('/manager/suspected_errors')
|
|
|
def suspected_errors():
|
|
|
if 'user_id' not in session:
|
|
|
@@ -1597,6 +1613,319 @@ def get_members():
|
|
|
finally:
|
|
|
conn.close()
|
|
|
|
|
|
def call_doubao_api(prompt, image_url=None):
    """Send a text prompt to the Doubao (Volcengine Ark) Responses endpoint.

    Args:
        prompt: Text sent as the single user message.
        image_url: Accepted for backward compatibility; this function does
            not use it.

    Returns:
        The reply text extracted from the JSON response, ``str(result)`` when
        no known response shape matches, or ``None`` on a non-200 status or
        any exception (both are logged with ``print``).
    """
    import os

    # NOTE(review): a credential committed to source control should be rotated
    # and removed. DOUBAO_API_KEY takes precedence; the literal remains only so
    # existing deployments keep working.
    api_key = os.environ.get("DOUBAO_API_KEY") or "a1800657-9212-4afe-9b7c-b49f015c54d3"
    api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"

    payload = {
        "model": "doubao-seed-1-8-251228",
        "stream": False,
        "input": [
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": prompt}
                ]
            }
        ]
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    try:
        # NOTE(review): verify=False disables TLS certificate verification
        # while a bearer token is being sent. Left unchanged because the
        # deployment may depend on it; confirm and re-enable if possible.
        response = requests.post(
            api_url,
            json=payload,
            headers=headers,
            timeout=120,
            verify=False,
            proxies={"http": None, "https": None}
        )

        if response.status_code != 200:
            print(f"[AI API] Error: {response.status_code} - {response.text}")
            return None

        result = response.json()
        print(f"[AI API] Raw response: {result}")
        return _doubao_message_text(result)
    except Exception as e:
        print(f"[AI API] Exception: {e}")
        return None


def _doubao_message_text(result):
    """Return the reply text from a parsed Doubao response, else ``str(result)``.

    Shapes are tried in this order: ``output`` items of type ``message``,
    chat-completions style ``choices``, a top-level ``text`` key, and a
    top-level ``message`` key.
    """
    if 'output' in result:
        for item in result['output']:
            if item.get('type') != 'message':
                continue
            content = item.get('content')
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                for part in content:
                    # The Responses API documents text parts as "output_text";
                    # the plain "text" tag is still accepted as before.
                    if isinstance(part, dict) and part.get('type') in ('text', 'output_text'):
                        return part.get('text', '')
            elif isinstance(content, dict) and 'text' in content:
                return content.get('text', '')

    if 'choices' in result and len(result['choices']) > 0:
        message = result['choices'][0].get('message', {})
        return message.get('content', '')

    if 'text' in result:
        return result['text']

    if 'message' in result:
        msg = result['message']
        if isinstance(msg, str):
            return msg
        if isinstance(msg, dict) and 'content' in msg:
            return msg['content']

    # Last resort: hand the caller the stringified payload.
    return str(result)
|
|
|
+
|
|
|
def parse_ai_response(ai_response):
    """Extract the traditional and simplified genealogy text from an AI reply.

    The reply is first treated as JSON (optionally wrapped in a Markdown code
    fence) with the keys ``genealogy_traditional`` and
    ``genealogy_simplified``. If that yields nothing, both fields are searched
    for in the raw reply with a regular expression.

    Args:
        ai_response: Reply text from the model. Non-string values are
            converted with ``str()``.

    Returns:
        ``(None, None)`` when ``ai_response`` is falsy; otherwise a
        ``(traditional, simplified)`` tuple of strings, either of which may
        be empty.
    """
    import re

    if not ai_response:
        return None, None

    raw = ai_response if isinstance(ai_response, str) else str(ai_response)

    def _as_text(value):
        # JSON null or non-string values must not reach callers that call len().
        if value is None:
            return ''
        return value if isinstance(value, str) else str(value)

    # Strip an optional Markdown fence, with or without a language tag.
    text = raw.strip()
    text = re.sub(r'^```[a-zA-Z]*', '', text)
    if text.endswith('```'):
        text = text[:-3]
    text = text.strip()

    try:
        result = json.loads(text)
    except json.JSONDecodeError:
        print(f"[AI Parse] JSON decode error: {raw[:200]}")
    else:
        # Valid JSON that is not an object (list, number, string) has no fields.
        if isinstance(result, dict):
            traditional = _as_text(result.get('genealogy_traditional', ''))
            simplified = _as_text(result.get('genealogy_simplified', ''))
            if traditional or simplified:
                return traditional, simplified

    # Fallback: pull the quoted values straight out of the raw reply.
    traditional_match = re.search(r'genealogy_traditional["\']?\s*[,:]\s*["\']([^"\']+)["\']', raw)
    simplified_match = re.search(r'genealogy_simplified["\']?\s*[,:]\s*["\']([^"\']+)["\']', raw)

    traditional = traditional_match.group(1) if traditional_match else ''
    simplified = simplified_match.group(1) if simplified_match else ''

    return traditional, simplified
|
|
|
+
|
|
|
@app.route('/manager/api/members/empty_genealogy', methods=['GET'])
def get_members_empty_genealogy():
    """Return one page of members whose genealogy original text is empty.

    Query parameters:
        page: 1-based page number (default 1). Invalid or non-positive
            values fall back to 1.
        per_page: Page size (default 20). Invalid or non-positive values
            fall back to the default / 1.

    Returns:
        JSON with ``members`` (each enriched with father and mother names)
        and ``total``; 401 when not logged in; 500 with a message on error.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    # type=int makes a malformed value fall back to the default instead of
    # raising ValueError outside the try block below.
    page = max(request.args.get('page', 1, type=int), 1)
    per_page = max(request.args.get('per_page', 20, type=int), 1)
    offset = (page - 1) * per_page

    parent_sql = """
        SELECT p.name, p.simplified_name, p.name_word_generation
        FROM family_relation_info r
        JOIN family_member_info p ON r.parent_mid = p.id
        WHERE r.child_mid = %s AND r.relation_type = %s
        LIMIT 1
    """

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # Count total
            cursor.execute("""
                SELECT COUNT(*) as total
                FROM family_member_info
                WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
                AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
            """)
            total_result = cursor.fetchone()
            total = total_result['total'] if total_result else 0

            # Get members. ORDER BY keeps pages stable across requests;
            # LIMIT/OFFSET without an order may repeat or skip rows.
            cursor.execute("""
                SELECT id, name, simplified_name, name_word_generation, sex, occupation, notes, birth_place
                FROM family_member_info
                WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
                AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
                ORDER BY id
                LIMIT %s OFFSET %s
            """, (per_page, offset))
            members = cursor.fetchall()

            # Look up each member's father (relation_type 1) and mother
            # (relation_type 2).
            member_list = []
            for member in members:
                cursor.execute(parent_sql, (member['id'], 1))
                father = cursor.fetchone()

                cursor.execute(parent_sql, (member['id'], 2))
                mother = cursor.fetchone()

                member_list.append({
                    'id': member['id'],
                    'name': member['name'],
                    'simplified_name': member['simplified_name'],
                    'name_word_generation': member['name_word_generation'],
                    'sex': member['sex'],
                    'occupation': member['occupation'],
                    'notes': member['notes'],
                    'birth_place': member['birth_place'],
                    'father_name': father['name'] if father else None,
                    'father_simplified_name': father['simplified_name'] if father else None,
                    'father_generation': father['name_word_generation'] if father else None,
                    'mother_name': mother['name'] if mother else None,
                    'mother_simplified_name': mother['simplified_name'] if mother else None
                })

        return jsonify({"success": True, "members": member_list, "total": total})
    except Exception as e:
        return jsonify({"success": False, "message": f"获取成员失败: {e}"}), 500
    finally:
        conn.close()
|
|
|
+
|
|
|
@app.route('/manager/api/members/batch_process_genealogy', methods=['POST'])
def batch_process_genealogy():
    """Generate and store genealogy text for 1-10 members, synchronously.

    Expects a JSON body ``{"member_ids": [...]}``. For every id the member
    and its parents are loaded, a prompt is sent to ``call_doubao_api`` and
    the parsed result is written to ``family_member_info``.

    Returns:
        JSON ``{"success": True, "results": [...]}`` with one entry per id;
        400 when the id list is missing or has more than 10 entries; 401
        when not logged in; 500 when an unexpected exception occurs.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    # silent=True yields None instead of raising on a missing or invalid body.
    data = request.get_json(silent=True)
    member_ids = data.get('member_ids', []) if isinstance(data, dict) else []

    if not isinstance(member_ids, list) or not member_ids or len(member_ids) > 10:
        return jsonify({"success": False, "message": "请选择1-10个成员进行处理"}), 400

    def _preview(value):
        # Shorten long text for the response; tolerate None.
        value = value or ''
        return value[:100] + "..." if len(value) > 100 else value

    parent_sql = """
        SELECT p.name, p.simplified_name
        FROM family_relation_info r
        JOIN family_member_info p ON r.parent_mid = p.id
        WHERE r.child_mid = %s AND r.relation_type = %s
        LIMIT 1
    """

    conn = get_db_connection()
    results = []

    try:
        for member_id in member_ids:
            with conn.cursor() as cursor:
                cursor.execute("""
                    SELECT id, name, simplified_name, name_word_generation,
                    birth_place, occupation, notes, sex
                    FROM family_member_info WHERE id = %s
                """, (member_id,))
                member = cursor.fetchone()

                # Check existence before touching the row: writing parent
                # fields onto a missing member used to raise TypeError and
                # abort the whole batch.
                if not member:
                    results.append({"member_id": member_id, "success": False, "message": "成员不存在"})
                    continue

                # Father (relation_type 1)
                cursor.execute(parent_sql, (member_id, 1))
                father = cursor.fetchone()

                # Mother (relation_type 2)
                cursor.execute(parent_sql, (member_id, 2))
                mother = cursor.fetchone()

            father_name = father['name'] if father else None
            mother_name = mother['name'] if mother else None

            # Build the AI prompt
            member_info = f"""
姓名(繁体):{member['name']}
姓名(简体):{member['simplified_name'] or '未知'}
世系世代:{member['name_word_generation'] or '未知'}
父亲姓名:{father_name or '未知'}
母亲姓名:{mother_name or '未知'}
出生地:{member['birth_place'] or '未知'}
职业:{member['occupation'] or '未知'}
备注:{member['notes'] or '无'}
"""

            prompt = f"""
请根据以下人员信息,模拟生成该人员的族谱原文:

{member_info}

请输出两个字段:
1. genealogy_traditional: 族谱原文(繁体中文,模仿传统族谱格式)
2. genealogy_simplified: 族谱原文(简体中文,将繁体转换为简体)

请严格按照JSON格式输出,不要包含任何额外解释:
{{
"genealogy_traditional": "繁体族谱原文内容",
"genealogy_simplified": "简体族谱原文内容"
}}
"""

            ai_response = call_doubao_api(prompt)
            print(f"[AI Response] Member {member_id}: {ai_response}")

            if not ai_response:
                results.append({
                    "member_id": member_id,
                    "name": member['name'],
                    "success": False,
                    "message": "AI调用失败"
                })
                continue

            traditional, simplified = parse_ai_response(ai_response)

            if not (traditional or simplified):
                results.append({
                    "member_id": member_id,
                    "name": member['name'],
                    "success": False,
                    "message": "AI未返回有效数据"
                })
                continue

            with conn.cursor() as cursor:
                cursor.execute("""
                    UPDATE family_member_info
                    SET genealogy_original_traditional = %s,
                    genealogy_original_simplified = %s
                    WHERE id = %s
                """, (traditional, simplified, member_id))
            conn.commit()

            results.append({
                "member_id": member_id,
                "name": member['name'],
                "success": True,
                "traditional": _preview(traditional),
                "simplified": _preview(simplified)
            })

        return jsonify({"success": True, "results": results})
    except Exception as e:
        print(f"[Batch Process] Exception: {e}")
        return jsonify({"success": False, "message": f"批量处理失败: {e}"}), 500
    finally:
        conn.close()
|
|
|
+
|
|
|
@app.route('/manager/api/member/<int:member_id>')
|
|
|
def get_member(member_id):
|
|
|
if 'user_id' not in session:
|
|
|
@@ -3050,5 +3379,1088 @@ def delete_settlement(id):
|
|
|
finally:
|
|
|
conn.close()
|
|
|
|
|
|
+# 异步批量处理族谱原文功能
|
|
|
+import uuid
|
|
|
+
|
|
|
def init_batch_task_table():
    """Create the ``batch_genealogy_task`` table if it does not exist.

    Also adds the ``last_processed_id`` column when an existing table lacks
    it. Errors are printed, not raised; the connection is always closed.
    """
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS batch_genealogy_task (
        id INT AUTO_INCREMENT PRIMARY KEY,
        task_id VARCHAR(64) UNIQUE NOT NULL,
        user_id INT NOT NULL,
        status VARCHAR(20) DEFAULT 'pending',
        total_count INT DEFAULT 0,
        completed_count INT DEFAULT 0,
        failed_count INT DEFAULT 0,
        last_processed_id INT DEFAULT 0,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
        results TEXT
        );
    """
    column_probe_sql = "SHOW COLUMNS FROM batch_genealogy_task LIKE 'last_processed_id'"
    add_column_sql = "ALTER TABLE batch_genealogy_task ADD COLUMN last_processed_id INT DEFAULT 0"

    db = get_db_connection()
    try:
        with db.cursor() as cur:
            cur.execute(create_table_sql)
            # Tables created before the column existed need it added.
            cur.execute(column_probe_sql)
            column_present = cur.fetchone()
            if not column_present:
                cur.execute(add_column_sql)
        db.commit()
        print("[Database] batch_genealogy_task table initialized")
    except Exception as e:
        print(f"[Database] Error creating batch_genealogy_task table: {e}")
    finally:
        db.close()


# Ensure the task table exists as soon as this module is loaded.
init_batch_task_table()
|
|
|
+
|
|
|
def async_process_genealogy_task(task_id, member_ids, user_id):
    """Process a batch genealogy task for the given member ids.

    Marks the task as ``processing``, then for every member loads the row and
    its parents, asks ``call_doubao_api`` for genealogy text, stores the
    parsed result, and finally writes the status, counters and per-member
    results to ``batch_genealogy_task``.

    Args:
        task_id: Value of ``batch_genealogy_task.task_id`` to update.
        member_ids: Iterable of member ids to process.
        user_id: Not used by this function; kept for the caller's signature.
    """
    parent_sql = """
        SELECT p.name, p.simplified_name
        FROM family_relation_info r
        JOIN family_member_info p ON r.parent_mid = p.id
        WHERE r.child_mid = %s AND r.relation_type = %s
        LIMIT 1
    """

    def _preview(value):
        # Shorten long text for the stored result; tolerate None.
        value = value or ''
        return value[:100] + "..." if len(value) > 100 else value

    results = []

    conn = get_db_connection()
    try:
        # Mark the task as in progress
        with conn.cursor() as cursor:
            cursor.execute("""
                UPDATE batch_genealogy_task
                SET status = 'processing', total_count = %s
                WHERE task_id = %s
            """, (len(member_ids), task_id))
        conn.commit()

        completed_count = 0
        failed_count = 0

        for member_id in member_ids:
            try:
                with conn.cursor() as cursor:
                    cursor.execute("""
                        SELECT id, name, simplified_name, name_word_generation,
                        birth_place, occupation, notes, sex
                        FROM family_member_info WHERE id = %s
                    """, (member_id,))
                    member = cursor.fetchone()

                    father = mother = None
                    # Only look up parents for an existing member. Writing
                    # parent fields onto None used to raise TypeError, which
                    # was reported as a fetch error, not as a missing member.
                    if member:
                        cursor.execute(parent_sql, (member_id, 1))
                        father = cursor.fetchone()

                        cursor.execute(parent_sql, (member_id, 2))
                        mother = cursor.fetchone()
            except Exception as e:
                print(f"[Async Process] Error getting member {member_id}: {e}")
                results.append({
                    "member_id": member_id,
                    "name": "未知",
                    "success": False,
                    "message": f"获取成员信息失败: {e}"
                })
                failed_count += 1
                continue

            if not member:
                results.append({
                    "member_id": member_id,
                    "name": "未知",
                    "success": False,
                    "message": "成员不存在"
                })
                failed_count += 1
                continue

            father_name = father['name'] if father else None
            mother_name = mother['name'] if mother else None

            # Build the AI prompt
            member_info = f"""
姓名(繁体):{member['name']}
姓名(简体):{member['simplified_name'] or '未知'}
世系世代:{member['name_word_generation'] or '未知'}
父亲姓名:{father_name or '未知'}
母亲姓名:{mother_name or '未知'}
出生地:{member['birth_place'] or '未知'}
职业:{member['occupation'] or '未知'}
备注:{member['notes'] or '无'}
"""

            prompt = f"""
请根据以下人员信息,模拟生成该人员的族谱原文:

{member_info}

请输出两个字段:
1. genealogy_traditional: 族谱原文(繁体中文,模仿传统族谱格式)
2. genealogy_simplified: 族谱原文(简体中文,将繁体转换为简体)

请严格按照JSON格式输出,不要包含任何额外解释:
{{
"genealogy_traditional": "繁体族谱原文内容",
"genealogy_simplified": "简体族谱原文内容"
}}
"""

            ai_response = call_doubao_api(prompt)

            if not ai_response:
                results.append({
                    "member_id": member_id,
                    "name": member['name'],
                    "success": False,
                    "message": "AI调用失败"
                })
                failed_count += 1
                continue

            traditional, simplified = parse_ai_response(ai_response)

            if not (traditional or simplified):
                results.append({
                    "member_id": member_id,
                    "name": member['name'],
                    "success": False,
                    "message": "AI未返回有效数据"
                })
                failed_count += 1
                continue

            try:
                with conn.cursor() as cursor:
                    cursor.execute("""
                        UPDATE family_member_info
                        SET genealogy_original_traditional = %s,
                        genealogy_original_simplified = %s
                        WHERE id = %s
                    """, (traditional, simplified, member_id))
                conn.commit()

                results.append({
                    "member_id": member_id,
                    "name": member['name'],
                    "success": True,
                    "traditional": _preview(traditional),
                    "simplified": _preview(simplified)
                })
                completed_count += 1
            except Exception as e:
                print(f"[Async Process] Error updating member {member_id}: {e}")
                results.append({
                    "member_id": member_id,
                    "name": member['name'],
                    "success": False,
                    "message": f"保存失败: {e}"
                })
                failed_count += 1

        # Record the final task state
        status = 'completed' if failed_count == 0 else 'completed_with_errors'
        with conn.cursor() as cursor:
            cursor.execute("""
                UPDATE batch_genealogy_task
                SET status = %s, completed_count = %s, failed_count = %s, results = %s
                WHERE task_id = %s
            """, (status, completed_count, failed_count, json.dumps(results, ensure_ascii=False), task_id))
        conn.commit()

        print(f"[Async Process] Task {task_id} completed: {completed_count} success, {failed_count} failed")

    except Exception as e:
        print(f"[Async Process] Error in task {task_id}: {e}")
        # The connection may be what failed, so marking the task as failed is
        # best-effort and must not raise out of the worker thread.
        try:
            with conn.cursor() as cursor:
                cursor.execute("""
                    UPDATE batch_genealogy_task
                    SET status = 'failed', results = %s
                    WHERE task_id = %s
                """, (json.dumps({"error": str(e)}, ensure_ascii=False), task_id))
            conn.commit()
        except Exception as update_error:
            print(f"[Async Process] Could not mark task {task_id} as failed: {update_error}")
    finally:
        conn.close()
|
|
|
+
|
|
|
@app.route('/manager/api/members/batch_process_genealogy_async', methods=['POST'])
def batch_process_genealogy_async():
    """Create a background task that generates genealogy text for members.

    Expects a JSON body ``{"member_ids": [...]}``. A row is inserted into
    ``batch_genealogy_task`` and ``async_process_genealogy_task`` is started
    in a new thread.

    Returns:
        JSON with the new ``task_id``; 400 when no member ids are supplied;
        401 when not logged in.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    # silent=True yields None instead of raising on a missing or invalid body.
    data = request.get_json(silent=True)
    member_ids = data.get('member_ids', []) if isinstance(data, dict) else []

    if not isinstance(member_ids, list) or not member_ids:
        return jsonify({"success": False, "message": "请选择成员进行处理"}), 400

    user_id = session['user_id']

    # Generate the task id
    task_id = str(uuid.uuid4())

    # Persist the task before starting the worker
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("""
                INSERT INTO batch_genealogy_task (task_id, user_id, status, total_count)
                VALUES (%s, %s, 'pending', %s)
            """, (task_id, user_id, len(member_ids)))
        conn.commit()
    finally:
        conn.close()

    # Run the work in a background thread so the request returns immediately
    threading.Thread(target=async_process_genealogy_task, args=(task_id, member_ids, user_id)).start()

    return jsonify({
        "success": True,
        "task_id": task_id,
        "message": "任务已创建,正在后台处理中"
    })
|
|
|
+
|
|
|
@app.route('/manager/api/members/batch_task_status/<task_id>', methods=['GET'])
def get_batch_task_status(task_id):
    """Return the status of one batch task owned by the logged-in user.

    Returns:
        JSON with the task's counters, timestamps and (when present) its
        results; 404 when no task with this id belongs to the session user;
        401 when not logged in.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("""
                SELECT task_id, status, total_count, completed_count, failed_count,
                created_at, updated_at, results
                FROM batch_genealogy_task
                WHERE task_id = %s AND user_id = %s
            """, (task_id, session['user_id']))
            task = cursor.fetchone()

        if not task:
            return jsonify({"success": False, "message": "任务不存在或无权访问"}), 404

        result = {
            "task_id": task['task_id'],
            "status": task['status'],
            "total_count": task['total_count'],
            "completed_count": task['completed_count'],
            "failed_count": task['failed_count'],
            "created_at": task['created_at'].isoformat() if task['created_at'] else None,
            "updated_at": task['updated_at'].isoformat() if task['updated_at'] else None
        }
        if task['results']:
            try:
                result['results'] = json.loads(task['results'])
            except (TypeError, ValueError):
                # Stored value is not valid JSON; return it verbatim.
                result['results'] = task['results']
        return jsonify({"success": True, "task": result})
    finally:
        conn.close()
|
|
|
+
|
|
|
@app.route('/manager/api/members/batch_tasks', methods=['GET'])
def get_batch_tasks():
    """List the 20 most recently created batch tasks of the logged-in user."""
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    def _iso(moment):
        # Timestamps are serialised as ISO-8601 strings; missing values stay None.
        return moment.isoformat() if moment else None

    db = get_db_connection()
    try:
        with db.cursor() as cur:
            cur.execute("""
                SELECT task_id, status, total_count, completed_count, failed_count,
                last_processed_id, created_at, updated_at
                FROM batch_genealogy_task
                WHERE user_id = %s
                ORDER BY created_at DESC
                LIMIT 20
            """, (session['user_id'],))
            rows = cur.fetchall()

        task_summaries = [
            {
                "task_id": row['task_id'],
                "status": row['status'],
                "total_count": row['total_count'],
                "completed_count": row['completed_count'],
                "failed_count": row['failed_count'],
                "last_processed_id": row['last_processed_id'],
                "created_at": _iso(row['created_at']),
                "updated_at": _iso(row['updated_at']),
            }
            for row in rows
        ]

        return jsonify({"success": True, "tasks": task_summaries})
    finally:
        db.close()
|
|
|
+
|
|
|
def call_doubao_image_api(image_url, prompt):
    """Send an image plus a text prompt to the Doubao (Volcengine Ark) Responses endpoint.

    Args:
        image_url: Image location; it is passed through
            ``get_normalized_base64_image`` before being sent.
        prompt: Text instruction sent alongside the image.

    Returns:
        The parsed JSON response on HTTP 200, otherwise ``None`` (non-200
        statuses and request exceptions are logged with ``print``).
        Exceptions raised by ``get_normalized_base64_image`` propagate.
    """
    import os

    # NOTE(review): a credential committed to source control should be rotated
    # and removed. DOUBAO_API_KEY takes precedence; the literal remains only so
    # existing deployments keep working.
    api_key = os.environ.get("DOUBAO_API_KEY") or "a1800657-9212-4afe-9b7c-b49f015c54d3"
    api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"

    ai_payload_url = get_normalized_base64_image(image_url)

    payload = {
        "model": "doubao-seed-1-8-251228",
        "stream": False,
        "input": [
            {
                "role": "user",
                "content": [
                    {"type": "input_image", "image_url": ai_payload_url},
                    {"type": "input_text", "text": prompt}
                ]
            }
        ]
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    try:
        # NOTE(review): verify=False disables TLS certificate verification
        # while a bearer token is being sent. Left unchanged because the
        # deployment may depend on it; confirm and re-enable if possible.
        response = requests.post(
            api_url,
            json=payload,
            headers=headers,
            timeout=120,
            verify=False,
            proxies={"http": None, "https": None}
        )

        if response.status_code == 200:
            return response.json()

        print(f"[Image AI API] Error: {response.status_code} - {response.text}")
        return None
    except Exception as e:
        print(f"[Image AI API] Exception: {e}")
        return None
|
|
|
+
|
|
|
def extract_pure_text(response):
    """Extract plain text from a parsed API response, preferring the final answer.

    Lookup order:
        1. ``output`` items of type ``message`` (the final answer).
        2. ``output`` items of type ``reasoning`` (summary plus content),
           used only when no message text was found.
        3. Any ``output`` item whose ``content`` is a non-empty string.
        4. Chat-completions style ``choices[0].message.content``.
        5. ``str(response)``.

    Returns:
        The extracted text, or ``''`` when ``response`` is falsy.
    """
    if not response:
        return ''

    if 'output' in response:
        output_items = response['output']

        # Pass 1: final answer. The Responses API documents message text
        # parts as "output_text"; accepting only "text" skipped the real
        # answer and fell through to the reasoning summary below.
        for item in output_items:
            if item.get('type') != 'message':
                continue
            content = item.get('content')
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                answer = _join_text_parts(content, ('text', 'output_text'))
                if answer:
                    return answer

        # Pass 2: reasoning text as a fallback when there is no message text.
        for item in output_items:
            if item.get('type') != 'reasoning':
                continue
            content = item.get('content')
            # `or []` tolerates an explicit null summary.
            all_text = _join_text_parts(item.get('summary') or [], ('summary_text', 'text'))
            if isinstance(content, str):
                all_text += content
            elif isinstance(content, list):
                all_text += _join_text_parts(content, ('text',))
            if all_text:
                return all_text

        # Pass 3: any item whose content is a plain non-empty string.
        for item in output_items:
            content = item.get('content')
            if isinstance(content, str) and content:
                return content

    # OpenAI chat-completions compatible shape
    if 'choices' in response and len(response['choices']) > 0:
        message = response['choices'][0].get('message', {})
        return message.get('content', '')

    return str(response)


def _join_text_parts(parts, accepted_types):
    """Concatenate bare strings and the ``text`` of dict parts whose ``type`` is accepted."""
    pieces = []
    for part in parts:
        if isinstance(part, dict):
            if part.get('type') in accepted_types:
                pieces.append(part.get('text', ''))
        elif isinstance(part, str):
            pieces.append(part)
    return ''.join(pieces)
|
|
|
+
|
|
|
def build_genealogy_prompt(member_name):
    """Build the OCR prompt for extracting one person's record from a genealogy image.

    The prompt (in Chinese) tells the model that the image is a vertically
    typeset Traditional Chinese genealogy page read top-to-bottom and
    right-to-left, lists what a record usually contains, and asks for the
    verbatim record of ``member_name`` only.
    """
    typical_fields = (
        "- 辈字加名讳(如:公諱光元)",
        "- 字号(如:字維亮)",
        "- 行次(如:行仁一)",
        "- 与父亲的关系(如:某某公長子、次子、三子)",
        "- 配偶(如:配李氏、娶王氏)",
        "- 生卒年月(如:生於某年某月、卒於某年某月)",
        "- 葬地(如:葬祖山某向、塟於某地)",
        "- 子嗣(如:子二:長某某、次某某)",
    )
    requirements = (
        f"1. 只输出「{member_name}」这一个人物的记录,不包含其他人的内容",
        "2. 保持繁体字原貌,不要转换为简体",
        "3. 保留原文中的标点符号",
        "4. 不要添加任何解释、标注、序号或额外说明",
        "5. 直接输出原文内容",
    )
    prompt_lines = [
        "这是一张竖排繁体中文家谱图片。图片文字采用竖排格式,从上到下、从右到左逐列阅读。",
        "",
        "每位人物的记录通常包含以下内容(不一定全有):",
        *typical_fields,
        "",
        f"任务:找到人物「{member_name}」在图片中的完整记录,将其繁体原文逐字准确复制输出。",
        "",
        "要求:",
        *requirements,
    ]
    return "\n".join(prompt_lines)
|
|
|
+
|
|
|
+
|
|
|
+def _extract_from_thinking_output(text):
|
|
|
+ """
|
|
|
+ 从推理模型的思维链输出中提取最终答案。
|
|
|
+
|
|
|
+ 推理模型(如 doubao-seed 系列)会在 message 内容里写出完整思考过程:
|
|
|
+ 反复写候选答案、说"不对"再修正,最后以"现在确认/所以输出这个内容"等结论收尾。
|
|
|
+ 本函数的策略:
|
|
|
+ 1. 找最后一个"答案引导词 + 冒号"之后的文本(如"准确的原文是:"、"准确复制:")
|
|
|
+ 2. 若无引导词,则取"现在确认"/"所以输出这个内容"之前的最后一段文本
|
|
|
+ 3. 以上均失败则原文返回
|
|
|
+ """
|
|
|
+ # 思维链特征词
|
|
|
+ THINKING_SIGNALS = ['不对,', '现在确认', '准确复制', '准确的原文是', '正确的输出是', '所以输出这个内容']
|
|
|
+ if not any(sig in text for sig in THINKING_SIGNALS):
|
|
|
+ return text # 非思维链输出,原样返回
|
|
|
+
|
|
|
+ print(f"[CleanText] Detected thinking-model output, extracting final answer")
|
|
|
+
|
|
|
+ # ---- 策略1:找最后一个答案引导词 ----
|
|
|
+ ANSWER_INTRO_PATTERNS = [
|
|
|
+ r'准确的原文是[::]\s*',
|
|
|
+ r'正确的输出是[::]\s*',
|
|
|
+ r'现在准确复制[::]\s*',
|
|
|
+ r'准确复制[::]\s*',
|
|
|
+ r'应该是[::]\s*',
|
|
|
+ r'因此输出[::]\s*',
|
|
|
+ r'所以正确.*?是[::]\s*',
|
|
|
+ r'原文是[::]\s*',
|
|
|
+ r'输出[::]\s*',
|
|
|
+ ]
|
|
|
+ last_end = -1
|
|
|
+ for pattern in ANSWER_INTRO_PATTERNS:
|
|
|
+ for m in re.finditer(pattern, text):
|
|
|
+ if m.end() > last_end:
|
|
|
+ last_end = m.end()
|
|
|
+
|
|
|
+ if last_end >= 0:
|
|
|
+ remaining = text[last_end:]
|
|
|
+ # 取到第一个"结束标志"前
|
|
|
+ END_MARKERS = ['不对', '现在确认', '但是', '然而', '\n\n']
|
|
|
+ end_pos = len(remaining)
|
|
|
+ for marker in END_MARKERS:
|
|
|
+ idx = remaining.find(marker)
|
|
|
+ if 0 < idx < end_pos:
|
|
|
+ end_pos = idx
|
|
|
+ candidate = remaining[:end_pos].strip()
|
|
|
+ if len(candidate) >= 5:
|
|
|
+ print(f"[CleanText] Extracted via answer-intro pattern: '{candidate[:80]}'")
|
|
|
+ return candidate
|
|
|
+
|
|
|
+ # ---- 策略2:取"现在确认"之前的最后一段 ----
|
|
|
+ for end_phrase in ['现在确认', '所以输出这个内容', '这就是.*?的完整记录']:
|
|
|
+ m = re.search(end_phrase, text)
|
|
|
+ if m:
|
|
|
+ before = text[:m.start()].rstrip()
|
|
|
+ # 找最后一个换行符,取之后的内容
|
|
|
+ last_nl = before.rfind('\n')
|
|
|
+ candidate = (before[last_nl + 1:] if last_nl >= 0 else before[-400:]).strip()
|
|
|
+ if len(candidate) >= 5:
|
|
|
+ print(f"[CleanText] Extracted before confirmation phrase: '{candidate[:80]}'")
|
|
|
+ return candidate
|
|
|
+
|
|
|
+ return text # 均失败则原样返回
|
|
|
+
|
|
|
+
|
|
|
+def _apply_char_whitelist(text):
|
|
|
+ """只保留汉字(含扩展A区)和常见中文标点"""
|
|
|
+ return re.sub(
|
|
|
+ r'[^\u4e00-\u9fff\u3400-\u4dbf\u3000-\u303f\uff00-\uffef,。;:、()【】「」『』〔〕·~—…《》]',
|
|
|
+ '', text
|
|
|
+ ).strip()
|
|
|
+
|
|
|
+
|
|
|
def clean_genealogy_text(text):
    """Clean genealogy text extracted from an AI response.

    Steps, in order:
      1. Strip a leading/trailing Markdown code fence.
      2. If the text is a JSON object, take the first of the known keys
         that is present.
      3. Run ``_extract_from_thinking_output`` to isolate a final answer.
      4. Drop blank lines, lines with no non-ASCII characters and lines that
         start with a note/explanation prefix, then join the rest.
      5. Apply ``_apply_char_whitelist``.

    Returns ``''`` for falsy input.
    """
    if not text:
        return ''

    # Step 1: remove code-fence markers.
    body = text.strip()
    body = re.sub(r'^```[a-z]*\n?', '', body)
    body = re.sub(r'\n?```$', '', body)
    body = body.strip()

    # Step 2: unwrap a JSON object that carries the text under a known key.
    known_keys = ('text', 'content', 'result', 'traditional', 'genealogy_traditional')
    try:
        decoded = json.loads(body)
        if isinstance(decoded, dict):
            present = [key for key in known_keys if key in decoded]
            if present:
                body = str(decoded[present[0]])
    except (json.JSONDecodeError, ValueError):
        pass

    # Step 3: must run before line filtering, which removes the newline
    # structure the extraction relies on.
    body = _extract_from_thinking_output(body)

    # Step 4: line-level filtering.
    note_prefix = re.compile(r'^(注[::]|说明[::]|Note[::]|备注[::])')
    surviving = []
    for raw_line in body.splitlines():
        candidate = raw_line.strip()
        if not candidate:
            continue
        if all(ord(ch) <= 127 for ch in candidate):
            continue
        if note_prefix.match(candidate):
            continue
        surviving.append(candidate)
    body = ''.join(surviving)

    # Step 5: keep only whitelisted characters.
    return _apply_char_whitelist(body)
|
|
|
+
|
|
|
def _batch_compose_genealogy_text(name, father_name, spouse_name, generation):
    """Assemble a one-line genealogy entry from basic facts.

    The result has the shape "第<generation>世 <name>,<father>之子,配<spouse>";
    every part whose value is empty/None is left out.
    """
    text = f"{name},{father_name}之子" if father_name else name
    if spouse_name:
        text += f",配{spouse_name}"
    if generation:
        text = f"第{generation}世 " + text
    return text


def _batch_find_person(ai_content, member_name):
    """Find the first AI-parsed person whose name overlaps ``member_name``.

    Entries that are not dicts are skipped, and a missing/None/empty
    ``original_name`` falls back to ``name``. Returns a
    ``(person_dict, stripped_name)`` pair, or ``(None, '')`` when nothing matches.
    """
    for person in ai_content:
        if not isinstance(person, dict):
            continue
        person_name = str(person.get('original_name') or person.get('name') or '').strip()
        if person_name and (member_name in person_name or person_name in member_name):
            return person, person_name
    return None, ''


def async_process_all_empty_genealogy(task_id, user_id):
    """
    Background worker that fills in the genealogy text of every member whose
    traditional AND simplified genealogy text are both empty.

    Members are processed one at a time in ascending id order, starting after
    the task's stored ``last_processed_id`` so an interrupted task can resume.
    For each member the text is obtained from, in order of preference:
      1. the linked record image, via the AI image API;
      2. the record's stored AI content (a JSON list of persons);
      3. basic facts (father / spouse / generation) read from the relation table.

    Connection policy: a DB connection is held only for short reads/writes and
    never while the (slow) AI call is running, so other requests are not starved.

    Args:
        task_id: id of the row in ``batch_genealogy_task`` that tracks progress.
        user_id: id of the user who started the task. Not used by the worker
            itself; kept so existing callers keep working.

    Returns:
        None. Progress and the final status ('completed' or
        'completed_with_errors') are written to ``batch_genealogy_task``.
        ``results`` only contains the members handled by this run.
    """
    import time

    # -- 1. Read the checkpoint, then release the connection immediately ------
    last_processed_id = 0
    completed_count = 0
    failed_count = 0
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "SELECT last_processed_id, completed_count, failed_count "
                "FROM batch_genealogy_task WHERE task_id = %s",
                (task_id,)
            )
            task = cursor.fetchone()
        if task:
            # Continue from the stored counters so a resumed run does not
            # overwrite earlier progress with values restarted from zero.
            last_processed_id = task['last_processed_id'] or 0
            completed_count = task['completed_count'] or 0
            failed_count = task['failed_count'] or 0
    finally:
        conn.close()

    results = []
    aborted = False  # set when the worker has to stop before the queue is empty

    while True:
        # -- 2. Fetch the next pending member (short-lived connection) --------
        try:
            conn = get_db_connection()
            try:
                with conn.cursor() as cursor:
                    cursor.execute("""
                        SELECT m.id, m.name, m.name_word_generation, m.source_record_id,
                               r.oss_url AS image_url, r.ai_content AS record_ai_content
                        FROM family_member_info m
                        LEFT JOIN genealogy_records r ON m.source_record_id = r.id
                        WHERE (m.genealogy_original_traditional IS NULL
                               OR m.genealogy_original_traditional = ''
                               OR m.genealogy_original_traditional = 'None')
                          AND (m.genealogy_original_simplified IS NULL
                               OR m.genealogy_original_simplified = ''
                               OR m.genealogy_original_simplified = 'None')
                          AND m.id > %s
                        ORDER BY m.id ASC
                        LIMIT 1
                    """, (last_processed_id,))
                    member = cursor.fetchone()
            finally:
                conn.close()
        except Exception as fetch_err:
            # Without this guard the thread would die here and leave the task
            # stuck in 'processing', which blocks the creation of new tasks.
            print(f"[Batch Process] Failed to fetch next member after id {last_processed_id}: {fetch_err}")
            aborted = True
            break

        if not member:
            break

        member_id = member['id']
        member_name = member['name']
        image_url = member['image_url']
        record_ai_content = member['record_ai_content']

        print(f"[Batch Process] Processing member {member_id}: {member_name}")

        traditional = ""
        simplified = ""
        extract_source = "basic_info"

        try:
            # -- 3. AI extraction from the image (no DB connection held) ------
            if image_url:
                print(f"[Batch Process] Extracting from image: {image_url}")
                prompt = build_genealogy_prompt(member_name)
                ai_response = call_doubao_image_api(image_url, prompt)
                print(f"[Batch Process] AI response for {member_id}: {str(ai_response)[:300]}")

                if ai_response:
                    raw_text = extract_pure_text(ai_response)
                    candidate = clean_genealogy_text(raw_text) or ""
                    print(f"[Batch Process] Cleaned traditional: {candidate[:100]}")

                    # Accept the text only if it mentions at least one CJK
                    # character of the member's name and is not trivially short.
                    name_chars = [c for c in member_name if '\u4e00' <= c <= '\u9fff']
                    name_found = any(c in candidate for c in name_chars)

                    if candidate and len(candidate) >= 5 and name_found:
                        traditional = candidate
                        simplified = convert_to_simplified(traditional)
                        extract_source = "image"
                        print(f"[Batch Process] Image extract OK - trad: {traditional[:80]}")
                    else:
                        # Log the length of the rejected text (not of the reset value).
                        print(f"[Batch Process] Image extract invalid "
                              f"(name_found={name_found}, len={len(candidate)}), resetting")

            # -- 4. Fallback: assemble from the record's AI content (in memory)
            if not (traditional and simplified) and record_ai_content:
                print(f"[Batch Process] Fallback: trying record AI content")
                try:
                    ai_content = json.loads(record_ai_content)
                    if isinstance(ai_content, list):
                        current_person, matched_name = _batch_find_person(ai_content, member_name)

                        if current_person:
                            traditional = _batch_compose_genealogy_text(
                                matched_name,
                                current_person.get('father_name', ''),
                                current_person.get('spouse_name', ''),
                                current_person.get('generation', member['name_word_generation']),
                            )
                            simplified = convert_to_simplified(traditional)
                            extract_source = "ai_content"
                            print(f"[Batch Process] AI content fallback: {traditional[:80]}")
                        else:
                            print(f"[Batch Process] No matching person for '{member_name}' in AI content")
                except Exception as e:
                    print(f"[Batch Process] Failed to parse record AI content: {e}")

            # -- 5. Last resort: father/spouse from the relation table --------
            if not (traditional and simplified):
                print(f"[Batch Process] Fallback: basic info from DB")
                conn = get_db_connection()
                try:
                    with conn.cursor() as cursor:
                        cursor.execute("""
                            SELECT p.name FROM family_relation_info r
                            JOIN family_member_info p ON r.parent_mid = p.id
                            WHERE r.child_mid = %s AND r.relation_type = 1 LIMIT 1
                        """, (member_id,))
                        father = cursor.fetchone()

                        cursor.execute("""
                            SELECT p.name FROM family_relation_info r
                            JOIN family_member_info p ON r.parent_mid = p.id
                            WHERE r.child_mid = %s AND r.relation_type = 2 LIMIT 1
                        """, (member_id,))
                        spouse = cursor.fetchone()
                finally:
                    conn.close()

                traditional = _batch_compose_genealogy_text(
                    member_name,
                    father['name'] if father else '',
                    spouse['name'] if spouse else '',
                    member['name_word_generation'],
                )
                simplified = convert_to_simplified(traditional)
                extract_source = "basic_info"
                print(f"[Batch Process] Basic info fallback: {traditional[:80]}")

        except Exception as extract_err:
            print(f"[Batch Process] Extraction error for member {member_id}: {extract_err}")
            traditional = ""
            simplified = ""

        # -- 6. Persist the result (short-lived connection) -------------------
        # Advance the in-memory checkpoint first so that a failing member can
        # never be selected again by this run.
        last_processed_id = member_id
        has_text = bool(traditional and simplified)
        if has_text:
            outcome = {
                "member_id": member_id,
                "name": member_name,
                "success": True,
                "source": extract_source,
                "traditional_length": len(traditional),
                "simplified_length": len(simplified),
            }
        else:
            outcome = {
                "member_id": member_id,
                "name": member_name,
                "success": False,
                "message": "无法提取或生成族谱原文",
            }
        new_completed = completed_count + (1 if has_text else 0)
        new_failed = failed_count + (0 if has_text else 1)

        try:
            conn = get_db_connection()
            try:
                with conn.cursor() as cursor:
                    if has_text:
                        cursor.execute("""
                            UPDATE family_member_info
                            SET genealogy_original_traditional = %s,
                                genealogy_original_simplified = %s
                            WHERE id = %s
                        """, (traditional, simplified, member_id))
                    cursor.execute("""
                        UPDATE batch_genealogy_task
                        SET completed_count = %s,
                            failed_count = %s,
                            last_processed_id = %s,
                            status = 'processing'
                        WHERE task_id = %s
                    """, (new_completed, new_failed, last_processed_id, task_id))
                conn.commit()
            finally:
                conn.close()

            # Counters and the result list change only after a successful
            # commit, so a failed save is counted exactly once (as a failure).
            completed_count, failed_count = new_completed, new_failed
            results.append(outcome)
            if has_text:
                print(f"[Batch Process] Saved member {member_id} (source={extract_source})")
            else:
                print(f"[Batch Process] Skipped member {member_id}: no valid text extracted")
        except Exception as db_err:
            print(f"[Batch Process] DB save error for member {member_id}: {db_err}")
            failed_count += 1
            results.append({
                "member_id": member_id,
                "name": member_name,
                "success": False,
                "message": f"保存到数据库失败: {db_err}",
            })

        # Brief pause after each member to ease the load on the AI API and server.
        time.sleep(0.5)

    # -- 7. Write the final task status ---------------------------------------
    status = 'completed' if (failed_count == 0 and not aborted) else 'completed_with_errors'
    try:
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute("""
                    UPDATE batch_genealogy_task
                    SET status = %s,
                        completed_count = %s,
                        failed_count = %s,
                        results = %s
                    WHERE task_id = %s
                """, (status, completed_count, failed_count,
                      json.dumps(results, ensure_ascii=False), task_id))
            conn.commit()
        finally:
            conn.close()
        print(f"[Batch Process] Task {task_id} done: "
              f"{completed_count} success, {failed_count} failed")
    except Exception as e:
        print(f"[Batch Process] Error writing final status for {task_id}: {e}")
|
|
|
+
|
|
|
def _single_row_field(row, key, index):
    """Read one column from a DB row that may be a dict or a plain sequence.

    Returns None when ``row`` is empty/None. Needed because this module reads
    rows both by key and by position, depending on the cursor class in use.
    """
    if not row:
        return None
    if isinstance(row, dict):
        return row.get(key)
    return row[index]


def _single_compose_genealogy_text(name, father_name, spouse_name, generation):
    """Assemble a one-line genealogy entry from basic facts.

    The result has the shape "第<generation>世 <name>,<father>之子,配<spouse>";
    every part whose value is empty/None is left out.
    """
    text = f"{name},{father_name}之子" if father_name else name
    if spouse_name:
        text += f",配{spouse_name}"
    if generation:
        text = f"第{generation}世 " + text
    return text


def _single_fetch_father_and_spouse(member_id):
    """Look up the names linked to ``member_id`` in ``family_relation_info``.

    Uses one short-lived connection for both lookups. The row with
    ``relation_type = 1`` is used as the father and ``relation_type = 2`` as
    the spouse, exactly as the original queries did. Returns a
    ``(father_name, spouse_name)`` pair with '' for anything not found.
    """
    query = """
        SELECT p.name
        FROM family_relation_info r
        JOIN family_member_info p ON r.parent_mid = p.id
        WHERE r.child_mid = %s AND r.relation_type = %s
        LIMIT 1
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(query, (member_id, 1))
            father_row = cursor.fetchone()
            cursor.execute(query, (member_id, 2))
            spouse_row = cursor.fetchone()
    finally:
        conn.close()

    father_name = _single_row_field(father_row, 'name', 0) or ''
    spouse_name = _single_row_field(spouse_row, 'name', 0) or ''
    return father_name, spouse_name


@app.route('/manager/api/members/extract_genealogy/<int:member_id>', methods=['GET'])
def extract_single_genealogy(member_id):
    """Extract the genealogy text for one member and store it in the database.

    Follows the same order of sources as the batch job: the linked record image
    (AI image API), then the record's stored AI content, then basic facts
    (father / spouse / generation). DB connections are opened only for the
    short read/write phases and are not held during the AI call.

    Args:
        member_id: primary key of the member in ``family_member_info``.

    Returns:
        A JSON response. 401 when not logged in, 404 when the member does not
        exist; otherwise a payload with ``success`` plus either the extracted
        texts and their ``source`` or an error ``message``.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    try:
        # -- Phase 1: load the member and its linked record -------------------
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute("""
                    SELECT
                        m.id, m.name, m.name_word_generation,
                        m.source_record_id, r.oss_url as image_url,
                        r.ai_content AS record_ai_content
                    FROM family_member_info m
                    LEFT JOIN genealogy_records r ON m.source_record_id = r.id
                    WHERE m.id = %s
                """, (member_id,))
                row = cursor.fetchone()
        finally:
            conn.close()

        if not row:
            return jsonify({"success": False, "message": "未找到成员"}), 404

        # Normalise dict rows and tuple rows to one dict shape.
        if isinstance(row, dict):
            member = row
        else:
            member = {
                'id': row[0],
                'name': row[1],
                'name_word_generation': row[2],
                'source_record_id': row[3],
                'image_url': row[4],
                'record_ai_content': row[5]
            }

        member_name = member['name']
        image_url = member['image_url']
        record_ai_content = member['record_ai_content']
        ai_preview = record_ai_content[:50] if record_ai_content else None

        # Debug: dump the query result.
        print(f"[Single Extract] Query result - id: {member['id']}, name: '{member['name']}', "
              f"name_word_generation: '{member['name_word_generation']}', "
              f"source_record_id: {member['source_record_id']}, "
              f"image_url: '{member['image_url']}', record_ai_content: '{ai_preview}'")

        traditional = ""
        simplified = ""
        source = "basic_info"

        print(f"[Single Extract] Processing member {member_id}: {member['name']}")

        # -- Phase 2: prefer text extracted from the linked image (no DB held)
        if image_url:
            print(f"[Single Extract] Extracting from image: {image_url}")

            prompt = build_genealogy_prompt(member_name)
            ai_response = call_doubao_image_api(image_url, prompt)

            print(f"[Single Extract] AI response: {str(ai_response)[:500]}")

            if ai_response:
                raw_text = extract_pure_text(ai_response)
                print(f"[Single Extract] Raw text from response: '{raw_text[:300]}'")

                candidate = clean_genealogy_text(raw_text) or ""
                print(f"[Single Extract] Cleaned traditional: '{candidate[:200]}', length: {len(candidate)}")

                # The text must mention at least one CJK character of the name.
                name_chars = [c for c in member_name if '\u4e00' <= c <= '\u9fff']
                name_found = any(c in candidate for c in name_chars)

                if candidate and len(candidate) >= 5 and name_found:
                    traditional = candidate
                    simplified = convert_to_simplified(traditional)
                    source = "image"
                    print(f"[Single Extract] Extracted from image - traditional: {traditional[:100]}, simplified: {simplified[:100]}")
                elif not name_found:
                    print(f"[Single Extract] Extracted text does not contain name '{member_name}', resetting")
                else:
                    # Report the length of the rejected text, not of the reset value.
                    print(f"[Single Extract] Image extraction too short ({len(candidate)} chars), resetting")
            else:
                print(f"[Single Extract] AI response is None or empty")
        else:
            print(f"[Single Extract] No image URL found for member {member_id}")

        # -- Phase 3: fall back to the record's stored AI content -------------
        if not (traditional and simplified) and record_ai_content:
            print(f"[Single Extract] Trying to extract from record AI content")
            try:
                ai_content = json.loads(record_ai_content)
                if isinstance(ai_content, list) and len(ai_content) > 0:
                    current_person = None
                    matched_name = ''

                    for person in ai_content:
                        if not isinstance(person, dict):
                            continue  # tolerate malformed entries instead of aborting
                        person_name = str(person.get('original_name') or person.get('name') or '').strip()
                        if person_name and (member_name in person_name or person_name in member_name):
                            current_person = person
                            matched_name = person_name
                            break

                    if current_person:
                        # The father clause is added only when a father name
                        # exists, matching the batch job's behaviour.
                        traditional = _single_compose_genealogy_text(
                            matched_name,
                            current_person.get('father_name', ''),
                            current_person.get('spouse_name', ''),
                            current_person.get('generation', member['name_word_generation']),
                        )
                        simplified = convert_to_simplified(traditional)
                        source = "ai_content"
                        print(f"[Single Extract] Generated from AI content: {traditional}")
            except Exception as e:
                print(f"[Single Extract] Failed to parse record AI content: {e}")

        # -- Phase 4: last resort, build the text from basic facts ------------
        if not (traditional and simplified):
            print(f"[Single Extract] Generating from basic info")
            father_name, spouse_name = _single_fetch_father_and_spouse(member_id)

            traditional = _single_compose_genealogy_text(
                member['name'], father_name, spouse_name, member['name_word_generation']
            )
            simplified = convert_to_simplified(traditional)
            source = "basic_info"
            print(f"[Single Extract] Generated from basic info: {traditional}")

        # Debug: dump the final result.
        print(f"[Single Extract] Final result - traditional: '{traditional}', simplified: '{simplified}'")

        if not (traditional and simplified):
            return jsonify({
                "success": False,
                "member_id": member_id,
                "message": "无法提取或生成族谱原文"
            })

        # -- Phase 5: persist the result --------------------------------------
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute("""
                    UPDATE family_member_info
                    SET genealogy_original_traditional = %s,
                        genealogy_original_simplified = %s
                    WHERE id = %s
                """, (traditional, simplified, member_id))
            conn.commit()
        finally:
            conn.close()
        print(f"[Single Extract] Successfully saved to database")

        return jsonify({
            "success": True,
            "member_id": member_id,
            "name": member['name'],
            "genealogy_traditional": traditional,
            "genealogy_simplified": simplified,
            "source": source
        })
    except Exception as e:
        import traceback
        print(f"[Single Extract] Error: {e}")
        print(f"[Single Extract] Traceback: {traceback.format_exc()}")
        return jsonify({
            "success": False,
            "member_id": member_id,
            "message": str(e),
            "error_type": type(e).__name__
        })
|
|
|
+
|
|
|
@app.route('/manager/api/members/batch_process_all_empty', methods=['GET'])
def batch_process_all_empty():
    """Start a background task that fills in every member with empty genealogy text.

    Counts the members whose traditional and simplified genealogy text are both
    empty, refuses to start when the current user already has a task in
    'pending' or 'processing' state, and otherwise inserts a new task row and
    launches ``async_process_all_empty_genealogy`` in a daemon thread.

    Returns:
        A JSON response: 401 when not logged in; ``success: False`` with the
        running task's progress when one exists; otherwise ``success: True``
        with the new ``task_id`` and ``total_count``.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    current_user = session['user_id']
    db = get_db_connection()
    try:
        with db.cursor() as cur:
            # How many members still need their genealogy text?
            cur.execute("""
                SELECT COUNT(*) as count
                FROM family_member_info
                WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
                AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
            """)
            count_row = cur.fetchone()
            pending_total = 0 if not count_row else count_row['count']

            # Most recent unfinished task of this user, if any.
            cur.execute("""
                SELECT task_id, status, last_processed_id, total_count, completed_count, failed_count
                FROM batch_genealogy_task
                WHERE user_id = %s AND status IN ('pending', 'processing')
                ORDER BY created_at DESC
                LIMIT 1
            """, (current_user,))
            active = cur.fetchone()

        if active:
            # Only one unfinished task per user: report its progress instead.
            busy_payload = {
                "success": False,
                "message": "存在正在进行的任务",
                "task_id": active['task_id'],
                "status": active['status'],
                "last_processed_id": active['last_processed_id'],
                "completed_count": active['completed_count'],
                "total_count": active['total_count']
            }
            return jsonify(busy_payload)

        new_task_id = str(uuid.uuid4())

        with db.cursor() as cur:
            cur.execute("""
                INSERT INTO batch_genealogy_task (task_id, user_id, status, total_count, last_processed_id)
                VALUES (%s, %s, 'processing', %s, 0)
            """, (new_task_id, current_user, pending_total))
        db.commit()

        # The task row is committed before the worker starts so the worker can read it.
        worker = threading.Thread(
            target=async_process_all_empty_genealogy,
            args=(new_task_id, current_user),
            daemon=True
        )
        worker.start()

        created_payload = {
            "success": True,
            "task_id": new_task_id,
            "message": f"任务已创建,将处理 {pending_total} 个族谱原文为空的成员",
            "total_count": pending_total
        }
        return jsonify(created_payload)
    finally:
        db.close()
|
|
|
+
|
|
|
# Script entry point: when this file is executed directly (not imported),
# start the app's built-in server on port 5001 with debug mode turned off.
if __name__ == '__main__':
    app.run(debug=False, port=5001)
|