linh
/
genealogy-app


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
12761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775
177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174
import itertools
import json
import os
import re
import threading
import time
from datetime import datetime

import fitz  # PyMuPDF
import pymysql
import requests
import urllib3
from flask import Flask, render_template, request, redirect, url_for, session, flash, jsonify, Response, stream_with_context
from werkzeug.utils import secure_filename

from oss_utils import upload_to_oss
from ocr_utils import extract_page_number

# Suppress InsecureRequestWarning: the outbound AI request in this module is
# sent with verify=False, which would otherwise emit this warning on each call.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Static assets are exposed under /manager/static, alongside the /manager/* routes.
app = Flask(__name__, static_folder='static', static_url_path='/manager/static')
# The session-signing key can be supplied through the GENEALOGY_SECRET_KEY
# environment variable so it does not have to live in source control. The
# historical value is kept as the fallback so existing deployments (and the
# sessions they already issued) keep working unchanged.
app.secret_key = os.environ.get('GENEALOGY_SECRET_KEY') or 'genealogy_secret_key'
# Uploaded files are staged in a local 'uploads' directory (relative to the
# process working directory); create it up front so later saves cannot fail
# on a missing folder.
app.config['UPLOAD_FOLDER'] = 'uploads'
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Database configuration.
# Each credential can be overridden with a GENEALOGY_DB_* environment variable
# so secrets do not have to be committed with the code. When a variable is
# unset (or empty) the historical value below is used, so behavior is
# unchanged for existing deployments.
DB_CONFIG = {
    "host": os.environ.get("GENEALOGY_DB_HOST") or "rm-f8ze60yirdj8786u2wo.mysql.rds.aliyuncs.com",
    "port": int(os.environ.get("GENEALOGY_DB_PORT") or 3306),
    "user": os.environ.get("GENEALOGY_DB_USER") or "root",
    "password": os.environ.get("GENEALOGY_DB_PASSWORD") or "csqz@20255",
    "db": os.environ.get("GENEALOGY_DB_NAME") or "csqz-client",
    "charset": "utf8mb4",
    # Rows are returned as dicts; the rest of the module indexes rows by column name.
    "cursorclass": pymysql.cursors.DictCursor
}

from PIL import Image

def compress_image_if_needed(file_path, max_dim=2000):
    """Normalize an image file to an RGB JPEG, downscaling it when too large.

    The image is converted to RGB if needed, resized so that its longest side
    is at most ``max_dim`` pixels, and written next to the source file as
    ``<stem>_normalized.jpg`` (quality 85).

    Returns the path of the normalized JPEG, or the unchanged ``file_path``
    when anything goes wrong (the failure is printed, not raised).
    """
    try:
        with Image.open(file_path) as picture:
            # JPEG cannot store alpha/palette data, so force plain RGB first.
            if picture.mode != 'RGB':
                picture = picture.convert('RGB')

            src_w, src_h = picture.size
            longest_side = max(src_w, src_h)
            if longest_side > max_dim:
                scale = max_dim / longest_side
                scaled_size = (int(src_w * scale), int(src_h * scale))
                picture = picture.resize(scaled_size, Image.Resampling.LANCZOS)

            # The output is always re-encoded as JPEG, even if no resize happened.
            stem, _extension = os.path.splitext(file_path)
            normalized_path = stem + '_normalized.jpg'
            picture.save(normalized_path, 'JPEG', quality=85)
            return normalized_path
    except Exception as exc:
        print(f"Warning: Image compression/normalization failed for {file_path}: {exc}")
        return file_path

def get_db_connection():
    """Open and return a new pymysql connection built from ``DB_CONFIG``.

    Each call creates a fresh connection; the callers in this module close it
    themselves in a ``finally`` block.
    """
    return pymysql.connect(**DB_CONFIG)

def format_timestamp(ts):
    """Format a Unix timestamp as a local ``YYYY-MM-DD`` date string.

    ``ts`` may be expressed in seconds or in milliseconds. Falsy values
    (``None``, ``0``, ``''``) and values that cannot be converted yield the
    placeholder '未知'.
    """
    if not ts:
        return '未知'
    try:
        # Accept both seconds and milliseconds: a value above 10^10 would be a
        # date beyond the year 2286 if read as seconds, so treat it as ms.
        if ts > 10000000000:
            ts = ts / 1000
        return time.strftime('%Y-%m-%d', time.localtime(ts))
    except (TypeError, ValueError, ArithmeticError, OSError):
        # Non-numeric input (TypeError), NaN (ValueError), out-of-range values
        # (OverflowError / OSError). Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return '未知'

# Fallback Traditional -> Simplified character table. This is the single
# source of truth for both manual_simplify() and the reverse (Simplified ->
# Traditional) lookup built below, so the two directions cannot drift apart.
# Identity entries (e.g. '言' -> '言') are kept on purpose: they also appear in
# the reverse map.
_MANUAL_SIMPLIFY_TABLE = {
    '學': '学', '國': '国', '萬': '万', '寶': '宝', '興': '兴',
    '華': '华', '會': '会', '葉': '叶', '藝': '艺', '號': '号',
    '處': '处', '見': '见', '視': '视', '言': '言', '語': '语',
    '貝': '贝', '車': '车', '長': '长', '門': '门', '韋': '韦',
    '頁': '页', '風': '风', '飛': '飞', '食': '食', '馬': '马',
    '魚': '鱼', '鳥': '鸟', '麥': '麦', '黃': '黄', '齊': '齐',
    '齒': '齿', '龍': '龙', '龜': '龟', '壽': '寿', '榮': '荣',
    '愛': '爱', '慶': '庆', '衛': '卫', '賢': '贤', '義': '义',
    '禮': '礼', '樂': '乐', '靈': '灵', '滅': '灭', '氣': '气',
    '智': '智', '信': '信', '仁': '仁', '勇': '勇', '嚴': '严',
    '銳': '锐', '優': '优', '楊': '杨', '吳': '吴', '銀': '银'
}


def manual_simplify(text):
    """
    Simple fallback for common Traditional to Simplified conversion
    if AI fails to convert specific characters.

    Characters found in the fallback table are replaced; every other
    character is passed through unchanged. Falsy input (``None``, ``''``) is
    returned as-is.
    """
    if not text:
        return text
    # str.join builds the result in one pass instead of repeated `+=`.
    return "".join(_MANUAL_SIMPLIFY_TABLE.get(char, char) for char in text)


def _build_reverse_simplify_map():
    """
    Build a reverse map from simplified char -> list of traditional chars
    based on the fallback manual_simplify mapping.
    """
    rev = {}
    for trad, simp in _MANUAL_SIMPLIFY_TABLE.items():
        candidates = rev.setdefault(simp, [])
        if trad not in candidates:
            candidates.append(trad)
    return rev


# Built once at import time; read by expand_name_search_variants().
_REVERSE_SIMPLIFY_MAP = _build_reverse_simplify_map()

def expand_name_search_variants(keyword, max_variants=60):
    """
    Expand keyword into a small set of variants so Simplified/Traditional
    searches can match both `name` and `simplified_name`.

    - Always includes the original (stripped) keyword, as the first element
    - Includes fallback-trad->simp conversion
    - Includes best-effort simp->trad expansions based on reverse map

    The result is a list of unique, non-empty strings in a deterministic
    order, capped at ``max_variants`` entries (at least one entry is always
    returned for a non-blank keyword). Every variant has the same length as
    the keyword: when the cap cuts the expansion short, the characters that
    were not branched keep their original form instead of being dropped.

    Returns an empty list for a missing or blank keyword.
    """
    if not keyword:
        return []
    kw = str(keyword).strip()
    if not kw:
        return []

    # Always leave room for the keyword itself, even with a cap of 0 or less.
    limit = max(int(max_variants), 1)

    # Per-character alternatives: the character itself first, then any
    # traditional forms known for it. Identity entries in the reverse map
    # (e.g. '言' -> ['言']) are skipped so they do not create duplicate branches.
    choices = []
    for ch in kw:
        alternatives = [ch]
        for trad in _REVERSE_SIMPLIFY_MAP.get(ch, ()):
            if trad not in alternatives:
                alternatives.append(trad)
        choices.append(alternatives)

    # A dict preserves insertion order, giving stable SQL parameters across
    # processes (iterating a set of str does not, because of hash randomization).
    ordered = {}

    def _remember(candidate):
        candidate = (candidate or '').strip()
        if candidate and len(ordered) < limit:
            ordered.setdefault(candidate, None)

    _remember(kw)
    _remember(manual_simplify(kw))

    # Lazily walk the cartesian product; islice bounds the work so a long
    # keyword cannot explode combinatorially. The last characters vary
    # fastest, and every combination is a full-length string.
    for combo in itertools.islice(itertools.product(*choices), limit):
        variant = ''.join(combo)
        _remember(variant)
        _remember(manual_simplify(variant))
        if len(ordered) >= limit:
            break

    return list(ordered)

def clean_name(name):
    """
    Normalize a clan member's name to the '留'-surname convention.

    Steps, applied to the stripped and fallback-simplified name:
    1. '学公' and '留学公' are exceptions: their trailing '公' is kept.
    2. Any other name ending in '公' loses that final character.
    3. The result is prefixed with '留' unless it already starts with it.

    Falsy input (``None``, ``''``) is returned unchanged.
    """
    if not name:
        return name

    # Work on the simplified form so the checks below see simplified chars.
    simplified = manual_simplify(name.strip())

    keeps_gong = simplified in ('学公', '留学公')
    if not keeps_gong and simplified.endswith('公'):
        simplified = simplified[:-1]

    # Both the exception names and regular names get the surname prefix.
    return simplified if simplified.startswith('留') else '留' + simplified

def is_female_value(sex_value):
    """Tell whether ``sex_value`` denotes female.

    The value is stringified, stripped and lower-cased, then compared with
    the accepted spellings '女', '2', 'female' and 'f'. ``None`` is never
    female.
    """
    return sex_value is not None and str(sex_value).strip().lower() in {'女', '2', 'female', 'f'}

def normalize_lookup_name(name):
    """Return a comparison key for a name found in AI-parsed content.

    The name is stringified, run through the fallback Traditional ->
    Simplified conversion and stripped. Falsy input yields ''.
    """
    return manual_simplify(str(name)).strip() if name else ''

def should_skip_liu_prefix_for_person(person, spouse_name_set):
    """
    Decide whether a parsed person is a female spouse, whose simplified_name
    must not get the automatic '留' prefix.

    A person qualifies when the record is a dict whose sex is female and
    either:
    - it carries a non-empty spouse_name of its own, or
    - its name / original_name (normalized) appears in ``spouse_name_set``,
      i.e. another person lists it as their spouse.
    """
    if not (isinstance(person, dict) and is_female_value(person.get('sex'))):
        return False

    # Normalized forms of the person's own names, ignoring missing ones.
    candidate_names = {
        normalize_lookup_name(person.get(field))
        for field in ('name', 'original_name')
    }
    candidate_names.discard('')

    lists_a_spouse = bool(normalize_lookup_name(person.get('spouse_name')))
    listed_as_spouse = any(alias in spouse_name_set for alias in candidate_names)
    return lists_a_spouse or listed_as_spouse

def get_normalized_base64_image(image_url):
    """Fetch an image and return it as a base64 JPEG data URI for the AI payload.

    The downloaded image is converted to RGB, downscaled so its longest side
    is at most 2000 pixels, and re-encoded as JPEG (quality 85).

    On any failure (download, decoding, encoding) the error is printed and
    the original ``image_url`` is returned instead.
    """
    import base64
    import io
    import requests
    from PIL import Image

    longest_allowed = 2000

    try:
        reply = requests.get(image_url, timeout=30)
        reply.raise_for_status()

        with Image.open(io.BytesIO(reply.content)) as picture:
            # JPEG has no alpha/palette support, so force plain RGB.
            if picture.mode != 'RGB':
                picture = picture.convert('RGB')

            longest_side = max(picture.width, picture.height)
            if longest_side > longest_allowed:
                scale = longest_allowed / longest_side
                scaled_size = (int(picture.width * scale), int(picture.height * scale))
                picture = picture.resize(scaled_size, Image.Resampling.LANCZOS)

            # Encode entirely in memory; nothing is written to disk.
            jpeg_bytes = io.BytesIO()
            picture.save(jpeg_bytes, format='JPEG', quality=85)

            encoded = base64.b64encode(jpeg_bytes.getvalue()).decode('utf-8')
            return f"data:image/jpeg;base64,{encoded}"
    except Exception as exc:
        print(f"Error normalizing image from {image_url}: {exc}")
        return image_url  # Fall back to the original URL if processing fails

def process_ai_task(record_id, image_url):
    """Background task to process image with AI and store result.

    Workflow:
    1. Mark the ``genealogy_records`` row ``record_id`` as processing
       (``ai_status = 1``).
    2. Send the image (normalized to a base64 JPEG when possible) together
       with an extraction prompt to the AI endpoint, reading the streamed
       response and accumulating its text.
    3. Extract the JSON array (or single object) from the text, post-process
       the names of every person, and store the JSON in ``ai_content`` with
       ``ai_status = 2``.

    The request/parse step is attempted up to 3 times, waiting 2s and then 4s
    between attempts; a non-200 response, a JSON parse error, or any other
    exception inside an attempt triggers the retry. If every attempt fails,
    the row gets ``ai_status = 3`` and the error message in ``ai_content``.

    Nothing is returned and no exception escapes; progress is reported with
    ``print``. The database connection is always closed at the end.
    """
    print(f"[AI Task] Starting task for record {record_id}...")
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("UPDATE genealogy_records SET ai_status = 1 WHERE id = %s", (record_id,))
        conn.commit()
        print(f"[AI Task] Status updated to 'Processing' for record {record_id}")

        # NOTE(review): the API key is hard-coded in source — consider loading
        # it from configuration instead.
        api_key = "a1800657-9212-4afe-9b7c-b49f015c54d3"
        api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"

        # Extraction prompt sent to the model. It is runtime data: it asks for
        # a bare JSON array with one object per person found in the image.
        prompt = """
        请分析这张家谱图片，提取其中关于人物的信息。
        请务必将繁体字转换为简体字（original_name 字段除外）。
        特别注意：'name' 字段必须是纯简体中文，不能包含繁体字（例如：'學'应转换为'学'，'劉'应转换为'刘'，'萬'应转换为'万'）。
        请提取以下字段（如果存在）：
        - original_name: 原始姓名（严格保持图片上的繁体字，不做任何修改或转换）
        - name: 简体姓名（必须转换为简体中文，去除不需要的敬称）
        - sex: 性别（男/女）
        - birthday: 出生日期（尝试转换为YYYY-MM-DD格式，如果无法确定年份可只填月日）
        - death_date: 逝世日期（如文本中出现“殁”、“葬”、“卒”等字眼及其对应的时间，请提取）
        - father_name: 父亲姓名
        - spouse_name: 配偶姓名
        - generation: 第几世/代数
        - name_word: 字辈（例如名字为“学勤公”，“学”为字辈；提取名字中的字辈信息）
        - education: 学历/功名
        - title: 官职/称号
        
        请严格以JSON列表格式返回，不要包含Markdown代码块标记（如 ```json ... ```），直接返回JSON数组。
        如果包含多个人物，请都提取出来。
        Do not output any reasoning or explanation, just the JSON.
        """

        # Either a data: URI with the normalized JPEG, or the original URL if
        # normalization failed.
        ai_payload_url = get_normalized_base64_image(image_url)

        payload = {
            "model": "doubao-seed-1-8-251228",
            "stream": True,  # Streaming for robust handling
            "input": [
                {
                    "role": "user",
                    "content": [
                        {"type": "input_image", "image_url": ai_payload_url},
                        {"type": "input_text", "text": prompt}
                    ]
                }
            ]
        }

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        max_retries = 3
        last_exception = None

        for attempt in range(max_retries):
            try:
                print(f"[AI Task] Attempt {attempt+1}/{max_retries} connecting to API for record {record_id}...")
                # NOTE(review): verify=False disables TLS certificate
                # verification for this call, and proxies are explicitly
                # bypassed — confirm both are intentional.
                response = requests.post(
                    api_url,
                    json=payload,
                    headers=headers,
                    timeout=1200,
                    stream=True,
                    verify=False,
                    proxies={"http": None, "https": None}
                )

                if response.status_code == 200:
                    print(f"[AI Task] Connection established for record {record_id}, receiving stream...")
                    # Accumulates the model's text across all stream events.
                    full_content = ""

                    for line in response.iter_lines():
                        if not line: continue
                        line_str = line.decode('utf-8')

                        # Debug: Print full line to understand event flow
                        print(f"[AI Task Debug] Raw Line: {line_str[:500]}") # Truncate very long lines

                        # Server-sent-event style line: "data: <json>" or "data: [DONE]".
                        if line_str.startswith('data: '):
                            json_str = line_str[6:]
                            if json_str.strip() == '[DONE]':
                                print("[AI Task Debug] Received [DONE]")
                                break
                            try:
                                chunk = json.loads(json_str)
                                chunk_type = chunk.get('type')

                                # Standard OpenAI format (choices)
                                if 'choices' in chunk and len(chunk['choices']) > 0:
                                    delta = chunk['choices'][0].get('delta', {})
                                    if 'content' in delta:
                                        full_content += delta['content']

                                # Doubao/Volcengine specific formats (delta)
                                elif chunk_type == 'response.text.delta':
                                    full_content += chunk.get('delta', '')

                                # Check response.completed if empty: only used
                                # when no delta text was collected so far.
                                elif chunk_type == 'response.completed' and not full_content:
                                    output = chunk.get('response', {}).get('output', [])
                                    for item in output:
                                        # Also extract from reasoning if it contains JSON-like text
                                        if item.get('type') == 'reasoning':
                                            summary = item.get('summary', [])
                                            for sum_item in summary:
                                                if sum_item.get('type') == 'summary_text':
                                                    full_content += sum_item.get('text', '')

                                        elif item.get('type') == 'message':
                                            content = item.get('content')
                                            if isinstance(content, str):
                                                full_content += content
                                            elif isinstance(content, list):
                                                for part in content:
                                                    if isinstance(part, dict) and part.get('type') == 'text':
                                                        full_content += part.get('text', '')

                                # Fallback: output_item.added
                                elif chunk_type == 'response.output_item.added':
                                    item = chunk.get('item', {})
                                    if item.get('role') == 'assistant':
                                        content_field = item.get('content', [])
                                        if isinstance(content_field, str):
                                            full_content += content_field
                                        elif isinstance(content_field, list):
                                            for part in content_field:
                                                if isinstance(part, dict) and part.get('type') == 'text':
                                                    full_content += part.get('text', '')

                            except Exception as e:
                                # A malformed event is logged and skipped; the
                                # stream keeps being read.
                                print(f"[AI Task] Chunk parse error: {e}")
                        else:
                             # Fallback for non-SSE
                             try:
                                chunk = json.loads(line_str)
                                if 'choices' in chunk and len(chunk['choices']) > 0:
                                    content = chunk['choices'][0]['message']['content']
                                    full_content += content
                             except:
                                pass

                    print(f"[AI Task] Stream finished. Content length: {len(full_content)}")
                    if len(full_content) == 0:
                         print(f"[AI Task] WARNING: No content received from AI stream.")
                         # Continue to JSON parse to fail gracefully

                    # Clean JSON
                    try:
                        # 1. Try finding [...] array
                        start = full_content.find('[')
                        end = full_content.rfind(']')

                        # 2. If not found, try finding {...} object and wrap it
                        is_single_object = False
                        if start == -1 or end == -1 or end <= start:
                            start = full_content.find('{')
                            end = full_content.rfind('}')
                            is_single_object = True

                        if start != -1 and end != -1 and end > start:
                            content_clean = full_content[start:end+1]
                        else:
                            # Fallback to regex or raw
                            content_clean = re.sub(r'^```json\s*', '', full_content)
                            content_clean = re.sub(r'```$', '', content_clean)

                        parsed = json.loads(content_clean)

                        # Normalize single object to list
                        if is_single_object and isinstance(parsed, dict):
                            parsed = [parsed]
                            content_clean = json.dumps(parsed, ensure_ascii=False)
                        elif isinstance(parsed, dict) and not isinstance(parsed, list):
                             # Just in case json.loads parsed a dict even if we looked for []
                             parsed = [parsed]
                             content_clean = json.dumps(parsed, ensure_ascii=False)

                        # Build spouse name lookup for "female spouse" detection
                        # NOTE(review): list items are assumed to be dicts; a
                        # non-dict item raises here and triggers a retry.
                        spouse_name_set = set()
                        if isinstance(parsed, list):
                            for person in parsed:
                                n = normalize_lookup_name(person.get('spouse_name'))
                                if n:
                                    spouse_name_set.add(n)

                        # Clean names in parsed content
                        if isinstance(parsed, list):
                            for person in parsed:
                                # Process Name: 'name' is Simplified from AI, 'original_name' is Traditional/Raw from AI
                                simplified_name = person.get('name', '') or person.get('original_name', '')
                                original_name = person.get('original_name', '')

                                # Female spouse: only simplify Chinese, do NOT prepend '留'
                                if should_skip_liu_prefix_for_person(person, spouse_name_set):
                                    cleaned_simplified = manual_simplify(simplified_name)
                                else:
                                    # Same-clan default: prepend '留' and handle trailing '公'
                                    cleaned_simplified = clean_name(simplified_name)
                                person['simplified_name'] = cleaned_simplified

                                # Store raw name in 'name' field (as requested)
                                if original_name:
                                    person['name'] = original_name
                                else:
                                    # Fallback: if no original_name returned, use the uncleaned name as 'name'
                                    # or keep existing logic. But user wants raw in 'name'.
                                    # If AI didn't return original_name, 'name' is likely simplified.
                                    pass # Keep 'name' as is (which is Simplified) if original_name missing

                                # Father name: same clan, so clean it with the '留' surname rules
                                if 'father_name' in person and person['father_name']:
                                    person['father_name'] = clean_name(person['father_name'])

                                # Spouse name: only convert Traditional to Simplified; do not
                                # prepend the '留' surname and do not strip a trailing '公'
                                if 'spouse_name' in person and person['spouse_name']:
                                    person['spouse_name'] = manual_simplify(person['spouse_name'])

                        # Re-serialize
                        content_clean = json.dumps(parsed, ensure_ascii=False)

                        with conn.cursor() as cursor:
                            cursor.execute("UPDATE genealogy_records SET ai_status = 2, ai_content = %s WHERE id = %s", (content_clean, record_id))
                        conn.commit()
                        print(f"[AI Task] SUCCESS: Record {record_id} processed and saved.")
                        return # Success
                    except json.JSONDecodeError as err:
                        # Re-raised as a generic Exception so the attempt-level
                        # handler below retries the whole request.
                        raise Exception(f"JSON Parse Error: {str(err)}. Raw: {full_content}")
                else:
                    raise Exception(f"API Error {response.status_code}: {response.text}")

            except Exception as e:
                print(f"[AI Task] Attempt {attempt+1} failed for record {record_id}: {e}")
                last_exception = e
                # Linear back-off (2s, 4s); no wait after the final attempt.
                if attempt < max_retries - 1:
                    wait_time = 2 * (attempt + 1)
                    print(f"[AI Task] Waiting {wait_time}s before retry...")
                    time.sleep(wait_time)

        # All attempts failed: hand the last error to the outer handler.
        raise last_exception or Exception("Unknown error")

    except Exception as e:
        print(f"[AI Task] FINAL FAILURE for record {record_id}: {e}")
        try:
            with conn.cursor() as cursor:
                cursor.execute("UPDATE genealogy_records SET ai_status = 3, ai_content = %s WHERE id = %s", (f"Max Retries Exceeded. Error: {str(e)}", record_id))
            conn.commit()
        except:
            # Best effort: if even the failure status cannot be written,
            # give up silently.
            pass
    finally:
        conn.close()
        print(f"[AI Task] Task finished for record {record_id}")

def ensure_pdf_table():
    """Create the ``genealogy_pdfs`` table if missing and add any missing columns.

    After the CREATE TABLE IF NOT EXISTS, the columns listed below are
    checked one by one with SHOW COLUMNS and added with ALTER TABLE when
    absent. Everything is committed once at the end and the connection is
    always closed.
    """
    # (column name, column definition) pairs, checked in this order.
    optional_columns = (
        ('parse_status', 'INT DEFAULT 0'),
        ('version_name', "VARCHAR(255) DEFAULT ''"),
        ('version_source', "VARCHAR(255) DEFAULT ''"),
        ('file_provider', "VARCHAR(100) DEFAULT ''"),
    )

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS genealogy_pdfs (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    file_name VARCHAR(255) NOT NULL,
                    oss_url TEXT NOT NULL,
                    description VARCHAR(500) DEFAULT '',
                    upload_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    uploader VARCHAR(100) DEFAULT '',
                    version_name VARCHAR(255) DEFAULT '',
                    version_source VARCHAR(255) DEFAULT '',
                    file_provider VARCHAR(100) DEFAULT '',
                    parse_status INT DEFAULT 0
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """)
            # Add each column only when the existing table does not have it yet.
            for column_name, column_definition in optional_columns:
                cursor.execute(f"SHOW COLUMNS FROM genealogy_pdfs LIKE '{column_name}'")
                if not cursor.fetchone():
                    cursor.execute(f"ALTER TABLE genealogy_pdfs ADD COLUMN {column_name} {column_definition}")
        conn.commit()
    finally:
        conn.close()

@app.route('/manager/pdf_management')
def pdf_management():
    """Render the PDF management page.

    Query parameters:
        view: integer id of a PDF to preview.
        preview: flag enabling the preview; any value other than an empty
            string, ``0``, ``false``, ``no`` or ``off`` (case-insensitive)
            turns it on.

    Returns:
        A redirect to the login page when no user is logged in; otherwise the
        rendered ``pdf_management.html`` with every PDF row (newest first) and
        the selected row, if any.
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))

    ensure_pdf_table()
    view_id = request.args.get('view', type=int)
    # ``type=bool`` would call bool() on the raw string, which makes
    # "?preview=false" and "?preview=0" truthy. Parse the flag explicitly.
    preview_raw = request.args.get('preview', '')
    preview = preview_raw.strip().lower() not in ('', '0', 'false', 'no', 'off')
    selected_pdf = None

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM genealogy_pdfs ORDER BY upload_time DESC")
            pdfs = cursor.fetchall()
            if view_id and preview:
                cursor.execute("SELECT * FROM genealogy_pdfs WHERE id = %s", (view_id,))
                selected_pdf = cursor.fetchone()
    finally:
        conn.close()

    return render_template('pdf_management.html', pdfs=pdfs, selected_pdf=selected_pdf)


@app.route('/manager/parse_pdf/<int:pdf_id>', methods=['POST'])
def parse_pdf(pdf_id):
    """Start a background job that splits a stored PDF into per-page images.

    The PDF row is flagged ``parse_status = 1`` (parsing) right away. A daemon
    thread then downloads the file from its ``oss_url``, renders each page to
    a PNG, uploads it through ``upload_to_oss`` and inserts one
    ``genealogy_records`` row per page. The thread sets ``parse_status`` to 2
    on success and 3 on failure.

    Args:
        pdf_id: Primary key of the ``genealogy_pdfs`` row to parse.

    Returns:
        A 401 JSON response when no user is logged in; otherwise a JSON
        acknowledgement that the job was started (sent before the job ends).
    """
    import tempfile

    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    def set_parse_status(status):
        """Store ``parse_status`` for this PDF on a short-lived connection."""
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute(
                    "UPDATE genealogy_pdfs SET parse_status = %s WHERE id = %s",
                    (status, pdf_id),
                )
            conn.commit()
        finally:
            conn.close()

    # Mark the PDF as "parsing" before handing off to the worker thread.
    set_parse_status(1)

    def parse_pdf_async():
        """Worker body: download, split, upload and record every page."""
        try:
            # Load the PDF row.
            conn = get_db_connection()
            try:
                with conn.cursor() as cursor:
                    cursor.execute("SELECT * FROM genealogy_pdfs WHERE id = %s", (pdf_id,))
                    pdf_info = cursor.fetchone()
            finally:
                conn.close()

            if not pdf_info:
                return

            # A private temp directory with fixed file names keeps the
            # DB-supplied file_name out of filesystem paths (no traversal, no
            # clashes between concurrent jobs) and is removed even on error.
            with tempfile.TemporaryDirectory(prefix="pdf_parse_") as work_dir:
                # A timeout keeps a stalled download from hanging the worker forever.
                response = requests.get(pdf_info['oss_url'], timeout=60)
                response.raise_for_status()

                temp_pdf_path = os.path.join(work_dir, "source.pdf")
                with open(temp_pdf_path, 'wb') as f:
                    f.write(response.content)

                doc = fitz.open(temp_pdf_path)
                try:
                    page_count = doc.page_count

                    # New pages are numbered after the current highest page number.
                    max_page = 0
                    conn = get_db_connection()
                    try:
                        with conn.cursor() as cursor:
                            cursor.execute("SELECT MAX(page_number) as max_page FROM genealogy_records")
                            result = cursor.fetchone()
                            if result and result['max_page']:
                                max_page = result['max_page']
                    finally:
                        conn.close()

                    for i in range(page_count):
                        page_name = f"{pdf_info['file_name']}_page_{i+1}.png"
                        image_path = os.path.join(work_dir, f"page_{i+1}.png")
                        try:
                            doc[i].get_pixmap().save(image_path)

                            # Upload the rendered page image to OSS.
                            with open(image_path, 'rb') as f:
                                image_oss_url = upload_to_oss(f, page_name)
                        finally:
                            # Drop each image right away so large PDFs do not
                            # pile up files until the directory is cleaned.
                            if os.path.exists(image_path):
                                os.remove(image_path)

                        # Record the page in genealogy_records.
                        conn = get_db_connection()
                        try:
                            with conn.cursor() as cursor:
                                cursor.execute("""
                                    INSERT INTO genealogy_records 
                                    (file_name, oss_url, file_type, page_number, genealogy_version, genealogy_source, upload_person, upload_time)
                                    VALUES (%s, %s, %s, %s, %s, %s, %s, CURRENT_TIMESTAMP)
                                """, (
                                    page_name,
                                    image_oss_url,
                                    '图片',
                                    max_page + i + 1,
                                    pdf_info['version_name'],
                                    pdf_info['version_source'],
                                    pdf_info['file_provider']
                                ))
                            conn.commit()
                        finally:
                            conn.close()
                finally:
                    # Close the document before the temp directory is removed.
                    doc.close()

            # Mark the PDF as parsed successfully.
            set_parse_status(2)

        except Exception as e:
            # Log first so the cause is not lost if the status update also fails.
            print(f"PDF解析失败: {e}")
            set_parse_status(3)

    # Run the job in the background; a daemon thread does not block shutdown.
    thread = threading.Thread(target=parse_pdf_async)
    thread.daemon = True
    thread.start()

    return jsonify({"success": True, "message": "PDF解析已开始，将在后台执行"})

@app.route('/manager/delete_pdf/<int:pdf_id>', methods=['POST'])
def delete_pdf(pdf_id):
    """Delete one ``genealogy_pdfs`` row and return to the PDF management page.

    Args:
        pdf_id: Primary key of the row to delete.

    Returns:
        A 401 JSON response when no user is logged in; otherwise a redirect to
        ``pdf_management`` with a flashed success or failure message.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    db = get_db_connection()
    try:
        with db.cursor() as cur:
            cur.execute("DELETE FROM genealogy_pdfs WHERE id = %s", (pdf_id,))
        db.commit()
        flash('PDF文件记录已删除')
    except Exception as exc:
        # Any failure is reported to the user through the flash message.
        flash(f'删除失败: {exc}')
    finally:
        db.close()

    return redirect(url_for('pdf_management'))

@app.route('/manager/')
def index():
    """List genealogy records, ten per page, with optional filters.

    Query parameters:
        page: 1-based page number; values below 1 are treated as 1.
        version, source, person: substring filters on ``genealogy_version``,
            ``genealogy_source`` and ``upload_person``.
        file_type: exact-match filter on ``file_type``.

    Returns:
        A redirect to the login page when no user is logged in; otherwise the
        rendered ``index.html``.
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))

    # Clamp to the first page: a zero or negative page would give a negative
    # OFFSET, which the database rejects.
    page = max(request.args.get('page', 1, type=int), 1)
    version = request.args.get('version', '').strip()
    source = request.args.get('source', '').strip()
    person = request.args.get('person', '').strip()
    file_type = request.args.get('file_type', '').strip()
    per_page = 10
    offset = (page - 1) * per_page

    # Build the WHERE clause from the filters that were actually supplied.
    query_conditions = []
    params = []
    substring_filters = (
        ("genealogy_version", version),
        ("genealogy_source", source),
        ("upload_person", person),
    )
    for column, value in substring_filters:
        if value:
            query_conditions.append(f"{column} LIKE %s")
            params.append(f"%{value}%")
    if file_type:
        query_conditions.append("file_type = %s")
        params.append(file_type)

    where_clause = ""
    if query_conditions:
        where_clause = "WHERE " + " AND ".join(query_conditions)

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            count_sql = f"SELECT COUNT(*) as count FROM genealogy_records {where_clause}"
            cursor.execute(count_sql, params)
            total = cursor.fetchone()['count']

            sql = f"SELECT * FROM genealogy_records {where_clause} ORDER BY page_number ASC LIMIT %s OFFSET %s"
            cursor.execute(sql, params + [per_page, offset])
            records = cursor.fetchall()
    finally:
        conn.close()

    # Ceiling division: a partial last page still counts as a page.
    total_pages = (total + per_page - 1) // per_page

    return render_template('index.html', records=records, page=page, total_pages=total_pages, version=version, source=source, person=person, file_type=file_type, total=total)

@app.route('/manager/members')
def members():
    """List family members, ten per page, optionally filtered by name.

    Query parameters:
        name: search text. Every variant returned by
            ``expand_name_search_variants`` is matched as a substring of
            ``name`` or ``simplified_name``.
        page: 1-based page number; values below 1 are treated as 1.

    Returns:
        A redirect to the login page when no user is logged in; otherwise the
        rendered ``members.html``.
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))

    search_name = request.args.get('name', '').strip()
    # Clamp to the first page so the OFFSET can never be negative.
    page = max(request.args.get('page', 1, type=int), 1)
    per_page = 10
    offset = (page - 1) * per_page

    # Build the filter once so the count query and the page query always agree.
    where_sql = ""
    where_params = []
    if search_name:
        # Fall back to the raw search text when no variants come back.
        variants = list(expand_name_search_variants(search_name) or ()) or [search_name]
        name_match = "(name LIKE %s OR simplified_name LIKE %s)"
        where_sql = "WHERE " + " OR ".join([name_match] * len(variants))
        for variant in variants:
            like = f"%{variant}%"
            where_params.extend([like, like])

    # COALESCE keeps the sort working when modified_time is NULL.
    order_clause = "ORDER BY COALESCE(modified_time, create_time) DESC"

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # 1. Total number of matching members.
            cursor.execute(
                f"SELECT COUNT(*) as count FROM family_member_info {where_sql}",
                tuple(where_params),
            )
            result = cursor.fetchone()
            total = result['count'] if result else 0
            total_pages = (total + per_page - 1) // per_page

            # 2. The requested page, most recently modified first.
            cursor.execute(
                f"SELECT * FROM family_member_info {where_sql} {order_clause} LIMIT %s OFFSET %s",
                tuple(where_params + [per_page, offset]),
            )
            # Named ``rows`` so the local does not shadow this view function.
            rows = cursor.fetchall()

            # Add display strings for the date fields.
            for m in rows:
                m['birthday_str'] = format_timestamp(m.get('birthday'))
                if m.get('create_time'):
                    m['create_time_str'] = m['create_time'].strftime('%Y-%m-%d')
                if m.get('modified_time'):
                    m['modified_time_str'] = m['modified_time'].strftime('%Y-%m-%d %H:%M')
    finally:
        conn.close()

    return render_template('members.html', members=rows, search_name=search_name, page=page, total_pages=total_pages, total=total)

@app.route('/manager/tree')
def tree():
    """Render ``tree.html`` for a logged-in user; otherwise redirect to login."""
    if 'user_id' in session:
        return render_template('tree.html')
    return redirect(url_for('login'))

@app.route('/manager/tree_classic')
def tree_classic():
    """Render ``tree_classic.html`` for a logged-in user; otherwise redirect to login."""
    if 'user_id' in session:
        return render_template('tree_classic.html')
    return redirect(url_for('login'))

@app.route('/manager/api/tree_data')
def tree_data():
    """Return every member and every relation as JSON for the tree views.

    Returns:
        A 401 JSON error when no user is logged in; otherwise
        ``{"members": [...], "relations": [...]}``.
    """
    if 'user_id' not in session:
        return jsonify({"error": "Unauthorized"}), 401

    member_sql = "SELECT id, name, simplified_name, sex, family_rank, name_word_generation FROM family_member_info"
    # relation_type values: 1 father-child, 2 mother-child, 10 spouses,
    # 11 brothers, 12 sisters.
    relation_sql = "SELECT parent_mid, child_mid, relation_type FROM family_relation_info"

    db = get_db_connection()
    try:
        with db.cursor() as cur:
            cur.execute(member_sql)
            member_rows = cur.fetchall()
            cur.execute(relation_sql)
            relation_rows = cur.fetchall()

            payload = {"members": member_rows, "relations": relation_rows}
            return jsonify(payload)
    finally:
        db.close()

@app.route('/manager/api/save_relation', methods=['POST'])
def save_relation():
    """Replace the stored relation of a member with a new one.

    Expects a JSON object with:
        source_mid: id of the member being dragged (stored as ``child_mid``).
        target_mid: id of the member dropped onto (stored as ``parent_mid``).
        relation_type: integer relation code (1 father-child, 2 mother-child,
            10 spouses, 11 brothers, 12 sisters).
        sub_relation_type: optional integer, defaults to 0.

    Every existing ``family_relation_info`` row whose ``source_mid`` equals the
    dragged member is deleted before the new row is inserted.

    Returns:
        JSON with ``success`` and ``message``; 401 when not logged in, 400 for
        missing or malformed parameters, 500 when the database step fails.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    incomplete = (jsonify({"success": False, "message": "参数不完整"}), 400)

    data = request.json
    if not isinstance(data, dict):
        # A JSON body that is not an object (e.g. null or a list) has no fields.
        return incomplete

    source_mid = data.get('source_mid')  # The member being dragged
    target_mid = data.get('target_mid')  # The member being dropped onto
    try:
        # Convert inside the guard: a missing or non-numeric code must give
        # the 400 below, not an unhandled exception.
        rel_type = int(data.get('relation_type'))
        sub_rel_type = int(data.get('sub_relation_type') or 0)
    except (TypeError, ValueError):
        return incomplete

    if not source_mid or not target_mid or not rel_type:
        return incomplete

    # The drop target is always stored as parent_mid and the dragged member as
    # child_mid. Spouses (10) and siblings (11, 12) are in the same
    # generation; every other type is one generation apart.
    parent_mid = target_mid
    child_mid = source_mid
    gen_diff = 0 if rel_type in (10, 11, 12) else 1

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # Remove the old relation(s) recorded for the dragged member.
            cursor.execute("DELETE FROM family_relation_info WHERE source_mid = %s", (source_mid,))

            # Insert the new relation.
            sql = """
                INSERT INTO family_relation_info 
                (parent_mid, child_mid, relation_type, sub_relation_type, source_mid, generation_diff) 
                VALUES (%s, %s, %s, %s, %s, %s)
            """
            cursor.execute(sql, (parent_mid, child_mid, rel_type, sub_rel_type, source_mid, gen_diff))
            conn.commit()
            return jsonify({"success": True, "message": "关系已保存"})
    except Exception as e:
        return jsonify({"success": False, "message": str(e)}), 500
    finally:
        conn.close()

@app.route('/manager/api/members')
def get_members():
    """Return one page (ten rows) of members as JSON, optionally filtered.

    Query parameters:
        page: 1-based page number; missing, non-numeric or below-1 values are
            treated as 1.
        search: substring matched against ``name`` or ``simplified_name``.

    Returns:
        ``{"members": [...], "total": n}`` on success; a 401 JSON response
        when not logged in; a 500 JSON response when the query fails.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    # ``type=int`` falls back to the default on bad input instead of raising,
    # and the clamp keeps the OFFSET from going negative.
    page = max(request.args.get('page', 1, type=int), 1)
    search = request.args.get('search', '')
    per_page = 10
    offset = (page - 1) * per_page

    # One filter shared by the count query and the page query.
    where_sql = ""
    where_params = ()
    if search:
        like = f'%{search}%'
        where_sql = " WHERE name LIKE %s OR simplified_name LIKE %s"
        where_params = (like, like)

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # Count all matching members.
            cursor.execute("SELECT COUNT(*) as total FROM family_member_info" + where_sql, where_params)
            total_result = cursor.fetchone()
            total = total_result['total'] if total_result else 0

            # Fetch the current page. Without an ORDER BY the row order is
            # unspecified, so consecutive pages could repeat or skip rows.
            cursor.execute(
                "SELECT id, name, simplified_name, sex FROM family_member_info"
                + where_sql
                + " ORDER BY id LIMIT %s OFFSET %s",
                where_params + (per_page, offset),
            )
            members = cursor.fetchall()

            # Expose only the four public fields of each row.
            members_list = [
                {
                    'id': member['id'],
                    'name': member['name'],
                    'simplified_name': member['simplified_name'],
                    'sex': member['sex'],
                }
                for member in members
            ]

            return jsonify({"members": members_list, "total": total})
    except Exception as e:
        return jsonify({"success": False, "message": f"获取成员失败: {e}"}), 500
    finally:
        conn.close()

@app.route('/manager/api/member/<int:member_id>')
def get_member(member_id):
    """Return a few fields of one member as JSON.

    Args:
        member_id: Primary key of the ``family_member_info`` row.

    Returns:
        ``{"member": {...}}`` when the row exists; a 404 JSON response when it
        does not; 401 when not logged in; 500 when the lookup fails.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    lookup_sql = "SELECT id, name, name_word_generation, source_record_id FROM family_member_info WHERE id = %s"
    db = get_db_connection()
    try:
        with db.cursor() as cur:
            cur.execute(lookup_sql, (member_id,))
            row = cur.fetchone()
            if row:
                return jsonify({"member": row})
            return jsonify({"success": False, "message": "成员不存在"}), 404
    except Exception as exc:
        return jsonify({"success": False, "message": f"获取成员失败: {exc}"}), 500
    finally:
        db.close()

@app.route('/manager/api/check_relations', methods=['POST'])
def check_relations():
    """Find existing members that could be the father or spouse of given people.

    Expects a JSON object ``{"people": [...]}`` where each entry may carry
    ``father_name``, ``spouse_name`` and ``sex``. Names are looked up in
    ``family_member_info`` by exact match on ``name`` or ``simplified_name``.
    Father candidates are limited to rows with ``sex == 1``; spouse candidates
    to ``sex == 1`` when the person's ``sex`` is ``'女'`` and ``sex == 2``
    otherwise.

    Returns:
        ``{"success": True, "matches": {index: {"father": [...],
        "spouse": [...]}}}``, keyed by the position of each person in the
        input list; ``{"success": False, "matches": {}}`` when no people were
        supplied; a 401 JSON response when not logged in.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    def usable_name(person, key):
        """Return ``person[key]`` when it is a non-empty string, else ``None``."""
        value = person.get(key) if isinstance(person, dict) else None
        return value if isinstance(value, str) and value else None

    data = request.json
    # Tolerate a body that is not a JSON object, or a "people" that is not a list.
    people = data.get('people', []) if isinstance(data, dict) else []
    if not isinstance(people, list) or not people:
        return jsonify({"success": False, "matches": {}})

    # Collect every father and spouse name that needs a lookup.
    names_to_check = set()
    for p in people:
        for key in ('father_name', 'spouse_name'):
            name = usable_name(p, key)
            if name:
                names_to_check.add(name)

    # Nothing to look up: answer without opening a database connection.
    if not names_to_check:
        return jsonify({"success": True, "matches": {}})

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            placeholders = ','.join(['%s'] * len(names_to_check))
            sql = (
                "SELECT id, name, simplified_name, sex, birthday FROM family_member_info "
                f"WHERE name IN ({placeholders}) OR simplified_name IN ({placeholders})"
            )
            # The same name list feeds both IN (...) clauses.
            cursor.execute(sql, tuple(names_to_check) * 2)
            results = cursor.fetchall()
    finally:
        conn.close()

    # Index rows by both spellings; a row is listed once per distinct spelling.
    db_map = {}  # name -> [list of members]
    for r in results:
        db_map.setdefault(r['name'], []).append(r)
        sname = r.get('simplified_name')
        if sname and sname != r['name']:
            db_map.setdefault(sname, []).append(r)

    # Build matches for each input person, keyed by its position in the list.
    matches = {}
    for index, p in enumerate(people):
        p_match = {}

        # Father: candidates must be recorded with sex == 1.
        fname = usable_name(p, 'father_name')
        if fname and fname in db_map:
            valid_fathers = [c for c in db_map[fname] if c['sex'] == 1]
            if valid_fathers:
                p_match['father'] = valid_fathers  # Return all candidates

        # Spouse: candidates must be of the opposite sex.
        sname = usable_name(p, 'spouse_name')
        if sname and sname in db_map:
            target_sex = 1 if p.get('sex') == '女' else 2
            valid_spouses = [c for c in db_map[sname] if c['sex'] == target_sex]
            if valid_spouses:
                p_match['spouse'] = valid_spouses

        if p_match:
            matches[index] = p_match

    return jsonify({"success": True, "matches": matches})

@app.route('/manager/add_member', methods=['GET', 'POST'])
def add_member():
    """Show the add-member form (GET) or create a member from the form (POST).

    An optional source record id (``record_id`` query argument or
    ``source_record_id`` form field) links the request to a
    ``genealogy_records`` row. When that row has ``ai_status == 2`` and a
    non-empty ``ai_content``, the content is handed to the template as
    ``prefilled_content``.

    On POST the view:
      1. converts the ``birthday`` field to a Unix timestamp (0 when the field
         is missing or unparsable);
      2. for relation types ``'1'``/``'2'``, rejects a birthday earlier than
         the related member's birthday;
      3. inserts the member into ``family_member_info``;
      4. inserts a ``family_relation_info`` row when a related member and a
         relation type were submitted;
      5. when ``source_index`` is given, flags that entry of the source
         record's ``ai_content`` as imported;
      6. commits, then answers with JSON for XMLHttpRequest/JSON requests or
         flashes a message and redirects to the members page.

    Any exception raised inside the ``try`` is flashed and the form is
    rendered with empty member and image lists.
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))
        
    conn = get_db_connection()
    try:
        # Check for source_record_id (from GET or POST)
        source_record_id = request.args.get('record_id') or request.form.get('source_record_id')
        prefilled_content = None
        source_oss_url = None
        
        if source_record_id:
            with conn.cursor() as cursor:
                cursor.execute("SELECT oss_url, ai_content, ai_status FROM genealogy_records WHERE id = %s", (source_record_id,))
                rec = cursor.fetchone()
                if rec:
                    source_oss_url = rec['oss_url']
                    # Check ai_status (2 = success)
                    if rec['ai_status'] == 2 and rec['ai_content']:
                        prefilled_content = rec['ai_content']

        if request.method == 'POST':
            # Convert the birthday string to a Unix timestamp; 0 stands for
            # "no valid birthday supplied".
            birthday_str = request.form.get('birthday')
            birthday_ts = 0
            if birthday_str:
                try:
                    birthday_ts = int(datetime.strptime(birthday_str, '%Y-%m-%d').timestamp())
                except ValueError:
                    birthday_ts = 0

            # Relation data
            related_mid = request.form.get('related_mid')
            relation_type = request.form.get('relation_type')
            sub_relation_type = request.form.get('sub_relation_type', 0)
            
            # Age validation: a child must not be born before the parent.
            if related_mid and relation_type in ['1', '2']: # 1: father-child, 2: mother-child
                with conn.cursor() as cursor:
                    cursor.execute("SELECT name, birthday FROM family_member_info WHERE id = %s", (related_mid,))
                    parent = cursor.fetchone()
                    # The comparison only runs when both timestamps are positive.
                    # NOTE(review): dates before 1970 presumably yield negative
                    # timestamps and therefore skip this check - confirm.
                    if parent and parent['birthday'] > 0 and birthday_ts > 0:
                        if birthday_ts < parent['birthday']:
                            error_msg = f"数据冲突：成员年龄不能比其父亲/母亲（{parent['name']}）大，请检查并修正出生日期。"
                            flash(error_msg)
                            
                            # Re-fetch data for rendering
                            cursor.execute("SELECT id, name FROM family_member_info ORDER BY name")
                            all_members = cursor.fetchall()
                            cursor.execute("SELECT * FROM genealogy_records ORDER BY page_number ASC")
                            images = cursor.fetchall()

                            # AJAX/JSON callers get the error as JSON instead of the re-rendered form.
                            if request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.is_json:
                                return jsonify({
                                    "success": False, 
                                    "message": error_msg
                                }), 400
                            
                            selected_member_name = ''
                            return render_template('add_member.html', all_members=all_members, images=images, 
                                   prefilled_content=prefilled_content, source_oss_url=source_oss_url, source_record_id=source_record_id, selected_member_name=selected_member_name)

            # Collect the form data; the dict keys double as column names in the INSERT below.
            data = {
                'name': request.form['name'],
                'simplified_name': request.form.get('simplified_name'),
                'former_name': request.form.get('former_name'),
                'childhood_name': request.form.get('childhood_name'),
                'name_word': request.form.get('name_word'),
                'name_word_generation': ';'.join([g.strip() for g in request.form.getlist('lineage_generations[]') if g.strip()]),
                'name_title': request.form.get('name_title'),
                'sex': request.form['sex'],
                'birthday': birthday_ts,
                'is_pass_away': request.form.get('is_pass_away', 0),
                'marital_status': request.form.get('marital_status', 0),
                'birth_place': request.form.get('birth_place'),
                'branch_family_hall': request.form.get('branch_family_hall'),
                'cluster_place': request.form.get('cluster_place'),
                'nation': request.form.get('nation'),
                'residential_address': request.form.get('residential_address'),
                'phone': request.form.get('phone'),
                'mail': request.form.get('mail'),
                'wechat_account': request.form.get('wechat_account'),
                'id_number': request.form.get('id_number'),
                'occupation': request.form.get('occupation'),
                'educational': request.form.get('educational'),
                'blood_type': request.form.get('blood_type'),
                'religion': request.form.get('religion'),
                'hobbies': request.form.get('hobbies'),
                'personal_achievements': request.form.get('personal_achievements'),
                'family_rank': request.form.get('family_rank'),
                'tags': request.form.get('tags'),
                'notes': request.form.get('notes'),
                'source_record_id': request.form.get('source_record_id') or None  # Save source record ID
            }
            
            # Insert the member, then the optional relation and AI-record
            # bookkeeping; the single commit below covers all of them
            # (assuming the connection is not in autocommit mode).
            
            with conn.cursor() as cursor:
                # Column names come from the fixed keys of ``data``; only the values are bound.
                fields = ", ".join(data.keys())
                placeholders = ", ".join(["%s"] * len(data))
                sql = f"INSERT INTO family_member_info ({fields}) VALUES ({placeholders})"
                cursor.execute(sql, list(data.values()))
                member_id = cursor.lastrowid
                
                # Record the relation: the related member is stored as
                # parent_mid and the new member as child_mid.
                if related_mid and relation_type:
                    rel_type = int(relation_type)
                    parent_mid = int(related_mid)
                    child_mid = member_id
                    # Types 1 and 2 are one generation apart; every other type is stored as 0.
                    gen_diff = 1 if rel_type in [1, 2] else 0
                        
                    sql_relation = """
                        INSERT INTO family_relation_info 
                        (parent_mid, child_mid, relation_type, sub_relation_type, source_mid, generation_diff) 
                        VALUES (%s, %s, %s, %s, %s, %s)
                    """
                    cursor.execute(sql_relation, (parent_mid, child_mid, rel_type, sub_relation_type, member_id, gen_diff))
                
                # Update AI Record Status if applicable
                source_record_id = data.get('source_record_id')
                source_index = request.form.get('source_index')
                
                if source_record_id and source_index and source_index.isdigit():
                    try:
                        idx = int(source_index)
                        # FOR UPDATE locks the row while its JSON content is read and rewritten.
                        cursor.execute("SELECT ai_content FROM genealogy_records WHERE id = %s FOR UPDATE", (source_record_id,))
                        rec = cursor.fetchone()
                        if rec and rec['ai_content']:
                            import json
                            content = json.loads(rec['ai_content'])
                            # Ensure content is a list (it might be a dict if single object, though we try to normalize)
                            if isinstance(content, dict):
                                content = [content]
                                
                            if isinstance(content, list):
                                updated = False
                                if 0 <= idx < len(content):
                                    if not content[idx].get('is_imported'): # Avoid redundant updates
                                        content[idx]['is_imported'] = True
                                        content[idx]['imported_member_id'] = member_id
                                        updated = True
                                
                                if updated:
                                    new_content = json.dumps(content, ensure_ascii=False)
                                    cursor.execute("UPDATE genealogy_records SET ai_content = %s WHERE id = %s", (new_content, source_record_id))
                    except Exception as e:
                        # Best effort: a failure here is logged and does not stop the member from being saved.
                        print(f"Error updating AI content status: {e}")

                conn.commit()
                
                if request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.is_json:
                    return jsonify({"success": True, "message": "成员录入成功", "member_id": member_id})

                flash('成员录入成功')
                return redirect(url_for('members'))
        
        # GET: load the lists the form needs (all members and all record images).
        with conn.cursor() as cursor:
            cursor.execute("SELECT id, name FROM family_member_info ORDER BY name")
            all_members = cursor.fetchall()
            cursor.execute("SELECT * FROM genealogy_records ORDER BY page_number ASC")
            images = cursor.fetchall()
            
    except Exception as e:
        # Any failure above (GET or POST) ends here: report it and render the form with empty lists.
        flash(f'发生错误: {e}')
        all_members = []
        images = []
    finally:
        conn.close()
        
    selected_member_name = ''
    return render_template('add_member.html', all_members=all_members, images=images, 
           prefilled_content=prefilled_content, source_oss_url=source_oss_url, source_record_id=source_record_id, selected_member_name=selected_member_name)

def _mark_ai_entry_imported(cursor, source_record_id, source_index, member_id):
    """Best-effort: flag one AI-extracted entry of a genealogy record as imported.

    Loads ``genealogy_records.ai_content`` for ``source_record_id`` (locking the
    row with ``FOR UPDATE``), parses it as JSON, and sets ``is_imported`` /
    ``imported_member_id`` on the entry at position ``source_index``. A single
    JSON object is treated as a one-element list. Nothing is written when the
    inputs are missing, the index is out of range, or the entry is already
    flagged. Any error is printed and swallowed so that it never blocks the
    caller's own commit.
    """
    if not (source_record_id and source_index and source_index.isdigit()):
        return

    import json

    try:
        idx = int(source_index)
        cursor.execute("SELECT ai_content FROM genealogy_records WHERE id = %s FOR UPDATE", (source_record_id,))
        rec = cursor.fetchone()
        if not rec or not rec['ai_content']:
            return

        content = json.loads(rec['ai_content'])
        if isinstance(content, dict):
            content = [content]
        if not isinstance(content, list) or not 0 <= idx < len(content):
            return

        entry = content[idx]
        if entry.get('is_imported'):
            # Already flagged: avoid a redundant UPDATE.
            return

        entry['is_imported'] = True
        entry['imported_member_id'] = member_id
        new_content = json.dumps(content, ensure_ascii=False)
        cursor.execute("UPDATE genealogy_records SET ai_content = %s WHERE id = %s", (new_content, source_record_id))
    except Exception as e:
        print(f"Error updating AI content status: {e}")


@app.route('/manager/edit_member/<int:member_id>', methods=['GET', 'POST'])
def edit_member(member_id):
    """Show (GET) or save (POST) the edit form for one family member.

    GET renders ``add_member.html`` pre-filled with the member row, its current
    relation (the ``family_relation_info`` row whose ``source_mid`` is this
    member), the list of other members and all genealogy records.

    POST validates the birthday against the selected father/mother, updates the
    member row, replaces the member's relation row when a related member and
    relation type are submitted, marks the originating AI entry as imported,
    and commits. AJAX/JSON callers receive JSON; browsers get a flash message
    plus a redirect, or the re-rendered form on a validation conflict.

    Anonymous visitors are redirected to the login page; an unknown
    ``member_id`` flashes a message and redirects to the member list.
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))

    conn = get_db_connection()
    try:
        if request.method == 'POST':
            wants_json = request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.is_json

            # 0 doubles as "unknown birthday" throughout this handler.
            birthday_str = request.form.get('birthday')
            birthday_ts = 0
            if birthday_str:
                try:
                    birthday_ts = int(datetime.strptime(birthday_str, '%Y-%m-%d').timestamp())
                except (ValueError, OverflowError, OSError):
                    # ValueError: malformed date. OverflowError/OSError: the platform
                    # cannot convert a far-past/far-future date to a timestamp.
                    birthday_ts = 0

            # Relation data
            related_mid = request.form.get('related_mid')
            relation_type = request.form.get('relation_type')
            sub_relation_type = request.form.get('sub_relation_type', 0)

            # Age check: a member may not be born before the selected father/mother.
            # Only runs when both birthdays are positive timestamps.
            if related_mid and relation_type in ['1', '2']:
                with conn.cursor() as cursor:
                    cursor.execute("SELECT name, birthday FROM family_member_info WHERE id = %s", (related_mid,))
                    parent = cursor.fetchone()
                    # A NULL birthday is treated like 0 (unknown) instead of raising TypeError.
                    parent_birthday = (parent['birthday'] or 0) if parent else 0
                    if parent_birthday > 0 and 0 < birthday_ts < parent_birthday:
                        conflict_message = f"数据冲突：成员年龄不能比其父亲/母亲（{parent['name']}）大，请检查并修正出生日期。"
                        flash(conflict_message)

                        # JSON callers do not need the form data, so answer before querying it.
                        if wants_json:
                            return jsonify({
                                "success": False,
                                "message": conflict_message
                            }), 400

                        # Reload what the edit page needs.
                        cursor.execute("SELECT * FROM family_member_info WHERE id = %s", (member_id,))
                        member = cursor.fetchone()
                        if not member:
                            flash('成员不存在')
                            return redirect(url_for('members'))
                        member['birthday_date'] = birthday_str  # keep what the user typed
                        cursor.execute("SELECT id, name FROM family_member_info WHERE id != %s ORDER BY name", (member_id,))
                        all_members = cursor.fetchall()
                        cursor.execute("SELECT * FROM genealogy_records ORDER BY page_number ASC")
                        images = cursor.fetchall()

                        return render_template('add_member.html', member=member, images=images, all_members=all_members, selected_member_name='')

            data = {
                'name': request.form['name'],
                'simplified_name': request.form.get('simplified_name'),
                'former_name': request.form.get('former_name'),
                'childhood_name': request.form.get('childhood_name'),
                'name_word': request.form.get('name_word'),
                'name_word_generation': ';'.join([g.strip() for g in request.form.getlist('lineage_generations[]') if g.strip()]),
                'name_title': request.form.get('name_title'),
                'sex': request.form['sex'],
                'birthday': birthday_ts,
                'is_pass_away': request.form.get('is_pass_away', 0),
                'marital_status': request.form.get('marital_status', 0),
                'birth_place': request.form.get('birth_place'),
                'branch_family_hall': request.form.get('branch_family_hall'),
                'cluster_place': request.form.get('cluster_place'),
                'nation': request.form.get('nation'),
                'residential_address': request.form.get('residential_address'),
                'phone': request.form.get('phone'),
                'mail': request.form.get('mail'),
                'wechat_account': request.form.get('wechat_account'),
                'id_number': request.form.get('id_number'),
                'occupation': request.form.get('occupation'),
                'educational': request.form.get('educational'),
                'blood_type': request.form.get('blood_type'),
                'religion': request.form.get('religion'),
                'hobbies': request.form.get('hobbies'),
                'personal_achievements': request.form.get('personal_achievements'),
                'family_rank': request.form.get('family_rank'),
                'tags': request.form.get('tags'),
                'notes': request.form.get('notes'),
                'source_record_id': request.form.get('source_record_id') or None
            }

            with conn.cursor() as cursor:
                # Column names come from the fixed dict above; only values are user input.
                update_parts = [f"{k} = %s" for k in data.keys()]
                sql = f"UPDATE family_member_info SET {', '.join(update_parts)} WHERE id = %s"
                cursor.execute(sql, list(data.values()) + [member_id])

                # Replace the relation row recorded for this member.
                if related_mid and relation_type:
                    rel_type = int(relation_type)
                    parent_mid = int(related_mid)
                    child_mid = member_id
                    gen_diff = 1 if rel_type in [1, 2] else 0

                    cursor.execute("DELETE FROM family_relation_info WHERE source_mid = %s", (member_id,))

                    sql_relation = """
                        INSERT INTO family_relation_info 
                        (parent_mid, child_mid, relation_type, sub_relation_type, source_mid, generation_diff) 
                        VALUES (%s, %s, %s, %s, %s, %s)
                    """
                    cursor.execute(sql_relation, (parent_mid, child_mid, rel_type, sub_relation_type, member_id, gen_diff))

                # Update AI record status if applicable (never blocks the commit).
                _mark_ai_entry_imported(
                    cursor,
                    data.get('source_record_id'),
                    request.form.get('source_index'),
                    member_id,
                )

                conn.commit()
                if wants_json:
                    return jsonify({"success": True, "message": "成员信息更新成功"})

                flash('成员信息更新成功')
                return redirect(url_for('members'))

        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM family_member_info WHERE id = %s", (member_id,))
            member = cursor.fetchone()
            if not member:
                flash('成员不存在')
                return redirect(url_for('members'))

            # Format the stored birthday for display.
            if member.get('birthday'):
                member['birthday_date'] = format_timestamp(member['birthday'])

            # Existing relation recorded for this member.
            cursor.execute("SELECT * FROM family_relation_info WHERE source_mid = %s LIMIT 1", (member_id,))
            current_relation = cursor.fetchone()

            cursor.execute("SELECT id, name FROM family_member_info WHERE id != %s ORDER BY name", (member_id,))
            all_members = cursor.fetchall()

            cursor.execute("SELECT * FROM genealogy_records ORDER BY page_number ASC")
            images = cursor.fetchall()
    finally:
        conn.close()

    # Name of the currently related member, for pre-filling the selector.
    selected_member_name = ''
    if current_relation and current_relation['parent_mid']:
        selected_member_name = next(
            (m['name'] for m in all_members if m['id'] == current_relation['parent_mid']),
            '',
        )

    # Source record id as stored on the member row.
    source_record_id = member.get('source_record_id') if member else None

    return render_template('add_member.html', member=member, images=images, all_members=all_members, current_relation=current_relation, selected_member_name=selected_member_name, source_record_id=source_record_id)

@app.route('/manager/member_detail/<int:member_id>')
def member_detail(member_id):
    """Render the read-only detail page of one family member.

    Fetches the member row joined with the genealogy record it references
    (image URL, page, version, source, uploader), then the relation rows in
    which the member is the child (``parents``) and those in which it is the
    parent (``children``). Anonymous visitors are sent to the login page; an
    unknown ``member_id`` flashes a message and redirects to the member list.
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # LEFT JOIN: a member without a source record is still returned.
            member_query = """
                SELECT m.*, r.oss_url as source_image_url, r.page_number as source_page,
                       r.genealogy_version, r.genealogy_source, r.upload_person
                FROM family_member_info m
                LEFT JOIN genealogy_records r ON m.source_record_id = r.id
                WHERE m.id = %s
            """
            cursor.execute(member_query, (member_id,))
            member = cursor.fetchone()
            if member:
                member['birthday_str'] = format_timestamp(member.get('birthday'))

                # Relations where this member is the child.
                parents_query = """
                SELECT m.id, m.name, r.relation_type 
                FROM family_relation_info r 
                JOIN family_member_info m ON r.parent_mid = m.id 
                WHERE r.child_mid = %s
            """
                cursor.execute(parents_query, (member_id,))
                parents = cursor.fetchall()

                # Relations where this member is the parent.
                children_query = """
                SELECT m.id, m.name, r.relation_type 
                FROM family_relation_info r 
                JOIN family_member_info m ON r.child_mid = m.id 
                WHERE r.parent_mid = %s
            """
                cursor.execute(children_query, (member_id,))
                children = cursor.fetchall()
            else:
                flash('成员不存在')
                return redirect(url_for('members'))
    finally:
        conn.close()

    return render_template('member_detail.html', member=member, parents=parents, children=children)

@app.route('/manager/delete_member/<int:member_id>', methods=['POST'])
def delete_member(member_id):
    """Delete a family member together with every relation row that mentions it.

    Responds with 401 JSON when nobody is logged in. Otherwise both deletes run
    in one transaction; the outcome is flashed and the browser is redirected to
    the member list whether the delete succeeded or was rolled back.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # Step 1: relation rows where the member is parent, child or source.
            relation_params = (member_id, member_id, member_id)
            cursor.execute(
                "DELETE FROM family_relation_info WHERE parent_mid = %s OR child_mid = %s OR source_mid = %s",
                relation_params,
            )

            # Step 2: the member row itself.
            cursor.execute("DELETE FROM family_member_info WHERE id = %s", (member_id,))

            conn.commit()
            flash('成员及其关系已成功删除')
    except Exception as e:
        conn.rollback()
        flash(f'删除失败: {e}')
    finally:
        conn.close()

    return redirect(url_for('members'))

@app.route('/manager/login', methods=['GET', 'POST'])
def login():
    """Show the login form and, on POST, authenticate against the ``users`` table.

    A matching row stores ``user_id`` and ``username`` in the session and
    redirects to ``index``. A failed lookup or a database error flashes a
    message and re-renders the form.
    """
    if request.method != 'POST':
        return render_template('login.html')

    username = request.form['username']
    password = request.form['password']

    try:
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                # NOTE(review): the submitted password is matched verbatim against the
                # `password` column, which suggests credentials are stored unhashed —
                # confirm and consider migrating to salted hashes.
                cursor.execute("SELECT * FROM users WHERE username=%s AND password=%s", (username, password))
                user = cursor.fetchone()
                if not user:
                    flash('用户名或密码错误')
                else:
                    session['user_id'] = user['id']
                    session['username'] = user['username']
                    return redirect(url_for('index'))
        finally:
            conn.close()
    except Exception as e:
        detail = str(e)
        flash(f'数据库连接错误: {detail}')
        print(f'Login error: {detail}')

    return render_template('login.html')

@app.route('/manager/logout')
def logout():
    """Drop everything stored in the session and return to the login page."""
    session.clear()
    login_url = url_for('login')
    return redirect(login_url)

@app.route('/manager/api/check_name')
def check_name():
    """Report whether a member with the given name already exists.

    Reads the ``name`` query parameter and looks for rows whose ``name`` or
    ``simplified_name`` equals it. Returns JSON: ``exists`` False for a blank
    name or no hit; otherwise ``exists`` True plus ``matches``, each carrying a
    display-ready ``birthday_str``. Responds 401 when not logged in and 500
    with the error text when the lookup fails.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    name = request.args.get('name', '').strip()
    if not name:
        return jsonify({"success": True, "exists": False})

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # Match either the full name or its simplified form.
            lookup_sql = "SELECT id, name, simplified_name, sex, birthday, is_pass_away FROM family_member_info WHERE name = %s OR simplified_name = %s"
            cursor.execute(lookup_sql, (name, name))
            matches = cursor.fetchall()

            if not matches:
                return jsonify({"success": True, "exists": False})

            # Add a human-readable birthday to every hit.
            for row in matches:
                row['birthday_str'] = format_timestamp(row['birthday']) if row.get('birthday') else '未知'

            return jsonify({"success": True, "exists": True, "matches": matches})
    except Exception as e:
        return jsonify({"success": False, "error": str(e)}), 500
    finally:
        conn.close()

import requests
import json
import re

@app.route('/manager/api/recognize_image', methods=['POST'])
def recognize_image():
    """Stream the AI model's extraction of genealogy entries from one image.

    Expects a JSON object body containing ``image_url``. The image is passed
    through ``get_normalized_base64_image`` and posted, together with a fixed
    extraction prompt, to the configured ``responses`` endpoint in streaming
    mode. The plain-text response relays progress messages and reasoning text
    as they arrive; the marker ``|||JSON_START|||`` is emitted once, right
    before the first piece of answer text.

    Returns 401 JSON when not logged in and 400 JSON when the body is not a
    JSON object or has no ``image_url``. Upstream failures are reported as
    text inside the stream.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    # silent=True: a missing or malformed JSON body becomes None instead of raising,
    # and the isinstance check covers bodies such as `null` or a list.
    data = request.get_json(silent=True)
    image_url = data.get('image_url') if isinstance(data, dict) else None
    if not image_url:
        return jsonify({"success": False, "message": "No image URL provided"}), 400

    # NOTE(review): credential is hard-coded in source; it should be moved to
    # configuration/environment and rotated.
    api_key = "a1800657-9212-4afe-9b7c-b49f015c54d3"
    api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"
    
    prompt = """
    请分析这张家谱图片，提取其中关于人物的信息。
    请务必将繁体字转换为简体字（original_name 字段除外）。
    特别注意：'name' 字段必须是纯简体中文，不能包含繁体字（例如：'學'应转换为'学'，'劉'应转换为'刘'，'萬'应转换为'万'）。
    请提取以下字段（如果存在）：
    - original_name: 原始姓名（严格保持图片上的繁体字，不做任何修改或转换）
    - name: 简体姓名（必须转换为简体中文，去除不需要的敬称）
    - sex: 性别（男/女）
    - birthday: 出生日期（尝试转换为YYYY-MM-DD格式，如果无法确定年份可只填月日）
    - death_date: 逝世日期（如文本中出现“殁”、“葬”、“卒”等字眼及其对应的时间，请提取）
    - father_name: 父亲姓名
    - spouse_name: 配偶姓名
    - generation: 第几世/代数
    - name_word: 字辈（例如名字为“学勤公”，“学”为字辈；提取名字中的字辈信息）
    - education: 学历/功名
    - title: 官职/称号
    
    请严格以JSON列表格式返回，不要包含Markdown代码块标记（如 ```json ... ```），直接返回JSON数组。
    如果包含多个人物，请都提取出来。
    """

    ai_payload_url = get_normalized_base64_image(image_url)
    
    payload = {
        "model": "doubao-seed-1-8-251228",
        "stream": True,
        "input": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "input_image",
                        "image_url": ai_payload_url
                    },
                    {
                        "type": "input_text",
                        "text": prompt
                    }
                ]
            }
        ]
    }
    
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    
    def generate():
        """Yield progress text, reasoning text and answer text from the upstream stream."""
        yield "正在连接 AI 服务...\n"
        try:
            # stream=True with a 1200-second timeout.
            # proxies are explicitly cleared so a local proxy cannot interfere.
            # NOTE(review): verify=False disables TLS certificate verification
            # (the original comment says this is for the development
            # environment) — confirm it is not used in production.
            with requests.post(
                api_url, 
                json=payload, 
                headers=headers, 
                stream=True, 
                timeout=1200, 
                verify=False,
                proxies={"http": None, "https": None}
            ) as r:
                if r.status_code != 200:
                    yield f"Error: API returned status code {r.status_code}. Response: {r.text}"
                    return

                yield "连接成功，正在等待 AI 响应...\n"
                
                # Becomes True once the JSON start marker has been emitted.
                json_started = False
                
                for line in r.iter_lines():
                    if not line:
                        continue
                    line_str = line.decode('utf-8')

                    if not line_str.startswith('data: '):
                        # Lines without the "data: " prefix: try to read them as a
                        # complete (non-streamed) choices payload. The yield is kept
                        # outside the try so that GeneratorExit (client disconnect)
                        # is never swallowed by the error handling.
                        try:
                            chunk = json.loads(line_str)
                            has_choice = 'choices' in chunk and len(chunk['choices']) > 0
                            content = chunk['choices'][0]['message']['content'] if has_choice else None
                        except Exception:
                            continue
                        if has_choice:
                            yield content
                        continue

                    json_str = line_str[6:]
                    if json_str.strip() == '[DONE]':
                        break
                    try:
                        chunk = json.loads(json_str)
                        
                        # OpenAI-style "choices" chunks: answer text in delta.content.
                        if 'choices' in chunk and len(chunk['choices']) > 0:
                            delta = chunk['choices'][0].get('delta', {})
                            if 'content' in delta:
                                if not json_started:
                                    yield "|||JSON_START|||"
                                    json_started = True
                                yield delta['content']
                            
                            # OpenAI-style reasoning text, if present.
                            if 'reasoning_content' in delta:
                                yield f"\n[推理]: {delta['reasoning_content']}"

                        # Event-typed chunks: reasoning summary text.
                        if chunk.get('type') == 'response.reasoning_summary_text.delta':
                            if 'delta' in chunk:
                                yield chunk['delta']
                        
                        # Event-typed chunks: answer text.
                        if chunk.get('type') == 'response.text.delta':
                            if 'delta' in chunk:
                                if not json_started:
                                    yield "|||JSON_START|||"
                                    json_started = True
                                yield chunk['delta']

                        # Other event types (e.g. response.output_item.added,
                        # response.reasoning_summary_part.added) are ignored.
                    except Exception as e:
                        print(f"Chunk parse error: {e}")
        except Exception as e:
            yield f"\n[Error: {str(e)}]"

    return Response(stream_with_context(generate()), mimetype='text/plain')

@app.route('/manager/api/start_analysis/<int:record_id>', methods=['POST'])
def start_analysis(record_id):
    """Kick off AI analysis for an existing genealogy record.

    Sets the record's ``ai_status`` to 1, commits, and starts
    ``process_ai_task`` on a new thread with the record's ``oss_url``.
    Returns JSON: 401 when not logged in, 404 when the record does not exist,
    500 with the error text on failure, otherwise a success message.
    """
    if 'user_id' not in session:
        return jsonify({"success": False, "message": "Unauthorized"}), 401

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT oss_url, ai_status FROM genealogy_records WHERE id = %s", (record_id,))
            record = cursor.fetchone()

            if record:
                # Flag the record as processing (1) before handing it to the worker.
                cursor.execute("UPDATE genealogy_records SET ai_status = 1 WHERE id = %s", (record_id,))
                conn.commit()

                worker = threading.Thread(target=process_ai_task, args=(record_id, record['oss_url']))
                worker.start()

                return jsonify({"success": True, "message": "Analysis started"})

            return jsonify({"success": False, "message": "Record not found"}), 404
    except Exception as e:
        return jsonify({"success": False, "message": str(e)}), 500
    finally:
        conn.close()

def _remove_file_quietly(path):
    """Delete *path* if it exists; report (but never raise) filesystem errors."""
    if path and os.path.exists(path):
        try:
            os.remove(path)
        except OSError as remove_e:
            print(f"Warning: could not remove temporary file {path}: {remove_e}")


def process_files_background(upload_folder, saved_files, manual_page, suggested_page, genealogy_version, genealogy_source, upload_person):
    """Upload saved files to OSS, register them, and start AI analysis per page.

    Intended to run on a background thread. ``saved_files`` holds tuples of
    ``(filename, file_path[, file_page[, original_filename]])``.

    * PDF files: the whole PDF is uploaded and recorded in ``genealogy_pdfs``;
      then every page is rendered to a JPEG, uploaded, inserted into
      ``genealogy_records`` (``ai_status`` 1, type ``'PDF'``) and handed to
      ``process_ai_task`` on its own thread.
    * Other files: passed through ``compress_image_if_needed``, uploaded,
      inserted (type ``'图片'``) and handed to ``process_ai_task``.

    Page numbers start from ``manual_page`` when it is numeric, otherwise from
    ``suggested_page``, and advance after each successful insert. Errors are
    printed per file/page and do not stop the remaining work. The local source
    file and any temporary images are removed whether or not processing
    succeeded.
    """
    import uuid

    current_suggested_page = int(manual_page) if manual_page and str(manual_page).isdigit() else suggested_page
    ensure_pdf_table()

    for item in saved_files:
        # Shorter tuples are accepted: missing fields fall back to None / filename.
        filename, file_path = item[0], item[1]
        file_page = item[2] if len(item) >= 3 else None
        original_filename = item[3] if len(item) >= 4 else filename

        doc = None              # open PDF document, closed in the finally below
        compressed_path = None  # image returned by compress_image_if_needed
        try:
            if filename.lower().endswith('.pdf'):
                display_pdf_name = (original_filename or filename).strip() or filename
                oss_pdf_name = secure_filename(display_pdf_name)
                if not oss_pdf_name or not oss_pdf_name.lower().endswith('.pdf'):
                    oss_pdf_name = f"genealogy_pdf_{uuid.uuid4().hex[:8]}.pdf"
                pdf_oss_url = upload_to_oss(file_path, custom_filename=oss_pdf_name)
                if pdf_oss_url:
                    desc_parts = [part for part in (genealogy_version, genealogy_source) if part]
                    pdf_description = ' · '.join(desc_parts) if desc_parts else ''
                    conn_pdf = get_db_connection()
                    try:
                        with conn_pdf.cursor() as cursor:
                            cursor.execute(
                                "INSERT INTO genealogy_pdfs (file_name, oss_url, description, uploader) VALUES (%s, %s, %s, %s)",
                                (display_pdf_name, pdf_oss_url, pdf_description, upload_person or '')
                            )
                        conn_pdf.commit()
                    except Exception as pdf_meta_e:
                        print(f"Error inserting genealogy_pdfs for {display_pdf_name}: {pdf_meta_e}")
                    finally:
                        conn_pdf.close()
                else:
                    print(f"Warning: full PDF upload to OSS failed for {filename}, scan pages will still be processed.")

                doc = fitz.open(file_path)
                for page_index in range(len(doc)):
                    img_path = None
                    try:
                        page = doc.load_page(page_index)
                        # Scale so the longer side is about 2000 px, capped at 2.5x zoom.
                        max_dim = max(page.rect.width, page.rect.height)
                        zoom = 2000 / max_dim if max_dim > 0 else 2.0
                        zoom = min(zoom, 2.5)
                        mat = fitz.Matrix(zoom, zoom)
                        
                        pix = page.get_pixmap(matrix=mat)
                        
                        final_page = current_suggested_page
                        if genealogy_version and genealogy_source:
                            if final_page is not None and str(final_page).strip() != '':
                                img_filename = f"{genealogy_version}_{genealogy_source}_{final_page}.jpg"
                            else:
                                img_filename = f"{genealogy_version}_{genealogy_source}.jpg"
                        else:
                            img_filename = f"{os.path.splitext(filename)[0]}_page_{page_index+1}.jpg"
                            
                        img_path = os.path.join(upload_folder, img_filename)
                        
                        # Write the rendered page to disk so it can be uploaded.
                        pix.save(img_path)
                        
                        oss_url = upload_to_oss(img_path, custom_filename=img_filename)
                        if oss_url:
                            conn = get_db_connection()
                            try:
                                with conn.cursor() as cursor:
                                    sql = """INSERT INTO genealogy_records 
                                             (file_name, oss_url, page_number, ai_status, genealogy_version, genealogy_source, upload_person, file_type) 
                                             VALUES (%s, %s, %s, 1, %s, %s, %s, %s)"""
                                    cursor.execute(sql, (img_filename, oss_url, final_page, genealogy_version, genealogy_source, upload_person, 'PDF'))
                                    record_id = cursor.lastrowid
                                conn.commit()
                                threading.Thread(target=process_ai_task, args=(record_id, oss_url)).start()
                                # The page counter only advances for pages that were stored.
                                current_suggested_page += 1
                            finally:
                                conn.close()
                    except Exception as page_e:
                        print(f"Error processing page {page_index} of {filename}: {page_e}")
                    finally:
                        _remove_file_quietly(img_path)
            else:
                compressed_path = compress_image_if_needed(file_path)
                img_path = compressed_path
                
                # Use explicitly set page number if provided, otherwise extract from filename or auto-increment
                if file_page and str(file_page).isdigit():
                    final_page = int(file_page)
                    current_suggested_page = final_page + 1
                    page_num = final_page
                else:
                    page_num = extract_page_number(img_path)
                    final_page = page_num if page_num else current_suggested_page
                
                ext = os.path.splitext(img_path)[1]
                if genealogy_version and genealogy_source:
                    if final_page is not None and str(final_page).strip() != '':
                        img_filename = f"{genealogy_version}_{genealogy_source}_{final_page}{ext}"
                    else:
                        img_filename = f"{genealogy_version}_{genealogy_source}{ext}"
                else:
                    img_filename = os.path.basename(img_path)
                
                oss_url = upload_to_oss(img_path, custom_filename=img_filename)
                if oss_url:
                    conn = get_db_connection()
                    try:
                        with conn.cursor() as cursor:
                            sql = """INSERT INTO genealogy_records 
                                     (file_name, oss_url, page_number, ai_status, genealogy_version, genealogy_source, upload_person, file_type) 
                                     VALUES (%s, %s, %s, 1, %s, %s, %s, %s)"""
                            cursor.execute(sql, (img_filename, oss_url, final_page, genealogy_version, genealogy_source, upload_person, '图片'))
                            record_id = cursor.lastrowid
                        conn.commit()
                        threading.Thread(target=process_ai_task, args=(record_id, oss_url)).start()
                        if page_num:
                            current_suggested_page = page_num + 1
                        else:
                            current_suggested_page += 1
                    finally:
                        conn.close()
        except Exception as e:
            print(f"Error processing file {filename}: {e}")
        finally:
            # Release the PDF handle before deleting the file it was opened from.
            if doc is not None:
                try:
                    doc.close()
                except Exception as close_e:
                    print(f"Warning: could not close PDF {filename}: {close_e}")
            # The compressed copy is removed even when upload/insert failed;
            # when no separate copy was made the path equals file_path.
            if compressed_path != file_path:
                _remove_file_quietly(compressed_path)
            _remove_file_quietly(file_path)

@app.route('/manager/upload', methods=['GET', 'POST'])
def upload():
    """Show the upload form (GET) or accept uploaded scans (POST).

    The suggested page number is the highest ``page_number`` in
    ``genealogy_records`` plus one (1 when there is none). On POST every
    submitted file is saved into ``UPLOAD_FOLDER`` under a sanitized name and
    the batch is passed to ``process_files_background`` on a new thread; the
    handler then pauses briefly and redirects to ``index``. Anonymous visitors
    are redirected to the login page.
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))

    # Suggested page number: current maximum page + 1.
    suggested_page = 1
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT MAX(page_number) as max_p FROM genealogy_records")
            max_row = cursor.fetchone()
            if max_row and max_row['max_p']:
                suggested_page = max_row['max_p'] + 1
    finally:
        conn.close()

    if request.method != 'POST':
        return render_template('upload.html', suggested_page=suggested_page)

    # No "file" field at all, an empty list, or an empty first filename all mean
    # that nothing was selected.
    files = request.files.getlist('file') if 'file' in request.files else []
    if not files or files[0].filename == '':
        flash('未选择文件')
        return redirect(request.url)

    form = request.form
    manual_page = form.get('manual_page')
    genealogy_version = form.get('genealogy_version', '')
    genealogy_source = form.get('genealogy_source', '')
    # Fall back to the logged-in user when no uploader name was submitted.
    upload_person = form.get('upload_person', '') or session.get('username', '')

    import uuid
    upload_dir = app.config['UPLOAD_FOLDER']
    saved_files = []
    for position, incoming in enumerate(files):
        if not (incoming and incoming.filename):
            continue

        original_filename = incoming.filename
        ext = os.path.splitext(original_filename)[1].lower()
        base_name = secure_filename(original_filename)

        if not base_name or base_name == ext.strip('.'):
            # secure_filename left nothing usable (e.g. a purely Chinese name
            # reduced to just "pdf"): generate a random name instead.
            filename = f"upload_{uuid.uuid4().hex[:8]}{ext}"
        elif base_name.lower().endswith(ext):
            filename = base_name
        else:
            # Make sure the extension survives sanitizing.
            filename = f"{base_name}{ext}"

        file_path = os.path.join(upload_dir, filename)
        incoming.save(file_path)

        # Per-file page number, keyed by the file's position in the submitted list.
        file_page = form.get(f'page_number_{position}')
        saved_files.append((filename, file_path, file_page, original_filename))

    if saved_files:
        worker = threading.Thread(
            target=process_files_background,
            args=(upload_dir, saved_files, manual_page, suggested_page, genealogy_version, genealogy_source, upload_person)
        )
        worker.start()
        flash('上传完成，AI解析中，稍后查看')

    time.sleep(1.5)
    return redirect(url_for('index'))

@app.route('/manager/save_upload', methods=['POST'])
def save_upload():
    """Record an already-uploaded file in ``genealogy_records`` and start AI parsing.

    Form fields read: ``filename``, ``oss_url``, ``page_number``,
    ``genealogy_version``, ``genealogy_source``, ``upload_person`` and
    ``file_type`` (defaults to '图片').

    Returns:
        A redirect to the login page when no user is logged in, to the upload
        page when ``oss_url`` or ``page_number`` is missing, and to the index
        page otherwise (with a flashed success or failure message).
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))

    filename = request.form.get('filename')
    oss_url = request.form.get('oss_url')
    page_number = request.form.get('page_number')
    genealogy_version = request.form.get('genealogy_version', '')
    genealogy_source = request.form.get('genealogy_source', '')
    # `.get(key, default)` only covers a missing key; a field submitted blank
    # must also fall back to the logged-in user, as the upload handler does.
    upload_person = request.form.get('upload_person') or session.get('username', '')
    file_type = request.form.get('file_type', '图片')

    if not oss_url or not page_number:
        flash('页码不能为空')
        return redirect(url_for('upload'))

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # ai_status is hard-coded to 1 for every newly inserted record.
            sql = """INSERT INTO genealogy_records 
                     (file_name, oss_url, page_number, ai_status, genealogy_version, genealogy_source, upload_person, file_type) 
                     VALUES (%s, %s, %s, 1, %s, %s, %s, %s)"""
            cursor.execute(sql, (filename, oss_url, page_number, genealogy_version, genealogy_source, upload_person, file_type))
            record_id = cursor.lastrowid
        conn.commit()

        # Start the AI task only after the commit so the row exists when the
        # background thread looks it up.
        threading.Thread(target=process_ai_task, args=(record_id, oss_url)).start()

        flash('上传完成，AI解析中，稍后查看')
    except Exception as e:
        # Request-level boundary: report the failure to the user instead of a 500.
        flash(f'保存失败: {e}')
    finally:
        conn.close()
    return redirect(url_for('index'))

@app.route('/manager/delete_upload/<int:record_id>', methods=['POST'])
def delete_upload(record_id):
    """Delete one row from ``genealogy_records`` by its id.

    Args:
        record_id: Primary key of the record to delete (taken from the URL).

    Returns:
        A JSON error body with status 401 when no user is logged in; otherwise
        a redirect to the index page, with the outcome flashed to the user.
    """
    logged_in = 'user_id' in session
    if not logged_in:
        unauthorized = {"success": False, "message": "Unauthorized"}
        return jsonify(unauthorized), 401

    delete_sql = "DELETE FROM genealogy_records WHERE id = %s"
    db = get_db_connection()
    try:
        with db.cursor() as cur:
            # Remove the record and make the deletion permanent.
            cur.execute(delete_sql, (record_id,))
            db.commit()
            flash('文件记录已成功删除')
    except Exception as exc:
        # Undo any partial work and surface the error to the user.
        db.rollback()
        flash(f'删除失败: {exc}')
    finally:
        db.close()

    # Both the success and the failure path land back on the index page.
    return redirect(url_for('index'))

@app.route('/manager/upload_pdf', methods=['GET', 'POST'])
def upload_pdf():
    """Render the PDF upload form (GET) or store an uploaded PDF (POST).

    On POST the handler validates the upload (a file must be present and its
    name must end in ``.pdf``) and the required form fields ``version_name``
    and ``version_source``. It then saves the file under ``UPLOAD_FOLDER``,
    pushes it through ``upload_to_oss``, inserts a row into ``genealogy_pdfs``
    and finally removes the local copy.

    Returns:
        A redirect to the login page when no user is logged in, the rendered
        ``upload_pdf.html`` template on GET, a redirect back to this URL on any
        validation/storage failure, or a redirect to ``pdf_management`` on
        success.
    """
    if 'user_id' not in session:
        return redirect(url_for('login'))

    if request.method == 'GET':
        return render_template('upload_pdf.html')

    # Handle the POST request.
    file = request.files.get('file')
    # Also covers a part submitted without any filename, which would
    # otherwise crash on `.lower()` below.
    if not file or not file.filename:
        flash('请选择要上传的PDF文件')
        return redirect(request.url)

    # Check the file type (by extension only).
    if not file.filename.lower().endswith('.pdf'):
        flash('只支持PDF文件上传')
        return redirect(request.url)

    # Read the form data.
    version_name = request.form.get('version_name', '').strip()
    version_source = request.form.get('version_source', '').strip()
    file_provider = request.form.get('file_provider', '').strip()

    # Validate required fields.
    if not version_name:
        flash('版本名称为必填项')
        return redirect(request.url)
    if not version_source:
        flash('版本来源为必填项')
        return redirect(request.url)

    # If no file provider was given, fall back to the logged-in user.
    # NOTE(review): this stores the session's user_id, whereas sibling handlers
    # record session['username'] as the uploader - confirm which is intended.
    if not file_provider:
        file_provider = session.get('user_id', '未知')

    import uuid
    original_filename = file.filename
    ext = os.path.splitext(original_filename)[1].lower()
    base_name = secure_filename(original_filename)

    # secure_filename can strip the whole stem (e.g. a purely non-ASCII name),
    # leaving nothing or just the bare extension; generate a name in that case.
    if not base_name or base_name == ext.strip('.'):
        filename = f"genealogy_pdf_{uuid.uuid4().hex[:8]}{ext}"
    elif not base_name.lower().endswith(ext):
        filename = f"{base_name}{ext}"
    else:
        filename = base_name

    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    try:
        # Saving happens inside the try so a partially written file is still
        # cleaned up by the finally clause below.
        file.save(file_path)

        # Upload to OSS
        oss_url = upload_to_oss(file_path, custom_filename=filename)
        if not oss_url:
            flash('文件上传失败')
            return redirect(request.url)

        # Save to database
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute(
                    "INSERT INTO genealogy_pdfs (file_name, oss_url, version_name, version_source, file_provider, upload_time) VALUES (%s, %s, %s, %s, %s, CURRENT_TIMESTAMP)",
                    (original_filename, oss_url, version_name, version_source, file_provider)
                )
            conn.commit()

            flash('PDF文件上传成功')
            return redirect(url_for('pdf_management'))
        except Exception as e:
            # Request-level boundary: report the failure instead of a 500.
            flash(f'保存失败: {e}')
            return redirect(request.url)
        finally:
            conn.close()
    finally:
        # Best-effort removal of the local temporary copy.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass  # nothing was written, nothing to clean up
        except OSError as e:
            print(f"Failed to remove temporary file {file_path}: {e}")

def process_pdf_pages(file_path, pdf_oss_url, uploader):
    """Render every page of a PDF to an image and register it as a genealogy record.

    For each page the function renders a 150-dpi pixmap, saves it as a ``.jpg``
    under ``UPLOAD_FOLDER``, uploads it with ``upload_to_oss``, inserts a row
    into ``genealogy_records`` and starts ``process_ai_task`` on a new thread.
    Page numbers continue from the current ``MAX(page_number)`` in the table.

    Args:
        file_path: Local path of the PDF to split.
        pdf_oss_url: Not used by this function; kept for caller compatibility.
        uploader: Value stored in the ``upload_person`` column.

    Returns:
        None. Every error is caught and printed; a failure on one page does not
        stop the remaining pages.
    """
    doc = None
    try:
        import fitz
        doc = fitz.open(file_path)

        # Get current max page number
        suggested_page = 1
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute("SELECT MAX(page_number) as max_p FROM genealogy_records")
                result = cursor.fetchone()
                if result and result['max_p']:
                    suggested_page = result['max_p'] + 1
        finally:
            conn.close()

        # Loop-invariant pieces of each page image's name and location.
        pdf_stem = os.path.splitext(os.path.basename(file_path))[0]
        upload_dir = app.config['UPLOAD_FOLDER']

        for page_index in range(len(doc)):
            # Reset per page so cleanup never acts on a path left over from an
            # earlier iteration.
            img_path = None
            try:
                page = doc[page_index]
                pix = page.get_pixmap(dpi=150)

                # Save as image
                img_filename = f"{pdf_stem}_page_{page_index+1}.jpg"
                img_path = os.path.join(upload_dir, img_filename)
                pix.save(img_path)

                # Upload to OSS
                img_oss_url = upload_to_oss(img_path, custom_filename=img_filename)
                if img_oss_url:
                    # Save to genealogy_records
                    conn = get_db_connection()
                    try:
                        with conn.cursor() as cursor:
                            cursor.execute(
                                "INSERT INTO genealogy_records (file_name, oss_url, page_number, ai_status, upload_person, file_type) VALUES (%s, %s, %s, 1, %s, %s)",
                                (img_filename, img_oss_url, suggested_page + page_index, uploader, '图片')
                            )
                            record_id = cursor.lastrowid
                        conn.commit()

                        # Start AI processing
                        threading.Thread(target=process_ai_task, args=(record_id, img_oss_url)).start()
                    finally:
                        conn.close()
            except Exception as e:
                # Keep going: one bad page must not abort the whole document.
                print(f"Error processing page {page_index+1}: {e}")
            finally:
                # Best-effort removal of the temporary page image.
                if img_path is not None:
                    try:
                        os.remove(img_path)
                    except FileNotFoundError:
                        pass  # image was never written
                    except OSError as e:
                        print(f"Failed to remove temporary image {img_path}: {e}")
    except Exception as e:
        print(f"Error processing PDF: {e}")
    finally:
        # Release the document handle; it was previously left open.
        if doc is not None:
            try:
                doc.close()
            except Exception as e:
                print(f"Error closing PDF: {e}")

# Script entry point: when this file is executed directly, start the app's
# built-in server on port 5001 with debug mode switched off.
if __name__ == '__main__':
    app.run(port=5001, debug=False)