app.py 200 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581
  1. import os
  2. import pymysql
  3. import requests
  4. import json
  5. import re
  6. import threading
  7. import urllib3
  8. import fitz # PyMuPDF
  9. from flask import Flask, render_template, request, redirect, url_for, session, flash, jsonify, Response, stream_with_context
  10. from werkzeug.utils import secure_filename
  11. from oss_utils import upload_to_oss
  12. from ocr_utils import extract_page_number
  13. import time
  14. from datetime import datetime
  15. # Suppress InsecureRequestWarning
  16. urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
  17. app = Flask(__name__, static_folder='static', static_url_path='/manager/static')
  18. app.secret_key = 'genealogy_secret_key'
  19. app.config['UPLOAD_FOLDER'] = 'uploads'
  20. os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
  21. # 数据库配置
  22. DB_CONFIG = {
  23. "host": "rm-f8ze60yirdj8786u2wo.mysql.rds.aliyuncs.com",
  24. "port": 3306,
  25. "user": "root",
  26. "password": "csqz@20255",
  27. "db": "csqz-client",
  28. "charset": "utf8mb4",
  29. "cursorclass": pymysql.cursors.DictCursor
  30. }
  31. from PIL import Image
  32. def compress_image_if_needed(file_path, max_dim=2000):
  33. """Compress, resize and normalize image to JPEG for AI processing."""
  34. try:
  35. # We always want to normalize to JPEG so AI doesn't complain about format
  36. with Image.open(file_path) as img:
  37. # Convert RGBA/P or any other mode to RGB for JPEG saving
  38. if img.mode != 'RGB':
  39. img = img.convert('RGB')
  40. width, height = img.size
  41. if max(width, height) > max_dim:
  42. ratio = max_dim / max(width, height)
  43. new_size = (int(width * ratio), int(height * ratio))
  44. img = img.resize(new_size, Image.Resampling.LANCZOS)
  45. # Always save as JPEG to normalize the format
  46. new_path = os.path.splitext(file_path)[0] + '_normalized.jpg'
  47. img.save(new_path, 'JPEG', quality=85)
  48. return new_path
  49. except Exception as e:
  50. print(f"Warning: Image compression/normalization failed for {file_path}: {e}")
  51. return file_path
  52. # 尝试使用数据库连接池,如果不可用则使用普通连接
  53. try:
  54. from DBUtils.PooledDB import PooledDB
  55. # 创建连接池
  56. pool = PooledDB(
  57. creator=pymysql,
  58. maxconnections=10, # 连接池最大连接数
  59. mincached=2, # 初始化时创建的空闲连接数
  60. maxcached=5, # 最大空闲连接数
  61. maxshared=3, # 最大共享连接数
  62. blocking=True, # 连接池满时是否阻塞等待
  63. maxusage=1000, # 一个连接最多被重复使用的次数,防止连接长时间使用失效
  64. setsession=[], # 开始会话前执行的命令列表
  65. ping=1, # 每次获取连接时都检查连接是否可用
  66. **DB_CONFIG
  67. )
  68. def get_db_connection():
  69. conn = pool.connection()
  70. print(f"[Database] Got connection from pool: {id(conn)}")
  71. return conn
  72. print("[Database] Database connection pool initialized successfully")
  73. except ImportError:
  74. # 如果DBUtils不可用,使用普通连接
  75. def get_db_connection():
  76. conn = pymysql.connect(**DB_CONFIG)
  77. print(f"[Database] Created new connection: {id(conn)}")
  78. return conn
  79. print("[Database] DBUtils not available, using regular database connections")
  80. def verify_connection(conn):
  81. """Verify database connection is still alive"""
  82. try:
  83. cursor = conn.cursor()
  84. cursor.execute("SELECT 1")
  85. cursor.fetchone()
  86. cursor.close()
  87. return True
  88. except Exception as e:
  89. print(f"[Database] Connection verification failed: {e}")
  90. return False
  91. def safe_commit(conn):
  92. """Safely commit transaction with error handling"""
  93. try:
  94. conn.commit()
  95. print(f"[Database] Transaction committed successfully")
  96. return True
  97. except Exception as e:
  98. print(f"[Database] Commit failed: {e}")
  99. try:
  100. conn.rollback()
  101. print(f"[Database] Rollback completed")
  102. except Exception as rollback_err:
  103. print(f"[Database] Rollback also failed: {rollback_err}")
  104. return False
  105. def format_timestamp(ts):
  106. if not ts: return '未知'
  107. try:
  108. # 兼容秒和毫秒
  109. if ts > 10000000000: # 超过2286年的秒数,通常认为是毫秒
  110. ts = ts / 1000
  111. return time.strftime('%Y-%m-%d', time.localtime(ts))
  112. except:
  113. return '未知'
  114. def manual_simplify(text):
  115. """
  116. Simple fallback for common Traditional to Simplified conversion
  117. if AI fails to convert specific characters.
  118. """
  119. if not text: return text
  120. mapping = {
  121. '學': '学', '國': '国', '萬': '万', '寶': '宝', '興': '兴',
  122. '華': '华', '會': '会', '葉': '叶', '藝': '艺', '號': '号',
  123. '處': '处', '見': '见', '視': '视', '言': '言', '語': '语',
  124. '貝': '贝', '車': '车', '長': '长', '門': '门', '韋': '韦',
  125. '頁': '页', '風': '风', '飛': '飞', '食': '食', '馬': '马',
  126. '魚': '鱼', '鳥': '鸟', '麥': '麦', '黃': '黄', '齊': '齐',
  127. '齒': '齿', '龍': '龙', '龜': '龟', '壽': '寿', '榮': '荣',
  128. '愛': '爱', '慶': '庆', '衛': '卫', '賢': '贤', '義': '义',
  129. '禮': '礼', '樂': '乐', '靈': '灵', '滅': '灭', '氣': '气',
  130. '智': '智', '信': '信', '仁': '仁', '勇': '勇', '嚴': '严',
  131. '銳': '锐', '優': '优', '楊': '杨', '吳': '吴', '銀': '银'
  132. }
  133. result = ""
  134. for char in text:
  135. result += mapping.get(char, char)
  136. return result
  137. def convert_to_simplified(text):
  138. """繁体转简体,优先使用 zhconv 库,失败则降级到 manual_simplify"""
  139. if not text:
  140. return text
  141. try:
  142. import zhconv
  143. return zhconv.convert(text, 'zh-hans')
  144. except Exception:
  145. return manual_simplify(text)
  146. def _build_reverse_simplify_map():
  147. """
  148. Build a reverse map from simplified char -> list of traditional chars
  149. based on the fallback manual_simplify mapping.
  150. """
  151. mapping = {
  152. '學': '学', '國': '国', '萬': '万', '寶': '宝', '興': '兴',
  153. '華': '华', '會': '会', '葉': '叶', '藝': '艺', '號': '号',
  154. '處': '处', '見': '见', '視': '视', '言': '言', '語': '语',
  155. '貝': '贝', '車': '车', '長': '长', '門': '门', '韋': '韦',
  156. '頁': '页', '風': '风', '飛': '飞', '食': '食', '馬': '马',
  157. '魚': '鱼', '鳥': '鸟', '麥': '麦', '黃': '黄', '齊': '齐',
  158. '齒': '齿', '龍': '龙', '龜': '龟', '壽': '寿', '榮': '荣',
  159. '愛': '爱', '慶': '庆', '衛': '卫', '賢': '贤', '義': '义',
  160. '禮': '礼', '樂': '乐', '靈': '灵', '滅': '灭', '氣': '气',
  161. '智': '智', '信': '信', '仁': '仁', '勇': '勇', '嚴': '严',
  162. '銳': '锐', '優': '优', '楊': '杨', '吳': '吴', '銀': '银'
  163. }
  164. rev = {}
  165. for trad, simp in mapping.items():
  166. rev.setdefault(simp, [])
  167. if trad not in rev[simp]:
  168. rev[simp].append(trad)
  169. return rev
  170. _REVERSE_SIMPLIFY_MAP = _build_reverse_simplify_map()
  171. def expand_name_search_variants(keyword, max_variants=60):
  172. """
  173. Expand keyword into a small set of variants so Simplified/Traditional
  174. searches can match both `name` and `simplified_name`.
  175. - Always includes original keyword
  176. - Includes fallback-trad->simp conversion
  177. - Includes best-effort simp->trad expansions based on reverse map
  178. """
  179. if not keyword:
  180. return []
  181. kw = str(keyword).strip()
  182. if not kw:
  183. return []
  184. variants = set([kw])
  185. variants.add(manual_simplify(kw))
  186. # Build possible traditional variants when the input is simplified.
  187. # For each char, if we have traditional candidates, branch; otherwise keep itself.
  188. choices = []
  189. for ch in kw:
  190. cand = _REVERSE_SIMPLIFY_MAP.get(ch)
  191. if cand:
  192. # include itself too (covers already-traditional or neutral chars)
  193. choices.append([ch] + cand)
  194. else:
  195. choices.append([ch])
  196. # Cartesian product with early stop.
  197. results = ['']
  198. for opts in choices:
  199. new_results = []
  200. for prefix in results:
  201. for opt in opts:
  202. new_results.append(prefix + opt)
  203. if len(new_results) >= max_variants:
  204. break
  205. if len(new_results) >= max_variants:
  206. break
  207. results = new_results
  208. if len(results) >= max_variants:
  209. break
  210. for r in results:
  211. if r:
  212. variants.add(r)
  213. variants.add(manual_simplify(r))
  214. # Keep deterministic order for stable SQL params
  215. ordered = []
  216. for v in variants:
  217. v2 = (v or '').strip()
  218. if v2 and v2 not in ordered:
  219. ordered.append(v2)
  220. if len(ordered) >= max_variants:
  221. break
  222. return ordered
  223. def clean_name(name):
  224. """
  225. Clean name according to Liu family genealogy rules:
  226. 1. If name is '学公' or '留学公', keep 'Gong' (exception).
  227. 2. Otherwise, if name ends with '公', remove '公'.
  228. 3. If name does not start with '留', prepend '留'.
  229. """
  230. if not name: return name
  231. name = name.strip()
  232. # Pre-process: Ensure Simplified Chinese for specific chars
  233. name = manual_simplify(name)
  234. # 1. Check exceptions (names that SHOULD keep 'Gong')
  235. exceptions = ['学公', '留学公']
  236. if name in exceptions:
  237. if not name.startswith('留'):
  238. name = '留' + name
  239. return name
  240. # 2. General Rule: Remove 'Gong' suffix
  241. if name.endswith('公'):
  242. name = name[:-1]
  243. # 3. Ensure 'Liu' surname
  244. if not name.startswith('留'):
  245. name = '留' + name
  246. return name
  247. def is_female_value(sex_value):
  248. """Return True when sex value represents female."""
  249. if sex_value is None:
  250. return False
  251. s = str(sex_value).strip().lower()
  252. return s in ('女', '2', 'female', 'f')
  253. def normalize_lookup_name(name):
  254. """Normalize names for loose matching in AI parsed content."""
  255. if not name:
  256. return ''
  257. return manual_simplify(str(name)).strip()
  258. def should_skip_liu_prefix_for_person(person, spouse_name_set):
  259. """
  260. Female spouse records should not auto-prepend '留' in simplified_name.
  261. We treat a person as female spouse if:
  262. 1) sex is female, and
  263. 2) has spouse_name field OR appears in another person's spouse_name list.
  264. """
  265. if not isinstance(person, dict):
  266. return False
  267. if not is_female_value(person.get('sex')):
  268. return False
  269. own_names = set()
  270. own_names.add(normalize_lookup_name(person.get('name')))
  271. own_names.add(normalize_lookup_name(person.get('original_name')))
  272. own_names.discard('')
  273. has_spouse_name = bool(normalize_lookup_name(person.get('spouse_name')))
  274. referenced_by_other = any(n in spouse_name_set for n in own_names)
  275. return has_spouse_name or referenced_by_other
  276. def get_normalized_base64_image(image_url):
  277. """Download image, normalize to JPEG, and return base64 data URI for AI payload."""
  278. import io
  279. import base64
  280. import requests
  281. from PIL import Image
  282. try:
  283. response = requests.get(image_url, timeout=30)
  284. response.raise_for_status()
  285. with Image.open(io.BytesIO(response.content)) as img:
  286. # Convert to RGB to ensure JPEG compatibility
  287. if img.mode != 'RGB':
  288. img = img.convert('RGB')
  289. # Resize if too large
  290. max_dim = 2000
  291. if max(img.width, img.height) > max_dim:
  292. ratio = max_dim / max(img.width, img.height)
  293. new_size = (int(img.width * ratio), int(img.height * ratio))
  294. img = img.resize(new_size, Image.Resampling.LANCZOS)
  295. # Save as JPEG in memory
  296. buffer = io.BytesIO()
  297. img.save(buffer, format='JPEG', quality=85)
  298. b64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
  299. return f"data:image/jpeg;base64,{b64_str}"
  300. except Exception as e:
  301. print(f"Error normalizing image from {image_url}: {e}")
  302. return image_url # Fallback to original URL if processing fails
  303. def process_ai_task(record_id, image_url):
  304. """Background task to process image with AI and store result."""
  305. print(f"[AI Task] Starting task for record {record_id}...")
  306. conn = get_db_connection()
  307. try:
  308. with conn.cursor() as cursor:
  309. cursor.execute("UPDATE genealogy_records SET ai_status = 1 WHERE id = %s", (record_id,))
  310. conn.commit()
  311. print(f"[AI Task] Status updated to 'Processing' for record {record_id}")
  312. api_key = "a1800657-9212-4afe-9b7c-b49f015c54d3"
  313. api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"
  314. prompt = """
  315. 请分析这张家谱图片,提取其中关于人物的信息。
  316. 请务必将繁体字转换为简体字(original_name 字段除外)。
  317. 特别注意:'name' 字段必须是纯简体中文,不能包含繁体字(例如:'學'应转换为'学','劉'应转换为'刘','萬'应转换为'万')。
  318. 请提取以下字段(如果存在):
  319. - original_name: 原始姓名(严格保持图片上的繁体字,不做任何修改或转换)
  320. - name: 简体姓名(必须转换为简体中文,去除不需要的敬称)
  321. - sex: 性别(男/女)
  322. - birthday: 出生日期(尝试转换为YYYY-MM-DD格式,如果无法确定年份可只填月日)
  323. - death_date: 逝世日期(如文本中出现“殁”、“葬”、“卒”等字眼及其对应的时间,请提取)
  324. - father_name: 父亲姓名
  325. - spouse_name: 配偶姓名
  326. - generation: 第几世/代数
  327. - name_word: 字辈(例如名字为“学勤公”,“学”为字辈;提取名字中的字辈信息)
  328. - education: 学历/功名
  329. - title: 官职/称号
  330. 请严格以JSON列表格式返回,不要包含Markdown代码块标记(如 ```json ... ```),直接返回JSON数组。
  331. 如果包含多个人物,请都提取出来。
  332. Do not output any reasoning or explanation, just the JSON.
  333. """
  334. ai_payload_url = get_normalized_base64_image(image_url)
  335. payload = {
  336. "model": "doubao-seed-1-8-251228",
  337. "stream": True, # Streaming for robust handling
  338. "input": [
  339. {
  340. "role": "user",
  341. "content": [
  342. {"type": "input_image", "image_url": ai_payload_url},
  343. {"type": "input_text", "text": prompt}
  344. ]
  345. }
  346. ]
  347. }
  348. headers = {
  349. "Authorization": f"Bearer {api_key}",
  350. "Content-Type": "application/json"
  351. }
  352. max_retries = 3
  353. last_exception = None
  354. for attempt in range(max_retries):
  355. try:
  356. print(f"[AI Task] Attempt {attempt+1}/{max_retries} connecting to API for record {record_id}...")
  357. response = requests.post(
  358. api_url,
  359. json=payload,
  360. headers=headers,
  361. timeout=1200,
  362. stream=True,
  363. verify=False,
  364. proxies={"http": None, "https": None}
  365. )
  366. if response.status_code == 200:
  367. print(f"[AI Task] Connection established for record {record_id}, receiving stream...")
  368. full_content = ""
  369. for line in response.iter_lines():
  370. if not line: continue
  371. line_str = line.decode('utf-8')
  372. # Debug: Print full line to understand event flow
  373. print(f"[AI Task Debug] Raw Line: {line_str[:500]}") # Truncate very long lines
  374. if line_str.startswith('data: '):
  375. json_str = line_str[6:]
  376. if json_str.strip() == '[DONE]':
  377. print("[AI Task Debug] Received [DONE]")
  378. break
  379. try:
  380. chunk = json.loads(json_str)
  381. chunk_type = chunk.get('type')
  382. # Standard OpenAI format (choices)
  383. if 'choices' in chunk and len(chunk['choices']) > 0:
  384. delta = chunk['choices'][0].get('delta', {})
  385. if 'content' in delta:
  386. full_content += delta['content']
  387. # Doubao/Volcengine specific formats (delta)
  388. elif chunk_type == 'response.text.delta':
  389. full_content += chunk.get('delta', '')
  390. # Check response.completed if empty
  391. elif chunk_type == 'response.completed' and not full_content:
  392. output = chunk.get('response', {}).get('output', [])
  393. for item in output:
  394. # Also extract from reasoning if it contains JSON-like text
  395. if item.get('type') == 'reasoning':
  396. summary = item.get('summary', [])
  397. for sum_item in summary:
  398. if sum_item.get('type') == 'summary_text':
  399. full_content += sum_item.get('text', '')
  400. elif item.get('type') == 'message':
  401. content = item.get('content')
  402. if isinstance(content, str):
  403. full_content += content
  404. elif isinstance(content, list):
  405. for part in content:
  406. if isinstance(part, dict) and part.get('type') == 'text':
  407. full_content += part.get('text', '')
  408. # Fallback: output_item.added
  409. elif chunk_type == 'response.output_item.added':
  410. item = chunk.get('item', {})
  411. if item.get('role') == 'assistant':
  412. content_field = item.get('content', [])
  413. if isinstance(content_field, str):
  414. full_content += content_field
  415. elif isinstance(content_field, list):
  416. for part in content_field:
  417. if isinstance(part, dict) and part.get('type') == 'text':
  418. full_content += part.get('text', '')
  419. except Exception as e:
  420. print(f"[AI Task] Chunk parse error: {e}")
  421. else:
  422. # Fallback for non-SSE
  423. try:
  424. chunk = json.loads(line_str)
  425. if 'choices' in chunk and len(chunk['choices']) > 0:
  426. content = chunk['choices'][0]['message']['content']
  427. full_content += content
  428. except:
  429. pass
  430. print(f"[AI Task] Stream finished. Content length: {len(full_content)}")
  431. if len(full_content) == 0:
  432. print(f"[AI Task] WARNING: No content received from AI stream.")
  433. # Continue to JSON parse to fail gracefully
  434. # Clean JSON
  435. try:
  436. # 1. Try finding [...] array
  437. start = full_content.find('[')
  438. end = full_content.rfind(']')
  439. # 2. If not found, try finding {...} object and wrap it
  440. is_single_object = False
  441. if start == -1 or end == -1 or end <= start:
  442. start = full_content.find('{')
  443. end = full_content.rfind('}')
  444. is_single_object = True
  445. if start != -1 and end != -1 and end > start:
  446. content_clean = full_content[start:end+1]
  447. else:
  448. # Fallback to regex or raw
  449. content_clean = re.sub(r'^```json\s*', '', full_content)
  450. content_clean = re.sub(r'```$', '', content_clean)
  451. parsed = json.loads(content_clean)
  452. # Normalize single object to list
  453. if is_single_object and isinstance(parsed, dict):
  454. parsed = [parsed]
  455. content_clean = json.dumps(parsed, ensure_ascii=False)
  456. elif isinstance(parsed, dict) and not isinstance(parsed, list):
  457. # Just in case json.loads parsed a dict even if we looked for []
  458. parsed = [parsed]
  459. content_clean = json.dumps(parsed, ensure_ascii=False)
  460. # Build spouse name lookup for "female spouse" detection
  461. spouse_name_set = set()
  462. if isinstance(parsed, list):
  463. for person in parsed:
  464. n = normalize_lookup_name(person.get('spouse_name'))
  465. if n:
  466. spouse_name_set.add(n)
  467. # Clean names in parsed content
  468. if isinstance(parsed, list):
  469. for person in parsed:
  470. # Process Name: 'name' is Simplified from AI, 'original_name' is Traditional/Raw from AI
  471. simplified_name = person.get('name', '') or person.get('original_name', '')
  472. original_name = person.get('original_name', '')
  473. # Female spouse: only simplify Chinese, do NOT prepend '留'
  474. if should_skip_liu_prefix_for_person(person, spouse_name_set):
  475. cleaned_simplified = manual_simplify(simplified_name)
  476. else:
  477. # Same-clan default: prepend '留' and handle trailing '公'
  478. cleaned_simplified = clean_name(simplified_name)
  479. person['simplified_name'] = cleaned_simplified
  480. # Store raw name in 'name' field (as requested)
  481. if original_name:
  482. person['name'] = original_name
  483. else:
  484. # Fallback: if no original_name returned, use the uncleaned name as 'name'
  485. # or keep existing logic. But user wants raw in 'name'.
  486. # If AI didn't return original_name, 'name' is likely simplified.
  487. pass # Keep 'name' as is (which is Simplified) if original_name missing
  488. # Father name:同族,需要按“留”姓规则清洗
  489. if 'father_name' in person and person['father_name']:
  490. person['father_name'] = clean_name(person['father_name'])
  491. # Spouse name:只做繁转简,不拼接“留”姓,也不去“公”
  492. if 'spouse_name' in person and person['spouse_name']:
  493. person['spouse_name'] = manual_simplify(person['spouse_name'])
  494. # Re-serialize
  495. content_clean = json.dumps(parsed, ensure_ascii=False)
  496. with conn.cursor() as cursor:
  497. cursor.execute("UPDATE genealogy_records SET ai_status = 2, ai_content = %s WHERE id = %s", (content_clean, record_id))
  498. conn.commit()
  499. print(f"[AI Task] SUCCESS: Record {record_id} processed and saved.")
  500. return # Success
  501. except json.JSONDecodeError as err:
  502. raise Exception(f"JSON Parse Error: {str(err)}. Raw: {full_content}")
  503. else:
  504. raise Exception(f"API Error {response.status_code}: {response.text}")
  505. except Exception as e:
  506. print(f"[AI Task] Attempt {attempt+1} failed for record {record_id}: {e}")
  507. last_exception = e
  508. if attempt < max_retries - 1:
  509. wait_time = 2 * (attempt + 1)
  510. print(f"[AI Task] Waiting {wait_time}s before retry...")
  511. time.sleep(wait_time)
  512. raise last_exception or Exception("Unknown error")
  513. except Exception as e:
  514. print(f"[AI Task] FINAL FAILURE for record {record_id}: {e}")
  515. try:
  516. with conn.cursor() as cursor:
  517. cursor.execute("UPDATE genealogy_records SET ai_status = 3, ai_content = %s WHERE id = %s", (f"Max Retries Exceeded. Error: {str(e)}", record_id))
  518. conn.commit()
  519. except:
  520. pass
  521. finally:
  522. conn.close()
  523. print(f"[AI Task] Task finished for record {record_id}")
  524. def ensure_pdf_table():
  525. conn = get_db_connection()
  526. try:
  527. with conn.cursor() as cursor:
  528. cursor.execute("""
  529. CREATE TABLE IF NOT EXISTS genealogy_pdfs (
  530. id INT AUTO_INCREMENT PRIMARY KEY,
  531. file_name VARCHAR(255) NOT NULL,
  532. oss_url TEXT NOT NULL,
  533. description VARCHAR(500) DEFAULT '',
  534. upload_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  535. uploader VARCHAR(100) DEFAULT '',
  536. version_name VARCHAR(255) DEFAULT '',
  537. version_source VARCHAR(255) DEFAULT '',
  538. file_provider VARCHAR(100) DEFAULT '',
  539. parse_status INT DEFAULT 0
  540. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
  541. """)
  542. # 检查是否存在parse_status字段,如果不存在则添加
  543. cursor.execute("SHOW COLUMNS FROM genealogy_pdfs LIKE 'parse_status'")
  544. if not cursor.fetchone():
  545. cursor.execute("ALTER TABLE genealogy_pdfs ADD COLUMN parse_status INT DEFAULT 0")
  546. # 检查是否存在version_name字段,如果不存在则添加
  547. cursor.execute("SHOW COLUMNS FROM genealogy_pdfs LIKE 'version_name'")
  548. if not cursor.fetchone():
  549. cursor.execute("ALTER TABLE genealogy_pdfs ADD COLUMN version_name VARCHAR(255) DEFAULT ''")
  550. # 检查是否存在version_source字段,如果不存在则添加
  551. cursor.execute("SHOW COLUMNS FROM genealogy_pdfs LIKE 'version_source'")
  552. if not cursor.fetchone():
  553. cursor.execute("ALTER TABLE genealogy_pdfs ADD COLUMN version_source VARCHAR(255) DEFAULT ''")
  554. # 检查是否存在file_provider字段,如果不存在则添加
  555. cursor.execute("SHOW COLUMNS FROM genealogy_pdfs LIKE 'file_provider'")
  556. if not cursor.fetchone():
  557. cursor.execute("ALTER TABLE genealogy_pdfs ADD COLUMN file_provider VARCHAR(100) DEFAULT ''")
  558. conn.commit()
  559. finally:
  560. conn.close()
  561. @app.route('/manager/pdf_management')
  562. def pdf_management():
  563. if 'user_id' not in session:
  564. return redirect(url_for('login'))
  565. username = session.get('username', 'unknown')
  566. is_super_admin = session.get('is_super_admin', 'NOT_SET')
  567. print(f"[PDF Management Access] User: {username}, is_super_admin: {is_super_admin}")
  568. # Verify is_super_admin against database - always check latest status
  569. conn = get_db_connection()
  570. try:
  571. with conn.cursor() as cursor:
  572. cursor.execute("SELECT is_super_admin FROM users WHERE id = %s", (session['user_id'],))
  573. db_result = cursor.fetchone()
  574. db_is_super = db_result['is_super_admin'] if db_result else 0
  575. print(f"[PDF Management Access] DB is_super_admin: {db_is_super}")
  576. if not db_is_super:
  577. print(f"[PDF Management Access] Denied for {username} (DB check)")
  578. flash('无权限访问此页面')
  579. return redirect(url_for('home'))
  580. finally:
  581. conn.close()
  582. print(f"[PDF Management Access] Allowed for {username}")
  583. ensure_pdf_table()
  584. view_id = request.args.get('view', type=int)
  585. preview = request.args.get('preview', type=bool, default=False)
  586. selected_pdf = None
  587. conn = get_db_connection()
  588. try:
  589. with conn.cursor() as cursor:
  590. cursor.execute("SELECT * FROM genealogy_pdfs ORDER BY upload_time DESC")
  591. pdfs = cursor.fetchall()
  592. if view_id and preview:
  593. cursor.execute("SELECT * FROM genealogy_pdfs WHERE id = %s", (view_id,))
  594. selected_pdf = cursor.fetchone()
  595. finally:
  596. conn.close()
  597. return render_template('pdf_management.html', pdfs=pdfs, selected_pdf=selected_pdf)
  598. @app.route('/manager/parse_pdf/<int:pdf_id>', methods=['POST'])
  599. def parse_pdf(pdf_id):
  600. if 'user_id' not in session:
  601. return jsonify({"success": False, "message": "Unauthorized"}), 401
  602. # 标记PDF为解析中
  603. conn = get_db_connection()
  604. try:
  605. with conn.cursor() as cursor:
  606. cursor.execute("UPDATE genealogy_pdfs SET parse_status = 1 WHERE id = %s", (pdf_id,))
  607. conn.commit()
  608. finally:
  609. conn.close()
  610. # 异步执行PDF解析
  611. def parse_pdf_async():
  612. try:
  613. # 获取PDF信息
  614. conn = get_db_connection()
  615. pdf_info = None
  616. try:
  617. with conn.cursor() as cursor:
  618. cursor.execute("SELECT * FROM genealogy_pdfs WHERE id = %s", (pdf_id,))
  619. pdf_info = cursor.fetchone()
  620. finally:
  621. conn.close()
  622. if not pdf_info:
  623. return
  624. # 下载PDF并拆分
  625. pdf_url = pdf_info['oss_url']
  626. response = requests.get(pdf_url)
  627. response.raise_for_status()
  628. # 保存临时PDF文件
  629. temp_pdf_path = f"/tmp/{pdf_info['file_name']}"
  630. with open(temp_pdf_path, 'wb') as f:
  631. f.write(response.content)
  632. # 使用PyMuPDF拆分PDF
  633. doc = fitz.open(temp_pdf_path)
  634. page_count = doc.page_count
  635. # 每个PDF的页码从1开始计算
  636. max_page = 0
  637. # 逐页处理
  638. for i in range(page_count):
  639. page = doc[i]
  640. pix = page.get_pixmap()
  641. image_path = f"/tmp/{pdf_info['file_name']}_page_{i+1}.png"
  642. pix.save(image_path)
  643. # 上传图片到OSS
  644. image_oss_url = upload_to_oss(image_path, f"{pdf_info['file_name']}_page_{i+1}.png")
  645. # 检查上传是否成功
  646. if not image_oss_url:
  647. raise Exception(f"Failed to upload image to OSS: {image_path}")
  648. # 保存到genealogy_records表
  649. conn = get_db_connection()
  650. try:
  651. with conn.cursor() as cursor:
  652. cursor.execute("""
  653. INSERT INTO genealogy_records
  654. (file_name, oss_url, file_type, page_number, genealogy_version, genealogy_source, upload_person, upload_time)
  655. VALUES (%s, %s, %s, %s, %s, %s, %s, CURRENT_TIMESTAMP)
  656. """, (
  657. f"{pdf_info['file_name']}_page_{i+1}.png",
  658. image_oss_url,
  659. '图片',
  660. max_page + i + 1,
  661. pdf_info['version_name'],
  662. pdf_info['version_source'],
  663. pdf_info['file_provider']
  664. ))
  665. conn.commit()
  666. finally:
  667. conn.close()
  668. # 删除临时图片文件
  669. if os.path.exists(image_path):
  670. os.remove(image_path)
  671. # 删除临时PDF文件
  672. if os.path.exists(temp_pdf_path):
  673. os.remove(temp_pdf_path)
  674. # 更新PDF解析状态为成功
  675. conn = get_db_connection()
  676. try:
  677. with conn.cursor() as cursor:
  678. cursor.execute("UPDATE genealogy_pdfs SET parse_status = 2 WHERE id = %s", (pdf_id,))
  679. conn.commit()
  680. finally:
  681. conn.close()
  682. except Exception as e:
  683. # 更新PDF解析状态为失败
  684. conn = get_db_connection()
  685. try:
  686. with conn.cursor() as cursor:
  687. cursor.execute("UPDATE genealogy_pdfs SET parse_status = 3 WHERE id = %s", (pdf_id,))
  688. conn.commit()
  689. finally:
  690. conn.close()
  691. print(f"PDF解析失败: {e}")
  692. # 启动异步任务
  693. thread = threading.Thread(target=parse_pdf_async)
  694. thread.daemon = True
  695. thread.start()
  696. return jsonify({"success": True, "message": "PDF解析已开始,将在后台执行"})
  697. @app.route('/manager/batch_ai_parse', methods=['GET'])
  698. def batch_ai_parse():
  699. """Batch AI parse for unprocessed records."""
  700. if 'user_id' not in session:
  701. return jsonify({"success": False, "message": "Unauthorized"}), 401
  702. # Start background thread
  703. thread = threading.Thread(target=batch_ai_parse_async)
  704. thread.daemon = True
  705. thread.start()
  706. return jsonify({"success": True, "message": "批量AI解析已开始,请稍候查看结果"})
  707. def batch_ai_parse_async():
  708. """Background task to batch AI parse unprocessed records."""
  709. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  710. print(f"[{timestamp}] [Batch AI Parse] Starting batch AI parse task...")
  711. # Get unprocessed records (ai_status = 0)
  712. conn = None
  713. unprocessed_records = []
  714. try:
  715. conn = get_db_connection()
  716. with conn.cursor() as cursor:
  717. cursor.execute("SELECT id, oss_url FROM genealogy_records WHERE ai_status = 0 order by page_number")
  718. unprocessed_records = cursor.fetchall()
  719. conn.close()
  720. conn = None
  721. total_records = len(unprocessed_records)
  722. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  723. print(f"[{timestamp}] [Batch AI Parse] Found {total_records} unprocessed records")
  724. if total_records == 0:
  725. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  726. print(f"[{timestamp}] [Batch AI Parse] No unprocessed records found")
  727. return
  728. # Control concurrency to 5
  729. max_concurrency = 5
  730. semaphore = threading.Semaphore(max_concurrency)
  731. threads = []
  732. def process_record(record):
  733. """Process a single record with semaphore."""
  734. with semaphore:
  735. try:
  736. record_id = record['id']
  737. image_url = record['oss_url']
  738. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  739. print(f"[{timestamp}] [Batch AI Parse] Processing record {record_id}")
  740. process_ai_task(record_id, image_url)
  741. except Exception as e:
  742. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  743. print(f"[{timestamp}] [Batch AI Parse] Error processing record {record['id']}: {e}")
  744. # If failed, we'll handle it in the next batch
  745. # Start threads for each record
  746. for record in unprocessed_records:
  747. thread = threading.Thread(target=process_record, args=(record,))
  748. thread.daemon = True
  749. thread.start()
  750. threads.append(thread)
  751. # Wait for all threads to complete
  752. for thread in threads:
  753. thread.join()
  754. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  755. print(f"[{timestamp}] [Batch AI Parse] Batch processing completed. Processed {total_records} records")
  756. # Check for failed records and restart them
  757. check_failed_records()
  758. except Exception as e:
  759. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  760. print(f"[{timestamp}] [Batch AI Parse] Error: {e}")
  761. finally:
  762. if conn:
  763. try:
  764. conn.close()
  765. except:
  766. pass
  767. def check_failed_records():
  768. """Check for failed records and restart them."""
  769. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  770. print(f"[{timestamp}] [Batch AI Parse] Checking for failed records...")
  771. conn = None
  772. failed_records = []
  773. try:
  774. conn = get_db_connection()
  775. with conn.cursor() as cursor:
  776. cursor.execute("SELECT id, oss_url FROM genealogy_records WHERE ai_status = 3")
  777. failed_records = cursor.fetchall()
  778. conn.close()
  779. conn = None
  780. total_failed = len(failed_records)
  781. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  782. print(f"[{timestamp}] [Batch AI Parse] Found {total_failed} failed records")
  783. if total_failed == 0:
  784. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  785. print(f"[{timestamp}] [Batch AI Parse] No failed records found")
  786. return
  787. # Control concurrency to 5 for failed records
  788. max_concurrency = 5
  789. semaphore = threading.Semaphore(max_concurrency)
  790. threads = []
  791. def process_failed_record(record):
  792. """Process a failed record with semaphore."""
  793. with semaphore:
  794. retry_conn = None
  795. try:
  796. record_id = record['id']
  797. image_url = record['oss_url']
  798. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  799. print(f"[{timestamp}] [Batch AI Parse] Retrying failed record {record_id}")
  800. # Reset status to processing
  801. retry_conn = get_db_connection()
  802. with retry_conn.cursor() as cursor:
  803. cursor.execute("UPDATE genealogy_records SET ai_status = 1 WHERE id = %s", (record_id,))
  804. retry_conn.commit()
  805. retry_conn.close()
  806. retry_conn = None
  807. process_ai_task(record_id, image_url)
  808. except Exception as e:
  809. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  810. print(f"[{timestamp}] [Batch AI Parse] Error retrying record {record['id']}: {e}")
  811. finally:
  812. if retry_conn:
  813. try:
  814. retry_conn.close()
  815. except:
  816. pass
  817. # Start threads for each failed record
  818. for record in failed_records:
  819. thread = threading.Thread(target=process_failed_record, args=(record,))
  820. thread.daemon = True
  821. thread.start()
  822. threads.append(thread)
  823. # Wait for all threads to complete
  824. for thread in threads:
  825. thread.join()
  826. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  827. print(f"[{timestamp}] [Batch AI Parse] Retry processing completed. Retried {total_failed} failed records")
  828. except Exception as e:
  829. timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  830. print(f"[{timestamp}] [Batch AI Parse] Error checking failed records: {e}")
  831. finally:
  832. if conn:
  833. try:
  834. conn.close()
  835. except:
  836. pass
  837. @app.route('/manager/delete_pdf/<int:pdf_id>', methods=['POST'])
  838. def delete_pdf(pdf_id):
  839. if 'user_id' not in session:
  840. return jsonify({"success": False, "message": "Unauthorized"}), 401
  841. conn = get_db_connection()
  842. try:
  843. with conn.cursor() as cursor:
  844. cursor.execute("DELETE FROM genealogy_pdfs WHERE id = %s", (pdf_id,))
  845. conn.commit()
  846. flash('PDF文件记录已删除')
  847. except Exception as e:
  848. flash(f'删除失败: {e}')
  849. finally:
  850. conn.close()
  851. return redirect(url_for('pdf_management'))
  852. @app.route('/manager/')
  853. def index():
  854. if 'user_id' not in session:
  855. return redirect(url_for('login'))
  856. page = request.args.get('page', 1, type=int)
  857. version = request.args.get('version', '').strip()
  858. print(f"Received version parameter: '{version}'")
  859. source = request.args.get('source', '').strip()
  860. person = request.args.get('person', '').strip()
  861. file_type = request.args.get('file_type', '').strip()
  862. per_page = 10
  863. offset = (page - 1) * per_page
  864. conn = get_db_connection()
  865. try:
  866. with conn.cursor() as cursor:
  867. query_conditions = []
  868. params = []
  869. if version:
  870. query_conditions.append("genealogy_version LIKE %s")
  871. params.append(f"%{version}%")
  872. if source:
  873. query_conditions.append("genealogy_source LIKE %s")
  874. params.append(f"%{source}%")
  875. if person:
  876. query_conditions.append("upload_person LIKE %s")
  877. params.append(f"%{person}%")
  878. if file_type:
  879. query_conditions.append("file_type = %s")
  880. params.append(file_type)
  881. where_clause = ""
  882. if query_conditions:
  883. where_clause = "WHERE " + " AND ".join(query_conditions)
  884. count_sql = f"SELECT COUNT(*) as count FROM genealogy_records {where_clause}"
  885. cursor.execute(count_sql, params)
  886. total = cursor.fetchone()['count']
  887. sql = f"SELECT * FROM genealogy_records {where_clause} ORDER BY page_number ASC LIMIT %s OFFSET %s"
  888. cursor.execute(sql, params + [per_page, offset])
  889. records = cursor.fetchall()
  890. total_pages = (total + per_page - 1) // per_page
  891. finally:
  892. conn.close()
  893. return render_template('index.html', records=records, page=page, total_pages=total_pages, version=version, source=source, person=person, file_type=file_type, total=total)
  894. @app.route('/manager/members')
  895. def members():
  896. if 'user_id' not in session:
  897. return redirect(url_for('login'))
  898. search_name = request.args.get('name', '').strip()
  899. page = request.args.get('page', 1, type=int)
  900. per_page = 10
  901. offset = (page - 1) * per_page
  902. print(f"[Members List] Fetching members page: {page}, search: '{search_name}', per_page: {per_page}")
  903. conn = get_db_connection()
  904. try:
  905. with conn.cursor() as cursor:
  906. # 1. Get total count
  907. if search_name:
  908. variants = expand_name_search_variants(search_name)
  909. where_parts = []
  910. params = []
  911. for v in variants:
  912. where_parts.append("(name LIKE %s OR simplified_name LIKE %s)")
  913. like = f"%{v}%"
  914. params.extend([like, like])
  915. where_clause = " OR ".join(where_parts) if where_parts else "name LIKE %s"
  916. if not where_parts:
  917. params = [f"%{search_name}%"]
  918. count_sql = f"SELECT COUNT(*) as count FROM family_member_info WHERE {where_clause}"
  919. print(f"[Members List] Executing count SQL: {count_sql}")
  920. print(f"[Members List] Count SQL parameters: {params}")
  921. cursor.execute(count_sql, tuple(params))
  922. else:
  923. count_sql = "SELECT COUNT(*) as count FROM family_member_info"
  924. print(f"[Members List] Executing count SQL: {count_sql}")
  925. cursor.execute(count_sql)
  926. result = cursor.fetchone()
  927. total = result['count'] if result else 0
  928. total_pages = (total + per_page - 1) // per_page
  929. print(f"[Members List] Total members: {total}, total pages: {total_pages}")
  930. # 2. Get paginated results, ordered by modified_time DESC (or create_time if modified is null/same)
  931. # Using COALESCE to ensure sort works even if modified_time is NULL
  932. order_clause = "ORDER BY COALESCE(modified_time, create_time) DESC"
  933. if search_name:
  934. variants = expand_name_search_variants(search_name)
  935. where_parts = []
  936. params = []
  937. for v in variants:
  938. where_parts.append("(name LIKE %s OR simplified_name LIKE %s)")
  939. like = f"%{v}%"
  940. params.extend([like, like])
  941. where_clause = " OR ".join(where_parts) if where_parts else "(name LIKE %s OR simplified_name LIKE %s)"
  942. if not where_parts:
  943. like = f"%{search_name}%"
  944. params = [like, like]
  945. sql = f"SELECT id, name, simplified_name, sex, name_word_generation, birthday, occupation, family_rank, branch_family_hall, residential_address, is_pass_away, create_time, modified_time FROM family_member_info WHERE {where_clause} {order_clause} LIMIT %s OFFSET %s"
  946. print(f"[Members List] Executing members SQL: {sql}")
  947. print(f"[Members List] Members SQL parameters: {params + [per_page, offset]}")
  948. cursor.execute(sql, tuple(params + [per_page, offset]))
  949. else:
  950. sql = f"SELECT id, name, simplified_name, sex, name_word_generation, birthday, occupation, family_rank, branch_family_hall, residential_address, is_pass_away, create_time, modified_time FROM family_member_info {order_clause} LIMIT %s OFFSET %s"
  951. print(f"[Members List] Executing members SQL: {sql}")
  952. print(f"[Members List] Members SQL parameters: {[per_page, offset]}")
  953. cursor.execute(sql, (per_page, offset))
  954. members = cursor.fetchall()
  955. print(f"[Members List] Fetched {len(members)} members")
  956. # 格式化日期
  957. for m in members:
  958. m['birthday_str'] = format_timestamp(m.get('birthday'))
  959. # 格式化创建时间 (针对 TIMESTAMP 字段)
  960. if m.get('create_time'):
  961. m['create_time_str'] = m['create_time'].strftime('%Y-%m-%d')
  962. if m.get('modified_time'):
  963. m['modified_time_str'] = m['modified_time'].strftime('%Y-%m-%d %H:%M')
  964. finally:
  965. print(f"[Members List] Closing database connection")
  966. conn.close()
  967. return render_template('members.html', members=members, search_name=search_name, page=page, total_pages=total_pages, total=total)
  968. @app.route('/manager/batch_genealogy')
  969. def batch_genealogy():
  970. if 'user_id' not in session:
  971. return redirect(url_for('login'))
  972. return render_template('batch_genealogy.html')
  973. @app.route('/manager/suspected_errors')
  974. def suspected_errors():
  975. if 'user_id' not in session:
  976. return redirect(url_for('login'))
  977. search_name = request.args.get('name', '').strip()
  978. page = request.args.get('page', 1, type=int)
  979. per_page = 20
  980. offset = (page - 1) * per_page
  981. conn = get_db_connection()
  982. try:
  983. with conn.cursor() as cursor:
  984. # Base query with condition for non-empty suspected_error (using TRIM to remove whitespace)
  985. base_query = "SELECT id, name, simplified_name, sex, name_word_generation, birthday, suspected_error FROM family_member_info WHERE suspected_error IS NOT NULL AND TRIM(suspected_error) != ''"
  986. count_query = "SELECT COUNT(*) as count FROM family_member_info WHERE suspected_error IS NOT NULL AND TRIM(suspected_error) != ''"
  987. # Add search condition if provided
  988. params = []
  989. if search_name:
  990. # Support both traditional and simplified name search
  991. base_query += " AND (name LIKE %s OR simplified_name LIKE %s)"
  992. count_query += " AND (name LIKE %s OR simplified_name LIKE %s)"
  993. search_param = f"%{search_name}%"
  994. params.extend([search_param, search_param])
  995. # Get total count
  996. cursor.execute(count_query, params)
  997. result = cursor.fetchone()
  998. total = result['count'] if result else 0
  999. total_pages = (total + per_page - 1) // per_page
  1000. # Get members with pagination
  1001. base_query += " ORDER BY name LIMIT %s OFFSET %s"
  1002. params.extend([per_page, offset])
  1003. cursor.execute(base_query, params)
  1004. members = cursor.fetchall()
  1005. # Format birthday for display
  1006. for member in members:
  1007. if member['birthday']:
  1008. member['birthday_str'] = format_timestamp(member['birthday'])
  1009. else:
  1010. member['birthday_str'] = '未知'
  1011. finally:
  1012. conn.close()
  1013. return render_template('suspected_errors.html', members=members, search_name=search_name, page=page, total_pages=total_pages, total=total)
  1014. @app.route('/manager/tree')
  1015. def tree():
  1016. if 'user_id' not in session:
  1017. return redirect(url_for('login'))
  1018. return render_template('tree.html')
  1019. @app.route('/manager/lineage_query')
  1020. def lineage_query():
  1021. if 'user_id' not in session:
  1022. return redirect(url_for('login'))
  1023. return render_template('lineage_query.html')
  1024. @app.route('/manager/tree_classic')
  1025. def tree_classic():
  1026. if 'user_id' not in session:
  1027. return redirect(url_for('login'))
  1028. return render_template('tree_classic.html')
  1029. @app.route('/manager/api/tree_data')
  1030. def tree_data():
  1031. if 'user_id' not in session:
  1032. return jsonify({"error": "Unauthorized"}), 401
  1033. conn = get_db_connection()
  1034. try:
  1035. with conn.cursor() as cursor:
  1036. # 获取所有成员
  1037. cursor.execute("SELECT id, name, simplified_name, sex, family_rank, name_word_generation FROM family_member_info")
  1038. members = cursor.fetchall()
  1039. # 获取所有关系 (1:父子 2:母子 10:夫妻 11:兄弟 12:姐妹),包括子类型
  1040. cursor.execute("SELECT parent_mid, child_mid, relation_type, sub_relation_type FROM family_relation_info")
  1041. relations = cursor.fetchall()
  1042. return jsonify({"members": members, "relations": relations})
  1043. finally:
  1044. conn.close()
  1045. @app.route('/manager/api/search_member', methods=['POST'])
  1046. def search_member():
  1047. if 'user_id' not in session:
  1048. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1049. data = request.get_json()
  1050. keyword = data.get('keyword', '').strip()
  1051. if not keyword:
  1052. return jsonify({"success": False, "message": "请输入搜索关键词"})
  1053. conn = get_db_connection()
  1054. try:
  1055. with conn.cursor() as cursor:
  1056. cursor.execute("""
  1057. SELECT id, name, simplified_name
  1058. FROM family_member_info
  1059. WHERE name LIKE %s OR simplified_name LIKE %s OR former_name LIKE %s
  1060. ORDER BY
  1061. CASE WHEN name = %s THEN 1
  1062. WHEN simplified_name = %s THEN 2
  1063. WHEN name LIKE %s THEN 3
  1064. WHEN simplified_name LIKE %s THEN 4
  1065. ELSE 5 END
  1066. """, (f'%{keyword}%', f'%{keyword}%', f'%{keyword}%', keyword, keyword, f'{keyword}%', f'{keyword}%'))
  1067. members = cursor.fetchall()
  1068. if members:
  1069. return jsonify({"success": True, "members": members})
  1070. else:
  1071. return jsonify({"success": False, "message": "未找到匹配的成员"})
  1072. finally:
  1073. conn.close()
  1074. @app.route('/manager/api/get_lineage/<int:member_id>')
  1075. def get_lineage(member_id):
  1076. if 'user_id' not in session:
  1077. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1078. import time
  1079. start_time = time.time()
  1080. print(f"[Lineage Query] Starting query for member_id: {member_id} at {time.strftime('%Y-%m-%d %H:%M:%S')}")
  1081. conn = get_db_connection()
  1082. try:
  1083. with conn.cursor() as cursor:
  1084. # Step 1: Get center person
  1085. step_start = time.time()
  1086. cursor.execute("SELECT id, name, simplified_name, name_word, name_word_generation FROM family_member_info WHERE id = %s", (member_id,))
  1087. center = cursor.fetchone()
  1088. print(f"[Lineage Query] Step 1 - Get center: {time.time() - step_start:.3f}s")
  1089. if not center:
  1090. return jsonify({"success": False, "message": "成员不存在"})
  1091. # Step 2: Get ancestors with their siblings (generations)
  1092. step_start = time.time()
  1093. generations = [] # Array of generations, each with main ancestor and siblings
  1094. current_id = member_id
  1095. max_depth = 15
  1096. ancestor_ids = [] # Track ancestor IDs for exclusion when expanding
  1097. displayed_ids = set() # Track IDs that are already displayed
  1098. displayed_ids.add(member_id) # Center person is displayed
  1099. for depth in range(max_depth):
  1100. # 获取所有父母关系(支持出继/入继)
  1101. cursor.execute("""
  1102. SELECT p.id, p.name, p.simplified_name, p.name_word, p.name_word_generation,
  1103. EXISTS(SELECT 1 FROM family_relation_info WHERE parent_mid = p.id AND relation_type IN (1, 2)) as has_children,
  1104. r.sub_relation_type
  1105. FROM family_relation_info r
  1106. JOIN family_member_info p ON r.parent_mid = p.id
  1107. WHERE r.child_mid = %s AND r.relation_type IN (1, 2)
  1108. """, (current_id,))
  1109. parents = cursor.fetchall()
  1110. if not parents:
  1111. break
  1112. # 优先选择直系父母(非出继),如果都是出继/入继,选择入继
  1113. parent = None
  1114. adoptive_parent = None
  1115. for p in parents:
  1116. if p['sub_relation_type'] == 2: # 出继(亲生父母)
  1117. parent = p
  1118. elif p['sub_relation_type'] == 3: # 入继(养父母)
  1119. adoptive_parent = p
  1120. else: # 普通关系(亲生)
  1121. parent = p
  1122. # 如果没有找到普通父母,使用入继父母
  1123. if not parent:
  1124. parent = adoptive_parent
  1125. ancestor_ids.append(parent['id'])
  1126. displayed_ids.add(parent['id'])
  1127. # Get siblings of this ancestor (father's brothers)
  1128. # First get grandparent (parent's father)
  1129. cursor.execute("""
  1130. SELECT gp.id
  1131. FROM family_relation_info r
  1132. JOIN family_member_info gp ON r.parent_mid = gp.id
  1133. WHERE r.child_mid = %s AND r.relation_type IN (1, 2)
  1134. LIMIT 1
  1135. """, (parent['id'],))
  1136. grandparent = cursor.fetchone()
  1137. parent_siblings = []
  1138. if grandparent:
  1139. # Get siblings of parent (father's brothers)
  1140. cursor.execute("""
  1141. SELECT c.id, c.name, c.simplified_name, c.name_word, c.name_word_generation,
  1142. EXISTS(SELECT 1 FROM family_relation_info WHERE parent_mid = c.id AND relation_type IN (1, 2)) as has_children
  1143. FROM family_relation_info r
  1144. JOIN family_member_info c ON r.child_mid = c.id
  1145. WHERE r.parent_mid = %s AND r.relation_type IN (1, 2) AND c.id != %s
  1146. ORDER BY COALESCE(r.child_order, 99999), c.id
  1147. LIMIT 30
  1148. """, (grandparent['id'], parent['id']))
  1149. parent_siblings = cursor.fetchall()
  1150. # Mark sibling IDs as displayed
  1151. for sibling in parent_siblings:
  1152. displayed_ids.add(sibling['id'])
  1153. # Check if parent has any children NOT already displayed
  1154. # Only show expand button if there are undisplayed children
  1155. cursor.execute("""
  1156. SELECT COUNT(*) as count
  1157. FROM family_relation_info r
  1158. JOIN family_member_info c ON r.child_mid = c.id
  1159. WHERE r.parent_mid = %s AND r.relation_type IN (1, 2)
  1160. """, (parent['id'],))
  1161. total_children = cursor.fetchone()['count']
  1162. # Check if current child is displayed (current_id is the child of parent)
  1163. child_displayed = current_id in displayed_ids
  1164. # Show expand if there are children not displayed
  1165. show_expand = total_children > (1 if child_displayed else 0)
  1166. parent['show_expand'] = show_expand
  1167. generations.append({
  1168. 'ancestor': parent,
  1169. 'siblings': parent_siblings,
  1170. 'depth': depth
  1171. })
  1172. current_id = parent['id']
  1173. print(f"[Lineage Query] Step 2 - Get generations ({len(generations)}): {time.time() - step_start:.3f}s")
  1174. # Step 3: Get immediate children only (limited count)
  1175. step_start = time.time()
  1176. # 获取所有子女(包括出继和入继)
  1177. cursor.execute("""
  1178. SELECT c.id, c.name, c.simplified_name, c.name_word, c.name_word_generation,
  1179. EXISTS(SELECT 1 FROM family_relation_info WHERE parent_mid = c.id AND relation_type IN (1, 2)) as has_children,
  1180. r.sub_relation_type
  1181. FROM family_relation_info r
  1182. JOIN family_member_info c ON r.child_mid = c.id
  1183. WHERE r.parent_mid = %s AND r.relation_type IN (1, 2)
  1184. ORDER BY COALESCE(r.child_order, 99999), c.id
  1185. LIMIT 30
  1186. """, (member_id,))
  1187. children = cursor.fetchall()
  1188. # 对于出继的子女,需要获取他们入继到的家庭信息
  1189. for child in children:
  1190. if child['sub_relation_type'] == 2: # 出继
  1191. # 查找该子女入继到的父母
  1192. cursor.execute("""
  1193. SELECT p.id, p.name, p.simplified_name
  1194. FROM family_relation_info r
  1195. JOIN family_member_info p ON r.parent_mid = p.id
  1196. WHERE r.child_mid = %s AND r.sub_relation_type = 3
  1197. LIMIT 1
  1198. """, (child['id'],))
  1199. adoptive_parent = cursor.fetchone()
  1200. if adoptive_parent:
  1201. child['adoptive_parent_name'] = adoptive_parent['name']
  1202. if adoptive_parent['simplified_name'] and adoptive_parent['simplified_name'] != adoptive_parent['name']:
  1203. child['adoptive_parent_name'] += f" ({adoptive_parent['simplified_name']})"
  1204. # Initialize children array
  1205. for child in children:
  1206. child['children'] = []
  1207. print(f"[Lineage Query] Step 3 - Get children ({len(children)}): {time.time() - step_start:.3f}s")
  1208. # Step 4: Get siblings of center person
  1209. step_start = time.time()
  1210. siblings = []
  1211. if generations:
  1212. parent_id = generations[0]['ancestor']['id'] # Father
  1213. cursor.execute("""
  1214. SELECT c.id, c.name, c.simplified_name, c.name_word, c.name_word_generation,
  1215. EXISTS(SELECT 1 FROM family_relation_info WHERE parent_mid = c.id AND relation_type IN (1, 2)) as has_children,
  1216. r.sub_relation_type
  1217. FROM family_relation_info r
  1218. JOIN family_member_info c ON r.child_mid = c.id
  1219. WHERE r.parent_mid = %s AND r.relation_type IN (1, 2) AND c.id != %s
  1220. ORDER BY COALESCE(r.child_order, 99999), c.id
  1221. LIMIT 30
  1222. """, (parent_id, member_id))
  1223. siblings = cursor.fetchall()
  1224. print(f"[Lineage Query] Step 4 - Get siblings ({len(siblings)}): {time.time() - step_start:.3f}s")
  1225. total_time = time.time() - start_time
  1226. print(f"[Lineage Query] Total time: {total_time:.3f}s")
  1227. return jsonify({
  1228. "success": True,
  1229. "data": {
  1230. "center": center,
  1231. "generations": generations,
  1232. "ancestor_ids": ancestor_ids,
  1233. "siblings": siblings,
  1234. "children": children
  1235. }
  1236. })
  1237. except Exception as e:
  1238. print(f"[Lineage Query] Error: {e}")
  1239. return jsonify({"success": False, "message": str(e)})
  1240. finally:
  1241. conn.close()
  1242. @app.route('/manager/api/get_descendants/<int:parent_id>')
  1243. def get_descendants(parent_id):
  1244. if 'user_id' not in session:
  1245. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1246. # Get excluded IDs from query parameter
  1247. excluded_ids = request.args.get('exclude', '')
  1248. excluded_list = []
  1249. if excluded_ids:
  1250. excluded_list = [int(id.strip()) for id in excluded_ids.split(',') if id.strip().isdigit()]
  1251. print(f"[get_descendants] Parent ID: {parent_id}, Excluded IDs: {excluded_list}")
  1252. conn = get_db_connection()
  1253. try:
  1254. with conn.cursor() as cursor:
  1255. if excluded_list:
  1256. # Build query with exclusion
  1257. placeholders = ', '.join(['%s'] * len(excluded_list))
  1258. cursor.execute(f"""
  1259. SELECT c.id, c.name, c.simplified_name, c.name_word, c.name_word_generation,
  1260. EXISTS(SELECT 1 FROM family_relation_info WHERE parent_mid = c.id AND relation_type IN (1, 2)) as has_children
  1261. FROM family_relation_info r
  1262. JOIN family_member_info c ON r.child_mid = c.id
  1263. WHERE r.parent_mid = %s AND r.relation_type IN (1, 2) AND c.id NOT IN ({placeholders})
  1264. ORDER BY COALESCE(r.child_order, 99999), c.id
  1265. LIMIT 20
  1266. """, (parent_id,) + tuple(excluded_list))
  1267. else:
  1268. cursor.execute("""
  1269. SELECT c.id, c.name, c.simplified_name, c.name_word, c.name_word_generation,
  1270. EXISTS(SELECT 1 FROM family_relation_info WHERE parent_mid = c.id AND relation_type IN (1, 2)) as has_children
  1271. FROM family_relation_info r
  1272. JOIN family_member_info c ON r.child_mid = c.id
  1273. WHERE r.parent_mid = %s AND r.relation_type IN (1, 2)
  1274. ORDER BY COALESCE(r.child_order, 99999), c.id
  1275. LIMIT 20
  1276. """, (parent_id,))
  1277. children = cursor.fetchall()
  1278. for child in children:
  1279. child['children'] = []
  1280. return jsonify({"success": True, "children": children})
  1281. finally:
  1282. conn.close()
  1283. @app.route('/manager/api/save_relation', methods=['POST'])
  1284. def save_relation():
  1285. if 'user_id' not in session:
  1286. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1287. data = request.json
  1288. source_mid = data.get('source_mid') # The member being dragged
  1289. target_mid = data.get('target_mid') # The member being dropped onto
  1290. rel_type = int(data.get('relation_type'))
  1291. sub_rel_type = int(data.get('sub_relation_type', 0))
  1292. if not source_mid or not target_mid or not rel_type:
  1293. return jsonify({"success": False, "message": "参数不完整"}), 400
  1294. conn = get_db_connection()
  1295. try:
  1296. with conn.cursor() as cursor:
  1297. # 简单处理:如果是父子/母子关系
  1298. # target_mid 是父辈,source_mid 是子辈
  1299. parent_mid = target_mid
  1300. child_mid = source_mid
  1301. gen_diff = 1
  1302. if rel_type == 10: # 夫妻
  1303. # 夫妻关系中,我们通常把关联人设为 parent_mid
  1304. parent_mid = target_mid
  1305. child_mid = source_mid
  1306. gen_diff = 0
  1307. elif rel_type in [11, 12]: # 兄弟姐妹
  1308. # 这里逻辑上比较复杂,通常兄弟姐妹有共同父母。
  1309. # 简化处理:暂时存为同级关系 (gen_diff=0)
  1310. parent_mid = target_mid
  1311. child_mid = source_mid
  1312. gen_diff = 0
  1313. # 删除旧关系
  1314. cursor.execute("DELETE FROM family_relation_info WHERE source_mid = %s", (source_mid,))
  1315. # 插入新关系
  1316. sql = """
  1317. INSERT INTO family_relation_info
  1318. (parent_mid, child_mid, relation_type, sub_relation_type, source_mid, generation_diff)
  1319. VALUES (%s, %s, %s, %s, %s, %s)
  1320. """
  1321. cursor.execute(sql, (parent_mid, child_mid, rel_type, sub_rel_type, source_mid, gen_diff))
  1322. conn.commit()
  1323. return jsonify({"success": True, "message": "关系已保存"})
  1324. except Exception as e:
  1325. return jsonify({"success": False, "message": str(e)}), 500
  1326. finally:
  1327. conn.close()
  1328. @app.route('/manager/api/members')
  1329. def get_members():
  1330. if 'user_id' not in session:
  1331. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1332. page = int(request.args.get('page', 1))
  1333. search = request.args.get('search', '')
  1334. per_page = 10
  1335. offset = (page - 1) * per_page
  1336. conn = get_db_connection()
  1337. try:
  1338. with conn.cursor() as cursor:
  1339. # Count total members
  1340. if search:
  1341. cursor.execute("SELECT COUNT(*) as total FROM family_member_info WHERE name LIKE %s OR simplified_name LIKE %s",
  1342. (f'%{search}%', f'%{search}%'))
  1343. else:
  1344. cursor.execute("SELECT COUNT(*) as total FROM family_member_info")
  1345. total_result = cursor.fetchone()
  1346. total = total_result['total'] if total_result else 0
  1347. # Get members for current page with father information
  1348. if search:
  1349. cursor.execute("""
  1350. SELECT
  1351. fmi.id, fmi.name, fmi.simplified_name, fmi.sex, fmi.name_word_generation,
  1352. father.name as father_name, father.simplified_name as father_simplified_name, father.name_word_generation as father_generation
  1353. FROM family_member_info fmi
  1354. LEFT JOIN family_relation_info fri ON fmi.id = fri.child_mid AND fri.relation_type IN (1, 2)
  1355. LEFT JOIN family_member_info father ON fri.parent_mid = father.id
  1356. WHERE fmi.name LIKE %s OR fmi.simplified_name LIKE %s
  1357. LIMIT %s OFFSET %s
  1358. """, (f'%{search}%', f'%{search}%', per_page, offset))
  1359. else:
  1360. cursor.execute("""
  1361. SELECT
  1362. fmi.id, fmi.name, fmi.simplified_name, fmi.sex, fmi.name_word_generation,
  1363. father.name as father_name, father.simplified_name as father_simplified_name, father.name_word_generation as father_generation
  1364. FROM family_member_info fmi
  1365. LEFT JOIN family_relation_info fri ON fmi.id = fri.child_mid AND fri.relation_type IN (1, 2)
  1366. LEFT JOIN family_member_info father ON fri.parent_mid = father.id
  1367. LIMIT %s OFFSET %s
  1368. """, (per_page, offset))
  1369. members = cursor.fetchall()
  1370. # Convert to list of dictionaries if needed
  1371. members_list = []
  1372. for member in members:
  1373. members_list.append({
  1374. 'id': member['id'],
  1375. 'name': member['name'],
  1376. 'simplified_name': member['simplified_name'],
  1377. 'sex': member['sex'],
  1378. 'name_word_generation': member.get('name_word_generation'),
  1379. 'father_name': member.get('father_name'),
  1380. 'father_simplified_name': member.get('father_simplified_name'),
  1381. 'father_generation': member.get('father_generation')
  1382. })
  1383. return jsonify({"success": True, "members": members_list, "total": total})
  1384. except Exception as e:
  1385. return jsonify({"success": False, "message": f"获取成员失败: {e}"}), 500
  1386. finally:
  1387. conn.close()
  1388. def call_doubao_api(prompt, image_url=None):
  1389. """调用豆包API处理文本"""
  1390. api_key = "a1800657-9212-4afe-9b7c-b49f015c54d3"
  1391. api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"
  1392. payload = {
  1393. "model": "doubao-seed-1-8-251228",
  1394. "stream": False,
  1395. "input": [
  1396. {
  1397. "role": "user",
  1398. "content": [
  1399. {"type": "input_text", "text": prompt}
  1400. ]
  1401. }
  1402. ]
  1403. }
  1404. headers = {
  1405. "Authorization": f"Bearer {api_key}",
  1406. "Content-Type": "application/json"
  1407. }
  1408. try:
  1409. response = requests.post(
  1410. api_url,
  1411. json=payload,
  1412. headers=headers,
  1413. timeout=120,
  1414. verify=False,
  1415. proxies={"http": None, "https": None}
  1416. )
  1417. if response.status_code == 200:
  1418. result = response.json()
  1419. print(f"[AI API] Raw response: {result}")
  1420. # 解析响应 - 尝试多种格式
  1421. if 'output' in result:
  1422. for item in result['output']:
  1423. if item.get('type') == 'message':
  1424. content = item.get('content')
  1425. if isinstance(content, str):
  1426. return content
  1427. elif isinstance(content, list):
  1428. for part in content:
  1429. if isinstance(part, dict) and part.get('type') == 'text':
  1430. return part.get('text', '')
  1431. elif isinstance(content, dict) and 'text' in content:
  1432. return content.get('text', '')
  1433. # 尝试其他响应格式
  1434. if 'choices' in result and len(result['choices']) > 0:
  1435. message = result['choices'][0].get('message', {})
  1436. return message.get('content', '')
  1437. # 尝试直接获取文本内容
  1438. if 'text' in result:
  1439. return result['text']
  1440. # 尝试获取响应中的message
  1441. if 'message' in result:
  1442. msg = result['message']
  1443. if isinstance(msg, str):
  1444. return msg
  1445. elif isinstance(msg, dict) and 'content' in msg:
  1446. return msg['content']
  1447. # 返回字符串形式
  1448. return str(result)
  1449. else:
  1450. print(f"[AI API] Error: {response.status_code} - {response.text}")
  1451. return None
  1452. except Exception as e:
  1453. print(f"[AI API] Exception: {e}")
  1454. return None
  1455. def parse_ai_response(ai_response):
  1456. """解析AI响应,提取族谱原文"""
  1457. if not ai_response:
  1458. return None, None
  1459. # 尝试从响应中提取JSON
  1460. try:
  1461. # 移除可能的markdown代码块标记
  1462. text = ai_response.strip()
  1463. if text.startswith('```json'):
  1464. text = text[7:]
  1465. if text.endswith('```'):
  1466. text = text[:-3]
  1467. text = text.strip()
  1468. # 尝试解析JSON
  1469. result = json.loads(text)
  1470. traditional = result.get('genealogy_traditional', '')
  1471. simplified = result.get('genealogy_simplified', '')
  1472. if traditional or simplified:
  1473. return traditional, simplified
  1474. except json.JSONDecodeError:
  1475. print(f"[AI Parse] JSON decode error: {ai_response[:200]}")
  1476. # 如果JSON解析失败,尝试直接提取文本
  1477. # 尝试匹配模式
  1478. import re
  1479. traditional_match = re.search(r'genealogy_traditional["\']?\s*[,:]\s*["\']([^"\']+)["\']', ai_response)
  1480. simplified_match = re.search(r'genealogy_simplified["\']?\s*[,:]\s*["\']([^"\']+)["\']', ai_response)
  1481. traditional = traditional_match.group(1) if traditional_match else ''
  1482. simplified = simplified_match.group(1) if simplified_match else ''
  1483. return traditional, simplified
  1484. @app.route('/manager/api/members/empty_genealogy', methods=['GET'])
  1485. def get_members_empty_genealogy():
  1486. """获取族谱原文为空的成员列表"""
  1487. if 'user_id' not in session:
  1488. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1489. page = int(request.args.get('page', 1))
  1490. per_page = int(request.args.get('per_page', 20))
  1491. offset = (page - 1) * per_page
  1492. conn = get_db_connection()
  1493. try:
  1494. with conn.cursor() as cursor:
  1495. # Count total
  1496. cursor.execute("""
  1497. SELECT COUNT(*) as total
  1498. FROM family_member_info
  1499. WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
  1500. AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
  1501. """)
  1502. total_result = cursor.fetchone()
  1503. total = total_result['total'] if total_result else 0
  1504. # Get members
  1505. cursor.execute("""
  1506. SELECT id, name, simplified_name, name_word_generation, sex, occupation, notes, birth_place
  1507. FROM family_member_info
  1508. WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
  1509. AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
  1510. LIMIT %s OFFSET %s
  1511. """, (per_page, offset))
  1512. members = cursor.fetchall()
  1513. # 关联查询父亲信息
  1514. member_list = []
  1515. for member in members:
  1516. cursor.execute("""
  1517. SELECT p.name, p.simplified_name, p.name_word_generation
  1518. FROM family_relation_info r
  1519. JOIN family_member_info p ON r.parent_mid = p.id
  1520. WHERE r.child_mid = %s AND r.relation_type = 1
  1521. LIMIT 1
  1522. """, (member['id'],))
  1523. father = cursor.fetchone()
  1524. cursor.execute("""
  1525. SELECT p.name, p.simplified_name
  1526. FROM family_relation_info r
  1527. JOIN family_member_info p ON r.parent_mid = p.id
  1528. WHERE r.child_mid = %s AND r.relation_type = 2
  1529. LIMIT 1
  1530. """, (member['id'],))
  1531. mother = cursor.fetchone()
  1532. member_list.append({
  1533. 'id': member['id'],
  1534. 'name': member['name'],
  1535. 'simplified_name': member['simplified_name'],
  1536. 'name_word_generation': member['name_word_generation'],
  1537. 'sex': member['sex'],
  1538. 'occupation': member['occupation'],
  1539. 'notes': member['notes'],
  1540. 'birth_place': member['birth_place'],
  1541. 'father_name': father['name'] if father else None,
  1542. 'father_simplified_name': father['simplified_name'] if father else None,
  1543. 'father_generation': father['name_word_generation'] if father else None,
  1544. 'mother_name': mother['name'] if mother else None,
  1545. 'mother_simplified_name': mother['simplified_name'] if mother else None
  1546. })
  1547. return jsonify({"success": True, "members": member_list, "total": total})
  1548. except Exception as e:
  1549. return jsonify({"success": False, "message": f"获取成员失败: {e}"}), 500
  1550. finally:
  1551. conn.close()
  1552. @app.route('/manager/api/members/batch_process_genealogy', methods=['POST'])
  1553. def batch_process_genealogy():
  1554. """批量处理成员族谱原文"""
  1555. if 'user_id' not in session:
  1556. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1557. data = request.get_json()
  1558. member_ids = data.get('member_ids', [])
  1559. if not member_ids or len(member_ids) > 10:
  1560. return jsonify({"success": False, "message": "请选择1-10个成员进行处理"}), 400
  1561. conn = get_db_connection()
  1562. results = []
  1563. try:
  1564. for member_id in member_ids:
  1565. with conn.cursor() as cursor:
  1566. cursor.execute("""
  1567. SELECT id, name, simplified_name, name_word_generation,
  1568. birth_place, occupation, notes, sex
  1569. FROM family_member_info WHERE id = %s
  1570. """, (member_id,))
  1571. member = cursor.fetchone()
  1572. # 获取父亲信息
  1573. cursor.execute("""
  1574. SELECT p.name, p.simplified_name
  1575. FROM family_relation_info r
  1576. JOIN family_member_info p ON r.parent_mid = p.id
  1577. WHERE r.child_mid = %s AND r.relation_type = 1
  1578. LIMIT 1
  1579. """, (member_id,))
  1580. father = cursor.fetchone()
  1581. # 获取母亲信息
  1582. cursor.execute("""
  1583. SELECT p.name, p.simplified_name
  1584. FROM family_relation_info r
  1585. JOIN family_member_info p ON r.parent_mid = p.id
  1586. WHERE r.child_mid = %s AND r.relation_type = 2
  1587. LIMIT 1
  1588. """, (member_id,))
  1589. mother = cursor.fetchone()
  1590. member['father_name'] = father['name'] if father else None
  1591. member['father_simplified_name'] = father['simplified_name'] if father else None
  1592. member['mother_name'] = mother['name'] if mother else None
  1593. member['mother_simplified_name'] = mother['simplified_name'] if mother else None
  1594. if not member:
  1595. results.append({"member_id": member_id, "success": False, "message": "成员不存在"})
  1596. continue
  1597. # 构建AI提示词
  1598. member_info = f"""
  1599. 姓名(繁体):{member['name']}
  1600. 姓名(简体):{member['simplified_name'] or '未知'}
  1601. 世系世代:{member['name_word_generation'] or '未知'}
  1602. 父亲姓名:{member['father_name'] or '未知'}
  1603. 母亲姓名:{member['mother_name'] or '未知'}
  1604. 出生地:{member['birth_place'] or '未知'}
  1605. 职业:{member['occupation'] or '未知'}
  1606. 备注:{member['notes'] or '无'}
  1607. """
  1608. prompt = f"""
  1609. 请根据以下人员信息,模拟生成该人员的族谱原文:
  1610. {member_info}
  1611. 请输出两个字段:
  1612. 1. genealogy_traditional: 族谱原文(繁体中文,模仿传统族谱格式)
  1613. 2. genealogy_simplified: 族谱原文(简体中文,将繁体转换为简体)
  1614. 请严格按照JSON格式输出,不要包含任何额外解释:
  1615. {{
  1616. "genealogy_traditional": "繁体族谱原文内容",
  1617. "genealogy_simplified": "简体族谱原文内容"
  1618. }}
  1619. """
  1620. ai_response = call_doubao_api(prompt)
  1621. print(f"[AI Response] Member {member_id}: {ai_response}")
  1622. if ai_response:
  1623. # 使用新的解析函数
  1624. traditional, simplified = parse_ai_response(ai_response)
  1625. if traditional or simplified:
  1626. with conn.cursor() as cursor:
  1627. cursor.execute("""
  1628. UPDATE family_member_info
  1629. SET genealogy_original_traditional = %s,
  1630. genealogy_original_simplified = %s
  1631. WHERE id = %s
  1632. """, (traditional, simplified, member_id))
  1633. conn.commit()
  1634. results.append({
  1635. "member_id": member_id,
  1636. "name": member['name'],
  1637. "success": True,
  1638. "traditional": traditional[:100] + "..." if len(traditional) > 100 else traditional,
  1639. "simplified": simplified[:100] + "..." if len(simplified) > 100 else simplified
  1640. })
  1641. else:
  1642. results.append({
  1643. "member_id": member_id,
  1644. "name": member['name'],
  1645. "success": False,
  1646. "message": "AI未返回有效数据"
  1647. })
  1648. else:
  1649. results.append({
  1650. "member_id": member_id,
  1651. "name": member['name'],
  1652. "success": False,
  1653. "message": "AI调用失败"
  1654. })
  1655. return jsonify({"success": True, "results": results})
  1656. except Exception as e:
  1657. print(f"[Batch Process] Exception: {e}")
  1658. return jsonify({"success": False, "message": f"批量处理失败: {e}"}), 500
  1659. finally:
  1660. conn.close()
  1661. @app.route('/manager/api/member/<int:member_id>')
  1662. def get_member(member_id):
  1663. if 'user_id' not in session:
  1664. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1665. conn = get_db_connection()
  1666. try:
  1667. with conn.cursor() as cursor:
  1668. cursor.execute("SELECT id, name, name_word_generation, source_record_id FROM family_member_info WHERE id = %s", (member_id,))
  1669. member = cursor.fetchone()
  1670. if not member:
  1671. return jsonify({"success": False, "message": "成员不存在"}), 404
  1672. return jsonify({"member": member})
  1673. except Exception as e:
  1674. return jsonify({"success": False, "message": f"获取成员失败: {e}"}), 500
  1675. finally:
  1676. conn.close()
  1677. @app.route('/manager/api/check_relations', methods=['POST'])
  1678. def check_relations():
  1679. if 'user_id' not in session:
  1680. return jsonify({"success": False, "message": "Unauthorized"}), 401
  1681. data = request.json
  1682. people = data.get('people', [])
  1683. if not people:
  1684. return jsonify({"success": False, "matches": {}})
  1685. conn = get_db_connection()
  1686. matches = {}
  1687. try:
  1688. with conn.cursor() as cursor:
  1689. # Collect all father names and spouse names to query
  1690. names_to_check = set()
  1691. for p in people:
  1692. if p.get('father_name'): names_to_check.add(p['father_name'])
  1693. if p.get('spouse_name'): names_to_check.add(p['spouse_name'])
  1694. if not names_to_check:
  1695. return jsonify({"success": True, "matches": {}})
  1696. # Query DB
  1697. format_strings = ','.join(['%s'] * len(names_to_check))
  1698. if names_to_check:
  1699. sql = "SELECT id, name, simplified_name, sex, birthday FROM family_member_info WHERE name IN (%s) OR simplified_name IN (%s)" % (format_strings, format_strings)
  1700. cursor.execute(sql, tuple(names_to_check) * 2)
  1701. results = cursor.fetchall()
  1702. else:
  1703. results = []
  1704. # Organize by name
  1705. db_map = {} # name -> [list of members]
  1706. for r in results:
  1707. # Add under 'name' (Traditional/Old Simplified)
  1708. if r['name'] not in db_map: db_map[r['name']] = []
  1709. db_map[r['name']].append(r)
  1710. # Add under 'simplified_name' if exists
  1711. if r.get('simplified_name'):
  1712. sname = r['simplified_name']
  1713. if sname not in db_map: db_map[sname] = []
  1714. # Avoid duplicates if simplified_name is same as name?
  1715. # The list might contain same object reference, which is fine.
  1716. if sname != r['name']:
  1717. db_map[sname].append(r)
  1718. # Build matches for each input person
  1719. for index, p in enumerate(people):
  1720. p_match = {}
  1721. # Check Father
  1722. fname = p.get('father_name')
  1723. if fname and fname in db_map:
  1724. candidates = db_map[fname]
  1725. # Filter: Father should be Male usually, and older than child (if birthday available)
  1726. valid_fathers = [c for c in candidates if c['sex'] == 1]
  1727. if valid_fathers:
  1728. p_match['father'] = valid_fathers # Return all candidates
  1729. # Check Spouse
  1730. sname = p.get('spouse_name')
  1731. if sname and sname in db_map:
  1732. candidates = db_map[sname]
  1733. # Filter: Spouse usually opposite sex
  1734. target_sex = 1 if p.get('sex') == '女' else 2
  1735. valid_spouses = [c for c in candidates if c['sex'] == target_sex]
  1736. if valid_spouses:
  1737. p_match['spouse'] = valid_spouses
  1738. if p_match:
  1739. matches[index] = p_match
  1740. return jsonify({"success": True, "matches": matches})
  1741. finally:
  1742. conn.close()
  1743. @app.route('/manager/add_member', methods=['GET', 'POST'])
  1744. def add_member():
  1745. if 'user_id' not in session:
  1746. return redirect(url_for('login'))
  1747. conn = get_db_connection()
  1748. try:
  1749. # Check for source_record_id (from GET or POST)
  1750. source_record_id = request.args.get('record_id') or request.form.get('source_record_id')
  1751. prefilled_content = None
  1752. source_oss_url = None
  1753. if source_record_id:
  1754. with conn.cursor() as cursor:
  1755. cursor.execute("SELECT oss_url, ai_content, ai_status FROM genealogy_records WHERE id = %s", (source_record_id,))
  1756. rec = cursor.fetchone()
  1757. if rec:
  1758. source_oss_url = rec['oss_url']
  1759. # Check ai_status (2 = success)
  1760. if rec['ai_status'] == 2 and rec['ai_content']:
  1761. prefilled_content = rec['ai_content']
  1762. if request.method == 'POST':
  1763. # 处理生日转换为 Unix 时间戳
  1764. birthday_str = request.form.get('birthday')
  1765. birthday_ts = 0
  1766. if birthday_str:
  1767. try:
  1768. birthday_ts = int(datetime.strptime(birthday_str, '%Y-%m-%d').timestamp())
  1769. except ValueError:
  1770. birthday_ts = 0
  1771. # 关系数据 - 支持多条关系
  1772. relations = []
  1773. # Parse relations from form data
  1774. i = 0
  1775. while True:
  1776. parent_mid = request.form.get(f'relations[{i}][parent_mid]')
  1777. rel_type = request.form.get(f'relations[{i}][relation_type]')
  1778. sub_rel_type = request.form.get(f'relations[{i}][sub_relation_type]', '0')
  1779. if not parent_mid or not rel_type:
  1780. break
  1781. relations.append({
  1782. 'parent_mid': int(parent_mid),
  1783. 'relation_type': int(rel_type),
  1784. 'sub_relation_type': int(sub_rel_type)
  1785. })
  1786. i += 1
  1787. # For backward compatibility, check old-style single relation
  1788. if not relations:
  1789. related_mid = request.form.get('related_mid')
  1790. relation_type = request.form.get('relation_type')
  1791. if related_mid and relation_type:
  1792. relations.append({
  1793. 'parent_mid': int(related_mid),
  1794. 'relation_type': int(relation_type),
  1795. 'sub_relation_type': int(request.form.get('sub_relation_type', '0'))
  1796. })
  1797. # 年龄校验逻辑
  1798. for rel in relations:
  1799. if rel['relation_type'] in [1, 2]: # 1:父子 2:母子
  1800. with conn.cursor() as cursor:
  1801. cursor.execute("SELECT name, birthday FROM family_member_info WHERE id = %s", (rel['parent_mid'],))
  1802. parent = cursor.fetchone()
  1803. if parent and parent['birthday'] > 0 and birthday_ts > 0:
  1804. if birthday_ts < parent['birthday']:
  1805. error_msg = f"数据冲突:成员年龄不能比其父亲/母亲({parent['name']})大,请检查并修正出生日期。"
  1806. flash(error_msg)
  1807. # Re-fetch data for rendering
  1808. cursor.execute("SELECT id, name FROM family_member_info ORDER BY name")
  1809. all_members = cursor.fetchall()
  1810. cursor.execute("SELECT * FROM genealogy_records ORDER BY page_number ASC")
  1811. images = cursor.fetchall()
  1812. if request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.is_json:
  1813. return jsonify({
  1814. "success": False,
  1815. "message": error_msg
  1816. }), 400
  1817. selected_member_name = ''
  1818. return render_template('add_member.html', all_members=all_members, images=images,
  1819. prefilled_content=prefilled_content, source_oss_url=source_oss_url, source_record_id=source_record_id, selected_member_name=selected_member_name)
  1820. break
  1821. # 获取表单数据
  1822. data = {
  1823. 'name': request.form['name'],
  1824. 'simplified_name': request.form.get('simplified_name'),
  1825. 'genealogy_original_traditional': request.form.get('genealogy_original_traditional'),
  1826. 'genealogy_original_simplified': request.form.get('genealogy_original_simplified'),
  1827. 'former_name': request.form.get('former_name'),
  1828. 'childhood_name': request.form.get('childhood_name'),
  1829. 'name_word': request.form.get('name_word'),
  1830. 'name_word_generation': ';'.join([g.strip() for g in request.form.getlist('lineage_generations[]') if g.strip()]),
  1831. 'name_title': request.form.get('name_title'),
  1832. 'sex': request.form['sex'],
  1833. 'birthday': birthday_ts,
  1834. 'is_pass_away': request.form.get('is_pass_away', 0),
  1835. 'marital_status': request.form.get('marital_status', 0),
  1836. 'birth_place': request.form.get('birth_place'),
  1837. 'branch_family_hall': request.form.get('branch_family_hall'),
  1838. 'cluster_place': request.form.get('cluster_place'),
  1839. 'nation': request.form.get('nation'),
  1840. 'residential_address': request.form.get('residential_address'),
  1841. 'phone': request.form.get('phone'),
  1842. 'mail': request.form.get('mail'),
  1843. 'wechat_account': request.form.get('wechat_account'),
  1844. 'id_number': request.form.get('id_number'),
  1845. 'occupation': request.form.get('occupation'),
  1846. 'educational': request.form.get('educational'),
  1847. 'blood_type': request.form.get('blood_type'),
  1848. 'religion': request.form.get('religion'),
  1849. 'hobbies': request.form.get('hobbies'),
  1850. 'personal_achievements': request.form.get('personal_achievements'),
  1851. 'family_rank': request.form.get('family_rank'),
  1852. 'tags': request.form.get('tags'),
  1853. 'notes': request.form.get('notes'),
  1854. 'suspected_error': request.form.get('suspected_error').strip() if request.form.get('suspected_error') else '',
  1855. 'source_record_id': request.form.get('source_record_id') or None, # Save source record ID
  1856. 'create_uid': session['user_id'] # 记录当前操作人
  1857. }
  1858. # ... (rest of logic) ...
  1859. with conn.cursor() as cursor:
  1860. print(f"[Add Member] Inserting member data: {data}")
  1861. fields = ", ".join(data.keys())
  1862. placeholders = ", ".join(["%s"] * len(data))
  1863. sql = f"INSERT INTO family_member_info ({fields}) VALUES ({placeholders})"
  1864. print(f"[Add Member] Executing SQL: {sql}")
  1865. print(f"[Add Member] SQL parameters: {list(data.values())}")
  1866. cursor.execute(sql, list(data.values()))
  1867. member_id = cursor.lastrowid
  1868. print(f"[Add Member] Inserted member with ID: {member_id}")
  1869. # 录入关系(支持多条)
  1870. sql_relation = """
  1871. INSERT INTO family_relation_info
  1872. (parent_mid, child_mid, relation_type, sub_relation_type, source_mid, generation_diff, child_order)
  1873. VALUES (%s, %s, %s, %s, %s, %s, %s)
  1874. """
  1875. for rel in relations:
  1876. rel_type = rel['relation_type']
  1877. parent_mid = rel['parent_mid']
  1878. sub_relation_type = rel['sub_relation_type']
  1879. child_order = rel.get('child_order') if rel_type in [1, 2] else None
  1880. gen_diff = 1 if rel_type in [1, 2] else 0
  1881. print(f"[Add Member] Inserting relation: parent_mid={parent_mid}, child_mid={member_id}, relation_type={rel_type}, sub_relation_type={sub_relation_type}, child_order={child_order}")
  1882. cursor.execute(sql_relation, (parent_mid, member_id, rel_type, sub_relation_type, member_id, gen_diff, child_order))
  1883. # Update AI Record Status if applicable
  1884. source_record_id = data.get('source_record_id')
  1885. source_index = request.form.get('source_index')
  1886. if source_record_id and source_index and source_index.isdigit():
  1887. try:
  1888. idx = int(source_index)
  1889. print(f"[Add Member] Updating AI record status: record_id={source_record_id}, index={idx}")
  1890. cursor.execute("SELECT ai_content FROM genealogy_records WHERE id = %s FOR UPDATE", (source_record_id,))
  1891. rec = cursor.fetchone()
  1892. if rec and rec['ai_content']:
  1893. import json
  1894. content = json.loads(rec['ai_content'])
  1895. # Ensure content is a list (it might be a dict if single object, though we try to normalize)
  1896. if isinstance(content, dict):
  1897. content = [content]
  1898. if isinstance(content, list):
  1899. updated = False
  1900. if 0 <= idx < len(content):
  1901. # Always update the status regardless of current value
  1902. content[idx]['is_imported'] = True
  1903. content[idx]['imported_member_id'] = member_id
  1904. updated = True
  1905. if updated:
  1906. new_content = json.dumps(content, ensure_ascii=False)
  1907. cursor.execute("UPDATE genealogy_records SET ai_content = %s WHERE id = %s", (new_content, source_record_id))
  1908. print(f"[Add Member] Updated AI record status")
  1909. except Exception as e:
  1910. print(f"[Add Member] Error updating AI content status: {e}")
  1911. print(f"[Add Member] Committing transaction")
  1912. if safe_commit(conn):
  1913. print(f"[Add Member] Transaction committed successfully")
  1914. if request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.is_json:
  1915. return jsonify({"success": True, "message": "成员录入成功", "member_id": member_id})
  1916. flash('成员录入成功')
  1917. return redirect(url_for('members'))
  1918. else:
  1919. print(f"[Add Member] Transaction commit failed!")
  1920. if request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.is_json:
  1921. return jsonify({"success": False, "message": "成员录入失败,事务提交失败"}), 500
  1922. flash('成员录入失败,事务提交失败')
  1923. return redirect(url_for('add_member'))
  1924. with conn.cursor() as cursor:
  1925. cursor.execute("SELECT id, name FROM family_member_info ORDER BY name")
  1926. all_members = cursor.fetchall()
  1927. cursor.execute("SELECT * FROM genealogy_records ORDER BY page_number ASC")
  1928. images = cursor.fetchall()
  1929. except Exception as e:
  1930. flash(f'发生错误: {e}')
  1931. all_members = []
  1932. images = []
  1933. finally:
  1934. conn.close()
  1935. selected_member_name = ''
  1936. return render_template('add_member.html', all_members=all_members, images=images,
  1937. prefilled_content=prefilled_content, source_oss_url=source_oss_url, source_record_id=source_record_id, selected_member_name=selected_member_name)
  1938. @app.route('/manager/edit_member/<int:member_id>', methods=['GET', 'POST'])
  1939. def edit_member(member_id):
  1940. if 'user_id' not in session:
  1941. return redirect(url_for('login'))
  1942. conn = get_db_connection()
  1943. try:
  1944. if request.method == 'POST':
  1945. birthday_str = request.form.get('birthday')
  1946. birthday_ts = 0
  1947. if birthday_str:
  1948. try:
  1949. birthday_ts = int(datetime.strptime(birthday_str, '%Y-%m-%d').timestamp())
  1950. except ValueError:
  1951. birthday_ts = 0
  1952. # 关系数据 - 支持多条关系
  1953. relations = []
  1954. i = 0
  1955. while True:
  1956. parent_mid = request.form.get(f'relations[{i}][parent_mid]')
  1957. rel_type = request.form.get(f'relations[{i}][relation_type]')
  1958. sub_rel_type = request.form.get(f'relations[{i}][sub_relation_type]', '0')
  1959. child_order_raw = request.form.get(f'relations[{i}][child_order]', '')
  1960. if not parent_mid or not rel_type:
  1961. break
  1962. child_order = int(child_order_raw) if child_order_raw.strip().isdigit() else None
  1963. relations.append({
  1964. 'parent_mid': int(parent_mid),
  1965. 'relation_type': int(rel_type),
  1966. 'sub_relation_type': int(sub_rel_type),
  1967. 'child_order': child_order,
  1968. })
  1969. i += 1
  1970. # For backward compatibility
  1971. if not relations:
  1972. related_mid = request.form.get('related_mid')
  1973. relation_type = request.form.get('relation_type')
  1974. if related_mid and relation_type:
  1975. child_order_raw = request.form.get('child_order', '')
  1976. relations.append({
  1977. 'parent_mid': int(related_mid),
  1978. 'relation_type': int(relation_type),
  1979. 'sub_relation_type': int(request.form.get('sub_relation_type', '0')),
  1980. 'child_order': int(child_order_raw) if child_order_raw.strip().isdigit() else None,
  1981. })
  1982. # 年龄校验逻辑
  1983. for rel in relations:
  1984. if rel['relation_type'] in [1, 2]:
  1985. with conn.cursor() as cursor:
  1986. cursor.execute("SELECT name, birthday FROM family_member_info WHERE id = %s", (rel['parent_mid'],))
  1987. parent = cursor.fetchone()
  1988. if parent and parent['birthday'] > 0 and birthday_ts > 0:
  1989. if birthday_ts < parent['birthday']:
  1990. flash(f"数据冲突:成员年龄不能比其父亲/母亲({parent['name']})大,请检查并修正出生日期。")
  1991. # 重新加载编辑页所需数据
  1992. cursor.execute("SELECT * FROM family_member_info WHERE id = %s", (member_id,))
  1993. member = cursor.fetchone()
  1994. member['birthday_date'] = birthday_str # 保持用户输入
  1995. cursor.execute("SELECT id, name FROM family_member_info WHERE id != %s ORDER BY name", (member_id,))
  1996. all_members = cursor.fetchall()
  1997. cursor.execute("SELECT * FROM genealogy_records ORDER BY page_number ASC")
  1998. images = cursor.fetchall()
  1999. if request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.is_json:
  2000. return jsonify({
  2001. "success": False,
  2002. "message": f"数据冲突:成员年龄不能比其父亲/母亲({parent['name']})大,请检查并修正出生日期。"
  2003. }), 400
  2004. selected_member_name = ''
  2005. return render_template('add_member.html', member=member, images=images, all_members=all_members, selected_member_name=selected_member_name)
  2006. break
  2007. data = {
  2008. 'name': request.form['name'],
  2009. 'simplified_name': request.form.get('simplified_name'),
  2010. 'genealogy_original_traditional': request.form.get('genealogy_original_traditional'),
  2011. 'genealogy_original_simplified': request.form.get('genealogy_original_simplified'),
  2012. 'former_name': request.form.get('former_name'),
  2013. 'childhood_name': request.form.get('childhood_name'),
  2014. 'name_word': request.form.get('name_word'),
  2015. 'name_word_generation': ';'.join([g.strip() for g in request.form.getlist('lineage_generations[]') if g.strip()]),
  2016. 'name_title': request.form.get('name_title'),
  2017. 'sex': request.form['sex'],
  2018. 'birthday': birthday_ts,
  2019. 'is_pass_away': request.form.get('is_pass_away', 0),
  2020. 'marital_status': request.form.get('marital_status', 0),
  2021. 'birth_place': request.form.get('birth_place'),
  2022. 'branch_family_hall': request.form.get('branch_family_hall'),
  2023. 'cluster_place': request.form.get('cluster_place'),
  2024. 'nation': request.form.get('nation'),
  2025. 'residential_address': request.form.get('residential_address'),
  2026. 'phone': request.form.get('phone'),
  2027. 'mail': request.form.get('mail'),
  2028. 'wechat_account': request.form.get('wechat_account'),
  2029. 'id_number': request.form.get('id_number'),
  2030. 'occupation': request.form.get('occupation'),
  2031. 'educational': request.form.get('educational'),
  2032. 'blood_type': request.form.get('blood_type'),
  2033. 'religion': request.form.get('religion'),
  2034. 'hobbies': request.form.get('hobbies'),
  2035. 'personal_achievements': request.form.get('personal_achievements'),
  2036. 'family_rank': request.form.get('family_rank'),
  2037. 'tags': request.form.get('tags'),
  2038. 'notes': request.form.get('notes'),
  2039. 'suspected_error': request.form.get('suspected_error').strip() if request.form.get('suspected_error') else '',
  2040. 'source_record_id': request.form.get('source_record_id') or None,
  2041. 'create_uid': session['user_id'] # 记录当前操作人
  2042. }
  2043. with conn.cursor() as cursor:
  2044. print(f"[Edit Member] Updating member data: {data}")
  2045. update_parts = [f"{k} = %s" for k in data.keys()]
  2046. sql = f"UPDATE family_member_info SET {', '.join(update_parts)} WHERE id = %s"
  2047. print(f"[Edit Member] Executing SQL: {sql}")
  2048. print(f"[Edit Member] SQL parameters: {list(data.values()) + [member_id]}")
  2049. cursor.execute(sql, list(data.values()) + [member_id])
  2050. print(f"[Edit Member] Updated member with ID: {member_id}")
  2051. # 更新关系(支持多条)
  2052. print(f"[Edit Member] Deleting existing relations for member ID: {member_id}")
  2053. cursor.execute("DELETE FROM family_relation_info WHERE source_mid = %s", (member_id,))
  2054. sql_relation = """
  2055. INSERT INTO family_relation_info
  2056. (parent_mid, child_mid, relation_type, sub_relation_type, source_mid, generation_diff, child_order)
  2057. VALUES (%s, %s, %s, %s, %s, %s, %s)
  2058. """
  2059. for rel in relations:
  2060. rel_type = rel['relation_type']
  2061. parent_mid = rel['parent_mid']
  2062. sub_relation_type = rel['sub_relation_type']
  2063. child_order = rel.get('child_order') if rel_type in [1, 2] else None
  2064. gen_diff = 1 if rel_type in [1, 2] else 0
  2065. print(f"[Edit Member] Inserting relation: parent_mid={parent_mid}, child_mid={member_id}, relation_type={rel_type}, sub_relation_type={sub_relation_type}, child_order={child_order}")
  2066. cursor.execute(sql_relation, (parent_mid, member_id, rel_type, sub_relation_type, member_id, gen_diff, child_order))
  2067. # Update AI Record Status if applicable
  2068. source_record_id = data.get('source_record_id')
  2069. source_index = request.form.get('source_index')
  2070. if source_record_id and source_index and source_index.isdigit():
  2071. try:
  2072. idx = int(source_index)
  2073. print(f"[Edit Member] Updating AI record status: record_id={source_record_id}, index={idx}")
  2074. cursor.execute("SELECT ai_content FROM genealogy_records WHERE id = %s FOR UPDATE", (source_record_id,))
  2075. rec = cursor.fetchone()
  2076. if rec and rec['ai_content']:
  2077. import json
  2078. content = json.loads(rec['ai_content'])
  2079. if isinstance(content, dict):
  2080. content = [content]
  2081. if isinstance(content, list):
  2082. updated = False
  2083. if 0 <= idx < len(content):
  2084. # Always update the status regardless of current value
  2085. content[idx]['is_imported'] = True
  2086. content[idx]['imported_member_id'] = member_id
  2087. updated = True
  2088. if updated:
  2089. new_content = json.dumps(content, ensure_ascii=False)
  2090. cursor.execute("UPDATE genealogy_records SET ai_content = %s WHERE id = %s", (new_content, source_record_id))
  2091. print(f"[Edit Member] Updated AI record status")
  2092. except Exception as e:
  2093. print(f"[Edit Member] Error updating AI content status: {e}")
  2094. print(f"[Edit Member] Committing transaction")
  2095. conn.commit()
  2096. print(f"[Edit Member] Transaction committed successfully")
  2097. if request.headers.get('X-Requested-With') == 'XMLHttpRequest' or request.is_json:
  2098. return jsonify({"success": True, "message": "成员信息更新成功"})
  2099. flash('成员信息更新成功')
  2100. return redirect(url_for('members'))
  2101. with conn.cursor() as cursor:
  2102. cursor.execute("SELECT * FROM family_member_info WHERE id = %s", (member_id,))
  2103. member = cursor.fetchone()
  2104. if not member:
  2105. flash('成员不存在')
  2106. return redirect(url_for('members'))
  2107. # 格式化日期供显示
  2108. if member.get('birthday'):
  2109. member['birthday_date'] = format_timestamp(member['birthday'])
  2110. # 获取现有关系
  2111. cursor.execute("SELECT * FROM family_relation_info WHERE source_mid = %s LIMIT 1", (member_id,))
  2112. current_relation = cursor.fetchone()
  2113. cursor.execute("SELECT id, name FROM family_member_info WHERE id != %s ORDER BY name", (member_id,))
  2114. all_members = cursor.fetchall()
  2115. cursor.execute("SELECT * FROM genealogy_records ORDER BY page_number ASC")
  2116. images = cursor.fetchall()
  2117. finally:
  2118. conn.close()
  2119. # Calculate selected_member_name based on current_relation
  2120. selected_member_name = ''
  2121. if current_relation and current_relation['parent_mid']:
  2122. for m in all_members:
  2123. if m['id'] == current_relation['parent_mid']:
  2124. selected_member_name = m['name']
  2125. break
  2126. # Get source_record_id from member data
  2127. source_record_id = member.get('source_record_id') if member else None
  2128. return render_template('add_member.html', member=member, images=images, all_members=all_members, current_relation=current_relation, selected_member_name=selected_member_name, source_record_id=source_record_id)
  2129. @app.route('/manager/member_detail/<int:member_id>')
  2130. def member_detail(member_id):
  2131. if 'user_id' not in session:
  2132. return redirect(url_for('login'))
  2133. conn = get_db_connection()
  2134. try:
  2135. with conn.cursor() as cursor:
  2136. # Join with genealogy_records to get source image info
  2137. sql = """
  2138. SELECT m.*, r.oss_url as source_image_url, r.page_number as source_page,
  2139. r.genealogy_version, r.genealogy_source, r.upload_person
  2140. FROM family_member_info m
  2141. LEFT JOIN genealogy_records r ON m.source_record_id = r.id
  2142. WHERE m.id = %s
  2143. """
  2144. cursor.execute(sql, (member_id,))
  2145. member = cursor.fetchone()
  2146. if not member:
  2147. flash('成员不存在')
  2148. return redirect(url_for('members'))
  2149. member['birthday_str'] = format_timestamp(member.get('birthday'))
  2150. # 获取关系(包含子类型)
  2151. cursor.execute("""
  2152. SELECT m.id, m.name, r.relation_type, r.sub_relation_type
  2153. FROM family_relation_info r
  2154. JOIN family_member_info m ON r.parent_mid = m.id
  2155. WHERE r.child_mid = %s
  2156. """, (member_id,))
  2157. parents = cursor.fetchall()
  2158. cursor.execute("""
  2159. SELECT m.id, m.name, r.relation_type, r.sub_relation_type
  2160. FROM family_relation_info r
  2161. JOIN family_member_info m ON r.child_mid = m.id
  2162. WHERE r.parent_mid = %s
  2163. """, (member_id,))
  2164. children = cursor.fetchall()
  2165. finally:
  2166. conn.close()
  2167. return render_template('member_detail.html', member=member, parents=parents, children=children)
  2168. @app.route('/manager/delete_member/<int:member_id>', methods=['POST'])
  2169. def delete_member(member_id):
  2170. if 'user_id' not in session:
  2171. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2172. conn = get_db_connection()
  2173. try:
  2174. with conn.cursor() as cursor:
  2175. # 1. 删除关系表中关联该成员的所有记录
  2176. cursor.execute("DELETE FROM family_relation_info WHERE parent_mid = %s OR child_mid = %s OR source_mid = %s",
  2177. (member_id, member_id, member_id))
  2178. # 2. 删除成员本身
  2179. cursor.execute("DELETE FROM family_member_info WHERE id = %s", (member_id,))
  2180. conn.commit()
  2181. flash('成员及其关系已成功删除')
  2182. return redirect(url_for('members'))
  2183. except Exception as e:
  2184. conn.rollback()
  2185. flash(f'删除失败: {e}')
  2186. return redirect(url_for('members'))
  2187. finally:
  2188. conn.close()
  2189. @app.route('/manager/home')
  2190. def home():
  2191. """Home page - Dashboard for the genealogy management system"""
  2192. if 'user_id' not in session:
  2193. return redirect(url_for('login'))
  2194. # Force re-login if is_super_admin not set in session (fresh login required)
  2195. if 'is_super_admin' not in session:
  2196. session.clear()
  2197. flash('请重新登录以获取最新权限')
  2198. return redirect(url_for('login'))
  2199. conn = get_db_connection()
  2200. try:
  2201. with conn.cursor() as cursor:
  2202. # Get member count
  2203. cursor.execute("SELECT COUNT(*) as count FROM family_member_info")
  2204. member_count = cursor.fetchone()['count']
  2205. # Get record count
  2206. cursor.execute("SELECT COUNT(*) as count FROM genealogy_records")
  2207. record_count = cursor.fetchone()['count']
  2208. # Get PDF count
  2209. cursor.execute("SELECT COUNT(*) as count FROM genealogy_pdfs")
  2210. pdf_count = cursor.fetchone()['count']
  2211. # Get suspected error count
  2212. cursor.execute("SELECT COUNT(*) as count FROM family_member_info WHERE suspected_error IS NOT NULL AND TRIM(suspected_error) != ''")
  2213. error_count = cursor.fetchone()['count']
  2214. finally:
  2215. conn.close()
  2216. return render_template('home.html',
  2217. member_count=member_count,
  2218. record_count=record_count,
  2219. pdf_count=pdf_count,
  2220. error_count=error_count)
  2221. @app.route('/manager/login', methods=['GET', 'POST'])
  2222. def login():
  2223. if request.method == 'POST':
  2224. username = request.form['username']
  2225. password = request.form['password']
  2226. try:
  2227. conn = get_db_connection()
  2228. try:
  2229. with conn.cursor() as cursor:
  2230. cursor.execute("SELECT * FROM users WHERE username=%s AND password=%s", (username, password))
  2231. user = cursor.fetchone()
  2232. if user:
  2233. session['user_id'] = user['id']
  2234. session['username'] = user['username']
  2235. session['is_super_admin'] = user.get('is_super_admin', 0) == 1
  2236. return redirect(url_for('home'))
  2237. else:
  2238. flash('用户名或密码错误')
  2239. finally:
  2240. conn.close()
  2241. except Exception as e:
  2242. flash(f'数据库连接错误: {str(e)}')
  2243. print(f'Login error: {str(e)}')
  2244. return render_template('login.html')
  2245. @app.route('/manager/logout')
  2246. def logout():
  2247. session.clear()
  2248. return redirect(url_for('login'))
  2249. @app.route('/manager/api/check_name')
  2250. def check_name():
  2251. if 'user_id' not in session:
  2252. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2253. name = request.args.get('name', '').strip()
  2254. if not name:
  2255. return jsonify({"success": True, "exists": False})
  2256. conn = get_db_connection()
  2257. try:
  2258. with conn.cursor() as cursor:
  2259. # Check for name or simplified_name match
  2260. cursor.execute("SELECT id, name, simplified_name, sex, birthday, is_pass_away FROM family_member_info WHERE name = %s OR simplified_name = %s", (name, name))
  2261. matches = cursor.fetchall()
  2262. if matches:
  2263. # Format birthday for display
  2264. for m in matches:
  2265. if m.get('birthday'):
  2266. m['birthday_str'] = format_timestamp(m['birthday'])
  2267. else:
  2268. m['birthday_str'] = '未知'
  2269. return jsonify({"success": True, "exists": True, "matches": matches})
  2270. else:
  2271. return jsonify({"success": True, "exists": False})
  2272. except Exception as e:
  2273. return jsonify({"success": False, "error": str(e)}), 500
  2274. finally:
  2275. conn.close()
  2276. import requests
  2277. import json
  2278. import re
  2279. @app.route('/manager/api/recognize_image', methods=['POST'])
  2280. def recognize_image():
  2281. if 'user_id' not in session:
  2282. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2283. data = request.json
  2284. image_url = data.get('image_url')
  2285. if not image_url:
  2286. return jsonify({"success": False, "message": "No image URL provided"}), 400
  2287. api_key = "a1800657-9212-4afe-9b7c-b49f015c54d3"
  2288. api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"
  2289. prompt = """
  2290. 请分析这张家谱图片,提取其中关于人物的信息。
  2291. 请务必将繁体字转换为简体字(original_name 字段除外)。
  2292. 特别注意:'name' 字段必须是纯简体中文,不能包含繁体字(例如:'學'应转换为'学','劉'应转换为'刘','萬'应转换为'万')。
  2293. 请提取以下字段(如果存在):
  2294. - original_name: 原始姓名(严格保持图片上的繁体字,不做任何修改或转换)
  2295. - name: 简体姓名(必须转换为简体中文,去除不需要的敬称)
  2296. - sex: 性别(男/女)
  2297. - birthday: 出生日期(尝试转换为YYYY-MM-DD格式,如果无法确定年份可只填月日)
  2298. - death_date: 逝世日期(如文本中出现“殁”、“葬”、“卒”等字眼及其对应的时间,请提取)
  2299. - father_name: 父亲姓名
  2300. - spouse_name: 配偶姓名
  2301. - generation: 第几世/代数
  2302. - name_word: 字辈(例如名字为“学勤公”,“学”为字辈;提取名字中的字辈信息)
  2303. - education: 学历/功名
  2304. - title: 官职/称号
  2305. 请严格以JSON列表格式返回,不要包含Markdown代码块标记(如 ```json ... ```),直接返回JSON数组。
  2306. 如果包含多个人物,请都提取出来。
  2307. """
  2308. ai_payload_url = get_normalized_base64_image(image_url)
  2309. payload = {
  2310. "model": "doubao-seed-1-8-251228",
  2311. "stream": True,
  2312. "input": [
  2313. {
  2314. "role": "user",
  2315. "content": [
  2316. {
  2317. "type": "input_image",
  2318. "image_url": ai_payload_url
  2319. },
  2320. {
  2321. "type": "input_text",
  2322. "text": prompt
  2323. }
  2324. ]
  2325. }
  2326. ]
  2327. }
  2328. headers = {
  2329. "Authorization": f"Bearer {api_key}",
  2330. "Content-Type": "application/json"
  2331. }
  2332. def generate():
  2333. yield "正在连接 AI 服务...\n"
  2334. try:
  2335. # 使用 stream=True, timeout=120
  2336. # 增加 verify=False 以防 SSL 问题(开发环境)
  2337. # 增加 proxies=None 以防本地代理干扰
  2338. with requests.post(
  2339. api_url,
  2340. json=payload,
  2341. headers=headers,
  2342. stream=True,
  2343. timeout=1200,
  2344. verify=False,
  2345. proxies={"http": None, "https": None}
  2346. ) as r:
  2347. if r.status_code != 200:
  2348. yield f"Error: API returned status code {r.status_code}. Response: {r.text}"
  2349. return
  2350. yield "连接成功,正在等待 AI 响应...\n"
  2351. full_reasoning = ""
  2352. json_started = False
  2353. for line in r.iter_lines():
  2354. if line:
  2355. line_str = line.decode('utf-8')
  2356. if line_str.startswith('data: '):
  2357. json_str = line_str[6:]
  2358. if json_str.strip() == '[DONE]':
  2359. break
  2360. try:
  2361. chunk = json.loads(json_str)
  2362. # 处理 standard OpenAI choices format (content)
  2363. if 'choices' in chunk and len(chunk['choices']) > 0:
  2364. delta = chunk['choices'][0].get('delta', {})
  2365. if 'content' in delta:
  2366. if not json_started:
  2367. yield "|||JSON_START|||"
  2368. json_started = True
  2369. yield delta['content']
  2370. # 处理 standard OpenAI choices format (reasoning_content) if any
  2371. if 'reasoning_content' in delta:
  2372. yield f"\n[推理]: {delta['reasoning_content']}"
  2373. # 处理 Doubao/Volcano specific formats
  2374. # Type: response.reasoning_summary_text.delta
  2375. if chunk.get('type') == 'response.reasoning_summary_text.delta':
  2376. if 'delta' in chunk:
  2377. yield chunk['delta']
  2378. # Type: response.text.delta
  2379. if chunk.get('type') == 'response.text.delta':
  2380. if 'delta' in chunk:
  2381. if not json_started:
  2382. yield "|||JSON_START|||"
  2383. json_started = True
  2384. yield chunk['delta']
  2385. # Type: response.output_item.added (May contain initial content or status)
  2386. # Type: response.reasoning_summary_part.added
  2387. except Exception as e:
  2388. print(f"Chunk parse error: {e}")
  2389. else:
  2390. # 尝试直接解析非 data: 开头的行
  2391. try:
  2392. chunk = json.loads(line_str)
  2393. if 'choices' in chunk and len(chunk['choices']) > 0:
  2394. content = chunk['choices'][0]['message']['content']
  2395. yield content
  2396. except:
  2397. pass
  2398. except Exception as e:
  2399. yield f"\n[Error: {str(e)}]"
  2400. return Response(stream_with_context(generate()), mimetype='text/plain')
  2401. @app.route('/manager/api/start_analysis/<int:record_id>', methods=['POST'])
  2402. def start_analysis(record_id):
  2403. if 'user_id' not in session:
  2404. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2405. conn = get_db_connection()
  2406. try:
  2407. with conn.cursor() as cursor:
  2408. # Check if record exists
  2409. cursor.execute("SELECT oss_url, ai_status FROM genealogy_records WHERE id = %s", (record_id,))
  2410. record = cursor.fetchone()
  2411. if not record:
  2412. return jsonify({"success": False, "message": "Record not found"}), 404
  2413. # Update status to processing (1)
  2414. cursor.execute("UPDATE genealogy_records SET ai_status = 1 WHERE id = %s", (record_id,))
  2415. conn.commit()
  2416. # Start background task
  2417. threading.Thread(target=process_ai_task, args=(record_id, record['oss_url'])).start()
  2418. return jsonify({"success": True, "message": "Analysis started"})
  2419. except Exception as e:
  2420. return jsonify({"success": False, "message": str(e)}), 500
  2421. finally:
  2422. conn.close()
  2423. def process_files_background(upload_folder, saved_files, manual_page, suggested_page, genealogy_version, genealogy_source, upload_person):
  2424. current_suggested_page = int(manual_page) if manual_page and str(manual_page).isdigit() else suggested_page
  2425. ensure_pdf_table()
  2426. for item in saved_files:
  2427. if len(item) >= 4:
  2428. filename, file_path, file_page, original_filename = item[0], item[1], item[2], item[3]
  2429. elif len(item) == 3:
  2430. filename, file_path, file_page = item
  2431. original_filename = filename
  2432. else:
  2433. filename, file_path = item[0], item[1]
  2434. file_page = None
  2435. original_filename = filename
  2436. try:
  2437. if filename.lower().endswith('.pdf'):
  2438. import uuid
  2439. display_pdf_name = (original_filename or filename).strip() or filename
  2440. oss_pdf_name = secure_filename(display_pdf_name)
  2441. if not oss_pdf_name or not oss_pdf_name.lower().endswith('.pdf'):
  2442. oss_pdf_name = f"genealogy_pdf_{uuid.uuid4().hex[:8]}.pdf"
  2443. pdf_oss_url = upload_to_oss(file_path, custom_filename=oss_pdf_name)
  2444. if pdf_oss_url:
  2445. desc_parts = []
  2446. if genealogy_version:
  2447. desc_parts.append(genealogy_version)
  2448. if genealogy_source:
  2449. desc_parts.append(genealogy_source)
  2450. pdf_description = ' · '.join(desc_parts) if desc_parts else ''
  2451. conn_pdf = get_db_connection()
  2452. try:
  2453. with conn_pdf.cursor() as cursor:
  2454. cursor.execute(
  2455. "INSERT INTO genealogy_pdfs (file_name, oss_url, description, uploader) VALUES (%s, %s, %s, %s)",
  2456. (display_pdf_name, pdf_oss_url, pdf_description, upload_person or '')
  2457. )
  2458. conn_pdf.commit()
  2459. except Exception as pdf_meta_e:
  2460. print(f"Error inserting genealogy_pdfs for {display_pdf_name}: {pdf_meta_e}")
  2461. finally:
  2462. conn_pdf.close()
  2463. else:
  2464. print(f"Warning: full PDF upload to OSS failed for {filename}, scan pages will still be processed.")
  2465. doc = fitz.open(file_path)
  2466. for page_index in range(len(doc)):
  2467. img_path = None
  2468. try:
  2469. page = doc.load_page(page_index)
  2470. max_dim = max(page.rect.width, page.rect.height)
  2471. zoom = 2000 / max_dim if max_dim > 0 else 2.0
  2472. if zoom > 2.5: zoom = 2.5
  2473. mat = fitz.Matrix(zoom, zoom)
  2474. # Use get_pixmap with matrix directly
  2475. pix = page.get_pixmap(matrix=mat)
  2476. final_page = current_suggested_page
  2477. if genealogy_version and genealogy_source:
  2478. if final_page is not None and str(final_page).strip() != '':
  2479. img_filename = f"{genealogy_version}_{genealogy_source}_{final_page}.jpg"
  2480. else:
  2481. img_filename = f"{genealogy_version}_{genealogy_source}.jpg"
  2482. else:
  2483. img_filename = f"{os.path.splitext(filename)[0]}_page_{page_index+1}.jpg"
  2484. img_path = os.path.join(upload_folder, img_filename)
  2485. # Save the pixmap to the image path
  2486. pix.save(img_path)
  2487. oss_url = upload_to_oss(img_path, custom_filename=img_filename)
  2488. if oss_url:
  2489. conn = get_db_connection()
  2490. try:
  2491. with conn.cursor() as cursor:
  2492. sql = """INSERT INTO genealogy_records
  2493. (file_name, oss_url, page_number, ai_status, genealogy_version, genealogy_source, upload_person, file_type)
  2494. VALUES (%s, %s, %s, 1, %s, %s, %s, %s)"""
  2495. cursor.execute(sql, (img_filename, oss_url, final_page, genealogy_version, genealogy_source, upload_person, 'PDF'))
  2496. record_id = cursor.lastrowid
  2497. conn.commit()
  2498. threading.Thread(target=process_ai_task, args=(record_id, oss_url)).start()
  2499. current_suggested_page += 1
  2500. finally:
  2501. conn.close()
  2502. except Exception as page_e:
  2503. print(f"Error processing page {page_index} of {filename}: {page_e}")
  2504. finally:
  2505. if img_path and os.path.exists(img_path):
  2506. try:
  2507. os.remove(img_path)
  2508. except:
  2509. pass
  2510. doc.close()
  2511. else:
  2512. img_path = compress_image_if_needed(file_path)
  2513. # Use explicitly set page number if provided, otherwise extract from filename or auto-increment
  2514. if file_page and str(file_page).isdigit():
  2515. final_page = int(file_page)
  2516. current_suggested_page = final_page + 1
  2517. page_num = final_page
  2518. else:
  2519. page_num = extract_page_number(img_path)
  2520. final_page = page_num if page_num else current_suggested_page
  2521. ext = os.path.splitext(img_path)[1]
  2522. if genealogy_version and genealogy_source:
  2523. if final_page is not None and str(final_page).strip() != '':
  2524. img_filename = f"{genealogy_version}_{genealogy_source}_{final_page}{ext}"
  2525. else:
  2526. img_filename = f"{genealogy_version}_{genealogy_source}{ext}"
  2527. else:
  2528. img_filename = os.path.basename(img_path)
  2529. oss_url = upload_to_oss(img_path, custom_filename=img_filename)
  2530. if oss_url:
  2531. conn = get_db_connection()
  2532. try:
  2533. with conn.cursor() as cursor:
  2534. sql = """INSERT INTO genealogy_records
  2535. (file_name, oss_url, page_number, ai_status, genealogy_version, genealogy_source, upload_person, file_type)
  2536. VALUES (%s, %s, %s, 1, %s, %s, %s, %s)"""
  2537. cursor.execute(sql, (img_filename, oss_url, final_page, genealogy_version, genealogy_source, upload_person, '图片'))
  2538. record_id = cursor.lastrowid
  2539. conn.commit()
  2540. threading.Thread(target=process_ai_task, args=(record_id, oss_url)).start()
  2541. if page_num:
  2542. current_suggested_page = page_num + 1
  2543. else:
  2544. current_suggested_page += 1
  2545. finally:
  2546. conn.close()
  2547. if img_path and img_path != file_path and os.path.exists(img_path):
  2548. try:
  2549. os.remove(img_path)
  2550. except:
  2551. pass
  2552. except Exception as e:
  2553. print(f"Error processing file {filename}: {e}")
  2554. finally:
  2555. if os.path.exists(file_path):
  2556. try:
  2557. os.remove(file_path)
  2558. except:
  2559. pass
  2560. @app.route('/manager/upload', methods=['GET', 'POST'])
  2561. def upload():
  2562. if 'user_id' not in session:
  2563. return redirect(url_for('login'))
  2564. # 获取建议页码 (当前最大页码 + 1)
  2565. conn = get_db_connection()
  2566. suggested_page = 1
  2567. try:
  2568. with conn.cursor() as cursor:
  2569. cursor.execute("SELECT MAX(page_number) as max_p FROM genealogy_records")
  2570. result = cursor.fetchone()
  2571. if result and result['max_p']:
  2572. suggested_page = result['max_p'] + 1
  2573. finally:
  2574. conn.close()
  2575. if request.method == 'POST':
  2576. if 'file' not in request.files:
  2577. flash('未选择文件')
  2578. return redirect(request.url)
  2579. files = request.files.getlist('file')
  2580. if not files or files[0].filename == '':
  2581. flash('未选择文件')
  2582. return redirect(request.url)
  2583. manual_page = request.form.get('manual_page')
  2584. genealogy_version = request.form.get('genealogy_version', '')
  2585. genealogy_source = request.form.get('genealogy_source', '')
  2586. upload_person = request.form.get('upload_person', '')
  2587. if not upload_person:
  2588. upload_person = session.get('username', '')
  2589. import uuid
  2590. saved_files = []
  2591. for i, file in enumerate(files):
  2592. if not file or not file.filename:
  2593. continue
  2594. original_filename = file.filename
  2595. ext = os.path.splitext(original_filename)[1].lower()
  2596. base_name = secure_filename(original_filename)
  2597. # If secure_filename removes all characters (e.g., pure Chinese name) or just leaves 'pdf'
  2598. if not base_name or base_name == ext.strip('.'):
  2599. filename = f"upload_{uuid.uuid4().hex[:8]}{ext}"
  2600. else:
  2601. # Ensure the extension is preserved
  2602. if not base_name.lower().endswith(ext):
  2603. filename = f"{base_name}{ext}"
  2604. else:
  2605. filename = base_name
  2606. file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
  2607. file.save(file_path)
  2608. # Fetch individual page number if it exists
  2609. file_page = request.form.get(f'page_number_{i}')
  2610. saved_files.append((filename, file_path, file_page, original_filename))
  2611. if saved_files:
  2612. threading.Thread(
  2613. target=process_files_background,
  2614. args=(app.config['UPLOAD_FOLDER'], saved_files, manual_page, suggested_page, genealogy_version, genealogy_source, upload_person)
  2615. ).start()
  2616. flash('上传完成,AI解析中,稍后查看')
  2617. time.sleep(1.5)
  2618. return redirect(url_for('index'))
  2619. return render_template('upload.html', suggested_page=suggested_page)
  2620. @app.route('/manager/save_upload', methods=['POST'])
  2621. def save_upload():
  2622. if 'user_id' not in session: return redirect(url_for('login'))
  2623. filename = request.form.get('filename')
  2624. oss_url = request.form.get('oss_url')
  2625. page_number = request.form.get('page_number')
  2626. genealogy_version = request.form.get('genealogy_version', '')
  2627. genealogy_source = request.form.get('genealogy_source', '')
  2628. upload_person = request.form.get('upload_person', session.get('username', ''))
  2629. file_type = request.form.get('file_type', '图片')
  2630. if not oss_url or not page_number:
  2631. flash('页码不能为空')
  2632. return redirect(url_for('upload'))
  2633. conn = get_db_connection()
  2634. try:
  2635. with conn.cursor() as cursor:
  2636. sql = """INSERT INTO genealogy_records
  2637. (file_name, oss_url, page_number, ai_status, genealogy_version, genealogy_source, upload_person, file_type)
  2638. VALUES (%s, %s, %s, 1, %s, %s, %s, %s)"""
  2639. cursor.execute(sql, (filename, oss_url, page_number, genealogy_version, genealogy_source, upload_person, file_type))
  2640. record_id = cursor.lastrowid
  2641. conn.commit()
  2642. # Start AI Task
  2643. threading.Thread(target=process_ai_task, args=(record_id, oss_url)).start()
  2644. flash('上传完成,AI解析中,稍后查看')
  2645. except Exception as e:
  2646. flash(f'保存失败: {e}')
  2647. finally:
  2648. conn.close()
  2649. return redirect(url_for('index'))
  2650. @app.route('/manager/delete_upload/<int:record_id>', methods=['POST'])
  2651. def delete_upload(record_id):
  2652. if 'user_id' not in session:
  2653. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2654. conn = get_db_connection()
  2655. try:
  2656. with conn.cursor() as cursor:
  2657. # 删除记录
  2658. cursor.execute("DELETE FROM genealogy_records WHERE id = %s", (record_id,))
  2659. conn.commit()
  2660. flash('文件记录已成功删除')
  2661. return redirect(url_for('index'))
  2662. except Exception as e:
  2663. conn.rollback()
  2664. flash(f'删除失败: {e}')
  2665. return redirect(url_for('index'))
  2666. finally:
  2667. conn.close()
  2668. @app.route('/manager/upload_pdf', methods=['GET', 'POST'])
  2669. def upload_pdf():
  2670. if 'user_id' not in session:
  2671. return redirect(url_for('login'))
  2672. if request.method == 'GET':
  2673. return render_template('upload_pdf.html')
  2674. # POST请求处理
  2675. if 'file' not in request.files:
  2676. flash('请选择要上传的PDF文件')
  2677. return redirect(request.url)
  2678. file = request.files['file']
  2679. if file.filename == '':
  2680. flash('请选择要上传的PDF文件')
  2681. return redirect(request.url)
  2682. # 检查文件类型
  2683. if not file.filename.lower().endswith('.pdf'):
  2684. flash('只支持PDF文件上传')
  2685. return redirect(request.url)
  2686. # 获取表单数据
  2687. version_name = request.form.get('version_name', '').strip()
  2688. version_source = request.form.get('version_source', '').strip()
  2689. file_provider = request.form.get('file_provider', '').strip()
  2690. # 验证必填字段
  2691. if not version_name:
  2692. flash('版本名称为必填项')
  2693. return redirect(request.url)
  2694. if not version_source:
  2695. flash('版本来源为必填项')
  2696. return redirect(request.url)
  2697. # 如果未提供文件提供人,使用当前登录用户
  2698. if not file_provider:
  2699. file_provider = session.get('user_id', '未知')
  2700. import uuid
  2701. original_filename = file.filename
  2702. ext = os.path.splitext(original_filename)[1].lower()
  2703. base_name = secure_filename(original_filename)
  2704. if not base_name or base_name == ext.strip('.'):
  2705. filename = f"genealogy_pdf_{uuid.uuid4().hex[:8]}{ext}"
  2706. else:
  2707. if not base_name.lower().endswith(ext):
  2708. filename = f"{base_name}{ext}"
  2709. else:
  2710. filename = base_name
  2711. file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
  2712. file.save(file_path)
  2713. try:
  2714. # Upload to OSS
  2715. oss_url = upload_to_oss(file_path, custom_filename=filename)
  2716. if not oss_url:
  2717. flash('文件上传失败')
  2718. return redirect(request.url)
  2719. # Save to database
  2720. conn = get_db_connection()
  2721. try:
  2722. with conn.cursor() as cursor:
  2723. cursor.execute(
  2724. "INSERT INTO genealogy_pdfs (file_name, oss_url, version_name, version_source, file_provider, upload_time) VALUES (%s, %s, %s, %s, %s, CURRENT_TIMESTAMP)",
  2725. (original_filename, oss_url, version_name, version_source, file_provider)
  2726. )
  2727. conn.commit()
  2728. flash('PDF文件上传成功')
  2729. return redirect(url_for('pdf_management'))
  2730. except Exception as e:
  2731. flash(f'保存失败: {e}')
  2732. return redirect(request.url)
  2733. finally:
  2734. conn.close()
  2735. finally:
  2736. if os.path.exists(file_path):
  2737. try:
  2738. os.remove(file_path)
  2739. except:
  2740. pass
  2741. def process_pdf_pages(file_path, pdf_oss_url, uploader):
  2742. """Process PDF pages and add them to genealogy records"""
  2743. try:
  2744. import fitz
  2745. doc = fitz.open(file_path)
  2746. # Get current max page number
  2747. conn = get_db_connection()
  2748. suggested_page = 1
  2749. try:
  2750. with conn.cursor() as cursor:
  2751. cursor.execute("SELECT MAX(page_number) as max_p FROM genealogy_records")
  2752. result = cursor.fetchone()
  2753. if result and result['max_p']:
  2754. suggested_page = result['max_p'] + 1
  2755. finally:
  2756. conn.close()
  2757. for page_index in range(len(doc)):
  2758. try:
  2759. page = doc[page_index]
  2760. pix = page.get_pixmap(dpi=150)
  2761. # Save as image
  2762. img_filename = f"{os.path.splitext(os.path.basename(file_path))[0]}_page_{page_index+1}.jpg"
  2763. img_path = os.path.join(app.config['UPLOAD_FOLDER'], img_filename)
  2764. pix.save(img_path)
  2765. # Upload to OSS
  2766. img_oss_url = upload_to_oss(img_path, custom_filename=img_filename)
  2767. if img_oss_url:
  2768. # Save to genealogy_records
  2769. conn = get_db_connection()
  2770. try:
  2771. with conn.cursor() as cursor:
  2772. cursor.execute(
  2773. "INSERT INTO genealogy_records (file_name, oss_url, page_number, ai_status, upload_person, file_type) VALUES (%s, %s, %s, 1, %s, %s)",
  2774. (img_filename, img_oss_url, suggested_page + page_index, uploader, '图片')
  2775. )
  2776. record_id = cursor.lastrowid
  2777. conn.commit()
  2778. # Start AI processing
  2779. threading.Thread(target=process_ai_task, args=(record_id, img_oss_url)).start()
  2780. finally:
  2781. conn.close()
  2782. except Exception as e:
  2783. print(f"Error processing page {page_index+1}: {e}")
  2784. finally:
  2785. if 'img_path' in locals() and os.path.exists(img_path):
  2786. try:
  2787. os.remove(img_path)
  2788. except:
  2789. pass
  2790. except Exception as e:
  2791. print(f"Error processing PDF: {e}")
  2792. # --- Settlement Routes ---
  2793. @app.route('/manager/settlements')
  2794. def settlements():
  2795. if 'user_id' not in session:
  2796. return redirect(url_for('login'))
  2797. return render_template('settlements.html')
  2798. @app.route('/manager/api/settlements', methods=['GET'])
  2799. def get_settlements():
  2800. if 'user_id' not in session:
  2801. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2802. conn = get_db_connection()
  2803. try:
  2804. with conn.cursor() as cursor:
  2805. cursor.execute("""
  2806. SELECT s.*, m.name as representative_name, m.simplified_name as representative_simplified_name
  2807. FROM family_settlements s
  2808. LEFT JOIN family_member_info m ON s.representative_id = m.id
  2809. ORDER BY s.created_at DESC
  2810. """)
  2811. settlements = cursor.fetchall()
  2812. # Convert Decimal to float/int for JSON serialization
  2813. result = []
  2814. for s in settlements:
  2815. item = dict(s)
  2816. if item.get('latitude'):
  2817. item['latitude'] = float(item['latitude'])
  2818. if item.get('longitude'):
  2819. item['longitude'] = float(item['longitude'])
  2820. if item.get('population'):
  2821. item['population'] = int(item['population'])
  2822. result.append(item)
  2823. return jsonify({"success": True, "settlements": result})
  2824. finally:
  2825. conn.close()
  2826. @app.route('/manager/api/settlements/<int:id>', methods=['GET'])
  2827. def get_settlement(id):
  2828. if 'user_id' not in session:
  2829. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2830. conn = get_db_connection()
  2831. try:
  2832. with conn.cursor() as cursor:
  2833. cursor.execute("""
  2834. SELECT s.*, m.name as representative_name, m.simplified_name as representative_simplified_name
  2835. FROM family_settlements s
  2836. LEFT JOIN family_member_info m ON s.representative_id = m.id
  2837. WHERE s.id = %s
  2838. """, (id,))
  2839. settlement = cursor.fetchone()
  2840. if settlement:
  2841. # Convert Decimal to float/int for JSON serialization
  2842. item = dict(settlement)
  2843. if item.get('latitude'):
  2844. item['latitude'] = float(item['latitude'])
  2845. if item.get('longitude'):
  2846. item['longitude'] = float(item['longitude'])
  2847. if item.get('population'):
  2848. item['population'] = int(item['population'])
  2849. return jsonify({"success": True, "settlement": item})
  2850. else:
  2851. return jsonify({"success": False, "message": "聚落不存在"})
  2852. finally:
  2853. conn.close()
  2854. @app.route('/manager/api/settlements', methods=['POST'])
  2855. def add_settlement():
  2856. if 'user_id' not in session:
  2857. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2858. if not session.get('is_super_admin'):
  2859. return jsonify({"success": False, "message": "权限不足"}), 403
  2860. data = request.get_json()
  2861. conn = get_db_connection()
  2862. try:
  2863. with conn.cursor() as cursor:
  2864. cursor.execute("""
  2865. INSERT INTO family_settlements
  2866. (name, region, latitude, longitude, population, representative_id, description, surname_type, new_surname, enthusiastic_members)
  2867. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  2868. """, (
  2869. data.get('name'),
  2870. data.get('region'),
  2871. data.get('latitude') or None,
  2872. data.get('longitude') or None,
  2873. data.get('population') or 0,
  2874. data.get('representative_id') or None,
  2875. data.get('description'),
  2876. data.get('surname_type') or 0,
  2877. data.get('new_surname') or None,
  2878. data.get('enthusiastic_members') or None
  2879. ))
  2880. conn.commit()
  2881. return jsonify({"success": True, "message": "添加成功"})
  2882. finally:
  2883. conn.close()
  2884. @app.route('/manager/api/settlements/<int:id>', methods=['PUT'])
  2885. def update_settlement(id):
  2886. if 'user_id' not in session:
  2887. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2888. if not session.get('is_super_admin'):
  2889. return jsonify({"success": False, "message": "权限不足"}), 403
  2890. data = request.get_json()
  2891. conn = get_db_connection()
  2892. try:
  2893. with conn.cursor() as cursor:
  2894. cursor.execute("""
  2895. UPDATE family_settlements
  2896. SET name=%s, region=%s, latitude=%s, longitude=%s,
  2897. population=%s, representative_id=%s, description=%s,
  2898. surname_type=%s, new_surname=%s, enthusiastic_members=%s
  2899. WHERE id=%s
  2900. """, (
  2901. data.get('name'),
  2902. data.get('region'),
  2903. data.get('latitude') or None,
  2904. data.get('longitude') or None,
  2905. data.get('population') or 0,
  2906. data.get('representative_id') or None,
  2907. data.get('description'),
  2908. data.get('surname_type') or 0,
  2909. data.get('new_surname') or None,
  2910. data.get('enthusiastic_members') or None,
  2911. id
  2912. ))
  2913. conn.commit()
  2914. return jsonify({"success": True, "message": "更新成功"})
  2915. finally:
  2916. conn.close()
  2917. @app.route('/manager/api/settlements/<int:id>', methods=['DELETE'])
  2918. def delete_settlement(id):
  2919. if 'user_id' not in session:
  2920. return jsonify({"success": False, "message": "Unauthorized"}), 401
  2921. if not session.get('is_super_admin'):
  2922. return jsonify({"success": False, "message": "权限不足"}), 403
  2923. conn = get_db_connection()
  2924. try:
  2925. with conn.cursor() as cursor:
  2926. cursor.execute("DELETE FROM family_settlements WHERE id=%s", (id,))
  2927. conn.commit()
  2928. return jsonify({"success": True, "message": "删除成功"})
  2929. finally:
  2930. conn.close()
  2931. # 异步批量处理族谱原文功能
  2932. import uuid
  2933. def init_batch_task_table():
  2934. """初始化批量任务表(如果不存在)"""
  2935. conn = get_db_connection()
  2936. try:
  2937. with conn.cursor() as cursor:
  2938. cursor.execute("""
  2939. CREATE TABLE IF NOT EXISTS batch_genealogy_task (
  2940. id INT AUTO_INCREMENT PRIMARY KEY,
  2941. task_id VARCHAR(64) UNIQUE NOT NULL,
  2942. user_id INT NOT NULL,
  2943. status VARCHAR(20) DEFAULT 'pending',
  2944. total_count INT DEFAULT 0,
  2945. completed_count INT DEFAULT 0,
  2946. failed_count INT DEFAULT 0,
  2947. last_processed_id INT DEFAULT 0,
  2948. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  2949. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  2950. results TEXT
  2951. );
  2952. """)
  2953. # 检查是否存在last_processed_id字段,如果不存在则添加
  2954. cursor.execute("SHOW COLUMNS FROM batch_genealogy_task LIKE 'last_processed_id'")
  2955. if not cursor.fetchone():
  2956. cursor.execute("ALTER TABLE batch_genealogy_task ADD COLUMN last_processed_id INT DEFAULT 0")
  2957. conn.commit()
  2958. print("[Database] batch_genealogy_task table initialized")
  2959. except Exception as e:
  2960. print(f"[Database] Error creating batch_genealogy_task table: {e}")
  2961. finally:
  2962. conn.close()
  2963. # 初始化表
  2964. init_batch_task_table()
  2965. def migrate_child_order_column():
  2966. """为 family_relation_info 表添加 child_order 字段(如不存在)"""
  2967. conn = get_db_connection()
  2968. try:
  2969. with conn.cursor() as cursor:
  2970. cursor.execute("SHOW COLUMNS FROM family_relation_info LIKE 'child_order'")
  2971. if not cursor.fetchone():
  2972. cursor.execute(
  2973. "ALTER TABLE family_relation_info ADD COLUMN child_order INT DEFAULT NULL COMMENT '第几子,用于兄弟排序'"
  2974. )
  2975. conn.commit()
  2976. print("[DB Migrate] Added child_order column to family_relation_info")
  2977. else:
  2978. print("[DB Migrate] child_order column already exists")
  2979. except Exception as e:
  2980. print(f"[DB Migrate] Error adding child_order: {e}")
  2981. finally:
  2982. conn.close()
  2983. migrate_child_order_column()
  2984. def migrate_enthusiastic_members_column():
  2985. """为 family_settlements 表添加 enthusiastic_members 字段(如不存在)"""
  2986. conn = get_db_connection()
  2987. try:
  2988. with conn.cursor() as cursor:
  2989. cursor.execute("SHOW COLUMNS FROM family_settlements LIKE 'enthusiastic_members'")
  2990. if not cursor.fetchone():
  2991. cursor.execute(
  2992. "ALTER TABLE family_settlements ADD COLUMN enthusiastic_members TEXT DEFAULT NULL COMMENT '热心宗亲,多人以逗号分隔'"
  2993. )
  2994. conn.commit()
  2995. print("[DB Migrate] Added enthusiastic_members column to family_settlements")
  2996. else:
  2997. print("[DB Migrate] enthusiastic_members column already exists")
  2998. except Exception as e:
  2999. print(f"[DB Migrate] Error adding enthusiastic_members: {e}")
  3000. finally:
  3001. conn.close()
  3002. migrate_enthusiastic_members_column()
  3003. def async_process_genealogy_task(task_id, member_ids, user_id):
  3004. """异步处理族谱原文任务"""
  3005. results = []
  3006. conn = get_db_connection()
  3007. try:
  3008. # 更新任务状态为处理中
  3009. with conn.cursor() as cursor:
  3010. cursor.execute("""
  3011. UPDATE batch_genealogy_task
  3012. SET status = 'processing', total_count = %s
  3013. WHERE task_id = %s
  3014. """, (len(member_ids), task_id))
  3015. conn.commit()
  3016. completed_count = 0
  3017. failed_count = 0
  3018. for member_id in member_ids:
  3019. try:
  3020. with conn.cursor() as cursor:
  3021. cursor.execute("""
  3022. SELECT id, name, simplified_name, name_word_generation,
  3023. birth_place, occupation, notes, sex
  3024. FROM family_member_info WHERE id = %s
  3025. """, (member_id,))
  3026. member = cursor.fetchone()
  3027. # 获取父亲信息
  3028. cursor.execute("""
  3029. SELECT p.name, p.simplified_name
  3030. FROM family_relation_info r
  3031. JOIN family_member_info p ON r.parent_mid = p.id
  3032. WHERE r.child_mid = %s AND r.relation_type = 1
  3033. LIMIT 1
  3034. """, (member_id,))
  3035. father = cursor.fetchone()
  3036. # 获取母亲信息
  3037. cursor.execute("""
  3038. SELECT p.name, p.simplified_name
  3039. FROM family_relation_info r
  3040. JOIN family_member_info p ON r.parent_mid = p.id
  3041. WHERE r.child_mid = %s AND r.relation_type = 2
  3042. LIMIT 1
  3043. """, (member_id,))
  3044. mother = cursor.fetchone()
  3045. member['father_name'] = father['name'] if father else None
  3046. member['father_simplified_name'] = father['simplified_name'] if father else None
  3047. member['mother_name'] = mother['name'] if mother else None
  3048. member['mother_simplified_name'] = mother['simplified_name'] if mother else None
  3049. except Exception as e:
  3050. print(f"[Async Process] Error getting member {member_id}: {e}")
  3051. results.append({
  3052. "member_id": member_id,
  3053. "name": "未知",
  3054. "success": False,
  3055. "message": f"获取成员信息失败: {e}"
  3056. })
  3057. failed_count += 1
  3058. continue
  3059. if not member:
  3060. results.append({
  3061. "member_id": member_id,
  3062. "name": "未知",
  3063. "success": False,
  3064. "message": "成员不存在"
  3065. })
  3066. failed_count += 1
  3067. continue
  3068. # 构建AI提示词
  3069. member_info = f"""
  3070. 姓名(繁体):{member['name']}
  3071. 姓名(简体):{member['simplified_name'] or '未知'}
  3072. 世系世代:{member['name_word_generation'] or '未知'}
  3073. 父亲姓名:{member['father_name'] or '未知'}
  3074. 母亲姓名:{member['mother_name'] or '未知'}
  3075. 出生地:{member['birth_place'] or '未知'}
  3076. 职业:{member['occupation'] or '未知'}
  3077. 备注:{member['notes'] or '无'}
  3078. """
  3079. prompt = f"""
  3080. 请根据以下人员信息,模拟生成该人员的族谱原文:
  3081. {member_info}
  3082. 请输出两个字段:
  3083. 1. genealogy_traditional: 族谱原文(繁体中文,模仿传统族谱格式)
  3084. 2. genealogy_simplified: 族谱原文(简体中文,将繁体转换为简体)
  3085. 请严格按照JSON格式输出,不要包含任何额外解释:
  3086. {{
  3087. "genealogy_traditional": "繁体族谱原文内容",
  3088. "genealogy_simplified": "简体族谱原文内容"
  3089. }}
  3090. """
  3091. ai_response = call_doubao_api(prompt)
  3092. if ai_response:
  3093. traditional, simplified = parse_ai_response(ai_response)
  3094. if traditional or simplified:
  3095. try:
  3096. with conn.cursor() as cursor:
  3097. cursor.execute("""
  3098. UPDATE family_member_info
  3099. SET genealogy_original_traditional = %s,
  3100. genealogy_original_simplified = %s
  3101. WHERE id = %s
  3102. """, (traditional, simplified, member_id))
  3103. conn.commit()
  3104. results.append({
  3105. "member_id": member_id,
  3106. "name": member['name'],
  3107. "success": True,
  3108. "traditional": traditional[:100] + "..." if len(traditional) > 100 else traditional,
  3109. "simplified": simplified[:100] + "..." if len(simplified) > 100 else simplified
  3110. })
  3111. completed_count += 1
  3112. except Exception as e:
  3113. print(f"[Async Process] Error updating member {member_id}: {e}")
  3114. results.append({
  3115. "member_id": member_id,
  3116. "name": member['name'],
  3117. "success": False,
  3118. "message": f"保存失败: {e}"
  3119. })
  3120. failed_count += 1
  3121. else:
  3122. results.append({
  3123. "member_id": member_id,
  3124. "name": member['name'],
  3125. "success": False,
  3126. "message": "AI未返回有效数据"
  3127. })
  3128. failed_count += 1
  3129. else:
  3130. results.append({
  3131. "member_id": member_id,
  3132. "name": member['name'],
  3133. "success": False,
  3134. "message": "AI调用失败"
  3135. })
  3136. failed_count += 1
  3137. # 更新任务状态
  3138. status = 'completed' if failed_count == 0 else 'completed_with_errors'
  3139. with conn.cursor() as cursor:
  3140. cursor.execute("""
  3141. UPDATE batch_genealogy_task
  3142. SET status = %s, completed_count = %s, failed_count = %s, results = %s
  3143. WHERE task_id = %s
  3144. """, (status, completed_count, failed_count, json.dumps(results, ensure_ascii=False), task_id))
  3145. conn.commit()
  3146. print(f"[Async Process] Task {task_id} completed: {completed_count} success, {failed_count} failed")
  3147. except Exception as e:
  3148. print(f"[Async Process] Error in task {task_id}: {e}")
  3149. with conn.cursor() as cursor:
  3150. cursor.execute("""
  3151. UPDATE batch_genealogy_task
  3152. SET status = 'failed', results = %s
  3153. WHERE task_id = %s
  3154. """, (json.dumps({"error": str(e)}, ensure_ascii=False), task_id))
  3155. conn.commit()
  3156. finally:
  3157. conn.close()
  3158. @app.route('/manager/api/members/batch_process_genealogy_async', methods=['POST'])
  3159. def batch_process_genealogy_async():
  3160. """异步批量处理族谱原文"""
  3161. if 'user_id' not in session:
  3162. return jsonify({"success": False, "message": "Unauthorized"}), 401
  3163. data = request.get_json()
  3164. member_ids = data.get('member_ids', [])
  3165. if not member_ids:
  3166. return jsonify({"success": False, "message": "请选择成员进行处理"}), 400
  3167. # 生成任务ID
  3168. task_id = str(uuid.uuid4())
  3169. # 保存任务到数据库
  3170. conn = get_db_connection()
  3171. try:
  3172. with conn.cursor() as cursor:
  3173. cursor.execute("""
  3174. INSERT INTO batch_genealogy_task (task_id, user_id, status, total_count)
  3175. VALUES (%s, %s, 'pending', %s)
  3176. """, (task_id, session['user_id'], len(member_ids)))
  3177. conn.commit()
  3178. finally:
  3179. conn.close()
  3180. # 启动异步线程处理
  3181. threading.Thread(target=async_process_genealogy_task, args=(task_id, member_ids, session['user_id'])).start()
  3182. return jsonify({
  3183. "success": True,
  3184. "task_id": task_id,
  3185. "message": "任务已创建,正在后台处理中"
  3186. })
  3187. @app.route('/manager/api/members/batch_task_status/<task_id>', methods=['GET'])
  3188. def get_batch_task_status(task_id):
  3189. """获取批量任务状态"""
  3190. if 'user_id' not in session:
  3191. return jsonify({"success": False, "message": "Unauthorized"}), 401
  3192. conn = get_db_connection()
  3193. try:
  3194. with conn.cursor() as cursor:
  3195. cursor.execute("""
  3196. SELECT task_id, status, total_count, completed_count, failed_count,
  3197. created_at, updated_at, results
  3198. FROM batch_genealogy_task
  3199. WHERE task_id = %s AND user_id = %s
  3200. """, (task_id, session['user_id']))
  3201. task = cursor.fetchone()
  3202. if task:
  3203. result = {
  3204. "task_id": task['task_id'],
  3205. "status": task['status'],
  3206. "total_count": task['total_count'],
  3207. "completed_count": task['completed_count'],
  3208. "failed_count": task['failed_count'],
  3209. "created_at": task['created_at'].isoformat() if task['created_at'] else None,
  3210. "updated_at": task['updated_at'].isoformat() if task['updated_at'] else None
  3211. }
  3212. if task['results']:
  3213. try:
  3214. result['results'] = json.loads(task['results'])
  3215. except:
  3216. result['results'] = task['results']
  3217. return jsonify({"success": True, "task": result})
  3218. else:
  3219. return jsonify({"success": False, "message": "任务不存在或无权访问"}), 404
  3220. finally:
  3221. conn.close()
  3222. @app.route('/manager/api/members/batch_tasks', methods=['GET'])
  3223. def get_batch_tasks():
  3224. """获取用户的批量任务列表"""
  3225. if 'user_id' not in session:
  3226. return jsonify({"success": False, "message": "Unauthorized"}), 401
  3227. conn = get_db_connection()
  3228. try:
  3229. with conn.cursor() as cursor:
  3230. cursor.execute("""
  3231. SELECT task_id, status, total_count, completed_count, failed_count,
  3232. last_processed_id, created_at, updated_at
  3233. FROM batch_genealogy_task
  3234. WHERE user_id = %s
  3235. ORDER BY created_at DESC
  3236. LIMIT 20
  3237. """, (session['user_id'],))
  3238. tasks = cursor.fetchall()
  3239. result = []
  3240. for task in tasks:
  3241. result.append({
  3242. "task_id": task['task_id'],
  3243. "status": task['status'],
  3244. "total_count": task['total_count'],
  3245. "completed_count": task['completed_count'],
  3246. "failed_count": task['failed_count'],
  3247. "last_processed_id": task['last_processed_id'],
  3248. "created_at": task['created_at'].isoformat() if task['created_at'] else None,
  3249. "updated_at": task['updated_at'].isoformat() if task['updated_at'] else None
  3250. })
  3251. return jsonify({"success": True, "tasks": result})
  3252. finally:
  3253. conn.close()
  3254. def call_doubao_image_api(image_url, prompt):
  3255. """调用豆包API处理图片,提取文本内容"""
  3256. api_key = "a1800657-9212-4afe-9b7c-b49f015c54d3"
  3257. api_url = "https://ark.cn-beijing.volces.com/api/v3/responses"
  3258. ai_payload_url = get_normalized_base64_image(image_url)
  3259. payload = {
  3260. "model": "doubao-seed-1-8-251228",
  3261. "stream": False,
  3262. "input": [
  3263. {
  3264. "role": "user",
  3265. "content": [
  3266. {"type": "input_image", "image_url": ai_payload_url},
  3267. {"type": "input_text", "text": prompt}
  3268. ]
  3269. }
  3270. ]
  3271. }
  3272. headers = {
  3273. "Authorization": f"Bearer {api_key}",
  3274. "Content-Type": "application/json"
  3275. }
  3276. try:
  3277. response = requests.post(
  3278. api_url,
  3279. json=payload,
  3280. headers=headers,
  3281. timeout=120,
  3282. verify=False,
  3283. proxies={"http": None, "https": None}
  3284. )
  3285. if response.status_code == 200:
  3286. return response.json()
  3287. else:
  3288. print(f"[Image AI API] Error: {response.status_code} - {response.text}")
  3289. return None
  3290. except Exception as e:
  3291. print(f"[Image AI API] Exception: {e}")
  3292. return None
  3293. def extract_pure_text(response):
  3294. """从API响应中提取纯文本内容,优先返回 message 类型的最终答案"""
  3295. if not response:
  3296. return ''
  3297. # 优先从 output 列表中提取 message 类型(最终答案)
  3298. if 'output' in response:
  3299. # 第一遍:只找 message 类型
  3300. for item in response['output']:
  3301. if item.get('type') == 'message':
  3302. content = item.get('content')
  3303. if isinstance(content, str):
  3304. return content
  3305. elif isinstance(content, list):
  3306. text_parts = []
  3307. for part in content:
  3308. if isinstance(part, dict) and part.get('type') == 'text':
  3309. text_parts.append(part.get('text', ''))
  3310. elif isinstance(part, str):
  3311. text_parts.append(part)
  3312. result = ''.join(text_parts)
  3313. if result:
  3314. return result
  3315. # 第二遍:没有 message 时才使用 reasoning 内容作为兜底
  3316. for item in response['output']:
  3317. if item.get('type') == 'reasoning':
  3318. content = item.get('content')
  3319. all_text = ''
  3320. summary = item.get('summary', [])
  3321. for part in summary:
  3322. if isinstance(part, dict):
  3323. if part.get('type') in ('summary_text', 'text'):
  3324. all_text += part.get('text', '')
  3325. elif isinstance(part, str):
  3326. all_text += part
  3327. if isinstance(content, str):
  3328. all_text += content
  3329. elif isinstance(content, list):
  3330. for part in content:
  3331. if isinstance(part, dict) and part.get('type') == 'text':
  3332. all_text += part.get('text', '')
  3333. elif isinstance(part, str):
  3334. all_text += part
  3335. if all_text:
  3336. return all_text
  3337. # 第三遍:content 直接是字符串的情况
  3338. for item in response['output']:
  3339. content = item.get('content')
  3340. if isinstance(content, str) and content:
  3341. return content
  3342. # 尝试从 choices 中提取(兼容 OpenAI 格式)
  3343. if 'choices' in response and len(response['choices']) > 0:
  3344. message = response['choices'][0].get('message', {})
  3345. return message.get('content', '')
  3346. return str(response)
  3347. def build_genealogy_prompt(member_name):
  3348. """
  3349. 构建用于竖排繁体家谱图片 OCR 提取的 Prompt。
  3350. 家谱图片为竖排版式(从上到下、从右到左),每位人物记录通常包含:
  3351. 辈字+名讳、字号、行次、父子关系、配偶(配某氏)、生卒年、葬地、子嗣等。
  3352. """
  3353. return f"""这是一张竖排繁体中文家谱图片。图片文字采用竖排格式,从上到下、从右到左逐列阅读。
  3354. 每位人物的记录通常包含以下内容(不一定全有):
  3355. - 辈字加名讳(如:公諱光元)
  3356. - 字号(如:字維亮)
  3357. - 行次(如:行仁一)
  3358. - 与父亲的关系(如:某某公長子、次子、三子)
  3359. - 配偶(如:配李氏、娶王氏)
  3360. - 生卒年月(如:生於某年某月、卒於某年某月)
  3361. - 葬地(如:葬祖山某向、塟於某地)
  3362. - 子嗣(如:子二:長某某、次某某)
  3363. 任务:找到人物「{member_name}」在图片中的完整记录,将其繁体原文逐字准确复制输出。
  3364. 要求:
  3365. 1. 只输出「{member_name}」这一个人物的记录,不包含其他人的内容
  3366. 2. 保持繁体字原貌,不要转换为简体
  3367. 3. 保留原文中的标点符号
  3368. 4. 不要添加任何解释、标注、序号或额外说明
  3369. 5. 直接输出原文内容"""
  3370. def _extract_from_thinking_output(text):
  3371. """
  3372. 从推理模型的思维链输出中提取最终答案。
  3373. 推理模型(如 doubao-seed 系列)会在 message 内容里写出完整思考过程:
  3374. 反复写候选答案、说"不对"再修正,最后以"现在确认/所以输出这个内容"等结论收尾。
  3375. 本函数的策略:
  3376. 1. 找最后一个"答案引导词 + 冒号"之后的文本(如"准确的原文是:"、"准确复制:")
  3377. 2. 若无引导词,则取"现在确认"/"所以输出这个内容"之前的最后一段文本
  3378. 3. 以上均失败则原文返回
  3379. """
  3380. # 思维链特征词
  3381. THINKING_SIGNALS = ['不对,', '现在确认', '准确复制', '准确的原文是', '正确的输出是', '所以输出这个内容']
  3382. if not any(sig in text for sig in THINKING_SIGNALS):
  3383. return text # 非思维链输出,原样返回
  3384. print(f"[CleanText] Detected thinking-model output, extracting final answer")
  3385. # ---- 策略1:找最后一个答案引导词 ----
  3386. ANSWER_INTRO_PATTERNS = [
  3387. r'准确的原文是[::]\s*',
  3388. r'正确的输出是[::]\s*',
  3389. r'现在准确复制[::]\s*',
  3390. r'准确复制[::]\s*',
  3391. r'应该是[::]\s*',
  3392. r'因此输出[::]\s*',
  3393. r'所以正确.*?是[::]\s*',
  3394. r'原文是[::]\s*',
  3395. r'输出[::]\s*',
  3396. ]
  3397. last_end = -1
  3398. for pattern in ANSWER_INTRO_PATTERNS:
  3399. for m in re.finditer(pattern, text):
  3400. if m.end() > last_end:
  3401. last_end = m.end()
  3402. if last_end >= 0:
  3403. remaining = text[last_end:]
  3404. # 取到第一个"结束标志"前
  3405. END_MARKERS = ['不对', '现在确认', '但是', '然而', '\n\n']
  3406. end_pos = len(remaining)
  3407. for marker in END_MARKERS:
  3408. idx = remaining.find(marker)
  3409. if 0 < idx < end_pos:
  3410. end_pos = idx
  3411. candidate = remaining[:end_pos].strip()
  3412. if len(candidate) >= 5:
  3413. print(f"[CleanText] Extracted via answer-intro pattern: '{candidate[:80]}'")
  3414. return candidate
  3415. # ---- 策略2:取"现在确认"之前的最后一段 ----
  3416. for end_phrase in ['现在确认', '所以输出这个内容', '这就是.*?的完整记录']:
  3417. m = re.search(end_phrase, text)
  3418. if m:
  3419. before = text[:m.start()].rstrip()
  3420. # 找最后一个换行符,取之后的内容
  3421. last_nl = before.rfind('\n')
  3422. candidate = (before[last_nl + 1:] if last_nl >= 0 else before[-400:]).strip()
  3423. if len(candidate) >= 5:
  3424. print(f"[CleanText] Extracted before confirmation phrase: '{candidate[:80]}'")
  3425. return candidate
  3426. return text # 均失败则原样返回
  3427. def _apply_char_whitelist(text):
  3428. """只保留汉字(含扩展A区)和常见中文标点"""
  3429. return re.sub(
  3430. r'[^\u4e00-\u9fff\u3400-\u4dbf\u3000-\u303f\uff00-\uffef,。;:、()【】「」『』〔〕·~—…《》]',
  3431. '', text
  3432. ).strip()
  3433. def clean_genealogy_text(text):
  3434. """
  3435. 清理从 AI 响应中提取的族谱文本。
  3436. - 处理 Markdown/JSON 格式噪声
  3437. - 自动识别思维链推理模型输出,提取最终答案段落
  3438. - 保留中文字符和中文标点,去除英文说明行
  3439. """
  3440. if not text:
  3441. return ''
  3442. text = text.strip()
  3443. # 去除代码块标记
  3444. text = re.sub(r'^```[a-z]*\n?', '', text)
  3445. text = re.sub(r'\n?```$', '', text)
  3446. text = text.strip()
  3447. # 尝试解析 JSON,从已知字段提取
  3448. try:
  3449. result = json.loads(text)
  3450. if isinstance(result, dict):
  3451. for key in ['text', 'content', 'result', 'traditional', 'genealogy_traditional']:
  3452. if key in result:
  3453. text = str(result[key])
  3454. break
  3455. except (json.JSONDecodeError, ValueError):
  3456. pass
  3457. # 针对思维链推理模型输出,提取最终答案(必须在行过滤之前,因为推理文本中含有必要的换行结构)
  3458. text = _extract_from_thinking_output(text)
  3459. # 按行过滤:去除纯英文/数字行、空行及明显解释性前缀行
  3460. lines = text.splitlines()
  3461. kept_lines = []
  3462. for line in lines:
  3463. line = line.strip()
  3464. if not line:
  3465. continue
  3466. non_ascii = sum(1 for c in line if ord(c) > 127)
  3467. if non_ascii == 0:
  3468. continue
  3469. if re.match(r'^(注[::]|说明[::]|Note[::]|备注[::])', line):
  3470. continue
  3471. kept_lines.append(line)
  3472. text = ''.join(kept_lines)
  3473. # 字符白名单:只保留汉字和中文标点
  3474. text = _apply_char_whitelist(text)
  3475. return text
  3476. def async_process_all_empty_genealogy(task_id, user_id):
  3477. """
  3478. 异步批量处理族谱原文为空的成员,支持断点续跑。
  3479. 连接管理原则:DB 连接仅在快速读写期间持有,AI 调用(最长120s)期间
  3480. 不占用任何 DB 连接,避免影响其他用户的正常操作。
  3481. """
  3482. import time
  3483. # ── 1. 读取断点位置,立即释放连接 ──────────────────────────────────────
  3484. conn = get_db_connection()
  3485. try:
  3486. with conn.cursor() as cursor:
  3487. cursor.execute(
  3488. "SELECT last_processed_id FROM batch_genealogy_task WHERE task_id = %s",
  3489. (task_id,)
  3490. )
  3491. task = cursor.fetchone()
  3492. last_processed_id = task['last_processed_id'] if task else 0
  3493. finally:
  3494. conn.close()
  3495. completed_count = 0
  3496. failed_count = 0
  3497. results = []
  3498. while True:
  3499. # ── 2. 取下一条待处理成员(短暂占用连接后立即释放)────────────────
  3500. conn = get_db_connection()
  3501. try:
  3502. with conn.cursor() as cursor:
  3503. cursor.execute("""
  3504. SELECT m.id, m.name, m.name_word_generation, m.source_record_id,
  3505. r.oss_url AS image_url, r.ai_content AS record_ai_content
  3506. FROM family_member_info m
  3507. LEFT JOIN genealogy_records r ON m.source_record_id = r.id
  3508. WHERE (m.genealogy_original_traditional IS NULL
  3509. OR m.genealogy_original_traditional = ''
  3510. OR m.genealogy_original_traditional = 'None')
  3511. AND (m.genealogy_original_simplified IS NULL
  3512. OR m.genealogy_original_simplified = ''
  3513. OR m.genealogy_original_simplified = 'None')
  3514. AND m.id > %s
  3515. ORDER BY m.id ASC
  3516. LIMIT 1
  3517. """, (last_processed_id,))
  3518. member = cursor.fetchone()
  3519. finally:
  3520. conn.close()
  3521. if not member:
  3522. break
  3523. member_id = member['id']
  3524. member_name = member['name']
  3525. image_url = member['image_url']
  3526. record_ai_content = member['record_ai_content']
  3527. print(f"[Batch Process] Processing member {member_id}: {member_name}")
  3528. traditional = ""
  3529. simplified = ""
  3530. extract_source = "basic_info"
  3531. try:
  3532. # ── 3. AI 提取(此阶段不持有任何 DB 连接)────────────────────
  3533. if image_url:
  3534. print(f"[Batch Process] Extracting from image: {image_url}")
  3535. prompt = build_genealogy_prompt(member_name)
  3536. ai_response = call_doubao_image_api(image_url, prompt)
  3537. print(f"[Batch Process] AI response for {member_id}: {str(ai_response)[:300]}")
  3538. if ai_response:
  3539. raw_text = extract_pure_text(ai_response)
  3540. traditional = clean_genealogy_text(raw_text)
  3541. print(f"[Batch Process] Cleaned traditional: {traditional[:100]}")
  3542. name_chars = [c for c in member_name if '\u4e00' <= c <= '\u9fff']
  3543. name_found = any(c in traditional for c in name_chars)
  3544. if traditional and len(traditional) >= 5 and name_found:
  3545. simplified = convert_to_simplified(traditional)
  3546. extract_source = "image"
  3547. print(f"[Batch Process] Image extract OK - trad: {traditional[:80]}")
  3548. else:
  3549. traditional = ""
  3550. simplified = ""
  3551. print(f"[Batch Process] Image extract invalid "
  3552. f"(name_found={name_found}, len={len(traditional)}), resetting")
  3553. # ── 4. 回退:从 record AI content 拼装(内存操作,无需 DB)──
  3554. if not (traditional and simplified) and record_ai_content:
  3555. print(f"[Batch Process] Fallback: trying record AI content")
  3556. try:
  3557. ai_content = json.loads(record_ai_content)
  3558. if isinstance(ai_content, list):
  3559. current_person = None
  3560. for person in ai_content:
  3561. person_name = person.get('original_name', person.get('name', '')).strip()
  3562. if person_name and (
  3563. member_name in person_name or person_name in member_name
  3564. ):
  3565. current_person = person
  3566. break
  3567. if current_person:
  3568. name = current_person.get('original_name',
  3569. current_person.get('name', member_name))
  3570. father_name = current_person.get('father_name', '')
  3571. spouse_name = current_person.get('spouse_name', '')
  3572. generation = current_person.get('generation',
  3573. member['name_word_generation'])
  3574. traditional = f"{name},{father_name}之子" if father_name else name
  3575. if spouse_name:
  3576. traditional += f",配{spouse_name}"
  3577. if generation:
  3578. traditional = f"第{generation}世 " + traditional
  3579. simplified = convert_to_simplified(traditional)
  3580. extract_source = "ai_content"
  3581. print(f"[Batch Process] AI content fallback: {traditional[:80]}")
  3582. else:
  3583. print(f"[Batch Process] No matching person for '{member_name}' in AI content")
  3584. except Exception as e:
  3585. print(f"[Batch Process] Failed to parse record AI content: {e}")
  3586. # ── 5. 最终回退:从关系表查父亲和配偶,短暂占用连接后立即释放 ──
  3587. if not (traditional and simplified):
  3588. print(f"[Batch Process] Fallback: basic info from DB")
  3589. conn = get_db_connection()
  3590. try:
  3591. with conn.cursor() as cursor:
  3592. cursor.execute("""
  3593. SELECT p.name FROM family_relation_info r
  3594. JOIN family_member_info p ON r.parent_mid = p.id
  3595. WHERE r.child_mid = %s AND r.relation_type = 1 LIMIT 1
  3596. """, (member_id,))
  3597. father = cursor.fetchone()
  3598. cursor.execute("""
  3599. SELECT p.name FROM family_relation_info r
  3600. JOIN family_member_info p ON r.parent_mid = p.id
  3601. WHERE r.child_mid = %s AND r.relation_type = 2 LIMIT 1
  3602. """, (member_id,))
  3603. spouse = cursor.fetchone()
  3604. finally:
  3605. conn.close()
  3606. father_name = father['name'] if father else ''
  3607. spouse_name = spouse['name'] if spouse else ''
  3608. generation = member['name_word_generation']
  3609. traditional = f"{member_name},{father_name}之子" if father_name else member_name
  3610. if spouse_name:
  3611. traditional += f",配{spouse_name}"
  3612. if generation:
  3613. traditional = f"第{generation}世 " + traditional
  3614. simplified = convert_to_simplified(traditional)
  3615. extract_source = "basic_info"
  3616. print(f"[Batch Process] Basic info fallback: {traditional[:80]}")
  3617. except Exception as extract_err:
  3618. print(f"[Batch Process] Extraction error for member {member_id}: {extract_err}")
  3619. traditional = ""
  3620. simplified = ""
  3621. # ── 6. 保存结果(短暂占用连接后立即释放)────────────────────────
  3622. last_processed_id = member_id
  3623. conn = get_db_connection()
  3624. try:
  3625. if traditional and simplified:
  3626. with conn.cursor() as cursor:
  3627. cursor.execute("""
  3628. UPDATE family_member_info
  3629. SET genealogy_original_traditional = %s,
  3630. genealogy_original_simplified = %s
  3631. WHERE id = %s
  3632. """, (traditional, simplified, member_id))
  3633. completed_count += 1
  3634. results.append({
  3635. "member_id": member_id,
  3636. "name": member_name,
  3637. "success": True,
  3638. "source": extract_source,
  3639. "traditional_length": len(traditional),
  3640. "simplified_length": len(simplified),
  3641. })
  3642. print(f"[Batch Process] Saved member {member_id} (source={extract_source})")
  3643. else:
  3644. failed_count += 1
  3645. results.append({
  3646. "member_id": member_id,
  3647. "name": member_name,
  3648. "success": False,
  3649. "message": "无法提取或生成族谱原文",
  3650. })
  3651. print(f"[Batch Process] Skipped member {member_id}: no valid text extracted")
  3652. with conn.cursor() as cursor:
  3653. cursor.execute("""
  3654. UPDATE batch_genealogy_task
  3655. SET completed_count = %s,
  3656. failed_count = %s,
  3657. last_processed_id = %s,
  3658. status = 'processing'
  3659. WHERE task_id = %s
  3660. """, (completed_count, failed_count, last_processed_id, task_id))
  3661. conn.commit()
  3662. except Exception as db_err:
  3663. print(f"[Batch Process] DB save error for member {member_id}: {db_err}")
  3664. failed_count += 1
  3665. finally:
  3666. conn.close()
  3667. # 每条处理完后短暂暂停,降低对 AI API 和服务器资源的压力
  3668. time.sleep(0.5)
  3669. # ── 7. 任务完成,写入最终状态 ─────────────────────────────────────────
  3670. conn = get_db_connection()
  3671. try:
  3672. status = 'completed' if failed_count == 0 else 'completed_with_errors'
  3673. with conn.cursor() as cursor:
  3674. cursor.execute("""
  3675. UPDATE batch_genealogy_task
  3676. SET status = %s,
  3677. completed_count = %s,
  3678. failed_count = %s,
  3679. results = %s
  3680. WHERE task_id = %s
  3681. """, (status, completed_count, failed_count,
  3682. json.dumps(results, ensure_ascii=False), task_id))
  3683. conn.commit()
  3684. print(f"[Batch Process] Task {task_id} done: "
  3685. f"{completed_count} success, {failed_count} failed")
  3686. except Exception as e:
  3687. print(f"[Batch Process] Error writing final status for {task_id}: {e}")
  3688. finally:
  3689. conn.close()
  3690. @app.route('/manager/api/members/extract_genealogy/<int:member_id>', methods=['GET'])
  3691. def extract_single_genealogy(member_id):
  3692. """单人员提取族谱原文,核心逻辑与批量处理一致,提取后写入数据库"""
  3693. if 'user_id' not in session:
  3694. return jsonify({"success": False, "message": "Unauthorized"}), 401
  3695. conn = get_db_connection()
  3696. try:
  3697. # 查询成员信息
  3698. with conn.cursor() as cursor:
  3699. cursor.execute("""
  3700. SELECT
  3701. m.id, m.name, m.name_word_generation,
  3702. m.source_record_id, r.oss_url as image_url,
  3703. r.ai_content AS record_ai_content
  3704. FROM family_member_info m
  3705. LEFT JOIN genealogy_records r ON m.source_record_id = r.id
  3706. WHERE m.id = %s
  3707. """, (member_id,))
  3708. row = cursor.fetchone()
  3709. if not row:
  3710. return jsonify({"success": False, "message": "未找到成员"}), 404
  3711. # 处理字典或元组格式的返回
  3712. if isinstance(row, dict):
  3713. member = row
  3714. else:
  3715. member = {
  3716. 'id': row[0],
  3717. 'name': row[1],
  3718. 'name_word_generation': row[2],
  3719. 'source_record_id': row[3],
  3720. 'image_url': row[4],
  3721. 'record_ai_content': row[5]
  3722. }
  3723. # 调试:打印查询结果
  3724. print(f"[Single Extract] Query result - id: {member['id']}, name: '{member['name']}', name_word_generation: '{member['name_word_generation']}', source_record_id: {member['source_record_id']}, image_url: '{member['image_url']}', record_ai_content: '{member['record_ai_content'][:50] if member['record_ai_content'] else None}'")
  3725. traditional = ""
  3726. simplified = ""
  3727. source = "basic_info"
  3728. image_url = member['image_url']
  3729. record_ai_content = member['record_ai_content']
  3730. print(f"[Single Extract] Processing member {member_id}: {member['name']}")
  3731. # 优先从关联图片中提取族谱原文
  3732. if image_url:
  3733. print(f"[Single Extract] Extracting from image: {image_url}")
  3734. member_name = member['name']
  3735. prompt = build_genealogy_prompt(member_name)
  3736. ai_response = call_doubao_image_api(image_url, prompt)
  3737. print(f"[Single Extract] AI response: {str(ai_response)[:500]}")
  3738. if ai_response:
  3739. raw_text = extract_pure_text(ai_response)
  3740. print(f"[Single Extract] Raw text from response: '{raw_text[:300]}'")
  3741. traditional = clean_genealogy_text(raw_text)
  3742. print(f"[Single Extract] Cleaned traditional: '{traditional[:200]}', length: {len(traditional)}")
  3743. # 验证提取结果是否包含该人物的姓名(至少包含名字中的一个字)
  3744. name_chars = [c for c in member_name if '\u4e00' <= c <= '\u9fff']
  3745. name_found = any(c in traditional for c in name_chars)
  3746. if traditional and len(traditional) >= 5 and name_found:
  3747. simplified = convert_to_simplified(traditional)
  3748. source = "image"
  3749. print(f"[Single Extract] Extracted from image - traditional: {traditional[:100]}, simplified: {simplified[:100]}")
  3750. else:
  3751. traditional = ""
  3752. simplified = ""
  3753. if not name_found:
  3754. print(f"[Single Extract] Extracted text does not contain name '{member_name}', resetting")
  3755. else:
  3756. print(f"[Single Extract] Image extraction too short ({len(traditional)} chars), resetting")
  3757. else:
  3758. print(f"[Single Extract] AI response is None or empty")
  3759. else:
  3760. print(f"[Single Extract] No image URL found for member {member_id}")
  3761. # 如果从图片提取失败或没有图片,尝试从已有的AI解析内容中提取
  3762. if not (traditional and simplified) and record_ai_content:
  3763. print(f"[Single Extract] Trying to extract from record AI content")
  3764. try:
  3765. ai_content = json.loads(record_ai_content)
  3766. if isinstance(ai_content, list) and len(ai_content) > 0:
  3767. current_person = None
  3768. member_name = member['name']
  3769. for person in ai_content:
  3770. person_name = person.get('original_name', person.get('name', '')).strip()
  3771. if person_name and (member_name in person_name or person_name in member_name):
  3772. current_person = person
  3773. break
  3774. if current_person:
  3775. name = current_person.get('original_name', current_person.get('name', member['name']))
  3776. father_name = current_person.get('father_name', '')
  3777. spouse_name = current_person.get('spouse_name', '')
  3778. generation = current_person.get('generation', member['name_word_generation'])
  3779. traditional = f"{name},{father_name}之子"
  3780. if spouse_name:
  3781. traditional += f",配{spouse_name}"
  3782. if generation:
  3783. traditional = f"第{generation}世 " + traditional
  3784. simplified = convert_to_simplified(traditional)
  3785. source = "ai_content"
  3786. print(f"[Single Extract] Generated from AI content: {traditional}")
  3787. except Exception as e:
  3788. print(f"[Single Extract] Failed to parse record AI content: {e}")
  3789. # 如果还是没有内容,使用基本信息生成(标注来源为 basic_info)
  3790. if not (traditional and simplified):
  3791. print(f"[Single Extract] Generating from basic info")
  3792. with conn.cursor() as cursor:
  3793. cursor.execute("""
  3794. SELECT p.name, p.simplified_name
  3795. FROM family_relation_info r
  3796. JOIN family_member_info p ON r.parent_mid = p.id
  3797. WHERE r.child_mid = %s AND r.relation_type = 1
  3798. LIMIT 1
  3799. """, (member_id,))
  3800. father_row = cursor.fetchone()
  3801. father_name = father_row[0] if father_row else ''
  3802. cursor.execute("""
  3803. SELECT p.name, p.simplified_name
  3804. FROM family_relation_info r
  3805. JOIN family_member_info p ON r.parent_mid = p.id
  3806. WHERE r.child_mid = %s AND r.relation_type = 2
  3807. LIMIT 1
  3808. """, (member_id,))
  3809. spouse_row = cursor.fetchone()
  3810. spouse_name = spouse_row[0] if spouse_row else ''
  3811. generation = member['name_word_generation']
  3812. name = member['name']
  3813. traditional = f"{name},{father_name}之子" if father_name else name
  3814. if spouse_name:
  3815. traditional += f",配{spouse_name}"
  3816. if generation:
  3817. traditional = f"第{generation}世 " + traditional
  3818. simplified = convert_to_simplified(traditional)
  3819. source = "basic_info"
  3820. print(f"[Single Extract] Generated from basic info: {traditional}")
  3821. # 调试:打印最终结果
  3822. print(f"[Single Extract] Final result - traditional: '{traditional}', simplified: '{simplified}'")
  3823. # 写入数据库
  3824. if traditional and simplified:
  3825. with conn.cursor() as cursor:
  3826. cursor.execute("""
  3827. UPDATE family_member_info
  3828. SET genealogy_original_traditional = %s,
  3829. genealogy_original_simplified = %s
  3830. WHERE id = %s
  3831. """, (traditional, simplified, member_id))
  3832. conn.commit()
  3833. print(f"[Single Extract] Successfully saved to database")
  3834. return jsonify({
  3835. "success": True,
  3836. "member_id": member_id,
  3837. "name": member['name'],
  3838. "genealogy_traditional": traditional,
  3839. "genealogy_simplified": simplified,
  3840. "source": source
  3841. })
  3842. else:
  3843. return jsonify({
  3844. "success": False,
  3845. "member_id": member_id,
  3846. "message": "无法提取或生成族谱原文"
  3847. })
  3848. except Exception as e:
  3849. import traceback
  3850. print(f"[Single Extract] Error: {e}")
  3851. print(f"[Single Extract] Traceback: {traceback.format_exc()}")
  3852. return jsonify({
  3853. "success": False,
  3854. "member_id": member_id,
  3855. "message": str(e),
  3856. "error_type": type(e).__name__
  3857. })
  3858. finally:
  3859. conn.close()
  3860. @app.route('/manager/api/members/batch_resume_task', methods=['GET'])
  3861. def batch_resume_task():
  3862. """
  3863. 恢复因服务重启而中断的批量任务(GET,方便浏览器直接访问)。
  3864. 可选参数:?task_id=xxx 不传则自动找最近一条中断任务。
  3865. """
  3866. if 'user_id' not in session:
  3867. return jsonify({"success": False, "message": "Unauthorized"}), 401
  3868. task_id = request.args.get('task_id')
  3869. conn = get_db_connection()
  3870. try:
  3871. with conn.cursor() as cursor:
  3872. if task_id:
  3873. cursor.execute("""
  3874. SELECT task_id, status, last_processed_id, total_count, completed_count, failed_count
  3875. FROM batch_genealogy_task
  3876. WHERE task_id = %s AND user_id = %s
  3877. """, (task_id, session['user_id']))
  3878. else:
  3879. # 找最近一条中断的任务
  3880. cursor.execute("""
  3881. SELECT task_id, status, last_processed_id, total_count, completed_count, failed_count
  3882. FROM batch_genealogy_task
  3883. WHERE user_id = %s AND status IN ('pending', 'processing', 'interrupted')
  3884. ORDER BY created_at DESC
  3885. LIMIT 1
  3886. """, (session['user_id'],))
  3887. task = cursor.fetchone()
  3888. if not task:
  3889. return jsonify({"success": False, "message": "未找到可恢复的任务"}), 404
  3890. task_id = task['task_id']
  3891. # 重新标记为 processing,准备恢复线程
  3892. with conn.cursor() as cursor:
  3893. cursor.execute("""
  3894. UPDATE batch_genealogy_task
  3895. SET status = 'processing'
  3896. WHERE task_id = %s
  3897. """, (task_id,))
  3898. conn.commit()
  3899. threading.Thread(
  3900. target=async_process_all_empty_genealogy,
  3901. args=(task_id, session['user_id']),
  3902. daemon=True
  3903. ).start()
  3904. return jsonify({
  3905. "success": True,
  3906. "task_id": task_id,
  3907. "message": f"任务已从断点恢复(已完成 {task['completed_count']},从 last_processed_id={task['last_processed_id']} 继续)",
  3908. "last_processed_id": task['last_processed_id'],
  3909. "completed_count": task['completed_count'],
  3910. "total_count": task['total_count'],
  3911. })
  3912. finally:
  3913. conn.close()
  3914. @app.route('/manager/api/members/batch_process_all_empty', methods=['GET'])
  3915. def batch_process_all_empty():
  3916. """简便批量处理接口:自动处理所有族谱原文为空的成员,支持断点续跑"""
  3917. if 'user_id' not in session:
  3918. return jsonify({"success": False, "message": "Unauthorized"}), 401
  3919. conn = get_db_connection()
  3920. try:
  3921. with conn.cursor() as cursor:
  3922. cursor.execute("""
  3923. SELECT COUNT(*) as count
  3924. FROM family_member_info
  3925. WHERE (genealogy_original_traditional IS NULL OR genealogy_original_traditional = '' OR genealogy_original_traditional = 'None')
  3926. AND (genealogy_original_simplified IS NULL OR genealogy_original_simplified = '' OR genealogy_original_simplified = 'None')
  3927. """)
  3928. result = cursor.fetchone()
  3929. total_empty = result['count'] if result else 0
  3930. cursor.execute("""
  3931. SELECT task_id, status, last_processed_id, total_count, completed_count, failed_count
  3932. FROM batch_genealogy_task
  3933. WHERE user_id = %s AND status IN ('pending', 'processing')
  3934. ORDER BY created_at DESC
  3935. LIMIT 1
  3936. """, (session['user_id'],))
  3937. running_task = cursor.fetchone()
  3938. if running_task:
  3939. return jsonify({
  3940. "success": False,
  3941. "message": "存在正在进行的任务,若服务已重启可调用 POST /manager/api/members/batch_resume_task 恢复",
  3942. "task_id": running_task['task_id'],
  3943. "status": running_task['status'],
  3944. "last_processed_id": running_task['last_processed_id'],
  3945. "completed_count": running_task['completed_count'],
  3946. "total_count": running_task['total_count'],
  3947. "resume_tip": "POST /manager/api/members/batch_resume_task body: {\"task_id\": \"" + running_task['task_id'] + "\"}"
  3948. })
  3949. task_id = str(uuid.uuid4())
  3950. with conn.cursor() as cursor:
  3951. cursor.execute("""
  3952. INSERT INTO batch_genealogy_task (task_id, user_id, status, total_count, last_processed_id)
  3953. VALUES (%s, %s, 'processing', %s, 0)
  3954. """, (task_id, session['user_id'], total_empty))
  3955. conn.commit()
  3956. threading.Thread(
  3957. target=async_process_all_empty_genealogy,
  3958. args=(task_id, session['user_id']),
  3959. daemon=True
  3960. ).start()
  3961. return jsonify({
  3962. "success": True,
  3963. "task_id": task_id,
  3964. "message": f"任务已创建,将处理 {total_empty} 个族谱原文为空的成员",
  3965. "total_count": total_empty
  3966. })
  3967. finally:
  3968. conn.close()
  3969. if __name__ == '__main__':
  3970. app.run(debug=False, port=5001)