|
@@ -547,9 +547,29 @@ def ensure_pdf_table():
|
|
|
oss_url TEXT NOT NULL,
|
|
oss_url TEXT NOT NULL,
|
|
|
description VARCHAR(500) DEFAULT '',
|
|
description VARCHAR(500) DEFAULT '',
|
|
|
upload_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
upload_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
- uploader VARCHAR(100) DEFAULT ''
|
|
|
|
|
|
|
+ uploader VARCHAR(100) DEFAULT '',
|
|
|
|
|
+ version_name VARCHAR(255) DEFAULT '',
|
|
|
|
|
+ version_source VARCHAR(255) DEFAULT '',
|
|
|
|
|
+ file_provider VARCHAR(100) DEFAULT '',
|
|
|
|
|
+ parse_status INT DEFAULT 0
|
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
|
|
|
""")
|
|
""")
|
|
|
|
|
+ # 检查是否存在parse_status字段,如果不存在则添加
|
|
|
|
|
+ cursor.execute("SHOW COLUMNS FROM genealogy_pdfs LIKE 'parse_status'")
|
|
|
|
|
+ if not cursor.fetchone():
|
|
|
|
|
+ cursor.execute("ALTER TABLE genealogy_pdfs ADD COLUMN parse_status INT DEFAULT 0")
|
|
|
|
|
+ # 检查是否存在version_name字段,如果不存在则添加
|
|
|
|
|
+ cursor.execute("SHOW COLUMNS FROM genealogy_pdfs LIKE 'version_name'")
|
|
|
|
|
+ if not cursor.fetchone():
|
|
|
|
|
+ cursor.execute("ALTER TABLE genealogy_pdfs ADD COLUMN version_name VARCHAR(255) DEFAULT ''")
|
|
|
|
|
+ # 检查是否存在version_source字段,如果不存在则添加
|
|
|
|
|
+ cursor.execute("SHOW COLUMNS FROM genealogy_pdfs LIKE 'version_source'")
|
|
|
|
|
+ if not cursor.fetchone():
|
|
|
|
|
+ cursor.execute("ALTER TABLE genealogy_pdfs ADD COLUMN version_source VARCHAR(255) DEFAULT ''")
|
|
|
|
|
+ # 检查是否存在file_provider字段,如果不存在则添加
|
|
|
|
|
+ cursor.execute("SHOW COLUMNS FROM genealogy_pdfs LIKE 'file_provider'")
|
|
|
|
|
+ if not cursor.fetchone():
|
|
|
|
|
+ cursor.execute("ALTER TABLE genealogy_pdfs ADD COLUMN file_provider VARCHAR(100) DEFAULT ''")
|
|
|
conn.commit()
|
|
conn.commit()
|
|
|
finally:
|
|
finally:
|
|
|
conn.close()
|
|
conn.close()
|
|
@@ -561,6 +581,7 @@ def pdf_management():
|
|
|
|
|
|
|
|
ensure_pdf_table()
|
|
ensure_pdf_table()
|
|
|
view_id = request.args.get('view', type=int)
|
|
view_id = request.args.get('view', type=int)
|
|
|
|
|
+ preview = request.args.get('preview', type=bool, default=False)
|
|
|
selected_pdf = None
|
|
selected_pdf = None
|
|
|
|
|
|
|
|
conn = get_db_connection()
|
|
conn = get_db_connection()
|
|
@@ -568,16 +589,139 @@ def pdf_management():
|
|
|
with conn.cursor() as cursor:
|
|
with conn.cursor() as cursor:
|
|
|
cursor.execute("SELECT * FROM genealogy_pdfs ORDER BY upload_time DESC")
|
|
cursor.execute("SELECT * FROM genealogy_pdfs ORDER BY upload_time DESC")
|
|
|
pdfs = cursor.fetchall()
|
|
pdfs = cursor.fetchall()
|
|
|
- if view_id:
|
|
|
|
|
|
|
+ if view_id and preview:
|
|
|
cursor.execute("SELECT * FROM genealogy_pdfs WHERE id = %s", (view_id,))
|
|
cursor.execute("SELECT * FROM genealogy_pdfs WHERE id = %s", (view_id,))
|
|
|
selected_pdf = cursor.fetchone()
|
|
selected_pdf = cursor.fetchone()
|
|
|
- elif pdfs:
|
|
|
|
|
- selected_pdf = pdfs[0]
|
|
|
|
|
finally:
|
|
finally:
|
|
|
conn.close()
|
|
conn.close()
|
|
|
|
|
|
|
|
return render_template('pdf_management.html', pdfs=pdfs, selected_pdf=selected_pdf)
|
|
return render_template('pdf_management.html', pdfs=pdfs, selected_pdf=selected_pdf)
|
|
|
|
|
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+@app.route('/manager/parse_pdf/<int:pdf_id>', methods=['POST'])
|
|
|
|
|
+def parse_pdf(pdf_id):
|
|
|
|
|
+ if 'user_id' not in session:
|
|
|
|
|
+ return jsonify({"success": False, "message": "Unauthorized"}), 401
|
|
|
|
|
+
|
|
|
|
|
+ # 标记PDF为解析中
|
|
|
|
|
+ conn = get_db_connection()
|
|
|
|
|
+ try:
|
|
|
|
|
+ with conn.cursor() as cursor:
|
|
|
|
|
+ cursor.execute("UPDATE genealogy_pdfs SET parse_status = 1 WHERE id = %s", (pdf_id,))
|
|
|
|
|
+ conn.commit()
|
|
|
|
|
+ finally:
|
|
|
|
|
+ conn.close()
|
|
|
|
|
+
|
|
|
|
|
+ # 异步执行PDF解析
|
|
|
|
|
+ def parse_pdf_async():
|
|
|
|
|
+ try:
|
|
|
|
|
+ # 获取PDF信息
|
|
|
|
|
+ conn = get_db_connection()
|
|
|
|
|
+ pdf_info = None
|
|
|
|
|
+ try:
|
|
|
|
|
+ with conn.cursor() as cursor:
|
|
|
|
|
+ cursor.execute("SELECT * FROM genealogy_pdfs WHERE id = %s", (pdf_id,))
|
|
|
|
|
+ pdf_info = cursor.fetchone()
|
|
|
|
|
+ finally:
|
|
|
|
|
+ conn.close()
|
|
|
|
|
+
|
|
|
|
|
+ if not pdf_info:
|
|
|
|
|
+ return
|
|
|
|
|
+
|
|
|
|
|
+ # 下载PDF并拆分
|
|
|
|
|
+ pdf_url = pdf_info['oss_url']
|
|
|
|
|
+ response = requests.get(pdf_url)
|
|
|
|
|
+ response.raise_for_status()
|
|
|
|
|
+
|
|
|
|
|
+ # 保存临时PDF文件
|
|
|
|
|
+ temp_pdf_path = f"/tmp/{pdf_info['file_name']}"
|
|
|
|
|
+ with open(temp_pdf_path, 'wb') as f:
|
|
|
|
|
+ f.write(response.content)
|
|
|
|
|
+
|
|
|
|
|
+ # 使用PyMuPDF拆分PDF
|
|
|
|
|
+ doc = fitz.open(temp_pdf_path)
|
|
|
|
|
+ page_count = doc.page_count
|
|
|
|
|
+
|
|
|
|
|
+ # 获取当前最大页码
|
|
|
|
|
+ conn = get_db_connection()
|
|
|
|
|
+ max_page = 0
|
|
|
|
|
+ try:
|
|
|
|
|
+ with conn.cursor() as cursor:
|
|
|
|
|
+ cursor.execute("SELECT MAX(page_number) as max_page FROM genealogy_records")
|
|
|
|
|
+ result = cursor.fetchone()
|
|
|
|
|
+ if result and result['max_page']:
|
|
|
|
|
+ max_page = result['max_page']
|
|
|
|
|
+ finally:
|
|
|
|
|
+ conn.close()
|
|
|
|
|
+
|
|
|
|
|
+ # 逐页处理
|
|
|
|
|
+ for i in range(page_count):
|
|
|
|
|
+ page = doc[i]
|
|
|
|
|
+ pix = page.get_pixmap()
|
|
|
|
|
+ image_path = f"/tmp/{pdf_info['file_name']}_page_{i+1}.png"
|
|
|
|
|
+ pix.save(image_path)
|
|
|
|
|
+
|
|
|
|
|
+ # 上传图片到OSS
|
|
|
|
|
+ with open(image_path, 'rb') as f:
|
|
|
|
|
+ image_oss_url = upload_to_oss(f, f"{pdf_info['file_name']}_page_{i+1}.png")
|
|
|
|
|
+
|
|
|
|
|
+ # 保存到genealogy_records表
|
|
|
|
|
+ conn = get_db_connection()
|
|
|
|
|
+ try:
|
|
|
|
|
+ with conn.cursor() as cursor:
|
|
|
|
|
+ cursor.execute("""
|
|
|
|
|
+ INSERT INTO genealogy_records
|
|
|
|
|
+ (file_name, oss_url, file_type, page_number, genealogy_version, genealogy_source, upload_person, upload_time)
|
|
|
|
|
+ VALUES (%s, %s, %s, %s, %s, %s, %s, CURRENT_TIMESTAMP)
|
|
|
|
|
+ """, (
|
|
|
|
|
+ f"{pdf_info['file_name']}_page_{i+1}.png",
|
|
|
|
|
+ image_oss_url,
|
|
|
|
|
+ '图片',
|
|
|
|
|
+ max_page + i + 1,
|
|
|
|
|
+ pdf_info['version_name'],
|
|
|
|
|
+ pdf_info['version_source'],
|
|
|
|
|
+ pdf_info['file_provider']
|
|
|
|
|
+ ))
|
|
|
|
|
+ conn.commit()
|
|
|
|
|
+ finally:
|
|
|
|
|
+ conn.close()
|
|
|
|
|
+
|
|
|
|
|
+ # 删除临时图片文件
|
|
|
|
|
+ if os.path.exists(image_path):
|
|
|
|
|
+ os.remove(image_path)
|
|
|
|
|
+
|
|
|
|
|
+ # 删除临时PDF文件
|
|
|
|
|
+ if os.path.exists(temp_pdf_path):
|
|
|
|
|
+ os.remove(temp_pdf_path)
|
|
|
|
|
+
|
|
|
|
|
+ # 更新PDF解析状态为成功
|
|
|
|
|
+ conn = get_db_connection()
|
|
|
|
|
+ try:
|
|
|
|
|
+ with conn.cursor() as cursor:
|
|
|
|
|
+ cursor.execute("UPDATE genealogy_pdfs SET parse_status = 2 WHERE id = %s", (pdf_id,))
|
|
|
|
|
+ conn.commit()
|
|
|
|
|
+ finally:
|
|
|
|
|
+ conn.close()
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ # 更新PDF解析状态为失败
|
|
|
|
|
+ conn = get_db_connection()
|
|
|
|
|
+ try:
|
|
|
|
|
+ with conn.cursor() as cursor:
|
|
|
|
|
+ cursor.execute("UPDATE genealogy_pdfs SET parse_status = 3 WHERE id = %s", (pdf_id,))
|
|
|
|
|
+ conn.commit()
|
|
|
|
|
+ finally:
|
|
|
|
|
+ conn.close()
|
|
|
|
|
+ print(f"PDF解析失败: {e}")
|
|
|
|
|
+
|
|
|
|
|
+ # 启动异步任务
|
|
|
|
|
+ thread = threading.Thread(target=parse_pdf_async)
|
|
|
|
|
+ thread.daemon = True
|
|
|
|
|
+ thread.start()
|
|
|
|
|
+
|
|
|
|
|
+ return jsonify({"success": True, "message": "PDF解析已开始,将在后台执行"})
|
|
|
|
|
+
|
|
|
@app.route('/manager/delete_pdf/<int:pdf_id>', methods=['POST'])
|
|
@app.route('/manager/delete_pdf/<int:pdf_id>', methods=['POST'])
|
|
|
def delete_pdf(pdf_id):
|
|
def delete_pdf(pdf_id):
|
|
|
if 'user_id' not in session:
|
|
if 'user_id' not in session:
|
|
@@ -1882,21 +2026,46 @@ def delete_upload(record_id):
|
|
|
finally:
|
|
finally:
|
|
|
conn.close()
|
|
conn.close()
|
|
|
|
|
|
|
|
-@app.route('/manager/upload_pdf', methods=['POST'])
|
|
|
|
|
|
|
+@app.route('/manager/upload_pdf', methods=['GET', 'POST'])
|
|
|
def upload_pdf():
|
|
def upload_pdf():
|
|
|
if 'user_id' not in session:
|
|
if 'user_id' not in session:
|
|
|
- return jsonify({"success": False, "message": "Unauthorized"}), 401
|
|
|
|
|
|
|
+ return redirect(url_for('login'))
|
|
|
|
|
|
|
|
- if 'file' not in request.files:
|
|
|
|
|
- return jsonify({"success": False, "message": "未选择文件"}), 400
|
|
|
|
|
|
|
+ if request.method == 'GET':
|
|
|
|
|
+ return render_template('upload_pdf.html')
|
|
|
|
|
|
|
|
|
|
+ # POST请求处理
|
|
|
|
|
+ if 'file' not in request.files:
|
|
|
|
|
+ flash('请选择要上传的PDF文件')
|
|
|
|
|
+ return redirect(request.url)
|
|
|
|
|
+
|
|
|
file = request.files['file']
|
|
file = request.files['file']
|
|
|
if file.filename == '':
|
|
if file.filename == '':
|
|
|
- return jsonify({"success": False, "message": "未选择文件"}), 400
|
|
|
|
|
-
|
|
|
|
|
|
|
+ flash('请选择要上传的PDF文件')
|
|
|
|
|
+ return redirect(request.url)
|
|
|
|
|
+
|
|
|
|
|
+ # 检查文件类型
|
|
|
if not file.filename.lower().endswith('.pdf'):
|
|
if not file.filename.lower().endswith('.pdf'):
|
|
|
- return jsonify({"success": False, "message": "请上传PDF文件"}), 400
|
|
|
|
|
-
|
|
|
|
|
|
|
+ flash('只支持PDF文件上传')
|
|
|
|
|
+ return redirect(request.url)
|
|
|
|
|
+
|
|
|
|
|
+ # 获取表单数据
|
|
|
|
|
+ version_name = request.form.get('version_name', '').strip()
|
|
|
|
|
+ version_source = request.form.get('version_source', '').strip()
|
|
|
|
|
+ file_provider = request.form.get('file_provider', '').strip()
|
|
|
|
|
+
|
|
|
|
|
+ # 验证必填字段
|
|
|
|
|
+ if not version_name:
|
|
|
|
|
+ flash('版本名称为必填项')
|
|
|
|
|
+ return redirect(request.url)
|
|
|
|
|
+ if not version_source:
|
|
|
|
|
+ flash('版本来源为必填项')
|
|
|
|
|
+ return redirect(request.url)
|
|
|
|
|
+
|
|
|
|
|
+ # 如果未提供文件提供人,使用当前登录用户
|
|
|
|
|
+ if not file_provider:
|
|
|
|
|
+ file_provider = session.get('user_id', '未知')
|
|
|
|
|
+
|
|
|
import uuid
|
|
import uuid
|
|
|
original_filename = file.filename
|
|
original_filename = file.filename
|
|
|
ext = os.path.splitext(original_filename)[1].lower()
|
|
ext = os.path.splitext(original_filename)[1].lower()
|
|
@@ -1917,33 +2086,24 @@ def upload_pdf():
|
|
|
# Upload to OSS
|
|
# Upload to OSS
|
|
|
oss_url = upload_to_oss(file_path, custom_filename=filename)
|
|
oss_url = upload_to_oss(file_path, custom_filename=filename)
|
|
|
if not oss_url:
|
|
if not oss_url:
|
|
|
- return jsonify({"success": False, "message": "文件上传失败"}), 500
|
|
|
|
|
-
|
|
|
|
|
- # Get form data
|
|
|
|
|
- uploader = request.form.get('uploader', session.get('username', ''))
|
|
|
|
|
- version_name = request.form.get('version_name', '')
|
|
|
|
|
- version_source = request.form.get('version_source', '')
|
|
|
|
|
- file_provider = request.form.get('file_provider', uploader)
|
|
|
|
|
|
|
+ flash('文件上传失败')
|
|
|
|
|
+ return redirect(request.url)
|
|
|
|
|
|
|
|
# Save to database
|
|
# Save to database
|
|
|
conn = get_db_connection()
|
|
conn = get_db_connection()
|
|
|
try:
|
|
try:
|
|
|
with conn.cursor() as cursor:
|
|
with conn.cursor() as cursor:
|
|
|
cursor.execute(
|
|
cursor.execute(
|
|
|
- "INSERT INTO genealogy_pdfs (file_name, oss_url, uploader, version_name, version_source, file_provider) VALUES (%s, %s, %s, %s, %s, %s)",
|
|
|
|
|
- (original_filename, oss_url, uploader, version_name, version_source, file_provider)
|
|
|
|
|
|
|
+ "INSERT INTO genealogy_pdfs (file_name, oss_url, version_name, version_source, file_provider, upload_time) VALUES (%s, %s, %s, %s, %s, CURRENT_TIMESTAMP)",
|
|
|
|
|
+ (original_filename, oss_url, version_name, version_source, file_provider)
|
|
|
)
|
|
)
|
|
|
conn.commit()
|
|
conn.commit()
|
|
|
|
|
|
|
|
- # Start background processing for PDF pages
|
|
|
|
|
- threading.Thread(
|
|
|
|
|
- target=process_pdf_pages,
|
|
|
|
|
- args=(file_path, oss_url, uploader)
|
|
|
|
|
- ).start()
|
|
|
|
|
-
|
|
|
|
|
- return jsonify({"success": True, "message": "PDF文件上传成功,正在解析页面"})
|
|
|
|
|
|
|
+ flash('PDF文件上传成功')
|
|
|
|
|
+ return redirect(url_for('pdf_management'))
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
- return jsonify({"success": False, "message": f"保存失败: {e}"}), 500
|
|
|
|
|
|
|
+ flash(f'保存失败: {e}')
|
|
|
|
|
+ return redirect(request.url)
|
|
|
finally:
|
|
finally:
|
|
|
conn.close()
|
|
conn.close()
|
|
|
finally:
|
|
finally:
|