Hai Lin 2 周之前
父節點
當前提交
a2a4a5e3c2
共有 2 個文件被更改,包括 10 次插入和 1 次删除
  1. 7 0
      app.py
  2. 3 1
      duplicate_checker.py

+ 7 - 0
app.py

@@ -32,6 +32,13 @@ def check_duplicate():
     # 执行基于内容的查重
     result = checker.check_duplicate_by_content(question_data)
 
+    # 增加详细日志
+    top_score = result["top_similar"][0]["similarity"] if result.get("top_similar") else "N/A"
+    print(f"🔍 查重决策详情: status={result.get('status')}, "
+          f"is_duplicate={result.get('is_duplicate')}, "
+          f"max_score={top_score}, "
+          f"gpt_checked={result.get('gpt_checked', False)}")
+
     if result.get("status") == "error":
         return jsonify({"code": -1, "message": result.get("message")}), 500
 

+ 3 - 1
duplicate_checker.py

@@ -505,7 +505,7 @@ class QuestionDuplicateChecker:
 
     def sync_all_from_db(self, batch_size=50, max_workers=5):
         """同步数据库中所有题目到索引 (支持加权模式 + 批量 + 多线程)"""
-        print("正在进行全量同步 (优化版 - 加权模式)...")
+        print("🔄 开始全量同步 (优化版 - 加权模式)...")
         existing_ids = {m['id'] for m in self.metadata}
         try:
             conn = pymysql.connect(
@@ -518,9 +518,11 @@ class QuestionDuplicateChecker:
                 cursorclass=pymysql.cursors.DictCursor
             )
             with conn.cursor() as cursor:
+                print("📡 正在从数据库读取所有题目数据...")
                 sql = "SELECT id, stem, options, answer, solution FROM questions_tem"
                 cursor.execute(sql)
                 all_questions = cursor.fetchall()
+                print(f"📦 数据库加载完成,共计 {len(all_questions)} 条记录")
             
             new_questions = [q for q in all_questions if q['id'] not in existing_ids]
             total_new = len(new_questions)