app.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. from flask import Flask, request, jsonify
  2. from flask_cors import CORS
  3. from duplicate_checker import QuestionDuplicateChecker
  4. app = Flask(__name__)
  5. CORS(app) # 启用跨域支持
  6. # 初始化查重器(全局单例,避免重复加载索引)
  7. checker = QuestionDuplicateChecker()
  8. @app.route('/api/check_duplicate', methods=['POST'])
  9. def check_duplicate():
  10. """
  11. 题目查重 API 接口 (提交前预检模式)
  12. 参数: stem, options, answer, solution
  13. """
  14. data = request.get_json()
  15. print(f"📥 收到查重请求 (内容比对): {data}")
  16. if not data:
  17. return jsonify({"code": -1, "message": "Missing content"}), 400
  18. # 提取内容字段
  19. question_data = {
  20. "stem": data.get('stem', ''),
  21. "options": data.get('options', ''),
  22. "answer": data.get('answer', ''),
  23. "solution": data.get('solution', '')
  24. }
  25. if not question_data["stem"]:
  26. return jsonify({"code": -1, "message": "stem is required"}), 400
  27. # 确保索引已加载(多进程下避免空索引)
  28. checker.ensure_index_loaded()
  29. # 执行基于内容的查重
  30. result = checker.check_duplicate_by_content(question_data)
  31. # 增加详细日志
  32. top_score = result["top_similar"][0]["similarity"] if result.get("top_similar") else "N/A"
  33. print(f"🔍 查重决策详情: status={result.get('status')}, "
  34. f"is_duplicate={result.get('is_duplicate')}, "
  35. f"max_score={top_score}, "
  36. f"gpt_checked={result.get('gpt_checked', False)}")
  37. if result.get("status") == "error":
  38. return jsonify({"code": -1, "message": result.get("message")}), 500
  39. if result.get("is_duplicate"):
  40. item = result["top_similar"][0]
  41. gpt_info = " (经 GPT-4o 深度核验)" if result.get("gpt_checked") else ""
  42. return jsonify({
  43. "code": -1,
  44. "result": {
  45. "repeatIdList": [{
  46. "questionsId": item["id"],
  47. "repeatMsg": f"相似度: {item['similarity']}{gpt_info}。相似点: {item['similar_point']}"
  48. }]
  49. }
  50. })
  51. else:
  52. return jsonify({"code": 0, "result": "ok"})
  53. @app.route('/api/sync', methods=['POST'])
  54. def sync_index():
  55. """手动触发全量同步接口"""
  56. print("🔄 收到同步索引请求")
  57. try:
  58. checker.ensure_index_loaded()
  59. started = checker.sync_all_from_db()
  60. if started:
  61. return jsonify({"code": 0, "result": "Sync completed"})
  62. return jsonify({"code": 0, "result": "Sync already running"})
  63. except Exception as e:
  64. return jsonify({"code": -1, "message": str(e)}), 500
  65. @app.route('/api/confirm_repeat', methods=['POST'])
  66. def confirm_repeat():
  67. """
  68. 人工确认查重结果接口
  69. 参数: questionId, isRepeat (0: 无相似, 1: 有重复)
  70. """
  71. data = request.get_json()
  72. print(f"📥 收到确认结果请求: {data}")
  73. if not data:
  74. return jsonify({"code": -1, "message": "Missing JSON body"}), 400
  75. question_id = data.get('questionId')
  76. is_repeat = data.get('isRepeat')
  77. if question_id is None or is_repeat is None:
  78. return jsonify({"code": -1, "message": "Missing questionId or isRepeat"}), 400
  79. try:
  80. checker.ensure_index_loaded()
  81. success = checker.confirm_repeat(int(question_id), int(is_repeat))
  82. if success:
  83. return jsonify({"code": 0, "result": "ok"})
  84. else:
  85. return jsonify({"code": -1, "message": "Failed to update"}), 500
  86. except Exception as e:
  87. return jsonify({"code": -1, "message": str(e)}), 500
  88. @app.route('/api/question_info', methods=['GET'])
  89. def get_question_info():
  90. """
  91. 查询题目在向量库中的信息
  92. 参数: questionId
  93. """
  94. question_id = request.args.get('questionId')
  95. if not question_id:
  96. return jsonify({"code": -1, "message": "Missing questionId"}), 400
  97. try:
  98. checker.ensure_index_loaded()
  99. result = checker.get_question_data(int(question_id))
  100. return jsonify({
  101. "code": 0,
  102. "result": result
  103. })
  104. except ValueError:
  105. return jsonify({"code": -1, "message": "Invalid questionId format"}), 400
  106. except Exception as e:
  107. return jsonify({"code": -1, "message": str(e)}), 500
  108. @app.route('/api/index_info', methods=['GET'])
  109. def get_index_info():
  110. """查看当前索引文件路径及条数"""
  111. checker.ensure_index_loaded()
  112. index_count = int(checker.index.ntotal) if checker.index else 0
  113. return jsonify({
  114. "code": 0,
  115. "result": {
  116. "index_path": checker.index_path,
  117. "metadata_path": checker.metadata_path,
  118. "index_count": index_count,
  119. "metadata_count": len(checker.metadata)
  120. }
  121. })
  122. if __name__ == '__main__':
  123. # 启动服务,默认 5000 端口
  124. app.run(host='0.0.0.0', port=8888, debug=False)