# test_similarity.py

import json
import sys

import numpy as np

from duplicate_checker import QuestionDuplicateChecker
  4. checker = QuestionDuplicateChecker()
  5. user_input = {
  6. 'stem': '如图,两个同心圆中,大圆的半径为$5$,小圆的半径为$3$,若大圆的弦$AB$与小圆有公共点,则$AB$的取值范围是( )',
  7. 'options': '{"A": "$8\\le AB\\le10$", "B": "$8<AB\\le10$", "C": "$4\\le AB\\le5$", "D": "$4<AB\\le5$"}',
  8. 'answer': 'A',
  9. 'solution': '如图,过$O$点作$OC\\perp AB$于$C$,连接$OA$,则$AC=BC$。当$AB$与小圆相切时,大圆的弦$AB$与小圆有唯一公共点,$OC$取最大值$3$,此时$AC$取最小值,为$\\sqrt{5^2-3^2}=4$,∴弦$AB$的最小值为$2\\times4=8$;当点$C$与$O$重合时,$AB$的值最大,$AB$为大圆的直径,即$AB$的最大值为$10$,∴$AB$的取值范围是$8\\le AB\\le10$。<image src="https://file.chunsunqiuzhu.com/data/2026/01/17/20260117180759A644.png"/>'
  10. }
  11. # Fetch ID 3094
  12. q3094 = checker.fetch_question_from_db(3094)
  13. if not q3094:
  14. print("Question 3094 not found")
  15. exit()
  16. print(f"Comparing user input with ID 3094\n")
  17. # Calculate embeddings for each part to see where it drops
  18. parts = ['stem', 'options', 'answer', 'solution']
  19. for p in parts:
  20. u_text = user_input.get(p, '')
  21. db_text = str(q3094.get(p, '') or '')
  22. u_emb = checker.get_embedding(u_text)
  23. db_emb = checker.get_embedding(db_text)
  24. if u_emb is not None and db_emb is not None:
  25. # Normalize
  26. u_emb = u_emb / np.linalg.norm(u_emb)
  27. db_emb = db_emb / np.linalg.norm(db_emb)
  28. sim = np.dot(u_emb, db_emb)
  29. print(f"Part {p} similarity: {sim:.4f}")
  30. else:
  31. print(f"Part {p} failed to get embedding")
  32. mega_u = checker.get_weighted_embedding(user_input)
  33. mega_db = checker.get_weighted_embedding(q3094)
  34. if mega_u is not None and mega_db is not None:
  35. overall_sim = np.dot(mega_u, mega_db)
  36. print(f"\nOverall weighted similarity: {overall_sim:.4f}")
  37. else:
  38. print("\nMega vector calculation failed")