# get_all_exchange_words.py
  1. # -*- coding: utf-8 -*-
  2. from tools.new_mysql import MySQLUploader
  3. import sys
  4. import os
  5. sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
  6. m = MySQLUploader()
  7. s = "select Word,InflectedWordSpelling,Properties from dictionary_exchange"
  8. r = m.query_data(s)
  9. m.close_connection()
  10. all_exchange_words = set()
  11. all_exchange_words_dict = {}
  12. all_prototype_deformation_dict = {}
  13. prototype_deformation_dict2 = {}
  14. for i in r:
  15. prototype,deformation,properties= [i[0],i[1],i[2]]
  16. all_exchange_words.update({prototype,deformation})
  17. if properties == "原型":
  18. prototype_deformation_dict2[prototype] = deformation
  19. if deformation not in all_prototype_deformation_dict:
  20. all_prototype_deformation_dict[deformation] = prototype
  21. if prototype not in all_exchange_words_dict:
  22. all_exchange_words_dict[prototype] = [deformation]
  23. if deformation not in all_exchange_words_dict[prototype]:
  24. all_exchange_words_dict[prototype].append(deformation)
  25. def word_to_prototype(word:str) -> str:
  26. """依次按顺序查询。1.先查原型 2.最后小写再查变形对应的原型 3.再查变形对应的原型。这样才能保证,不过滤有特殊意义的大写"""
  27. if word in all_exchange_words_dict:
  28. return word
  29. elif word.lower() in all_exchange_words_dict:
  30. return word.lower()
  31. elif word in all_prototype_deformation_dict:
  32. w = all_prototype_deformation_dict[word]
  33. if w in prototype_deformation_dict2:
  34. w = prototype_deformation_dict2[w]
  35. return w
  36. else:
  37. return word
  38. def get_word_exchange_list(word) -> list:
  39. prototype_word = word_to_prototype(word)
  40. all_exchange_words_list = all_exchange_words_dict.get(prototype_word,[])
  41. return all_exchange_words_list
  42. if __name__ == '__main__':
  43. print(word_to_prototype("was"))
  44. print(word_to_prototype("made"))