# get_all_exchange_words.py (1.9 KB)
  1. # -*- coding: utf-8 -*-
  2. from tools.new_mysql import MySQLUploader
  3. import sys
  4. import os
  5. sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
  6. m = MySQLUploader()
  7. s = "select Word,InflectedWordSpelling,Properties from dictionary_exchange"
  8. r = m.query_data(s)
  9. all_exchange_words = set()
  10. all_exchange_words_dict = {}
  11. all_prototype_deformation_dict = {}
  12. prototype_deformation_dict2 = {}
  13. for i in r:
  14. prototype,deformation,properties= [i[0],i[1],i[2]]
  15. all_exchange_words.update({prototype,deformation})
  16. if properties == "原型":
  17. prototype_deformation_dict2[prototype] = deformation
  18. if deformation not in all_prototype_deformation_dict:
  19. all_prototype_deformation_dict[deformation] = prototype
  20. if prototype not in all_exchange_words_dict:
  21. all_exchange_words_dict[prototype] = [deformation]
  22. if deformation not in all_exchange_words_dict[prototype]:
  23. all_exchange_words_dict[prototype].append(deformation)
  24. def word_to_prototype(word:str) -> str:
  25. """依次按顺序查询。1.先查原型 2.最后小写再查变形对应的原型 3.再查变形对应的原型。这样才能保证,不过滤有特殊意义的大写"""
  26. if word in all_exchange_words_dict:
  27. return word
  28. elif word.lower() in all_exchange_words_dict:
  29. return word.lower()
  30. elif word in all_prototype_deformation_dict:
  31. w = all_prototype_deformation_dict[word]
  32. if w in prototype_deformation_dict2:
  33. w = prototype_deformation_dict2[w]
  34. return w
  35. else:
  36. return word
  37. def get_word_exchange_list(word) -> list:
  38. prototype_word = word_to_prototype(word)
  39. all_exchange_words_list = all_exchange_words_dict.get(prototype_word,[])
  40. return all_exchange_words_list
  41. if __name__ == '__main__':
  42. print(word_to_prototype("was"))
  43. print(word_to_prototype("made"))