# -*- coding:utf-8 -*-
# gpt_check.py
"""
Validation module.

Validators for the various kinds of GPT replies.
"""
  5. import json
  6. import re
  7. from typing import List
  8. from enum import Enum
  9. from pydantic import BaseModel,ValidationError
  10. from tools.loglog import log_err_e
  11. class CheckGptAnswer:
  12. @staticmethod
  13. def default_no_check(gpt_text: str):
  14. return True
  15. @staticmethod
  16. def score_value(gpt_text: str):
  17. if gpt_text.count("【取值0】") > 1:
  18. return False
  19. return True if re.findall("【取值.+?】", gpt_text) else False
  20. @staticmethod
  21. def original_modify(gpt_text: str):
  22. split_text = gpt_text.split("\n")
  23. for t in split_text:
  24. if "修改理由" in t and "错误" in t and len(t)<=25:
  25. return False
  26. elif "没有严重的语法错误" in t:
  27. return False
  28. if "【原句】" in gpt_text and "【修改后】" in gpt_text:
  29. return True
  30. else:
  31. return False
  32. @staticmethod
  33. def count_chinese_characters_50(s: str):
  34. chinese_count = 0
  35. for char in s:
  36. if '\u4e00' <= char <= '\u9fff':
  37. chinese_count += 1
  38. return True if s and chinese_count/len(s) >= 0.5 else False
  39. @staticmethod
  40. def count_english_count_30(s: str,english_words_count=30):
  41. words_count = len(re.findall(r"[a-zA-Z\']+",s))
  42. return True if words_count >= english_words_count else False
  43. @staticmethod
  44. def count_letter_percentages(s:str,letter_percentages=0.8):
  45. count_letter=0
  46. total_length = len(s)
  47. for char in s:
  48. if char.isalpha():
  49. count_letter += 1
  50. result = True if round(count_letter/total_length,2)>letter_percentages else False
  51. return result
  52. class CheckArticleResult:
  53. @staticmethod
  54. def default_no_check(gpt_text: str):
  55. return True
  56. @staticmethod
  57. def get_article_1(gpt_text: str):
  58. try:
  59. json_object = json.loads(gpt_text)
  60. except json.decoder.JSONDecodeError:
  61. return False
  62. if not all(i in json_object for i in ["englishArticle","chineseArticle","difficultSentences","usedMeanIds","questions"]):
  63. return False
  64. try:
  65. english_article = json_object['englishArticle']
  66. words_count_pct = len(re.findall(r"[^\u4e00-\u9fff]", english_article)) / len(english_article)
  67. if words_count_pct < 0.15:
  68. return False
  69. chinese_article = json_object['chineseArticle']
  70. words_count_pct = len(re.findall(r"[^\u4e00-\u9fff]", chinese_article)) / len(chinese_article)
  71. if words_count_pct > 0.85:
  72. return False
  73. analysis = json_object['questions'][0]['analysis']
  74. words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis))/len(analysis)
  75. if words_count_pct>0.5:
  76. return False
  77. except Exception as e:
  78. log_err_e(e,"get_article_1函数校验")
  79. return False
  80. return True
  81. class IsRight(Enum):
  82. RIGHT = 1
  83. WRONG = 0
  84. class Options(Enum):
  85. A = "A"
  86. B = "B"
  87. C = "C"
  88. D = "D"
  89. class DifficultSentence(BaseModel):
  90. english: str
  91. chinese: str
  92. class Candidate(BaseModel):
  93. label: Options
  94. text: str
  95. isRight: IsRight
  96. class Question(BaseModel):
  97. trunk: str
  98. analysis: str
  99. candidates: List[Candidate]
  100. class Article(BaseModel):
  101. difficultSentences: List[DifficultSentence]
  102. usedMeanIds: List[int]
  103. questions: List[Question]
  104. englishArticle: str
  105. chineseArticle: str
  106. class Annotation(BaseModel):
  107. annotation_text:str
  108. if __name__ == '__main__':
  109. text = """{
  110. "difficultSentences": [
  111. {
  112. "english": "However, even on his sick days, James carries a spirit of courage and never lets himself rest for too long.",
  113. "chinese": "然而,即使在他生病的日子里,詹姆斯仍带着勇气,从未让自己放松太久。"
  114. }
  115. ],
  116. "usedMeanIds": [749, 1945, 1597, 1953, 2038, 2162, 1625],
  117. "englishArticle": "Overcoming Illness with Courage\n\nJames has always been an active person. Whether it is running in the park, playing basketball, or hiking in the mountains, he never likes to rest. However, last week, he suddenly felt a horrible headache. Alongside the headache, he experienced toothache and fever. The illness made him sick and he had to lie in bed. Not only was he unable to move around freely, but he also had to place an ice pack on his knee and neck to ease the pain.\n\nHis family was worried and thought it would be a serious matter. His mother sat beside him, asking if he needed a break or any help. James, showing his spirit of courage, assured her that he would recover soon. He understood that even when life presents challenges, they must be faced with bravery.\n\nHowever, even on his sick days, James carries a spirit of courage and never lets himself rest for too long. With each passing day, he begins to feel a bit better. The fever decreases, and his headaches become less frequent. Soon, he looks forward to the day he can return to his active lifestyle, encouraged by the fact that he fought his illness with bravery and determination.",
  118. "chineseArticle": "勇敢地战胜疾病\n\n詹姆斯一直是个活跃的人。无论是在公园跑步,打篮球,还是爬山,他从不喜欢放松。然而,上周他突然感到头痛欲裂。除了头痛外,他还感到牙痛和发烧。这场病让他生病了,他不得不躺在床上。他不仅无法自由活动,还得在膝盖和颈部放冰袋来缓解疼痛。\n\n他的家人很担心,觉得这会是个严重的问题。他的母亲坐在他旁边,询问他是否需要间歇或者帮忙。詹姆斯展现了他的勇气,向她保证他会很快康复。他明白,即使生活给出挑战,也必须勇敢面对。\n\n然而,即使在他生病的日子里,詹姆斯仍带着勇气,从未让自己放松太久。随着每一天的过去,他开始感觉好一点了。发烧开始下降,头痛也减少了。他期待着能回到他积极的生活方式,鼓舞于他凭借勇气和决心战胜了疾病。",
  119. "questions": [
  120. {
  121. "trunk": "What activities does James enjoy?",
  122. "analysis": "根据文章,詹姆斯喜欢活跃的生活方式,比如跑步、打篮球和登山。",
  123. "candidates": [
  124. {
  125. "label": "A",
  126. "text": "Swimming",
  127. "isRight": 0
  128. },
  129. {
  130. "label": "B",
  131. "text": "Running, basketball, and hiking",
  132. "isRight": 1
  133. },
  134. {
  135. "label": "C",
  136. "text": "Reading books",
  137. "isRight": 0
  138. },
  139. {
  140. "label": "D",
  141. "text": "Painting",
  142. "isRight": 0
  143. }
  144. ]
  145. },
  146. {
  147. "trunk": "How did James's family react to his illness?",
  148. "analysis": "文章中提到,詹姆斯的家人担心他的健康情况。",
  149. "candidates": [
  150. {
  151. "label": "A",
  152. "text": "They were indifferent",
  153. "isRight": 0
  154. },
  155. {
  156. "label": "B",
  157. "text": "They were worried",
  158. "isRight": 1
  159. },
  160. {
  161. "label": "C",
  162. "text": "They laughed",
  163. "isRight": 0
  164. },
  165. {
  166. "label": "D",
  167. "text": "They scolded him",
  168. "isRight": 0
  169. }
  170. ]
  171. },
  172. {
  173. "trunk": "What symptoms did James experience?",
  174. "analysis": "根据文章,詹姆斯有头痛、牙痛和发烧等症状。",
  175. "candidates": [
  176. {
  177. "label": "A",
  178. "text": "Coughing and sneezing",
  179. "isRight": 0
  180. },
  181. {
  182. "label": "B",
  183. "text": "Headache, toothache, and fever",
  184. "isRight": 1
  185. },
  186. {
  187. "label": "C",
  188. "text": "Stomach ache",
  189. "isRight": 0
  190. },
  191. {
  192. "label": "D",
  193. "text": "Sore throat",
  194. "isRight": 0
  195. }
  196. ]
  197. },
  198. {
  199. "trunk": "How did James approach his recovery?",
  200. "analysis": "詹姆斯通过保持勇气和决心来对待他的康复,并没有让自己放松太久。",
  201. "candidates": [
  202. {
  203. "label": "A",
  204. "text": "With defeat",
  205. "isRight": 0
  206. },
  207. {
  208. "label": "B",
  209. "text": "With carelessness",
  210. "isRight": 0
  211. },
  212. {
  213. "label": "C",
  214. "text": "With courage and determination",
  215. "isRight": 1
  216. },
  217. {
  218. "label": "D",
  219. "text": "With anger",
  220. "isRight": 0
  221. }
  222. ]
  223. }
  224. ]
  225. }"""
  226. text2 = """{\n "englishArticle": "Simple Agreement in the Office\nSarah is a bright girl. She works as a clerk in a busy office. One day, her boss asked her to prepare a forecast about migration for the company. They had made an agreement to share their ideas every Friday.\nSarah worked hard and talked with other clerks. She used numbers and information to make her forecast. On Friday, Sarah and her friends joined a meeting room. They shared their agreements and ideas with the boss. The boss was happy because Sarah’s forecast was simple and easy to understand. All the clerks learned something new that day.",\n "chineseArticle": "办公室里的简单协议\n莎拉是个聪明的女孩。她在一家忙碌的办公室做职员。有一天,她的老板让她为公司准备一个有关迁移的预报。他们已经达成协议,每周五分享彼此的想法。\n莎拉很努力地工作,还与其他店员交流。她用数据和信息来做预测。周五,莎拉和她的朋友们一起进入会议室。他们和老板分享了自己的协议和想法。老板很开心,因为莎拉的预报简单易懂。那一天,所有的职员都学到了新东西。",\n "difficultSentences": [\n {\n "english": "They had made an agreement to share their ideas every Friday.",\n "chinese": "他们已经达成协议,每周五分享彼此的想法。"\n }\n ],\n "usedMeanIds": [1743, 2495, 5069, 1826, 1451],\n "questions": [\n {\n "trunk": "Who is the bright girl in the office?",\n "analysis": "由文章一开始介绍可知,莎拉是那个聪明的女孩,注意这考察对文章角色的理解。",\n "candidates": [\n {"label":"A", "text":"Sarah", "isRight":1},\n {"label":"B", "text":"The boss", "isRight":0},\n {"label":"C", "text":"The cook", "isRight":0},\n {"label":"D", "text":"John", "isRight":0}\n ]\n },\n {\n "trunk": "What did Sarah's boss ask her to prepare?",\n "analysis": "文章第二句明确提到老板让莎拉准备关于迁移的预报,考查文中信息定位。",\n "candidates": [\n {"label":"A", "text":"A forecast about migration", "isRight":1},\n {"label":"B", "text":"A birthday party", "isRight":0},\n {"label":"C", "text":"A sports game", "isRight":0},\n {"label":"D", "text":"A song", "isRight":0}\n ]\n },\n {\n "trunk": "When do Sarah and her friends share their ideas?",\n "analysis": "文章有一句话提到,他们每周五分享想法,此题考察细节记忆。",\n "candidates": [\n {"label":"A", "text":"Every Friday", "isRight":1},\n {"label":"B", "text":"Every Monday", "isRight":0},\n {"label":"C", 
"text":"Every Sunday", "isRight":0},\n {"label":"D", "text":"Every morning", "isRight":0}\n ]\n },\n {\n "trunk": "Why was the boss happy with Sarah's forecast?",\n "analysis": "文章最后一句说明老板因为莎拉的预报简单易懂而开心,此题考查对原因的理解。",\n "candidates": [\n {"label":"A", "text":"It was simple and easy to understand", "isRight":1},\n {"label":"B", "text":"It was very long", "isRight":0},\n {"label":"C", "text":"It was colorful", "isRight":0},\n {"label":"D", "text":"It had pictures", "isRight":0}\n ]\n }\n ]\n}"""
  227. json_text = json.loads(repr(text2))