get_article.py
# -*- coding: utf-8 -*-
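"""
Article-generation pipeline: GPT writes a graded English article from seed
word meanings, the article is translated sentence by sentence, each word is
mapped to a meaning id, the assembled JSON is uploaded to OSS, and the
teaching system is notified through the callback URL.
"""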
import random
from gpt.chatgpt import get_answer_from_gpt
from tools.new_mysql import MySQLUploader
from tools.loglog import logger
from tools.thread_pool_manager import pool_executor
from common.common_data import all_exchange_words
from common.split_text import *
from data.get_all_exchange_words import get_word_exchange_list, word_to_prototype
import requests
import oss2
from oss2.credentials import EnvironmentVariableCredentialsProvider
from collections import OrderedDict
from cachetools import TTLCache
from concurrent.futures import Future, wait
from random import randint
import re
import json
import time
import traceback


class OtherBaseFunction:
    def __init__(self):
        self.m = MySQLUploader()
        self.fake_meaningid = {}
        self.callback_url_dict = {}
        self.real_ip_dict = {}
        self.demo_name = {}
        self.query_cache_wordspelling = TTLCache(maxsize=2000, ttl=86400)
        self.query_cache_meaningid = TTLCache(maxsize=2000, ttl=86400)

    @staticmethod
    def _difficulty_control(student_stage, vocabulary) -> dict:
        """
        Select difficulty settings from the student's stage or vocabulary size.
        :param student_stage: school stage, 1/2/3 = primary / junior high / senior high
        :param vocabulary: the student's vocabulary size: 1200 primary, 2400 junior high, 4800 senior high
        :return: dict with a difficulty description, a paragraph count and a paragraph requirement string
        """
        if vocabulary <= 1200:
            difficult_control = {"difficult_desc": "最简单最基础的入门的初级的幼儿园的毫无难度的", "paragraph_count": 1, "student_stage_str": "小学",
                                 "paragraph_desc": "生成的文章要求100词左右,三个段落以上。允许有简单句式的出现。"}
        elif 1200 < vocabulary <= 2400:
            difficult_control = {"difficult_desc": "简单的容易的常见的难度低的", "paragraph_count": 3, "student_stage_str": "初中",
                                 "paragraph_desc": r"生成的文章要求150词左右,三个段落以上。用\n\n分段。"}
        else:
            difficult_control = {"difficult_desc": "常见的初级的中国高考的", "paragraph_count": 5, "student_stage_str": "高中",
                                 "paragraph_desc": r"生成的文章要求250词左右,允许有3-5个段落。用\n\n分段。"}
        return difficult_control
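    # For example, _difficulty_control(2, 2400) selects the junior-high profile:
    # {"difficult_desc": "简单的容易的常见的难度低的", "paragraph_count": 3,
    #  "student_stage_str": "初中", "paragraph_desc": "生成的文章要求150词左右,..."}
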
    def _get_article_chinese_dict(self, title, r_article_sentences, task_id):
        """
        Fetch the Chinese translation of the article. Note: the sentence splitting
        here must stay consistent with split_article_make_json below.
        :param title: article title
        :param r_article_sentences: sentence list of the article that passed the new-word check
        :return: dict mapping each English sentence to its Chinese translation
        """
        def get_chinese_from_gpt(whole_article_sentences: list):
            q = f"""你是一名在中国的英语教师,下面我会为你提供一个英语句子的列表,请按列表顺序将每个句子翻译成中文,结果按列表顺序放在chinese为键的json数组内。
            英语句子列表:{whole_article_sentences}
            要求:
            1.中文翻译的结果要按列表的顺序,依次放入sentence数组。回复的中文数量要与英语句子列表的数量一样,不要漏下。
            2.回复json,格式:{{"chinese":[sentence1,sentence2...]}}
            """
            real_ip = self.real_ip_dict[task_id]
            demo_name = self.demo_name[task_id]
            for cou in range(3):
                try:
                    r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
                    r_article_chinese_list = r_json.get("chinese")
                    if len(r_article_chinese_list) == len(whole_article_sentences):
                        r_article_chinese_dict = {k: str(v) for k, v in zip(whole_article_sentences, r_article_chinese_list)}
                        return r_article_chinese_dict
                    logger.warning(f"Warning: attempt {cou + 1}: translated sentence count does not match the source list")
                except json.decoder.JSONDecodeError:
                    logger.error("GPT article translation: reply was not valid JSON")
                except Exception as e:
                    logger.error(f"GPT article translation: unexpected error. {type(e).__name__} {e}")
            logger.critical("Critical: GPT article translation failed on all three attempts; needs admin attention")

        article_list = [title + "\n\n"] + r_article_sentences
        r_article_chinese_dict = get_chinese_from_gpt(whole_article_sentences=article_list)
        if r_article_chinese_dict:
            return r_article_chinese_dict
    @staticmethod
    def _calculate_new_word_rate(r_article_sentences):
        article = "".join(r_article_sentences)
        new_words = set()
        test_article = re.findall(r'\b\w+\'?\w*\b', article)
        for word in test_article:
            word2: str = word.split("'")[0] if "'" in word else word
            if len(word) <= 2:
                continue
            is_in_12000words = any([word2.lower() in all_exchange_words, word2.title() in all_exchange_words])
            if not is_in_12000words:
                new_words.add(word)
        # Note: the rate divides unique new words by the article's character count,
        # so the 0.03 threshold applied by callers is a per-character ratio.
        new_word_rate = round(len(new_words) / len(article), 3)
        logger.info(f"Debug: new-word rate {new_word_rate}. New words: {new_words}")
        new_words = list(new_words)
        return new_word_rate, new_words
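    # e.g. an 800-character article with 20 unique new words gives
    # round(20 / 800, 3) == 0.025, which passes the < 0.03 check used by callers.
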
    def insert_article_to_mysql(self, title, article, chinese, task_id, code=0):
        self.m.execute_("INSERT INTO new_word_article (title,article,chinese, taskId,code) VALUES (%s, %s,%s,%s,%s)",
                        (title, article, chinese, task_id, code))
    def get_wordid_by_wordspelling(self, wordspelling: str):
        """TODO: if the big dictionary lacks this word, auto-insert it and return the new id."""
        if wordspelling in self.query_cache_wordspelling:  # check the spelling cache, not the meaning-id cache
            return self.query_cache_wordspelling[wordspelling]
        s = "select Id from dictionary_word where wordspelling = %s"
        prototype_word = word_to_prototype(wordspelling)
        r = self.m.query_data(s, (prototype_word,))
        if r:
            wordid = r[0][0]
        else:
            wordid = 0
        self.query_cache_wordspelling[wordspelling] = wordid
        return wordid
    def get_meaning_by_meaningid(self, meaningid: int):
        """TODO: if the big dictionary lacks this meaning, auto-insert it and return the new id."""
        if meaningid in self.query_cache_meaningid:
            return self.query_cache_meaningid[meaningid]
        s = "select WordMeaning from dictionary_meaningitem where Id = %s"
        r = self.m.query_data(s, (meaningid,))
        meaning = r[0][0] if r else ""
        self.query_cache_meaningid[meaningid] = meaning
        return meaning

    def _get_fake_meaningid(self, word):
        """Get a stand-in meaning id, guaranteeing the same word always maps to the same id."""
        if word in self.fake_meaningid:
            return self.fake_meaningid[word]
        s = "select Id from dictionary_meaningitem where WordSpelling = %s"
        r = self.m.query_data(s, (word,))
        if r:
            fake_meaningid = r[0][0]
        else:
            fake_meaningid = random.randint(10000, 99999)
        self.fake_meaningid[word] = fake_meaningid
        return fake_meaningid
    @staticmethod
    def _clean_gpt_res(single_sentence: str, gpt_text: str, split_words: list) -> list:
        """Parse the GPT reply into [sentence, spelling, meaning_id] triples
        (callers key them as sentence + spelling -> meaning id)."""
        return_data = []
        if not gpt_text:
            return []
        row_data = [i for i in gpt_text.split("\n") if "**" in i]
        already_spelling = set()
        for row in row_data:
            one_row_data_list = row.split("**")
            if len(one_row_data_list) < 2:  # a valid row needs both the spelling and the meaning id
                continue
            one_row_data_list = [i.strip() for i in one_row_data_list]
            spelling, meaning_id = one_row_data_list[0:2]
            already_spelling.add(spelling)
            return_data.append([single_sentence, spelling, int(meaning_id)])
        # Any word GPT skipped gets meaning id 0 so it can be re-queried later.
        for remaining_word in set(split_words).difference(already_spelling):
            return_data.append([single_sentence, remaining_word, 0])
        return return_data
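    # Example: for sentence S with split_words ["beauty", "apple", "tree"] and the
    # GPT reply "beauty ** 302816\napple ** 234567", _clean_gpt_res returns
    # [[S, "beauty", 302816], [S, "apple", 234567], [S, "tree", 0]].
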


class GetArticle(OtherBaseFunction):
    def __init__(self):
        super().__init__()
        self.auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
        self.bucket = oss2.Bucket(self.auth, 'oss-cn-hangzhou.aliyuncs.com', 'qingti-private')
        self.article_result = {}
        self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
        all_exchange_words.update(self.punctuation)

    def __del__(self): ...
    def submit_task(self, words_meaning_ids: list[int], callback_url: str, real_ip: str, demo_name: str,
                    student_stage: int, vocabulary: int, class_id: int):
        """
        words_meaning_ids: meaning ids used to generate the article, e.g. [110, 111, 112, 113, 114]
        callback_url: callback address notified on completion
        demo_name: project name
        student_stage: school stage, 1/2/3
        vocabulary: the student's vocabulary size, e.g. 500
        """
        task_id = randint(10000000, 99999999)
        logger.info(f"Article task created. task_id: {task_id}, meaning ids: {words_meaning_ids}.")
        self.callback_url_dict[task_id] = callback_url
        self.real_ip_dict[task_id] = real_ip
        self.demo_name[task_id] = demo_name
        words_meaning_str = ""
        for wordmeaning_id in words_meaning_ids:
            r = self.m.query_data("select WordSpelling,WordMeaning from dictionary_meaningitem where Id = %s", (wordmeaning_id,))
            try:
                words_meaning_str += str(r[0])
            except IndexError:
                err_msg = f"Task submission failed. task_id: {task_id}, meaning id {wordmeaning_id} does not exist in the meaning table"
                logger.error(err_msg)
                return err_msg
        try:
            pool_executor.submit(self.run_task, words_meaning_str, task_id, student_stage, vocabulary, class_id)
            resp_result = {"id": task_id, "key": f"study/article/{task_id}"}
            logger.success(f"Article task submitted: {resp_result}")
            return resp_result
        except Exception as e:
            err_msg = f"GetArticle task submission failed: {type(e).__name__}, {e}"
            logger.error(err_msg)
            return err_msg
    def __get_article(self, words_meaning_str, task_id, student_stage, vocabulary) -> tuple:
        dc = self._difficulty_control(student_stage, vocabulary)
        q = f"""你是一名在中国的英语教师,下面我会为你提供一些带中文词义的英语种子单词,请根据这些种子单词的词义,生成一篇带标题的英语文章。
        提供种子单词:{words_meaning_str}
        要求:
        1.必须用提供的这个词义的单词,文章的其他单词使用{dc["difficult_desc"]}单词。
        2.文章应以自然、母语水平的英语撰写。请仅使用与种子单词难度相同或更简单的词汇,避免使用更高级的词汇和复杂的句子结构。请使用常用的高频英语词汇,避免使用不常见或专业的词汇。种子单词可以在文章中任意位置出现,不限制顺序。
        3.{dc["paragraph_desc"]},为确保词汇难度符合要求,请仅使用 **中国教育部{dc['student_stage_str']}英语词汇表** 中的单词。
        4.请将文章返回一个一个带标点的句子,放在article_sentences里面的数组里。如果有分段,必须请在句子后面加\\n\\n。
        5.回复json,格式:{{"title":标题,"article_sentences":[句子1,句子2]}}
        """
        try:
            real_ip = self.real_ip_dict[task_id]
            demo_name = self.demo_name[task_id]
            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
            r_article_sentences = r_json.get("article_sentences")
            r_title = r_json.get("title")
            return r_title, r_article_sentences
        except json.decoder.JSONDecodeError:
            logger.error("GPT article generation: reply was not valid JSON")
        except Exception as e:
            logger.error(f"GPT article generation: unexpected error. {type(e).__name__} {e}")
    def __replace_new_word(self, old_article: str, new_words: list, task_id: int):
        new_words_str = ",".join(new_words)
        q = f"""你是一名在中国的英语教师,下面我会为你提供一篇英语文章和一些生词,请使用简单、常见、难度低的其他单词将英语文章中的生词进行替换。
        缩写引号用单引号'。最终回复替换后的英语文章。
        英语文章:{old_article}
        生词:{new_words_str}
        要求:
        1.替换掉所有生词,替换单词使用简单、常见、难度低的单词。
        2.生成的文章要求150词左右,可以分段。
        3.回复json,格式:{{"title":标题,"article":英语文章}}
        """
        try:
            real_ip = self.real_ip_dict[task_id]
            demo_name = self.demo_name[task_id]
            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
            logger.debug(f"Replacement reply: {r_json}")
            r_article = r_json.get("article")
            r_title = r_json.get("title")
            return r_title, r_article
        except json.decoder.JSONDecodeError:
            logger.error("GPT new-word replacement: reply was not valid JSON")
        except Exception as e:
            logger.error(f"GPT new-word replacement: unexpected error. {type(e).__name__} {e}")
    def run_get_article_task(self, words_meaning_str, task_id, student_stage, vocabulary) -> tuple:
        """
        :param vocabulary: the student's vocabulary size
        :param student_stage: school stage, 1/2/3
        :param words_meaning_str: concatenated word/meaning string queried from the database
        :param task_id: article task id
        :return: title, article sentence list, sentence-to-Chinese dict
        """
        def get_article_chinese(title, r_article_sentences, task_id, code=0) -> tuple:
            r_article_chinese_dict = self._get_article_chinese_dict(title, r_article_sentences, task_id)
            chinese_str = "\n".join(r_article_chinese_dict.values())
            r_article = "".join(r_article_sentences)
            self.insert_article_to_mysql(title=title, article=r_article, chinese=chinese_str, task_id=task_id, code=code)
            return title, r_article_sentences, r_article_chinese_dict

        r_title, r_article_sentences = self.__get_article(words_meaning_str, task_id, student_stage, vocabulary)
        new_word_rate, new_words = self._calculate_new_word_rate(r_article_sentences)
        if new_word_rate < 0.03:
            return get_article_chinese(title=r_title, r_article_sentences=r_article_sentences, task_id=task_id)
        replace_article_gpt = "".join(r_article_sentences)
        for i in range(3):
            if tuple_data := self.__replace_new_word(old_article=replace_article_gpt, new_words=new_words, task_id=task_id):
                r_title, replace_article_gpt = tuple_data
                new_word_rate, new_words = self._calculate_new_word_rate(replace_article_gpt)
                if new_word_rate < 0.03 or i == 2:
                    if i == 2:
                        logger.warning(f"New-word rate still above 3% after 3 replacement rounds. task_id: {task_id}")
                    # Note: the pre-replacement sentence list is returned here; the replaced
                    # text is a plain string and is never re-split into sentences.
                    return get_article_chinese(title=r_title, r_article_sentences=r_article_sentences, task_id=task_id)
    def split_article_make_json(self, task_id: int, title: str, r_article_sentences: list, r_article_chinese_dict: dict):
        article = "".join(r_article_sentences)
        article = title + "\n\n" + article
        all_sentence_word_meaningid_dict = self.run_query_word_meaning(article, task_id)
        word_count = get_article_words_count(article)  # article already includes the title, so count it once
        create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        outside_json_dict = {"id": task_id, "body": article, "wordCount": word_count, "paragraphs": [],
                             "createTime": create_time}
        article_paragraphs = article.split("\n\n")
        article_sentence_count = 0
        for paragraph in article_paragraphs:
            sentences = split_text_to_sentences(paragraph)
            p = {"sentences": []}
            for single_sentence in sentences:
                article_sentence_count += 1
                single_sentence_chinese = r_article_chinese_dict.get(single_sentence, "")
                w = {"words": [], "chinese": single_sentence_chinese}
                split_words: list[str] = re.findall(r'\b[-\'\w]+\b|[^\w\s]', single_sentence)
                for original_word in split_words:
                    single_word = original_word
                    if not original_word:
                        continue
                    if not re.search(r'[a-zA-Z]', original_word):
                        w["words"].append({"spell": original_word, "type": "punctuation"})
                        continue
                    word_id = self.get_wordid_by_wordspelling(original_word)
                    # meaning lookup cascade: the word as written, then lower-cased, then its prototype form
                    x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + original_word, [0, 0])
                    if type_ == 0:
                        single_word = original_word.lower()
                        x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0, 0])
                    if type_ == 0:
                        single_word = word_to_prototype(single_word)
                        x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0, 0])
                    if type_ == 0:
                        logger.warning(f"Warning: type_ is still 0, so even the second query returned no meaning. Skipped word: {original_word}")
                        continue
                    if type_ == 1:
                        meaning_id = x_data
                        meaning = self.get_meaning_by_meaningid(x_data)
                    elif type_ == 2:
                        meaning_id = self._get_fake_meaningid(single_word)
                        meaning = x_data
                    else:
                        logger.error(f"Error: unknown type_: {type_}")
                        meaning_id = 9999999
                        meaning = '无'
                    word_prototype = word_to_prototype(original_word)
                    word_json = {"id": word_id, "meaningId": meaning_id, "meaning": meaning, "spell": original_word,
                                 "exchanges": get_word_exchange_list(word=single_word), "prototype": word_prototype}
                    w["words"].append(word_json)
                p["sentences"].append(w)
            outside_json_dict["paragraphs"].append(p)
        outside_json_dict["articleSentenceCount"] = article_sentence_count
        return outside_json_dict, word_count, article_sentence_count
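    # The JSON assembled above has roughly this shape (values here are illustrative):
    # {"id": 12345678, "body": "...", "wordCount": 260, "createTime": "2024-01-01 12:00:00",
    #  "articleSentenceCount": 18,
    #  "paragraphs": [{"sentences": [{"chinese": "…",
    #                                 "words": [{"id": 1, "meaningId": 302816, "meaning": "美",
    #                                            "spell": "beauty", "exchanges": [...], "prototype": "beauty"},
    #                                           {"spell": ".", "type": "punctuation"}]}]}]}
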
    def run_query_word_meaning(self, article, task_id):
        futures = []
        article_paragraphs = article.split("\n\n")
        for paragraph in article_paragraphs:
            sentences = split_text_to_sentences(paragraph)
            for single_sentence in sentences:
                f = pool_executor.submit(self.query_word_meaning_from_gpt, single_sentence, task_id)
                futures.append(f)
        wait(futures)
        all_sentence_word_meaningid_dict = {}
        for f in futures:
            f_result = f.result()
            all_sentence_word_meaningid_dict.update(f_result)
        return all_sentence_word_meaningid_dict
    def query_word_meaning_from_gpt(self, single_sentence, task_id) -> dict:
        """Submit a single sentence and resolve a meaning id for each of its words."""
        split_words = split_text_to_word(single_sentence)
        split_words = [word_to_prototype(w) for w in split_words if w]
        placeholders = ', '.join(['%s'] * len(split_words))
        sql = f"SELECT WordSpelling, Id, WordMeaning FROM dictionary_meaningitem WHERE WordSpelling IN ({placeholders})"
        r = self.m.query_data(sql, split_words)
        list_of_tuples = list(r)
        sorted_list_of_tuples = sorted(list_of_tuples, key=lambda x: split_words.index(x[0]))
        insert_question_data = OrderedDict()
        for spelling, meaning_id, word_meaning in sorted_list_of_tuples:
            if spelling not in insert_question_data:
                insert_question_data[spelling] = [(meaning_id, word_meaning)]
            else:
                insert_question_data[spelling].append((meaning_id, word_meaning))
        insert_question_data_list = [f"{spelling} 词义组:{data}" for spelling, data in insert_question_data.items()]
        insert_question_data_str = "\n".join(insert_question_data_list)
        q = f"""我会给你一个[英语句子]和[数据组],[数据组]由句子中的每个[固定单词]和[词义组]两部分组成,[词义组]又由多个(词义id,词义)组成。
        我需要你帮我根据[英语句子]的语境,为每个[固定单词]挑选词义组内词义最贴近的id。按示例回复。
        要求:
        1.不用考虑词性,只要和英语句子中的词义相近就行。一个固定单词只对应一个词义id。
        2.如果提供的[词义组]内没有句子对应的词义,返回id为0,例如:[固定单词] ** 0
        3.回复的每行由固定单词,id两个部分组成,每个部分中间用**分隔。
        4.所有固定单词都要回复,不要漏下。
        英语句子:{single_sentence}.
        数据组:\n{insert_question_data_str}
        回复示例:
        beauty ** 302816
        apple ** 234567
        """
        real_ip = self.real_ip_dict[task_id]
        demo_name = self.demo_name[task_id]
        r_gpt = get_answer_from_gpt(q, real_ip=real_ip, demo_name=demo_name)
        already_data, need_twice_data = {}, []
        three_list = self._clean_gpt_res(single_sentence, r_gpt, split_words)
        for sentence, spelling, meaning_id in three_list:
            if meaning_id == 0:
                need_twice_data.append([sentence, spelling, meaning_id])
            else:
                already_data[sentence + spelling] = [meaning_id, 1]
        if need_twice_data:
            # batch every unmatched word into a single follow-up query rather than one GPT call per word
            need_twice_words = ",".join([spelling for _, spelling, _ in need_twice_data])
            q2 = f"""我会给你一个英语句子,和句子中的几个单词。请给我这几个单词在句子中的中文词义。按示例回复json数据。
            英语句子:{single_sentence}
            单词:{need_twice_words}
            要求:
            1.给到的单词都要回复其中文词义。
            2.回复的json,以单词为键,它的中文词义为值。
            回复示例:
            {{"单词":"中文词义",...}}
            """
            r2 = get_answer_from_gpt(q2, real_ip=real_ip, demo_name=demo_name, json_resp=True)
            r2_json: dict = json.loads(r2)
            for w_spelling, chinese_meaning in r2_json.items():
                already_data[single_sentence + w_spelling] = [chinese_meaning, 2]
        return already_data
    def upload_json_file_to_oss(self, article_id: int, data_dict: dict):
        json_data = json.dumps(data_dict, ensure_ascii=False)
        object_name = f'study/article/{article_id}'
        content = json_data.encode('utf-8')
        for _ in range(2):
            try:
                r = self.bucket.put_object(object_name, content)
            except Exception as e:
                logger.error(f"OSS upload error {type(e).__name__} {e}, taskid: {article_id}")
                continue
            if r.resp.status == 200:
                logger.success(f"OSS upload succeeded {article_id}")
                return True
        # reached only after both attempts failed (exception or non-200 status)
        logger.critical(f"OSS upload failed twice, taskid: {article_id}")
    def notice_teach_system(self, article_id: int, class_id: int, word_count: int, article_sentence_count: int):
        url = self.callback_url_dict.get(article_id)
        if not url or "localhost/callback" in url:
            return False
        json_data = {"classId": class_id, "articleId": article_id, "articleWordCount": word_count,
                     "articleSentenceCount": article_sentence_count}
        for _ in range(3):
            try:
                r = requests.post(url, json=json_data)
                r.raise_for_status()
                self.callback_url_dict.pop(article_id, '')
                logger.success(f"Callback succeeded: {r.text}")
                return True
            except Exception as e:
                logger.warning(f"{type(e).__name__} {e}")
        logger.critical(f"Callback failed on all three attempts. article_id: {article_id} callback_url: {url}")
    def clean_source(self, article_id):
        self.callback_url_dict.pop(article_id, '')
        self.real_ip_dict.pop(article_id, '')
        self.demo_name.pop(article_id, '')  # also release the demo_name entry to avoid unbounded growth
    def run_task(self, words_meaning_str, task_id, student_stage, vocabulary, class_id):
        try:
            title, r_article_sentences, r_article_chinese_dict = self.run_get_article_task(words_meaning_str, task_id, student_stage, vocabulary)
            outside_json_dict, word_count, article_sentence_count = self.split_article_make_json(task_id, title, r_article_sentences, r_article_chinese_dict)
            self.upload_json_file_to_oss(article_id=task_id, data_dict=outside_json_dict)
            self.notice_teach_system(article_id=task_id, class_id=class_id, word_count=word_count, article_sentence_count=article_sentence_count)
            self.clean_source(article_id=task_id)
            logger.success(f"Article task finished. taskid: {task_id}")
        except Exception as e:
            logger.error(f"{type(e).__name__} {e}")
            traceback_str = traceback.format_exc()
            logger.error(f"Outer error traceback: {traceback_str}")
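

# A minimal usage sketch; the meaning ids, callback URL and IP below are
# illustrative placeholders, not values from this module. The localhost
# callback is deliberately one that notice_teach_system skips.
if __name__ == "__main__":
    ga = GetArticle()
    resp = ga.submit_task(words_meaning_ids=[110, 111, 112, 113, 114],
                          callback_url="http://localhost/callback",
                          real_ip="127.0.0.1", demo_name="demo",
                          student_stage=2, vocabulary=2400, class_id=1)
    print(resp)  # on success: {"id": <task_id>, "key": "study/article/<task_id>"}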