|
@@ -1,7 +1,7 @@
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
from gpt.chatgpt import get_answer_from_gpt
|
|
from gpt.chatgpt import get_answer_from_gpt
|
|
-from gpt.gpt_check import CheckGptAnswer,CheckArticleResult
|
|
|
|
|
|
+from gpt.gpt_check import CheckGptAnswer, CheckArticleResult
|
|
from tools.new_mysql import MySQLUploader
|
|
from tools.new_mysql import MySQLUploader
|
|
from tools.loglog import logger, log_err_e
|
|
from tools.loglog import logger, log_err_e
|
|
from tools.thread_pool_manager import pool_executor
|
|
from tools.thread_pool_manager import pool_executor
|
|
@@ -36,7 +36,7 @@ def get_article_difficulty(article) -> int:
|
|
logger.error(f"错误状态码{response.status_code}")
|
|
logger.error(f"错误状态码{response.status_code}")
|
|
|
|
|
|
|
|
|
|
-def find_interval(number):
|
|
|
|
|
|
+def find_interval(number) -> int:
|
|
"""
|
|
"""
|
|
判断一个数字属于哪个难度等级区间。31级是例外情况,需要排查
|
|
判断一个数字属于哪个难度等级区间。31级是例外情况,需要排查
|
|
|
|
|
|
@@ -66,7 +66,7 @@ def merge_and_split(list1, list2):
|
|
|
|
|
|
|
|
|
|
total_length = len(combined)
|
|
total_length = len(combined)
|
|
- if total_length>15:
|
|
|
|
|
|
+ if total_length > 15:
|
|
two_thirds = combined[:15]
|
|
two_thirds = combined[:15]
|
|
one_third = combined[15:]
|
|
one_third = combined[15:]
|
|
else:
|
|
else:
|
|
@@ -121,12 +121,14 @@ class GetArticle:
|
|
self.m.execute_(sql, (article_json, difficult_value))
|
|
self.m.execute_(sql, (article_json, difficult_value))
|
|
|
|
|
|
|
|
|
|
- def submit_task(self, core_words: list, extend_words: list, take_count: int, student_stage: int, real_ip: str, demo_name: str):
|
|
|
|
|
|
+ def submit_task(self, core_words: list, extend_words: list, take_count: int, student_stage: int, real_ip: str,
|
|
|
|
+ demo_name: str, article_difficulty: int):
|
|
"""
|
|
"""
|
|
words_meaning_list: 词义id 包含词义ID的数组集合,用于生成文章。- 示例:[110, 111, 112, 113, 114]
|
|
words_meaning_list: 词义id 包含词义ID的数组集合,用于生成文章。- 示例:[110, 111, 112, 113, 114]
|
|
take_count: 取文章数量 (int类型,正常是2篇,最大8篇)
|
|
take_count: 取文章数量 (int类型,正常是2篇,最大8篇)
|
|
student_stage: 学段(int类型:1.小学;2.初中;3.高中;)
|
|
student_stage: 学段(int类型:1.小学;2.初中;3.高中;)
|
|
demo_name: 项目名称
|
|
demo_name: 项目名称
|
|
|
|
+ article_difficulty:文章难度值1-4200模糊范围
|
|
"""
|
|
"""
|
|
task_id = randint(10000000, 99999999)
|
|
task_id = randint(10000000, 99999999)
|
|
|
|
|
|
@@ -136,7 +138,7 @@ class GetArticle:
|
|
self.demo_name[task_id] = demo_name
|
|
self.demo_name[task_id] = demo_name
|
|
|
|
|
|
try:
|
|
try:
|
|
- resp_result = self.run_task(core_words, extend_words, task_id, take_count, student_stage)
|
|
|
|
|
|
+ resp_result = self.run_task(core_words, extend_words, task_id, take_count, student_stage, article_difficulty)
|
|
self.parser_insert_to_mysql(resp_result)
|
|
self.parser_insert_to_mysql(resp_result)
|
|
return resp_result
|
|
return resp_result
|
|
except Exception as e:
|
|
except Exception as e:
|
|
@@ -147,15 +149,27 @@ class GetArticle:
|
|
|
|
|
|
|
|
|
|
@retry(stop=stop_after_attempt(2), wait=wait_fixed(3), reraise=True)
|
|
@retry(stop=stop_after_attempt(2), wait=wait_fixed(3), reraise=True)
|
|
- def get_article(self, core_words: list, extend_words: list, student_stage: int, task_id: int, take_count: int) -> dict:
|
|
|
|
|
|
+ def get_article(self, core_words: list, extend_words: list, student_stage: int, task_id: int, take_count: int, article_difficulty) -> dict:
|
|
|
|
+ article_grade = find_interval(article_difficulty)
|
|
|
|
+ if 0 < article_grade <= 10:
|
|
|
|
+ article_word_count = 50 + 10 * article_grade
|
|
|
|
+ elif 10 < article_grade <= 20:
|
|
|
|
+ article_word_count = 150 + 30 * (article_grade - 10)
|
|
|
|
+ else:
|
|
|
|
+ article_word_count = 450 + 20 * (article_grade - 20)
|
|
|
|
+
|
|
|
|
+
|
|
diffculty_control = {
|
|
diffculty_control = {
|
|
- 1: {"grade": "小学", "article_word_count": 60, "desc_difficulty": "最简单最容易没有难度", "paragraph_count": 1,
|
|
|
|
|
|
+ 1: {"grade": "小学", "article_word_count": article_word_count, "desc_difficulty": "最简单最容易没有难度", "paragraph_count": "1-2",
|
|
"desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
|
|
"desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
|
|
"choice_desc": "选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
|
|
"choice_desc": "选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
|
|
- 2: {"grade": "初中", "article_word_count": 300, "desc_difficulty": "简单、常见、难度低", "paragraph_count": 3,
|
|
|
|
|
|
+ 2: {"grade": "初中", "article_word_count": article_word_count, "desc_difficulty": "简单、常见、难度低", "paragraph_count": "2-3",
|
|
|
|
+ "desc2": "文章整体难度适中,大约和中国初中生,中国CET-3,雅思4分这样的难度标准。",
|
|
|
|
+ "choice_desc": "选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
|
|
|
|
+ 3: {"grade": "初中", "article_word_count": article_word_count, "desc_difficulty": "简单、常见、难度低", "paragraph_count": "2-3",
|
|
"desc2": "文章整体难度适中,大约和中国初中生,中国CET-3,雅思4分这样的难度标准。",
|
|
"desc2": "文章整体难度适中,大约和中国初中生,中国CET-3,雅思4分这样的难度标准。",
|
|
"choice_desc": "选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
|
|
"choice_desc": "选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
|
|
- 3: {"grade": "高中", "article_word_count": 600, "desc_difficulty": "常见、高中难度的", "paragraph_count": 5,
|
|
|
|
|
|
+ 4: {"grade": "高中", "article_word_count": article_word_count, "desc_difficulty": "常见、高中难度的", "paragraph_count": "3-5",
|
|
"desc2": "文章整体难度适中,大约和中国的高中生,中国CET-6,雅思6分这样的难度标准。",
|
|
"desc2": "文章整体难度适中,大约和中国的高中生,中国CET-6,雅思6分这样的难度标准。",
|
|
"choice_desc": "选择题难度偏难,要有迷惑性混淆性,答案不要出现直接在文中,4个选项要学生推理或逻辑判断,参考中国高中生水平,高考标准。"}
|
|
"choice_desc": "选择题难度偏难,要有迷惑性混淆性,答案不要出现直接在文中,4个选项要学生推理或逻辑判断,参考中国高中生水平,高考标准。"}
|
|
}
|
|
}
|
|
@@ -183,7 +197,7 @@ class GetArticle:
|
|
4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
|
|
4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
|
|
|
|
|
|
要求:
|
|
要求:
|
|
-1.必须用提供的这个词义的单词,其他单词使用{select_diffculty}的单词。{desc2}
|
|
|
|
|
|
+1.必须用提供的这个词义的单词,其他单词使用{select_diffculty}的单词。{desc2}{choice_desc}
|
|
2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
|
|
2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
|
|
3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
|
|
3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
|
|
4.生成的文章要求{select_word_count}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。
|
|
4.生成的文章要求{select_word_count}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。
|
|
@@ -196,11 +210,9 @@ class GetArticle:
|
|
try:
|
|
try:
|
|
real_ip = self.real_ip_dict[task_id]
|
|
real_ip = self.real_ip_dict[task_id]
|
|
demo_name = self.demo_name[task_id]
|
|
demo_name = self.demo_name[task_id]
|
|
- r_json = json.loads(get_answer_from_gpt(q, temperature=1, json_resp=True, real_ip=real_ip, demo_name=demo_name,model='gpt-4.1',
|
|
|
|
|
|
+ r_json = json.loads(get_answer_from_gpt(q, temperature=1, json_resp=True, real_ip=real_ip, demo_name=demo_name, model='gpt-4.1',
|
|
check_fucn=CheckArticleResult.get_article_1, max_tokens=8000, sys_prompt=sys_prompt))
|
|
check_fucn=CheckArticleResult.get_article_1, max_tokens=8000, sys_prompt=sys_prompt))
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
|
|
|
|
allWordAmount = 0
|
|
allWordAmount = 0
|
|
allWordAmount += len(split_text_to_word(r_json["englishArticle"]))
|
|
allWordAmount += len(split_text_to_word(r_json["englishArticle"]))
|
|
@@ -240,18 +252,19 @@ class GetArticle:
|
|
raise
|
|
raise
|
|
|
|
|
|
|
|
|
|
- def run_get_article_task(self, core_words, extend_words, task_id, take_count, student_stage) -> dict:
|
|
|
|
|
|
+ def run_get_article_task(self, core_words, extend_words, task_id, take_count, student_stage, article_difficulty) -> dict:
|
|
"""
|
|
"""
|
|
:param core_words: 核心单词数据,优先级1;可能为空
|
|
:param core_words: 核心单词数据,优先级1;可能为空
|
|
:param extend_words: 扩展单词数据,优先级2;可能为空
|
|
:param extend_words: 扩展单词数据,优先级2;可能为空
|
|
:param task_id: 任务id
|
|
:param task_id: 任务id
|
|
:param take_count: 文章数量
|
|
:param take_count: 文章数量
|
|
:param student_stage: 学段标识,整型,123
|
|
:param student_stage: 学段标识,整型,123
|
|
|
|
+ :param article_difficulty:文章难度1-4200模糊范围
|
|
:return:
|
|
:return:
|
|
"""
|
|
"""
|
|
futures = []
|
|
futures = []
|
|
for i in range(take_count):
|
|
for i in range(take_count):
|
|
- futures.append(pool_executor.submit(self.get_article, core_words, extend_words, student_stage, task_id, take_count))
|
|
|
|
|
|
+ futures.append(pool_executor.submit(self.get_article, core_words, extend_words, student_stage, task_id, take_count, article_difficulty))
|
|
wait(futures)
|
|
wait(futures)
|
|
return_json = {"articles": []}
|
|
return_json = {"articles": []}
|
|
for t in futures:
|
|
for t in futures:
|
|
@@ -259,9 +272,9 @@ class GetArticle:
|
|
return return_json
|
|
return return_json
|
|
|
|
|
|
|
|
|
|
- def run_task(self, core_words, extend_words, task_id, take_count, student_stage):
|
|
|
|
|
|
+ def run_task(self, core_words, extend_words, task_id, take_count, student_stage, article_difficulty):
|
|
try:
|
|
try:
|
|
- outside_json = self.run_get_article_task(core_words, extend_words, task_id, take_count, student_stage)
|
|
|
|
|
|
+ outside_json = self.run_get_article_task(core_words, extend_words, task_id, take_count, student_stage, article_difficulty)
|
|
logger.success(f"文章2任务完成。taskid:{task_id}\n{outside_json}")
|
|
logger.success(f"文章2任务完成。taskid:{task_id}\n{outside_json}")
|
|
return outside_json
|
|
return outside_json
|
|
except Exception as e:
|
|
except Exception as e:
|