Ver código fonte

1. 支持高并发的文章生成请求;
2. 改用直接的json_schema

xie 1 semana atrás
pai
commit
9b15c27155

+ 1 - 1
.gitignore

@@ -1,7 +1,7 @@
 .*
 /*.txt
 test*.py
-/test
+/test/
 /log
 *.docx
 *.pdf

+ 32 - 14
core/api_get_article2.py

@@ -1,17 +1,19 @@
 # -*- coding: utf-8 -*-
 
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path, Depends, BackgroundTasks
 from tools.loglog import logger,log_err_e
 
 from core.respone_format import *
 from gpt.get_article2 import GetArticle
-from pydantic import BaseModel, ValidationError, conint,Field
+from pydantic import BaseModel, ValidationError, conint,Field,conint
 from typing import List, Optional,Literal
-
+import asyncio
 
 router = APIRouter()
+
 get_article = GetArticle()
 
+
 class Word(BaseModel):
     meaning_id:int = Field(..., description="单词的词义id")
     word_id:int = Field(..., description="单词id")
@@ -21,27 +23,43 @@ class Word(BaseModel):
 
 class ArticleRequest(BaseModel):
     core_words: List[Word] = Field(..., description="单词列表")
-    extend_words: List[Word] = Field(..., description="单词列表")
     take_count: int = 2 
-    student_stage: Literal[1, 2, 3] 
     demo_name: Optional[str] = "无" 
-    reading_level: int = Field(default=-1, description="阅读水平,默认值为-1")
-    article_difficulty:int = Field(default=1000,description="文章难度,模糊范围1-4200,根据这个来调整文章难度和篇幅")
+    reading_level: conint(ge=1, le=30) = Field(default=10, description="阅读水平,默认值为10;[8,16,24]小学初中高中")
+    article_length:int = Field(default=None,description="需要生成的文章长度,可以不传,不传自己根据reading_level判断")
+    exercise_id:int = Field(default=0,description="学案ID,用于日志快速定位")
 
 
 @router.post("/article/reading-comprehension")
-def post_article(json_data:ArticleRequest,request:Request):
+def post_article(
+    json_data:ArticleRequest,
+    request:Request,
+    background_tasks: BackgroundTasks,
+):
+
     json_data = json_data.model_dump()
     real_ip = request.headers.get("X-Real-IP","0.0.0.0")
-    core_words,extend_words,take_count,student_stage,demo_name = json_data["core_words"],json_data["extend_words"],json_data["take_count"],json_data["student_stage"],json_data["demo_name"]
-    article_difficulty = json_data['article_difficulty']
-    reading_level = json_data.get("reading_level",-1) 
+
+    core_words = json_data["core_words"]
+    take_count = json_data["take_count"]
+    demo_name = json_data["demo_name"]
+    reading_level = json_data["reading_level"] 
+    article_length = json_data["article_length"]
+    exercise_id = json_data["exercise_id"]
 
     try:
-        r = get_article.submit_task(core_words=core_words,extend_words=extend_words, take_count=take_count,
-                                    student_stage=student_stage,real_ip=real_ip,demo_name=demo_name,article_difficulty=article_difficulty)
+        r = get_article.submit_task(
+            real_ip=real_ip,
+            core_words=core_words,
+            take_count=take_count,
+            demo_name=demo_name,
+            reading_level=reading_level,
+            article_length=article_length,
+            exercise_id=exercise_id,
+            background_tasks=background_tasks
+        )
         return r if not isinstance(r,str) else resp_500(message=r)
 
     except Exception as e:
-        log_err_e(e,msg="文章2接口错误/article/reading-comprehension;")
+        log_err_e(e, msg="文章2接口错误/article/reading-comprehension;")
         return resp_500(message=f"{type(e).__name__},{e}")

+ 48 - 22
gpt/chatgpt.py

@@ -7,9 +7,11 @@ import requests
 import random
 import json
 import time
-from tools.loglog import logger,simple_logger
+from tools.loglog import logger,simple_logger,log_err_e
 from tools.new_mysql import MySQLUploader
 from typing import Optional, Dict, Any,Union
+import httpx
+import asyncio
 
 
 m = MySQLUploader()
@@ -93,39 +95,62 @@ def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-
 
     logger.critical("get_answer_from_gpt 严重错误,3次后都失败了")
 
-def get_article_gpt_pydantic(question,real_ip="localhost",demo_name="无",model="gpt-4o",max_tokens=3500,temperature:float=0,n=1,
-                        check_fucn=None,sys_prompt=None):
+def get_article_gpt_pydantic(question, real_ip="localhost", demo_name="无", model="gpt-4.1", max_tokens=3500, temperature:float=0, n=1,
+                        check_fucn=None, sys_prompt=None):
+    """
+    异步获取文章
+    :param question: 问题
+    :param real_ip: 真实IP
+    :param demo_name: 项目名称
+    :param model: 模型名称
+    :param max_tokens: 最大token数
+    :param temperature: 温度
+    :param n: 生成数量
+    :param check_fucn: 校验函数
+    :param sys_prompt: 系统提示
+    :return: 文章内容
+    """
    
-    d2 = {"model": model, "messages": [], "max_tokens": max_tokens, "temperature": temperature, "response_format":"article"}
+    d2 = {"model": model, "messages": [], "max_tokens": max_tokens, "temperature": temperature,"n":n, "response_format":{'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {'properties': {'label': {'title': 'Label', 'type': 'string'}, 'text': {'title': 'Text', 'type': 'string'}, 'isRight': {'title': 'Isright', 'type': 'integer'}}, 'required': ['label', 'text', 'isRight'], 'title': 'Candidate', 'type': 'object'}, 'DifficultSentence': {'properties': {'english': {'title': 'English', 'type': 'string'}, 'chinese': {'title': 'Chinese', 'type': 'string'}}, 'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {'properties': {'trunk': {'title': 'Trunk', 'type': 'string'}, 'analysis': {'title': 'Analysis', 'type': 'string'}, 'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': 'Candidates', 'type': 'array'}}, 'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {'difficultSentences': {'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': 'Difficultsentences', 'type': 'array'}, 'usedMeanIds': {'items': {'type': 'integer'}, 'title': 'Usedmeanids', 'type': 'array'}, 'questions': {'items': {'$ref': '#/$defs/Question'}, 'title': 'Questions', 'type': 'array'}, 'englishArticle': {'title': 'Englisharticle', 'type': 'string'}, 'chineseArticle': {'title': 'Chinesearticle', 'type': 'string'}, 'allWordAmount': {'title': 'Allwordamount', 'type': 'integer'}}, 'required': ['difficultSentences', 'usedMeanIds', 'questions', 'englishArticle', 'chineseArticle', 'allWordAmount'], 'title': 'Article', 'type': 'object'}}}}
     if sys_prompt:
         d2['messages'].append({"role": "system", "content": sys_prompt})
     d2['messages'].append({"role": "user", "content": question})
 
     for num_count in range(3):
         try:
-           
-            response = requests.post(f'http://170.106.108.95/get_article', json=d2)
-            r_str = response.json() 
-
-            simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{r_str}")
+            response = requests.post('http://170.106.108.95/v1/chat/completions', json=d2)
+            r_json = response.json() 
+            simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{r_json}")
+            return r_json
 
            
-            if not check_fucn:
-                return r_str
-
            
-            check_result = check_fucn(r_str)
+           
+            #
+           
+           
+            #
+           
+           
+           
+           
 
-            if check_result: 
-                return r_str
+        except httpx.HTTPError as e:
+            logger.error(f"HTTP请求错误: {str(e)}")
+            if num_count < 2: 
+                time.sleep(10)
             else:
-                raise Exception(f"第{num_count + 1}次共3次,GPT的校验没有通过,校验函数:{check_fucn.__name__}")
-
+                raise
         except Exception as e:
-            logger.info(f"小报错忽略{e}")
-        time.sleep(10)
+            log_err_e(e,"其他错误")
 
-    logger.critical("get_answer_from_gpt 严重错误,3次后都失败了")
+            if num_count < 2:
+                time.sleep(10)
+            else:
+                raise
+
+    logger.critical("get_article_gpt_pydantic 严重错误,3次后都失败了")
+    raise Exception("获取文章失败,已达到最大重试次数")
 
 
 def parse_gpt_phon_to_tuplelist(text:str) -> list:
@@ -158,6 +183,7 @@ if __name__ == '__main__':
 提供[单词组1]:4238 penalty:惩罚, 刑罚;4591 bare:赤裸的, 无遮蔽的;4227 stable:畜舍, 马厩;4236 psychology:心理学;4245 offense:进攻, 攻势, 冒犯, 触怒, 过错;4237 innocent:清白的, 无辜的, 天真的;4228 refrigerator:冰箱, 冷库;4247 tissue:(动植物)组织;4250 awareness:察觉, 觉悟, 意识;4234 mode:方式, 模式;4224 neat:整洁, 利索;4225 statistics:统计;4251 random:任意的, 随机的;4201 laundry:洗衣房;4545 barrel:桶, 一桶之量;4249 recruit:招募, 新成员;4229 pregnant:怀孕的, 孕育的;4235 relevant:有关的, 相关联的;4252 incentive:刺激, 激励, 鼓励;4194 grave:坟墓, 墓穴;
 提供[单词组2]:;
 """
-    resp = get_article_gpt_pydantic(question=q,temperature=0.9,sys_prompt=sys_prompt,model="gpt-4.1")
+    resp = get_answer_from_gpt(question=question,temperature=0.9,sys_prompt=sys_prompt,model="gpt-4.1")
     print(type(resp))
-    print(resp)
+    print(resp)
+

+ 185 - 127
gpt/get_article2.py

@@ -6,16 +6,22 @@ from tools.new_mysql import MySQLUploader
 from tools.loglog import logger, log_err_e
 from tools.thread_pool_manager import pool_executor
 from common.common_data import all_exchange_words
-from common.split_text import split_text_to_word
+from common.split_text import split_text_to_word, get_article_words_count
 
 from pydantic import BaseModel
 from cachetools import TTLCache
 from concurrent.futures import wait
 from random import randint, shuffle, sample
-import json
+import json,time
 import requests
 from openpyxl import load_workbook
 from tenacity import retry, stop_after_attempt, wait_fixed
+import httpx
+import asyncio
+from threading import Lock
+from collections import defaultdict
+from fastapi import BackgroundTasks
+
 
 
 def get_article_difficulty(article) -> int:
@@ -43,11 +49,14 @@ def find_interval(number) -> int:
     :param number: 要检查的数字。
     :return: 返回包含该数字的区间,如果没有找到,则返回 None。
     """
-    intervals = [(1, 200), (201, 250), (251, 300), (301, 350), (351, 400), (401, 450), (451, 550), (551, 650), (651, 750), (751, 850), (851, 950),
+    intervals = [(1, 200), (201, 250), (251, 300), (301, 350), (351, 400), (401, 450), (451, 550), (551, 650), (651, 750), (751, 850),
+                 (851, 950),
                  (951, 1100),
-                 (1101, 1250), (1251, 1400), (1401, 1550), (1551, 1700), (1701, 1900), (1901, 2100), (2101, 2300), (2301, 2600), (2601, 2900),
+                 (1101, 1250), (1251, 1400), (1401, 1550), (1551, 1700), (1701, 1900), (1901, 2100), (2101, 2300), (2301, 2600),
+                 (2601, 2900),
                  (2901, 3200),
-                 (3201, 3500), (3501, 3900), (3901, 4300), (4301, 4700), (4701, 5100), (5101, 5500), (5501, 5900), (5901, 6500), (6501, 99999)]
+                 (3201, 3500), (3501, 3900), (3901, 4300), (4301, 4700), (4701, 5100), (5101, 5500), (5501, 5900), (5901, 6500),
+                 (6501, 99999)]
     for index, (start, end) in enumerate(intervals, start=1):
         if start <= number <= end:
             return index
@@ -78,11 +87,13 @@ def merge_and_split(list1, list2):
 
 class GetArticle:
     def __init__(self):
-        self.m = MySQLUploader()
+        self.m = MySQLUploader() 
+
+       
+        self.callback_url_dict = defaultdict(str)
+        self.real_ip_dict = defaultdict(str) 
+        self.demo_name = defaultdict(str)
 
-        self.callback_url_dict = {}
-        self.real_ip_dict = {} 
-        self.demo_name = {}
 
         self.article_result = {} 
 
@@ -93,6 +104,8 @@ class GetArticle:
        
         self.exchange_data: dict[str, list] = {} 
         self.read_spring_bamboo_exchange_table()
+                    
+
 
    
     def read_spring_bamboo_exchange_table(self):
@@ -110,88 +123,160 @@ class GetArticle:
 
    
     def parser_insert_to_mysql(self, resp_result):
-        for single_article in resp_result['articles']:
+        try:
+            for single_article in resp_result['articles']:
+                article = single_article['body']
+                article_json = json.dumps(single_article)
+                difficult_value = find_interval(get_article_difficulty(article))
+                if not difficult_value:
+                    logger.error("文章难度等级为0;")
+                sql = "INSERT INTO spring_bamboo_article (article_json,difficult_level) VALUES (%s,%s)"
+                self.m.execute_(sql, (article_json, difficult_value))
+        except Exception as e:
            
-            article = single_article['body']
-            article_json = json.dumps(single_article)
-            difficult_value = find_interval(get_article_difficulty(article)) 
-            if not difficult_value:
-                logger.error("文章难度等级为0;")
-            sql = "INSERT INTO spring_bamboo_article (article_json,difficult_level) VALUES (%s,%s)"
-            self.m.execute_(sql, (article_json, difficult_value))
+            logger.error(f"插入数据库时发生错误: {str(e)}")
 
    
-    def submit_task(self, core_words: list, extend_words: list, take_count: int, student_stage: int, real_ip: str,
-                    demo_name: str, article_difficulty: int):
+    def submit_task(self, real_ip: str, core_words: list, take_count: int,
+                          demo_name: str, reading_level: int, article_length: int, exercise_id: int,
+                          background_tasks: BackgroundTasks):
         """
-        words_meaning_list: 词义id 包含词义ID的数组集合,用于生成文章。- 示例:[110, 111, 112, 113, 114]
+        core_words: 词义数据组
         take_count: 取文章数量 (int类型,正常是2篇,最大8篇)
-        student_stage: 学段(int类型:1.小学;2.初中;3.高中;)
         demo_name: 项目名称
-        article_difficulty:文章难度值1-4200模糊范围
+        reading_level:阅读等级
+        article_length:文章长度
+        exercise_id:学案id
+        background_tasks: FastAPI的后台任务管理器
         """
         task_id = randint(10000000, 99999999)
-       
-        logger.info(f"生成文章id。task_id:{task_id}")
-
-        self.real_ip_dict[task_id] = real_ip
-        self.demo_name[task_id] = demo_name
+        logger.info(f"reading-comprehension 生成文章id。学案id:{exercise_id},task_id:{task_id}")
 
         try:
-            resp_result = self.run_task(core_words, extend_words, task_id, take_count, student_stage, article_difficulty)
-            self.parser_insert_to_mysql(resp_result) 
+            self.real_ip_dict[task_id] = real_ip
+            self.demo_name[task_id] = demo_name
+
+            resp_result = self.run_task(core_words, task_id, take_count, reading_level, article_length)
+            
+           
+            background_tasks.add_task(self.parser_insert_to_mysql, resp_result)
+            
+            logger.success(f"reading-comprehension 文章2任务完成。学案id:{exercise_id},taskid:{task_id}")
             return resp_result
         except Exception as e:
             err_msg = f"GetArticle提交任务失败{type(e).__name__},{e}"
             log_err_e(e, msg="GetArticle提交任务失败;")
-
             return err_msg
+        finally:
+           
+            self.real_ip_dict.pop(task_id, None)
+            self.demo_name.pop(task_id, None)
 
    
-    @retry(stop=stop_after_attempt(2), wait=wait_fixed(3), reraise=True)
-    def get_article(self, core_words: list, extend_words: list, student_stage: int, task_id: int, take_count: int, article_difficulty) -> dict:
-        article_grade = find_interval(article_difficulty)
-        if 0 < article_grade <= 10:
-            article_word_count = 50 + 10 * article_grade 
-        elif 10 < article_grade <= 20:
-            article_word_count = 150 + 30 * (article_grade - 10) 
-        else:
-            article_word_count = 450 + 20 * (article_grade - 20) 
+    def __parse_gpt_resp(self,gpt_resp:dict,core_words:list):
+        return_json = {"articles": []} 
+        for choice in gpt_resp["choices"]:
+            single_article_dict = json.loads(choice["message"]["content"])
+
+            allWordAmount = 0 
+           
+            articleWordAmount = get_article_words_count(single_article_dict["englishArticle"])
+            allWordAmount += articleWordAmount
+
+            for i in single_article_dict["questions"]:
+                count_trunk = get_article_words_count(i["trunk"])
+                count_candidates = sum([get_article_words_count(ii["text"]) for ii in i["candidates"]])
+                allWordAmount += count_trunk
+                allWordAmount += count_candidates
+
+           
+            usedMeanIds: list = single_article_dict['usedMeanIds'] 
+           
+            article_words = split_text_to_word(single_article_dict['englishArticle'])
+           
+            for i in core_words:
+                meaning_id = i.get('meaning_id', 0)
+                if not meaning_id:
+                    continue
+                word = i["spell"]
+                if meaning_id not in usedMeanIds and word in self.exchange_data: 
+                    words_exchanges_list = self.exchange_data[word]
+                    for exchange_word in words_exchanges_list:
+                        if exchange_word in article_words:
+                            usedMeanIds.append(meaning_id)
+                            break
+
+           
+            single_article_dict["body"] = single_article_dict.pop("englishArticle")
+            single_article_dict["chinese"] = single_article_dict.pop("chineseArticle")
+
+           
+            for q in single_article_dict['questions']:
+                data = q['candidates']
+                shuffled_candidates = sample(data, len(data))
+
+                labels = ['A', 'B', 'C', 'D']
+                for index, candidate in enumerate(shuffled_candidates):
+                    candidate['label'] = labels[index]
+                q['candidates'] = shuffled_candidates
+
+           
+            return_json['articles'].append({**single_article_dict, "allWordAmount": allWordAmount, "articleWordAmount": articleWordAmount})
+
+        return return_json
+
+
+
+   
+    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
+    def get_article(self, core_words: list, task_id: int, reading_level, article_length,n) -> dict:
+       
+        if not article_length:
+            if 0 < reading_level <= 10:
+                article_length = 50 + 10 * reading_level
+            elif 10 < reading_level <= 20:
+                article_length = 150 + 30 * (reading_level - 10)
+            else:
+                article_length = 450 + 20 * (reading_level - 20)
+
+        for index, (start, end) in enumerate([(1, 8), (9, 16), (17, 24), (24, 30)], start=1):
+            if start <= reading_level <= end:
+                difficulty_control_stage = index
+                break
+        else: 
+            difficulty_control_stage = 2
 
        
         diffculty_control = {
-            1: {"grade": "小学", "article_word_count": article_word_count, "desc_difficulty": "最简单最容易没有难度", "paragraph_count": "1-2",
+            1: {"grade": "小学", "desc_difficulty": "最简单最容易没有难度", "paragraph_count": "1-2",
                 "desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
-                "choice_desc": "选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
-            2: {"grade": "初中", "article_word_count": article_word_count, "desc_difficulty": "简单、常见、难度低", "paragraph_count": "2-3",
+                "choice_desc": "选择题难度尽可能简单,参考中国小学生水平"},
+            2: {"grade": "初中", "desc_difficulty": "简单、常见、难度低", "paragraph_count": "2-3",
                 "desc2": "文章整体难度适中,大约和中国初中生,中国CET-3,雅思4分这样的难度标准。",
                 "choice_desc": "选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
-            3: {"grade": "初中", "article_word_count": article_word_count, "desc_difficulty": "简单、常见、难度低", "paragraph_count": "2-3",
+            3: {"grade": "初中", "desc_difficulty": "简单、常见、难度低", "paragraph_count": "2-3",
                 "desc2": "文章整体难度适中,大约和中国初中生,中国CET-3,雅思4分这样的难度标准。",
                 "choice_desc": "选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
-            4: {"grade": "高中", "article_word_count": article_word_count, "desc_difficulty": "常见、高中难度的", "paragraph_count": "3-5",
+            4: {"grade": "高中", "desc_difficulty": "常见、高中难度的", "paragraph_count": "3-5",
                 "desc2": "文章整体难度适中,大约和中国的高中生,中国CET-6,雅思6分这样的难度标准。",
                 "choice_desc": "选择题难度偏难,要有迷惑性混淆性,答案不要出现直接在文中,4个选项要学生推理或逻辑判断,参考中国高中生水平,高考标准。"}
         }
-        take_count_dict = {0: "", 1: "一", 2: "二", 3: "三", 4: "四", 5: "五", 6: "六", 7: "七", 8: "八", 9: "九"}
-        different_cou = take_count_dict.get(take_count, "")
+       
 
-        grade = diffculty_control[student_stage]["grade"] 
-        select_word_count = diffculty_control[student_stage]["article_word_count"] 
-        select_diffculty = diffculty_control[student_stage]["desc_difficulty"] 
-        select_paragraph_count = diffculty_control[student_stage]["paragraph_count"] 
-        desc2 = diffculty_control[student_stage]["desc2"]
-        choice_desc = diffculty_control[student_stage]["choice_desc"] 
+        grade = diffculty_control[difficulty_control_stage]["grade"] 
+        select_diffculty = diffculty_control[difficulty_control_stage]["desc_difficulty"] 
+        select_paragraph_count = diffculty_control[difficulty_control_stage]["paragraph_count"] 
+        desc2 = diffculty_control[difficulty_control_stage]["desc2"]
+        choice_desc = diffculty_control[difficulty_control_stage]["choice_desc"] 
 
        
         shuffle(core_words)
-        core_words_meaning_str = ";".join([str(i['meaning_id']) + ' ' + i["spell"] + ":" + i["meaning"] for i in core_words])
-        extend_words_meaning_str = ";".join([str(i['meaning_id']) + ' ' + i["spell"] + ":" + i["meaning"] for i in extend_words])
+        core_words_meaning_str = "; ".join([f"[{i['meaning_id']}  {i['spell']} {i['meaning']}]" for i in core_words])
 
         no_escape_code = r"\\n\\n"
-       
+
         sys_prompt = "你是一个专业的英语老师,擅长根据用户提供的词汇生成对应的英语文章和中文翻译和4个配套选择题。"
-        q = f"""下面我会为你提供两组数据,[单词组1]和[单词组2](里面包含词义id,英语单词,中文词义),优先使用[单词组1]内的单词,请根据这些单词的中文词义,\
+        q = f"""下面我会为你提供一组数据,[单词组](里面包含词义id,英语单词,中文词义),请根据这些单词的中文词义,\
 生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。\
 英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。\
 4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
@@ -200,98 +285,71 @@ class GetArticle:
 1.必须用提供的这个词义的单词,其他单词使用{select_diffculty}的单词。{desc2}{choice_desc}
 2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
 3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
-4.生成的文章要求{select_word_count}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。
-5.生成文章优先使用[单词组1]的词义,其次可以挑选使用[单词组2]的词义。允许不使用[单词组1]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
+4.生成的文章要求{article_length}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。
+5.允许不使用[单词组]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
 6.注意回复字段的中英文,englishArticle是英文,chineseArticle是中文,其中trunk是英文,analysis是中文,text是英文。
 
-提供[单词组1]:{core_words_meaning_str};
-提供[单词组2]:{extend_words_meaning_str};
+提供[单词组]:{core_words_meaning_str};
 """
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-           
-           
-
-            r_json = json.loads(get_article_gpt_pydantic(q, temperature=0.9, real_ip=real_ip, demo_name=demo_name, model='gpt-4.1',
-                                                         check_fucn=CheckArticleResult.get_article_1, max_tokens=4000, sys_prompt=sys_prompt))
-           
-            allWordAmount = 0
-            allWordAmount += len(split_text_to_word(r_json["englishArticle"]))
-            for i in r_json["questions"]:
-                count_trunk = len(split_text_to_word(i["trunk"]))
-                count_candidates = sum([len(split_text_to_word(ii["text"])) for ii in i["candidates"]])
-                allWordAmount += count_trunk
-                allWordAmount += count_candidates
-
-           
-            usedMeanIds: list = r_json['usedMeanIds'] 
-           
-            article_words = split_text_to_word(r_json['englishArticle'])
-           
-            for i in core_words + extend_words:
-                meaning_id = i.get('meaning_id', 0)
-                if not meaning_id:
-                    continue
-                word = i["spell"]
-                if meaning_id not in usedMeanIds and word in self.exchange_data: 
-                    words_exchanges_list = self.exchange_data[word]
-                    for exchange_word in words_exchanges_list:
-                        if exchange_word in article_words:
-                            usedMeanIds.append(meaning_id)
-                            break
-
-           
-            r_json["body"] = r_json.pop("englishArticle")
-            r_json["chinese"] = r_json.pop("chineseArticle")
-
-           
-            for q in r_json['questions']:
-                data = q['candidates']
-                shuffled_candidates = sample(data, len(data))
 
-                labels = ['A', 'B', 'C', 'D']
-                for index, candidate in enumerate(shuffled_candidates):
-                    candidate['label'] = labels[index]
-                q['candidates'] = shuffled_candidates
+            gpt_resp = get_article_gpt_pydantic(q, temperature=1.2, real_ip=real_ip, demo_name=demo_name, model='gpt-4.1',
+                                                               check_fucn=CheckArticleResult.get_article_1, max_tokens=8000,
+                                                               sys_prompt=sys_prompt,n=n)
+            multi_articles_dict = self.__parse_gpt_resp(gpt_resp=gpt_resp,core_words=core_words)
+            return multi_articles_dict
 
-            return {**r_json, "allWordAmount": allWordAmount}
-        except json.decoder.JSONDecodeError:
-            logger.error("gpt生成文章回复json格式化错误")
+        except httpx.HTTPError as e:
+            logger.error(f"HTTP请求错误: {str(e)}")
+            raise
+        except json.JSONDecodeError as e:
+            logger.error(f"JSON解析错误: {str(e)}")
             raise
         except Exception as e:
-            log_err_e(e,f"gpt生成文章回复其他错误.")
-
+            log_err_e(e, f"gpt生成文章回复其他错误.")
             raise
 
+
    
-    def run_get_article_task(self, core_words, extend_words, task_id, take_count, student_stage, article_difficulty) -> dict:
+    def run_get_article_task(self, core_words, task_id, take_count, reading_level, article_length) -> dict:
         """
         :param core_words: 核心单词数据,优先级1;可能为空
-        :param extend_words: 扩展单词数据,优先级2;可能为空
         :param task_id: 任务id
         :param take_count: 文章数量
-        :param student_stage: 学段标识,整型,123
-        :param article_difficulty:文章难度1-4200模糊范围
+        :param reading_level:阅读等级
+        :param article_length:文章长度
         :return:
         """
-        futures = []
-        for i in range(take_count):
-            futures.append(pool_executor.submit(self.get_article, core_words, extend_words, student_stage, task_id, take_count, article_difficulty))
-        wait(futures)
-        return_json = {"articles": []}
-        for t in futures:
-            return_json["articles"].append(t.result())
-        return return_json
+        try:
+            return_json = self.get_article(core_words, task_id, reading_level, article_length,n=take_count)
+            return return_json
+        except Exception as e:
+            logger.error(f"运行文章任务时发生错误: {str(e)}")
+            raise
 
    
-    def run_task(self, core_words, extend_words, task_id, take_count, student_stage, article_difficulty):
+    def run_task(self, core_words, task_id, take_count, reading_level, article_length):
         try:
-            outside_json = self.run_get_article_task(core_words, extend_words, task_id, take_count, student_stage, article_difficulty)
-            logger.success(f"文章2任务完成。taskid:{task_id}\n{outside_json}")
+            outside_json = self.run_get_article_task(core_words, task_id, take_count, reading_level, article_length)
             return outside_json
         except Exception as e:
-            logger.error(f"{type(e).__name__} {e}")
-        finally:
-            self.real_ip_dict.pop(task_id)
-            self.demo_name.pop(task_id)
+            log_err_e(e, msg="外层总任务捕获错误")
+
+    def cleanup(self):
+        """清理所有资源"""
+        pass
+       
+       
+       
+       
+       
+       
+       
+       
+       
+       
+       
+       
+

+ 11 - 1
make_docx_demo/main_word_applet.py

@@ -1045,10 +1045,13 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
             spell1, meaning1 = word_data_list2[row]
             spell2, meaning2 = word_data_list2[total_row + row]
 
+            cell1 = f"{row + 1}. {spell1}" if spell1 else ""
+            cell2 = f"□ {meaning1}" if meaning1 else ""
+
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
 
-            data = [f"{row + 1}. {spell1}", f"□ {meaning1}", cell3, cell4] 
+            data = [cell1,cell2, cell3, cell4] 
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
@@ -1084,6 +1087,13 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
 
         word_data_list1 = []
         word_data_list2 = []
+
+       
+        item_list:list = page['FilterTable']['Items']
+        item_count = len(item_list)
+        if item_count<100:
+            item_list.extend([{"Spell":"","Meaning":""} for _ in range(100-item_count)])
+
         for i in page['FilterTable']['Items']: 
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])

+ 203 - 33
mock/mock_request.py

@@ -1,16 +1,51 @@
 # -*- coding:utf-8 -*-
 #
-import requests
-import time
 import json 
+import time
 from functools import wraps
+from random import shuffle,sample,randint
+from threading import Thread
+from concurrent.futures import ThreadPoolExecutor,wait
+
+import httpx
+import requests
+from pydantic import BaseModel
+from typing import List
+
 
 product_adress = "http://111.231.167.191" 
 test_address = "http://111.231.167.191:8004" 
+test_address2 = "http://111.231.167.191:8003" 
+
 local_adress = "http://127.0.0.1:9000" 
 
 use_address = local_adress 
 
+class DifficultSentence(BaseModel):
+    english: str
+    chinese: str
+
+class Candidate(BaseModel):
+    label: str
+    text: str
+    isRight: int
+
+class Question(BaseModel):
+    trunk: str
+    analysis: str
+    candidates: List[Candidate]
+
+class Article(BaseModel):
+    difficultSentences: List[DifficultSentence]
+    usedMeanIds: List[int]
+    questions: List[Question]
+    englishArticle: str
+    chineseArticle: str
+    allWordAmount: int
+
+class ArticleData(BaseModel):
+    articles: List[Article]
+
 
 def time_use(fn):
     @wraps(fn)
@@ -23,7 +58,7 @@ def time_use(fn):
             print(f'函数:{fn.__name__} 一共用时', cha, '秒')
         return res 
 
-    return cc 
+    return cc
 
 
 def test_connect():
@@ -98,47 +133,163 @@ def get_article2():
 @time_use
 def get_article2_1():
     """新的获取文章"""
-    json_data = {'core_words': [{'spell': 'sudden', 'meaning': '突然的, 意外的', 'word_id': 1114468, 'meaning_id': 1734},
-                                {'spell': 'frighten', 'meaning': '惊吓, 惊恐', 'word_id': 899278, 'meaning_id': 1735},
-                                {'spell': 'relation', 'meaning': '关系, 联系, 亲戚, 亲属', 'word_id': 1061800, 'meaning_id': 1736},
-                                {'spell': 'Japanese', 'meaning': '日本的', 'word_id': 727384, 'meaning_id': 1737},
-                                {'spell': 'trick', 'meaning': '恶作剧, 戏法, 作假, 欺骗', 'word_id': 1140881, 'meaning_id': 1740},
-                                {'spell': 'yours', 'meaning': '你(们)的东西', 'word_id': 1169496, 'meaning_id': 1741},
-                                {'spell': 'panda', 'meaning': '熊猫', 'word_id': 1015908, 'meaning_id': 1742},
-                                {'spell': 'agreement', 'meaning': '协议,协定', 'word_id': 753401, 'meaning_id': 1743},
-                                {'spell': 'pool', 'meaning': '游泳池, 池子', 'word_id': 1035634, 'meaning_id': 1747},
-                                {'spell': 'risk', 'meaning': '冒险, 风险', 'word_id': 1069002, 'meaning_id': 1748},
-                                {'spell': 'centre', 'meaning': '中心', 'word_id': 806629, 'meaning_id': 1749},
-                                {'spell': 'shut', 'meaning': '关上, 关闭', 'word_id': 1088662, 'meaning_id': 1751},
-                                {'spell': 'piano', 'meaning': '钢琴', 'word_id': 1027211, 'meaning_id': 1752},
-                                {'spell': 'trust', 'meaning': '信任, 信赖', 'word_id': 1142977, 'meaning_id': 1753},
-                                {'spell': 'camera', 'meaning': '照相机', 'word_id': 799656, 'meaning_id': 1754},
-                                {'spell': 'course', 'meaning': '课程', 'word_id': 834016, 'meaning_id': 399},
-                                {'spell': 'carry', 'meaning': '携带', 'word_id': 803106, 'meaning_id': 460},
-                                {'spell': 'sometimes', 'meaning': '有时, 间或', 'word_id': 1097431, 'meaning_id': 495},
-                                {'spell': 'interesting', 'meaning': '有趣的, 令人感兴趣的', 'word_id': 944231, 'meaning_id': 600},
-                                {'spell': 'thought', 'meaning': '思想, 想法', 'word_id': 1130826, 'meaning_id': 685}],
-                 'extend_words': [{'spell': 'destroy', 'meaning': '破坏, 摧毁', 'word_id': 848592, 'meaning_id': 1288},
-                                  {'spell': 'project', 'meaning': '放映, 展现', 'word_id': 1044528, 'meaning_id': 1290},
-                                  {'spell': 'waste', 'meaning': '浪费, 荒芜, 废物', 'word_id': 1160701, 'meaning_id': 1292},
-                                  {'spell': 'environment', 'meaning': '环境, 外界', 'word_id': 873514, 'meaning_id': 1293},
-                                  {'spell': 'memory', 'meaning': '记忆, 记忆力, 回忆', 'word_id': 981104, 'meaning_id': 1294}],
-                 'take_count': 1, 'student_stage': 3, 'demo_name': '春笋英语',"article_difficulty":800}
+    core_words_list = [{'spell': 'sudden', 'meaning': '突然的, 意外的', 'word_id': 1114468, 'meaning_id': 1734},
+                       {'spell': 'frighten', 'meaning': '惊吓, 惊恐', 'word_id': 899278, 'meaning_id': 1735},
+                       {'spell': 'relation', 'meaning': '关系, 联系, 亲戚, 亲属', 'word_id': 1061800, 'meaning_id': 1736},
+                       {'spell': 'agreement', 'meaning': '协议,协定', 'word_id': 753401, 'meaning_id': 1743},
+                       {'spell': 'risk', 'meaning': '冒险, 风险', 'word_id': 1069002, 'meaning_id': 1748},
+                       {'spell': 'centre', 'meaning': '中心', 'word_id': 806629, 'meaning_id': 1749},
+                       {'spell': 'shut', 'meaning': '关上, 关闭', 'word_id': 1088662, 'meaning_id': 1751},
+                       {'spell': 'thought', 'meaning': '思想, 想法', 'word_id': 1130826, 'meaning_id': 685},
+                       {'spell': 'information', 'meaning': '消息, 信息', 'word_id': 940351, 'meaning_id': 487, 'serial': 330},
+                       {'spell': 'bright', 'meaning': '聪明的', 'word_id': 793695, 'meaning_id': 1451, 'serial': 1048},
+                       {'spell': 'international', 'meaning': '国际的', 'word_id': 945460, 'meaning_id': 1683, 'serial': 1232},
+                       {'spell': 'shelf', 'meaning': '架子, 搁板', 'word_id': 1086743, 'meaning_id': 1838, 'serial': 1366},
+                       {'spell': 'cave', 'meaning': '洞穴, 山洞', 'word_id': 805431, 'meaning_id': 2167, 'serial': 1639},
+                       {'spell': 'gym', 'meaning': '健身房, 体育馆', 'word_id': 915473, 'meaning_id': 2217, 'serial': 1683},
+                       {'spell': 'properly', 'meaning': '适当地, 正确地', 'word_id': 1045343, 'meaning_id': 2257, 'serial': 1720},
+                       {'spell': 'platform', 'meaning': '平台', 'word_id': 1031256, 'meaning_id': 2269, 'serial': 1730},
+                       {'spell': 'sweep', 'meaning': '打扫, 清扫', 'word_id': 1118098, 'meaning_id': 2321, 'serial': 1775},
+                       {'spell': 'clinic', 'meaning': '诊所, 门诊部', 'word_id': 815699, 'meaning_id': 2471, 'serial': 1898},
+                       {'spell': 'sauce', 'meaning': '酱油, 调味料', 'word_id': 1076452, 'meaning_id': 2501, 'serial': 1927},
+                       {'spell': 'retell', 'meaning': '重讲, 复述', 'word_id': 1065717, 'meaning_id': 2546, 'serial': 1970},
+                       {'spell': 'specific', 'meaning': '具体的, 明确的', 'word_id': 1099668, 'meaning_id': 3089, 'serial': 2421},
+                       {'spell': 'religion', 'meaning': '宗教', 'word_id': 1062490, 'meaning_id': 3358, 'serial': 2626},
+                       {'spell': 'collapse', 'meaning': '倒塌, 崩溃', 'word_id': 819500, 'meaning_id': 3667, 'serial': 2872},
+                       {'spell': 'bare', 'meaning': '光秃秃的', 'word_id': 777035, 'meaning_id': 4592, 'serial': 3650},
+                       {'spell': 'defendant', 'meaning': '被告的, 被告人', 'word_id': 1174797, 'meaning_id': 4975, 'serial': 3979},
+                       {'spell': 'interact', 'meaning': '互相作用, 互动', 'word_id': 943776, 'meaning_id': 5117, 'serial': 4103},
+                       {'spell': 'fact', 'meaning': '事实, 真相', 'word_id': 882302, 'meaning_id': 425, 'serial': 289},
+                       {'spell': 'except', 'meaning': '除了…之外', 'word_id': 878228, 'meaning_id': 814, 'serial': 561},
+                       {'spell': 'opposite', 'meaning': '相反, 对面', 'word_id': 1008508, 'meaning_id': 1650, 'serial': 1207},
+                       {'spell': 'clerk', 'meaning': '职员, 店员', 'word_id': 815428, 'meaning_id': 1826, 'serial': 1354},
+                       {'spell': 'chief', 'meaning': '主要的,首要的', 'word_id': 810493, 'meaning_id': 2067, 'serial': 1552},
+                       {'spell': 'congratulation', 'meaning': '祝贺, 贺辞', 'word_id': 826539, 'meaning_id': 2187, 'serial': 1657},
+                       {'spell': 'chest', 'meaning': '大箱子', 'word_id': 810293, 'meaning_id': 2223, 'serial': 1689},
+                       {'spell': 'monitor', 'meaning': '班长', 'word_id': 988984, 'meaning_id': 2262, 'serial': 1724},
+                       {'spell': 'accurate', 'meaning': '正确的, 精确的', 'word_id': 747138, 'meaning_id': 2278, 'serial': 1739},
+                       {'spell': 'investigate', 'meaning': '调查, 研究', 'word_id': 947316, 'meaning_id': 2359, 'serial': 1806},
+                       {'spell': 'forecast', 'meaning': '预报, 预测', 'word_id': 895859, 'meaning_id': 2495, 'serial': 1921},
+                       {'spell': 'sausage', 'meaning': '香肠, 腊肠', 'word_id': 1076506, 'meaning_id': 2536, 'serial': 1961},
+                       {'spell': 'insurance', 'meaning': '保险', 'word_id': 943100, 'meaning_id': 3044, 'serial': 2380},
+                       {'spell': 'reveal', 'meaning': '揭示, 暴露, 展现', 'word_id': 1066342, 'meaning_id': 3246, 'serial': 2544},
+                       {'spell': 'perception', 'meaning': '观念, 知觉, 觉察', 'word_id': 1174551, 'meaning_id': 3516, 'serial': 2749},
+                       {'spell': 'violation', 'meaning': '妨碍, 侵犯, 违犯', 'word_id': 1174695, 'meaning_id': 4452, 'serial': 3528},
+                       {'spell': 'convey', 'meaning': '表达', 'word_id': 830280, 'meaning_id': 4931, 'serial': 3938},
+                       {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063}
+                       ]
+    shuffle(core_words_list)
+    core_words_chiose_list = sample(core_words_list,5)
+    json_data = {'core_words': core_words_chiose_list,
+                 'take_count': 8, 'student_stage': 2, 'demo_name': '春笋英语', "exercise_id": randint(100,999),
+                 "article_length": 120, "reading_level": 5}
 
     r = requests.post(f"{use_address}/article/reading-comprehension", json=json_data)
     r_json = r.json()
+    print(r_json)
     try:
         return r_json
     except Exception as e:
         print("春笋文章reading-comprehension错误", e)
         print("错误数据", r_json)
 
@time_use
def get_article2_2():
    """Bypass the local service and hit the OpenAI-compatible endpoint directly.

    Builds the same article-generation prompt the backend uses, asks for 8
    parallel completions (n=8) constrained by a json_schema response_format,
    and returns the parsed JSON body of the chat/completions response.

    Raises httpx.HTTPStatusError on a non-2xx response instead of trying to
    JSON-parse an error page.
    """
    core_words_list = [{'spell': 'sudden', 'meaning': '突然的, 意外的', 'word_id': 1114468, 'meaning_id': 1734},
                       {'spell': 'frighten', 'meaning': '惊吓, 惊恐', 'word_id': 899278, 'meaning_id': 1735},
                       {'spell': 'relation', 'meaning': '关系, 联系, 亲戚, 亲属', 'word_id': 1061800, 'meaning_id': 1736},
                       {'spell': 'agreement', 'meaning': '协议,协定', 'word_id': 753401, 'meaning_id': 1743},
                       {'spell': 'risk', 'meaning': '冒险, 风险', 'word_id': 1069002, 'meaning_id': 1748},
                       {'spell': 'centre', 'meaning': '中心', 'word_id': 806629, 'meaning_id': 1749},
                       {'spell': 'shut', 'meaning': '关上, 关闭', 'word_id': 1088662, 'meaning_id': 1751},
                       {'spell': 'thought', 'meaning': '思想, 想法', 'word_id': 1130826, 'meaning_id': 685},
                       {'spell': 'information', 'meaning': '消息, 信息', 'word_id': 940351, 'meaning_id': 487, 'serial': 330},
                       {'spell': 'bright', 'meaning': '聪明的', 'word_id': 793695, 'meaning_id': 1451, 'serial': 1048},
                       {'spell': 'international', 'meaning': '国际的', 'word_id': 945460, 'meaning_id': 1683, 'serial': 1232},
                       {'spell': 'shelf', 'meaning': '架子, 搁板', 'word_id': 1086743, 'meaning_id': 1838, 'serial': 1366},
                       {'spell': 'cave', 'meaning': '洞穴, 山洞', 'word_id': 805431, 'meaning_id': 2167, 'serial': 1639},
                       {'spell': 'gym', 'meaning': '健身房, 体育馆', 'word_id': 915473, 'meaning_id': 2217, 'serial': 1683},
                       {'spell': 'properly', 'meaning': '适当地, 正确地', 'word_id': 1045343, 'meaning_id': 2257, 'serial': 1720},
                       {'spell': 'platform', 'meaning': '平台', 'word_id': 1031256, 'meaning_id': 2269, 'serial': 1730},
                       {'spell': 'sweep', 'meaning': '打扫, 清扫', 'word_id': 1118098, 'meaning_id': 2321, 'serial': 1775},
                       {'spell': 'clinic', 'meaning': '诊所, 门诊部', 'word_id': 815699, 'meaning_id': 2471, 'serial': 1898},
                       {'spell': 'sauce', 'meaning': '酱油, 调味料', 'word_id': 1076452, 'meaning_id': 2501, 'serial': 1927},
                       {'spell': 'retell', 'meaning': '重讲, 复述', 'word_id': 1065717, 'meaning_id': 2546, 'serial': 1970},
                       {'spell': 'specific', 'meaning': '具体的, 明确的', 'word_id': 1099668, 'meaning_id': 3089, 'serial': 2421},
                       {'spell': 'religion', 'meaning': '宗教', 'word_id': 1062490, 'meaning_id': 3358, 'serial': 2626},
                       {'spell': 'collapse', 'meaning': '倒塌, 崩溃', 'word_id': 819500, 'meaning_id': 3667, 'serial': 2872},
                       {'spell': 'bare', 'meaning': '光秃秃的', 'word_id': 777035, 'meaning_id': 4592, 'serial': 3650},
                       {'spell': 'defendant', 'meaning': '被告的, 被告人', 'word_id': 1174797, 'meaning_id': 4975, 'serial': 3979},
                       {'spell': 'interact', 'meaning': '互相作用, 互动', 'word_id': 943776, 'meaning_id': 5117, 'serial': 4103},
                       {'spell': 'fact', 'meaning': '事实, 真相', 'word_id': 882302, 'meaning_id': 425, 'serial': 289},
                       {'spell': 'except', 'meaning': '除了…之外', 'word_id': 878228, 'meaning_id': 814, 'serial': 561},
                       {'spell': 'opposite', 'meaning': '相反, 对面', 'word_id': 1008508, 'meaning_id': 1650, 'serial': 1207},
                       {'spell': 'clerk', 'meaning': '职员, 店员', 'word_id': 815428, 'meaning_id': 1826, 'serial': 1354},
                       {'spell': 'chief', 'meaning': '主要的,首要的', 'word_id': 810493, 'meaning_id': 2067, 'serial': 1552},
                       {'spell': 'congratulation', 'meaning': '祝贺, 贺辞', 'word_id': 826539, 'meaning_id': 2187, 'serial': 1657},
                       {'spell': 'chest', 'meaning': '大箱子', 'word_id': 810293, 'meaning_id': 2223, 'serial': 1689},
                       {'spell': 'monitor', 'meaning': '班长', 'word_id': 988984, 'meaning_id': 2262, 'serial': 1724},
                       {'spell': 'accurate', 'meaning': '正确的, 精确的', 'word_id': 747138, 'meaning_id': 2278, 'serial': 1739},
                       {'spell': 'investigate', 'meaning': '调查, 研究', 'word_id': 947316, 'meaning_id': 2359, 'serial': 1806},
                       {'spell': 'forecast', 'meaning': '预报, 预测', 'word_id': 895859, 'meaning_id': 2495, 'serial': 1921},
                       {'spell': 'sausage', 'meaning': '香肠, 腊肠', 'word_id': 1076506, 'meaning_id': 2536, 'serial': 1961},
                       {'spell': 'insurance', 'meaning': '保险', 'word_id': 943100, 'meaning_id': 3044, 'serial': 2380},
                       {'spell': 'reveal', 'meaning': '揭示, 暴露, 展现', 'word_id': 1066342, 'meaning_id': 3246, 'serial': 2544},
                       {'spell': 'perception', 'meaning': '观念, 知觉, 觉察', 'word_id': 1174551, 'meaning_id': 3516, 'serial': 2749},
                       {'spell': 'violation', 'meaning': '妨碍, 侵犯, 违犯', 'word_id': 1174695, 'meaning_id': 4452, 'serial': 3528},
                       {'spell': 'convey', 'meaning': '表达', 'word_id': 830280, 'meaning_id': 4931, 'serial': 3938},
                       {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063}
                       ]
    # sample() already draws a uniform random subset; the prior shuffle() was redundant.
    core_words_chosen_list = sample(core_words_list, 5)
    core_words_meaning_str = "; ".join([f"[{i['meaning_id']}  {i['spell']} {i['meaning']}]" for i in core_words_chosen_list])

    question = f"""下面我会为你提供一组数据,[单词组](里面包含词义id,英语单词,中文词义),请根据这些单词的中文词义,生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。

要求:
1.必须用提供的这个词义的单词,其他单词使用最简单最容易没有难度的单词。文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。选择题难度尽可能简单,参考中国小学生水平
2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
4.生成的文章要求120词左右,可以用\\n\\n字符分段,一般1-2个段落左右。第一段是文章标题。
5.允许不使用[单词组]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
6.注意回复字段的中英文,englishArticle是英文,chineseArticle是中文,其中trunk是英文,analysis是中文,text是英文。

提供[单词组]:{core_words_meaning_str}
"""

    url = "http://170.106.108.95/v1/chat/completions"

    # SECURITY: this API key is committed to the repository — it must be treated
    # as leaked: revoke it and load the replacement from an environment variable.
    # Kept verbatim here only to preserve the mock's current behavior.
    # (Also dropped the original's pointless f-string: the literal has no placeholders.)
    headers = {
        "Authorization": "Bearer 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",
        "Content-Type": "application/json"
    }

    # n=8 requests eight completions in one call; response_format pins the model
    # output to the Article json_schema (mirrors the pydantic models above).
    data = {
        "model": "gpt-4.1",
        "messages": [
            {"role": "user", "content": question}
        ],
        "max_tokens": 4000,
        "temperature": 1.2,
        "n": 8,
        "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {'properties': {'label': {'title': 'Label', 'type': 'string'}, 'text': {'title': 'Text', 'type': 'string'}, 'isRight': {'title': 'Isright', 'type': 'integer'}}, 'required': ['label', 'text', 'isRight'], 'title': 'Candidate', 'type': 'object'}, 'DifficultSentence': {'properties': {'english': {'title': 'English', 'type': 'string'}, 'chinese': {'title': 'Chinese', 'type': 'string'}}, 'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {'properties': {'trunk': {'title': 'Trunk', 'type': 'string'}, 'analysis': {'title': 'Analysis', 'type': 'string'}, 'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': 'Candidates', 'type': 'array'}}, 'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {'difficultSentences': {'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': 'Difficultsentences', 'type': 'array'}, 'usedMeanIds': {'items': {'type': 'integer'}, 'title': 'Usedmeanids', 'type': 'array'}, 'questions': {'items': {'$ref': '#/$defs/Question'}, 'title': 'Questions', 'type': 'array'}, 'englishArticle': {'title': 'Englisharticle', 'type': 'string'}, 'chineseArticle': {'title': 'Chinesearticle', 'type': 'string'}, 'allWordAmount': {'title': 'Allwordamount', 'type': 'integer'}}, 'required': ['difficultSentences', 'usedMeanIds', 'questions', 'englishArticle', 'chineseArticle', 'allWordAmount'], 'title': 'Article', 'type': 'object'}}}
    }

    response = httpx.post(url, headers=headers, json=data, timeout=300)
    # Fail loudly on HTTP errors instead of JSON-parsing an error page.
    response.raise_for_status()
    # Parse once instead of the original's double response.json() call.
    body = response.json()
    print(body)
    return body
+
 
 def download_word():
-    from make_docx_demo.data import test_json1
+    from make_docx_demo.data import test_json2
     params = {"document_format": 2, "scanpage_format": 1}
 
-    r = requests.post(f"{use_address}/make_word/vocabulary_assault", params=params, json=test_json1)
+    r = requests.post(f"{use_address}/make_word/vocabulary_assault", params=params, json=test_json2)
     r.raise_for_status()
     suffix = {1: "docx", 2: "pdf"}[params['document_format']]
     with open(f"test.{suffix}", "wb") as f:
@@ -193,12 +344,31 @@ def run_all_test_cese():
    
 
 
@time_use
def multi_request():
    """Fan out 30 concurrent requests against the article endpoint.

    Blocks until every task finishes, then drains the futures so any worker
    exception is re-raised here rather than silently swallowed.
    """
    with ThreadPoolExecutor(max_workers=50) as executor:
        pending = [executor.submit(get_article2_1) for _ in range(30)]

        # Block until the whole batch is done before reporting anything.
        wait(pending)
        print("完成等待")

        counter = 0
        for fut in pending:
            counter += 1
            # result() re-raises any exception the worker hit.
            fut.result()
            print(f"完成循环{counter}")
+
+
if __name__ == '__main__':

    # Manual stress-test entry point: hammer the article endpoint concurrently.
    multi_request()
 
+   
+   
 
    
-    print(get_article2_1())
+
    
    
+
+   

+ 1 - 1
tools/audio.py

@@ -82,7 +82,7 @@ class GetAudio:
         self.token = None
         self.lock = Lock()
 
-        self.re_compile = re.compile("[.!?;*]")
+        self.re_compile = re.compile("[.!?;*\"]")
 
    
     def upload_file_to_oss(self, word_or_hash_name, oss_file_name, local_file_path):

+ 1 - 1
tools/new_mysql.py

@@ -13,7 +13,7 @@ class MySQLUploader:
 
     def __new__(cls, *args, **kwargs):
         if not cls._instance:
-            cls._instance = super(MySQLUploader, cls).__new__(cls, *args, **kwargs)
+            cls._instance = super(MySQLUploader, cls).__new__(cls)
         return cls._instance
 
     def __init__(self, database='qbank_db'):

+ 1 - 1
tools/thread_pool_manager.py

@@ -2,4 +2,4 @@
 from concurrent.futures import ThreadPoolExecutor, wait
 
 
-pool_executor = ThreadPoolExecutor(max_workers=20)
# Shared worker pool; sized at 200 to support high-concurrency article
# generation requests (raised from 20). Threads suit this I/O-bound workload.
pool_executor = ThreadPoolExecutor(max_workers=200)