
Add word-meaning annotation interface (增加词义标注接口)

xie · 3 weeks ago · parent commit 3c20162585
52 changed files with 1634 additions and 1782 deletions
  1. common/common_data.py (+5 -9)
  2. common/split_text.py (+21 -15)
  3. config/read_config.py (+4 -4)
  4. core/api_article_annotation.py (+35 -0)
  5. core/api_get_article.py (+22 -22)
  6. core/api_get_article2.py (+18 -19)
  7. core/api_get_article3.py (+16 -14)
  8. core/api_get_audio.py (+14 -16)
  9. core/api_get_spoken_language.py (+11 -13)
  10. core/api_get_word.py (+7 -14)
  11. core/api_routes_jwt.py (+21 -20)
  12. core/respone_format.py (+5 -2)
  13. data/get_all_exchange_words.py (+12 -13)
  14. data/get_frequency_script.py (+8 -8)
  15. data/json_word_frequency.json (+0 -0)
  16. data/春笋词义表.xlsx (BIN)
  17. deepseek/ds_api.py (+15 -18)
  18. deepseek/get_article3.py (+27 -46)
  19. gpt/article_annotation.py (+203 -0)
  20. gpt/chatgpt.py (+128 -132)
  21. gpt/get_article.py (+110 -139)
  22. gpt/get_article2.py (+51 -96)
  23. gpt/gpt.py (+23 -27)
  24. gpt/gpt_check.py (+67 -36)
  25. gpt/query_oss_file.py (+9 -4)
  26. main.py (+16 -17)
  27. main_9000.py (+19 -15)
  28. make_docx_demo/check_test_table/aaaaaaaaaa.py (+3 -4)
  29. make_docx_demo/check_test_table/baidu_ocr.py (+7 -6)
  30. make_docx_demo/check_test_table/image_preprocess.py (+75 -126)
  31. make_docx_demo/check_test_table/image_preprocess2.py (+72 -115)
  32. make_docx_demo/check_test_table/mark_ocr_loca.py (+4 -8)
  33. make_docx_demo/data.py (+0 -2)
  34. make_docx_demo/docx_other_func.py (+38 -42)
  35. make_docx_demo/get_standard_data.py (+9 -11)
  36. make_docx_demo/main_word.py (+114 -169)
  37. make_docx_demo/main_word_applet.py (+147 -244)
  38. make_docx_demo/new_word2pdf.py (+10 -8)
  39. make_docx_demo/word2pdf.py (+11 -25)
  40. make_docx_demo/word_component/make_rectangle.py (+6 -9)
  41. mock/mock_request.py (+104 -62)
  42. spoken_language/common/utils.py (+2 -1)
  43. spoken_language/read_config.py (+4 -7)
  44. spoken_language/soe/speaking_assessment.py (+15 -15)
  45. spoken_language/soeexample.py (+23 -40)
  46. tools/ali_log.py (+8 -13)
  47. tools/audio.py (+36 -62)
  48. tools/del_expire_file.py (+9 -12)
  49. tools/loglog.py (+27 -18)
  50. tools/new_mysql.py (+16 -39)
  51. tools/sql_format.py (+25 -42)
  52. tools/thread_pool_manager.py (+2 -3)

+ 5 - 9
common/common_data.py

@@ -1,21 +1,17 @@
 # -*- coding: utf-8 -*-
-import os
 import json
+import os
 
 SECRET_KEY = os.getenv("key")
 
 try:
-    with open("data/json_word_frequency.json","r",encoding="utf-8") as f:
+    with open("data/json_word_frequency.json", "r", encoding="utf-8") as f:
         word_frequency = json.loads(f.read())
-except FileNotFoundError: 
+except FileNotFoundError:
     with open(r"C:\Users\pan\Desktop\demo\qback\data\json_word_frequency.json", "r", encoding="utf-8") as f:
         word_frequency = json.loads(f.read())
 
+all_json_words_set = {word for key, word in word_frequency.items()}
 
-all_json_words_set = {word for key,word in word_frequency.items()}
-
-
-with open("data/all_exchange_words.txt","r",encoding="utf-8") as f:
+with open("data/all_exchange_words.txt", "r", encoding="utf-8") as f:
     all_exchange_words = set(f.read().split("\n"))
-
-

+ 21 - 15
common/split_text.py

@@ -1,27 +1,36 @@
 # -*- coding: utf-8 -*-
 import re
 
-def split_text_to_word(text:str):
-    words_list = re.findall(r'\b[-\'\w]+\b',text)
+
+def split_text_to_word(text: str, split_hyphen=False):
+    """
+    split_hyphen: 是否分拆-连字符,默认不拆
+    """
+    if split_hyphen:
+        words_list = re.findall(r'\b[\'\w]+\b', text)
+    else:
+        words_list = re.findall(r'\b[-\'\w]+\b', text)
     return words_list
 
-def get_article_words_count(text:str):
+
+def get_article_words_count(text: str):
     return len(split_text_to_word(text))
 
 
-def split_text_to_sentences(text:str) -> list:
-    sentences = re.split(r'(?<=[.!?;])', text) 
+def split_text_to_sentences(text: str) -> list:
+    sentences = re.split(r'(?<=[.!?;])', text)
     sentences = [i for i in sentences if i.replace(" ", "")]
     return sentences
 
 
-def split_text_to_word_punctuation(text:str):
-    word_punctuation_list = re.findall(r'\b[-\'\w]+\b|[^\w\s]|\n',text)
+def split_text_to_word_punctuation(text: str):
+    word_punctuation_list = re.findall(r'\b[-\'\w]+\b|[^\w\s]|\n', text)
     return word_punctuation_list
 
-def is_word(single_word:str,strict:bool=False):
+
+def is_word(single_word: str, strict: bool = False):
     """strict 严格模式,默认不开。严格模式下,每个实体字符必须是字母。全部都是字母才算是单词
-    非严格模式下,有一个字母就算是单词。即使是 op123
+    非严格模式下,有一个字母就算是单词。即使是 op123,it's
     """
     single_word = single_word.strip()
     if strict:
@@ -30,15 +39,12 @@ def is_word(single_word:str,strict:bool=False):
             return True
         return False
 
-    if re.search(r'[a-zA-Z]', single_word):
+    if re.search(r'[\'a-zA-Z]', single_word):
         return True
     return False
 
 
 if __name__ == '__main__':
-
     a = "fdh fgdhf fgd-y i'am a student.gfddfgfd dfhgfd ! fdgh,fdgh fght. 3.1415"
-   
-   
-   
-    print(is_word("student34",strict=True))
+
+    print(is_word("student34", strict=True))

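Reviewer note: a quick sketch of what the new split_hyphen flag changes (sample strings are made up; expected outputs follow from the regexes above):

from common.split_text import split_text_to_word, is_word

text = "a well-known fact"                           # hypothetical sample input
print(split_text_to_word(text))                      # ['a', 'well-known', 'fact']
print(split_text_to_word(text, split_hyphen=True))   # ['a', 'well', 'known', 'fact']
print(is_word("student34"))                          # True: non-strict needs only one letter
print(is_word("student34", strict=True))             # False: strict requires all letters
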
+ 4 - 4
config/read_config.py

@@ -3,15 +3,15 @@ import yaml
 
 
 def read_config():
-   
-    with open("config/env.yaml", "r",encoding="utf-8") as file:
+    with open("config/env.yaml", "r", encoding="utf-8") as file:
         config = yaml.safe_load(file)
         return config
 
-address = "https://dcjxb.yunzhixue.cn" if read_config()['env']=='product' else "http://dcjxbtest.yunzhixue.cn"
 
+address = "https://dcjxb.yunzhixue.cn" if read_config()['env'] == 'product' else "http://dcjxbtest.yunzhixue.cn"
 
 if __name__ == '__main__':
     import os
+
     os.chdir(r'C:\Users\86131\Desktop\demo\ai_qback')
-    print(read_config()['env'])
+    print(read_config()['env'])

+ 35 - 0
core/api_article_annotation.py

@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+
+from fastapi import Request, APIRouter, Query
+from pydantic import BaseModel
+
+from core.respone_format import *
+from gpt.article_annotation import Annotation
+
+router_article_annotation = APIRouter()
+annotation_obj = Annotation()
+
+
+class Annotation(BaseModel):
+    english_text: str
+
+
+@router_article_annotation.post("/article/meaning/annotation")
+def post_annotation(json_data: Annotation, request: Request, ):
+    """词义标注的同步接口"""
+    json_data = json_data.model_dump()
+    english_text = json_data.get("english_text")
+    real_ip = request.headers.get("X-Real-IP", "0.0.0.0")
+
+    resp = annotation_obj.submit_task(
+        english_text=english_text,
+        real_ip=real_ip,
+    )
+    return resp_200(data=resp)
+
+
+@router_article_annotation.get("/article/query_annotation")
+async def query_annotation(task_id: int = Query(...)):
+    resp = await annotation_obj.query_result_by_taskid(task_id)
+    return resp

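Reviewer note: a minimal usage sketch for the two new routes (host and port are assumptions; paths and field names come from the router above):

import requests

# synchronous route: the annotation result comes back inside the resp_200 wrapper
r = requests.post("http://localhost:8000/article/meaning/annotation",
                  json={"english_text": "I am a student."})
print(r.json())

# companion query route, keyed by the server-generated task_id
print(requests.get("http://localhost:8000/article/query_annotation",
                   params={"task_id": 12345678}).json())
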
+ 22 - 22
core/api_get_article.py

@@ -1,44 +1,45 @@
 # -*- coding: utf-8 -*-
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
-from tools.loglog import logger
-from gpt.get_article import GetArticle
-from gpt.query_oss_file import query_file_content
-from core.respone_format import *
-
-from pydantic import BaseModel, ValidationError, conint
 from typing import List, Optional
 
+from fastapi import Request, APIRouter
+from pydantic import BaseModel, conint
+
+from core.respone_format import *
+from gpt.get_article import GetArticle
+from gpt.query_oss_file import query_file_content
+from tools.loglog import logger
 
 router = APIRouter()
 get_article = GetArticle()
 
 
 class ArticleRequest(BaseModel):
-    meaning_ids: List[conint(ge=1)] 
-    callback_url: Optional[str] = None 
-    demo_name: Optional[str] = "无" 
-    student_stage: Optional[int] = 1 
-    vocabulary: Optional[int] = 500 
-    class_id :Optional[int]
+    meaning_ids: List[conint(ge=1)]
+    callback_url: Optional[str] = None
+    demo_name: Optional[str] = "无"
+    student_stage: Optional[int] = 1
+    vocabulary: Optional[int] = 500
+    class_id: Optional[int]
+
 
 @router.post("/article")
-def post_article(json_data:ArticleRequest,request:Request):
-    real_ip = request.headers.get("X-Real-IP","localhost")
+def post_article(json_data: ArticleRequest, request: Request):
+    real_ip = request.headers.get("X-Real-IP", "localhost")
     words_meaning_ids: list = json_data.meaning_ids
     callback_url = json_data.callback_url
     demo_name = json_data.demo_name
     student_stage = json_data.student_stage
     vocabulary = json_data.vocabulary
-    class_id = json_data.class_id 
+    class_id = json_data.class_id
 
     try:
         if not words_meaning_ids:
             return resp_404(message="没有词义id")
 
-        r = get_article.submit_task(words_meaning_ids=words_meaning_ids,callback_url=callback_url,
-                                    real_ip=real_ip,demo_name=demo_name,
-                                    student_stage=student_stage,vocabulary=vocabulary,class_id=class_id)
-        return r if not isinstance(r,str) else resp_500(message=r)
+        r = get_article.submit_task(words_meaning_ids=words_meaning_ids, callback_url=callback_url,
+                                    real_ip=real_ip, demo_name=demo_name,
+                                    student_stage=student_stage, vocabulary=vocabulary, class_id=class_id)
+        return r if not isinstance(r, str) else resp_500(message=r)
 
     except Exception as e:
         logger.error(f"{type(e).__name__},{e}")
@@ -46,7 +47,7 @@ def post_article(json_data:ArticleRequest,request:Request):
 
 
 @router.post("/query_oss_file")
-def query_oss_file(json_data:dict,request:Request):
+def query_oss_file(json_data: dict, request: Request):
     oss_key = json_data.get("key")
 
     if not oss_key:
@@ -55,4 +56,3 @@ def query_oss_file(json_data:dict,request:Request):
     if j == 0:
         return resp_500(message="错误:没有这个文件")
     return JSONResponse(j)
-

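Reviewer note: a request sketch for POST /article (field names from ArticleRequest above; the host and all values are illustrative, not real data):

import requests

payload = {
    "meaning_ids": [110, 111, 112],  # conint(ge=1) ids
    "callback_url": None,
    "demo_name": "无",
    "student_stage": 1,
    "vocabulary": 500,
    "class_id": 1,
}
r = requests.post("http://localhost:8000/article", json=payload)
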
+ 18 - 19
core/api_get_article2.py

@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path, Depends, BackgroundTasks
-from tools.loglog import logger,log_err_e
+from typing import List, Optional
+
+from fastapi import Request, APIRouter, BackgroundTasks
+from pydantic import BaseModel, Field, conint
 
 from core.respone_format import *
 from gpt.get_article2 import GetArticle
-from pydantic import BaseModel, ValidationError, conint,Field,conint
-from typing import List, Optional,Literal
-import asyncio
+from tools.loglog import log_err_e
 
 router = APIRouter()
 
@@ -15,35 +15,34 @@ get_article = GetArticle()
 
 
 class Word(BaseModel):
-    meaning_id:int = Field(..., description="单词的词义id")
-    word_id:int = Field(..., description="单词id")
+    meaning_id: int = Field(..., description="单词的词义id")
+    word_id: int = Field(..., description="单词id")
     spell: str = Field(..., description="单词的拼写")
     meaning: str = Field(..., description="单词的意思")
 
 
 class ArticleRequest(BaseModel):
     core_words: List[Word] = Field(..., description="单词列表")
-    take_count: int = 2 
-    demo_name: Optional[str] = "无" 
+    take_count: int = 2
+    demo_name: Optional[str] = "无"
     reading_level: conint(ge=1, le=30) = Field(default=10, description="阅读水平,默认值为10;[8,16,24]小学初中高中")
-    article_length:int = Field(default=None,description="需要生成的文章长度,可以不传,不传自己根据reading_level判断")
-    exercise_id:int = Field(default=0,description="学案ID,用于日志快速定位")
+    article_length: int = Field(default=None, description="需要生成的文章长度,可以不传,不传自己根据reading_level判断")
+    exercise_id: int = Field(default=0, description="学案ID,用于日志快速定位")
 
 
 @router.post("/article/reading-comprehension")
 def post_article(
-    json_data:ArticleRequest,
-    request:Request,
-    background_tasks: BackgroundTasks,
+        json_data: ArticleRequest,
+        request: Request,
+        background_tasks: BackgroundTasks,
 ):
-
     json_data = json_data.model_dump()
-    real_ip = request.headers.get("X-Real-IP","0.0.0.0")
+    real_ip = request.headers.get("X-Real-IP", "0.0.0.0")
 
     core_words = json_data["core_words"]
     take_count = json_data["take_count"]
     demo_name = json_data["demo_name"]
-    reading_level = json_data["reading_level"] 
+    reading_level = json_data["reading_level"]
    article_length = json_data["article_length"]
     exercise_id = json_data["exercise_id"]
 
@@ -58,8 +57,8 @@ def post_article(
             exercise_id=exercise_id,
             background_tasks=background_tasks
         )
-        return r if not isinstance(r,str) else resp_500(message=r)
+        return r if not isinstance(r, str) else resp_500(message=r)
 
     except Exception as e:
         log_err_e(e, msg="文章2接口错误/article/reading-comprehension;")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        return resp_500(message=f"{type(e).__name__},{e}")

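Reviewer note: the matching request shape for /article/reading-comprehension (schema from Word/ArticleRequest above; values are illustrative):

payload = {
    "core_words": [{"meaning_id": 110, "word_id": 11,
                    "spell": "apple", "meaning": "苹果"}],
    "take_count": 2,
    "demo_name": "无",
    "reading_level": 10,     # 1-30; [8, 16, 24] map to 小学/初中/高中
    "article_length": None,  # omit to let the service derive it from reading_level
    "exercise_id": 0,
}
# requests.post("http://localhost:8000/article/reading-comprehension", json=payload)
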
+ 16 - 14
core/api_get_article3.py

@@ -1,17 +1,18 @@
 # -*- coding: utf-8 -*-
 
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
-from tools.loglog import logger,log_err_e
+from typing import List, Optional, Literal
+
+from fastapi import Request, APIRouter
+from pydantic import BaseModel, Field
 
 from core.respone_format import *
 from deepseek.get_article3 import GetArticle
-from pydantic import BaseModel, ValidationError, conint,Field
-from typing import List, Optional,Literal
-
+from tools.loglog import log_err_e
 
 router = APIRouter(tags=['deepseek接口'])
 get_article = GetArticle()
 
+
 class Word(BaseModel):
     spell: str = Field(..., description="单词的拼写")
     meaning: str = Field(..., description="单词的意思")
@@ -19,21 +20,22 @@ class Word(BaseModel):
 
 class ArticleRequest(BaseModel):
     words: List[Word] = Field(..., description="单词列表")
-    take_count: int = 2 
-    student_stage: Literal[1, 2, 3] 
-    demo_name: Optional[str] = "无" 
+    take_count: int = 2
+    student_stage: Literal[1, 2, 3]
+    demo_name: Optional[str] = "无"
 
 
 @router.post("/article/reading-comprehension/deepseek")
-def post_article(json_data:ArticleRequest,request:Request):
+def post_article(json_data: ArticleRequest, request: Request):
     json_data = json_data.dict()
     real_ip = request.headers.get("X-Real-IP")
-    words,take_count,student_stage,demo_name = json_data["words"],json_data["take_count"],json_data["student_stage"],json_data["demo_name"]
+    words, take_count, student_stage, demo_name = json_data["words"], json_data["take_count"], json_data["student_stage"], json_data["demo_name"]
 
     try:
-        r = get_article.submit_task(words_meaning_list=words, take_count=take_count,student_stage=student_stage,real_ip=real_ip,demo_name=demo_name)
-        return r if not isinstance(r,str) else resp_500(message=r)
+        r = get_article.submit_task(words_meaning_list=words, take_count=take_count, student_stage=student_stage, real_ip=real_ip,
+                                    demo_name=demo_name)
+        return r if not isinstance(r, str) else resp_500(message=r)
 
     except Exception as e:
-        log_err_e(e,msg="文章3,ds接口错误/article/reading-comprehension/deepseek;")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        log_err_e(e, msg="文章3,ds接口错误/article/reading-comprehension/deepseek;")
+        return resp_500(message=f"{type(e).__name__},{e}")

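Reviewer note: the deepseek variant takes a slimmer payload (schema from the ArticleRequest above; values are illustrative):

payload = {
    "words": [{"spell": "apple", "meaning": "苹果"}],
    "take_count": 2,
    "student_stage": 1,  # Literal[1, 2, 3]: 小学/初中/高中
    "demo_name": "无",
}
# requests.post("http://localhost:8000/article/reading-comprehension/deepseek", json=payload)
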
+ 14 - 16
core/api_get_audio.py

@@ -1,24 +1,25 @@
 # -*- coding: utf-8 -*-
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
+import traceback
+
+from fastapi import Request, APIRouter, Query
 from fastapi.responses import StreamingResponse
-from tools.loglog import logger
-from tools.audio import GetAudio
+
 from core.respone_format import *
-import traceback
+from tools.audio import GetAudio
+from tools.loglog import logger
 
 router = APIRouter()
 get_audio = GetAudio()
 
-
-
 """
 生成音频tts接口,传递两个参数,word和resp_type; word和resp_type回复设计:0返回oss路径,1 二进制文件,2 url三种;
 """
 
+
 @router.get("/tts")
-def get_tts(word:str=Query(None, max_length=300)):
+def get_tts(word: str = Query(None, max_length=300)):
     try:
-        f = get_audio.submit_task(word_or_phrase=word,resp_type=0) 
+        f = get_audio.submit_task(word_or_phrase=word, resp_type=0)
         r = f.result()
         if r:
             return resp_200(data=r)
@@ -30,24 +31,21 @@ def get_tts(word:str=Query(None, max_length=300)):
 
 
 @router.post("/tts")
-def get_tts(json_data:dict,request:Request):
-   
-
+def get_tts(json_data: dict, request: Request):
     word_or_phrase = json_data["text"]
     resp_type = json_data.get("type")
 
-   
     if len(word_or_phrase) >= 300:
         logger.error(f"单词或短语过长")
         return resp_400(message="单词或短语过长")
-    if resp_type not in [0,1,2]:
+    if resp_type not in [0, 1, 2]:
         logger.error(f"type参数不是012")
         return resp_400(message="type参数不是012")
 
     try:
-        f = get_audio.submit_task(word_or_phrase=word_or_phrase,resp_type=resp_type)
+        f = get_audio.submit_task(word_or_phrase=word_or_phrase, resp_type=resp_type)
         r = f.result()
-        if r and resp_type in [0,2]:
+        if r and resp_type in [0, 2]:
             return resp_200(data=r)
         if r and resp_type == 1:
             return StreamingResponse(content=r, media_type='audio/mpeg')
@@ -57,4 +55,4 @@ def get_tts(json_data:dict,request:Request):
         traceback_str = traceback.format_exc()
         logger.error(traceback_str)
         logger.error(f"{type(e).__name__},{e}")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        return resp_500(message=f"{type(e).__name__},{e}")

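Reviewer note: a usage sketch for the two tts routes (host assumed; the type semantics are taken from the docstring above):

import requests

# GET variant always returns the OSS path (resp_type is fixed to 0)
print(requests.get("http://localhost:8000/tts", params={"word": "apple"}).json())

# POST variant: type 0 -> OSS path, 1 -> binary audio stream, 2 -> URL
r = requests.post("http://localhost:8000/tts", json={"text": "apple", "type": 1})
with open("apple.mp3", "wb") as f:  # type 1 streams audio/mpeg
    f.write(r.content)
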
+ 11 - 13
core/api_get_spoken_language.py

@@ -1,34 +1,32 @@
 # -*- coding: utf-8 -*-
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path,UploadFile,File
-from tools.loglog import logger,log_err_e
-from spoken_language.soeexample import spoken_result,make_spoken
 from random import randint
-from core.respone_format import *
 
+from fastapi import Form, Request, APIRouter, UploadFile, File
+
+from core.respone_format import *
+from spoken_language.soeexample import make_spoken
+from tools.loglog import logger, log_err_e
 
 router = APIRouter()
 
 
 @router.post("/spoken_language")
-async def post_article(request:Request,url=Form(""),file: UploadFile = File(None),text=Form(...)):
-
+async def post_article(request: Request, url=Form(""), file: UploadFile = File(None), text=Form(...)):
     if not url and not file:
         logger.error("错误:请上传mp3文件url参数或者二进制文件file参数")
         return resp_404(message="错误:请上传mp3文件url参数或者二进制文件file参数")
     try:
-        task_id = randint(10000,99999)
-       
+        task_id = randint(10000, 99999)
+
         if file:
             file_content = await file.read()
         else:
             file_content = None
-        data:dict = make_spoken(task_id,url,file_content,text)
+        data: dict = make_spoken(task_id, url, file_content, text)
         if data:
             logger.success(f"完成spoken_language请求:{data}")
-           
+
             return data
     except Exception as e:
-        log_err_e(e,msg="口语评测接口")
+        log_err_e(e, msg="口语评测接口")
         return resp_500(message=f"{type(e).__name__},{e}")
-
-

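Reviewer note: a multipart sketch for /spoken_language (host and file name assumed; form fields from the route signature above, which requires either url or file):

import requests

with open("answer.mp3", "rb") as f:
    r = requests.post("http://localhost:8000/spoken_language",
                      data={"text": "I am a student.", "url": ""},
                      files={"file": ("answer.mp3", f, "audio/mpeg")})
print(r.json())
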
+ 7 - 14
core/api_get_word.py

@@ -1,11 +1,7 @@
 # -*- coding: utf-8 -*-
 
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
-from fastapi.responses import FileResponse,PlainTextResponse
-from tools.loglog import logger
-from tools.audio import GetAudio
-from core.respone_format import *
-import traceback
+from fastapi import Request, APIRouter, Query
+from fastapi.responses import FileResponse, PlainTextResponse
 
 from make_docx_demo.main_word_applet import start_make_word as s2
 
@@ -28,15 +24,12 @@ def make_word(json_data:dict,request:Request,
 """
 
 
-
 @router.post("/make_word/vocabulary_assault")
-def make_word(json_data:dict,request:Request,
-            document_format:int=Query(1,description="1:docx;2.pdf"),
-            scanpage_format:int=Query(1,description="1:老版筛查表;2.新版筛查表;3.老版+新版筛查表")):
-
-   
+def make_word(json_data: dict, request: Request,
+              document_format: int = Query(1, description="1:docx;2.pdf"),
+              scanpage_format: int = Query(1, description="1:老版筛查表;2.新版筛查表;3.老版+新版筛查表")):
     headers = {"Content-Type": "application/octet-stream"}
     if path := s2(json_data, document_format, scanpage_format):
-        return FileResponse(path=path,headers=headers, media_type='application/octet-stream')
+        return FileResponse(path=path, headers=headers, media_type='application/octet-stream')
     else:
-        return PlainTextResponse(status_code=500,content="服务器内部错误")
+        return PlainTextResponse(status_code=500, content="服务器内部错误")

+ 21 - 20
core/api_routes_jwt.py

@@ -1,14 +1,16 @@
 # -*- coding: utf-8 -*-
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter
-import jwt
-from jwt.exceptions import ExpiredSignatureError,DecodeError,InvalidAlgorithmError
-from core.respone_format import *
-from tools.sql_format import UserCRUD
-import datetime
 import asyncio
-from tools.loglog import logger
+import datetime
 import traceback
+
+import jwt
+from fastapi import Form, Request, APIRouter
+from jwt.exceptions import ExpiredSignatureError, DecodeError, InvalidAlgorithmError
+
 from common.common_data import SECRET_KEY
+from core.respone_format import *
+from tools.loglog import logger
+from tools.sql_format import UserCRUD
 
 router = APIRouter()
 user_crud = UserCRUD()
@@ -29,7 +31,6 @@ def create_access_token(username: str):
         logger.error(f"{type(e).__name__}, {e}")
 
 
-
 def verify_token_sync(token: str):
     if not token:
         return 1
@@ -38,10 +39,10 @@ def verify_token_sync(token: str):
         if not decoded_payload.get("username"):
             return 2
         else:
-            return 0 
+            return 0
     except ExpiredSignatureError:
         return 3
-    except (InvalidAlgorithmError,DecodeError):
+    except (InvalidAlgorithmError, DecodeError):
         return 4
 
 
@@ -49,6 +50,7 @@ async def verify_token(token: str):
     loop = asyncio.get_event_loop()
     return await loop.run_in_executor(None, verify_token_sync, token)
 
+
 async def verify_token2(token):
     msg_verify_code = await verify_token(token)
     if msg_verify_code != 0:
@@ -66,13 +68,13 @@ async def verify_token2(token):
 
 @router.post("/user/login")
 async def get_token(username: str = Form(...), password: str = Form(...)):
-    user_info = user_crud.get_userinfo_by_account(username) 
+    user_info = user_crud.get_userinfo_by_account(username)
     if user_info:
         userid, account, true_pwd, uname, create_time = user_info
     else:
         return resp_400(message="user does not exist")
 
-    if password==true_pwd:
+    if password == true_pwd:
         access_token = create_access_token(username)
         return_data = {"access_token": access_token}
         return resp_200(data=return_data)
@@ -81,30 +83,29 @@ async def get_token(username: str = Form(...), password: str = Form(...)):
 
 
 @router.get("/user")
-async def get_user(request:Request):
-   
+async def get_user(request: Request):
     token = request.headers.get("Authorization")
     try:
         decoded_payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
-        account = decoded_payload.get("username") 
+        account = decoded_payload.get("username")
         user_info = user_crud.get_userinfo_by_account(account=account)
         userid, account, true_pwd, uname, create_time = user_info
-        data = {"id":userid,"name":uname,"account":account,"create_time":create_time}
+        data = {"id": userid, "name": uname, "account": account, "create_time": create_time}
         return resp_200(data=data)
     except ExpiredSignatureError:
         return resp_401(message="The token has expired")
-    except (InvalidAlgorithmError,DecodeError):
+    except (InvalidAlgorithmError, DecodeError):
         return resp_400(message="Token decoding error")
     except Exception as e:
         return resp_400(message=f"Error in get user information.{e}")
 
 
 @router.post("/user/logout")
-async def get_token(request:Request):
+async def get_token(request: Request):
     token = request.headers.get("Authorization")
     try:
         decoded_payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
-        account = decoded_payload.get("username") 
+        account = decoded_payload.get("username")
         logger.info(f"账号:{account}注销成功")
         data = {"result": "注销成功"}
         return resp_200(data=data)
@@ -113,4 +114,4 @@ async def get_token(request:Request):
     except (InvalidAlgorithmError, DecodeError):
         return resp_400(message="Token decoding error")
     except Exception as e:
-        return resp_400(message=f"User logout error.{e}")
+        return resp_400(message=f"User logout error.{e}")

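Reviewer note: a minimal PyJWT round trip consistent with verify_token_sync above (the 24-hour expiry is an assumption; create_access_token's body is outside this hunk):

import datetime
import jwt

SECRET_KEY = "demo-secret"  # stands in for os.getenv("key") in common_data

token = jwt.encode({"username": "xie",
                    "exp": datetime.datetime.utcnow() + datetime.timedelta(hours=24)},
                   SECRET_KEY, algorithm="HS256")
payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
assert payload["username"] == "xie"  # verify_token_sync returns 0 for this token
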
+ 5 - 2
core/respone_format.py

@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
+from typing import Union
+
 from fastapi import status
 from fastapi.responses import JSONResponse
-from typing import Union
 
 
 def resp_200(*, data: Union[list, dict, str]) -> JSONResponse:
@@ -17,12 +18,14 @@ def resp_400(*, message: str = "Bad Request", data: Union[list, dict, str] = Non
         content={"code": 400, "message": message, "data": data}
     )
 
+
 def resp_401(*, message: str = "The token has expired", data: Union[list, dict, str] = None) -> JSONResponse:
     return JSONResponse(
         status_code=status.HTTP_401_UNAUTHORIZED,
         content={"code": 401, "message": message, "data": data}
     )
 
+
 def resp_404(*, message: str = "Not Found", data: Union[list, dict, str] = None) -> JSONResponse:
     return JSONResponse(
         status_code=status.HTTP_404_NOT_FOUND,
@@ -34,4 +37,4 @@ def resp_500(*, message: str = "Internal Server Error", data: Union[list, dict,
     return JSONResponse(
         status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
         content={"code": 500, "message": message, "data": data}
-    )
+    )

+ 12 - 13
data/get_all_exchange_words.py

@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
-from tools.new_mysql import MySQLUploader
-import sys
 import os
+import sys
+
+from tools.new_mysql import MySQLUploader
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
 m = MySQLUploader()
 s = "select Word,InflectedWordSpelling,Properties from dictionary_exchange"
 r = m.query_data(s)
-m.close_connection()
 
 all_exchange_words = set()
 all_exchange_words_dict = {}
@@ -16,15 +16,14 @@ all_prototype_deformation_dict = {}
 prototype_deformation_dict2 = {}
 
 for i in r:
-   
-    prototype,deformation,properties= [i[0],i[1],i[2]]
-   
-    all_exchange_words.update({prototype,deformation})
+
+    prototype, deformation, properties = [i[0], i[1], i[2]]
+
+    all_exchange_words.update({prototype, deformation})
 
     if properties == "原型":
         prototype_deformation_dict2[prototype] = deformation
 
-   
     if deformation not in all_prototype_deformation_dict:
         all_prototype_deformation_dict[deformation] = prototype
 
@@ -34,29 +33,29 @@ for i in r:
         all_exchange_words_dict[prototype].append(deformation)
 
 
-
-def word_to_prototype(word:str) -> str:
+def word_to_prototype(word: str) -> str:
     """依次按顺序查询。1.先查原型 2.最后小写再查变形对应的原型 3.再查变形对应的原型。这样才能保证,不过滤有特殊意义的大写"""
     if word in all_exchange_words_dict:
         return word
     elif word.lower() in all_exchange_words_dict:
         return word.lower()
-   
+
     elif word in all_prototype_deformation_dict:
         w = all_prototype_deformation_dict[word]
         if w in prototype_deformation_dict2:
             w = prototype_deformation_dict2[w]
         return w
+
     else:
         return word
 
 
 def get_word_exchange_list(word) -> list:
     prototype_word = word_to_prototype(word)
-    all_exchange_words_list = all_exchange_words_dict.get(prototype_word,[])
+    all_exchange_words_list = all_exchange_words_dict.get(prototype_word, [])
     return all_exchange_words_list
 
 
 if __name__ == '__main__':
     print(word_to_prototype("was"))
-    print(word_to_prototype("made"))
+    print(word_to_prototype("made"))

+ 8 - 8
data/get_frequency_script.py

@@ -1,18 +1,18 @@
 # -*- coding: utf-8 -*-
-from openpyxl.worksheet.worksheet import Worksheet
-from openpyxl import load_workbook
 import json
 
-wb = load_workbook(r"单词词义表.xlsx",read_only=True)
+from openpyxl import load_workbook
+from openpyxl.worksheet.worksheet import Worksheet
+
+wb = load_workbook(r"单词词义表.xlsx", read_only=True)
 word_dict = {}
 ws: Worksheet = wb["Sheet1"]
 for row in ws.values:
-    _,word,frequency = row
+    _, word, frequency = row
     frequency = int(frequency)
-   
+
     word_dict[frequency] = word
 wb.close()
-with open("json_word_frequency.json",mode="w",encoding="utf-8") as f:
-   
+with open("json_word_frequency.json", mode="w", encoding="utf-8") as f:
     write_data = json.dumps(word_dict)
-    f.write(write_data)
+    f.write(write_data)

File diff suppressed because it is too large
+ 0 - 0
data/json_word_frequency.json


BIN
data/春笋词义表.xlsx


+ 15 - 18
deepseek/ds_api.py

@@ -1,11 +1,10 @@
 # -*- coding: utf-8 -*-
 import json
-
-from openai import OpenAI
 import os
-from tools.loglog import SimpleLogger
 
+from openai import OpenAI
 
+from tools.loglog import SimpleLogger
 
 
 class DS:
@@ -16,7 +15,7 @@ class DS:
         )
         self.logger = SimpleLogger(base_file_name="deepseek")
 
-    def write_log(self, message:str, log_type="info"):
+    def write_log(self, message: str, log_type="info"):
         """写入日志"""
         log_methods = {
             "warning": self.logger.warning,
@@ -35,9 +34,9 @@ class DS:
             self.write_log(f"Response validation error: {e}", log_type="error")
             return False
 
-    def get_article(self, user_prompt: str, sys_prompt: str = None, temperature: float = 0.8, 
-                   json_resp: bool = False, real_ip: str = "", demo_name: str = "", 
-                   max_tokens: int = 5192) -> str:
+    def get_article(self, user_prompt: str, sys_prompt: str = None, temperature: float = 0.8,
+                    json_resp: bool = False, real_ip: str = "", demo_name: str = "",
+                    max_tokens: int = 5192) -> str:
         """获取AI生成的文章
         
         Args:
@@ -56,29 +55,27 @@ class DS:
         if sys_prompt:
             messages.append({'role': 'system', 'content': sys_prompt})
         messages.append({'role': 'user', 'content': user_prompt})
-        
+
         response_format = {"type": "json_object"} if json_resp else {"type": "text"}
-        
-       
+
         resp = ""
         for _ in range(3):
             completion = self.client.chat.completions.create(
-                model="deepseek-v3", 
+                model="deepseek-v3",
                 messages=messages,
                 temperature=temperature,
                 response_format=response_format,
-                max_tokens=max_tokens 
+                max_tokens=max_tokens
             )
             resp = completion.choices[0].message.content
             if self.check_article_response(resp):
                 break
-        
-       
+
         if sys_prompt and resp:
             self.write_log(sys_prompt)
         self.write_log(user_prompt)
         self.write_log(resp)
-        
+
         return resp
 
 
@@ -100,11 +97,11 @@ if __name__ == '__main__':
 
"""
     ds = DS()
-    resp = ds.get_article(user_prompt=p,json_resp=True)
+    resp = ds.get_article(user_prompt=p, json_resp=True)
     print(resp)
     print()
 
-    print(resp.replace(r'\"n','\n').replace(r"\\n",'\n'))
+    print(resp.replace(r'\"n', '\n').replace(r"\\n", '\n'))
     print()
 
-    print(json.loads(resp))
+    print(json.loads(resp))

+ 27 - 46
deepseek/get_article3.py

@@ -1,22 +1,19 @@
 # -*- coding: utf-8 -*-
 
-import re
 import json
-from deepseek.ds_api import DS
-
-from tools.new_mysql import MySQLUploader
-from tools.loglog import logger, log_err_e
-from tools.thread_pool_manager import pool_executor
-from common.common_data import all_exchange_words
-from common.split_text import split_text_to_word
-
-from pydantic import BaseModel
-from cachetools import TTLCache
+import re
 from concurrent.futures import wait
 from random import randint, shuffle
-import json
+
 import requests
 
+from common.common_data import all_exchange_words
+from common.split_text import split_text_to_word
+from deepseek.ds_api import DS
+from tools.loglog import logger, log_err_e
+from tools.new_mysql import MySQLUploader
+from tools.thread_pool_manager import pool_executor
+
 
 def get_article_difficulty(article) -> int:
     """获取文章的难度值"""
@@ -58,23 +55,20 @@ def find_interval(number):
 def parse_question(question_block):
     question_info = {}
 
-   
     question_match = re.search(r'问题:\s*(.*)', question_block)
     if question_match:
         question_info['trunk'] = question_match.group(1).strip()
 
-   
     analysis_match = re.search(r'解析:\s*(.*)', question_block)
     if analysis_match:
         question_info['analysis'] = analysis_match.group(1).strip()
 
-   
     options_match = re.search(r'选项:(.*)', question_block)
     if options_match:
         options_text = options_match.group(1).strip()
         options_list = re.split(r'\s*[BCDA]\.\s*', options_text)[1:]
         candidates = []
-        for i, option_text in enumerate(options_list, start=65): 
+        for i, option_text in enumerate(options_list, start=65):
             label = chr(i)
             text = option_text.strip()
             candidates.append({
@@ -84,7 +78,6 @@ def parse_question(question_block):
             })
         question_info['candidates'] = candidates
 
-   
     answer_match = re.search(r'答案:([ABCD])', question_block)
     if answer_match and 'candidates' in question_info:
         correct_label = answer_match.group(1)
@@ -101,27 +94,23 @@ class GetArticle:
         self.ds = DS()
 
         self.callback_url_dict = {}
-        self.real_ip_dict = {} 
+        self.real_ip_dict = {}
         self.demo_name = {}
 
-       
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)
 
-
-   
     def parser_insert_to_mysql(self, resp_result):
         for single_article in resp_result['articles']:
-           
+
             article = single_article['body']
             article_json = json.dumps(single_article)
-            difficult_value = find_interval(get_article_difficulty(article)) 
+            difficult_value = find_interval(get_article_difficulty(article))
             if not difficult_value:
                 logger.error("文章难度等级为0;")
             sql = "INSERT INTO spring_bamboo_article (article_json,difficult_level) VALUES (%s,%s)"
             self.m.execute_(sql, (article_json, difficult_value))
 
-   
     def submit_task(self, words_meaning_list: list, take_count: int, student_stage: int, real_ip: str, demo_name: str):
         """
         words_meaning_ids: 词义id 包含词义ID的数组集合,用于生成文章。- 示例:[110, 111, 112, 113, 114]
@@ -130,7 +119,7 @@ class GetArticle:
         demo_name: 项目名称
         """
         task_id = randint(10000000, 99999999)
-       
+
         words_meaning_str = ";".join([i["spell"] + ":" + i["meaning"] for i in words_meaning_list])
         logger.info(f"生成文章id。task_id:{task_id}。词义组:{words_meaning_str}.")
 
@@ -138,9 +127,9 @@
         self.demo_name[task_id] = demo_name
 
         try:
-           
+
             resp_result = self.run_task(words_meaning_list, task_id, take_count, student_stage)
-            self.parser_insert_to_mysql(resp_result) 
+            self.parser_insert_to_mysql(resp_result)
             return resp_result
         except Exception as e:
             err_msg = f"GetArticle提交任务失败{type(e).__name__},{e}"
@@ -148,31 +137,28 @@
 
             return err_msg
 
-   
     def get_article(self, words_meaning_list, student_stage, task_id, take_count) -> dict:
         diffculty_control = {
             1: {"grade": "小学", "article_word_count": 60, "desc_difficulty": "最简单最容易没有难度", "paragraph_count": 1,
                 "desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
-                "choice_desc":"选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
+                "choice_desc": "选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
             2: {"grade": "初中", "article_word_count": 200, "desc_difficulty": "简单、常见、难度低", "paragraph_count": 3,
                 "desc2": "文章整体难度适中,大约和中国初中生,中国CET-3,雅思4分这样的难度标准。",
-                "choice_desc":"选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
+                "choice_desc": "选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
             3: {"grade": "高中", "article_word_count": 300, "desc_difficulty": "常见、高中难度的", "paragraph_count": 3,
                 "desc2": "文章整体难度适中,大约和中国的高中生,中国CET-4,雅思5分这样的难度标准。",
-                "choice_desc":"选择题难度偏难,要有迷惑性,不要出现直接在文中找到答案,参考中国高中生水平,高考标准。"}
+                "choice_desc": "选择题难度偏难,要有迷惑性,不要出现直接在文中找到答案,参考中国高中生水平,高考标准。"}
         }
         take_count_dict = {0: "", 1: "一", 2: "二", 3: "三", 4: "四", 5: "五", 6: "六", 7: "七", 8: "八"}
         different_cou = take_count_dict.get(take_count, "")
 
-        grade = diffculty_control[student_stage]["grade"] 
-        select_word_count = diffculty_control[student_stage]["article_word_count"] 
-        select_diffculty = diffculty_control[student_stage]["desc_difficulty"] 
-        select_paragraph_count = diffculty_control[student_stage]["paragraph_count"] 
-        desc2 = diffculty_control[student_stage]["desc2"] 
-        choice_desc = diffculty_control[student_stage]["choice_desc"] 
-       
+        grade = diffculty_control[student_stage]["grade"]
+        select_word_count = diffculty_control[student_stage]["article_word_count"]
+        select_diffculty = diffculty_control[student_stage]["desc_difficulty"]
+        select_paragraph_count = diffculty_control[student_stage]["paragraph_count"]
+        desc2 = diffculty_control[student_stage]["desc2"]
+        choice_desc = diffculty_control[student_stage]["choice_desc"]
 
-       
         shuffle(words_meaning_list)
         words_meaning_str = ";".join([i["spell"] + ":" + i["meaning"] for i in words_meaning_list])
 
@@ -195,9 +181,8 @@
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(self.ds.get_article(q, temperature=1, json_resp=True, real_ip=real_ip, demo_name=demo_name,max_tokens=8000))
+            r_json = json.loads(self.ds.get_article(q, temperature=1, json_resp=True, real_ip=real_ip, demo_name=demo_name, max_tokens=8000))
 
-           
             r_json["body"] = r_json["title"] + "\n\n" + r_json["english"]
             del r_json["title"]
 
@@ -219,12 +204,11 @@
 
             resp_text = self.ds.get_article(q_choice_question, temperature=1, real_ip=real_ip, demo_name=demo_name, max_tokens=8000)
             questions = resp_text.strip().split('\n\n')
-           
+
             parsed_questions = [parse_question(q) for q in questions]
 
             json_data = {"questions": parsed_questions}
 
-           
             allWordAmount = 0
             allWordAmount += len(split_text_to_word(r_json["english"]))
             for i in json_data["questions"]:
@@ -239,8 +223,6 @@
         except Exception as e:
             logger.error(f"gpt生成文章回复其他错误.{type(e).__name__} {e}")
 
-
-   
     def run_get_article_task(self, words_meaning_list, task_id, take_count, student_stage) -> dict:
         """
         :param words_meaning_list: 数据库内查出来的单词和词义的列表
@@ -258,7 +240,6 @@
             return_json["articles"].append(t.result())
         return return_json
 
-   
     def run_task(self, words_meaning_list, task_id, take_count, student_stage):
         try:
             outside_json = self.run_get_article_task(words_meaning_list, task_id, take_count, student_stage)

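Reviewer note: parse_question above expects blocks shaped roughly like this reconstructed sample (the real format is fixed by the prompt text, which is not part of this hunk):

block = ("问题: Why does the author mention his dog?\n"
         "选项:A. To show love  B. To give an example  C. To make a joke  D. To explain a rule\n"
         "答案:B\n"
         "解析: 作者提到狗是为了举例。")
info = parse_question(block)
# info['trunk']      -> 'Why does the author mention his dog?'
# info['candidates'] -> four dicts labeled A-D; the 答案 match marks B as correct
# info['analysis']   -> '作者提到狗是为了举例。'
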
+ 203 - 0
gpt/article_annotation.py

@@ -0,0 +1,203 @@
+# -*- coding: utf-8 -*-
+
+
+"""
+1.对拿到的文章或句子,进行单词的切割;考虑,如何是大文本应该进行小段切割,防止巨量文本的传入;遇到巨量文本,切割后分多个问题,调用正常的运行流程;把custom_id的后缀加上123456标记
+2.判断文章是否有缩写;将缩写还原,缩写标注第一个单词;例如it’s 标注it的词义id; 缩写获得的形式,写入mysql的缩写表
+3.获取每个单词的词义数据包;
+
+20250522
+文章标注接口,分为2个接口,传文本同步返回,传文章对象异步返回。
+下面这些情况,暂时不标注;1.缩写  2.连字符  3.春笋词义表内没有数据
+注意:传入的文本将使用空格切分,连在一起的字符会被当做一个单词处理;
+"""
+
+import json
+from random import randint
+
+from cachetools import TTLCache
+from openpyxl import load_workbook
+
+from core.respone_format import *
+from data.get_all_exchange_words import word_to_prototype
+from gpt.chatgpt import get_annotation_gpt_pydantic
+from tools.loglog import log_err_e, logger
+from tools.thread_pool_manager import pool_executor
+
+
+class Annotation:
+
+    def __init__(self):
+
+        self.all_task_data: dict[int, list] = {}
+
+        self.all_task_result = TTLCache(maxsize=1000, ttl=3600)
+
+        self.word_meaning_dict: dict[str, list[tuple[int, str, str]]] = {}
+
+        self.prototype_words = set()
+        self.change_prototype_dict = {}
+
+        self.get_excel_meaning_data()
+        self.get_excel_change_data()
+
+    def submit_task(self, english_text, real_ip):
+        task_id = randint(10000000, 99999999)
+        logger.info(f"/article/annotation 生成id。task_id:{task_id},real_ip:{real_ip}")
+
+        f = pool_executor.submit(self.main_annotation, task_id, english_text)
+        r = f.result()
+
+        return r
+
+    def __run(self):
+
+        for task_id, task_data in self.all_task_data.items():
+            english_text, = task_data
+            self.main_annotation(task_id, english_text)
+
+    def main_annotation(self, task_id: int, english_text: str):
+
+        split_words = english_text.split()
+
+        meanings_data = self.query_meanings_data(split_words=split_words)
+
+        result_annotation = self.__ai_annotation(english_text=english_text, meanings_data=meanings_data)
+
+        self.all_task_result[task_id] = result_annotation
+
+        return result_annotation
+
+    async def query_result_by_taskid(self, task_id):
+
+        if task_id in self.all_task_result:
+            r = self.all_task_result[task_id]
+            return resp_200(data=r)
+
+        return resp_200(data={})
+
+    def get_excel_meaning_data(self):
+        """读取外部的春笋词义表,结构化到字典;单词为键,值[((词义id,中文词义))]"""
+        spring_bamboo_meaning_path = "data/春笋词义表.xlsx"
+        wb = load_workbook(spring_bamboo_meaning_path, read_only=True, data_only=True)
+        ws = wb.active
+        try:
+            for index, row in enumerate(ws.values, start=1):
+                if index == 1:
+                    continue
+                word = row[3]
+                id_and_meaning = (row[0], word, row[2])
+                if word not in self.word_meaning_dict:
+                    self.word_meaning_dict[word] = [id_and_meaning]
+                else:
+                    self.word_meaning_dict[word].append(id_and_meaning)
+        except Exception as e:
+            log_err_e(e, msg="打开春笋词义表错误")
+        finally:
+            wb.close()
+
+    def get_excel_change_data(self):
+        """读取外部的春笋变形表"""
+        spring_bamboo_change_path = "data/春笋单词对照变形.xlsx"
+        wb = load_workbook(spring_bamboo_change_path, read_only=True, data_only=True)
+        ws = wb.active
+        try:
+            for row in ws.values:
+                word_prototype = row[0]
+                word_change = row[1]
+                self.prototype_words.add(word_prototype)
+                self.change_prototype_dict[word_change] = word_prototype
+
+        except Exception as e:
+            log_err_e(e, msg="打开春笋变形表错误")
+        finally:
+            wb.close()
+
+    def to_prototype_word(self, word):
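+        # Lookup-order sketch (hypothetical data, not from the real 变形表):
+        # with prototype_words == {"run"} and change_prototype_dict == {"ran": "run"},
+        # to_prototype_word("ran") returns "run"; anything still unmatched falls
+        # back to the generic word_to_prototype() restorer.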
+        if word in self.prototype_words:
+            w_prototype = word
+        elif word.lower() in self.prototype_words:
+            w_prototype = word.lower()
+        elif word in self.change_prototype_dict:
+            w_prototype = self.change_prototype_dict[word]
+        else:
+            w_prototype = word_to_prototype(word)
+        return w_prototype
+
+    def __query_meaning(self, word: str) -> str:
+        """
+        :param word: 单个单词
+        :return: 加工好的词义文本
+        """
+
+        meaning_data1 = []
+        if word in self.word_meaning_dict:
+            meaning_data1.extend(self.word_meaning_dict[word])
+            meaning_data_str = "".join([f"[{i[0]} {i[1]} {i[2]}]" for i in meaning_data1])
+            return meaning_data_str
+        elif word.lower() in self.word_meaning_dict:
+            meaning_data1.extend(self.word_meaning_dict[word.lower()])
+            meaning_data_str = "".join([f"[{i[0]} {i[1]} {i[2]}]" for i in meaning_data1])
+            return meaning_data_str
+
+        w_prototype = self.to_prototype_word(word)
+
+        key_to_check = w_prototype if w_prototype in self.word_meaning_dict else w_prototype.lower()
+        if key_to_check in self.word_meaning_dict:
+            meaning_data = self.word_meaning_dict[key_to_check]
+            meaning_data1.extend(meaning_data)
+            meaning_data1 = list(set(meaning_data1))
+            meaning_data_str = "".join([f"[{i[0]} {i[1]} {i[2]}]" for i in meaning_data1])
+            return meaning_data_str
+
+    def query_meanings_data(self, split_words: list):
+        """
+        查询所有单词的词义数据包
+        :param split_words: 文章或句子被切割后的单词列表,连字符也拆开
+        :return:
+        """
+        all_words_meaning_list = set()
+        for word in split_words:
+            result_query_meaning: str = self.__query_meaning(word)
+            if result_query_meaning:
+                all_words_meaning_list.add(f"【{word} {result_query_meaning}】")
+
+        new_data_str = "\n词义数据包:\n" + "\n".join(all_words_meaning_list) + "\n\n"
+        return new_data_str
+
+    @staticmethod
+    def __parse_gpt_resp(gpt_resp: dict):
+        """
+        解析ai-gpt的回复
+        :param gpt_resp: GPT原始的回复
+        :return:
+        """
+
+        r = json.loads(gpt_resp["choices"][0]["message"]["content"])
+        return r
+
+    def __ai_annotation(self, english_text, meanings_data):
+        """
+        AI词义标注
+        :param english_text: 英语文本
+        :param meanings_data: 词义数据包
+        :return:
+        """
+        sys_question = """你是一个英语文本的词义标注师,工作是按要求对句子或文章进行词义id的标注。下面我将提供一篇英语文本以及一个包含单词ID和词义的数据包。
+你的工作是对英语文本中的每个单词的原型,根据提供的词义数据包选择这个单词原型最合适的词义,并在单词后附上对应的词义ID。标注格式为:word[word_id]。
+要求:
+1.如果词义数据包中没有该单词或找不到合适的词义,请标注该单词在文中词义的中文翻译。示例:seismography[地震学] car[汽车]。
+2.如果是[连字符-、中文、标点符号、数字、百分比、序号A.B.C.D.或者日期],这些不是英语单词,不用标记,保持原样不变。示例`1999 2025 18:00 苹果 ____ A. B. C. D. e-mail Exhaust-fans`,这些都不标记。
+3.标注每个英语单词,不是短语。错误示例:be good at[擅长]。正确示例:be[11] good[12] at[13]。
+4.如果没有提供词义,则不标注。
+
+回复格式要求如下:
+- 请按照用户原文顺序和格式返回处理后的文本。空格和换行\\n,不用改变,不要加减空格,与原文一致。
+- 每个单词后面标注上其对应的词义ID,格式为:`word[word_id]`。
+
+最终回复示例:If[1] a[2] dog[3] causes[4] a[5] cat[6] accident[7] and[8] gets[9] killed[10]
+请确保理解上述说明并准备好接收英语文本及词义数据包。"""
+        user_question = "英语文本:\n" + english_text + meanings_data
+        gpt_resp = get_annotation_gpt_pydantic(question=user_question, sys_prompt=sys_question, max_tokens=8000)
+        result_annotation = self.__parse_gpt_resp(gpt_resp=gpt_resp)
+        return result_annotation
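
A minimal usage sketch of the flow above (class and method names from this file; the sample text and IP are invented, and it assumes the two Excel sheets are present under data/):

    from gpt.article_annotation import Annotation

    annot = Annotation()  # loads the meaning and inflection sheets into memory once
    result = annot.submit_task("If a dog causes a cat accident", real_ip="127.0.0.1")
    print(result)  # e.g. {"annotation_text": "If[1] a[2] dog[3] causes[4] ..."}

Note that submit_task blocks on f.result(), so the plain-text endpoint is effectively synchronous, matching the 20250522 note in the module docstring; query_result_by_taskid serves the asynchronous variant out of the one-hour TTLCache.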

+ 128 - 132
gpt/chatgpt.py

@@ -1,16 +1,14 @@
 # -*- coding:utf-8 -*-
-if __name__ == '__main__':
-    import os
-
-    os.chdir("..")

+import json
 import time
 from typing import Dict, Any, Union

-import httpx
 import requests
+from pydantic import ValidationError

-from tools.loglog import logger, simple_logger, log_err_e
+from gpt.gpt_check import Article, Annotation
+from tools.loglog import logger, simple_logger, log_err_e, temp_logger
 from tools.new_mysql import MySQLUploader

 m = MySQLUploader()
@@ -94,7 +92,7 @@ def get_answer_from_gpt(question, real_ip="localhost", demo_name="无", model="g


 def get_article_gpt_pydantic(question, real_ip="localhost", demo_name="无", model="gpt-4.1", max_tokens=3500, temperature: float = 0, n=1,
-                             check_fucn=None, sys_prompt=None):
+                             check_fucn=None, sys_prompt=None, task_id=0, exercise_id=0):
     """
     异步获取文章
     :param question: 问题
@@ -106,96 +104,66 @@ def get_article_gpt_pydantic(question, real_ip="localhost", demo_name="无", mod
     :param n: 生成数量
     :param check_fucn: 校验函数
     :param sys_prompt: 系统提示
+    :param task_id: 任务id
+    :param exercise_id: 学案id
+
     :return: 文章内容
     """

-    d2 = {"model": model, "messages": [], "max_tokens": max_tokens, "temperature": temperature, "n": n, "response_format": {'type': 'json_schema',
-                                                                                                                            'json_schema': {
-                                                                                                                                'name': 'Article',
-                                                                                                                                'schema': {'$defs': {
-                                                                                                                                    'Candidate': {
-                                                                                                                                        'properties': {
-                                                                                                                                            'label': {
-                                                                                                                                                'title': 'Label',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'text': {
-                                                                                                                                                'title': 'Text',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'isRight': {
-                                                                                                                                                'title': 'Isright',
-                                                                                                                                                'type': 'integer'}},
-                                                                                                                                        'required': [
-                                                                                                                                            'label',
-                                                                                                                                            'text',
-                                                                                                                                            'isRight'],
-                                                                                                                                        'title': 'Candidate',
-                                                                                                                                        'type': 'object'},
-                                                                                                                                    'DifficultSentence': {
-                                                                                                                                        'properties': {
-                                                                                                                                            'english': {
-                                                                                                                                                'title': 'English',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'chinese': {
-                                                                                                                                                'title': 'Chinese',
-                                                                                                                                                'type': 'string'}},
-                                                                                                                                        'required': [
-                                                                                                                                            'english',
-                                                                                                                                            'chinese'],
-                                                                                                                                        'title': 'DifficultSentence',
-                                                                                                                                        'type': 'object'},
-                                                                                                                                    'Question': {
-                                                                                                                                        'properties': {
-                                                                                                                                            'trunk': {
-                                                                                                                                                'title': 'Trunk',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'analysis': {
-                                                                                                                                                'title': 'Analysis',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'candidates': {
-                                                                                                                                                'items': {
-                                                                                                                                                    '$ref': '#/$defs/Candidate'},
-                                                                                                                                                'title': 'Candidates',
-                                                                                                                                                'type': 'array'}},
-                                                                                                                                        'required': [
-                                                                                                                                            'trunk',
-                                                                                                                                            'analysis',
-                                                                                                                                            'candidates'],
-                                                                                                                                        'title': 'Question',
-                                                                                                                                        'type': 'object'}},
-                                                                                                                                           'properties': {
-                                                                                                                                               'difficultSentences': {
-                                                                                                                                                   'items': {
-                                                                                                                                                       '$ref': '#/$defs/DifficultSentence'},
-                                                                                                                                                   'title': 'Difficultsentences',
-                                                                                                                                                   'type': 'array'},
-                                                                                                                                               'usedMeanIds': {
-                                                                                                                                                   'items': {
-                                                                                                                                                       'type': 'integer'},
-                                                                                                                                                   'title': 'Usedmeanids',
-                                                                                                                                                   'type': 'array'},
-                                                                                                                                               'questions': {
-                                                                                                                                                   'items': {
-                                                                                                                                                       '$ref': '#/$defs/Question'},
-                                                                                                                                                   'title': 'Questions',
-                                                                                                                                                   'type': 'array'},
-                                                                                                                                               'englishArticle': {
-                                                                                                                                                   'title': 'Englisharticle',
-                                                                                                                                                   'type': 'string'},
-                                                                                                                                               'chineseArticle': {
-                                                                                                                                                   'title': 'Chinesearticle',
-                                                                                                                                                   'type': 'string'},
-                                                                                                                                               'allWordAmount': {
-                                                                                                                                                   'title': 'Allwordamount',
-                                                                                                                                                   'type': 'integer'}},
-                                                                                                                                           'required': [
-                                                                                                                                               'difficultSentences',
-                                                                                                                                               'usedMeanIds',
-                                                                                                                                               'questions',
-                                                                                                                                               'englishArticle',
-                                                                                                                                               'chineseArticle',
-                                                                                                                                               'allWordAmount'],
-                                                                                                                                           'title': 'Article',
-                                                                                                                                           'type': 'object'}}}}
+    d2 = {"model": model, "messages": [], "max_tokens": max_tokens, "temperature": temperature, "n": n,
+          "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {
+              'properties': {'label': {'allOf': [{'$ref': '#/$defs/Options'}], 'description': 'ABCD序号的一种', 'title': '序号'},
+                             'text': {'description': '英文,ABCD选项的文本', 'title': '选项文本', 'type': 'string'},
+                             'isRight': {'allOf': [{'$ref': '#/$defs/IsRight'}], 'description': '1是正确,0是错误', 'title': '是否是正确答案'}},
+              'required': ['label', 'text', 'isRight'], 'title': 'Candidate', 'type': 'object'}, 'DifficultSentence': {
+              'properties': {'english': {'description': '文章中的一句难句', 'title': '英语难句', 'type': 'string'},
+                             'chinese': {'description': '对英语难句的翻译', 'title': '中文难句', 'type': 'string'}}, 'required': ['english', 'chinese'],
+              'title': 'DifficultSentence', 'type': 'object'}, 'IsRight': {'enum': [1, 0], 'title': 'IsRight', 'type': 'integer'},
+                                                                                                             'Options': {'enum': ['A', 'B', 'C', 'D'],
+                                                                                                                         'title': 'Options',
+                                                                                                                         'type': 'string'},
+                                                                                                             'Question': {'properties': {'trunk': {
+                                                                                                                 'description': '用英语给出的选择题题目',
+                                                                                                                 'title': '选择题题目', 'type': 'string'},
+                                                                                                                                         'analysis': {
+                                                                                                                                             'description': '中文,选择题的分析思路;不要给出答案的ABCD序号',
+                                                                                                                                             'title': '选择题分析',
+                                                                                                                                             'type': 'string'},
+                                                                                                                                         'candidates': {
+                                                                                                                                             'description': '一共4个选择题',
+                                                                                                                                             'items': {
+                                                                                                                                                 '$ref': '#/$defs/Candidate'},
+                                                                                                                                             'title': '选项对象',
+                                                                                                                                             'type': 'array'}},
+                                                                                                                          'required': ['trunk',
+                                                                                                                                       'analysis',
+                                                                                                                                       'candidates'],
+                                                                                                                          'title': 'Question',
+                                                                                                                          'type': 'object'}},
+                                                                                                   'properties': {'difficultSentences': {
+                                                                                                       'description': '挑选一句难句对象',
+                                                                                                       'items': {'$ref': '#/$defs/DifficultSentence'},
+                                                                                                       'title': '难句对象', 'type': 'array'},
+                                                                                                                  'usedMeanIds': {
+                                                                                                                      'items': {'type': 'integer'},
+                                                                                                                      'title': '用到的词义id',
+                                                                                                                      'type': 'array'}, 'questions': {
+                                                                                                           'description': '针对英语文章的选择题',
+                                                                                                           'items': {'$ref': '#/$defs/Question'},
+                                                                                                           'title': '问题对象', 'type': 'array'},
+                                                                                                                  'englishArticle': {
+                                                                                                                      'description': '',
+                                                                                                                      'title': '英语文章',
+                                                                                                                      'type': 'string'},
+                                                                                                                  'chineseArticle': {
+                                                                                                                      'description': '',
+                                                                                                                      'title': '中文翻译',
+                                                                                                                      'type': 'string'}},
+                                                                                                   'required': ['difficultSentences', 'usedMeanIds',
+                                                                                                                'questions', 'englishArticle',
+                                                                                                                'chineseArticle'], 'title': 'Article',
+                                                                                                   'type': 'object'}}}
+          }
     if sys_prompt:
         d2['messages'].append({"role": "system", "content": sys_prompt})
     d2['messages'].append({"role": "user", "content": question})
@@ -204,33 +172,82 @@ def get_article_gpt_pydantic(question, real_ip="localhost", demo_name="无", mod
         try:
             response = requests.post('http://170.106.108.95/v1/chat/completions', json=d2)
             r_json = response.json()
-            simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{r_json}")
+
+            for choice in r_json["choices"]:
+                Article.model_validate_json(choice["message"]["content"])
+
+            simple_logger.info(f"问题日志task_id:{task_id},exercise_id:{exercise_id}\n回答日志:\n{r_json}")
             return r_json

-            #
+        except ValidationError as e:
+            logger.error(f"gpt回复校验失败task_id:{task_id},exercise_id:{exercise_id}:")

-            #
+        except requests.exceptions.RequestException as e:
+            logger.error(f"HTTP请求错误task_id:{task_id},exercise_id:{exercise_id}: {str(e)}")
+            time.sleep(1)

+        except json.decoder.JSONDecodeError as e:
+            if 'response' in locals() and response is not None:
+                logger.error(f"json格式化错误task_id:{task_id},exercise_id:{exercise_id}:{response.text}")

+        except Exception as e:
+            log_err_e(e, f"其他错误task_id:{task_id},exercise_id:{exercise_id}")


+def get_annotation_gpt_pydantic(question, real_ip="localhost", demo_name="无", model="gpt-4.1", max_tokens=3500, temperature: float = 0, n=1,
+                                check_fucn=None, sys_prompt=None, task_id=0, exercise_id=0):
+    """
+    获取词义标注
+    :param question: 问题
+    :param real_ip: 真实IP
+    :param demo_name: 项目名称
+    :param model: 模型名称
+    :param max_tokens: 最大token数
+    :param temperature: 温度
+    :param n: 生成数量
+    :param check_fucn: 校验函数
+    :param sys_prompt: 系统提示
+    :param task_id: 任务id
+    :param exercise_id: 学案id

-        except httpx.HTTPError as e:
-            logger.error(f"HTTP请求错误: {str(e)}")
-            if num_count < 2:
-                time.sleep(10)
-            else:
-                raise
-        except Exception as e:
-            log_err_e(e, "其他错误")
+    :return: 标注内容
+    """

-            if num_count < 2:
-                time.sleep(10)
-            else:
-                raise
+    d2 = {"model": model, "messages": [], "max_tokens": max_tokens, "temperature": temperature, "n": n,
+          "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Annotation', 'schema': {
+              'properties': {'annotation_text': {'description': '对句子或文章的每个单词进行词义id的标注',
+                                                 'examples': ['an[33] apple[123]'], 'title': '标注文本', 'type': 'string'}},
+              'required': ['annotation_text'], 'title': 'Annotation', 'type': 'object'}}}
+          }
+    if sys_prompt:
+        d2['messages'].append({"role": "system", "content": sys_prompt})
+    d2['messages'].append({"role": "user", "content": question})
+
+    for num_count in range(3):
+        try:
+            response = requests.post('http://170.106.108.95/v1/chat/completions', json=d2)
+            r_json = response.json()
+
+            for choice in r_json["choices"]:
+                Annotation.model_validate_json(choice["message"]["content"])
+
+            temp_logger.info(f"日志task_id:{task_id},exercise_id:{exercise_id}:\n问题日志:\n{question}")
+            simple_logger.info(f"日志task_id:{task_id},exercise_id:{exercise_id}:\n回答日志:\n{r_json}")
+            return r_json
+
+        except ValidationError as e:
+            logger.error(f"gpt回复校验失败task_id:{task_id},exercise_id:{exercise_id}:")
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"HTTP请求错误task_id:{task_id},exercise_id:{exercise_id}: {str(e)}")
+            time.sleep(1)

-    logger.critical("get_article_gpt_pydantic 严重错误,3次后都失败了")
-    raise Exception("获取文章失败,已达到最大重试次数")
+        except json.decoder.JSONDecodeError as e:
+            if 'response' in locals() and response is not None:
+                logger.error(f"json格式化错误task_id:{task_id},exercise_id:{exercise_id}:{response.text}")
+
+        except Exception as e:
+            log_err_e(e, f"其他错误task_id:{task_id},exercise_id:{exercise_id}")


 def parse_gpt_phon_to_tuplelist(text: str) -> list:
@@ -243,24 +260,3 @@ def parse_gpt_phon_to_tuplelist(text: str) -> list:
         if len(ii) >= 3:
             result.append((ii[0].strip(), ii[1].strip(), ii[2].strip()))
     return result
-
-
-if __name__ == '__main__':
-    question = "hello"
-
-    sys_prompt = "你是一个专业的英语老师,擅长根据用户提供的词汇生成对应的英语文章和中文翻译和4个配套选择题。"
-    q = """下面我会为你提供两组数据,[单词组1]和[单词组2](里面包含词义id,英语单词,中文词义),优先使用[单词组1]内的单词,请根据这些单词的中文词义,生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
-
-要求:
-1.必须用提供的这个词义的单词,其他单词使用常见、高中难度的的单词。文章整体难度适中,大约和中国的高中生,中国CET-6,雅思6分这样的难度标准。
-2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
-3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
-4.生成的文章要求600词左右,可以用\\n\\n字符分段,一般5个段落左右。第一段是文章标题。
-5.生成文章优先使用[单词组1]的词义,其次可以挑选使用[单词组2]的词义。允许不使用[单词组1]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
-
-提供[单词组1]:4238 penalty:惩罚, 刑罚;4591 bare:赤裸的, 无遮蔽的;4227 stable:畜舍, 马厩;4236 psychology:心理学;4245 offense:进攻, 攻势, 冒犯, 触怒, 过错;4237 innocent:清白的, 无辜的, 天真的;4228 refrigerator:冰箱, 冷库;4247 tissue:(动植物)组织;4250 awareness:察觉, 觉悟, 意识;4234 mode:方式, 模式;4224 neat:整洁, 利索;4225 statistics:统计;4251 random:任意的, 随机的;4201 laundry:洗衣房;4545 barrel:桶, 一桶之量;4249 recruit:招募, 新成员;4229 pregnant:怀孕的, 孕育的;4235 relevant:有关的, 相关联的;4252 incentive:刺激, 激励, 鼓励;4194 grave:坟墓, 墓穴;
-提供[单词组2]:;
-"""
-    resp = get_answer_from_gpt(question=question, temperature=0.9, sys_prompt=sys_prompt, model="gpt-4.1")
-    print(type(resp))
-    print(resp)

+ 110 - 139
gpt/get_article.py

@@ -1,37 +1,37 @@
 # -*- coding: utf-8 -*-
+import json
 import random
+import re
+import time
+import traceback
+from collections import OrderedDict
+from concurrent.futures import wait
+from random import randint
+
+import oss2
+import requests
+from cachetools import TTLCache
+from oss2.credentials import EnvironmentVariableCredentialsProvider

+from common.common_data import all_exchange_words
+from common.split_text import *
+from data.get_all_exchange_words import get_word_exchange_list, word_to_prototype
 from gpt.chatgpt import get_answer_from_gpt
-from tools.new_mysql import MySQLUploader
 from tools.loglog import logger
+from tools.new_mysql import MySQLUploader
 from tools.thread_pool_manager import pool_executor
-from common.common_data import all_exchange_words
-from common.split_text import *
-from data.get_all_exchange_words import get_word_exchange_list,word_to_prototype
-
-import requests
-import oss2
-from oss2.credentials import EnvironmentVariableCredentialsProvider
-from collections import OrderedDict
-from cachetools import TTLCache
-from concurrent.futures import Future, wait
-from random import randint
-import re
-import json
-import time
-import traceback


 class OtherBaseFunction:
     def __init__(self):
         self.m = MySQLUploader()
-        self.fake_meaningid = {} 
+        self.fake_meaningid = {}

-        self.callback_url_dict = {} 
-        self.real_ip_dict = {} 
-        self.demo_name = {} 
-        self.query_cache_wordspelling = TTLCache(maxsize=2000, ttl=86400) 
-        self.query_cache_meaningid = TTLCache(maxsize=2000, ttl=86400) 
+        self.callback_url_dict = {}
+        self.real_ip_dict = {}
+        self.demo_name = {}
+        self.query_cache_wordspelling = TTLCache(maxsize=2000, ttl=86400)
+        self.query_cache_meaningid = TTLCache(maxsize=2000, ttl=86400)

     @staticmethod
     def _diffculty_control(student_stage, vocabulary) -> dict:
@@ -42,17 +42,16 @@ class OtherBaseFunction:
         :return:
         """
         if vocabulary <= 1200:
-            difficult_control = {"difficult_desc": "最简单最基础的入门的初级的幼儿园的毫无难度的", "paragraph_count": 1,"student_stage_str":"小学",
+            difficult_control = {"difficult_desc": "最简单最基础的入门的初级的幼儿园的毫无难度的", "paragraph_count": 1, "student_stage_str": "小学",
                                  "pragrapg_count": "生成的文章要求100词左右,三个段落以上。允许有简单句式的出现。"}
         elif 1200 < vocabulary <= 2400:
-            difficult_control = {"difficult_desc": "简单的容易的常见的难度低的", "paragraph_count": 3,"student_stage_str":"初中",
+            difficult_control = {"difficult_desc": "简单的容易的常见的难度低的", "paragraph_count": 3, "student_stage_str": "初中",
                                  "pragrapg_count": r"生成的文章要求150词左右,三个段落以上。用\n\n分段。"}
         else:
-            difficult_control = {"difficult_desc": "常见的初级的中国高考的", "paragraph_count": 5,"student_stage_str":"高中",
+            difficult_control = {"difficult_desc": "常见的初级的中国高考的", "paragraph_count": 5, "student_stage_str": "高中",
                                  "pragrapg_count": r"生成的文章要求250词左右,允许有3-5个段落。用\n\n分段。"}
         return difficult_control

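A quick worked example of the tier selection above (the vocabulary value is invented; note that student_stage is received but not consulted, only vocabulary is):

    dc = OtherBaseFunction._diffculty_control(student_stage=3, vocabulary=1500)
    assert dc["student_stage_str"] == "初中" and dc["paragraph_count"] == 3  # 1200 < 1500 <= 2400
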
-   
     def _get_article_chinese_dict(self, title, r_article_sentences, task_id):
         """
         获取文章的中文翻译。注意:这里切割的方法要与后面的split_article_make_json一致
@@ -86,41 +85,37 @@ class OtherBaseFunction:

             logger.critical("严重错误:gpt生成文章中文翻译三次全错,请管理员检查")

-       
         article_list = [title + "\n\n"] + r_article_sentences

-       
         r_article_chinese_dict = get_chinese_from_gpt(whole_article_sentences=article_list)
-       
+
         if r_article_chinese_dict:
             return r_article_chinese_dict

-   
     @staticmethod
     def _calculate_new_word_rate(r_article_sentences):
         article = "".join(r_article_sentences)
-        new_words = set() 
+        new_words = set()
         test_article = re.findall(r'\b\w+\'?\w*\b', article)
         for word in test_article:
             word2: str = word.split("'")[0] if "'" in word else word
-            if len(word) <= 2: 
+            if len(word) <= 2:
                 continue
             is_in_12000words = any([word2.lower() in all_exchange_words, word2.title() in all_exchange_words])
             if not is_in_12000words:
                 new_words.add(word)
         new_word_rate = round(len(new_words) / len(article), 3)
         logger.info(f"开发调试生词率{new_word_rate}.生词{new_words}")
-       
+
         new_words = list(new_words)
         return new_word_rate, new_words

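A toy run of _calculate_new_word_rate above (the word is invented): for r_article_sentences = ["Qwxzy is rare."], "is" is skipped by the len <= 2 guard, "rare" is assumed to be in all_exchange_words, and "Qwxzy" is not, so new_words == ["Qwxzy"] and the rate is round(1 / 14, 3). Note the denominator is the character length of the joined article, not its word count.
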
-   
     def insert_article_to_mysql(self, title, article, chinese, task_id, code=0):
-       
+
         self.m.execute_("INSERT INTO new_word_article (title,article,chinese, taskId,code) VALUES (%s, %s,%s,%s,%s)",
                         (title, article, chinese, task_id, code))

-    def get_wordid_by_wordspelling(self, wordspelling:str):
+    def get_wordid_by_wordspelling(self, wordspelling: str):
         """加一个功能。大字典内没有这个单词就自动插入,返回id"""
         if wordspelling in self.query_cache_meaningid:
             return self.query_cache_wordspelling[wordspelling]
@@ -129,16 +124,16 @@ class OtherBaseFunction:
         prototype_word = word_to_prototype(wordspelling)
         r = self.m.query_data(s, (prototype_word,))
         if r:
-           
+
             wordid = r[0][0]
         else:
-           
+
             wordid = 0

         self.query_cache_wordspelling[wordspelling] = wordid
         return wordid

-    def get_meaning_by_meaningid(self, meaningid:int):
+    def get_meaning_by_meaningid(self, meaningid: int):
         """加一个功能。大字典内没有这个单词就自动插入,返回id"""
         if meaningid in self.query_cache_meaningid:
             return self.query_cache_meaningid[meaningid]
@@ -149,7 +144,7 @@ class OtherBaseFunction:
         self.query_cache_meaningid[meaningid] = meaning
         return meaning

-    def _get_fake_meaningid(self,word):
+    def _get_fake_meaningid(self, word):
         """获得假词义id。但是保证同一个单词是一个id"""
         if word in self.fake_meaningid:
             return self.fake_meaningid[word]
@@ -158,33 +153,31 @@ class OtherBaseFunction:
         if r:
             fake_meaningid = r[0][0]
         else:
-            fake_meaningid = random.randint(10000,99999) 
+            fake_meaningid = random.randint(10000, 99999)

         self.fake_meaningid[word] = fake_meaningid
         return fake_meaningid

-   
     @staticmethod
-    def _clean_gpt_res(single_sentence: str, gpt_text: str,split_words:list) -> list:
+    def _clean_gpt_res(single_sentence: str, gpt_text: str, split_words: list) -> list:
         """# 解析成  键是句子+单词拼写,值是词义id"""
         return_data = []
         if not gpt_text:
             return []

-        row_data = [i for i in gpt_text.split("\n") if "**" in i] 
+        row_data = [i for i in gpt_text.split("\n") if "**" in i]

         already_spelling = set()
         for row in row_data:
             one_row_data_list = row.split("**")
-            if len(one_row_data_list) < 1: 
+            if len(one_row_data_list) < 1:
                 continue
-            one_row_data_list = [i.strip() for i in one_row_data_list] 
+            one_row_data_list = [i.strip() for i in one_row_data_list]
             spelling, meaning_id = one_row_data_list[0:2]

             already_spelling.add(spelling)
             return_data.append([single_sentence, spelling, int(meaning_id)])

-       
         for remaining_word in set(split_words).difference(already_spelling):
             return_data.append([single_sentence, remaining_word, 0])

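A parse sketch for _clean_gpt_res (row format inferred from the split("**") logic; the values are invented): with gpt_text = "apple**123" and split_words = ["apple", "pie"], the method returns [[sentence, "apple", 123], [sentence, "pie", 0]]; any word GPT did not label falls through with meaning id 0.
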
@@ -197,17 +190,16 @@ class GetArticle(OtherBaseFunction):
         self.auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
         self.bucket = oss2.Bucket(self.auth, 'oss-cn-hangzhou.aliyuncs.com', 'qingti-private')

-        self.article_result = {} 
+        self.article_result = {}

-       
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)

-    def __del__(self):...
+    def __del__(self):
+        ...

-   
-    def submit_task(self, words_meaning_ids: list[int],callback_url:str,real_ip:str,demo_name:str,
-                    student_stage:int,vocabulary:int,class_id:int):
+    def submit_task(self, words_meaning_ids: list[int], callback_url: str, real_ip: str, demo_name: str,
+                    student_stage: int, vocabulary: int, class_id: int):
         """
         words_meaning_ids: 词义id 包含词义ID的数组集合,用于生成文章。- 示例:[110, 111, 112, 113, 114]
         callback_url: 通知的回调地址
@@ -218,14 +210,13 @@ class GetArticle(OtherBaseFunction):
         task_id = randint(10000000, 99999999)
         logger.info(f"生成文章id。task_id:{task_id}。词义id:{words_meaning_ids}.")

-       
         self.callback_url_dict[task_id] = callback_url
         self.real_ip_dict[task_id] = real_ip
         self.demo_name[task_id] = demo_name

         words_meaning_str = ""
         for wordmeaning_id in words_meaning_ids:
-            r = self.m.query_data("select WordSpelling,WordMeaning from dictionary_meaningitem where Id = %s",(wordmeaning_id,))
+            r = self.m.query_data("select WordSpelling,WordMeaning from dictionary_meaningitem where Id = %s", (wordmeaning_id,))
             try:
                 words_meaning_str += str(r[0])
             except IndexError:
@@ -234,10 +225,10 @@ class GetArticle(OtherBaseFunction):
                 return err_msg

         try:
-           
-            pool_executor.submit(self.run_task, words_meaning_str, task_id,student_stage,vocabulary,class_id)
-           
-            resp_result = {"id":task_id,"key":f"study/article/{task_id}"}
+
+            pool_executor.submit(self.run_task, words_meaning_str, task_id, student_stage, vocabulary, class_id)
+
+            resp_result = {"id": task_id, "key": f"study/article/{task_id}"}
             logger.success(f"文章生成任务提交成功:{resp_result}")
             return resp_result
         except Exception as e:
@@ -245,9 +236,8 @@ class GetArticle(OtherBaseFunction):
             logger.error(err_msg)
             return err_msg

-   
-    def __get_article(self,words_meaning_str,task_id,student_stage,vocabulary) -> tuple:
-        dc = self._diffculty_control(student_stage,vocabulary)
+    def __get_article(self, words_meaning_str, task_id, student_stage, vocabulary) -> tuple:
+        dc = self._diffculty_control(student_stage, vocabulary)
         q = f"""你是一名在中国的英语教师,下面我会为你提供一些带中文词义的英语种子单词,请根据这些种子单词的词义,生成一篇带标题的英语文章。
 提供种子单词:{words_meaning_str}

@@ -261,17 +251,16 @@ class GetArticle(OtherBaseFunction):
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True,real_ip=real_ip,demo_name=demo_name))
+            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
             r_article_sentences = r_json.get("article_sentences")
             r_title = r_json.get("title")
-            return r_title,r_article_sentences
+            return r_title, r_article_sentences
         except json.decoder.JSONDecodeError:
             logger.error("gpt生成文章回复json格式化错误")
         except Exception as e:
             logger.error(f"gpt生成文章回复其他错误.{type(e).__name__} {e}")

-   
-    def __replace_new_word(self, old_article: str, new_words: list,task_id:int):
+    def __replace_new_word(self, old_article: str, new_words: list, task_id: int):
         new_words_str = ",".join(new_words)
         q = f"""你是一名在中国的英语教师,下面我会为你提供一篇英语文章和一些生词,请用其他单词使用简单、常见、难度低的单词将英语文章中的生词进行替换。
 缩写引号用单引号'。最终回复替换后的英语文章。
@@ -287,7 +276,7 @@ class GetArticle(OtherBaseFunction):
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True,real_ip=real_ip,demo_name=demo_name))
+            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
             print(f"调试信息2 {r_json}")
             r_article = r_json.get("article")
             r_title = r_json.get("title")
@@ -297,8 +286,7 @@ class GetArticle(OtherBaseFunction):
         except Exception as e:
             logger.error(f"gpt替换生词文章回复其他错误.{type(e).__name__} {e}")

-   
-    def run_get_article_task(self, words_meaning_str, task_id,student_stage,vocabulary) -> tuple:
+    def run_get_article_task(self, words_meaning_str, task_id, student_stage, vocabulary) -> tuple:
         """
         :param vocabulary:
         :param student_stage:
@@ -307,47 +295,40 @@ class GetArticle(OtherBaseFunction):
         :return: 标题,文章,句子翻译的字典
         """

-        def get_article_chinese(title,r_article_sentences,task_id,code=0)-> tuple:
+        def get_article_chinese(title, r_article_sentences, task_id, code=0) -> tuple:
             r_article_chinese_dict = self._get_article_chinese_dict(title, r_article_sentences, task_id)
             chinese_str = "\n".join(r_article_chinese_dict.values())
             r_article = "".join(r_article_sentences)

-            self.insert_article_to_mysql(title=r_title, article=r_article, chinese=chinese_str, task_id=task_id,code=code)
+            self.insert_article_to_mysql(title=r_title, article=r_article, chinese=chinese_str, task_id=task_id, code=code)
             return r_title, r_article_sentences, r_article_chinese_dict

-       
-        r_title,r_article_sentences = self.__get_article(words_meaning_str,task_id,student_stage,vocabulary)
+        r_title, r_article_sentences = self.__get_article(words_meaning_str, task_id, student_stage, vocabulary)

         new_word_rate, new_words = self._calculate_new_word_rate(r_article_sentences)
         if new_word_rate < 0.03:
             return get_article_chinese(title=r_title, r_article_sentences=r_article_sentences, task_id=task_id)

-       
         replace_article_gpt = "".join(r_article_sentences)
         for i in range(3):
-            if tuple_data:=self.__replace_new_word(old_article=replace_article_gpt, new_words=new_words,task_id=task_id):
-                r_title,replace_article_gpt = tuple_data
+            if tuple_data := self.__replace_new_word(old_article=replace_article_gpt, new_words=new_words, task_id=task_id):
+                r_title, replace_article_gpt = tuple_data

                 new_word_rate, new_words = self._calculate_new_word_rate(replace_article_gpt)
                 if new_word_rate < 0.03 or i == 2:
                     if i == 2:
                         logger.warning(f"3次后生词率未到3%以下。task_id:{task_id}")
-                    return get_article_chinese(title=r_title,r_article_sentences=r_article_sentences,task_id=task_id)
+                    return get_article_chinese(title=r_title, r_article_sentences=r_article_sentences, task_id=task_id)
 
 
-   
-    def split_article_make_json(self, task_id: int,title:str, r_article_sentences: list,r_article_chinese_dict:dict):
+    def split_article_make_json(self, task_id: int, title: str, r_article_sentences: list, r_article_chinese_dict: dict):
 
 
-       
         article = "".join(r_article_sentences)
         article = "".join(r_article_sentences)
         article = title + "\n\n" + article
         article = title + "\n\n" + article
 
 
-       
-        all_sentence_word_meaningid_dict = self.run_query_word_meaning(article,task_id)
+        all_sentence_word_meaningid_dict = self.run_query_word_meaning(article, task_id)
 
 
-       
-        word_count = get_article_words_count(title+article)
+        word_count = get_article_words_count(title + article)
 
 
-       
         create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         outside_json_dict = {"id": task_id, "body": article, "wordCount": word_count, "paragraphs": [],
         outside_json_dict = {"id": task_id, "body": article, "wordCount": word_count, "paragraphs": [],
                              "createTime": create_time}
                              "createTime": create_time}
@@ -355,17 +336,16 @@ class GetArticle(OtherBaseFunction):
         article_paragraphs = article.split("\n\n")
         article_paragraphs = article.split("\n\n")
         article_sentence_count = 0
         article_sentence_count = 0
         for paragraph in article_paragraphs:
         for paragraph in article_paragraphs:
-            sentences = split_text_to_sentences(paragraph) 
+            sentences = split_text_to_sentences(paragraph)
 
 
             p = {"sentences": []}
             p = {"sentences": []}
             for single_sentence in sentences:
             for single_sentence in sentences:
                 article_sentence_count += 1
                 article_sentence_count += 1
-                single_sentence_chinese = r_article_chinese_dict.get(single_sentence,"")
-               
-                w = {"words": [],"chinese":single_sentence_chinese}
-                split_words:list[str] = re.findall(r'\b[-\'\w]+\b|[^\w\s]', single_sentence) 
+                single_sentence_chinese = r_article_chinese_dict.get(single_sentence, "")
+
+                w = {"words": [], "chinese": single_sentence_chinese}
+                split_words: list[str] = re.findall(r'\b[-\'\w]+\b|[^\w\s]', single_sentence)
 
 
-               
                 for originale_word in split_words:
                 for originale_word in split_words:
                     single_word = originale_word
                     single_word = originale_word
                     if not originale_word:
                     if not originale_word:
@@ -374,26 +354,24 @@ class GetArticle(OtherBaseFunction):
                         w["words"].append({"spell": originale_word, "type": "punctuation"})
                         w["words"].append({"spell": originale_word, "type": "punctuation"})
                         continue
                         continue
 
 
-                   
                     word_id = self.get_wordid_by_wordspelling(originale_word)
                     word_id = self.get_wordid_by_wordspelling(originale_word)
 
 
-                    x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + originale_word, [0,0])
-                    if type_ == 0: 
+                    x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + originale_word, [0, 0])
+                    if type_ == 0:
                         single_word = originale_word.lower()
                         single_word = originale_word.lower()
-                        x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0,0])
-                        if type_ == 0: 
+                        x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0, 0])
+                        if type_ == 0:
                             single_word = word_to_prototype(single_word)
                             single_word = word_to_prototype(single_word)
-                            x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word,[0,0])
+                            x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0, 0])
 
 
                     if type_ == 0:
                     if type_ == 0:
                         logger.warning(f"警告:type_还是0,那就是二次查询时,也没有给词义。有漏下的单词{originale_word}")
                         logger.warning(f"警告:type_还是0,那就是二次查询时,也没有给词义。有漏下的单词{originale_word}")
                         continue
                         continue
 
 
-                   
-                    if type_ == 1: 
+                    if type_ == 1:
                         meaning_id = x_data
                         meaning_id = x_data
                         meaning = self.get_meaning_by_meaningid(x_data)
                         meaning = self.get_meaning_by_meaningid(x_data)
-                    elif type_ == 2: 
+                    elif type_ == 2:
                         meaning_id = self._get_fake_meaningid(single_word)
                         meaning_id = self._get_fake_meaningid(single_word)
                         meaning = x_data
                         meaning = x_data
                     else:
                     else:
@@ -401,11 +379,10 @@ class GetArticle(OtherBaseFunction):
                         meaning_id = 9999999
                         meaning_id = 9999999
                         meaning = '无'
                         meaning = '无'
 
 
-                   
                     word_prototype = word_to_prototype(originale_word)
                     word_prototype = word_to_prototype(originale_word)
 
 
-                    word_json = {"id": word_id, "meaningId": meaning_id,"meaning":meaning, "spell": originale_word,
-                                 "exchanges": get_word_exchange_list(word=single_word),"prototype": word_prototype}
+                    word_json = {"id": word_id, "meaningId": meaning_id, "meaning": meaning, "spell": originale_word,
+                                 "exchanges": get_word_exchange_list(word=single_word), "prototype": word_prototype}
                     w["words"].append(word_json)
                     w["words"].append(word_json)
 
 
                 p["sentences"].append(w)
                 p["sentences"].append(w)
@@ -413,10 +390,9 @@ class GetArticle(OtherBaseFunction):
             outside_json_dict["paragraphs"].append(p)
             outside_json_dict["paragraphs"].append(p)
 
 
         outside_json_dict["articleSentenceCount"] = article_sentence_count
         outside_json_dict["articleSentenceCount"] = article_sentence_count
-        return outside_json_dict,word_count,article_sentence_count
+        return outside_json_dict, word_count, article_sentence_count
 
 
-   
-    def run_query_word_meaning(self, article,task_id):
+    def run_query_word_meaning(self, article, task_id):
         futures = []
         futures = []
         article_paragraphs = article.split("\n\n")
         article_paragraphs = article.split("\n\n")
 
 
@@ -424,21 +400,20 @@ class GetArticle(OtherBaseFunction):
             sentences = split_text_to_sentences(paragraph)
             sentences = split_text_to_sentences(paragraph)
 
 
             for single_sentence in sentences:
             for single_sentence in sentences:
-                f = pool_executor.submit(self.query_word_meaning_from_gpt, single_sentence,task_id)
+                f = pool_executor.submit(self.query_word_meaning_from_gpt, single_sentence, task_id)
                 futures.append(f)
                 futures.append(f)
 
 
         wait(futures)
         wait(futures)
         all_sentence_word_meaningid_dict = {}
         all_sentence_word_meaningid_dict = {}
         for f in futures:
         for f in futures:
-            f_result = f.result() 
+            f_result = f.result()
             all_sentence_word_meaningid_dict.update(f_result)
             all_sentence_word_meaningid_dict.update(f_result)
         return all_sentence_word_meaningid_dict
         return all_sentence_word_meaningid_dict
 
 
-   
-    def query_word_meaning_from_gpt(self, single_sentence,task_id) -> dict:
+    def query_word_meaning_from_gpt(self, single_sentence, task_id) -> dict:
         """single_sentence 提交单个句子"""
         """single_sentence 提交单个句子"""
         split_words = split_text_to_word(single_sentence)
         split_words = split_text_to_word(single_sentence)
-       
+
         split_words = [word_to_prototype(w) for w in split_words if w]
         split_words = [word_to_prototype(w) for w in split_words if w]
 
 
         placeholders = ', '.join(['%s'] * len(split_words))
         placeholders = ', '.join(['%s'] * len(split_words))
@@ -476,20 +451,19 @@ apple ** 234567
 
 
         real_ip = self.real_ip_dict[task_id]
         real_ip = self.real_ip_dict[task_id]
         demo_name = self.demo_name[task_id]
         demo_name = self.demo_name[task_id]
-        r_gpt = get_answer_from_gpt(q,real_ip=real_ip,demo_name=demo_name)
+        r_gpt = get_answer_from_gpt(q, real_ip=real_ip, demo_name=demo_name)
 
 
-       
-        already_data,need_twice_data = {},[]
+        already_data, need_twice_data = {}, []
 
 
-        three_list = self._clean_gpt_res(single_sentence, r_gpt,split_words)
+        three_list = self._clean_gpt_res(single_sentence, r_gpt, split_words)
 
 
         for sentence, spelling, meaning_id in three_list:
         for sentence, spelling, meaning_id in three_list:
-           
+
             if meaning_id == 0:
             if meaning_id == 0:
                 need_twice_data.append([sentence, spelling, meaning_id])
                 need_twice_data.append([sentence, spelling, meaning_id])
             else:
             else:
-               
-                already_data[sentence + spelling] = [meaning_id,1]
+
+                already_data[sentence + spelling] = [meaning_id, 1]
 
 
         for _, spelling, _ in need_twice_data:
         for _, spelling, _ in need_twice_data:
             need_twice_words = ",".join([spelling])
             need_twice_words = ",".join([spelling])
@@ -504,18 +478,17 @@ apple ** 234567
     回复示例:
     回复示例:
     {{"单词":"中文词义",...}}
     {{"单词":"中文词义",...}}
     """
     """
-            r2 = get_answer_from_gpt(q2,real_ip=real_ip,demo_name=demo_name,json_resp=True)
-            r2_json:dict = json.loads(r2)
-            for w_spelling,chinese_meaning in r2_json.items():
-                already_data[single_sentence + w_spelling] = [chinese_meaning,2]
+            r2 = get_answer_from_gpt(q2, real_ip=real_ip, demo_name=demo_name, json_resp=True)
+            r2_json: dict = json.loads(r2)
+            for w_spelling, chinese_meaning in r2_json.items():
+                already_data[single_sentence + w_spelling] = [chinese_meaning, 2]
 
 
         return already_data
         return already_data
 
 
-   
-    def upload_json_file_to_oss(self,article_id:int,data_dict:dict):
+    def upload_json_file_to_oss(self, article_id: int, data_dict: dict):
         json_data = json.dumps(data_dict, ensure_ascii=False)
         json_data = json.dumps(data_dict, ensure_ascii=False)
         object_name = f'study/article/{article_id}'
         object_name = f'study/article/{article_id}'
-        content = json_data.encode('utf-8') 
+        content = json_data.encode('utf-8')
         for _ in range(2):
         for _ in range(2):
             try:
             try:
                 r = self.bucket.put_object(object_name, content)
                 r = self.bucket.put_object(object_name, content)
@@ -529,18 +502,17 @@ apple ** 234567
         else:
         else:
             logger.critical(f"2次上传oss错误,taskid:{article_id}")
             logger.critical(f"2次上传oss错误,taskid:{article_id}")
 
 
-   
-    def notice_teach_system(self,article_id:int,class_id:int,word_count:int,article_sentence_count:int):
+    def notice_teach_system(self, article_id: int, class_id: int, word_count: int, article_sentence_count: int):
         url = self.callback_url_dict.get(article_id)
         url = self.callback_url_dict.get(article_id)
         if not url or "localhost/callback" in url:
         if not url or "localhost/callback" in url:
             return False
             return False
 
 
-        json_data = {"classId": class_id,"articleId": article_id,"articleWordCount": word_count,"articleSentenceCount": article_sentence_count}
+        json_data = {"classId": class_id, "articleId": article_id, "articleWordCount": word_count, "articleSentenceCount": article_sentence_count}
         for _ in range(3):
         for _ in range(3):
             try:
             try:
-                r = requests.post(url,json=json_data)
+                r = requests.post(url, json=json_data)
                 r.raise_for_status()
                 r.raise_for_status()
-                self.callback_url_dict.pop(article_id,'')
+                self.callback_url_dict.pop(article_id, '')
                 logger.success(f"通知成功{r.text}")
                 logger.success(f"通知成功{r.text}")
                 return True
                 return True
             except Exception as e:
             except Exception as e:
@@ -548,23 +520,22 @@ apple ** 234567
 
 
         logger.critical(f"通知接口失败,三次全错. article_id:{article_id} callback_url:{url}")
         logger.critical(f"通知接口失败,三次全错. article_id:{article_id} callback_url:{url}")
 
 
-   
-    def clean_source(self,article_id):
+    def clean_source(self, article_id):
         self.callback_url_dict.pop(article_id, '')
         self.callback_url_dict.pop(article_id, '')
         self.real_ip_dict.pop(article_id, '')
         self.real_ip_dict.pop(article_id, '')
 
 
-   
-    def run_task(self,words_meaning_str, task_id,student_stage,vocabulary,class_id):
+    def run_task(self, words_meaning_str, task_id, student_stage, vocabulary, class_id):
         try:
         try:
-            title,r_article_sentences,r_article_chinese_dict = self.run_get_article_task(words_meaning_str, task_id,student_stage,vocabulary)
+            title, r_article_sentences, r_article_chinese_dict = self.run_get_article_task(words_meaning_str, task_id, student_stage, vocabulary)
 
 
-            outside_json_dict,word_count,article_sentence_count = self.split_article_make_json(task_id,title,r_article_sentences,r_article_chinese_dict)
-            self.upload_json_file_to_oss(article_id=task_id,data_dict=outside_json_dict)
-            self.notice_teach_system(article_id=task_id,class_id=class_id,word_count=word_count,article_sentence_count=article_sentence_count)
+            outside_json_dict, word_count, article_sentence_count = self.split_article_make_json(task_id, title, r_article_sentences,
+                                                                                                 r_article_chinese_dict)
+            self.upload_json_file_to_oss(article_id=task_id, data_dict=outside_json_dict)
+            self.notice_teach_system(article_id=task_id, class_id=class_id, word_count=word_count, article_sentence_count=article_sentence_count)
             self.clean_source(article_id=task_id)
             self.clean_source(article_id=task_id)
             logger.success(f"文章任务完成。taskid:{task_id}")
             logger.success(f"文章任务完成。taskid:{task_id}")
 
 
         except Exception as e:
         except Exception as e:
             logger.error(f"{type(e).__name__} {e}")
             logger.error(f"{type(e).__name__} {e}")
             traceback_str = traceback.format_exc()
             traceback_str = traceback.format_exc()
-            logger.error(f"外围错误追溯:{traceback_str}")
+            logger.error(f"外围错误追溯:{traceback_str}")
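
run_query_word_meaning above keeps its fan-out pattern: one thread-pool job per sentence, a blocking wait, then a merge of the per-sentence dictionaries. A minimal sketch of that pattern, assuming an executor like the project's pool_executor; query_one is a hypothetical stand-in for query_word_meaning_from_gpt, not project code:

    from concurrent.futures import ThreadPoolExecutor, wait

    pool_executor = ThreadPoolExecutor(max_workers=8)  # pool size is an assumption

    def query_one(sentence: str, task_id: int) -> dict:
        # Stand-in: would return {sentence + word: [meaning_or_id, type]} per sentence.
        return {sentence + "word": ["词义", 1]}

    def query_all(sentences: list[str], task_id: int) -> dict:
        futures = [pool_executor.submit(query_one, s, task_id) for s in sentences]
        wait(futures)  # block until every sentence lookup finishes
        merged = {}
        for f in futures:
            merged.update(f.result())  # a failed future re-raises its exception here
        return merged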

+ 51 - 96
gpt/get_article2.py

@@ -1,27 +1,21 @@
 # -*- coding: utf-8 -*-
 
-from gpt.chatgpt import get_answer_from_gpt, get_article_gpt_pydantic
-from gpt.gpt_check import CheckGptAnswer, CheckArticleResult
-from tools.new_mysql import MySQLUploader
-from tools.loglog import logger, log_err_e
-from tools.thread_pool_manager import pool_executor
-from common.common_data import all_exchange_words
-from common.split_text import split_text_to_word, get_article_words_count
-
-from pydantic import BaseModel
-from cachetools import TTLCache
-from concurrent.futures import wait
+import json
+from collections import defaultdict
 from random import randint, shuffle, sample
-import json,time
+
+import httpx
 import requests
+from fastapi import BackgroundTasks
 from openpyxl import load_workbook
 from tenacity import retry, stop_after_attempt, wait_fixed
-import httpx
-import asyncio
-from threading import Lock
-from collections import defaultdict
-from fastapi import BackgroundTasks
 
+from common.common_data import all_exchange_words
+from common.split_text import split_text_to_word, get_article_words_count
+from gpt.chatgpt import get_article_gpt_pydantic
+from gpt.gpt_check import CheckArticleResult
+from tools.loglog import logger, log_err_e
+from tools.new_mysql import MySQLUploader
 
 
 def get_article_difficulty(article) -> int:
@@ -69,11 +63,9 @@ def merge_and_split(list1, list2):
     import random
     random.shuffle(combined)
 
-
     two_thirds = []
     one_third = []
 
-
     total_length = len(combined)
     if total_length > 15:
         two_thirds = combined[:15]
@@ -87,41 +79,33 @@ def merge_and_split(list1, list2):
 
 class GetArticle:
     def __init__(self):
-        self.m = MySQLUploader() 
+        self.m = MySQLUploader()
 
-
         self.callback_url_dict = defaultdict(str)
-        self.real_ip_dict = defaultdict(str) 
+        self.real_ip_dict = defaultdict(str)
         self.demo_name = defaultdict(str)
 
+        self.article_result = {}
 
-        self.article_result = {} 
-
-
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)
 
-
-        self.exchange_data: dict[str, list] = {} 
+        self.exchange_data: dict[str, list] = {}
         self.read_spring_bamboo_exchange_table()
-
 
-
-
     def read_spring_bamboo_exchange_table(self):
         """变形是键,原型是值"""
         wb = load_workbook(r"data/春笋单词对照变形.xlsx", read_only=True, data_only=True)
         ws = wb.active
         for row in ws.values:
-            prototype = row[0] 
-            exchange = row[1] 
+            prototype = row[0]
+            exchange = row[1]
             if prototype not in self.exchange_data:
                 self.exchange_data[prototype] = [exchange]
             else:
                 self.exchange_data[prototype].append(exchange)
         wb.close()
 
-
     def parser_insert_to_mysql(self, resp_result):
         try:
             for single_article in resp_result['articles']:
@@ -133,13 +117,12 @@ class GetArticle:
                 sql = "INSERT INTO spring_bamboo_article (article_json,difficult_level) VALUES (%s,%s)"
                 self.m.execute_(sql, (article_json, difficult_value))
         except Exception as e:
-
+
             logger.error(f"插入数据库时发生错误: {str(e)}")
 
-
     def submit_task(self, real_ip: str, core_words: list, take_count: int,
-                          demo_name: str, reading_level: int, article_length: int, exercise_id: int,
-                          background_tasks: BackgroundTasks):
+                    demo_name: str, reading_level: int, article_length: int, exercise_id: int,
+                    background_tasks: BackgroundTasks):
         """
         core_words: 词义数据组
         take_count: 取文章数量 (int类型,正常是2篇,最大8篇)
@@ -156,11 +139,10 @@ class GetArticle:
             self.real_ip_dict[task_id] = real_ip
             self.demo_name[task_id] = demo_name
 
-            resp_result = self.run_task(core_words, task_id, take_count, reading_level, article_length)
-            
-
+            resp_result = self.run_task(core_words, task_id, exercise_id, take_count, reading_level, article_length)
+
             background_tasks.add_task(self.parser_insert_to_mysql, resp_result)
-            
+
             logger.success(f"reading-comprehension 文章2任务完成。学案id:{exercise_id},taskid:{task_id}")
             return resp_result
         except Exception as e:
@@ -168,18 +150,17 @@ class GetArticle:
             log_err_e(e, msg="GetArticle提交任务失败;")
             return err_msg
         finally:
-
+
             self.real_ip_dict.pop(task_id, None)
             self.demo_name.pop(task_id, None)
 
-
-    def __parse_gpt_resp(self,gpt_resp:dict,core_words:list):
-        return_json = {"articles": []} 
+    def __parse_gpt_resp(self, gpt_resp: dict, core_words: list):
+        return_json = {"articles": []}
         for choice in gpt_resp["choices"]:
             single_article_dict = json.loads(choice["message"]["content"])
 
-            allWordAmount = 0 
-
+            allWordAmount = 0
+
             articleWordAmount = get_article_words_count(single_article_dict["englishArticle"])
             allWordAmount += articleWordAmount
 
@@ -189,28 +170,25 @@ class GetArticle:
                 allWordAmount += count_trunk
                 allWordAmount += count_candidates
 
-
-            usedMeanIds: list = single_article_dict['usedMeanIds'] 
-
+            usedMeanIds: list = single_article_dict['usedMeanIds']
+
             article_words = split_text_to_word(single_article_dict['englishArticle'])
-
+
             for i in core_words:
                 meaning_id = i.get('meaning_id', 0)
                 if not meaning_id:
                     continue
                 word = i["spell"]
-                if meaning_id not in usedMeanIds and word in self.exchange_data: 
+                if meaning_id not in usedMeanIds and word in self.exchange_data:
                     words_exchanges_list = self.exchange_data[word]
                     for exchange_word in words_exchanges_list:
                         if exchange_word in article_words:
                             usedMeanIds.append(meaning_id)
                             break
 
-
             single_article_dict["body"] = single_article_dict.pop("englishArticle")
             single_article_dict["chinese"] = single_article_dict.pop("chineseArticle")
 
-
             for q in single_article_dict['questions']:
                 data = q['candidates']
                 shuffled_candidates = sample(data, len(data))
@@ -220,17 +198,13 @@ class GetArticle:
                     candidate['label'] = labels[index]
                 q['candidates'] = shuffled_candidates
 
-
             return_json['articles'].append({**single_article_dict, "allWordAmount": allWordAmount, "articleWordAmount": articleWordAmount})
 
         return return_json
 
-
-
-
     @retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
-    def get_article(self, core_words: list, task_id: int, reading_level, article_length,n) -> dict:
-
+    def get_article(self, core_words: list, task_id: int, exercise_id: int, reading_level, article_length, n) -> dict:
+
         if not article_length:
             if 0 < reading_level <= 10:
                 article_length = 50 + 10 * reading_level
@@ -243,10 +217,9 @@ class GetArticle:
             if start <= reading_level <= end:
                 difficulty_control_stage = index
                 break
-        else: 
+        else:
             difficulty_control_stage = 2
 
-
         diffculty_control = {
             1: {"grade": "小学", "desc_difficulty": "最简单最容易没有难度", "paragraph_count": "1-2",
                 "desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
@@ -261,44 +234,42 @@ class GetArticle:
                 "desc2": "文章整体难度适中,大约和中国的高中生,中国CET-6,雅思6分这样的难度标准。",
                 "choice_desc": "选择题难度偏难,要有迷惑性混淆性,答案不要出现直接在文中,4个选项要学生推理或逻辑判断,参考中国高中生水平,高考标准。"}
         }
-
 
-        grade = diffculty_control[difficulty_control_stage]["grade"] 
-        select_diffculty = diffculty_control[difficulty_control_stage]["desc_difficulty"] 
-        select_paragraph_count = diffculty_control[difficulty_control_stage]["paragraph_count"] 
+        grade = diffculty_control[difficulty_control_stage]["grade"]
+        select_diffculty = diffculty_control[difficulty_control_stage]["desc_difficulty"]
+        select_paragraph_count = diffculty_control[difficulty_control_stage]["paragraph_count"]
         desc2 = diffculty_control[difficulty_control_stage]["desc2"]
-        choice_desc = diffculty_control[difficulty_control_stage]["choice_desc"] 
+        choice_desc = diffculty_control[difficulty_control_stage]["choice_desc"]
 
-
         shuffle(core_words)
         core_words_meaning_str = "; ".join([f"[{i['meaning_id']}  {i['spell']} {i['meaning']}]" for i in core_words])
 
         no_escape_code = r"\\n\\n"
 
         sys_prompt = "你是一个专业的英语老师,擅长根据用户提供的词汇生成对应的英语文章和中文翻译和4个配套选择题。"
+
         q = f"""下面我会为你提供一组数据,[单词组](里面包含词义id,英语单词,中文词义),请根据这些单词的中文词义,\
-生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。\
-英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。\
-4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
+生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。特别注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义,例如我提供单词[change 零钱],就不要使用[变化]的词义。
 
 要求:
 1.必须用提供的这个词义的单词,其他单词使用{select_diffculty}的单词。{desc2}{choice_desc}
 2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
 3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
-4.生成的文章要求{article_length}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。
+4.生成的文章要求{article_length}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。不需要markdown格式。
 5.允许不使用[单词组]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
-6.注意回复字段的中英文,englishArticle是英文,chineseArticle是中文,其中trunk是英文,analysis是中文,text是英文。
 
 提供[单词组]:{core_words_meaning_str};
 """
+
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
 
             gpt_resp = get_article_gpt_pydantic(q, temperature=1.2, real_ip=real_ip, demo_name=demo_name, model='gpt-4.1',
-                                                               check_fucn=CheckArticleResult.get_article_1, max_tokens=8000,
-                                                               sys_prompt=sys_prompt,n=n)
-            multi_articles_dict = self.__parse_gpt_resp(gpt_resp=gpt_resp,core_words=core_words)
+                                                check_fucn=CheckArticleResult.get_article_1, max_tokens=15000,
+                                                sys_prompt=sys_prompt, n=n, task_id=task_id, exercise_id=exercise_id)
+
+            multi_articles_dict = self.__parse_gpt_resp(gpt_resp=gpt_resp, core_words=core_words)
             return multi_articles_dict
 
         except httpx.HTTPError as e:
@@ -311,9 +282,7 @@ class GetArticle:
             log_err_e(e, f"gpt生成文章回复其他错误.")
             raise
 
-
-
-    def run_get_article_task(self, core_words, task_id, take_count, reading_level, article_length) -> dict:
+    def run_get_article_task(self, core_words, task_id, exercise_id, take_count, reading_level, article_length) -> dict:
         """
         :param core_words: 核心单词数据,优先级1;可能为空
         :param task_id: 任务id
@@ -323,16 +292,15 @@ class GetArticle:
         :return:
         """
         try:
-            return_json = self.get_article(core_words, task_id, reading_level, article_length,n=take_count)
+            return_json = self.get_article(core_words, task_id, exercise_id, reading_level, article_length, n=take_count)
             return return_json
         except Exception as e:
             logger.error(f"运行文章任务时发生错误: {str(e)}")
             raise
 
-
-    def run_task(self, core_words, task_id, take_count, reading_level, article_length):
+    def run_task(self, core_words, task_id, exercise_id, take_count, reading_level, article_length):
         try:
-            outside_json = self.run_get_article_task(core_words, task_id, take_count, reading_level, article_length)
+            outside_json = self.run_get_article_task(core_words, task_id, exercise_id, take_count, reading_level, article_length)
             return outside_json
         except Exception as e:
             log_err_e(e, msg="外层总任务捕获错误")
@@ -340,16 +308,3 @@ class GetArticle:
     def cleanup(self):
         """清理所有资源"""
         pass
-
-
-
-
-
-
-
-
-
-
-
-
-
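
The @retry decorator kept on get_article above gives three attempts spaced two seconds apart and re-raises the final exception. A small sketch of that tenacity policy in isolation; flaky() is a hypothetical example target, not project code:

    from tenacity import retry, stop_after_attempt, wait_fixed

    attempts = {"n": 0}

    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
    def flaky():
        attempts["n"] += 1
        if attempts["n"] < 3:
            raise ValueError("transient failure")  # retried up to two more times
        return "ok"

    print(flaky())  # succeeds on the third call; reraise=True surfaces the last error otherwise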

+ 23 - 27
gpt/gpt.py

@@ -1,26 +1,26 @@
 # -*- coding:utf-8 -*-
 if __name__ == '__main__':
     import os
+
     os.chdir("..")
 
-import requests
-import random
 import time
-from tools.loglog import logger,simple_logger
+
+import requests
+
+from tools.loglog import logger, simple_logger
 from tools.new_mysql import MySQLUploader
 
 m = MySQLUploader()
 
 
-def insert_ip_token(ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens):
+def insert_ip_token(ip, demo_name, gpt_content, prompt_tokens, completion_tokens, total_tokens):
     sql = "insert into consumer_token (ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens) values (%s,%s,%s,%s,%s,%s)"
-    m.execute_(sql,(ip,demo_name,str(gpt_content),prompt_tokens,completion_tokens,total_tokens))
+    m.execute_(sql, (ip, demo_name, str(gpt_content), prompt_tokens, completion_tokens, total_tokens))
 
-def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-4o",max_tokens=3500,temperature:float=0,json_resp=False,n=1,sys_prompt=None):
-
-
-
 
+def get_answer_from_gpt(question, real_ip="localhost", demo_name="无", model="gpt-4o", max_tokens=3500, temperature: float = 0, json_resp=False, n=1,
+                        sys_prompt=None):
     if "3.5" in model or "3.5-turbo" in model or "3.5turbo" in model:
         model = "gpt-3.5-turbo"
     elif "4o" in model or "gpt4o" in model:
@@ -28,19 +28,17 @@ def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-
     elif "4turbo" in model or "4-turbo" in model:
         model = "gpt-4-turbo"
 
-
     d2 = {
-    "model": model,
-    "messages": [],
-    "max_tokens": max_tokens,
-    "temperature": temperature,
-    'n': n}
+        "model": model,
+        "messages": [],
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        'n': n}
 
     if sys_prompt:
         d2['messages'].append({"role": "system", "content": sys_prompt})
     d2['messages'].append({"role": "user", "content": question})
 
-
     if json_resp is True:
         d2["response_format"] = {"type": "json_object"}
     elif json_resp is False:
@@ -50,23 +48,22 @@ def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-
 
     for _ in range(3):
         try:
-
+
             response = requests.post(f'http://170.106.108.95/v1/chat/completions', json=d2)
             r_json = response.json()
-            if r2:= r_json.get("choices",None):
-                if n>1:
+            if r2 := r_json.get("choices", None):
+                if n > 1:
                     gpt_res = []
                     for i in r2:
                         gpt_res.append(i["message"]["content"])
                 else:
-                    gpt_res= r2[0]["message"]["content"]
+                    gpt_res = r2[0]["message"]["content"]
 
-
                 gpt_content = str(gpt_res)
                 prompt_tokens = r_json["usage"]["prompt_tokens"]
                 completion_tokens = r_json["usage"]["completion_tokens"]
                 total_tokens = r_json["usage"]["total_tokens"]
-                insert_ip_token(real_ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens)
+                insert_ip_token(real_ip, demo_name, gpt_content, prompt_tokens, completion_tokens, total_tokens)
 
                 simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{gpt_res}")
                 return gpt_res
@@ -83,21 +80,20 @@ def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-
     logger.critical("get_answer_from_gpt 严重错误,3次后都失败了")
 
 
-
-def parse_gpt_phon_to_tuplelist(text:str) -> list:
+def parse_gpt_phon_to_tuplelist(text: str) -> list:
     """解析gpt返回的音标数据"""
     result = []
     if not text:
         return []
     for i in text.split("\n"):
         ii = i.split("***")
-        if len(ii)>=3:
-            result.append((ii[0].strip(),ii[1].strip(),ii[2].strip()))
+        if len(ii) >= 3:
+            result.append((ii[0].strip(), ii[1].strip(), ii[2].strip()))
     return result
 
 
 if __name__ == '__main__':
     pass
 
-    resp = get_answer_from_gpt("hello",temperature=0.8,model='gpt-4o')
+    resp = get_answer_from_gpt("hello", temperature=0.8, model='gpt-4o')
     print(resp)
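
For reference, parse_gpt_phon_to_tuplelist expects one word***音标***音标 line per entry. A usage sketch; the sample text here is made up:

    from gpt.gpt import parse_gpt_phon_to_tuplelist

    sample = "apple *** /'æpl/ *** /'æpl/\nline without separators"
    print(parse_gpt_phon_to_tuplelist(sample))
    # [("apple", "/'æpl/", "/'æpl/")]; lines lacking three ***-separated fields are skipped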

+ 67 - 36
gpt/gpt_check.py

@@ -4,29 +4,31 @@
 GPT回复的各个校验模块"""
 import json
 import re
+from enum import Enum
+from typing import List
+
+from pydantic import BaseModel
 
 
 class CheckGptAnswer:
     @staticmethod
     def default_no_check(gpt_text: str):
-
+
         return True
 
-
     @staticmethod
     def score_value(gpt_text: str):
-
+
         if gpt_text.count("【取值0】") > 1:
             return False
         return True if re.findall("【取值.+?】", gpt_text) else False
 
-
     @staticmethod
     def original_modify(gpt_text: str):
         split_text = gpt_text.split("\n")
         for t in split_text:
-
-            if "修改理由" in t and "错误" in t and len(t)<=25:
+
+            if "修改理由" in t and "错误" in t and len(t) <= 25:
                 return False
             elif "没有严重的语法错误" in t:
                 return False
@@ -36,74 +38,105 @@ class CheckGptAnswer:
         else:
             return False
 
-
     @staticmethod
     def count_chinese_characters_50(s: str):
         chinese_count = 0
         for char in s:
-
+
             if '\u4e00' <= char <= '\u9fff':
                 chinese_count += 1
-        return True if s and chinese_count/len(s) >= 0.5 else False
+        return True if s and chinese_count / len(s) >= 0.5 else False
 
-
     @staticmethod
-    def count_english_count_30(s: str,english_words_count=30):
-        words_count = len(re.findall(r"[a-zA-Z\']+",s))
+    def count_english_count_30(s: str, english_words_count=30):
+        words_count = len(re.findall(r"[a-zA-Z\']+", s))
         return True if words_count >= english_words_count else False
 
-
     @staticmethod
-    def count_letter_percentages(s:str,letter_percentages=0.8):
-        count_letter=0
-
+    def count_letter_percentages(s: str, letter_percentages=0.8):
+        count_letter = 0
+
         total_length = len(s)
 
-
         for char in s:
-
+
             if char.isalpha():
-
                 count_letter += 1
-        result = True if round(count_letter/total_length,2)>letter_percentages else False
+        result = True if round(count_letter / total_length, 2) > letter_percentages else False
         return result
 
 
 class CheckArticleResult:
     @staticmethod
     def default_no_check(gpt_text: str):
-
+
         return True
 
     @staticmethod
     def get_article_1(gpt_text: str):
-
+
         try:
             json_object = json.loads(gpt_text)
         except json.decoder.JSONDecodeError:
             return False
-
-        if not all(i in json_object for i in ["englishArticle","chineseArticle","difficultSentences","usedMeanIds","questions"]):
+
+        if not all(i in json_object for i in ["englishArticle", "chineseArticle", "difficultSentences", "usedMeanIds", "questions"]):
             return False
-
+
         try:
             for question in json_object['questions']:
-                analysis = question['analysis'] 
-                words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis))/len(analysis)
-                if words_count_pct>0.5:
+                analysis = question['analysis']
+                words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis)) / len(analysis)
+                if words_count_pct > 0.5:
                     return False
         except:
             return False
 
         return True
 
-if __name__ == '__main__':
-
-
-
-
-
 
+class IsRight(Enum):
+    RIGHT = 1
+    WRONG = 0
+
+
+class Options(Enum):
+    A = "A"
+    B = "B"
+    C = "C"
+    D = "D"
+
+
+class DifficultSentence(BaseModel):
+    english: str
+    chinese: str
+
+
+class Candidate(BaseModel):
+    label: Options
+    text: str
+    isRight: IsRight
+
+
+class Question(BaseModel):
+    trunk: str
+    analysis: str
+    candidates: List[Candidate]
+
+
+class Article(BaseModel):
+    difficultSentences: List[DifficultSentence]
+    usedMeanIds: List[int]
+    questions: List[Question]
+    englishArticle: str
+    chineseArticle: str
+
+
+class Annotation(BaseModel):
+    annotation_text: str
+
+
+if __name__ == '__main__':
     text = """{
   "difficultSentences": [
     {
@@ -349,5 +382,3 @@ if __name__ == '__main__':
 }"""
     json_text = json.loads(text2)
     print(json_text)
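
The pydantic models added above mirror the JSON shape that CheckArticleResult.get_article_1 verifies by hand, so a GPT reply can also be validated structurally. A sketch with an abbreviated, made-up payload (real replies carry full articles, four questions, and four candidates each):

    from gpt.gpt_check import Article

    payload = {
        "difficultSentences": [{"english": "He saved his change.", "chinese": "他存下了零钱。"}],
        "usedMeanIds": [234567],
        "questions": [{
            "trunk": "What did he save?",
            "analysis": "细节题,文中提到零钱。",
            "candidates": [{"label": "A", "text": "His change.", "isRight": 1}],
        }],
        "englishArticle": "Title\n\nBody text.",
        "chineseArticle": "标题\n\n正文。",
    }
    article = Article(**payload)  # raises a ValidationError if the shape is wrong
    print(article.usedMeanIds)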

+ 9 - 4
gpt/query_oss_file.py

@@ -1,12 +1,16 @@
 # -*- coding: UTF-8 -*-
 if __name__ == '__main__':
     import os
+
     os.chdir("..")
 
-from tools.loglog import logger
+import json
+
 import oss2
 from oss2.credentials import EnvironmentVariableCredentialsProvider
-import json
+
+from tools.loglog import logger
+
 
 def query_file_content(key):
     """
@@ -18,7 +22,7 @@ def query_file_content(key):
     try:
         object_stream = bucket.get_object(key)
         content = b''.join(object_stream)
-
+
         text_content = content.decode('utf-8')
         json_content = json.loads(text_content)
         return json_content
@@ -27,5 +31,6 @@ def query_file_content(key):
     except Exception as e:
         logger.error(f"{type(e).__name__}: {e}")
 
+
 if __name__ == '__main__':
-    print(query_file_content('study/article/10613145')["id"])
+    print(query_file_content('study/article/10613145')["id"])
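
query_file_content reads from a module-level bucket that is not shown in this hunk; presumably it is built from the imported EnvironmentVariableCredentialsProvider, roughly as sketched below. The endpoint and bucket name are placeholders, and credentials are read from the OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET environment variables:

    import oss2
    from oss2.credentials import EnvironmentVariableCredentialsProvider

    auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
    bucket = oss2.Bucket(auth, "https://oss-cn-hangzhou.aliyuncs.com", "example-bucket")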

+ 16 - 17
main.py

@@ -1,20 +1,21 @@
 # -*- coding: utf-8 -*-
 import time
 from threading import Thread
+from typing import Callable
 
-from fastapi import FastAPI,Request
+from fastapi import FastAPI, Request
 from fastapi.responses import PlainTextResponse
-from typing import Callable
+
+from core.api_article_annotation import router_article_annotation as r7
 from core.api_get_article import router as r1
-from core.api_get_audio import router as r2
 from core.api_get_article2 import router as r3
-from core.api_get_word import router as r4
-from core.api_get_spoken_language import router as r5
 from core.api_get_article3 import router as r6
-
-from tools.loglog import logger
-from tools.del_expire_file import run_del_normal
+from core.api_get_audio import router as r2
+from core.api_get_spoken_language import router as r5
+from core.api_get_word import router as r4
 from core.respone_format import *
+from tools.del_expire_file import run_del_normal
+from tools.loglog import logger, log_err_e
 
 app = FastAPI(title="AI相关功能接口", version="1.1")
 
@@ -25,6 +26,8 @@ app.include_router(r3, tags=["春笋文章管理"])
 app.include_router(r4, tags=["生成word文档"])
 app.include_router(r5, tags=["口语评测"])
 app.include_router(r6, tags=["deepseek文章"])
+app.include_router(r7, tags=["文章词义标注"])
+
 
 @app.middleware("http")
 async def add_process_time_header(request: Request, call_next: Callable):
@@ -35,34 +38,30 @@ async def add_process_time_header(request: Request, call_next: Callable):
     try:
         body = await request.json() if request.method in ["POST", "PUT", "PATCH"] else ""
     except:
-        body =""
+        body = ""
     logger.info(f"\n正式接口请求:{real_ip} {request.method} {path}\n查询参数:{params}\n携带参数:{body}")
 
     try:
         response = await call_next(request)
     except Exception as e:
-        logger.error(f"{type(e).__name__},{e}")
+        log_err_e(e, msg="http中间件错误捕捉")
         return resp_500(message=f"{type(e).__name__},{e}")
 
     process_time = str(round(time.time() - start_time, 2))
     response.headers["X-Process-Time"] = process_time
 
-
-    if path not in ['/','/tts']:
+    if path not in ['/', '/tts']:
         with open('log/time_log.txt', encoding='utf-8', mode='a')as f:
             t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
             f.write(f"{t}  路径:{path} - 用时:{process_time}\n")
     return response
 
+
 @app.get("/")
 @app.post("/")
 def hello():
     return PlainTextResponse("hello world")
 
+
 del_file_thread = Thread(target=run_del_normal, daemon=True)
 del_file_thread.start()
-
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run("main:app",host="0.0.0.0", port=8000)

+ 19 - 15
main_9000.py

@@ -1,20 +1,21 @@
 # -*- coding: utf-8 -*-
 import time
 from threading import Thread
+from typing import Callable

-from fastapi import FastAPI,Request
+from fastapi import FastAPI, Request
 from fastapi.responses import PlainTextResponse
-from typing import Callable
+
+from core.api_article_annotation import router_article_annotation as r7
 from core.api_get_article import router as r1
-from core.api_get_audio import router as r2
 from core.api_get_article2 import router as r3
-from core.api_get_word import router as r4
-from core.api_get_spoken_language import router as r5
 from core.api_get_article3 import router as r6
-
-from tools.loglog import logger
-from tools.del_expire_file import run_del_normal
+from core.api_get_audio import router as r2
+from core.api_get_spoken_language import router as r5
+from core.api_get_word import router as r4
 from core.respone_format import *
+from tools.del_expire_file import run_del_normal
+from tools.loglog import logger, log_err_e

 app = FastAPI(title="AI相关功能接口", version="1.1")

@@ -25,6 +26,8 @@ app.include_router(r3, tags=["春笋文章管理"])
 app.include_router(r4, tags=["生成word文档"])
 app.include_router(r5, tags=["口语评测"])
 app.include_router(r6, tags=["deepseek文章"])
+app.include_router(r7, tags=["文章词义标注"])
+

 @app.middleware("http")
 async def add_process_time_header(request: Request, call_next: Callable):
@@ -35,34 +38,35 @@ async def add_process_time_header(request: Request, call_next: Callable):
     try:
         body = await request.json() if request.method in ["POST", "PUT", "PATCH"] else ""
     except:
-        body =""
+        body = ""
     logger.info(f"\n测试接口请求:{real_ip} {request.method} {path}\n查询参数:{params}\n携带参数:{body}")

     try:
         response = await call_next(request)
     except Exception as e:
-        logger.error(f"{type(e).__name__},{e}")
+        log_err_e(e, msg="http中间件错误捕捉")
         return resp_500(message=f"{type(e).__name__},{e}")

-    process_time = str(round(time.time() - start_time,2))
+    process_time = str(round(time.time() - start_time, 2))
     response.headers["X-Process-Time"] = process_time

-   
-    if path not in ['/','/tts']:
+    if path not in ['/', '/tts']:
         with open('log/time_log.txt', encoding='utf-8', mode='a')as f:
             t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
             f.write(f"{t}  路径:{path} - 用时:{process_time}\n")
     return response

+
 @app.get("/")
 @app.post("/")
 def hello():
     return PlainTextResponse("hello world")

+
 del_file_thread = Thread(target=run_del_normal, daemon=True)
 del_file_thread.start()

-
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("main_9000:app", port=9000)
+
+    uvicorn.run("main_9000:app", port=9000)
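
A minimal, self-contained sketch of the timing-middleware pattern used in main.py and main_9000.py above: measure wall-clock time around call_next, then stamp it into an X-Process-Time response header. This is illustrative code, not part of the commit; the app title and route are placeholders.

import time

from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse

app = FastAPI(title="timing middleware sketch")

@app.middleware("http")
async def add_process_time_header(request: Request, call_next):
    start_time = time.time()
    response = await call_next(request)  # run the matched route handler
    # Round to two decimals, mirroring the handlers above.
    response.headers["X-Process-Time"] = str(round(time.time() - start_time, 2))
    return response

@app.get("/")
def hello():
    return PlainTextResponse("hello world")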

File diff suppressed because it is too large
+ 3 - 4
make_docx_demo/check_test_table/aaaaaaaaaa.py


+ 7 - 6
make_docx_demo/check_test_table/baidu_ocr.py

@@ -1,8 +1,9 @@
 # -*- coding:utf-8 -*-
 import base64
-import requests
 import time

+import requests
+
 access_token = None
 token_time = 0

@@ -15,25 +16,25 @@ def high_ocr_location(pic_path):
     with open(pic_path, 'rb') as f:
         img = base64.b64encode(f.read())

-    if time.time()-token_time>3600*8:
+    if time.time() - token_time > 3600 * 8:
         print("获取token啦")
         url_token = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=BaL3yDflxe7Z5001vF8rAzKu&client_secret=xs40HshFLDDyWgCCfgnz86zWhQ8X1s5f'
         token = requests.post(url_token).json()
-       
+
         access_token = token['access_token']
         token_time = time.time()

     request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate"

-    params = {"image": img,"recognize_granularity":"small"}
+    params = {"image": img, "recognize_granularity": "small"}
     request_url = request_url + "?access_token=" + access_token
     headers = {'content-type': 'application/x-www-form-urlencoded'}
     response = requests.post(request_url, data=params, headers=headers)
     if response:
         r_json = response.json()
-       
+
         return r_json


 if __name__ == '__main__':
-    print(high_ocr_location(r"C:\Users\86131\Desktop\4.jpg"))
+    print(high_ocr_location(r"C:\Users\86131\Desktop\4.jpg"))
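
Beyond the import reordering, the function above implements a simple token cache: the Baidu access token is fetched at most once every eight hours and reused in between. A standalone sketch of that caching pattern (note that rebinding module-level variables from inside a function needs a global declaration, which is not visible in this hunk; API_KEY and SECRET_KEY are placeholders, not real credentials):

import time

import requests

API_KEY, SECRET_KEY = "your-client-id", "your-client-secret"
_access_token = None
_token_time = 0.0

def get_access_token() -> str:
    """Return a cached Baidu OAuth token, refreshing it after 8 hours."""
    global _access_token, _token_time
    if _access_token is None or time.time() - _token_time > 3600 * 8:
        url = ("https://aip.baidubce.com/oauth/2.0/token"
               f"?grant_type=client_credentials&client_id={API_KEY}"
               f"&client_secret={SECRET_KEY}")
        _access_token = requests.post(url).json()["access_token"]
        _token_time = time.time()
    return _access_token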

+ 75 - 126
make_docx_demo/check_test_table/image_preprocess.py

@@ -3,14 +3,14 @@
 需要增加,2个上下单词的黑点,靠近哪一边的算法,从而解决上下错位的问题

 """
+import json
 import re
-import time
+from pathlib import Path

-from PIL import Image, ImageFilter
-import numpy as np
 import cv2
-import json
-from pathlib import Path
+import numpy as np
+from PIL import Image, ImageFilter
+
 from baidu_ocr import high_ocr_location


@@ -23,56 +23,39 @@ def test_log(text: str):

 class PreprocessImage:
     def __init__(self, image_path):
-        self.image_path = image_path 
-        self.template_image_path = "template.jpg" 
+        self.image_path = image_path
+        self.template_image_path = "template.jpg"

-        self.image = cv2.imread(image_path) 
+        self.image = cv2.imread(image_path)
         self.template_image = cv2.imread(self.template_image_path)
-        self.temp_h, self.temp_w = self.template_image.shape[:2] 
+        self.temp_h, self.temp_w = self.template_image.shape[:2]

-    def correct_image(self, point_tuple,image_path='sharpen_image.jpg'):
+    def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
         """图像矫正
         point_tuple:传过来的4个点坐标的元组"""
         sharpen_image = cv2.imread(image_path)

         src_points = np.float32(point_tuple)

-       
-       
-        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]]) 
+        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])

         M = cv2.getPerspectiveTransform(src_points, dst_points)
-       
+
         transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))

-       
         gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)

-       
         blurred = cv2.GaussianBlur(gray, (5, 5), 0)

-       
-       
-
-       
-       
-       
-       
-       
-       
-       
-       
-
-       
         image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
-       
+
         cv2.imwrite('transformed_image.jpg', image_rgb)

     def sharpen_image(self):
-       
+
         img = Image.open(self.image_path)
         sharpened_img = img.filter(ImageFilter.SHARPEN)
-        sharpened_img.save('sharpen_image.jpg') 
+        sharpened_img.save('sharpen_image.jpg')

     @staticmethod
     def parser_ocr(ocr_data):
@@ -81,27 +64,16 @@ class PreprocessImage:
             text: str = word_item['words']
             if text.startswith("1."):
                 left_char_location = word_item['chars'][0]['location']
-                p1 = (left_char_location['left'], left_char_location['top']) 
+                p1 = (left_char_location['left'], left_char_location['top'])
             elif text.startswith("51."):
                 left_char_location = word_item['chars'][0]['location']
-                p2 = (left_char_location['left'], left_char_location['top']) 
+                p2 = (left_char_location['left'], left_char_location['top'])
             elif text.startswith("50."):
                 left_char_location = word_item['chars'][0]['location']
-                p3 = (left_char_location['left'], left_char_location['top']) 
+                p3 = (left_char_location['left'], left_char_location['top'])
             elif text.startswith("100."):
                 left_char_location = word_item['chars'][0]['location']
-                p4 = (left_char_location['left'], left_char_location['top']) 
-
-           
-           
-           
-           
-           
-           
-           
-           
-           
-           
+                p4 = (left_char_location['left'], left_char_location['top'])

         if any([not p1, not p2, not p3, not p4]):
             print([p1, p2, p3, p4])
@@ -110,8 +82,8 @@ class PreprocessImage:
         return [p1, p2, p3, p4]

     def run(self):
-       
-        self.sharpen_image() 
+
+        self.sharpen_image()
         ocr_data = high_ocr_location(self.image_path)
         point_tuple = self.parser_ocr(ocr_data)
         self.correct_image(point_tuple)
@@ -121,28 +93,24 @@ class ComparisonAlgorithm:
     """比较算法核心"""

     def __init__(self, transformed_image, ocr_data):
-        self.transformed_image = cv2.imread(transformed_image) 
-        self.ocr_data = ocr_data 
-        self.order_ocr_data = {} 
-        self.already_find_index = set() 
+        self.transformed_image = cv2.imread(transformed_image)
+        self.ocr_data = ocr_data
+        self.order_ocr_data = {}
+        self.already_find_index = set()

-        self.image = Image.open(transformed_image) 
+        self.image = Image.open(transformed_image)

     @staticmethod
     def separate_numbers_and_letters(text):
         """正则提取数字和字母"""
-        numbers = "".join(re.findall(r'\d+', text)) 
-        letters = "".join(re.findall(r'[a-zA-Z]+', text)) 
+        numbers = "".join(re.findall(r'\d+', text))
+        letters = "".join(re.findall(r'[a-zA-Z]+', text))
         return numbers, letters

     def is_line_word(self, x, y):
         """判断点的颜色是否符合标准; cv2取点速度没有pillow快
         指定要查询的点的坐标 (x, y)"""

-       
-       
-       
-
         rgb_color = self.image.getpixel((x, y))
         r, g, b = rgb_color

@@ -153,16 +121,16 @@ class ComparisonAlgorithm:
     def __make_order_ocr_data(self):
         for word_item in self.ocr_data['words_result']:
             word = word_item['words']
-            if word[0].isdigit() and len(word) >= 2: 
-               
+            if word[0].isdigit() and len(word) >= 2:
+
                 word_text = word_item['words']
-                location = word_item['location'] 
-                first_char_location = word_item['chars'][0]['location'] 
-                end_char_location = word_item['chars'][-1]['location'] 
-                chars_location = word_item['chars'] 
+                location = word_item['location']
+                first_char_location = word_item['chars'][0]['location']
+                end_char_location = word_item['chars'][-1]['location']
+                chars_location = word_item['chars']

                 numbers, letters = self.separate_numbers_and_letters(word_text)
-                if numbers not in self.order_ocr_data: 
+                if numbers not in self.order_ocr_data:
                     self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
                                                     "first_char_location": first_char_location, "end_char_location": end_char_location}

@@ -174,25 +142,23 @@ class ComparisonAlgorithm:
         first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1) 
-        black_count_1 = 0 
+        next_index = str(int_index + 1)
+        black_count_1 = 0

         for x in range(word_location['left'], word_location['left'] + word_location['width']):

-           
             b_top, b_height = first_char_location['top'], int(first_char_location['height'])
-            bottom_location_y = b_top + b_height 
+            bottom_location_y = b_top + b_height

             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-           
+
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
                 next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
             else:
-               
-               
+
                 next_word_top_location = bottom_location_y + int(b_height * 0.5)

             for y in range(bottom_location_y, next_word_top_location):
@@ -202,12 +168,10 @@ class ComparisonAlgorithm:
                     break

         black_count_per = black_count_1 / (word_location['width'])
-        if black_count_per > 0.8: 
+        if black_count_per > 0.8:
             print(f"{int_index}正常划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index

     def color_algorithm_2(self, int_index, word_location, word):
         """颜色算法2,单词自身中间的黑点率
@@ -215,11 +179,11 @@ class ComparisonAlgorithm:
         word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
         word:具体序号的单词,标识用
         """
-        black_count_2 = 0 
+        black_count_2 = 0
         for x in range(word_location['left'], word_location['left'] + word_location['width']):

             mid = word_location['top'] + int(word_location['height'] / 2)
-            bottom = word_location['top'] + int(word_location['height']) + 5 
+            bottom = word_location['top'] + int(word_location['height']) + 5

             for y in range(mid, bottom):
                 result = self.is_line_word(x, y)
@@ -228,12 +192,10 @@ class ComparisonAlgorithm:
                     break

         black_count_per = black_count_2 / (word_location['width'])
-        if black_count_per > 0.92: 
+        if black_count_per > 0.92:
             print(f"{int_index}中间划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index

     def color_algorithm_3(self, int_index, word_location, end_char_location, word):
         """
@@ -243,34 +205,33 @@ class ComparisonAlgorithm:
         end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1) 
-        black_count_1 = 0 
+        next_index = str(int_index + 1)
+        black_count_1 = 0
         moving_distance = 20

         """这是在获取所有需要的横向左右x坐标"""
-        all_x = [] 
+        all_x = []
         for i in range(word_location['left'] - moving_distance, word_location['left']):
             all_x.append(i)
-        word_right_loca = word_location['left'] + word_location['width'] + 2 
+        word_right_loca = word_location['left'] + word_location['width'] + 2
         for i in range(word_right_loca, word_right_loca + moving_distance):
             all_x.append(i)

         b_top, b_height = word_location['top'], int(word_location['height'])
-        bottom_location_y = b_top + b_height 
-       
-        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8) 
+        bottom_location_y = b_top + b_height
+
+        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)

         for x in all_x:
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-           
+
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
-                next_word_top_location = next_word_location['top'] + 3 
+                next_word_top_location = next_word_location['top'] + 3
             else:
-               
-               
+
                 next_word_top_location = bottom_location_y + int(b_height * 0.3)

             for y in range(bottom_location_y_half, next_word_top_location):
@@ -280,63 +241,55 @@ class ComparisonAlgorithm:
                     break

         black_count_per = black_count_1 / len(all_x)
-        if black_count_per > 0.4: 
+        if black_count_per > 0.4:
             print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index

     def color_algorithm_4(self, int_index, word_location, chars_location, word):
         """灰度图极差算法"""
-       
-       

         for char_index, char_dict in enumerate(chars_location):
             if char_dict['char'] == '.' or char_dict['char'] == ',':
                 point_location, point_char_index = char_dict['location'], char_index
                 break
-        else: 
+        else:
             char_index = 2
             point_location, point_char_index = chars_location[char_index]['location'], char_index

         white_block = 0
-        point_location_half = point_location['top'] + point_location['height']//2
+        point_location_half = point_location['top'] + point_location['height'] // 2
         y1, y2 = point_location_half, point_location_half + point_location['height']
         for x in range(point_location['left'], point_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val>110 or range_value < 90:
-                white_block +=1
+            if min_val > 110 or range_value < 90:
+                white_block += 1

-        if white_block/point_location['width'] < 0.1:
+        if white_block / point_location['width'] < 0.1:
             print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index

-       
         white_block = 0
         end_char_location = chars_location[-2]['location']
         bottom = end_char_location['top'] + end_char_location['height']
-        y1, y2 = bottom+2, bottom + end_char_location['height']-10
+        y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
         for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val>110 or range_value < 90:
-                white_block +=1
+            if min_val > 110 or range_value < 90:
+                white_block += 1

-        if white_block/point_location['width'] < 0.1:
+        if white_block / point_location['width'] < 0.1:
             print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index

-
-
-   
     def core_algorithm(self):
         self.__make_order_ocr_data()

@@ -346,10 +299,10 @@ class ComparisonAlgorithm:
                 continue

             current_dict = self.order_ocr_data[current_index]
-            word = current_dict['word'] 
-            word_location = current_dict['location'] 
-            first_char_location = current_dict['first_char_location'] 
-            end_char_location = current_dict['end_char_location'] 
+            word = current_dict['word']
+            word_location = current_dict['location']
+            first_char_location = current_dict['first_char_location']
+            end_char_location = current_dict['end_char_location']
             chars_location = current_dict['chars_location']

             if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
@@ -366,23 +319,19 @@ class ComparisonAlgorithm:


 if __name__ == '__main__':
-   
     image_path = r"C:\Users\86131\Desktop\4.jpg"

-   
     script_path = Path(__file__).resolve()
-   
+
     script_directory = script_path.parent
-   
+
     transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))

-   
     pi = PreprocessImage(image_path)
     pi.run()

-    transformed_image_ocr_data = high_ocr_location(transformed_image_path) 
-    test_log(transformed_image_ocr_data) 
+    transformed_image_ocr_data = high_ocr_location(transformed_image_path)
+    test_log(transformed_image_ocr_data)

     ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
     ca.core_algorithm()
-

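The core of PreprocessImage.correct_image is a four-point perspective correction: OCR locates the list items numbered 1, 51, 50 and 100, their top-left corners form the source quad, and cv2.getPerspectiveTransform maps that quad onto the known anchor positions in template.jpg. A condensed sketch of just that step (the destination coordinates are the ones hard-coded above and are specific to this template; template_w/template_h would come from the template image's own shape):

import cv2
import numpy as np

def correct_perspective(image_path: str, src_quad, template_w: int, template_h: int):
    """src_quad: four (x, y) anchors ordered top-left, top-right,
    bottom-left, bottom-right, as found by OCR on the photographed page."""
    image = cv2.imread(image_path)
    src = np.float32(src_quad)
    # Where the same four anchors sit in the flat template image.
    dst = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
    M = cv2.getPerspectiveTransform(src, dst)
    # Resample the photo into the template's coordinate system.
    return cv2.warpPerspective(image, M, (template_w, template_h))
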
+ 72 - 115
make_docx_demo/check_test_table/image_preprocess2.py

@@ -3,14 +3,14 @@
 20250114 在单词上划线,分别有斜杠、反斜杠、横着划线三种方式;找到它们的位置

 """
+import json
 import re
-import time
+from pathlib import Path

-from PIL import Image, ImageFilter
-import numpy as np
 import cv2
-import json
-from pathlib import Path
+import numpy as np
+from PIL import Image, ImageFilter
+
 from baidu_ocr import high_ocr_location


@@ -23,70 +23,50 @@ def test_log(text: str):

 class PreprocessImage:
     def __init__(self, image_path):
-        self.image_path = image_path 
-        self.template_image_path = "template.jpg" 
+        self.image_path = image_path
+        self.template_image_path = "template.jpg"

-        self.image = cv2.imread(image_path) 
+        self.image = cv2.imread(image_path)
         self.template_image = cv2.imread(self.template_image_path)
-        self.temp_h, self.temp_w = self.template_image.shape[:2] 
+        self.temp_h, self.temp_w = self.template_image.shape[:2]

-    def correct_image(self, point_tuple,image_path='sharpen_image.jpg'):
+    def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
         """图像矫正
         point_tuple:传过来的4个点坐标的元组"""
         sharpen_image = cv2.imread(image_path)

         src_points = np.float32(point_tuple)

-       
-       
-        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]]) 
+        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])

         M = cv2.getPerspectiveTransform(src_points, dst_points)
-       
+
         transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))

-       
         gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)

-       
         blurred = cv2.GaussianBlur(gray, (5, 5), 0)

-       
-       
-
-       
-       
-       
-       
-       
-       
-       
-       
-
-       
         image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
-       
+
         cv2.imwrite('transformed_image.jpg', image_rgb)

     def sharpen_image(self):
-       
+
         img = Image.open(self.image_path)
         sharpened_img = img.filter(ImageFilter.SHARPEN)
-        sharpened_img.save('sharpen_image.jpg') 
+        sharpened_img.save('sharpen_image.jpg')

     @staticmethod
     def parser_ocr(ocr_data):
         for word_item in ocr_data['words_result']:
-           
-            for char_item in word_item['chars']:

+            for char_item in word_item['chars']:
                 pass

-
-
     def run(self):
-       
-        self.sharpen_image() 
+
+        self.sharpen_image()
         ocr_data = high_ocr_location(self.image_path)
         point_tuple = self.parser_ocr(ocr_data)
         self.correct_image(point_tuple)
@@ -96,28 +76,24 @@ class ComparisonAlgorithm:
     """比较算法核心"""

     def __init__(self, transformed_image, ocr_data):
-        self.transformed_image = cv2.imread(transformed_image) 
-        self.ocr_data = ocr_data 
-        self.order_ocr_data = {} 
-        self.already_find_index = set() 
+        self.transformed_image = cv2.imread(transformed_image)
+        self.ocr_data = ocr_data
+        self.order_ocr_data = {}
+        self.already_find_index = set()

-        self.image = Image.open(transformed_image) 
+        self.image = Image.open(transformed_image)

     @staticmethod
     def separate_numbers_and_letters(text):
         """正则提取数字和字母"""
-        numbers = "".join(re.findall(r'\d+', text)) 
-        letters = "".join(re.findall(r'[a-zA-Z]+', text)) 
+        numbers = "".join(re.findall(r'\d+', text))
+        letters = "".join(re.findall(r'[a-zA-Z]+', text))
         return numbers, letters

     def is_line_word(self, x, y):
         """判断点的颜色是否符合标准; cv2取点速度没有pillow快
         指定要查询的点的坐标 (x, y)"""

-       
-       
-       
-
         rgb_color = self.image.getpixel((x, y))
         r, g, b = rgb_color

@@ -128,16 +104,16 @@ class ComparisonAlgorithm:
     def __make_order_ocr_data(self):
         for word_item in self.ocr_data['words_result']:
             word = word_item['words']
-            if word[0].isdigit() and len(word) >= 2: 
-               
+            if word[0].isdigit() and len(word) >= 2:
+
                 word_text = word_item['words']
-                location = word_item['location'] 
-                first_char_location = word_item['chars'][0]['location'] 
-                end_char_location = word_item['chars'][-1]['location'] 
-                chars_location = word_item['chars'] 
+                location = word_item['location']
+                first_char_location = word_item['chars'][0]['location']
+                end_char_location = word_item['chars'][-1]['location']
+                chars_location = word_item['chars']

                 numbers, letters = self.separate_numbers_and_letters(word_text)
-                if numbers not in self.order_ocr_data: 
+                if numbers not in self.order_ocr_data:
                     self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
                                                     "first_char_location": first_char_location, "end_char_location": end_char_location}

@@ -149,25 +125,23 @@ class ComparisonAlgorithm:
         first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1) 
-        black_count_1 = 0 
+        next_index = str(int_index + 1)
+        black_count_1 = 0

         for x in range(word_location['left'], word_location['left'] + word_location['width']):

-           
             b_top, b_height = first_char_location['top'], int(first_char_location['height'])
-            bottom_location_y = b_top + b_height 
+            bottom_location_y = b_top + b_height

             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-           
+
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
                 next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
             else:
-               
-               
+
                 next_word_top_location = bottom_location_y + int(b_height * 0.5)

             for y in range(bottom_location_y, next_word_top_location):
@@ -177,12 +151,10 @@ class ComparisonAlgorithm:
                     break

         black_count_per = black_count_1 / (word_location['width'])
-        if black_count_per > 0.8: 
+        if black_count_per > 0.8:
             print(f"{int_index}正常划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index

     def color_algorithm_2(self, int_index, word_location, word):
         """颜色算法2,单词自身中间的黑点率
@@ -190,11 +162,11 @@ class ComparisonAlgorithm:
         word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
         word:具体序号的单词,标识用
         """
-        black_count_2 = 0 
+        black_count_2 = 0
         for x in range(word_location['left'], word_location['left'] + word_location['width']):

             mid = word_location['top'] + int(word_location['height'] / 2)
-            bottom = word_location['top'] + int(word_location['height']) + 5 
+            bottom = word_location['top'] + int(word_location['height']) + 5

             for y in range(mid, bottom):
                 result = self.is_line_word(x, y)
@@ -203,12 +175,10 @@ class ComparisonAlgorithm:
                     break

         black_count_per = black_count_2 / (word_location['width'])
-        if black_count_per > 0.92: 
+        if black_count_per > 0.92:
             print(f"{int_index}中间划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index

     def color_algorithm_3(self, int_index, word_location, end_char_location, word):
         """
@@ -218,34 +188,33 @@ class ComparisonAlgorithm:
         end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1) 
-        black_count_1 = 0 
+        next_index = str(int_index + 1)
+        black_count_1 = 0
         moving_distance = 20

         """这是在获取所有需要的横向左右x坐标"""
-        all_x = [] 
+        all_x = []
         for i in range(word_location['left'] - moving_distance, word_location['left']):
             all_x.append(i)
-        word_right_loca = word_location['left'] + word_location['width'] + 2 
+        word_right_loca = word_location['left'] + word_location['width'] + 2
         for i in range(word_right_loca, word_right_loca + moving_distance):
             all_x.append(i)

         b_top, b_height = word_location['top'], int(word_location['height'])
-        bottom_location_y = b_top + b_height 
-       
-        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8) 
+        bottom_location_y = b_top + b_height
+
+        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)

         for x in all_x:
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-           
+
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
-                next_word_top_location = next_word_location['top'] + 3 
+                next_word_top_location = next_word_location['top'] + 3
             else:
-               
-               
+
                 next_word_top_location = bottom_location_y + int(b_height * 0.3)

             for y in range(bottom_location_y_half, next_word_top_location):
@@ -255,63 +224,55 @@ class ComparisonAlgorithm:
                     break

         black_count_per = black_count_1 / len(all_x)
-        if black_count_per > 0.4: 
+        if black_count_per > 0.4:
             print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index

     def color_algorithm_4(self, int_index, word_location, chars_location, word):
         """灰度图极差算法"""
-       
-       

         for char_index, char_dict in enumerate(chars_location):
             if char_dict['char'] == '.' or char_dict['char'] == ',':
                 point_location, point_char_index = char_dict['location'], char_index
                 break
-        else: 
+        else:
             char_index = 2
             point_location, point_char_index = chars_location[char_index]['location'], char_index

         white_block = 0
-        point_location_half = point_location['top'] + point_location['height']//2
+        point_location_half = point_location['top'] + point_location['height'] // 2
         y1, y2 = point_location_half, point_location_half + point_location['height']
         for x in range(point_location['left'], point_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val>110 or range_value < 90:
-                white_block +=1
+            if min_val > 110 or range_value < 90:
+                white_block += 1

-        if white_block/point_location['width'] < 0.1:
+        if white_block / point_location['width'] < 0.1:
             print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index

-       
         white_block = 0
         end_char_location = chars_location[-2]['location']
         bottom = end_char_location['top'] + end_char_location['height']
-        y1, y2 = bottom+2, bottom + end_char_location['height']-10
+        y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
         for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val>110 or range_value < 90:
-                white_block +=1
+            if min_val > 110 or range_value < 90:
+                white_block += 1

-        if white_block/point_location['width'] < 0.1:
+        if white_block / point_location['width'] < 0.1:
             print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index

-
-
-   
     def core_algorithm(self):
         self.__make_order_ocr_data()

@@ -321,10 +282,10 @@ class ComparisonAlgorithm:
                 continue

             current_dict = self.order_ocr_data[current_index]
-            word = current_dict['word'] 
-            word_location = current_dict['location'] 
-            first_char_location = current_dict['first_char_location'] 
-            end_char_location = current_dict['end_char_location'] 
+            word = current_dict['word']
+            word_location = current_dict['location']
+            first_char_location = current_dict['first_char_location']
+            end_char_location = current_dict['end_char_location']
             chars_location = current_dict['chars_location']

             if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
@@ -341,23 +302,19 @@ class ComparisonAlgorithm:


 if __name__ == '__main__':
-   
     image_path = r"C:\Users\86131\Desktop\4.jpg"

-   
     script_path = Path(__file__).resolve()
-   
+
     script_directory = script_path.parent
-   
+
     transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))

-   
     pi = PreprocessImage(image_path)
     pi.run()

-    transformed_image_ocr_data = high_ocr_location(transformed_image_path) 
-    test_log(transformed_image_ocr_data) 
+    transformed_image_ocr_data = high_ocr_location(transformed_image_path)
+    test_log(transformed_image_ocr_data)

     ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
     ca.core_algorithm()
-

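Both image_preprocess variants share one core test: for every x column across a word's OCR bounding box, scan a short vertical strip and count the fraction of columns containing at least one ink-dark pixel; a ratio above a threshold (0.8 for an underline below the word, 0.92 for a strike-through across its middle) marks the word. A compact sketch of that column scan, with an assumed darkness cutoff since the body of is_line_word is not shown in this diff; it expects an RGB image and a box dict with the OCR's left/top/width/height keys:

from PIL import Image

def black_column_ratio(image: Image.Image, box: dict, dark=(100, 100, 100)) -> float:
    """Fraction of x columns, in the strip from mid-height to just below the
    word, that contain at least one pixel darker than `dark` in all channels.
    The caller must keep the strip inside the image bounds."""
    hit_columns = 0
    mid = box['top'] + box['height'] // 2
    bottom = box['top'] + box['height'] + 5
    for x in range(box['left'], box['left'] + box['width']):
        for y in range(mid, bottom):
            r, g, b = image.getpixel((x, y))[:3]
            if r < dark[0] and g < dark[1] and b < dark[2]:  # assumed ink test
                hit_columns += 1
                break  # one dark pixel is enough for this column
    return hit_columns / box['width']

A ratio above roughly 0.92 from this strip would correspond to color_algorithm_2's strike-through case.
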
+ 4 - 8
make_docx_demo/check_test_table/mark_ocr_loca.py

@@ -1,29 +1,25 @@
 # -*- coding: utf-8 -*-
 """测试ocr的位置,与预期是否一致"""
-from PIL import Image, ImageDraw
 import json
 from pathlib import Path

+from PIL import Image, ImageDraw
+

 def draw_rectangles_on_image(image_path, rectangles, output_path):
-   
     image = Image.open(image_path)
     draw = ImageDraw.Draw(image)

-   
     for rectangle in rectangles:
         top_left = (rectangle['left'], rectangle['top'])
         bottom_right = (rectangle['left'] + rectangle['width'], rectangle['top'] + rectangle['height'])
         draw.rectangle([top_left, bottom_right], outline='red', width=2)

-   
     image.save(output_path)


 rectangles = [
-   
-   
-   
+
 ]

 with open("log.txt", "r", encoding="utf-8") as f:
@@ -39,4 +35,4 @@ for i in ocr_data['words_result']:
 script_path = Path(__file__).resolve()
 script_directory = script_path.parent
 transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
-draw_rectangles_on_image(transformed_image_path, rectangles, 'output_with_rectangles.jpg')
+draw_rectangles_on_image(transformed_image_path, rectangles, 'output_with_rectangles.jpg')

File diff suppressed because it is too large
+ 0 - 2
make_docx_demo/data.py


+ 38 - 42
make_docx_demo/docx_other_func.py

@@ -1,18 +1,17 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
-from functools import wraps
-import time
+import datetime
 import io
 import io
-import qrcode
-from docx.shared import RGBColor
+import time
 from base64 import b64decode
 from base64 import b64decode
-import datetime
+from functools import wraps
 
 
 import matplotlib.pyplot as plt
 import matplotlib.pyplot as plt
+import qrcode
+from docx.shared import RGBColor
+
 plt.switch_backend('Agg')
 plt.switch_backend('Agg')
 from io import BytesIO
 from io import BytesIO
 from tools.loglog import logger, log_err_e
 from tools.loglog import logger, log_err_e
-from docx import Document
-from docx.shared import Inches,Cm
 from threading import Lock
 from threading import Lock
 from config.read_config import address
 from config.read_config import address
 
 
@@ -22,40 +21,43 @@ width_cm, height_cm = 5.4, 3
 width_in = width_cm
 width_in = width_cm
 height_in = height_cm
 height_in = height_cm
 
 
-plt.figure(figsize=(width_in, height_in)) 
+plt.figure(figsize=(width_in, height_in))
 
 
 
 
-def hex_to_rgb(hex_color:str):
-    hex_color = hex_color.lstrip('#') 
+def hex_to_rgb(hex_color: str):
+    hex_color = hex_color.lstrip('#')
     return RGBColor(int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16))
     return RGBColor(int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16))
 
 
+
 def rgb_to_hex(r, g, b):
 def rgb_to_hex(r, g, b):
     return '{:02x}{:02x}{:02x}'.format(r, g, b)
     return '{:02x}{:02x}{:02x}'.format(r, g, b)
 
 
+
 def is_base64(text):
 def is_base64(text):
     try:
     try:
-       
-        image_bytes =b64decode(text)
+
+        image_bytes = b64decode(text)
         return image_bytes
         return image_bytes
     except Exception:
     except Exception:
-       
+
         return False
         return False
 
 
 
 
 def time_use(fn):
 def time_use(fn):
     @wraps(fn)
     @wraps(fn)
-    def cc(*args,**kwargs): 
+    def cc(*args, **kwargs):
         f_time = time.time()
         f_time = time.time()
-        res = fn(*args,**kwargs)
+        res = fn(*args, **kwargs)
 
 
-        cha = round(time.time()-f_time,3)
+        cha = round(time.time() - f_time, 3)
         if cha > 0.3:
         if cha > 0.3:
-            print(f'函数:{fn.__name__} 一共用时',cha,'秒')
-        return res 
-    return cc 
+            print(f'函数:{fn.__name__} 一共用时', cha, '秒')
+        return res
 
 
+    return cc
 
 
-def qrcode_maker(id_text=None,full_url=None) -> BytesIO:
+
+def qrcode_maker(id_text=None, full_url=None) -> BytesIO:
     """
     """
     :param id_text: id_text 提供id,二维码地址是春笋筛查表的地址;http://dcjxb.yunzhixue.cn/link?type=scanpage&id=999;
     :param id_text: id_text 提供id,二维码地址是春笋筛查表的地址;http://dcjxb.yunzhixue.cn/link?type=scanpage&id=999;
     :param full_url: 如果提供,直接使用这个文本来生成二维码的地址
     :param full_url: 如果提供,直接使用这个文本来生成二维码的地址
@@ -73,14 +75,12 @@ def qrcode_maker(id_text=None,full_url=None) -> BytesIO:
     qr.add_data(text)
     qr.add_data(text)
     qr.make(fit=True)
     qr.make(fit=True)
 
 
-   
     img = qr.make_image(fill_color="black", back_color="white")
     img = qr.make_image(fill_color="black", back_color="white")
     img_byte_arr = io.BytesIO()
     img_byte_arr = io.BytesIO()
     img.save(img_byte_arr, format='PNG')
     img.save(img_byte_arr, format='PNG')
     img_byte_arr.seek(0)
     img_byte_arr.seek(0)
-   
-    return img_byte_arr
 
 
+    return img_byte_arr
 
 
 
 
 def get_weekday():
 def get_weekday():
@@ -91,7 +91,7 @@ def get_weekday():
     return weekday_chinese
     return weekday_chinese
 
 
 
 
-def make_chart(x_axis_data,y_axis_datas,title,sub_title_list,x_axis_label=None,y_axis_label=None):
+def make_chart(x_axis_data, y_axis_datas, title, sub_title_list, x_axis_label=None, y_axis_label=None):
     """
     """
     :param sub_title_list: 小标题集合,放在右上角,用来标记每个y轴的数据标题
     :param sub_title_list: 小标题集合,放在右上角,用来标记每个y轴的数据标题
     :param y_axis_label:Y轴文本
     :param y_axis_label:Y轴文本
@@ -103,7 +103,6 @@ def make_chart(x_axis_data,y_axis_datas,title,sub_title_list,x_axis_label=None,y
     """
     """
     x_len = len(x_axis_data)
     x_len = len(x_axis_data)
 
 
-   
     image_io = BytesIO()
     image_io = BytesIO()
 
 
     font1 = {'family': 'SimSun', 'weight': 'normal', 'size': 14}
     font1 = {'family': 'SimSun', 'weight': 'normal', 'size': 14}
@@ -113,34 +112,31 @@ def make_chart(x_axis_data,y_axis_datas,title,sub_title_list,x_axis_label=None,y
         for y in y_axis_datas:
         for y in y_axis_datas:
             if len(y) != x_len:
             if len(y) != x_len:
                 logger.error("x轴的y轴的数据个数不一致")
                 logger.error("x轴的y轴的数据个数不一致")
-            plt.plot(x_axis_data, y, marker='o',label="zxs") 
+            plt.plot(x_axis_data, y, marker='o', label="zxs")
 
 
-        plt.title(title) 
+        plt.title(title)
         if x_axis_label:
         if x_axis_label:
-            plt.xlabel(x_axis_label) 
+            plt.xlabel(x_axis_label)
         if y_axis_label:
         if y_axis_label:
-            plt.ylabel(y_axis_label) 
-        plt.grid(True) 
+            plt.ylabel(y_axis_label)
+        plt.grid(True)
 
 
-        for index,sub_title in enumerate(sub_title_list):
-            plt.text(0.95, 0.9-index*0.15, sub_title, transform=plt.gca().transAxes, fontsize=10, va='top', ha='right', backgroundcolor='w')
+        for index, sub_title in enumerate(sub_title_list):
+            plt.text(0.95, 0.9 - index * 0.15, sub_title, transform=plt.gca().transAxes, fontsize=10, va='top', ha='right', backgroundcolor='w')
         with lock:
         with lock:
-            plt.savefig(image_io, format='png', bbox_inches='tight') 
-            image_io.seek(0) 
+            plt.savefig(image_io, format='png', bbox_inches='tight')
+            image_io.seek(0)
 
 
         return image_io
         return image_io
     except Exception as e:
     except Exception as e:
-        log_err_e(e,"折线图生成错误")
+        log_err_e(e, "折线图生成错误")
         image_io.close()
         image_io.close()
         return None
         return None
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-   
-   
-
-    t= time.time()
-    io = qrcode_maker('',"http://111.231.167.191:8001/mp3")
-    with open("1.jpg",'wb') as f:
+    t = time.time()
+    io = qrcode_maker('', "http://111.231.167.191:8001/mp3")
+    with open("1.jpg", 'wb') as f:
         f.write(io.read())
         f.write(io.read())
-    print(time.time()-t)
+    print(time.time() - t)
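
For reference, a self-contained sketch of the time_use decorator as restructured above. It times a call and reports it only when it exceeds 0.3 s (the Chinese print message is translated here):

import time
from functools import wraps

def time_use(fn):
    @wraps(fn)                      # keep fn's name and docstring on the wrapper
    def cc(*args, **kwargs):
        start = time.time()
        res = fn(*args, **kwargs)
        cost = round(time.time() - start, 3)
        if cost > 0.3:              # stay silent for fast calls
            print(f"function {fn.__name__} took {cost} s")
        return res

    return cc

@time_use
def slow_task():
    time.sleep(0.5)

slow_task()                         # prints: function slow_task took 0.5xx s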

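make_chart above takes a threading.Lock around plt.savefig because pyplot keeps global state and is not thread-safe. A standalone sketch of the idea; it is stricter than the code above in that it serializes the whole render and closes the figure afterwards:

from io import BytesIO
from threading import Lock

import matplotlib.pyplot as plt

plt.switch_backend("Agg")           # headless backend, as in the module above
lock = Lock()

def render_line(xs, ys) -> BytesIO:
    buf = BytesIO()
    with lock:                      # pyplot state is global: one thread at a time
        plt.figure()
        plt.plot(xs, ys, marker="o")
        plt.savefig(buf, format="png", bbox_inches="tight")
        plt.close()                 # free the figure
    buf.seek(0)
    return buf

png_io = render_line([1, 2, 3], [2, 4, 8])
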
+ 9 - 11
make_docx_demo/get_standard_data.py

@@ -1,30 +1,28 @@
 # -*- coding:utf-8 -*-
 # -*- coding:utf-8 -*-
 """获取学段标准数据"""
 """获取学段标准数据"""
-from cachetools import TTLCache,cached
 import requests
 import requests
-from tools.loglog import logger, log_err_e
-
+from cachetools import TTLCache
 
 
 cache = TTLCache(maxsize=100, ttl=86400)
 cache = TTLCache(maxsize=100, ttl=86400)
 
 
 
 
-def get_standard_data(student_stage:int):
+def get_standard_data(student_stage: int):
     if student_stage in cache:
     if student_stage in cache:
         return cache[student_stage]
         return cache[student_stage]
 
 
     url = "https://dcjxb.yunzhixue.cn/api-dev/standard/study"
     url = "https://dcjxb.yunzhixue.cn/api-dev/standard/study"
-    params = {"stage":student_stage}
-    response = requests.get(url,params=params)
+    params = {"stage": student_stage}
+    response = requests.get(url, params=params)
     if response.status_code == 200:
     if response.status_code == 200:
-        data_obj = response.json()['data'] 
-       
-        return_data = data_obj['totalVocabulary'],data_obj['readingAccuracy'],data_obj['readingLevel'],data_obj['readingSpeed']
+        data_obj = response.json()['data']
+
+        return_data = data_obj['totalVocabulary'], data_obj['readingAccuracy'], data_obj['readingLevel'], data_obj['readingSpeed']
 
 
-        cache[student_stage] = return_data 
+        cache[student_stage] = return_data
         return return_data
         return return_data
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
     print(get_standard_data(3))
     print(get_standard_data(3))
     print(cache)
     print(cache)
-    print(1 in cache,2 in cache,3 in cache)
+    print(1 in cache, 2 in cache, 3 in cache)

+ 114 - 169
make_docx_demo/main_word.py

@@ -1,29 +1,28 @@
 # -*- coding: UTF-8 -*-
 # -*- coding: UTF-8 -*-
-import time
-import re
 import math
 import math
+import re
+import time
+from docx_base import Word, Table, ParagraphBase
 from io import BytesIO
 from io import BytesIO
-from random import randint, shuffle
+from random import shuffle
 from threading import Thread
 from threading import Thread
 
 
-from docx_base import Word, Table, hex_to_rgb, rgb_to_hex, ParagraphBase
-from docx.shared import Pt, Inches, Cm, RGBColor
-from docx.enum.text import WD_COLOR_INDEX
+from docx.shared import Inches
+
 from make_docx_demo.data import *
 from make_docx_demo.data import *
 from make_docx_demo.docx_other_func import time_use, qrcode_maker, get_weekday, make_chart
 from make_docx_demo.docx_other_func import time_use, qrcode_maker, get_weekday, make_chart
-from tools.loglog import logger, log_err_e
 from make_docx_demo.word2pdf import convert_word_to_pdf
 from make_docx_demo.word2pdf import convert_word_to_pdf
+from tools.loglog import log_err_e
 
 
 num_dict = {1: "❶", 2: "❷", 3: "❸", 4: "❹", 5: "❺", 6: "❻", 7: "❼", 8: "❽", 9: "❾",
 num_dict = {1: "❶", 2: "❷", 3: "❸", 4: "❹", 5: "❺", 6: "❻", 7: "❼", 8: "❽", 9: "❾",
             10: "❿", 11: "⓫", 12: "⓬", 13: "⓭", 14: "⓮", 15: "⓯", 16: "⓰", 17: "⓱", 18: "⓲", 19: "⓳", 20: "⓴"}
             10: "❿", 11: "⓫", 12: "⓬", 13: "⓭", 14: "⓮", 15: "⓯", 16: "⓰", 17: "⓱", 18: "⓲", 19: "⓳", 20: "⓴"}
 
 
 
 
-
 @time_use
 @time_use
 def header_maker(docx: Word, json_data):
 def header_maker(docx: Word, json_data):
-    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0") 
-    exercise_title = json_data.get("ExerciseTitle", "") 
-    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
+    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
+    exercise_title = json_data.get("ExerciseTitle", "")
+    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
 
 
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     class_name = json_data.get("StudentInfo").get("ClassName", '')
     class_name = json_data.get("StudentInfo").get("ClassName", '')
@@ -41,15 +40,12 @@ def header_maker(docx: Word, json_data):
         tb_header.set_cell_text(0, 4, f"{t_date}\n{t_weekday}\n{t_time}", size=8, border=False, color=(220, 220, 220))
         tb_header.set_cell_text(0, 4, f"{t_date}\n{t_weekday}\n{t_time}", size=8, border=False, color=(220, 220, 220))
 
 
         tb_header.set_tb_colum_width(width=[100, 70, 70, 150, 80])
         tb_header.set_tb_colum_width(width=[100, 70, 70, 150, 80])
-       
 
 
-    target_section = docx.doc.sections[-1] 
+    target_section = docx.doc.sections[-1]
     target_section.header.is_linked_to_previous = False
     target_section.header.is_linked_to_previous = False
-   
+
     for paragraph in target_section.header.paragraphs:
     for paragraph in target_section.header.paragraphs:
-        paragraph.clear() 
-   
-   
+        paragraph.clear()
 
 
     target_section.header_distance = 0
     target_section.header_distance = 0
     target_section.footer_distance = 280000
     target_section.footer_distance = 280000
@@ -60,9 +56,9 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
     p = docx.add_blank_paragraph()
     p = docx.add_blank_paragraph()
     line_width = 205
     line_width = 205
     main_rect_x = line_width + 10
     main_rect_x = line_width + 10
-    main_rect_width = 150 
+    main_rect_width = 150
 
 
-    right_line_x = main_rect_x + main_rect_width + 10 
+    right_line_x = main_rect_x + main_rect_width + 10
 
 
     p.add_rectangle(main_title, x=main_rect_x, y=4, fill_color="000000", width=main_rect_width, height=48, font_color="ffffff",
     p.add_rectangle(main_title, x=main_rect_x, y=4, fill_color="000000", width=main_rect_width, height=48, font_color="ffffff",
                     font_size=18)
                     font_size=18)
@@ -82,28 +78,27 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
     exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-    reading_speed = json_data['StudentInfo']['StudentStudy']['ReadingSpeed'] 
-    reading_accuracy = json_data['StudentInfo']['StudentStudy']['ReadingAccuracy'] 
-    reading_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
+    reading_speed = json_data['StudentInfo']['StudentStudy']['ReadingSpeed']
+    reading_accuracy = json_data['StudentInfo']['StudentStudy']['ReadingAccuracy']
+    reading_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
 
 
-   
     chart_width = 5.4
     chart_width = 5.4
     all_chart = json_data['StudentInfo']['StudentStudy']['ChartData']
     all_chart = json_data['StudentInfo']['StudentStudy']['ChartData']
-    reading_speed_chart = all_chart["ReadingSpeed"] 
-    reading_accuracy_chart = all_chart["ReadingAccuracy"] 
-    reading_difficult_chart = all_chart["ReadingDifficulties"] 
+    reading_speed_chart = all_chart["ReadingSpeed"]
+    reading_accuracy_chart = all_chart["ReadingAccuracy"]
+    reading_difficult_chart = all_chart["ReadingDifficulties"]
 
 
-    reading_speed_x_data = reading_speed_chart['XAxis'] 
-    reading_speed_sub_title = reading_speed_chart['Legend'] 
-    reading_speed_y_datas = [i['Data'] for i in reading_speed_chart['Series']] 
+    reading_speed_x_data = reading_speed_chart['XAxis']
+    reading_speed_sub_title = reading_speed_chart['Legend']
+    reading_speed_y_datas = [i['Data'] for i in reading_speed_chart['Series']]
 
 
-    reading_accuracy_x_data = reading_accuracy_chart['XAxis'] 
-    reading_accuracy_sub_title = reading_accuracy_chart['Legend'] 
-    reading_accuracy_y_datas = [i['Data'] for i in reading_accuracy_chart['Series']] 
+    reading_accuracy_x_data = reading_accuracy_chart['XAxis']
+    reading_accuracy_sub_title = reading_accuracy_chart['Legend']
+    reading_accuracy_y_datas = [i['Data'] for i in reading_accuracy_chart['Series']]
 
 
-    reading_difficult_x_data = reading_difficult_chart['XAxis'] 
-    reading_difficult_sub_title = reading_difficult_chart['Legend'] 
-    reading_difficult_y_datas = [i['Data'] for i in reading_difficult_chart['Series']] 
+    reading_difficult_x_data = reading_difficult_chart['XAxis']
+    reading_difficult_sub_title = reading_difficult_chart['Legend']
+    reading_difficult_y_datas = [i['Data'] for i in reading_difficult_chart['Series']]
 
 
     "开始版面-------------------------------------------------"
     "开始版面-------------------------------------------------"
 
 
@@ -133,26 +128,26 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     chart1_io = make_chart(x_axis_data=reading_speed_x_data, y_axis_datas=reading_speed_y_datas, title="阅读速度",
     chart1_io = make_chart(x_axis_data=reading_speed_x_data, y_axis_datas=reading_speed_y_datas, title="阅读速度",
                            sub_title_list=reading_speed_sub_title)
                            sub_title_list=reading_speed_sub_title)
     run1.add_pic(chart1_io, width=chart_width)
     run1.add_pic(chart1_io, width=chart_width)
-    chart1_io.close() 
+    chart1_io.close()
 
 
     p2 = tb3.get_cell_paragraph(0, 1, dq=15, dh=15)
     p2 = tb3.get_cell_paragraph(0, 1, dq=15, dh=15)
     run2 = ParagraphBase(p2)
     run2 = ParagraphBase(p2)
     chart2_io = make_chart(x_axis_data=reading_accuracy_x_data, y_axis_datas=reading_accuracy_y_datas, title="阅读准确率",
     chart2_io = make_chart(x_axis_data=reading_accuracy_x_data, y_axis_datas=reading_accuracy_y_datas, title="阅读准确率",
                            sub_title_list=reading_accuracy_sub_title)
                            sub_title_list=reading_accuracy_sub_title)
     run2.add_pic(chart2_io, width=chart_width)
     run2.add_pic(chart2_io, width=chart_width)
-    chart2_io.close() 
+    chart2_io.close()
 
 
     p3 = tb3.get_cell_paragraph(0, 2, dq=15, dh=15)
     p3 = tb3.get_cell_paragraph(0, 2, dq=15, dh=15)
     run3 = ParagraphBase(p3)
     run3 = ParagraphBase(p3)
     chart3_io = make_chart(x_axis_data=reading_difficult_x_data, y_axis_datas=reading_difficult_y_datas, title="阅读难度",
     chart3_io = make_chart(x_axis_data=reading_difficult_x_data, y_axis_datas=reading_difficult_y_datas, title="阅读难度",
                            sub_title_list=reading_difficult_sub_title)
                            sub_title_list=reading_difficult_sub_title)
     run3.add_pic(chart3_io, width=chart_width)
     run3.add_pic(chart3_io, width=chart_width)
-    chart3_io.close() 
+    chart3_io.close()
 
 
     docx.add_blank_paragraph()
     docx.add_blank_paragraph()
 
 
     tb4 = Table(docx, rows=5, cols=5, border=True, tb_name="自主复习记录")
     tb4 = Table(docx, rows=5, cols=5, border=True, tb_name="自主复习记录")
-   
+
     tb4.set_table_width_xml([2000, 3000, 2000, 2000, 2000])
     tb4.set_table_width_xml([2000, 3000, 2000, 2000, 2000])
 
 
     first_cell = tb4.get_cell(0, 0)
     first_cell = tb4.get_cell(0, 0)
@@ -246,7 +241,7 @@ def section_2(docx: Word, json_data, *args, **kwargs):
 
 
     docx.add_blank_paragraph(dq=2, dh=2)
     docx.add_blank_paragraph(dq=2, dh=2)
     docx.add_paragraph("北京云知学科技有限公司", align="right", size=10)
     docx.add_paragraph("北京云知学科技有限公司", align="right", size=10)
-   
+
     docx.add_page_section()
     docx.add_page_section()
 
 
 
 
@@ -333,15 +328,12 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside = Table(cell_outside, rows=5, cols=3, tb_name="内部内容")
         tb_inside = Table(cell_outside, rows=5, cols=3, tb_name="内部内容")
 
 
         tb_inside.merge_cell(0, 0, 0, 1)
         tb_inside.merge_cell(0, 0, 0, 1)
-        tb_inside.merge_cell(1, 0, 1, 1) 
-        tb_inside.merge_cell(0, 2, 1, 2) 
-        tb_inside.merge_cell(2, 0, 2, 2) 
-        tb_inside.merge_cell(3, 0, 3, 2) 
-        tb_inside.merge_cell(4, 0, 4, 2) 
+        tb_inside.merge_cell(1, 0, 1, 1)
+        tb_inside.merge_cell(0, 2, 1, 2)
+        tb_inside.merge_cell(2, 0, 2, 2)
+        tb_inside.merge_cell(3, 0, 3, 2)
+        tb_inside.merge_cell(4, 0, 4, 2)
 
 
-       
-
-       
         num_calucate = 2 * row + 1 if col == 0 else 2 * row + 2
         num_calucate = 2 * row + 1 if col == 0 else 2 * row + 2
         p = ParagraphBase(tb_inside.get_cell_paragraph(0, 0, align="left"))
         p = ParagraphBase(tb_inside.get_cell_paragraph(0, 0, align="left"))
         p.add_run_to_p(num_dict[num_calucate], bold=True, size=22, font_name="MS Gothic")
         p.add_run_to_p(num_dict[num_calucate], bold=True, size=22, font_name="MS Gothic")
@@ -349,19 +341,17 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside.set_cell_text(row=1, column=0, cell_text=data[1] + "  " + data[2], border=False, size=10, align="left",
         tb_inside.set_cell_text(row=1, column=0, cell_text=data[1] + "  " + data[2], border=False, size=10, align="left",
                                 bk_color=(240, 240, 240))
                                 bk_color=(240, 240, 240))
 
 
-       
-        image_io:BytesIO = qrcode_result.get(data[9], "") 
+        image_io: BytesIO = qrcode_result.get(data[9], "")
         if image_io:
         if image_io:
             cell_p = tb_inside.get_cell_paragraph(0, 2, dq=5)
             cell_p = tb_inside.get_cell_paragraph(0, 2, dq=5)
             p_base = ParagraphBase(cell_p)
             p_base = ParagraphBase(cell_p)
             p_base.add_pic(image_io, width=1.5)
             p_base.add_pic(image_io, width=1.5)
             image_io.close()
             image_io.close()
 
 
-       
         cell_p = tb_inside.get_cell_paragraph(2, 0, align="left")
         cell_p = tb_inside.get_cell_paragraph(2, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
         cell_p_1 = ParagraphBase(cell_p)
-        cell_p_1.add_run_to_p(data[3], size=10, bold=True) 
-        cell_p_1.add_run_to_p("   " + data[4], size=8) 
+        cell_p_1.add_run_to_p(data[3], size=10, bold=True)
+        cell_p_1.add_run_to_p("   " + data[4], size=8)
 
 
         cell_p = tb_inside.get_cell_paragraph(3, 0, align="left")
         cell_p = tb_inside.get_cell_paragraph(3, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
         cell_p_1 = ParagraphBase(cell_p)
@@ -375,40 +365,39 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
 
     properties_chinese_map = {"adj": "形容词", "n": "名词", "interj": "感叹词", "conj": "连词", "num": "数字", "art": "冠词",
     properties_chinese_map = {"adj": "形容词", "n": "名词", "interj": "感叹词", "conj": "连词", "num": "数字", "art": "冠词",
                               "pron": "代词", "adv": "副词", "prep": "介词", "v": "动词"}
                               "pron": "代词", "adv": "副词", "prep": "介词", "v": "动词"}
-    strange_words_data = [] 
+    strange_words_data = []
     strange_words = json_data.get('StrangeWords')
     strange_words = json_data.get('StrangeWords')
-    qrcode_thread = [] 
+    qrcode_thread = []
     qrcode_result = {}
     qrcode_result = {}
 
 
     for item in strange_words:
     for item in strange_words:
-        spell = item['Spell'] 
-        word_id = item['WordId'] 
+        spell = item['Spell']
+        word_id = item['WordId']
         en = "" if not item.get("SymbolsEn", "") else item.get("SymbolsEn")
         en = "" if not item.get("SymbolsEn", "") else item.get("SymbolsEn")
         am = "" if not item.get("SymbolsAm", "") else item.get("SymbolsAm")
         am = "" if not item.get("SymbolsAm", "") else item.get("SymbolsAm")
 
 
-        symbols_en = "英" + f'[{en}]' 
-        symbols_am = "美" + f'[{am}]' 
+        symbols_en = "英" + f'[{en}]'
+        symbols_am = "美" + f'[{am}]'
 
 
-       
         tts_url = f"https://dcjxb.yunzhixue.cn/exercise/word?id={word_id}"
         tts_url = f"https://dcjxb.yunzhixue.cn/exercise/word?id={word_id}"
         t = Thread(target=qrcode_maker, args=(tts_url, qrcode_result))
         t = Thread(target=qrcode_maker, args=(tts_url, qrcode_result))
         qrcode_thread.append(t)
         qrcode_thread.append(t)
         t.start()
         t.start()
 
 
-        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']]) 
-        word_meanings = item.get('Meaning', "") 
+        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']])
+        word_meanings = item.get('Meaning', "")
         word_changes = ";".join([s["Type"] + ":" + s["Spell"] for s in item["WordChanges"]])
         word_changes = ";".join([s["Type"] + ":" + s["Spell"] for s in item["WordChanges"]])
 
 
         if item['Sentences']:
         if item['Sentences']:
             sentences = item['Sentences'][0]['English'] + '\n' + item['Sentences'][0]['Chinese']
             sentences = item['Sentences'][0]['English'] + '\n' + item['Sentences'][0]['Chinese']
         else:
         else:
             sentences = ""
             sentences = ""
-       
+
         single_word_tuple = (spell, symbols_en, symbols_am, word_properties, word_meanings,
         single_word_tuple = (spell, symbols_en, symbols_am, word_properties, word_meanings,
                              "词汇变形", word_changes, "例句", sentences, tts_url)
                              "词汇变形", word_changes, "例句", sentences, tts_url)
         strange_words_data.append(single_word_tuple)
         strange_words_data.append(single_word_tuple)
 
 
-    rows = math.ceil(len(strange_words_data) / 2) 
+    rows = math.ceil(len(strange_words_data) / 2)
     tb_outside = Table(docx, rows=rows, cols=2, tb_name="外层框架")
     tb_outside = Table(docx, rows=rows, cols=2, tb_name="外层框架")
     tb_outside.set_tb_colum_width(width=[230, 230])
     tb_outside.set_tb_colum_width(width=[230, 230])
 
 
@@ -428,12 +417,10 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
 
 @time_use
 @time_use
 def section_5(docx: Word, json_data, *args, **kwargs):
 def section_5(docx: Word, json_data, *args, **kwargs):
-   
     copy_word_list = [i['Meaning'] for i in json_data.get('StrangeWords')]
     copy_word_list = [i['Meaning'] for i in json_data.get('StrangeWords')]
-    random_copy_word_list = copy_word_list * 3 
+    random_copy_word_list = copy_word_list * 3
     shuffle(random_copy_word_list)
     shuffle(random_copy_word_list)
 
 
-   
     first_copy_word_list = copy_word_list.copy()
     first_copy_word_list = copy_word_list.copy()
     copy_word_list_add_num = [f"{i} ({idx})" for idx, i in enumerate(first_copy_word_list, start=1)]
     copy_word_list_add_num = [f"{i} ({idx})" for idx, i in enumerate(first_copy_word_list, start=1)]
     shuffle(copy_word_list_add_num)
     shuffle(copy_word_list_add_num)
@@ -486,8 +473,6 @@ def section_6(docx: Word, json_data, *args, **kwargs):
         p.add_run_to_p("☆ ", size=10, font_name="MS Gothic")
         p.add_run_to_p("☆ ", size=10, font_name="MS Gothic")
         p.add_run_to_p(t, size=10)
         p.add_run_to_p(t, size=10)
 
 
-   
-
     data = ["1. I have no chance to go sightseeing this summer.	(chance)",
     data = ["1. I have no chance to go sightseeing this summer.	(chance)",
             "2. And with that, we conclude the third and final example.	(third)",
             "2. And with that, we conclude the third and final example.	(third)",
             "3. He lives a healthy and normal life and has a strong body.	(healthy)",
             "3. He lives a healthy and normal life and has a strong body.	(healthy)",
@@ -505,8 +490,8 @@ def section_6(docx: Word, json_data, *args, **kwargs):
             "15. His performance at the concert last night proved that he is in the top of international pianists.	(concert)"]
             "15. His performance at the concert last night proved that he is in the top of international pianists.	(concert)"]
 
 
     for i in example_sentence:
     for i in example_sentence:
-        p = docx.add_blank_paragraph(dq=4,dh=4)
-        p.add_run_to_p("□  ", size=12,font_name="宋体")
+        p = docx.add_blank_paragraph(dq=4, dh=4)
+        p.add_run_to_p("□  ", size=12, font_name="宋体")
         p.add_run_to_p(i + "___________")
         p.add_run_to_p(i + "___________")
 
 
     docx.add_page_section()
     docx.add_page_section()
@@ -514,22 +499,20 @@ def section_6(docx: Word, json_data, *args, **kwargs):
 
 
 @time_use
 @time_use
 def section_7(docx: Word, json_data, *args, **kwargs):
 def section_7(docx: Word, json_data, *args, **kwargs):
-   
     def wanxing(index, article_single):
     def wanxing(index, article_single):
         article_id = article_single['Id']
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
         article_length = article_single['AllWordAmount']
-       
+
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-       
+
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
 
-       
         select_text = []
         select_text = []
         for ques_index, candidates in enumerate(article_single['Questions'], start=1):
         for ques_index, candidates in enumerate(article_single['Questions'], start=1):
             single_select_text = ''
             single_select_text = ''
             for s in candidates['Candidates']:
             for s in candidates['Candidates']:
                 single_select_text += s['Label'] + '. '
                 single_select_text += s['Label'] + '. '
-                participle = s['Participle'] 
+                participle = s['Participle']
                 if participle:
                 if participle:
                     single_select_text += participle + ' \n'
                     single_select_text += participle + ' \n'
                 else:
                 else:
@@ -538,14 +521,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 
             select_text.append(f"{ques_index}. {single_select_text}")
             select_text.append(f"{ques_index}. {single_select_text}")
 
 
-       
         all_select_text = "\n".join(select_text)
         all_select_text = "\n".join(select_text)
 
 
-       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
         article_main_list = article_main.split(" ")
 
 
-       
         explanatory_words = "\n\n".join(
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
             [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -588,8 +568,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="完形填空")
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="完形填空")
         tb2.set_tb_colum_width(width=[320, 140])
         tb2.set_tb_colum_width(width=[320, 140])
 
 
-       
-       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
             word = re.search(r"\[(\d+)]", w)
@@ -600,7 +578,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else: 
+                else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -618,9 +596,8 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
         docx.add_blank_paragraph()
 
 
-   
     def reading(index, article_single):
     def reading(index, article_single):
-       
+
         all_article_length = 0
         all_article_length = 0
 
 
         def single_yuedu(index, a):
         def single_yuedu(index, a):
@@ -628,20 +605,19 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             article_length = a['AllWordAmount']
             article_length = a['AllWordAmount']
             nonlocal all_article_length
             nonlocal all_article_length
             all_article_length += article_length
             all_article_length += article_length
-           
+
             strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
             strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-           
+
             explanatory_words_ids = [i['MeaningId'] for i in a['ExplanatoryWords']]
             explanatory_words_ids = [i['MeaningId'] for i in a['ExplanatoryWords']]
 
 
-           
             select_text = []
             select_text = []
             for ques_index, candidates in enumerate(a['Questions'], start=1):
             for ques_index, candidates in enumerate(a['Questions'], start=1):
                 single_select_text = ''
                 single_select_text = ''
-               
-                subject = candidates['Subject'] + '\n' 
+
+                subject = candidates['Subject'] + '\n'
                 for s in candidates['Candidates']:
                 for s in candidates['Candidates']:
-                    single_select_text += s['Label'] + '. ' 
-                    participle = s['Participle'] 
+                    single_select_text += s['Label'] + '. '
+                    participle = s['Participle']
                     if participle:
                     if participle:
                         single_select_text += participle + ' \n'
                         single_select_text += participle + ' \n'
                     else:
                     else:
@@ -649,14 +625,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         single_select_text += text + ' \n'
                         single_select_text += text + ' \n'
                 select_text.append(str(ques_index) + ". " + subject + single_select_text)
                 select_text.append(str(ques_index) + ". " + subject + single_select_text)
 
 
-           
             all_select_text = "\n".join(select_text)
             all_select_text = "\n".join(select_text)
 
 
-           
             article_main: str = a['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
             article_main: str = a['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
             article_main_list = article_main.split(" ")
             article_main_list = article_main.split(" ")
 
 
-           
             explanatory_words = "\n\n".join(
             explanatory_words = "\n\n".join(
                 [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
                 [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
                  enumerate(a['ExplanatoryWords'], start=1)])
                  enumerate(a['ExplanatoryWords'], start=1)])
@@ -672,7 +645,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
             tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
             tb2.set_tb_colum_width(width=[320, 140])
             tb2.set_tb_colum_width(width=[320, 140])
 
 
-           
             tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
             tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
             for w in article_main_list:
             for w in article_main_list:
                 word = re.search(r"\[(\d+)]", w)
                 word = re.search(r"\[(\d+)]", w)
@@ -683,7 +655,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                         tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     elif meaning_id in explanatory_words_ids:
                     elif meaning_id in explanatory_words_ids:
                         tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
                         tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                    else: 
+                    else:
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
                 else:
                 else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -696,9 +668,9 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             docx.add_blank_paragraph()
             docx.add_blank_paragraph()
 
 
         "---------------------开始单篇运行---------------------"
         "---------------------开始单篇运行---------------------"
-        if index == 1: 
+        if index == 1:
             sub_title_maker(docx, "阅读提升练", "智能匹配难度,轻松提升阅读", "春笋智学, 高效学习专家")
             sub_title_maker(docx, "阅读提升练", "智能匹配难度,轻松提升阅读", "春笋智学, 高效学习专家")
-           
+
             tb = Table(docx, 1, 1, tb_name="真题强化练", border=True)
             tb = Table(docx, 1, 1, tb_name="真题强化练", border=True)
             tb.set_tb_colum_width(0, 460)
             tb.set_tb_colum_width(0, 460)
             text = ["阅读中不认识的单词,尽量猜测词义,并用红笔加以标记,以便日后快速回顾。\n",
             text = ["阅读中不认识的单词,尽量猜测词义,并用红笔加以标记,以便日后快速回顾。\n",
@@ -723,21 +695,19 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
         docx.add_blank_paragraph()
 
 
-   
     def seven_to_five(index, article_single):
     def seven_to_five(index, article_single):
         article_id = article_single['Id']
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
         article_length = article_single['AllWordAmount']
-       
+
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-       
+
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
 
-       
         select_text = []
         select_text = []
         for ques_index, s_candidates in enumerate(article_single['Candidates'], start=1):
         for ques_index, s_candidates in enumerate(article_single['Candidates'], start=1):
             single_select_text = ''
             single_select_text = ''
             single_select_text += s_candidates['Label'] + '. '
             single_select_text += s_candidates['Label'] + '. '
-            participle = s_candidates['Participle'] 
+            participle = s_candidates['Participle']
             if participle:
             if participle:
                 single_select_text += participle
                 single_select_text += participle
             else:
             else:
@@ -746,14 +716,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 
             select_text.append(f"{single_select_text}")
             select_text.append(f"{single_select_text}")
 
 
-       
         all_select_text = "\n".join(select_text)
         all_select_text = "\n".join(select_text)
 
 
-       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
         article_main_list = article_main.split(" ")
 
 
-       
         explanatory_words = "\n\n".join(
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
             [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -785,7 +752,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
         tb2.set_tb_colum_width(width=[320, 140])
         tb2.set_tb_colum_width(width=[320, 140])
 
 
-       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
             word = re.search(r"\[(\d+)]", w)
@@ -796,7 +762,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else: 
+                else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -813,19 +779,18 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 
     "判断题型;根据题型选择----------------------------"
     "判断题型;根据题型选择----------------------------"
     for index, article_single in enumerate(json_data['Articles'], start=1):
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category'] 
+        article_type = article_single['Category']
 
 
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
 
 
         assert article_type in article_type_select
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single) 
+        article_type_select[article_type](index, article_single)
 
 
     docx.add_page_section()
     docx.add_page_section()
 
 
 
 
 @time_use
 @time_use
 def section_8(docx: Word, json_data, *args, **kwargs):
 def section_8(docx: Word, json_data, *args, **kwargs):
-   
     sub_title_maker(docx, "单词趣味填", "趣味练习,多维提升和巩固")
     sub_title_maker(docx, "单词趣味填", "趣味练习,多维提升和巩固")
     docx.add_pic_single_paragraph("make_docx_demo/static/happy_word.jpg", align="center", width=14.58)
     docx.add_pic_single_paragraph("make_docx_demo/static/happy_word.jpg", align="center", width=14.58)
     docx.add_page_section()
     docx.add_page_section()
@@ -835,51 +800,46 @@ def section_8(docx: Word, json_data, *args, **kwargs):
 def section_9(docx: Word, json_data, *args, **kwargs):
 def section_9(docx: Word, json_data, *args, **kwargs):
     def wanxing(index, article_single):
     def wanxing(index, article_single):
         chinese_article = article_single['Chinese']
         chinese_article = article_single['Chinese']
-        all_analysis = '' 
+        all_analysis = ''
 
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         text = "做阅读题的目的是锻炼理解英语文本的能力,答题只是检验理解程度的手段。请尽量根据所给题眼理解解题依据。若需要看汉语解析才能明白,你需要回到词汇与阅读训练,并从较低难度入手,以便打好基础。"
         text = "做阅读题的目的是锻炼理解英语文本的能力,答题只是检验理解程度的手段。请尽量根据所给题眼理解解题依据。若需要看汉语解析才能明白,你需要回到词汇与阅读训练,并从较低难度入手,以便打好基础。"
         docx.add_paragraph(text, size=9)
         docx.add_paragraph(text, size=9)
 
 
-       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip() 
-            abcd_label = '' 
+            analysis = question_item['Analysis'].strip()
+            abcd_label = ''
 
 
             candidates = question_item['Candidates']
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label'].strip()
                     abcd_label += abcd_selected['Label'].strip()
 
 
-            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n" 
+            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n"
 
 
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph("全文参考译文", chinese_font_name="微软雅黑", dq=15, dh=5, bold=True)
         docx.add_paragraph("全文参考译文", chinese_font_name="微软雅黑", dq=15, dh=5, bold=True)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
 
-   
     def reading(index, article_single):
     def reading(index, article_single):
-        all_analysis = '' 
-        all_difficult_sentences = [] 
+        all_analysis = ''
+        all_difficult_sentences = []
 
 
         chinese_article = article_single['Chinese']
         chinese_article = article_single['Chinese']
 
 
-       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip("\n") 
-            abcd_label = '' 
+            analysis = question_item['Analysis'].strip("\n")
+            abcd_label = ''
 
 
             candidates = question_item['Candidates']
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label'].strip("\n")
                     abcd_label += abcd_selected['Label'].strip("\n")
 
 
             all_analysis += f"{ques_index}.{abcd_label}  {analysis}\n"
             all_analysis += f"{ques_index}.{abcd_label}  {analysis}\n"
 
 
-       
         all_analysis += '\n'
         all_analysis += '\n'
 
 
-       
         for difficult_sentence_item in article_single['DifficultSentences']:
         for difficult_sentence_item in article_single['DifficultSentences']:
             all_difficult_sentences.append(difficult_sentence_item['Chinese'])
             all_difficult_sentences.append(difficult_sentence_item['Chinese'])
 
 
@@ -898,19 +858,19 @@ def section_9(docx: Word, json_data, *args, **kwargs):
 
 
     def seven_to_five(index, article_single):
     def seven_to_five(index, article_single):
         chinese_article = article_single['Chinese']
         chinese_article = article_single['Chinese']
-        all_analysis = '' 
+        all_analysis = ''
 
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         text = "做阅读题的目的是锻炼理解英语文本的能力,答题只是检验理解程度的手段。请尽量根据所给题眼理解解题依据。若需要看汉语解析才能明白,你需要回到词汇与阅读训练,并从较低难度入手,以便打好基础。"
         text = "做阅读题的目的是锻炼理解英语文本的能力,答题只是检验理解程度的手段。请尽量根据所给题眼理解解题依据。若需要看汉语解析才能明白,你需要回到词汇与阅读训练,并从较低难度入手,以便打好基础。"
         docx.add_paragraph(text, size=9)
         docx.add_paragraph(text, size=9)
-       
+
         for q_index, question_item in enumerate(article_single['Questions'], start=1):
         for q_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'] 
-            abcd_label = '' 
+            analysis = question_item['Analysis']
+            abcd_label = ''
 
 
             candidates = question_item['Candidates']
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label']
                     abcd_label += abcd_selected['Label']
             all_analysis += f"{q_index}.{abcd_label}  {analysis}\n"
             all_analysis += f"{q_index}.{abcd_label}  {analysis}\n"
 
 
@@ -922,10 +882,10 @@ def section_9(docx: Word, json_data, *args, **kwargs):
     "判断题型;根据题型选择----------------------------"
     "判断题型;根据题型选择----------------------------"
     sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯")
     sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯")
     for index, article_single in enumerate(json_data['Articles'], start=1):
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category'] 
+        article_type = article_single['Category']
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         assert article_type in article_type_select
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single) 
+        article_type_select[article_type](index, article_single)
         docx.add_blank_paragraph()
         docx.add_blank_paragraph()
 
 
     docx.add_docx_component("make_docx_demo/word_component/blank.docx")
     docx.add_docx_component("make_docx_demo/word_component/blank.docx")
@@ -973,17 +933,16 @@ def section_10(docx: Word, json_data, scanpage_format, **kwargs):
 
 
 
 
 def two_check_page(docx: Word, json_data, **kwargs):
 def two_check_page(docx: Word, json_data, **kwargs):
-   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
         if len(word_data_list) % 2 != 0:
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("") 
+            word_data_list.append("")
 
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[40, 100, 100])
         tb.set_tb_colum_width(width=[40, 100, 100])
 
 
         p_cell = tb.get_cell_paragraph(0, 0, dq=10)
         p_cell = tb.get_cell_paragraph(0, 0, dq=10)
         p = ParagraphBase(p_cell)
         p = ParagraphBase(p_cell)
-        p.add_pic("make_docx_demo/static/logo2.png", width=Inches(1.2)) 
+        p.add_pic("make_docx_demo/static/logo2.png", width=Inches(1.2))
 
 
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 2, f"{page_title}\n{page_sub_title}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 2, f"{page_title}\n{page_sub_title}", border=False, size=8, dh=2)
@@ -994,9 +953,9 @@ def two_check_page(docx: Word, json_data, **kwargs):
 
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
 
-        half_count = int(len(word_data_list) / 2) 
-        for index,row in enumerate(range(half_count)):
-            first_word, second_word = word_data_list[row],word_data_list[row + half_count]
+        half_count = int(len(word_data_list) / 2)
+        for index, row in enumerate(range(half_count)):
+            first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
             cell4 = "□ ___________________________" if second_word else ""
             cell4 = "□ ___________________________" if second_word else ""
 
 
@@ -1007,13 +966,12 @@ def two_check_page(docx: Word, json_data, **kwargs):
         blank_count = " " * 80
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break() 
+        docx.add_page_break()
 
 
-   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
                           foot_description, foot_description2, article_type, word_data_list2):
         if len(word_data_list2) % 2 != 0:
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""]) 
+            word_data_list2.append(["", ""])
 
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[40, 130, 130, 150, 70])
         tb.set_tb_colum_width(width=[40, 130, 130, 150, 70])
@@ -1039,15 +997,10 @@ def two_check_page(docx: Word, json_data, **kwargs):
         tb = Table(docx, rows=0, cols=4, tb_name="第二页筛查表")
         tb = Table(docx, rows=0, cols=4, tb_name="第二页筛查表")
 
 
         ## 1234横着放
         ## 1234横着放
-       
-       
-       
+
         #
         #
-       
-       
+
         #
         #
-       
-       
 
 
         ## 1234竖着放
         ## 1234竖着放
         total_row = int(len(word_data_list2) / 2)
         total_row = int(len(word_data_list2) / 2)
@@ -1058,13 +1011,13 @@ def two_check_page(docx: Word, json_data, **kwargs):
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
 
 
-            data = [f"{row + 1}. {spell1}", f"□ {meaning1}", cell3, cell4] 
+            data = [f"{row + 1}. {spell1}", f"□ {meaning1}", cell3, cell4]
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
 
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_row_height(13.6)
         tb.set_row_height(13.6)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
-        if article_type == 1: 
+        if article_type == 1:
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
                                font_name="仿宋", dq=5)
             docx.add_paragraph(foot_description2, align="right", size=8, chinese_font_name="仿宋")
             docx.add_paragraph(foot_description2, align="right", size=8, chinese_font_name="仿宋")
@@ -1072,45 +1025,39 @@ def two_check_page(docx: Word, json_data, **kwargs):
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
                                font_name="仿宋", dq=5)
 
 
-
-   
-    student_name = json_data.get("StudentInfo").get("StudentName", '') 
-    class_name = json_data.get("StudentInfo").get("ClassName", '') 
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
-    article_type = json_data['Articles'][0]['Category'] 
-    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage'] 
+    student_name = json_data.get("StudentInfo").get("StudentName", '')
+    class_name = json_data.get("StudentInfo").get("ClassName", '')
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
+    article_type = json_data['Articles'][0]['Category']
+    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage']
 
 
     """---------------------------------------------------------------------------------"""
     """---------------------------------------------------------------------------------"""
     for index, page in enumerate(json_data['ScreeningScanPages'], start=1):
     for index, page in enumerate(json_data['ScreeningScanPages'], start=1):
         page_id = str(page['PageId']).rjust(11, "0")
         page_id = str(page['PageId']).rjust(11, "0")
 
 
-       
         if index >= 2:
         if index >= 2:
             docx.add_page_break()
             docx.add_page_break()
 
 
-        page_title = page['Title'] 
-        page_sub_title = page['SubTitle'] 
-        foot_description = page['FootDescription'] 
-        foot_description2 = page['FootDescription2'] 
+        page_title = page['Title']
+        page_sub_title = page['SubTitle']
+        foot_description = page['FootDescription']
+        foot_description2 = page['FootDescription2']
 
 
         word_data_list1 = []
         word_data_list1 = []
         word_data_list2 = []
         word_data_list2 = []
-        for i in page['FilterTable']['Items']: 
+        for i in page['FilterTable']['Items']:
             word_data_list1.append(i['Spell'])
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
 
-       
         if is_add_empty_filter_page:
         if is_add_empty_filter_page:
             empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list1)
             empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list1)
 
 
-       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
                           foot_description, foot_description2, article_type, word_data_list2)
 
 
 
 
 @time_use
 @time_use
 def other(docx, json_data, **kwargs):
 def other(docx, json_data, **kwargs):
-   
     sections = docx.doc.sections
     sections = docx.doc.sections
     for section in sections[:-1]:
     for section in sections[:-1]:
         section.top_margin = Inches(0.3)
         section.top_margin = Inches(0.3)
@@ -1143,7 +1090,6 @@ def start_make_word(json_data, document_format, scanpage_format):
         for s in menu:
         for s in menu:
             s(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
             s(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
 
 
-       
         docx.save_docx()
         docx.save_docx()
         if document_format == 1:
         if document_format == 1:
             return "develop.docx"
             return "develop.docx"
@@ -1161,6 +1107,5 @@ if __name__ == '__main__':
     t = time.time()
     t = time.time()
     os.chdir("..")
     os.chdir("..")
 
 
-   
     start_make_word(test_json5, 1, 1)
     start_make_word(test_json5, 1, 1)
     print(time.time() - t)
     print(time.time() - t)
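
Every section builder in these two files is wrapped with @time_use from make_docx_demo.docx_other_func. Its implementation is not part of this diff, so the following is only a minimal sketch of what such a timing decorator usually looks like (printing is an assumption; the project routes output through tools.loglog):

    import time
    from functools import wraps

    def time_use(func):
        # Sketch only: report how long a section builder takes.
        @wraps(func)
        def wrapper(*args, **kwargs):
            start = time.perf_counter()
            result = func(*args, **kwargs)
            print(f"{func.__name__} took {time.perf_counter() - start:.3f}s")
            return result
        return wrapper

    @time_use
    def build_section():
        time.sleep(0.1)

    build_section()  # -> "build_section took 0.100s" (approximately)
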

+ 147 - 244
make_docx_demo/main_word_applet.py

@@ -1,34 +1,31 @@
 # -*- coding: UTF-8 -*-
 # -*- coding: UTF-8 -*-
 """专为鲍利提分小程序,制作的word文档;apifox接口在-单词教学宝-词汇突击学案文档生成接口"""
 """专为鲍利提分小程序,制作的word文档;apifox接口在-单词教学宝-词汇突击学案文档生成接口"""
 
 
-import time
-import re
-import os
 import math
 import math
-import yaml
-from random import randint, shuffle
+import re
+import time
+from docx_base import Word, Table, ParagraphBase
+from random import shuffle
 
 
-from docx.shared import Pt, Inches, Cm, RGBColor
-from docx.enum.text import WD_COLOR_INDEX
+from docx.shared import Inches
+
+from common.split_text import split_text_to_word_punctuation
+from config.read_config import address
 from make_docx_demo.data import *
 from make_docx_demo.data import *
-from docx_base import Word, Table, hex_to_rgb, rgb_to_hex, ParagraphBase
 from make_docx_demo.docx_other_func import time_use, qrcode_maker, get_weekday
 from make_docx_demo.docx_other_func import time_use, qrcode_maker, get_weekday
-from tools.loglog import logger, log_err_e
-from make_docx_demo.word2pdf import convert_word_to_pdf
 from make_docx_demo.get_standard_data import get_standard_data
 from make_docx_demo.get_standard_data import get_standard_data
-from common.split_text import split_text_to_word_punctuation
-from config.read_config import address
+from make_docx_demo.word2pdf import convert_word_to_pdf
+from tools.loglog import log_err_e
 
 
 num_dict = {1: "❶", 2: "❷", 3: "❸", 4: "❹", 5: "❺", 6: "❻", 7: "❼", 8: "❽", 9: "❾",
 num_dict = {1: "❶", 2: "❷", 3: "❸", 4: "❹", 5: "❺", 6: "❻", 7: "❼", 8: "❽", 9: "❾",
             10: "❿", 11: "⓫", 12: "⓬", 13: "⓭", 14: "⓮", 15: "⓯", 16: "⓰", 17: "⓱", 18: "⓲", 19: "⓳", 20: "⓴"}
             10: "❿", 11: "⓫", 12: "⓬", 13: "⓭", 14: "⓮", 15: "⓯", 16: "⓰", 17: "⓱", 18: "⓲", 19: "⓳", 20: "⓴"}
 
 
 
 
-
 @time_use
 @time_use
 def header_maker(docx: Word, json_data):
 def header_maker(docx: Word, json_data):
-    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0") 
-    exercise_title = json_data.get("ExerciseTitle", "") 
-    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
+    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
+    exercise_title = json_data.get("ExerciseTitle", "")
+    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
 
 
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "")
     class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "")
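
header_maker left-pads ExerciseId to a fixed 11 characters with str.rjust so every printed page carries a constant-width identifier. The padding behavior in isolation:

    exercise_id = str(42).rjust(11, "0")
    assert exercise_id == "00000000042"   # 11 characters, left-padded with zeros
    # str(42).zfill(11) is equivalent for non-negative integers
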
@@ -46,15 +43,12 @@ def header_maker(docx: Word, json_data):
         tb_header.set_cell_text(0, 4, f"{t_date}\n{t_weekday}\n{t_time}", size=8, border=False, color=(220, 220, 220))
         tb_header.set_cell_text(0, 4, f"{t_date}\n{t_weekday}\n{t_time}", size=8, border=False, color=(220, 220, 220))
 
 
         tb_header.set_tb_colum_width(width=[100, 70, 70, 150, 80])
         tb_header.set_tb_colum_width(width=[100, 70, 70, 150, 80])
-       
 
 
-    target_section = docx.doc.sections[-1] 
+    target_section = docx.doc.sections[-1]
     target_section.header.is_linked_to_previous = False
     target_section.header.is_linked_to_previous = False
-   
+
     for paragraph in target_section.header.paragraphs:
     for paragraph in target_section.header.paragraphs:
-        paragraph.clear() 
-   
-   
+        paragraph.clear()
 
 
     target_section.header_distance = 0
     target_section.header_distance = 0
     target_section.footer_distance = 280000
     target_section.footer_distance = 280000
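
The block above detaches the last section's header from the previous section, empties its paragraphs, and pins the header/footer distances. The same pattern as a self-contained python-docx sketch (the file name is hypothetical):

    from docx import Document

    doc = Document()
    section = doc.sections[-1]
    section.header.is_linked_to_previous = False   # stop inheriting the previous header
    for paragraph in section.header.paragraphs:
        paragraph.clear()                          # drop leftover runs, keep the paragraph
    section.header_distance = 0                    # EMU; header flush with the page top
    section.footer_distance = 280000               # EMU, ~0.31 inch, as in the code above
    doc.save("header_demo.docx")
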
@@ -65,9 +59,9 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
     p = docx.add_blank_paragraph()
     p = docx.add_blank_paragraph()
     line_width = 200
     line_width = 200
     main_rect_x = line_width + 10
     main_rect_x = line_width + 10
-    main_rect_width = 150 
+    main_rect_width = 150
 
 
-    right_line_x = main_rect_x + main_rect_width + 10 
+    right_line_x = main_rect_x + main_rect_width + 10
 
 
     p.add_rectangle(main_title, x=main_rect_x, y=4, fill_color="000000", width=main_rect_width, height=48, font_color="ffffff",
     p.add_rectangle(main_title, x=main_rect_x, y=4, fill_color="000000", width=main_rect_width, height=48, font_color="ffffff",
                     font_size=18)
                     font_size=18)
@@ -84,32 +78,25 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
 
 
 @time_use
 @time_use
 def section_1(docx: Word, json_data, *args, **kwargs):
 def section_1(docx: Word, json_data, *args, **kwargs):
-   
-    exercise_id_int = json_data.get("ExerciseId", "") 
-    student_name = json_data.get("StudentInfo").get("StudentName", '') 
-    student_stage = json_data.get("StudentInfo").get("StudentStage") 
+    exercise_id_int = json_data.get("ExerciseId", "")
+    student_name = json_data.get("StudentInfo").get("StudentName", '')
+    student_stage = json_data.get("StudentInfo").get("StudentStage")
     grade_name = {1: "小学", 2: "初中", 3: "高中"}.get(student_stage)
     grade_name = {1: "小学", 2: "初中", 3: "高中"}.get(student_stage)
-    t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
+    t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
 
 
-   
     totalVocabulary, readingAccuracy, readingLevel, readingSpeed = get_standard_data(student_stage)
     totalVocabulary, readingAccuracy, readingLevel, readingSpeed = get_standard_data(student_stage)
 
 
-    FirstVocabulary = json_data['StudentInfo']['StudentStudy']['FirstVocabulary'] 
-    Vocabulary = json_data['StudentInfo']['StudentStudy']['Vocabulary'] 
-    ReadingVolume = json_data['StudentInfo']['StudentStudy']['ReadingVolume'] 
+    FirstVocabulary = json_data['StudentInfo']['StudentStudy']['FirstVocabulary']
+    Vocabulary = json_data['StudentInfo']['StudentStudy']['Vocabulary']
+    ReadingVolume = json_data['StudentInfo']['StudentStudy']['ReadingVolume']
 
 
-   
-   
-   
-    r6 = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
+    r6 = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
 
 
-   
-    r7 = len([strange_words for exercise in json_data['WordAndArticleContents'] for strange_words in exercise['StrangeWords']]) 
-    r8 = r6 
-    multi_article_difficulty = [article_obj['Score'] for article_obj in json_data['WordAndArticleContents'][0]['Articles']] 
-    difficulty_value = sum(multi_article_difficulty) // len(multi_article_difficulty) if multi_article_difficulty else 0 
+    r7 = len([strange_words for exercise in json_data['WordAndArticleContents'] for strange_words in exercise['StrangeWords']])
+    r8 = r6
+    multi_article_difficulty = [article_obj['Score'] for article_obj in json_data['WordAndArticleContents'][0]['Articles']]
+    difficulty_value = sum(multi_article_difficulty) // len(multi_article_difficulty) if multi_article_difficulty else 0
 
 
-   
     InspirationalMessage = json_data.get('InspirationalMessage')
     InspirationalMessage = json_data.get('InspirationalMessage')
     "开始版面-------------------------------------------------"
     "开始版面-------------------------------------------------"
 
 
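difficulty_value above is a floor average of the per-article Score values, guarded so an empty article list yields 0 instead of a ZeroDivisionError:

    scores = [78, 85, 91]
    difficulty_value = sum(scores) // len(scores) if scores else 0
    assert difficulty_value == 84          # floor average of 254 / 3

    scores = []
    assert (sum(scores) // len(scores) if scores else 0) == 0
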
@@ -156,12 +143,10 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     t5.set_row_height(row_height=50)
     t5.set_row_height(row_height=50)
     t5.set_tb_colum_width(0, 500)
     t5.set_tb_colum_width(0, 500)
 
 
-   
     docx.add_paragraph(text="多媒体辅助", size=16, align="left", bold=True, dq=10, dh=5)
     docx.add_paragraph(text="多媒体辅助", size=16, align="left", bold=True, dq=10, dh=5)
     docx.add_paragraph(text="需要示范的的学员,扫以下二维码获取音频、视频示范:", size=12, align="left", dq=5, dh=5)
     docx.add_paragraph(text="需要示范的的学员,扫以下二维码获取音频、视频示范:", size=12, align="left", dq=5, dh=5)
     p = docx.add_blank_paragraph()
     p = docx.add_blank_paragraph()
 
 
-   
     img_io = qrcode_maker(full_url=f"{address}/link?type=exercise&id={exercise_id_int}&from=bltf")
     img_io = qrcode_maker(full_url=f"{address}/link?type=exercise&id={exercise_id_int}&from=bltf")
     p.add_pic(img_io, width=2)
     p.add_pic(img_io, width=2)
     img_io.close()
     img_io.close()
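
qrcode_maker (from make_docx_demo.docx_other_func) returns an in-memory image that add_pic embeds, after which the buffer is closed. Its internals are not in this diff; a plausible shape using the qrcode package, offered only as a sketch:

    import io
    import qrcode

    def qrcode_maker(full_url: str) -> io.BytesIO:
        img = qrcode.make(full_url)        # PIL-backed QR image
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        buf.seek(0)                        # rewind so the consumer reads from the start
        return buf

    buf = qrcode_maker("https://example.com/link?type=exercise&id=1&from=bltf")
    # ... hand buf to an image writer ...
    buf.close()                            # caller closes it, as section_1 does above
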
@@ -197,17 +182,13 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         cell_outside = tb_outside.get_cell(row, col, delete_default_para=True)
         cell_outside = tb_outside.get_cell(row, col, delete_default_para=True)
         tb_inside = Table(cell_outside, rows=5, cols=3, tb_name="内部内容")
         tb_inside = Table(cell_outside, rows=5, cols=3, tb_name="内部内容")
 
 
-       
         tb_inside.merge_cell(0, 0, 0, 2)
         tb_inside.merge_cell(0, 0, 0, 2)
-        tb_inside.merge_cell(1, 0, 1, 2) 
-       
-        tb_inside.merge_cell(2, 0, 2, 2) 
-        tb_inside.merge_cell(3, 0, 3, 2) 
-        tb_inside.merge_cell(4, 0, 4, 2) 
+        tb_inside.merge_cell(1, 0, 1, 2)
 
 
-       
+        tb_inside.merge_cell(2, 0, 2, 2)
+        tb_inside.merge_cell(3, 0, 3, 2)
+        tb_inside.merge_cell(4, 0, 4, 2)
 
 
-       
         num_calucate = 2 * row + 1 if col == 0 else 2 * row + 2
         num_calucate = 2 * row + 1 if col == 0 else 2 * row + 2
         p = ParagraphBase(tb_inside.get_cell_paragraph(0, 0, align="left"))
         p = ParagraphBase(tb_inside.get_cell_paragraph(0, 0, align="left"))
         p.add_run_to_p(num_dict[num_calucate], bold=True, size=22, font_name="MS Gothic")
         p.add_run_to_p(num_dict[num_calucate], bold=True, size=22, font_name="MS Gothic")
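
num_calucate converts a (row, col) position in the two-column word-card grid into a 1-based card number that runs left-to-right across each row; the result then indexes num_dict for the circled glyph (❶, ❷, ...). The mapping in isolation:

    def card_number(row: int, col: int) -> int:
        # left column takes the odd number, right column the even one
        return 2 * row + 1 if col == 0 else 2 * row + 2

    assert [card_number(r, c) for r in range(3) for c in range(2)] == [1, 2, 3, 4, 5, 6]
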
@@ -215,19 +196,10 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside.set_cell_text(row=1, column=0, cell_text=data[1] + "  " + data[2], border=False, size=10, align="left",
         tb_inside.set_cell_text(row=1, column=0, cell_text=data[1] + "  " + data[2], border=False, size=10, align="left",
                                 bk_color=(240, 240, 240))
                                 bk_color=(240, 240, 240))
 
 
-       
-       
-       
-       
-       
-       
-       
-
-       
         cell_p = tb_inside.get_cell_paragraph(2, 0, align="left")
         cell_p = tb_inside.get_cell_paragraph(2, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
         cell_p_1 = ParagraphBase(cell_p)
-        cell_p_1.add_run_to_p(data[3], size=10, bold=True) 
-        cell_p_1.add_run_to_p("   " + data[4], size=8) 
+        cell_p_1.add_run_to_p(data[3], size=10, bold=True)
+        cell_p_1.add_run_to_p("   " + data[4], size=8)
 
 
         cell_p = tb_inside.get_cell_paragraph(3, 0, align="left")
         cell_p = tb_inside.get_cell_paragraph(3, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
         cell_p_1 = ParagraphBase(cell_p)
@@ -241,31 +213,25 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
 
     properties_chinese_map = {"adj": "形容词", "n": "名词", "interj": "感叹词", "conj": "连词", "num": "数字", "art": "冠词",
     properties_chinese_map = {"adj": "形容词", "n": "名词", "interj": "感叹词", "conj": "连词", "num": "数字", "art": "冠词",
                               "pron": "代词", "adv": "副词", "prep": "介词", "v": "动词"}
                               "pron": "代词", "adv": "副词", "prep": "介词", "v": "动词"}
-    strange_words_data = [] 
+    strange_words_data = []
     strange_words = json_data.get('StrangeWords')
     strange_words = json_data.get('StrangeWords')
-    qrcode_thread = [] 
+    qrcode_thread = []
     qrcode_result = {}
     qrcode_result = {}
 
 
     for item in strange_words:
     for item in strange_words:
-        spell = item['Spell'] 
-        word_id = item['WordId'] 
+        spell = item['Spell']
+        word_id = item['WordId']
         en = "" if not item.get("SymbolsEn", "") else item.get("SymbolsEn")
         en = "" if not item.get("SymbolsEn", "") else item.get("SymbolsEn")
         am = "" if not item.get("SymbolsAm", "") else item.get("SymbolsAm")
         am = "" if not item.get("SymbolsAm", "") else item.get("SymbolsAm")
 
 
-        symbols_en = "英" + f'[{en}]' 
-        symbols_am = "美" + f'[{am}]' 
-
-       
-       
-       
-       
-       
+        symbols_en = "英" + f'[{en}]'
+        symbols_am = "美" + f'[{am}]'
 
 
-        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']]) 
-        word_meanings = item.get('Meaning', "") 
+        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']])
+        word_meanings = item.get('Meaning', "")
         word_changes_list = []
         word_changes_list = []
-        for idx, s in enumerate(item["WordChanges"],start=1):
-            s_type,s_spell = s['Type'], s['Spell']
+        for idx, s in enumerate(item["WordChanges"], start=1):
+            s_type, s_spell = s['Type'], s['Spell']
             if "原型" in s_type or "大小写" in s_type:
             if "原型" in s_type or "大小写" in s_type:
                 continue
                 continue
             tail = '\n' if idx != len(item["WordChanges"]) else ''
             tail = '\n' if idx != len(item["WordChanges"]) else ''
@@ -276,12 +242,12 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
             sentences = item['Sentences'][0]['English'] + '\n' + item['Sentences'][0]['Chinese']
             sentences = item['Sentences'][0]['English'] + '\n' + item['Sentences'][0]['Chinese']
         else:
         else:
             sentences = ""
             sentences = ""
-       
+
         single_word_tuple = (spell, symbols_en, symbols_am, word_properties, word_meanings,
         single_word_tuple = (spell, symbols_en, symbols_am, word_properties, word_meanings,
                              "词汇变形", word_changes, "例句", sentences)
                              "词汇变形", word_changes, "例句", sentences)
         strange_words_data.append(single_word_tuple)
         strange_words_data.append(single_word_tuple)
 
 
-    rows = math.ceil(len(strange_words_data) / 2) 
+    rows = math.ceil(len(strange_words_data) / 2)
     tb_outside = Table(docx, rows=rows, cols=2, tb_name="外层框架")
     tb_outside = Table(docx, rows=rows, cols=2, tb_name="外层框架")
     tb_outside.set_tb_colum_width(width=[230, 230])
     tb_outside.set_tb_colum_width(width=[230, 230])
 
 
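math.ceil(len(...) / 2) sizes the outer table so an odd number of word cards still gets a final half-filled row. Row count plus row-major chunking in isolation:

    import math

    cards = ["alpha", "beta", "gamma", "delta", "epsilon"]
    rows = math.ceil(len(cards) / 2)                 # 5 cards -> 3 rows
    grid = [cards[2 * r:2 * r + 2] for r in range(rows)]
    assert grid == [["alpha", "beta"], ["gamma", "delta"], ["epsilon"]]
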
@@ -301,12 +267,10 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
 
 @time_use
 @time_use
 def section_5(docx: Word, json_data, *args, **kwargs):
 def section_5(docx: Word, json_data, *args, **kwargs):
-   
     copy_word_list = [i['Meaning'] for i in json_data.get('StrangeWords')]
     copy_word_list = [i['Meaning'] for i in json_data.get('StrangeWords')]
-    random_copy_word_list = copy_word_list * 3 
+    random_copy_word_list = copy_word_list * 3
     shuffle(random_copy_word_list)
     shuffle(random_copy_word_list)
 
 
-   
     first_copy_word_list = copy_word_list.copy()
     first_copy_word_list = copy_word_list.copy()
     copy_word_list_add_num = [f"{i} ({idx})" for idx, i in enumerate(first_copy_word_list, start=1)]
     copy_word_list_add_num = [f"{i} ({idx})" for idx, i in enumerate(first_copy_word_list, start=1)]
     shuffle(copy_word_list_add_num)
     shuffle(copy_word_list_add_num)
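
section_5 prepares two drill lists from the word meanings: the meanings tripled and shuffled for copy practice, and a singly numbered shuffled list whose "(n)" suffix lets the student trace each meaning back to word n. Reduced to essentials (seeded only to make the demo deterministic):

    from random import seed, shuffle

    meanings = ["好的", "苹果", "跑"]
    seed(0)

    random_copy_word_list = meanings * 3             # three copy passes per meaning
    shuffle(random_copy_word_list)

    numbered = [f"{m} ({idx})" for idx, m in enumerate(meanings, start=1)]
    shuffle(numbered)                                # e.g. ['苹果 (2)', '跑 (3)', '好的 (1)']
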
@@ -369,22 +333,20 @@ def section_6(docx: Word, json_data, *args, **kwargs):
 
 
 @time_use
 @time_use
 def section_7(docx: Word, json_data, *args, **kwargs):
 def section_7(docx: Word, json_data, *args, **kwargs):
-   
     def wanxing(index, article_single):
     def wanxing(index, article_single):
         article_id = article_single['Id']
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
         article_length = article_single['AllWordAmount']
-       
+
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-       
+
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
 
-       
         select_text = []
         select_text = []
         for ques_index, candidates in enumerate(article_single['Questions'], start=1):
         for ques_index, candidates in enumerate(article_single['Questions'], start=1):
             single_select_text = ''
             single_select_text = ''
             for s in candidates['Candidates']:
             for s in candidates['Candidates']:
                 single_select_text += s['Label'] + '. '
                 single_select_text += s['Label'] + '. '
-                participle = s['Participle'] 
+                participle = s['Participle']
                 if participle:
                 if participle:
                     single_select_text += participle + ' \n'
                     single_select_text += participle + ' \n'
                 else:
                 else:
@@ -393,14 +355,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 
             select_text.append(f"{ques_index}. {single_select_text}")
             select_text.append(f"{ques_index}. {single_select_text}")
 
 
-       
         all_select_text = "\n".join(select_text)
         all_select_text = "\n".join(select_text)
 
 
-       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
         article_main_list = article_main.split(" ")
 
 
-       
         explanatory_words = "\n\n".join(
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} [{i['SymbolsEn']}] [{i['SymbolsAm']}] {i['Meaning']}" for index, i in
             [f"{index}. {i['Spell']} [{i['SymbolsEn']}] [{i['SymbolsAm']}] {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -442,8 +401,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="完形填空")
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="完形填空")
         tb2.set_tb_colum_width(width=[320, 140])
         tb2.set_tb_colum_width(width=[320, 140])
 
 
-       
-       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
             word = re.search(r"\[(\d+)]", w)
@@ -454,7 +411,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else: 
+                else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
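
The run loop above styles each token by looking for an inline [meaning-id] tag: bold if the id belongs to StrangeWords, italic if it belongs to the ExplanatoryWords glosses, plain otherwise. The decision logic in isolation (the int(...) extraction is an assumption about the lines elided from this hunk):

    import re

    strange_words_ids = {101}
    explanatory_words_ids = {202}

    def classify(token: str) -> str:
        m = re.search(r"\[(\d+)]", token)
        if m:
            meaning_id = int(m.group(1))
            if meaning_id in strange_words_ids:
                return "bold"
            if meaning_id in explanatory_words_ids:
                return "italic"
        return "plain"

    assert classify("apple[101]") == "bold"
    assert classify("pear[202]") == "italic"
    assert classify("banana") == "plain"
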
@@ -470,42 +427,40 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
         docx.add_blank_paragraph()
 
 
-   
     def reading(index, article_single):
     def reading(index, article_single):
 
 
         def single_yuedu(index, a):
         def single_yuedu(index, a):
             article_id = a['Id']
             article_id = a['Id']
-            article_length = a['AllWordAmount'] 
+            article_length = a['AllWordAmount']
 
 
-            strange_words_ids = set() 
-            explanatory_words_ids = set() 
-            bold_word = set() 
-            italics_word = set() 
-            italics_index_dict = {} 
+            strange_words_ids = set()
+            explanatory_words_ids = set()
+            bold_word = set()
+            italics_word = set()
+            italics_index_dict = {}
 
 
             for i in json_data['StrangeWords']:
             for i in json_data['StrangeWords']:
                 strange_words_ids.add(i['MeanId'])
                 strange_words_ids.add(i['MeanId'])
                 bold_word.add(i['Spell'])
                 bold_word.add(i['Spell'])
                 bold_word.update([change_word['Spell'] for change_word in i['WordChanges']])
                 bold_word.update([change_word['Spell'] for change_word in i['WordChanges']])
-            for italics_index,ii in enumerate(a['ExplanatoryWords'], start=1):
+            for italics_index, ii in enumerate(a['ExplanatoryWords'], start=1):
                 explanatory_words_ids.add(ii['MeaningId'])
                 explanatory_words_ids.add(ii['MeaningId'])
                 italics_word.add(ii['Spell'])
                 italics_word.add(ii['Spell'])
                 if 'WordChanges' in ii:
                 if 'WordChanges' in ii:
                     italics_word.update([change_word['Spell'] for change_word in ii['WordChanges']])
                     italics_word.update([change_word['Spell'] for change_word in ii['WordChanges']])
-                    italics_index_dict.update({change_word['Spell']:f"[{italics_index}]" for change_word in ii['WordChanges']})
-               
+                    italics_index_dict.update({change_word['Spell']: f"[{italics_index}]" for change_word in ii['WordChanges']})
+
                 italics_index_dict[ii['MeaningId']] = f"[{italics_index}]"
                 italics_index_dict[ii['MeaningId']] = f"[{italics_index}]"
                 italics_index_dict[ii['Spell']] = f"[{italics_index}]"
                 italics_index_dict[ii['Spell']] = f"[{italics_index}]"
 
 
-           
             select_text = []
             select_text = []
             for ques_index, candidates in enumerate(a['Questions'], start=1):
             for ques_index, candidates in enumerate(a['Questions'], start=1):
                 single_select_text = ''
                 single_select_text = ''
-               
-                subject = candidates['Subject'] + '\n' 
+
+                subject = candidates['Subject'] + '\n'
                 for s in candidates['Candidates']:
                 for s in candidates['Candidates']:
-                    single_select_text += s['Label'] + '. ' 
-                    participle = s['Participle'] 
+                    single_select_text += s['Label'] + '. '
+                    participle = s['Participle']
                     if participle:
                     if participle:
                         single_select_text += participle + ' \n'
                         single_select_text += participle + ' \n'
                     else:
                     else:
@@ -513,14 +468,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         single_select_text += text + ' \n'
                         single_select_text += text + ' \n'
                 select_text.append(str(ques_index) + ". " + subject + single_select_text)
                 select_text.append(str(ques_index) + ". " + subject + single_select_text)
 
 
-           
             all_select_text = "\n".join(select_text)
             all_select_text = "\n".join(select_text)
 
 
-           
             article_main: str = a['English'] + "\n\n郑重提示:认真看完全文再看问题。\n" + all_select_text
             article_main: str = a['English'] + "\n\n郑重提示:认真看完全文再看问题。\n" + all_select_text
             article_main_list = split_text_to_word_punctuation(article_main)
             article_main_list = split_text_to_word_punctuation(article_main)
 
 
-           
             explanatory_words = "\n\n".join(
             explanatory_words = "\n\n".join(
                 [f"{index}. {i['Spell']}\n [{i['SymbolsEn']}] [{i['SymbolsAm']}]\n {i['Meaning']}" for index, i in
                 [f"{index}. {i['Spell']}\n [{i['SymbolsEn']}] [{i['SymbolsAm']}]\n {i['Meaning']}" for index, i in
                  enumerate(a['ExplanatoryWords'], start=1)])
                  enumerate(a['ExplanatoryWords'], start=1)])
@@ -536,7 +488,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="阅读")
             tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="阅读")
             tb2.set_tb_colum_width(width=[320, 140])
             tb2.set_tb_colum_width(width=[320, 140])
 
 
-           
             tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
             tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
             for w in article_main_list:
             for w in article_main_list:
                 word = re.search(r"\[(\d+)]", w)
                 word = re.search(r"\[(\d+)]", w)
@@ -551,7 +502,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     else:
                     else:
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
 
 
-                else: 
+                else:
                     if w in bold_word:
                     if w in bold_word:
                         tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                         tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     elif w in italics_word:
                     elif w in italics_word:
@@ -560,7 +511,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     else:
                     else:
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
 
 
-            tb2.set_cell_text(0, 1, explanatory_words, size=10.5, font_color=(80, 80, 80), align="left", centre=False,line_spacing=300)
+            tb2.set_cell_text(0, 1, explanatory_words, size=10.5, font_color=(80, 80, 80), align="left", centre=False, line_spacing=300)
 
 
             docx.add_blank_paragraph()
             docx.add_blank_paragraph()
             tail_zhushi = """完成时间:_____点_____分_____秒,本篇用时:_____秒。"""
             tail_zhushi = """完成时间:_____点_____分_____秒,本篇用时:_____秒。"""
@@ -569,7 +520,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 
         def top_header():
         def top_header():
             sub_title_maker(docx, "阅读提升练", "智能匹配难度,轻松提升阅读", "鲍利提分, 高效学习专家")
             sub_title_maker(docx, "阅读提升练", "智能匹配难度,轻松提升阅读", "鲍利提分, 高效学习专家")
-           
+
             tb = Table(docx, 1, 1, tb_name="真题强化练", border=True)
             tb = Table(docx, 1, 1, tb_name="真题强化练", border=True)
             tb.set_tb_colum_width(0, 460)
             tb.set_tb_colum_width(0, 460)
             text = ["阅读中不认识的单词,尽量猜测词义,并用斜线划掉,以便拍照报告给我们。\n",
             text = ["阅读中不认识的单词,尽量猜测词义,并用斜线划掉,以便拍照报告给我们。\n",
@@ -590,27 +541,25 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     pp.add_run_to_p(t, size=10)
                     pp.add_run_to_p(t, size=10)
 
 
             docx.add_blank_paragraph()
             docx.add_blank_paragraph()
+
         "---------------------开始单篇运行---------------------"
         "---------------------开始单篇运行---------------------"
-        if index == 1: 
+        if index == 1:
             top_header()
             top_header()
         single_yuedu(index, article_single)
         single_yuedu(index, article_single)
 
 
-
-   
     def seven_to_five(index, article_single):
     def seven_to_five(index, article_single):
         article_id = article_single['Id']
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
         article_length = article_single['AllWordAmount']
-       
+
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-       
+
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
 
-       
         select_text = []
         select_text = []
         for ques_index, s_candidates in enumerate(article_single['Candidates'], start=1):
         for ques_index, s_candidates in enumerate(article_single['Candidates'], start=1):
             single_select_text = ''
             single_select_text = ''
             single_select_text += s_candidates['Label'] + '. '
             single_select_text += s_candidates['Label'] + '. '
-            participle = s_candidates['Participle'] 
+            participle = s_candidates['Participle']
             if participle:
             if participle:
                 single_select_text += participle
                 single_select_text += participle
             else:
             else:
@@ -619,14 +568,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 
             select_text.append(f"{single_select_text}")
             select_text.append(f"{single_select_text}")
 
 
-       
         all_select_text = "\n".join(select_text)
         all_select_text = "\n".join(select_text)
 
 
-       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
         article_main_list = article_main.split(" ")
 
 
-       
         explanatory_words = "\n\n".join(
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} [{i['SymbolsEn']}] [{i['SymbolsAm']}] {i['Meaning']}" for index, i in
             [f"{index}. {i['Spell']} [{i['SymbolsEn']}] [{i['SymbolsAm']}] {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -657,7 +603,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
         tb2.set_tb_colum_width(width=[320, 140])
         tb2.set_tb_colum_width(width=[320, 140])
 
 
-       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
             word = re.search(r"\[(\d+)]", w)
@@ -668,7 +613,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else: 
+                else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -678,18 +623,17 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_blank_paragraph()
         docx.add_blank_paragraph()
 
 
     "判断题型;根据题型选择----------------------------"
     "判断题型;根据题型选择----------------------------"
-   
+
     all_article_length = 0
     all_article_length = 0
 
 
     for index, article_single in enumerate(json_data['Articles'], start=1):
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category'] 
+        article_type = article_single['Category']
 
 
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
 
 
         assert article_type in article_type_select
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single) 
+        article_type_select[article_type](index, article_single)
 
 
-       
         article_length = article_single['AllWordAmount']
         article_length = article_single['AllWordAmount']
         all_article_length += article_length
         all_article_length += article_length
 
 
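Article rendering dispatches on the Category field through a dict of handlers guarded by an assert, rather than an if/elif chain. The same pattern stripped down:

    def reading(index, article): return f"{index}: reading"
    def wanxing(index, article): return f"{index}: cloze"
    def seven_to_five(index, article): return f"{index}: seven-to-five"

    article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}

    article_single = {"Category": 2}
    article_type = article_single["Category"]
    assert article_type in article_type_select       # fail fast on an unknown Category
    assert article_type_select[article_type](1, article_single) == "1: cloze"
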
@@ -703,53 +647,49 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 
 @time_use
 @time_use
 def section_9(docx: Word, json_data, *args, **kwargs):
 def section_9(docx: Word, json_data, *args, **kwargs):
-    def wanxing(index,article_count, article_single):
+    def wanxing(index, article_count, article_single):
         chinese_article = article_single['Chinese']
         chinese_article = article_single['Chinese']
-        all_analysis = '' 
+        all_analysis = ''
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
 
 
-       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip() 
-            abcd_label = '' 
+            analysis = question_item['Analysis'].strip()
+            abcd_label = ''
 
 
             candidates = question_item['Candidates']
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label'].strip()
                     abcd_label += abcd_selected['Label'].strip()
 
 
-            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n" 
+            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n"
 
 
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph("全文参考译文", chinese_font_name="微软雅黑", dq=15, dh=5, bold=True)
         docx.add_paragraph("全文参考译文", chinese_font_name="微软雅黑", dq=15, dh=5, bold=True)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
 
-   
-    def reading(index,article_count, article_single):
+    def reading(index, article_count, article_single):
         """
         """
         index : 外面传入,从1开始。如果只有
         index : 外面传入,从1开始。如果只有
         """
         """
-        all_analysis = '' 
-        all_difficult_sentences = [] 
+        all_analysis = ''
+        all_difficult_sentences = []
 
 
         chinese_article = article_single['Chinese']
         chinese_article = article_single['Chinese']
 
 
-       
         questions = article_single['Questions']
         questions = article_single['Questions']
         for ques_index, question_item in enumerate(questions, start=1):
         for ques_index, question_item in enumerate(questions, start=1):
-            analysis = question_item['Analysis'].strip("\n") 
-            abcd_label = '' 
+            analysis = question_item['Analysis'].strip("\n")
+            abcd_label = ''
 
 
             candidates = question_item['Candidates']
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label'].strip("\n")
                     abcd_label += abcd_selected['Label'].strip("\n")
 
 
-            new_line = "" if ques_index==len(questions) else "\n"
+            new_line = "" if ques_index == len(questions) else "\n"
             all_analysis += f"{ques_index}.{abcd_label}  {analysis}{new_line}"
             all_analysis += f"{ques_index}.{abcd_label}  {analysis}{new_line}"
 
 
-       
-        if index!=article_count:
+        if index != article_count:
             all_analysis += '\n'
             all_analysis += '\n'
 
 
         docx.add_paragraph(f"Passage {index}", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True, size=16)
         docx.add_paragraph(f"Passage {index}", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True, size=16)
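
The answer key concatenates the Label of every candidate flagged IsRight (so multi-answer questions come out as, e.g., "CD"), appends the Analysis text, and skips the newline after the last question. Self-contained:

    questions = [
        {"Analysis": "细节题,见第一段。", "Candidates": [
            {"Label": "A", "IsRight": True}, {"Label": "B", "IsRight": False}]},
        {"Analysis": "主旨题。", "Candidates": [
            {"Label": "C", "IsRight": True}, {"Label": "D", "IsRight": True}]},
    ]

    all_analysis = ""
    for ques_index, q in enumerate(questions, start=1):
        abcd_label = "".join(c["Label"] for c in q["Candidates"] if c["IsRight"])
        new_line = "" if ques_index == len(questions) else "\n"
        all_analysis += f"{ques_index}.{abcd_label}  {q['Analysis']}{new_line}"

    assert all_analysis == "1.A  细节题,见第一段。\n2.CD  主旨题。"
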
@@ -759,20 +699,19 @@ def section_9(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph(all_analysis, size=9)
 
 
-
-    def seven_to_five(index,article_count, article_single):
+    def seven_to_five(index, article_count, article_single):
         chinese_article = article_single['Chinese']
         chinese_article = article_single['Chinese']
-        all_analysis = '' 
+        all_analysis = ''
 
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
-       
+
         for q_index, question_item in enumerate(article_single['Questions'], start=1):
         for q_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'] 
-            abcd_label = '' 
+            analysis = question_item['Analysis']
+            abcd_label = ''
 
 
             candidates = question_item['Candidates']
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label']
                     abcd_label += abcd_selected['Label']
             all_analysis += f"{q_index}.{abcd_label}  {analysis}\n"
             all_analysis += f"{q_index}.{abcd_label}  {analysis}\n"
 
 
@@ -782,14 +721,14 @@ def section_9(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
 
     "判断题型;根据题型选择----------------------------"
     "判断题型;根据题型选择----------------------------"
-    sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯","鲍利提分,你的智能教练")
+    sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯", "鲍利提分,你的智能教练")
     articles = json_data['Articles']
     articles = json_data['Articles']
     article_count = len(articles)
     article_count = len(articles)
     for index, article_single in enumerate(articles, start=1):
     for index, article_single in enumerate(articles, start=1):
-        article_type = article_single['Category'] 
+        article_type = article_single['Category']
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         assert article_type in article_type_select
         assert article_type in article_type_select
-        article_type_select[article_type](index,article_count, article_single) 
+        article_type_select[article_type](index, article_count, article_single)
 
 
     docx.add_docx_component("make_docx_demo/word_component/blank.docx")
     docx.add_docx_component("make_docx_demo/word_component/blank.docx")
     docx.add_page_section()
     docx.add_page_section()
@@ -837,20 +776,15 @@ def section_10(docx: Word, json_data, scanpage_format, *args, **kwargs):
 
 
 @time_use
 @time_use
 def two_check_page(docx: Word, json_data, *args, **kwargs):
 def two_check_page(docx: Word, json_data, *args, **kwargs):
-   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
-        page_sub_title = "词汇训练" 
+        page_sub_title = "词汇训练"
 
 
         if len(word_data_list) % 2 != 0:
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("") 
+            word_data_list.append("")
 
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[40, 100, 100])
         tb.set_tb_colum_width(width=[40, 100, 100])
 
 
-       
-       
-       
-
         tb.set_tb_colum_width(0, 100)
         tb.set_tb_colum_width(0, 100)
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
@@ -862,7 +796,7 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
 
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
 
-        half_count = int(len(word_data_list) / 2) 
+        half_count = int(len(word_data_list) / 2)
         for index, row in enumerate(range(half_count)):
         for index, row in enumerate(range(half_count)):
             first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
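
The filter table prints the (pre-padded, even-length) word list in two side-by-side columns by pairing row with row + half_count, so entries read top-to-bottom down the left column and continue down the right:

    word_data_list = ["a", "b", "c", "d", "e", "f"]   # padded to even length by the caller
    half_count = int(len(word_data_list) / 2)
    pairs = [(word_data_list[row], word_data_list[row + half_count])
             for row in range(half_count)]
    assert pairs == [("a", "d"), ("b", "e"), ("c", "f")]
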
@@ -875,14 +809,13 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
         blank_count = " " * 80
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break() 
+        docx.add_page_break()
 
 
-   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
                           foot_description, foot_description2, article_type, word_data_list2):
-        page_sub_title = "词汇训练" 
+        page_sub_title = "词汇训练"
         if len(word_data_list2) % 2 != 0:
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""]) 
+            word_data_list2.append(["", ""])
 
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[80, 100, 120, 150, 70])
         tb.set_tb_colum_width(width=[80, 100, 120, 150, 70])
@@ -914,73 +847,56 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
             cell3 = f"{spell2}" if spell2 else ""
             cell3 = f"{spell2}" if spell2 else ""
             cell4 = f"{total_row + row + 1}. {meaning2}" if meaning2 else ""
             cell4 = f"{total_row + row + 1}. {meaning2}" if meaning2 else ""
 
 
-            data = [f"{spell1}", f"{row + 1}. {meaning1}", cell3, cell4] 
+            data = [f"{spell1}", f"{row + 1}. {meaning1}", cell3, cell4]
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9], alignment=['right', 'left', 'right', 'left'])
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9], alignment=['right', 'left', 'right', 'left'])
 
 
-       
         tb.set_row_height(13.8)
         tb.set_row_height(13.8)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
 
 
         docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
         docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                            font_name="仿宋", dq=5)
                            font_name="仿宋", dq=5)
 
 
-
-   
-    student_name = json_data.get("StudentInfo").get("StudentName", '') 
-    class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "") 
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
+    student_name = json_data.get("StudentInfo").get("StudentName", '')
+    class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "")
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
     article_type = 1
     article_type = 1
     try:
     try:
-        article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category'] 
+        article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category']
     except Exception as e:
     except Exception as e:
         log_err_e(e, "学案类型不存在就取1,词汇突击里面只有阅读理解")
         log_err_e(e, "学案类型不存在就取1,词汇突击里面只有阅读理解")
 
 
-   
-
     """---------------------------------------------------------------------------------"""
     """---------------------------------------------------------------------------------"""
     screening_scanPages = json_data['ScreeningScanPages']
     screening_scanPages = json_data['ScreeningScanPages']
     for index, page in enumerate(screening_scanPages, start=1):
     for index, page in enumerate(screening_scanPages, start=1):
         page_id = str(page['PageId']).rjust(11, "0")
         page_id = str(page['PageId']).rjust(11, "0")
 
 
-        page_title = page['Title'] 
-        page_sub_title = page['SubTitle'] 
-        foot_description = page['FootDescription'] 
-        foot_description2 = page['FootDescription2'] 
-       
+        page_title = page['Title']
+        page_sub_title = page['SubTitle']
+        foot_description = page['FootDescription']
+        foot_description2 = page['FootDescription2']
 
 
         word_data_list1 = []
         word_data_list1 = []
         word_data_list2 = []
         word_data_list2 = []
-        for i in page['FilterTable']['Items']: 
+        for i in page['FilterTable']['Items']:
             word_data_list1.append(i['Spell'])
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
 
-       
-       
-       
-
-       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
                           foot_description, foot_description2, article_type, word_data_list2)
-       
-        if index!=len(screening_scanPages):
-            pass 
-        docx.add_page_break()
 
 
+        if index != len(screening_scanPages):
+            pass
+        docx.add_page_break()
 
 
 
 
 def old_two_check_page(docx: Word, json_data, **kwargs):
 def old_two_check_page(docx: Word, json_data, **kwargs):
-   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
         if len(word_data_list) % 2 != 0:
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("") 
+            word_data_list.append("")
 
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[140, 100, 100])
         tb.set_tb_colum_width(width=[140, 100, 100])
 
 
-       
-       
-       
-
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 2, f"{page_title}\n{page_sub_title}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 2, f"{page_title}\n{page_sub_title}", border=False, size=8, dh=2)
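
Worth noting from the hunk above: two_check_page now defaults article_type to 1 and only overrides it when the deep WordAndArticleContents[0].Articles[0].Category path exists, logging the miss otherwise. The fallback pattern in isolation (the project catches the broad Exception via log_err_e; a narrower except is shown here, and the payload is hypothetical):

    json_data = {"WordAndArticleContents": []}        # hypothetical payload with no Articles

    article_type = 1                                  # default: reading comprehension
    try:
        article_type = json_data["WordAndArticleContents"][0]["Articles"][0]["Category"]
    except (KeyError, IndexError) as e:
        print(f"Category missing, keep default 1: {e!r}")

    assert article_type == 1
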
@@ -991,7 +907,7 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
 
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
 
-        half_count = int(len(word_data_list) / 2) 
+        half_count = int(len(word_data_list) / 2)
         for index, row in enumerate(range(half_count)):
         for index, row in enumerate(range(half_count)):
             first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
@@ -1004,21 +920,16 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
         blank_count = " " * 80
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break() 
+        docx.add_page_break()
 
 
-   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
                           foot_description, foot_description2, article_type, word_data_list2):
         if len(word_data_list2) % 2 != 0:
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""]) 
+            word_data_list2.append(["", ""])
 
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[80, 100, 120, 150, 70])
         tb.set_tb_colum_width(width=[80, 100, 120, 150, 70])
 
 
-       
-       
-       
-
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8)
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8)
         tb.set_cell_text(0, 2, f"{page_id}", border=False, size=16, dh=2, bold=True, font_name="黑体")
         tb.set_cell_text(0, 2, f"{page_id}", border=False, size=16, dh=2, bold=True, font_name="黑体")
@@ -1051,13 +962,13 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
 
 
-            data = [cell1,cell2, cell3, cell4] 
+            data = [cell1, cell2, cell3, cell4]
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
 
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_row_height(13.6, first_row_h=6)
         tb.set_row_height(13.6, first_row_h=6)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
-        if article_type == 1: 
+        if article_type == 1:
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
             docx.add_paragraph(foot_description2, align="right", size=8, chinese_font_name="仿宋")
@@ -1065,51 +976,45 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
 
-   
-    student_name = json_data.get("StudentInfo").get("StudentName", '') 
-    class_name = json_data.get("StudentInfo").get("ClassName", '') 
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
-    article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category'] 
-    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage'] 
+    student_name = json_data.get("StudentInfo").get("StudentName", '')
+    class_name = json_data.get("StudentInfo").get("ClassName", '')
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
+    article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category']
+    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage']
 
     """---------------------------------------------------------------------------------"""
     for index, page in enumerate(json_data['ScreeningScanPages'], start=1):
         page_id = str(page['PageId']).rjust(11, "0")
 
-       
         if index >= 2:
             docx.add_page_break()
 
-        page_title = page['Title'] 
-        page_sub_title = page['SubTitle'] 
-        foot_description = page['FootDescription'] 
-        foot_description2 = page['FootDescription2'] 
+        page_title = page['Title']
+        page_sub_title = page['SubTitle']
+        foot_description = page['FootDescription']
+        foot_description2 = page['FootDescription2']
 
         word_data_list1 = []
         word_data_list2 = []
 
-       
-        item_list:list = page['FilterTable']['Items']
+        item_list: list = page['FilterTable']['Items']
         item_count = len(item_list)
-        if item_count<100:
-            item_list.extend([{"Spell":"","Meaning":""} for _ in range(100-item_count)])
+        if item_count < 100:
+            item_list.extend([{"Spell": "", "Meaning": ""} for _ in range(100 - item_count)])
 
-        for i in page['FilterTable']['Items']: 
+        for i in page['FilterTable']['Items']:
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
-       
         if is_add_empty_filter_page:
             empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list1)
 
-       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
 
 
 @time_use
 def other(docx, json_data, *args, **kwargs):
-   
     sections = docx.doc.sections
     for section in sections[:-1]:
         section.top_margin = Inches(0.3)
@@ -1127,21 +1032,20 @@ def other(docx, json_data, *args, **kwargs):
 
 
 def start_make_word(json_data, document_format, scanpage_format):
-    parent_path = "make_docx_demo/file_result/" 
+    parent_path = "make_docx_demo/file_result/"
     if not os.path.exists(parent_path):
         os.makedirs(parent_path)
     try:
-        exercise_id = json_data['ExerciseId'] 
+        exercise_id = json_data['ExerciseId']
 
-       
         docx = Word(save_file_name=f"{parent_path}{exercise_id}.docx",
                     start_template_name="make_docx_demo/word_component/start_template.docx")
-       
+
         section_1(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-       
+
        section_4(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-       
-        for exercise_json in json_data['WordAndArticleContents']: 
+
+        for exercise_json in json_data['WordAndArticleContents']:
             section_4_1(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
             section_5(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
             section_6(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
@@ -1149,9 +1053,9 @@ def start_make_word(json_data, document_format, scanpage_format):
             section_9(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
 
         if scanpage_format == 1:
-           
+
             two_check_page(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-           
+
             old_two_check_page(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
         elif scanpage_format == 2:
             section_10(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
@@ -1178,6 +1082,5 @@ if __name__ == '__main__':
     t = time.time()
     os.chdir("..")
 
-   
     start_make_word(test_json1, 1, 1)
     print(time.time() - t)

+ 10 - 8
make_docx_demo/new_word2pdf.py

@@ -1,41 +1,43 @@
 # -*- coding:utf-8 -*-
 
-import os
 import time
-from docx2pdf import convert
-import win32com.client
-from concurrent.futures import ProcessPoolExecutor
 from multiprocessing import Process
+
 import pythoncom
+import win32com.client
+from docx2pdf import convert
 
 
 def convert_word_to_pdf(input_file):
     output_file = input_file.replace('.docx', '.pdf')
     word = win32com.client.Dispatch("Word.Application")
-    word.Visible = False 
+    word.Visible = False
     doc = word.Documents.Open(input_file)
-    doc.SaveAs(output_file, FileFormat=17) 
+    doc.SaveAs(output_file, FileFormat=17)
     doc.Close()
     word.Quit()
 
+
 def convert_word_to_pdf2(input_file):
     pythoncom.CoInitialize()
     convert(input_file)
     pythoncom.CoUninitialize()
 
+
 if __name__ == '__main__':
     import os
+
     files = os.listdir(r"C:\Users\86131\Desktop\回收\潘资料")
     print(files)
 
     t = time.time()
     p_lists = []
     for file in files:
-        p1 = Process(target=convert_word_to_pdf2, args=(r"C:\\Users\\86131\\Desktop\\回收\\潘资料\\"+file,))
+        p1 = Process(target=convert_word_to_pdf2, args=(r"C:\\Users\\86131\\Desktop\\回收\\潘资料\\" + file,))
         p1.start()
         p_lists.append(p1)
 
     for p in p_lists:
         p.join()
 
-    print(time.time() - t)
+    print(time.time() - t)

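The two converters above take different routes to the same Word COM server: convert_word_to_pdf scripts Word directly through win32com, while convert_word_to_pdf2 wraps docx2pdf and does its own CoInitialize/CoUninitialize, which is what lets the __main__ block fan it out across separate processes. A minimal sketch of the same fan-out with a process pool instead of hand-managed Process objects (folder path and worker count are placeholders; Windows with Word installed is assumed):

# -*- coding:utf-8 -*-
# Hypothetical batch driver for convert_word_to_pdf2 above (not part of the commit).
import os
from multiprocessing import Pool

from make_docx_demo.new_word2pdf import convert_word_to_pdf2  # assumes repo root on sys.path


def batch_convert(folder, workers=4):
    # Each pool worker runs convert_word_to_pdf2, which initializes COM
    # for its own process before calling docx2pdf.convert.
    docx_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".docx")]
    with Pool(processes=workers) as pool:
        pool.map(convert_word_to_pdf2, docx_files)


if __name__ == '__main__':
    batch_convert(r"C:\path\to\docx_folder")  # placeholder folder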
+ 11 - 25
make_docx_demo/word2pdf.py

@@ -1,10 +1,11 @@
 # -*- coding=utf-8 -*-
-from docx2pdf import convert
-import pythoncom
-import time
 import os
+import time
 from threading import Lock
 
+import pythoncom
+from docx2pdf import convert
+
 ll = Lock()
 
 
@@ -14,24 +15,25 @@ def convert_word_to_pdf(pdf_name):
             ll.acquire()
             print('加锁,进入转pdf')
             pythoncom.CoInitialize()
-            convert(f'{pdf_name}.docx') 
+            convert(f'{pdf_name}.docx')
             for i in range(30):
                 if os.path.exists(f'{pdf_name}.pdf'):
                     break
                 time.sleep(0.5)
             break
         except Exception as ee:
-           
+
             print(ee)
         finally:
             pythoncom.CoUninitialize()
             print('解锁,转pdf完成')
-            ll.release() 
+            ll.release()
+
 
 def convert_word_to_pdf2(pdf_name):
     for cccou in range(3):
         try:
-            convert(f'{pdf_name}.docx') 
+            convert(f'{pdf_name}.docx')
             for i in range(30):
                 if os.path.exists(f'{pdf_name}.pdf'):
                     break
@@ -42,22 +44,6 @@ def convert_word_to_pdf2(pdf_name):
 
 
 if __name__ == '__main__':
-    import multiprocessing
-
-   
-   
-   
-
-   
-   
-   
-   
-   
-   
-   
-   
-   
-   
-   
+    pass
+
     #
-   

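word2pdf.py takes the opposite approach: instead of one process per file, a module-level Lock serializes every conversion through a single COM apartment, polling up to 15 seconds (30 × 0.5s) for the output PDF and retrying up to 3 times. A sketch of driving it from worker threads, where the internal lock simply makes the callers queue up (file names are placeholders):

# -*- coding=utf-8 -*-
# Illustrative threaded driver for convert_word_to_pdf above (not from the commit).
from threading import Thread

from make_docx_demo.word2pdf import convert_word_to_pdf  # assumes repo root on sys.path

names = ["report_001", "report_002"]  # placeholder base names, without the .docx extension
threads = [Thread(target=convert_word_to_pdf, args=(n,)) for n in names]
for t in threads:
    t.start()
for t in threads:
    t.join()  # conversions run one at a time; the module-level Lock serializes the COM work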
+ 6 - 9
make_docx_demo/word_component/make_rectangle.py

@@ -1,15 +1,16 @@
 # -*- coding:utf-8 -*-
 from random import randint
 
+
 def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color, font_size, boder_color, chinese_font, english_font, dash,
-                   shape_type='rect',rotate_angle=0,behindDoc=0):
+                   shape_type='rect', rotate_angle=0, behindDoc=0):
     """
     rotate_angle:角度,顺时针30,60,90等
     behindDoc为0浮于文字上方,为1浮于文字下方"""
     if x > 600: x = 600
     if y > 800: y = 800
     font_size = font_size * 2
-    boder_size = boder_size * 12700 
+    boder_size = boder_size * 12700
     dash_elem = '<a:prstDash val="dash"/>' if dash else ''
 
     idid = randint(1, 99999)
@@ -26,9 +27,7 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
         boder = f"""<a:lnRef idx="2"><a:srgbClr val="{boder_color}"/></a:lnRef>"""
         noboder = f"""<a:ln w="{boder_size}"><a:srgbClr val="{boder_color}"/>{dash_elem}</a:ln>"""
     else:
-       
-       
-       
+
         boder = """<a:lnRef idx="2"><a:noFill/></a:lnRef>"""
         noboder = """<a:ln w="12700"><a:noFill/></a:ln>"""
 
@@ -63,7 +62,6 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
     else:
         insert_text_xml = ''
 
-   
     shape_geom_map = {
         'rect': '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>',
         'circle': '<a:prstGeom prst="ellipse"><a:avLst/></a:prstGeom>',
@@ -82,7 +80,6 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
         'arc': '<a:prstGeom prst="arc"><a:avLst/></a:prstGeom>',
     }
 
-   
     shape_geom = shape_geom_map.get(shape_type, '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>')
 
     r = f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
@@ -151,7 +148,7 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
 										<wps:wsp>
 											<wps:cNvSpPr/>
 											<wps:spPr>
-												<a:xfrm rot="{60000*rotate_angle}">
+												<a:xfrm rot="{60000 * rotate_angle}">
 													<a:off x="0"
 													       y="0"/>
 													<a:ext cx="1777593"
@@ -224,4 +221,4 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
 		</w:sectPr>
 	</w:body>
 </w:document>"""
-    return r
+    return r

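make_shape_fun returns one complete WordprocessingML document as a string, so the cheapest smoke test is to render a single shape and confirm the markup parses; a sketch with made-up styling values (every argument below is illustrative, and it assumes the elided root element declares the w/wps/a namespaces, as a valid document part must):

# -*- coding:utf-8 -*-
# Smoke test: generate one dashed rectangle and verify the XML is well-formed.
from lxml import etree

from make_docx_demo.word_component.make_rectangle import make_shape_fun  # assumes repo root on sys.path

xml_str = make_shape_fun(
    text="示例", x=100, y=100, boder_size=1, width=200, height=80,
    font_color="000000", fill_color="FFFFFF", font_size=10,
    boder_color="FF0000", chinese_font="仿宋", english_font="Arial",
    dash=True, shape_type="rect", rotate_angle=30, behindDoc=0)

# Parse as bytes so lxml accepts the encoding declaration in the prolog.
etree.fromstring(xml_str.encode("utf-8"))  # raises XMLSyntaxError if the markup is broken
print("shape XML is well-formed:", len(xml_str), "chars")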
+ 104 - 62
mock/mock_request.py

@@ -1,40 +1,42 @@
 # -*- coding:utf-8 -*-
 #
-import json 
+import json
 import time
+from concurrent.futures import ThreadPoolExecutor, wait
 from functools import wraps
-from random import shuffle,sample,randint
-from threading import Thread
-from concurrent.futures import ThreadPoolExecutor,wait
+from random import shuffle, sample, randint
+from typing import List
 
 import httpx
 import requests
 from pydantic import BaseModel
-from typing import List
 
+product_adress = "http://111.231.167.191"
+test_address = "http://111.231.167.191:8004"
+test_address2 = "http://111.231.167.191:8003"
 
-product_adress = "http://111.231.167.191" 
-test_address = "http://111.231.167.191:8004" 
-test_address2 = "http://111.231.167.191:8003" 
+local_adress = "http://127.0.0.1:9000"
 
-local_adress = "http://127.0.0.1:9000" 
+use_address = test_address
 
-use_address = local_adress 
 
 class DifficultSentence(BaseModel):
     english: str
     chinese: str
 
+
 class Candidate(BaseModel):
     label: str
     text: str
     isRight: int
 
+
 class Question(BaseModel):
     trunk: str
     analysis: str
     candidates: List[Candidate]
 
+
 class Article(BaseModel):
     difficultSentences: List[DifficultSentence]
     usedMeanIds: List[int]
@@ -43,20 +45,21 @@ class Article(BaseModel):
     chineseArticle: str
     allWordAmount: int
 
+
 class ArticleData(BaseModel):
     articles: List[Article]
 
 
 def time_use(fn):
     @wraps(fn)
-    def cc(*args, **kwargs): 
+    def cc(*args, **kwargs):
         f_time = time.time()
         res = fn(*args, **kwargs)
 
         cha = round(time.time() - f_time, 3)
         if cha > 0.1:
             print(f'函数:{fn.__name__} 一共用时', cha, '秒')
-        return res 
+        return res
 
     return cc
 
@@ -79,7 +82,7 @@ def get_article():
                  "student_stage": 1, "vocabulary": 700, "class_id": 123456}
 
     r = requests.post(f"{use_address}/article", json=json_data)
-   
+
     key = r.json()['key']
     time.sleep(120)
     query_file_content(key)
@@ -89,7 +92,7 @@ def query_file_content(key):
     json_data = {"key": key}
     try:
         r = requests.post(f"{use_address}/query_oss_file", json=json_data)
-        r.raise_for_status() 
+        r.raise_for_status()
         response_data = r.json()
         assert response_data['wordCount'] > 0, "词数为0"
     except requests.RequestException as e:
@@ -104,9 +107,9 @@
 
 def get_audio():
     word = "cat"
-    r1 = requests.post(f"{use_address}/tts", json={"text": word, "type": 0}) 
-    r2 = requests.post(f"{use_address}/tts", json={"text": word, "type": 2}) 
-    r3 = requests.post(f"{use_address}/tts", json={"text": word, "type": 1}) 
+    r1 = requests.post(f"{use_address}/tts", json={"text": word, "type": 0})
+    r2 = requests.post(f"{use_address}/tts", json={"text": word, "type": 2})
+    r3 = requests.post(f"{use_address}/tts", json={"text": word, "type": 1})
     assert r1.json()['code'] == 200
     assert r2.json()['code'] == 200
     assert r3.status_code == 200
@@ -176,23 +179,43 @@ def get_article2_1():
                        {'spell': 'perception', 'meaning': '观念, 知觉, 觉察', 'word_id': 1174551, 'meaning_id': 3516, 'serial': 2749},
                        {'spell': 'violation', 'meaning': '妨碍, 侵犯, 违犯', 'word_id': 1174695, 'meaning_id': 4452, 'serial': 3528},
                        {'spell': 'convey', 'meaning': '表达', 'word_id': 830280, 'meaning_id': 4931, 'serial': 3938},
-                       {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063}
+                       {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063},
+                       {'spell': 'carry', 'meaning': '携带', 'word_id': 803106, 'meaning_id': 460, 'serial': 313},
+                       {'spell': 'area', 'meaning': '领域', 'word_id': 765328, 'meaning_id': 572, 'serial': 388},
+                       {'spell': 'lie', 'meaning': '说谎, 谎言', 'word_id': 963062, 'meaning_id': 602, 'serial': 409},
+                       {'spell': 'company', 'meaning': '陪伴', 'word_id': 822886, 'meaning_id': 642, 'serial': 433},
+                       {'spell': 'else', 'meaning': '别的, 另外的, 其他的', 'word_id': 869964, 'meaning_id': 654, 'serial': 443},
+                       {'spell': 'cover', 'meaning': '覆盖', 'word_id': 834220, 'meaning_id': 687, 'serial': 472},
+                       {'spell': 'effect', 'meaning': '引起', 'word_id': 866665, 'meaning_id': 709, 'serial': 486},
+                       {'spell': 'design', 'meaning': '设计, 计划', 'word_id': 848239, 'meaning_id': 714, 'serial': 490},
+                       {'spell': 'century', 'meaning': '世纪, 百年', 'word_id': 806994, 'meaning_id': 725, 'serial': 498},
+                       {'spell': 'above', 'meaning': '上面,之上,超过', 'word_id': 745232, 'meaning_id': 736, 'serial': 508},
+                       {'spell': 'sign', 'meaning': '手势, 符号, 签名', 'word_id': 1089428, 'meaning_id': 752, 'serial': 517},
+                       {'spell': 'remain', 'meaning': '保持不变', 'word_id': 1062570, 'meaning_id': 774, 'serial': 530},
+                       {'spell': 'line', 'meaning': '线, 画线', 'word_id': 964670, 'meaning_id': 777, 'serial': 532},
+                       {'spell': 'likely', 'meaning': '有可能的', 'word_id': 964153, 'meaning_id': 781, 'serial': 534},
+                       {'spell': 'fail', 'meaning': '失败, 不及格', 'word_id': 882595, 'meaning_id': 787, 'serial': 540},
+                       {'spell': 'control', 'meaning': '控制, 支配, 操纵', 'word_id': 829355, 'meaning_id': 794, 'serial': 545},
+                       {'spell': 'power', 'meaning': '权力, 力量', 'word_id': 1038172, 'meaning_id': 800, 'serial': 549},
+                       {'spell': 'reply', 'meaning': '回答, 回应, 答辩', 'word_id': 1063609, 'meaning_id': 817, 'serial': 563},
+                       {'spell': 'unless', 'meaning': '除非, 如果不', 'word_id': 1150079, 'meaning_id': 822, 'serial': 567},
+                       {'spell': 'offer', 'meaning': '提出, 提供', 'word_id': 1005291, 'meaning_id': 824, 'serial': 569}
                        ]
     shuffle(core_words_list)
-    core_words_chiose_list = sample(core_words_list,5)
+    core_words_chiose_list = sample(core_words_list, 15)
     json_data = {'core_words': core_words_chiose_list,
-                 'take_count': 8, 'student_stage': 2, 'demo_name': '春笋英语', "exercise_id": randint(100,999),
-                 "article_length": 120, "reading_level": 5}
+                 'take_count': 1, 'student_stage': 2, 'demo_name': '春笋英语', "exercise_id": randint(100, 999),
+                 "article_length": 220, "reading_level": 25}
 
     r = requests.post(f"{use_address}/article/reading-comprehension", json=json_data)
     r_json = r.json()
-    print(r_json)
     try:
         return r_json
     except Exception as e:
         print("春笋文章reading-comprehension错误", e)
         print("错误数据", r_json)
 
+
 @time_use
 def get_article2_2():
     """测试通过requests来直接访问openai"""
@@ -242,7 +265,7 @@ def get_article2_2():
                        {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063}
                        ]
     shuffle(core_words_list)
-    core_words_chiose_list = sample(core_words_list,5)
+    core_words_chiose_list = sample(core_words_list, 15)
     core_words_meaning_str = "; ".join([f"[{i['meaning_id']}  {i['spell']} {i['meaning']}]" for i in core_words_chiose_list])
 
     question = f"""下面我会为你提供一组数据,[单词组](里面包含词义id,英语单词,中文词义),请根据这些单词的中文词义,生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
@@ -251,38 +274,52 @@ def get_article2_2():
 1.必须用提供的这个词义的单词,其他单词使用最简单最容易没有难度的单词。文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。选择题难度尽可能简单,参考中国小学生水平
 2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
 3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
-4.生成的文章要求120词左右,可以用\\n\\n字符分段,一般1-2个段落左右。第一段是文章标题。
+4.生成的文章要求320词左右,可以用\\n\\n字符分段,一般3-4个段落左右。第一段是文章标题。
 5.允许不使用[单词组]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
 6.注意回复字段的中英文,englishArticle是英文,chineseArticle是中文,其中trunk是英文,analysis是中文,text是英文。
 
 提供[单词组]:{core_words_meaning_str}
 """
 
-    url = "http://170.106.108.95/v1/chat/completions"
+    url = 'http://170.106.108.95/v1/chat/completions'
 
-   
     headers = {
         "Authorization": f"Bearer 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",
         "Content-Type": "application/json"
     }
 
-   
     data = {
-        "model": "gpt-4.1", 
+        "model": "gpt-4.1",
         "messages": [
-           
+
             {"role": "user", "content": question}
         ],
-        "max_tokens": 4000, 
-        "temperature": 1.2, 
-        "n":8,
-        "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {'properties': {'label': {'title': 'Label', 'type': 'string'}, 'text': {'title': 'Text', 'type': 'string'}, 'isRight': {'title': 'Isright', 'type': 'integer'}}, 'required': ['label', 'text', 'isRight'], 'title': 'Candidate', 'type': 'object'}, 'DifficultSentence': {'properties': {'english': {'title': 'English', 'type': 'string'}, 'chinese': {'title': 'Chinese', 'type': 'string'}}, 'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {'properties': {'trunk': {'title': 'Trunk', 'type': 'string'}, 'analysis': {'title': 'Analysis', 'type': 'string'}, 'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': 'Candidates', 'type': 'array'}}, 'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {'difficultSentences': {'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': 'Difficultsentences', 'type': 'array'}, 'usedMeanIds': {'items': {'type': 'integer'}, 'title': 'Usedmeanids', 'type': 'array'}, 'questions': {'items': {'$ref': '#/$defs/Question'}, 'title': 'Questions', 'type': 'array'}, 'englishArticle': {'title': 'Englisharticle', 'type': 'string'}, 'chineseArticle': {'title': 'Chinesearticle', 'type': 'string'}, 'allWordAmount': {'title': 'Allwordamount', 'type': 'integer'}}, 'required': ['difficultSentences', 'usedMeanIds', 'questions', 'englishArticle', 'chineseArticle', 'allWordAmount'], 'title': 'Article', 'type': 'object'}}}
+        "max_tokens": 8000,
+        "temperature": 1.2,
+        "n": 4,
+        "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {
+            'properties': {'label': {'title': 'Label', 'type': 'string'}, 'text': {'title': 'Text', 'type': 'string'},
+                           'isRight': {'title': 'Isright', 'type': 'integer'}}, 'required': ['label', 'text', 'isRight'], 'title': 'Candidate',
+            'type': 'object'}, 'DifficultSentence': {
+            'properties': {'english': {'title': 'English', 'type': 'string'}, 'chinese': {'title': 'Chinese', 'type': 'string'}},
+            'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {
+            'properties': {'trunk': {'title': 'Trunk', 'type': 'string'}, 'analysis': {'title': 'Analysis', 'type': 'string'},
+                           'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': 'Candidates', 'type': 'array'}},
+            'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {
+            'difficultSentences': {'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': 'Difficultsentences', 'type': 'array'},
+            'usedMeanIds': {'items': {'type': 'integer'}, 'title': 'Usedmeanids', 'type': 'array'},
+            'questions': {'items': {'$ref': '#/$defs/Question'}, 'title': 'Questions', 'type': 'array'},
+            'englishArticle': {'title': 'Englisharticle', 'type': 'string'}, 'chineseArticle': {'title': 'Chinesearticle', 'type': 'string'},
+            'allWordAmount': {'title': 'Allwordamount', 'type': 'integer'}}, 'required': ['difficultSentences', 'usedMeanIds', 'questions',
                                                                                           'englishArticle', 'chineseArticle', 'allWordAmount'],
                                                                                                  'title': 'Article', 'type': 'object'}}}
     }
 
-   
-    response = httpx.post(url, headers=headers, json=data,timeout=300)
-    print(response.json())
-    return response.json()
+    try:
+        response = httpx.post(url, headers=headers, json=data, timeout=300)
+        return response.json()
+    except Exception as e:
+        print(f"错误:{type(e).__name__}: {e}")
 
 
 def download_word():
@@ -333,42 +370,47 @@ def get_article3():
 
 @time_use
 def run_all_test_cese():
-    test_connect() 
-
-    get_audio() 
-    spoken_language() 
-    download_word() 
-    print(get_article2_1()) 
+    test_connect()
 
-   
-   
+    get_audio()
+    spoken_language()
+    download_word()
+    get_article2_1()
 
 
 @time_use
 def multi_request():
-    with ThreadPoolExecutor(max_workers=50) as executor:
-       
-        futures = [executor.submit(get_article2_1) for _ in range(30)]
-       
+    with ThreadPoolExecutor(max_workers=150) as executor:
+
+        futures = [executor.submit(get_article2_1) for _ in range(100)]
 
-       
         wait(futures)
         print("完成等待")
-        for index,future in enumerate(futures,start=1):
-            future.result() 
-            print(f"完成循环{index}")
+        f = open("result.txt", 'w', encoding='utf-8')
 
+        try:
+            for index, future in enumerate(futures, start=1):
+                f.write(str(future.result()) + '\n')
+        except Exception as e:
+            print(f"错误:{type(e).__name__}: {e}")
+        finally:
+            f.close()
 
-if __name__ == '__main__':
-   
-    multi_request()
 
-   
-   
+@time_use
+def article_annotation():
+    text = """The expression, “Everybody’s doing it,” is very much at the center of the concept of peer pressure. It is a social influence applied on an individual in order to get that person to act or believe in a(n) ____1____ way as a larger group. 
+This influence can be negative or positive, and can exist in both large and small groups.People are social creatures by nature, and so it is hardly ____2____ that some part of their self-respect comes from the approval of others. This instinct explains why the approval of peers, and the fear of ____3____, is such a powerful force in many people’s lives. This instinct drives people to dress one way at home and another way at work, or to answer a simple “fine” when a stranger asks “How are you?” even if it is not necessarily true. There is a(n) ____4____ aspect to this: It helps society to function efficiently, and encourages a general level of self-discipline that ____5____ day-to-day interaction between people.
+For certain individuals, seeking social acceptance is so important that it becomes a(n) ____6____: in order to satisfy the desire, they may go so far as to ____7____ their sense of right and wrong. Teens and young adults may feel forced to use drugs, or join gangs that ____8____ criminal behavior. Mature adults may sometimes feel ____9____ to cover up illegal activity at the company where they work, or end up in debt because they are unable to hold back the desire to buy a house or car that they can’t afford in an effort to ____10____ the peers.
+However, peer pressure is not always negative. A student whose friends are good at contests may be ____11____ to work harder and get good grades. Players on a sports team may feel driven to play harder in order to help the team win. This type of ____12____ can also get a friend off drugs, or to help an adult take up a good habit or drop a bad one.
+Although peer pressure is sometimes quite obvious, it can also be so ____13____ that a person may not even notice that it is affecting his or her behavior. For this reason, when making important decisions, simply going with a(n) ____14____ is risky. Instead, people should seriously consider why they feel drawn to taking a particular action, and whether the real ____15____ is simply that everyone else is doing the same thing."""
 
-   
+    json_data = {"english_text": "hello , please give me an apple ."}
 
-   
-   
+    r = requests.post(f"{use_address}/article/annotation", json=json_data)
+    r_json = r.json()
+    print(r_json)
 
-   
+
+if __name__ == '__main__':
+    article_annotation()

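The pydantic models at the top of mock_request.py mirror the json_schema passed as response_format, so a model reply can be validated offline before any request is made; a minimal sketch (the payload values are invented for illustration):

# -*- coding:utf-8 -*-
# Validate a hand-written payload against the Article model above.
from mock.mock_request import Article  # assumes repo root on sys.path

payload = {
    "difficultSentences": [{"english": "He kept his word.", "chinese": "他信守了诺言。"}],
    "usedMeanIds": [460, 572],
    "questions": [{
        "trunk": "What did he keep?",
        "analysis": "细节题,原文提到 kept his word。",
        "candidates": [
            {"label": "A", "text": "his word", "isRight": 1},
            {"label": "B", "text": "a cat", "isRight": 0},
        ],
    }],
    "englishArticle": "He kept his word.",
    "chineseArticle": "他信守了诺言。",
    "allWordAmount": 4,
}

article = Article(**payload)  # raises pydantic.ValidationError if any required field is missing
print(article.allWordAmount, len(article.questions))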
+ 2 - 1
spoken_language/common/utils.py

@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
 import sys
 
+
 def is_python3():
     if sys.version > '3':
         return True
-    return False
+    return False

+ 4 - 7
spoken_language/read_config.py

@@ -1,20 +1,17 @@
 # -*- coding:utf-8 -*-
 import yaml
 
+
 def read_config(parent_dir="."):
-   
-    with open(parent_dir +"/config/tencent_config.yaml", "r",encoding="utf-8") as file:
+    with open(parent_dir + "/config/tencent_config.yaml", "r", encoding="utf-8") as file:
         config = yaml.safe_load(file)
         return config
-   
-   
-
 
 
 if __name__ == '__main__':
-    import sys,os
+    import sys, os
 
     current_dir = os.path.dirname(os.path.abspath(__file__))
     parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
     sys.path.append(parent_dir)
-    print(read_config(parent_dir))
+    print(read_config(parent_dir))

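read_config resolves config/tencent_config.yaml relative to parent_dir; judging from the keys unpacked in soeexample.py below, the file carries at least appId, SecretId and SecretKey. A round-trip sketch with placeholder values:

# -*- coding:utf-8 -*-
# Write a throwaway config with the three keys soeexample.py expects, then load it back.
import os

import yaml

from spoken_language.read_config import read_config  # assumes repo root on sys.path

os.makedirs("config", exist_ok=True)
demo = {"appId": "1300000000", "SecretId": "AKID-demo", "SecretKey": "demo-key"}  # placeholders
with open("config/tencent_config.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(demo, f)

cfg = read_config(".")
assert cfg["appId"] == "1300000000"
print(cfg)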
+ 15 - 15
spoken_language/soe/speaking_assessment.py

@@ -1,18 +1,20 @@
 # -*- coding: utf-8 -*-
-import sys
-import hmac
-import hashlib
 import base64
-import time
+import hashlib
+import hmac
 import json
+import sys
 import threading
+import time
 import urllib
-
-import websocket
 import uuid
 from urllib.parse import quote
+
+import websocket
+
 from tools.loglog import logger
 
+
 def is_python3():
     if sys.version > '3':
         return True
@@ -104,7 +106,7 @@ class SpeakingAssessment:
 
     def set_text_mode(self, text_mode):
         self.text_mode = text_mode
-    
+
     def set_rec_mode(self, rec_mode):
         self.rec_mode = rec_mode
 
@@ -210,7 +212,7 @@ class SpeakingAssessment:
 
     def start(self):
         def on_message(ws, message):
-           
+
             response = json.loads(message)
             response['voice_id'] = self.voice_id
             if response['code'] != 0:
@@ -222,7 +224,7 @@ class SpeakingAssessment:
                 self.status = FINAL
                 self.result = message
                 self.listener.on_recognition_complete(response)
-               
+
                 self.ws.close()
                 return
             else:
@@ -238,9 +240,8 @@ class SpeakingAssessment:
                          (format(error), self.voice_id))
             self.status = ERROR
 
-        def on_close(ws,close_status_code, close_msg):
-           
-           
+        def on_close(ws, close_status_code, close_msg):
+
             self.status = CLOSED
             logger.info("websocket closed  voice id %s" %
                         self.voice_id)
@@ -256,13 +257,13 @@ class SpeakingAssessment:
         signstr = self.format_sign_string(query)
         autho = self.sign(signstr, self.credential.secret_key)
         requrl = self.create_query_string(query_arr)
-       
+
         if is_python3():
             autho = urllib.parse.quote(autho)
         else:
             autho = urllib.quote(autho)
         requrl += "&signature=%s" % autho
-       
+
         self.ws = websocket.WebSocketApp(requrl, None,
                                          on_error=on_error, on_close=on_close, on_message=on_message)
         self.ws.on_open = on_open
@@ -272,4 +273,3 @@ class SpeakingAssessment:
         self.status = STARTED
         response = {'voice_id': self.voice_id}
         self.listener.on_recognition_start(response)
-       

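The end of the class shows why hmac, hashlib and base64 are imported: the websocket URL is finished off with signstr → sign(...) → percent-encode → &signature=. A sketch of that signature step, under the assumption (based on those imports and on how Tencent's speech SDKs normally sign) that sign computes a base64-encoded HMAC-SHA1; the inputs below are placeholders, not values from the commit:

# -*- coding: utf-8 -*-
# Assumed shape of SpeakingAssessment.sign: HMAC-SHA1 over the canonical
# sign string, base64-encoded, then percent-encoded before being appended
# to the request URL. Treat this as a sketch, not a verified copy.
import base64
import hashlib
import hmac
from urllib.parse import quote


def sign(signstr, secret_key):
    digest = hmac.new(secret_key.encode("utf-8"), signstr.encode("utf-8"), hashlib.sha1).digest()
    return base64.b64encode(digest).decode("utf-8")


signature = quote(sign("soe.cloud.tencent.com/soe/api/demo?voice_id=1", "demo-secret"))  # placeholders
print("&signature=" + signature)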
+ 23 - 40
spoken_language/soeexample.py

@@ -1,19 +1,17 @@
 # -*- coding: utf-8 -*-
+import json
 import os
+import threading
 import time
+
 import requests
-import threading
-from datetime import datetime
-import json
 
 from spoken_language.common import credential
-from spoken_language.soe import speaking_assessment
-
 from spoken_language.read_config import read_config
-
+from spoken_language.soe import speaking_assessment
 
 config_data = read_config()
-app_id,secret_id,secret_key= config_data['appId'],config_data['SecretId'],config_data['SecretKey']
+app_id, secret_id, secret_key = config_data['appId'], config_data['SecretId'], config_data['SecretKey']
 
 APPID = app_id
 SECRET_ID = secret_id
@@ -31,33 +29,25 @@ class MySpeechRecognitionListener(speaking_assessment.SpeakingAssessmentListener):
 
     def on_recognition_start(self, response):
         pass
-       
-       
 
     def on_intermediate_result(self, response):
         rsp_str = json.dumps(response, ensure_ascii=False)
-       
-       
 
     def on_recognition_complete(self, response):
         global spoken_result
         spoken_result[self.id] = response
-       
-       
-       
 
     def on_fail(self, response):
         rsp_str = json.dumps(response, ensure_ascii=False)
-       
-       
+
 
 def process(id):
     audio = r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3"
     listener = MySpeechRecognitionListener(id)
-   
+
     credential_var = credential.Credential(SECRET_ID, SECRET_KEY)
     recognizer = speaking_assessment.SpeakingAssessment(
-        APPID, credential_var, ENGINE_MODEL_TYPE,  listener)
+        APPID, credential_var, ENGINE_MODEL_TYPE, listener)
     recognizer.set_text_mode(0)
     recognizer.set_ref_text("anyway")
     recognizer.set_eval_mode(0)
@@ -71,36 +61,34 @@ def process(id):
             while content:
                 recognizer.write(content)
                 content = f.read(SLICE_SIZE)
-                #sleep模拟实际实时语音发送间隔
-               
-               
+                # sleep模拟实际实时语音发送间隔
+
                 time.sleep(0.2)
     except Exception as e:
         print(e)
     finally:
         recognizer.stop()
 
-def process_rec(task_id,audio_path,audio_text,audio_binary=None):
+
+def process_rec(task_id, audio_path, audio_text, audio_binary=None):
     audio = audio_path
     listener = MySpeechRecognitionListener(task_id)
-   
+
     credential_var = credential.Credential(SECRET_ID, SECRET_KEY)
     recognizer = speaking_assessment.SpeakingAssessment(
-        APPID, credential_var, ENGINE_MODEL_TYPE,  listener)
+        APPID, credential_var, ENGINE_MODEL_TYPE, listener)
     recognizer.set_text_mode(0)
     recognizer.set_ref_text(audio_text)
     recognizer.set_eval_mode(1)
     recognizer.set_keyword("")
     recognizer.set_sentence_info_enabled(0)
     recognizer.set_voice_format(2)
-   
-   
+
     recognizer.set_rec_mode(1)
     try:
         recognizer.start()
-        if audio_binary: 
-           
-           
+        if audio_binary:
+
             recognizer.write(audio_binary)
         else:
             with open(f"{task_id}.mp3", 'rb') as f:
@@ -123,18 +111,17 @@ def process_multithread(number):
         thread.join()
 
 
-def make_spoken(task_id,audio_url,audio_content,audio_text):
-
+def make_spoken(task_id, audio_url, audio_content, audio_text):
     if audio_url:
         print("有url,应该去下载mp3文件")
-       
+
         r = requests.get(audio_url)
         audio_content = r.content
     else:
-        with open(f"{task_id}.mp3",'wb') as f:
+        with open(f"{task_id}.mp3", 'wb') as f:
             f.write(audio_content)
 
-    process_rec(task_id,audio_path=f"",audio_text=audio_text,audio_binary=audio_content)
+    process_rec(task_id, audio_path=f"", audio_text=audio_text, audio_binary=audio_content)
     global spoken_result
     for _ in range(60):
         if task_id in spoken_result:
@@ -146,10 +133,6 @@ def make_spoken(task_id,audio_url,audio_content,audio_text):
         time.sleep(0.5)
     return None
 
-if __name__ == "__main__":
 
-   
-   
-   
-    process_rec(0,r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3","You must study to be frank with the world apple")
-   
+if __name__ == "__main__":
+    process_rec(0, r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3", "You must study to be frank with the world apple")

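make_spoken above is the high-level entry point: it fetches or persists the audio, hands the bytes to process_rec, then polls spoken_result for up to 30 seconds (60 × 0.5s) before giving up. A sketch of calling it with a local recording (the file path is a placeholder, and importing the module assumes config/tencent_config.yaml is in place):

# -*- coding: utf-8 -*-
# Drive make_spoken above with local audio bytes instead of a URL.
from spoken_language.soeexample import make_spoken  # assumes repo root on sys.path

with open(r"C:\path\to\recording.mp3", "rb") as f:  # placeholder path
    audio_bytes = f.read()

result = make_spoken(task_id="demo-1", audio_url=None,
                     audio_content=audio_bytes, audio_text="anyway")
print(result)  # Tencent SOE response dict, or None if the 30s poll times out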
+ 8 - 13
tools/ali_log.py

@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
-from aliyun.log import LogClient, PutLogsRequest, LogItem, GetLogsRequest, IndexConfig
-import time
 import os
+import time
+
+from aliyun.log import LogClient, PutLogsRequest, LogItem, GetLogsRequest, IndexConfig
 
 accessKeyId = os.getenv("OSS_ACCESS_KEY_ID")
 accessKey = os.getenv("OSS_ACCESS_KEY_SECRET")
@@ -28,18 +29,21 @@ logstore_index = {'line': {
 from_time = int(time.time()) - 3600
 to_time = time.time() + 3600
 
+
 def create_project():
     print("ready to create project %s" % project_name)
     client.create_project(project_name, project_des="")
     print("create project %s success " % project_name)
     time.sleep(60)
 
+
 def create_logstore():
     print("ready to create logstore %s" % logstore_name)
     client.create_logstore(project_name, logstore_name, ttl=3, shard_count=2)
     print("create logstore %s success " % project_name)
     time.sleep(30)
 
+
 def create_index():
     print("ready to create index for %s" % logstore_name)
     index_config = IndexConfig()
@@ -48,7 +52,8 @@ def create_index():
     print("create index for %s success " % logstore_name)
     time.sleep(60 * 2)
 
-def put_logs(msg:str):
+
+def put_logs(msg: str):
     log_group = []
 
     log_item = LogItem()
@@ -61,7 +66,6 @@ def put_logs(msg:str):
     client.put_logs(request)
 
 
-
 def get_logs():
     print("ready to query logs from logstore %s" % logstore_name)
     request = GetLogsRequest(project_name, logstore_name, from_time, to_time, query=query)
@@ -73,13 +77,4 @@ def get_logs():
 
 
 if __name__ == '__main__':
-   
-   
-   
-   
-   
-   
-   
     put_logs("测试")
-   
-   

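tools/ali_log.py reads its credentials from OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET at import time, so any smoke test has to export them before the import; a sketch (the values below are placeholders, and the call only succeeds against Aliyun with real credentials and the module's configured project/logstore, which are elided between the hunks above):

# -*- coding: utf-8 -*-
# Minimal smoke test for put_logs above. Set credentials first, because the
# module builds its LogClient from the environment when imported.
import os

os.environ.setdefault("OSS_ACCESS_KEY_ID", "demo-id")       # placeholder
os.environ.setdefault("OSS_ACCESS_KEY_SECRET", "demo-key")  # placeholder

from tools.ali_log import put_logs  # assumes repo root on sys.path

put_logs("冒烟测试:hello aliyun log")  # pushes one LogItem; raises if the credentials are fake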
File diff suppressed because it is too large
+ 36 - 62
tools/audio.py


+ 9 - 12
tools/del_expire_file.py

@@ -2,12 +2,12 @@
 """
 """
 删除过期的文件资源
 删除过期的文件资源
 """
 """
-import os
 import datetime
 import datetime
+import os
 from time import sleep
 from time import sleep
 
 
 
 
-def del_file(folder_path,expired_days=10):
+def del_file(folder_path, expired_days=10):
     """
     """
     删除文件夹内过时的文件
     删除文件夹内过时的文件
     folder_path: 需要删除过期文件的文件夹
     folder_path: 需要删除过期文件的文件夹
@@ -18,31 +18,28 @@ def del_file(folder_path,expired_days=10):
         print("文件夹不存在")
         print("文件夹不存在")
         return None
         return None
 
 
-   
     now = datetime.datetime.now()
     now = datetime.datetime.now()
 
 
-   
     for filename in os.listdir(folder_path):
     for filename in os.listdir(folder_path):
         file_path = os.path.join(folder_path, filename)
         file_path = os.path.join(folder_path, filename)
-       
+
         if os.path.isfile(file_path):
         if os.path.isfile(file_path):
-           
+
             create_time = os.path.getctime(file_path)
             create_time = os.path.getctime(file_path)
             create_date = datetime.datetime.fromtimestamp(create_time)
             create_date = datetime.datetime.fromtimestamp(create_time)
-           
+
             delta = now - create_date
             delta = now - create_date
-           
+
             if delta.days > expired_days:
             if delta.days > expired_days:
                 os.remove(file_path)
                 os.remove(file_path)
-               
 
 
 
 
 def run_del_normal():
 def run_del_normal():
     """这是小程序项目内的正常删除机制"""
     """这是小程序项目内的正常删除机制"""
     while True:
     while True:
-        del_file("make_docx_demo/file_result",expired_days=15)
-        sleep(3600*24)
+        del_file("make_docx_demo/file_result", expired_days=15)
+        sleep(3600 * 24)
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    run_del_normal()
+    run_del_normal()

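run_del_normal blocks forever, sleeping 24 hours between sweeps, so the service presumably runs it off the main thread; a sketch with a daemon thread (the wiring below is my illustration, not code from this commit):

# -*- coding:utf-8 -*-
# Run the cleanup loop in the background so it never blocks startup.
from threading import Thread

from tools.del_expire_file import run_del_normal  # assumes repo root on sys.path

cleaner = Thread(target=run_del_normal, daemon=True)  # daemon=True: thread dies with the process
cleaner.start()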
+ 27 - 18
tools/loglog.py

@@ -1,19 +1,23 @@
 # -*- coding=utf-8 -*-
-import time, os
+import os
+import time
 import traceback
-from loguru import logger
 from threading import Lock
+
+from loguru import logger
+
 from tools.ali_log import put_logs
 
+
 class SimpleLogger:
     """
     简易日志:存放几乎没用的大量gpt日志
     """
 
     def __init__(self, base_file_name: str = "ai_log"):
-        self.base_file_name = "log/" + base_file_name 
+        self.base_file_name = "log/" + base_file_name
         self.file_ext = ".txt"
-        self.max_size = 10 * 1024 * 1024 
+        self.max_size = 10 * 1024 * 1024
         self.current_file = self._get_current_file()
         self.lock = Lock()
         if not os.path.exists("log"):
@@ -37,7 +41,7 @@ class SimpleLogger:
             with open(f"{self.base_file_name}_1{self.file_ext}", "a", encoding="utf-8") as log_file:
                 log_file.write(f"Hello World\n")
 
-    def log(self, message:str, level="INFO"):
+    def log(self, message: str, level="INFO"):
         """记录日志到文件"""
         self._check_file_size()
         date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
@@ -45,27 +49,31 @@ class SimpleLogger:
             with open(self.current_file, "a", encoding="utf-8") as log_file:
                 log_file.write(f"{date_time} 【{level}】 {str(message)}\n\n")
 
-    def info(self, message:str):
+    def info(self, message: str):
         """记录INFO级别的日志"""
         self.log(message, "INFO")
 
-    def warning(self, message:str):
+    def warning(self, message: str):
         """记录WARNING级别的日志"""
         self.log(message, "WARNING")
 
-    def error(self, message:str):
+    def error(self, message: str):
         """记录ERROR级别的日志"""
         message = "\n" + "-" * 20 + "\n" + message + "\n" + "-" * 20
         self.log(message, "ERROR")
 
-    def debug(self, message:str):
+    def debug(self, message: str):
         """记录DEBUG级别的日志"""
         self.log(message, "DEBUG")
 
-logger.remove(handler_id=None) 
+
+logger.remove(handler_id=None)
 logger.add('log/log.log', level="INFO", rotation="5 MB", encoding="utf-8", retention="7 days")
 logger.add('log/error.log', level="ERROR", rotation="5 MB", encoding="utf-8", retention="7 days")
+
 simple_logger = SimpleLogger()
+temp_logger = SimpleLogger(base_file_name="temp_log")
+
 
 def exception_handler(func):
     def wrapper(*args, **kwargs):
@@ -75,31 +83,32 @@ def exception_handler(func):
             logger.error(f"{type(e).__name__}: {e}")
             traceback_str = traceback.format_exc()
             logger.error(f"错误追溯:{traceback_str}")
+
     return wrapper
 
-def log_err_e(e:Exception,msg=None):
+
+def log_err_e(e: Exception, msg=None):
     if msg:
         logger.error(f"{msg}{type(e).__name__}:{e}")
     traceback_str = traceback.format_exc()
     logger.error(traceback_str)
 
+
 class AliyunLogHandler:
     @staticmethod
     def write(message):
         put_logs(message)
 
+
 if os.getenv("env") != "development":
     print("这是正式环境,加载阿里云日志")
     aliyun_log_handler = AliyunLogHandler()
-    logger.add(aliyun_log_handler, enqueue=True) 
-
+    logger.add(aliyun_log_handler, enqueue=True)
 
 if __name__ == '__main__':
-   
-   
-   
     #
-   
+
     import os
+
     os.chdir("..")
-    logger.error("test信息0123456789.*/-")
+    logger.error("test信息0123456789.*/-")

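The two helpers above cover both logging styles: exception_handler wraps a function so anything it raises is logged with a traceback (the wrapper then returns None), while log_err_e records an exception you already caught. A short usage sketch (run from the repo root so the tools/ imports resolve):

# -*- coding=utf-8 -*-
# Usage sketch for the helpers above.
from tools.loglog import exception_handler, log_err_e, simple_logger  # assumes repo root on sys.path


@exception_handler
def risky(n):
    return 1 / n  # raises ZeroDivisionError for n == 0


risky(0)  # exception is logged with its traceback; the call returns None instead of raising

try:
    int("not a number")
except ValueError as e:
    log_err_e(e, msg="解析失败 ")  # message prefix plus the full traceback

simple_logger.info("one line appended to log/ai_log_*.txt")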
+ 16 - 39
tools/new_mysql.py

@@ -1,9 +1,11 @@
 # -*- coding:utf-8 -*-
 # -*- coding:utf-8 -*-
 import os
 import os
-from tools.loglog import logger
+import time
+
 import pymysql
 import pymysql
 from dbutils.pooled_db import PooledDB
 from dbutils.pooled_db import PooledDB
-import time
+
+from tools.loglog import logger
 
 
 
 
 class MySQLUploader:
 class MySQLUploader:
@@ -53,7 +55,7 @@ class MySQLUploader:
 
 
     def execute_(self, query, params=None):
     def execute_(self, query, params=None):
         for _ in range(3):
         for _ in range(3):
-            conn = self._pool.connection() 
+            conn = self._pool.connection()
             cursor = conn.cursor()
             cursor = conn.cursor()
             try:
             try:
                 if params:
                 if params:
@@ -76,7 +78,7 @@ class MySQLUploader:
     def bulk_insert(self, query, data_list):
     def bulk_insert(self, query, data_list):
         """执行批量插入"""
         """执行批量插入"""
         for _ in range(3):
         for _ in range(3):
-            conn = self._pool.connection() 
+            conn = self._pool.connection()
             cursor = conn.cursor()
             cursor = conn.cursor()
             try:
             try:
                 cursor.executemany(query, data_list)
                 cursor.executemany(query, data_list)
@@ -84,7 +86,7 @@ class MySQLUploader:
                 return True
             except pymysql.MySQLError as e:
                 logger.warning(f"Ignorable error: bulk_insert batch insert failed {type(e).__name__}:{e}")
-                conn.rollback() 
+                conn.rollback()
                 time.sleep(0.5)
             finally:
                 cursor.close()
@@ -96,14 +98,14 @@ class MySQLUploader:
     def query_data(self, query, params=None):
         """Execute a query and return the results."""
         for _ in range(3):
-            conn = self._pool.connection() 
+            conn = self._pool.connection()
             cursor = conn.cursor()
             try:
                 if params:
                     cursor.execute(query, params)
                 else:
                     cursor.execute(query)
-               
+
                 results = cursor.fetchall()
                 return results
             except pymysql.MySQLError as e:
@@ -116,26 +118,24 @@ class MySQLUploader:
         logger.critical(f"query_data 3次没成功.{query} {params}")
         logger.critical(f"query_data 3次没成功.{query} {params}")
         return False
         return False
 
 
-    def execute_sql_file(self,script_file_path):
+    def execute_sql_file(self, script_file_path):
         """执行sql脚本:传入路径或者sql路径都可以"""
         """执行sql脚本:传入路径或者sql路径都可以"""
+
         def execute_file(path):
         def execute_file(path):
 
 
-           
             with open(path, 'r', encoding='utf-8') as file:
                 sql_script = file.read()

-            conn = self._pool.connection() 
+            conn = self._pool.connection()
             cursor = conn.cursor()
-           
-           
+
             cursor.execute(sql_script)
-           
+
             conn.commit()

             cursor.close()
             conn.close()

-        
         if os.path.isdir(script_file_path):
             for file in os.listdir(script_file_path):
                 execute_file(script_file_path + "\\" + file)
@@ -143,38 +143,15 @@ class MySQLUploader:
             if script_file_path.endswith(".sql"):
                 execute_file(script_file_path)

-    def close_connection(self):...
-
+    def close_connection(self):
+        ...


 if __name__ == '__main__':
-
     m = MySQLUploader()
     s = "select Id,BritishPronunciation from dictionary_word where wordspelling = %s"
     r = m.query_data(s, ("sky",))
     print(r)
     input()

-   
-   
-   
-   
-
-   
-   
-   
-   
-   
-   
-
-   
-   
-   
-   
-   
-   
-   
-   
     #
-   
-   

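For reference, a minimal sketch of calling the retry-wrapped helpers above; the queries are illustrative, and the pool credentials are assumed to be configured in the __init__ not shown in this hunk:

    from tools.new_mysql import MySQLUploader

    m = MySQLUploader()

    # query_data retries up to 3 times; it returns a tuple of rows on
    # success, or False if every attempt fails.
    rows = m.query_data(
        "select Id from dictionary_word where wordspelling = %s", ("sky",))

    # bulk_insert wraps cursor.executemany in the same 3-attempt retry loop
    # and returns True on success.
    ok = m.bulk_insert(
        "insert into dictionary_word (WordSpelling) VALUES (%s);",
        [("apple",), ("banana",)])
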
+ 25 - 42
tools/sql_format.py

@@ -1,44 +1,40 @@
 # -*- coding: utf-8 -*-
-from tools.new_mysql import MySQLUploader
-from tools.loglog import logger
-
 from core.respone_format import *
+from tools.loglog import logger
+from tools.new_mysql import MySQLUploader


 class CRUD:
     def __init__(self):
         self.m = MySQLUploader()
-        self.people_place_name = [] 
+        self.people_place_name = []
         self.get_people_place_name()

     def get_word_by_wordid(self, wordid):
         s = "select WordSpelling from dictionary_word where Id = %s"
         r = self.m.query_data(s, (wordid,))
         if r:
-           
             word = r[0][0]
             return word
         return None

-    def get_wordid_by_wordspelling(self, wordspelling,auto_insert=False):
+    def get_wordid_by_wordspelling(self, wordspelling, auto_insert=False):
         """加一个功能。大字典内没有这个单词就自动插入,返回id。auto_insert为真,自动插入大字典,获取其id"""
         """加一个功能。大字典内没有这个单词就自动插入,返回id。auto_insert为真,自动插入大字典,获取其id"""
         s = "select Id from dictionary_word where wordspelling = %s"
         s = "select Id from dictionary_word where wordspelling = %s"
         r = self.m.query_data(s, (wordspelling,))
         r = self.m.query_data(s, (wordspelling,))
         if r:
         if r:
-           
             wordid = r[0][0]
             return wordid

         if auto_insert:
             s = "insert into dictionary_word (WordSpelling) VALUES (%s);"
-            self.m.execute_(s,(wordspelling,))
+            self.m.execute_(s, (wordspelling,))
             s = "select Id from dictionary_word where wordspelling = %s"
             s = "select Id from dictionary_word where wordspelling = %s"
             r = self.m.query_data(s, (wordspelling,))
             r = self.m.query_data(s, (wordspelling,))
             wordid = r[0][0]
             wordid = r[0][0]
             return wordid
             return wordid
 
 
-   
-    def get_exchange_prototype(self,wordspelling):
+    def get_exchange_prototype(self, wordspelling):
         s = "select Word from dictionary_exchange where Word = %s"
         s = "select Word from dictionary_exchange where Word = %s"
         r = self.m.query_data(s, (wordspelling,))
         r = self.m.query_data(s, (wordspelling,))
         if r:
         if r:
@@ -53,37 +49,30 @@ class CRUD:
         r = self.m.query_data(s, (wordid,))
         return r

-   
     def get_people_place_name(self):
         s2 = "select word from people_place_name"
         r = self.m.query_data(s2)
         for i in r:
             self.people_place_name.append(i[0])

-
-   
     def get_word_meaning_by_wordspelling(self, wordspelling, frequency):
         """Fetch all meanings for the given word."""
-       
+
         wordid = self.get_wordid_by_wordspelling(wordspelling)

-       
         return_data = {"word_id": wordid, "frequency": frequency, "word": wordspelling,
         return_data = {"word_id": wordid, "frequency": frequency, "word": wordspelling,
                        "meanings": {"default": [], "sun_english": {"name": "春笋英语", "items": []}, "oxford": {"name": "牛津", "items": []}}}
                        "meanings": {"default": [], "sun_english": {"name": "春笋英语", "items": []}, "oxford": {"name": "牛津", "items": []}}}
 
 
-       
         s = "select Id,WordMeaning from dictionary_meaningitem where WordSpelling = %s"
         s = "select Id,WordMeaning from dictionary_meaningitem where WordSpelling = %s"
         r = self.m.query_data(s, (wordspelling,))
         r = self.m.query_data(s, (wordspelling,))
         for row_data in r:
         for row_data in r:
             return_data["meanings"]["default"].append({"id": row_data[0], "text": row_data[1]})
             return_data["meanings"]["default"].append({"id": row_data[0], "text": row_data[1]})
 
 
-       
         s2 = "select Id,WordMeaning from dictionary_meaningitem_spring_bamboo where WordSpelling = %s"
         s2 = "select Id,WordMeaning from dictionary_meaningitem_spring_bamboo where WordSpelling = %s"
         r2 = self.m.query_data(s2, (wordspelling,))
         r2 = self.m.query_data(s2, (wordspelling,))
         for row_data in r2:
         for row_data in r2:
             return_data["meanings"]["sun_english"]["items"].append({"id": row_data[0], "text": row_data[1]})
             return_data["meanings"]["sun_english"]["items"].append({"id": row_data[0], "text": row_data[1]})
 
 
-       
         s2 = "select Id,WordMeaning from dictionary_meaningitem_oxford where WordSpelling = %s"
         s2 = "select Id,WordMeaning from dictionary_meaningitem_oxford where WordSpelling = %s"
         r2 = self.m.query_data(s2, (wordspelling,))
         r2 = self.m.query_data(s2, (wordspelling,))
         for row_data in r2:
         for row_data in r2:
@@ -97,8 +86,7 @@ class CRUD:
         logger.info(f"根据词义id删除,{wordmeaningid}。结果{r}")
         logger.info(f"根据词义id删除,{wordmeaningid}。结果{r}")
         return True if r is True else False
         return True if r is True else False
 
 
-
-    def get_word_all_info(self,word_id, spell,frequency):
+    def get_word_all_info(self, word_id, spell, frequency):
         def get_associational_words_info(word_meaning_id) -> list:
             return_data = []
             s = "select Id,BaseWordMeaningId,BaseWord,BaseWordMeaning,AssociationalWord,AssociationalWordMeaningId,AssociationalWordMeaning," \
@@ -110,12 +98,13 @@

             for single_meaning in r:
                 associational_id, base_word_meaning_id, base_word, base_word_meaning, associational_word, \
-                associational_word_meaning_id, associational_word_meaning, association_reason,\
+                associational_word_meaning_id, associational_word_meaning, association_reason, \
                 reverse_association_reason, created_time, updated_time = single_meaning
-                r_data = {"id":associational_id,"base_word":{"word":base_word,"meaning_id":base_word_meaning_id,"meaning":base_word_meaning},
-                          "associational_word":{"word":associational_word,"meaning_id":associational_word_meaning_id,"meaning":associational_word_meaning},
-                          "association_reason":association_reason,"reverse_association_reason":reverse_association_reason,
-                          "create_time":created_time.strftime('%Y-%m-%d %H:%M:%S'),"update_time":updated_time.strftime('%Y-%m-%d %H:%M:%S')}
+                r_data = {"id": associational_id, "base_word": {"word": base_word, "meaning_id": base_word_meaning_id, "meaning": base_word_meaning},
+                          "associational_word": {"word": associational_word, "meaning_id": associational_word_meaning_id,
+                                                 "meaning": associational_word_meaning},
+                          "association_reason": association_reason, "reverse_association_reason": reverse_association_reason,
+                          "create_time": created_time.strftime('%Y-%m-%d %H:%M:%S'), "update_time": updated_time.strftime('%Y-%m-%d %H:%M:%S')}
                 return_data.append(r_data)

             return return_data
@@ -129,8 +118,8 @@ class CRUD:
                 return return_data
             for single_phrase in r:
                 phrase_id, phrase_spelling_text, phrase_chinese_translation, from_type, created_time, updated_time = single_phrase
-                r_data = {"id":phrase_id,"english":phrase_spelling_text,"chinese":phrase_chinese_translation,"from":from_type,
-                          "create_time":created_time.strftime('%Y-%m-%d %H:%M:%S'),"update_time":updated_time.strftime('%Y-%m-%d %H:%M:%S')}
+                r_data = {"id": phrase_id, "english": phrase_spelling_text, "chinese": phrase_chinese_translation, "from": from_type,
+                          "create_time": created_time.strftime('%Y-%m-%d %H:%M:%S'), "update_time": updated_time.strftime('%Y-%m-%d %H:%M:%S')}
                 return_data.append(r_data)

             return return_data
@@ -143,24 +132,24 @@ class CRUD:
             if not r:
                 return return_data
             for single_exchange in r:
-                exchange_id,spell,properties,description = single_exchange
+                exchange_id, spell, properties, description = single_exchange
                 r_data = {"id": exchange_id, "spell": spell, "properties": properties, "description": description}
                 r_data = {"id": exchange_id, "spell": spell, "properties": properties, "description": description}
                 return_data.append(r_data)
                 return_data.append(r_data)
 
 
             return return_data
             return return_data
 
 
-        return_data_all = {"word_id":word_id,"spell":spell,"frequency":frequency,"meanings":[],"exchanges":[]}
+        return_data_all = {"word_id": word_id, "spell": spell, "frequency": frequency, "meanings": [], "exchanges": []}
         if spell in self.people_place_name:
             return_data_all["type"] = "人名地名"
         else:
             return_data_all["type"] = "一般词汇"

         s = "select Id,WordMeaning,OperateAccount from dictionary_meaningitem where WordId = %s"
         s = "select Id,WordMeaning,OperateAccount from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
         r = self.m.query_data(s, (word_id,))
-        if not r: 
+        if not r:
             return resp_200(data=return_data_all)
         for single_meaning in r:
             meaning_id, word_meaning, operate_account = single_meaning
-            single_meaning_item = {"id":meaning_id,"text":word_meaning,"editor":operate_account}
+            single_meaning_item = {"id": meaning_id, "text": word_meaning, "editor": operate_account}
             associational_words_list = get_associational_words_info(meaning_id)
             single_meaning_item["associational_words"] = associational_words_list
             phrases_list = get_phrases_info(meaning_id)
@@ -172,7 +161,7 @@

         return resp_200(data=return_data_all)

-    def delete_associational_word(self,word_id,associational_id):
+    def delete_associational_word(self, word_id, associational_id):
         s = "select Id from dictionary_meaningitem where WordId = %s"
         s = "select Id from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
         r = self.m.query_data(s, (word_id,))
         if not r:
         if not r:
@@ -181,8 +170,8 @@

         s = "select BaseWordMeaningId from dictionary_associational_word where Id = %s"
         r = self.m.query_data(s, (associational_id,))
-       
-        if r and r[0][0]==meaning_id:
+
+        if r and r[0][0] == meaning_id:
             s = "DELETE FROM dictionary_associational_word where Id = %s"
             s = "DELETE FROM dictionary_associational_word where Id = %s"
             r = self.m.execute_(s, (associational_id,))
             r = self.m.execute_(s, (associational_id,))
             logger.info(f"根据联想词id删除,{associational_id}。结果{r}")
             logger.info(f"根据联想词id删除,{associational_id}。结果{r}")
@@ -192,7 +181,7 @@ class CRUD:
             logger.info(f"删除联想词时,单词id与联想词id校验失败。{r} {meaning_id}")
             logger.info(f"删除联想词时,单词id与联想词id校验失败。{r} {meaning_id}")
             return resp_400(message="单词id与联想词id校验失败")
             return resp_400(message="单词id与联想词id校验失败")
 
 
-    def delete_phrese_word(self,word_id,phrase_id):
+    def delete_phrese_word(self, word_id, phrase_id):
         s = "select Id from dictionary_meaningitem where WordId = %s"
         s = "select Id from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
         r = self.m.query_data(s, (word_id,))
         if not r:
         if not r:
@@ -201,7 +190,7 @@

         s = "select WordMeaningId from dictionary_phrase where Id = %s"
         r = self.m.query_data(s, (phrase_id,))
-       
+
         if r and r[0][0] == meaning_id:
             s = "DELETE FROM dictionary_phrase where Id = %s"
             r = self.m.execute_(s, (phrase_id,))
@@ -225,7 +214,6 @@ class UserCRUD:
         s = "select id,account,password,uname,create_time from user where account = %s"
         s = "select id,account,password,uname,create_time from user where account = %s"
         r = self.m.query_data(s, (account,))
         r = self.m.query_data(s, (account,))
         if r:
         if r:
-           
             user_info = (r[0][0], r[0][1], r[0][2], r[0][3], r[0][4].strftime('%Y-%m-%d %H:%M:%S'))
             return user_info
         return None
@@ -237,11 +225,6 @@

 if __name__ == '__main__':
     crud = CRUD()
-   
-   
-   
-   
-   

     r = crud.get_wordid_by_wordspelling("abcdefg")
     print(type(r))

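Tying the CRUD methods together, a hedged sketch of a typical lookup flow; the word and frequency values below are illustrative only:

    from tools.sql_format import CRUD

    crud = CRUD()

    # Resolve a spelling to its dictionary id, inserting it when absent.
    word_id = crud.get_wordid_by_wordspelling("sky", auto_insert=True)

    # All meanings grouped by source (default / 春笋英语 / 牛津).
    meanings = crud.get_word_meaning_by_wordspelling("sky", frequency=3)

    # Full record (meanings, associational words, phrases, exchanges),
    # wrapped in a resp_200 response object.
    info = crud.get_word_all_info(word_id, "sky", frequency=3)
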
+ 2 - 3
tools/thread_pool_manager.py

@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from concurrent.futures import ThreadPoolExecutor, wait
+from concurrent.futures import ThreadPoolExecutor

-
-pool_executor = ThreadPoolExecutor(max_workers=200)
+pool_executor = ThreadPoolExecutor(max_workers=50)

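The executor is a module-level singleton shared across the app; with the wait import removed, callers collect results through the futures returned by submit. A small illustrative sketch (fetch_audio is a hypothetical worker):

    from tools.thread_pool_manager import pool_executor

    def fetch_audio(word: str) -> str:
        # Stands in for any I/O-bound task dispatched to the shared pool.
        return f"audio for {word}"

    futures = [pool_executor.submit(fetch_audio, w) for w in ("sky", "sun")]
    results = [f.result() for f in futures]  # blocks until each task completes
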
Some files were not shown because too many files changed in this diff