
1. Speed up article generation

notepad committed 1 week ago
parent
commit
d5b75b5f25
49 changed files with 1109 additions and 1591 deletions
  1. common/common_data.py (+4 -8)
  2. common/split_text.py (+13 -13)
  3. config/read_config.py (+4 -4)
  4. core/api_get_article.py (+16 -17)
  5. core/api_get_article2.py (+17 -18)
  6. core/api_get_article3.py (+16 -14)
  7. core/api_get_audio.py (+9 -13)
  8. core/api_get_spoken_language.py (+9 -13)
  9. core/api_get_word.py (+7 -10)
  10. core/api_routes_jwt.py (+14 -15)
  11. core/respone_format.py (+3 -1)
  12. data/get_all_exchange_words.py (+8 -10)
  13. data/get_frequency_script.py (+5 -6)
  14. data/json_word_frequency.json (+0 -0)
  15. deepseek/ds_api.py (+13 -17)
  16. deepseek/get_article3.py (+25 -37)
  17. gpt/chatgpt.py (+53 -105)
  18. gpt/get_article.py (+100 -125)
  19. gpt/get_article2.py (+41 -81)
  20. gpt/gpt.py (+20 -25)
  21. gpt/gpt_check.py (+22 -36)
  22. gpt/query_oss_file.py (+5 -2)
  23. main.py (+9 -7)
  24. main_9000.py (+10 -8)
  25. make_docx_demo/check_test_table/aaaaaaaaaa.py (+1 -1)
  26. make_docx_demo/check_test_table/baidu_ocr.py (+5 -5)
  27. make_docx_demo/check_test_table/image_preprocess.py (+74 -123)
  28. make_docx_demo/check_test_table/image_preprocess2.py (+71 -112)
  29. make_docx_demo/check_test_table/mark_ocr_loca.py (+2 -7)
  30. make_docx_demo/data.py (+0 -1)
  31. make_docx_demo/docx_other_func.py (+35 -36)
  32. make_docx_demo/get_standard_data.py (+9 -10)
  33. make_docx_demo/main_word.py (+107 -162)
  34. make_docx_demo/main_word_applet.py (+137 -232)
  35. make_docx_demo/new_word2pdf.py (+7 -4)
  36. make_docx_demo/word2pdf.py (+5 -20)
  37. make_docx_demo/word_component/make_rectangle.py (+6 -9)
  38. mock/mock_request.py (+93 -63)
  39. spoken_language/common/utils.py (+2 -1)
  40. spoken_language/read_config.py (+4 -7)
  41. spoken_language/soe/speaking_assessment.py (+8 -9)
  42. spoken_language/soeexample.py (+19 -35)
  43. tools/ali_log.py (+5 -11)
  44. tools/audio.py (+26 -51)
  45. tools/del_expire_file.py (+8 -11)
  46. tools/loglog.py (+20 -16)
  47. tools/new_mysql.py (+12 -37)
  48. tools/sql_format.py (+29 -41)
  49. tools/thread_pool_manager.py (+1 -2)

+ 4 - 8
common/common_data.py

@@ -5,17 +5,13 @@ import json
 SECRET_KEY = os.getenv("key")
 
 try:
-    with open("data/json_word_frequency.json","r",encoding="utf-8") as f:
+    with open("data/json_word_frequency.json", "r", encoding="utf-8") as f:
         word_frequency = json.loads(f.read())
-except FileNotFoundError: 
+except FileNotFoundError:
     with open(r"C:\Users\pan\Desktop\demo\qback\data\json_word_frequency.json", "r", encoding="utf-8") as f:
         word_frequency = json.loads(f.read())
 
+all_json_words_set = {word for key, word in word_frequency.items()}
 
-all_json_words_set = {word for key,word in word_frequency.items()}
-
-
-with open("data/all_exchange_words.txt","r",encoding="utf-8") as f:
+with open("data/all_exchange_words.txt", "r", encoding="utf-8") as f:
     all_exchange_words = set(f.read().split("\n"))
-
-

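A note on the hunk above: the FileNotFoundError fallback still hardcodes an absolute Windows path. A more portable variant resolves the data file against the module location instead of the working directory; a minimal sketch, assuming common/ sits one level below the project root next to data/ (the layout is an assumption, not stated in the diff):

    # Sketch: resolve data/json_word_frequency.json relative to this module,
    # so the loader works regardless of the current working directory.
    import json
    from pathlib import Path

    DATA_DIR = Path(__file__).resolve().parent.parent / "data"  # assumed layout

    with open(DATA_DIR / "json_word_frequency.json", "r", encoding="utf-8") as f:
        word_frequency = json.load(f)

    # Same result as the diff's set comprehension: keep only the spellings.
    all_json_words_set = set(word_frequency.values())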
+ 13 - 13
common/split_text.py

@@ -1,25 +1,28 @@
 # -*- coding: utf-8 -*-
 import re
 
-def split_text_to_word(text:str):
-    words_list = re.findall(r'\b[-\'\w]+\b',text)
+
+def split_text_to_word(text: str):
+    words_list = re.findall(r'\b[-\'\w]+\b', text)
     return words_list
 
-def get_article_words_count(text:str):
+
+def get_article_words_count(text: str):
     return len(split_text_to_word(text))
 
 
-def split_text_to_sentences(text:str) -> list:
-    sentences = re.split(r'(?<=[.!?;])', text) 
+def split_text_to_sentences(text: str) -> list:
+    sentences = re.split(r'(?<=[.!?;])', text)
     sentences = [i for i in sentences if i.replace(" ", "")]
     return sentences
 
 
-def split_text_to_word_punctuation(text:str):
-    word_punctuation_list = re.findall(r'\b[-\'\w]+\b|[^\w\s]|\n',text)
+def split_text_to_word_punctuation(text: str):
+    word_punctuation_list = re.findall(r'\b[-\'\w]+\b|[^\w\s]|\n', text)
     return word_punctuation_list
 
-def is_word(single_word:str,strict:bool=False):
+
+def is_word(single_word: str, strict: bool = False):
     """strict 严格模式,默认不开。严格模式下,每个实体字符必须是字母。全部都是字母才算是单词
     非严格模式下,有一个字母就算是单词。即使是 op123
     """
@@ -36,9 +39,6 @@ def is_word(single_word:str,strict:bool=False):
 
 
 if __name__ == '__main__':
-
     a = "fdh fgdhf fgd-y i'am a student.gfddfgfd dfhgfd ! fdgh,fdgh fght. 3.1415"
-   
-   
-   
-    print(is_word("student34",strict=True))
+
+    print(is_word("student34", strict=True))

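The two regexes above drive all downstream word and sentence counting. A quick usage sketch of the expected behavior (the outputs in the comments follow from the patterns as written):

    # Behavior check for the helpers in common/split_text.py.
    from common.split_text import split_text_to_word, get_article_words_count, split_text_to_sentences

    text = "I'm a student. It works! Right?"
    print(split_text_to_word(text))       # ["I'm", 'a', 'student', 'It', 'works', 'Right']
    print(get_article_words_count(text))  # 6
    print(split_text_to_sentences(text))  # ["I'm a student.", ' It works!', ' Right?']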
+ 4 - 4
config/read_config.py

@@ -3,15 +3,15 @@ import yaml
 
 
 def read_config():
-   
-    with open("config/env.yaml", "r",encoding="utf-8") as file:
+    with open("config/env.yaml", "r", encoding="utf-8") as file:
         config = yaml.safe_load(file)
         return config
 
-address = "https://dcjxb.yunzhixue.cn" if read_config()['env']=='product' else "http://dcjxbtest.yunzhixue.cn"
 
+address = "https://dcjxb.yunzhixue.cn" if read_config()['env'] == 'product' else "http://dcjxbtest.yunzhixue.cn"
 
 if __name__ == '__main__':
     import os
+
     os.chdir(r'C:\Users\86131\Desktop\demo\ai_qback')
-    print(read_config()['env'])
+    print(read_config()['env'])

+ 16 - 17
core/api_get_article.py

@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
+from fastapi import FastAPI, Form, HTTPException, Request, status, APIRouter, Query, Path
 from tools.loglog import logger
 from gpt.get_article import GetArticle
 from gpt.query_oss_file import query_file_content
@@ -8,37 +8,37 @@ from core.respone_format import *
 from pydantic import BaseModel, ValidationError, conint
 from typing import List, Optional
 
-
 router = APIRouter()
 get_article = GetArticle()
 
 
 class ArticleRequest(BaseModel):
-    meaning_ids: List[conint(ge=1)] 
-    callback_url: Optional[str] = None 
-    demo_name: Optional[str] = "无" 
-    student_stage: Optional[int] = 1 
-    vocabulary: Optional[int] = 500 
-    class_id :Optional[int]
+    meaning_ids: List[conint(ge=1)]
+    callback_url: Optional[str] = None
+    demo_name: Optional[str] = "无"
+    student_stage: Optional[int] = 1
+    vocabulary: Optional[int] = 500
+    class_id: Optional[int]
+
 
 @router.post("/article")
-def post_article(json_data:ArticleRequest,request:Request):
-    real_ip = request.headers.get("X-Real-IP","localhost")
+def post_article(json_data: ArticleRequest, request: Request):
+    real_ip = request.headers.get("X-Real-IP", "localhost")
     words_meaning_ids: list = json_data.meaning_ids
     callback_url = json_data.callback_url
     demo_name = json_data.demo_name
     student_stage = json_data.student_stage
     vocabulary = json_data.vocabulary
-    class_id = json_data.class_id 
+    class_id = json_data.class_id
 
     try:
         if not words_meaning_ids:
             return resp_404(message="没有词义id")
 
-        r = get_article.submit_task(words_meaning_ids=words_meaning_ids,callback_url=callback_url,
-                                    real_ip=real_ip,demo_name=demo_name,
-                                    student_stage=student_stage,vocabulary=vocabulary,class_id=class_id)
-        return r if not isinstance(r,str) else resp_500(message=r)
+        r = get_article.submit_task(words_meaning_ids=words_meaning_ids, callback_url=callback_url,
+                                    real_ip=real_ip, demo_name=demo_name,
+                                    student_stage=student_stage, vocabulary=vocabulary, class_id=class_id)
+        return r if not isinstance(r, str) else resp_500(message=r)
 
     except Exception as e:
         logger.error(f"{type(e).__name__},{e}")
@@ -46,7 +46,7 @@ def post_article(json_data:ArticleRequest,request:Request):
 
 
 @router.post("/query_oss_file")
-def query_oss_file(json_data:dict,request:Request):
+def query_oss_file(json_data: dict, request: Request):
     oss_key = json_data.get("key")
 
     if not oss_key:
@@ -55,4 +55,3 @@ def query_oss_file(json_data:dict,request:Request):
     if j == 0:
         return resp_500(message="错误:没有这个文件")
     return JSONResponse(j)
-

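For reference, a client-side sketch of the /article contract defined by ArticleRequest. The host and ids are placeholders, not taken from the repo; note that class_id is annotated Optional[int] with no default, so under pydantic v2 the field must still be sent, though null is accepted:

    # Hypothetical client call for POST /article; keys mirror ArticleRequest.
    import requests

    payload = {
        "meaning_ids": [110, 111, 112],  # List[conint(ge=1)], required
        "callback_url": None,
        "demo_name": "无",
        "student_stage": 1,
        "vocabulary": 500,
        "class_id": None,                # must be present even if null (no default)
    }
    resp = requests.post("http://localhost:8000/article", json=payload)  # host is an assumption
    print(resp.status_code, resp.json())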
+ 17 - 18
core/api_get_article2.py

@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path, Depends, BackgroundTasks
-from tools.loglog import logger,log_err_e
+from fastapi import FastAPI, Form, HTTPException, Request, status, APIRouter, Query, Path, Depends, BackgroundTasks
+from tools.loglog import logger, log_err_e
 
 from core.respone_format import *
 from gpt.get_article2 import GetArticle
-from pydantic import BaseModel, ValidationError, conint,Field,conint
-from typing import List, Optional,Literal
+from pydantic import BaseModel, ValidationError, conint, Field, conint
+from typing import List, Optional, Literal
 import asyncio
 
 router = APIRouter()
@@ -15,35 +15,34 @@ get_article = GetArticle()
 
 
 class Word(BaseModel):
-    meaning_id:int = Field(..., description="单词的词义id")
-    word_id:int = Field(..., description="单词id")
+    meaning_id: int = Field(..., description="单词的词义id")
+    word_id: int = Field(..., description="单词id")
     spell: str = Field(..., description="单词的拼写")
     meaning: str = Field(..., description="单词的意思")
 
 
 class ArticleRequest(BaseModel):
     core_words: List[Word] = Field(..., description="单词列表")
-    take_count: int = 2 
-    demo_name: Optional[str] = "无" 
+    take_count: int = 2
+    demo_name: Optional[str] = "无"
     reading_level: conint(ge=1, le=30) = Field(default=10, description="阅读水平,默认值为10;[8,16,24]小学初中高中")
-    article_length:int = Field(default=None,description="需要生成的文章长度,可以不传,不传自己根据reading_level判断")
-    exercise_id:int = Field(default=0,description="学案ID,用于日志快速定位")
+    article_length: int = Field(default=None, description="需要生成的文章长度,可以不传,不传自己根据reading_level判断")
+    exercise_id: int = Field(default=0, description="学案ID,用于日志快速定位")
 
 
 @router.post("/article/reading-comprehension")
 def post_article(
-    json_data:ArticleRequest,
-    request:Request,
-    background_tasks: BackgroundTasks,
+        json_data: ArticleRequest,
+        request: Request,
+        background_tasks: BackgroundTasks,
 ):
-
     json_data = json_data.model_dump()
-    real_ip = request.headers.get("X-Real-IP","0.0.0.0")
+    real_ip = request.headers.get("X-Real-IP", "0.0.0.0")
 
     core_words = json_data["core_words"]
     take_count = json_data["take_count"]
     demo_name = json_data["demo_name"]
-    reading_level = json_data["reading_level"] 
+    reading_level = json_data["reading_level"]
     article_length = json_data["article_length"]
     exercise_id = json_data["exercise_id"]
 
@@ -58,8 +57,8 @@ def post_article(
             exercise_id=exercise_id,
             background_tasks=background_tasks
         )
-        return r if not isinstance(r,str) else resp_500(message=r)
+        return r if not isinstance(r, str) else resp_500(message=r)
 
     except Exception as e:
         log_err_e(e, msg="文章2接口错误/article/reading-comprehension;")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        return resp_500(message=f"{type(e).__name__},{e}")

+ 16 - 14
core/api_get_article3.py

@@ -1,17 +1,17 @@
 # -*- coding: utf-8 -*-
 
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
-from tools.loglog import logger,log_err_e
+from fastapi import FastAPI, Form, HTTPException, Request, status, APIRouter, Query, Path
+from tools.loglog import logger, log_err_e
 
 from core.respone_format import *
 from deepseek.get_article3 import GetArticle
-from pydantic import BaseModel, ValidationError, conint,Field
-from typing import List, Optional,Literal
-
+from pydantic import BaseModel, ValidationError, conint, Field
+from typing import List, Optional, Literal
 
 router = APIRouter(tags=['deepseek接口'])
 get_article = GetArticle()
 
+
 class Word(BaseModel):
     spell: str = Field(..., description="单词的拼写")
     meaning: str = Field(..., description="单词的意思")
@@ -19,21 +19,23 @@ class Word(BaseModel):
 
 class ArticleRequest(BaseModel):
     words: List[Word] = Field(..., description="单词列表")
-    take_count: int = 2 
-    student_stage: Literal[1, 2, 3] 
-    demo_name: Optional[str] = "无" 
+    take_count: int = 2
+    student_stage: Literal[1, 2, 3]
+    demo_name: Optional[str] = "无"
 
 
 @router.post("/article/reading-comprehension/deepseek")
-def post_article(json_data:ArticleRequest,request:Request):
+def post_article(json_data: ArticleRequest, request: Request):
     json_data = json_data.dict()
     real_ip = request.headers.get("X-Real-IP")
-    words,take_count,student_stage,demo_name = json_data["words"],json_data["take_count"],json_data["student_stage"],json_data["demo_name"]
+    words, take_count, student_stage, demo_name = json_data["words"], json_data["take_count"], json_data["student_stage"], json_data[
+        "demo_name"]
 
     try:
-        r = get_article.submit_task(words_meaning_list=words, take_count=take_count,student_stage=student_stage,real_ip=real_ip,demo_name=demo_name)
-        return r if not isinstance(r,str) else resp_500(message=r)
+        r = get_article.submit_task(words_meaning_list=words, take_count=take_count, student_stage=student_stage, real_ip=real_ip,
+                                    demo_name=demo_name)
+        return r if not isinstance(r, str) else resp_500(message=r)
 
     except Exception as e:
-        log_err_e(e,msg="文章3,ds接口错误/article/reading-comprehension/deepseek;")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        log_err_e(e, msg="文章3,ds接口错误/article/reading-comprehension/deepseek;")
+        return resp_500(message=f"{type(e).__name__},{e}")

+ 9 - 13
core/api_get_audio.py

@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
+from fastapi import FastAPI, Form, HTTPException, Request, status, APIRouter, Query, Path
 from fastapi.responses import StreamingResponse
 from tools.loglog import logger
 from tools.audio import GetAudio
@@ -9,16 +9,15 @@ import traceback
 router = APIRouter()
 get_audio = GetAudio()
 
-
-
 """
 生成音频tts接口,传递两个参数,word和resp_type; word和resp_type回复设计:0返回oss路径,1 二进制文件,2 url三种;
 """
 
+
 @router.get("/tts")
-def get_tts(word:str=Query(None, max_length=300)):
+def get_tts(word: str = Query(None, max_length=300)):
     try:
-        f = get_audio.submit_task(word_or_phrase=word,resp_type=0) 
+        f = get_audio.submit_task(word_or_phrase=word, resp_type=0)
         r = f.result()
         if r:
             return resp_200(data=r)
@@ -30,24 +29,21 @@ def get_tts(word:str=Query(None, max_length=300)):
 
 
 @router.post("/tts")
-def get_tts(json_data:dict,request:Request):
-   
-
+def get_tts(json_data: dict, request: Request):
     word_or_phrase = json_data["text"]
     resp_type = json_data.get("type")
 
-   
     if len(word_or_phrase) >= 300:
         logger.error(f"单词或短语过长")
         return resp_400(message="单词或短语过长")
-    if resp_type not in [0,1,2]:
+    if resp_type not in [0, 1, 2]:
         logger.error(f"type参数不是012")
         return resp_400(message="type参数不是012")
 
     try:
-        f = get_audio.submit_task(word_or_phrase=word_or_phrase,resp_type=resp_type)
+        f = get_audio.submit_task(word_or_phrase=word_or_phrase, resp_type=resp_type)
         r = f.result()
-        if r and resp_type in [0,2]:
+        if r and resp_type in [0, 2]:
             return resp_200(data=r)
         if r and resp_type == 1:
             return StreamingResponse(content=r, media_type='audio/mpeg')
@@ -57,4 +53,4 @@ def get_tts(json_data:dict,request:Request):
         traceback_str = traceback.format_exc()
         logger.error(traceback_str)
         logger.error(f"{type(e).__name__},{e}")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        return resp_500(message=f"{type(e).__name__},{e}")

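The module docstring above pins the /tts contract: text plus type, where 0 returns an OSS path, 1 a binary MP3 stream, and 2 a URL. A hedged client sketch (the host is a placeholder):

    # Hypothetical client for POST /tts; resp_type per the module docstring:
    # 0 -> OSS path, 1 -> binary audio, 2 -> URL.
    import requests

    BASE = "http://localhost:8000"  # assumption, not from the repo

    r = requests.post(f"{BASE}/tts", json={"text": "hello world", "type": 2})
    print(r.json())  # JSON envelope carrying the audio URL

    r = requests.post(f"{BASE}/tts", json={"text": "hello world", "type": 1})
    with open("hello.mp3", "wb") as f:
        f.write(r.content)  # type=1 answers with an audio/mpeg stream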
+ 9 - 13
core/api_get_spoken_language.py

@@ -1,34 +1,30 @@
 # -*- coding: utf-8 -*-
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path,UploadFile,File
-from tools.loglog import logger,log_err_e
-from spoken_language.soeexample import spoken_result,make_spoken
+from fastapi import FastAPI, Form, HTTPException, Request, status, APIRouter, Query, Path, UploadFile, File
+from tools.loglog import logger, log_err_e
+from spoken_language.soeexample import spoken_result, make_spoken
 from random import randint
 from core.respone_format import *
 
-
 router = APIRouter()
 
 
 @router.post("/spoken_language")
-async def post_article(request:Request,url=Form(""),file: UploadFile = File(None),text=Form(...)):
-
+async def post_article(request: Request, url=Form(""), file: UploadFile = File(None), text=Form(...)):
     if not url and not file:
         logger.error("错误:请上传mp3文件url参数或者二进制文件file参数")
         return resp_404(message="错误:请上传mp3文件url参数或者二进制文件file参数")
     try:
-        task_id = randint(10000,99999)
-       
+        task_id = randint(10000, 99999)
+
         if file:
             file_content = await file.read()
         else:
             file_content = None
-        data:dict = make_spoken(task_id,url,file_content,text)
+        data: dict = make_spoken(task_id, url, file_content, text)
         if data:
             logger.success(f"完成spoken_language请求:{data}")
-           
+
             return data
     except Exception as e:
-        log_err_e(e,msg="口语评测接口")
+        log_err_e(e, msg="口语评测接口")
         return resp_500(message=f"{type(e).__name__},{e}")
-
-

+ 7 - 10
core/api_get_word.py

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
-from fastapi.responses import FileResponse,PlainTextResponse
+from fastapi import FastAPI, Form, HTTPException, Request, status, APIRouter, Query, Path
+from fastapi.responses import FileResponse, PlainTextResponse
 from tools.loglog import logger
 from tools.audio import GetAudio
 from core.respone_format import *
@@ -28,15 +28,12 @@ def make_word(json_data:dict,request:Request,
 """
 
 
-
 @router.post("/make_word/vocabulary_assault")
-def make_word(json_data:dict,request:Request,
-            document_format:int=Query(1,description="1:docx;2.pdf"),
-            scanpage_format:int=Query(1,description="1:老版筛查表;2.新版筛查表;3.老版+新版筛查表")):
-
-   
+def make_word(json_data: dict, request: Request,
+              document_format: int = Query(1, description="1:docx;2.pdf"),
+              scanpage_format: int = Query(1, description="1:老版筛查表;2.新版筛查表;3.老版+新版筛查表")):
     headers = {"Content-Type": "application/octet-stream"}
     if path := s2(json_data, document_format, scanpage_format):
-        return FileResponse(path=path,headers=headers, media_type='application/octet-stream')
+        return FileResponse(path=path, headers=headers, media_type='application/octet-stream')
     else:
-        return PlainTextResponse(status_code=500,content="服务器内部错误")
+        return PlainTextResponse(status_code=500, content="服务器内部错误")

+ 14 - 15
core/api_routes_jwt.py

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter
+from fastapi import FastAPI, Form, HTTPException, Request, status, APIRouter
 import jwt
-from jwt.exceptions import ExpiredSignatureError,DecodeError,InvalidAlgorithmError
+from jwt.exceptions import ExpiredSignatureError, DecodeError, InvalidAlgorithmError
 from core.respone_format import *
 from tools.sql_format import UserCRUD
 import datetime
@@ -29,7 +29,6 @@ def create_access_token(username: str):
         logger.error(f"{type(e).__name__}, {e}")
 
 
-
 def verify_token_sync(token: str):
     if not token:
         return 1
@@ -38,10 +37,10 @@ def verify_token_sync(token: str):
         if not decoded_payload.get("username"):
             return 2
         else:
-            return 0 
+            return 0
     except ExpiredSignatureError:
         return 3
-    except (InvalidAlgorithmError,DecodeError):
+    except (InvalidAlgorithmError, DecodeError):
         return 4
 
 
@@ -49,6 +48,7 @@ async def verify_token(token: str):
     loop = asyncio.get_event_loop()
     return await loop.run_in_executor(None, verify_token_sync, token)
 
+
 async def verify_token2(token):
     msg_verify_code = await verify_token(token)
     if msg_verify_code != 0:
@@ -66,13 +66,13 @@ async def verify_token2(token):
 
 @router.post("/user/login")
 async def get_token(username: str = Form(...), password: str = Form(...)):
-    user_info = user_crud.get_userinfo_by_account(username) 
+    user_info = user_crud.get_userinfo_by_account(username)
     if user_info:
         userid, account, true_pwd, uname, create_time = user_info
     else:
         return resp_400(message="user does not exist")
 
-    if password==true_pwd:
+    if password == true_pwd:
         access_token = create_access_token(username)
         return_data = {"access_token": access_token}
         return resp_200(data=return_data)
@@ -81,30 +81,29 @@ async def get_token(username: str = Form(...), password: str = Form(...)):
 
 
 @router.get("/user")
-async def get_user(request:Request):
-   
+async def get_user(request: Request):
     token = request.headers.get("Authorization")
     try:
         decoded_payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
-        account = decoded_payload.get("username") 
+        account = decoded_payload.get("username")
         user_info = user_crud.get_userinfo_by_account(account=account)
         userid, account, true_pwd, uname, create_time = user_info
-        data = {"id":userid,"name":uname,"account":account,"create_time":create_time}
+        data = {"id": userid, "name": uname, "account": account, "create_time": create_time}
         return resp_200(data=data)
     except ExpiredSignatureError:
         return resp_401(message="The token has expired")
-    except (InvalidAlgorithmError,DecodeError):
+    except (InvalidAlgorithmError, DecodeError):
         return resp_400(message="Token decoding error")
     except Exception as e:
         return resp_400(message=f"Error in get user information.{e}")
 
 
 @router.post("/user/logout")
-async def get_token(request:Request):
+async def get_token(request: Request):
     token = request.headers.get("Authorization")
     try:
         decoded_payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
-        account = decoded_payload.get("username") 
+        account = decoded_payload.get("username")
         logger.info(f"账号:{account}注销成功")
         data = {"result": "注销成功"}
         return resp_200(data=data)
@@ -113,4 +112,4 @@ async def get_token(request:Request):
     except (InvalidAlgorithmError, DecodeError):
         return resp_400(message="Token decoding error")
     except Exception as e:
-        return resp_400(message=f"User logout error.{e}")
+        return resp_400(message=f"User logout error.{e}")

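verify_token_sync above collapses PyJWT's outcomes into the codes 0-4. A self-contained round trip showing the same exception mapping (the secret is a stand-in for the app's SECRET_KEY):

    # Sketch of the encode/decode cycle that verify_token_sync relies on.
    import datetime
    import jwt  # PyJWT
    from jwt.exceptions import ExpiredSignatureError, DecodeError, InvalidAlgorithmError

    SECRET = "demo-secret"  # stand-in for SECRET_KEY

    token = jwt.encode(
        {"username": "alice",
         "exp": datetime.datetime.utcnow() + datetime.timedelta(hours=1)},
        SECRET, algorithm="HS256",
    )

    try:
        payload = jwt.decode(token, SECRET, algorithms=["HS256"])
        print(0 if payload.get("username") else 2)  # 0 = valid, 2 = missing claim
    except ExpiredSignatureError:
        print(3)                                    # 3 = token expired
    except (InvalidAlgorithmError, DecodeError):
        print(4)                                    # 4 = malformed / wrong algorithm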
+ 3 - 1
core/respone_format.py

@@ -17,12 +17,14 @@ def resp_400(*, message: str = "Bad Request", data: Union[list, dict, str] = Non
         content={"code": 400, "message": message, "data": data}
     )
 
+
 def resp_401(*, message: str = "The token has expired", data: Union[list, dict, str] = None) -> JSONResponse:
     return JSONResponse(
         status_code=status.HTTP_401_UNAUTHORIZED,
         content={"code": 401, "message": message, "data": data}
     )
 
+
 def resp_404(*, message: str = "Not Found", data: Union[list, dict, str] = None) -> JSONResponse:
     return JSONResponse(
         status_code=status.HTTP_404_NOT_FOUND,
@@ -34,4 +36,4 @@ def resp_500(*, message: str = "Internal Server Error", data: Union[list, dict,
     return JSONResponse(
         status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
         content={"code": 500, "message": message, "data": data}
-    )
+    )

+ 8 - 10
data/get_all_exchange_words.py

@@ -16,15 +16,14 @@ all_prototype_deformation_dict = {}
 prototype_deformation_dict2 = {}
 
 for i in r:
-   
-    prototype,deformation,properties= [i[0],i[1],i[2]]
-   
-    all_exchange_words.update({prototype,deformation})
+
+    prototype, deformation, properties = [i[0], i[1], i[2]]
+
+    all_exchange_words.update({prototype, deformation})
 
     if properties == "原型":
         prototype_deformation_dict2[prototype] = deformation
 
-   
     if deformation not in all_prototype_deformation_dict:
         all_prototype_deformation_dict[deformation] = prototype
 
@@ -34,14 +33,13 @@ for i in r:
         all_exchange_words_dict[prototype].append(deformation)
 
 
-
-def word_to_prototype(word:str) -> str:
+def word_to_prototype(word: str) -> str:
     """依次按顺序查询。1.先查原型 2.最后小写再查变形对应的原型 3.再查变形对应的原型。这样才能保证,不过滤有特殊意义的大写"""
     if word in all_exchange_words_dict:
         return word
     elif word.lower() in all_exchange_words_dict:
         return word.lower()
-   
+
     elif word in all_prototype_deformation_dict:
         w = all_prototype_deformation_dict[word]
         if w in prototype_deformation_dict2:
@@ -53,10 +51,10 @@ def word_to_prototype(word:str) -> str:
 
 def get_word_exchange_list(word) -> list:
     prototype_word = word_to_prototype(word)
-    all_exchange_words_list = all_exchange_words_dict.get(prototype_word,[])
+    all_exchange_words_list = all_exchange_words_dict.get(prototype_word, [])
     return all_exchange_words_list
 
 
 if __name__ == '__main__':
     print(word_to_prototype("was"))
-    print(word_to_prototype("made"))
+    print(word_to_prototype("made"))

+ 5 - 6
data/get_frequency_script.py

@@ -3,16 +3,15 @@ from openpyxl.worksheet.worksheet import Worksheet
 from openpyxl import load_workbook
 import json
 
-wb = load_workbook(r"单词词义表.xlsx",read_only=True)
+wb = load_workbook(r"单词词义表.xlsx", read_only=True)
 word_dict = {}
 ws: Worksheet = wb["Sheet1"]
 for row in ws.values:
-    _,word,frequency = row
+    _, word, frequency = row
     frequency = int(frequency)
-   
+
     word_dict[frequency] = word
 wb.close()
-with open("json_word_frequency.json",mode="w",encoding="utf-8") as f:
-   
+with open("json_word_frequency.json", mode="w", encoding="utf-8") as f:
     write_data = json.dumps(word_dict)
-    f.write(write_data)
+    f.write(write_data)

Diff suppressed because the file is too large
+ 0 - 0
data/json_word_frequency.json


+ 13 - 17
deepseek/ds_api.py

@@ -6,8 +6,6 @@ import os
 from tools.loglog import SimpleLogger
 
 
-
-
 class DS:
     def __init__(self):
         self.client = OpenAI(
@@ -16,7 +14,7 @@ class DS:
         )
         self.logger = SimpleLogger(base_file_name="deepseek")
 
-    def write_log(self, message:str, log_type="info"):
+    def write_log(self, message: str, log_type="info"):
         """写入日志"""
         log_methods = {
             "warning": self.logger.warning,
@@ -35,9 +33,9 @@ class DS:
             self.write_log(f"Response validation error: {e}", log_type="error")
             return False
 
-    def get_article(self, user_prompt: str, sys_prompt: str = None, temperature: float = 0.8, 
-                   json_resp: bool = False, real_ip: str = "", demo_name: str = "", 
-                   max_tokens: int = 5192) -> str:
+    def get_article(self, user_prompt: str, sys_prompt: str = None, temperature: float = 0.8,
+                    json_resp: bool = False, real_ip: str = "", demo_name: str = "",
+                    max_tokens: int = 5192) -> str:
         """获取AI生成的文章
         
         Args:
@@ -56,29 +54,27 @@ class DS:
         if sys_prompt:
             messages.append({'role': 'system', 'content': sys_prompt})
         messages.append({'role': 'user', 'content': user_prompt})
-        
+
         response_format = {"type": "json_object"} if json_resp else {"type": "text"}
-        
-       
+
         resp = ""
         for _ in range(3):
             completion = self.client.chat.completions.create(
-                model="deepseek-v3", 
+                model="deepseek-v3",
                 messages=messages,
                 temperature=temperature,
                 response_format=response_format,
-                max_tokens=max_tokens 
+                max_tokens=max_tokens
             )
             resp = completion.choices[0].message.content
             if self.check_article_response(resp):
                 break
-        
-       
+
         if sys_prompt and resp:
             self.write_log(sys_prompt)
         self.write_log(user_prompt)
         self.write_log(resp)
-        
+
         return resp
 
 
@@ -100,11 +96,11 @@ if __name__ == '__main__':
 
 """
     ds = DS()
-    resp = ds.get_article(user_prompt=p,json_resp=True)
+    resp = ds.get_article(user_prompt=p, json_resp=True)
     print(resp)
     print()
 
-    print(resp.replace(r'\"n','\n').replace(r"\\n",'\n'))
+    print(resp.replace(r'\"n', '\n').replace(r"\\n", '\n'))
     print()
 
-    print(json.loads(resp))
+    print(json.loads(resp))

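The DS constructor (its api_key/base_url arguments fall outside the hunks above) evidently points the OpenAI SDK at an OpenAI-compatible endpoint serving deepseek-v3. A minimal sketch of that pattern; the endpoint URL and environment variable name are assumptions:

    # Sketch: OpenAI SDK against an OpenAI-compatible DeepSeek endpoint.
    import os
    from openai import OpenAI

    client = OpenAI(
        api_key=os.getenv("DEEPSEEK_API_KEY"),               # assumed variable name
        base_url="https://example-provider/compatible-mode/v1",  # placeholder endpoint
    )
    completion = client.chat.completions.create(
        model="deepseek-v3",
        messages=[{"role": "user", "content": "hello"}],
        response_format={"type": "text"},
        max_tokens=256,
    )
    print(completion.choices[0].message.content)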
+ 25 - 37
deepseek/get_article3.py

@@ -43,11 +43,14 @@ def find_interval(number):
     :param number: 要检查的数字。
     :return: 返回包含该数字的区间,如果没有找到,则返回 None。
     """
-    intervals = [(1, 200), (201, 250), (251, 300), (301, 350), (351, 400), (401, 450), (451, 550), (551, 650), (651, 750), (751, 850), (851, 950),
+    intervals = [(1, 200), (201, 250), (251, 300), (301, 350), (351, 400), (401, 450), (451, 550), (551, 650), (651, 750), (751, 850),
+                 (851, 950),
                  (951, 1100),
-                 (1101, 1250), (1251, 1400), (1401, 1550), (1551, 1700), (1701, 1900), (1901, 2100), (2101, 2300), (2301, 2600), (2601, 2900),
+                 (1101, 1250), (1251, 1400), (1401, 1550), (1551, 1700), (1701, 1900), (1901, 2100), (2101, 2300), (2301, 2600),
+                 (2601, 2900),
                  (2901, 3200),
-                 (3201, 3500), (3501, 3900), (3901, 4300), (4301, 4700), (4701, 5100), (5101, 5500), (5501, 5900), (5901, 6500), (6501, 99999)]
+                 (3201, 3500), (3501, 3900), (3901, 4300), (4301, 4700), (4701, 5100), (5101, 5500), (5501, 5900), (5901, 6500),
+                 (6501, 99999)]
     for index, (start, end) in enumerate(intervals, start=1):
         if start <= number <= end:
             return index
@@ -58,23 +61,20 @@ def find_interval(number):
 def parse_question(question_block):
     question_info = {}
 
-   
     question_match = re.search(r'问题:\s*(.*)', question_block)
     if question_match:
         question_info['trunk'] = question_match.group(1).strip()
 
-   
     analysis_match = re.search(r'解析:\s*(.*)', question_block)
     if analysis_match:
         question_info['analysis'] = analysis_match.group(1).strip()
 
-   
     options_match = re.search(r'选项:(.*)', question_block)
     if options_match:
         options_text = options_match.group(1).strip()
         options_list = re.split(r'\s*[BCDA]\.\s*', options_text)[1:]
         candidates = []
-        for i, option_text in enumerate(options_list, start=65): 
+        for i, option_text in enumerate(options_list, start=65):
             label = chr(i)
             text = option_text.strip()
             candidates.append({
@@ -84,7 +84,6 @@ def parse_question(question_block):
             })
         question_info['candidates'] = candidates
 
-   
     answer_match = re.search(r'答案:([ABCD])', question_block)
     if answer_match and 'candidates' in question_info:
         correct_label = answer_match.group(1)
@@ -101,27 +100,23 @@ class GetArticle:
         self.ds = DS()
 
         self.callback_url_dict = {}
-        self.real_ip_dict = {} 
+        self.real_ip_dict = {}
         self.demo_name = {}
 
-       
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)
 
-
-   
     def parser_insert_to_mysql(self, resp_result):
         for single_article in resp_result['articles']:
-           
+
             article = single_article['body']
             article_json = json.dumps(single_article)
-            difficult_value = find_interval(get_article_difficulty(article)) 
+            difficult_value = find_interval(get_article_difficulty(article))
             if not difficult_value:
                 logger.error("文章难度等级为0;")
             sql = "INSERT INTO spring_bamboo_article (article_json,difficult_level) VALUES (%s,%s)"
             self.m.execute_(sql, (article_json, difficult_value))
 
-   
     def submit_task(self, words_meaning_list: list, take_count: int, student_stage: int, real_ip: str, demo_name: str):
         """
         words_meaning_ids: 词义id 包含词义ID的数组集合,用于生成文章。- 示例:[110, 111, 112, 113, 114]
@@ -130,7 +125,7 @@ class GetArticle:
         demo_name: 项目名称
         """
         task_id = randint(10000000, 99999999)
-       
+
         words_meaning_str = ";".join([i["spell"] + ":" + i["meaning"] for i in words_meaning_list])
         logger.info(f"生成文章id。task_id:{task_id}。词义组:{words_meaning_str}.")
 
@@ -138,9 +133,9 @@ class GetArticle:
         self.demo_name[task_id] = demo_name
 
         try:
-           
+
             resp_result = self.run_task(words_meaning_list, task_id, take_count, student_stage)
-            self.parser_insert_to_mysql(resp_result) 
+            self.parser_insert_to_mysql(resp_result)
             return resp_result
         except Exception as e:
             err_msg = f"GetArticle提交任务失败{type(e).__name__},{e}"
@@ -148,31 +143,28 @@ class GetArticle:
 
             return err_msg
 
-   
     def get_article(self, words_meaning_list, student_stage, task_id, take_count) -> dict:
         diffculty_control = {
             1: {"grade": "小学", "article_word_count": 60, "desc_difficulty": "最简单最容易没有难度", "paragraph_count": 1,
                 "desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
-                "choice_desc":"选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
+                "choice_desc": "选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
             2: {"grade": "初中", "article_word_count": 200, "desc_difficulty": "简单、常见、难度低", "paragraph_count": 3,
                 "desc2": "文章整体难度适中,大约和中国初中生,中国CET-3,雅思4分这样的难度标准。",
-                "choice_desc":"选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
+                "choice_desc": "选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
             3: {"grade": "高中", "article_word_count": 300, "desc_difficulty": "常见、高中难度的", "paragraph_count": 3,
                 "desc2": "文章整体难度适中,大约和中国的高中生,中国CET-4,雅思5分这样的难度标准。",
-                "choice_desc":"选择题难度偏难,要有迷惑性,不要出现直接在文中找到答案,参考中国高中生水平,高考标准。"}
+                "choice_desc": "选择题难度偏难,要有迷惑性,不要出现直接在文中找到答案,参考中国高中生水平,高考标准。"}
         }
         take_count_dict = {0: "", 1: "一", 2: "二", 3: "三", 4: "四", 5: "五", 6: "六", 7: "七", 8: "八"}
         different_cou = take_count_dict.get(take_count, "")
 
-        grade = diffculty_control[student_stage]["grade"] 
-        select_word_count = diffculty_control[student_stage]["article_word_count"] 
-        select_diffculty = diffculty_control[student_stage]["desc_difficulty"] 
-        select_paragraph_count = diffculty_control[student_stage]["paragraph_count"] 
-        desc2 = diffculty_control[student_stage]["desc2"] 
-        choice_desc = diffculty_control[student_stage]["choice_desc"] 
-       
+        grade = diffculty_control[student_stage]["grade"]
+        select_word_count = diffculty_control[student_stage]["article_word_count"]
+        select_diffculty = diffculty_control[student_stage]["desc_difficulty"]
+        select_paragraph_count = diffculty_control[student_stage]["paragraph_count"]
+        desc2 = diffculty_control[student_stage]["desc2"]
+        choice_desc = diffculty_control[student_stage]["choice_desc"]
 
-       
         shuffle(words_meaning_list)
         words_meaning_str = ";".join([i["spell"] + ":" + i["meaning"] for i in words_meaning_list])
 
@@ -195,9 +187,9 @@ class GetArticle:
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(self.ds.get_article(q, temperature=1, json_resp=True, real_ip=real_ip, demo_name=demo_name,max_tokens=8000))
+            r_json = json.loads(
+                self.ds.get_article(q, temperature=1, json_resp=True, real_ip=real_ip, demo_name=demo_name, max_tokens=8000))
 
-           
             r_json["body"] = r_json["title"] + "\n\n" + r_json["english"]
             del r_json["title"]
 
@@ -219,12 +211,11 @@ class GetArticle:
 
             resp_text = self.ds.get_article(q_choice_question, temperature=1, real_ip=real_ip, demo_name=demo_name, max_tokens=8000)
             questions = resp_text.strip().split('\n\n')
-           
+
             parsed_questions = [parse_question(q) for q in questions]
 
             json_data = {"questions": parsed_questions}
 
-           
             allWordAmount = 0
             allWordAmount += len(split_text_to_word(r_json["english"]))
             for i in json_data["questions"]:
@@ -239,8 +230,6 @@ class GetArticle:
         except Exception as e:
             logger.error(f"gpt生成文章回复其他错误.{type(e).__name__} {e}")
 
-
-   
     def run_get_article_task(self, words_meaning_list, task_id, take_count, student_stage) -> dict:
         """
         :param words_meaning_list: 数据库内查出来的单词和词义的列表
@@ -258,7 +247,6 @@ class GetArticle:
             return_json["articles"].append(t.result())
         return return_json
 
-   
     def run_task(self, words_meaning_list, task_id, take_count, student_stage):
         try:
             outside_json = self.run_get_article_task(words_meaning_list, task_id, take_count, student_stage)

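find_interval above buckets a difficulty score into 1-based bands. Because the intervals are sorted and contiguous, the same lookup can be done with bisect over the upper bounds; a sketch equivalent to the table in the diff:

    # Equivalent interval lookup via bisect; bounds copied from find_interval.
    import bisect

    UPPER_BOUNDS = [200, 250, 300, 350, 400, 450, 550, 650, 750, 850, 950, 1100,
                    1250, 1400, 1550, 1700, 1900, 2100, 2300, 2600, 2900, 3200,
                    3500, 3900, 4300, 4700, 5100, 5500, 5900, 6500, 99999]

    def find_interval_fast(number: int):
        """Return the 1-based band containing number, or None when out of range."""
        if number < 1 or number > UPPER_BOUNDS[-1]:
            return None
        return bisect.bisect_left(UPPER_BOUNDS, number) + 1

    assert find_interval_fast(1) == 1
    assert find_interval_fast(201) == 2
    assert find_interval_fast(6501) == 31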
+ 53 - 105
gpt/chatgpt.py

@@ -4,14 +4,15 @@ if __name__ == '__main__':
 
     os.chdir("..")
 
-import time
-from typing import Dict, Any, Union
-
-import httpx
 import requests
-
+import random
+import json
+import time
 from tools.loglog import logger, simple_logger, log_err_e
 from tools.new_mysql import MySQLUploader
+from typing import Optional, Dict, Any, Union
+import httpx
+import asyncio
 
 m = MySQLUploader()
 
@@ -94,7 +95,7 @@ def get_answer_from_gpt(question, real_ip="localhost", demo_name="无", model="g
 
 
 def get_article_gpt_pydantic(question, real_ip="localhost", demo_name="无", model="gpt-4.1", max_tokens=3500, temperature: float = 0, n=1,
-                             check_fucn=None, sys_prompt=None):
+                             check_fucn=None, sys_prompt=None, task_id=0, exercise_id=0):
     """
     异步获取文章
     :param question: 问题
@@ -106,96 +107,36 @@ def get_article_gpt_pydantic(question, real_ip="localhost", demo_name="无", mod
     :param n: 生成数量
     :param check_fucn: 校验函数
     :param sys_prompt: 系统提示
+    :param task_id: 任务id
+    :param exercise_id: 学案id
+
     :return: 文章内容
     """
 
-    d2 = {"model": model, "messages": [], "max_tokens": max_tokens, "temperature": temperature, "n": n, "response_format": {'type': 'json_schema',
-                                                                                                                            'json_schema': {
-                                                                                                                                'name': 'Article',
-                                                                                                                                'schema': {'$defs': {
-                                                                                                                                    'Candidate': {
-                                                                                                                                        'properties': {
-                                                                                                                                            'label': {
-                                                                                                                                                'title': 'Label',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'text': {
-                                                                                                                                                'title': 'Text',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'isRight': {
-                                                                                                                                                'title': 'Isright',
-                                                                                                                                                'type': 'integer'}},
-                                                                                                                                        'required': [
-                                                                                                                                            'label',
-                                                                                                                                            'text',
-                                                                                                                                            'isRight'],
-                                                                                                                                        'title': 'Candidate',
-                                                                                                                                        'type': 'object'},
-                                                                                                                                    'DifficultSentence': {
-                                                                                                                                        'properties': {
-                                                                                                                                            'english': {
-                                                                                                                                                'title': 'English',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'chinese': {
-                                                                                                                                                'title': 'Chinese',
-                                                                                                                                                'type': 'string'}},
-                                                                                                                                        'required': [
-                                                                                                                                            'english',
-                                                                                                                                            'chinese'],
-                                                                                                                                        'title': 'DifficultSentence',
-                                                                                                                                        'type': 'object'},
-                                                                                                                                    'Question': {
-                                                                                                                                        'properties': {
-                                                                                                                                            'trunk': {
-                                                                                                                                                'title': 'Trunk',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'analysis': {
-                                                                                                                                                'title': 'Analysis',
-                                                                                                                                                'type': 'string'},
-                                                                                                                                            'candidates': {
-                                                                                                                                                'items': {
-                                                                                                                                                    '$ref': '#/$defs/Candidate'},
-                                                                                                                                                'title': 'Candidates',
-                                                                                                                                                'type': 'array'}},
-                                                                                                                                        'required': [
-                                                                                                                                            'trunk',
-                                                                                                                                            'analysis',
-                                                                                                                                            'candidates'],
-                                                                                                                                        'title': 'Question',
-                                                                                                                                        'type': 'object'}},
-                                                                                                                                           'properties': {
-                                                                                                                                               'difficultSentences': {
-                                                                                                                                                   'items': {
-                                                                                                                                                       '$ref': '#/$defs/DifficultSentence'},
-                                                                                                                                                   'title': 'Difficultsentences',
-                                                                                                                                                   'type': 'array'},
-                                                                                                                                               'usedMeanIds': {
-                                                                                                                                                   'items': {
-                                                                                                                                                       'type': 'integer'},
-                                                                                                                                                   'title': 'Usedmeanids',
-                                                                                                                                                   'type': 'array'},
-                                                                                                                                               'questions': {
-                                                                                                                                                   'items': {
-                                                                                                                                                       '$ref': '#/$defs/Question'},
-                                                                                                                                                   'title': 'Questions',
-                                                                                                                                                   'type': 'array'},
-                                                                                                                                               'englishArticle': {
-                                                                                                                                                   'title': 'Englisharticle',
-                                                                                                                                                   'type': 'string'},
-                                                                                                                                               'chineseArticle': {
-                                                                                                                                                   'title': 'Chinesearticle',
-                                                                                                                                                   'type': 'string'},
-                                                                                                                                               'allWordAmount': {
-                                                                                                                                                   'title': 'Allwordamount',
-                                                                                                                                                   'type': 'integer'}},
-                                                                                                                                           'required': [
-                                                                                                                                               'difficultSentences',
-                                                                                                                                               'usedMeanIds',
-                                                                                                                                               'questions',
-                                                                                                                                               'englishArticle',
-                                                                                                                                               'chineseArticle',
-                                                                                                                                               'allWordAmount'],
-                                                                                                                                           'title': 'Article',
-                                                                                                                                           'type': 'object'}}}}
+    d2 = {"model": model, "messages": [], "max_tokens": max_tokens, "temperature": temperature, "n": n,
+          "response_format": {"type": "json_schema", 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {
+              'properties': {'label': {'description': 'ABCD序号的一种', 'title': '序号', 'type': 'string'},
+                             'text': {'description': '英文,ABCD选项的文本', 'title': '选项文本', 'type': 'string'},
+                             'isRight': {'description': '1是正确,0是错误', 'title': '是否是正确答案', 'type': 'integer'}},
+              'required': ['label', 'text', 'isRight'], 'title': 'Candidate', 'type': 'object'}, 'DifficultSentence': {
+              'properties': {'english': {'description': '文章中的一句难句', 'title': '英语难句', 'type': 'string'},
+                             'chinese': {'description': '对英语难句的翻译', 'title': '中文难句', 'type': 'string'}},
+              'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {
+              'properties': {'trunk': {'description': '用英语给出的选择题题目', 'title': '选择题题目', 'type': 'string'},
+                             'analysis': {'description': '中文,选择题答案的分析思路', 'title': '选择题分析', 'type': 'string'},
+                             'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': '选项对象', 'type': 'array'}},
+              'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {
+              'difficultSentences': {'description': '挑选一句难句对象', 'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': '难句对象',
+                                     'type': 'array'},
+              'usedMeanIds': {'items': {'type': 'integer'}, 'title': '用到的词义id', 'type': 'array'},
+              'questions': {'description': '针对英语文章的选择题', 'items': {'$ref': '#/$defs/Question'}, 'title': '问题对象',
+                            'type': 'array'}, 'englishArticle': {'description': '', 'title': '英语文章', 'type': 'string'},
+              'chineseArticle': {'description': '', 'title': '中文翻译', 'type': 'string'}}, 'required': ['difficultSentences',
+                                                                                                          'usedMeanIds', 'questions',
+                                                                                                          'englishArticle',
+                                                                                                          'chineseArticle'],
+                                                                                                   'title': 'Article', 'type': 'object'}}}
+          }
     if sys_prompt:
         d2['messages'].append({"role": "system", "content": sys_prompt})
     d2['messages'].append({"role": "user", "content": question})
@@ -204,7 +145,7 @@ def get_article_gpt_pydantic(question, real_ip="localhost", demo_name="无", mod
         try:
             response = requests.post('http://170.106.108.95/v1/chat/completions', json=d2)
             r_json = response.json()
-            simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{r_json}")
+            simple_logger.info(f"问题日志task_id:{task_id},exercise_id:{exercise_id}:\n{question}\n回答日志:\n{r_json}")
             return r_json
 
             #
@@ -246,21 +187,28 @@ def parse_gpt_phon_to_tuplelist(text: str) -> list:
 
 
 if __name__ == '__main__':
-    question = "hello"
 
-    sys_prompt = "你是一个专业的英语老师,擅长根据用户提供的词汇生成对应的英语文章和中文翻译和4个配套选择题。"
-    q = """下面我会为你提供两组数据,[单词组1]和[单词组2](里面包含词义id,英语单词,中文词义),优先使用[单词组1]内的单词,请根据这些单词的中文词义,生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
+    def get_article():
+        """可以在这里测试提示词的好坏"""
 
+        sys_prompt = "你是一个专业的英语老师,擅长根据用户提供的词汇生成对应的英语文章和中文翻译和4个配套选择题。注意:生成的文章用到提供的单词,其词义一定要是提供的中文词义,不要使用同个单词,与提供词义不同的单词。例如:单词might的词义可能有[1.可能 2.强大力量]两种词义,不要混用同个单词的词义,一定要按照提供词义来使用。"
+        q = """下面我会为你提供一组数据,[单词组](里面包含词义id,英语单词,中文词义),请根据这些单词的中文词义,生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
+    
 要求:
-1.必须用提供的这个词义的单词,其他单词使用常见、高中难度的的单词。文章整体难度适中,大约和中国的高中生,中国CET-6,雅思6分这样的难度标准。
+1.必须用提供的这个词义的单词,其他单词使用最简单最容易没有难度的单词。文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。选择题难度尽可能简单,参考中国小学生水平
 2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
 3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
-4.生成的文章要求600词左右,可以用\\n\\n字符分段,一般5个段落左右。第一段是文章标题。
-5.生成文章优先使用[单词组1]的词义,其次可以挑选使用[单词组2]的词义。允许不使用[单词组1]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
+4.生成的文章要求70词左右,可以用\\n\\n字符分段,一般1-2个段落左右。第一段是文章标题。
+5.允许不使用[单词组]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
+6.注意回复字段的中英文,englishArticle是英文,chineseArticle是中文,其中trunk是英文,analysis是中文,text是英文。
 
-提供[单词组1]:4238 penalty:惩罚, 刑罚;4591 bare:赤裸的, 无遮蔽的;4227 stable:畜舍, 马厩;4236 psychology:心理学;4245 offense:进攻, 攻势, 冒犯, 触怒, 过错;4237 innocent:清白的, 无辜的, 天真的;4228 refrigerator:冰箱, 冷库;4247 tissue:(动植物)组织;4250 awareness:察觉, 觉悟, 意识;4234 mode:方式, 模式;4224 neat:整洁, 利索;4225 statistics:统计;4251 random:任意的, 随机的;4201 laundry:洗衣房;4545 barrel:桶, 一桶之量;4249 recruit:招募, 新成员;4229 pregnant:怀孕的, 孕育的;4235 relevant:有关的, 相关联的;4252 incentive:刺激, 激励, 鼓励;4194 grave:坟墓, 墓穴;
-提供[单词组2]:;
+提供[单词组]:[260  important 重要的]; [287  off 离开, 从…下去]; [74  get 变得]; [251  change 零钱]; [219  put 放]; [212  feel 触, 摸]; [239  few 很少的, 几乎没有的]; [262  still 仍然]; [283  country 国家]; [270  part 部分, 部件];
 """
-    resp = get_answer_from_gpt(question=question, temperature=0.9, sys_prompt=sys_prompt, model="gpt-4.1")
-    print(type(resp))
-    print(resp)
+        gpt_resp = get_article_gpt_pydantic(question=q, temperature=1.2, sys_prompt=sys_prompt, model="gpt-4.1", task_id=888,
+                                            exercise_id=999, n=4)
+
+        return_json = {"articles": []}
+        for choice in gpt_resp["choices"]:
+            single_article_dict = json.loads(choice["message"]["content"])
+            return_json["articles"].append(single_article_dict)
+        return return_json
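
For reviewers, a hedged sketch of consuming the multi-completion response that get_article_gpt_pydantic returns (field names taken from the code above; the skip-on-malformed behaviour is an assumption, not what this commit does):

    import json

    def parse_choices(gpt_resp: dict) -> list[dict]:
        # Each choice carries one JSON-encoded article; skip malformed entries
        # instead of failing the whole batch.
        articles = []
        for choice in gpt_resp.get("choices", []):
            try:
                articles.append(json.loads(choice["message"]["content"]))
            except (KeyError, json.JSONDecodeError):
                continue
        return articles
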

+ 100 - 125
gpt/get_article.py

@@ -7,7 +7,7 @@ from tools.loglog import logger
 from tools.thread_pool_manager import pool_executor
 from common.common_data import all_exchange_words
 from common.split_text import *
-from data.get_all_exchange_words import get_word_exchange_list,word_to_prototype
+from data.get_all_exchange_words import get_word_exchange_list, word_to_prototype
 
 import requests
 import oss2
@@ -25,13 +25,13 @@ import traceback
 class OtherBaseFunction:
     def __init__(self):
         self.m = MySQLUploader()
-        self.fake_meaningid = {} 
+        self.fake_meaningid = {}
 
-        self.callback_url_dict = {} 
-        self.real_ip_dict = {} 
-        self.demo_name = {} 
-        self.query_cache_wordspelling = TTLCache(maxsize=2000, ttl=86400) 
-        self.query_cache_meaningid = TTLCache(maxsize=2000, ttl=86400) 
+        self.callback_url_dict = {}
+        self.real_ip_dict = {}
+        self.demo_name = {}
+        self.query_cache_wordspelling = TTLCache(maxsize=2000, ttl=86400)
+        self.query_cache_meaningid = TTLCache(maxsize=2000, ttl=86400)
 
     @staticmethod
     def _diffculty_control(student_stage, vocabulary) -> dict:
@@ -42,17 +42,17 @@ class OtherBaseFunction:
         :return:
         """
         if vocabulary <= 1200:
-            difficult_control = {"difficult_desc": "最简单最基础的入门的初级的幼儿园的毫无难度的", "paragraph_count": 1,"student_stage_str":"小学",
+            difficult_control = {"difficult_desc": "最简单最基础的入门的初级的幼儿园的毫无难度的", "paragraph_count": 1,
+                                 "student_stage_str": "小学",
                                  "pragrapg_count": "生成的文章要求100词左右,三个段落以上。允许有简单句式的出现。"}
         elif 1200 < vocabulary <= 2400:
-            difficult_control = {"difficult_desc": "简单的容易的常见的难度低的", "paragraph_count": 3,"student_stage_str":"初中",
+            difficult_control = {"difficult_desc": "简单的容易的常见的难度低的", "paragraph_count": 3, "student_stage_str": "初中",
                                  "pragrapg_count": r"生成的文章要求150词左右,三个段落以上。用\n\n分段。"}
         else:
-            difficult_control = {"difficult_desc": "常见的初级的中国高考的", "paragraph_count": 5,"student_stage_str":"高中",
+            difficult_control = {"difficult_desc": "常见的初级的中国高考的", "paragraph_count": 5, "student_stage_str": "高中",
                                  "pragrapg_count": r"生成的文章要求250词左右,允许有3-5个段落。用\n\n分段。"}
         return difficult_control
 
-   
     def _get_article_chinese_dict(self, title, r_article_sentences, task_id):
         """
         获取文章的中文翻译。注意:这里切割的方法要与后面的split_article_make_json一致
@@ -86,41 +86,37 @@ class OtherBaseFunction:
 
             logger.critical("严重错误:gpt生成文章中文翻译三次全错,请管理员检查")
 
-       
         article_list = [title + "\n\n"] + r_article_sentences
 
-       
         r_article_chinese_dict = get_chinese_from_gpt(whole_article_sentences=article_list)
-       
+
         if r_article_chinese_dict:
             return r_article_chinese_dict
 
-   
     @staticmethod
     def _calculate_new_word_rate(r_article_sentences):
         article = "".join(r_article_sentences)
-        new_words = set() 
+        new_words = set()
         test_article = re.findall(r'\b\w+\'?\w*\b', article)
         for word in test_article:
             word2: str = word.split("'")[0] if "'" in word else word
-            if len(word) <= 2: 
+            if len(word) <= 2:
                 continue
             is_in_12000words = any([word2.lower() in all_exchange_words, word2.title() in all_exchange_words])
             if not is_in_12000words:
                 new_words.add(word)
         new_word_rate = round(len(new_words) / len(article), 3)
         logger.info(f"开发调试生词率{new_word_rate}.生词{new_words}")
-       
+
         new_words = list(new_words)
         return new_word_rate, new_words
 
-   
     def insert_article_to_mysql(self, title, article, chinese, task_id, code=0):
-       
+
         self.m.execute_("INSERT INTO new_word_article (title,article,chinese, taskId,code) VALUES (%s, %s,%s,%s,%s)",
                         (title, article, chinese, task_id, code))
 
-    def get_wordid_by_wordspelling(self, wordspelling:str):
+    def get_wordid_by_wordspelling(self, wordspelling: str):
         """加一个功能。大字典内没有这个单词就自动插入,返回id"""
         if wordspelling in self.query_cache_meaningid:
             return self.query_cache_wordspelling[wordspelling]
@@ -129,16 +125,16 @@ class OtherBaseFunction:
         prototype_word = word_to_prototype(wordspelling)
         r = self.m.query_data(s, (prototype_word,))
         if r:
-           
+
             wordid = r[0][0]
         else:
-           
+
             wordid = 0
 
         self.query_cache_wordspelling[wordspelling] = wordid
         return wordid
 
-    def get_meaning_by_meaningid(self, meaningid:int):
+    def get_meaning_by_meaningid(self, meaningid: int):
         """加一个功能。大字典内没有这个单词就自动插入,返回id"""
         if meaningid in self.query_cache_meaningid:
             return self.query_cache_meaningid[meaningid]
@@ -149,7 +145,7 @@ class OtherBaseFunction:
         self.query_cache_meaningid[meaningid] = meaning
         return meaning
 
-    def _get_fake_meaningid(self,word):
+    def _get_fake_meaningid(self, word):
         """获得假词义id。但是保证同一个单词是一个id"""
         if word in self.fake_meaningid:
             return self.fake_meaningid[word]
@@ -158,33 +154,31 @@ class OtherBaseFunction:
         if r:
             fake_meaningid = r[0][0]
         else:
-            fake_meaningid = random.randint(10000,99999) 
+            fake_meaningid = random.randint(10000, 99999)
 
         self.fake_meaningid[word] = fake_meaningid
         return fake_meaningid
 
-   
     @staticmethod
-    def _clean_gpt_res(single_sentence: str, gpt_text: str,split_words:list) -> list:
+    def _clean_gpt_res(single_sentence: str, gpt_text: str, split_words: list) -> list:
         """# 解析成  键是句子+单词拼写,值是词义id"""
         return_data = []
         if not gpt_text:
             return []
 
-        row_data = [i for i in gpt_text.split("\n") if "**" in i] 
+        row_data = [i for i in gpt_text.split("\n") if "**" in i]
 
         already_spelling = set()
         for row in row_data:
             one_row_data_list = row.split("**")
-            if len(one_row_data_list) < 1: 
+            if len(one_row_data_list) < 1:
                 continue
-            one_row_data_list = [i.strip() for i in one_row_data_list] 
+            one_row_data_list = [i.strip() for i in one_row_data_list]
             spelling, meaning_id = one_row_data_list[0:2]
 
             already_spelling.add(spelling)
             return_data.append([single_sentence, spelling, int(meaning_id)])
 
-       
         for remaining_word in set(split_words).difference(already_spelling):
             return_data.append([single_sentence, remaining_word, 0])
 
@@ -197,17 +191,16 @@ class GetArticle(OtherBaseFunction):
         self.auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
         self.bucket = oss2.Bucket(self.auth, 'oss-cn-hangzhou.aliyuncs.com', 'qingti-private')
 
-        self.article_result = {} 
+        self.article_result = {}
 
-       
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)
 
-    def __del__(self):...
+    def __del__(self):
+        ...
 
-   
-    def submit_task(self, words_meaning_ids: list[int],callback_url:str,real_ip:str,demo_name:str,
-                    student_stage:int,vocabulary:int,class_id:int):
+    def submit_task(self, words_meaning_ids: list[int], callback_url: str, real_ip: str, demo_name: str,
+                    student_stage: int, vocabulary: int, class_id: int):
         """
         words_meaning_ids: 词义id 包含词义ID的数组集合,用于生成文章。- 示例:[110, 111, 112, 113, 114]
         callback_url: 通知的回调地址
@@ -218,14 +211,13 @@ class GetArticle(OtherBaseFunction):
         task_id = randint(10000000, 99999999)
         logger.info(f"生成文章id。task_id:{task_id}。词义id:{words_meaning_ids}.")
 
-       
         self.callback_url_dict[task_id] = callback_url
         self.real_ip_dict[task_id] = real_ip
         self.demo_name[task_id] = demo_name
 
         words_meaning_str = ""
         for wordmeaning_id in words_meaning_ids:
-            r = self.m.query_data("select WordSpelling,WordMeaning from dictionary_meaningitem where Id = %s",(wordmeaning_id,))
+            r = self.m.query_data("select WordSpelling,WordMeaning from dictionary_meaningitem where Id = %s", (wordmeaning_id,))
             try:
                 words_meaning_str += str(r[0])
             except IndexError:
@@ -234,10 +226,10 @@ class GetArticle(OtherBaseFunction):
                 return err_msg
 
         try:
-           
-            pool_executor.submit(self.run_task, words_meaning_str, task_id,student_stage,vocabulary,class_id)
-           
-            resp_result = {"id":task_id,"key":f"study/article/{task_id}"}
+
+            pool_executor.submit(self.run_task, words_meaning_str, task_id, student_stage, vocabulary, class_id)
+
+            resp_result = {"id": task_id, "key": f"study/article/{task_id}"}
             logger.success(f"文章生成任务提交成功:{resp_result}")
             return resp_result
         except Exception as e:
@@ -245,9 +237,8 @@ class GetArticle(OtherBaseFunction):
             logger.error(err_msg)
             return err_msg
 
-   
-    def __get_article(self,words_meaning_str,task_id,student_stage,vocabulary) -> tuple:
-        dc = self._diffculty_control(student_stage,vocabulary)
+    def __get_article(self, words_meaning_str, task_id, student_stage, vocabulary) -> tuple:
+        dc = self._diffculty_control(student_stage, vocabulary)
         q = f"""你是一名在中国的英语教师,下面我会为你提供一些带中文词义的英语种子单词,请根据这些种子单词的词义,生成一篇带标题的英语文章。
 提供种子单词:{words_meaning_str}
 
@@ -261,17 +252,16 @@ class GetArticle(OtherBaseFunction):
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True,real_ip=real_ip,demo_name=demo_name))
+            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
             r_article_sentences = r_json.get("article_sentences")
             r_title = r_json.get("title")
-            return r_title,r_article_sentences
+            return r_title, r_article_sentences
         except json.decoder.JSONDecodeError:
             logger.error("gpt生成文章回复json格式化错误")
         except Exception as e:
             logger.error(f"gpt生成文章回复其他错误.{type(e).__name__} {e}")
 
-   
-    def __replace_new_word(self, old_article: str, new_words: list,task_id:int):
+    def __replace_new_word(self, old_article: str, new_words: list, task_id: int):
         new_words_str = ",".join(new_words)
         q = f"""你是一名在中国的英语教师,下面我会为你提供一篇英语文章和一些生词,请用其他单词使用简单、常见、难度低的单词将英语文章中的生词进行替换。
 缩写引号用单引号'。最终回复替换后的英语文章。
@@ -287,7 +277,7 @@ class GetArticle(OtherBaseFunction):
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True,real_ip=real_ip,demo_name=demo_name))
+            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
             print(f"调试信息2 {r_json}")
             r_article = r_json.get("article")
             r_title = r_json.get("title")
@@ -297,8 +287,7 @@ class GetArticle(OtherBaseFunction):
         except Exception as e:
             logger.error(f"gpt替换生词文章回复其他错误.{type(e).__name__} {e}")
 
-   
-    def run_get_article_task(self, words_meaning_str, task_id,student_stage,vocabulary) -> tuple:
+    def run_get_article_task(self, words_meaning_str, task_id, student_stage, vocabulary) -> tuple:
         """
         :param vocabulary:
         :param student_stage:
@@ -307,47 +296,40 @@ class GetArticle(OtherBaseFunction):
         :return: 标题,文章,句子翻译的字典
         """
 
-        def get_article_chinese(title,r_article_sentences,task_id,code=0)-> tuple:
+        def get_article_chinese(title, r_article_sentences, task_id, code=0) -> tuple:
             r_article_chinese_dict = self._get_article_chinese_dict(title, r_article_sentences, task_id)
             chinese_str = "\n".join(r_article_chinese_dict.values())
             r_article = "".join(r_article_sentences)
 
-            self.insert_article_to_mysql(title=r_title, article=r_article, chinese=chinese_str, task_id=task_id,code=code)
+            self.insert_article_to_mysql(title=r_title, article=r_article, chinese=chinese_str, task_id=task_id, code=code)
             return r_title, r_article_sentences, r_article_chinese_dict
 
-       
-        r_title,r_article_sentences = self.__get_article(words_meaning_str,task_id,student_stage,vocabulary)
+        r_title, r_article_sentences = self.__get_article(words_meaning_str, task_id, student_stage, vocabulary)
 
         new_word_rate, new_words = self._calculate_new_word_rate(r_article_sentences)
         if new_word_rate < 0.03:
             return get_article_chinese(title=r_title, r_article_sentences=r_article_sentences, task_id=task_id)
 
-       
         replace_article_gpt = "".join(r_article_sentences)
         for i in range(3):
-            if tuple_data:=self.__replace_new_word(old_article=replace_article_gpt, new_words=new_words,task_id=task_id):
-                r_title,replace_article_gpt = tuple_data
+            if tuple_data := self.__replace_new_word(old_article=replace_article_gpt, new_words=new_words, task_id=task_id):
+                r_title, replace_article_gpt = tuple_data
 
                 new_word_rate, new_words = self._calculate_new_word_rate(replace_article_gpt)
                 if new_word_rate < 0.03 or i == 2:
                     if i == 2:
                         logger.warning(f"3次后生词率未到3%以下。task_id:{task_id}")
-                    return get_article_chinese(title=r_title,r_article_sentences=r_article_sentences,task_id=task_id)
+                    return get_article_chinese(title=r_title, r_article_sentences=r_article_sentences, task_id=task_id)
 
-   
-    def split_article_make_json(self, task_id: int,title:str, r_article_sentences: list,r_article_chinese_dict:dict):
+    def split_article_make_json(self, task_id: int, title: str, r_article_sentences: list, r_article_chinese_dict: dict):
 
-       
         article = "".join(r_article_sentences)
         article = title + "\n\n" + article
 
-       
-        all_sentence_word_meaningid_dict = self.run_query_word_meaning(article,task_id)
+        all_sentence_word_meaningid_dict = self.run_query_word_meaning(article, task_id)
 
-       
-        word_count = get_article_words_count(title+article)
+        word_count = get_article_words_count(title + article)
 
-       
         create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         outside_json_dict = {"id": task_id, "body": article, "wordCount": word_count, "paragraphs": [],
                              "createTime": create_time}
@@ -355,17 +337,16 @@ class GetArticle(OtherBaseFunction):
         article_paragraphs = article.split("\n\n")
         article_sentence_count = 0
         for paragraph in article_paragraphs:
-            sentences = split_text_to_sentences(paragraph) 
+            sentences = split_text_to_sentences(paragraph)
 
             p = {"sentences": []}
             for single_sentence in sentences:
                 article_sentence_count += 1
-                single_sentence_chinese = r_article_chinese_dict.get(single_sentence,"")
-               
-                w = {"words": [],"chinese":single_sentence_chinese}
-                split_words:list[str] = re.findall(r'\b[-\'\w]+\b|[^\w\s]', single_sentence) 
+                single_sentence_chinese = r_article_chinese_dict.get(single_sentence, "")
+
+                w = {"words": [], "chinese": single_sentence_chinese}
+                split_words: list[str] = re.findall(r'\b[-\'\w]+\b|[^\w\s]', single_sentence)
 
-               
                 for originale_word in split_words:
                     single_word = originale_word
                     if not originale_word:
@@ -374,26 +355,24 @@ class GetArticle(OtherBaseFunction):
                         w["words"].append({"spell": originale_word, "type": "punctuation"})
                         continue
 
-                   
                     word_id = self.get_wordid_by_wordspelling(originale_word)
 
-                    x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + originale_word, [0,0])
-                    if type_ == 0: 
+                    x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + originale_word, [0, 0])
+                    if type_ == 0:
                         single_word = originale_word.lower()
-                        x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0,0])
-                        if type_ == 0: 
+                        x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0, 0])
+                        if type_ == 0:
                             single_word = word_to_prototype(single_word)
-                            x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word,[0,0])
+                            x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0, 0])
 
                     if type_ == 0:
                         logger.warning(f"警告:type_还是0,那就是二次查询时,也没有给词义。有漏下的单词{originale_word}")
                         continue
 
-                   
-                    if type_ == 1: 
+                    if type_ == 1:
                         meaning_id = x_data
                         meaning = self.get_meaning_by_meaningid(x_data)
-                    elif type_ == 2: 
+                    elif type_ == 2:
                         meaning_id = self._get_fake_meaningid(single_word)
                         meaning = x_data
                     else:
@@ -401,11 +380,10 @@ class GetArticle(OtherBaseFunction):
                         meaning_id = 9999999
                         meaning = '无'
 
-                   
                     word_prototype = word_to_prototype(originale_word)
 
-                    word_json = {"id": word_id, "meaningId": meaning_id,"meaning":meaning, "spell": originale_word,
-                                 "exchanges": get_word_exchange_list(word=single_word),"prototype": word_prototype}
+                    word_json = {"id": word_id, "meaningId": meaning_id, "meaning": meaning, "spell": originale_word,
+                                 "exchanges": get_word_exchange_list(word=single_word), "prototype": word_prototype}
                     w["words"].append(word_json)
 
                 p["sentences"].append(w)
@@ -413,10 +391,9 @@ class GetArticle(OtherBaseFunction):
             outside_json_dict["paragraphs"].append(p)
 
         outside_json_dict["articleSentenceCount"] = article_sentence_count
-        return outside_json_dict,word_count,article_sentence_count
+        return outside_json_dict, word_count, article_sentence_count
 
-   
-    def run_query_word_meaning(self, article,task_id):
+    def run_query_word_meaning(self, article, task_id):
         futures = []
         article_paragraphs = article.split("\n\n")
 
@@ -424,21 +401,20 @@ class GetArticle(OtherBaseFunction):
             sentences = split_text_to_sentences(paragraph)
 
             for single_sentence in sentences:
-                f = pool_executor.submit(self.query_word_meaning_from_gpt, single_sentence,task_id)
+                f = pool_executor.submit(self.query_word_meaning_from_gpt, single_sentence, task_id)
                 futures.append(f)
 
         wait(futures)
         all_sentence_word_meaningid_dict = {}
         for f in futures:
-            f_result = f.result() 
+            f_result = f.result()
             all_sentence_word_meaningid_dict.update(f_result)
         return all_sentence_word_meaningid_dict
 
-   
-    def query_word_meaning_from_gpt(self, single_sentence,task_id) -> dict:
+    def query_word_meaning_from_gpt(self, single_sentence, task_id) -> dict:
         """single_sentence 提交单个句子"""
         split_words = split_text_to_word(single_sentence)
-       
+
         split_words = [word_to_prototype(w) for w in split_words if w]
 
         placeholders = ', '.join(['%s'] * len(split_words))
@@ -476,20 +452,19 @@ apple ** 234567
 
         real_ip = self.real_ip_dict[task_id]
         demo_name = self.demo_name[task_id]
-        r_gpt = get_answer_from_gpt(q,real_ip=real_ip,demo_name=demo_name)
+        r_gpt = get_answer_from_gpt(q, real_ip=real_ip, demo_name=demo_name)
 
-       
-        already_data,need_twice_data = {},[]
+        already_data, need_twice_data = {}, []
 
-        three_list = self._clean_gpt_res(single_sentence, r_gpt,split_words)
+        three_list = self._clean_gpt_res(single_sentence, r_gpt, split_words)
 
         for sentence, spelling, meaning_id in three_list:
-           
+
             if meaning_id == 0:
                 need_twice_data.append([sentence, spelling, meaning_id])
             else:
-               
-                already_data[sentence + spelling] = [meaning_id,1]
+
+                already_data[sentence + spelling] = [meaning_id, 1]
 
         for _, spelling, _ in need_twice_data:
             need_twice_words = ",".join([spelling])
@@ -504,18 +479,17 @@ apple ** 234567
     回复示例:
     {{"单词":"中文词义",...}}
     """
-            r2 = get_answer_from_gpt(q2,real_ip=real_ip,demo_name=demo_name,json_resp=True)
-            r2_json:dict = json.loads(r2)
-            for w_spelling,chinese_meaning in r2_json.items():
-                already_data[single_sentence + w_spelling] = [chinese_meaning,2]
+            r2 = get_answer_from_gpt(q2, real_ip=real_ip, demo_name=demo_name, json_resp=True)
+            r2_json: dict = json.loads(r2)
+            for w_spelling, chinese_meaning in r2_json.items():
+                already_data[single_sentence + w_spelling] = [chinese_meaning, 2]
 
         return already_data
 
-   
-    def upload_json_file_to_oss(self,article_id:int,data_dict:dict):
+    def upload_json_file_to_oss(self, article_id: int, data_dict: dict):
         json_data = json.dumps(data_dict, ensure_ascii=False)
         object_name = f'study/article/{article_id}'
-        content = json_data.encode('utf-8') 
+        content = json_data.encode('utf-8')
         for _ in range(2):
             try:
                 r = self.bucket.put_object(object_name, content)
@@ -529,18 +503,18 @@ apple ** 234567
         else:
             logger.critical(f"2次上传oss错误,taskid:{article_id}")
 
-   
-    def notice_teach_system(self,article_id:int,class_id:int,word_count:int,article_sentence_count:int):
+    def notice_teach_system(self, article_id: int, class_id: int, word_count: int, article_sentence_count: int):
         url = self.callback_url_dict.get(article_id)
         if not url or "localhost/callback" in url:
             return False
 
-        json_data = {"classId": class_id,"articleId": article_id,"articleWordCount": word_count,"articleSentenceCount": article_sentence_count}
+        json_data = {"classId": class_id, "articleId": article_id, "articleWordCount": word_count,
+                     "articleSentenceCount": article_sentence_count}
         for _ in range(3):
             try:
-                r = requests.post(url,json=json_data)
+                r = requests.post(url, json=json_data)
                 r.raise_for_status()
-                self.callback_url_dict.pop(article_id,'')
+                self.callback_url_dict.pop(article_id, '')
                 logger.success(f"通知成功{r.text}")
                 return True
             except Exception as e:
@@ -548,23 +522,24 @@ apple ** 234567
 
         logger.critical(f"通知接口失败,三次全错. article_id:{article_id} callback_url:{url}")
 
-   
-    def clean_source(self,article_id):
+    def clean_source(self, article_id):
         self.callback_url_dict.pop(article_id, '')
         self.real_ip_dict.pop(article_id, '')
 
-   
-    def run_task(self,words_meaning_str, task_id,student_stage,vocabulary,class_id):
+    def run_task(self, words_meaning_str, task_id, student_stage, vocabulary, class_id):
         try:
-            title,r_article_sentences,r_article_chinese_dict = self.run_get_article_task(words_meaning_str, task_id,student_stage,vocabulary)
-
-            outside_json_dict,word_count,article_sentence_count = self.split_article_make_json(task_id,title,r_article_sentences,r_article_chinese_dict)
-            self.upload_json_file_to_oss(article_id=task_id,data_dict=outside_json_dict)
-            self.notice_teach_system(article_id=task_id,class_id=class_id,word_count=word_count,article_sentence_count=article_sentence_count)
+            title, r_article_sentences, r_article_chinese_dict = self.run_get_article_task(words_meaning_str, task_id, student_stage,
+                                                                                           vocabulary)
+
+            outside_json_dict, word_count, article_sentence_count = self.split_article_make_json(task_id, title, r_article_sentences,
+                                                                                                 r_article_chinese_dict)
+            self.upload_json_file_to_oss(article_id=task_id, data_dict=outside_json_dict)
+            self.notice_teach_system(article_id=task_id, class_id=class_id, word_count=word_count,
+                                     article_sentence_count=article_sentence_count)
             self.clean_source(article_id=task_id)
             logger.success(f"文章任务完成。taskid:{task_id}")
 
         except Exception as e:
             logger.error(f"{type(e).__name__} {e}")
             traceback_str = traceback.format_exc()
-            logger.error(f"外围错误追溯:{traceback_str}")
+            logger.error(f"外围错误追溯:{traceback_str}")
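
The "spelling ** meaning_id" line format that _clean_gpt_res parses is the same one shown in the "apple ** 234567" prompt example above. A standalone sketch of that parse, for reference (the defensive isdigit check is added here; the committed code casts directly):

    def parse_word_meaning_lines(gpt_text: str) -> dict[str, int]:
        # One "spelling ** meaning_id" pair per line, e.g. "apple ** 234567".
        result = {}
        for row in gpt_text.split("\n"):
            if "**" not in row:
                continue
            parts = [p.strip() for p in row.split("**")]
            if len(parts) >= 2 and parts[1].isdigit():
                result[parts[0]] = int(parts[1])
        return result
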

+ 41 - 81
gpt/get_article2.py

@@ -12,7 +12,7 @@ from pydantic import BaseModel
 from cachetools import TTLCache
 from concurrent.futures import wait
 from random import randint, shuffle, sample
-import json,time
+import json, time
 import requests
 from openpyxl import load_workbook
 from tenacity import retry, stop_after_attempt, wait_fixed
@@ -23,7 +23,6 @@ from collections import defaultdict
 from fastapi import BackgroundTasks
 
 
-
 def get_article_difficulty(article) -> int:
     """获取文章的难度值"""
     url = "http://qbank.yunzhixue.cn/api/article/analysis"
@@ -69,11 +68,9 @@ def merge_and_split(list1, list2):
     import random
     random.shuffle(combined)
 
-   
     two_thirds = []
     one_third = []
 
-   
     total_length = len(combined)
     if total_length > 15:
         two_thirds = combined[:15]
@@ -87,41 +84,33 @@ def merge_and_split(list1, list2):
 
 class GetArticle:
     def __init__(self):
-        self.m = MySQLUploader() 
+        self.m = MySQLUploader()
 
-       
         self.callback_url_dict = defaultdict(str)
-        self.real_ip_dict = defaultdict(str) 
+        self.real_ip_dict = defaultdict(str)
         self.demo_name = defaultdict(str)
 
+        self.article_result = {}
 
-        self.article_result = {} 
-
-       
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)
 
-       
-        self.exchange_data: dict[str, list] = {} 
+        self.exchange_data: dict[str, list] = {}
         self.read_spring_bamboo_exchange_table()
-                    
-
 
-   
     def read_spring_bamboo_exchange_table(self):
         """变形是键,原型是值"""
         wb = load_workbook(r"data/春笋单词对照变形.xlsx", read_only=True, data_only=True)
         ws = wb.active
         for row in ws.values:
-            prototype = row[0] 
-            exchange = row[1] 
+            prototype = row[0]
+            exchange = row[1]
             if prototype not in self.exchange_data:
                 self.exchange_data[prototype] = [exchange]
             else:
                 self.exchange_data[prototype].append(exchange)
         wb.close()
 
-   
     def parser_insert_to_mysql(self, resp_result):
         try:
             for single_article in resp_result['articles']:
@@ -133,13 +122,12 @@ class GetArticle:
                 sql = "INSERT INTO spring_bamboo_article (article_json,difficult_level) VALUES (%s,%s)"
                 self.m.execute_(sql, (article_json, difficult_value))
         except Exception as e:
-           
+
             logger.error(f"插入数据库时发生错误: {str(e)}")
 
-   
     def submit_task(self, real_ip: str, core_words: list, take_count: int,
-                          demo_name: str, reading_level: int, article_length: int, exercise_id: int,
-                          background_tasks: BackgroundTasks):
+                    demo_name: str, reading_level: int, article_length: int, exercise_id: int,
+                    background_tasks: BackgroundTasks):
         """
         core_words: 词义数据组
         take_count: 取文章数量 (int类型,正常是2篇,最大8篇)
@@ -156,11 +144,10 @@ class GetArticle:
             self.real_ip_dict[task_id] = real_ip
             self.demo_name[task_id] = demo_name
 
-            resp_result = self.run_task(core_words, task_id, take_count, reading_level, article_length)
-            
-           
+            resp_result = self.run_task(core_words, task_id, exercise_id, take_count, reading_level, article_length)
+
             background_tasks.add_task(self.parser_insert_to_mysql, resp_result)
-            
+
             logger.success(f"reading-comprehension 文章2任务完成。学案id:{exercise_id},taskid:{task_id}")
             return resp_result
         except Exception as e:
@@ -168,18 +155,17 @@ class GetArticle:
             log_err_e(e, msg="GetArticle提交任务失败;")
             return err_msg
         finally:
-           
+
             self.real_ip_dict.pop(task_id, None)
             self.demo_name.pop(task_id, None)
 
-   
-    def __parse_gpt_resp(self,gpt_resp:dict,core_words:list):
-        return_json = {"articles": []} 
+    def __parse_gpt_resp(self, gpt_resp: dict, core_words: list):
+        return_json = {"articles": []}
         for choice in gpt_resp["choices"]:
             single_article_dict = json.loads(choice["message"]["content"])
 
-            allWordAmount = 0 
-           
+            allWordAmount = 0
+
             articleWordAmount = get_article_words_count(single_article_dict["englishArticle"])
             allWordAmount += articleWordAmount
 
@@ -189,28 +175,25 @@ class GetArticle:
                 allWordAmount += count_trunk
                 allWordAmount += count_candidates
 
-           
-            usedMeanIds: list = single_article_dict['usedMeanIds'] 
-           
+            usedMeanIds: list = single_article_dict['usedMeanIds']
+
             article_words = split_text_to_word(single_article_dict['englishArticle'])
-           
+
             for i in core_words:
                 meaning_id = i.get('meaning_id', 0)
                 if not meaning_id:
                     continue
                 word = i["spell"]
-                if meaning_id not in usedMeanIds and word in self.exchange_data: 
+                if meaning_id not in usedMeanIds and word in self.exchange_data:
                     words_exchanges_list = self.exchange_data[word]
                     for exchange_word in words_exchanges_list:
                         if exchange_word in article_words:
                             usedMeanIds.append(meaning_id)
                             break
 
-           
             single_article_dict["body"] = single_article_dict.pop("englishArticle")
             single_article_dict["chinese"] = single_article_dict.pop("chineseArticle")
 
-           
             for q in single_article_dict['questions']:
                 data = q['candidates']
                 shuffled_candidates = sample(data, len(data))
@@ -220,17 +203,13 @@ class GetArticle:
                     candidate['label'] = labels[index]
                 q['candidates'] = shuffled_candidates
 
-           
             return_json['articles'].append({**single_article_dict, "allWordAmount": allWordAmount, "articleWordAmount": articleWordAmount})
 
         return return_json
 
-
-
-   
     @retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
-    def get_article(self, core_words: list, task_id: int, reading_level, article_length,n) -> dict:
-       
+    def get_article(self, core_words: list, task_id: int, exercise_id: int, reading_level, article_length, n) -> dict:
+
         if not article_length:
             if 0 < reading_level <= 10:
                 article_length = 50 + 10 * reading_level
@@ -243,10 +222,9 @@ class GetArticle:
             if start <= reading_level <= end:
                 difficulty_control_stage = index
                 break
-        else: 
+        else:
             difficulty_control_stage = 2
 
-       
         diffculty_control = {
             1: {"grade": "小学", "desc_difficulty": "最简单最容易没有难度", "paragraph_count": "1-2",
                 "desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
@@ -261,44 +239,42 @@ class GetArticle:
                 "desc2": "文章整体难度适中,大约和中国的高中生,中国CET-6,雅思6分这样的难度标准。",
                 "choice_desc": "选择题难度偏难,要有迷惑性混淆性,答案不要出现直接在文中,4个选项要学生推理或逻辑判断,参考中国高中生水平,高考标准。"}
         }
-       
 
-        grade = diffculty_control[difficulty_control_stage]["grade"] 
-        select_diffculty = diffculty_control[difficulty_control_stage]["desc_difficulty"] 
-        select_paragraph_count = diffculty_control[difficulty_control_stage]["paragraph_count"] 
+        grade = diffculty_control[difficulty_control_stage]["grade"]
+        select_diffculty = diffculty_control[difficulty_control_stage]["desc_difficulty"]
+        select_paragraph_count = diffculty_control[difficulty_control_stage]["paragraph_count"]
         desc2 = diffculty_control[difficulty_control_stage]["desc2"]
-        choice_desc = diffculty_control[difficulty_control_stage]["choice_desc"] 
+        choice_desc = diffculty_control[difficulty_control_stage]["choice_desc"]
 
-       
         shuffle(core_words)
         core_words_meaning_str = "; ".join([f"[{i['meaning_id']}  {i['spell']} {i['meaning']}]" for i in core_words])
 
         no_escape_code = r"\\n\\n"
 
         sys_prompt = "你是一个专业的英语老师,擅长根据用户提供的词汇生成对应的英语文章和中文翻译和4个配套选择题。"
+
         q = f"""下面我会为你提供一组数据,[单词组](里面包含词义id,英语单词,中文词义),请根据这些单词的中文词义,\
-生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。\
-英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。\
-4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
+生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。特别注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义,例如我提供单词[change 零钱],就不要使用[变化]的词义。
 
 要求:
 1.必须用提供的这个词义的单词,其他单词使用{select_diffculty}的单词。{desc2}{choice_desc}
 2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
 3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
-4.生成的文章要求{article_length}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。
+4.生成的文章要求{article_length}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。不需要markdown格式。
 5.允许不使用[单词组]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
-6.注意回复字段的中英文,englishArticle是英文,chineseArticle是中文,其中trunk是英文,analysis是中文,text是英文。
 
 提供[单词组]:{core_words_meaning_str};
 """
+
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
 
             gpt_resp = get_article_gpt_pydantic(q, temperature=1.2, real_ip=real_ip, demo_name=demo_name, model='gpt-4.1',
-                                                               check_fucn=CheckArticleResult.get_article_1, max_tokens=8000,
-                                                               sys_prompt=sys_prompt,n=n)
-            multi_articles_dict = self.__parse_gpt_resp(gpt_resp=gpt_resp,core_words=core_words)
+                                                check_fucn=CheckArticleResult.get_article_1, max_tokens=15000,
+                                                sys_prompt=sys_prompt, n=n, task_id=task_id, exercise_id=exercise_id)
+
+            multi_articles_dict = self.__parse_gpt_resp(gpt_resp=gpt_resp, core_words=core_words)
             return multi_articles_dict
 
         except httpx.HTTPError as e:
@@ -311,9 +287,7 @@ class GetArticle:
             log_err_e(e, f"gpt生成文章回复其他错误.")
             raise
 
-
-   
-    def run_get_article_task(self, core_words, task_id, take_count, reading_level, article_length) -> dict:
+    def run_get_article_task(self, core_words, task_id, exercise_id, take_count, reading_level, article_length) -> dict:
         """
         :param core_words: 核心单词数据,优先级1;可能为空
         :param task_id: 任务id
@@ -323,16 +297,15 @@ class GetArticle:
         :return:
         """
         try:
-            return_json = self.get_article(core_words, task_id, reading_level, article_length,n=take_count)
+            return_json = self.get_article(core_words, task_id, exercise_id, reading_level, article_length, n=take_count)
             return return_json
         except Exception as e:
             logger.error(f"运行文章任务时发生错误: {str(e)}")
             raise
 
-   
-    def run_task(self, core_words, task_id, take_count, reading_level, article_length):
+    def run_task(self, core_words, task_id, exercise_id, take_count, reading_level, article_length):
         try:
-            outside_json = self.run_get_article_task(core_words, task_id, take_count, reading_level, article_length)
+            outside_json = self.run_get_article_task(core_words, task_id, exercise_id, take_count, reading_level, article_length)
             return outside_json
         except Exception as e:
             log_err_e(e, msg="外层总任务捕获错误")
@@ -340,16 +313,3 @@ class GetArticle:
     def cleanup(self):
         """清理所有资源"""
         pass
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
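
The candidate re-labelling step in __parse_gpt_resp (shuffle via sample, then reassign A-D) is the part most worth unit-testing; an equivalent standalone sketch:

    from random import sample

    def relabel_candidates(candidates: list[dict]) -> list[dict]:
        # Shuffle the four options, then reassign labels in the new order,
        # so exactly one shuffled entry keeps isRight == 1.
        shuffled = sample(candidates, len(candidates))
        for label, candidate in zip(["A", "B", "C", "D"], shuffled):
            candidate["label"] = label
        return shuffled
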

+ 20 - 25
gpt/gpt.py

@@ -1,26 +1,25 @@
 # -*- coding:utf-8 -*-
 if __name__ == '__main__':
     import os
+
     os.chdir("..")
 
 import requests
 import random
 import time
-from tools.loglog import logger,simple_logger
+from tools.loglog import logger, simple_logger
 from tools.new_mysql import MySQLUploader
 
 m = MySQLUploader()
 
 
-def insert_ip_token(ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens):
+def insert_ip_token(ip, demo_name, gpt_content, prompt_tokens, completion_tokens, total_tokens):
     sql = "insert into consumer_token (ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens) values (%s,%s,%s,%s,%s,%s)"
-    m.execute_(sql,(ip,demo_name,str(gpt_content),prompt_tokens,completion_tokens,total_tokens))
+    m.execute_(sql, (ip, demo_name, str(gpt_content), prompt_tokens, completion_tokens, total_tokens))
 
-def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-4o",max_tokens=3500,temperature:float=0,json_resp=False,n=1,sys_prompt=None):
-   
-   
-   
 
+def get_answer_from_gpt(question, real_ip="localhost", demo_name="无", model="gpt-4o", max_tokens=3500, temperature: float = 0,
+                        json_resp=False, n=1, sys_prompt=None):
     if "3.5" in model or "3.5-turbo" in model or "3.5turbo" in model:
         model = "gpt-3.5-turbo"
     elif "4o" in model or "gpt4o" in model:
@@ -28,19 +27,17 @@ def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-
     elif "4turbo" in model or "4-turbo" in model:
         model = "gpt-4-turbo"
 
-   
     d2 = {
-    "model": model,
-    "messages": [],
-    "max_tokens": max_tokens,
-    "temperature": temperature,
-    'n': n}
+        "model": model,
+        "messages": [],
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        'n': n}
 
     if sys_prompt:
         d2['messages'].append({"role": "system", "content": sys_prompt})
     d2['messages'].append({"role": "user", "content": question})
 
-
     if json_resp is True:
         d2["response_format"] = {"type": "json_object"}
     elif json_resp is False:
@@ -50,23 +47,22 @@ def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-
 
     for _ in range(3):
         try:
-           
+
             response = requests.post(f'http://170.106.108.95/v1/chat/completions', json=d2)
             r_json = response.json()
-            if r2:= r_json.get("choices",None):
-                if n>1:
+            if r2 := r_json.get("choices", None):
+                if n > 1:
                     gpt_res = []
                     for i in r2:
                         gpt_res.append(i["message"]["content"])
                 else:
-                    gpt_res= r2[0]["message"]["content"]
+                    gpt_res = r2[0]["message"]["content"]
 
-               
                 gpt_content = str(gpt_res)
                 prompt_tokens = r_json["usage"]["prompt_tokens"]
                 completion_tokens = r_json["usage"]["completion_tokens"]
                 total_tokens = r_json["usage"]["total_tokens"]
-                insert_ip_token(real_ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens)
+                insert_ip_token(real_ip, demo_name, gpt_content, prompt_tokens, completion_tokens, total_tokens)
 
                 simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{gpt_res}")
                 return gpt_res
@@ -83,21 +79,20 @@ def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-
     logger.critical("get_answer_from_gpt 严重错误,3次后都失败了")
 
 
-
-def parse_gpt_phon_to_tuplelist(text:str) -> list:
+def parse_gpt_phon_to_tuplelist(text: str) -> list:
     """解析gpt返回的音标数据"""
     result = []
     if not text:
         return []
     for i in text.split("\n"):
         ii = i.split("***")
-        if len(ii)>=3:
-            result.append((ii[0].strip(),ii[1].strip(),ii[2].strip()))
+        if len(ii) >= 3:
+            result.append((ii[0].strip(), ii[1].strip(), ii[2].strip()))
     return result
 
 
 if __name__ == '__main__':
     pass
 
-    resp = get_answer_from_gpt("hello",temperature=0.8,model='gpt-4o')
+    resp = get_answer_from_gpt("hello", temperature=0.8, model='gpt-4o')
     print(resp)
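
A usage sketch for parse_gpt_phon_to_tuplelist (the "***" line format is inferred from the parser above; the sample payload is made up):

    sample = "apple *** /ˈæpl/ *** /ˈæpl/\nbanana *** /bəˈnɑːnə/ *** /bəˈnænə/"
    print(parse_gpt_phon_to_tuplelist(sample))
    # [('apple', '/ˈæpl/', '/ˈæpl/'), ('banana', '/bəˈnɑːnə/', '/bəˈnænə/')]
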

+ 22 - 36
gpt/gpt_check.py

@@ -9,24 +9,22 @@ import re
 class CheckGptAnswer:
     @staticmethod
     def default_no_check(gpt_text: str):
-       
+
         return True
 
-   
     @staticmethod
     def score_value(gpt_text: str):
-       
+
         if gpt_text.count("【取值0】") > 1:
             return False
         return True if re.findall("【取值.+?】", gpt_text) else False
 
-   
     @staticmethod
     def original_modify(gpt_text: str):
         split_text = gpt_text.split("\n")
         for t in split_text:
-           
-            if "修改理由" in t and "错误" in t and len(t)<=25:
+
+            if "修改理由" in t and "错误" in t and len(t) <= 25:
                 return False
             elif "没有严重的语法错误" in t:
                 return False
@@ -36,74 +34,64 @@ class CheckGptAnswer:
         else:
             return False
 
-   
     @staticmethod
     def count_chinese_characters_50(s: str):
         chinese_count = 0
         for char in s:
-           
+
             if '\u4e00' <= char <= '\u9fff':
                 chinese_count += 1
-        return True if s and chinese_count/len(s) >= 0.5 else False
+        return True if s and chinese_count / len(s) >= 0.5 else False
 
-   
     @staticmethod
-    def count_english_count_30(s: str,english_words_count=30):
-        words_count = len(re.findall(r"[a-zA-Z\']+",s))
+    def count_english_count_30(s: str, english_words_count=30):
+        words_count = len(re.findall(r"[a-zA-Z\']+", s))
         return True if words_count >= english_words_count else False
 
-   
     @staticmethod
-    def count_letter_percentages(s:str,letter_percentages=0.8):
-        count_letter=0
-       
+    def count_letter_percentages(s: str, letter_percentages=0.8):
+        count_letter = 0
+
         total_length = len(s)
 
-       
         for char in s:
-           
+
             if char.isalpha():
-               
                 count_letter += 1
-        result = True if round(count_letter/total_length,2)>letter_percentages else False
+        result = True if round(count_letter / total_length, 2) > letter_percentages else False
         return result
 
 
 class CheckArticleResult:
     @staticmethod
     def default_no_check(gpt_text: str):
-       
+
         return True
 
     @staticmethod
     def get_article_1(gpt_text: str):
-       
+
         try:
             json_object = json.loads(gpt_text)
         except json.decoder.JSONDecodeError:
             return False
-       
-        if not all(i in json_object for i in ["englishArticle","chineseArticle","difficultSentences","usedMeanIds","questions"]):
+
+        if not all(i in json_object for i in ["englishArticle", "chineseArticle", "difficultSentences", "usedMeanIds", "questions"]):
             return False
-       
+
         try:
             for question in json_object['questions']:
-                analysis = question['analysis'] 
-                words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis))/len(analysis)
-                if words_count_pct>0.5:
+                analysis = question['analysis']
+                words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis)) / len(analysis)
+                if words_count_pct > 0.5:
                     return False
         except:
             return False
 
         return True
 
-if __name__ == '__main__':
-   
-   
-   
-   
-   
 
+if __name__ == '__main__':
     text = """{
   "difficultSentences": [
     {
@@ -349,5 +337,3 @@ if __name__ == '__main__':
 }"""
     json_text = json.loads(text2)
     print(json_text)
-   
-   
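
The ratio test inside CheckArticleResult.get_article_1 rejects an analysis field that is mostly English; the same check isolated for clarity:

    import re

    def analysis_is_mostly_chinese(analysis: str) -> bool:
        # Reject when English word tokens dominate the analysis text,
        # mirroring the 0.5 threshold used above.
        words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis)) / len(analysis)
        return words_count_pct <= 0.5
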

+ 5 - 2
gpt/query_oss_file.py

@@ -1,6 +1,7 @@
 # -*- coding: UTF-8 -*-
 if __name__ == '__main__':
     import os
+
     os.chdir("..")
 
 from tools.loglog import logger
@@ -8,6 +9,7 @@ import oss2
 from oss2.credentials import EnvironmentVariableCredentialsProvider
 import json
 
+
 def query_file_content(key):
     """
     :param key: OSS的键;key = 'study/article/14138566'
@@ -18,7 +20,7 @@ def query_file_content(key):
     try:
         object_stream = bucket.get_object(key)
         content = b''.join(object_stream)
-       
+
         text_content = content.decode('utf-8')
         json_content = json.loads(text_content)
         return json_content
@@ -27,5 +29,6 @@ def query_file_content(key):
     except Exception as e:
         logger.error(f"{type(e).__name__}: {e}")
 
+
 if __name__ == '__main__':
-    print(query_file_content('study/article/10613145')["id"])
+    print(query_file_content('study/article/10613145')["id"])
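
EnvironmentVariableCredentialsProvider pulls the access key from the environment; to my knowledge oss2 reads OSS_ACCESS_KEY_ID and OSS_ACCESS_KEY_SECRET. A minimal setup sketch with the endpoint and bucket copied from this module (placeholder values are illustrative only):

    import os
    import oss2
    from oss2.credentials import EnvironmentVariableCredentialsProvider

    os.environ.setdefault("OSS_ACCESS_KEY_ID", "<key-id>")        # placeholder
    os.environ.setdefault("OSS_ACCESS_KEY_SECRET", "<secret>")    # placeholder

    auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
    bucket = oss2.Bucket(auth, 'oss-cn-hangzhou.aliyuncs.com', 'qingti-private')
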

+ 9 - 7
main.py

@@ -2,7 +2,7 @@
 import time
 from threading import Thread
 
-from fastapi import FastAPI,Request
+from fastapi import FastAPI, Request
 from fastapi.responses import PlainTextResponse
 from typing import Callable
 from core.api_get_article import router as r1
@@ -26,6 +26,7 @@ app.include_router(r4, tags=["生成word文档"])
 app.include_router(r5, tags=["口语评测"])
 app.include_router(r6, tags=["deepseek文章"])
 
+
 @app.middleware("http")
 async def add_process_time_header(request: Request, call_next: Callable):
     start_time = time.time()
@@ -35,7 +36,7 @@ async def add_process_time_header(request: Request, call_next: Callable):
     try:
         body = await request.json() if request.method in ["POST", "PUT", "PATCH"] else ""
     except:
-        body =""
+        body = ""
     logger.info(f"\n正式接口请求:{real_ip} {request.method} {path}\n查询参数:{params}\n携带参数:{body}")
 
     try:
@@ -47,22 +48,23 @@ async def add_process_time_header(request: Request, call_next: Callable):
     process_time = str(round(time.time() - start_time, 2))
     response.headers["X-Process-Time"] = process_time
 
-   
-    if path not in ['/','/tts']:
-        with open('log/time_log.txt', encoding='utf-8', mode='a')as f:
+    if path not in ['/', '/tts']:
+        with open('log/time_log.txt', encoding='utf-8', mode='a') as f:
             t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
             f.write(f"{t}  路径:{path} - 用时:{process_time}\n")
     return response
 
+
 @app.get("/")
 @app.post("/")
 def hello():
     return PlainTextResponse("hello world")
 
+
 del_file_thread = Thread(target=run_del_normal, daemon=True)
 del_file_thread.start()
 
-
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("main:app",host="0.0.0.0", port=8000)
+
+    uvicorn.run("main:app", host="0.0.0.0", port=8000)
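
A quick client-side check of the timing middleware (assumes the service is running locally on port 8000):

    import requests

    r = requests.get("http://localhost:8000/")
    print(r.text)                            # "hello world"
    print(r.headers.get("X-Process-Time"))   # e.g. "0.01"
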

+ 10 - 8
main_9000.py

@@ -2,7 +2,7 @@
 import time
 from threading import Thread
 
-from fastapi import FastAPI,Request
+from fastapi import FastAPI, Request
 from fastapi.responses import PlainTextResponse
 from typing import Callable
 from core.api_get_article import router as r1
@@ -26,6 +26,7 @@ app.include_router(r4, tags=["生成word文档"])
 app.include_router(r5, tags=["口语评测"])
 app.include_router(r6, tags=["deepseek文章"])
 
+
 @app.middleware("http")
 async def add_process_time_header(request: Request, call_next: Callable):
     start_time = time.time()
@@ -35,7 +36,7 @@ async def add_process_time_header(request: Request, call_next: Callable):
     try:
         body = await request.json() if request.method in ["POST", "PUT", "PATCH"] else ""
     except:
-        body =""
+        body = ""
     logger.info(f"\n测试接口请求:{real_ip} {request.method} {path}\n查询参数:{params}\n携带参数:{body}")
 
     try:
@@ -44,25 +45,26 @@ async def add_process_time_header(request: Request, call_next: Callable):
         logger.error(f"{type(e).__name__},{e}")
         return resp_500(message=f"{type(e).__name__},{e}")
 
-    process_time = str(round(time.time() - start_time,2))
+    process_time = str(round(time.time() - start_time, 2))
     response.headers["X-Process-Time"] = process_time
 
-   
-    if path not in ['/','/tts']:
-        with open('log/time_log.txt', encoding='utf-8', mode='a')as f:
+    if path not in ['/', '/tts']:
+        with open('log/time_log.txt', encoding='utf-8', mode='a') as f:
             t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
             f.write(f"{t}  路径:{path} - 用时:{process_time}\n")
     return response
 
+
 @app.get("/")
 @app.post("/")
 def hello():
     return PlainTextResponse("hello world")
 
+
 del_file_thread = Thread(target=run_del_normal, daemon=True)
 del_file_thread.start()
 
-
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run("main_9000:app", port=9000)
+
+    uvicorn.run("main_9000:app", port=9000)

Diff file suppressed because it is too large
+ 1 - 1
make_docx_demo/check_test_table/aaaaaaaaaa.py


+ 5 - 5
make_docx_demo/check_test_table/baidu_ocr.py

@@ -15,25 +15,25 @@ def high_ocr_location(pic_path):
     with open(pic_path, 'rb') as f:
         img = base64.b64encode(f.read())
 
-    if time.time()-token_time>3600*8:
+    if time.time() - token_time > 3600 * 8:
         print("获取token啦")
         url_token = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=BaL3yDflxe7Z5001vF8rAzKu&client_secret=xs40HshFLDDyWgCCfgnz86zWhQ8X1s5f'
         token = requests.post(url_token).json()
-       
+
         access_token = token['access_token']
         token_time = time.time()
 
     request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate"
 
-    params = {"image": img,"recognize_granularity":"small"}
+    params = {"image": img, "recognize_granularity": "small"}
     request_url = request_url + "?access_token=" + access_token
     headers = {'content-type': 'application/x-www-form-urlencoded'}
     response = requests.post(request_url, data=params, headers=headers)
     if response:
         r_json = response.json()
-       
+
         return r_json
 
 
 if __name__ == '__main__':
-    print(high_ocr_location(r"C:\Users\86131\Desktop\4.jpg"))
+    print(high_ocr_location(r"C:\Users\86131\Desktop\4.jpg"))
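
The token handling above refetches the Baidu OAuth token only after roughly 8 hours; the caching pattern isolated (URL and field names as in the code above, credentials omitted):

    import time
    import requests

    _token, _token_time = None, 0.0

    def get_access_token(url_token: str) -> str:
        # Cache the OAuth token and refresh it once it is ~8 hours old.
        global _token, _token_time
        if _token is None or time.time() - _token_time > 3600 * 8:
            _token = requests.post(url_token).json()["access_token"]
            _token_time = time.time()
        return _token
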

+ 74 - 123
make_docx_demo/check_test_table/image_preprocess.py

@@ -23,56 +23,39 @@ def test_log(text: str):
 
 class PreprocessImage:
     def __init__(self, image_path):
-        self.image_path = image_path 
-        self.template_image_path = "template.jpg" 
+        self.image_path = image_path
+        self.template_image_path = "template.jpg"
 
-        self.image = cv2.imread(image_path) 
+        self.image = cv2.imread(image_path)
         self.template_image = cv2.imread(self.template_image_path)
-        self.temp_h, self.temp_w = self.template_image.shape[:2] 
+        self.temp_h, self.temp_w = self.template_image.shape[:2]
 
-    def correct_image(self, point_tuple,image_path='sharpen_image.jpg'):
+    def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
         """图像矫正
         point_tuple:传过来的4个点坐标的元组"""
         sharpen_image = cv2.imread(image_path)
 
         src_points = np.float32(point_tuple)
 
-       
-       
-        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]]) 
+        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
 
         M = cv2.getPerspectiveTransform(src_points, dst_points)
-       
+
         transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
 
-       
         gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
 
-       
         blurred = cv2.GaussianBlur(gray, (5, 5), 0)
 
-       
-       
-
-       
-       
-       
-       
-       
-       
-       
-       
-
-       
         image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
-       
+
         cv2.imwrite('transformed_image.jpg', image_rgb)
 
     def sharpen_image(self):
-       
+
         img = Image.open(self.image_path)
         sharpened_img = img.filter(ImageFilter.SHARPEN)
-        sharpened_img.save('sharpen_image.jpg') 
+        sharpened_img.save('sharpen_image.jpg')
 
     @staticmethod
     def parser_ocr(ocr_data):
@@ -81,27 +64,16 @@ class PreprocessImage:
             text: str = word_item['words']
             if text.startswith("1."):
                 left_char_location = word_item['chars'][0]['location']
-                p1 = (left_char_location['left'], left_char_location['top']) 
+                p1 = (left_char_location['left'], left_char_location['top'])
             elif text.startswith("51."):
                 left_char_location = word_item['chars'][0]['location']
-                p2 = (left_char_location['left'], left_char_location['top']) 
+                p2 = (left_char_location['left'], left_char_location['top'])
             elif text.startswith("50."):
                 left_char_location = word_item['chars'][0]['location']
-                p3 = (left_char_location['left'], left_char_location['top']) 
+                p3 = (left_char_location['left'], left_char_location['top'])
             elif text.startswith("100."):
                 left_char_location = word_item['chars'][0]['location']
-                p4 = (left_char_location['left'], left_char_location['top']) 
-
-           
-           
-           
-           
-           
-           
-           
-           
-           
-           
+                p4 = (left_char_location['left'], left_char_location['top'])
 
         if any([not p1, not p2, not p3, not p4]):
             print([p1, p2, p3, p4])
@@ -110,8 +82,8 @@ class PreprocessImage:
         return [p1, p2, p3, p4]
 
     def run(self):
-       
-        self.sharpen_image() 
+
+        self.sharpen_image()
         ocr_data = high_ocr_location(self.image_path)
         point_tuple = self.parser_ocr(ocr_data)
         self.correct_image(point_tuple)
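
A minimal sketch of the correction step above, assuming the four OCR anchors (the items numbered "1.", "51.", "50.", "100.") arrive as (x, y) tuples in top-left, top-right, bottom-left, bottom-right order:

import cv2
import numpy as np


def warp_to_template(image_path, anchor_points, template_size):
    """Map the four detected anchor points onto fixed template coordinates."""
    img = cv2.imread(image_path)
    src = np.float32(anchor_points)
    # Template coordinates from the diff: TL, TR, BL, BR of the word table.
    dst = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
    M = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(img, M, template_size)  # template_size = (width, height)
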
@@ -121,28 +93,24 @@ class ComparisonAlgorithm:
     """比较算法核心"""
 
     def __init__(self, transformed_image, ocr_data):
-        self.transformed_image = cv2.imread(transformed_image) 
-        self.ocr_data = ocr_data 
-        self.order_ocr_data = {} 
-        self.already_find_index = set() 
+        self.transformed_image = cv2.imread(transformed_image)
+        self.ocr_data = ocr_data
+        self.order_ocr_data = {}
+        self.already_find_index = set()
 
-        self.image = Image.open(transformed_image) 
+        self.image = Image.open(transformed_image)
 
     @staticmethod
     def separate_numbers_and_letters(text):
         """正则提取数字和字母"""
-        numbers = "".join(re.findall(r'\d+', text)) 
-        letters = "".join(re.findall(r'[a-zA-Z]+', text)) 
+        numbers = "".join(re.findall(r'\d+', text))
+        letters = "".join(re.findall(r'[a-zA-Z]+', text))
         return numbers, letters
 
     def is_line_word(self, x, y):
         """判断点的颜色是否符合标准; cv2取点速度没有pillow快
         指定要查询的点的坐标 (x, y)"""
 
-       
-       
-       
-
         rgb_color = self.image.getpixel((x, y))
         r, g, b = rgb_color
 
@@ -153,16 +121,16 @@ class ComparisonAlgorithm:
     def __make_order_ocr_data(self):
         for word_item in self.ocr_data['words_result']:
             word = word_item['words']
-            if word[0].isdigit() and len(word) >= 2: 
-               
+            if word[0].isdigit() and len(word) >= 2:
+
                 word_text = word_item['words']
-                location = word_item['location'] 
-                first_char_location = word_item['chars'][0]['location'] 
-                end_char_location = word_item['chars'][-1]['location'] 
-                chars_location = word_item['chars'] 
+                location = word_item['location']
+                first_char_location = word_item['chars'][0]['location']
+                end_char_location = word_item['chars'][-1]['location']
+                chars_location = word_item['chars']
 
                 numbers, letters = self.separate_numbers_and_letters(word_text)
-                if numbers not in self.order_ocr_data: 
+                if numbers not in self.order_ocr_data:
                     self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
                                                     "first_char_location": first_char_location, "end_char_location": end_char_location}
 
@@ -174,25 +142,23 @@ class ComparisonAlgorithm:
         first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1) 
-        black_count_1 = 0 
+        next_index = str(int_index + 1)
+        black_count_1 = 0
 
         for x in range(word_location['left'], word_location['left'] + word_location['width']):
 
-           
             b_top, b_height = first_char_location['top'], int(first_char_location['height'])
-            bottom_location_y = b_top + b_height 
+            bottom_location_y = b_top + b_height
 
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-           
+
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
                 next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
             else:
-               
-               
+
                 next_word_top_location = bottom_location_y + int(b_height * 0.5)
 
             for y in range(bottom_location_y, next_word_top_location):
@@ -202,12 +168,10 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_1 / (word_location['width'])
-        if black_count_per > 0.8: 
+        if black_count_per > 0.8:
             print(f"{int_index}正常划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index
 
     def color_algorithm_2(self, int_index, word_location, word):
         """颜色算法2,单词自身中间的黑点率
@@ -215,11 +179,11 @@ class ComparisonAlgorithm:
         word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
         word:具体序号的单词,标识用
         """
-        black_count_2 = 0 
+        black_count_2 = 0
         for x in range(word_location['left'], word_location['left'] + word_location['width']):
 
             mid = word_location['top'] + int(word_location['height'] / 2)
-            bottom = word_location['top'] + int(word_location['height']) + 5 
+            bottom = word_location['top'] + int(word_location['height']) + 5
 
             for y in range(mid, bottom):
                 result = self.is_line_word(x, y)
@@ -228,12 +192,10 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_2 / (word_location['width'])
-        if black_count_per > 0.92: 
+        if black_count_per > 0.92:
             print(f"{int_index}中间划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index
 
     def color_algorithm_3(self, int_index, word_location, end_char_location, word):
         """
@@ -243,34 +205,33 @@ class ComparisonAlgorithm:
         end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1) 
-        black_count_1 = 0 
+        next_index = str(int_index + 1)
+        black_count_1 = 0
         moving_distance = 20
 
         """这是在获取所有需要的横向左右x坐标"""
-        all_x = [] 
+        all_x = []
         for i in range(word_location['left'] - moving_distance, word_location['left']):
             all_x.append(i)
-        word_right_loca = word_location['left'] + word_location['width'] + 2 
+        word_right_loca = word_location['left'] + word_location['width'] + 2
         for i in range(word_right_loca, word_right_loca + moving_distance):
             all_x.append(i)
 
         b_top, b_height = word_location['top'], int(word_location['height'])
-        bottom_location_y = b_top + b_height 
-       
-        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8) 
+        bottom_location_y = b_top + b_height
+
+        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
 
         for x in all_x:
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-           
+
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
-                next_word_top_location = next_word_location['top'] + 3 
+                next_word_top_location = next_word_location['top'] + 3
             else:
-               
-               
+
                 next_word_top_location = bottom_location_y + int(b_height * 0.3)
 
             for y in range(bottom_location_y_half, next_word_top_location):
@@ -280,63 +241,57 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_1 / len(all_x)
-        if black_count_per > 0.4: 
+        if black_count_per > 0.4:
             print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index
 
     def color_algorithm_4(self, int_index, word_location, chars_location, word):
         """灰度图极差算法"""
-       
-       
 
         for char_index, char_dict in enumerate(chars_location):
             if char_dict['char'] == '.' or char_dict['char'] == ',':
                 point_location, point_char_index = char_dict['location'], char_index
                 break
-        else: 
+        else:
             char_index = 2
             point_location, point_char_index = chars_location[char_index]['location'], char_index
 
         white_block = 0
-        point_location_half = point_location['top'] + point_location['height']//2
+        point_location_half = point_location['top'] + point_location['height'] // 2
         y1, y2 = point_location_half, point_location_half + point_location['height']
         for x in range(point_location['left'], point_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val>110 or range_value < 90:
-                white_block +=1
+            if min_val > 110 or range_value < 90:
+                white_block += 1
 
-        if white_block/point_location['width'] < 0.1:
-            print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
+        if white_block / point_location['width'] < 0.1:
+            print(
+                f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index
 
-       
         white_block = 0
         end_char_location = chars_location[-2]['location']
         bottom = end_char_location['top'] + end_char_location['height']
-        y1, y2 = bottom+2, bottom + end_char_location['height']-10
+        y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
         for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val>110 or range_value < 90:
-                white_block +=1
+            if min_val > 110 or range_value < 90:
+                white_block += 1
 
-        if white_block/point_location['width'] < 0.1:
-            print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
+        if white_block / point_location['width'] < 0.1:
+            print(
+                f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index
 
-
-
-   
     def core_algorithm(self):
         self.__make_order_ocr_data()
 
@@ -346,10 +301,10 @@ class ComparisonAlgorithm:
                 continue
 
             current_dict = self.order_ocr_data[current_index]
-            word = current_dict['word'] 
-            word_location = current_dict['location'] 
-            first_char_location = current_dict['first_char_location'] 
-            end_char_location = current_dict['end_char_location'] 
+            word = current_dict['word']
+            word_location = current_dict['location']
+            first_char_location = current_dict['first_char_location']
+            end_char_location = current_dict['end_char_location']
             chars_location = current_dict['chars_location']
 
             if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
@@ -366,23 +321,19 @@ class ComparisonAlgorithm:
 
 
 if __name__ == '__main__':
-   
     image_path = r"C:\Users\86131\Desktop\4.jpg"
 
-   
     script_path = Path(__file__).resolve()
-   
+
     script_directory = script_path.parent
-   
+
     transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
 
-   
     pi = PreprocessImage(image_path)
     pi.run()
 
-    transformed_image_ocr_data = high_ocr_location(transformed_image_path) 
-    test_log(transformed_image_ocr_data) 
+    transformed_image_ocr_data = high_ocr_location(transformed_image_path)
+    test_log(transformed_image_ocr_data)
 
     ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
     ca.core_algorithm()
-
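
A minimal sketch of the column scan behind color_algorithm_1, with is_dark standing in for is_line_word: a word counts as underlined when more than 80% of its x-columns contain a dark pixel in the band between its baseline and the next word:

def underline_ratio(box, band_bottom, is_dark):
    """box is an OCR 'location' dict with left/top/width/height keys."""
    hit_columns = 0
    band_top = box["top"] + box["height"]          # just below the word
    for x in range(box["left"], box["left"] + box["width"]):
        for y in range(band_top, band_bottom):     # stop above the next word
            if is_dark(x, y):
                hit_columns += 1
                break                              # one dark pixel marks the column
    return hit_columns / box["width"]


# flagged = underline_ratio(word_box, next_word_top, is_dark) > 0.8
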

+ 71 - 112
make_docx_demo/check_test_table/image_preprocess2.py

@@ -23,70 +23,50 @@ def test_log(text: str):
 
 class PreprocessImage:
     def __init__(self, image_path):
-        self.image_path = image_path 
-        self.template_image_path = "template.jpg" 
+        self.image_path = image_path
+        self.template_image_path = "template.jpg"
 
-        self.image = cv2.imread(image_path) 
+        self.image = cv2.imread(image_path)
         self.template_image = cv2.imread(self.template_image_path)
-        self.temp_h, self.temp_w = self.template_image.shape[:2] 
+        self.temp_h, self.temp_w = self.template_image.shape[:2]
 
-    def correct_image(self, point_tuple,image_path='sharpen_image.jpg'):
+    def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
         """图像矫正
         point_tuple:传过来的4个点坐标的元组"""
         sharpen_image = cv2.imread(image_path)
 
         src_points = np.float32(point_tuple)
 
-       
-       
-        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]]) 
+        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
 
         M = cv2.getPerspectiveTransform(src_points, dst_points)
-       
+
         transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
 
-       
         gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
 
-       
         blurred = cv2.GaussianBlur(gray, (5, 5), 0)
 
-       
-       
-
-       
-       
-       
-       
-       
-       
-       
-       
-
-       
         image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
-       
+
         cv2.imwrite('transformed_image.jpg', image_rgb)
 
     def sharpen_image(self):
-       
+
         img = Image.open(self.image_path)
         sharpened_img = img.filter(ImageFilter.SHARPEN)
-        sharpened_img.save('sharpen_image.jpg') 
+        sharpened_img.save('sharpen_image.jpg')
 
     @staticmethod
     def parser_ocr(ocr_data):
         for word_item in ocr_data['words_result']:
-           
-            for char_item in word_item['chars']:
 
+            for char_item in word_item['chars']:
                 pass
 
-
-
     def run(self):
-       
-        self.sharpen_image() 
+
+        self.sharpen_image()
         ocr_data = high_ocr_location(self.image_path)
         point_tuple = self.parser_ocr(ocr_data)
         self.correct_image(point_tuple)
@@ -96,28 +76,24 @@ class ComparisonAlgorithm:
     """比较算法核心"""
 
     def __init__(self, transformed_image, ocr_data):
-        self.transformed_image = cv2.imread(transformed_image) 
-        self.ocr_data = ocr_data 
-        self.order_ocr_data = {} 
-        self.already_find_index = set() 
+        self.transformed_image = cv2.imread(transformed_image)
+        self.ocr_data = ocr_data
+        self.order_ocr_data = {}
+        self.already_find_index = set()
 
-        self.image = Image.open(transformed_image) 
+        self.image = Image.open(transformed_image)
 
     @staticmethod
     def separate_numbers_and_letters(text):
         """正则提取数字和字母"""
-        numbers = "".join(re.findall(r'\d+', text)) 
-        letters = "".join(re.findall(r'[a-zA-Z]+', text)) 
+        numbers = "".join(re.findall(r'\d+', text))
+        letters = "".join(re.findall(r'[a-zA-Z]+', text))
         return numbers, letters
 
     def is_line_word(self, x, y):
         """判断点的颜色是否符合标准; cv2取点速度没有pillow快
         指定要查询的点的坐标 (x, y)"""
 
-       
-       
-       
-
         rgb_color = self.image.getpixel((x, y))
         r, g, b = rgb_color
 
@@ -128,16 +104,16 @@ class ComparisonAlgorithm:
     def __make_order_ocr_data(self):
         for word_item in self.ocr_data['words_result']:
             word = word_item['words']
-            if word[0].isdigit() and len(word) >= 2: 
-               
+            if word[0].isdigit() and len(word) >= 2:
+
                 word_text = word_item['words']
-                location = word_item['location'] 
-                first_char_location = word_item['chars'][0]['location'] 
-                end_char_location = word_item['chars'][-1]['location'] 
-                chars_location = word_item['chars'] 
+                location = word_item['location']
+                first_char_location = word_item['chars'][0]['location']
+                end_char_location = word_item['chars'][-1]['location']
+                chars_location = word_item['chars']
 
                 numbers, letters = self.separate_numbers_and_letters(word_text)
-                if numbers not in self.order_ocr_data: 
+                if numbers not in self.order_ocr_data:
                     self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
                                                     "first_char_location": first_char_location, "end_char_location": end_char_location}
 
@@ -149,25 +125,23 @@ class ComparisonAlgorithm:
         first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1) 
-        black_count_1 = 0 
+        next_index = str(int_index + 1)
+        black_count_1 = 0
 
         for x in range(word_location['left'], word_location['left'] + word_location['width']):
 
-           
             b_top, b_height = first_char_location['top'], int(first_char_location['height'])
-            bottom_location_y = b_top + b_height 
+            bottom_location_y = b_top + b_height
 
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-           
+
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
                 next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
             else:
-               
-               
+
                 next_word_top_location = bottom_location_y + int(b_height * 0.5)
 
             for y in range(bottom_location_y, next_word_top_location):
@@ -177,12 +151,10 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_1 / (word_location['width'])
-        if black_count_per > 0.8: 
+        if black_count_per > 0.8:
             print(f"{int_index}正常划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index
 
     def color_algorithm_2(self, int_index, word_location, word):
         """颜色算法2,单词自身中间的黑点率
@@ -190,11 +162,11 @@ class ComparisonAlgorithm:
         word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
         word:具体序号的单词,标识用
         """
-        black_count_2 = 0 
+        black_count_2 = 0
         for x in range(word_location['left'], word_location['left'] + word_location['width']):
 
             mid = word_location['top'] + int(word_location['height'] / 2)
-            bottom = word_location['top'] + int(word_location['height']) + 5 
+            bottom = word_location['top'] + int(word_location['height']) + 5
 
             for y in range(mid, bottom):
                 result = self.is_line_word(x, y)
@@ -203,12 +175,10 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_2 / (word_location['width'])
-        if black_count_per > 0.92: 
+        if black_count_per > 0.92:
             print(f"{int_index}中间划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index
 
     def color_algorithm_3(self, int_index, word_location, end_char_location, word):
         """
@@ -218,34 +188,33 @@ class ComparisonAlgorithm:
         end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1) 
-        black_count_1 = 0 
+        next_index = str(int_index + 1)
+        black_count_1 = 0
         moving_distance = 20
 
         """这是在获取所有需要的横向左右x坐标"""
-        all_x = [] 
+        all_x = []
         for i in range(word_location['left'] - moving_distance, word_location['left']):
             all_x.append(i)
-        word_right_loca = word_location['left'] + word_location['width'] + 2 
+        word_right_loca = word_location['left'] + word_location['width'] + 2
         for i in range(word_right_loca, word_right_loca + moving_distance):
             all_x.append(i)
 
         b_top, b_height = word_location['top'], int(word_location['height'])
-        bottom_location_y = b_top + b_height 
-       
-        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8) 
+        bottom_location_y = b_top + b_height
+
+        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
 
         for x in all_x:
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-           
+
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
-                next_word_top_location = next_word_location['top'] + 3 
+                next_word_top_location = next_word_location['top'] + 3
             else:
-               
-               
+
                 next_word_top_location = bottom_location_y + int(b_height * 0.3)
 
             for y in range(bottom_location_y_half, next_word_top_location):
@@ -255,63 +224,57 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_1 / len(all_x)
-        if black_count_per > 0.4: 
+        if black_count_per > 0.4:
             print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index 
-       
-       
+            return int_index
 
     def color_algorithm_4(self, int_index, word_location, chars_location, word):
         """灰度图极差算法"""
-       
-       
 
         for char_index, char_dict in enumerate(chars_location):
             if char_dict['char'] == '.' or char_dict['char'] == ',':
                 point_location, point_char_index = char_dict['location'], char_index
                 break
-        else: 
+        else:
             char_index = 2
             point_location, point_char_index = chars_location[char_index]['location'], char_index
 
         white_block = 0
-        point_location_half = point_location['top'] + point_location['height']//2
+        point_location_half = point_location['top'] + point_location['height'] // 2
         y1, y2 = point_location_half, point_location_half + point_location['height']
         for x in range(point_location['left'], point_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val>110 or range_value < 90:
-                white_block +=1
+            if min_val > 110 or range_value < 90:
+                white_block += 1
 
-        if white_block/point_location['width'] < 0.1:
-            print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
+        if white_block / point_location['width'] < 0.1:
+            print(
+                f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index
 
-       
         white_block = 0
         end_char_location = chars_location[-2]['location']
         bottom = end_char_location['top'] + end_char_location['height']
-        y1, y2 = bottom+2, bottom + end_char_location['height']-10
+        y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
         for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val>110 or range_value < 90:
-                white_block +=1
+            if min_val > 110 or range_value < 90:
+                white_block += 1
 
-        if white_block/point_location['width'] < 0.1:
-            print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
+        if white_block / point_location['width'] < 0.1:
+            print(
+                f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index
 
-
-
-   
     def core_algorithm(self):
         self.__make_order_ocr_data()
 
@@ -321,10 +284,10 @@ class ComparisonAlgorithm:
                 continue
 
             current_dict = self.order_ocr_data[current_index]
-            word = current_dict['word'] 
-            word_location = current_dict['location'] 
-            first_char_location = current_dict['first_char_location'] 
-            end_char_location = current_dict['end_char_location'] 
+            word = current_dict['word']
+            word_location = current_dict['location']
+            first_char_location = current_dict['first_char_location']
+            end_char_location = current_dict['end_char_location']
             chars_location = current_dict['chars_location']
 
             if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
@@ -341,23 +304,19 @@ class ComparisonAlgorithm:
 
 
 if __name__ == '__main__':
-   
     image_path = r"C:\Users\86131\Desktop\4.jpg"
 
-   
     script_path = Path(__file__).resolve()
-   
+
     script_directory = script_path.parent
-   
+
     transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
 
-   
     pi = PreprocessImage(image_path)
     pi.run()
 
-    transformed_image_ocr_data = high_ocr_location(transformed_image_path) 
-    test_log(transformed_image_ocr_data) 
+    transformed_image_ocr_data = high_ocr_location(transformed_image_path)
+    test_log(transformed_image_ocr_data)
 
     ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
     ca.core_algorithm()
-
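
A minimal sketch of the grayscale-range test in color_algorithm_4, with image as the cv2 array of the corrected page: a pixel column counts as blank when its darkest value is light (min > 110) or its contrast is low (range < 90), and the region is judged marked when fewer than 10% of its columns are blank:

import numpy as np


def is_marked(image, box, y1, y2):
    """box is an OCR char 'location' dict; y1/y2 bound the band to inspect."""
    white_columns = 0
    for x in range(box["left"], box["left"] + box["width"]):
        roi = image[y1:y2, x:x + 1]                # one-pixel-wide vertical strip
        if np.min(roi) > 110 or (np.max(roi) - np.min(roi)) < 90:
            white_columns += 1
    return white_columns / box["width"] < 0.1
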

+ 2 - 7
make_docx_demo/check_test_table/mark_ocr_loca.py

@@ -6,24 +6,19 @@ from pathlib import Path
 
 
 def draw_rectangles_on_image(image_path, rectangles, output_path):
-   
     image = Image.open(image_path)
     draw = ImageDraw.Draw(image)
 
-   
     for rectangle in rectangles:
         top_left = (rectangle['left'], rectangle['top'])
         bottom_right = (rectangle['left'] + rectangle['width'], rectangle['top'] + rectangle['height'])
         draw.rectangle([top_left, bottom_right], outline='red', width=2)
 
-   
     image.save(output_path)
 
 
 rectangles = [
-   
-   
-   
+
 ]
 
 with open("log.txt", "r", encoding="utf-8") as f:
@@ -39,4 +34,4 @@ for i in ocr_data['words_result']:
 script_path = Path(__file__).resolve()
 script_directory = script_path.parent
 transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
-draw_rectangles_on_image(transformed_image_path, rectangles, 'output_with_rectangles.jpg')
+draw_rectangles_on_image(transformed_image_path, rectangles, 'output_with_rectangles.jpg')
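
A minimal usage sketch for draw_rectangles_on_image; each rectangle mirrors the OCR 'location' shape read from log.txt, and the values here are illustrative, not taken from a real run:

boxes = [{"left": 122, "top": 78, "width": 200, "height": 30}]
draw_rectangles_on_image("transformed_image.jpg", boxes, "output_with_rectangles.jpg")
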

Diff file suppressed because it is too large
+ 0 - 1
make_docx_demo/data.py


+ 35 - 36
make_docx_demo/docx_other_func.py

@@ -8,11 +8,12 @@ from base64 import b64decode
 import datetime
 
 import matplotlib.pyplot as plt
+
 plt.switch_backend('Agg')
 from io import BytesIO
 from tools.loglog import logger, log_err_e
 from docx import Document
-from docx.shared import Inches,Cm
+from docx.shared import Inches, Cm
 from threading import Lock
 from config.read_config import address
 
@@ -22,40 +23,43 @@ width_cm, height_cm = 5.4, 3
 width_in = width_cm
 height_in = height_cm
 
-plt.figure(figsize=(width_in, height_in)) 
+plt.figure(figsize=(width_in, height_in))
 
 
-def hex_to_rgb(hex_color:str):
-    hex_color = hex_color.lstrip('#') 
+def hex_to_rgb(hex_color: str):
+    hex_color = hex_color.lstrip('#')
     return RGBColor(int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16))
 
+
 def rgb_to_hex(r, g, b):
     return '{:02x}{:02x}{:02x}'.format(r, g, b)
 
+
 def is_base64(text):
     try:
-       
-        image_bytes =b64decode(text)
+
+        image_bytes = b64decode(text)
         return image_bytes
     except Exception:
-       
+
         return False
 
 
 def time_use(fn):
     @wraps(fn)
-    def cc(*args,**kwargs): 
+    def cc(*args, **kwargs):
         f_time = time.time()
-        res = fn(*args,**kwargs)
+        res = fn(*args, **kwargs)
 
-        cha = round(time.time()-f_time,3)
+        cha = round(time.time() - f_time, 3)
         if cha > 0.3:
-            print(f'函数:{fn.__name__} 一共用时',cha,'秒')
-        return res 
-    return cc 
+            print(f'函数:{fn.__name__} 一共用时', cha, '秒')
+        return res
 
+    return cc
 
-def qrcode_maker(id_text=None,full_url=None) -> BytesIO:
+
+def qrcode_maker(id_text=None, full_url=None) -> BytesIO:
     """
     :param id_text: id_text 提供id,二维码地址是春笋筛查表的地址;http://dcjxb.yunzhixue.cn/link?type=scanpage&id=999;
     :param full_url: 如果提供,直接使用这个文本来生成二维码的地址
@@ -73,14 +77,12 @@ def qrcode_maker(id_text=None,full_url=None) -> BytesIO:
     qr.add_data(text)
     qr.make(fit=True)
 
-   
     img = qr.make_image(fill_color="black", back_color="white")
     img_byte_arr = io.BytesIO()
     img.save(img_byte_arr, format='PNG')
     img_byte_arr.seek(0)
-   
-    return img_byte_arr
 
+    return img_byte_arr
 
 
 def get_weekday():
@@ -91,7 +93,7 @@ def get_weekday():
     return weekday_chinese
 
 
-def make_chart(x_axis_data,y_axis_datas,title,sub_title_list,x_axis_label=None,y_axis_label=None):
+def make_chart(x_axis_data, y_axis_datas, title, sub_title_list, x_axis_label=None, y_axis_label=None):
     """
     :param sub_title_list: 小标题集合,放在右上角,用来标记每个y轴的数据标题
     :param y_axis_label:Y轴文本
@@ -103,7 +105,6 @@ def make_chart(x_axis_data,y_axis_datas,title,sub_title_list,x_axis_label=None,y
     """
     x_len = len(x_axis_data)
 
-   
     image_io = BytesIO()
 
     font1 = {'family': 'SimSun', 'weight': 'normal', 'size': 14}
@@ -113,34 +114,32 @@ def make_chart(x_axis_data,y_axis_datas,title,sub_title_list,x_axis_label=None,y
         for y in y_axis_datas:
             if len(y) != x_len:
                 logger.error("x轴的y轴的数据个数不一致")
-            plt.plot(x_axis_data, y, marker='o',label="zxs") 
+            plt.plot(x_axis_data, y, marker='o', label="zxs")
 
-        plt.title(title) 
+        plt.title(title)
         if x_axis_label:
-            plt.xlabel(x_axis_label) 
+            plt.xlabel(x_axis_label)
         if y_axis_label:
-            plt.ylabel(y_axis_label) 
-        plt.grid(True) 
+            plt.ylabel(y_axis_label)
+        plt.grid(True)
 
-        for index,sub_title in enumerate(sub_title_list):
-            plt.text(0.95, 0.9-index*0.15, sub_title, transform=plt.gca().transAxes, fontsize=10, va='top', ha='right', backgroundcolor='w')
+        for index, sub_title in enumerate(sub_title_list):
+            plt.text(0.95, 0.9 - index * 0.15, sub_title, transform=plt.gca().transAxes, fontsize=10, va='top', ha='right',
+                     backgroundcolor='w')
         with lock:
-            plt.savefig(image_io, format='png', bbox_inches='tight') 
-            image_io.seek(0) 
+            plt.savefig(image_io, format='png', bbox_inches='tight')
+            image_io.seek(0)
 
         return image_io
     except Exception as e:
-        log_err_e(e,"折线图生成错误")
+        log_err_e(e, "折线图生成错误")
         image_io.close()
         return None
 
 
 if __name__ == '__main__':
-   
-   
-
-    t= time.time()
-    io = qrcode_maker('',"http://111.231.167.191:8001/mp3")
-    with open("1.jpg",'wb') as f:
+    t = time.time()
+    io = qrcode_maker('', "http://111.231.167.191:8001/mp3")
+    with open("1.jpg", 'wb') as f:
         f.write(io.read())
-    print(time.time()-t)
+    print(time.time() - t)
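
A minimal sketch of the render-to-memory pattern behind make_chart; pyplot state is process-global, so drawing is serialized with a Lock and the PNG goes into a BytesIO instead of a file. Unlike the module, which reuses one global figure, this variant opens and closes a figure per call:

from io import BytesIO
from threading import Lock

import matplotlib

matplotlib.use("Agg")                     # headless backend, as in the module
import matplotlib.pyplot as plt

_plot_lock = Lock()


def line_chart_png(x, ys, title):
    buf = BytesIO()
    with _plot_lock:                      # pyplot is not thread-safe
        plt.figure(figsize=(5.4, 3))      # chart size used in the module
        for y in ys:
            plt.plot(x, y, marker="o")
        plt.title(title)
        plt.grid(True)
        plt.savefig(buf, format="png", bbox_inches="tight")
        plt.close()
    buf.seek(0)
    return buf
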

+ 9 - 10
make_docx_demo/get_standard_data.py

@@ -1,30 +1,29 @@
 # -*- coding:utf-8 -*-
 """获取学段标准数据"""
-from cachetools import TTLCache,cached
+from cachetools import TTLCache, cached
 import requests
 from tools.loglog import logger, log_err_e
 
-
 cache = TTLCache(maxsize=100, ttl=86400)
 
 
-def get_standard_data(student_stage:int):
+def get_standard_data(student_stage: int):
     if student_stage in cache:
         return cache[student_stage]
 
     url = "https://dcjxb.yunzhixue.cn/api-dev/standard/study"
-    params = {"stage":student_stage}
-    response = requests.get(url,params=params)
+    params = {"stage": student_stage}
+    response = requests.get(url, params=params)
     if response.status_code == 200:
-        data_obj = response.json()['data'] 
-       
-        return_data = data_obj['totalVocabulary'],data_obj['readingAccuracy'],data_obj['readingLevel'],data_obj['readingSpeed']
+        data_obj = response.json()['data']
+
+        return_data = data_obj['totalVocabulary'], data_obj['readingAccuracy'], data_obj['readingLevel'], data_obj['readingSpeed']
 
-        cache[student_stage] = return_data 
+        cache[student_stage] = return_data
         return return_data
 
 
 if __name__ == '__main__':
     print(get_standard_data(3))
     print(cache)
-    print(1 in cache,2 in cache,3 in cache)
+    print(1 in cache, 2 in cache, 3 in cache)
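
A minimal sketch of the manual TTLCache lookup above: a hit returns the cached tuple, a miss re-fetches and stores it, and entries silently expire after ttl seconds. fetch_standard is a placeholder for the HTTP call, which the module leaves returning None on a non-200 response:

from cachetools import TTLCache

cache = TTLCache(maxsize=100, ttl=86400)       # one day, as in the module


def get_standard_cached(stage, fetch_standard):
    if stage in cache:                         # fresh entry from the last day
        return cache[stage]
    data = fetch_standard(stage)               # may be None on a failed request
    if data is not None:
        cache[stage] = data
    return data
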

+ 107 - 162
make_docx_demo/main_word.py

@@ -18,12 +18,11 @@ num_dict = {1: "❶", 2: "❷", 3: "❸", 4: "❹", 5: "❺", 6: "❻", 7: "❼"
             10: "❿", 11: "⓫", 12: "⓬", 13: "⓭", 14: "⓮", 15: "⓯", 16: "⓰", 17: "⓱", 18: "⓲", 19: "⓳", 20: "⓴"}
 
 
-
 @time_use
 def header_maker(docx: Word, json_data):
-    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0") 
-    exercise_title = json_data.get("ExerciseTitle", "") 
-    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
+    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
+    exercise_title = json_data.get("ExerciseTitle", "")
+    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
 
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     class_name = json_data.get("StudentInfo").get("ClassName", '')
@@ -41,15 +40,12 @@ def header_maker(docx: Word, json_data):
         tb_header.set_cell_text(0, 4, f"{t_date}\n{t_weekday}\n{t_time}", size=8, border=False, color=(220, 220, 220))
 
         tb_header.set_tb_colum_width(width=[100, 70, 70, 150, 80])
-       
 
-    target_section = docx.doc.sections[-1] 
+    target_section = docx.doc.sections[-1]
     target_section.header.is_linked_to_previous = False
-   
+
     for paragraph in target_section.header.paragraphs:
-        paragraph.clear() 
-   
-   
+        paragraph.clear()
 
     target_section.header_distance = 0
     target_section.footer_distance = 280000
@@ -60,9 +56,9 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
     p = docx.add_blank_paragraph()
     line_width = 205
     main_rect_x = line_width + 10
-    main_rect_width = 150 
+    main_rect_width = 150
 
-    right_line_x = main_rect_x + main_rect_width + 10 
+    right_line_x = main_rect_x + main_rect_width + 10
 
     p.add_rectangle(main_title, x=main_rect_x, y=4, fill_color="000000", width=main_rect_width, height=48, font_color="ffffff",
                     font_size=18)
@@ -82,28 +78,27 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-    reading_speed = json_data['StudentInfo']['StudentStudy']['ReadingSpeed'] 
-    reading_accuracy = json_data['StudentInfo']['StudentStudy']['ReadingAccuracy'] 
-    reading_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
+    reading_speed = json_data['StudentInfo']['StudentStudy']['ReadingSpeed']
+    reading_accuracy = json_data['StudentInfo']['StudentStudy']['ReadingAccuracy']
+    reading_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
 
-   
     chart_width = 5.4
     all_chart = json_data['StudentInfo']['StudentStudy']['ChartData']
-    reading_speed_chart = all_chart["ReadingSpeed"] 
-    reading_accuracy_chart = all_chart["ReadingAccuracy"] 
-    reading_difficult_chart = all_chart["ReadingDifficulties"] 
+    reading_speed_chart = all_chart["ReadingSpeed"]
+    reading_accuracy_chart = all_chart["ReadingAccuracy"]
+    reading_difficult_chart = all_chart["ReadingDifficulties"]
 
-    reading_speed_x_data = reading_speed_chart['XAxis'] 
-    reading_speed_sub_title = reading_speed_chart['Legend'] 
-    reading_speed_y_datas = [i['Data'] for i in reading_speed_chart['Series']] 
+    reading_speed_x_data = reading_speed_chart['XAxis']
+    reading_speed_sub_title = reading_speed_chart['Legend']
+    reading_speed_y_datas = [i['Data'] for i in reading_speed_chart['Series']]
 
-    reading_accuracy_x_data = reading_accuracy_chart['XAxis'] 
-    reading_accuracy_sub_title = reading_accuracy_chart['Legend'] 
-    reading_accuracy_y_datas = [i['Data'] for i in reading_accuracy_chart['Series']] 
+    reading_accuracy_x_data = reading_accuracy_chart['XAxis']
+    reading_accuracy_sub_title = reading_accuracy_chart['Legend']
+    reading_accuracy_y_datas = [i['Data'] for i in reading_accuracy_chart['Series']]
 
-    reading_difficult_x_data = reading_difficult_chart['XAxis'] 
-    reading_difficult_sub_title = reading_difficult_chart['Legend'] 
-    reading_difficult_y_datas = [i['Data'] for i in reading_difficult_chart['Series']] 
+    reading_difficult_x_data = reading_difficult_chart['XAxis']
+    reading_difficult_sub_title = reading_difficult_chart['Legend']
+    reading_difficult_y_datas = [i['Data'] for i in reading_difficult_chart['Series']]
 
     "开始版面-------------------------------------------------"
 
@@ -133,26 +128,26 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     chart1_io = make_chart(x_axis_data=reading_speed_x_data, y_axis_datas=reading_speed_y_datas, title="阅读速度",
                            sub_title_list=reading_speed_sub_title)
     run1.add_pic(chart1_io, width=chart_width)
-    chart1_io.close() 
+    chart1_io.close()
 
     p2 = tb3.get_cell_paragraph(0, 1, dq=15, dh=15)
     run2 = ParagraphBase(p2)
     chart2_io = make_chart(x_axis_data=reading_accuracy_x_data, y_axis_datas=reading_accuracy_y_datas, title="阅读准确率",
                            sub_title_list=reading_accuracy_sub_title)
     run2.add_pic(chart2_io, width=chart_width)
-    chart2_io.close() 
+    chart2_io.close()
 
     p3 = tb3.get_cell_paragraph(0, 2, dq=15, dh=15)
     run3 = ParagraphBase(p3)
     chart3_io = make_chart(x_axis_data=reading_difficult_x_data, y_axis_datas=reading_difficult_y_datas, title="阅读难度",
                            sub_title_list=reading_difficult_sub_title)
     run3.add_pic(chart3_io, width=chart_width)
-    chart3_io.close() 
+    chart3_io.close()
 
     docx.add_blank_paragraph()
 
     tb4 = Table(docx, rows=5, cols=5, border=True, tb_name="自主复习记录")
-   
+
     tb4.set_table_width_xml([2000, 3000, 2000, 2000, 2000])
 
     first_cell = tb4.get_cell(0, 0)
@@ -246,7 +241,7 @@ def section_2(docx: Word, json_data, *args, **kwargs):
 
     docx.add_blank_paragraph(dq=2, dh=2)
     docx.add_paragraph("北京云知学科技有限公司", align="right", size=10)
-   
+
     docx.add_page_section()
 
 
@@ -333,15 +328,12 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside = Table(cell_outside, rows=5, cols=3, tb_name="内部内容")
 
         tb_inside.merge_cell(0, 0, 0, 1)
-        tb_inside.merge_cell(1, 0, 1, 1) 
-        tb_inside.merge_cell(0, 2, 1, 2) 
-        tb_inside.merge_cell(2, 0, 2, 2) 
-        tb_inside.merge_cell(3, 0, 3, 2) 
-        tb_inside.merge_cell(4, 0, 4, 2) 
-
-       
+        tb_inside.merge_cell(1, 0, 1, 1)
+        tb_inside.merge_cell(0, 2, 1, 2)
+        tb_inside.merge_cell(2, 0, 2, 2)
+        tb_inside.merge_cell(3, 0, 3, 2)
+        tb_inside.merge_cell(4, 0, 4, 2)
 
-       
         num_calucate = 2 * row + 1 if col == 0 else 2 * row + 2
         p = ParagraphBase(tb_inside.get_cell_paragraph(0, 0, align="left"))
         p.add_run_to_p(num_dict[num_calucate], bold=True, size=22, font_name="MS Gothic")
@@ -349,19 +341,17 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside.set_cell_text(row=1, column=0, cell_text=data[1] + "  " + data[2], border=False, size=10, align="left",
                                 bk_color=(240, 240, 240))
 
-       
-        image_io:BytesIO = qrcode_result.get(data[9], "") 
+        image_io: BytesIO = qrcode_result.get(data[9], "")
         if image_io:
             cell_p = tb_inside.get_cell_paragraph(0, 2, dq=5)
             p_base = ParagraphBase(cell_p)
             p_base.add_pic(image_io, width=1.5)
             image_io.close()
 
-       
         cell_p = tb_inside.get_cell_paragraph(2, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
-        cell_p_1.add_run_to_p(data[3], size=10, bold=True) 
-        cell_p_1.add_run_to_p("   " + data[4], size=8) 
+        cell_p_1.add_run_to_p(data[3], size=10, bold=True)
+        cell_p_1.add_run_to_p("   " + data[4], size=8)
 
         cell_p = tb_inside.get_cell_paragraph(3, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
@@ -375,40 +365,39 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
     properties_chinese_map = {"adj": "形容词", "n": "名词", "interj": "感叹词", "conj": "连词", "num": "数字", "art": "冠词",
                               "pron": "代词", "adv": "副词", "prep": "介词", "v": "动词"}
-    strange_words_data = [] 
+    strange_words_data = []
     strange_words = json_data.get('StrangeWords')
-    qrcode_thread = [] 
+    qrcode_thread = []
     qrcode_result = {}
 
     for item in strange_words:
-        spell = item['Spell'] 
-        word_id = item['WordId'] 
+        spell = item['Spell']
+        word_id = item['WordId']
         en = "" if not item.get("SymbolsEn", "") else item.get("SymbolsEn")
         am = "" if not item.get("SymbolsAm", "") else item.get("SymbolsAm")
 
-        symbols_en = "英" + f'[{en}]' 
-        symbols_am = "美" + f'[{am}]' 
+        symbols_en = "英" + f'[{en}]'
+        symbols_am = "美" + f'[{am}]'
 
-       
         tts_url = f"https://dcjxb.yunzhixue.cn/exercise/word?id={word_id}"
         t = Thread(target=qrcode_maker, args=(tts_url, qrcode_result))
         qrcode_thread.append(t)
         t.start()
 
-        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']]) 
-        word_meanings = item.get('Meaning', "") 
+        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']])
+        word_meanings = item.get('Meaning', "")
         word_changes = ";".join([s["Type"] + ":" + s["Spell"] for s in item["WordChanges"]])
 
         if item['Sentences']:
             sentences = item['Sentences'][0]['English'] + '\n' + item['Sentences'][0]['Chinese']
         else:
             sentences = ""
-       
+
         single_word_tuple = (spell, symbols_en, symbols_am, word_properties, word_meanings,
                              "词汇变形", word_changes, "例句", sentences, tts_url)
         strange_words_data.append(single_word_tuple)
 
-    rows = math.ceil(len(strange_words_data) / 2) 
+    rows = math.ceil(len(strange_words_data) / 2)
     tb_outside = Table(docx, rows=rows, cols=2, tb_name="外层框架")
     tb_outside.set_tb_colum_width(width=[230, 230])
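
A minimal sketch of the QR fan-out used in section_4_1: one thread per word URL, results gathered into a shared dict (safe here because each thread writes its own key), all joined before the table is laid out. make_qr is a placeholder for the project's generator:

from threading import Thread


def collect_qrcodes(urls, make_qr):
    results = {}

    def worker(url):
        results[url] = make_qr(url)            # one BytesIO per URL

    threads = [Thread(target=worker, args=(u,)) for u in urls]
    for t in threads:
        t.start()
    for t in threads:
        t.join()                               # mirror the join before layout
    return results
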
 
@@ -428,12 +417,10 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_5(docx: Word, json_data, *args, **kwargs):
-   
     copy_word_list = [i['Meaning'] for i in json_data.get('StrangeWords')]
-    random_copy_word_list = copy_word_list * 3 
+    random_copy_word_list = copy_word_list * 3
     shuffle(random_copy_word_list)
 
-   
     first_copy_word_list = copy_word_list.copy()
     copy_word_list_add_num = [f"{i} ({idx})" for idx, i in enumerate(first_copy_word_list, start=1)]
     shuffle(copy_word_list_add_num)
@@ -486,8 +473,6 @@ def section_6(docx: Word, json_data, *args, **kwargs):
         p.add_run_to_p("☆ ", size=10, font_name="MS Gothic")
         p.add_run_to_p(t, size=10)
 
-   
-
     data = ["1. I have no chance to go sightseeing this summer.	(chance)",
             "2. And with that, we conclude the third and final example.	(third)",
             "3. He lives a healthy and normal life and has a strong body.	(healthy)",
@@ -505,8 +490,8 @@ def section_6(docx: Word, json_data, *args, **kwargs):
             "15. His performance at the concert last night proved that he is in the top of international pianists.	(concert)"]
 
     for i in example_sentence:
-        p = docx.add_blank_paragraph(dq=4,dh=4)
-        p.add_run_to_p("□  ", size=12,font_name="宋体")
+        p = docx.add_blank_paragraph(dq=4, dh=4)
+        p.add_run_to_p("□  ", size=12, font_name="宋体")
         p.add_run_to_p(i + "___________")
 
     docx.add_page_section()
@@ -514,22 +499,20 @@ def section_6(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_7(docx: Word, json_data, *args, **kwargs):
-   
     def wanxing(index, article_single):
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
-       
+
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-       
+
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
-       
         select_text = []
         for ques_index, candidates in enumerate(article_single['Questions'], start=1):
             single_select_text = ''
             for s in candidates['Candidates']:
                 single_select_text += s['Label'] + '. '
-                participle = s['Participle'] 
+                participle = s['Participle']
                 if participle:
                     single_select_text += participle + ' \n'
                 else:
@@ -538,14 +521,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
             select_text.append(f"{ques_index}. {single_select_text}")
 
-       
         all_select_text = "\n".join(select_text)
 
-       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
 
-       
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -588,8 +568,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="完形填空")
         tb2.set_tb_colum_width(width=[320, 140])
 
-       
-       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
@@ -600,7 +578,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else: 
+                else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -618,9 +596,8 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
 
-   
     def reading(index, article_single):
-       
+
         all_article_length = 0
 
         def single_yuedu(index, a):
@@ -628,20 +605,19 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             article_length = a['AllWordAmount']
             nonlocal all_article_length
             all_article_length += article_length
-           
+
             strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-           
+
             explanatory_words_ids = [i['MeaningId'] for i in a['ExplanatoryWords']]
 
-           
             select_text = []
             for ques_index, candidates in enumerate(a['Questions'], start=1):
                 single_select_text = ''
-               
-                subject = candidates['Subject'] + '\n' 
+
+                subject = candidates['Subject'] + '\n'
                 for s in candidates['Candidates']:
-                    single_select_text += s['Label'] + '. ' 
-                    participle = s['Participle'] 
+                    single_select_text += s['Label'] + '. '
+                    participle = s['Participle']
                     if participle:
                         single_select_text += participle + ' \n'
                     else:
@@ -649,14 +625,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         single_select_text += text + ' \n'
                 select_text.append(str(ques_index) + ". " + subject + single_select_text)
 
-           
             all_select_text = "\n".join(select_text)
 
-           
             article_main: str = a['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
             article_main_list = article_main.split(" ")
 
-           
             explanatory_words = "\n\n".join(
                 [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
                  enumerate(a['ExplanatoryWords'], start=1)])
@@ -672,7 +645,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
             tb2.set_tb_colum_width(width=[320, 140])
 
-           
             tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
             for w in article_main_list:
                 word = re.search(r"\[(\d+)]", w)
@@ -683,7 +655,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     elif meaning_id in explanatory_words_ids:
                         tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                    else: 
+                    else:
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
                 else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -696,9 +668,9 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             docx.add_blank_paragraph()
 
         "---------------------开始单篇运行---------------------"
-        if index == 1: 
+        if index == 1:
             sub_title_maker(docx, "阅读提升练", "智能匹配难度,轻松提升阅读", "春笋智学, 高效学习专家")
-           
+
             tb = Table(docx, 1, 1, tb_name="真题强化练", border=True)
             tb.set_tb_colum_width(0, 460)
             text = ["阅读中不认识的单词,尽量猜测词义,并用红笔加以标记,以便日后快速回顾。\n",
@@ -723,21 +695,19 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
 
-   
     def seven_to_five(index, article_single):
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
-       
+
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-       
+
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
-       
         select_text = []
         for ques_index, s_candidates in enumerate(article_single['Candidates'], start=1):
             single_select_text = ''
             single_select_text += s_candidates['Label'] + '. '
-            participle = s_candidates['Participle'] 
+            participle = s_candidates['Participle']
             if participle:
                 single_select_text += participle
             else:
@@ -746,14 +716,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
             select_text.append(f"{single_select_text}")
 
-       
         all_select_text = "\n".join(select_text)
 
-       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
 
-       
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -785,7 +752,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
         tb2.set_tb_colum_width(width=[320, 140])
 
-       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
@@ -796,7 +762,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else: 
+                else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -813,19 +779,18 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
     "判断题型;根据题型选择----------------------------"
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category'] 
+        article_type = article_single['Category']
 
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
 
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single) 
+        article_type_select[article_type](index, article_single)
 
     docx.add_page_section()
 
 
 @time_use
 def section_8(docx: Word, json_data, *args, **kwargs):
-   
     sub_title_maker(docx, "单词趣味填", "趣味练习,多维提升和巩固")
     docx.add_pic_single_paragraph("make_docx_demo/static/happy_word.jpg", align="center", width=14.58)
     docx.add_page_section()
@@ -835,51 +800,46 @@ def section_8(docx: Word, json_data, *args, **kwargs):
 def section_9(docx: Word, json_data, *args, **kwargs):
     def wanxing(index, article_single):
         chinese_article = article_single['Chinese']
-        all_analysis = '' 
+        all_analysis = ''
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         text = "做阅读题的目的是锻炼理解英语文本的能力,答题只是检验理解程度的手段。请尽量根据所给题眼理解解题依据。若需要看汉语解析才能明白,你需要回到词汇与阅读训练,并从较低难度入手,以便打好基础。"
         docx.add_paragraph(text, size=9)
 
-       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip() 
-            abcd_label = '' 
+            analysis = question_item['Analysis'].strip()
+            abcd_label = ''
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label'].strip()
 
-            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n" 
+            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n"
 
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph("全文参考译文", chinese_font_name="微软雅黑", dq=15, dh=5, bold=True)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
-   
     def reading(index, article_single):
-        all_analysis = '' 
-        all_difficult_sentences = [] 
+        all_analysis = ''
+        all_difficult_sentences = []
 
         chinese_article = article_single['Chinese']
 
-       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip("\n") 
-            abcd_label = '' 
+            analysis = question_item['Analysis'].strip("\n")
+            abcd_label = ''
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label'].strip("\n")
 
             all_analysis += f"{ques_index}.{abcd_label}  {analysis}\n"
 
-       
         all_analysis += '\n'
 
-       
         for difficult_sentence_item in article_single['DifficultSentences']:
             all_difficult_sentences.append(difficult_sentence_item['Chinese'])
 
@@ -898,19 +858,19 @@ def section_9(docx: Word, json_data, *args, **kwargs):
 
     def seven_to_five(index, article_single):
         chinese_article = article_single['Chinese']
-        all_analysis = '' 
+        all_analysis = ''
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         text = "做阅读题的目的是锻炼理解英语文本的能力,答题只是检验理解程度的手段。请尽量根据所给题眼理解解题依据。若需要看汉语解析才能明白,你需要回到词汇与阅读训练,并从较低难度入手,以便打好基础。"
         docx.add_paragraph(text, size=9)
-       
+
         for q_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'] 
-            abcd_label = '' 
+            analysis = question_item['Analysis']
+            abcd_label = ''
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label']
             all_analysis += f"{q_index}.{abcd_label}  {analysis}\n"
 
@@ -922,10 +882,10 @@ def section_9(docx: Word, json_data, *args, **kwargs):
     "判断题型;根据题型选择----------------------------"
     sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯")
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category'] 
+        article_type = article_single['Category']
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single) 
+        article_type_select[article_type](index, article_single)
         docx.add_blank_paragraph()
 
     docx.add_docx_component("make_docx_demo/word_component/blank.docx")
@@ -973,17 +933,16 @@ def section_10(docx: Word, json_data, scanpage_format, **kwargs):
 
 
 def two_check_page(docx: Word, json_data, **kwargs):
-   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("") 
+            word_data_list.append("")
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[40, 100, 100])
 
         p_cell = tb.get_cell_paragraph(0, 0, dq=10)
         p = ParagraphBase(p_cell)
-        p.add_pic("make_docx_demo/static/logo2.png", width=Inches(1.2)) 
+        p.add_pic("make_docx_demo/static/logo2.png", width=Inches(1.2))
 
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 2, f"{page_title}\n{page_sub_title}", border=False, size=8, dh=2)
@@ -994,9 +953,9 @@ def two_check_page(docx: Word, json_data, **kwargs):
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
-        half_count = int(len(word_data_list) / 2) 
-        for index,row in enumerate(range(half_count)):
-            first_word, second_word = word_data_list[row],word_data_list[row + half_count]
+        half_count = int(len(word_data_list) / 2)
+        for index, row in enumerate(range(half_count)):
+            first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
             cell4 = "□ ___________________________" if second_word else ""
 
@@ -1007,13 +966,12 @@ def two_check_page(docx: Word, json_data, **kwargs):
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break() 
+        docx.add_page_break()
 
-   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""]) 
+            word_data_list2.append(["", ""])
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[40, 130, 130, 150, 70])
@@ -1039,15 +997,10 @@ def two_check_page(docx: Word, json_data, **kwargs):
         tb = Table(docx, rows=0, cols=4, tb_name="第二页筛查表")
 
         ## 1234横着放
-       
-       
-       
+
         #
-       
-       
+
         #
-       
-       
 
         ## 1234竖着放
         total_row = int(len(word_data_list2) / 2)
@@ -1058,13 +1011,13 @@ def two_check_page(docx: Word, json_data, **kwargs):
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
 
-            data = [f"{row + 1}. {spell1}", f"□ {meaning1}", cell3, cell4] 
+            data = [f"{row + 1}. {spell1}", f"□ {meaning1}", cell3, cell4]
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_row_height(13.6)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
-        if article_type == 1: 
+        if article_type == 1:
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
             docx.add_paragraph(foot_description2, align="right", size=8, chinese_font_name="仿宋")
@@ -1072,45 +1025,39 @@ def two_check_page(docx: Word, json_data, **kwargs):
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
 
-
-   
-    student_name = json_data.get("StudentInfo").get("StudentName", '') 
-    class_name = json_data.get("StudentInfo").get("ClassName", '') 
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
-    article_type = json_data['Articles'][0]['Category'] 
-    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage'] 
+    student_name = json_data.get("StudentInfo").get("StudentName", '')
+    class_name = json_data.get("StudentInfo").get("ClassName", '')
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
+    article_type = json_data['Articles'][0]['Category']
+    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage']
 
     """---------------------------------------------------------------------------------"""
     for index, page in enumerate(json_data['ScreeningScanPages'], start=1):
         page_id = str(page['PageId']).rjust(11, "0")
 
-       
         if index >= 2:
             docx.add_page_break()
 
-        page_title = page['Title'] 
-        page_sub_title = page['SubTitle'] 
-        foot_description = page['FootDescription'] 
-        foot_description2 = page['FootDescription2'] 
+        page_title = page['Title']
+        page_sub_title = page['SubTitle']
+        foot_description = page['FootDescription']
+        foot_description2 = page['FootDescription2']
 
         word_data_list1 = []
         word_data_list2 = []
-        for i in page['FilterTable']['Items']: 
+        for i in page['FilterTable']['Items']:
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
-       
         if is_add_empty_filter_page:
             empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list1)
 
-       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
 
 
 @time_use
 def other(docx, json_data, **kwargs):
-   
     sections = docx.doc.sections
     for section in sections[:-1]:
         section.top_margin = Inches(0.3)
@@ -1143,7 +1090,6 @@ def start_make_word(json_data, document_format, scanpage_format):
         for s in menu:
             s(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
 
-       
         docx.save_docx()
         if document_format == 1:
             return "develop.docx"
@@ -1161,6 +1107,5 @@ if __name__ == '__main__':
     t = time.time()
     os.chdir("..")
 
-   
     start_make_word(test_json5, 1, 1)
     print(time.time() - t)

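The Category dispatch that closes section_7 and section_9 above is the same three-way table in both files: map the article's Category code to a handler and assert membership before calling. A minimal standalone sketch of the pattern (handler bodies and sample data are illustrative, not from the repository):

def reading(index, article):
    print(f"Passage {index}: reading, {article['AllWordAmount']} words")

def wanxing(index, article):
    print(f"Passage {index}: cloze, {article['AllWordAmount']} words")

def seven_to_five(index, article):
    print(f"Passage {index}: seven-to-five, {article['AllWordAmount']} words")

article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}

def render_articles(articles):
    for index, article_single in enumerate(articles, start=1):
        article_type = article_single['Category']
        # fail fast on an unknown category, as the sections above do
        assert article_type in article_type_select, f"unknown Category {article_type}"
        article_type_select[article_type](index, article_single)

render_articles([{'Category': 1, 'AllWordAmount': 320},
                 {'Category': 3, 'AllWordAmount': 180}])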
+ 137 - 232
make_docx_demo/main_word_applet.py

@@ -23,12 +23,11 @@ num_dict = {1: "❶", 2: "❷", 3: "❸", 4: "❹", 5: "❺", 6: "❻", 7: "❼"
             10: "❿", 11: "⓫", 12: "⓬", 13: "⓭", 14: "⓮", 15: "⓯", 16: "⓰", 17: "⓱", 18: "⓲", 19: "⓳", 20: "⓴"}
 
 
-
 @time_use
 def header_maker(docx: Word, json_data):
-    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0") 
-    exercise_title = json_data.get("ExerciseTitle", "") 
-    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
+    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
+    exercise_title = json_data.get("ExerciseTitle", "")
+    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
 
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "")
@@ -46,15 +45,12 @@ def header_maker(docx: Word, json_data):
         tb_header.set_cell_text(0, 4, f"{t_date}\n{t_weekday}\n{t_time}", size=8, border=False, color=(220, 220, 220))
 
         tb_header.set_tb_colum_width(width=[100, 70, 70, 150, 80])
-       
 
-    target_section = docx.doc.sections[-1] 
+    target_section = docx.doc.sections[-1]
     target_section.header.is_linked_to_previous = False
-   
+
     for paragraph in target_section.header.paragraphs:
-        paragraph.clear() 
-   
-   
+        paragraph.clear()
 
     target_section.header_distance = 0
     target_section.footer_distance = 280000
@@ -65,9 +61,9 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
     p = docx.add_blank_paragraph()
     line_width = 200
     main_rect_x = line_width + 10
-    main_rect_width = 150 
+    main_rect_width = 150
 
-    right_line_x = main_rect_x + main_rect_width + 10 
+    right_line_x = main_rect_x + main_rect_width + 10
 
     p.add_rectangle(main_title, x=main_rect_x, y=4, fill_color="000000", width=main_rect_width, height=48, font_color="ffffff",
                     font_size=18)
@@ -84,32 +80,25 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
 
 @time_use
 def section_1(docx: Word, json_data, *args, **kwargs):
-   
-    exercise_id_int = json_data.get("ExerciseId", "") 
-    student_name = json_data.get("StudentInfo").get("StudentName", '') 
-    student_stage = json_data.get("StudentInfo").get("StudentStage") 
+    exercise_id_int = json_data.get("ExerciseId", "")
+    student_name = json_data.get("StudentInfo").get("StudentName", '')
+    student_stage = json_data.get("StudentInfo").get("StudentStage")
     grade_name = {1: "小学", 2: "初中", 3: "高中"}.get(student_stage)
-    t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
+    t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
 
-   
     totalVocabulary, readingAccuracy, readingLevel, readingSpeed = get_standard_data(student_stage)
 
-    FirstVocabulary = json_data['StudentInfo']['StudentStudy']['FirstVocabulary'] 
-    Vocabulary = json_data['StudentInfo']['StudentStudy']['Vocabulary'] 
-    ReadingVolume = json_data['StudentInfo']['StudentStudy']['ReadingVolume'] 
+    FirstVocabulary = json_data['StudentInfo']['StudentStudy']['FirstVocabulary']
+    Vocabulary = json_data['StudentInfo']['StudentStudy']['Vocabulary']
+    ReadingVolume = json_data['StudentInfo']['StudentStudy']['ReadingVolume']
 
-   
-   
-   
-    r6 = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
+    r6 = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
 
-   
-    r7 = len([strange_words for exercise in json_data['WordAndArticleContents'] for strange_words in exercise['StrangeWords']]) 
-    r8 = r6 
-    multi_article_difficulty = [article_obj['Score'] for article_obj in json_data['WordAndArticleContents'][0]['Articles']] 
-    difficulty_value = sum(multi_article_difficulty) // len(multi_article_difficulty) if multi_article_difficulty else 0 
+    r7 = len([strange_words for exercise in json_data['WordAndArticleContents'] for strange_words in exercise['StrangeWords']])
+    r8 = r6
+    multi_article_difficulty = [article_obj['Score'] for article_obj in json_data['WordAndArticleContents'][0]['Articles']]
+    difficulty_value = sum(multi_article_difficulty) // len(multi_article_difficulty) if multi_article_difficulty else 0
 
-   
     InspirationalMessage = json_data.get('InspirationalMessage')
     "开始版面-------------------------------------------------"
 
@@ -156,12 +145,10 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     t5.set_row_height(row_height=50)
     t5.set_tb_colum_width(0, 500)
 
-   
     docx.add_paragraph(text="多媒体辅助", size=16, align="left", bold=True, dq=10, dh=5)
     docx.add_paragraph(text="需要示范的的学员,扫以下二维码获取音频、视频示范:", size=12, align="left", dq=5, dh=5)
     p = docx.add_blank_paragraph()
 
-   
     img_io = qrcode_maker(full_url=f"{address}/link?type=exercise&id={exercise_id_int}&from=bltf")
     p.add_pic(img_io, width=2)
     img_io.close()
@@ -197,17 +184,13 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         cell_outside = tb_outside.get_cell(row, col, delete_default_para=True)
         tb_inside = Table(cell_outside, rows=5, cols=3, tb_name="内部内容")
 
-       
         tb_inside.merge_cell(0, 0, 0, 2)
-        tb_inside.merge_cell(1, 0, 1, 2) 
-       
-        tb_inside.merge_cell(2, 0, 2, 2) 
-        tb_inside.merge_cell(3, 0, 3, 2) 
-        tb_inside.merge_cell(4, 0, 4, 2) 
+        tb_inside.merge_cell(1, 0, 1, 2)
 
-       
+        tb_inside.merge_cell(2, 0, 2, 2)
+        tb_inside.merge_cell(3, 0, 3, 2)
+        tb_inside.merge_cell(4, 0, 4, 2)
 
-       
         num_calucate = 2 * row + 1 if col == 0 else 2 * row + 2
         p = ParagraphBase(tb_inside.get_cell_paragraph(0, 0, align="left"))
         p.add_run_to_p(num_dict[num_calucate], bold=True, size=22, font_name="MS Gothic")
@@ -215,19 +198,10 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside.set_cell_text(row=1, column=0, cell_text=data[1] + "  " + data[2], border=False, size=10, align="left",
                                 bk_color=(240, 240, 240))
 
-       
-       
-       
-       
-       
-       
-       
-
-       
         cell_p = tb_inside.get_cell_paragraph(2, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
-        cell_p_1.add_run_to_p(data[3], size=10, bold=True) 
-        cell_p_1.add_run_to_p("   " + data[4], size=8) 
+        cell_p_1.add_run_to_p(data[3], size=10, bold=True)
+        cell_p_1.add_run_to_p("   " + data[4], size=8)
 
         cell_p = tb_inside.get_cell_paragraph(3, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
@@ -241,31 +215,25 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
     properties_chinese_map = {"adj": "形容词", "n": "名词", "interj": "感叹词", "conj": "连词", "num": "数字", "art": "冠词",
                               "pron": "代词", "adv": "副词", "prep": "介词", "v": "动词"}
-    strange_words_data = [] 
+    strange_words_data = []
     strange_words = json_data.get('StrangeWords')
-    qrcode_thread = [] 
+    qrcode_thread = []
     qrcode_result = {}
 
     for item in strange_words:
-        spell = item['Spell'] 
-        word_id = item['WordId'] 
+        spell = item['Spell']
+        word_id = item['WordId']
         en = "" if not item.get("SymbolsEn", "") else item.get("SymbolsEn")
         am = "" if not item.get("SymbolsAm", "") else item.get("SymbolsAm")
 
-        symbols_en = "英" + f'[{en}]' 
-        symbols_am = "美" + f'[{am}]' 
-
-       
-       
-       
-       
-       
+        symbols_en = "英" + f'[{en}]'
+        symbols_am = "美" + f'[{am}]'
 
-        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']]) 
-        word_meanings = item.get('Meaning', "") 
+        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']])
+        word_meanings = item.get('Meaning', "")
         word_changes_list = []
-        for idx, s in enumerate(item["WordChanges"],start=1):
-            s_type,s_spell = s['Type'], s['Spell']
+        for idx, s in enumerate(item["WordChanges"], start=1):
+            s_type, s_spell = s['Type'], s['Spell']
             if "原型" in s_type or "大小写" in s_type:
                 continue
             tail = '\n' if idx != len(item["WordChanges"]) else ''
@@ -276,12 +244,12 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
             sentences = item['Sentences'][0]['English'] + '\n' + item['Sentences'][0]['Chinese']
         else:
             sentences = ""
-       
+
         single_word_tuple = (spell, symbols_en, symbols_am, word_properties, word_meanings,
                              "词汇变形", word_changes, "例句", sentences)
         strange_words_data.append(single_word_tuple)
 
-    rows = math.ceil(len(strange_words_data) / 2) 
+    rows = math.ceil(len(strange_words_data) / 2)
     tb_outside = Table(docx, rows=rows, cols=2, tb_name="外层框架")
     tb_outside.set_tb_colum_width(width=[230, 230])
 
@@ -301,12 +269,10 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_5(docx: Word, json_data, *args, **kwargs):
-   
     copy_word_list = [i['Meaning'] for i in json_data.get('StrangeWords')]
-    random_copy_word_list = copy_word_list * 3 
+    random_copy_word_list = copy_word_list * 3
     shuffle(random_copy_word_list)
 
-   
     first_copy_word_list = copy_word_list.copy()
     copy_word_list_add_num = [f"{i} ({idx})" for idx, i in enumerate(first_copy_word_list, start=1)]
     shuffle(copy_word_list_add_num)
@@ -369,22 +335,20 @@ def section_6(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_7(docx: Word, json_data, *args, **kwargs):
-   
     def wanxing(index, article_single):
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
-       
+
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-       
+
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
-       
         select_text = []
         for ques_index, candidates in enumerate(article_single['Questions'], start=1):
             single_select_text = ''
             for s in candidates['Candidates']:
                 single_select_text += s['Label'] + '. '
-                participle = s['Participle'] 
+                participle = s['Participle']
                 if participle:
                     single_select_text += participle + ' \n'
                 else:
@@ -393,14 +357,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
             select_text.append(f"{ques_index}. {single_select_text}")
 
-       
         all_select_text = "\n".join(select_text)
 
-       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
 
-       
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} [{i['SymbolsEn']}] [{i['SymbolsAm']}] {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -442,8 +403,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="完形填空")
         tb2.set_tb_colum_width(width=[320, 140])
 
-       
-       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
@@ -454,7 +413,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else: 
+                else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -470,42 +429,40 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
 
-   
     def reading(index, article_single):
 
         def single_yuedu(index, a):
             article_id = a['Id']
-            article_length = a['AllWordAmount'] 
+            article_length = a['AllWordAmount']
 
-            strange_words_ids = set() 
-            explanatory_words_ids = set() 
-            bold_word = set() 
-            italics_word = set() 
-            italics_index_dict = {} 
+            strange_words_ids = set()
+            explanatory_words_ids = set()
+            bold_word = set()
+            italics_word = set()
+            italics_index_dict = {}
 
             for i in json_data['StrangeWords']:
                 strange_words_ids.add(i['MeanId'])
                 bold_word.add(i['Spell'])
                 bold_word.update([change_word['Spell'] for change_word in i['WordChanges']])
-            for italics_index,ii in enumerate(a['ExplanatoryWords'], start=1):
+            for italics_index, ii in enumerate(a['ExplanatoryWords'], start=1):
                 explanatory_words_ids.add(ii['MeaningId'])
                 italics_word.add(ii['Spell'])
                 if 'WordChanges' in ii:
                     italics_word.update([change_word['Spell'] for change_word in ii['WordChanges']])
-                    italics_index_dict.update({change_word['Spell']:f"[{italics_index}]" for change_word in ii['WordChanges']})
-               
+                    italics_index_dict.update({change_word['Spell']: f"[{italics_index}]" for change_word in ii['WordChanges']})
+
                 italics_index_dict[ii['MeaningId']] = f"[{italics_index}]"
                 italics_index_dict[ii['Spell']] = f"[{italics_index}]"
 
-           
             select_text = []
             for ques_index, candidates in enumerate(a['Questions'], start=1):
                 single_select_text = ''
-               
-                subject = candidates['Subject'] + '\n' 
+
+                subject = candidates['Subject'] + '\n'
                 for s in candidates['Candidates']:
-                    single_select_text += s['Label'] + '. ' 
-                    participle = s['Participle'] 
+                    single_select_text += s['Label'] + '. '
+                    participle = s['Participle']
                     if participle:
                         single_select_text += participle + ' \n'
                     else:
@@ -513,14 +470,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         single_select_text += text + ' \n'
                 select_text.append(str(ques_index) + ". " + subject + single_select_text)
 
-           
             all_select_text = "\n".join(select_text)
 
-           
             article_main: str = a['English'] + "\n\n郑重提示:认真看完全文再看问题。\n" + all_select_text
             article_main_list = split_text_to_word_punctuation(article_main)
 
-           
             explanatory_words = "\n\n".join(
                 [f"{index}. {i['Spell']}\n [{i['SymbolsEn']}] [{i['SymbolsAm']}]\n {i['Meaning']}" for index, i in
                  enumerate(a['ExplanatoryWords'], start=1)])
@@ -536,7 +490,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="阅读")
             tb2.set_tb_colum_width(width=[320, 140])
 
-           
             tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
             for w in article_main_list:
                 word = re.search(r"\[(\d+)]", w)
@@ -551,7 +504,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     else:
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
 
-                else: 
+                else:
                     if w in bold_word:
                         tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     elif w in italics_word:
@@ -560,7 +513,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     else:
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
 
-            tb2.set_cell_text(0, 1, explanatory_words, size=10.5, font_color=(80, 80, 80), align="left", centre=False,line_spacing=300)
+            tb2.set_cell_text(0, 1, explanatory_words, size=10.5, font_color=(80, 80, 80), align="left", centre=False, line_spacing=300)
 
             docx.add_blank_paragraph()
             tail_zhushi = """完成时间:_____点_____分_____秒,本篇用时:_____秒。"""
@@ -569,7 +522,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
         def top_header():
             sub_title_maker(docx, "阅读提升练", "智能匹配难度,轻松提升阅读", "鲍利提分, 高效学习专家")
-           
+
             tb = Table(docx, 1, 1, tb_name="真题强化练", border=True)
             tb.set_tb_colum_width(0, 460)
             text = ["阅读中不认识的单词,尽量猜测词义,并用斜线划掉,以便拍照报告给我们。\n",
@@ -590,27 +543,25 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     pp.add_run_to_p(t, size=10)
 
             docx.add_blank_paragraph()
+
         "---------------------开始单篇运行---------------------"
-        if index == 1: 
+        if index == 1:
             top_header()
         single_yuedu(index, article_single)
 
-
-   
     def seven_to_five(index, article_single):
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
-       
+
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-       
+
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
-       
         select_text = []
         for ques_index, s_candidates in enumerate(article_single['Candidates'], start=1):
             single_select_text = ''
             single_select_text += s_candidates['Label'] + '. '
-            participle = s_candidates['Participle'] 
+            participle = s_candidates['Participle']
             if participle:
                 single_select_text += participle
             else:
@@ -619,14 +570,11 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
             select_text.append(f"{single_select_text}")
 
-       
         all_select_text = "\n".join(select_text)
 
-       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
 
-       
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} [{i['SymbolsEn']}] [{i['SymbolsAm']}] {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -657,7 +605,6 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
         tb2.set_tb_colum_width(width=[320, 140])
 
-       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
@@ -668,7 +615,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else: 
+                else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -678,18 +625,17 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_blank_paragraph()
 
     "判断题型;根据题型选择----------------------------"
-   
+
     all_article_length = 0
 
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category'] 
+        article_type = article_single['Category']
 
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
 
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single) 
+        article_type_select[article_type](index, article_single)
 
-       
         article_length = article_single['AllWordAmount']
         all_article_length += article_length
 
@@ -703,53 +649,49 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_9(docx: Word, json_data, *args, **kwargs):
-    def wanxing(index,article_count, article_single):
+    def wanxing(index, article_count, article_single):
         chinese_article = article_single['Chinese']
-        all_analysis = '' 
+        all_analysis = ''
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
 
-       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip() 
-            abcd_label = '' 
+            analysis = question_item['Analysis'].strip()
+            abcd_label = ''
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label'].strip()
 
-            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n" 
+            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n"
 
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph("全文参考译文", chinese_font_name="微软雅黑", dq=15, dh=5, bold=True)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
-   
-    def reading(index,article_count, article_single):
+    def reading(index, article_count, article_single):
         """
         index : passed in from the caller, starting at 1. If there is only
         """
-        all_analysis = '' 
-        all_difficult_sentences = [] 
+        all_analysis = ''
+        all_difficult_sentences = []
 
         chinese_article = article_single['Chinese']
 
-       
         questions = article_single['Questions']
         for ques_index, question_item in enumerate(questions, start=1):
-            analysis = question_item['Analysis'].strip("\n") 
-            abcd_label = '' 
+            analysis = question_item['Analysis'].strip("\n")
+            abcd_label = ''
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label'].strip("\n")
 
-            new_line = "" if ques_index==len(questions) else "\n"
+            new_line = "" if ques_index == len(questions) else "\n"
             all_analysis += f"{ques_index}.{abcd_label}  {analysis}{new_line}"
 
-       
-        if index!=article_count:
+        if index != article_count:
             all_analysis += '\n'
 
         docx.add_paragraph(f"Passage {index}", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True, size=16)
@@ -759,20 +701,19 @@ def section_9(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         docx.add_paragraph(all_analysis, size=9)
 
-
-    def seven_to_five(index,article_count, article_single):
+    def seven_to_five(index, article_count, article_single):
         chinese_article = article_single['Chinese']
-        all_analysis = '' 
+        all_analysis = ''
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
-       
+
         for q_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'] 
-            abcd_label = '' 
+            analysis = question_item['Analysis']
+            abcd_label = ''
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']: 
+                if abcd_selected['IsRight']:
                     abcd_label += abcd_selected['Label']
             all_analysis += f"{q_index}.{abcd_label}  {analysis}\n"
 
@@ -782,14 +723,14 @@ def section_9(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
     "判断题型;根据题型选择----------------------------"
-    sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯","鲍利提分,你的智能教练")
+    sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯", "鲍利提分,你的智能教练")
     articles = json_data['Articles']
     article_count = len(articles)
     for index, article_single in enumerate(articles, start=1):
-        article_type = article_single['Category'] 
+        article_type = article_single['Category']
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         assert article_type in article_type_select
-        article_type_select[article_type](index,article_count, article_single) 
+        article_type_select[article_type](index, article_count, article_single)
 
     docx.add_docx_component("make_docx_demo/word_component/blank.docx")
     docx.add_page_section()
@@ -837,20 +778,15 @@ def section_10(docx: Word, json_data, scanpage_format, *args, **kwargs):
 
 @time_use
 def two_check_page(docx: Word, json_data, *args, **kwargs):
-   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
-        page_sub_title = "词汇训练" 
+        page_sub_title = "词汇训练"
 
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("") 
+            word_data_list.append("")
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[40, 100, 100])
 
-       
-       
-       
-
         tb.set_tb_colum_width(0, 100)
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
@@ -862,7 +798,7 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
-        half_count = int(len(word_data_list) / 2) 
+        half_count = int(len(word_data_list) / 2)
         for index, row in enumerate(range(half_count)):
             first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
@@ -875,14 +811,13 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break() 
+        docx.add_page_break()
 
-   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
-        page_sub_title = "词汇训练" 
+        page_sub_title = "词汇训练"
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""]) 
+            word_data_list2.append(["", ""])
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[80, 100, 120, 150, 70])
@@ -914,73 +849,56 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
             cell3 = f"{spell2}" if spell2 else ""
             cell4 = f"{total_row + row + 1}. {meaning2}" if meaning2 else ""
 
-            data = [f"{spell1}", f"{row + 1}. {meaning1}", cell3, cell4] 
+            data = [f"{spell1}", f"{row + 1}. {meaning1}", cell3, cell4]
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9], alignment=['right', 'left', 'right', 'left'])
 
-       
         tb.set_row_height(13.8)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
 
         docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                            font_name="仿宋", dq=5)
 
-
-   
-    student_name = json_data.get("StudentInfo").get("StudentName", '') 
-    class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "") 
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
+    student_name = json_data.get("StudentInfo").get("StudentName", '')
+    class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "")
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
     article_type = 1
     try:
-        article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category'] 
+        article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category']
     except Exception as e:
         log_err_e(e, "学案类型不存在就取1,词汇突击里面只有阅读理解")
 
-   
-
     """---------------------------------------------------------------------------------"""
     screening_scanPages = json_data['ScreeningScanPages']
     for index, page in enumerate(screening_scanPages, start=1):
         page_id = str(page['PageId']).rjust(11, "0")
 
-        page_title = page['Title'] 
-        page_sub_title = page['SubTitle'] 
-        foot_description = page['FootDescription'] 
-        foot_description2 = page['FootDescription2'] 
-       
+        page_title = page['Title']
+        page_sub_title = page['SubTitle']
+        foot_description = page['FootDescription']
+        foot_description2 = page['FootDescription2']
 
         word_data_list1 = []
         word_data_list2 = []
-        for i in page['FilterTable']['Items']: 
+        for i in page['FilterTable']['Items']:
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
-       
-       
-       
-
-       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
-       
-        if index!=len(screening_scanPages):
-            pass 
-        docx.add_page_break()
 
+        if index != len(screening_scanPages):
+            pass
+        docx.add_page_break()
 
 
 def old_two_check_page(docx: Word, json_data, **kwargs):
-   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("") 
+            word_data_list.append("")
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[140, 100, 100])
 
-       
-       
-       
-
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 2, f"{page_title}\n{page_sub_title}", border=False, size=8, dh=2)
@@ -991,7 +909,7 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
-        half_count = int(len(word_data_list) / 2) 
+        half_count = int(len(word_data_list) / 2)
         for index, row in enumerate(range(half_count)):
             first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
@@ -1004,21 +922,16 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break() 
+        docx.add_page_break()
 
-   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""]) 
+            word_data_list2.append(["", ""])
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[80, 100, 120, 150, 70])
 
-       
-       
-       
-
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8)
         tb.set_cell_text(0, 2, f"{page_id}", border=False, size=16, dh=2, bold=True, font_name="黑体")
@@ -1051,13 +964,13 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
 
-            data = [cell1,cell2, cell3, cell4] 
+            data = [cell1, cell2, cell3, cell4]
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_row_height(13.6, first_row_h=6)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
-        if article_type == 1: 
+        if article_type == 1:
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
             docx.add_paragraph(foot_description2, align="right", size=8, chinese_font_name="仿宋")
@@ -1065,51 +978,45 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
 
-   
-    student_name = json_data.get("StudentInfo").get("StudentName", '') 
-    class_name = json_data.get("StudentInfo").get("ClassName", '') 
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
-    article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category'] 
-    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage'] 
+    student_name = json_data.get("StudentInfo").get("StudentName", '')
+    class_name = json_data.get("StudentInfo").get("ClassName", '')
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
+    article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category']
+    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage']
 
     """---------------------------------------------------------------------------------"""
     for index, page in enumerate(json_data['ScreeningScanPages'], start=1):
         page_id = str(page['PageId']).rjust(11, "0")
 
-       
         if index >= 2:
             docx.add_page_break()
 
-        page_title = page['Title'] 
-        page_sub_title = page['SubTitle'] 
-        foot_description = page['FootDescription'] 
-        foot_description2 = page['FootDescription2'] 
+        page_title = page['Title']
+        page_sub_title = page['SubTitle']
+        foot_description = page['FootDescription']
+        foot_description2 = page['FootDescription2']
 
         word_data_list1 = []
         word_data_list2 = []
 
-       
-        item_list:list = page['FilterTable']['Items']
+        item_list: list = page['FilterTable']['Items']
         item_count = len(item_list)
-        if item_count<100:
-            item_list.extend([{"Spell":"","Meaning":""} for _ in range(100-item_count)])
+        if item_count < 100:
+            item_list.extend([{"Spell": "", "Meaning": ""} for _ in range(100 - item_count)])
 
-        for i in page['FilterTable']['Items']: 
+        for i in page['FilterTable']['Items']:
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
-       
         if is_add_empty_filter_page:
             empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list1)
 
-       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
 
 
 @time_use
 def other(docx, json_data, *args, **kwargs):
-   
     sections = docx.doc.sections
     for section in sections[:-1]:
         section.top_margin = Inches(0.3)
@@ -1127,21 +1034,20 @@ def other(docx, json_data, *args, **kwargs):
 
 
 def start_make_word(json_data, document_format, scanpage_format):
-    parent_path = "make_docx_demo/file_result/" 
+    parent_path = "make_docx_demo/file_result/"
     if not os.path.exists(parent_path):
         os.makedirs(parent_path)
     try:
-        exercise_id = json_data['ExerciseId'] 
+        exercise_id = json_data['ExerciseId']
 
-       
         docx = Word(save_file_name=f"{parent_path}{exercise_id}.docx",
                     start_template_name="make_docx_demo/word_component/start_template.docx")
-       
+
         section_1(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-       
+
         section_4(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-       
-        for exercise_json in json_data['WordAndArticleContents']: 
+
+        for exercise_json in json_data['WordAndArticleContents']:
             section_4_1(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
             section_5(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
             section_6(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
@@ -1149,9 +1055,9 @@ def start_make_word(json_data, document_format, scanpage_format):
             section_9(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
 
         if scanpage_format == 1:
-           
+
             two_check_page(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-           
+
             old_two_check_page(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
         elif scanpage_format == 2:
             section_10(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
@@ -1178,6 +1084,5 @@ if __name__ == '__main__':
     t = time.time()
     os.chdir("..")
 
-   
     start_make_word(test_json1, 1, 1)
     print(time.time() - t)

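The filter/check pages in both main_word.py and main_word_applet.py share one two-column layout: the word list is padded to an even length, then row r pairs item r with item r + half_count, so the list reads down the left column and continues down the right. A minimal sketch with illustrative data:

def two_column_rows(words):
    if len(words) % 2 != 0:
        words = words + [""]          # pad so both columns have equal rows
    half_count = len(words) // 2
    rows = []
    for row in range(half_count):
        first_word, second_word = words[row], words[row + half_count]
        # right-hand cells stay blank for the padding entry
        cell3 = f"{row + 1 + half_count}. {second_word}" if second_word else ""
        rows.append((f"{row + 1}. {first_word}", cell3))
    return rows

for left, right in two_column_rows(["apple", "banana", "cat", "dog", "egg"]):
    print(f"{left:<15} {right}")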
+ 7 - 4
make_docx_demo/new_word2pdf.py

@@ -12,30 +12,33 @@ import pythoncom
 def convert_word_to_pdf(input_file):
     output_file = input_file.replace('.docx', '.pdf')
     word = win32com.client.Dispatch("Word.Application")
-    word.Visible = False 
+    word.Visible = False
     doc = word.Documents.Open(input_file)
-    doc.SaveAs(output_file, FileFormat=17) 
+    doc.SaveAs(output_file, FileFormat=17)
     doc.Close()
     word.Quit()
 
+
 def convert_word_to_pdf2(input_file):
     pythoncom.CoInitialize()
     convert(input_file)
     pythoncom.CoUninitialize()
 
+
 if __name__ == '__main__':
     import os
+
     files = os.listdir(r"C:\Users\86131\Desktop\回收\潘资料")
     print(files)
 
     t = time.time()
     p_lists = []
     for file in files:
-        p1 = Process(target=convert_word_to_pdf2, args=(r"C:\\Users\\86131\\Desktop\\回收\\潘资料\\"+file,))
+        p1 = Process(target=convert_word_to_pdf2, args=(r"C:\\Users\\86131\\Desktop\\回收\\潘资料\\" + file,))
         p1.start()
         p_lists.append(p1)
 
     for p in p_lists:
         p.join()
 
-    print(time.time() - t)
+    print(time.time() - t)

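new_word2pdf.py drives Word over COM, which keeps per-apartment state, so convert_word_to_pdf2 runs each conversion in its own process and initialises COM there. A hedged, Windows-only sketch of that pattern (assumes Word, pywin32 and docx2pdf are installed; the folder path is illustrative):

import os
from multiprocessing import Process

import pythoncom
from docx2pdf import convert

def convert_one(path):
    pythoncom.CoInitialize()      # COM must be initialised in each process
    try:
        convert(path)             # writes the same path with a .pdf extension
    finally:
        pythoncom.CoUninitialize()

if __name__ == '__main__':        # guard required for multiprocessing on Windows
    folder = r"C:\docs"           # illustrative folder
    procs = [Process(target=convert_one, args=(os.path.join(folder, name),))
             for name in os.listdir(folder) if name.endswith(".docx")]
    for p in procs:
        p.start()
    for p in procs:
        p.join()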
+ 5 - 20
make_docx_demo/word2pdf.py

@@ -14,24 +14,25 @@ def convert_word_to_pdf(pdf_name):
             ll.acquire()
             print('加锁,进入转pdf')
             pythoncom.CoInitialize()
-            convert(f'{pdf_name}.docx') 
+            convert(f'{pdf_name}.docx')
             for i in range(30):
                 if os.path.exists(f'{pdf_name}.pdf'):
                     break
                 time.sleep(0.5)
             break
         except Exception as ee:
-           
+
             print(ee)
         finally:
             pythoncom.CoUninitialize()
             print('解锁,转pdf完成')
-            ll.release() 
+            ll.release()
+
 
 def convert_word_to_pdf2(pdf_name):
     for cccou in range(3):
         try:
-            convert(f'{pdf_name}.docx') 
+            convert(f'{pdf_name}.docx')
             for i in range(30):
                 if os.path.exists(f'{pdf_name}.pdf'):
                     break
@@ -44,20 +45,4 @@ def convert_word_to_pdf2(pdf_name):
 if __name__ == '__main__':
     import multiprocessing
 
-   
-   
-   
-
-   
-   
-   
-   
-   
-   
-   
-   
-   
-   
-   
     #
-   

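word2pdf.py takes the opposite approach to new_word2pdf.py: a single process, with a module-level lock serialising access to Word's COM server and a poll loop waiting for the PDF to appear on disk. A condensed sketch of that lock-plus-retry wrapper; the lock name ll mirrors the module-level lock the hunks above assume but do not show:

import os
import time
from threading import Lock

import pythoncom
from docx2pdf import convert

ll = Lock()   # stand-in for the module-level lock assumed by the diff

def convert_word_to_pdf(pdf_name, attempts=3):
    for _ in range(attempts):
        with ll:                           # one conversion at a time
            pythoncom.CoInitialize()
            try:
                convert(f'{pdf_name}.docx')
                for _ in range(30):        # poll up to ~15 s for the output
                    if os.path.exists(f'{pdf_name}.pdf'):
                        return True
                    time.sleep(0.5)
            except Exception as ee:
                print(ee)                  # retry on COM hiccups
            finally:
                pythoncom.CoUninitialize()
    return False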
+ 6 - 9
make_docx_demo/word_component/make_rectangle.py

@@ -1,15 +1,16 @@
 # -*- coding:utf-8 -*-
 from random import randint
 
+
 def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color, font_size, boder_color, chinese_font, english_font, dash,
-                   shape_type='rect',rotate_angle=0,behindDoc=0):
+                   shape_type='rect', rotate_angle=0, behindDoc=0):
     """
     rotate_angle:角度,顺时针30,60,90等
     behindDoc为0浮于文字上方,为1浮于文字下方"""
     if x > 600: x = 600
     if y > 800: y = 800
     font_size = font_size * 2
-    boder_size = boder_size * 12700 
+    boder_size = boder_size * 12700
     dash_elem = '<a:prstDash val="dash"/>' if dash else ''
 
     idid = randint(1, 99999)
@@ -26,9 +27,7 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
         boder = f"""<a:lnRef idx="2"><a:srgbClr val="{boder_color}"/></a:lnRef>"""
         noboder = f"""<a:ln w="{boder_size}"><a:srgbClr val="{boder_color}"/>{dash_elem}</a:ln>"""
     else:
-       
-       
-       
+
         boder = """<a:lnRef idx="2"><a:noFill/></a:lnRef>"""
         noboder = """<a:ln w="12700"><a:noFill/></a:ln>"""
 
@@ -63,7 +62,6 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
     else:
         insert_text_xml = ''
 
-   
     shape_geom_map = {
         'rect': '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>',
         'circle': '<a:prstGeom prst="ellipse"><a:avLst/></a:prstGeom>',
@@ -82,7 +80,6 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
         'arc': '<a:prstGeom prst="arc"><a:avLst/></a:prstGeom>',
     }
 
-   
     shape_geom = shape_geom_map.get(shape_type, '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>')
 
     r = f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
@@ -151,7 +148,7 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
 										<wps:wsp>
 											<wps:cNvSpPr/>
 											<wps:spPr>
-												<a:xfrm rot="{60000*rotate_angle}">
+												<a:xfrm rot="{60000 * rotate_angle}">
 													<a:off x="0"
 													       y="0"/>
 													<a:ext cx="1777593"
@@ -224,4 +221,4 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
 		</w:sectPr>
 	</w:body>
 </w:document>"""
-    return r
+    return r

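The magic numbers in make_rectangle.py are raw OOXML DrawingML units: line widths are EMUs (1 pt = 12,700 EMU) and rotation is expressed in 1/60000 of a degree, which is why the template multiplies boder_size by 12700 and rotate_angle by 60000. A tiny helper makes the conversions explicit:

EMU_PER_POINT = 12700      # OOXML: 914,400 EMU per inch / 72 pt per inch
DEGREE_UNITS = 60000       # OOXML angles are in 60,000ths of a degree

def pt_to_emu(points: float) -> int:
    return round(points * EMU_PER_POINT)

def deg_to_ooxml(degrees: float) -> int:
    return round(degrees * DEGREE_UNITS)

assert pt_to_emu(1) == 12700              # matches boder_size * 12700
assert deg_to_ooxml(30) == 1800000        # matches rot="{60000 * rotate_angle}"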
+ 93 - 63
mock/mock_request.py

@@ -1,40 +1,43 @@
 # -*- coding:utf-8 -*-
 #
-import json 
+import json
 import time
 from functools import wraps
-from random import shuffle,sample,randint
+from random import shuffle, sample, randint
 from threading import Thread
-from concurrent.futures import ThreadPoolExecutor,wait
+from concurrent.futures import ThreadPoolExecutor, wait
 
 import httpx
 import requests
 from pydantic import BaseModel
 from typing import List
 
+product_adress = "http://111.231.167.191"
+test_address = "http://111.231.167.191:8004"
+test_address2 = "http://111.231.167.191:8003"
 
-product_adress = "http://111.231.167.191" 
-test_address = "http://111.231.167.191:8004" 
-test_address2 = "http://111.231.167.191:8003" 
+local_adress = "http://127.0.0.1:9000"
 
-local_adress = "http://127.0.0.1:9000" 
+use_address = test_address
 
-use_address = local_adress 
 
 class DifficultSentence(BaseModel):
     english: str
     chinese: str
 
+
 class Candidate(BaseModel):
     label: str
     text: str
     isRight: int
 
+
 class Question(BaseModel):
     trunk: str
     analysis: str
     candidates: List[Candidate]
 
+
 class Article(BaseModel):
     difficultSentences: List[DifficultSentence]
     usedMeanIds: List[int]
@@ -43,20 +46,21 @@ class Article(BaseModel):
     chineseArticle: str
     allWordAmount: int
 
+
 class ArticleData(BaseModel):
     articles: List[Article]
 
 
 def time_use(fn):
     @wraps(fn)
-    def cc(*args, **kwargs): 
+    def cc(*args, **kwargs):
         f_time = time.time()
         res = fn(*args, **kwargs)
 
         cha = round(time.time() - f_time, 3)
         if cha > 0.1:
             print(f'函数:{fn.__name__} 一共用时', cha, '秒')
-        return res 
+        return res
 
     return cc
 
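time_use above is a standard wraps-based timing decorator that stays silent for calls faster than 0.1 s. A quick illustrative use, assuming the decorator defined above is in scope:

import time

@time_use
def slow_step():
    time.sleep(0.2)

slow_step()   # prints something like: 函数:slow_step 一共用时 0.2 秒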
@@ -79,7 +83,7 @@ def get_article():
                  "student_stage": 1, "vocabulary": 700, "class_id": 123456}
 
     r = requests.post(f"{use_address}/article", json=json_data)
-   
+
     key = r.json()['key']
     time.sleep(120)
     query_file_content(key)
@@ -89,7 +93,7 @@ def query_file_content(key):
     json_data = {"key": key}
     try:
         r = requests.post(f"{use_address}/query_oss_file", json=json_data)
-        r.raise_for_status() 
+        r.raise_for_status()
         response_data = r.json()
         assert response_data['wordCount'] > 0, "词数为0"
     except requests.RequestException as e:
@@ -104,9 +108,9 @@ def query_file_content(key):
 
 def get_audio():
     word = "cat"
-    r1 = requests.post(f"{use_address}/tts", json={"text": word, "type": 0}) 
-    r2 = requests.post(f"{use_address}/tts", json={"text": word, "type": 2}) 
-    r3 = requests.post(f"{use_address}/tts", json={"text": word, "type": 1}) 
+    r1 = requests.post(f"{use_address}/tts", json={"text": word, "type": 0})
+    r2 = requests.post(f"{use_address}/tts", json={"text": word, "type": 2})
+    r3 = requests.post(f"{use_address}/tts", json={"text": word, "type": 1})
     assert r1.json()['code'] == 200
     assert r2.json()['code'] == 200
     assert r3.status_code == 200
@@ -176,23 +180,43 @@ def get_article2_1():
                        {'spell': 'perception', 'meaning': '观念, 知觉, 觉察', 'word_id': 1174551, 'meaning_id': 3516, 'serial': 2749},
                        {'spell': 'violation', 'meaning': '妨碍, 侵犯, 违犯', 'word_id': 1174695, 'meaning_id': 4452, 'serial': 3528},
                        {'spell': 'convey', 'meaning': '表达', 'word_id': 830280, 'meaning_id': 4931, 'serial': 3938},
-                       {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063}
+                       {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063},
+                       {'spell': 'carry', 'meaning': '携带', 'word_id': 803106, 'meaning_id': 460, 'serial': 313},
+                       {'spell': 'area', 'meaning': '领域', 'word_id': 765328, 'meaning_id': 572, 'serial': 388},
+                       {'spell': 'lie', 'meaning': '说谎, 谎言', 'word_id': 963062, 'meaning_id': 602, 'serial': 409},
+                       {'spell': 'company', 'meaning': '陪伴', 'word_id': 822886, 'meaning_id': 642, 'serial': 433},
+                       {'spell': 'else', 'meaning': '别的, 另外的, 其他的', 'word_id': 869964, 'meaning_id': 654, 'serial': 443},
+                       {'spell': 'cover', 'meaning': '覆盖', 'word_id': 834220, 'meaning_id': 687, 'serial': 472},
+                       {'spell': 'effect', 'meaning': '引起', 'word_id': 866665, 'meaning_id': 709, 'serial': 486},
+                       {'spell': 'design', 'meaning': '设计, 计划', 'word_id': 848239, 'meaning_id': 714, 'serial': 490},
+                       {'spell': 'century', 'meaning': '世纪, 百年', 'word_id': 806994, 'meaning_id': 725, 'serial': 498},
+                       {'spell': 'above', 'meaning': '上面,之上,超过', 'word_id': 745232, 'meaning_id': 736, 'serial': 508},
+                       {'spell': 'sign', 'meaning': '手势, 符号, 签名', 'word_id': 1089428, 'meaning_id': 752, 'serial': 517},
+                       {'spell': 'remain', 'meaning': '保持不变', 'word_id': 1062570, 'meaning_id': 774, 'serial': 530},
+                       {'spell': 'line', 'meaning': '线, 画线', 'word_id': 964670, 'meaning_id': 777, 'serial': 532},
+                       {'spell': 'likely', 'meaning': '有可能的', 'word_id': 964153, 'meaning_id': 781, 'serial': 534},
+                       {'spell': 'fail', 'meaning': '失败, 不及格', 'word_id': 882595, 'meaning_id': 787, 'serial': 540},
+                       {'spell': 'control', 'meaning': '控制, 支配, 操纵', 'word_id': 829355, 'meaning_id': 794, 'serial': 545},
+                       {'spell': 'power', 'meaning': '权力, 力量', 'word_id': 1038172, 'meaning_id': 800, 'serial': 549},
+                       {'spell': 'reply', 'meaning': '回答, 回应, 答辩', 'word_id': 1063609, 'meaning_id': 817, 'serial': 563},
+                       {'spell': 'unless', 'meaning': '除非, 如果不', 'word_id': 1150079, 'meaning_id': 822, 'serial': 567},
+                       {'spell': 'offer', 'meaning': '提出, 提供', 'word_id': 1005291, 'meaning_id': 824, 'serial': 569}
                        ]
     shuffle(core_words_list)
-    core_words_chiose_list = sample(core_words_list,5)
+    core_words_chiose_list = sample(core_words_list, 15)
     json_data = {'core_words': core_words_chiose_list,
-                 'take_count': 8, 'student_stage': 2, 'demo_name': '春笋英语', "exercise_id": randint(100,999),
-                 "article_length": 120, "reading_level": 5}
+                 'take_count': 1, 'student_stage': 2, 'demo_name': '春笋英语', "exercise_id": randint(100, 999),
+                 "article_length": 220, "reading_level": 25}
 
     r = requests.post(f"{use_address}/article/reading-comprehension", json=json_data)
     r_json = r.json()
-    print(r_json)
     try:
         return r_json
     except Exception as e:
         print("春笋文章reading-comprehension错误", e)
         print("错误数据", r_json)
 
+
 @time_use
 def get_article2_2():
     """测试通过requests来直接访问openai"""
@@ -242,7 +266,7 @@ def get_article2_2():
                        {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063}
                        ]
     shuffle(core_words_list)
-    core_words_chiose_list = sample(core_words_list,5)
+    core_words_chiose_list = sample(core_words_list, 15)
     core_words_meaning_str = "; ".join([f"[{i['meaning_id']}  {i['spell']} {i['meaning']}]" for i in core_words_chiose_list])
 
     question = f"""下面我会为你提供一组数据,[单词组](里面包含词义id,英语单词,中文词义),请根据这些单词的中文词义,生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
@@ -251,38 +275,54 @@ def get_article2_2():
 1.必须用提供的这个词义的单词,其他单词使用最简单最容易没有难度的单词。文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。选择题难度尽可能简单,参考中国小学生水平
 2.优先保证文章语句通顺,意思不要太生硬。不要为了使用特定的单词,造成文章语义前后不搭,允许不使用个别词义。
 3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
-4.生成的文章要求120词左右,可以用\\n\\n字符分段,一般1-2个段落左右。第一段是文章标题。
+4.生成的文章要求320词左右,可以用\\n\\n字符分段,一般3-4个段落左右。第一段是文章标题。
 5.允许不使用[单词组]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
 6.注意回复字段的中英文,englishArticle是英文,chineseArticle是中文,其中trunk是英文,analysis是中文,text是英文。
 
 提供[单词组]:{core_words_meaning_str}
 """
 
-    url = "http://170.106.108.95/v1/chat/completions"
+    url = 'http://170.106.108.95/v1/chat/completions'
 
-   
     headers = {
         "Authorization": f"Bearer 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",
         "Content-Type": "application/json"
     }
 
-   
     data = {
-        "model": "gpt-4.1", 
+        "model": "gpt-4.1",
         "messages": [
-           
+
             {"role": "user", "content": question}
         ],
-        "max_tokens": 4000, 
-        "temperature": 1.2, 
-        "n":8,
-        "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {'properties': {'label': {'title': 'Label', 'type': 'string'}, 'text': {'title': 'Text', 'type': 'string'}, 'isRight': {'title': 'Isright', 'type': 'integer'}}, 'required': ['label', 'text', 'isRight'], 'title': 'Candidate', 'type': 'object'}, 'DifficultSentence': {'properties': {'english': {'title': 'English', 'type': 'string'}, 'chinese': {'title': 'Chinese', 'type': 'string'}}, 'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {'properties': {'trunk': {'title': 'Trunk', 'type': 'string'}, 'analysis': {'title': 'Analysis', 'type': 'string'}, 'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': 'Candidates', 'type': 'array'}}, 'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {'difficultSentences': {'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': 'Difficultsentences', 'type': 'array'}, 'usedMeanIds': {'items': {'type': 'integer'}, 'title': 'Usedmeanids', 'type': 'array'}, 'questions': {'items': {'$ref': '#/$defs/Question'}, 'title': 'Questions', 'type': 'array'}, 'englishArticle': {'title': 'Englisharticle', 'type': 'string'}, 'chineseArticle': {'title': 'Chinesearticle', 'type': 'string'}, 'allWordAmount': {'title': 'Allwordamount', 'type': 'integer'}}, 'required': ['difficultSentences', 'usedMeanIds', 'questions', 'englishArticle', 'chineseArticle', 'allWordAmount'], 'title': 'Article', 'type': 'object'}}}
+        "max_tokens": 8000,
+        "temperature": 1.2,
+        "n": 4,
+        "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {
+            'properties': {'label': {'title': 'Label', 'type': 'string'}, 'text': {'title': 'Text', 'type': 'string'},
+                           'isRight': {'title': 'Isright', 'type': 'integer'}}, 'required': ['label', 'text', 'isRight'],
+            'title': 'Candidate', 'type': 'object'}, 'DifficultSentence': {
+            'properties': {'english': {'title': 'English', 'type': 'string'}, 'chinese': {'title': 'Chinese', 'type': 'string'}},
+            'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {
+            'properties': {'trunk': {'title': 'Trunk', 'type': 'string'}, 'analysis': {'title': 'Analysis', 'type': 'string'},
+                           'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': 'Candidates', 'type': 'array'}},
+            'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {
+            'difficultSentences': {'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': 'Difficultsentences', 'type': 'array'},
+            'usedMeanIds': {'items': {'type': 'integer'}, 'title': 'Usedmeanids', 'type': 'array'},
+            'questions': {'items': {'$ref': '#/$defs/Question'}, 'title': 'Questions', 'type': 'array'},
+            'englishArticle': {'title': 'Englisharticle', 'type': 'string'},
+            'chineseArticle': {'title': 'Chinesearticle', 'type': 'string'},
+            'allWordAmount': {'title': 'Allwordamount', 'type': 'integer'}}, 'required': ['difficultSentences', 'usedMeanIds', 'questions',
+                                                                                          'englishArticle', 'chineseArticle',
+                                                                                          'allWordAmount'], 'title': 'Article',
+                                                                                                 'type': 'object'}}}
     }
 
-   
-    response = httpx.post(url, headers=headers, json=data,timeout=300)
-    print(response.json())
-    return response.json()
+    try:
+        response = httpx.post(url, headers=headers, json=data, timeout=300)
+        return response.json()
+    except Exception as e:
+        print(f"错误:{type(e).__name__}: {e}")
 
 
 def download_word():
@@ -333,42 +373,32 @@ def get_article3():
 
 @time_use
 def run_all_test_cese():
-    test_connect() 
-
-    get_audio() 
-    spoken_language() 
-    download_word() 
-    print(get_article2_1()) 
+    test_connect()
 
-   
-   
+    get_audio()
+    spoken_language()
+    download_word()
+    get_article2_1()
 
 
 @time_use
 def multi_request():
-    with ThreadPoolExecutor(max_workers=50) as executor:
-       
-        futures = [executor.submit(get_article2_1) for _ in range(30)]
-       
+    with ThreadPoolExecutor(max_workers=150) as executor:
+
+        futures = [executor.submit(get_article2_1) for _ in range(100)]
 
-       
         wait(futures)
         print("完成等待")
-        for index,future in enumerate(futures,start=1):
-            future.result() 
-            print(f"完成循环{index}")
-
+        f = open("result.txt", 'w', encoding='utf-8')
 
-if __name__ == '__main__':
-   
-    multi_request()
-
-   
-   
+        try:
+            for index, future in enumerate(futures, start=1):
+                f.write(str(future.result()) + '\n')
+        except Exception as e:
+            print(f"错误:{type(e).__name__}: {e}")
+        finally:
+            f.close()
 
-   
 
-   
-   
-
-   
+if __name__ == '__main__':
+    print(get_article2_1())
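
The hand-written json_schema payload in get_article2_2 above duplicates the Pydantic models declared at the top of this file. A hedged sketch (assuming Pydantic v2, whose model_json_schema() emits the same $defs-style schema seen inline above) that derives response_format from the Article model instead of pasting it:

def build_response_format(model_cls) -> dict:
    # Pydantic v2: model_json_schema(); on v1 this would be model_cls.schema().
    return {
        "type": "json_schema",
        "json_schema": {
            "name": model_cls.__name__,
            "schema": model_cls.model_json_schema(),
        },
    }

# inside get_article2_2:
#   data = {..., "response_format": build_response_format(Article)}

In the same spirit, multi_request above would lose no results to a single failed future if result.txt were opened with a `with open(...)` block and each future.result() wrapped in its own try/except, so one exception does not abort the remaining writes.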

+ 2 - 1
spoken_language/common/utils.py

@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
 import sys
 
+
 def is_python3():
     if sys.version > '3':
         return True
-    return False
+    return False
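
is_python3 compares version strings; "3.x" > "3" holds for current interpreters, but tuple comparison on sys.version_info is the usual idiom and avoids string-ordering surprises. A minimal sketch:

import sys

def is_python3() -> bool:
    # version_info is a tuple, so (3, 12, ...) >= (3,) compares numerically.
    return sys.version_info[0] >= 3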

+ 4 - 7
spoken_language/read_config.py

@@ -1,20 +1,17 @@
 # -*- coding:utf-8 -*-
 import yaml
 
+
 def read_config(parent_dir="."):
-   
-    with open(parent_dir +"/config/tencent_config.yaml", "r",encoding="utf-8") as file:
+    with open(parent_dir + "/config/tencent_config.yaml", "r", encoding="utf-8") as file:
         config = yaml.safe_load(file)
         return config
-   
-   
-
 
 
 if __name__ == '__main__':
-    import sys,os
+    import sys, os
 
     current_dir = os.path.dirname(os.path.abspath(__file__))
     parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
     sys.path.append(parent_dir)
-    print(read_config(parent_dir))
+    print(read_config(parent_dir))
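
For reference, the caller in spoken_language/soeexample.py reads appId, SecretId and SecretKey out of the dict returned here. A hedged sketch that fails fast when tencent_config.yaml is missing one of those keys (the key names come from soeexample.py; the wrapper itself is illustrative):

REQUIRED_KEYS = ("appId", "SecretId", "SecretKey")

def read_config_checked(parent_dir="."):
    config = read_config(parent_dir)  # read_config defined above
    missing = [k for k in REQUIRED_KEYS if k not in config]
    if missing:
        raise KeyError(f"tencent_config.yaml is missing keys: {missing}")
    return config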

+ 8 - 9
spoken_language/soe/speaking_assessment.py

@@ -13,6 +13,7 @@ import uuid
 from urllib.parse import quote
 from tools.loglog import logger
 
+
 def is_python3():
     if sys.version > '3':
         return True
@@ -104,7 +105,7 @@ class SpeakingAssessment:
 
     def set_text_mode(self, text_mode):
         self.text_mode = text_mode
-    
+
     def set_rec_mode(self, rec_mode):
         self.rec_mode = rec_mode
 
@@ -210,7 +211,7 @@ class SpeakingAssessment:
 
     def start(self):
         def on_message(ws, message):
-           
+
             response = json.loads(message)
             response['voice_id'] = self.voice_id
             if response['code'] != 0:
@@ -222,7 +223,7 @@ class SpeakingAssessment:
                 self.status = FINAL
                 self.result = message
                 self.listener.on_recognition_complete(response)
-               
+
                 self.ws.close()
                 return
             else:
@@ -238,9 +239,8 @@ class SpeakingAssessment:
                          (format(error), self.voice_id))
             self.status = ERROR
 
-        def on_close(ws,close_status_code, close_msg):
-           
-           
+        def on_close(ws, close_status_code, close_msg):
+
             self.status = CLOSED
             logger.info("websocket closed  voice id %s" %
                         self.voice_id)
@@ -256,13 +256,13 @@ class SpeakingAssessment:
         signstr = self.format_sign_string(query)
         autho = self.sign(signstr, self.credential.secret_key)
         requrl = self.create_query_string(query_arr)
-       
+
         if is_python3():
             autho = urllib.parse.quote(autho)
         else:
             autho = urllib.quote(autho)
         requrl += "&signature=%s" % autho
-       
+
         self.ws = websocket.WebSocketApp(requrl, None,
                                          on_error=on_error, on_close=on_close, on_message=on_message)
         self.ws.on_open = on_open
@@ -272,4 +272,3 @@ class SpeakingAssessment:
         self.status = STARTED
         response = {'voice_id': self.voice_id}
         self.listener.on_recognition_start(response)
-       
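
For context on the start() flow above: in Tencent's speech SDK samples, self.sign(signstr, secret_key) is an HMAC-SHA1 over the canonical request string, base64-encoded, which is why the result is then percent-encoded before being appended as &signature=. A sketch under that assumption -- verify against the actual SpeakingAssessment.sign before relying on it:

import base64
import hashlib
import hmac

def sign(signstr: str, secret_key: str) -> str:
    # HMAC-SHA1 of the canonical query string, base64-encoded.
    digest = hmac.new(secret_key.encode("utf-8"),
                      signstr.encode("utf-8"),
                      hashlib.sha1).digest()
    return base64.b64encode(digest).decode("utf-8")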

+ 19 - 35
spoken_language/soeexample.py

@@ -11,9 +11,8 @@ from spoken_language.soe import speaking_assessment
 
 from spoken_language.read_config import read_config
 
-
 config_data = read_config()
-app_id,secret_id,secret_key= config_data['appId'],config_data['SecretId'],config_data['SecretKey']
+app_id, secret_id, secret_key = config_data['appId'], config_data['SecretId'], config_data['SecretKey']
 
 APPID = app_id
 SECRET_ID = secret_id
@@ -31,33 +30,25 @@ class MySpeechRecognitionListener(speaking_assessment.SpeakingAssessmentListener
 
     def on_recognition_start(self, response):
         pass
-       
-       
 
     def on_intermediate_result(self, response):
         rsp_str = json.dumps(response, ensure_ascii=False)
-       
-       
 
     def on_recognition_complete(self, response):
         global spoken_result
         spoken_result[self.id] = response
-       
-       
-       
 
     def on_fail(self, response):
         rsp_str = json.dumps(response, ensure_ascii=False)
-       
-       
+
 
 def process(id):
     audio = r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3"
     listener = MySpeechRecognitionListener(id)
-   
+
     credential_var = credential.Credential(SECRET_ID, SECRET_KEY)
     recognizer = speaking_assessment.SpeakingAssessment(
-        APPID, credential_var, ENGINE_MODEL_TYPE,  listener)
+        APPID, credential_var, ENGINE_MODEL_TYPE, listener)
     recognizer.set_text_mode(0)
     recognizer.set_ref_text("anyway")
     recognizer.set_eval_mode(0)
@@ -71,36 +62,34 @@ def process(id):
             while content:
                 recognizer.write(content)
                 content = f.read(SLICE_SIZE)
-                #sleep模拟实际实时语音发送间隔
-               
-               
+                # sleep模拟实际实时语音发送间隔
+
                 time.sleep(0.2)
     except Exception as e:
         print(e)
     finally:
         recognizer.stop()
 
-def process_rec(task_id,audio_path,audio_text,audio_binary=None):
+
+def process_rec(task_id, audio_path, audio_text, audio_binary=None):
     audio = audio_path
     listener = MySpeechRecognitionListener(task_id)
-   
+
     credential_var = credential.Credential(SECRET_ID, SECRET_KEY)
     recognizer = speaking_assessment.SpeakingAssessment(
-        APPID, credential_var, ENGINE_MODEL_TYPE,  listener)
+        APPID, credential_var, ENGINE_MODEL_TYPE, listener)
     recognizer.set_text_mode(0)
     recognizer.set_ref_text(audio_text)
     recognizer.set_eval_mode(1)
     recognizer.set_keyword("")
     recognizer.set_sentence_info_enabled(0)
     recognizer.set_voice_format(2)
-   
-   
+
     recognizer.set_rec_mode(1)
     try:
         recognizer.start()
-        if audio_binary: 
-           
-           
+        if audio_binary:
+
             recognizer.write(audio_binary)
         else:
             with open(f"{task_id}.mp3", 'rb') as f:
@@ -123,18 +112,17 @@ def process_multithread(number):
         thread.join()
 
 
-def make_spoken(task_id,audio_url,audio_content,audio_text):
-
+def make_spoken(task_id, audio_url, audio_content, audio_text):
     if audio_url:
         print("有url,应该去下载mp3文件")
-       
+
         r = requests.get(audio_url)
         audio_content = r.content
     else:
-        with open(f"{task_id}.mp3",'wb') as f:
+        with open(f"{task_id}.mp3", 'wb') as f:
             f.write(audio_content)
 
-    process_rec(task_id,audio_path=f"",audio_text=audio_text,audio_binary=audio_content)
+    process_rec(task_id, audio_path=f"", audio_text=audio_text, audio_binary=audio_content)
     global spoken_result
     for _ in range(60):
         if task_id in spoken_result:
@@ -146,10 +134,6 @@ def make_spoken(task_id,audio_url,audio_content,audio_text):
         time.sleep(0.5)
     return None
 
-if __name__ == "__main__":
 
-   
-   
-   
-    process_rec(0,r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3","You must study to be frank with the world apple")
-   
+if __name__ == "__main__":
+    process_rec(0, r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3", "You must study to be frank with the world apple")
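
make_spoken above polls the global spoken_result dict every 0.5s for up to ~30s. The same wait can be expressed with threading.Event so the caller wakes as soon as on_recognition_complete fires; a hedged sketch (the Event plumbing is an assumption, not code from this repo):

import threading

_results = {}  # task_id -> response, filled by the listener
_done = {}     # task_id -> threading.Event

def _event(task_id):
    return _done.setdefault(task_id, threading.Event())

def on_complete(task_id, response):
    # call from MySpeechRecognitionListener.on_recognition_complete
    _results[task_id] = response
    _event(task_id).set()

def wait_for_result(task_id, timeout=30.0):
    if _event(task_id).wait(timeout):
        return _results.pop(task_id)
    return None  # same contract as make_spoken: None on timeout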

+ 5 - 11
tools/ali_log.py

@@ -28,18 +28,21 @@ logstore_index = {'line': {
 from_time = int(time.time()) - 3600
 to_time = time.time() + 3600
 
+
 def create_project():
     print("ready to create project %s" % project_name)
     client.create_project(project_name, project_des="")
     print("create project %s success " % project_name)
     time.sleep(60)
 
+
 def create_logstore():
     print("ready to create logstore %s" % logstore_name)
     client.create_logstore(project_name, logstore_name, ttl=3, shard_count=2)
     print("create logstore %s success " % project_name)
     time.sleep(30)
 
+
 def create_index():
     print("ready to create index for %s" % logstore_name)
     index_config = IndexConfig()
@@ -48,7 +51,8 @@ def create_index():
     print("create index for %s success " % logstore_name)
     time.sleep(60 * 2)
 
-def put_logs(msg:str):
+
+def put_logs(msg: str):
     log_group = []
 
     log_item = LogItem()
@@ -61,7 +65,6 @@ def put_logs(msg:str):
     client.put_logs(request)
 
 
-
 def get_logs():
     print("ready to query logs from logstore %s" % logstore_name)
     request = GetLogsRequest(project_name, logstore_name, from_time, to_time, query=query)
@@ -73,13 +76,4 @@ def get_logs():
 
 
 if __name__ == '__main__':
-   
-   
-   
-   
-   
-   
-   
     put_logs("测试")
-   
-   
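
put_logs above writes a single msg field per line. LogItem in the aliyun-log SDK takes a list of key/value pairs, so structured fields can ride along; a hedged sketch reusing this file's client/project_name/logstore_name globals (field names are illustrative; check the SDK version in use):

import time
from aliyun.log import LogItem, PutLogsRequest

def put_structured_log(fields: dict, topic: str = ""):
    item = LogItem()
    item.set_time(int(time.time()))
    item.set_contents([(str(k), str(v)) for k, v in fields.items()])
    request = PutLogsRequest(project_name, logstore_name, topic, "", [item])
    client.put_logs(request)

# put_structured_log({"level": "ERROR", "msg": "测试", "task_id": "42"})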

+ 26 - 51
tools/audio.py

Diff file suppressed because it is too large


+ 8 - 11
tools/del_expire_file.py

@@ -7,7 +7,7 @@ import datetime
 from time import sleep
 
 
-def del_file(folder_path,expired_days=10):
+def del_file(folder_path, expired_days=10):
     """
     删除文件夹内过时的文件
     folder_path: 需要删除过期文件的文件夹
@@ -18,31 +18,28 @@ def del_file(folder_path,expired_days=10):
         print("文件夹不存在")
         return None
 
-   
     now = datetime.datetime.now()
 
-   
     for filename in os.listdir(folder_path):
         file_path = os.path.join(folder_path, filename)
-       
+
         if os.path.isfile(file_path):
-           
+
             create_time = os.path.getctime(file_path)
             create_date = datetime.datetime.fromtimestamp(create_time)
-           
+
             delta = now - create_date
-           
+
             if delta.days > expired_days:
                 os.remove(file_path)
-               
 
 
 def run_del_normal():
     """这是小程序项目内的正常删除机制"""
     while True:
-        del_file("make_docx_demo/file_result",expired_days=15)
-        sleep(3600*24)
+        del_file("make_docx_demo/file_result", expired_days=15)
+        sleep(3600 * 24)
 
 
 if __name__ == '__main__':
-    run_del_normal()
+    run_del_normal()
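
One caveat on del_file above: os.path.getctime is creation time on Windows but metadata-change time on Linux, so on a Linux host a chmod or rename resets the expiry clock. Keying off last modification is portable; a minimal sketch:

import datetime
import os

def is_expired(file_path: str, expired_days: int) -> bool:
    # getmtime = last content modification; same meaning on Windows and Linux,
    # unlike the getctime used in del_file above.
    mtime = datetime.datetime.fromtimestamp(os.path.getmtime(file_path))
    return (datetime.datetime.now() - mtime).days > expired_days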

+ 20 - 16
tools/loglog.py

@@ -5,15 +5,16 @@ from loguru import logger
 from threading import Lock
 from tools.ali_log import put_logs
 
+
 class SimpleLogger:
     """
     简易日志:存放几乎没用的大量gpt日志
     """
 
     def __init__(self, base_file_name: str = "ai_log"):
-        self.base_file_name = "log/" + base_file_name 
+        self.base_file_name = "log/" + base_file_name
         self.file_ext = ".txt"
-        self.max_size = 10 * 1024 * 1024 
+        self.max_size = 10 * 1024 * 1024
         self.current_file = self._get_current_file()
         self.lock = Lock()
         if not os.path.exists("log"):
@@ -37,7 +38,7 @@ class SimpleLogger:
             with open(f"{self.base_file_name}_1{self.file_ext}", "a", encoding="utf-8") as log_file:
                 log_file.write(f"Hello World\n")
 
-    def log(self, message:str, level="INFO"):
+    def log(self, message: str, level="INFO"):
         """记录日志到文件"""
         self._check_file_size()
         date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
@@ -45,28 +46,30 @@ class SimpleLogger:
             with open(self.current_file, "a", encoding="utf-8") as log_file:
                 log_file.write(f"{date_time} 【{level}】 {str(message)}\n\n")
 
-    def info(self, message:str):
+    def info(self, message: str):
         """记录INFO级别的日志"""
         self.log(message, "INFO")
 
-    def warning(self, message:str):
+    def warning(self, message: str):
         """记录WARNING级别的日志"""
         self.log(message, "WARNING")
 
-    def error(self, message:str):
+    def error(self, message: str):
         """记录ERROR级别的日志"""
         message = "\n" + "-" * 20 + "\n" + message + "\n" + "-" * 20
         self.log(message, "ERROR")
 
-    def debug(self, message:str):
+    def debug(self, message: str):
         """记录DEBUG级别的日志"""
         self.log(message, "DEBUG")
 
-logger.remove(handler_id=None) 
+
+logger.remove(handler_id=None)
 logger.add('log/log.log', level="INFO", rotation="5 MB", encoding="utf-8", retention="7 days")
 logger.add('log/error.log', level="ERROR", rotation="5 MB", encoding="utf-8", retention="7 days")
 simple_logger = SimpleLogger()
 
+
 def exception_handler(func):
     def wrapper(*args, **kwargs):
         try:
@@ -75,31 +78,32 @@ def exception_handler(func):
             logger.error(f"{type(e).__name__}: {e}")
             traceback_str = traceback.format_exc()
             logger.error(f"错误追溯:{traceback_str}")
+
     return wrapper
 
-def log_err_e(e:Exception,msg=None):
+
+def log_err_e(e: Exception, msg=None):
     if msg:
         logger.error(f"{msg}{type(e).__name__}:{e}")
     traceback_str = traceback.format_exc()
     logger.error(traceback_str)
 
+
 class AliyunLogHandler:
     @staticmethod
     def write(message):
         put_logs(message)
 
+
 if os.getenv("env") != "development":
     print("这是正式环境,加载阿里云日志")
     aliyun_log_handler = AliyunLogHandler()
-    logger.add(aliyun_log_handler, enqueue=True) 
-
+    logger.add(aliyun_log_handler, enqueue=True)
 
 if __name__ == '__main__':
-   
-   
-   
     #
-   
+
     import os
+
     os.chdir("..")
-    logger.error("test信息0123456789.*/-")
+    logger.error("test信息0123456789.*/-")
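
exception_handler above logs the error but swallows it, so the wrapped function silently returns None, and the bare wrapper also hides the wrapped function's name from introspection. A hedged sketch of the same decorator with functools.wraps and an explicit fallback (the configurable default is an assumption, not this repo's contract):

import functools
import traceback
from loguru import logger

def exception_handler(default=None):
    def deco(func):
        @functools.wraps(func)  # keep func.__name__ / docstring
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                logger.error(f"{func.__name__} {type(e).__name__}: {e}")
                logger.error(traceback.format_exc())
                return default  # explicit fallback instead of an accidental None
        return wrapper
    return deco

With the factory form, call sites become @exception_handler() rather than @exception_handler.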

+ 12 - 37
tools/new_mysql.py

@@ -53,7 +53,7 @@ class MySQLUploader:
 
     def execute_(self, query, params=None):
         for _ in range(3):
-            conn = self._pool.connection() 
+            conn = self._pool.connection()
             cursor = conn.cursor()
             try:
                 if params:
@@ -76,7 +76,7 @@ class MySQLUploader:
     def bulk_insert(self, query, data_list):
         """执行批量插入"""
         for _ in range(3):
-            conn = self._pool.connection() 
+            conn = self._pool.connection()
             cursor = conn.cursor()
             try:
                 cursor.executemany(query, data_list)
@@ -84,7 +84,7 @@ class MySQLUploader:
                 return True
             except pymysql.MySQLError as e:
                 logger.warning(f"可忽略的错误 bulk_insert数据库批量插入错误{type(e).__name__}:{e}")
-                conn.rollback() 
+                conn.rollback()
                 time.sleep(0.5)
             finally:
                 cursor.close()
@@ -96,14 +96,14 @@ class MySQLUploader:
     def query_data(self, query, params=None):
         """执行查询并返回结果"""
         for _ in range(3):
-            conn = self._pool.connection() 
+            conn = self._pool.connection()
             cursor = conn.cursor()
             try:
                 if params:
                     cursor.execute(query, params)
                 else:
                     cursor.execute(query)
-               
+
                 results = cursor.fetchall()
                 return results
             except pymysql.MySQLError as e:
@@ -116,26 +116,24 @@ class MySQLUploader:
         logger.critical(f"query_data 3次没成功.{query} {params}")
         return False
 
-    def execute_sql_file(self,script_file_path):
+    def execute_sql_file(self, script_file_path):
         """执行sql脚本:传入路径或者sql路径都可以"""
+
         def execute_file(path):
 
-           
             with open(path, 'r', encoding='utf-8') as file:
                 sql_script = file.read()
 
-            conn = self._pool.connection() 
+            conn = self._pool.connection()
             cursor = conn.cursor()
-           
-           
+
             cursor.execute(sql_script)
-           
+
             conn.commit()
 
             cursor.close()
             conn.close()
 
-        
         if os.path.isdir(script_file_path):
             for file in os.listdir(script_file_path):
                 execute_file(script_file_path + "\\" + file)
@@ -143,38 +141,15 @@ class MySQLUploader:
             if script_file_path.endswith(".sql"):
                 execute_file(script_file_path)
 
-    def close_connection(self):...
-
+    def close_connection(self):
+        ...
 
 
 if __name__ == '__main__':
-
     m = MySQLUploader()
     s = "select Id,BritishPronunciation from dictionary_word where wordspelling = %s"
     r = m.query_data(s, ("sky",))
     print(r)
     input()
 
-   
-   
-   
-   
-
-   
-   
-   
-   
-   
-   
-
-   
-   
-   
-   
-   
-   
-   
-   
     #
-   
-   
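
execute_, bulk_insert and query_data above all repeat the same shape: take a pooled connection, try up to three times, roll back and sleep on pymysql.MySQLError, always close. A hedged sketch of that loop factored into one helper (illustrative, not the repo's API):

import time
import pymysql
from tools.loglog import logger

def with_retry(pool, work, attempts=3, delay=0.5):
    """Run work(cursor) on a pooled connection, retrying MySQL errors."""
    for _ in range(attempts):
        conn = pool.connection()
        cursor = conn.cursor()
        try:
            result = work(cursor)
            conn.commit()
            return result
        except pymysql.MySQLError as e:
            logger.warning(f"retryable MySQL error {type(e).__name__}: {e}")
            conn.rollback()
            time.sleep(delay)
        finally:
            cursor.close()
            conn.close()
    return False  # same sentinel the methods above return after three failures

# query_data would then reduce to:
#   with_retry(self._pool, lambda cur: (cur.execute(query, params), cur.fetchall())[1])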

+ 29 - 41
tools/sql_format.py

@@ -8,37 +8,34 @@ from core.respone_format import *
 class CRUD:
     def __init__(self):
         self.m = MySQLUploader()
-        self.people_place_name = [] 
+        self.people_place_name = []
         self.get_people_place_name()
 
     def get_word_by_wordid(self, wordid):
         s = "select WordSpelling from dictionary_word where Id = %s"
         r = self.m.query_data(s, (wordid,))
         if r:
-           
             word = r[0][0]
             return word
         return None
 
-    def get_wordid_by_wordspelling(self, wordspelling,auto_insert=False):
+    def get_wordid_by_wordspelling(self, wordspelling, auto_insert=False):
         """加一个功能。大字典内没有这个单词就自动插入,返回id。auto_insert为真,自动插入大字典,获取其id"""
         s = "select Id from dictionary_word where wordspelling = %s"
         r = self.m.query_data(s, (wordspelling,))
         if r:
-           
             wordid = r[0][0]
             return wordid
 
         if auto_insert:
             s = "insert into dictionary_word (WordSpelling) VALUES (%s);"
-            self.m.execute_(s,(wordspelling,))
+            self.m.execute_(s, (wordspelling,))
             s = "select Id from dictionary_word where wordspelling = %s"
             r = self.m.query_data(s, (wordspelling,))
             wordid = r[0][0]
             return wordid
 
-   
-    def get_exchange_prototype(self,wordspelling):
+    def get_exchange_prototype(self, wordspelling):
         s = "select Word from dictionary_exchange where Word = %s"
         r = self.m.query_data(s, (wordspelling,))
         if r:
@@ -53,37 +50,31 @@ class CRUD:
         r = self.m.query_data(s, (wordid,))
         return r
 
-   
     def get_people_place_name(self):
         s2 = "select word from people_place_name"
         r = self.m.query_data(s2)
         for i in r:
             self.people_place_name.append(i[0])
 
-
-   
     def get_word_meaning_by_wordspelling(self, wordspelling, frequency):
         """根据单词获取其全部词义"""
-       
+
         wordid = self.get_wordid_by_wordspelling(wordspelling)
 
-       
         return_data = {"word_id": wordid, "frequency": frequency, "word": wordspelling,
-                       "meanings": {"default": [], "sun_english": {"name": "春笋英语", "items": []}, "oxford": {"name": "牛津", "items": []}}}
+                       "meanings": {"default": [], "sun_english": {"name": "春笋英语", "items": []},
+                                    "oxford": {"name": "牛津", "items": []}}}
 
-       
         s = "select Id,WordMeaning from dictionary_meaningitem where WordSpelling = %s"
         r = self.m.query_data(s, (wordspelling,))
         for row_data in r:
             return_data["meanings"]["default"].append({"id": row_data[0], "text": row_data[1]})
 
-       
         s2 = "select Id,WordMeaning from dictionary_meaningitem_spring_bamboo where WordSpelling = %s"
         r2 = self.m.query_data(s2, (wordspelling,))
         for row_data in r2:
             return_data["meanings"]["sun_english"]["items"].append({"id": row_data[0], "text": row_data[1]})
 
-       
         s2 = "select Id,WordMeaning from dictionary_meaningitem_oxford where WordSpelling = %s"
         r2 = self.m.query_data(s2, (wordspelling,))
         for row_data in r2:
@@ -97,8 +88,7 @@ class CRUD:
         logger.info(f"根据词义id删除,{wordmeaningid}。结果{r}")
         return True if r is True else False
 
-
-    def get_word_all_info(self,word_id, spell,frequency):
+    def get_word_all_info(self, word_id, spell, frequency):
         def get_associational_words_info(word_meaning_id) -> list:
             return_data = []
             s = "select Id,BaseWordMeaningId,BaseWord,BaseWordMeaning,AssociationalWord,AssociationalWordMeaningId,AssociationalWordMeaning," \
@@ -110,12 +100,15 @@ class CRUD:
 
             for single_meaning in r:
                 associational_id, base_word_meaning_id, base_word, base_word_meaning, associational_word, \
-                associational_word_meaning_id, associational_word_meaning, association_reason,\
-                reverse_association_reason, created_time, updated_time = single_meaning
-                r_data = {"id":associational_id,"base_word":{"word":base_word,"meaning_id":base_word_meaning_id,"meaning":base_word_meaning},
-                          "associational_word":{"word":associational_word,"meaning_id":associational_word_meaning_id,"meaning":associational_word_meaning},
-                          "association_reason":association_reason,"reverse_association_reason":reverse_association_reason,
-                          "create_time":created_time.strftime('%Y-%m-%d %H:%M:%S'),"update_time":updated_time.strftime('%Y-%m-%d %H:%M:%S')}
+                    associational_word_meaning_id, associational_word_meaning, association_reason, \
+                    reverse_association_reason, created_time, updated_time = single_meaning
+                r_data = {"id": associational_id,
+                          "base_word": {"word": base_word, "meaning_id": base_word_meaning_id, "meaning": base_word_meaning},
+                          "associational_word": {"word": associational_word, "meaning_id": associational_word_meaning_id,
+                                                 "meaning": associational_word_meaning},
+                          "association_reason": association_reason, "reverse_association_reason": reverse_association_reason,
+                          "create_time": created_time.strftime('%Y-%m-%d %H:%M:%S'),
+                          "update_time": updated_time.strftime('%Y-%m-%d %H:%M:%S')}
                 return_data.append(r_data)
 
             return return_data
@@ -129,8 +122,9 @@ class CRUD:
                 return return_data
             for single_phrase in r:
                 phrase_id, phrase_spelling_text, phrase_chinese_translation, from_type, created_time, updated_time = single_phrase
-                r_data = {"id":phrase_id,"english":phrase_spelling_text,"chinese":phrase_chinese_translation,"from":from_type,
-                          "create_time":created_time.strftime('%Y-%m-%d %H:%M:%S'),"update_time":updated_time.strftime('%Y-%m-%d %H:%M:%S')}
+                r_data = {"id": phrase_id, "english": phrase_spelling_text, "chinese": phrase_chinese_translation, "from": from_type,
+                          "create_time": created_time.strftime('%Y-%m-%d %H:%M:%S'),
+                          "update_time": updated_time.strftime('%Y-%m-%d %H:%M:%S')}
                 return_data.append(r_data)
 
             return return_data
@@ -143,24 +137,24 @@ class CRUD:
             if not r:
                 return return_data
             for single_exchange in r:
-                exchange_id,spell,properties,description = single_exchange
+                exchange_id, spell, properties, description = single_exchange
                 r_data = {"id": exchange_id, "spell": spell, "properties": properties, "description": description}
                 return_data.append(r_data)
 
             return return_data
 
-        return_data_all = {"word_id":word_id,"spell":spell,"frequency":frequency,"meanings":[],"exchanges":[]}
+        return_data_all = {"word_id": word_id, "spell": spell, "frequency": frequency, "meanings": [], "exchanges": []}
         if spell in self.people_place_name:
             return_data_all["type"] = "人名地名"
         return_data_all["type"] = "一般词汇"
 
         s = "select Id,WordMeaning,OperateAccount from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
-        if not r: 
+        if not r:
             return resp_200(data=return_data_all)
         for single_meaning in r:
             meaning_id, word_meaning, operate_account = single_meaning
-            single_meaning_item = {"id":meaning_id,"text":word_meaning,"editor":operate_account}
+            single_meaning_item = {"id": meaning_id, "text": word_meaning, "editor": operate_account}
             associational_words_list = get_associational_words_info(meaning_id)
             single_meaning_item["associational_words"] = associational_words_list
             phrases_list = get_phrases_info(meaning_id)
@@ -172,7 +166,7 @@ class CRUD:
 
         return resp_200(data=return_data_all)
 
-    def delete_associational_word(self,word_id,associational_id):
+    def delete_associational_word(self, word_id, associational_id):
         s = "select Id from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
         if not r:
@@ -181,8 +175,8 @@ class CRUD:
 
         s = "select BaseWordMeaningId from dictionary_associational_word where Id = %s"
         r = self.m.query_data(s, (associational_id,))
-       
-        if r and r[0][0]==meaning_id:
+
+        if r and r[0][0] == meaning_id:
             s = "DELETE FROM dictionary_associational_word where Id = %s"
             r = self.m.execute_(s, (associational_id,))
             logger.info(f"根据联想词id删除,{associational_id}。结果{r}")
@@ -192,7 +186,7 @@ class CRUD:
             logger.info(f"删除联想词时,单词id与联想词id校验失败。{r} {meaning_id}")
             return resp_400(message="单词id与联想词id校验失败")
 
-    def delete_phrese_word(self,word_id,phrase_id):
+    def delete_phrese_word(self, word_id, phrase_id):
         s = "select Id from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
         if not r:
@@ -201,7 +195,7 @@ class CRUD:
 
         s = "select WordMeaningId from dictionary_phrase where Id = %s"
         r = self.m.query_data(s, (phrase_id,))
-       
+
         if r and r[0][0] == meaning_id:
             s = "DELETE FROM dictionary_phrase where Id = %s"
             r = self.m.execute_(s, (phrase_id,))
@@ -225,7 +219,6 @@ class UserCRUD:
         s = "select id,account,password,uname,create_time from user where account = %s"
         r = self.m.query_data(s, (account,))
         if r:
-           
             user_info = (r[0][0], r[0][1], r[0][2], r[0][3], r[0][4].strftime('%Y-%m-%d %H:%M:%S'))
             return user_info
         return None
@@ -237,11 +230,6 @@ class UserCRUD:
 
 if __name__ == '__main__':
     crud = CRUD()
-   
-   
-   
-   
-   
 
     r = crud.get_wordid_by_wordspelling("abcdefg")
     print(type(r))
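
One behavioral note on get_word_all_info above: the unconditional return_data_all["type"] = "一般词汇" runs right after the people_place_name branch, so every word ends up tagged "一般词汇" and the "人名地名" label is always overwritten. If one label or the other is intended, an else fixes it; a sketch of the presumed intent:

if spell in self.people_place_name:
    return_data_all["type"] = "人名地名"   # person/place name
else:
    return_data_all["type"] = "一般词汇"   # ordinary vocabulary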

+ 1 - 2
tools/thread_pool_manager.py

@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
 from concurrent.futures import ThreadPoolExecutor, wait
 
-
-pool_executor = ThreadPoolExecutor(max_workers=200)
+pool_executor = ThreadPoolExecutor(max_workers=200)
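
pool_executor is a module-level singleton shared across the service. A brief usage sketch (the submitted function is hypothetical):

from concurrent.futures import wait
from tools.thread_pool_manager import pool_executor

def fetch(i):  # hypothetical task
    return i * i

futures = [pool_executor.submit(fetch, i) for i in range(10)]
wait(futures)
print([f.result() for f in futures])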

Some files were not shown because too many files changed in this diff