
Add article validation (增加文章校验)

xie 2 weeks ago
commit f97cccd119
51 changed files with 1780 additions and 15848 deletions
  1. + 9 - 5      common/common_data.py
  2. + 15 - 14    common/split_text.py
  3. + 4 - 4      config/read_config.py
  4. + 15 - 8     core/api_article_annotation.py
  5. + 22 - 22    core/api_get_article.py
  6. + 19 - 18    core/api_get_article2.py
  7. + 14 - 16    core/api_get_article3.py
  8. + 16 - 14    core/api_get_audio.py
  9. + 13 - 11    core/api_get_spoken_language.py
  10. + 14 - 7    core/api_get_word.py
  11. + 20 - 21   core/api_routes_jwt.py
  12. + 2 - 5     core/respone_format.py
  13. + 14 - 12   data/get_all_exchange_words.py
  14. + 8 - 8     data/get_frequency_script.py
  15. + 0 - 11995 data/json_word_frequency.json
  16. + 18 - 15   deepseek/ds_api.py
  17. + 46 - 27   deepseek/get_article3.py
  18. + 81 - 45   gpt/article_annotation.py
  19. + 27 - 79   gpt/chatgpt.py
  20. + 139 - 110 gpt/get_article.py
  21. + 91 - 46   gpt/get_article2.py
  22. + 27 - 23   gpt/gpt.py
  23. + 51 - 28   gpt/gpt_check.py
  24. + 4 - 10    gpt/query_oss_file.py
  25. + 3 - 4     main.py
  26. + 16 - 17   main_9000.py
  27. + 4 - 1031  make_docx_demo/check_test_table/aaaaaaaaaa.py
  28. + 6 - 7     make_docx_demo/check_test_table/baidu_ocr.py
  29. + 126 - 75  make_docx_demo/check_test_table/image_preprocess.py
  30. + 115 - 72  make_docx_demo/check_test_table/image_preprocess2.py
  31. + 8 - 4     make_docx_demo/check_test_table/mark_ocr_loca.py
  32. + 2 - 1535  make_docx_demo/data.py
  33. + 42 - 38   make_docx_demo/docx_other_func.py
  34. + 11 - 9    make_docx_demo/get_standard_data.py
  35. + 169 - 114 make_docx_demo/main_word.py
  36. + 244 - 147 make_docx_demo/main_word_applet.py
  37. + 8 - 10    make_docx_demo/new_word2pdf.py
  38. + 25 - 11   make_docx_demo/word2pdf.py
  39. + 9 - 6     make_docx_demo/word_component/make_rectangle.py
  40. + 71 - 59   mock/mock_request.py
  41. + 1 - 2     spoken_language/common/utils.py
  42. + 7 - 4     spoken_language/read_config.py
  43. + 15 - 15   spoken_language/soe/speaking_assessment.py
  44. + 40 - 23   spoken_language/soeexample.py
  45. + 13 - 8    tools/ali_log.py
  46. + 61 - 35   tools/audio.py
  47. + 12 - 9    tools/del_expire_file.py
  48. + 19 - 27   tools/loglog.py
  49. + 39 - 16   tools/new_mysql.py
  50. + 42 - 25   tools/sql_format.py
  51. + 3 - 2     tools/thread_pool_manager.py

+ 9 - 5
common/common_data.py

@@ -1,17 +1,21 @@
 # -*- coding: utf-8 -*-
-import json
 import os
+import json
 
 SECRET_KEY = os.getenv("key")
 
 try:
-    with open("data/json_word_frequency.json", "r", encoding="utf-8") as f:
+    with open("data/json_word_frequency.json","r",encoding="utf-8") as f:
         word_frequency = json.loads(f.read())
-except FileNotFoundError:
+except FileNotFoundError: 
     with open(r"C:\Users\pan\Desktop\demo\qback\data\json_word_frequency.json", "r", encoding="utf-8") as f:
         word_frequency = json.loads(f.read())
 
-all_json_words_set = {word for key, word in word_frequency.items()}
 
-with open("data/all_exchange_words.txt", "r", encoding="utf-8") as f:
+all_json_words_set = {word for key,word in word_frequency.items()}
+
+
+with open("data/all_exchange_words.txt","r",encoding="utf-8") as f:
     all_exchange_words = set(f.read().split("\n"))
+
+

+ 15 - 14
common/split_text.py

@@ -1,34 +1,32 @@
 # -*- coding: utf-8 -*-
 import re
 
-
-def split_text_to_word(text: str, split_hyphen=False):
+def split_text_to_word(text:str,split_hyphen=False):
     """
     split_hyphen: 是否分拆-连字符,默认不拆
     """
     if split_hyphen:
-        words_list = re.findall(r'\b[\'\w]+\b', text)
+        words_list = re.findall(r'\b[\'\w]+\b',text) 
     else:
-        words_list = re.findall(r'\b[-\'\w]+\b', text)
+        words_list = re.findall(r'\b[-\'\w]+\b', text) 
     return words_list
 
-
-def get_article_words_count(text: str):
+def get_article_words_count(text:str):
     return len(split_text_to_word(text))
 
 
-def split_text_to_sentences(text: str) -> list:
-    sentences = re.split(r'(?<=[.!?;])', text)
+def split_text_to_sentences(text:str) -> list:
+    sentences = re.split(r'(?<=[.!?;])', text) 
     sentences = [i for i in sentences if i.replace(" ", "")]
     return sentences
 
 
-def split_text_to_word_punctuation(text: str):
-    word_punctuation_list = re.findall(r'\b[-\'\w]+\b|[^\w\s]|\n', text)
+def split_text_to_word_punctuation(text:str):
+   
+    word_punctuation_list = re.findall(r'\b[-\'\w]+\b|[^\w\s]|\n',text)
     return word_punctuation_list
 
-
-def is_word(single_word: str, strict: bool = False):
+def is_word(single_word:str,strict:bool=False):
     """strict 严格模式,默认不开。严格模式下,每个实体字符必须是字母。全部都是字母才算是单词
     非严格模式下,有一个字母就算是单词。即使是 op123,it's
     """
@@ -45,6 +43,9 @@ def is_word(single_word: str, strict: bool = False):
 
 
 if __name__ == '__main__':
-    a = "fdh fgdhf fgd-y i'am a student.gfddfgfd dfhgfd ! fdgh,fdgh fght. 3.1415"
 
-    print(is_word("student34", strict=True))
+    a = "fdh fgdhf fgd-y i'am a student.gfddfgfd dfhgfd ! fdgh,fdgh fght. 3.1415"
+   
+   
+   
+    print(is_word("student34",strict=True))

+ 4 - 4
config/read_config.py

@@ -3,15 +3,15 @@ import yaml
 
 
 def read_config():
-    with open("config/env.yaml", "r", encoding="utf-8") as file:
+   
+    with open("config/env.yaml", "r",encoding="utf-8") as file:
         config = yaml.safe_load(file)
         return config
 
+address = "https://dcjxb.yunzhixue.cn" if read_config()['env']=='product' else "http://dcjxbtest.yunzhixue.cn"
 
-address = "https://dcjxb.yunzhixue.cn" if read_config()['env'] == 'product' else "http://dcjxbtest.yunzhixue.cn"
 
 if __name__ == '__main__':
     import os
-
     os.chdir(r'C:\Users\86131\Desktop\demo\ai_qback')
-    print(read_config()['env'])
+    print(read_config()['env'])

+ 15 - 8
core/api_article_annotation.py

@@ -1,35 +1,42 @@
 # -*- coding: utf-8 -*-
 
 
-from fastapi import Request, APIRouter, Query
-from pydantic import BaseModel
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path, Depends, BackgroundTasks
+from tools.loglog import logger,log_err_e
 
 from core.respone_format import *
 from gpt.article_annotation import Annotation
+from pydantic import BaseModel, ValidationError, conint,Field,conint
+from typing import List, Optional,Literal
+import asyncio
+
 
 router_article_annotation = APIRouter()
 annotation_obj = Annotation()
 
-
 class Annotation(BaseModel):
-    english_text: str
-
+    english_text:str
+    split_blank:bool = True
 
 @router_article_annotation.post("/article/meaning/annotation")
-def post_annotation(json_data: Annotation, request: Request, ):
+def post_annotation(json_data:Annotation,request:Request,):
     """词义标注的同步接口"""
     json_data = json_data.model_dump()
     english_text = json_data.get("english_text")
-    real_ip = request.headers.get("X-Real-IP", "0.0.0.0")
+    split_blank = json_data.get("split_blank")
+    real_ip = request.headers.get("X-Real-IP","0.0.0.0")
 
     resp = annotation_obj.submit_task(
         english_text=english_text,
+        split_blank=split_blank,
         real_ip=real_ip,
     )
     return resp_200(data=resp)
 
 
+
 @router_article_annotation.get("/article/query_annotation")
-async def query_annotation(task_id: int = Query(...)):
+async def query_annotation(task_id:int=Query(...)):
     resp = await annotation_obj.query_result_by_taskid(task_id)
     return resp
+
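
For context, a minimal client sketch for the two endpoints added above (the host is an assumption; per the handler, the POST runs the annotation synchronously and returns the result):

    import requests

    base = "http://127.0.0.1:8000"  # assumed local dev host
    payload = {"english_text": "If a dog causes a cat accident", "split_blank": True}
    print(requests.post(f"{base}/article/meaning/annotation", json=payload).json())

    # A finished task can also be re-fetched by id while it is still cached:
    print(requests.get(f"{base}/article/query_annotation", params={"task_id": 12345678}).json())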

+ 22 - 22
core/api_get_article.py

@@ -1,45 +1,44 @@
 # -*- coding: utf-8 -*-
-from typing import List, Optional
-
-from fastapi import Request, APIRouter
-from pydantic import BaseModel, conint
-
-from core.respone_format import *
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
+from tools.loglog import logger
 from gpt.get_article import GetArticle
 from gpt.query_oss_file import query_file_content
-from tools.loglog import logger
+from core.respone_format import *
+
+from pydantic import BaseModel, ValidationError, conint
+from typing import List, Optional
+
 
 router = APIRouter()
 get_article = GetArticle()
 
 
 class ArticleRequest(BaseModel):
-    meaning_ids: List[conint(ge=1)]
-    callback_url: Optional[str] = None
-    demo_name: Optional[str] = "无"
-    student_stage: Optional[int] = 1
-    vocabulary: Optional[int] = 500
-    class_id: Optional[int]
-
+    meaning_ids: List[conint(ge=1)] 
+    callback_url: Optional[str] = None 
+    demo_name: Optional[str] = "无" 
+    student_stage: Optional[int] = 1 
+    vocabulary: Optional[int] = 500 
+    class_id :Optional[int]
 
 @router.post("/article")
-def post_article(json_data: ArticleRequest, request: Request):
-    real_ip = request.headers.get("X-Real-IP", "localhost")
+def post_article(json_data:ArticleRequest,request:Request):
+    real_ip = request.headers.get("X-Real-IP","localhost")
     words_meaning_ids: list = json_data.meaning_ids
     callback_url = json_data.callback_url
     demo_name = json_data.demo_name
     student_stage = json_data.student_stage
     vocabulary = json_data.vocabulary
-    class_id = json_data.class_id
+    class_id = json_data.class_id 
 
     try:
         if not words_meaning_ids:
             return resp_404(message="没有词义id")
 
-        r = get_article.submit_task(words_meaning_ids=words_meaning_ids, callback_url=callback_url,
-                                    real_ip=real_ip, demo_name=demo_name,
-                                    student_stage=student_stage, vocabulary=vocabulary, class_id=class_id)
-        return r if not isinstance(r, str) else resp_500(message=r)
+        r = get_article.submit_task(words_meaning_ids=words_meaning_ids,callback_url=callback_url,
+                                    real_ip=real_ip,demo_name=demo_name,
+                                    student_stage=student_stage,vocabulary=vocabulary,class_id=class_id)
+        return r if not isinstance(r,str) else resp_500(message=r)
 
     except Exception as e:
         logger.error(f"{type(e).__name__},{e}")
@@ -47,7 +46,7 @@ def post_article(json_data: ArticleRequest, request: Request):
 
 
 @router.post("/query_oss_file")
-def query_oss_file(json_data: dict, request: Request):
+def query_oss_file(json_data:dict,request:Request):
     oss_key = json_data.get("key")
 
     if not oss_key:
@@ -56,3 +55,4 @@ def query_oss_file(json_data: dict, request: Request):
     if j == 0:
         return resp_500(message="错误:没有这个文件")
     return JSONResponse(j)
+
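
For reference, a hedged client sketch for the /article endpoint above (host and ids are placeholders; the sample meaning ids and the response shape come from the submit_task docstring and result in gpt/get_article.py below):

    import requests

    base = "http://127.0.0.1:8000"  # assumed local dev host
    payload = {
        "meaning_ids": [110, 111, 112, 113, 114],  # sample ids from the docstring
        "student_stage": 1,
        "vocabulary": 500,
        "class_id": 1,
    }
    r = requests.post(f"{base}/article", json=payload)
    print(r.json())  # e.g. {"id": 12345678, "key": "study/article/12345678"}
    # The finished article is then fetched via POST /query_oss_file with {"key": ...}.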

+ 19 - 18
core/api_get_article2.py

@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 
-from typing import List, Optional
-
-from fastapi import Request, APIRouter, BackgroundTasks
-from pydantic import BaseModel, Field, conint
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path, Depends, BackgroundTasks
+from tools.loglog import logger,log_err_e
 
 from core.respone_format import *
 from gpt.get_article2 import GetArticle
-from tools.loglog import log_err_e
+from pydantic import BaseModel, ValidationError, conint,Field,conint
+from typing import List, Optional,Literal
+import asyncio
 
 router = APIRouter()
 
@@ -15,34 +15,35 @@ get_article = GetArticle()
 
 
 class Word(BaseModel):
-    meaning_id: int = Field(..., description="单词的词义id")
-    word_id: int = Field(..., description="单词id")
+    meaning_id:int = Field(..., description="单词的词义id")
+    word_id:int = Field(..., description="单词id")
     spell: str = Field(..., description="单词的拼写")
     meaning: str = Field(..., description="单词的意思")
 
 
 class ArticleRequest(BaseModel):
     core_words: List[Word] = Field(..., description="单词列表")
-    take_count: int = 2
-    demo_name: Optional[str] = "无"
+    take_count: int = 2 
+    demo_name: Optional[str] = "无" 
     reading_level: conint(ge=1, le=30) = Field(default=10, description="阅读水平,默认值为10;[8,16,24]小学初中高中")
-    article_length: int = Field(default=None, description="需要生成的文章长度,可以不传,不传自己根据reading_level判断")
-    exercise_id: int = Field(default=0, description="学案ID,用于日志快速定位")
+    article_length:int = Field(default=None,description="需要生成的文章长度,可以不传,不传自己根据reading_level判断")
+    exercise_id:int = Field(default=0,description="学案ID,用于日志快速定位")
 
 
 @router.post("/article/reading-comprehension")
 def post_article(
-        json_data: ArticleRequest,
-        request: Request,
-        background_tasks: BackgroundTasks,
+    json_data:ArticleRequest,
+    request:Request,
+    background_tasks: BackgroundTasks,
 ):
+
     json_data = json_data.model_dump()
-    real_ip = request.headers.get("X-Real-IP", "0.0.0.0")
+    real_ip = request.headers.get("X-Real-IP","0.0.0.0")
 
     core_words = json_data["core_words"]
     take_count = json_data["take_count"]
     demo_name = json_data["demo_name"]
-    reading_level = json_data["reading_level"]
+    reading_level = json_data["reading_level"] 
     article_length = json_data["article_length"]
     exercise_id = json_data["exercise_id"]
 
@@ -57,8 +58,8 @@ def post_article(
             exercise_id=exercise_id,
             background_tasks=background_tasks
         )
-        return r if not isinstance(r, str) else resp_500(message=r)
+        return r if not isinstance(r,str) else resp_500(message=r)
 
     except Exception as e:
         log_err_e(e, msg="文章2接口错误/article/reading-comprehension;")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        return resp_500(message=f"{type(e).__name__},{e}")

+ 14 - 16
core/api_get_article3.py

@@ -1,18 +1,17 @@
 # -*- coding: utf-8 -*-
 
-from typing import List, Optional, Literal
-
-from fastapi import Request, APIRouter
-from pydantic import BaseModel, Field
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
+from tools.loglog import logger,log_err_e
 
 from core.respone_format import *
 from deepseek.get_article3 import GetArticle
-from tools.loglog import log_err_e
+from pydantic import BaseModel, ValidationError, conint,Field
+from typing import List, Optional,Literal
+
 
 router = APIRouter(tags=['deepseek接口'])
 get_article = GetArticle()
 
-
 class Word(BaseModel):
     spell: str = Field(..., description="单词的拼写")
     meaning: str = Field(..., description="单词的意思")
@@ -20,22 +19,21 @@ class Word(BaseModel):
 
 class ArticleRequest(BaseModel):
     words: List[Word] = Field(..., description="单词列表")
-    take_count: int = 2
-    student_stage: Literal[1, 2, 3]
-    demo_name: Optional[str] = "无"
+    take_count: int = 2 
+    student_stage: Literal[1, 2, 3] 
+    demo_name: Optional[str] = "无" 
 
 
 @router.post("/article/reading-comprehension/deepseek")
-def post_article(json_data: ArticleRequest, request: Request):
+def post_article(json_data:ArticleRequest,request:Request):
     json_data = json_data.dict()
     real_ip = request.headers.get("X-Real-IP")
-    words, take_count, student_stage, demo_name = json_data["words"], json_data["take_count"], json_data["student_stage"], json_data["demo_name"]
+    words,take_count,student_stage,demo_name = json_data["words"],json_data["take_count"],json_data["student_stage"],json_data["demo_name"]
 
     try:
-        r = get_article.submit_task(words_meaning_list=words, take_count=take_count, student_stage=student_stage, real_ip=real_ip,
-                                    demo_name=demo_name)
-        return r if not isinstance(r, str) else resp_500(message=r)
+        r = get_article.submit_task(words_meaning_list=words, take_count=take_count,student_stage=student_stage,real_ip=real_ip,demo_name=demo_name)
+        return r if not isinstance(r,str) else resp_500(message=r)
 
     except Exception as e:
-        log_err_e(e, msg="文章3,ds接口错误/article/reading-comprehension/deepseek;")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        log_err_e(e,msg="文章3,ds接口错误/article/reading-comprehension/deepseek;")
+        return resp_500(message=f"{type(e).__name__},{e}")

+ 16 - 14
core/api_get_audio.py

@@ -1,25 +1,24 @@
 # -*- coding: utf-8 -*-
-import traceback
-
-from fastapi import Request, APIRouter, Query
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
 from fastapi.responses import StreamingResponse
-
-from core.respone_format import *
-from tools.audio import GetAudio
 from tools.loglog import logger
+from tools.audio import GetAudio
+from core.respone_format import *
+import traceback
 
 router = APIRouter()
 get_audio = GetAudio()
 
+
+
 """
 生成音频tts接口,传递两个参数,word和resp_type; word和resp_type回复设计:0返回oss路径,1 二进制文件,2 url三种;
 """
 
-
 @router.get("/tts")
-def get_tts(word: str = Query(None, max_length=300)):
+def get_tts(word:str=Query(None, max_length=300)):
     try:
-        f = get_audio.submit_task(word_or_phrase=word, resp_type=0)
+        f = get_audio.submit_task(word_or_phrase=word,resp_type=0) 
         r = f.result()
         if r:
             return resp_200(data=r)
@@ -31,21 +30,24 @@ def get_tts(word: str = Query(None, max_length=300)):
 
 
 @router.post("/tts")
-def get_tts(json_data: dict, request: Request):
+def get_tts(json_data:dict,request:Request):
+   
+
     word_or_phrase = json_data["text"]
     resp_type = json_data.get("type")
 
+   
     if len(word_or_phrase) >= 300:
         logger.error(f"单词或短语过长")
         return resp_400(message="单词或短语过长")
-    if resp_type not in [0, 1, 2]:
+    if resp_type not in [0,1,2]:
         logger.error(f"type参数不是012")
         return resp_400(message="type参数不是012")
 
     try:
-        f = get_audio.submit_task(word_or_phrase=word_or_phrase, resp_type=resp_type)
+        f = get_audio.submit_task(word_or_phrase=word_or_phrase,resp_type=resp_type)
         r = f.result()
-        if r and resp_type in [0, 2]:
+        if r and resp_type in [0,2]:
             return resp_200(data=r)
         if r and resp_type == 1:
             return StreamingResponse(content=r, media_type='audio/mpeg')
@@ -55,4 +57,4 @@ def get_tts(json_data: dict, request: Request):
         traceback_str = traceback.format_exc()
         logger.error(traceback_str)
         logger.error(f"{type(e).__name__},{e}")
-        return resp_500(message=f"{type(e).__name__},{e}")
+        return resp_500(message=f"{type(e).__name__},{e}")
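
To make the resp_type contract from the docstring concrete, a client sketch (host is an assumption):

    import requests

    base = "http://127.0.0.1:8000"  # assumed local dev host
    print(requests.get(f"{base}/tts", params={"word": "hello"}).json())                   # OSS path
    print(requests.post(f"{base}/tts", json={"text": "good morning", "type": 0}).json())  # 0: OSS path
    print(requests.post(f"{base}/tts", json={"text": "good morning", "type": 2}).json())  # 2: URL
    audio = requests.post(f"{base}/tts", json={"text": "good morning", "type": 1})        # 1: mp3 bytes
    with open("tts.mp3", "wb") as f:
        f.write(audio.content)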

+ 13 - 11
core/api_get_spoken_language.py

@@ -1,32 +1,34 @@
 # -*- coding: utf-8 -*-
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path,UploadFile,File
+from tools.loglog import logger,log_err_e
+from spoken_language.soeexample import spoken_result,make_spoken
 from random import randint
-
-from fastapi import Form, Request, APIRouter, UploadFile, File
-
 from core.respone_format import *
-from spoken_language.soeexample import make_spoken
-from tools.loglog import logger, log_err_e
+
 
 router = APIRouter()
 
 
 @router.post("/spoken_language")
-async def post_article(request: Request, url=Form(""), file: UploadFile = File(None), text=Form(...)):
+async def post_article(request:Request,url=Form(""),file: UploadFile = File(None),text=Form(...)):
+
     if not url and not file:
         logger.error("错误:请上传mp3文件url参数或者二进制文件file参数")
         return resp_404(message="错误:请上传mp3文件url参数或者二进制文件file参数")
     try:
-        task_id = randint(10000, 99999)
-
+        task_id = randint(10000,99999)
+       
         if file:
             file_content = await file.read()
         else:
             file_content = None
-        data: dict = make_spoken(task_id, url, file_content, text)
+        data:dict = make_spoken(task_id,url,file_content,text)
         if data:
             logger.success(f"完成spoken_language请求:{data}")
-
+           
             return data
     except Exception as e:
-        log_err_e(e, msg="口语评测接口")
+        log_err_e(e,msg="口语评测接口")
         return resp_500(message=f"{type(e).__name__},{e}")
+
+
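
A hedged example of calling the endpoint above with a local file (host and file name are placeholders; a `url` form field may be sent instead of `file`):

    import requests

    base = "http://127.0.0.1:8000"  # assumed local dev host
    with open("sample.mp3", "rb") as f:  # placeholder recording
        r = requests.post(f"{base}/spoken_language",
                          data={"text": "How are you today?"},
                          files={"file": ("sample.mp3", f, "audio/mpeg")})
    print(r.json())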

+ 14 - 7
core/api_get_word.py

@@ -1,7 +1,11 @@
 # -*- coding: utf-8 -*-
 
-from fastapi import Request, APIRouter, Query
-from fastapi.responses import FileResponse, PlainTextResponse
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter,Query,Path
+from fastapi.responses import FileResponse,PlainTextResponse
+from tools.loglog import logger
+from tools.audio import GetAudio
+from core.respone_format import *
+import traceback
 
 from make_docx_demo.main_word_applet import start_make_word as s2
 
@@ -24,12 +28,15 @@ def make_word(json_data:dict,request:Request,
 """
 
 
+
 @router.post("/make_word/vocabulary_assault")
-def make_word(json_data: dict, request: Request,
-              document_format: int = Query(1, description="1:docx;2.pdf"),
-              scanpage_format: int = Query(1, description="1:老版筛查表;2.新版筛查表;3.老版+新版筛查表")):
+def make_word(json_data:dict,request:Request,
+            document_format:int=Query(1,description="1:docx;2.pdf"),
+            scanpage_format:int=Query(1,description="1:老版筛查表;2.新版筛查表;3.老版+新版筛查表")):
+
+   
     headers = {"Content-Type": "application/octet-stream"}
     if path := s2(json_data, document_format, scanpage_format):
-        return FileResponse(path=path, headers=headers, media_type='application/octet-stream')
+        return FileResponse(path=path,headers=headers, media_type='application/octet-stream')
     else:
-        return PlainTextResponse(status_code=500, content="服务器内部错误")
+        return PlainTextResponse(status_code=500,content="服务器内部错误")

+ 20 - 21
core/api_routes_jwt.py

@@ -1,16 +1,14 @@
 # -*- coding: utf-8 -*-
-import asyncio
-import datetime
-import traceback
-
+from fastapi import FastAPI, Form, HTTPException, Request,status,APIRouter
 import jwt
-from fastapi import Form, Request, APIRouter
-from jwt.exceptions import ExpiredSignatureError, DecodeError, InvalidAlgorithmError
-
-from common.common_data import SECRET_KEY
+from jwt.exceptions import ExpiredSignatureError,DecodeError,InvalidAlgorithmError
 from core.respone_format import *
-from tools.loglog import logger
 from tools.sql_format import UserCRUD
+import datetime
+import asyncio
+from tools.loglog import logger
+import traceback
+from common.common_data import SECRET_KEY
 
 router = APIRouter()
 user_crud = UserCRUD()
@@ -31,6 +29,7 @@ def create_access_token(username: str):
         logger.error(f"{type(e).__name__}, {e}")
 
 
+
 def verify_token_sync(token: str):
     if not token:
         return 1
@@ -39,10 +38,10 @@ def verify_token_sync(token: str):
         if not decoded_payload.get("username"):
             return 2
         else:
-            return 0
+            return 0 
     except ExpiredSignatureError:
         return 3
-    except (InvalidAlgorithmError, DecodeError):
+    except (InvalidAlgorithmError,DecodeError):
         return 4
 
 
@@ -50,7 +49,6 @@ async def verify_token(token: str):
     loop = asyncio.get_event_loop()
     return await loop.run_in_executor(None, verify_token_sync, token)
 
-
 async def verify_token2(token):
     msg_verify_code = await verify_token(token)
     if msg_verify_code != 0:
@@ -68,13 +66,13 @@ async def verify_token2(token):
 
 @router.post("/user/login")
 async def get_token(username: str = Form(...), password: str = Form(...)):
-    user_info = user_crud.get_userinfo_by_account(username)
+    user_info = user_crud.get_userinfo_by_account(username) 
     if user_info:
         userid, account, true_pwd, uname, create_time = user_info
     else:
         return resp_400(message="user does not exist")
 
-    if password == true_pwd:
+    if password==true_pwd:
         access_token = create_access_token(username)
         return_data = {"access_token": access_token}
         return resp_200(data=return_data)
@@ -83,29 +81,30 @@ async def get_token(username: str = Form(...), password: str = Form(...)):
 
 
 @router.get("/user")
-async def get_user(request: Request):
+async def get_user(request:Request):
+   
     token = request.headers.get("Authorization")
     try:
         decoded_payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
-        account = decoded_payload.get("username")
+        account = decoded_payload.get("username") 
         user_info = user_crud.get_userinfo_by_account(account=account)
         userid, account, true_pwd, uname, create_time = user_info
-        data = {"id": userid, "name": uname, "account": account, "create_time": create_time}
+        data = {"id":userid,"name":uname,"account":account,"create_time":create_time}
         return resp_200(data=data)
     except ExpiredSignatureError:
         return resp_401(message="The token has expired")
-    except (InvalidAlgorithmError, DecodeError):
+    except (InvalidAlgorithmError,DecodeError):
         return resp_400(message="Token decoding error")
     except Exception as e:
         return resp_400(message=f"Error in get user information.{e}")
 
 
 @router.post("/user/logout")
-async def get_token(request: Request):
+async def get_token(request:Request):
     token = request.headers.get("Authorization")
     try:
         decoded_payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
-        account = decoded_payload.get("username")
+        account = decoded_payload.get("username") 
         logger.info(f"账号:{account}注销成功")
         data = {"result": "注销成功"}
         return resp_200(data=data)
@@ -114,4 +113,4 @@ async def get_token(request: Request):
     except (InvalidAlgorithmError, DecodeError):
         return resp_400(message="Token decoding error")
     except Exception as e:
-        return resp_400(message=f"User logout error.{e}")
+        return resp_400(message=f"User logout error.{e}")
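
For reference, a minimal PyJWT round trip matching the handlers above (the secret here is a placeholder; the real one comes from the `key` environment variable via common.common_data):

    import datetime
    import jwt  # PyJWT

    SECRET_KEY = "dev-secret"  # placeholder
    token = jwt.encode(
        {"username": "alice",
         "exp": datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(hours=12)},
        SECRET_KEY, algorithm="HS256")
    decoded = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
    assert decoded["username"] == "alice"  # the case where verify_token_sync returns 0
    # Expired or tampered tokens raise ExpiredSignatureError / DecodeError,
    # which the code above maps to return codes 3 and 4.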

+ 2 - 5
core/respone_format.py

@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
-from typing import Union
-
 from fastapi import status
 from fastapi.responses import JSONResponse
+from typing import Union
 
 
 def resp_200(*, data: Union[list, dict, str]) -> JSONResponse:
@@ -18,14 +17,12 @@ def resp_400(*, message: str = "Bad Request", data: Union[list, dict, str] = Non
         content={"code": 400, "message": message, "data": data}
     )
 
-
 def resp_401(*, message: str = "The token has expired", data: Union[list, dict, str] = None) -> JSONResponse:
     return JSONResponse(
         status_code=status.HTTP_401_UNAUTHORIZED,
         content={"code": 401, "message": message, "data": data}
     )
 
-
 def resp_404(*, message: str = "Not Found", data: Union[list, dict, str] = None) -> JSONResponse:
     return JSONResponse(
         status_code=status.HTTP_404_NOT_FOUND,
@@ -37,4 +34,4 @@ def resp_500(*, message: str = "Internal Server Error", data: Union[list, dict,
     return JSONResponse(
         status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
         content={"code": 500, "message": message, "data": data}
-    )
+    )

+ 14 - 12
data/get_all_exchange_words.py

@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
-import os
-import sys
-
 from tools.new_mysql import MySQLUploader
+import sys
+import os
 
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
@@ -10,20 +9,22 @@ m = MySQLUploader()
 s = "select Word,InflectedWordSpelling,Properties from dictionary_exchange"
 r = m.query_data(s)
 
+
 all_exchange_words = set()
 all_exchange_words_dict = {}
 all_prototype_deformation_dict = {}
 prototype_deformation_dict2 = {}
 
 for i in r:
-
-    prototype, deformation, properties = [i[0], i[1], i[2]]
-
-    all_exchange_words.update({prototype, deformation})
+   
+    prototype,deformation,properties= [i[0],i[1],i[2]]
+   
+    all_exchange_words.update({prototype,deformation})
 
     if properties == "原型":
         prototype_deformation_dict2[prototype] = deformation
 
+   
     if deformation not in all_prototype_deformation_dict:
         all_prototype_deformation_dict[deformation] = prototype
 
@@ -33,29 +34,30 @@ for i in r:
         all_exchange_words_dict[prototype].append(deformation)
 
 
-def word_to_prototype(word: str) -> str:
+
+def word_to_prototype(word:str) -> str:
     """依次按顺序查询。1.先查原型 2.最后小写再查变形对应的原型 3.再查变形对应的原型。这样才能保证,不过滤有特殊意义的大写"""
     if word in all_exchange_words_dict:
         return word
     elif word.lower() in all_exchange_words_dict:
         return word.lower()
-
+   
     elif word in all_prototype_deformation_dict:
         w = all_prototype_deformation_dict[word]
         if w in prototype_deformation_dict2:
             w = prototype_deformation_dict2[w]
         return w
 
-    else:
+    else: 
         return word
 
 
 def get_word_exchange_list(word) -> list:
     prototype_word = word_to_prototype(word)
-    all_exchange_words_list = all_exchange_words_dict.get(prototype_word, [])
+    all_exchange_words_list = all_exchange_words_dict.get(prototype_word,[])
     return all_exchange_words_list
 
 
 if __name__ == '__main__':
     print(word_to_prototype("was"))
-    print(word_to_prototype("made"))
+    print(word_to_prototype("made"))

+ 8 - 8
data/get_frequency_script.py

@@ -1,18 +1,18 @@
 # -*- coding: utf-8 -*-
-import json
-
-from openpyxl import load_workbook
 from openpyxl.worksheet.worksheet import Worksheet
+from openpyxl import load_workbook
+import json
 
-wb = load_workbook(r"单词词义表.xlsx", read_only=True)
+wb = load_workbook(r"单词词义表.xlsx",read_only=True)
 word_dict = {}
 ws: Worksheet = wb["Sheet1"]
 for row in ws.values:
-    _, word, frequency = row
+    _,word,frequency = row
     frequency = int(frequency)
-
+   
     word_dict[frequency] = word
 wb.close()
-with open("json_word_frequency.json", mode="w", encoding="utf-8") as f:
+with open("json_word_frequency.json",mode="w",encoding="utf-8") as f:
+   
     write_data = json.dumps(word_dict)
-    f.write(write_data)
+    f.write(write_data)

File diff suppressed because it is too large
+ 0 - 11995
data/json_word_frequency.json


+ 18 - 15
deepseek/ds_api.py

@@ -1,12 +1,13 @@
 # -*- coding: utf-8 -*-
 import json
-import os
 
 from openai import OpenAI
-
+import os
 from tools.loglog import SimpleLogger
 
 
+
+
 class DS:
     def __init__(self):
         self.client = OpenAI(
@@ -15,7 +16,7 @@ class DS:
         )
         self.logger = SimpleLogger(base_file_name="deepseek")
 
-    def write_log(self, message: str, log_type="info"):
+    def write_log(self, message:str, log_type="info"):
         """写入日志"""
         log_methods = {
             "warning": self.logger.warning,
@@ -34,9 +35,9 @@ class DS:
             self.write_log(f"Response validation error: {e}", log_type="error")
             return False
 
-    def get_article(self, user_prompt: str, sys_prompt: str = None, temperature: float = 0.8,
-                    json_resp: bool = False, real_ip: str = "", demo_name: str = "",
-                    max_tokens: int = 5192) -> str:
+    def get_article(self, user_prompt: str, sys_prompt: str = None, temperature: float = 0.8, 
+                   json_resp: bool = False, real_ip: str = "", demo_name: str = "", 
+                   max_tokens: int = 5192) -> str:
         """获取AI生成的文章
         
         Args:
@@ -55,27 +56,29 @@ class DS:
         if sys_prompt:
             messages.append({'role': 'system', 'content': sys_prompt})
         messages.append({'role': 'user', 'content': user_prompt})
-
+        
         response_format = {"type": "json_object"} if json_resp else {"type": "text"}
-
+        
+       
         resp = ""
         for _ in range(3):
             completion = self.client.chat.completions.create(
-                model="deepseek-v3",
+                model="deepseek-v3", 
                 messages=messages,
                 temperature=temperature,
                 response_format=response_format,
-                max_tokens=max_tokens
+                max_tokens=max_tokens 
             )
             resp = completion.choices[0].message.content
             if self.check_article_response(resp):
                 break
-
+        
+       
         if sys_prompt and resp:
             self.write_log(sys_prompt)
         self.write_log(user_prompt)
         self.write_log(resp)
-
+        
         return resp
 
 
@@ -97,11 +100,11 @@ if __name__ == '__main__':
 
 """
     ds = DS()
-    resp = ds.get_article(user_prompt=p, json_resp=True)
+    resp = ds.get_article(user_prompt=p,json_resp=True)
     print(resp)
     print()
 
-    print(resp.replace(r'\"n', '\n').replace(r"\\n", '\n'))
+    print(resp.replace(r'\"n','\n').replace(r"\\n",'\n'))
     print()
 
-    print(json.loads(resp))
+    print(json.loads(resp))

+ 46 - 27
deepseek/get_article3.py

@@ -1,18 +1,21 @@
 # -*- coding: utf-8 -*-
 
-import json
 import re
-from concurrent.futures import wait
-from random import randint, shuffle
-
-import requests
-
-from common.common_data import all_exchange_words
-from common.split_text import split_text_to_word
+import json
 from deepseek.ds_api import DS
-from tools.loglog import logger, log_err_e
+
 from tools.new_mysql import MySQLUploader
+from tools.loglog import logger, log_err_e
 from tools.thread_pool_manager import pool_executor
+from common.common_data import all_exchange_words
+from common.split_text import split_text_to_word
+
+from pydantic import BaseModel
+from cachetools import TTLCache
+from concurrent.futures import wait
+from random import randint, shuffle
+import json
+import requests
 
 
 def get_article_difficulty(article) -> int:
@@ -55,20 +58,23 @@ def find_interval(number):
 def parse_question(question_block):
     question_info = {}
 
+   
     question_match = re.search(r'问题:\s*(.*)', question_block)
     if question_match:
         question_info['trunk'] = question_match.group(1).strip()
 
+   
     analysis_match = re.search(r'解析:\s*(.*)', question_block)
     if analysis_match:
         question_info['analysis'] = analysis_match.group(1).strip()
 
+   
     options_match = re.search(r'选项:(.*)', question_block)
     if options_match:
         options_text = options_match.group(1).strip()
         options_list = re.split(r'\s*[BCDA]\.\s*', options_text)[1:]
         candidates = []
-        for i, option_text in enumerate(options_list, start=65):
+        for i, option_text in enumerate(options_list, start=65): 
             label = chr(i)
             text = option_text.strip()
             candidates.append({
@@ -78,6 +84,7 @@ def parse_question(question_block):
             })
         question_info['candidates'] = candidates
 
+   
     answer_match = re.search(r'答案:([ABCD])', question_block)
     if answer_match and 'candidates' in question_info:
         correct_label = answer_match.group(1)
@@ -94,23 +101,27 @@ class GetArticle:
         self.ds = DS()
 
         self.callback_url_dict = {}
-        self.real_ip_dict = {}
+        self.real_ip_dict = {} 
         self.demo_name = {}
 
+       
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)
 
+
+   
     def parser_insert_to_mysql(self, resp_result):
         for single_article in resp_result['articles']:
-
+           
             article = single_article['body']
             article_json = json.dumps(single_article)
-            difficult_value = find_interval(get_article_difficulty(article))
+            difficult_value = find_interval(get_article_difficulty(article)) 
             if not difficult_value:
                 logger.error("文章难度等级为0;")
             sql = "INSERT INTO spring_bamboo_article (article_json,difficult_level) VALUES (%s,%s)"
             self.m.execute_(sql, (article_json, difficult_value))
 
+   
     def submit_task(self, words_meaning_list: list, take_count: int, student_stage: int, real_ip: str, demo_name: str):
         """
         words_meaning_ids: 词义id 包含词义ID的数组集合,用于生成文章。- 示例:[110, 111, 112, 113, 114]
@@ -119,7 +130,7 @@ class GetArticle:
         demo_name: 项目名称
         """
         task_id = randint(10000000, 99999999)
-
+       
         words_meaning_str = ";".join([i["spell"] + ":" + i["meaning"] for i in words_meaning_list])
         logger.info(f"生成文章id。task_id:{task_id}。词义组:{words_meaning_str}.")
 
@@ -127,9 +138,9 @@ class GetArticle:
         self.demo_name[task_id] = demo_name
 
         try:
-
+           
             resp_result = self.run_task(words_meaning_list, task_id, take_count, student_stage)
-            self.parser_insert_to_mysql(resp_result)
+            self.parser_insert_to_mysql(resp_result) 
             return resp_result
         except Exception as e:
             err_msg = f"GetArticle提交任务失败{type(e).__name__},{e}"
@@ -137,28 +148,31 @@ class GetArticle:
 
             return err_msg
 
+   
     def get_article(self, words_meaning_list, student_stage, task_id, take_count) -> dict:
         diffculty_control = {
             1: {"grade": "小学", "article_word_count": 60, "desc_difficulty": "最简单最容易没有难度", "paragraph_count": 1,
                 "desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
-                "choice_desc": "选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
+                "choice_desc":"选择题难度尽可能简单,但是不要让所有选择题让其直接在文中找到答案,允许1-2个选择题很简单,参考中国小学生水平"},
             2: {"grade": "初中", "article_word_count": 200, "desc_difficulty": "简单、常见、难度低", "paragraph_count": 3,
                 "desc2": "文章整体难度适中,大约和中国初中生,中国CET-3,雅思4分这样的难度标准。",
-                "choice_desc": "选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
+                "choice_desc":"选择题难度适中,但是不要所有选择题让其直接在文中找到答案,参考中国初中生水平,中考标准。"},
             3: {"grade": "高中", "article_word_count": 300, "desc_difficulty": "常见、高中难度的", "paragraph_count": 3,
                 "desc2": "文章整体难度适中,大约和中国的高中生,中国CET-4,雅思5分这样的难度标准。",
-                "choice_desc": "选择题难度偏难,要有迷惑性,不要出现直接在文中找到答案,参考中国高中生水平,高考标准。"}
+                "choice_desc":"选择题难度偏难,要有迷惑性,不要出现直接在文中找到答案,参考中国高中生水平,高考标准。"}
         }
         take_count_dict = {0: "", 1: "一", 2: "二", 3: "三", 4: "四", 5: "五", 6: "六", 7: "七", 8: "八"}
         different_cou = take_count_dict.get(take_count, "")
 
-        grade = diffculty_control[student_stage]["grade"]
-        select_word_count = diffculty_control[student_stage]["article_word_count"]
-        select_diffculty = diffculty_control[student_stage]["desc_difficulty"]
-        select_paragraph_count = diffculty_control[student_stage]["paragraph_count"]
-        desc2 = diffculty_control[student_stage]["desc2"]
-        choice_desc = diffculty_control[student_stage]["choice_desc"]
+        grade = diffculty_control[student_stage]["grade"] 
+        select_word_count = diffculty_control[student_stage]["article_word_count"] 
+        select_diffculty = diffculty_control[student_stage]["desc_difficulty"] 
+        select_paragraph_count = diffculty_control[student_stage]["paragraph_count"] 
+        desc2 = diffculty_control[student_stage]["desc2"] 
+        choice_desc = diffculty_control[student_stage]["choice_desc"] 
+       
 
+       
         shuffle(words_meaning_list)
         words_meaning_str = ";".join([i["spell"] + ":" + i["meaning"] for i in words_meaning_list])
 
@@ -181,8 +195,9 @@ class GetArticle:
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(self.ds.get_article(q, temperature=1, json_resp=True, real_ip=real_ip, demo_name=demo_name, max_tokens=8000))
+            r_json = json.loads(self.ds.get_article(q, temperature=1, json_resp=True, real_ip=real_ip, demo_name=demo_name,max_tokens=8000))
 
+           
             r_json["body"] = r_json["title"] + "\n\n" + r_json["english"]
             del r_json["title"]
 
@@ -204,11 +219,12 @@ class GetArticle:
 
             resp_text = self.ds.get_article(q_choice_question, temperature=1, real_ip=real_ip, demo_name=demo_name, max_tokens=8000)
             questions = resp_text.strip().split('\n\n')
-
+           
             parsed_questions = [parse_question(q) for q in questions]
 
             json_data = {"questions": parsed_questions}
 
+           
             allWordAmount = 0
             allWordAmount += len(split_text_to_word(r_json["english"]))
             for i in json_data["questions"]:
@@ -223,6 +239,8 @@ class GetArticle:
         except Exception as e:
             logger.error(f"gpt生成文章回复其他错误.{type(e).__name__} {e}")
 
+
+   
     def run_get_article_task(self, words_meaning_list, task_id, take_count, student_stage) -> dict:
         """
         :param words_meaning_list: 数据库内查出来的单词和词义的列表
@@ -240,6 +258,7 @@ class GetArticle:
             return_json["articles"].append(t.result())
         return return_json
 
+   
     def run_task(self, words_meaning_list, task_id, take_count, student_stage):
         try:
             outside_json = self.run_get_article_task(words_meaning_list, task_id, take_count, student_stage)

+ 81 - 45
gpt/article_annotation.py

@@ -14,68 +14,82 @@
 
 import json
 from random import randint
-
 from cachetools import TTLCache
 from openpyxl import load_workbook
+import warnings
 
 from core.respone_format import *
+from common.split_text import split_text_to_word
 from data.get_all_exchange_words import word_to_prototype
 from gpt.chatgpt import get_annotation_gpt_pydantic
-from tools.loglog import log_err_e, logger
+from tools.loglog import log_err_e,logger
 from tools.thread_pool_manager import pool_executor
 
 
 class Annotation:
 
     def __init__(self):
-
+       
         self.all_task_data: dict[int, list] = {}
-
-        self.all_task_result = TTLCache(maxsize=1000, ttl=3600)
-
+       
+        self.all_task_result = TTLCache(maxsize=1000,ttl=3600)
+       
         self.word_meaning_dict: dict[str, list[tuple[int, str, str]]] = {}
 
-        self.prototype_words = set()
+       
+        self.prototype_words = set() 
         self.change_prototype_dict = {}
 
+       
         self.get_excel_meaning_data()
         self.get_excel_change_data()
 
-    def submit_task(self, english_text, real_ip):
+
+   
+    def submit_task(self, english_text,split_blank,real_ip):
         task_id = randint(10000000, 99999999)
-        logger.info(f"/article/annotation 生成id。task_id:{task_id},real_ip:{real_ip}")
+        logger.info(f"/article/annotation 生成id。task_id:{task_id},split_blank:{split_blank},real_ip:{real_ip}")
 
-        f = pool_executor.submit(self.main_annotation, task_id, english_text)
+        f = pool_executor.submit(self.main_annotation,task_id, english_text,split_blank)
         r = f.result()
 
         return r
 
+   
     def __run(self):
+        warnings.warn("废弃函数",DeprecationWarning,stacklevel=2)
+       
+       
+       
+       
+
+   
+    def main_annotation(self, task_id:int, english_text:str,split_blank:bool):
+       
+        if split_blank:
+            split_words = english_text.split()
+        else:
+            split_words = split_text_to_word(english_text, split_hyphen=False)
 
-        for task_id, task_data in self.all_task_data.items():
-            english_text, = task_data
-            self.main_annotation(task_id, english_text)
-
-    def main_annotation(self, task_id: int, english_text: str):
-
-        split_words = english_text.split()
-
+       
         meanings_data = self.query_meanings_data(split_words=split_words)
-
-        result_annotation = self.__ai_annotation(english_text=english_text, meanings_data=meanings_data)
-
+       
+        result_annotation = self.__ai_annotation(english_text=english_text,meanings_data=meanings_data)
+       
         self.all_task_result[task_id] = result_annotation
-
+       
         return result_annotation
 
-    async def query_result_by_taskid(self, task_id):
+   
+    async def query_result_by_taskid(self,task_id):
 
         if task_id in self.all_task_result:
             r = self.all_task_result[task_id]
             return resp_200(data=r)
-
+       
         return resp_200(data={})
 
+   
     def get_excel_meaning_data(self):
         """读取外部的春笋词义表,结构化到字典;单词为键,值[((词义id,中文词义))]"""
         spring_bamboo_meaning_path = "data/春笋词义表.xlsx"
@@ -85,8 +99,8 @@ class Annotation:
             for index, row in enumerate(ws.values, start=1):
                 if index == 1:
                     continue
-                word = row[3]
-                id_and_meaning = (row[0], word, row[2])
+                word = row[3] 
+                id_and_meaning = (row[0], word, row[2]) 
                 if word not in self.word_meaning_dict:
                     self.word_meaning_dict[word] = [id_and_meaning]
                 else:
@@ -96,6 +110,7 @@ class Annotation:
         finally:
             wb.close()
 
+   
     def get_excel_change_data(self):
         """读取外部的春笋变形表"""
         spring_bamboo_change_path = "data/春笋单词对照变形.xlsx"
@@ -103,8 +118,8 @@ class Annotation:
         ws = wb.active
         try:
             for row in ws.values:
-                word_prototype = row[0]
-                word_change = row[1]
+                word_prototype = row[0] 
+                word_change = row[1] 
                 self.prototype_words.add(word_prototype)
                 self.change_prototype_dict[word_change] = word_prototype
 
@@ -113,24 +128,31 @@ class Annotation:
         finally:
             wb.close()
 
-    def to_prototype_word(self, word):
-        if word in self.prototype_words:
+   
+    def to_prototype_word(self,word):
+        lower_word = word.lower()
+        if word in self.prototype_words: 
             w_prototype = word
-        elif word.lower() in self.prototype_words:
-            w_prototype = word.lower()
-        elif word in self.change_prototype_dict:
+        elif lower_word in self.prototype_words: 
+            w_prototype = lower_word
+        elif word in self.change_prototype_dict: 
             w_prototype = self.change_prototype_dict[word]
+        elif lower_word in self.change_prototype_dict: 
+            w_prototype = self.change_prototype_dict[lower_word]
         else:
-            w_prototype = word_to_prototype(word)
+            w_prototype = word_to_prototype(word) 
         return w_prototype
 
+   
     def __query_meaning(self, word: str) -> str:
         """
         :param word: 单个单词
         :return: 加工好的词义文本
         """
+       
 
-        meaning_data1 = []
+       
+        meaning_data1 = [] 
         if word in self.word_meaning_dict:
             meaning_data1.extend(self.word_meaning_dict[word])
             meaning_data_str = "".join([f"[{i[0]} {i[1]} {i[2]}]" for i in meaning_data1])
@@ -140,16 +162,19 @@ class Annotation:
             meaning_data_str = "".join([f"[{i[0]} {i[1]} {i[2]}]" for i in meaning_data1])
             return meaning_data_str
 
+       
         w_prototype = self.to_prototype_word(word)
 
-        key_to_check = w_prototype if w_prototype in self.word_meaning_dict else w_prototype.lower()
+        key_to_check = w_prototype if w_prototype in self.word_meaning_dict else w_prototype.lower() 
         if key_to_check in self.word_meaning_dict:
-            meaning_data = self.word_meaning_dict[key_to_check]
+            meaning_data = self.word_meaning_dict[key_to_check] 
             meaning_data1.extend(meaning_data)
-            meaning_data1 = list(set(meaning_data1))
+            meaning_data1 = list(set(meaning_data1)) 
             meaning_data_str = "".join([f"[{i[0]} {i[1]} {i[2]}]" for i in meaning_data1])
             return meaning_data_str
 
+
+   
     def query_meanings_data(self, split_words: list):
         """
         查询所有单词的词义数据包
@@ -158,25 +183,30 @@ class Annotation:
         """
         all_words_meaning_list = set()
         for word in split_words:
-            result_query_meaning: str = self.__query_meaning(word)
+            result_query_meaning:str = self.__query_meaning(word)
             if result_query_meaning:
                 all_words_meaning_list.add(f"【{word} {result_query_meaning}】")
 
-        new_data_str = "\n词义数据包:\n" + "\n".join(all_words_meaning_list) + "\n\n"
+        new_data_str = "\n词义数据包:\n" + "\n".join(all_words_meaning_list) + "\n\n" 
         return new_data_str
 
+   
     @staticmethod
-    def __parse_gpt_resp(gpt_resp: dict):
+    def __parse_gpt_resp(gpt_resp:dict):
         """
         解析ai-gpt的回复
         :param gpt_resp: GPT原始的回复
         :return:
         """
+       
+       
+       
 
         r = json.loads(gpt_resp["choices"][0]["message"]["content"])
         return r
 
-    def __ai_annotation(self, english_text, meanings_data):
+   
+    def __ai_annotation(self,english_text,meanings_data):
         """
         AI词义标注
         :param english_text: 英语文本
@@ -187,17 +217,23 @@ class Annotation:
 你的工作是对英语文本中的每个单词的原型,根据提供的词义数据包选择这个单词原型最合适的词义,并在单词后附上对应的词义ID。标注格式为:word[word_id]。
 要求:
 1.如果词义数据包中没有该单词或找不到合适的词义,请标注该单词在文中词义的中文翻译。示例:seismography[地震学] car[猫]。
-2.如果是[连字符-、中文、标点符号、数字、百分比、序号A.B.C.D.或者日期],这些不是英语单词,不用标记,保持原样不变。示例`1999 2025 18:00 苹果 ____ A. B. C. D. e-mail Exhaust-fans`,这些都不标记。
+2.如果是[缩写字符’和',连字符-、中文、标点符号、数字、百分比、序号A.B.C.D.或者日期],这些不是英语单词,不用标记,保持原样不变。\
+示例`It’s writer's 1999 2025 18:00 苹果 ____ A. B. C. D. e-mail Exhaust-fans`,这些都不标记。
 3.标注每个英语单词,不是短语。错误示例:be good at[擅长]。正确示例:be[11] good[12] at[13]。
 4.如果没有提供词义,则不标注。
+5.任何缩写单词,不标注忽略,保持不变,如示例2。例如It’s,It's,It’ s,It' s。
 
 回复格式要求如下:
 - 请按照用户原文顺序和格式返回处理后的文本。空格和换行\\n,不用改变,不要加减空格,与原文一致。
 - 每个单词后面标注上其对应的词义ID,格式为:`word[word_id]`。
 
-最终回复示例:If[1] a[2] dog[3] causes[4] a[5] cat[6] accident[7] and[8] gets[9] killed[10]
+最终回复
+示例1:If[1] a[2] dog[3] causes[4] a[5] cat[6] accident[7] and[8] gets[9] killed[10]
+示例2:It’s cold[672] and[9] snowy[2286] .
+
 请确保理解上述说明并准备好接收英语文本及词义数据包。"""
         user_question = "英语文本:\n" + english_text + meanings_data
-        gpt_resp = get_annotation_gpt_pydantic(question=user_question, sys_prompt=sys_question, max_tokens=8000)
+        gpt_resp = get_annotation_gpt_pydantic(question=user_question,sys_prompt=sys_question,max_tokens=8000)
         result_annotation = self.__parse_gpt_resp(gpt_resp=gpt_resp)
         return result_annotation
+

File diff suppressed because it is too large
+ 27 - 79
gpt/chatgpt.py


+ 139 - 110
gpt/get_article.py

@@ -1,37 +1,37 @@
 # -*- coding: utf-8 -*-
-import json
 import random
-import re
-import time
-import traceback
-from collections import OrderedDict
-from concurrent.futures import wait
-from random import randint
-
-import oss2
-import requests
-from cachetools import TTLCache
-from oss2.credentials import EnvironmentVariableCredentialsProvider
 
-from common.common_data import all_exchange_words
-from common.split_text import *
-from data.get_all_exchange_words import get_word_exchange_list, word_to_prototype
 from gpt.chatgpt import get_answer_from_gpt
-from tools.loglog import logger
 from tools.new_mysql import MySQLUploader
+from tools.loglog import logger
 from tools.thread_pool_manager import pool_executor
+from common.common_data import all_exchange_words
+from common.split_text import *
+from data.get_all_exchange_words import get_word_exchange_list,word_to_prototype
+
+import requests
+import oss2
+from oss2.credentials import EnvironmentVariableCredentialsProvider
+from collections import OrderedDict
+from cachetools import TTLCache
+from concurrent.futures import Future, wait
+from random import randint
+import re
+import json
+import time
+import traceback
 
 
 class OtherBaseFunction:
     def __init__(self):
         self.m = MySQLUploader()
-        self.fake_meaningid = {}
+        self.fake_meaningid = {} 
 
-        self.callback_url_dict = {}
-        self.real_ip_dict = {}
-        self.demo_name = {}
-        self.query_cache_wordspelling = TTLCache(maxsize=2000, ttl=86400)
-        self.query_cache_meaningid = TTLCache(maxsize=2000, ttl=86400)
+        self.callback_url_dict = {} 
+        self.real_ip_dict = {} 
+        self.demo_name = {} 
+        self.query_cache_wordspelling = TTLCache(maxsize=2000, ttl=86400) 
+        self.query_cache_meaningid = TTLCache(maxsize=2000, ttl=86400) 
 
     @staticmethod
     def _diffculty_control(student_stage, vocabulary) -> dict:
@@ -42,16 +42,17 @@ class OtherBaseFunction:
         :return:
         """
         if vocabulary <= 1200:
-            difficult_control = {"difficult_desc": "最简单最基础的入门的初级的幼儿园的毫无难度的", "paragraph_count": 1, "student_stage_str": "小学",
+            difficult_control = {"difficult_desc": "最简单最基础的入门的初级的幼儿园的毫无难度的", "paragraph_count": 1,"student_stage_str":"小学",
                                  "pragrapg_count": "生成的文章要求100词左右,三个段落以上。允许有简单句式的出现。"}
         elif 1200 < vocabulary <= 2400:
-            difficult_control = {"difficult_desc": "简单的容易的常见的难度低的", "paragraph_count": 3, "student_stage_str": "初中",
+            difficult_control = {"difficult_desc": "简单的容易的常见的难度低的", "paragraph_count": 3,"student_stage_str":"初中",
                                  "pragrapg_count": r"生成的文章要求150词左右,三个段落以上。用\n\n分段。"}
         else:
-            difficult_control = {"difficult_desc": "常见的初级的中国高考的", "paragraph_count": 5, "student_stage_str": "高中",
+            difficult_control = {"difficult_desc": "常见的初级的中国高考的", "paragraph_count": 5,"student_stage_str":"高中",
                                  "pragrapg_count": r"生成的文章要求250词左右,允许有3-5个段落。用\n\n分段。"}
         return difficult_control
 
+   
     def _get_article_chinese_dict(self, title, r_article_sentences, task_id):
         """
         获取文章的中文翻译。注意:这里切割的方法要与后面的split_article_make_json一致
@@ -85,37 +86,41 @@ class OtherBaseFunction:
 
             logger.critical("严重错误:gpt生成文章中文翻译三次全错,请管理员检查")
 
+       
         article_list = [title + "\n\n"] + r_article_sentences
 
+       
         r_article_chinese_dict = get_chinese_from_gpt(whole_article_sentences=article_list)
-
+       
         if r_article_chinese_dict:
             return r_article_chinese_dict
 
+   
     @staticmethod
     def _calculate_new_word_rate(r_article_sentences):
         article = "".join(r_article_sentences)
-        new_words = set()
+        new_words = set() 
         test_article = re.findall(r'\b\w+\'?\w*\b', article)
         for word in test_article:
             word2: str = word.split("'")[0] if "'" in word else word
-            if len(word) <= 2:
+            if len(word) <= 2: 
                 continue
             is_in_12000words = any([word2.lower() in all_exchange_words, word2.title() in all_exchange_words])
             if not is_in_12000words:
                 new_words.add(word)
         new_word_rate = round(len(new_words) / len(article), 3)
         logger.info(f"开发调试生词率{new_word_rate}.生词{new_words}")
-
+       
         new_words = list(new_words)
         return new_word_rate, new_words
 
+   
     def insert_article_to_mysql(self, title, article, chinese, task_id, code=0):
-
+       
         self.m.execute_("INSERT INTO new_word_article (title,article,chinese, taskId,code) VALUES (%s, %s,%s,%s,%s)",
                         (title, article, chinese, task_id, code))
 
-    def get_wordid_by_wordspelling(self, wordspelling: str):
+    def get_wordid_by_wordspelling(self, wordspelling:str):
         """加一个功能。大字典内没有这个单词就自动插入,返回id"""
         if wordspelling in self.query_cache_meaningid:
             return self.query_cache_wordspelling[wordspelling]
@@ -124,16 +129,16 @@ class OtherBaseFunction:
         prototype_word = word_to_prototype(wordspelling)
         r = self.m.query_data(s, (prototype_word,))
         if r:
-
+           
             wordid = r[0][0]
         else:
-
+           
             wordid = 0
 
         self.query_cache_wordspelling[wordspelling] = wordid
         return wordid
 
-    def get_meaning_by_meaningid(self, meaningid: int):
+    def get_meaning_by_meaningid(self, meaningid:int):
         """加一个功能。大字典内没有这个单词就自动插入,返回id"""
         if meaningid in self.query_cache_meaningid:
             return self.query_cache_meaningid[meaningid]
@@ -144,7 +149,7 @@ class OtherBaseFunction:
         self.query_cache_meaningid[meaningid] = meaning
         return meaning
 
-    def _get_fake_meaningid(self, word):
+    def _get_fake_meaningid(self,word):
         """获得假词义id。但是保证同一个单词是一个id"""
         if word in self.fake_meaningid:
             return self.fake_meaningid[word]
@@ -153,31 +158,33 @@ class OtherBaseFunction:
         if r:
             fake_meaningid = r[0][0]
         else:
-            fake_meaningid = random.randint(10000, 99999)
+            fake_meaningid = random.randint(10000,99999) 
 
         self.fake_meaningid[word] = fake_meaningid
         return fake_meaningid
 
+   
     @staticmethod
-    def _clean_gpt_res(single_sentence: str, gpt_text: str, split_words: list) -> list:
+    def _clean_gpt_res(single_sentence: str, gpt_text: str,split_words:list) -> list:
         """# 解析成  键是句子+单词拼写,值是词义id"""
         return_data = []
         if not gpt_text:
             return []
 
-        row_data = [i for i in gpt_text.split("\n") if "**" in i]
+        row_data = [i for i in gpt_text.split("\n") if "**" in i] 
 
         already_spelling = set()
         for row in row_data:
             one_row_data_list = row.split("**")
-            if len(one_row_data_list) < 1:
+            if len(one_row_data_list) < 1: 
                 continue
-            one_row_data_list = [i.strip() for i in one_row_data_list]
+            one_row_data_list = [i.strip() for i in one_row_data_list] 
             spelling, meaning_id = one_row_data_list[0:2]
 
             already_spelling.add(spelling)
             return_data.append([single_sentence, spelling, int(meaning_id)])
 
+       
         for remaining_word in set(split_words).difference(already_spelling):
             return_data.append([single_sentence, remaining_word, 0])
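The `spelling ** meaning_id` line format that `_clean_gpt_res` consumes (and that the prompt further below requests, e.g. `apple ** 234567`) can be checked standalone; the reply text here is invented:

    sample_reply = "apple ** 234567\nbanana ** 234568\nchatter without a separator"
    for row in sample_reply.split("\n"):
        if "**" not in row:
            continue
        spelling, meaning_id = [p.strip() for p in row.split("**")][:2]
        print(spelling, int(meaning_id))                # apple 234567 / banana 234568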
 
@@ -190,16 +197,17 @@ class GetArticle(OtherBaseFunction):
         self.auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
         self.bucket = oss2.Bucket(self.auth, 'oss-cn-hangzhou.aliyuncs.com', 'qingti-private')
 
-        self.article_result = {}
+        self.article_result = {} 
 
+       
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)
 
-    def __del__(self):
-        ...
+    def __del__(self):...
 
-    def submit_task(self, words_meaning_ids: list[int], callback_url: str, real_ip: str, demo_name: str,
-                    student_stage: int, vocabulary: int, class_id: int):
+   
+    def submit_task(self, words_meaning_ids: list[int],callback_url:str,real_ip:str,demo_name:str,
+                    student_stage:int,vocabulary:int,class_id:int):
         """
         words_meaning_ids: array of meaning ids used to generate the article; example: [110, 111, 112, 113, 114]
         callback_url: callback address notified on completion
@@ -210,13 +218,14 @@ class GetArticle(OtherBaseFunction):
         task_id = randint(10000000, 99999999)
         logger.info(f"生成文章id。task_id:{task_id}。词义id:{words_meaning_ids}.")
 
+       
         self.callback_url_dict[task_id] = callback_url
         self.real_ip_dict[task_id] = real_ip
         self.demo_name[task_id] = demo_name
 
         words_meaning_str = ""
         for wordmeaning_id in words_meaning_ids:
-            r = self.m.query_data("select WordSpelling,WordMeaning from dictionary_meaningitem where Id = %s", (wordmeaning_id,))
+            r = self.m.query_data("select WordSpelling,WordMeaning from dictionary_meaningitem where Id = %s",(wordmeaning_id,))
             try:
                 words_meaning_str += str(r[0])
             except IndexError:
@@ -225,10 +234,10 @@ class GetArticle(OtherBaseFunction):
                 return err_msg
 
         try:
-
-            pool_executor.submit(self.run_task, words_meaning_str, task_id, student_stage, vocabulary, class_id)
-
-            resp_result = {"id": task_id, "key": f"study/article/{task_id}"}
+           
+            pool_executor.submit(self.run_task, words_meaning_str, task_id,student_stage,vocabulary,class_id)
+           
+            resp_result = {"id":task_id,"key":f"study/article/{task_id}"}
             logger.success(f"文章生成任务提交成功:{resp_result}")
             return resp_result
         except Exception as e:
@@ -236,8 +245,9 @@ class GetArticle(OtherBaseFunction):
             logger.error(err_msg)
             return err_msg
 
-    def __get_article(self, words_meaning_str, task_id, student_stage, vocabulary) -> tuple:
-        dc = self._diffculty_control(student_stage, vocabulary)
+   
+    def __get_article(self,words_meaning_str,task_id,student_stage,vocabulary) -> tuple:
+        dc = self._diffculty_control(student_stage,vocabulary)
         q = f"""你是一名在中国的英语教师,下面我会为你提供一些带中文词义的英语种子单词,请根据这些种子单词的词义,生成一篇带标题的英语文章。
 提供种子单词:{words_meaning_str}
 
@@ -251,16 +261,17 @@ class GetArticle(OtherBaseFunction):
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
+            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True,real_ip=real_ip,demo_name=demo_name))
             r_article_sentences = r_json.get("article_sentences")
             r_title = r_json.get("title")
-            return r_title, r_article_sentences
+            return r_title,r_article_sentences
         except json.decoder.JSONDecodeError:
             logger.error("gpt生成文章回复json格式化错误")
         except Exception as e:
             logger.error(f"gpt生成文章回复其他错误.{type(e).__name__} {e}")
 
-    def __replace_new_word(self, old_article: str, new_words: list, task_id: int):
+   
+    def __replace_new_word(self, old_article: str, new_words: list,task_id:int):
         new_words_str = ",".join(new_words)
         q = f"""你是一名在中国的英语教师,下面我会为你提供一篇英语文章和一些生词,请用其他单词使用简单、常见、难度低的单词将英语文章中的生词进行替换。
 缩写引号用单引号'。最终回复替换后的英语文章。
@@ -276,7 +287,7 @@ class GetArticle(OtherBaseFunction):
         try:
             real_ip = self.real_ip_dict[task_id]
             demo_name = self.demo_name[task_id]
-            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True, real_ip=real_ip, demo_name=demo_name))
+            r_json = json.loads(get_answer_from_gpt(q, temperature=0.8, json_resp=True,real_ip=real_ip,demo_name=demo_name))
             print(f"调试信息2 {r_json}")
             r_article = r_json.get("article")
             r_title = r_json.get("title")
@@ -286,7 +297,8 @@ class GetArticle(OtherBaseFunction):
         except Exception as e:
             logger.error(f"gpt替换生词文章回复其他错误.{type(e).__name__} {e}")
 
-    def run_get_article_task(self, words_meaning_str, task_id, student_stage, vocabulary) -> tuple:
+   
+    def run_get_article_task(self, words_meaning_str, task_id,student_stage,vocabulary) -> tuple:
         """
         :param vocabulary:
         :param student_stage:
@@ -295,40 +307,47 @@ class GetArticle(OtherBaseFunction):
         :return: title, article, dict of sentence translations
         """
 
-        def get_article_chinese(title, r_article_sentences, task_id, code=0) -> tuple:
+        def get_article_chinese(title,r_article_sentences,task_id,code=0)-> tuple:
             r_article_chinese_dict = self._get_article_chinese_dict(title, r_article_sentences, task_id)
             chinese_str = "\n".join(r_article_chinese_dict.values())
             r_article = "".join(r_article_sentences)
 
-            self.insert_article_to_mysql(title=r_title, article=r_article, chinese=chinese_str, task_id=task_id, code=code)
+            self.insert_article_to_mysql(title=r_title, article=r_article, chinese=chinese_str, task_id=task_id,code=code)
             return r_title, r_article_sentences, r_article_chinese_dict
 
-        r_title, r_article_sentences = self.__get_article(words_meaning_str, task_id, student_stage, vocabulary)
+       
+        r_title,r_article_sentences = self.__get_article(words_meaning_str,task_id,student_stage,vocabulary)
 
         new_word_rate, new_words = self._calculate_new_word_rate(r_article_sentences)
         if new_word_rate < 0.03:
             return get_article_chinese(title=r_title, r_article_sentences=r_article_sentences, task_id=task_id)
 
+       
         replace_article_gpt = "".join(r_article_sentences)
         for i in range(3):
-            if tuple_data := self.__replace_new_word(old_article=replace_article_gpt, new_words=new_words, task_id=task_id):
-                r_title, replace_article_gpt = tuple_data
+            if tuple_data:=self.__replace_new_word(old_article=replace_article_gpt, new_words=new_words,task_id=task_id):
+                r_title,replace_article_gpt = tuple_data
 
                 new_word_rate, new_words = self._calculate_new_word_rate(replace_article_gpt)
                 if new_word_rate < 0.03 or i == 2:
                     if i == 2:
                         logger.warning(f"3次后生词率未到3%以下。task_id:{task_id}")
-                    return get_article_chinese(title=r_title, r_article_sentences=r_article_sentences, task_id=task_id)
+                    return get_article_chinese(title=r_title,r_article_sentences=r_article_sentences,task_id=task_id)
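The retry policy above (rewrite at most three times, stopping as soon as the new-word rate drops under 3%) reduces to the following sketch; `rewrite` and `rate_of` are hypothetical stand-ins for the GPT replacement call and `_calculate_new_word_rate`:

    def reduce_new_words(article, rewrite, rate_of, limit=0.03, max_tries=3):
        for _ in range(max_tries):
            article = rewrite(article)      # ask the model to swap out difficult words
            if rate_of(article) < limit:
                break                       # good enough, stop early
        return article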
 
-    def split_article_make_json(self, task_id: int, title: str, r_article_sentences: list, r_article_chinese_dict: dict):
+   
+    def split_article_make_json(self, task_id: int,title:str, r_article_sentences: list,r_article_chinese_dict:dict):
 
+       
         article = "".join(r_article_sentences)
         article = title + "\n\n" + article
 
-        all_sentence_word_meaningid_dict = self.run_query_word_meaning(article, task_id)
+       
+        all_sentence_word_meaningid_dict = self.run_query_word_meaning(article,task_id)
 
-        word_count = get_article_words_count(title + article)
+       
+        word_count = get_article_words_count(article)  # "article" already has the title prepended above
 
+       
         create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         outside_json_dict = {"id": task_id, "body": article, "wordCount": word_count, "paragraphs": [],
                              "createTime": create_time}
@@ -336,16 +355,17 @@ class GetArticle(OtherBaseFunction):
         article_paragraphs = article.split("\n\n")
         article_sentence_count = 0
         for paragraph in article_paragraphs:
-            sentences = split_text_to_sentences(paragraph)
+            sentences = split_text_to_sentences(paragraph) 
 
             p = {"sentences": []}
             for single_sentence in sentences:
                 article_sentence_count += 1
-                single_sentence_chinese = r_article_chinese_dict.get(single_sentence, "")
-
-                w = {"words": [], "chinese": single_sentence_chinese}
-                split_words: list[str] = re.findall(r'\b[-\'\w]+\b|[^\w\s]', single_sentence)
+                single_sentence_chinese = r_article_chinese_dict.get(single_sentence,"")
+               
+                w = {"words": [],"chinese":single_sentence_chinese}
+                split_words:list[str] = re.findall(r'\b[-\'\w]+\b|[^\w\s]', single_sentence) 
 
+               
                 for originale_word in split_words:
                     single_word = originale_word
                     if not originale_word:
@@ -354,24 +374,26 @@ class GetArticle(OtherBaseFunction):
                         w["words"].append({"spell": originale_word, "type": "punctuation"})
                         continue
 
+                   
                     word_id = self.get_wordid_by_wordspelling(originale_word)
 
-                    x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + originale_word, [0, 0])
-                    if type_ == 0:
+                    x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + originale_word, [0,0])
+                    if type_ == 0: 
                         single_word = originale_word.lower()
-                        x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0, 0])
-                        if type_ == 0:
+                        x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0,0])
+                        if type_ == 0: 
                             single_word = word_to_prototype(single_word)
-                            x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word, [0, 0])
+                            x_data, type_ = all_sentence_word_meaningid_dict.get(single_sentence + single_word,[0,0])
 
                     if type_ == 0:
                         logger.warning(f"警告:type_还是0,那就是二次查询时,也没有给词义。有漏下的单词{originale_word}")
                         continue
 
-                    if type_ == 1:
+                   
+                    if type_ == 1: 
                         meaning_id = x_data
                         meaning = self.get_meaning_by_meaningid(x_data)
-                    elif type_ == 2:
+                    elif type_ == 2: 
                         meaning_id = self._get_fake_meaningid(single_word)
                         meaning = x_data
                     else:
@@ -379,10 +401,11 @@ class GetArticle(OtherBaseFunction):
                         meaning_id = 9999999
                         meaning = '无'
 
+                   
                     word_prototype = word_to_prototype(originale_word)
 
-                    word_json = {"id": word_id, "meaningId": meaning_id, "meaning": meaning, "spell": originale_word,
-                                 "exchanges": get_word_exchange_list(word=single_word), "prototype": word_prototype}
+                    word_json = {"id": word_id, "meaningId": meaning_id,"meaning":meaning, "spell": originale_word,
+                                 "exchanges": get_word_exchange_list(word=single_word),"prototype": word_prototype}
                     w["words"].append(word_json)
 
                 p["sentences"].append(w)
@@ -390,9 +413,10 @@ class GetArticle(OtherBaseFunction):
             outside_json_dict["paragraphs"].append(p)
 
         outside_json_dict["articleSentenceCount"] = article_sentence_count
-        return outside_json_dict, word_count, article_sentence_count
+        return outside_json_dict,word_count,article_sentence_count
 
-    def run_query_word_meaning(self, article, task_id):
+   
+    def run_query_word_meaning(self, article,task_id):
         futures = []
         article_paragraphs = article.split("\n\n")
 
@@ -400,20 +424,21 @@ class GetArticle(OtherBaseFunction):
             sentences = split_text_to_sentences(paragraph)
 
             for single_sentence in sentences:
-                f = pool_executor.submit(self.query_word_meaning_from_gpt, single_sentence, task_id)
+                f = pool_executor.submit(self.query_word_meaning_from_gpt, single_sentence,task_id)
                 futures.append(f)
 
         wait(futures)
         all_sentence_word_meaningid_dict = {}
         for f in futures:
-            f_result = f.result()
+            f_result = f.result() 
             all_sentence_word_meaningid_dict.update(f_result)
         return all_sentence_word_meaningid_dict
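`run_query_word_meaning` fans one GPT lookup per sentence out onto the shared pool, then joins and merges the partial dicts. The same pattern in miniature, with a local executor and a stand-in `lookup`:

    from concurrent.futures import ThreadPoolExecutor, wait

    def lookup(sentence):                   # stand-in for query_word_meaning_from_gpt
        return {sentence + w: [0, 1] for w in sentence.split()}

    pool = ThreadPoolExecutor(max_workers=8)
    futures = [pool.submit(lookup, s) for s in ["One sentence.", "Another one."]]
    wait(futures)                           # block until every lookup has returned
    merged = {}
    for f in futures:
        merged.update(f.result())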
 
-    def query_word_meaning_from_gpt(self, single_sentence, task_id) -> dict:
+   
+    def query_word_meaning_from_gpt(self, single_sentence,task_id) -> dict:
         """single_sentence 提交单个句子"""
         split_words = split_text_to_word(single_sentence)
-
+       
         split_words = [word_to_prototype(w) for w in split_words if w]
 
         placeholders = ', '.join(['%s'] * len(split_words))
@@ -451,19 +476,20 @@ apple ** 234567
 
         real_ip = self.real_ip_dict[task_id]
         demo_name = self.demo_name[task_id]
-        r_gpt = get_answer_from_gpt(q, real_ip=real_ip, demo_name=demo_name)
+        r_gpt = get_answer_from_gpt(q,real_ip=real_ip,demo_name=demo_name)
 
-        already_data, need_twice_data = {}, []
+       
+        already_data,need_twice_data = {},[]
 
-        three_list = self._clean_gpt_res(single_sentence, r_gpt, split_words)
+        three_list = self._clean_gpt_res(single_sentence, r_gpt,split_words)
 
         for sentence, spelling, meaning_id in three_list:
-
+           
             if meaning_id == 0:
                 need_twice_data.append([sentence, spelling, meaning_id])
             else:
-
-                already_data[sentence + spelling] = [meaning_id, 1]
+               
+                already_data[sentence + spelling] = [meaning_id,1]
 
         for _, spelling, _ in need_twice_data:
             need_twice_words = ",".join([spelling])
@@ -478,17 +504,18 @@ apple ** 234567
     回复示例:
     {{"单词":"中文词义",...}}
     """
-            r2 = get_answer_from_gpt(q2, real_ip=real_ip, demo_name=demo_name, json_resp=True)
-            r2_json: dict = json.loads(r2)
-            for w_spelling, chinese_meaning in r2_json.items():
-                already_data[single_sentence + w_spelling] = [chinese_meaning, 2]
+            r2 = get_answer_from_gpt(q2,real_ip=real_ip,demo_name=demo_name,json_resp=True)
+            r2_json:dict = json.loads(r2)
+            for w_spelling,chinese_meaning in r2_json.items():
+                already_data[single_sentence + w_spelling] = [chinese_meaning,2]
 
         return already_data
 
-    def upload_json_file_to_oss(self, article_id: int, data_dict: dict):
+   
+    def upload_json_file_to_oss(self,article_id:int,data_dict:dict):
         json_data = json.dumps(data_dict, ensure_ascii=False)
         object_name = f'study/article/{article_id}'
-        content = json_data.encode('utf-8')
+        content = json_data.encode('utf-8') 
         for _ in range(2):
             try:
                 r = self.bucket.put_object(object_name, content)
@@ -502,17 +529,18 @@ apple ** 234567
         else:
             logger.critical(f"2次上传oss错误,taskid:{article_id}")
 
-    def notice_teach_system(self, article_id: int, class_id: int, word_count: int, article_sentence_count: int):
+   
+    def notice_teach_system(self,article_id:int,class_id:int,word_count:int,article_sentence_count:int):
         url = self.callback_url_dict.get(article_id)
         if not url or "localhost/callback" in url:
             return False
 
-        json_data = {"classId": class_id, "articleId": article_id, "articleWordCount": word_count, "articleSentenceCount": article_sentence_count}
+        json_data = {"classId": class_id,"articleId": article_id,"articleWordCount": word_count,"articleSentenceCount": article_sentence_count}
         for _ in range(3):
             try:
-                r = requests.post(url, json=json_data)
+                r = requests.post(url,json=json_data)
                 r.raise_for_status()
-                self.callback_url_dict.pop(article_id, '')
+                self.callback_url_dict.pop(article_id,'')
                 logger.success(f"通知成功{r.text}")
                 return True
             except Exception as e:
@@ -520,22 +548,23 @@ apple ** 234567
 
         logger.critical(f"通知接口失败,三次全错. article_id:{article_id} callback_url:{url}")
 
-    def clean_source(self, article_id):
+   
+    def clean_source(self,article_id):
         self.callback_url_dict.pop(article_id, '')
         self.real_ip_dict.pop(article_id, '')
 
-    def run_task(self, words_meaning_str, task_id, student_stage, vocabulary, class_id):
+   
+    def run_task(self,words_meaning_str, task_id,student_stage,vocabulary,class_id):
         try:
-            title, r_article_sentences, r_article_chinese_dict = self.run_get_article_task(words_meaning_str, task_id, student_stage, vocabulary)
+            title,r_article_sentences,r_article_chinese_dict = self.run_get_article_task(words_meaning_str, task_id,student_stage,vocabulary)
 
-            outside_json_dict, word_count, article_sentence_count = self.split_article_make_json(task_id, title, r_article_sentences,
-                                                                                                 r_article_chinese_dict)
-            self.upload_json_file_to_oss(article_id=task_id, data_dict=outside_json_dict)
-            self.notice_teach_system(article_id=task_id, class_id=class_id, word_count=word_count, article_sentence_count=article_sentence_count)
+            outside_json_dict,word_count,article_sentence_count = self.split_article_make_json(task_id,title,r_article_sentences,r_article_chinese_dict)
+            self.upload_json_file_to_oss(article_id=task_id,data_dict=outside_json_dict)
+            self.notice_teach_system(article_id=task_id,class_id=class_id,word_count=word_count,article_sentence_count=article_sentence_count)
             self.clean_source(article_id=task_id)
             logger.success(f"文章任务完成。taskid:{task_id}")
 
         except Exception as e:
             logger.error(f"{type(e).__name__} {e}")
             traceback_str = traceback.format_exc()
-            logger.error(f"外围错误追溯:{traceback_str}")
+            logger.error(f"外围错误追溯:{traceback_str}")

+ 91 - 46
gpt/get_article2.py

@@ -1,21 +1,27 @@
 # -*- coding: utf-8 -*-
 
-import json
-from collections import defaultdict
-from random import randint, shuffle, sample
+from gpt.chatgpt import get_answer_from_gpt, get_article_gpt_pydantic
+from gpt.gpt_check import CheckGptAnswer, CheckArticleResult
+from tools.new_mysql import MySQLUploader
+from tools.loglog import logger, log_err_e
+from tools.thread_pool_manager import pool_executor
+from common.common_data import all_exchange_words
+from common.split_text import split_text_to_word, get_article_words_count
 
-import httpx
+from pydantic import BaseModel
+from cachetools import TTLCache
+from concurrent.futures import wait
+from random import randint, shuffle, sample
+import json,time
 import requests
-from fastapi import BackgroundTasks
 from openpyxl import load_workbook
 from tenacity import retry, stop_after_attempt, wait_fixed
+import httpx
+import asyncio
+from threading import Lock
+from collections import defaultdict
+from fastapi import BackgroundTasks
 
-from common.common_data import all_exchange_words
-from common.split_text import split_text_to_word, get_article_words_count
-from gpt.chatgpt import get_article_gpt_pydantic
-from gpt.gpt_check import CheckArticleResult
-from tools.loglog import logger, log_err_e
-from tools.new_mysql import MySQLUploader
 
 
 def get_article_difficulty(article) -> int:
@@ -63,9 +69,11 @@ def merge_and_split(list1, list2):
     import random
     random.shuffle(combined)
 
+   
     two_thirds = []
     one_third = []
 
+   
     total_length = len(combined)
     if total_length > 15:
         two_thirds = combined[:15]
@@ -79,33 +87,40 @@ def merge_and_split(list1, list2):
 
 class GetArticle:
     def __init__(self):
-        self.m = MySQLUploader()
+        self.m = MySQLUploader() 
 
+       
         self.callback_url_dict = defaultdict(str)
-        self.real_ip_dict = defaultdict(str)
+        self.real_ip_dict = defaultdict(str) 
         self.demo_name = defaultdict(str)
 
-        self.article_result = {}
 
+        self.article_result = {} 
+
+       
         self.punctuation = [",", ".", "!", "?", ":", ";", '"', "–", "_", "-", "...", "......"]
         all_exchange_words.update(self.punctuation)
 
-        self.exchange_data: dict[str, list] = {}
+       
+        self.exchange_data: dict[str, list] = {} 
         self.read_spring_bamboo_exchange_table()
 
+
+   
     def read_spring_bamboo_exchange_table(self):
         """变形是键,原型是值"""
         wb = load_workbook(r"data/春笋单词对照变形.xlsx", read_only=True, data_only=True)
         ws = wb.active
         for row in ws.values:
-            prototype = row[0]
-            exchange = row[1]
+            prototype = row[0] 
+            exchange = row[1] 
             if prototype not in self.exchange_data:
                 self.exchange_data[prototype] = [exchange]
             else:
                 self.exchange_data[prototype].append(exchange)
         wb.close()
 
+   
     def parser_insert_to_mysql(self, resp_result):
         try:
             for single_article in resp_result['articles']:
@@ -117,12 +132,13 @@ class GetArticle:
                 sql = "INSERT INTO spring_bamboo_article (article_json,difficult_level) VALUES (%s,%s)"
                 self.m.execute_(sql, (article_json, difficult_value))
         except Exception as e:
-
+           
             logger.error(f"插入数据库时发生错误: {str(e)}")
 
+   
     def submit_task(self, real_ip: str, core_words: list, take_count: int,
-                    demo_name: str, reading_level: int, article_length: int, exercise_id: int,
-                    background_tasks: BackgroundTasks):
+                          demo_name: str, reading_level: int, article_length: int, exercise_id: int,
+                          background_tasks: BackgroundTasks):
         """
         core_words: the word-meaning data set
         take_count: number of articles to produce (int; normally 2, at most 8)
@@ -139,10 +155,11 @@ class GetArticle:
             self.real_ip_dict[task_id] = real_ip
             self.demo_name[task_id] = demo_name
 
-            resp_result = self.run_task(core_words, task_id, exercise_id, take_count, reading_level, article_length)
-
+            resp_result = self.run_task(core_words, task_id,exercise_id, take_count, reading_level, article_length)
+            
+           
             background_tasks.add_task(self.parser_insert_to_mysql, resp_result)
-
+            
             logger.success(f"reading-comprehension 文章2任务完成。学案id:{exercise_id},taskid:{task_id}")
             return resp_result
         except Exception as e:
@@ -150,17 +167,18 @@ class GetArticle:
             log_err_e(e, msg="GetArticle提交任务失败;")
             return err_msg
         finally:
-
+           
             self.real_ip_dict.pop(task_id, None)
             self.demo_name.pop(task_id, None)
 
-    def __parse_gpt_resp(self, gpt_resp: dict, core_words: list):
-        return_json = {"articles": []}
+   
+    def __parse_gpt_resp(self,gpt_resp:dict,core_words:list):
+        return_json = {"articles": []} 
         for choice in gpt_resp["choices"]:
             single_article_dict = json.loads(choice["message"]["content"])
 
-            allWordAmount = 0
-
+            allWordAmount = 0 
+           
             articleWordAmount = get_article_words_count(single_article_dict["englishArticle"])
             allWordAmount += articleWordAmount
 
@@ -170,25 +188,28 @@ class GetArticle:
                 allWordAmount += count_trunk
                 allWordAmount += count_candidates
 
-            usedMeanIds: list = single_article_dict['usedMeanIds']
-
+           
+            usedMeanIds: list = single_article_dict['usedMeanIds'] 
+           
             article_words = split_text_to_word(single_article_dict['englishArticle'])
-
+           
             for i in core_words:
                 meaning_id = i.get('meaning_id', 0)
                 if not meaning_id:
                     continue
                 word = i["spell"]
-                if meaning_id not in usedMeanIds and word in self.exchange_data:
+                if meaning_id not in usedMeanIds and word in self.exchange_data: 
                     words_exchanges_list = self.exchange_data[word]
                     for exchange_word in words_exchanges_list:
                         if exchange_word in article_words:
                             usedMeanIds.append(meaning_id)
                             break
 
+           
             single_article_dict["body"] = single_article_dict.pop("englishArticle")
             single_article_dict["chinese"] = single_article_dict.pop("chineseArticle")
 
+           
             for q in single_article_dict['questions']:
                 data = q['candidates']
                 shuffled_candidates = sample(data, len(data))
@@ -198,13 +219,17 @@ class GetArticle:
                     candidate['label'] = labels[index]
                 q['candidates'] = shuffled_candidates
 
+           
             return_json['articles'].append({**single_article_dict, "allWordAmount": allWordAmount, "articleWordAmount": articleWordAmount})
 
         return return_json
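The candidate re-lettering performed in `__parse_gpt_resp` can be run on its own; the sample candidates are invented and `labels` mirrors the A-D list the loop indexes into:

    from random import sample

    labels = ["A", "B", "C", "D"]
    candidates = [{"text": "red"}, {"text": "green"}, {"text": "blue"}, {"text": "grey"}]
    shuffled_candidates = sample(candidates, len(candidates))   # a shuffled copy
    for index, candidate in enumerate(shuffled_candidates):
        candidate["label"] = labels[index]
    print([f"{c['label']}:{c['text']}" for c in shuffled_candidates])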
 
-    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
-    def get_article(self, core_words: list, task_id: int, exercise_id: int, reading_level, article_length, n) -> dict:
 
+
+   
+    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
+    def get_article(self, core_words: list, task_id: int,exercise_id:int, reading_level, article_length,n) -> dict:
+       
         if not article_length:
             if 0 < reading_level <= 10:
                 article_length = 50 + 10 * reading_level
@@ -217,9 +242,10 @@ class GetArticle:
             if start <= reading_level <= end:
                 difficulty_control_stage = index
                 break
-        else:
+        else: 
             difficulty_control_stage = 2
 
+       
         diffculty_control = {
             1: {"grade": "小学", "desc_difficulty": "最简单最容易没有难度", "paragraph_count": "1-2",
                 "desc2": "文章整体非常简洁,通俗易懂,适合初学者,刚入门,单词全是最常见的,语句通顺即可。",
@@ -234,13 +260,15 @@ class GetArticle:
                 "desc2": "文章整体难度适中,大约和中国的高中生,中国CET-6,雅思6分这样的难度标准。",
                 "choice_desc": "选择题难度偏难,要有迷惑性混淆性,答案不要出现直接在文中,4个选项要学生推理或逻辑判断,参考中国高中生水平,高考标准。"}
         }
+       
 
-        grade = diffculty_control[difficulty_control_stage]["grade"]
-        select_diffculty = diffculty_control[difficulty_control_stage]["desc_difficulty"]
-        select_paragraph_count = diffculty_control[difficulty_control_stage]["paragraph_count"]
+        grade = diffculty_control[difficulty_control_stage]["grade"] 
+        select_diffculty = diffculty_control[difficulty_control_stage]["desc_difficulty"] 
+        select_paragraph_count = diffculty_control[difficulty_control_stage]["paragraph_count"] 
         desc2 = diffculty_control[difficulty_control_stage]["desc2"]
-        choice_desc = diffculty_control[difficulty_control_stage]["choice_desc"]
+        choice_desc = diffculty_control[difficulty_control_stage]["choice_desc"] 
 
+       
         shuffle(core_words)
         core_words_meaning_str = "; ".join([f"[{i['meaning_id']}  {i['spell']} {i['meaning']}]" for i in core_words])
 
@@ -266,10 +294,10 @@ class GetArticle:
             demo_name = self.demo_name[task_id]
 
             gpt_resp = get_article_gpt_pydantic(q, temperature=1.2, real_ip=real_ip, demo_name=demo_name, model='gpt-4.1',
-                                                check_fucn=CheckArticleResult.get_article_1, max_tokens=15000,
-                                                sys_prompt=sys_prompt, n=n, task_id=task_id, exercise_id=exercise_id)
+                                                               check_fucn=CheckArticleResult.get_article_1, max_tokens=15000,
+                                                               sys_prompt=sys_prompt,n=n,task_id=task_id,exercise_id=exercise_id)
 
-            multi_articles_dict = self.__parse_gpt_resp(gpt_resp=gpt_resp, core_words=core_words)
+            multi_articles_dict = self.__parse_gpt_resp(gpt_resp=gpt_resp,core_words=core_words)
             return multi_articles_dict
 
         except httpx.HTTPError as e:
@@ -282,8 +310,11 @@ class GetArticle:
             log_err_e(e, f"gpt生成文章回复其他错误.")
             raise
 
-    def run_get_article_task(self, core_words, task_id, exercise_id, take_count, reading_level, article_length) -> dict:
+
+   
+    def run_get_article_task(self, core_words, task_id,exercise_id, take_count, reading_level, article_length) -> dict:
         """
+        :param exercise_id: teaching-plan (学案) id
         :param core_words: core word data, priority 1; may be empty
         :param task_id: task id
         :param take_count: number of articles
@@ -292,15 +323,16 @@ class GetArticle:
         :return:
         """
         try:
-            return_json = self.get_article(core_words, task_id, exercise_id, reading_level, article_length, n=take_count)
+            return_json = self.get_article(core_words, task_id,exercise_id, reading_level, article_length,n=take_count)
             return return_json
         except Exception as e:
             logger.error(f"运行文章任务时发生错误: {str(e)}")
             raise
 
-    def run_task(self, core_words, task_id, exercise_id, take_count, reading_level, article_length):
+   
+    def run_task(self, core_words, task_id,exercise_id, take_count, reading_level, article_length):
         try:
-            outside_json = self.run_get_article_task(core_words, task_id, exercise_id, take_count, reading_level, article_length)
+            outside_json = self.run_get_article_task(core_words, task_id,exercise_id, take_count, reading_level, article_length)
             return outside_json
         except Exception as e:
             log_err_e(e, msg="外层总任务捕获错误")
@@ -308,3 +340,16 @@ class GetArticle:
     def cleanup(self):
         """清理所有资源"""
         pass
+       
+       
+       
+       
+       
+       
+       
+       
+       
+       
+       
+       
+

+ 27 - 23
gpt/gpt.py

@@ -1,26 +1,26 @@
 # -*- coding:utf-8 -*-
 if __name__ == '__main__':
     import os
-
     os.chdir("..")
 
-import time
-
 import requests
-
-from tools.loglog import logger, simple_logger
+import random
+import time
+from tools.loglog import logger,simple_logger
 from tools.new_mysql import MySQLUploader
 
 m = MySQLUploader()
 
 
-def insert_ip_token(ip, demo_name, gpt_content, prompt_tokens, completion_tokens, total_tokens):
+def insert_ip_token(ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens):
     sql = "insert into consumer_token (ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens) values (%s,%s,%s,%s,%s,%s)"
-    m.execute_(sql, (ip, demo_name, str(gpt_content), prompt_tokens, completion_tokens, total_tokens))
+    m.execute_(sql,(ip,demo_name,str(gpt_content),prompt_tokens,completion_tokens,total_tokens))
 
+def get_answer_from_gpt(question,real_ip="localhost",demo_name="无",model="gpt-4o",max_tokens=3500,temperature:float=0,json_resp=False,n=1,sys_prompt=None):
+   
+   
+   
 
-def get_answer_from_gpt(question, real_ip="localhost", demo_name="无", model="gpt-4o", max_tokens=3500, temperature: float = 0, json_resp=False, n=1,
-                        sys_prompt=None):
     if "3.5" in model or "3.5-turbo" in model or "3.5turbo" in model:
         model = "gpt-3.5-turbo"
     elif "4o" in model or "gpt4o" in model:
@@ -28,17 +28,19 @@ def get_answer_from_gpt(question, real_ip="localhost", demo_name="无", model="g
     elif "4turbo" in model or "4-turbo" in model:
         model = "gpt-4-turbo"
 
+   
     d2 = {
-        "model": model,
-        "messages": [],
-        "max_tokens": max_tokens,
-        "temperature": temperature,
-        'n': n}
+    "model": model,
+    "messages": [],
+    "max_tokens": max_tokens,
+    "temperature": temperature,
+    'n': n}
 
     if sys_prompt:
         d2['messages'].append({"role": "system", "content": sys_prompt})
     d2['messages'].append({"role": "user", "content": question})
 
+
     if json_resp is True:
         d2["response_format"] = {"type": "json_object"}
     elif json_resp is False:
@@ -48,22 +50,23 @@ def get_answer_from_gpt(question, real_ip="localhost", demo_name="无", model="g
 
     for _ in range(3):
         try:
-
+           
             response = requests.post(f'http://170.106.108.95/v1/chat/completions', json=d2)
             r_json = response.json()
-            if r2 := r_json.get("choices", None):
-                if n > 1:
+            if r2:= r_json.get("choices",None):
+                if n>1:
                     gpt_res = []
                     for i in r2:
                         gpt_res.append(i["message"]["content"])
                 else:
-                    gpt_res = r2[0]["message"]["content"]
+                    gpt_res= r2[0]["message"]["content"]
 
+               
                 gpt_content = str(gpt_res)
                 prompt_tokens = r_json["usage"]["prompt_tokens"]
                 completion_tokens = r_json["usage"]["completion_tokens"]
                 total_tokens = r_json["usage"]["total_tokens"]
-                insert_ip_token(real_ip, demo_name, gpt_content, prompt_tokens, completion_tokens, total_tokens)
+                insert_ip_token(real_ip,demo_name,gpt_content,prompt_tokens,completion_tokens,total_tokens)
 
                 simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{gpt_res}")
                 return gpt_res
@@ -80,20 +83,21 @@ def get_answer_from_gpt(question, real_ip="localhost", demo_name="无", model="g
     logger.critical("get_answer_from_gpt 严重错误,3次后都失败了")
 
 
-def parse_gpt_phon_to_tuplelist(text: str) -> list:
+
+def parse_gpt_phon_to_tuplelist(text:str) -> list:
     """解析gpt返回的音标数据"""
     result = []
     if not text:
         return []
     for i in text.split("\n"):
         ii = i.split("***")
-        if len(ii) >= 3:
-            result.append((ii[0].strip(), ii[1].strip(), ii[2].strip()))
+        if len(ii)>=3:
+            result.append((ii[0].strip(),ii[1].strip(),ii[2].strip()))
     return result
 
 
 if __name__ == '__main__':
     pass
 
-    resp = get_answer_from_gpt("hello", temperature=0.8, model='gpt-4o')
+    resp = get_answer_from_gpt("hello",temperature=0.8,model='gpt-4o')
     print(resp)
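`parse_gpt_phon_to_tuplelist` expects one `word***UK***US` triple per line; fed an invented sample it yields:

    text = "apple *** /ˈæp.əl/ *** /ˈæp.əl/\nwater *** /ˈwɔː.tə/ *** /ˈwɑː.t̬ɚ/"
    print(parse_gpt_phon_to_tuplelist(text))
    # [('apple', '/ˈæp.əl/', '/ˈæp.əl/'), ('water', '/ˈwɔː.tə/', '/ˈwɑː.t̬ɚ/')]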

+ 51 - 28
gpt/gpt_check.py

@@ -4,31 +4,31 @@
 Validation modules for GPT replies."""
 import json
 import re
-from enum import Enum
 from typing import List
-
-from pydantic import BaseModel
-
+from enum import Enum
+from pydantic import BaseModel,ValidationError
 
 class CheckGptAnswer:
     @staticmethod
     def default_no_check(gpt_text: str):
-
+       
         return True
 
+   
     @staticmethod
     def score_value(gpt_text: str):
-
+       
         if gpt_text.count("【取值0】") > 1:
             return False
         return True if re.findall("【取值.+?】", gpt_text) else False
 
+   
     @staticmethod
     def original_modify(gpt_text: str):
         split_text = gpt_text.split("\n")
         for t in split_text:
-
-            if "修改理由" in t and "错误" in t and len(t) <= 25:
+           
+            if "修改理由" in t and "错误" in t and len(t)<=25:
                 return False
             elif "没有严重的语法错误" in t:
                 return False
@@ -38,56 +38,72 @@ class CheckGptAnswer:
         else:
             return False
 
+   
     @staticmethod
     def count_chinese_characters_50(s: str):
         chinese_count = 0
         for char in s:
-
+           
             if '\u4e00' <= char <= '\u9fff':
                 chinese_count += 1
-        return True if s and chinese_count / len(s) >= 0.5 else False
+        return True if s and chinese_count/len(s) >= 0.5 else False
 
+   
     @staticmethod
-    def count_english_count_30(s: str, english_words_count=30):
-        words_count = len(re.findall(r"[a-zA-Z\']+", s))
+    def count_english_count_30(s: str,english_words_count=30):
+        words_count = len(re.findall(r"[a-zA-Z\']+",s))
         return True if words_count >= english_words_count else False
 
+   
     @staticmethod
-    def count_letter_percentages(s: str, letter_percentages=0.8):
-        count_letter = 0
-
+    def count_letter_percentages(s:str,letter_percentages=0.8):
+        count_letter=0
+       
         total_length = len(s)
 
+       
         for char in s:
-
+           
             if char.isalpha():
+               
                 count_letter += 1
-        result = True if round(count_letter / total_length, 2) > letter_percentages else False
+        result = True if round(count_letter/total_length,2)>letter_percentages else False
         return result
 
 
 class CheckArticleResult:
     @staticmethod
     def default_no_check(gpt_text: str):
-
+       
         return True
 
     @staticmethod
     def get_article_1(gpt_text: str):
-
+       
         try:
             json_object = json.loads(gpt_text)
         except json.decoder.JSONDecodeError:
             return False
-
-        if not all(i in json_object for i in ["englishArticle", "chineseArticle", "difficultSentences", "usedMeanIds", "questions"]):
+       
+        if not all(i in json_object for i in ["englishArticle","chineseArticle","difficultSentences","usedMeanIds","questions"]):
             return False
-
+       
         try:
             for question in json_object['questions']:
-                analysis = question['analysis']
-                words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis)) / len(analysis)
-                if words_count_pct > 0.5:
+               
+                # these two fields live on the top-level json_object, not on each question
+                english_article = json_object['englishArticle']
+                non_chinese_pct = len(re.findall(r"[^\u4e00-\u9fff]", english_article)) / len(english_article)
+                if non_chinese_pct < 0.85:  # English article polluted with too much Chinese
+                    return False
+
+                chinese_article = json_object['chineseArticle']
+                chinese_pct = len(re.findall(r"[\u4e00-\u9fff]", chinese_article)) / len(chinese_article)
+                if chinese_pct < 0.15:  # Chinese article is not actually mostly Chinese
+                    return False
+
+                analysis = question['analysis']
+                words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis)) / len(analysis)
+                if words_count_pct > 0.5:
                     return False
         except:
             return False
@@ -95,6 +111,7 @@ class CheckArticleResult:
         return True
 
 
+
 class IsRight(Enum):
     RIGHT = 1
     WRONG = 0
@@ -131,12 +148,16 @@ class Article(BaseModel):
     englishArticle: str
     chineseArticle: str
 
-
 class Annotation(BaseModel):
-    annotation_text: str
-
+    annotation_text:str
 
 if __name__ == '__main__':
+   
+   
+   
+   
+   
+
     text = """{
   "difficultSentences": [
     {
@@ -382,3 +403,5 @@ if __name__ == '__main__':
 }"""
     json_text = json.loads(text2)
     print(json_text)
+   
+   
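All of the ratio checks in this module share one shape: count the characters in a class, then divide by total length. A minimal helper expressing it (sample string invented):

    def chinese_ratio(s: str) -> float:
        # \u4e00-\u9fff is the CJK Unified Ideographs block used by these validators
        return sum('\u4e00' <= ch <= '\u9fff' for ch in s) / len(s) if s else 0.0

    print(chinese_ratio("这是 a mixed 句子"))   # 4 of 13 characters are CJK -> ~0.31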

+ 4 - 10
gpt/query_oss_file.py

@@ -1,17 +1,12 @@
 # -*- coding: UTF-8 -*-
 if __name__ == '__main__':
     import os
-
     os.chdir("..")
 
-import json
-
+from tools.loglog import logger
 import oss2
 from oss2.credentials import EnvironmentVariableCredentialsProvider
-
-from tools.loglog import logger
-
-
+import json
 
 def query_file_content(key):
     """
@@ -23,7 +18,7 @@ def query_file_content(key):
     try:
         object_stream = bucket.get_object(key)
         content = b''.join(object_stream)
-
+       
         text_content = content.decode('utf-8')
         json_content = json.loads(text_content)
         return json_content
@@ -32,6 +27,5 @@ def query_file_content(key):
     except Exception as e:
         logger.error(f"{type(e).__name__}: {e}")
 
-
 if __name__ == '__main__':
-    print(query_file_content('study/article/10613145')["id"])
+    print(query_file_content('study/article/10613145')["id"])

+ 3 - 4
main.py

@@ -1,10 +1,9 @@
 # -*- coding: utf-8 -*-
 import time
-from threading import Thread
-from typing import Callable
-
 from fastapi import FastAPI, Request
 from fastapi.responses import PlainTextResponse
+from threading import Thread
+from typing import Callable
 
 from core.api_article_annotation import router_article_annotation as r7
 from core.api_get_article import router as r1
@@ -51,7 +50,7 @@ async def add_process_time_header(request: Request, call_next: Callable):
     response.headers["X-Process-Time"] = process_time
 
     if path not in ['/', '/tts']:
-        with open('log/time_log.txt', encoding='utf-8', mode='a')as f:
+        with open('log/time_log.txt', encoding='utf-8', mode='a') as f:
             t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
             f.write(f"{t}  路径:{path} - 用时:{process_time}\n")
     return response
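Both entrypoints install this same timing middleware; trimmed to just the timing concern, the pattern is:

    import time
    from fastapi import FastAPI, Request

    app = FastAPI()

    @app.middleware("http")
    async def add_process_time_header(request: Request, call_next):
        start_time = time.time()
        response = await call_next(request)
        response.headers["X-Process-Time"] = str(round(time.time() - start_time, 2))
        return response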

+ 16 - 17
main_9000.py

@@ -1,21 +1,21 @@
 # -*- coding: utf-8 -*-
 import time
 from threading import Thread
-from typing import Callable
 
-from fastapi import FastAPI, Request
+from fastapi import FastAPI,Request
 from fastapi.responses import PlainTextResponse
-
-from core.api_article_annotation import router_article_annotation as r7
+from typing import Callable
 from core.api_get_article import router as r1
-from core.api_get_article2 import router as r3
-from core.api_get_article3 import router as r6
 from core.api_get_audio import router as r2
-from core.api_get_spoken_language import router as r5
+from core.api_get_article2 import router as r3
 from core.api_get_word import router as r4
-from core.respone_format import *
+from core.api_get_spoken_language import router as r5
+from core.api_get_article3 import router as r6
+from core.api_article_annotation import router_article_annotation as r7
+
+from tools.loglog import logger,log_err_e
 from tools.del_expire_file import run_del_normal
-from tools.loglog import logger, log_err_e
+from core.respone_format import *
 
 app = FastAPI(title="AI相关功能接口", version="1.1")
 
@@ -38,35 +38,34 @@ async def add_process_time_header(request: Request, call_next: Callable):
     try:
         body = await request.json() if request.method in ["POST", "PUT", "PATCH"] else ""
     except:
-        body = ""
+        body =""
     logger.info(f"\n测试接口请求:{real_ip} {request.method} {path}\n查询参数:{params}\n携带参数:{body}")
 
     try:
         response = await call_next(request)
     except Exception as e:
-        log_err_e(e, msg="http中间件错误捕捉")
+        log_err_e(e,msg="http中间件错误捕捉")
         return resp_500(message=f"{type(e).__name__},{e}")
 
-    process_time = str(round(time.time() - start_time, 2))
+    process_time = str(round(time.time() - start_time,2))
     response.headers["X-Process-Time"] = process_time
 
-    if path not in ['/', '/tts']:
+   
+    if path not in ['/','/tts']:
         with open('log/time_log.txt', encoding='utf-8', mode='a')as f:
             t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
             f.write(f"{t}  路径:{path} - 用时:{process_time}\n")
     return response
 
-
 @app.get("/")
 @app.post("/")
 def hello():
     return PlainTextResponse("hello world")
 
-
 del_file_thread = Thread(target=run_del_normal, daemon=True)
 del_file_thread.start()
 
+
 if __name__ == "__main__":
     import uvicorn
-
-    uvicorn.run("main_9000:app", port=9000)
+    uvicorn.run("main_9000:app", port=9000)

File diff suppressed because it is too large
+ 4 - 1031
make_docx_demo/check_test_table/aaaaaaaaaa.py


+ 6 - 7
make_docx_demo/check_test_table/baidu_ocr.py

@@ -1,8 +1,7 @@
 # -*- coding:utf-8 -*-
 import base64
-import time
-
 import requests
+import time
 
 access_token = None
 token_time = 0
@@ -16,25 +15,25 @@ def high_ocr_location(pic_path):
     with open(pic_path, 'rb') as f:
         img = base64.b64encode(f.read())
 
-    if time.time() - token_time > 3600 * 8:
+    if time.time()-token_time>3600*8:
         print("获取token啦")
         url_token = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=BaL3yDflxe7Z5001vF8rAzKu&client_secret=xs40HshFLDDyWgCCfgnz86zWhQ8X1s5f'
         token = requests.post(url_token).json()
-
+       
         access_token = token['access_token']
         token_time = time.time()
 
     request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate"
 
-    params = {"image": img, "recognize_granularity": "small"}
+    params = {"image": img,"recognize_granularity":"small"}
     request_url = request_url + "?access_token=" + access_token
     headers = {'content-type': 'application/x-www-form-urlencoded'}
     response = requests.post(request_url, data=params, headers=headers)
     if response:
         r_json = response.json()
-
+       
         return r_json
 
 
 if __name__ == '__main__':
-    print(high_ocr_location(r"C:\Users\86131\Desktop\4.jpg"))
+    print(high_ocr_location(r"C:\Users\86131\Desktop\4.jpg"))
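The eight-hour token cache above, isolated from the OCR call itself (`fetch_token` is a placeholder for the real OAuth POST):

    import time

    _access_token, _token_time = None, 0.0

    def cached_token(fetch_token):
        global _access_token, _token_time
        if _access_token is None or time.time() - _token_time > 3600 * 8:
            _access_token = fetch_token()   # real code POSTs to Baidu's OAuth endpoint
            _token_time = time.time()
        return _access_token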

+ 126 - 75
make_docx_demo/check_test_table/image_preprocess.py

@@ -3,14 +3,14 @@
 TODO: add an algorithm deciding which of two vertically adjacent words a black mark is closer to, to resolve up/down misalignment.
 
 """
-import json
 import re
-from pathlib import Path
+import time
 
-import cv2
-import numpy as np
 from PIL import Image, ImageFilter
-
+import numpy as np
+import cv2
+import json
+from pathlib import Path
 from baidu_ocr import high_ocr_location
 
 
@@ -23,39 +23,56 @@ def test_log(text: str):
 
 class PreprocessImage:
     def __init__(self, image_path):
-        self.image_path = image_path
-        self.template_image_path = "template.jpg"
+        self.image_path = image_path 
+        self.template_image_path = "template.jpg" 
 
-        self.image = cv2.imread(image_path)
+        self.image = cv2.imread(image_path) 
         self.template_image = cv2.imread(self.template_image_path)
-        self.temp_h, self.temp_w = self.template_image.shape[:2]
+        self.temp_h, self.temp_w = self.template_image.shape[:2] 
 
-    def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
+    def correct_image(self, point_tuple,image_path='sharpen_image.jpg'):
         """图像矫正
         point_tuple:传过来的4个点坐标的元组"""
         sharpen_image = cv2.imread(image_path)
 
         src_points = np.float32(point_tuple)
 
-        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
+       
+       
+        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]]) 
 
         M = cv2.getPerspectiveTransform(src_points, dst_points)
-
+       
         transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
 
+       
         gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
 
+       
         blurred = cv2.GaussianBlur(gray, (5, 5), 0)
 
-        image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
+       
+       
+
+       
+       
+       
+       
+       
+       
+       
+       
 
+       
+        image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
+       
         cv2.imwrite('transformed_image.jpg', image_rgb)
 
     def sharpen_image(self):
-
+       
         img = Image.open(self.image_path)
         sharpened_img = img.filter(ImageFilter.SHARPEN)
-        sharpened_img.save('sharpen_image.jpg')
+        sharpened_img.save('sharpen_image.jpg') 
 
     @staticmethod
     def parser_ocr(ocr_data):
@@ -64,16 +81,27 @@ class PreprocessImage:
             text: str = word_item['words']
             if text.startswith("1."):
                 left_char_location = word_item['chars'][0]['location']
-                p1 = (left_char_location['left'], left_char_location['top'])
+                p1 = (left_char_location['left'], left_char_location['top']) 
             elif text.startswith("51."):
                 left_char_location = word_item['chars'][0]['location']
-                p2 = (left_char_location['left'], left_char_location['top'])
+                p2 = (left_char_location['left'], left_char_location['top']) 
             elif text.startswith("50."):
                 left_char_location = word_item['chars'][0]['location']
-                p3 = (left_char_location['left'], left_char_location['top'])
+                p3 = (left_char_location['left'], left_char_location['top']) 
             elif text.startswith("100."):
                 left_char_location = word_item['chars'][0]['location']
-                p4 = (left_char_location['left'], left_char_location['top'])
+                p4 = (left_char_location['left'], left_char_location['top']) 
+
+           
+           
+           
+           
+           
+           
+           
+           
+           
+           
 
         if any([not p1, not p2, not p3, not p4]):
             print([p1, p2, p3, p4])
@@ -82,8 +110,8 @@ class PreprocessImage:
         return [p1, p2, p3, p4]
 
     def run(self):
-
-        self.sharpen_image()
+       
+        self.sharpen_image() 
         ocr_data = high_ocr_location(self.image_path)
         point_tuple = self.parser_ocr(ocr_data)
         self.correct_image(point_tuple)
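`correct_image` boils down to a standard four-point perspective warp. Its OpenCV core, reduced to essentials; only the destination points come from the code above, while the source corners and output size are placeholders:

    import cv2
    import numpy as np

    src_points = np.float32([[130, 90], [1060, 85], [128, 2710], [1064, 2722]])  # detected corners (placeholder)
    dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])  # template corners, as above
    M = cv2.getPerspectiveTransform(src_points, dst_points)
    warped = cv2.warpPerspective(cv2.imread("sharpen_image.jpg"), M, (1150, 2800))  # (width, height) placeholder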
@@ -93,24 +121,28 @@ class ComparisonAlgorithm:
     """比较算法核心"""
 
     def __init__(self, transformed_image, ocr_data):
-        self.transformed_image = cv2.imread(transformed_image)
-        self.ocr_data = ocr_data
-        self.order_ocr_data = {}
-        self.already_find_index = set()
+        self.transformed_image = cv2.imread(transformed_image) 
+        self.ocr_data = ocr_data 
+        self.order_ocr_data = {} 
+        self.already_find_index = set() 
 
-        self.image = Image.open(transformed_image)
+        self.image = Image.open(transformed_image) 
 
     @staticmethod
     def separate_numbers_and_letters(text):
         """正则提取数字和字母"""
-        numbers = "".join(re.findall(r'\d+', text))
-        letters = "".join(re.findall(r'[a-zA-Z]+', text))
+        numbers = "".join(re.findall(r'\d+', text)) 
+        letters = "".join(re.findall(r'[a-zA-Z]+', text)) 
         return numbers, letters
 
     def is_line_word(self, x, y):
         """判断点的颜色是否符合标准; cv2取点速度没有pillow快
         指定要查询的点的坐标 (x, y)"""
 
+       
+       
+       
+
         rgb_color = self.image.getpixel((x, y))
         r, g, b = rgb_color
 
@@ -121,16 +153,16 @@ class ComparisonAlgorithm:
     def __make_order_ocr_data(self):
         for word_item in self.ocr_data['words_result']:
             word = word_item['words']
-            if word[0].isdigit() and len(word) >= 2:
-
+            if word[0].isdigit() and len(word) >= 2: 
+               
                 word_text = word_item['words']
-                location = word_item['location']
-                first_char_location = word_item['chars'][0]['location']
-                end_char_location = word_item['chars'][-1]['location']
-                chars_location = word_item['chars']
+                location = word_item['location'] 
+                first_char_location = word_item['chars'][0]['location'] 
+                end_char_location = word_item['chars'][-1]['location'] 
+                chars_location = word_item['chars'] 
 
                 numbers, letters = self.separate_numbers_and_letters(word_text)
-                if numbers not in self.order_ocr_data:
+                if numbers not in self.order_ocr_data: 
                     self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
                                                     "first_char_location": first_char_location, "end_char_location": end_char_location}
 
@@ -142,23 +174,25 @@ class ComparisonAlgorithm:
         first_char_location: position of the first letter; corresponds to self.order_ocr_data[current_index]['first_char_location']
         word: the word at this index, for labelling only
         """
-        next_index = str(int_index + 1)
-        black_count_1 = 0
+        next_index = str(int_index + 1) 
+        black_count_1 = 0 
 
         for x in range(word_location['left'], word_location['left'] + word_location['width']):
 
+           
             b_top, b_height = first_char_location['top'], int(first_char_location['height'])
-            bottom_location_y = b_top + b_height
+            bottom_location_y = b_top + b_height 
 
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-
+           
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
                 next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
             else:
-
+               
+               
                 next_word_top_location = bottom_location_y + int(b_height * 0.5)
 
             for y in range(bottom_location_y, next_word_top_location):
@@ -168,10 +202,12 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_1 / (word_location['width'])
-        if black_count_per > 0.8:
+        if black_count_per > 0.8: 
             print(f"{int_index}正常划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index
+            return int_index 
+       
+       
 
     def color_algorithm_2(self, int_index, word_location, word):
         """颜色算法2,单词自身中间的黑点率
@@ -179,11 +215,11 @@ class ComparisonAlgorithm:
         word_location: full-line position of the word at this index; corresponds to self.order_ocr_data[current_index]['location']
         word: the word at this index, for labelling only
         """
-        black_count_2 = 0
+        black_count_2 = 0 
         for x in range(word_location['left'], word_location['left'] + word_location['width']):
 
             mid = word_location['top'] + int(word_location['height'] / 2)
-            bottom = word_location['top'] + int(word_location['height']) + 5
+            bottom = word_location['top'] + int(word_location['height']) + 5 
 
             for y in range(mid, bottom):
                 result = self.is_line_word(x, y)
@@ -192,10 +228,12 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_2 / (word_location['width'])
-        if black_count_per > 0.92:
+        if black_count_per > 0.92: 
             print(f"{int_index}中间划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index
+            return int_index 
+       
+       
 
     def color_algorithm_3(self, int_index, word_location, end_char_location, word):
         """
@@ -205,33 +243,34 @@ class ComparisonAlgorithm:
         end_char_location: position of the last letter; corresponds to self.order_ocr_data[current_index]['end_char_location']
         word: the word at this index, for labelling only
         """
-        next_index = str(int_index + 1)
-        black_count_1 = 0
+        next_index = str(int_index + 1) 
+        black_count_1 = 0 
         moving_distance = 20
 
         """这是在获取所有需要的横向左右x坐标"""
-        all_x = []
+        all_x = [] 
         for i in range(word_location['left'] - moving_distance, word_location['left']):
             all_x.append(i)
-        word_right_loca = word_location['left'] + word_location['width'] + 2
+        word_right_loca = word_location['left'] + word_location['width'] + 2 
         for i in range(word_right_loca, word_right_loca + moving_distance):
             all_x.append(i)
 
         b_top, b_height = word_location['top'], int(word_location['height'])
-        bottom_location_y = b_top + b_height
-
-        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
+        bottom_location_y = b_top + b_height 
+       
+        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8) 
 
         for x in all_x:
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-
+           
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
-                next_word_top_location = next_word_location['top'] + 3
+                next_word_top_location = next_word_location['top'] + 3 
             else:
-
+               
+               
                 next_word_top_location = bottom_location_y + int(b_height * 0.3)
 
             for y in range(bottom_location_y_half, next_word_top_location):
@@ -241,55 +280,63 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_1 / len(all_x)
-        if black_count_per > 0.4:
+        if black_count_per > 0.4: 
             print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index
+            return int_index 
+       
+       
 
     def color_algorithm_4(self, int_index, word_location, chars_location, word):
         """灰度图极差算法"""
+       
+       
 
         for char_index, char_dict in enumerate(chars_location):
             if char_dict['char'] == '.' or char_dict['char'] == ',':
                 point_location, point_char_index = char_dict['location'], char_index
                 break
-        else:
+        else: 
             char_index = 2
             point_location, point_char_index = chars_location[char_index]['location'], char_index
 
         white_block = 0
-        point_location_half = point_location['top'] + point_location['height'] // 2
+        point_location_half = point_location['top'] + point_location['height']//2
         y1, y2 = point_location_half, point_location_half + point_location['height']
         for x in range(point_location['left'], point_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val > 110 or range_value < 90:
-                white_block += 1
+            if min_val>110 or range_value < 90:
+                white_block +=1
 
-        if white_block / point_location['width'] < 0.1:
+        if white_block/point_location['width'] < 0.1:
             print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index
 
+       
         white_block = 0
         end_char_location = chars_location[-2]['location']
         bottom = end_char_location['top'] + end_char_location['height']
-        y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
+        y1, y2 = bottom+2, bottom + end_char_location['height']-10
         for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val > 110 or range_value < 90:
-                white_block += 1
+            if min_val>110 or range_value < 90:
+                white_block +=1
 
-        if white_block / point_location['width'] < 0.1:
+        if white_block/point_location['width'] < 0.1:
             print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index
 
+
+
+   
     def core_algorithm(self):
         self.__make_order_ocr_data()
 
@@ -299,10 +346,10 @@ class ComparisonAlgorithm:
                 continue
 
             current_dict = self.order_ocr_data[current_index]
-            word = current_dict['word']
-            word_location = current_dict['location']
-            first_char_location = current_dict['first_char_location']
-            end_char_location = current_dict['end_char_location']
+            word = current_dict['word'] 
+            word_location = current_dict['location'] 
+            first_char_location = current_dict['first_char_location'] 
+            end_char_location = current_dict['end_char_location'] 
             chars_location = current_dict['chars_location']
 
             if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
@@ -319,19 +366,23 @@ class ComparisonAlgorithm:
 
 
 if __name__ == '__main__':
+   
     image_path = r"C:\Users\86131\Desktop\4.jpg"
 
+   
     script_path = Path(__file__).resolve()
-
+   
     script_directory = script_path.parent
-
+   
     transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
 
+   
     pi = PreprocessImage(image_path)
     pi.run()
 
-    transformed_image_ocr_data = high_ocr_location(transformed_image_path)
-    test_log(transformed_image_ocr_data)
+    transformed_image_ocr_data = high_ocr_location(transformed_image_path) 
+    test_log(transformed_image_ocr_data) 
 
     ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
     ca.core_algorithm()
+
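
Algorithms 1–3 above all share one scanning shape: walk every x column across a word (or the margin beside it), probe a vertical band below the baseline, and flag the word once the fraction of columns containing a dark pixel crosses a threshold (0.8, 0.92 and 0.4 respectively). A stand-alone sketch of that core loop, assuming an RGB image opened with PIL and a simple darkness predicate (the actual is_line_word criterion is not shown in these hunks, so the lambda below is an assumption):

    from PIL import Image

    def column_dark_ratio(image, left, width, band_top, band_bottom, is_dark):
        hits = 0
        for x in range(left, left + width):
            for y in range(band_top, band_bottom):
                if is_dark(image.getpixel((x, y))):
                    hits += 1          # first dark pixel in this column is enough
                    break
        return hits / width

    image = Image.open("transformed_image.jpg")
    loc = {'left': 120, 'top': 300, 'width': 90, 'height': 24}   # made-up OCR box
    ratio = column_dark_ratio(image, loc['left'], loc['width'],
                              band_top=loc['top'] + loc['height'],
                              band_bottom=loc['top'] + loc['height'] + 12,
                              is_dark=lambda px: sum(px[:3]) / 3 < 110)
    print(f"underline suspected: {ratio > 0.8} ({ratio:.2f})")
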

+ 115 - 72
make_docx_demo/check_test_table/image_preprocess2.py

@@ -3,14 +3,14 @@
 20250114 在单词上划线,分别有斜杠、反斜杠、横着划线三种方式;找到它们的位置
 
 """
-import json
 import re
-from pathlib import Path
+import time
 
-import cv2
-import numpy as np
 from PIL import Image, ImageFilter
-
+import numpy as np
+import cv2
+import json
+from pathlib import Path
 from baidu_ocr import high_ocr_location
 
 
@@ -23,50 +23,70 @@ def test_log(text: str):
 
 class PreprocessImage:
     def __init__(self, image_path):
-        self.image_path = image_path
-        self.template_image_path = "template.jpg"
+        self.image_path = image_path 
+        self.template_image_path = "template.jpg" 
 
-        self.image = cv2.imread(image_path)
+        self.image = cv2.imread(image_path) 
         self.template_image = cv2.imread(self.template_image_path)
-        self.temp_h, self.temp_w = self.template_image.shape[:2]
+        self.temp_h, self.temp_w = self.template_image.shape[:2] 
 
-    def correct_image(self, point_tuple, image_path='sharpen_image.jpg'):
+    def correct_image(self, point_tuple,image_path='sharpen_image.jpg'):
         """图像矫正
         point_tuple:传过来的4个点坐标的元组"""
         sharpen_image = cv2.imread(image_path)
 
         src_points = np.float32(point_tuple)
 
-        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
+       
+       
+        dst_points = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]]) 
 
         M = cv2.getPerspectiveTransform(src_points, dst_points)
-
+       
         transformed_image = cv2.warpPerspective(sharpen_image, M, (self.temp_w, self.temp_h))
 
+       
         gray = cv2.cvtColor(transformed_image, cv2.COLOR_BGR2GRAY)
 
+       
         blurred = cv2.GaussianBlur(gray, (5, 5), 0)
 
-        image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
+       
+       
 
+       
+       
+       
+       
+       
+       
+       
+       
+
+       
+        image_rgb = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
+       
         cv2.imwrite('transformed_image.jpg', image_rgb)
 
     def sharpen_image(self):
-
+       
         img = Image.open(self.image_path)
         sharpened_img = img.filter(ImageFilter.SHARPEN)
-        sharpened_img.save('sharpen_image.jpg')
+        sharpened_img.save('sharpen_image.jpg') 
 
     @staticmethod
     def parser_ocr(ocr_data):
         for word_item in ocr_data['words_result']:
-
+           
             for char_item in word_item['chars']:
+
                 pass
 
-    def run(self):
 
-        self.sharpen_image()
+
+    def run(self):
+       
+        self.sharpen_image() 
         ocr_data = high_ocr_location(self.image_path)
         point_tuple = self.parser_ocr(ocr_data)
         self.correct_image(point_tuple)
@@ -76,24 +96,28 @@ class ComparisonAlgorithm:
     """比较算法核心"""
 
     def __init__(self, transformed_image, ocr_data):
-        self.transformed_image = cv2.imread(transformed_image)
-        self.ocr_data = ocr_data
-        self.order_ocr_data = {}
-        self.already_find_index = set()
+        self.transformed_image = cv2.imread(transformed_image) 
+        self.ocr_data = ocr_data 
+        self.order_ocr_data = {} 
+        self.already_find_index = set() 
 
-        self.image = Image.open(transformed_image)
+        self.image = Image.open(transformed_image) 
 
     @staticmethod
     def separate_numbers_and_letters(text):
         """正则提取数字和字母"""
-        numbers = "".join(re.findall(r'\d+', text))
-        letters = "".join(re.findall(r'[a-zA-Z]+', text))
+        numbers = "".join(re.findall(r'\d+', text)) 
+        letters = "".join(re.findall(r'[a-zA-Z]+', text)) 
         return numbers, letters
 
     def is_line_word(self, x, y):
         """判断点的颜色是否符合标准; cv2取点速度没有pillow快
         指定要查询的点的坐标 (x, y)"""
 
+       
+       
+       
+
         rgb_color = self.image.getpixel((x, y))
         r, g, b = rgb_color
 
@@ -104,16 +128,16 @@ class ComparisonAlgorithm:
     def __make_order_ocr_data(self):
         for word_item in self.ocr_data['words_result']:
             word = word_item['words']
-            if word[0].isdigit() and len(word) >= 2:
-
+            if word[0].isdigit() and len(word) >= 2: 
+               
                 word_text = word_item['words']
-                location = word_item['location']
-                first_char_location = word_item['chars'][0]['location']
-                end_char_location = word_item['chars'][-1]['location']
-                chars_location = word_item['chars']
+                location = word_item['location'] 
+                first_char_location = word_item['chars'][0]['location'] 
+                end_char_location = word_item['chars'][-1]['location'] 
+                chars_location = word_item['chars'] 
 
                 numbers, letters = self.separate_numbers_and_letters(word_text)
-                if numbers not in self.order_ocr_data:
+                if numbers not in self.order_ocr_data: 
                     self.order_ocr_data[numbers] = {"word": letters, "location": location, "chars_location": chars_location,
                                                     "first_char_location": first_char_location, "end_char_location": end_char_location}
 
@@ -125,23 +149,25 @@ class ComparisonAlgorithm:
         first_char_location: 第一个字母的位置;对应 self.order_ocr_data[current_index]['first_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1)
-        black_count_1 = 0
+        next_index = str(int_index + 1) 
+        black_count_1 = 0 
 
         for x in range(word_location['left'], word_location['left'] + word_location['width']):
 
+           
             b_top, b_height = first_char_location['top'], int(first_char_location['height'])
-            bottom_location_y = b_top + b_height
+            bottom_location_y = b_top + b_height 
 
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-
+           
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
                 next_word_top_location = next_word_location['top'] + int(next_word_location['height'] / 8)
             else:
-
+               
+               
                 next_word_top_location = bottom_location_y + int(b_height * 0.5)
 
             for y in range(bottom_location_y, next_word_top_location):
@@ -151,10 +177,12 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_1 / (word_location['width'])
-        if black_count_per > 0.8:
+        if black_count_per > 0.8: 
             print(f"{int_index}正常划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index
+            return int_index 
+       
+       
 
     def color_algorithm_2(self, int_index, word_location, word):
         """颜色算法2,单词自身中间的黑点率
@@ -162,11 +190,11 @@ class ComparisonAlgorithm:
         word_location:这个序号的单词的整行位置;对应 self.order_ocr_data[current_index]['location']
         word:具体序号的单词,标识用
         """
-        black_count_2 = 0
+        black_count_2 = 0 
         for x in range(word_location['left'], word_location['left'] + word_location['width']):
 
             mid = word_location['top'] + int(word_location['height'] / 2)
-            bottom = word_location['top'] + int(word_location['height']) + 5
+            bottom = word_location['top'] + int(word_location['height']) + 5 
 
             for y in range(mid, bottom):
                 result = self.is_line_word(x, y)
@@ -175,10 +203,12 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_2 / (word_location['width'])
-        if black_count_per > 0.92:
+        if black_count_per > 0.92: 
             print(f"{int_index}中间划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index
+            return int_index 
+       
+       
 
     def color_algorithm_3(self, int_index, word_location, end_char_location, word):
         """
@@ -188,33 +218,34 @@ class ComparisonAlgorithm:
         end_char_location: 最后一个字母的位置;对应 self.order_ocr_data[current_index]['end_char_location']
         word:具体序号的单词,标识用
         """
-        next_index = str(int_index + 1)
-        black_count_1 = 0
+        next_index = str(int_index + 1) 
+        black_count_1 = 0 
         moving_distance = 20
 
         """这是在获取所有需要的横向左右x坐标"""
-        all_x = []
+        all_x = [] 
         for i in range(word_location['left'] - moving_distance, word_location['left']):
             all_x.append(i)
-        word_right_loca = word_location['left'] + word_location['width'] + 2
+        word_right_loca = word_location['left'] + word_location['width'] + 2 
         for i in range(word_right_loca, word_right_loca + moving_distance):
             all_x.append(i)
 
         b_top, b_height = word_location['top'], int(word_location['height'])
-        bottom_location_y = b_top + b_height
-
-        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8)
+        bottom_location_y = b_top + b_height 
+       
+        bottom_location_y_half = end_char_location['top'] + int(end_char_location['height'] * 0.8) 
 
         for x in all_x:
             if int_index == 50 or int_index == 100:
                 next_word_top_location = bottom_location_y + b_height * 2
-
+           
             elif next_index in self.order_ocr_data and (
                     self.order_ocr_data[next_index]['first_char_location']['top'] - bottom_location_y) < b_height:
                 next_word_location = self.order_ocr_data[next_index]['first_char_location']
-                next_word_top_location = next_word_location['top'] + 3
+                next_word_top_location = next_word_location['top'] + 3 
             else:
-
+               
+               
                 next_word_top_location = bottom_location_y + int(b_height * 0.3)
 
             for y in range(bottom_location_y_half, next_word_top_location):
@@ -224,55 +255,63 @@ class ComparisonAlgorithm:
                     break
 
         black_count_per = black_count_1 / len(all_x)
-        if black_count_per > 0.4:
+        if black_count_per > 0.4: 
             print(f"{int_index}前后双边划线{black_count_per:.2f}", word)
             self.already_find_index.add(int_index)
-            return int_index
+            return int_index 
+       
+       
 
     def color_algorithm_4(self, int_index, word_location, chars_location, word):
         """灰度图极差算法"""
+       
+       
 
         for char_index, char_dict in enumerate(chars_location):
             if char_dict['char'] == '.' or char_dict['char'] == ',':
                 point_location, point_char_index = char_dict['location'], char_index
                 break
-        else:
+        else: 
             char_index = 2
             point_location, point_char_index = chars_location[char_index]['location'], char_index
 
         white_block = 0
-        point_location_half = point_location['top'] + point_location['height'] // 2
+        point_location_half = point_location['top'] + point_location['height']//2
         y1, y2 = point_location_half, point_location_half + point_location['height']
         for x in range(point_location['left'], point_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val > 110 or range_value < 90:
-                white_block += 1
+            if min_val>110 or range_value < 90:
+                white_block +=1
 
-        if white_block / point_location['width'] < 0.1:
+        if white_block/point_location['width'] < 0.1:
             print(f"{int_index}极差算法 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index
 
+       
         white_block = 0
         end_char_location = chars_location[-2]['location']
         bottom = end_char_location['top'] + end_char_location['height']
-        y1, y2 = bottom + 2, bottom + end_char_location['height'] - 10
+        y1, y2 = bottom+2, bottom + end_char_location['height']-10
         for x in range(end_char_location['left'], end_char_location['left'] + point_location['width']):
             roi_image = self.transformed_image[y1:y2, x:x + 1]
             min_val = np.min(roi_image)
             max_val = np.max(roi_image)
             range_value = max_val - min_val
-            if min_val > 110 or range_value < 90:
-                white_block += 1
+            if min_val>110 or range_value < 90:
+                white_block +=1
 
-        if white_block / point_location['width'] < 0.1:
+        if white_block/point_location['width'] < 0.1:
             print(f"{int_index}极差算法二 {word},左{point_location['left']},宽{point_location['width']},高{point_location['height']},{y1},{y2}")
             self.already_find_index.add(int_index)
             return int_index
 
+
+
+   
     def core_algorithm(self):
         self.__make_order_ocr_data()
 
@@ -282,10 +321,10 @@ class ComparisonAlgorithm:
                 continue
 
             current_dict = self.order_ocr_data[current_index]
-            word = current_dict['word']
-            word_location = current_dict['location']
-            first_char_location = current_dict['first_char_location']
-            end_char_location = current_dict['end_char_location']
+            word = current_dict['word'] 
+            word_location = current_dict['location'] 
+            first_char_location = current_dict['first_char_location'] 
+            end_char_location = current_dict['end_char_location'] 
             chars_location = current_dict['chars_location']
 
             if self.color_algorithm_1(int_index=int_index, word_location=word_location, first_char_location=first_char_location, word=word):
@@ -302,19 +341,23 @@ class ComparisonAlgorithm:
 
 
 if __name__ == '__main__':
+   
     image_path = r"C:\Users\86131\Desktop\4.jpg"
 
+   
     script_path = Path(__file__).resolve()
-
+   
     script_directory = script_path.parent
-
+   
     transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
 
+   
     pi = PreprocessImage(image_path)
     pi.run()
 
-    transformed_image_ocr_data = high_ocr_location(transformed_image_path)
-    test_log(transformed_image_ocr_data)
+    transformed_image_ocr_data = high_ocr_location(transformed_image_path) 
+    test_log(transformed_image_ocr_data) 
 
     ca = ComparisonAlgorithm(transformed_image=transformed_image_path, ocr_data=transformed_image_ocr_data)
     ca.core_algorithm()
+
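
The preprocessing half of this file is compact enough to restate on its own: four detected anchor points are mapped onto the template's fixed corners, the page is warped to the template size, then converted to grayscale and blurred. A self-contained sketch under those assumptions; the dst corners and the (5, 5) blur kernel are taken from correct_image above, while the sample corners and template size are illustrative:

    import cv2
    import numpy as np

    def correct_page(image_path, point_tuple, temp_w, temp_h):
        image = cv2.imread(image_path)
        src = np.float32(point_tuple)                      # 4 detected corners
        dst = np.float32([[122, 78], [1070, 78], [122, 2730], [1070, 2730]])
        M = cv2.getPerspectiveTransform(src, dst)
        warped = cv2.warpPerspective(image, M, (temp_w, temp_h))
        gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
        return cv2.GaussianBlur(gray, (5, 5), 0)           # light denoise

    corners = ((130, 90), (1060, 85), (115, 2720), (1075, 2735))  # made-up input
    cv2.imwrite("transformed_image.jpg", correct_page("4.jpg", corners, 1191, 2808))
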

+ 8 - 4
make_docx_demo/check_test_table/mark_ocr_loca.py

@@ -1,25 +1,29 @@
 # -*- coding: utf-8 -*-
 """测试ocr的位置,与预期是否一致"""
+from PIL import Image, ImageDraw
 import json
 from pathlib import Path
 
-from PIL import Image, ImageDraw
-
 
 def draw_rectangles_on_image(image_path, rectangles, output_path):
+   
     image = Image.open(image_path)
     draw = ImageDraw.Draw(image)
 
+   
     for rectangle in rectangles:
         top_left = (rectangle['left'], rectangle['top'])
         bottom_right = (rectangle['left'] + rectangle['width'], rectangle['top'] + rectangle['height'])
         draw.rectangle([top_left, bottom_right], outline='red', width=2)
 
+   
     image.save(output_path)
 
 
 rectangles = [
-
+   
+   
+   
 ]
 
 with open("log.txt", "r", encoding="utf-8") as f:
@@ -35,4 +39,4 @@ for i in ocr_data['words_result']:
 script_path = Path(__file__).resolve()
 script_directory = script_path.parent
 transformed_image_path = str(Path(script_directory, r"transformed_image.jpg"))
-draw_rectangles_on_image(transformed_image_path, rectangles, 'output_with_rectangles.jpg')
+draw_rectangles_on_image(transformed_image_path, rectangles, 'output_with_rectangles.jpg')
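
The `rectangles` list is left empty in the committed file and the loop that fills it from `log.txt` falls outside this hunk; presumably it collects the per-word `location` dicts, which is all draw_rectangles_on_image needs. A hypothetical one-liner for that step:

    # hypothetical: one red box per OCR word, using the Baidu OCR location shape
    rectangles = [w['location'] for w in ocr_data['words_result']]
    # each entry looks like {'left': 122, 'top': 78, 'width': 90, 'height': 24}
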

File diff suppressed because it is too large
+ 2 - 1535
make_docx_demo/data.py


+ 42 - 38
make_docx_demo/docx_other_func.py

@@ -1,17 +1,18 @@
 # -*- coding: utf-8 -*-
-import datetime
-import io
-import time
-from base64 import b64decode
 from functools import wraps
-
-import matplotlib.pyplot as plt
+import time
+import io
 import qrcode
 from docx.shared import RGBColor
+from base64 import b64decode
+import datetime
 
+import matplotlib.pyplot as plt
 plt.switch_backend('Agg')
 from io import BytesIO
 from tools.loglog import logger, log_err_e
+from docx import Document
+from docx.shared import Inches,Cm
 from threading import Lock
 from config.read_config import address
 
@@ -21,43 +22,40 @@ width_cm, height_cm = 5.4, 3
 width_in = width_cm
 height_in = height_cm
 
-plt.figure(figsize=(width_in, height_in))
+plt.figure(figsize=(width_in, height_in)) 
 
 
-def hex_to_rgb(hex_color: str):
-    hex_color = hex_color.lstrip('#')
+def hex_to_rgb(hex_color:str):
+    hex_color = hex_color.lstrip('#') 
     return RGBColor(int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16))
 
-
 def rgb_to_hex(r, g, b):
     return '{:02x}{:02x}{:02x}'.format(r, g, b)
 
-
 def is_base64(text):
     try:
-
-        image_bytes = b64decode(text)
+       
+        image_bytes =b64decode(text)
         return image_bytes
     except Exception:
-
+       
         return False
 
 
 def time_use(fn):
     @wraps(fn)
-    def cc(*args, **kwargs):
+    def cc(*args,**kwargs): 
         f_time = time.time()
-        res = fn(*args, **kwargs)
+        res = fn(*args,**kwargs)
 
-        cha = round(time.time() - f_time, 3)
+        cha = round(time.time()-f_time,3)
         if cha > 0.3:
-            print(f'函数:{fn.__name__} 一共用时', cha, '秒')
-        return res
+            print(f'函数:{fn.__name__} 一共用时',cha,'秒')
+        return res 
+    return cc 
 
-    return cc
 
-
-def qrcode_maker(id_text=None, full_url=None) -> BytesIO:
+def qrcode_maker(id_text=None,full_url=None) -> BytesIO:
     """
     :param id_text: id_text 提供id,二维码地址是春笋筛查表的地址;http://dcjxb.yunzhixue.cn/link?type=scanpage&id=999;
     :param full_url: 如果提供,直接使用这个文本来生成二维码的地址
@@ -75,14 +73,16 @@ def qrcode_maker(id_text=None, full_url=None) -> BytesIO:
     qr.add_data(text)
     qr.make(fit=True)
 
+   
     img = qr.make_image(fill_color="black", back_color="white")
     img_byte_arr = io.BytesIO()
     img.save(img_byte_arr, format='PNG')
     img_byte_arr.seek(0)
-
+   
     return img_byte_arr
 
 
+
 def get_weekday():
     today = datetime.date.today()
     weekday_index = today.weekday()
@@ -91,7 +91,7 @@ def get_weekday():
     return weekday_chinese
 
 
-def make_chart(x_axis_data, y_axis_datas, title, sub_title_list, x_axis_label=None, y_axis_label=None):
+def make_chart(x_axis_data,y_axis_datas,title,sub_title_list,x_axis_label=None,y_axis_label=None):
     """
     :param sub_title_list: 小标题集合,放在右上角,用来标记每个y轴的数据标题
     :param y_axis_label:Y轴文本
@@ -103,6 +103,7 @@ def make_chart(x_axis_data, y_axis_datas, title, sub_title_list, x_axis_label=No
     """
     x_len = len(x_axis_data)
 
+   
     image_io = BytesIO()
 
     font1 = {'family': 'SimSun', 'weight': 'normal', 'size': 14}
@@ -112,31 +113,34 @@ def make_chart(x_axis_data, y_axis_datas, title, sub_title_list, x_axis_label=No
         for y in y_axis_datas:
             if len(y) != x_len:
                 logger.error("x轴的y轴的数据个数不一致")
-            plt.plot(x_axis_data, y, marker='o', label="zxs")
+            plt.plot(x_axis_data, y, marker='o',label="zxs") 
 
-        plt.title(title)
+        plt.title(title) 
         if x_axis_label:
-            plt.xlabel(x_axis_label)
+            plt.xlabel(x_axis_label) 
         if y_axis_label:
-            plt.ylabel(y_axis_label)
-        plt.grid(True)
+            plt.ylabel(y_axis_label) 
+        plt.grid(True) 
 
-        for index, sub_title in enumerate(sub_title_list):
-            plt.text(0.95, 0.9 - index * 0.15, sub_title, transform=plt.gca().transAxes, fontsize=10, va='top', ha='right', backgroundcolor='w')
+        for index,sub_title in enumerate(sub_title_list):
+            plt.text(0.95, 0.9-index*0.15, sub_title, transform=plt.gca().transAxes, fontsize=10, va='top', ha='right', backgroundcolor='w')
         with lock:
-            plt.savefig(image_io, format='png', bbox_inches='tight')
-            image_io.seek(0)
+            plt.savefig(image_io, format='png', bbox_inches='tight') 
+            image_io.seek(0) 
 
         return image_io
     except Exception as e:
-        log_err_e(e, "折线图生成错误")
+        log_err_e(e,"折线图生成错误")
         image_io.close()
         return None
 
 
 if __name__ == '__main__':
-    t = time.time()
-    io = qrcode_maker('', "http://111.231.167.191:8001/mp3")
-    with open("1.jpg", 'wb') as f:
+   
+   
+
+    t= time.time()
+    io = qrcode_maker('',"http://111.231.167.191:8001/mp3")
+    with open("1.jpg",'wb') as f:
         f.write(io.read())
-    print(time.time() - t)
+    print(time.time()-t)
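
A quick usage sketch for the two helpers above: time_use only reports calls that take longer than 0.3 s, and qrcode_maker returns the PNG as a BytesIO (given id_text it builds the scan-page URL per its docstring; given full_url it encodes that URL directly). The import path and output file name here are assumptions:

    from make_docx_demo.docx_other_func import qrcode_maker, time_use

    @time_use                       # prints the duration only when it exceeds 0.3 s
    def build_scanpage_qr(page_id: str):
        return qrcode_maker(id_text=page_id)

    buf = build_scanpage_qr("999")
    with open("scanpage_qr.png", "wb") as f:
        f.write(buf.read())
    buf.close()
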

+ 11 - 9
make_docx_demo/get_standard_data.py

@@ -1,28 +1,30 @@
 # -*- coding:utf-8 -*-
 """获取学段标准数据"""
+from cachetools import TTLCache,cached
 import requests
-from cachetools import TTLCache
+from tools.loglog import logger, log_err_e
+
 
 cache = TTLCache(maxsize=100, ttl=86400)
 
 
-def get_standard_data(student_stage: int):
+def get_standard_data(student_stage:int):
     if student_stage in cache:
         return cache[student_stage]
 
     url = "https://dcjxb.yunzhixue.cn/api-dev/standard/study"
-    params = {"stage": student_stage}
-    response = requests.get(url, params=params)
+    params = {"stage":student_stage}
+    response = requests.get(url,params=params)
     if response.status_code == 200:
-        data_obj = response.json()['data']
-
-        return_data = data_obj['totalVocabulary'], data_obj['readingAccuracy'], data_obj['readingLevel'], data_obj['readingSpeed']
+        data_obj = response.json()['data'] 
+       
+        return_data = data_obj['totalVocabulary'],data_obj['readingAccuracy'],data_obj['readingLevel'],data_obj['readingSpeed']
 
-        cache[student_stage] = return_data
+        cache[student_stage] = return_data 
         return return_data
 
 
 if __name__ == '__main__':
     print(get_standard_data(3))
     print(cache)
-    print(1 in cache, 2 in cache, 3 in cache)
+    print(1 in cache,2 in cache,3 in cache)
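
The new import line pulls in `cached` alongside TTLCache, so the hand-rolled `if student_stage in cache` check could collapse into the decorator form below. One behavioural difference worth noting: the decorator would also memoise the implicit None returned on a non-200 response, which the manual version never stores. A sketch, not the committed code:

    import requests
    from cachetools import TTLCache, cached

    cache = TTLCache(maxsize=100, ttl=86400)        # 24 h per student stage

    @cached(cache)
    def get_standard_data(student_stage: int):
        url = "https://dcjxb.yunzhixue.cn/api-dev/standard/study"
        response = requests.get(url, params={"stage": student_stage})
        if response.status_code == 200:
            d = response.json()['data']
            return d['totalVocabulary'], d['readingAccuracy'], d['readingLevel'], d['readingSpeed']
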

+ 169 - 114
make_docx_demo/main_word.py

@@ -1,28 +1,29 @@
 # -*- coding: UTF-8 -*-
-import math
-import re
 import time
-from docx_base import Word, Table, ParagraphBase
+import re
+import math
 from io import BytesIO
-from random import shuffle
+from random import randint, shuffle
 from threading import Thread
 
-from docx.shared import Inches
-
+from docx_base import Word, Table, hex_to_rgb, rgb_to_hex, ParagraphBase
+from docx.shared import Pt, Inches, Cm, RGBColor
+from docx.enum.text import WD_COLOR_INDEX
 from make_docx_demo.data import *
 from make_docx_demo.docx_other_func import time_use, qrcode_maker, get_weekday, make_chart
+from tools.loglog import logger, log_err_e
 from make_docx_demo.word2pdf import convert_word_to_pdf
-from tools.loglog import log_err_e
 
 num_dict = {1: "❶", 2: "❷", 3: "❸", 4: "❹", 5: "❺", 6: "❻", 7: "❼", 8: "❽", 9: "❾",
             10: "❿", 11: "⓫", 12: "⓬", 13: "⓭", 14: "⓮", 15: "⓯", 16: "⓰", 17: "⓱", 18: "⓲", 19: "⓳", 20: "⓴"}
 
 
+
 @time_use
 def header_maker(docx: Word, json_data):
-    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
-    exercise_title = json_data.get("ExerciseTitle", "")
-    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
+    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0") 
+    exercise_title = json_data.get("ExerciseTitle", "") 
+    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
 
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     class_name = json_data.get("StudentInfo").get("ClassName", '')
@@ -40,12 +41,15 @@ def header_maker(docx: Word, json_data):
         tb_header.set_cell_text(0, 4, f"{t_date}\n{t_weekday}\n{t_time}", size=8, border=False, color=(220, 220, 220))
 
         tb_header.set_tb_colum_width(width=[100, 70, 70, 150, 80])
+       
 
-    target_section = docx.doc.sections[-1]
+    target_section = docx.doc.sections[-1] 
     target_section.header.is_linked_to_previous = False
-
+   
     for paragraph in target_section.header.paragraphs:
-        paragraph.clear()
+        paragraph.clear() 
+   
+   
 
     target_section.header_distance = 0
     target_section.footer_distance = 280000
@@ -56,9 +60,9 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
     p = docx.add_blank_paragraph()
     line_width = 205
     main_rect_x = line_width + 10
-    main_rect_width = 150
+    main_rect_width = 150 
 
-    right_line_x = main_rect_x + main_rect_width + 10
+    right_line_x = main_rect_x + main_rect_width + 10 
 
     p.add_rectangle(main_title, x=main_rect_x, y=4, fill_color="000000", width=main_rect_width, height=48, font_color="ffffff",
                     font_size=18)
@@ -78,27 +82,28 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-    reading_speed = json_data['StudentInfo']['StudentStudy']['ReadingSpeed']
-    reading_accuracy = json_data['StudentInfo']['StudentStudy']['ReadingAccuracy']
-    reading_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
+    reading_speed = json_data['StudentInfo']['StudentStudy']['ReadingSpeed'] 
+    reading_accuracy = json_data['StudentInfo']['StudentStudy']['ReadingAccuracy'] 
+    reading_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
 
+   
     chart_width = 5.4
     all_chart = json_data['StudentInfo']['StudentStudy']['ChartData']
-    reading_speed_chart = all_chart["ReadingSpeed"]
-    reading_accuracy_chart = all_chart["ReadingAccuracy"]
-    reading_difficult_chart = all_chart["ReadingDifficulties"]
+    reading_speed_chart = all_chart["ReadingSpeed"] 
+    reading_accuracy_chart = all_chart["ReadingAccuracy"] 
+    reading_difficult_chart = all_chart["ReadingDifficulties"] 
 
-    reading_speed_x_data = reading_speed_chart['XAxis']
-    reading_speed_sub_title = reading_speed_chart['Legend']
-    reading_speed_y_datas = [i['Data'] for i in reading_speed_chart['Series']]
+    reading_speed_x_data = reading_speed_chart['XAxis'] 
+    reading_speed_sub_title = reading_speed_chart['Legend'] 
+    reading_speed_y_datas = [i['Data'] for i in reading_speed_chart['Series']] 
 
-    reading_accuracy_x_data = reading_accuracy_chart['XAxis']
-    reading_accuracy_sub_title = reading_accuracy_chart['Legend']
-    reading_accuracy_y_datas = [i['Data'] for i in reading_accuracy_chart['Series']]
+    reading_accuracy_x_data = reading_accuracy_chart['XAxis'] 
+    reading_accuracy_sub_title = reading_accuracy_chart['Legend'] 
+    reading_accuracy_y_datas = [i['Data'] for i in reading_accuracy_chart['Series']] 
 
-    reading_difficult_x_data = reading_difficult_chart['XAxis']
-    reading_difficult_sub_title = reading_difficult_chart['Legend']
-    reading_difficult_y_datas = [i['Data'] for i in reading_difficult_chart['Series']]
+    reading_difficult_x_data = reading_difficult_chart['XAxis'] 
+    reading_difficult_sub_title = reading_difficult_chart['Legend'] 
+    reading_difficult_y_datas = [i['Data'] for i in reading_difficult_chart['Series']] 
 
     "开始版面-------------------------------------------------"
 
@@ -128,26 +133,26 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     chart1_io = make_chart(x_axis_data=reading_speed_x_data, y_axis_datas=reading_speed_y_datas, title="阅读速度",
                            sub_title_list=reading_speed_sub_title)
     run1.add_pic(chart1_io, width=chart_width)
-    chart1_io.close()
+    chart1_io.close() 
 
     p2 = tb3.get_cell_paragraph(0, 1, dq=15, dh=15)
     run2 = ParagraphBase(p2)
     chart2_io = make_chart(x_axis_data=reading_accuracy_x_data, y_axis_datas=reading_accuracy_y_datas, title="阅读准确率",
                            sub_title_list=reading_accuracy_sub_title)
     run2.add_pic(chart2_io, width=chart_width)
-    chart2_io.close()
+    chart2_io.close() 
 
     p3 = tb3.get_cell_paragraph(0, 2, dq=15, dh=15)
     run3 = ParagraphBase(p3)
     chart3_io = make_chart(x_axis_data=reading_difficult_x_data, y_axis_datas=reading_difficult_y_datas, title="阅读难度",
                            sub_title_list=reading_difficult_sub_title)
     run3.add_pic(chart3_io, width=chart_width)
-    chart3_io.close()
+    chart3_io.close() 
 
     docx.add_blank_paragraph()
 
     tb4 = Table(docx, rows=5, cols=5, border=True, tb_name="自主复习记录")
-
+   
     tb4.set_table_width_xml([2000, 3000, 2000, 2000, 2000])
 
     first_cell = tb4.get_cell(0, 0)
@@ -241,7 +246,7 @@ def section_2(docx: Word, json_data, *args, **kwargs):
 
     docx.add_blank_paragraph(dq=2, dh=2)
     docx.add_paragraph("北京云知学科技有限公司", align="right", size=10)
-
+   
     docx.add_page_section()
 
 
@@ -328,12 +333,15 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside = Table(cell_outside, rows=5, cols=3, tb_name="内部内容")
 
         tb_inside.merge_cell(0, 0, 0, 1)
-        tb_inside.merge_cell(1, 0, 1, 1)
-        tb_inside.merge_cell(0, 2, 1, 2)
-        tb_inside.merge_cell(2, 0, 2, 2)
-        tb_inside.merge_cell(3, 0, 3, 2)
-        tb_inside.merge_cell(4, 0, 4, 2)
+        tb_inside.merge_cell(1, 0, 1, 1) 
+        tb_inside.merge_cell(0, 2, 1, 2) 
+        tb_inside.merge_cell(2, 0, 2, 2) 
+        tb_inside.merge_cell(3, 0, 3, 2) 
+        tb_inside.merge_cell(4, 0, 4, 2) 
 
+       
+
+       
         num_calucate = 2 * row + 1 if col == 0 else 2 * row + 2
         p = ParagraphBase(tb_inside.get_cell_paragraph(0, 0, align="left"))
         p.add_run_to_p(num_dict[num_calucate], bold=True, size=22, font_name="MS Gothic")
@@ -341,17 +349,19 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside.set_cell_text(row=1, column=0, cell_text=data[1] + "  " + data[2], border=False, size=10, align="left",
                                 bk_color=(240, 240, 240))
 
-        image_io: BytesIO = qrcode_result.get(data[9], "")
+       
+        image_io:BytesIO = qrcode_result.get(data[9], "") 
         if image_io:
             cell_p = tb_inside.get_cell_paragraph(0, 2, dq=5)
             p_base = ParagraphBase(cell_p)
             p_base.add_pic(image_io, width=1.5)
             image_io.close()
 
+       
         cell_p = tb_inside.get_cell_paragraph(2, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
-        cell_p_1.add_run_to_p(data[3], size=10, bold=True)
-        cell_p_1.add_run_to_p("   " + data[4], size=8)
+        cell_p_1.add_run_to_p(data[3], size=10, bold=True) 
+        cell_p_1.add_run_to_p("   " + data[4], size=8) 
 
         cell_p = tb_inside.get_cell_paragraph(3, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
@@ -365,39 +375,40 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
     properties_chinese_map = {"adj": "形容词", "n": "名词", "interj": "感叹词", "conj": "连词", "num": "数字", "art": "冠词",
                               "pron": "代词", "adv": "副词", "prep": "介词", "v": "动词"}
-    strange_words_data = []
+    strange_words_data = [] 
     strange_words = json_data.get('StrangeWords')
-    qrcode_thread = []
+    qrcode_thread = [] 
     qrcode_result = {}
 
     for item in strange_words:
-        spell = item['Spell']
-        word_id = item['WordId']
+        spell = item['Spell'] 
+        word_id = item['WordId'] 
         en = "" if not item.get("SymbolsEn", "") else item.get("SymbolsEn")
         am = "" if not item.get("SymbolsAm", "") else item.get("SymbolsAm")
 
-        symbols_en = "英" + f'[{en}]'
-        symbols_am = "美" + f'[{am}]'
+        symbols_en = "英" + f'[{en}]' 
+        symbols_am = "美" + f'[{am}]' 
 
+       
         tts_url = f"https://dcjxb.yunzhixue.cn/exercise/word?id={word_id}"
         t = Thread(target=qrcode_maker, args=(tts_url, qrcode_result))
         qrcode_thread.append(t)
         t.start()
 
-        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']])
-        word_meanings = item.get('Meaning', "")
+        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']]) 
+        word_meanings = item.get('Meaning', "") 
         word_changes = ";".join([s["Type"] + ":" + s["Spell"] for s in item["WordChanges"]])
 
         if item['Sentences']:
             sentences = item['Sentences'][0]['English'] + '\n' + item['Sentences'][0]['Chinese']
         else:
             sentences = ""
-
+       
         single_word_tuple = (spell, symbols_en, symbols_am, word_properties, word_meanings,
                              "词汇变形", word_changes, "例句", sentences, tts_url)
         strange_words_data.append(single_word_tuple)
 
-    rows = math.ceil(len(strange_words_data) / 2)
+    rows = math.ceil(len(strange_words_data) / 2) 
     tb_outside = Table(docx, rows=rows, cols=2, tb_name="外层框架")
     tb_outside.set_tb_colum_width(width=[230, 230])
 
@@ -417,10 +428,12 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_5(docx: Word, json_data, *args, **kwargs):
+   
     copy_word_list = [i['Meaning'] for i in json_data.get('StrangeWords')]
-    random_copy_word_list = copy_word_list * 3
+    random_copy_word_list = copy_word_list * 3 
     shuffle(random_copy_word_list)
 
+   
     first_copy_word_list = copy_word_list.copy()
     copy_word_list_add_num = [f"{i} ({idx})" for idx, i in enumerate(first_copy_word_list, start=1)]
     shuffle(copy_word_list_add_num)
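
The two shuffles at the top of section_5 set up the copy-practice page: every meaning appears three times in random order for tracing, while a second, numbered copy of the originals is shuffled separately so the appended index keeps the answer mapping intact. The same pattern in miniature (sample meanings are made up):

    from random import shuffle

    meanings = ["chance 机会", "healthy 健康的", "concert 音乐会"]   # hypothetical
    tracing = meanings * 3                   # three tracing passes per meaning
    shuffle(tracing)
    numbered = [f"{m} ({idx})" for idx, m in enumerate(meanings, start=1)]
    shuffle(numbered)                        # order lost, index keeps the mapping
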
@@ -473,6 +486,8 @@ def section_6(docx: Word, json_data, *args, **kwargs):
         p.add_run_to_p("☆ ", size=10, font_name="MS Gothic")
         p.add_run_to_p(t, size=10)
 
+   
+
     data = ["1. I have no chance to go sightseeing this summer.	(chance)",
             "2. And with that, we conclude the third and final example.	(third)",
             "3. He lives a healthy and normal life and has a strong body.	(healthy)",
@@ -490,8 +505,8 @@ def section_6(docx: Word, json_data, *args, **kwargs):
             "15. His performance at the concert last night proved that he is in the top of international pianists.	(concert)"]
 
     for i in example_sentence:
-        p = docx.add_blank_paragraph(dq=4, dh=4)
-        p.add_run_to_p("□  ", size=12, font_name="宋体")
+        p = docx.add_blank_paragraph(dq=4,dh=4)
+        p.add_run_to_p("□  ", size=12,font_name="宋体")
         p.add_run_to_p(i + "___________")
 
     docx.add_page_section()
@@ -499,20 +514,22 @@ def section_6(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_7(docx: Word, json_data, *args, **kwargs):
+   
     def wanxing(index, article_single):
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
-
+       
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-
+       
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
+       
         select_text = []
         for ques_index, candidates in enumerate(article_single['Questions'], start=1):
             single_select_text = ''
             for s in candidates['Candidates']:
                 single_select_text += s['Label'] + '. '
-                participle = s['Participle']
+                participle = s['Participle'] 
                 if participle:
                     single_select_text += participle + ' \n'
                 else:
@@ -521,11 +538,14 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
             select_text.append(f"{ques_index}. {single_select_text}")
 
+       
         all_select_text = "\n".join(select_text)
 
+       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
 
+       
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -568,6 +588,8 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="完形填空")
         tb2.set_tb_colum_width(width=[320, 140])
 
+       
+       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
@@ -578,7 +600,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else:
+                else: 
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -596,8 +618,9 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
 
+   
     def reading(index, article_single):
-
+       
         all_article_length = 0
 
         def single_yuedu(index, a):
@@ -605,19 +628,20 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             article_length = a['AllWordAmount']
             nonlocal all_article_length
             all_article_length += article_length
-
+           
             strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-
+           
             explanatory_words_ids = [i['MeaningId'] for i in a['ExplanatoryWords']]
 
+           
             select_text = []
             for ques_index, candidates in enumerate(a['Questions'], start=1):
                 single_select_text = ''
-
-                subject = candidates['Subject'] + '\n'
+               
+                subject = candidates['Subject'] + '\n' 
                 for s in candidates['Candidates']:
-                    single_select_text += s['Label'] + '. '
-                    participle = s['Participle']
+                    single_select_text += s['Label'] + '. ' 
+                    participle = s['Participle'] 
                     if participle:
                         single_select_text += participle + ' \n'
                     else:
@@ -625,11 +649,14 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         single_select_text += text + ' \n'
                 select_text.append(str(ques_index) + ". " + subject + single_select_text)
 
+           
             all_select_text = "\n".join(select_text)
 
+           
             article_main: str = a['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
             article_main_list = article_main.split(" ")
 
+           
             explanatory_words = "\n\n".join(
                 [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
                  enumerate(a['ExplanatoryWords'], start=1)])
@@ -645,6 +672,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
             tb2.set_tb_colum_width(width=[320, 140])
 
+           
             tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
             for w in article_main_list:
                 word = re.search(r"\[(\d+)]", w)
@@ -655,7 +683,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     elif meaning_id in explanatory_words_ids:
                         tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                    else:
+                    else: 
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
                 else:
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -668,9 +696,9 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             docx.add_blank_paragraph()
 
         "---------------------开始单篇运行---------------------"
-        if index == 1:
+        if index == 1: 
             sub_title_maker(docx, "阅读提升练", "智能匹配难度,轻松提升阅读", "春笋智学, 高效学习专家")
-
+           
             tb = Table(docx, 1, 1, tb_name="真题强化练", border=True)
             tb.set_tb_colum_width(0, 460)
             text = ["阅读中不认识的单词,尽量猜测词义,并用红笔加以标记,以便日后快速回顾。\n",
@@ -695,19 +723,21 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
 
+   
     def seven_to_five(index, article_single):
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
-
+       
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-
+       
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
+       
         select_text = []
         for ques_index, s_candidates in enumerate(article_single['Candidates'], start=1):
             single_select_text = ''
             single_select_text += s_candidates['Label'] + '. '
-            participle = s_candidates['Participle']
+            participle = s_candidates['Participle'] 
             if participle:
                 single_select_text += participle
             else:
@@ -716,11 +746,14 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
             select_text.append(f"{single_select_text}")
 
+       
         all_select_text = "\n".join(select_text)
 
+       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
 
+       
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} {i['SymbolsEn']} {i['SymbolsAm']} {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -752,6 +785,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
         tb2.set_tb_colum_width(width=[320, 140])
 
+       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
@@ -762,7 +796,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else:
+                else: 
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -779,18 +813,19 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
     "判断题型;根据题型选择----------------------------"
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category']
+        article_type = article_single['Category'] 
 
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
 
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single)
+        article_type_select[article_type](index, article_single) 
 
     docx.add_page_section()
 
 
 @time_use
 def section_8(docx: Word, json_data, *args, **kwargs):
+   
     sub_title_maker(docx, "单词趣味填", "趣味练习,多维提升和巩固")
     docx.add_pic_single_paragraph("make_docx_demo/static/happy_word.jpg", align="center", width=14.58)
     docx.add_page_section()
@@ -800,46 +835,51 @@ def section_8(docx: Word, json_data, *args, **kwargs):
 def section_9(docx: Word, json_data, *args, **kwargs):
     def wanxing(index, article_single):
         chinese_article = article_single['Chinese']
-        all_analysis = ''
+        all_analysis = '' 
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         text = "做阅读题的目的是锻炼理解英语文本的能力,答题只是检验理解程度的手段。请尽量根据所给题眼理解解题依据。若需要看汉语解析才能明白,你需要回到词汇与阅读训练,并从较低难度入手,以便打好基础。"
         docx.add_paragraph(text, size=9)
 
+       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip()
-            abcd_label = ''
+            analysis = question_item['Analysis'].strip() 
+            abcd_label = '' 
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']:
+                if abcd_selected['IsRight']: 
                     abcd_label += abcd_selected['Label'].strip()
 
-            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n"
+            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n" 
 
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph("全文参考译文", chinese_font_name="微软雅黑", dq=15, dh=5, bold=True)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
+   
     def reading(index, article_single):
-        all_analysis = ''
-        all_difficult_sentences = []
+        all_analysis = '' 
+        all_difficult_sentences = [] 
 
         chinese_article = article_single['Chinese']
 
+       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip("\n")
-            abcd_label = ''
+            analysis = question_item['Analysis'].strip("\n") 
+            abcd_label = '' 
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']:
+                if abcd_selected['IsRight']: 
                     abcd_label += abcd_selected['Label'].strip("\n")
 
             all_analysis += f"{ques_index}.{abcd_label}  {analysis}\n"
 
+       
         all_analysis += '\n'
 
+       
         for difficult_sentence_item in article_single['DifficultSentences']:
             all_difficult_sentences.append(difficult_sentence_item['Chinese'])
 
@@ -858,19 +898,19 @@ def section_9(docx: Word, json_data, *args, **kwargs):
 
     def seven_to_five(index, article_single):
         chinese_article = article_single['Chinese']
-        all_analysis = ''
+        all_analysis = '' 
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         text = "做阅读题的目的是锻炼理解英语文本的能力,答题只是检验理解程度的手段。请尽量根据所给题眼理解解题依据。若需要看汉语解析才能明白,你需要回到词汇与阅读训练,并从较低难度入手,以便打好基础。"
         docx.add_paragraph(text, size=9)
-
+       
         for q_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis']
-            abcd_label = ''
+            analysis = question_item['Analysis'] 
+            abcd_label = '' 
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']:
+                if abcd_selected['IsRight']: 
                     abcd_label += abcd_selected['Label']
             all_analysis += f"{q_index}.{abcd_label}  {analysis}\n"
 
@@ -882,10 +922,10 @@ def section_9(docx: Word, json_data, *args, **kwargs):
     "判断题型;根据题型选择----------------------------"
     sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯")
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category']
+        article_type = article_single['Category'] 
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single)
+        article_type_select[article_type](index, article_single) 
         docx.add_blank_paragraph()
 
     docx.add_docx_component("make_docx_demo/word_component/blank.docx")
@@ -933,16 +973,17 @@ def section_10(docx: Word, json_data, scanpage_format, **kwargs):
 
 
 def two_check_page(docx: Word, json_data, **kwargs):
+   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("")
+            word_data_list.append("") 
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[40, 100, 100])
 
         p_cell = tb.get_cell_paragraph(0, 0, dq=10)
         p = ParagraphBase(p_cell)
-        p.add_pic("make_docx_demo/static/logo2.png", width=Inches(1.2))
+        p.add_pic("make_docx_demo/static/logo2.png", width=Inches(1.2)) 
 
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 2, f"{page_title}\n{page_sub_title}", border=False, size=8, dh=2)
@@ -953,9 +994,9 @@ def two_check_page(docx: Word, json_data, **kwargs):
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
-        half_count = int(len(word_data_list) / 2)
-        for index, row in enumerate(range(half_count)):
-            first_word, second_word = word_data_list[row], word_data_list[row + half_count]
+        half_count = int(len(word_data_list) / 2) 
+        for index,row in enumerate(range(half_count)):
+            first_word, second_word = word_data_list[row],word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
             cell4 = "□ ___________________________" if second_word else ""
 
@@ -966,12 +1007,13 @@ def two_check_page(docx: Word, json_data, **kwargs):
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break()
+        docx.add_page_break() 
 
+   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""])
+            word_data_list2.append(["", ""]) 
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[40, 130, 130, 150, 70])
@@ -997,10 +1039,15 @@ def two_check_page(docx: Word, json_data, **kwargs):
         tb = Table(docx, rows=0, cols=4, tb_name="第二页筛查表")
 
         ## 1234横着放
-
+       
+       
+       
         #
-
+       
+       
         #
+       
+       
 
         ## 1234竖着放
         total_row = int(len(word_data_list2) / 2)
@@ -1011,13 +1058,13 @@ def two_check_page(docx: Word, json_data, **kwargs):
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
 
-            data = [f"{row + 1}. {spell1}", f"□ {meaning1}", cell3, cell4]
+            data = [f"{row + 1}. {spell1}", f"□ {meaning1}", cell3, cell4] 
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_row_height(13.6)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
-        if article_type == 1:
+        if article_type == 1: 
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
             docx.add_paragraph(foot_description2, align="right", size=8, chinese_font_name="仿宋")
@@ -1025,39 +1072,45 @@ def two_check_page(docx: Word, json_data, **kwargs):
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
 
-    student_name = json_data.get("StudentInfo").get("StudentName", '')
-    class_name = json_data.get("StudentInfo").get("ClassName", '')
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
-    article_type = json_data['Articles'][0]['Category']
-    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage']
+
+   
+    student_name = json_data.get("StudentInfo").get("StudentName", '') 
+    class_name = json_data.get("StudentInfo").get("ClassName", '') 
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
+    article_type = json_data['Articles'][0]['Category'] 
+    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage'] 
 
     """---------------------------------------------------------------------------------"""
     for index, page in enumerate(json_data['ScreeningScanPages'], start=1):
         page_id = str(page['PageId']).rjust(11, "0")
 
+       
         if index >= 2:
             docx.add_page_break()
 
-        page_title = page['Title']
-        page_sub_title = page['SubTitle']
-        foot_description = page['FootDescription']
-        foot_description2 = page['FootDescription2']
+        page_title = page['Title'] 
+        page_sub_title = page['SubTitle'] 
+        foot_description = page['FootDescription'] 
+        foot_description2 = page['FootDescription2'] 
 
         word_data_list1 = []
         word_data_list2 = []
-        for i in page['FilterTable']['Items']:
+        for i in page['FilterTable']['Items']: 
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
+       
         if is_add_empty_filter_page:
             empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list1)
 
+       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
 
 
 @time_use
 def other(docx, json_data, **kwargs):
+   
     sections = docx.doc.sections
     for section in sections[:-1]:
         section.top_margin = Inches(0.3)
@@ -1090,6 +1143,7 @@ def start_make_word(json_data, document_format, scanpage_format):
         for s in menu:
             s(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
 
+       
         docx.save_docx()
         if document_format == 1:
             return "develop.docx"
@@ -1107,5 +1161,6 @@ if __name__ == '__main__':
     t = time.time()
     os.chdir("..")
 
+   
     start_make_word(test_json5, 1, 1)
     print(time.time() - t)

+ 244 - 147
make_docx_demo/main_word_applet.py

@@ -1,31 +1,34 @@
 # -*- coding: UTF-8 -*-
 """专为鲍利提分小程序,制作的word文档;apifox接口在-单词教学宝-词汇突击学案文档生成接口"""
 
-import math
-import re
 import time
-from docx_base import Word, Table, ParagraphBase
-from random import shuffle
-
-from docx.shared import Inches
+import re
+import os
+import math
+import yaml
+from random import randint, shuffle
 
-from common.split_text import split_text_to_word_punctuation
-from config.read_config import address
+from docx.shared import Pt, Inches, Cm, RGBColor
+from docx.enum.text import WD_COLOR_INDEX
 from make_docx_demo.data import *
+from docx_base import Word, Table, hex_to_rgb, rgb_to_hex, ParagraphBase
 from make_docx_demo.docx_other_func import time_use, qrcode_maker, get_weekday
-from make_docx_demo.get_standard_data import get_standard_data
+from tools.loglog import logger, log_err_e
 from make_docx_demo.word2pdf import convert_word_to_pdf
-from tools.loglog import log_err_e
+from make_docx_demo.get_standard_data import get_standard_data
+from common.split_text import split_text_to_word_punctuation
+from config.read_config import address
 
 num_dict = {1: "❶", 2: "❷", 3: "❸", 4: "❹", 5: "❺", 6: "❻", 7: "❼", 8: "❽", 9: "❾",
             10: "❿", 11: "⓫", 12: "⓬", 13: "⓭", 14: "⓮", 15: "⓯", 16: "⓰", 17: "⓱", 18: "⓲", 19: "⓳", 20: "⓴"}
 
 
+
 @time_use
 def header_maker(docx: Word, json_data):
-    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0")
-    exercise_title = json_data.get("ExerciseTitle", "")
-    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
+    exercise_id = str(json_data.get("ExerciseId", "")).rjust(11, "0") 
+    exercise_title = json_data.get("ExerciseTitle", "") 
+    exercise_level = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
 
     student_name = json_data.get("StudentInfo").get("StudentName", '')
     class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "")
@@ -43,12 +46,15 @@ def header_maker(docx: Word, json_data):
         tb_header.set_cell_text(0, 4, f"{t_date}\n{t_weekday}\n{t_time}", size=8, border=False, color=(220, 220, 220))
 
         tb_header.set_tb_colum_width(width=[100, 70, 70, 150, 80])
+       
 
-    target_section = docx.doc.sections[-1]
+    target_section = docx.doc.sections[-1] 
     target_section.header.is_linked_to_previous = False
-
+   
     for paragraph in target_section.header.paragraphs:
-        paragraph.clear()
+        paragraph.clear() 
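+        # wipe the header inherited from the previous section so this one starts blank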
+   
+   
 
     target_section.header_distance = 0
     target_section.footer_distance = 280000
@@ -59,9 +65,9 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
     p = docx.add_blank_paragraph()
     line_width = 200
     main_rect_x = line_width + 10
-    main_rect_width = 150
+    main_rect_width = 150 
 
-    right_line_x = main_rect_x + main_rect_width + 10
+    right_line_x = main_rect_x + main_rect_width + 10 
 
     p.add_rectangle(main_title, x=main_rect_x, y=4, fill_color="000000", width=main_rect_width, height=48, font_color="ffffff",
                     font_size=18)
@@ -78,25 +84,32 @@ def sub_title_maker(docx: Word, main_title, sub_title_name1, sub_title_name2='
 
 @time_use
 def section_1(docx: Word, json_data, *args, **kwargs):
-    exercise_id_int = json_data.get("ExerciseId", "")
-    student_name = json_data.get("StudentInfo").get("StudentName", '')
-    student_stage = json_data.get("StudentInfo").get("StudentStage")
+   
+    exercise_id_int = json_data.get("ExerciseId", "") 
+    student_name = json_data.get("StudentInfo").get("StudentName", '') 
+    student_stage = json_data.get("StudentInfo").get("StudentStage") 
     grade_name = {1: "小学", 2: "初中", 3: "高中"}.get(student_stage)
-    t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+    t_date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 
 
+   
     totalVocabulary, readingAccuracy, readingLevel, readingSpeed = get_standard_data(student_stage)
 
-    FirstVocabulary = json_data['StudentInfo']['StudentStudy']['FirstVocabulary']
-    Vocabulary = json_data['StudentInfo']['StudentStudy']['Vocabulary']
-    ReadingVolume = json_data['StudentInfo']['StudentStudy']['ReadingVolume']
+    FirstVocabulary = json_data['StudentInfo']['StudentStudy']['FirstVocabulary'] 
+    Vocabulary = json_data['StudentInfo']['StudentStudy']['Vocabulary'] 
+    ReadingVolume = json_data['StudentInfo']['StudentStudy']['ReadingVolume'] 
 
-    r6 = json_data['StudentInfo']['StudentStudy']['ReadingLevel']
+   
+   
+   
+    r6 = json_data['StudentInfo']['StudentStudy']['ReadingLevel'] 
 
-    r7 = len([strange_words for exercise in json_data['WordAndArticleContents'] for strange_words in exercise['StrangeWords']])
-    r8 = r6
-    multi_article_difficulty = [article_obj['Score'] for article_obj in json_data['WordAndArticleContents'][0]['Articles']]
-    difficulty_value = sum(multi_article_difficulty) // len(multi_article_difficulty) if multi_article_difficulty else 0
+   
+    r7 = len([strange_words for exercise in json_data['WordAndArticleContents'] for strange_words in exercise['StrangeWords']]) 
+    r8 = r6 
+    multi_article_difficulty = [article_obj['Score'] for article_obj in json_data['WordAndArticleContents'][0]['Articles']] 
+    difficulty_value = sum(multi_article_difficulty) // len(multi_article_difficulty) if multi_article_difficulty else 0 
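+    # difficulty is the integer mean of the per-article scores, 0 when no articles are present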
 
+   
     InspirationalMessage = json_data.get('InspirationalMessage')
     "开始版面-------------------------------------------------"
 
@@ -143,10 +156,12 @@ def section_1(docx: Word, json_data, *args, **kwargs):
     t5.set_row_height(row_height=50)
     t5.set_tb_colum_width(0, 500)
 
+   
     docx.add_paragraph(text="多媒体辅助", size=16, align="left", bold=True, dq=10, dh=5)
     docx.add_paragraph(text="需要示范的的学员,扫以下二维码获取音频、视频示范:", size=12, align="left", dq=5, dh=5)
     p = docx.add_blank_paragraph()
 
+   
     img_io = qrcode_maker(full_url=f"{address}/link?type=exercise&id={exercise_id_int}&from=bltf")
     p.add_pic(img_io, width=2)
     img_io.close()
@@ -182,13 +197,17 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         cell_outside = tb_outside.get_cell(row, col, delete_default_para=True)
         tb_inside = Table(cell_outside, rows=5, cols=3, tb_name="内部内容")
 
+       
         tb_inside.merge_cell(0, 0, 0, 2)
-        tb_inside.merge_cell(1, 0, 1, 2)
+        tb_inside.merge_cell(1, 0, 1, 2) 
+       
+        tb_inside.merge_cell(2, 0, 2, 2) 
+        tb_inside.merge_cell(3, 0, 3, 2) 
+        tb_inside.merge_cell(4, 0, 4, 2) 
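+        # rows 0-4 are each merged across all three columns, so every row of the word card is one full-width cell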
 
-        tb_inside.merge_cell(2, 0, 2, 2)
-        tb_inside.merge_cell(3, 0, 3, 2)
-        tb_inside.merge_cell(4, 0, 4, 2)
+       
 
+       
         num_calucate = 2 * row + 1 if col == 0 else 2 * row + 2
         p = ParagraphBase(tb_inside.get_cell_paragraph(0, 0, align="left"))
         p.add_run_to_p(num_dict[num_calucate], bold=True, size=22, font_name="MS Gothic")
@@ -196,10 +215,19 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
         tb_inside.set_cell_text(row=1, column=0, cell_text=data[1] + "  " + data[2], border=False, size=10, align="left",
                                 bk_color=(240, 240, 240))
 
+       
+       
+       
+       
+       
+       
+       
+
+       
         cell_p = tb_inside.get_cell_paragraph(2, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
-        cell_p_1.add_run_to_p(data[3], size=10, bold=True)
-        cell_p_1.add_run_to_p("   " + data[4], size=8)
+        cell_p_1.add_run_to_p(data[3], size=10, bold=True) 
+        cell_p_1.add_run_to_p("   " + data[4], size=8) 
 
         cell_p = tb_inside.get_cell_paragraph(3, 0, align="left")
         cell_p_1 = ParagraphBase(cell_p)
@@ -213,25 +241,31 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
     properties_chinese_map = {"adj": "形容词", "n": "名词", "interj": "感叹词", "conj": "连词", "num": "数字", "art": "冠词",
                               "pron": "代词", "adv": "副词", "prep": "介词", "v": "动词"}
-    strange_words_data = []
+    strange_words_data = [] 
     strange_words = json_data.get('StrangeWords')
-    qrcode_thread = []
+    qrcode_thread = [] 
     qrcode_result = {}
 
     for item in strange_words:
-        spell = item['Spell']
-        word_id = item['WordId']
+        spell = item['Spell'] 
+        word_id = item['WordId'] 
         en = "" if not item.get("SymbolsEn", "") else item.get("SymbolsEn")
         am = "" if not item.get("SymbolsAm", "") else item.get("SymbolsAm")
 
-        symbols_en = "英" + f'[{en}]'
-        symbols_am = "美" + f'[{am}]'
+        symbols_en = "英" + f'[{en}]' 
+        symbols_am = "美" + f'[{am}]' 
+
+       
+       
+       
+       
+       
 
-        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']])
-        word_meanings = item.get('Meaning', "")
+        word_properties = " ".join([properties_chinese_map.get(i, "") for i in item['WordProperties']]) 
+        word_meanings = item.get('Meaning', "") 
         word_changes_list = []
-        for idx, s in enumerate(item["WordChanges"], start=1):
-            s_type, s_spell = s['Type'], s['Spell']
+        for idx, s in enumerate(item["WordChanges"],start=1):
+            s_type,s_spell = s['Type'], s['Spell']
             if "原型" in s_type or "大小写" in s_type:
                 continue
             tail = '\n' if idx != len(item["WordChanges"]) else ''
@@ -242,12 +276,12 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
             sentences = item['Sentences'][0]['English'] + '\n' + item['Sentences'][0]['Chinese']
         else:
             sentences = ""
-
+       
         single_word_tuple = (spell, symbols_en, symbols_am, word_properties, word_meanings,
                              "词汇变形", word_changes, "例句", sentences)
         strange_words_data.append(single_word_tuple)
 
-    rows = math.ceil(len(strange_words_data) / 2)
+    rows = math.ceil(len(strange_words_data) / 2) 
     tb_outside = Table(docx, rows=rows, cols=2, tb_name="外层框架")
     tb_outside.set_tb_colum_width(width=[230, 230])
 
@@ -267,10 +301,12 @@ def section_4_1(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_5(docx: Word, json_data, *args, **kwargs):
+   
     copy_word_list = [i['Meaning'] for i in json_data.get('StrangeWords')]
-    random_copy_word_list = copy_word_list * 3
+    random_copy_word_list = copy_word_list * 3 
     shuffle(random_copy_word_list)
 
+   
     first_copy_word_list = copy_word_list.copy()
     copy_word_list_add_num = [f"{i} ({idx})" for idx, i in enumerate(first_copy_word_list, start=1)]
     shuffle(copy_word_list_add_num)
@@ -333,20 +369,22 @@ def section_6(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_7(docx: Word, json_data, *args, **kwargs):
+   
     def wanxing(index, article_single):
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
-
+       
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-
+       
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
+       
         select_text = []
         for ques_index, candidates in enumerate(article_single['Questions'], start=1):
             single_select_text = ''
             for s in candidates['Candidates']:
                 single_select_text += s['Label'] + '. '
-                participle = s['Participle']
+                participle = s['Participle'] 
                 if participle:
                     single_select_text += participle + ' \n'
                 else:
@@ -355,11 +393,14 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
             select_text.append(f"{ques_index}. {single_select_text}")
 
+       
         all_select_text = "\n".join(select_text)
 
+       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
 
+       
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} [{i['SymbolsEn']}] [{i['SymbolsAm']}] {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -401,6 +442,8 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="完形填空")
         tb2.set_tb_colum_width(width=[320, 140])
 
+       
+       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
@@ -411,7 +454,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else:
+                else: 
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -427,40 +470,42 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(tail_zhushi, size=10.5)
         docx.add_blank_paragraph()
 
+   
     def reading(index, article_single):
 
         def single_yuedu(index, a):
             article_id = a['Id']
-            article_length = a['AllWordAmount']
+            article_length = a['AllWordAmount'] 
 
-            strange_words_ids = set()
-            explanatory_words_ids = set()
-            bold_word = set()
-            italics_word = set()
-            italics_index_dict = {}
+            strange_words_ids = set() 
+            explanatory_words_ids = set() 
+            bold_word = set() 
+            italics_word = set() 
+            italics_index_dict = {} 
 
             for i in json_data['StrangeWords']:
                 strange_words_ids.add(i['MeanId'])
                 bold_word.add(i['Spell'])
                 bold_word.update([change_word['Spell'] for change_word in i['WordChanges']])
-            for italics_index, ii in enumerate(a['ExplanatoryWords'], start=1):
+            for italics_index,ii in enumerate(a['ExplanatoryWords'], start=1):
                 explanatory_words_ids.add(ii['MeaningId'])
                 italics_word.add(ii['Spell'])
                 if 'WordChanges' in ii:
                     italics_word.update([change_word['Spell'] for change_word in ii['WordChanges']])
-                    italics_index_dict.update({change_word['Spell']: f"[{italics_index}]" for change_word in ii['WordChanges']})
-
+                    italics_index_dict.update({change_word['Spell']:f"[{italics_index}]" for change_word in ii['WordChanges']})
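+                    # every inflected form of an explanatory word maps to the same footnote number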
+               
                 italics_index_dict[ii['MeaningId']] = f"[{italics_index}]"
                 italics_index_dict[ii['Spell']] = f"[{italics_index}]"
 
+           
             select_text = []
             for ques_index, candidates in enumerate(a['Questions'], start=1):
                 single_select_text = ''
-
-                subject = candidates['Subject'] + '\n'
+               
+                subject = candidates['Subject'] + '\n' 
                 for s in candidates['Candidates']:
-                    single_select_text += s['Label'] + '. '
-                    participle = s['Participle']
+                    single_select_text += s['Label'] + '. ' 
+                    participle = s['Participle'] 
                     if participle:
                         single_select_text += participle + ' \n'
                     else:
@@ -468,11 +513,14 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                         single_select_text += text + ' \n'
                 select_text.append(str(ques_index) + ". " + subject + single_select_text)
 
+           
             all_select_text = "\n".join(select_text)
 
+           
             article_main: str = a['English'] + "\n\n郑重提示:认真看完全文再看问题。\n" + all_select_text
             article_main_list = split_text_to_word_punctuation(article_main)
 
+           
             explanatory_words = "\n\n".join(
                 [f"{index}. {i['Spell']}\n [{i['SymbolsEn']}] [{i['SymbolsAm']}]\n {i['Meaning']}" for index, i in
                  enumerate(a['ExplanatoryWords'], start=1)])
@@ -488,6 +536,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
             tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="阅读")
             tb2.set_tb_colum_width(width=[320, 140])
 
+           
             tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
             for w in article_main_list:
                 word = re.search(r"\[(\d+)]", w)
@@ -502,7 +551,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     else:
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
 
-                else:
+                else: 
                     if w in bold_word:
                         tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                     elif w in italics_word:
@@ -511,7 +560,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     else:
                         tb2_p.add_run_to_p(w + ' ', size=10.5)
 
-            tb2.set_cell_text(0, 1, explanatory_words, size=10.5, font_color=(80, 80, 80), align="left", centre=False, line_spacing=300)
+            tb2.set_cell_text(0, 1, explanatory_words, size=10.5, font_color=(80, 80, 80), align="left", centre=False,line_spacing=300)
 
             docx.add_blank_paragraph()
             tail_zhushi = """完成时间:_____点_____分_____秒,本篇用时:_____秒。"""
@@ -520,7 +569,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
         def top_header():
             sub_title_maker(docx, "阅读提升练", "智能匹配难度,轻松提升阅读", "鲍利提分, 高效学习专家")
-
+           
             tb = Table(docx, 1, 1, tb_name="真题强化练", border=True)
             tb.set_tb_colum_width(0, 460)
             text = ["阅读中不认识的单词,尽量猜测词义,并用斜线划掉,以便拍照报告给我们。\n",
@@ -541,25 +590,27 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     pp.add_run_to_p(t, size=10)
 
             docx.add_blank_paragraph()
-
         "---------------------开始单篇运行---------------------"
-        if index == 1:
+        if index == 1: 
             top_header()
         single_yuedu(index, article_single)
 
+
+   
     def seven_to_five(index, article_single):
         article_id = article_single['Id']
         article_length = article_single['AllWordAmount']
-
+       
         strange_words_ids = [i['MeanId'] for i in json_data['StrangeWords']]
-
+       
         explanatory_words_ids = [i['MeaningId'] for i in article_single['ExplanatoryWords']]
 
+       
         select_text = []
         for ques_index, s_candidates in enumerate(article_single['Candidates'], start=1):
             single_select_text = ''
             single_select_text += s_candidates['Label'] + '. '
-            participle = s_candidates['Participle']
+            participle = s_candidates['Participle'] 
             if participle:
                 single_select_text += participle
             else:
@@ -568,11 +619,14 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
             select_text.append(f"{single_select_text}")
 
+       
         all_select_text = "\n".join(select_text)
 
+       
         article_main: str = article_single['English'] + "\n\n郑重提示:认真看完全文再看问题。\n\n" + all_select_text
         article_main_list = article_main.split(" ")
 
+       
         explanatory_words = "\n\n".join(
             [f"{index}. {i['Spell']} [{i['SymbolsEn']}] [{i['SymbolsAm']}] {i['Meaning']}" for index, i in
              enumerate(article_single['ExplanatoryWords'], start=1)])
@@ -603,6 +657,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         tb2 = Table(docx, rows=1, cols=2, border=True, tb_name="七选五")
         tb2.set_tb_colum_width(width=[320, 140])
 
+       
         tb2_p = ParagraphBase(tb2.get_cell_paragraph(0, 0, align="left"))
         for w in article_main_list:
             word = re.search(r"\[(\d+)]", w)
@@ -613,7 +668,7 @@ def section_7(docx: Word, json_data, *args, **kwargs):
                     tb2_p.add_run_to_p(w + ' ', size=10.5, bold=True)
                 elif meaning_id in explanatory_words_ids:
                     tb2_p.add_run_to_p(w + ' ', size=10.5, italic=True)
-                else:
+                else: 
                     tb2_p.add_run_to_p(w + ' ', size=10.5)
             else:
                 tb2_p.add_run_to_p(w + ' ', size=10.5)
@@ -623,17 +678,18 @@ def section_7(docx: Word, json_data, *args, **kwargs):
         docx.add_blank_paragraph()
 
     "判断题型;根据题型选择----------------------------"
-
+   
     all_article_length = 0
 
     for index, article_single in enumerate(json_data['Articles'], start=1):
-        article_type = article_single['Category']
+        article_type = article_single['Category'] 
 
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
 
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_single)
+        article_type_select[article_type](index, article_single) 
 
+       
         article_length = article_single['AllWordAmount']
         all_article_length += article_length
 
@@ -647,49 +703,53 @@ def section_7(docx: Word, json_data, *args, **kwargs):
 
 @time_use
 def section_9(docx: Word, json_data, *args, **kwargs):
-    def wanxing(index, article_count, article_single):
+    def wanxing(index,article_count, article_single):
         chinese_article = article_single['Chinese']
-        all_analysis = ''
+        all_analysis = '' 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
 
+       
         for ques_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis'].strip()
-            abcd_label = ''
+            analysis = question_item['Analysis'].strip() 
+            abcd_label = '' 
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']:
+                if abcd_selected['IsRight']: 
                     abcd_label += abcd_selected['Label'].strip()
 
-            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n"
+            all_analysis += f"{ques_index}.\n{abcd_label}  {analysis}\n" 
 
         docx.add_paragraph(all_analysis, size=9)
         docx.add_paragraph("全文参考译文", chinese_font_name="微软雅黑", dq=15, dh=5, bold=True)
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
-    def reading(index, article_count, article_single):
+   
+    def reading(index,article_count, article_single):
         """
         index : passed in by the caller, starting from 1. If there is only
         """
-        all_analysis = ''
-        all_difficult_sentences = []
+        all_analysis = '' 
+        all_difficult_sentences = [] 
 
         chinese_article = article_single['Chinese']
 
+       
         questions = article_single['Questions']
         for ques_index, question_item in enumerate(questions, start=1):
-            analysis = question_item['Analysis'].strip("\n")
-            abcd_label = ''
+            analysis = question_item['Analysis'].strip("\n") 
+            abcd_label = '' 
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']:
+                if abcd_selected['IsRight']: 
                     abcd_label += abcd_selected['Label'].strip("\n")
 
-            new_line = "" if ques_index == len(questions) else "\n"
+            new_line = "" if ques_index==len(questions) else "\n"
             all_analysis += f"{ques_index}.{abcd_label}  {analysis}{new_line}"
 
-        if index != article_count:
+       
+        if index!=article_count:
             all_analysis += '\n'
 
         docx.add_paragraph(f"Passage {index}", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True, size=16)
@@ -699,19 +759,20 @@ def section_9(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
         docx.add_paragraph(all_analysis, size=9)
 
-    def seven_to_five(index, article_count, article_single):
+
+    def seven_to_five(index,article_count, article_single):
         chinese_article = article_single['Chinese']
-        all_analysis = ''
+        all_analysis = '' 
 
         docx.add_paragraph("答案和解析", chinese_font_name="微软雅黑", dq=5, dh=5, bold=True)
-
+       
         for q_index, question_item in enumerate(article_single['Questions'], start=1):
-            analysis = question_item['Analysis']
-            abcd_label = ''
+            analysis = question_item['Analysis'] 
+            abcd_label = '' 
 
             candidates = question_item['Candidates']
             for abcd_selected in candidates:
-                if abcd_selected['IsRight']:
+                if abcd_selected['IsRight']: 
                     abcd_label += abcd_selected['Label']
             all_analysis += f"{q_index}.{abcd_label}  {analysis}\n"
 
@@ -721,14 +782,14 @@ def section_9(docx: Word, json_data, *args, **kwargs):
         docx.add_paragraph(chinese_article, size=9, dq=5, dh=5, line_spacing=300)
 
     "判断题型;根据题型选择----------------------------"
-    sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯", "鲍利提分,你的智能教练")
+    sub_title_maker(docx, "解题自主纠", "自主学习,逐步养成良好学习习惯","鲍利提分,你的智能教练")
     articles = json_data['Articles']
     article_count = len(articles)
     for index, article_single in enumerate(articles, start=1):
-        article_type = article_single['Category']
+        article_type = article_single['Category'] 
         article_type_select = {1: reading, 2: wanxing, 3: seven_to_five}
         assert article_type in article_type_select
-        article_type_select[article_type](index, article_count, article_single)
+        article_type_select[article_type](index,article_count, article_single) 
 
     docx.add_docx_component("make_docx_demo/word_component/blank.docx")
     docx.add_page_section()
@@ -776,15 +837,20 @@ def section_10(docx: Word, json_data, scanpage_format, *args, **kwargs):
 
 @time_use
 def two_check_page(docx: Word, json_data, *args, **kwargs):
+   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
-        page_sub_title = "词汇训练"
+        page_sub_title = "词汇训练" 
 
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("")
+            word_data_list.append("") 
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[40, 100, 100])
 
+       
+       
+       
+
         tb.set_tb_colum_width(0, 100)
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
@@ -796,7 +862,7 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
-        half_count = int(len(word_data_list) / 2)
+        half_count = int(len(word_data_list) / 2) 
         for index, row in enumerate(range(half_count)):
             first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
@@ -809,13 +875,14 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break()
+        docx.add_page_break() 
 
+   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
-        page_sub_title = "词汇训练"
+        page_sub_title = "词汇训练" 
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""])
+            word_data_list2.append(["", ""]) 
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[80, 100, 120, 150, 70])
@@ -847,56 +914,73 @@ def two_check_page(docx: Word, json_data, *args, **kwargs):
             cell3 = f"{spell2}" if spell2 else ""
             cell4 = f"{total_row + row + 1}. {meaning2}" if meaning2 else ""
 
-            data = [f"{spell1}", f"{row + 1}. {meaning1}", cell3, cell4]
+            data = [f"{spell1}", f"{row + 1}. {meaning1}", cell3, cell4] 
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9], alignment=['right', 'left', 'right', 'left'])
 
+       
         tb.set_row_height(13.8)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
 
         docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                            font_name="仿宋", dq=5)
 
-    student_name = json_data.get("StudentInfo").get("StudentName", '')
-    class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "")
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
+
+   
+    student_name = json_data.get("StudentInfo").get("StudentName", '') 
+    class_name = json_data.get("StudentInfo").get("ClassName", '').replace("词汇突击", "") 
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
     article_type = 1
     try:
-        article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category']
+        article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category'] 
     except Exception as e:
         log_err_e(e, "学案类型不存在就取1,词汇突击里面只有阅读理解")
 
+   
+
     """---------------------------------------------------------------------------------"""
     screening_scanPages = json_data['ScreeningScanPages']
     for index, page in enumerate(screening_scanPages, start=1):
         page_id = str(page['PageId']).rjust(11, "0")
 
-        page_title = page['Title']
-        page_sub_title = page['SubTitle']
-        foot_description = page['FootDescription']
-        foot_description2 = page['FootDescription2']
+        page_title = page['Title'] 
+        page_sub_title = page['SubTitle'] 
+        foot_description = page['FootDescription'] 
+        foot_description2 = page['FootDescription2'] 
+       
 
         word_data_list1 = []
         word_data_list2 = []
-        for i in page['FilterTable']['Items']:
+        for i in page['FilterTable']['Items']: 
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
+       
+       
+       
+
+       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
-
-        if index != len(screening_scanPages):
-            pass
+       
+        if index!=len(screening_scanPages):
+            pass 
         docx.add_page_break()
 
 
+
 def old_two_check_page(docx: Word, json_data, **kwargs):
+   
     def empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list):
         if len(word_data_list) % 2 != 0:
-            word_data_list.append("")
+            word_data_list.append("") 
 
         tb = Table(docx, 1, 3, tb_name="头部三元素")
         tb.set_tb_colum_width(width=[140, 100, 100])
 
+       
+       
+       
+
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8, dh=2)
         tb.set_cell_text(0, 2, f"{page_title}\n{page_sub_title}", border=False, size=8, dh=2)
@@ -907,7 +991,7 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
 
-        half_count = int(len(word_data_list) / 2)
+        half_count = int(len(word_data_list) / 2) 
         for index, row in enumerate(range(half_count)):
             first_word, second_word = word_data_list[row], word_data_list[row + half_count]
             cell3 = f"{index + 1 + half_count}. {second_word}" if second_word else ""
@@ -920,16 +1004,21 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
         blank_count = " " * 80
         p = docx.add_blank_paragraph(dq=5)
         p.add_run_to_p(f"{t_datetime} {page_title}-{page_sub_title}{blank_count}", size=8, chinese_font_name="仿宋", font_name="仿宋")
-        docx.add_page_break()
+        docx.add_page_break() 
 
+   
     def filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2):
         if len(word_data_list2) % 2 != 0:
-            word_data_list2.append(["", ""])
+            word_data_list2.append(["", ""]) 
 
         tb = Table(docx, 1, 5, tb_name="头部五元素")
         tb.set_tb_colum_width(width=[80, 100, 120, 150, 70])
 
+       
+       
+       
+
         tb.set_cell_text(0, 0, f"鲍利提分", border=False, size=16, bold=True, chinese_font_name="黑体")
         tb.set_cell_text(0, 1, f"{class_name}\n{student_name}", border=False, size=8)
         tb.set_cell_text(0, 2, f"{page_id}", border=False, size=16, dh=2, bold=True, font_name="黑体")
@@ -962,13 +1051,13 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
             cell3 = f"{total_row + row + 1}. {spell2}" if spell2 else ""
             cell4 = f"□ {meaning2}" if meaning2 else ""
 
-            data = [cell1, cell2, cell3, cell4]
+            data = [cell1,cell2, cell3, cell4] 
             tb.add_table_row_data_xml_fastly(data, font_size=[10.5, 9, 10.5, 9])
 
         tb.set_all_border_fastly(xml=True, outside_side_border=True, outside_side_border_size=5)
         tb.set_row_height(13.6, first_row_h=6)
         tb.set_table_width_xml([2124, 3257, 2140, 3257])
-        if article_type == 1:
+        if article_type == 1: 
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
             docx.add_paragraph(foot_description2, align="right", size=8, chinese_font_name="仿宋")
@@ -976,45 +1065,51 @@ def old_two_check_page(docx: Word, json_data, **kwargs):
             docx.add_paragraph(f"{t_datetime} {page_title}-{page_sub_title}{foot_description}", size=8, chinese_font_name="仿宋",
                                font_name="仿宋", dq=5)
 
-    student_name = json_data.get("StudentInfo").get("StudentName", '')
-    class_name = json_data.get("StudentInfo").get("ClassName", '')
-    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime())
-    article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category']
-    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage']
+   
+    student_name = json_data.get("StudentInfo").get("StudentName", '') 
+    class_name = json_data.get("StudentInfo").get("ClassName", '') 
+    t_datetime = time.strftime("%Y-%m-%d %H:%M", time.localtime()) 
+    article_type = json_data['WordAndArticleContents'][0]['Articles'][0]['Category'] 
+    is_add_empty_filter_page = json_data['Config']['AddEmptyFilterPage'] 
 
     """---------------------------------------------------------------------------------"""
     for index, page in enumerate(json_data['ScreeningScanPages'], start=1):
         page_id = str(page['PageId']).rjust(11, "0")
 
+       
         if index >= 2:
             docx.add_page_break()
 
-        page_title = page['Title']
-        page_sub_title = page['SubTitle']
-        foot_description = page['FootDescription']
-        foot_description2 = page['FootDescription2']
+        page_title = page['Title'] 
+        page_sub_title = page['SubTitle'] 
+        foot_description = page['FootDescription'] 
+        foot_description2 = page['FootDescription2'] 
 
         word_data_list1 = []
         word_data_list2 = []
 
-        item_list: list = page['FilterTable']['Items']
+       
+        item_list:list = page['FilterTable']['Items']
         item_count = len(item_list)
-        if item_count < 100:
-            item_list.extend([{"Spell": "", "Meaning": ""} for _ in range(100 - item_count)])
+        if item_count<100:
+            item_list.extend([{"Spell":"","Meaning":""} for _ in range(100-item_count)])
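+        # pad the filter table to a fixed 100 items so the scan-page grid keeps a constant size (assumed intent)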
 
-        for i in page['FilterTable']['Items']:
+        for i in page['FilterTable']['Items']: 
             word_data_list1.append(i['Spell'])
             word_data_list2.append([i['Spell'], i['Meaning']])
 
+       
         if is_add_empty_filter_page:
             empty_filter_page(class_name, student_name, page_title, page_sub_title, t_datetime, word_data_list1)
 
+       
         filter_table_page(page_id, class_name, student_name, page_title, page_sub_title, t_datetime,
                           foot_description, foot_description2, article_type, word_data_list2)
 
 
 @time_use
 def other(docx, json_data, *args, **kwargs):
+   
     sections = docx.doc.sections
     for section in sections[:-1]:
         section.top_margin = Inches(0.3)
@@ -1032,20 +1127,21 @@ def other(docx, json_data, *args, **kwargs):
 
 
 def start_make_word(json_data, document_format, scanpage_format):
-    parent_path = "make_docx_demo/file_result/"
+    parent_path = "make_docx_demo/file_result/" 
     if not os.path.exists(parent_path):
         os.makedirs(parent_path)
     try:
-        exercise_id = json_data['ExerciseId']
+        exercise_id = json_data['ExerciseId'] 
 
+       
         docx = Word(save_file_name=f"{parent_path}{exercise_id}.docx",
                     start_template_name="make_docx_demo/word_component/start_template.docx")
-
+       
         section_1(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-
+       
         section_4(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-
-        for exercise_json in json_data['WordAndArticleContents']:
+       
+        for exercise_json in json_data['WordAndArticleContents']: 
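+            # each word-and-article bundle renders its own word cards, drills, articles and answer key (sections 4.1-9)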
             section_4_1(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
             section_5(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
             section_6(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
@@ -1053,9 +1149,9 @@ def start_make_word(json_data, document_format, scanpage_format):
             section_9(docx=docx, json_data=exercise_json, scanpage_format=scanpage_format)
 
         if scanpage_format == 1:
-
+           
             two_check_page(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
-
+           
             old_two_check_page(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
         elif scanpage_format == 2:
             section_10(docx=docx, json_data=json_data, scanpage_format=scanpage_format)
@@ -1082,5 +1178,6 @@ if __name__ == '__main__':
     t = time.time()
     os.chdir("..")
 
+   
     start_make_word(test_json1, 1, 1)
     print(time.time() - t)

+ 8 - 10
make_docx_demo/new_word2pdf.py

@@ -1,43 +1,41 @@
 # -*- coding:utf-8 -*-
 
+import os
 import time
+from docx2pdf import convert
+import win32com.client
+from concurrent.futures import ProcessPoolExecutor
 from multiprocessing import Process
-
 import pythoncom
-import win32com.client
-from docx2pdf import convert
 
 
 def convert_word_to_pdf(input_file):
     output_file = input_file.replace('.docx', '.pdf')
     word = win32com.client.Dispatch("Word.Application")
-    word.Visible = False
+    word.Visible = False 
     doc = word.Documents.Open(input_file)
-    doc.SaveAs(output_file, FileFormat=17)
+    doc.SaveAs(output_file, FileFormat=17) 
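+    # FileFormat=17 is wdFormatPDF in the Word COM automation API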
     doc.Close()
     word.Quit()
 
-
 def convert_word_to_pdf2(input_file):
     pythoncom.CoInitialize()
     convert(input_file)
     pythoncom.CoUninitialize()
 
-
 if __name__ == '__main__':
     import os
-
     files = os.listdir(r"C:\Users\86131\Desktop\回收\潘资料")
     print(files)
 
     t = time.time()
     p_lists = []
     for file in files:
-        p1 = Process(target=convert_word_to_pdf2, args=(r"C:\\Users\\86131\\Desktop\\回收\\潘资料\\" + file,))
+        p1 = Process(target=convert_word_to_pdf2, args=(r"C:\\Users\\86131\\Desktop\\回收\\潘资料\\"+file,))
         p1.start()
         p_lists.append(p1)
 
     for p in p_lists:
         p.join()
 
-    print(time.time() - t)
+    print(time.time() - t)

+ 25 - 11
make_docx_demo/word2pdf.py

@@ -1,11 +1,10 @@
 # -*- coding=utf-8 -*-
-import os
+from docx2pdf import convert
+import pythoncom
 import time
+import os
 from threading import Lock
 
-import pythoncom
-from docx2pdf import convert
-
 ll = Lock()
 
 
@@ -15,25 +14,24 @@ def convert_word_to_pdf(pdf_name):
             ll.acquire()
             print('lock acquired, converting to PDF')
             pythoncom.CoInitialize()
-            convert(f'{pdf_name}.docx')
+            convert(f'{pdf_name}.docx') 
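+            # docx2pdf may return before Word finishes writing, so poll up to ~15 s for the PDF to appear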
             for i in range(30):
                 if os.path.exists(f'{pdf_name}.pdf'):
                     break
                 time.sleep(0.5)
             break
         except Exception as ee:
-
+           
             print(ee)
         finally:
             pythoncom.CoUninitialize()
             print('lock released, PDF conversion finished')
-            ll.release()
-
+            ll.release() 
 
 def convert_word_to_pdf2(pdf_name):
     for cccou in range(3):
         try:
-            convert(f'{pdf_name}.docx')
+            convert(f'{pdf_name}.docx') 
             for i in range(30):
                 if os.path.exists(f'{pdf_name}.pdf'):
                     break
@@ -44,6 +42,22 @@ def convert_word_to_pdf2(pdf_name):
 
 
 if __name__ == '__main__':
-    pass
-
+    import multiprocessing
+
+   
+   
+   
+
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
     #
+   

+ 9 - 6
make_docx_demo/word_component/make_rectangle.py

@@ -1,16 +1,15 @@
 # -*- coding:utf-8 -*-
 from random import randint
 
-
 def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color, font_size, boder_color, chinese_font, english_font, dash,
-                   shape_type='rect', rotate_angle=0, behindDoc=0):
+                   shape_type='rect',rotate_angle=0,behindDoc=0):
     """
     rotate_angle: rotation in degrees, clockwise (30, 60, 90, ...)
     behindDoc: 0 floats the shape above the text, 1 places it behind the text"""
     if x > 600: x = 600
     if y > 800: y = 800
     font_size = font_size * 2
-    boder_size = boder_size * 12700
+    boder_size = boder_size * 12700 
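+    # DrawingML measures lengths in EMUs (12700 EMU = 1 pt), so convert the border width from points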
     dash_elem = '<a:prstDash val="dash"/>' if dash else ''
 
     idid = randint(1, 99999)
@@ -27,7 +26,9 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
         boder = f"""<a:lnRef idx="2"><a:srgbClr val="{boder_color}"/></a:lnRef>"""
         noboder = f"""<a:ln w="{boder_size}"><a:srgbClr val="{boder_color}"/>{dash_elem}</a:ln>"""
     else:
-
+       
+       
+       
         boder = """<a:lnRef idx="2"><a:noFill/></a:lnRef>"""
         noboder = """<a:ln w="12700"><a:noFill/></a:ln>"""
 
@@ -62,6 +63,7 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
     else:
         insert_text_xml = ''
 
+   
     shape_geom_map = {
         'rect': '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>',
         'circle': '<a:prstGeom prst="ellipse"><a:avLst/></a:prstGeom>',
@@ -80,6 +82,7 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
         'arc': '<a:prstGeom prst="arc"><a:avLst/></a:prstGeom>',
     }
 
+   
     shape_geom = shape_geom_map.get(shape_type, '<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>')
 
     r = f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
@@ -148,7 +151,7 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
 										<wps:wsp>
 											<wps:cNvSpPr/>
 											<wps:spPr>
-												<a:xfrm rot="{60000 * rotate_angle}">
+												<a:xfrm rot="{60000*rotate_angle}">
 													<a:off x="0"
 													       y="0"/>
 													<a:ext cx="1777593"
@@ -221,4 +224,4 @@ def make_shape_fun(text, x, y, boder_size, width, height, font_color, fill_color
 		</w:sectPr>
 	</w:body>
 </w:document>"""
-    return r
+    return r

+ 71 - 59
mock/mock_request.py

@@ -1,42 +1,41 @@
 # -*- coding:utf-8 -*-
 #
-import json
+import json 
 import time
-from concurrent.futures import ThreadPoolExecutor, wait
 from functools import wraps
-from random import shuffle, sample, randint
-from typing import List
+from random import shuffle,sample,randint
+from threading import Thread
+from concurrent.futures import ThreadPoolExecutor,wait
 
 import httpx
 import requests
 from pydantic import BaseModel
+from typing import List
+
 
-product_adress = "http://111.231.167.191"
-test_address = "http://111.231.167.191:8004"
-test_address2 = "http://111.231.167.191:8003"
+product_adress = "http://111.231.167.191" 
+test_address = "http://111.231.167.191:8004" 
+test_address2 = "http://111.231.167.191:8003" 
 
-local_adress = "http://127.0.0.1:9000"
+local_adress = "http://127.0.0.1:9000" 
 
-use_address = test_address
+use_address = product_adress 
 
 
 class DifficultSentence(BaseModel):
     english: str
     chinese: str
 
-
 class Candidate(BaseModel):
     label: str
     text: str
     isRight: int
 
-
 class Question(BaseModel):
     trunk: str
     analysis: str
     candidates: List[Candidate]
 
-
 class Article(BaseModel):
     difficultSentences: List[DifficultSentence]
     usedMeanIds: List[int]
@@ -45,21 +44,20 @@ class Article(BaseModel):
     chineseArticle: str
     allWordAmount: int
 
-
 class ArticleData(BaseModel):
     articles: List[Article]
 
 
 def time_use(fn):
     @wraps(fn)
-    def cc(*args, **kwargs):
+    def cc(*args, **kwargs): 
         f_time = time.time()
         res = fn(*args, **kwargs)
 
         cha = round(time.time() - f_time, 3)
         if cha > 0.1:
             print(f'function {fn.__name__} took', cha, 'seconds')
-        return res
+        return res 
 
     return cc
 
@@ -82,7 +80,7 @@ def get_article():
                  "student_stage": 1, "vocabulary": 700, "class_id": 123456}
 
     r = requests.post(f"{use_address}/article", json=json_data)
-
+   
     key = r.json()['key']
     time.sleep(120)
     query_file_content(key)
@@ -92,7 +90,7 @@ def query_file_content(key):
     json_data = {"key": key}
     try:
         r = requests.post(f"{use_address}/query_oss_file", json=json_data)
-        r.raise_for_status()
+        r.raise_for_status() 
         response_data = r.json()
         assert response_data['wordCount'] > 0, "word count is 0"
     except requests.RequestException as e:
@@ -107,9 +105,9 @@ def query_file_content(key):
 
 def get_audio():
     word = "cat"
-    r1 = requests.post(f"{use_address}/tts", json={"text": word, "type": 0})
-    r2 = requests.post(f"{use_address}/tts", json={"text": word, "type": 2})
-    r3 = requests.post(f"{use_address}/tts", json={"text": word, "type": 1})
+    r1 = requests.post(f"{use_address}/tts", json={"text": word, "type": 0}) 
+    r2 = requests.post(f"{use_address}/tts", json={"text": word, "type": 2}) 
+    r3 = requests.post(f"{use_address}/tts", json={"text": word, "type": 1}) 
     assert r1.json()['code'] == 200
     assert r2.json()['code'] == 200
     assert r3.status_code == 200
@@ -202,9 +200,9 @@ def get_article2_1():
                        {'spell': 'offer', 'meaning': '提出, 提供', 'word_id': 1005291, 'meaning_id': 824, 'serial': 569}
                        ]
     shuffle(core_words_list)
-    core_words_chiose_list = sample(core_words_list, 15)
+    core_words_chiose_list = sample(core_words_list,15)
     json_data = {'core_words': core_words_chiose_list,
-                 'take_count': 1, 'student_stage': 2, 'demo_name': '春笋英语', "exercise_id": randint(100, 999),
+                 'take_count': 1, 'student_stage': 2, 'demo_name': '春笋英语', "exercise_id": randint(100,999),
                  "article_length": 220, "reading_level": 25}
 
     r = requests.post(f"{use_address}/article/reading-comprehension", json=json_data)
@@ -215,7 +213,6 @@ def get_article2_1():
         print("春笋文章reading-comprehension错误", e)
         print("错误数据", r_json)
 
-
 @time_use
 def get_article2_2():
     """测试通过requests来直接访问openai"""
@@ -265,7 +262,7 @@ def get_article2_2():
                        {'spell': 'migration', 'meaning': '迁移, 移居', 'word_id': 1175117, 'meaning_id': 5069, 'serial': 4063}
                        ]
     shuffle(core_words_list)
-    core_words_chiose_list = sample(core_words_list, 15)
+    core_words_chiose_list = sample(core_words_list,15)
     core_words_meaning_str = "; ".join([f"[{i['meaning_id']}  {i['spell']} {i['meaning']}]" for i in core_words_chiose_list])
 
     question = f"""下面我会为你提供一组数据,[单词组](里面包含词义id,英语单词,中文词义),请根据这些单词的中文词义,生成一篇带中文翻译的考场英语文章,英语文章和中文翻译要有[标题]。注意这个单词有多个词义时,生成的英语文章一定要用提供的中文词义。并挑选一句复杂的句子和其中文翻译,放入difficultSentences。英语文章,放入"englishArticle"中。中文翻译,放入"chineseArticle"中。最终文中使用到的单词id放入"usedMeanIds"中。4个选择题,放入questions字段。questions结构下有4个选择题对象,其中trunk是[英语]问题文本,analysis是[中文]的问题分析,candidates是4个ABCD选项,内部有label是指选项序号A B C D ,text是[英语]选项文本,isRight是否正确答案1是正确0是错误。
@@ -281,47 +278,39 @@ def get_article2_2():
 提供[单词组]:{core_words_meaning_str}
 """
 
+   
+   
     url = 'http://170.106.108.95/v1/chat/completions'
 
+   
     headers = {
         "Authorization": f"Bearer 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",
         "Content-Type": "application/json"
     }
 
+   
     data = {
-        "model": "gpt-4.1",
+        "model": "gpt-4.1", 
         "messages": [
-
+           
             {"role": "user", "content": question}
         ],
-        "max_tokens": 8000,
-        "temperature": 1.2,
-        "n": 4,
-        "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {
-            'properties': {'label': {'title': 'Label', 'type': 'string'}, 'text': {'title': 'Text', 'type': 'string'},
-                           'isRight': {'title': 'Isright', 'type': 'integer'}}, 'required': ['label', 'text', 'isRight'], 'title': 'Candidate',
-            'type': 'object'}, 'DifficultSentence': {
-            'properties': {'english': {'title': 'English', 'type': 'string'}, 'chinese': {'title': 'Chinese', 'type': 'string'}},
-            'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {
-            'properties': {'trunk': {'title': 'Trunk', 'type': 'string'}, 'analysis': {'title': 'Analysis', 'type': 'string'},
-                           'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': 'Candidates', 'type': 'array'}},
-            'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {
-            'difficultSentences': {'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': 'Difficultsentences', 'type': 'array'},
-            'usedMeanIds': {'items': {'type': 'integer'}, 'title': 'Usedmeanids', 'type': 'array'},
-            'questions': {'items': {'$ref': '#/$defs/Question'}, 'title': 'Questions', 'type': 'array'},
-            'englishArticle': {'title': 'Englisharticle', 'type': 'string'}, 'chineseArticle': {'title': 'Chinesearticle', 'type': 'string'},
-            'allWordAmount': {'title': 'Allwordamount', 'type': 'integer'}}, 'required': ['difficultSentences', 'usedMeanIds', 'questions',
-                                                                                          'englishArticle', 'chineseArticle', 'allWordAmount'],
-                                                                                                 'title': 'Article', 'type': 'object'}}}
+        "max_tokens": 8000, 
+        "temperature": 1.2, 
+        "n":4,
+        "response_format": {'type': 'json_schema', 'json_schema': {'name': 'Article', 'schema': {'$defs': {'Candidate': {'properties': {'label': {'title': 'Label', 'type': 'string'}, 'text': {'title': 'Text', 'type': 'string'}, 'isRight': {'title': 'Isright', 'type': 'integer'}}, 'required': ['label', 'text', 'isRight'], 'title': 'Candidate', 'type': 'object'}, 'DifficultSentence': {'properties': {'english': {'title': 'English', 'type': 'string'}, 'chinese': {'title': 'Chinese', 'type': 'string'}}, 'required': ['english', 'chinese'], 'title': 'DifficultSentence', 'type': 'object'}, 'Question': {'properties': {'trunk': {'title': 'Trunk', 'type': 'string'}, 'analysis': {'title': 'Analysis', 'type': 'string'}, 'candidates': {'items': {'$ref': '#/$defs/Candidate'}, 'title': 'Candidates', 'type': 'array'}}, 'required': ['trunk', 'analysis', 'candidates'], 'title': 'Question', 'type': 'object'}}, 'properties': {'difficultSentences': {'items': {'$ref': '#/$defs/DifficultSentence'}, 'title': 'Difficultsentences', 'type': 'array'}, 'usedMeanIds': {'items': {'type': 'integer'}, 'title': 'Usedmeanids', 'type': 'array'}, 'questions': {'items': {'$ref': '#/$defs/Question'}, 'title': 'Questions', 'type': 'array'}, 'englishArticle': {'title': 'Englisharticle', 'type': 'string'}, 'chineseArticle': {'title': 'Chinesearticle', 'type': 'string'}, 'allWordAmount': {'title': 'Allwordamount', 'type': 'integer'}}, 'required': ['difficultSentences', 'usedMeanIds', 'questions', 'englishArticle', 'chineseArticle', 'allWordAmount'], 'title': 'Article', 'type': 'object'}}}
     }
 
     try:
-        response = httpx.post(url, headers=headers, json=data, timeout=300)
+        response = httpx.post(url, headers=headers, json=data,timeout=300)
         return response.json()
     except Exception as e:
         print(f"错误:{type(e).__name__}: {e}")
 
 
 def download_word():
     from make_docx_demo.data import test_json2
     params = {"document_format": 2, "scanpage_format": 1}
@@ -370,33 +359,38 @@ def get_article3():
 
 @time_use
 def run_all_test_cese():
-    test_connect()
+    test_connect() 
 
-    get_audio()
-    spoken_language()
-    download_word()
-    get_article2_1()
+    get_audio() 
+    spoken_language() 
+    download_word() 
+    get_article2_1() 
+    article_annotation() 
 
 
 @time_use
 def multi_request():
     with ThreadPoolExecutor(max_workers=150) as executor:
-
+       
         futures = [executor.submit(get_article2_1) for _ in range(100)]
 
         wait(futures)
         print("完成等待")
-        f = open("result.txt", 'w', encoding='utf-8')
+        f = open("result.txt",'w',encoding='utf-8')
 
         try:
-            for index, future in enumerate(futures, start=1):
-                f.write(str(future.result()) + '\n')
+            for index,future in enumerate(futures,start=1):
+                f.write(str(future.result()) + '\n') 
         except Exception as e:
             print(f"错误:{type(e).__name__}: {e}")
         finally:
             f.close()
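
A hedged variant of multi_request that streams results as they finish, using as_completed instead of wait (get_article2_1 is the test helper defined earlier in this file; multi_request_streaming is an illustrative name, not part of the commit):

from concurrent.futures import ThreadPoolExecutor, as_completed

def multi_request_streaming(n=100):
    # Write each result as soon as its future completes; one failing
    # future does not abort the remaining writes.
    with ThreadPoolExecutor(max_workers=150) as executor, \
            open("result.txt", "w", encoding="utf-8") as f:
        futures = [executor.submit(get_article2_1) for _ in range(n)]
        for future in as_completed(futures):
            try:
                f.write(str(future.result()) + "\n")
            except Exception as e:
                print(f"Error: {type(e).__name__}: {e}")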
 
-
 @time_use
 def article_annotation():
     text = """The expression, “Everybody’s doing it,” is very much at the center of the concept of peer pressure. It is a social influence applied on an individual in order to get that person to act or believe in a(n) ____1____ way as a larger group. 
@@ -405,12 +399,30 @@ For certain individuals, seeking social acceptance is so important that it becom
 However, peer pressure is not always negative. A student whose friends are good at contests may be ____11____ to work harder and get good grades. Players on a sports team may feel driven to play harder in order to help the team win. This type of ____12____ can also get a friend off drugs, or to help an adult take up a good habit or drop a bad one.
 Although peer pressure is sometimes quite obvious, it can also be so ____13____ that a person may not even notice that it is affecting his or her behavior. For this reason, when making important decisions, simply going with a(n) ____14____ is risky. Instead, people should seriously consider why they feel drawn to taking a particular action, and whether the real ____15____ is simply that everyone else is doing the same thing."""
 
-    json_data = {"english_text": "hello , please give me an apple ."}
-
-    r = requests.post(f"{use_address}/article/annotation", json=json_data)
+    json_data = {"english_text":"hello,please give me an apple.","split_blank":False} 
+   
+    r = requests.post(f"{use_address}/article/meaning/annotation", json=json_data)
     r_json = r.json()
+    assert r.status_code == 200 and r_json.get("code")==200
     print(r_json)
 
 
 if __name__ == '__main__':
     article_annotation()

+ 1 - 2
spoken_language/common/utils.py

@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 import sys
 
-
 def is_python3():
     if sys.version > '3':
         return True
-    return False
+    return False

+ 7 - 4
spoken_language/read_config.py

@@ -1,17 +1,20 @@
 # -*- coding:utf-8 -*-
 import yaml
 
-
 def read_config(parent_dir="."):
-    with open(parent_dir + "/config/tencent_config.yaml", "r", encoding="utf-8") as file:
+    with open(parent_dir +"/config/tencent_config.yaml", "r",encoding="utf-8") as file:
         config = yaml.safe_load(file)
         return config
 
 
 if __name__ == '__main__':
-    import sys, os
+    import sys,os
 
     current_dir = os.path.dirname(os.path.abspath(__file__))
     parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
     sys.path.append(parent_dir)
-    print(read_config(parent_dir))
+    print(read_config(parent_dir))
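
Downstream code in spoken_language/soeexample.py reads appId, SecretId and SecretKey from this config; a minimal validation sketch, assuming only those three keys matter (load_validated_config is illustrative, not part of the module):

REQUIRED_KEYS = ("appId", "SecretId", "SecretKey")

def load_validated_config(parent_dir="."):
    # Fail fast if tencent_config.yaml lacks a key the SOE client needs.
    config = read_config(parent_dir)
    missing = [k for k in REQUIRED_KEYS if k not in config]
    if missing:
        raise KeyError(f"tencent_config.yaml is missing keys: {missing}")
    return config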

+ 15 - 15
spoken_language/soe/speaking_assessment.py

@@ -1,20 +1,18 @@
 # -*- coding: utf-8 -*-
-import base64
-import hashlib
+import sys
 import hmac
+import hashlib
+import base64
+import time
 import json
-import sys
 import threading
-import time
 import urllib
-import uuid
-from urllib.parse import quote
 
 import websocket
-
+import uuid
+from urllib.parse import quote
 from tools.loglog import logger
 
-
 def is_python3():
     if sys.version > '3':
         return True
@@ -106,7 +104,7 @@ class SpeakingAssessment:
 
     def set_text_mode(self, text_mode):
         self.text_mode = text_mode
-
+    
     def set_rec_mode(self, rec_mode):
         self.rec_mode = rec_mode
 
@@ -212,7 +210,7 @@ class SpeakingAssessment:
 
     def start(self):
         def on_message(ws, message):
-
+           
             response = json.loads(message)
             response['voice_id'] = self.voice_id
             if response['code'] != 0:
@@ -224,7 +222,7 @@ class SpeakingAssessment:
                 self.status = FINAL
                 self.result = message
                 self.listener.on_recognition_complete(response)
-
+               
                 self.ws.close()
                 return
             else:
@@ -240,8 +238,9 @@ class SpeakingAssessment:
                          (format(error), self.voice_id))
             self.status = ERROR
 
-        def on_close(ws, close_status_code, close_msg):
-
+        def on_close(ws,close_status_code, close_msg):
+           
             self.status = CLOSED
             logger.info("websocket closed  voice id %s" %
                         self.voice_id)
@@ -257,13 +256,13 @@ class SpeakingAssessment:
         signstr = self.format_sign_string(query)
         autho = self.sign(signstr, self.credential.secret_key)
         requrl = self.create_query_string(query_arr)
-
+       
         if is_python3():
             autho = urllib.parse.quote(autho)
         else:
             autho = urllib.quote(autho)
         requrl += "&signature=%s" % autho
-
+       
         self.ws = websocket.WebSocketApp(requrl, None,
                                          on_error=on_error, on_close=on_close, on_message=on_message)
         self.ws.on_open = on_open
@@ -273,3 +272,4 @@ class SpeakingAssessment:
         self.status = STARTED
         response = {'voice_id': self.voice_id}
         self.listener.on_recognition_start(response)
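
The sign()/format_sign_string() helpers invoked above fall outside this hunk. Tencent's realtime APIs sign the sorted query string with HMAC-SHA1 and base64-encode the digest, so a plausible sketch (an assumption, consistent with the hmac/hashlib/base64 imports at the top of the file) is:

import base64
import hashlib
import hmac
from urllib.parse import quote

def sign(signstr: str, secret_key: str) -> str:
    # HMAC-SHA1 over the canonical query string, then base64.
    digest = hmac.new(secret_key.encode("utf-8"),
                      signstr.encode("utf-8"), hashlib.sha1).digest()
    return base64.b64encode(digest).decode("utf-8")

# requrl += "&signature=%s" % quote(sign(signstr, credential.secret_key))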

+ 40 - 23
spoken_language/soeexample.py

@@ -1,17 +1,19 @@
 # -*- coding: utf-8 -*-
-import json
 import os
-import threading
 import time
-
 import requests
+import threading
+from datetime import datetime
+import json
 
 from spoken_language.common import credential
-from spoken_language.read_config import read_config
 from spoken_language.soe import speaking_assessment
 
+from spoken_language.read_config import read_config
+
+
 config_data = read_config()
-app_id, secret_id, secret_key = config_data['appId'], config_data['SecretId'], config_data['SecretKey']
+app_id,secret_id,secret_key= config_data['appId'],config_data['SecretId'],config_data['SecretKey']
 
 APPID = app_id
 SECRET_ID = secret_id
@@ -29,25 +31,33 @@ class MySpeechRecognitionListener(speaking_assessment.SpeakingAssessmentListener
 
     def on_recognition_start(self, response):
         pass
 
     def on_intermediate_result(self, response):
         rsp_str = json.dumps(response, ensure_ascii=False)
 
     def on_recognition_complete(self, response):
         global spoken_result
         spoken_result[self.id] = response
 
     def on_fail(self, response):
         rsp_str = json.dumps(response, ensure_ascii=False)
-
+       
 
 def process(id):
     audio = r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3"
     listener = MySpeechRecognitionListener(id)
-
+   
     credential_var = credential.Credential(SECRET_ID, SECRET_KEY)
     recognizer = speaking_assessment.SpeakingAssessment(
-        APPID, credential_var, ENGINE_MODEL_TYPE, listener)
+        APPID, credential_var, ENGINE_MODEL_TYPE,  listener)
     recognizer.set_text_mode(0)
     recognizer.set_ref_text("anyway")
     recognizer.set_eval_mode(0)
@@ -61,34 +71,36 @@ def process(id):
             while content:
                 recognizer.write(content)
                 content = f.read(SLICE_SIZE)
-                # sleep to simulate the real-time audio send interval
-
+                #sleep to simulate the real-time audio send interval
+               
                 time.sleep(0.2)
     except Exception as e:
         print(e)
     finally:
         recognizer.stop()
 
-
-def process_rec(task_id, audio_path, audio_text, audio_binary=None):
+def process_rec(task_id,audio_path,audio_text,audio_binary=None):
     audio = audio_path
     listener = MySpeechRecognitionListener(task_id)
-
+   
     credential_var = credential.Credential(SECRET_ID, SECRET_KEY)
     recognizer = speaking_assessment.SpeakingAssessment(
-        APPID, credential_var, ENGINE_MODEL_TYPE, listener)
+        APPID, credential_var, ENGINE_MODEL_TYPE,  listener)
     recognizer.set_text_mode(0)
     recognizer.set_ref_text(audio_text)
     recognizer.set_eval_mode(1)
     recognizer.set_keyword("")
     recognizer.set_sentence_info_enabled(0)
     recognizer.set_voice_format(2)
-
+   
     recognizer.set_rec_mode(1)
     try:
         recognizer.start()
-        if audio_binary:
-
+        if audio_binary: 
+           
             recognizer.write(audio_binary)
         else:
             with open(f"{task_id}.mp3", 'rb') as f:
@@ -111,17 +123,18 @@ def process_multithread(number):
         thread.join()
 
 
-def make_spoken(task_id, audio_url, audio_content, audio_text):
+def make_spoken(task_id,audio_url,audio_content,audio_text):
+
     if audio_url:
         print("有url,应该去下载mp3文件")
-
+       
         r = requests.get(audio_url)
         audio_content = r.content
     else:
-        with open(f"{task_id}.mp3", 'wb') as f:
+        with open(f"{task_id}.mp3",'wb') as f:
             f.write(audio_content)
 
-    process_rec(task_id, audio_path=f"", audio_text=audio_text, audio_binary=audio_content)
+    process_rec(task_id,audio_path=f"",audio_text=audio_text,audio_binary=audio_content)
     global spoken_result
     for _ in range(60):
         if task_id in spoken_result:
@@ -133,6 +146,10 @@ def make_spoken(task_id, audio_url, audio_content, audio_text):
         time.sleep(0.5)
     return None
 
-
 if __name__ == "__main__":
-    process_rec(0, r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3", "You must study to be frank with the world apple")
+
+    process_rec(0,r"C:\Users\86131\Desktop\音频\output_16k_mono.mp3","You must study to be frank with the world apple")
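
A hedged usage sketch of make_spoken as defined above (the URL and task id are placeholders; pass either audio_url or raw audio_content bytes along with the reference text):

result = make_spoken(
    task_id="demo-1",
    audio_url="https://example.com/sample.mp3",  # placeholder URL
    audio_content=None,
    audio_text="You must study to be frank with the world",
)
print(result)  # the recognition response, or None if nothing arrives within ~30 s of polling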

+ 13 - 8
tools/ali_log.py

@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
-import os
-import time
-
 from aliyun.log import LogClient, PutLogsRequest, LogItem, GetLogsRequest, IndexConfig
+import time
+import os
 
 accessKeyId = os.getenv("OSS_ACCESS_KEY_ID")
 accessKey = os.getenv("OSS_ACCESS_KEY_SECRET")
@@ -29,21 +28,18 @@ logstore_index = {'line': {
 from_time = int(time.time()) - 3600
 to_time = time.time() + 3600
 
-
 def create_project():
     print("ready to create project %s" % project_name)
     client.create_project(project_name, project_des="")
     print("create project %s success " % project_name)
     time.sleep(60)
 
-
 def create_logstore():
     print("ready to create logstore %s" % logstore_name)
     client.create_logstore(project_name, logstore_name, ttl=3, shard_count=2)
     print("create logstore %s success " % project_name)
     time.sleep(30)
 
-
 def create_index():
     print("ready to create index for %s" % logstore_name)
     index_config = IndexConfig()
@@ -52,8 +48,7 @@ def create_index():
     print("create index for %s success " % logstore_name)
     time.sleep(60 * 2)
 
-
-def put_logs(msg: str):
+def put_logs(msg:str):
     log_group = []
 
     log_item = LogItem()
@@ -66,6 +61,7 @@ def put_logs(msg: str):
     client.put_logs(request)
 
 
+
 def get_logs():
     print("ready to query logs from logstore %s" % logstore_name)
     request = GetLogsRequest(project_name, logstore_name, from_time, to_time, query=query)
@@ -77,4 +73,13 @@ def get_logs():
 
 
 if __name__ == '__main__':
     put_logs("测试")
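
The middle of put_logs is elided in the hunk above; with the aliyun-log SDK the usual flow is to fill a LogItem and wrap it in a PutLogsRequest. A hedged sketch (the field name "message" is an assumption):

def put_logs_sketch(msg: str):
    log_item = LogItem()
    log_item.set_time(int(time.time()))
    log_item.set_contents([("message", msg)])
    request = PutLogsRequest(project_name, logstore_name,
                             topic="", source="", logitems=[log_item])
    client.put_logs(request)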

File diff suppressed because it is too large
+ 61 - 35
tools/audio.py


+ 12 - 9
tools/del_expire_file.py

@@ -2,12 +2,12 @@
 """
 Delete expired file resources
 """
-import datetime
 import os
+import datetime
 from time import sleep
 
 
-def del_file(folder_path, expired_days=10):
+def del_file(folder_path,expired_days=10):
     """
     Delete stale files inside a folder
     folder_path: the folder whose expired files should be removed
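     (note: os.path.getctime, used below, is creation time on Windows but metadata-change time on Linux)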
@@ -18,28 +18,31 @@ def del_file(folder_path, expired_days=10):
         print("文件夹不存在")
         return None
 
     now = datetime.datetime.now()
 
     for filename in os.listdir(folder_path):
         file_path = os.path.join(folder_path, filename)
-
+       
         if os.path.isfile(file_path):
-
+           
             create_time = os.path.getctime(file_path)
             create_date = datetime.datetime.fromtimestamp(create_time)
-
+           
             delta = now - create_date
-
+           
             if delta.days > expired_days:
                 os.remove(file_path)
 
 
 def run_del_normal():
     """这是小程序项目内的正常删除机制"""
     while True:
-        del_file("make_docx_demo/file_result", expired_days=15)
-        sleep(3600 * 24)
+        del_file("make_docx_demo/file_result",expired_days=15)
+        sleep(3600*24)
 
 
 if __name__ == '__main__':
-    run_del_normal()
+    run_del_normal()

+ 19 - 27
tools/loglog.py

@@ -1,23 +1,19 @@
 # -*- coding=utf-8 -*-
-import os
-import time
+import time, os
 import traceback
-from threading import Lock
-
 from loguru import logger
-
+from threading import Lock
 from tools.ali_log import put_logs
 
-
 class SimpleLogger:
     """
     Simple logger: stores large volumes of mostly useless GPT logs
     """
 
     def __init__(self, base_file_name: str = "ai_log"):
-        self.base_file_name = "log/" + base_file_name
+        self.base_file_name = "log/" + base_file_name 
         self.file_ext = ".txt"
-        self.max_size = 10 * 1024 * 1024
+        self.max_size = 10 * 1024 * 1024 
         self.current_file = self._get_current_file()
         self.lock = Lock()
         if not os.path.exists("log"):
@@ -41,7 +37,7 @@ class SimpleLogger:
             with open(f"{self.base_file_name}_1{self.file_ext}", "a", encoding="utf-8") as log_file:
                 log_file.write(f"Hello World\n")
 
-    def log(self, message: str, level="INFO"):
+    def log(self, message:str, level="INFO"):
         """记录日志到文件"""
         self._check_file_size()
         date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
@@ -49,32 +45,29 @@ class SimpleLogger:
             with open(self.current_file, "a", encoding="utf-8") as log_file:
                 log_file.write(f"{date_time} 【{level}】 {str(message)}\n\n")
 
-    def info(self, message: str):
+    def info(self, message:str):
         """记录INFO级别的日志"""
         self.log(message, "INFO")
 
-    def warning(self, message: str):
+    def warning(self, message:str):
         """记录WARNING级别的日志"""
         self.log(message, "WARNING")
 
-    def error(self, message: str):
+    def error(self, message:str):
         """记录ERROR级别的日志"""
         message = "\n" + "-" * 20 + "\n" + message + "\n" + "-" * 20
         self.log(message, "ERROR")
 
-    def debug(self, message: str):
+    def debug(self, message:str):
         """记录DEBUG级别的日志"""
         self.log(message, "DEBUG")
 
-
-logger.remove(handler_id=None)
+logger.remove(handler_id=None) 
 logger.add('log/log.log', level="INFO", rotation="5 MB", encoding="utf-8", retention="7 days")
 logger.add('log/error.log', level="ERROR", rotation="5 MB", encoding="utf-8", retention="7 days")
 
 simple_logger = SimpleLogger()
-temp_logger = SimpleLogger(base_file_name="temp_log")
-
-
+annotation_logger = SimpleLogger(base_file_name="annotation_log")
 
 def exception_handler(func):
     def wrapper(*args, **kwargs):
@@ -84,32 +77,31 @@ def exception_handler(func):
             logger.error(f"{type(e).__name__}: {e}")
             traceback_str = traceback.format_exc()
             logger.error(f"错误追溯:{traceback_str}")
-
     return wrapper
 
-
-def log_err_e(e: Exception, msg=None):
+def log_err_e(e:Exception,msg=None):
     if msg:
         logger.error(f"{msg}{type(e).__name__}:{e}")
     traceback_str = traceback.format_exc()
     logger.error(traceback_str)
 
-
 class AliyunLogHandler:
     @staticmethod
     def write(message):
         put_logs(message)
 
-
 if os.getenv("env") != "development":
     print("这是正式环境,加载阿里云日志")
     aliyun_log_handler = AliyunLogHandler()
-    logger.add(aliyun_log_handler, enqueue=True)
+    logger.add(aliyun_log_handler, enqueue=True) 
 
 if __name__ == '__main__':
     #
-
+   
     import os
-
     os.chdir("..")
-    logger.error("test信息0123456789.*/-")
+    logger.error("test信息0123456789.*/-")

+ 39 - 16
tools/new_mysql.py

@@ -1,11 +1,9 @@
 # -*- coding:utf-8 -*-
 import os
-import time
-
+from tools.loglog import logger
 import pymysql
 from dbutils.pooled_db import PooledDB
-
-from tools.loglog import logger
+import time
 
 
 class MySQLUploader:
@@ -55,7 +53,7 @@ class MySQLUploader:
 
     def execute_(self, query, params=None):
         for _ in range(3):
-            conn = self._pool.connection()
+            conn = self._pool.connection() 
             cursor = conn.cursor()
             try:
                 if params:
@@ -78,7 +76,7 @@ class MySQLUploader:
     def bulk_insert(self, query, data_list):
         """执行批量插入"""
         for _ in range(3):
-            conn = self._pool.connection()
+            conn = self._pool.connection() 
             cursor = conn.cursor()
             try:
                 cursor.executemany(query, data_list)
@@ -86,7 +84,7 @@ class MySQLUploader:
                 return True
             except pymysql.MySQLError as e:
                 logger.warning(f"可忽略的错误 bulk_insert数据库批量插入错误{type(e).__name__}:{e}")
-                conn.rollback()
+                conn.rollback() 
                 time.sleep(0.5)
             finally:
                 cursor.close()
@@ -98,14 +96,14 @@ class MySQLUploader:
     def query_data(self, query, params=None):
         """执行查询并返回结果"""
         for _ in range(3):
-            conn = self._pool.connection()
+            conn = self._pool.connection() 
             cursor = conn.cursor()
             try:
                 if params:
                     cursor.execute(query, params)
                 else:
                     cursor.execute(query)
-
+               
                 results = cursor.fetchall()
                 return results
             except pymysql.MySQLError as e:
@@ -118,24 +116,26 @@ class MySQLUploader:
         logger.critical(f"query_data 3次没成功.{query} {params}")
         return False
 
-    def execute_sql_file(self, script_file_path):
+    def execute_sql_file(self,script_file_path):
         """执行sql脚本:传入路径或者sql路径都可以"""
-
         def execute_file(path):
 
             with open(path, 'r', encoding='utf-8') as file:
                 sql_script = file.read()
 
-            conn = self._pool.connection()
+            conn = self._pool.connection() 
             cursor = conn.cursor()
-
+           
             cursor.execute(sql_script)
-
+           
             conn.commit()
 
             cursor.close()
             conn.close()
 
         if os.path.isdir(script_file_path):
             for file in os.listdir(script_file_path):
                 execute_file(script_file_path + "\\" + file)
@@ -143,15 +143,38 @@ class MySQLUploader:
             if script_file_path.endswith(".sql"):
                 execute_file(script_file_path)
 
-    def close_connection(self):
-        ...
+    def close_connection(self):...
+
 
 
 if __name__ == '__main__':
     m = MySQLUploader()
     s = "select Id,BritishPronunciation from dictionary_word where wordspelling = %s"
     r = m.query_data(s, ("sky",))
     print(r)
     input()
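
A hedged companion to the query_data demo above showing bulk_insert, which retries up to three times and rolls back failed batches (the rows are placeholders):

rows = [("sky", "/skaɪ/"), ("sea", "/siː/")]
ok = m.bulk_insert(
    "insert into dictionary_word (WordSpelling, BritishPronunciation) values (%s, %s)",
    rows,
)
print(ok)  # True once a batch commits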
 
     #

+ 42 - 25
tools/sql_format.py

@@ -1,40 +1,44 @@
 # -*- coding: utf-8 -*-
-from core.respone_format import *
-from tools.loglog import logger
 from tools.new_mysql import MySQLUploader
+from tools.loglog import logger
+
+from core.respone_format import *
 
 
 class CRUD:
     def __init__(self):
         self.m = MySQLUploader()
-        self.people_place_name = []
+        self.people_place_name = [] 
         self.get_people_place_name()
 
     def get_word_by_wordid(self, wordid):
         s = "select WordSpelling from dictionary_word where Id = %s"
         r = self.m.query_data(s, (wordid,))
         if r:
             word = r[0][0]
             return word
         return None
 
-    def get_wordid_by_wordspelling(self, wordspelling, auto_insert=False):
+    def get_wordid_by_wordspelling(self, wordspelling,auto_insert=False):
         """加一个功能。大字典内没有这个单词就自动插入,返回id。auto_insert为真,自动插入大字典,获取其id"""
         s = "select Id from dictionary_word where wordspelling = %s"
         r = self.m.query_data(s, (wordspelling,))
         if r:
             wordid = r[0][0]
             return wordid
 
         if auto_insert:
             s = "insert into dictionary_word (WordSpelling) VALUES (%s);"
-            self.m.execute_(s, (wordspelling,))
+            self.m.execute_(s,(wordspelling,))
             s = "select Id from dictionary_word where wordspelling = %s"
             r = self.m.query_data(s, (wordspelling,))
             wordid = r[0][0]
             return wordid
 
-    def get_exchange_prototype(self, wordspelling):
+    def get_exchange_prototype(self,wordspelling):
         s = "select Word from dictionary_exchange where Word = %s"
         r = self.m.query_data(s, (wordspelling,))
         if r:
@@ -49,30 +53,37 @@ class CRUD:
         r = self.m.query_data(s, (wordid,))
         return r
 
     def get_people_place_name(self):
         s2 = "select word from people_place_name"
         r = self.m.query_data(s2)
         for i in r:
             self.people_place_name.append(i[0])
 
     def get_word_meaning_by_wordspelling(self, wordspelling, frequency):
         """根据单词获取其全部词义"""
-
+       
         wordid = self.get_wordid_by_wordspelling(wordspelling)
 
         return_data = {"word_id": wordid, "frequency": frequency, "word": wordspelling,
                        "meanings": {"default": [], "sun_english": {"name": "春笋英语", "items": []}, "oxford": {"name": "牛津", "items": []}}}
 
         s = "select Id,WordMeaning from dictionary_meaningitem where WordSpelling = %s"
         r = self.m.query_data(s, (wordspelling,))
         for row_data in r:
             return_data["meanings"]["default"].append({"id": row_data[0], "text": row_data[1]})
 
         s2 = "select Id,WordMeaning from dictionary_meaningitem_spring_bamboo where WordSpelling = %s"
         r2 = self.m.query_data(s2, (wordspelling,))
         for row_data in r2:
             return_data["meanings"]["sun_english"]["items"].append({"id": row_data[0], "text": row_data[1]})
 
         s2 = "select Id,WordMeaning from dictionary_meaningitem_oxford where WordSpelling = %s"
         r2 = self.m.query_data(s2, (wordspelling,))
         for row_data in r2:
@@ -86,7 +97,8 @@ class CRUD:
         logger.info(f"根据词义id删除,{wordmeaningid}。结果{r}")
         return True if r is True else False
 
-    def get_word_all_info(self, word_id, spell, frequency):
+    def get_word_all_info(self,word_id, spell,frequency):
         def get_associational_words_info(word_meaning_id) -> list:
             return_data = []
             s = "select Id,BaseWordMeaningId,BaseWord,BaseWordMeaning,AssociationalWord,AssociationalWordMeaningId,AssociationalWordMeaning," \
@@ -98,13 +110,12 @@ class CRUD:
 
             for single_meaning in r:
                 associational_id, base_word_meaning_id, base_word, base_word_meaning, associational_word, \
-                associational_word_meaning_id, associational_word_meaning, association_reason, \
+                associational_word_meaning_id, associational_word_meaning, association_reason,\
                 reverse_association_reason, created_time, updated_time = single_meaning
-                r_data = {"id": associational_id, "base_word": {"word": base_word, "meaning_id": base_word_meaning_id, "meaning": base_word_meaning},
-                          "associational_word": {"word": associational_word, "meaning_id": associational_word_meaning_id,
-                                                 "meaning": associational_word_meaning},
-                          "association_reason": association_reason, "reverse_association_reason": reverse_association_reason,
-                          "create_time": created_time.strftime('%Y-%m-%d %H:%M:%S'), "update_time": updated_time.strftime('%Y-%m-%d %H:%M:%S')}
+                r_data = {"id":associational_id,"base_word":{"word":base_word,"meaning_id":base_word_meaning_id,"meaning":base_word_meaning},
+                          "associational_word":{"word":associational_word,"meaning_id":associational_word_meaning_id,"meaning":associational_word_meaning},
+                          "association_reason":association_reason,"reverse_association_reason":reverse_association_reason,
+                          "create_time":created_time.strftime('%Y-%m-%d %H:%M:%S'),"update_time":updated_time.strftime('%Y-%m-%d %H:%M:%S')}
                 return_data.append(r_data)
 
             return return_data
@@ -118,8 +129,8 @@ class CRUD:
                 return return_data
             for single_phrase in r:
                 phrase_id, phrase_spelling_text, phrase_chinese_translation, from_type, created_time, updated_time = single_phrase
-                r_data = {"id": phrase_id, "english": phrase_spelling_text, "chinese": phrase_chinese_translation, "from": from_type,
-                          "create_time": created_time.strftime('%Y-%m-%d %H:%M:%S'), "update_time": updated_time.strftime('%Y-%m-%d %H:%M:%S')}
+                r_data = {"id":phrase_id,"english":phrase_spelling_text,"chinese":phrase_chinese_translation,"from":from_type,
+                          "create_time":created_time.strftime('%Y-%m-%d %H:%M:%S'),"update_time":updated_time.strftime('%Y-%m-%d %H:%M:%S')}
                 return_data.append(r_data)
 
             return return_data
@@ -132,24 +143,24 @@ class CRUD:
             if not r:
                 return return_data
             for single_exchange in r:
-                exchange_id, spell, properties, description = single_exchange
+                exchange_id,spell,properties,description = single_exchange
                 r_data = {"id": exchange_id, "spell": spell, "properties": properties, "description": description}
                 return_data.append(r_data)
 
             return return_data
 
-        return_data_all = {"word_id": word_id, "spell": spell, "frequency": frequency, "meanings": [], "exchanges": []}
+        return_data_all = {"word_id":word_id,"spell":spell,"frequency":frequency,"meanings":[],"exchanges":[]}
         if spell in self.people_place_name:
             return_data_all["type"] = "人名地名"
         else:
             return_data_all["type"] = "一般词汇"
 
         s = "select Id,WordMeaning,OperateAccount from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
-        if not r:
+        if not r: 
             return resp_200(data=return_data_all)
         for single_meaning in r:
             meaning_id, word_meaning, operate_account = single_meaning
-            single_meaning_item = {"id": meaning_id, "text": word_meaning, "editor": operate_account}
+            single_meaning_item = {"id":meaning_id,"text":word_meaning,"editor":operate_account}
             associational_words_list = get_associational_words_info(meaning_id)
             single_meaning_item["associational_words"] = associational_words_list
             phrases_list = get_phrases_info(meaning_id)
@@ -161,7 +172,7 @@ class CRUD:
 
         return resp_200(data=return_data_all)
 
-    def delete_associational_word(self, word_id, associational_id):
+    def delete_associational_word(self,word_id,associational_id):
         s = "select Id from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
         if not r:
@@ -170,8 +181,8 @@ class CRUD:
 
         s = "select BaseWordMeaningId from dictionary_associational_word where Id = %s"
         r = self.m.query_data(s, (associational_id,))
-
-        if r and r[0][0] == meaning_id:
+       
+        if r and r[0][0]==meaning_id:
             s = "DELETE FROM dictionary_associational_word where Id = %s"
             r = self.m.execute_(s, (associational_id,))
             logger.info(f"根据联想词id删除,{associational_id}。结果{r}")
@@ -181,7 +192,7 @@ class CRUD:
             logger.info(f"删除联想词时,单词id与联想词id校验失败。{r} {meaning_id}")
             return resp_400(message="单词id与联想词id校验失败")
 
-    def delete_phrese_word(self, word_id, phrase_id):
+    def delete_phrese_word(self,word_id,phrase_id):
         s = "select Id from dictionary_meaningitem where WordId = %s"
         r = self.m.query_data(s, (word_id,))
         if not r:
@@ -190,7 +201,7 @@ class CRUD:
 
         s = "select WordMeaningId from dictionary_phrase where Id = %s"
         r = self.m.query_data(s, (phrase_id,))
-
+       
         if r and r[0][0] == meaning_id:
             s = "DELETE FROM dictionary_phrase where Id = %s"
             r = self.m.execute_(s, (phrase_id,))
@@ -214,6 +225,7 @@ class UserCRUD:
         s = "select id,account,password,uname,create_time from user where account = %s"
         r = self.m.query_data(s, (account,))
         if r:
             user_info = (r[0][0], r[0][1], r[0][2], r[0][3], r[0][4].strftime('%Y-%m-%d %H:%M:%S'))
             return user_info
         return None
@@ -225,6 +237,11 @@ class UserCRUD:
 
 if __name__ == '__main__':
     crud = CRUD()
 
     r = crud.get_wordid_by_wordspelling("abcdefg")
     print(type(r))
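
For orientation, the structure returned by get_word_meaning_by_wordspelling as assembled above (the word and frequency are illustrative):

info = crud.get_word_meaning_by_wordspelling("sky", frequency=1200)
# {"word_id": ..., "frequency": 1200, "word": "sky",
#  "meanings": {"default": [{"id": ..., "text": ...}],
#               "sun_english": {"name": "春笋英语", "items": [...]},
#               "oxford": {"name": "牛津", "items": [...]}}}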

+ 3 - 2
tools/thread_pool_manager.py

@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, wait
 
-pool_executor = ThreadPoolExecutor(max_workers=50)
+
+pool_executor = ThreadPoolExecutor(max_workers=50)
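
A hedged usage sketch of the shared pool; the newly imported wait suggests callers block on whole batches:

futures = [pool_executor.submit(pow, 2, n) for n in range(8)]
wait(futures)
print([f.result() for f in futures])  # [1, 2, 4, 8, 16, 32, 64, 128]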

Some files were not shown because too many files changed in this diff