Răsfoiți Sursa

1.增加了对文章解析字段的校验,需要中文的解析。
2.随机了选项文本ABCD

xie 1 lună în urmă
părinte
commit
165b8cc296
4 fișiere modificate, cu 34 adăugiri și 12 ștergeri
  1. 7 7
      gpt/chatgpt.py
  2. 16 4
      gpt/get_article2.py
  3. 10 0
      gpt/gpt_check.py
  4. 1 1
      mock/mock_request.py

+ 7 - 7
gpt/chatgpt.py

@@ -5,6 +5,7 @@ if __name__ == '__main__':
 
 import requests
 import random
+import json
 import time
 from tools.loglog import logger,simple_logger
 from tools.new_mysql import MySQLUploader
@@ -104,20 +105,19 @@ def get_article_gpt_pydantic(question,real_ip="localhost",demo_name="无",model=
         try:
            
             response = requests.post(f'http://170.106.108.95/get_article', json=d2)
-            r_json = response.json()
-           
-            gpt_content = str(r_json)
+            r_str = response.json() 
 
-            simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{gpt_content}")
+            simple_logger.info(f"问题日志:\n{question}\n回答日志:\n{r_str}")
 
            
             if not check_fucn:
-                return r_json
+                return r_str
 
            
-            check_result = check_fucn(str(gpt_content))
+            check_result = check_fucn(r_str)
+
             if check_result: 
-                return r_json
+                return r_str
             else:
                 raise Exception(f"第{num_count + 1}次共3次,GPT的校验没有通过,校验函数:{check_fucn.__name__}")
 

+ 16 - 4
gpt/get_article2.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-from gpt.chatgpt import get_answer_from_gpt,get_article_gpt_pydantic
+from gpt.chatgpt import get_answer_from_gpt, get_article_gpt_pydantic
 from gpt.gpt_check import CheckGptAnswer, CheckArticleResult
 from tools.new_mysql import MySQLUploader
 from tools.loglog import logger, log_err_e
@@ -11,7 +11,7 @@ from common.split_text import split_text_to_word
 from pydantic import BaseModel
 from cachetools import TTLCache
 from concurrent.futures import wait
-from random import randint, shuffle
+from random import randint, shuffle, sample
 import json
 import requests
 from openpyxl import load_workbook
@@ -202,6 +202,7 @@ class GetArticle:
 3.文章中使用提供单词,一定要和提供单词的中文词义匹配,尤其是一词多义时,务必使用提供单词的词义。必须要用提供单词的词义。如果用到的词义与提供单词词义不一致,请不要使用这个单词。
 4.生成的文章要求{select_word_count}词左右,可以用{no_escape_code}字符分段,一般{select_paragraph_count}个段落左右。第一段是文章标题。
 5.生成文章优先使用[单词组1]的词义,其次可以挑选使用[单词组2]的词义。允许不使用[单词组1]的个别单词,优先保证文章整体意思通顺连贯和故事完整。
+6.注意回复字段的中英文,englishArticle是英文,chineseArticle是中文,其中trunk是英文,analysis是中文,text是英文。
 
 提供[单词组1]:{core_words_meaning_str};
 提供[单词组2]:{extend_words_meaning_str};
@@ -213,7 +214,7 @@ class GetArticle:
            
 
             r_json = json.loads(get_article_gpt_pydantic(q, temperature=0.9, real_ip=real_ip, demo_name=demo_name, model='gpt-4.1',
-                                                 max_tokens=4000, sys_prompt=sys_prompt))
+                                                         check_fucn=CheckArticleResult.get_article_1, max_tokens=4000, sys_prompt=sys_prompt))
            
             allWordAmount = 0
             allWordAmount += len(split_text_to_word(r_json["englishArticle"]))
@@ -244,12 +245,23 @@ class GetArticle:
             r_json["body"] = r_json.pop("englishArticle")
             r_json["chinese"] = r_json.pop("chineseArticle")
 
+           
+            for q in r_json['questions']:
+                data = q['candidates']
+                shuffled_candidates = sample(data, len(data))
+
+                labels = ['A', 'B', 'C', 'D']
+                for index, candidate in enumerate(shuffled_candidates):
+                    candidate['label'] = labels[index]
+                q['candidates'] = shuffled_candidates
+
             return {**r_json, "allWordAmount": allWordAmount}
         except json.decoder.JSONDecodeError:
             logger.error("gpt生成文章回复json格式化错误")
             raise
         except Exception as e:
-            logger.error(f"gpt生成文章回复其他错误.{type(e).__name__} {e}")
+            log_err_e(e,f"gpt生成文章回复其他错误.")
+
             raise
 
    

+ 10 - 0
gpt/gpt_check.py

@@ -85,6 +85,16 @@ class CheckArticleResult:
        
         if not all(i in json_object for i in ["englishArticle","chineseArticle","difficultSentences","usedMeanIds","questions"]):
             return False
+       
+        try:
+            for question in json_object['questions']:
+                analysis = question['analysis'] 
+                words_count_pct = len(re.findall(r"[a-zA-Z\']+", analysis))/len(analysis)
+                if words_count_pct>0.5:
+                    return False
+        except:
+            return False
+
         return True
 
 if __name__ == '__main__':

+ 1 - 1
mock/mock_request.py

@@ -123,7 +123,7 @@ def get_article2_1():
                                   {'spell': 'waste', 'meaning': '浪费, 荒芜, 废物', 'word_id': 1160701, 'meaning_id': 1292},
                                   {'spell': 'environment', 'meaning': '环境, 外界', 'word_id': 873514, 'meaning_id': 1293},
                                   {'spell': 'memory', 'meaning': '记忆, 记忆力, 回忆', 'word_id': 981104, 'meaning_id': 1294}],
-                 'take_count': 1, 'student_stage': 3, 'demo_name': '春笋英语',"article_difficulty":1500}
+                 'take_count': 1, 'student_stage': 3, 'demo_name': '春笋英语',"article_difficulty":800}
 
     r = requests.post(f"{use_address}/article/reading-comprehension", json=json_data)
     r_json = r.json()