audio.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. # -*- coding: utf-8 -*-
  2. from pathlib import Path
  3. from threading import Lock
  4. import io
  5. import os
  6. import re
  7. import json
  8. from cachetools import TTLCache
  9. from hashlib import md5
  10. from core.respone_format import *
  11. from tools.new_mysql import MySQLUploader
  12. from tools.loglog import logger
  13. from tools.thread_pool_manager import pool_executor
  14. from aliyunsdkcore.client import AcsClient
  15. from aliyunsdkcore.request import CommonRequest
  16. import oss2
  17. import nls
  18. from oss2.credentials import EnvironmentVariableCredentialsProvider
  19. Path("data/speech_data").mkdir(parents=True, exist_ok=True)
  20. class TestTts:
  21. def __init__(self, token):
  22. self.appkey = "EwztMPbSeu5hTrss"
  23. self.token = token
  24. self.url = "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1"
  25. def start(self, text, local_file_path):
  26. self.__text = text
  27. self.__f = open(local_file_path, "wb")
  28. self.__test_run()
  29. def test_on_metainfo(self, message, *args):
  30. print("on_metainfo message=>{}".format(message))
  31. def test_on_error(self, message, *args):
  32. print("on_error args=>{}".format(args))
  33. def test_on_close(self, *args):
  34. try:
  35. self.__f.close()
  36. except Exception as e:
  37. print("close file failed since:", e)
  38. def test_on_data(self, data, *args):
  39. try:
  40. self.__f.write(data)
  41. except Exception as e:
  42. print("write data failed:", e)
  43. def test_on_completed(self, message, *args):
  44. print("on_completed:args=>{} message=>{}".format(args, message))
  45. def __test_run(self):
  46. tts = nls.NlsSpeechSynthesizer(url=self.url,
  47. token=self.token,
  48. appkey=self.appkey,
  49. on_data=self.test_on_data,
  50. on_error=self.test_on_error,
  51. on_close=self.test_on_close,
  52. )
  53. tts.start(self.__text, voice="Eva", aformat="mp3", speech_rate=-400)
  54. class GetAudio:
  55. def __init__(self):
  56. self.m = MySQLUploader()
  57. self.auth = oss2.ProviderAuth(EnvironmentVariableCredentialsProvider())
  58. self.bucket_name = 'public-qingti-data'
  59. self.bucket = oss2.Bucket(self.auth, 'oss-cn-hangzhou.aliyuncs.com', self.bucket_name)
  60. self.token_cache = TTLCache(maxsize=10, ttl=28800)
  61. self.token = None
  62. self.lock = Lock()
  63. self.re_compile = re.compile("[.!?;*\"]")
  64. def upload_file_to_oss(self, word_or_hash_name, oss_file_name, local_file_path):
  65. for _ in range(2):
  66. try:
  67. r = self.bucket.put_object_from_file(oss_file_name, local_file_path)
  68. except Exception as e:
  69. logger.error(f"上传文件错误{type(e).__name__} {e}")
  70. continue
  71. s = r.resp.status
  72. if s == 200:
  73. print(f"音频上传oss成功 {word_or_hash_name}")
  74. os.remove(local_file_path)
  75. return True
  76. else:
  77. logger.critical(f"2次上传oss错误,音频:{word_or_hash_name}")
  78. def insert_mysql(self, word_or_hash_name,oss_file_name,original_text):
  79. s = "insert into tts (word_or_hash_name,audio,long_tts_text) values (%s,%s,%s)"
  80. self.m.execute_(s, (word_or_hash_name,oss_file_name,original_text))
  81. def query_mysql(self, word):
  82. s = "select audio from tts where word_or_hash_name=%s"
  83. r = self.m.query_data(s, (word,))
  84. if not r:
  85. return False
  86. return r[0][0]
  87. @staticmethod
  88. def __create_token():
  89. client = AcsClient(
  90. os.getenv('OSS_ACCESS_KEY_ID'),
  91. os.getenv('OSS_ACCESS_KEY_SECRET'),
  92. "cn-shanghai"
  93. )
  94. request = CommonRequest()
  95. request.set_method('POST')
  96. request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
  97. request.set_version('2019-02-28')
  98. request.set_action_name('CreateToken')
  99. try:
  100. response = client.do_action_with_exception(request)
  101. jss = json.loads(response)
  102. if 'Token' in jss and 'Id' in jss['Token']:
  103. token = jss['Token']['Id']
  104. logger.info(f"生成token成功。{token}")
  105. return token
  106. except Exception as e:
  107. logger.error(f"token生成错误:{type(e).__name__} {e}")
  108. def get_audio(self, word, local_file_path):
  109. self.lock.acquire()
  110. try:
  111. if "token" not in self.token_cache:
  112. self.token = self.__create_token()
  113. self.token_cache["token"] = self.token
  114. except Exception as e:
  115. logger.error(f"{type(e).__name__} {e}")
  116. finally:
  117. self.lock.release()
  118. t = TestTts(self.token)
  119. t.start(word, local_file_path)
  120. def submit_task(self, word_or_phrase:str,resp_type:int):
  121. f = pool_executor.submit(self.run_task, word_or_phrase,resp_type)
  122. return f
  123. def __preprocess_data(self,text: str) -> tuple:
  124. """
  125. 预处理数据。超过50个字符的所有名字都用哈希值。
  126. :param text: 预处理文本内容
  127. :return:
  128. """
  129. text = self.re_compile.sub("",text.strip())
  130. if len(text) > 30 or " " in text:
  131. hash_str = md5(text.encode()).hexdigest()
  132. return text, hash_str
  133. return text,text
  134. def __resp_convert(self,oss_file_name,resp_type):
  135. """
  136. :param oss_file_name: OSS全路径;"baseData/audio/{word_or_hash_name}.mp3"
  137. :param resp_type: 回复格式设计:0返回oss路径,1 二进制文件,2 url三种;
  138. :return:
  139. """
  140. if resp_type == 0:
  141. logger.success(f"返回成功:oss路径{oss_file_name}")
  142. return oss_file_name
  143. elif resp_type == 1:
  144. try:
  145. obj = self.bucket.get_object(oss_file_name)
  146. content = io.BytesIO(obj.read())
  147. logger.success(f"返回成功:二进制数据{oss_file_name}")
  148. return content
  149. except Exception as e:
  150. raise resp_500(message=f"错误:转换二进制文件 {e}")
  151. elif resp_type == 2:
  152. mp3_file_url = f"https://{self.bucket_name}.oss-cn-hangzhou.aliyuncs.com/{oss_file_name}"
  153. logger.success(f"返回成功:https的url {mp3_file_url}")
  154. return mp3_file_url
  155. def run_task(self, word_or_phrase,resp_type):
  156. """
  157. :param word_or_phrase: 单词或短语文本,去生成tts的文本
  158. :param resp_type: 回复设计:0返回oss路径,1 二进制文件,2 url三种;
  159. :return:
  160. """
  161. original_text,word_or_hash_name = self.__preprocess_data(word_or_phrase)
  162. oss_file_name = f"baseData/audio/{word_or_hash_name}.mp3"
  163. local_file_path = f"data/speech_data/{word_or_hash_name}.mp3"
  164. query_word_mp3_path = self.query_mysql(word_or_hash_name)
  165. if query_word_mp3_path:
  166. resp_convertt = self.__resp_convert(oss_file_name=query_word_mp3_path,resp_type=resp_type)
  167. return resp_convertt
  168. self.get_audio(original_text, local_file_path)
  169. upload_result = self.upload_file_to_oss(word_or_hash_name, oss_file_name, local_file_path)
  170. if upload_result:
  171. self.insert_mysql(word_or_hash_name,oss_file_name,original_text)
  172. resp_convertt = self.__resp_convert(oss_file_name=oss_file_name, resp_type=resp_type)
  173. return resp_convertt
  174. return False
  175. if __name__ == '__main__':
  176. import os
  177. os.chdir('..')
  178. g = GetAudio()
  179. article="""The Marches were a happy family. Poverty, hard work, and even the fact that Father March was away with the Union armies could not down the spirits of Meg, Jo, Amy, and Marmee, as the March girls called their mother. The March sisters tried to be good but had their share of faults. Pretty Meg was often displeased with the schoolchildren she taught; boyish Jo was easy to become angry; golden-haired schoolgirl Amy liked to show up; but Beth, who kept the house, was loving and gentle always. The happy days passed and darkness came when a telegram arrived for Mrs. March. "Your husband is very ill," it said, "come at once." The girl tried to be brave when their mother left for the front. They waited and prayed. Little Beth got scarlet fever when she was taking care of the sick neighbor. She became very ill but began to recover by the time Marmee was back. When Father came home from the front and at that joyful Christmas dinner they were once more all together. Three years later, the March girls had grown into young womanhood. Meg became Mrs. Brooke, and after a few family troubles got used to her new state happily. Jo had found pleasure in her literary efforts. Amy had grown into a young lady with a talent for design and an even greater one for society. But Beth had never fully regained her health, and her family watched her with love and anxiety. Amy was asked to go and stay in Europe with a relative of the Marches. Jo went to New York and became successful in her writing and had the satisfaction of seeing her work published there. But at home the bitterest blow was yet to fall. Beth had known for some time that she couldn't live much longer to be with the family and in the spring time she died. News came from Europe that Amy and Laurie, the grandson of a wealthy neighbor, had planned to be married soon. Now Jo became ever more successful in her writing and got married to Professor Bhaer and soon afterwards founded a school for boys. And so the little women had grown up and lived happily with their children, enjoying the harvest of love and goodness that they had devoted all their lives to."""
  180. g.get_audio(article, "1.mp3")