python 阿里雲平臺合成語音(TTS)
阿新 • • 發佈:2021-01-18
1. 安裝阿里雲pythonSDKcore:
pip3 install aliyun-python-sdk-core-v3
2. 安裝ali_speech python SDK, 從github上下載
https://github.com/aliyun/alibabacloud-nls-python-sdk
解壓之後,安裝
cd alibabacloud-nls-python-sdk
sudo python3 setup.py install
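3. 增加阿里雲speech配置檔案,命名為ali_wav_config,放在與ali_wav.py相同的目錄下。檔案每行一個「鍵=值」,以#開頭的行為註釋;可設定speaker(發音人,預設xiaoyun)、volume(音量,預設50)、speech_rate(語速,預設0)、pitch_rate(語調,預設0)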
4. 生成語音
執行指令碼(words.txt每行一句要合成的文字,生成的wav檔案儲存在words.txt所在的目錄):
./ali_wav.py ./words.txt
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Batch text-to-speech synthesis with the Alibaba Cloud NLS SDK.

Usage: ./ali_wav.py ./words.txt

Every non-empty line of the words file is synthesized into a 16 kHz WAV file
stored in the same directory as the words file. Voice settings (speaker,
volume, speech_rate, pitch_rate) are read from the ``ali_wav_config`` file
that lives in the script's own directory.
"""
import sys
import os
import subprocess
import threading
import ali_speech
import logging
import time
import json
import base64

from ali_speech.callbacks import SpeechSynthesizerCallback
from ali_speech.constant import TTSFormat
from ali_speech.constant import TTSSampleRate
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest

words_file = ""
config_file_name = "/ali_wav_config"
speaker = "xiaoyun"
volume = 50
speech_rate = 0
pitch_rate = 0
thread_list = []
MAX_THREAD = 10


class MyCallback(SpeechSynthesizerCallback):
    """Synthesizer callback that writes the received audio into a file."""

    def __init__(self, name):
        """Open ``name`` (the path the audio is saved to) for binary writing."""
        self._name = name
        self._fout = open(name, 'wb')

    def close(self):
        """Close the output file; calling it more than once is harmless."""
        self._fout.close()

    def on_binary_data_received(self, raw):
        """Append one chunk of audio bytes to the output file."""
        self._fout.write(raw)

    def on_completed(self, message):
        """Finish the file, trim its silence, and report completion."""
        # Close first so every buffered byte is on disk before ffmpeg reads it.
        self.close()
        self._trim_silence()
        print(self._name + " Done!!!")

    def on_task_failed(self, message):
        """Close the output file and report the failure."""
        self.close()
        print(self._name + " Failed: " + str(message))

    def on_channel_closed(self):
        print('MyCallback.OnRecognitionChannelClosed')

    def on_metainfo(self, message):
        print('MyCallback.on_metainfo: %s' % message)

    def _trim_silence(self):
        """Strip silence from the WAV file with ffmpeg (best effort).

        ffmpeg cannot edit a file in place, so the result goes to a temporary
        file that replaces the original only when ffmpeg succeeds. Trimming
        is optional: if ffmpeg is missing or fails, the untrimmed audio is
        kept unchanged.
        """
        trimmed = self._name + ".trim.wav"
        command = [
            "ffmpeg", "-i", self._name,
            "-af",
            "silenceremove=start_periods=1:"
            "start_duration=0:start_threshold=-100dB:"
            "stop_periods=1:stop_duration=2:stop_threshold=-100dB",
            "-y", "-ac", "1", "-ar", "16000",
            trimmed,
        ]
        try:
            # An argument list (no shell) keeps file names containing quotes
            # or other shell metacharacters from breaking the command.
            status = subprocess.call(
                command,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except OSError:
            # ffmpeg is not installed or not executable.
            status = -1
        if status == 0:
            os.replace(trimmed, self._name)
        elif os.path.exists(trimmed):
            os.remove(trimmed)


def process(client, appkey, token, text, audio_name):
    """Synthesize ``text`` into the WAV file ``audio_name``.

    Uses the module-level ``speaker``, ``volume``, ``speech_rate`` and
    ``pitch_rate`` settings.

    Returns:
        The negative code from ``synthesizer.start()`` when starting fails,
        otherwise ``None``.
    """
    callback = MyCallback(audio_name)
    synthesizer = client.create_synthesizer(callback)
    synthesizer.set_appkey(appkey)
    synthesizer.set_token(token)
    synthesizer.set_voice(speaker)
    synthesizer.set_text(text)
    synthesizer.set_format(TTSFormat.WAV)
    synthesizer.set_sample_rate(TTSSampleRate.SAMPLE_RATE_16K)
    synthesizer.set_volume(volume)
    synthesizer.set_speech_rate(speech_rate)
    synthesizer.set_pitch_rate(pitch_rate)
    try:
        ret = synthesizer.start()
        if ret < 0:
            return ret
        synthesizer.wait_completed()
    except Exception as e:
        print(e)
    finally:
        synthesizer.close()
        # Neither completion callback runs when start() fails or raises, so
        # release the output file here as well.
        callback.close()


def process_multithread(client, appkey, token, text, audio_name):
    """Run ``process`` in a new thread and record it in ``thread_list``."""
    worker = threading.Thread(
        target=process, args=(client, appkey, token, text, audio_name))
    thread_list.append(worker)
    worker.start()


def get_token():
    """Request an NLS access token and return its id string."""
    # Placeholder credentials: substitute the real AccessKey id and the
    # base64-encoded AccessKey secret before use.
    client = AcsClient(
        "xxxxxxxxxxxxxxxx",
        base64.b64decode(b'xxxxxxxxxxxxxxxxxxxxx').decode(),
        "cn-shanghai"
    )
    # Build the request and set its parameters.
    request = CommonRequest()
    request.set_method('POST')
    request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
    request.set_version('2019-02-28')
    request.set_action_name('CreateToken')
    response = client.do_action_with_exception(request)
    content = json.loads(response.decode())
    return content['Token']['Id']


def main():
    """Read the config and words files, then synthesize one WAV per line."""
    global words_file, speaker, volume, speech_rate, pitch_rate
    global config_file_name, thread_list

    words_file = sys.argv[1]
    if not words_file:
        print("ali tts arg error")
        print(sys._getframe().f_lineno)
        return
    if not os.path.exists(words_file):
        print("file " + words_file + " not exist")
        print(sys._getframe().f_lineno)
        return

    # Resolve the script directory absolutely: dirname() of a bare
    # "ali_wav.py" is "", which would make the config path "/ali_wav_config".
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    config_file_name = script_dir + config_file_name
    if not os.path.exists(config_file_name):
        print("config file " + config_file_name + " not exist")
        print(sys._getframe().f_lineno)
        return

    # Newlines, tabs and double quotes are removed from every config line.
    strip_table = str.maketrans("", "", "\n\r\t\"")
    with open(config_file_name, 'r', encoding='utf-8') as config_f:
        for line in config_f:
            entry = line.strip().translate(strip_table)
            if not entry or entry[0] == "#":
                continue
            parts = entry.split("=")
            if len(parts) < 2:
                # No "=": there is no value to read on this line.
                continue
            key = parts[0]
            setting = parts[-1].strip()
            if "speaker" in key:
                speaker = setting.lower()
            elif "volume" in key:
                volume = int(setting)
            elif "speech_rate" in key:
                speech_rate = int(setting)
            elif "pitch_rate" in key:
                pitch_rate = int(setting)

    client = ali_speech.NlsClient()
    # Log level of the SDK output: DEBUG, INFO, WARNING or ERROR.
    client.set_log_level('ERROR')
    appkey = 'xxxxxxxxxxxx'
    token = get_token()

    # dirname() is "" for a bare file name; fall back to the current
    # directory so the audio is not written to the filesystem root.
    wav_path = os.path.dirname(words_file) or "."
    with open(words_file, 'r', encoding='utf-8') as r_file:
        r_lines = r_file.readlines()

    word_idx = 0
    for line in r_lines:
        # Collapse every run of whitespace (including the line ending)
        # into a single space.
        text = " ".join(line.split())
        if not text:
            continue
        file_name = (str(word_idx).zfill(3) + "-"
                     + text.replace(" ", "-") + ".wav")
        audio_name = os.path.join(wav_path, file_name)
        process_multithread(client, appkey, token, text, audio_name)
        word_idx += 1
        # Cap concurrency: wait for the whole batch after MAX_THREAD starts.
        if word_idx % MAX_THREAD == 0:
            for worker in thread_list:
                worker.join()
            thread_list = []

    for worker in thread_list:
        worker.join()
    os.system("stty sane")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("ali tts arg error")
        print(sys._getframe().f_lineno)
        sys.exit(1)
    main()