1. 程式人生 > 其它 >python 阿里雲平臺合成語音(TTS)

python 阿里雲平臺合成語音(TTS)

技術標籤:python工具TTS

1. 安裝阿里雲 Python SDK core:
pip3 install aliyun-python-sdk-core-v3

2. 安裝ali_speech python SDK, 從github上下載

https://github.com/aliyun/alibabacloud-nls-python-sdk

解壓之後,安裝

cd alibabacloud-nls-python-sdk
sudo python3 setup.py install

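3. 增加阿里雲speech配置檔案,命名為ali_wav_config,並放在與指令碼相同的目錄下。檔案每行為 key=value 格式(以 # 開頭的行視為註解),可設定 speaker、volume、speech_rate、pitch_rate,例如 speaker=xiaoyun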

4. 生成語音

執行指令碼:

./ali_wav.py ./words.txt

#!/usr/bin/env python3

# -*- coding: utf-8 -*-
import sys
import os
import threading
import ali_speech
import logging
import time
import json
import base64
from ali_speech.callbacks import SpeechSynthesizerCallback
from ali_speech.constant import TTSFormat
from ali_speech.constant import TTSSampleRate
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest

# Input/config locations. main() stores the words file path taken from the
# command line and appends the config name to the script's directory.
words_file = ""
config_file_name = "/ali_wav_config"

# Voice parameters handed to the synthesizer; main() overrides them with the
# values found in the config file.
speaker = "xiaoyun"
volume = 50
speech_rate = 0
pitch_rate = 0

# Worker threads are joined in batches of MAX_THREAD; thread_list holds the
# threads of the batch currently in flight.
MAX_THREAD = 10
thread_list = []

class MyCallback(SpeechSynthesizerCallback):
    """Synthesizer callback that streams received audio into a file.

    The file is opened for binary writing when the callback is created and
    closed when the task completes or fails.
    """

    def __init__(self, name):
        """Open ``name`` (the audio file to save) for binary writing."""
        self._name = name
        self._fout = open(name, 'wb')

    def on_binary_data_received(self, raw):
        """Append one chunk of received audio bytes to the output file."""
        self._fout.write(raw)

    def on_completed(self, message):
        """Close the audio file, then try to trim silence from it with ffmpeg.

        Trimming is optional: if ffmpeg is missing or fails, the untrimmed
        file is kept as it was received.
        """
        import subprocess

        # Close first so every buffered byte is on disk before ffmpeg reads
        # the file.
        self._fout.close()

        # ffmpeg does not edit a file in place, so the result goes to a
        # sibling file that replaces the original only on success.
        trimmed = self._name + ".trim.wav"
        # An argument list (no shell) keeps unusual characters in the file
        # name from being interpreted as shell syntax.
        command = [
            "ffmpeg", "-i", self._name,
            "-af",
            "silenceremove=start_periods=1:"
            "start_duration=0:start_threshold=-100dB:"
            "stop_periods=1:stop_duration=2:stop_threshold=-100dB",
            "-y", "-ac", "1", "-ar", "16000",
            trimmed,
        ]
        trimmed_ok = False
        try:
            finished = subprocess.run(
                command,
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            trimmed_ok = finished.returncode == 0
        except OSError:
            # ffmpeg could not be started; keep the untrimmed audio.
            pass

        if trimmed_ok:
            os.replace(trimmed, self._name)
        elif os.path.exists(trimmed):
            os.remove(trimmed)
        print(self._name + " Done!!!")

    def on_task_failed(self, message):
        """Close the output file when the synthesis task fails."""
        self._fout.close()

    def on_channel_closed(self):
        """Report that the channel was closed."""
        print('MyCallback.OnRecognitionChannelClosed')

    def on_metainfo(self, message):
        """Print the meta information message."""
        print('MyCallback.on_metainfo: %s' % message)


def process(client, appkey, token, text, audio_name):
    """Synthesize ``text`` and save the audio to ``audio_name``.

    A synthesizer is created from ``client`` with a MyCallback writing to
    ``audio_name``, configured with the module-level voice settings, started
    and waited on. Exceptions raised while running are printed, and the
    synthesizer is always closed.

    Returns the negative value from ``synthesizer.start()`` when it reports
    one; otherwise returns None.
    """
    synthesizer = client.create_synthesizer(MyCallback(audio_name))

    # Apply every option in the same order as a sequence of setter calls.
    options = (
        (synthesizer.set_appkey, appkey),
        (synthesizer.set_token, token),
        (synthesizer.set_voice, speaker),
        (synthesizer.set_text, text),
        (synthesizer.set_format, TTSFormat.WAV),
        (synthesizer.set_sample_rate, TTSSampleRate.SAMPLE_RATE_16K),
        (synthesizer.set_volume, volume),
        (synthesizer.set_speech_rate, speech_rate),
        (synthesizer.set_pitch_rate, pitch_rate),
    )
    for setter, option_value in options:
        setter(option_value)

    try:
        status = synthesizer.start()
        if not (status < 0):
            synthesizer.wait_completed()
        else:
            return status
    except Exception as err:
        print(err)
    finally:
        synthesizer.close()


def process_multithread(client, appkey, token, text, audio_name):
    """Run process() for one text in a new thread.

    The thread is recorded in the module-level ``thread_list`` before it is
    started, so the caller can join it later.
    """
    worker = threading.Thread(
        target=process,
        kwargs={
            "client": client,
            "appkey": appkey,
            "token": token,
            "text": text,
            "audio_name": audio_name,
        },
    )
    thread_list.append(worker)
    worker.start()

def get_token():
    """Call the CreateToken action and return the token id from the reply."""
    acs_client = AcsClient(
        "xxxxxxxxxxxxxxxx",
        base64.b64decode(b'xxxxxxxxxxxxxxxxxxxxx').decode(),
        "cn-shanghai",
    )

    # Build the request and set its parameters.
    create_token = CommonRequest()
    create_token.set_method('POST')
    create_token.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
    create_token.set_version('2019-02-28')
    create_token.set_action_name('CreateToken')

    raw_reply = acs_client.do_action_with_exception(create_token)
    reply = json.loads(raw_reply.decode())
    return reply['Token']['Id']

def _audio_file_name(directory, index, text):
    """Build the output path ``<directory>/<NNN>-<text>.wav`` for one phrase."""
    # Spaces become dashes; path separators are replaced as well so the
    # text cannot be read as a sub-directory of the output directory.
    stem = text.replace(" ", "-").replace("/", "-")
    return os.path.join(directory, "%03d-%s.wav" % (index, stem))


def main():
    """Synthesize every non-empty line of the words file into a WAV file.

    The words file path is taken from ``sys.argv[1]``. Voice settings are
    read from the config file located next to the script and stored in the
    module-level ``speaker``, ``volume``, ``speech_rate`` and ``pitch_rate``.
    One thread is started per line, threads are joined in batches of
    ``MAX_THREAD``, and the WAV files are written next to the words file.
    """
    global words_file, speaker, volume, speech_rate, pitch_rate, thread_list

    if len(sys.argv) < 2 or not sys.argv[1]:
        print("ali tts arg error")
        print(sys._getframe().f_lineno)
        return
    words_file = sys.argv[1]

    if not os.path.exists(words_file):
        print("file " + words_file + " not exist")
        print(sys._getframe().f_lineno)
        return

    # abspath is needed because a bare script name has an empty dirname,
    # which would make the config path resolve to the filesystem root.
    # A local path is used so repeated calls do not keep extending the
    # module-level config_file_name.
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    config_path = os.path.join(script_dir, config_file_name.lstrip("/"))

    if not os.path.exists(config_path):
        print("config file " + config_path + " not exist")
        print(sys._getframe().f_lineno)
        return

    with open(config_path, 'r', encoding='utf-8') as config_f:
        for line in config_f:
            value = line.strip().replace("\n", "").replace("\r", "").replace("\t", "").replace('"', "")
            # Skip blank lines and comment lines.
            if not value or value[0] == "#":
                continue
            parts = value.split("=")
            # A line without "=" carries no value to read.
            if len(parts) < 2:
                continue
            key = parts[0]
            setting = parts[-1].strip()
            if "speaker" in key:
                speaker = setting.lower()
            elif "volume" in key:
                volume = int(setting)
            elif "speech_rate" in key:
                speech_rate = int(setting)
            elif "pitch_rate" in key:
                pitch_rate = int(setting)

    client = ali_speech.NlsClient()
    # Log level: DEBUG, INFO, WARNING or ERROR.
    client.set_log_level('ERROR')

    appkey = 'xxxxxxxxxxxx'
    token = get_token()

    # abspath keeps the output next to the words file even when it was
    # given as a bare file name (empty dirname).
    wav_path = os.path.dirname(os.path.abspath(words_file))

    word_idx = 0
    with open(words_file, 'r', encoding='utf-8') as r_file:
        for line in r_file:
            text = line.replace("\n", "").replace("\r", "").replace("\t", " ")
            # Collapse runs of whitespace into single spaces.
            text = ' '.join(text.split())
            if not text:
                continue

            audio_name = _audio_file_name(wav_path, word_idx, text)
            process_multithread(client, appkey, token, text, audio_name)
            word_idx += 1

            # Wait for the current batch before starting more threads.
            if word_idx % MAX_THREAD == 0:
                for thread in thread_list:
                    thread.join()
                thread_list = []

    # Wait for the threads of the last, partial batch.
    for thread in thread_list:
        thread.join()
    os.system("stty sane")

if __name__ == "__main__":
    # The words file path is required as the first command-line argument.
    if len(sys.argv) < 2:
        print("ali tts arg error")
        print(sys._getframe().f_lineno)
        # sys.exit does not depend on the site module (unlike the bare
        # exit() helper); the non-zero status reports the usage error.
        sys.exit(1)
    main()