程式人生 > unity3d:百度語音線上語音轉文字,文字轉語音,跨平臺

unity3d:百度語音線上語音轉文字,文字轉語音,跨平臺

轉自洪流學堂
語音轉文字
1.開啟麥克風記錄

// Start capturing from the microphone and keep the clip being recorded into.
// Arguments follow Unity's Microphone.Start(deviceName, loop, lengthSec, frequency):
// null device name, looping disabled, 30 for the clip length, 16000 for the sample rate.
// The 16000 rate matches the "rate=16000" declared when the audio is uploaded for recognition.
_clipRecord = Microphone.Start(null, false, 30, 16000);

2.將Unity的AudioClip資料轉化為PCM格式16bit資料

/// <summary>
        /// Converts the sample data of a Unity AudioClip into raw 16-bit signed PCM bytes.
        /// </summary>
        /// <param name="clip">Clip whose samples are read with GetData starting at offset 0.</param>
        /// <returns>
        /// A byte array holding two bytes per sample; the 16-bit values are block-copied,
        /// so the byte order is the host's native one.
        /// </returns>
        public static byte[] ConvertAudioClipToPCM16(AudioClip clip)
        {
            var samples = new float[clip.samples * clip.channels];
            clip.GetData(samples, 0);

            var samples_int16 = new short[samples.Length];
            for (var index = 0; index < samples.Length; index++)
            {
                // Clamp before scaling: a sample outside [-1, 1] would overflow the
                // cast to short and yield a garbage value instead of clipping.
                var f = Mathf.Clamp(samples[index], -1f, 1f);
                samples_int16[index] = (short) (f * short.MaxValue);
            }

            var byteArray = new byte[samples_int16.Length * 2];
            Buffer.BlockCopy(samples_int16, 0, byteArray, 0, byteArray.Length);
            return byteArray;
        }

3.將位元組流上傳到百度語音uri,得到轉換後的文字

 /// <summary>
        /// Coroutine that uploads PCM audio to the speech recognition endpoint and
        /// hands the parsed JSON reply to <paramref name="callback"/>.
        /// </summary>
        /// <param name="data">PCM audio bytes, sent as the request body and declared as "audio/pcm;rate=16000".</param>
        /// <param name="callback">
        /// Invoked with the deserialized response when the request completes without a
        /// transport error. It is not invoked when the token fetch failed or the request errored;
        /// those cases are only logged.
        /// </param>
        /// <returns>An enumerator to be run as a Unity coroutine.</returns>
        public IEnumerator Recognize(byte[] data, Action<AsrResponse> callback)
        {
            yield return PreAction();

            if (tokenFetchStatus == Base.TokenFetchStatus.Failed)
            {
                Debug.LogError("Token fetched failed, please check your APIKey and SecretKey");
                yield break;
            }

            var uri = string.Format("{0}?lan=zh&cuid={1}&token={2}", UrlAsr, SystemInfo.deviceUniqueIdentifier, Token);

            // Dispose the request when the coroutine finishes so its native buffers are released.
            using (var www = new UnityWebRequest(uri, UnityWebRequest.kHttpVerbPOST))
            {
                // Send the PCM bytes as the raw body. Wrapping them in a WWWForm produced a
                // multipart payload that contradicted the "audio/pcm" Content-Type header below.
                // UploadHandlerRaw is only attached when there is a payload to send.
                if (data != null && data.Length > 0)
                {
                    www.uploadHandler = new UploadHandlerRaw(data);
                }

                www.downloadHandler = new DownloadHandlerBuffer();
                www.SetRequestHeader("Content-Type", "audio/pcm;rate=16000");
                yield return www.SendWebRequest();

                if (string.IsNullOrEmpty(www.error))
                {
                    var json = www.downloadHandler.text;
                    Debug.Log(json);
                    callback(JsonUtility.FromJson<AsrResponse>(json));
                }
                else
                {
                    Debug.LogError(www.error);
                }
            }
        }

文字轉語音
1.文字上傳百度語音uri,得到位元組流

 /// <summary>
        /// Coroutine that requests synthesized speech for <paramref name="text"/> and
        /// reports the outcome through <paramref name="callback"/>.
        /// </summary>
        /// <param name="text">Text to synthesize; sent as the "tex" query parameter.</param>
        /// <param name="callback">
        /// Always invoked once: with a clip on success, with the server's JSON reply when the
        /// response is not "audio/mp3", or with err_no = -1 when the token fetch or the request failed.
        /// </param>
        /// <param name="speed">Sent as "spd", clamped to 0..9.</param>
        /// <param name="pit">Sent as "pit", clamped to 0..9.</param>
        /// <param name="vol">Sent as "vol", clamped to 0..15.</param>
        /// <param name="per">Voice selection; its integer value is sent as "per".</param>
        /// <returns>An enumerator to be run as a Unity coroutine.</returns>
        public IEnumerator Synthesis(string text, Action<TtsResponse> callback, int speed = 5, int pit = 5, int vol = 5,
            Pronouncer per = Pronouncer.Female)
        {
            yield return PreAction();

            if (tokenFetchStatus == Base.TokenFetchStatus.Failed)
            {
                Debug.LogError("Token was fetched failed. Please check your APIKey and SecretKey");
                callback(new TtsResponse()
                {
                    err_no = -1,
                    err_msg = "Token was fetched failed. Please check your APIKey and SecretKey"
                });
                yield break;
            }

            var param = new Dictionary<string, string>();
            param.Add("tex", text);
            param.Add("tok", Token);
            param.Add("cuid", SystemInfo.deviceUniqueIdentifier);
            param.Add("ctp", "1");
            param.Add("lan", "zh");
            param.Add("spd", Mathf.Clamp(speed, 0, 9).ToString());
            param.Add("pit", Mathf.Clamp(pit, 0, 9).ToString());
            param.Add("vol", Mathf.Clamp(vol, 0, 15).ToString());
            param.Add("per", ((int) per).ToString());

            // URL-encode every value: the text is caller-supplied and may contain
            // '&', '=', '#', spaces or non-ASCII characters that would corrupt the query string.
            var pairs = new List<string>(param.Count);
            foreach (var p in param)
            {
                pairs.Add(p.Key + "=" + UnityWebRequest.EscapeURL(p.Value ?? string.Empty));
            }

            string url = UrlTts + "?" + string.Join("&", pairs.ToArray());

            // On these platforms the MP3 bytes are decoded manually; elsewhere Unity decodes the clip itself.
            // The using block disposes the request once the coroutine is done with it.
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
            using (var www = UnityWebRequest.Get(url))
#else
            using (var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.MPEG))
#endif
            {
                Debug.Log(www.url);
                yield return www.SendWebRequest();

                if (!string.IsNullOrEmpty(www.error))
                {
                    Debug.LogError(www.error);
                    // Report transport failures the same way as a token failure so the
                    // caller is never left waiting for a callback that does not come.
                    callback(new TtsResponse()
                    {
                        err_no = -1,
                        err_msg = www.error
                    });
                    yield break;
                }

                var type = www.GetResponseHeader("Content-Type");
                Debug.Log("response type: " + type);

                if (type == "audio/mp3")
                {
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
                    var clip = GetAudioClipFromMP3ByteArray(www.downloadHandler.data);
                    var response = new TtsResponse {clip = clip};
#else
                    var response = new TtsResponse {clip = DownloadHandlerAudioClip.GetContent(www) };
#endif
                    callback(response);
                }
                else
                {
                    // Any other content type is treated as a JSON error description.
                    Debug.LogError(www.downloadHandler.text);
                    callback(JsonUtility.FromJson<TtsResponse>(www.downloadHandler.text));
                }
            }
        }

2.位元組流轉化為AudioClip播放

/// <summary>
        /// Decodes MP3 bytes with MP3Sharp and builds a single-channel AudioClip from the result.
        /// </summary>
        /// <param name="mp3Data">The encoded MP3 data.</param>
        /// <returns>A new AudioClip named "testSound" filled with the decoded left channel.</returns>
        private AudioClip GetAudioClipFromMP3ByteArray(byte[] mp3Data)
        {
            // All three streams are disposed when decoding is finished.
            using (var mp3MemoryStream = new MemoryStream(mp3Data))
            using (var mp3Stream = new MP3Sharp.MP3Stream(mp3MemoryStream))
            using (var convertedAudioStream = new MemoryStream())
            {
                // Pump the decoded stream until Read reports no more data.
                byte[] buffer = new byte[2048];
                int bytesReturned;
                while ((bytesReturned = mp3Stream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    convertedAudioStream.Write(buffer, 0, bytesReturned);
                }

                Debug.Log("MP3 file has " + mp3Stream.ChannelCount + " channels with a frequency of " +
                          mp3Stream.Frequency);

                byte[] convertedAudioData = convertedAudioStream.ToArray();

                // Bug of mp3sharp: audio with 1 channel still carries right-channel data, so skip it.
                // Keep bytes 0-1 of every 4-byte group and drop bytes 2-3.
                // NOTE(review): this is applied regardless of ChannelCount - confirm stereo input is never passed.
                byte[] data = new byte[convertedAudioData.Length / 2];
                for (int i = 0; i + 1 < data.Length; i += 2) // i + 1 guard: never write past an odd-length buffer
                {
                    data[i] = convertedAudioData[2 * i];
                    data[i + 1] = convertedAudioData[2 * i + 1];
                }

                Wav wav = new Wav(data, mp3Stream.ChannelCount, mp3Stream.Frequency);

                AudioClip audioClip = AudioClip.Create("testSound", wav.SampleCount, 1, wav.Frequency, false);
                audioClip.SetData(wav.LeftChannel, 0);

                return audioClip;
            }
        }