unity3d:百度語音線上語音轉文字,文字轉語音,跨平臺
阿新 • • 發佈:2019-02-20
轉自洪流學堂
語音轉文字
1.開啟麥克風記錄
// Start microphone capture and keep the clip being recorded in _clipRecord.
// Arguments, in the order of Unity's Microphone.Start(deviceName, loop, lengthSec, frequency):
// null device name (presumably the default microphone - confirm against the Unity docs),
// no looping, a length of 30 and a frequency of 16000.
// The 16000 matches the "rate=16000" declared in the recognition request's Content-Type header.
_clipRecord = Microphone.Start(null, false, 30, 16000);
2.將Unity的AudioClip資料轉化為PCM格式16bit資料
/// <summary>
/// Converts the sample data of a Unity AudioClip into raw 16-bit signed PCM bytes.
/// </summary>
/// <param name="clip">
/// Clip to convert. All <c>clip.samples * clip.channels</c> values returned by
/// <c>GetData</c> are converted, in the order <c>GetData</c> delivers them.
/// </param>
/// <returns>
/// A byte array containing two bytes per sample, low byte first (little-endian),
/// regardless of the byte order of the host platform.
/// </returns>
public static byte[] ConvertAudioClipToPCM16(AudioClip clip)
{
    var samples = new float[clip.samples * clip.channels];
    clip.GetData(samples, 0);

    var pcmBytes = new byte[samples.Length * 2];
    for (var index = 0; index < samples.Length; index++)
    {
        var f = samples[index];

        // Samples are expected in [-1, 1]. Anything outside that range (or NaN) would make
        // the float -> short cast produce an unspecified/wrapped value, which is audible as
        // a loud click, so normalise the value before scaling.
        if (float.IsNaN(f))
        {
            f = 0f;
        }
        else if (f > 1f)
        {
            f = 1f;
        }
        else if (f < -1f)
        {
            f = -1f;
        }

        var value = (short) (f * short.MaxValue);

        // Write the bytes explicitly instead of block-copying the short array so the
        // output is always little-endian PCM.
        pcmBytes[2 * index] = (byte) (value & 0xFF);
        pcmBytes[2 * index + 1] = (byte) ((value >> 8) & 0xFF);
    }

    return pcmBytes;
}
3.將位元組流上傳到百度語音uri,得到轉換後的文字
/// <summary>
/// Coroutine that uploads 16 kHz PCM audio to the speech recognition endpoint
/// (<c>UrlAsr</c>) and passes the parsed JSON response to <paramref name="callback"/>.
/// </summary>
/// <param name="data">Raw PCM audio bytes to recognise. Must not be null or empty.</param>
/// <param name="callback">
/// Invoked with the deserialised response when the request completes without a
/// transport error. It is not invoked when the token fetch failed, when
/// <paramref name="data"/> is null/empty, or when the request reports an error;
/// those cases are only logged.
/// </param>
/// <returns>An enumerator to be run as a Unity coroutine.</returns>
public IEnumerator Recognize(byte[] data, Action<AsrResponse> callback)
{
    // PreAction is defined elsewhere; presumably it makes sure the access token
    // has been fetched before the request is built - verify against its definition.
    yield return PreAction();

    if (tokenFetchStatus == Base.TokenFetchStatus.Failed)
    {
        Debug.LogError("Token fetched failed, please check your APIKey and SecretKey");
        yield break;
    }

    if (data == null || data.Length == 0)
    {
        Debug.LogError("No audio data to recognize");
        yield break;
    }

    var uri = string.Format("{0}?lan=zh&cuid={1}&token={2}", UrlAsr, SystemInfo.deviceUniqueIdentifier, Token);

    // The request declares a raw "audio/pcm" body, so the bytes are sent as-is.
    // Wrapping them in a multipart WWWForm would put the multipart boundary and part
    // headers into the body, where they would be interpreted as audio samples.
    using (var www = new UnityWebRequest(uri, UnityWebRequest.kHttpVerbPOST))
    {
        www.uploadHandler = new UploadHandlerRaw(data);
        www.downloadHandler = new DownloadHandlerBuffer();
        www.SetRequestHeader("Content-Type", "audio/pcm;rate=16000");

        yield return www.SendWebRequest();

        if (string.IsNullOrEmpty(www.error))
        {
            Debug.Log(www.downloadHandler.text);
            callback(JsonUtility.FromJson<AsrResponse>(www.downloadHandler.text));
        }
        else
        {
            Debug.LogError(www.error);
        }
    }
}
文字轉語音
1.文字上傳百度語音uri,得到位元組流
/// <summary>
/// Coroutine that sends <paramref name="text"/> to the text-to-speech endpoint
/// (<c>UrlTts</c>) and reports the result through <paramref name="callback"/>.
/// </summary>
/// <param name="text">Text to synthesise. It is URL-escaped before being sent; null is sent as empty.</param>
/// <param name="callback">
/// Invoked exactly once: with a response carrying an <c>AudioClip</c> when the server
/// answers with "audio/mp3", with the deserialised JSON body for any other content type,
/// or with <c>err_no = -1</c> and a message when the token fetch or the request itself failed.
/// </param>
/// <param name="speed">Value sent as "spd", clamped to 0..9.</param>
/// <param name="pit">Value sent as "pit", clamped to 0..9.</param>
/// <param name="vol">Value sent as "vol", clamped to 0..15.</param>
/// <param name="per">Pronouncer whose integer value is sent as "per".</param>
/// <returns>An enumerator to be run as a Unity coroutine.</returns>
public IEnumerator Synthesis(string text, Action<TtsResponse> callback, int speed = 5, int pit = 5, int vol = 5,
    Pronouncer per = Pronouncer.Female)
{
    // PreAction is defined elsewhere; presumably it makes sure the access token
    // has been fetched before the request is built - verify against its definition.
    yield return PreAction();

    if (tokenFetchStatus == Base.TokenFetchStatus.Failed)
    {
        Debug.LogError("Token was fetched failed. Please check your APIKey and SecretKey");
        callback(new TtsResponse()
        {
            err_no = -1,
            err_msg = "Token was fetched failed. Please check your APIKey and SecretKey"
        });
        yield break;
    }

    var param = new Dictionary<string, string>();
    param.Add("tex", text);
    param.Add("tok", Token);
    param.Add("cuid", SystemInfo.deviceUniqueIdentifier);
    param.Add("ctp", "1");
    param.Add("lan", "zh");
    param.Add("spd", Mathf.Clamp(speed, 0, 9).ToString());
    param.Add("pit", Mathf.Clamp(pit, 0, 9).ToString());
    param.Add("vol", Mathf.Clamp(vol, 0, 15).ToString());
    param.Add("per", ((int) per).ToString());

    string url = UrlTts;
    int i = 0;
    foreach (var p in param)
    {
        url += i != 0 ? "&" : "?";
        // Escape every value: the text is caller-supplied and may contain '&', '=', '#',
        // '+', spaces or non-ASCII characters that would otherwise corrupt the query string.
        url += p.Key + "=" + System.Uri.EscapeDataString(p.Value ?? string.Empty);
        i++;
    }

    // The request type differs per platform (plain GET decoded by MP3Sharp vs. Unity's
    // audio clip download handler), but in both cases it is disposed when the block exits.
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
    using (var www = UnityWebRequest.Get(url))
#else
    using (var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.MPEG))
#endif
    {
        Debug.Log(www.url);
        yield return www.SendWebRequest();

        if (!string.IsNullOrEmpty(www.error))
        {
            Debug.LogError(www.error);
            // Report the failure the same way the token-failure path does, so the caller
            // is not left waiting for a callback that never arrives.
            callback(new TtsResponse
            {
                err_no = -1,
                err_msg = www.error
            });
            yield break;
        }

        var type = www.GetResponseHeader("Content-Type");
        Debug.Log("response type: " + type);

        if (type == "audio/mp3")
        {
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
            var clip = GetAudioClipFromMP3ByteArray(www.downloadHandler.data);
            var response = new TtsResponse {clip = clip};
#else
            var response = new TtsResponse {clip = DownloadHandlerAudioClip.GetContent(www) };
#endif
            callback(response);
        }
        else
        {
            // Any other content type is treated as a JSON error document from the service.
            Debug.LogError(www.downloadHandler.text);
            callback(JsonUtility.FromJson<TtsResponse>(www.downloadHandler.text));
        }
    }
}
2.位元組流轉化為AudioClip播放
/// <summary>
/// Decodes MP3 bytes with MP3Sharp and builds a single-channel AudioClip from the result.
/// </summary>
/// <param name="mp3Data">Complete MP3 file contents.</param>
/// <returns>A new one-channel AudioClip named "testSound" holding the decoded audio.</returns>
private AudioClip GetAudioClipFromMP3ByteArray(byte[] mp3Data)
{
    using (var mp3MemoryStream = new MemoryStream(mp3Data))
    using (var mp3Stream = new MP3Sharp.MP3Stream(mp3MemoryStream))
    using (var convertedAudioStream = new MemoryStream())
    {
        // Pull the decoded PCM out of the MP3 stream chunk by chunk.
        var buffer = new byte[2048];
        int bytesReturned;
        // "> 0" rather than "!= 0" so a negative end-of-stream value can never be
        // passed to Write as a count.
        while ((bytesReturned = mp3Stream.Read(buffer, 0, buffer.Length)) > 0)
        {
            convertedAudioStream.Write(buffer, 0, bytesReturned);
        }

        Debug.Log("MP3 file has " + mp3Stream.ChannelCount + " channels with a frequency of " +
                  mp3Stream.Frequency);

        byte[] convertedAudioData = convertedAudioStream.ToArray();

        // Bug of MP3Sharp: audio with 1 channel is still emitted with right-channel data.
        // Keep only the first 16-bit sample of every 4-byte frame. Counting whole frames
        // keeps the output length even, so a truncated trailing frame cannot push the
        // two-byte copy below past the end of either array.
        byte[] data = new byte[(convertedAudioData.Length / 4) * 2];
        for (int i = 0; i < data.Length; i += 2)
        {
            data[i] = convertedAudioData[2 * i];
            data[i + 1] = convertedAudioData[2 * i + 1];
        }

        Wav wav = new Wav(data, mp3Stream.ChannelCount, mp3Stream.Frequency);
        AudioClip audioClip = AudioClip.Create("testSound", wav.SampleCount, 1, wav.Frequency, false);
        audioClip.SetData(wav.LeftChannel, 0);
        return audioClip;
    }
}