1. 程式人生 > 實用技巧 >springBoot+vue+百度語音識別

springBoot+vue+百度語音識別

1. 將百度語音識別 demo 下載下來,並用 maven 封裝成一個 jar 包,核心程式碼如下:

package com.baidu.speech.restapi.asrdemo;

import com.alibaba.fastjson.JSONObject;
import com.baidu.speech.restapi.asrdemo.common.ConnUtil;
import com.baidu.speech.restapi.asrdemo.common.DemoException;
import com.baidu.speech.restapi.asrdemo.common.TokenHolder;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class AsrUtil { private final boolean METHOD_RAW = false; // 預設以json方式上傳音訊檔案 // 填寫網頁上申請的appkey 如 $apiKey="g8eBUMSokVB1BHGmgxxxxxx" // private final String APP_KEY = "kVcnfD9iW2XVZSMaLMrtLYIz"; // my private static final String APP_KEY = "XXXXXXXXXXXXXX"; // 填寫網頁上申請的APP SECRET 如 $SECRET_KEY="94dc99566550d87f8fa8ece112xxxxx"
// private final String SECRET_KEY = "O9o1O213UgG5LFn0bDGNtoRN3VWl2du6"; // my private static final String SECRET_KEY = "XXXXXXXXXXXX"; // 需要識別的檔案 private static final String FILENAME = "16k.pcm"; // 檔案格式, 支援pcm/wav/amr 格式,極速版額外支援m4a 格式 private static final String FORMAT = FILENAME.substring(FILENAME.length() - 3
); private static String CUID = "1234567JAVA"; // 取樣率固定值 private static final int RATE = 16000; private static String URL; private static int DEV_PID; //private int LM_ID;//測試自訓練平臺需要開啟此註釋 private static String SCOPE; // 普通版 引數 // { // URL = "http://vop.baidu.com/server_api"; // 可以改為https // // 1537 表示識別普通話,使用輸入法模型。 其它語種參見文件 // DEV_PID = 1537; // SCOPE = "audio_voice_assistant_get"; // } // 自訓練平臺 引數 /*{ //自訓練平臺模型上線後,您會看見 第二步:“”獲取專屬模型引數pid:8001,modelid:1234”,按照這個資訊獲取 dev_pid=8001,lm_id=1234 DEV_PID = 8001; LM_ID = 1234; }*/ // 極速版 引數 static { URL = "http://vop.baidu.com/pro_api"; // 可以改為https DEV_PID = 80001; SCOPE = "brain_enhanced_asr"; } /* 忽略scope檢查,非常舊的應用可能沒有 { SCOPE = null; } */ public static String obtainAsrResult(byte[] bytes) throws IOException, DemoException { String resultJson = execute(bytes); System.out.println("識別結束:結果是:"); System.out.println(resultJson); return resultJson; } public static String execute(byte[] bytes) throws IOException, DemoException { TokenHolder holder = new TokenHolder(APP_KEY, SECRET_KEY, SCOPE); holder.resfresh(); String token = holder.getToken(); String result = null; result = runJsonPostMethod(token, bytes); return result; } private String runRawPostMethod(String token) throws IOException, DemoException { String url2 = URL + "?cuid=" + ConnUtil.urlEncode(CUID) + "&dev_pid=" + DEV_PID + "&token=" + token; //測試自訓練平臺需要開啟以下資訊 //String url2 = URL + "?cuid=" + ConnUtil.urlEncode(CUID) + "&dev_pid=" + DEV_PID + "&lm_id="+ LM_ID + "&token=" + token; String contentTypeStr = "audio/" + FORMAT + "; rate=" + RATE; //System.out.println(url2); byte[] content = getFileContent(FILENAME); HttpURLConnection conn = (HttpURLConnection) new URL(url2).openConnection(); conn.setConnectTimeout(5000); conn.setRequestProperty("Content-Type", contentTypeStr); conn.setRequestMethod("POST"); conn.setDoOutput(true); conn.getOutputStream().write(content); conn.getOutputStream().close(); System.out.println("url is " + url2); System.out.println("header is " + 
"Content-Type :" + contentTypeStr); String result = ConnUtil.getResponseString(conn); return result; } public static String runJsonPostMethod(String token,byte[] bytes) throws DemoException, IOException { // byte[] content = getFileContent(FILENAME); String speech = base64Encode(bytes); JSONObject params = new JSONObject(); params.put("dev_pid", DEV_PID); //params.put("lm_id",LM_ID);//測試自訓練平臺需要開啟註釋 params.put("format", "wav"); params.put("rate", RATE); params.put("token", token); params.put("cuid", CUID); params.put("channel", "1"); params.put("len", bytes.length); params.put("speech", speech); // System.out.println(params.toString()); HttpURLConnection conn = (HttpURLConnection) new URL(URL).openConnection(); conn.setConnectTimeout(5000); conn.setRequestMethod("POST"); conn.setRequestProperty("Content-Type", "application/json; charset=utf-8"); conn.setDoOutput(true); conn.getOutputStream().write(params.toString().getBytes()); conn.getOutputStream().close(); String result = ConnUtil.getResponseString(conn); params.put("speech", "base64Encode(getFileContent(FILENAME))"); System.out.println("url is : " + URL); System.out.println("params is :" + params.toString()); return result; } private byte[] getFileContent(String filename) throws DemoException, IOException { File file = new File(filename); if (!file.canRead()) { System.err.println("檔案不存在或者不可讀: " + file.getAbsolutePath()); throw new DemoException("file cannot read: " + file.getAbsolutePath()); } FileInputStream is = null; try { is = new FileInputStream(file); return ConnUtil.getInputStreamContent(is); } finally { if (is != null) { try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } } private static String base64Encode(byte[] content) { /** Base64.Encoder encoder = Base64.getEncoder(); // JDK 1.8 推薦方法 String str = encoder.encodeToString(content); **/ char[] chars = Base64Util.encode(content); // 1.7 及以下,不推薦,請自行跟換相關庫 String str = new String(chars); return str; } }

呼叫obtainAsrResult 方法即可獲得識別後字串

2.使用ffmpeg 對音訊進行轉碼

  下載 ffmpeg,並將其 bin 目錄加入 PATH 環境變數即可(後端會直接以 ffmpeg 指令進行轉碼)。

程式碼如下:

/**
 * Transcodes an uploaded recording with ffmpeg (mono, 16 kHz, signed 16-bit little-endian PCM),
 * sends it to {@code AsrUtil.obtainAsrResult} and returns the first entry of the
 * {@code "result"} array in the response.
 *
 * @param file the uploaded audio
 * @return the first recognised string, or {@code null} when transcoding fails, the response
 *         carries no result, or any error occurs
 */
@Override
    public String aiAsrTest(MultipartFile file) {
        String r = null;
        String path = "D:\\bwbd\\temp\\";
        // A per-request prefix keeps concurrent uploads from overwriting each other's temp files.
        String prefix = java.util.UUID.randomUUID().toString();
        File file1 = new File(path + prefix + "-16k1.wav");
        File file2 = new File(path + prefix + "-16k2.wav");
        try {
            File dir = new File(path);
            if (!dir.exists()) {
                dir.mkdirs();
            }
            file.transferTo(file1);

            try {
                logger.info("========音訊格式轉換======");
                // Argument list instead of one concatenated command string: no shell-style
                // splitting, so paths containing spaces stay intact.
                Process proce = new ProcessBuilder(
                        "ffmpeg", "-y", "-i", file1.getPath(),
                        "-acodec", "pcm_s16le", "-f", "s16le", "-ac", "1", "-ar", "16000",
                        file2.getPath())
                        .redirectErrorStream(true)
                        .start();
                // Drain the merged stdout/stderr so ffmpeg can never block on a full pipe.
                try (InputStream output = proce.getInputStream()) {
                    byte[] a = new byte[1024];
                    while (output.read(a) > -1) {
                        // output intentionally discarded
                    }
                }
                int exitCode = proce.waitFor();
                if (exitCode != 0 || !file2.exists()) {
                    logger.info("=========檔案 "+ file + " 正在轉換出現異常");
                    return null;
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag for whoever owns this thread.
                Thread.currentThread().interrupt();
                logger.info("=========檔案 "+ file + " 正在轉換出現異常");
                return null;
            } catch (Exception e) {
                e.printStackTrace();
                logger.info("=========檔案 "+ file + " 正在轉換出現異常");
                return null;
            }

            byte[] fileByteArray = FileUtil.getFileByteArray(file2);

            String result = AsrUtil.obtainAsrResult(fileByteArray);
            log.info("===ai介面返回:" + result);
            JSONObject jsonObject = JSONObject.parseObject(result);
            Object result1 = jsonObject == null ? null : jsonObject.get("result");
            if (null != result1) {
                List<String> strings = JSONArray.parseArray(result1.toString(), String.class);
                if (strings != null && !strings.isEmpty()) {
                    // The parsed value is already a proper String; re-encoding it through the
                    // platform charset would corrupt non-ASCII text on non-UTF-8 systems.
                    r = strings.get(0);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Best-effort cleanup; a leftover temp file is harmless because names are unique.
            file1.delete();
            file2.delete();
        }
        return r;
    }

最後,附上 vue 實現錄音功能,並上傳到後臺、獲得識別後字串的程式碼。

1.methods域內定義以下方法

initAudio(){
      // this.$nextTick(() => {
        // try {
        //   // <!-- 檢查是否能夠呼叫麥克風 -->
        //   window.AudioContext = window.AudioContext || window.webkitAudioContext;
        //   navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia;
        //   window.URL = window.URL || window.webkitURL;
        //
        //   audio_context = new AudioContext;
        //   console.log('navigator.getUserMedia ' + (navigator.getUserMedia ? 'available.' : 'not present!'));
        // } catch (e) {
        //   alert('No web audio support in this browser!');
        // }
      var _this = this;
      navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia;
      navigator.getUserMedia({audio: true}, function (stream) {
        let recorder = new HZRecorder(stream);
        _this.recorder = recorder;
        console.log('初始化完成');
      }, function(e) {
        console.log('No live audio input: ' + e);
      });
      // })
    },
      readyOriginal () {
        if (!this.isVoice) {
          // <!-- 開啟錄音 -->
          this.recorder && this.recorder.start();
          this.isVoice = true
        } else {
          this.isVoice = false
          // 結束錄音
          this.recorder && this.recorder.stop();
          setTimeout(()=> {
            // <!-- 錄音上傳 --> https://wx.csbwbd.com/bwbd/fg/aiAsrTest
            var mp3Blob = this.recorder.upload();
            var fd = new FormData();
            fd.append('file', mp3Blob);
            // this.$axios.post('https://api.csbwbd.com/api/webUser/insertUserLog',allJoin).then(
            //   res=>{
            //     if(res.data.data===true){
            //       return
            //     }
            //   }
            // )
            this.$axios.post('http://localhost/bwbd/fg/aiAsrTest',fd).then((res) => {
              // 這裡做登入攔截
              if (res.data.status === 200) {
                console.log('儲存成功');
                console.log(res.data.data)
              } else {
                this.returnmsg = '上傳失敗'
              }
            })
          },1000)
        }
      },

html 元素呼叫 readyOriginal 方法;在 mounted 域內呼叫 initAudio 方法。

記得在 vue 元件中引入 js 檔案(下面第一行 import 寫在元件內,其後為 ../utils/HZRecorder.js 的內容):

import { HZRecorder} from '../utils/HZRecorder.js';
/**
 * Records mono audio from a MediaStream and encodes it as a PCM WAV blob.
 *
 * @param {MediaStream} stream  microphone stream obtained from getUserMedia
 * @param {Object} [config]
 * @param {number} [config.sampleBits=16]    output bit depth, 8 or 16 (anything else is treated as 16)
 * @param {number} [config.sampleRate=16000] requested output sample rate in Hz; never exceeds the input rate
 */
function HZRecorder(stream, config) {
  // Read options into locals so the caller's config object is not modified.
  var options = config || {};
  var requestedBits = options.sampleBits || 16;
  var requestedRate = options.sampleRate || 16000;

  var context = new (window.AudioContext || window.webkitAudioContext)();
  var audioInput = context.createMediaStreamSource(stream);
  var createScript = context.createScriptProcessor || context.createJavaScriptNode;
  var recorder = createScript.apply(context, [4096, 1, 1]);

  var audioData = {
    size: 0,                                 // number of samples recorded so far
    buffer: [],                              // recorded chunks
    inputSampleRate: context.sampleRate,     // input sample rate
    inputSampleBits: 16,                     // input bit depth
    outputSampleRate: requestedRate,         // requested output sample rate
    outputSampleBits: requestedBits,         // requested output bit depth

    // Stores a copy of one chunk (the source array is copied, not referenced).
    input: function (data) {
      this.buffer.push(new Float32Array(data));
      this.size += data.length;
    },

    // Discards everything recorded so far.
    clear: function () {
      this.buffer = [];
      this.size = 0;
    },

    // Merges all chunks and down-samples them to the output rate.
    compress: function () {
      // Merge.
      var merged = new Float32Array(this.size);
      var offset = 0;
      for (var i = 0; i < this.buffer.length; i++) {
        merged.set(this.buffer[i], offset);
        offset += this.buffer[i].length;
      }

      // Down-sample by picking the nearest earlier input sample. The exact rate ratio is
      // used (not a truncated integer), so e.g. 44100 -> 16000 really yields 16000 samples
      // per second and matches the rate written into the WAV header. When the input rate
      // is below the requested rate the data is passed through unchanged.
      // No low-pass filter is applied, so content above half the output rate can alias.
      var inRate = this.inputSampleRate;
      var outRate = Math.min(inRate, this.outputSampleRate);
      var length = Math.floor(merged.length * outRate / inRate);
      var result = new Float32Array(length);
      var last = merged.length - 1;
      for (var k = 0; k < length; k++) {
        result[k] = merged[Math.min(last, Math.floor(k * inRate / outRate))];
      }
      return result;
    },

    // Builds a 44-byte RIFF/WAVE header followed by the PCM samples.
    encodeWAV: function () {
      var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
      // Only 8- and 16-bit encoders exist below; any other value would size the buffer wrongly.
      var sampleBits = Math.min(this.inputSampleBits, this.outputSampleBits) === 8 ? 8 : 16;
      var samples = this.compress();
      var dataLength = samples.length * (sampleBits / 8);
      var buffer = new ArrayBuffer(44 + dataLength);
      var data = new DataView(buffer);

      var channelCount = 1; // mono
      var offset = 0;

      var writeString = function (str) {
        for (var i = 0; i < str.length; i++) {
          data.setUint8(offset + i, str.charCodeAt(i));
        }
      };

      // RIFF chunk identifier
      writeString('RIFF'); offset += 4;
      // Bytes from the next field to the end of the file (file size - 8)
      data.setUint32(offset, 36 + dataLength, true); offset += 4;
      // WAVE format tag
      writeString('WAVE'); offset += 4;
      // "fmt " sub-chunk identifier
      writeString('fmt '); offset += 4;
      // Size of the fmt sub-chunk: 16
      data.setUint32(offset, 16, true); offset += 4;
      // Audio format: 1 = PCM
      data.setUint16(offset, 1, true); offset += 2;
      // Channel count
      data.setUint16(offset, channelCount, true); offset += 2;
      // Sample rate
      data.setUint32(offset, sampleRate, true); offset += 4;
      // Byte rate = channels * sampleRate * bitsPerSample / 8
      data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
      // Block align = channels * bitsPerSample / 8
      data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
      // Bits per sample
      data.setUint16(offset, sampleBits, true); offset += 2;
      // "data" sub-chunk identifier
      writeString('data'); offset += 4;
      // Size of the sample data (file size - 44)
      data.setUint32(offset, dataLength, true); offset += 4;

      // Sample data
      var s, val;
      if (sampleBits === 8) {
        for (var i = 0; i < samples.length; i++, offset++) {
          s = Math.max(-1, Math.min(1, samples[i]));
          val = s < 0 ? s * 0x8000 : s * 0x7FFF;
          // Map the signed 16-bit range onto unsigned 0..255.
          val = Math.floor(255 * (val + 32768) / 65535);
          data.setUint8(offset, val);
        }
      } else {
        for (var j = 0; j < samples.length; j++, offset += 2) {
          s = Math.max(-1, Math.min(1, samples[j]));
          data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
      }

      return new Blob([data], { type: 'audio/wav' });
    }
  };

  // Start recording (appends to whatever has been recorded since the last clear()).
  this.start = function () {
    audioInput.connect(recorder);
    recorder.connect(context.destination);
  };

  // Stop recording.
  this.stop = function () {
    recorder.disconnect();
  };

  // Discard the audio recorded so far, e.g. before starting a new take.
  this.clear = function () {
    audioData.clear();
  };

  // Stop and return the recording as a WAV Blob.
  this.getBlob = function () {
    this.stop();
    return audioData.encodeWAV();
  };

  // Play the recording back through the given <audio> element.
  this.play = function (audio) {
    audio.src = window.URL.createObjectURL(this.getBlob());
  };

  // Return the WAV Blob to upload.
  this.upload = function () {
    return this.getBlob();
  };

  // Audio capture: collect channel 0 of every processed buffer.
  recorder.onaudioprocess = function (e) {
    audioData.input(e.inputBuffer.getChannelData(0));
  };

  return this;
}

export {
  HZRecorder
}