iOS: A concrete wrapper for Baidu long speech recognition: recognition, playback, progress refresh
Published: 2018-01-03
1. Introduction
I have used iFLYTEK (Xunfei) speech recognition before. It is fairly simple to integrate and its recognition accuracy is very good, but its recognition time is limited to at most 60 seconds per session. Sometimes we need a longer recognition window, for example for features such as reciting classical poems. With iFLYTEK you can work around the limit by restarting recognition immediately every time it stops at the 60-second mark and stitching the recordings together at the end, but that is obviously cumbersome. Baidu Speech solves this problem: it recently launched long speech recognition, which is not limited in duration, and it works very well. This time I have extracted the functionality into a dedicated utility class covering speech recognition, recording concatenation, recording playback, and progress refresh. For how to integrate the SDK itself, please refer to the official documentation; I will skip that and go straight to how to use this utility class.
2. Dependencies
Because the audio is recorded in pcm format, I use the lame static library to convert it to mp3.
For the download link and a detailed walkthrough, see my other post: http://www.cnblogs.com/XYQ-208910/p/7650759.html
3. Code
SJVoiceTransform.h
#import <Foundation/Foundation.h>

@interface SJVoiceTransform : NSObject

/**
 * Convert the pcm file at the given path to MP3
 *
 * @param docPath path of the pcm recording file in the sandbox
 */
+(NSString *)stransformToMp3ByUrlWithUrl:(NSString *)docPath;

@end
SJVoiceTransform.m
#import "SJVoiceTransform.h" #import "lame.h" @interface SJVoiceTransform() //@property (strong , nonatomic)NSString * path;//存放音頻沙河路徑 @end @implementation SJVoiceTransform +(NSString *)stransformToMp3ByUrlWithUrl:(NSString *)docPath { NSString *pathUrl = [NSString stringWithFormat:@"View Code%@",docPath];//存儲錄音pcm格式音頻地址 NSString * mp3Url = pathUrl; NSString *mp3FilePath = [docPath stringByAppendingString:@".mp3"];//存放Mp3地址 if (!mp3Url || !mp3FilePath) { return 0; } @try { unsigned long read, write; FILE *pcm = fopen([mp3Url cStringUsingEncoding:1], "rb"); //source 被轉換的音頻文件位置 //音頻不能為空 if (!pcm) { return nil; } fseek(pcm, 4*1024, SEEK_CUR); //skip file header FILE *mp3 = fopen([mp3FilePath cStringUsingEncoding:1], "wb"); //output 輸出生成的Mp3文件位置 const int PCM_SIZE = 8192; const int MP3_SIZE = 8192; short int pcm_buffer[PCM_SIZE*2]; unsigned char mp3_buffer[MP3_SIZE]; lame_t lame = lame_init(); lame_set_num_channels(lame,1); lame_set_in_samplerate(lame, 8000.0); //11025.0 //lame_set_VBR(lame, vbr_default); lame_set_brate(lame, 8); lame_set_mode(lame, 3); lame_set_quality(lame, 2);// lame_init_params(lame); do { read = fread(pcm_buffer, 2*sizeof(short int), PCM_SIZE, pcm); if (read == 0) write = lame_encode_flush(lame, mp3_buffer, MP3_SIZE); else write = lame_encode_buffer_interleaved(lame, pcm_buffer, read, mp3_buffer, MP3_SIZE); fwrite(mp3_buffer, write, 1, mp3); } while (read != 0); lame_close(lame); fclose(mp3); fclose(pcm); } @catch (NSException *exception) { NSLog(@"%@",[exception description]); } @finally { NSLog(@"MP3生成成功: %@",mp3FilePath); } return mp3FilePath; } @end
BDHelper.h
//
//  BDHelper.h
//  BDRecognizer
//
//  Created by 夏遠全 on 2017/11/14.
//  Copyright © 2017 夏遠全. All rights reserved.
//

#import <Foundation/Foundation.h>
#import <UIKit/UIKit.h>
#import <AudioToolbox/AudioToolbox.h>
#import <AVFoundation/AVFoundation.h>

@protocol BDHelperDelegate <NSObject>
@optional
-(void)recognitionPartialResult:(NSString *)recognitionResult;  // partial (intermediate) result
-(void)recognitionFinalResult:(NSString *)recognitionResult;    // final result
-(void)recognitionError:(NSError *)error;                       // recognition error
-(void)updateProgress:(CGFloat)progress duration:(int)duration; // update playback progress
-(void)updateReadingTime:(int)readingTime;                      // update accumulated reading time
-(void)recognitionRecordFinishedPlay;                           // playback of the recognized recording finished
@end

@interface BDHelper : NSObject

/** delegate */
@property (nonatomic, weak) id<BDHelperDelegate> delegate;

/** audio player */
@property (nonatomic, strong) AVAudioPlayer *audioPlayer;

/** recording file path */
@property (nonatomic, copy) NSString *audioFilePath;

/**
 Create an instance

 @param voiceFileName name of the recording file
 @return instance
 */
+(BDHelper *)sharedBDHelperWithVoiceFileName:(NSString *)voiceFileName;

/** start long speech recognition */
- (void)startLongSpeechRecognition;

/** stop long speech recognition */
- (void)endLongSpeechRecognition;

/** play the recognized recording */
-(void)playListenningRecognition;

/** pause playback */
-(void)pauseListenningRecognition;

/**
 Destroy the player

 @param isNeedDeleteFilePath whether the cached audio file should be removed as well
 */
-(void)didRemoveAudioPlayer:(BOOL)isNeedDeleteFilePath;

/** start the timer that accumulates the reading time */
- (void)beginStatisticsReadingTime;

/** destroy the timer */
- (void)endStatisticsReadingTime;

@end
BDHelper.m
//
//  BDHelper.m
//  BDRecognizer
//
//  Created by 夏遠全 on 2017/11/14.
//  Copyright © 2017 夏遠全. All rights reserved.
//

#import "BDHelper.h"
#import "SJVoiceTransform.h"

#if !TARGET_IPHONE_SIMULATOR

#import "BDSEventManager.h"
#import "BDSASRDefines.h"
#import "BDSASRParameters.h"

/// Create an application on the Baidu console, configure the bundle name, and fill in the app's api key, secret key and appid (i.e. appcode) here
static NSString* const API_KEY    = @"BxLweqmGUxxxxxxxxxxxxxx";
static NSString* const SECRET_KEY = @"rhUIXG4gXmxxxxxxxxxxxxxx";
static NSString* const APP_ID     = @"81xxxxx";

@interface BDHelper()<BDSClientASRDelegate,AVAudioPlayerDelegate>
@property (nonatomic, strong) BDSEventManager *asrEventManager;
@property (nonatomic, strong) CADisplayLink   *progressLink;
@property (nonatomic, strong) NSTimer         *readingTimer;
@property (nonatomic, strong) NSMutableData   *mutabelData;
@property (nonatomic, strong) NSFileHandle    *fileHandler;
@property (nonatomic, copy)   NSString        *voiceFileName;
@end
#endif

@implementation BDHelper

#if !TARGET_IPHONE_SIMULATOR

+(BDHelper *)sharedBDHelperWithVoiceFileName:(NSString *)voiceFileName{
    BDHelper *helper = [[self alloc] init];
    helper.voiceFileName = voiceFileName;
    [helper setupDefalutValue];
    return helper;
}

-(void)setupDefalutValue{
    self.asrEventManager = [BDSEventManager createEventManagerWithName:BDS_ASR_NAME];
    [self configVoiceRecognitionClient];
    NSLog(@"current sdk version: %@", [self.asrEventManager libver]);
}

#pragma mark - public: Method

- (void)startLongSpeechRecognition{

    // remove the old player
    [self didRemoveAudioPlayer:NO];

    // configure the recording path
    if (!_audioFilePath) {
        [self pcmFilePathConfig];
    }

    // start the recognition service
    [self beginStatisticsReadingTime];
    [self.asrEventManager sendCommand:BDS_ASR_CMD_START];
}

- (void)endLongSpeechRecognition{

    // stop the recognition service
    [self endStatisticsReadingTime];
    [self.asrEventManager sendCommand:BDS_ASR_CMD_STOP];
    [self.fileHandler writeData:self.mutabelData];
    self.mutabelData = nil;
}

-(void)playListenningRecognition{

    // ignore repeated taps
    if (_audioPlayer && _audioPlayer.isPlaying) {
        return;
    }

    // resume directly
    if (_audioPlayer && !_audioPlayer.isPlaying) {
        [_audioPlayer play];
        _progressLink.paused = NO;
        return;
    }

    // play the recognized recording (convert pcm to mp3 first)
    NSString *mp3Path = [SJVoiceTransform stransformToMp3ByUrlWithUrl:_audioFilePath];
    if (!mp3Path) {
        return;
    }

    // initialize the player
    _audioPlayer = [[AVAudioPlayer alloc]initWithContentsOfURL:[NSURL fileURLWithPath:mp3Path] error:NULL];
    _audioPlayer.volume = 1;
    _audioPlayer.delegate = self;
    [[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryPlayback error:nil];
    [[AVAudioSession sharedInstance] setActive:YES error:nil];
    [_audioPlayer prepareToPlay];
    [_audioPlayer play];

    _progressLink = [CADisplayLink displayLinkWithTarget:self selector:@selector(updateProgressValue)];
    [_progressLink addToRunLoop:[NSRunLoop currentRunLoop] forMode:NSRunLoopCommonModes];
}

-(void)pauseListenningRecognition{

    // pause playback
    if (_audioPlayer && _audioPlayer.isPlaying) {
        [_audioPlayer pause];
        _progressLink.paused = YES;
    }
}

#pragma mark - event

-(void)updateProgressValue{

    // update the playback progress
    int duration = round(_audioPlayer.duration);
    if (self.delegate && [self.delegate respondsToSelector:@selector(updateProgress:duration:)]) {
        [self.delegate updateProgress:_audioPlayer.currentTime/_audioPlayer.duration duration:duration];
    }
}

-(void)startReadingTimer{

    // accumulate the reading time
    if (self.delegate && [self.delegate respondsToSelector:@selector(updateReadingTime:)]) {
        [self.delegate updateReadingTime:1];
    }
}

#pragma mark - Private: Configuration

- (void)configVoiceRecognitionClient {

    // ---- set the DEBUG_LOG level ----
    [self.asrEventManager setParameter:@(EVRDebugLogLevelTrace) forKey:BDS_ASR_DEBUG_LOG_LEVEL];

    // ---- configure API_KEY, SECRET_KEY and APP_ID ----
    [self.asrEventManager setParameter:@[API_KEY, SECRET_KEY] forKey:BDS_ASR_API_SECRET_KEYS];
    [self.asrEventManager setParameter:APP_ID forKey:BDS_ASR_OFFLINE_APP_CODE];

    // ---- configure endpoint detection (choose one of the two) ----
    [self configModelVAD];
    //[self configDNNMFE];

    // ---- NLU and punctuation ----
    [self enableNLU];
    [self enablePunctuation];

    // ---- local VAD must be enabled for long speech ----
    [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_LONG_SPEECH];
    [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_LOCAL_VAD];

    // ---- recording file path ----
    [self pcmFilePathConfig];

    // ---- set the delegate ----
    [self.asrEventManager setDelegate:self];
    [self.asrEventManager setParameter:nil forKey:BDS_ASR_AUDIO_FILE_PATH];
    [self.asrEventManager setParameter:nil forKey:BDS_ASR_AUDIO_INPUT_STREAM];
}

- (void)pcmFilePathConfig{
    [self configFileHandler:self.voiceFileName];
    _audioFilePath = [self getFilePath:self.voiceFileName];
}

- (void)enableNLU {
    // ---- enable NLU (semantic understanding) ----
    [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_NLU];
    [self.asrEventManager setParameter:@"15361" forKey:BDS_ASR_PRODUCT_ID];
}

- (void)enablePunctuation {
    // ---- enable punctuation output ----
    [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_DISABLE_PUNCTUATION];
    // ---- Mandarin with punctuation ----
    [self.asrEventManager setParameter:@"1537" forKey:BDS_ASR_PRODUCT_ID];
}

- (void)configModelVAD {
    NSString *modelVAD_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_basic_model" ofType:@"dat"];
    [self.asrEventManager setParameter:modelVAD_filepath forKey:BDS_ASR_MODEL_VAD_DAT_FILE];
    [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_MODEL_VAD];
}

- (void)configDNNMFE {
    NSString *mfe_dnn_filepath  = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_dnn" ofType:@"dat"];
    NSString *cmvn_dnn_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_cmvn" ofType:@"dat"];
    [self.asrEventManager setParameter:mfe_dnn_filepath forKey:BDS_ASR_MFE_DNN_DAT_FILE];
    [self.asrEventManager setParameter:cmvn_dnn_filepath forKey:BDS_ASR_MFE_CMVN_DAT_FILE];

    // custom silence duration (unit: 10 ms per frame)
    //[self.asrEventManager setParameter:@(500) forKey:BDS_ASR_MFE_MAX_SPEECH_PAUSE];
    //[self.asrEventManager setParameter:@(501) forKey:BDS_ASR_MFE_MAX_WAIT_DURATION];
}

#pragma mark - MVoiceRecognitionClientDelegate

- (void)VoiceRecognitionClientWorkStatus:(int)workStatus obj:(id)aObj {
    switch (workStatus) {
        case EVoiceRecognitionClientWorkStatusNewRecordData: {
            /// recorded audio callback, NSData: raw audio data; the recording can be stored here
            NSData *originData = (NSData *)aObj;
            [self.mutabelData appendData:originData];
            break;
        }
        case EVoiceRecognitionClientWorkStatusStartWorkIng: {
            /// recognition started, data capture and processing begin
            NSDictionary *logDic = [self parseLogToDic:aObj];
            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: start vr, log: %@\n", logDic]);
            break;
        }
        case EVoiceRecognitionClientWorkStatusStart: {
            /// the user started speaking
            NSLog(@"CALLBACK: detect voice start point.\n");
            break;
        }
        case EVoiceRecognitionClientWorkStatusEnd: {
            /// local audio capture finished
            NSLog(@"CALLBACK: detect voice end point.\n");
            break;
        }
        case EVoiceRecognitionClientWorkStatusFlushData: {
            /// continuous partial results, NSDictionary: intermediate result
            NSString *result = [self getDescriptionForDic:aObj];
            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: partial result -%@.\n\n" ,result]);
            NSMutableString *recognitionResult = [aObj[@"results_recognition"] firstObject];
            if (self.delegate && [self.delegate respondsToSelector:@selector(recognitionPartialResult:)]) {
                [self.delegate recognitionPartialResult:recognitionResult];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusFinish: {
            /// recognition finished and the server returned the result, NSDictionary: final result
            NSString *result = [self getDescriptionForDic:aObj];
            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: final result - %@.\n\n",result]);
            NSString *recognitionResult = [aObj[@"results_recognition"] firstObject];
            if (self.delegate && [self.delegate respondsToSelector:@selector(recognitionFinalResult:)]) {
                [self.delegate recognitionFinalResult:recognitionResult];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusMeterLevel: {
            /// current volume callback, NSNumber (int): current volume
            NSLog(@"-------voice volume:%d-------",[aObj intValue]);
            break;
        }
        case EVoiceRecognitionClientWorkStatusCancel: {
            /// the user cancelled
            NSLog(@"CALLBACK: user press cancel.\n");
            break;
        }
        case EVoiceRecognitionClientWorkStatusError: {
            /// an error occurred, NSError: error info
            NSLog(@"%@", [NSString stringWithFormat:@"CALLBACK: encount error - %@.\n", (NSError *)aObj]);
            if (self.delegate && [self.delegate respondsToSelector:@selector(recognitionError:)]) {
                [self.delegate recognitionError:(NSError *)aObj];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusLoaded: {
            /// offline engine loaded
            NSLog(@"CALLBACK: offline engine loaded.\n");
            break;
        }
        case EVoiceRecognitionClientWorkStatusUnLoaded: {
            /// offline engine unloaded
            NSLog(@"CALLBACK: offline engine unLoaded.\n");
            break;
        }
        case EVoiceRecognitionClientWorkStatusChunkThirdData: {
            /// CHUNK: third-party data in the recognition result, NSData
            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: Chunk 3-party data length: %lu\n", (unsigned long)[(NSData *)aObj length]]);
            break;
        }
        case EVoiceRecognitionClientWorkStatusChunkNlu: {
            /// CHUNK: NLU result in the recognition result, NSData
            NSString *nlu = [[NSString alloc] initWithData:(NSData *)aObj encoding:NSUTF8StringEncoding];
            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: Chunk NLU data: %@\n", nlu]);
            break;
        }
        case EVoiceRecognitionClientWorkStatusChunkEnd: {
            /// CHUNK: recognition process finished, NSString
            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: Chunk end, sn: %@.\n", aObj]);
            break;
        }
        case EVoiceRecognitionClientWorkStatusFeedback: {
            /// Feedback: instrumentation data reported during recognition, NSString
            NSDictionary *logDic = [self parseLogToDic:aObj];
            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK Feedback: %@\n", logDic]);
            break;
        }
        case EVoiceRecognitionClientWorkStatusRecorderEnd: {
            /// recorder closed; check this event before navigating away to avoid the status bar issue (iOS)
            NSLog(@"CALLBACK: recorder closed.\n");
            break;
        }
        case EVoiceRecognitionClientWorkStatusLongSpeechEnd: {
            /// long speech finished
            NSLog(@"CALLBACK: Long Speech end.\n");
            [self endLongSpeechRecognition];
            break;
        }
        default:
            break;
    }
}

#pragma mark - AVAudioPlayerDelegate

-(void)audioPlayerDidFinishPlaying:(AVAudioPlayer *)player successfully:(BOOL)flag{
    if (flag) {
        if (self.delegate && [self.delegate respondsToSelector:@selector(recognitionRecordFinishedPlay)]) {
            [self.delegate recognitionRecordFinishedPlay];
        }
    }
}

#pragma mark - public: Method

-(void)didRemoveAudioPlayer:(BOOL)isNeedDeleteFilePath{
    [_audioPlayer stop];
    [_progressLink invalidate];
    _audioPlayer = nil;
    _progressLink = nil;
    if (isNeedDeleteFilePath) {
        [[NSFileManager defaultManager] removeItemAtPath:_audioFilePath error:nil];
        _audioFilePath = nil;
    }
}

- (void)beginStatisticsReadingTime{
    [self.readingTimer fire];
}

- (void)endStatisticsReadingTime{
    if (self.readingTimer.isValid) {
        [self.readingTimer invalidate];
        self.readingTimer = nil;
    }
}

#pragma mark - private: Method

- (NSDictionary *)parseLogToDic:(NSString *)logString {
    NSArray *tmp = NULL;
    NSMutableDictionary *logDic = [[NSMutableDictionary alloc] initWithCapacity:3];
    NSArray *items = [logString componentsSeparatedByString:@"&"];
    for (NSString *item in items) {
        tmp = [item componentsSeparatedByString:@"="];
        if (tmp.count == 2) {
            [logDic setObject:tmp.lastObject forKey:tmp.firstObject];
        }
    }
    return logDic;
}

- (NSString *)getDescriptionForDic:(NSDictionary *)dic {
    if (dic) {
        return [[NSString alloc] initWithData:[NSJSONSerialization dataWithJSONObject:dic
                                                                              options:NSJSONWritingPrettyPrinted
                                                                                error:nil]
                                     encoding:NSUTF8StringEncoding];
    }
    return nil;
}

#pragma mark - Private: File

- (NSString *)getFilePath:(NSString *)fileName {
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    if (paths && [paths count]) {
        return [[paths objectAtIndex:0] stringByAppendingPathComponent:fileName];
    } else {
        return nil;
    }
}

- (void)configFileHandler:(NSString *)fileName {
    self.fileHandler = [self createFileHandleWithName:fileName isAppend:NO];
}

- (NSFileHandle *)createFileHandleWithName:(NSString *)aFileName isAppend:(BOOL)isAppend {
    NSFileHandle *fileHandle = nil;
    NSString *fileName = [self getFilePath:aFileName];
    int fd = -1;
    if (fileName) {
        if ([[NSFileManager defaultManager] fileExistsAtPath:fileName] && !isAppend) {
            [[NSFileManager defaultManager] removeItemAtPath:fileName error:nil];
        }
        int flags = O_WRONLY | O_APPEND | O_CREAT;
        fd = open([fileName fileSystemRepresentation], flags, 0644);
    }
    if (fd != -1) {
        fileHandle = [[NSFileHandle alloc] initWithFileDescriptor:fd closeOnDealloc:YES];
    }
    return fileHandle;
}

#pragma mark - lazy load

-(NSMutableData *)mutabelData{
    if (!_mutabelData) {
        _mutabelData = [NSMutableData data];
    }
    return _mutabelData;
}

-(NSTimer *)readingTimer{
    if (!_readingTimer) {
        _readingTimer = [NSTimer scheduledTimerWithTimeInterval:1.0 target:self selector:@selector(startReadingTimer) userInfo:nil repeats:YES];
        [[NSRunLoop currentRunLoop] addTimer:_readingTimer forMode:UITrackingRunLoopMode];
    }
    return _readingTimer;
}

#endif
@end
4. Note
The Baidu Speech SDK only supports the armv6 and armv7 device architectures; it does not support the x86_64 simulator architecture.
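If you are unsure which architectures a given copy of the static library actually contains, running lipo -info libBaiduSpeechSDK.a in Terminal will list them (lipo is a standard macOS/Xcode tool, not something provided by the Baidu SDK).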
5. How to develop on the simulator
Approach:
1. First, wrap every piece of code that touches Baidu Speech in the conditional-compilation macro so that it is excluded when building for the simulator, for example:
#if !TARGET_IPHONE_SIMULATOR
    // speech-related calls
    // self.asrEventManager = [BDSEventManager createEventManagerWithName:BDS_ASR_NAME];
    // other calls
#endif
2. (This is the key step.) When switching to the simulator, remove libBaiduSpeechSDK.a from Linked Frameworks and Libraries; when switching back to a real device, simply add libBaiduSpeechSDK.a again.