Speex 之回聲消除
SpeexEchoState* m_pState;
SpeexPreprocessState* m_pPreprocessorState;
int m_nFrameSize;
int m_nFilterLen;
int m_nSampleRate;
float* m_pfNoise;
};
#endif
//file name: SpeexEC.cpp
#include "SpeexEC.h"
// Construct an echo canceller in the "not initialised" state.
// No Speex objects are created here; the numeric members are set to
// 160 / 160*8 / 8000, the same values Init() falls back to when it is
// given a non-positive argument.
CSpeexEC::CSpeexEC()
    : m_bHasInit(false),
      m_pState(NULL),
      m_pPreprocessorState(NULL),
      m_nFrameSize(160),
      m_nFilterLen(160*8),
      m_nSampleRate(8000),
      m_pfNoise(NULL)
{
}
// Destructor: hands all cleanup to Reset(), which frees whatever Init()
// created and is safe to call when nothing was created.
CSpeexEC::~CSpeexEC()
{
    this->Reset();
}
// (Re)initialise the echo canceller.
//
//   frame_size     - samples per processed frame
//   filter_length  - echo filter length in samples
//   sampling_rate  - sample rate in Hz
//
// If ANY of the three arguments is <= 0, all three fall back to the
// defaults 160 / 160*8 / 8000. Any previously created state is released
// first, so Init() may be called repeatedly.
//
// On return m_bHasInit is true only when every resource was actually
// obtained; otherwise the object is left in the reset state and DoAEC()
// becomes a no-op instead of dereferencing a NULL state.
void CSpeexEC::Init(int frame_size, int filter_length, int sampling_rate)
{
    Reset();

    const bool bValidArgs =
        (frame_size > 0) && (filter_length > 0) && (sampling_rate > 0);
    m_nFrameSize  = bValidArgs ? frame_size    : 160;
    m_nFilterLen  = bValidArgs ? filter_length : 160*8;
    m_nSampleRate = bValidArgs ? sampling_rate : 8000;

    m_pState = speex_echo_state_init(m_nFrameSize, m_nFilterLen);
    m_pPreprocessorState = speex_preprocess_state_init(m_nFrameSize, m_nSampleRate);
    // Scratch buffer shared by speex_echo_cancel() and speex_preprocess()
    // in DoAEC(); sized frame_size + 1 as in the original code.
    m_pfNoise = new float[m_nFrameSize+1];

    // Some toolchains return NULL from new[] instead of throwing, and the
    // Speex init calls may fail as well: never report success with a
    // missing resource.
    if (m_pState == NULL || m_pPreprocessorState == NULL || m_pfNoise == NULL)
    {
        Reset();
        return;
    }
    m_bHasInit = true;
}
// Release everything Init() created and return to the "not initialised"
// state. Safe to call on an object that was never initialised.
void CSpeexEC::Reset()
{
    m_bHasInit = false;

    // The Speex destroy functions are only called on live states.
    if (m_pState)
        speex_echo_state_destroy(m_pState);
    m_pState = NULL;

    if (m_pPreprocessorState)
        speex_preprocess_state_destroy(m_pPreprocessorState);
    m_pPreprocessorState = NULL;

    // delete[] on a null pointer is a no-op, so no guard is required.
    delete [] m_pfNoise;
    m_pfNoise = NULL;
}
// Run echo cancellation on one frame.
//
//   mic - captured (microphone) frame that contains the echo
//   ref - reference frame, i.e. the signal that was sent to the speaker
//   out - receives the processed frame
//
// Each buffer is expected to hold the frame size given to Init()
// (NOTE(review): not checked here - the caller must guarantee it).
// Does nothing when the object is not initialised or a buffer is NULL.
void CSpeexEC::DoAEC(short* mic, short* ref, short* out)
{
    if (!m_bHasInit || mic == NULL || ref == NULL || out == NULL)
        return;

    // m_pfNoise is filled by the echo canceller and then passed on to the
    // preprocessor, which post-processes 'out' in place.
    speex_echo_cancel(m_pState, mic, ref, out, m_pfNoise);
    // 'out' is already a short*, so the MSVC-only __int16 cast is dropped.
    speex_preprocess(m_pPreprocessorState, out, m_pfNoise);
}
可以看出，這個回聲消除器類很簡單，只要初始化一下就可以呼叫了。但是要注意，傳給回聲消除器的兩個聲音訊號必須同步得非常好。也就是說，在B端接收到A說的話以後，要先把這些話音資料傳給回聲消除器做參考，然後再傳給音效卡播放出來，這中間有一段延時；之後B再採集聲音並傳給回聲消除器，與那個參考資料比較，從採集到的資料中把頻域上和參考資料相同的部分消除掉。如果傳給消除器的兩個訊號同步得不好，即兩個訊號找不到頻域相同的部分，就沒有辦法進行消除了。
測試程式:
#define NN 160
void main()
{
FILE* ref_fd, *mic_fd, *out_fd;
short ref[NN], mic[NN], out[NN];
ref_fd = fopen ("ref.pcm", "rb"); //開啟參考檔案,即要消除的聲音
mic_fd = fopen ("mic.pcm", "rb");//開啟mic採集到的聲音檔案,包含回聲在裡面
out_fd = fopen ("echo.pcm", "wb");//消除了回聲以後的檔案
CSpeexEC ec;
ec.Init();
while (fread(mic, 1, NN*2, mic_fd))
{
fread(ref, 1, NN*2, ref_fd);
ec.DoAEC(mic, ref, out);
fwrite(out, 1, NN*2, out_fd);
}
fclose(ref_fd);
fclose(mic_fd);
fclose(out_fd);
}
以上的程式是用檔案來模擬回聲和MIC,但在實時流中是大不一樣的,在一般的VOIP軟體中,接收對方的聲音並傳到音效卡中播放是在一個執行緒中進行的,而採集本地的聲音並傳送到對方又是在另一個執行緒中進行的,而聲學回聲消除器在對採集到的聲音進行回聲消除的同時,還需要播放執行緒中的資料作為參考,而要同步這兩個執行緒中的資料是非常困難的,因為稍稍有些不同步,聲學回聲消除器中的自適應濾波器就會發散,不但消除不了回聲,還會破壞原始採集到的聲音,使被破壞的聲音難以分辨。我做過好多嘗試,始終無法用軟體來實現對這兩個執行緒中的資料進行同步,導致實現失敗,希望有經驗的網友們一起分享一下這方面的經驗。
示例程式碼:
Sample code
This section shows sample code for encoding and decoding speech using the Speex API. The commands can be used to encode and decode a file by calling:
% sampleenc in_file.sw | sampledec out_file.sw
where both files are raw (no header) files encoded at 16 bits per sample (in the machine natural endianness).
sampleenc.c
sampleenc takes a raw 16 bits/sample file, encodes it and outputs a Speex stream to stdout. Note that the packing used is NOT compatible with that of speexenc/speexdec.
#include <speex/speex.h>
#include <stdio.h>
#define FRAME_SIZE 160
/*
 * sampleenc: encode a raw 16-bit/sample file (argv[1]) with the Speex
 * narrowband mode at quality 8 and write the stream to stdout.
 *
 * Output format: for every frame, an int byte count followed by that many
 * bytes of encoded data (not compatible with speexenc/speexdec).
 * A trailing partial frame in the input is dropped.
 * Returns 0 on success, 1 on a usage, open or write error.
 */
int main(int argc, char **argv)
{
    char *inFile;
    FILE *fin;
    short in[FRAME_SIZE];
    float input[FRAME_SIZE];
    char cbits[200];
    int nbBytes;
    void *state;
    SpeexBits bits;
    int i, tmp;
    int rc = 0;

    if (argc < 2) {
        fprintf(stderr, "usage: sampleenc in_file.sw\n");
        return 1;
    }
    inFile = argv[1];
    /* Binary mode: the input is raw PCM, not text. */
    fin = fopen(inFile, "rb");
    if (fin == NULL) {
        perror(inFile);
        return 1;
    }

    state = speex_encoder_init(&speex_nb_mode);
    tmp = 8;
    speex_encoder_ctl(state, SPEEX_SET_QUALITY, &tmp);
    speex_bits_init(&bits);

    /* Stop on a short read: covers both end of file and read errors
       (testing feof() alone would loop forever on an error). */
    while (fread(in, sizeof(short), FRAME_SIZE, fin) == FRAME_SIZE) {
        /* The encoder call used here takes float samples. */
        for (i = 0; i < FRAME_SIZE; i++)
            input[i] = in[i];

        speex_bits_reset(&bits);
        speex_encode(state, input, &bits);
        nbBytes = speex_bits_write(&bits, cbits, (int)sizeof cbits);

        if (fwrite(&nbBytes, sizeof(int), 1, stdout) != 1 ||
            fwrite(cbits, 1, (size_t)nbBytes, stdout) != (size_t)nbBytes) {
            perror("stdout");
            rc = 1;
            break;
        }
    }

    speex_encoder_destroy(state);
    speex_bits_destroy(&bits);
    fclose(fin);
    return rc;
}
sampledec.c
sampledec reads a Speex stream from stdin, decodes it and outputs it to a raw 16 bits/sample file. Note that the packing used is NOT compatible with that of speexenc/speexdec.
#include <speex/speex.h>
#include <stdio.h>
#define FRAME_SIZE 160
/*
 * sampledec: read the stream produced by sampleenc from stdin (an int
 * byte count followed by that many encoded bytes, per frame), decode it
 * with the Speex narrowband mode and write raw 16-bit samples to argv[1].
 *
 * Returns 0 on success, 1 on a usage, open, write or malformed-input
 * error.
 */
int main(int argc, char **argv)
{
    char *outFile;
    FILE *fout;
    short out[FRAME_SIZE];
    float output[FRAME_SIZE];
    char cbits[200];
    int nbBytes;
    void *state;
    SpeexBits bits;
    int i, tmp;
    int rc = 0;

    if (argc < 2) {
        fprintf(stderr, "usage: sampledec out_file.sw\n");
        return 1;
    }
    outFile = argv[1];
    /* Binary mode: the output is raw PCM, not text. */
    fout = fopen(outFile, "wb");
    if (fout == NULL) {
        perror(outFile);
        return 1;
    }

    state = speex_decoder_init(&speex_nb_mode);
    tmp = 1;
    speex_decoder_ctl(state, SPEEX_SET_ENH, &tmp);
    speex_bits_init(&bits);

    /* A failed length read means end of stream (or a read error). */
    while (fread(&nbBytes, sizeof(int), 1, stdin) == 1) {
        fprintf (stderr, "nbBytes: %d\n", nbBytes);

        /* The length comes from the input stream: never let it exceed
           the buffer it is about to be used with. */
        if (nbBytes < 0 || nbBytes > (int)sizeof cbits) {
            fprintf(stderr, "invalid frame length %d\n", nbBytes);
            rc = 1;
            break;
        }
        if (fread(cbits, 1, (size_t)nbBytes, stdin) != (size_t)nbBytes) {
            fprintf(stderr, "truncated frame\n");
            rc = 1;
            break;
        }

        speex_bits_read_from(&bits, cbits, nbBytes);
        speex_decode(state, &bits, output);

        /* Clamp before narrowing: converting an out-of-range float to
           short is undefined behaviour. */
        for (i = 0; i < FRAME_SIZE; i++) {
            float s = output[i];
            if (s > 32767.0f)
                s = 32767.0f;
            else if (s < -32768.0f)
                s = -32768.0f;
            out[i] = (short)s;
        }

        if (fwrite(out, sizeof(short), FRAME_SIZE, fout) != FRAME_SIZE) {
            perror(outFile);
            rc = 1;
            break;
        }
    }

    speex_decoder_destroy(state);
    speex_bits_destroy(&bits);
    if (fclose(fout) != 0)
        rc = 1;
    return rc;
}
開源 H323 協議中封裝的使用參考程式碼:
#include <ptlib.h>
#ifdef __GNUC__
#pragma implementation "speexcodec.h"
#endif
#include "speexcodec.h"
#include "h323caps.h"
#include "h245.h"
#include "rtp.h"
extern "C" {
#include "speex/libspeex/speex.h"
};
#define new PNEW
// Identification triple (country code, extension, manufacturer code) for the
// "Xiph" capability variants.
// NOTE(review): presumably ITU-T T.35 non-standard capability identifiers,
// going by the *_T35EXTENSION names - confirm against the capability classes.
#define XIPH_COUNTRY_CODE 0xB5 // (181) Country code for United States
#define XIPH_T35EXTENSION 0
#define XIPH_MANUFACTURER_CODE 0x0026 // Allocated by Delta Inc
// The same triple for the "Equivalence" capability variants.
#define EQUIVALENCE_COUNTRY_CODE 9 // Country code for Australia
#define EQUIVALENCE_T35EXTENSION 0
#define EQUIVALENCE_MANUFACTURER_CODE 61 // Allocated by Australian Communications Authority, Oct 2000
// Samples per frame (not referenced in the visible part of this file).
#define SAMPLES_PER_FRAME 160
// Capability name strings: SPEEX_BASE_NAME concatenated with a per-mode
// suffix (adjacent string literals are joined by the compiler).
#define SPEEX_BASE_NAME "Speex"
#define SPEEX_NARROW2_H323_NAME SPEEX_BASE_NAME "Narrow-5.95k{sw}"
#define SPEEX_NARROW3_H323_NAME SPEEX_BASE_NAME "Narrow-8k{sw}"
#define SPEEX_NARROW4_H323_NAME SPEEX_BASE_NAME "Narrow-11k{sw}"
#define SPEEX_NARROW5_H323_NAME SPEEX_BASE_NAME "Narrow-15k{sw}"
#define SPEEX_NARROW6_H323_NAME SPEEX_BASE_NAME "Narrow-18.2k{sw}"
// Pair each narrowband capability class with its name string.
// NOTE(review): H323_REGISTER_CAPABILITY is defined outside this file;
// presumably it registers the class so it can be looked up by name.
H323_REGISTER_CAPABILITY(SpeexNarrow2AudioCapability, SPEEX_NARROW2_H323_NAME);
H323_REGISTER_CAPABILITY(SpeexNarrow3AudioCapability, SPEEX_NARROW3_H323_NAME);
H323_REGISTER_CAPABILITY(SpeexNarrow4AudioCapability, SPEEX_NARROW4_H323_NAME);
H323_REGISTER_CAPABILITY(SpeexNarrow5AudioCapability, SPEEX_NARROW5_H323_NAME);
H323_REGISTER_CAPABILITY(SpeexNarrow6AudioCapability, SPEEX_NARROW6_H323_NAME);
// Name strings for the "(Xiph)" variants of the same five modes.
#define XIPH_SPEEX_NARROW2_H323_NAME SPEEX_BASE_NAME "Narrow-5.95k(Xiph){sw}"
#define XIPH_SPEEX_NARROW3_H323_NAME SPEEX_BASE_NAME "Narrow-8k(Xiph){sw}"
#define XIPH_SPEEX_NARROW4_H323_NAME SPEEX_BASE_NAME "Narrow-11k(Xiph){sw}"
#define XIPH_SPEEX_NARROW5_H323_NAME SPEEX_BASE_NAME "Narrow-15k(Xiph){sw}"
#define XIPH_SPEEX_NARROW6_H323_NAME SPEEX_BASE_NAME "Narrow-18.2k(Xiph){sw}"
// Pair each "(Xiph)" capability class with its name string.
H323_REGISTER_CAPABILITY(XiphSpeexNarrow2AudioCapability, XIPH_SPEEX_NARROW2_H323_NAME);
H323_REGISTER_CAPABILITY(XiphSpeexNarrow3AudioCapability, XIPH_SPEEX_NARROW3_H323_NAME);
H323_REGISTER_CAPABILITY(XiphSpeexNarrow4AudioCapability, XIPH_SPEEX_NARROW4_H323_NAME);
H323_REGISTER_CAPABILITY(XiphSpeexNarrow5AudioCapability, XIPH_SPEEX_NARROW5_H323_NAME);
H323_REGISTER_CAPABILITY(XiphSpeexNarrow6AudioCapability, XIPH_SPEEX_NARROW6_H323_NAME);
/////////////////////////////////////////////////////////////////////////
// Return the bit rate the Speex narrowband encoder reports for a given
// quality setting.
//
//   mode - value passed to the encoder as SPEEX_SET_QUALITY
//
// A temporary encoder is created, queried with SPEEX_GET_BITRATE and
// destroyed again. Returns 0 if the encoder cannot be created or the
// query does not write a value.
static int Speex_Bits_Per_Second(int mode) {
  // Start from a defined value so a failed query never returns garbage.
  int bitrate = 0;
  void *tmp_coder_state = speex_encoder_init(&speex_nb_mode);
  if (tmp_coder_state == NULL)
    return 0;
  speex_encoder_ctl(tmp_coder_state, SPEEX_SET_QUALITY, &mode);
  speex_encoder_ctl(tmp_coder_state, SPEEX_GET_BITRATE, &bitrate);
  speex_encoder_destroy(tmp_coder_state);
  return bitrate;
}
// Return the number of bytes needed for one encoded frame at the given
// quality setting, rounded up to a whole byte.
static int Speex_Bytes_Per_Frame(int mode) {
  // 50 frames per second corresponds to the 20 ms frame size.
  const int frames_per_second = 50;
  const int bits_per_byte = 8;
  const int frame_bits = Speex_Bits_Per_Second(mode) / frames_per_second;
  // Adding (bits_per_byte - 1) before the division rounds up.
  return (frame_bits + bits_per_byte - 1) / bits_per_byte;
}
OpalMediaFormat const OpalSpeexNarrow_5k95(OPAL_SPEEX_NARROW_5k95,
OpalMediaFormat::DefaultAudioSessionID,