FFmpeg音訊編解碼處理

新版的ffmpeg對音訊編碼處理已經有了很大的變化，記錄在此，做個備忘。
早期ffmpeg編碼音訊時，輸入資料一般都是S16格式，解碼輸出一般也是S16。也就是說，PCM資料是交錯儲存在一塊連續的buffer中；對一個雙聲道（左右）音訊來說，儲存格式可能就為
LRLRLR.........（左聲道在前還是右聲道在前沒有認真研究過）。所以以往編碼部分的程式碼基本形如：

// Legacy (packed-sample) encode loop: drains a single FIFO one encoder frame at
// a time and encodes each frame.
// sample_bytes: size in bytes of one sample in the encoder's sample format.
int sample_bytes = av_get_bytes_per_sample(pCodecCtx->sample_fmt);

// frame_bytes: bytes of PCM consumed per encoder frame, all channels together.
int frame_bytes = pCodecCtx->frame_size * sample_bytes * pCodecCtx->channels;

   // AVFifoBuffer* fifo;    holds the PCM data
    // Encode only while at least one complete frame is queued.
    while(av_fifo_size(fifo) >= frame_bytes) {
        av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

        // The packet is pointed at a caller-owned output buffer.
        // NOTE(review): presumably encodeBuf holds at least
        // AVCODEC_MAX_AUDIO_FRAME_SIZE bytes - confirm where it is allocated.
        AVPacket pkt = {0};
        av_init_packet(&pkt);
        pkt.data = encodeBuf;
        pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
        int got_packet = 0;

        audioframe->nb_samples = pCodecCtx->frame_size;
        int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                  audioframe->nb_samples,
                                                  pCodecCtx->sample_fmt, 0);
        // Attach inputBuf to the frame; the return value is not checked here.
        avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,

                inputBuf, samples_size, 0);
        // pts advances by the number of samples in each frame.
        audioframe->pts = audio_sync_opts;
        audio_sync_opts = audioframe->pts + audioframe->nb_samples;

        // The encoder's return value is ignored; only got_packet is examined.
        avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);
        if (got_packet ) {
            // Handle pkt: mux and store it, stream it out, or hand it to the upper-layer application
        }

    }
專案中需要對音視訊流進行轉碼輸出，音訊處理部分一般是先解碼（得到PCM S16資料），再交由編碼（MP3、AAC）

ffmpeg升級到2.1後（具體哪個版本開始的沒去查，可能早幾個版本就已經這樣做了），音訊格式增加了plane概念（呃，不是灰機，是平面）
// Audio sample formats as quoted in the article: a first group without the
// "planar" marker, followed by the planar variant of each of them.
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double

// All formats below are planar
    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar

    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};
這就有點像視訊部分的YUV資料，有的帶P，有的是不帶P的，同樣對雙聲道音訊PCM資料，以S16P為例，儲存就可能是
plane 0: LLLLLLLLLLLLLLLLLLLLLLLLLL...
plane 1: RRRRRRRRRRRRRRRRRRRRRRRRRR...
而不再是以前的連續buffer。
如mp3編碼就明確規定了只使用平面格式的資料
AVCodec ff_libmp3lame_encoder = {
.....
    .capabilities          = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
    .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
                                                             AV_SAMPLE_FMT_FLTP,
                                                             AV_SAMPLE_FMT_S16P,
                                                             AV_SAMPLE_FMT_NONE },
....
};
而AAC編碼依舊使用 AV_SAMPLE_FMT_S16格式
也就是說，音訊編碼不能再像以前那樣簡單地處理、統一輸入S16資料，而要根據具體的codec轉換為其支援的格式，否則無論是編碼還是解碼，輸出的聲音都會莫名其妙。幸好轉換工作不用自己做，ffmpeg提供了相應的API：swr_convert（類似以前的audio_resample，只是audio_resample目前已不再推薦使用，因為swr_convert更強大）。
基於此，對音訊編碼部分做了相應修改，主要用的資料結構為 struct SwrContext* m_SwrCtx;
step 1：判斷是否需要進行convert，初始化階段
// Initialisation: a converter is needed whenever the channel count, sample rate
// or sample format of the input differs from what the encoder context expects.
if (pCodecCtx->channels != pInputCtx->channels
    || pCodecCtx->sample_rate != pInputCtx->sample_rate
    || pCodecCtx->sample_fmt != pInputCtx->sample_fmt)
{
    u::Log::write(get_log_file(), "Audio need resample!");
    if (NULL == m_SwrCtx) {
        m_SwrCtx = swr_alloc();
    }
#if LIBSWRESAMPLE_VERSION_MINOR >= 17 // pick the API that matches the library version
    av_opt_set_int(m_SwrCtx, "ich", pInputCtx->channels, 0);
    av_opt_set_int(m_SwrCtx, "och", pCodecCtx->channels, 0);
    av_opt_set_int(m_SwrCtx, "in_sample_rate", pInputCtx->sample_rate, 0);
    av_opt_set_int(m_SwrCtx, "out_sample_rate", pCodecCtx->sample_rate, 0);
    av_opt_set_sample_fmt(m_SwrCtx, "in_sample_fmt", pInputCtx->sample_fmt, 0);
    av_opt_set_sample_fmt(m_SwrCtx, "out_sample_fmt", pCodecCtx->sample_fmt, 0);
#else
    // swr_alloc_set_opts() takes the OUTPUT description first and the INPUT
    // description second. The output side must describe what the encoder
    // consumes (pCodecCtx); it used to be filled with the input layout/rate and
    // a hard-coded S16 format, which contradicts the check above.
    // If the encoder context carries no channel layout, fall back to the input
    // layout, which is what this branch used before.
    m_SwrCtx = swr_alloc_set_opts(m_SwrCtx,
        pCodecCtx->channel_layout ? pCodecCtx->channel_layout : pInputCtx->channel_layout,
        pCodecCtx->sample_fmt, pCodecCtx->sample_rate,
        pInputCtx->channel_layout, pInputCtx->sample_fmt, pInputCtx->sample_rate,
        0, NULL);
#endif
    // Do not let an initialisation failure pass unnoticed.
    if (swr_init(m_SwrCtx) < 0) {
        u::Log::write(get_log_file(), "swr_init failed!");
    }

    if (av_sample_fmt_is_planar(pCodecCtx->sample_fmt)) {
        // Planar output: one FIFO per channel, each holding that channel's plane.
        for (int i = 0; i < pCodecCtx->channels; i++) {
            m_fifo[i] = av_fifo_alloc(BUF_SIZE_20K);
        }
    } else {
        // Packed output: a single FIFO is enough for all the data. Using a FIFO
        // at all is a matter of taste; it is simply convenient here.
        fifo = av_fifo_alloc(BUF_SIZE_20K);
    }

}

step 2：進行轉換
// Convert one chunk of decoded PCM and queue it for the encoder.
// The conversion part is adapted from the examples shipped with FFmpeg.
// With a converter the input goes through swr_convert(); without one the input
// bytes are queued untouched.
if (m_SwrCtx != NULL) {
    // Create the destination plane array on first use.
    if (!m_audioOut) {
        ret = av_samples_alloc_array_and_samples(&m_audioOut, &dst_samples_linesize,
                                                 pCodecCtx->channels, max_dst_nb_samples,
                                                 pCodecCtx->sample_fmt, 0);
        if (ret < 0) {
            av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate destination samples\n", __FILE__, __LINE__, __FUNCTION__);
            return -1;
        }
    }

    // Number of output samples to make room for.
    // NOTE(review): both rate arguments are the encoder rate, so the rescale is
    // an identity. FFmpeg's resampling example rescales from the input rate to
    // the output rate; if the two rates can differ here, the input rate should
    // be passed to swr_get_delay() and as the third av_rescale_rnd() argument.
    // Confirm which variable holds the input rate in this scope.
    dst_nb_samples = av_rescale_rnd(swr_get_delay(m_SwrCtx, pCodecCtx->sample_rate) + src_nb_samples,
                                    pCodecCtx->sample_rate, pCodecCtx->sample_rate, AV_ROUND_UP);
    if (dst_nb_samples > max_dst_nb_samples) {
        av_free(m_audioOut[0]);
        // Clear the slot so that a failed re-allocation cannot leave a dangling
        // pointer behind (it would be freed again on the next call).
        m_audioOut[0] = NULL;
        ret = av_samples_alloc(m_audioOut, &dst_samples_linesize, pCodecCtx->channels,
                               dst_nb_samples, pCodecCtx->sample_fmt, 0);
        if (ret < 0) {
            av_log(NULL, AV_LOG_WARNING, "[%s.%d %s() Could not allocate samples Buffer\n", __FILE__, __LINE__, __FUNCTION__);
            // No buffer is held any more: force a fresh allocation next time.
            max_dst_nb_samples = 0;
            return -1;
        }
        max_dst_nb_samples = dst_nb_samples;
    }

    // The input may itself be planar, so build a per-plane pointer table for it.
    // Zero-initialised so that unused slots are never garbage.
    uint8_t* m_ain[SWR_CH_MAX] = { NULL };
    setup_array(m_ain, (uint8_t*)input_buf, data->ctx.sample_fmt, src_nb_samples);

    len = swr_convert(m_SwrCtx, m_audioOut, dst_nb_samples, (const uint8_t**)m_ain, src_nb_samples);

    if (len < 0) {
        char errmsg[BUF_SIZE_1K];
        av_strerror(len, errmsg, sizeof(errmsg));
        av_log(NULL, AV_LOG_WARNING, "[%s:%d] swr_convert!(%d)(%s)", __FILE__, __LINE__, len, errmsg);
        return -1;
    }

    // len is the number of samples per channel produced by the converter.
    paudiobuf = m_audioOut[0];
    decode_size = len * pCodecCtx->channels * av_get_bytes_per_sample(pCodecCtx->sample_fmt);

} else {
    paudiobuf = (uint8_t*)input_buf;
    decode_size = input_size;
}

// Queue the PCM data. Even when a conversion took place, the converted data is
// only split per channel if the encoder's format is planar.
if (m_SwrCtx && av_sample_fmt_is_planar(pCodecCtx->sample_fmt)) {
    // Bytes produced for each single channel plane.
    const int plane_bytes = len * av_get_bytes_per_sample(pCodecCtx->sample_fmt);
    for (int i = 0; i < pCodecCtx->channels; i++) {
        if (av_fifo_realloc2(m_fifo[i], av_fifo_size(m_fifo[i]) + plane_bytes) < 0) {
            av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
            return -1;
        }
        // Plane i is addressed as i * dst_samples_linesize bytes past plane 0.
        av_fifo_generic_write(m_fifo[i], m_audioOut[0] + i*dst_samples_linesize, plane_bytes, NULL);
    }
} else {
    if (av_fifo_realloc2(fifo, av_fifo_size(fifo) + decode_size) < 0) {
        av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
        return -1;
    }
    av_fifo_generic_write(fifo, paudiobuf, decode_size, NULL);
}

setup_array函式摘自ffmpeg例程
/**
 * Fill a per-channel pointer table from one contiguous sample buffer.
 *
 * @param out     table of SWR_CH_MAX pointers; every entry is written
 * @param in      start of the sample buffer
 * @param format  sample format value (an AVSampleFormat passed as int)
 * @param samples number of samples per channel held in the buffer
 *
 * Planar format: entry i points i * plane_size bytes past `in`, where
 * plane_size is bytes-per-sample times `samples`. All SWR_CH_MAX entries are
 * computed because the channel count is not known here; only the entries for
 * channels that really exist refer to valid data.
 *
 * Packed format: entry 0 points at `in` and the remaining entries are set to
 * NULL, so a caller that passes an uninitialised table never hands stray
 * pointers on.
 */
static void setup_array(uint8_t* out[SWR_CH_MAX], uint8_t* in, int format, int samples){
    int i;

    if (av_sample_fmt_is_planar((AVSampleFormat)format)) {
        // Only the low 8 bits of `format` are used to look up the sample size.
        const int plane_size = av_get_bytes_per_sample((AVSampleFormat)(format & 0xFF)) * samples;
        for (i = 0; i < SWR_CH_MAX; i++) {
            out[i] = in + i*plane_size;
        }
    } else {
        out[0] = in;
        for (i = 1; i < SWR_CH_MAX; i++) {
            out[i] = NULL;
        }
    }
}

step 3：進行編碼
// Encoding step. First branch: the encoder format requires planar data, so each
// channel is read from its own FIFO.
if (m_SwrCtx && ( av_sample_fmt_is_planar(pCodecCtx->sample_fmt) )) {
  // Simplified example: only the first channel's FIFO is checked (left and right
  // channels hold the same amount of data); a real application should consider
  // every channel.
while(av_fifo_size(m_fifo[0]) >= pCodecCtx->frame_size * sample_bytes){
for (int i = 0; i < pCodecCtx->channels; i++) {
  // inputBuf is one contiguous block of memory; channel i's frame is placed at
  // byte offset i * frame_size * sample_bytes.
av_fifo_generic_read(m_fifo[i], inputBuf+i*pCodecCtx->frame_size * sample_bytes, pCodecCtx->frame_size * sample_bytes, NULL);
}
AVPacket pkt = {0};
av_init_packet(&pkt);
pkt.data = encodeBuf;
pkt.size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
int got_packet = 0;

audioframe->nb_samples = pCodecCtx->frame_size;
int samples_size = av_samples_get_buffer_size(NULL, pCodecCtx->channels,
                                                  audioframe->nb_samples,
                                                  pCodecCtx->sample_fmt, 0);
// NOTE(review): with align == 0 the library may pad each plane, so the plane
// stride it derives could differ from the frame_size * sample_bytes spacing
// used when filling inputBuf above - confirm for frame sizes that are not a
// multiple of 32.
avcodec_fill_audio_frame(audioframe, pCodecCtx->channels, pCodecCtx->sample_fmt,
inputBuf, samples_size, 0);

// ret is stored but not examined in the visible code.
int ret = avcodec_encode_audio2(pCodecCtx, &pkt, audioframe, &got_packet);

if (got_packet ) {
// Handle pkt
}

}

} else {
// Packed (non-planar) data: everything comes from the single FIFO.
while(av_fifo_size(fifo) >= frame_bytes) {
av_fifo_generic_read(fifo, inputBuf, frame_bytes, NULL);

AVPacket pkt = {0};
av_init_packet(&pkt);
pkt.data = encodeBuf;
// (the visible excerpt stops here)

FFmpeg音訊編解碼處理

FFmpeg音訊編解碼處理

[總結]FFMPEG視音訊編解碼零基礎學習方法

音訊編解碼基礎(wav/aac/pcma/pcmu)

視訊編解碼的理論和實踐2：Ffmpeg視訊編解碼

即時通訊音視訊開發（六）：如何開始音訊編解碼技術的學習

[總結]視音訊編解碼技術零基礎學習方法

FPGA設計標準I2S協議音訊編解碼器

G711(PCM/PCMA/PCMU),G721,G723,G729音訊編解碼

g.729a 音訊編解碼演算法

音訊編解碼標準G.711與G.729

FPGA音訊編解碼驅動及I2C寫入程式碼

音訊編解碼總結

音訊編解碼faac

android MediaCodec 音訊編解碼的實現

android 音訊編解碼混音 mp3編碼解碼 pcm編碼解碼

視音訊編解碼技術零基礎學習方法

視音訊編解碼學習工程：H.264分析器

實現單晶片2400bps 音訊編解碼方案

ffmpeg查詢編解碼器decoder，encoder的兩個介面

音訊編解碼speex庫的使用方法

FFmpeg音訊編解碼處理

相關推薦