
Android Audio Player Implementation Based on FFmpeg and OpenSL ES

Preface

In an earlier post, we decoded video with FFmpeg and rendered it with OpenGL ES. In this article we will implement native audio playback based on OpenSL ES.
OpenSL ES is another audio specification defined by The Khronos Group Inc. There is plenty of material about it online, and the Android SDK sample code includes an example,
native-audio, which demonstrates the various OpenSL ES features in detail. This article will not restate the underlying principles; the API is essentially called in a fixed sequence.
When decoding with FFmpeg, the most suitable approach is buffer-queue management: decoded PCM data is enqueued continuously into the player's buffer queue, and the audio hardware then plays the PCM. For background on sample rates, channel storage bit width, and other audio fundamentals, please study on your own; a quick byte-rate calculation is sketched below.
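As a small illustration of those basics: 16-bit PCM occupies 2 bytes per sample per channel, so the byte rate and the playback duration of a decoded buffer follow directly from the sample rate and channel count. A tiny helper (illustrative only, not part of the project sources):

// bytes per second of 16-bit PCM:
// sample_rate samples/s * channels * 2 bytes per sample
static int pcm_byte_rate(int sample_rate, int channels) {
    return sample_rate * channels * 2;
}

// e.g. 44100 Hz stereo is 44100 * 2 * 2 = 176400 bytes/s,
// so a 4096-byte buffer lasts roughly 23 ms
static double pcm_buffer_duration(int buf_size, int sample_rate, int channels) {
    return (double) buf_size / pcm_byte_rate(sample_rate, channels);
}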

Requirements

  1. Demonstrate audio decoding and playback only; no video decoding and no audio/video synchronization;
  2. Use FFmpeg for audio demuxing and decoding, and OpenSL ES for PCM injection and playback.

Architecture

The flow is still similar to the earlier video player design; since no surface management is involved, it is somewhat simpler. Note the following points (a call-order sketch follows this list):
1. createEngine() and createBufferQueueAudioPlayer() are called after the media file has been opened, because they depend on the media's audio properties (channel count, sample rate, and so on);
2. the player pulls PCM data through a callback that reads packets from the media queue and decodes them, rather than having a separate thread actively push data in; this differs from the video decoding path;
3. because audio playback speed is governed by the sample rate, the playback rate needs no intervention; the too-fast/too-slow frame pacing problem of video does not arise.
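To make the ordering concrete, a plausible outline of the open_media() thread entry in player.cpp is sketched below. player.cpp is not listed in this article, so the body here is an assumption based on the notes above; only createEngine(), createBufferQueueAudioPlayer(), and fireOnPlayer() are real functions from audio-jni.cpp:

// hypothetical outline of open_media() (the real one lives in player.cpp)
void *open_media(void *arg) {
    // 1. open the file with FFmpeg first, so the audio properties are known
    //    (avformat_open_input, avformat_find_stream_info, find the audio
    //    stream, open its decoder into global_context.acodec_ctx)

    // 2. only then create the OpenSL ES objects: createBufferQueueAudioPlayer()
    //    reads the channel count and sample rate from the codec context
    if (createEngine() < 0 || createBufferQueueAudioPlayer() < 0)
        return NULL;

    // 3. prime the pump: entering SL_PLAYSTATE_PLAYING alone does not invoke
    //    the buffer-queue callback, so trigger the first read manually
    fireOnPlayer();

    // 4. keep demuxing packets into global_context.audio_queue until quit
    return NULL;
}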
[Figure: audio playback architecture]

Code Structure

[Figure: code structure]
audio-jni.cpp implements the JNI-facing functions; the native code is ultimately built into libaudio-jni.so.
audio.cpp handles the OpenSL ES-based audio management.
player.cpp opens and plays the media file (based on FFmpeg).

Main Code

Below is the source of audio-jni.cpp, the core implementation of this project. Pay particular attention to the following points, otherwise audio playback will fail:
1. In createBufferQueueAudioPlayer(), the second field of SLDataFormat_PCM format_pcm is the channel count and the third is the sample rate; OpenSL ES expresses it in milliHertz, i.e. FFmpeg's value multiplied by 1000. Also note the channelMask field: it must correspond to the channel count. For stereo, the two speaker positions must be OR-ed together (SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT); simply setting SL_SPEAKER_FRONT_CENTER makes CreateAudioPlayer fail, a problem I ran into myself.
2. After SetPlayState switches the player to SL_PLAYSTATE_PLAYING, the callback is not invoked automatically to fetch data, so we must call it once ourselves (via fireOnPlayer()) to trigger the first read.
3. Unlike SDL, the callback is not told how many bytes the lower layer needs, so we can keep it simple: read just one packet, decode it, and enqueue the result. Once the data is consumed, the callback fires again for more PCM, so efficiency does not suffer.

#include <assert.h>
#include <jni.h>
#include <string.h>
#include <pthread.h>    // pthread_create
#include <unistd.h>     // usleep
#include "com_opensles_ffmpeg_MainActivity.h"
#include "player.h"

// for native audio
#include <SLES/OpenSLES.h>
#include <SLES/OpenSLES_Android.h>

// engine interfaces
static SLObjectItf engineObject = NULL;
static SLEngineItf engineEngine;

// output mix interfaces
static SLObjectItf outputMixObject = NULL;
static SLEnvironmentalReverbItf outputMixEnvironmentalReverb = NULL;

// buffer queue player interfaces
static SLObjectItf bqPlayerObject = NULL;
static SLPlayItf bqPlayerPlay;
static SLAndroidSimpleBufferQueueItf bqPlayerBufferQueue;
static SLEffectSendItf bqPlayerEffectSend;
static SLVolumeItf bqPlayerVolume;
static uint8_t decoded_audio_buf[AVCODEC_MAX_AUDIO_FRAME_SIZE];

// this callback handler is called every time a buffer finishes playing
void bqPlayerCallback(SLAndroidSimpleBufferQueueItf bq, void *context) {
    SLresult result;

    //LOGV2("bqPlayerCallback...");

    if (bq != bqPlayerBufferQueue) {
        LOGV2("bqPlayerCallback : not the same player object.");
        return;
    }

    int decoded_size = audio_decode_frame(decoded_audio_buf,
            sizeof(decoded_audio_buf));
    if (decoded_size > 0) {
        result = (*bqPlayerBufferQueue)->Enqueue(bqPlayerBufferQueue,
                decoded_audio_buf, decoded_size);
        // the most likely other result is SL_RESULT_BUFFER_INSUFFICIENT,
        // which for this code example would indicate a programming error
        if (SL_RESULT_SUCCESS != result) {
            LOGV2("bqPlayerCallback : bqPlayerBufferQueue Enqueue failure.");
        }
    }
}

int createEngine() {

    SLresult result;

    // create engine
    result = slCreateEngine(&engineObject, 0, NULL, 0, NULL, NULL);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("slCreateEngine failure.");
        return -1;
    }

    // realize the engine
    result = (*engineObject)->Realize(engineObject, SL_BOOLEAN_FALSE );
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("engineObject Realize failure.");
        (*engineObject)->Destroy(engineObject);
        engineObject = NULL;
        engineEngine = NULL;
        return -1;
    }

    // get the engine interface, which is needed in order to create other objects
    result = (*engineObject)->GetInterface(engineObject, SL_IID_ENGINE,
            &engineEngine);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("engineObject GetInterface failure.");
        (*engineObject)->Destroy(engineObject);
        engineObject = NULL;
        engineEngine = NULL;
        return -1;
    }

    // create output mix, with environmental reverb specified as a non-required interface
    const SLInterfaceID ids[1] = { SL_IID_ENVIRONMENTALREVERB };
    const SLboolean req[1] = { SL_BOOLEAN_FALSE };
    result = (*engineEngine)->CreateOutputMix(engineEngine, &outputMixObject, 1,
            ids, req);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("engineObject CreateOutputMix failure.");
        (*engineObject)->Destroy(engineObject);
        engineObject = NULL;
        engineEngine = NULL;
        return -1;
    }

    // realize the output mix
    result = (*outputMixObject)->Realize(outputMixObject, SL_BOOLEAN_FALSE );
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("outputMixObject Realize failure.");

        (*outputMixObject)->Destroy(outputMixObject);
        outputMixObject = NULL;
        outputMixEnvironmentalReverb = NULL;
        (*engineObject)->Destroy(engineObject);
        engineObject = NULL;
        engineEngine = NULL;
        return -1;
    }

    // get the environmental reverb interface
    // this could fail if the environmental reverb effect is not available,
    // either because the feature is not present, excessive CPU load, or
    // the required MODIFY_AUDIO_SETTINGS permission was not requested and granted
    result = (*outputMixObject)->GetInterface(outputMixObject,
            SL_IID_ENVIRONMENTALREVERB, &outputMixEnvironmentalReverb);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("outputMixObject Realize failure.");
        (*outputMixObject)->Destroy(outputMixObject);
        outputMixObject = NULL;
        outputMixEnvironmentalReverb = NULL;
        (*engineObject)->Destroy(engineObject);
        engineObject = NULL;
        engineEngine = NULL;
        return -1;
    }

    LOGV2("OpenSL ES createEngine success.");
    return 0;
}

int createBufferQueueAudioPlayer() {
    SLresult result;
    SLuint32 channelMask;

    // configure audio source
    SLDataLocator_AndroidSimpleBufferQueue loc_bufq = {
            SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE, 2 };

    if (global_context.acodec_ctx->channels == 2)
        channelMask = SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT;
    else
        channelMask = SL_SPEAKER_FRONT_CENTER;

    SLDataFormat_PCM format_pcm = { SL_DATAFORMAT_PCM,
            global_context.acodec_ctx->channels,
            global_context.acodec_ctx->sample_rate * 1000,
            SL_PCMSAMPLEFORMAT_FIXED_16, SL_PCMSAMPLEFORMAT_FIXED_16,
            channelMask, SL_BYTEORDER_LITTLEENDIAN };

    SLDataSource audioSrc = { &loc_bufq, &format_pcm };

    // configure audio sink
    SLDataLocator_OutputMix loc_outmix = { SL_DATALOCATOR_OUTPUTMIX,
            outputMixObject };
    SLDataSink audioSnk = { &loc_outmix, NULL };

    // create audio player
    const SLInterfaceID ids[3] = { SL_IID_BUFFERQUEUE, SL_IID_EFFECTSEND,
            SL_IID_VOLUME };
    const SLboolean req[3] =
            { SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE };
    result = (*engineEngine)->CreateAudioPlayer(engineEngine, &bqPlayerObject,
            &audioSrc, &audioSnk, 3, ids, req);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("CreateAudioPlayer failure.");
        return -1;
    }

    // realize the player
    result = (*bqPlayerObject)->Realize(bqPlayerObject, SL_BOOLEAN_FALSE );
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject Realize failure.");
        return -1;
    }

    // get the play interface
    result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_PLAY,
            &bqPlayerPlay);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject GetInterface failure.");
        return -1;
    }

    // get the buffer queue interface
    result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_BUFFERQUEUE,
            &bqPlayerBufferQueue);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject GetInterface failure.");
        return -1;
    }

    // register callback on the buffer queue
    result = (*bqPlayerBufferQueue)->RegisterCallback(bqPlayerBufferQueue,
            bqPlayerCallback, NULL);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject RegisterCallback failure.");
        return -1;
    }

    // get the effect send interface
    result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_EFFECTSEND,
            &bqPlayerEffectSend);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject GetInterface SL_IID_EFFECTSEND failure.");
        return -1;
    }

    // get the volume interface
    result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_VOLUME,
            &bqPlayerVolume);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject GetInterface SL_IID_VOLUME failure.");
        return -1;
    }

    // set the player's state to playing
    result = (*bqPlayerPlay)->SetPlayState(bqPlayerPlay, SL_PLAYSTATE_PLAYING );
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject SetPlayState SL_PLAYSTATE_PLAYING failure.");
        return -1;
    }

    LOGV2("OpenSL ES CreateAudioPlayer success.");

    return 0;
}

void fireOnPlayer() {
    bqPlayerCallback(bqPlayerBufferQueue, NULL);
}

/**
 * Destroys the given object instance.
 *
 * @param object object instance. [IN/OUT]
 */
static void DestroyObject(SLObjectItf& object) {
    if (0 != object)
        (*object)->Destroy(object);

    object = 0;
}

void destroyPlayerAndEngine() {
    // Destroy audio player object
    DestroyObject(bqPlayerObject);

    // Destroy output mix object
    DestroyObject(outputMixObject);

    // Destroy the engine instance
    DestroyObject(engineObject);
}

/*
 * Class:     com_opensles_ffmpeg_MainActivity
 * Method:    startAudioPlayer
 * Signature: ()I
 */
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_startAudioPlayer(
        JNIEnv *, jclass) {
    pthread_t thread;
    pthread_create(&thread, NULL, open_media, NULL);
    return 0;
}

/*
 * Class:     com_opensles_ffmpeg_MainActivity
 * Method:    destroyEngine
 * Signature: ()I
 */
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_destroyEngine(
        JNIEnv *, jclass) {
    destroyPlayerAndEngine();
    return 0;
}

/*
 * Class:     com_opensles_ffmpeg_MainActivity
 * Method:    stopAudioPlayer
 * Signature: ()I
 */
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_stopAudioPlayer(
        JNIEnv *, jclass) {
    (*bqPlayerPlay)->SetPlayState(bqPlayerPlay, SL_PLAYSTATE_STOPPED );
    global_context.pause = 1;
    global_context.quit = 1;
    usleep(50000);
    return 0;
}

Below is the source of audio.cpp. audio_decode_frame() decodes the audio with FFmpeg's avcodec_decode_audio4(), while init_filter_graph(), av_buffersrc_add_frame(), and av_buffersink_get_frame() convert the decoded audio into a uniform representation: everything ends up stored as AV_SAMPLE_FMT_S16, with the sample rate and channel count kept at the original audio's values. The benefit is that different codecs, which may decode to different PCM storage types, all feed the player the same format.
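As an aside, the same normalization to AV_SAMPLE_FMT_S16 could also be done with libswresample instead of a filter graph. This project uses the filter graph; the following is only a sketch of the alternative, using swr_alloc_set_opts()/swr_convert() from the same FFmpeg generation as the code below, and assuming frame->channel_layout is set:

#include <libswresample/swresample.h>

// convert one decoded frame to interleaved S16 at its native rate and
// layout; returns the number of bytes written to out, or < 0 on error
static int frame_to_s16(SwrContext **swr, AVFrame *frame, uint8_t *out,
        int out_size) {
    if (!*swr) {
        *swr = swr_alloc_set_opts(NULL, frame->channel_layout,
                AV_SAMPLE_FMT_S16, frame->sample_rate, frame->channel_layout,
                (enum AVSampleFormat) frame->format, frame->sample_rate,
                0, NULL);
        if (!*swr || swr_init(*swr) < 0)
            return -1;
    }
    uint8_t *out_planes[1] = { out };
    int out_cap = out_size / (frame->channels * 2);
    int n = swr_convert(*swr, out_planes, out_cap,
            (const uint8_t **) frame->extended_data, frame->nb_samples);
    if (n < 0)
        return -1;
    return n * frame->channels * 2; // 2 bytes per S16 sample per channel
}

With that aside done, here is the actual audio.cpp: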

#include "player.h"

#define DECODE_AUDIO_BUFFER_SIZE ((AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) )

static AVFilterContext *in_audio_filter;  // the first filter in the audio chain
static AVFilterContext *out_audio_filter;  // the last filter in the audio chain
static AVFilterGraph *agraph;              // audio filter graph
static struct AudioParams audio_filter_src;

static int init_filter_graph(AVFilterGraph **graph, AVFilterContext **src,
        AVFilterContext **sink) {
    AVFilterGraph *filter_graph;
    AVFilterContext *abuffer_ctx;
    AVFilter *abuffer;
    AVFilterContext *aformat_ctx;
    AVFilter *aformat;
    AVFilterContext *abuffersink_ctx;
    AVFilter *abuffersink;

    char options_str[1024];
    char ch_layout[64];

    int err;

    /* Create a new filtergraph, which will contain all the filters. */
    filter_graph = avfilter_graph_alloc();
    if (!filter_graph) {
        av_log(NULL, AV_LOG_ERROR, "Unable to create filter graph.\n");
        return AVERROR(ENOMEM);
    }

    /* Create the abuffer filter;
     * it will be used for feeding the data into the graph. */
    abuffer = avfilter_get_by_name("abuffer");
    if (!abuffer) {
        av_log(NULL, AV_LOG_ERROR, "Could not find the abuffer filter.\n");
        return AVERROR_FILTER_NOT_FOUND ;
    }

    abuffer_ctx = avfilter_graph_alloc_filter(filter_graph, abuffer, "src");
    if (!abuffer_ctx) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not allocate the abuffer instance.\n");
        return AVERROR(ENOMEM);
    }

    /* Set the filter options through the AVOptions API. */
    av_get_channel_layout_string(ch_layout, sizeof(ch_layout), (int) 0,
            audio_filter_src.channel_layout);
    av_opt_set(abuffer_ctx, "channel_layout", ch_layout,
            AV_OPT_SEARCH_CHILDREN);
    av_opt_set(abuffer_ctx, "sample_fmt",
            av_get_sample_fmt_name(audio_filter_src.fmt),
            AV_OPT_SEARCH_CHILDREN);
    av_opt_set_q(abuffer_ctx, "time_base",
            (AVRational ) { 1, audio_filter_src.freq },
            AV_OPT_SEARCH_CHILDREN);
    av_opt_set_int(abuffer_ctx, "sample_rate", audio_filter_src.freq,
            AV_OPT_SEARCH_CHILDREN);

    /* Now initialize the filter; we pass NULL options, since we have already
     * set all the options above. */
    err = avfilter_init_str(abuffer_ctx, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not initialize the abuffer filter.\n");
        return err;
    }

    /* Create the aformat filter;
     * it ensures that the output is of the format we want. */
    aformat = avfilter_get_by_name("aformat");
    if (!aformat) {
        av_log(NULL, AV_LOG_ERROR, "Could not find the aformat filter.\n");
        return AVERROR_FILTER_NOT_FOUND ;
    }

    aformat_ctx = avfilter_graph_alloc_filter(filter_graph, aformat, "aformat");
    if (!aformat_ctx) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not allocate the aformat instance.\n");
        return AVERROR(ENOMEM);
    }

    /* A third way of passing the options is in a string of the form
     * key1=value1:key2=value2.... */
    snprintf(options_str, sizeof(options_str),
            "sample_fmts=%s:sample_rates=%d:channel_layouts=0x%llx",
            av_get_sample_fmt_name(AV_SAMPLE_FMT_S16), audio_filter_src.freq,
            (unsigned long long) audio_filter_src.channel_layout);
    err = avfilter_init_str(aformat_ctx, options_str);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not initialize the aformat filter.\n");
        return err;
    }

    /* Finally create the abuffersink filter;
     * it will be used to get the filtered data out of the graph. */
    abuffersink = avfilter_get_by_name("abuffersink");
    if (!abuffersink) {
        av_log(NULL, AV_LOG_ERROR, "Could not find the abuffersink filter.\n");
        return AVERROR_FILTER_NOT_FOUND ;
    }

    abuffersink_ctx = avfilter_graph_alloc_filter(filter_graph, abuffersink,
            "sink");
    if (!abuffersink_ctx) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not allocate the abuffersink instance.\n");
        return AVERROR(ENOMEM);
    }

    /* This filter takes no options. */
    err = avfilter_init_str(abuffersink_ctx, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not initialize the abuffersink instance.\n");
        return err;
    }

    /* Connect the filters;
     * in this simple case the filters just form a linear chain. */
    err = avfilter_link(abuffer_ctx, 0, aformat_ctx, 0);
    if (err >= 0) {
        err = avfilter_link(aformat_ctx, 0, abuffersink_ctx, 0);
    }

    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error connecting filters\n");
        return err;
    }

    /* Configure the graph. */
    err = avfilter_graph_config(filter_graph, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error configuring the filter graph\n");
        return err;
    }

    *graph = filter_graph;
    *src = abuffer_ctx;
    *sink = abuffersink_ctx;

    return 0;
}

static inline int64_t get_valid_channel_layout(int64_t channel_layout,
        int channels) {
    if (channel_layout
            && av_get_channel_layout_nb_channels(channel_layout) == channels) {
        return channel_layout;
    } else {
        return 0;
    }
}

// decode one packet from the audio queue (multi-frame packets are not handled)
// returns the size in bytes of the decoded frame, not the consumed packet size
int audio_decode_frame(uint8_t *audio_buf, int buf_size) {
    static AVPacket pkt;
    static uint8_t *audio_pkt_data = NULL;
    static int audio_pkt_size = 0;
    int len1, data_size;
    int got_frame;
    AVFrame * frame = NULL;
    static int reconfigure = 1; // build the filter graph once, from the first frame's properties
    int ret = -1;

    for (;;) {

        while (audio_pkt_size > 0) {

            if (NULL == frame) {
                frame = av_frame_alloc();
            }

            data_size = buf_size;
            got_frame = 0;

            // len1 is the number of bytes of the packet consumed by the decoder
            len1 = avcodec_decode_audio4(global_context.acodec_ctx, frame,
                    &got_frame, &pkt);
            if (got_frame) {

                if (reconfigure) {

                    reconfigure = 0;
                    int64_t dec_channel_layout = get_valid_channel_layout(
                            frame->channel_layout,
                            av_frame_get_channels(frame));

                    // used by init_filter_graph()
                    audio_filter_src.fmt = (enum AVSampleFormat) frame->format;
                    audio_filter_src.channels = av_frame_get_channels(frame);
                    audio_filter_src.channel_layout = dec_channel_layout;
                    audio_filter_src.freq = frame->sample_rate;

                    init_filter_graph(&agraph, &in_audio_filter,
                            &out_audio_filter);
                }

                if ((ret = av_buffersrc_add_frame(in_audio_filter, frame))
                        < 0) {
                    av_log(NULL, AV_LOG_ERROR,
                            "av_buffersrc_add_frame :  failure. \n");
                    return ret;
                }

                if ((ret = av_buffersink_get_frame(out_audio_filter, frame))
                        < 0) {
                    av_log(NULL, AV_LOG_ERROR,
                            "av_buffersink_get_frame :  failure. \n");
                    continue;
                }

                data_size = av_samples_get_buffer_size(NULL, frame->channels,
                        frame->nb_samples, (enum AVSampleFormat) frame->format,
                        1);

                // defensive check (normally unreachable when got_frame is set):
                // len1 < 0 means a decode error, so break to get a new packet
                if (len1 < 0) {
                    audio_pkt_size = 0;
                    av_log(NULL, AV_LOG_ERROR,
                            "avcodec_decode_audio4 failure. \n");
                    break;
                }

                // decoded data to audio buf
                memcpy(audio_buf, frame->data[0], data_size);

                audio_pkt_data += len1;
                audio_pkt_size -= len1;

                int n = 2 * global_context.acodec_ctx->channels;
                /*audio_clock += (double) data_size
                 / (double) (n * global_context.acodec_ctx->sample_rate); // add bytes offset */
                av_free_packet(&pkt);
                av_frame_free(&frame);

                return data_size;
            } else if (len1 < 0) {
                char errbuf[64];
                av_strerror(ret, errbuf, 64);
                LOGV2("avcodec_decode_audio4 ret < 0, %s", errbuf);
            }
        }

        av_free_packet(&pkt);
        av_frame_free(&frame);

        // get a new packet
        if (packet_queue_get(&global_context.audio_queue, &pkt) < 0) {
            return -1;
        }

        //LOGV2("pkt.size is %d", pkt.size);

        audio_pkt_data = pkt.data;
        audio_pkt_size = pkt.size;
    }

    return ret;
}

GitHub Source Code