基於FFmpeg和OpenSL ES的Android音訊播放器實現
前言
在以前的博文中,我們通過FFmpeg解碼,並基於OpenGL ES完成了視訊的渲染。本文我們將基於OpenSL ES完成native音訊的注入播放。
OpenSL ES也是The Khronos Group Inc組織制定的一個音訊規範,網上資料很多,在Android SDK程式碼裡還有例子:
native-audio,裡面詳細的實現了OpenSL ES的不同功能,本文不再闡述原理了,基本上是按照固定流程呼叫。
當我們基於FFmpeg解碼時,最合適的方法是採用buffer管理的方式,將音訊PCM資料連續注入解碼記憶體,再由晶片完成PCM的播放。有關取樣率,聲道儲存位寬等音訊相關知識,請自行學習。
實現需求
- 只演示音訊解碼和播放,不進行視訊解碼,也不做音視訊同步;
- 基於FFmpeg來進行音訊解封裝和解碼,基於OpenSL ES進行PCM注入播放;
實現架構
仍然類似於前面視訊播放的播放流程設計,由於不涉及surface的管理,所以要簡單一些,注意下面幾點:
1. createEngine()和createBufferQueueAudioPlayer()函式放在開啟媒體檔案之後,因為要依賴媒體的音訊屬性(聲道數,取樣率等);
2. 播放器PCM資料的讀取是通過回撥的方式從媒體佇列中讀取並進行解碼,而不是另建一個執行緒主動注入,這點與視訊解碼是不同的。
3. 因為音訊的播放速度是由取樣率決定的,所以音訊的播放速率無需干預,不存在視訊幀播放快慢的問題。
程式碼結構
audio-jni.cpp提供jni相關函式的實現,原生代碼最終生成libaudio-jni.so庫。
audio.cpp完成基於OpenSL ES的音訊管理。
player.cpp裡實現媒體檔案的開啟播放(基於FFmpeg)。
主要程式碼
下面是audio-jni.cpp的原始碼,這是本專案的核心實現。需要特別注意如下幾點,否則音訊無法播放成功:
1. createBufferQueueAudioPlayer()函式中,SLDataFormat_PCM format_pcm的第二個引數是聲道數,第三個引數是取樣率,FFmpeg和OpenSL ES差1000的倍乘,要注意的是引數channelMask,一定要和聲道數對應,比如,如果是雙聲道,我們需要將兩個聲道通過或關係繫結(SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT),而不能簡單的設定SL_SPEAKER_FRONT_CENTER,否則CreateAudioPlayer會失敗,這是我遇到過的問題。
2. 播放器SetPlayState狀態為SL_PLAYSTATE_PLAYING後,並不會主動呼叫回撥函式來取資料,所以我們需要主動呼叫一次回撥函式(通過fireOnPlayer函式)觸發資料讀取。
3. 回撥函式讀取資料並不會像SDL那樣,告知底層需要讀取的資料長度,所以我們可以做得簡單一點,僅讀取一個包並解碼回撥即可,待資料消耗完畢,還會再次呼叫回撥函式讀PCM資料的,效率並不低。
#include <assert.h>
#include <jni.h>
#include <string.h>
#include "com_opensles_ffmpeg_MainActivity.h"
#include "player.h"
// for native audio
#include <SLES/OpenSLES.h>
#include <SLES/OpenSLES_Android.h>
// engine interfaces
static SLObjectItf engineObject = NULL;   // OpenSL ES engine object
static SLEngineItf engineEngine;          // engine interface, used to create other objects
// output mix interfaces
static SLObjectItf outputMixObject = NULL;
static SLEnvironmentalReverbItf outputMixEnvironmentalReverb = NULL; // optional (non-required) interface
// buffer queue player interfaces
static SLObjectItf bqPlayerObject = NULL;
static SLPlayItf bqPlayerPlay;                            // play-state control
static SLAndroidSimpleBufferQueueItf bqPlayerBufferQueue; // PCM enqueue interface
static SLEffectSendItf bqPlayerEffectSend;
static SLVolumeItf bqPlayerVolume;
// Shared PCM scratch buffer filled by audio_decode_frame() inside the
// buffer-queue callback; single-threaded use only (callback thread).
// AVCODEC_MAX_AUDIO_FRAME_SIZE comes from player.h.
static uint8_t decoded_audio_buf[AVCODEC_MAX_AUDIO_FRAME_SIZE];
// this callback handler is called every time a buffer finishes playing
// Buffer-queue callback: invoked by OpenSL ES each time a queued buffer has
// finished playing. Decodes one packet worth of PCM and enqueues it so that
// playback keeps itself going; if nothing is decoded, the queue simply drains.
void bqPlayerCallback(SLAndroidSimpleBufferQueueItf bq, void *context) {
    //LOGV2("bqPlayerCallback...");
    // Sanity check: only accept callbacks from our own buffer queue.
    if (bq != bqPlayerBufferQueue) {
        LOGV2("bqPlayerCallback : not the same player object.");
        return;
    }

    // Decode the next packet into the shared PCM buffer.
    int pcm_size = audio_decode_frame(decoded_audio_buf,
            sizeof(decoded_audio_buf));
    if (pcm_size <= 0) {
        return;
    }

    SLresult result = (*bqPlayerBufferQueue)->Enqueue(bqPlayerBufferQueue,
            decoded_audio_buf, pcm_size);
    // the most likely other result is SL_RESULT_BUFFER_INSUFFICIENT,
    // which for this code example would indicate a programming error
    if (result != SL_RESULT_SUCCESS) {
        LOGV2("bqPlayerCallback : bqPlayerBufferQueue Enqueue failure.");
    }
}
// Creates the OpenSL ES engine object and the output mix.
// Must succeed before createBufferQueueAudioPlayer() is called.
//
// Returns 0 on success, -1 on failure. On failure all partially created
// objects are destroyed and the global handles are reset, instead of the
// original's four duplicated cleanup blocks.
int createEngine() {
    SLresult result;
    // Interfaces requested on the output mix; environmental reverb is
    // explicitly marked non-required (SL_BOOLEAN_FALSE).
    const SLInterfaceID ids[1] = { SL_IID_ENVIRONMENTALREVERB };
    const SLboolean req[1] = { SL_BOOLEAN_FALSE };

    // create engine
    result = slCreateEngine(&engineObject, 0, NULL, 0, NULL, NULL);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("slCreateEngine failure.");
        return -1;
    }
    // realize the engine
    result = (*engineObject)->Realize(engineObject, SL_BOOLEAN_FALSE );
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("engineObject Realize failure.");
        goto fail_engine;
    }
    // get the engine interface, which is needed in order to create other objects
    result = (*engineObject)->GetInterface(engineObject, SL_IID_ENGINE,
            &engineEngine);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("engineObject GetInterface failure.");
        goto fail_engine;
    }
    // create output mix, with environmental reverb specified as a non-required interface
    result = (*engineEngine)->CreateOutputMix(engineEngine, &outputMixObject, 1,
            ids, req);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("engineObject CreateOutputMix failure.");
        goto fail_engine;
    }
    // realize the output mix
    result = (*outputMixObject)->Realize(outputMixObject, SL_BOOLEAN_FALSE );
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("outputMixObject Realize failure.");
        goto fail_outputmix;
    }
    // get the environmental reverb interface
    // this could fail if the environmental reverb effect is not available,
    // either because the feature is not present, excessive CPU load, or
    // the required MODIFY_AUDIO_SETTINGS permission was not requested and granted
    result = (*outputMixObject)->GetInterface(outputMixObject,
            SL_IID_ENVIRONMENTALREVERB, &outputMixEnvironmentalReverb);
    if (SL_RESULT_SUCCESS != result) {
        // The interface was requested as non-required, so failing to get it
        // is not fatal: continue without reverb. (The original logged a
        // copy-pasted "Realize failure." message and aborted unnecessarily.)
        LOGV2("outputMixObject GetInterface SL_IID_ENVIRONMENTALREVERB failure.");
        outputMixEnvironmentalReverb = NULL;
    }
    LOGV2("OpenSL ES createEngine success.");
    return 0;

fail_outputmix:
    (*outputMixObject)->Destroy(outputMixObject);
    outputMixObject = NULL;
    outputMixEnvironmentalReverb = NULL;
fail_engine:
    (*engineObject)->Destroy(engineObject);
    engineObject = NULL;
    engineEngine = NULL;
    return -1;
}
// Creates and starts the OpenSL ES buffer-queue audio player, configured
// from the opened media's audio properties (channel count, sample rate),
// which is why this must run after the media file is opened and after
// createEngine() has succeeded.
//
// Returns 0 on success, -1 on failure. Unlike the original, failures after
// CreateAudioPlayer() destroy the partially initialized player object so
// the globals do not keep dangling handles.
int createBufferQueueAudioPlayer() {
    SLresult result;
    SLuint32 channelMask;

    // configure audio source: an Android simple buffer queue with 2 buffers
    SLDataLocator_AndroidSimpleBufferQueue loc_bufq = {
            SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE, 2 };
    // channelMask must agree with the channel count or CreateAudioPlayer()
    // fails: stereo needs LEFT|RIGHT, not a single CENTER speaker.
    if (global_context.acodec_ctx->channels == 2)
        channelMask = SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT;
    else
        channelMask = SL_SPEAKER_FRONT_CENTER;
    // OpenSL ES expresses the sample rate in milliHertz, hence the *1000
    // factor relative to FFmpeg's Hz value.
    SLDataFormat_PCM format_pcm = { SL_DATAFORMAT_PCM,
            global_context.acodec_ctx->channels,
            global_context.acodec_ctx->sample_rate * 1000,
            SL_PCMSAMPLEFORMAT_FIXED_16, SL_PCMSAMPLEFORMAT_FIXED_16,
            channelMask, SL_BYTEORDER_LITTLEENDIAN };
    SLDataSource audioSrc = { &loc_bufq, &format_pcm };
    // configure audio sink
    SLDataLocator_OutputMix loc_outmix = { SL_DATALOCATOR_OUTPUTMIX,
            outputMixObject };
    SLDataSink audioSnk = { &loc_outmix, NULL };
    // create audio player (all three interfaces are required)
    const SLInterfaceID ids[3] = { SL_IID_BUFFERQUEUE, SL_IID_EFFECTSEND,
            SL_IID_VOLUME };
    const SLboolean req[3] =
            { SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE };
    result = (*engineEngine)->CreateAudioPlayer(engineEngine, &bqPlayerObject,
            &audioSrc, &audioSnk, 3, ids, req);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("CreateAudioPlayer failure.");
        return -1;
    }
    // realize the player
    result = (*bqPlayerObject)->Realize(bqPlayerObject, SL_BOOLEAN_FALSE );
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject Realize failure.");
        goto fail_player;
    }
    // get the play interface
    result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_PLAY,
            &bqPlayerPlay);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject GetInterface SL_IID_PLAY failure.");
        goto fail_player;
    }
    // get the buffer queue interface
    result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_BUFFERQUEUE,
            &bqPlayerBufferQueue);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject GetInterface SL_IID_BUFFERQUEUE failure.");
        goto fail_player;
    }
    // register callback on the buffer queue
    result = (*bqPlayerBufferQueue)->RegisterCallback(bqPlayerBufferQueue,
            bqPlayerCallback, NULL);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject RegisterCallback failure.");
        goto fail_player;
    }
    // get the effect send interface
    result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_EFFECTSEND,
            &bqPlayerEffectSend);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject GetInterface SL_IID_EFFECTSEND failure.");
        goto fail_player;
    }
    // get the volume interface
    result = (*bqPlayerObject)->GetInterface(bqPlayerObject, SL_IID_VOLUME,
            &bqPlayerVolume);
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject GetInterface SL_IID_VOLUME failure.");
        goto fail_player;
    }
    // set the player's state to playing; note OpenSL ES will NOT call the
    // buffer-queue callback by itself -- fireOnPlayer() primes the first buffer
    result = (*bqPlayerPlay)->SetPlayState(bqPlayerPlay, SL_PLAYSTATE_PLAYING );
    if (SL_RESULT_SUCCESS != result) {
        LOGV2("bqPlayerObject SetPlayState SL_PLAYSTATE_PLAYING failure.");
        goto fail_player;
    }
    LOGV2("OpenSL ES CreateAudioPlayer success.");
    return 0;

fail_player:
    // Destroy the partially initialized player (leaked in the original) and
    // reset every derived interface handle.
    (*bqPlayerObject)->Destroy(bqPlayerObject);
    bqPlayerObject = NULL;
    bqPlayerPlay = NULL;
    bqPlayerBufferQueue = NULL;
    bqPlayerEffectSend = NULL;
    bqPlayerVolume = NULL;
    return -1;
}
void fireOnPlayer() {
bqPlayerCallback(bqPlayerBufferQueue, NULL);
}
/**
 * Destroys the given OpenSL ES object, if any, and clears the handle so it
 * cannot be used (or destroyed) again.
 *
 * @param object object instance; reset to NULL on return. [IN/OUT]
 */
static void DestroyObject(SLObjectItf& object) {
    if (object != NULL) {
        (*object)->Destroy(object);
    }
    object = NULL;
}
void destroyPlayerAndEngine() {
// Destroy audio player object
DestroyObject(bqPlayerObject);
// Destroy output mix object
DestroyObject(outputMixObject);
// Destroy the engine instance
DestroyObject(engineObject);
}
/*
 * Class: com_opensles_ffmpeg_MainActivity
 * Method: startAudioPlayer
 * Signature: ()I
 */
// Spawns the media open/decode thread (open_media, defined in player.cpp).
// Returns 0 on success, -1 if the thread could not be created. The original
// ignored pthread_create's result and left the joinable thread handle
// leaked; the thread is now detached since nothing ever joins it.
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_startAudioPlayer(
        JNIEnv *, jclass) {
    pthread_t thread;
    if (pthread_create(&thread, NULL, open_media, NULL) != 0) {
        LOGV2("startAudioPlayer : pthread_create failure.");
        return -1;
    }
    pthread_detach(thread);
    return 0;
}
/*
 * Class: com_opensles_ffmpeg_MainActivity
 * Method: destroyEngine
 * Signature: ()I
 */
// JNI entry point: releases every OpenSL ES object (player, output mix,
// engine). Always reports success to the Java side.
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_destroyEngine(
        JNIEnv *, jclass) {
    destroyPlayerAndEngine();
    return 0;
}
/*
 * Class: com_opensles_ffmpeg_MainActivity
 * Method: stopAudioPlayer
 * Signature: ()I
 */
// JNI entry point: stops PCM playback and signals the decode thread to quit.
// Returns 0 always (matching the original contract).
JNIEXPORT jint JNICALL Java_com_opensles_ffmpeg_MainActivity_stopAudioPlayer(
        JNIEnv *, jclass) {
    // Guard: the player may never have been created (or was already
    // destroyed); the original dereferenced bqPlayerPlay unconditionally
    // and crashed in that case.
    if (bqPlayerPlay != NULL) {
        (*bqPlayerPlay)->SetPlayState(bqPlayerPlay, SL_PLAYSTATE_STOPPED );
    }
    global_context.pause = 1;
    global_context.quit = 1;
    // Brief sleep so the decode thread can observe the quit flag before the
    // caller proceeds to tear things down.
    usleep(50000);
    return 0;
}
下面是audio.cpp的原始碼,audio_decode_frame()函式通過FFmpeg的avcodec_decode_audio4()函式完成音訊資料解碼,init_filter_graph()函式,av_buffersrc_add_frame()函式,av_buffersink_get_frame()函式,完成解碼後音訊格式的統一轉換,最後都變成AV_SAMPLE_FMT_S16的儲存形式,當然取樣率和聲道數保持原始音訊屬性,這樣做的好處是避免了不同音訊格式解碼出不同PCM儲存型別。
#include "player.h"
// Decode buffer sizing constant (3x the FFmpeg max audio frame size).
// NOTE(review): appears unused within this file -- verify against player.cpp.
#define DECODE_AUDIO_BUFFER_SIZE ((AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) )
static AVFilterContext *in_audio_filter; // the first filter in the audio chain
static AVFilterContext *out_audio_filter; // the last filter in the audio chain
static AVFilterGraph *agraph; // audio filter graph
// Source audio parameters captured from the first decoded frame; consumed by
// init_filter_graph() to configure the abuffer/aformat filters.
static struct AudioParams audio_filter_src;
// Builds the audio filter graph: abuffer (source) -> aformat -> abuffersink,
// converting decoded frames to AV_SAMPLE_FMT_S16 while keeping the original
// sample rate and channel layout (taken from audio_filter_src).
//
// On success, *graph, *src and *sink receive the configured graph and its
// endpoint filters and 0 is returned. On failure a negative AVERROR code is
// returned and the partially built graph is freed (the original leaked it
// on every error path after allocation).
static int init_filter_graph(AVFilterGraph **graph, AVFilterContext **src,
        AVFilterContext **sink) {
    AVFilterGraph *filter_graph;
    AVFilterContext *abuffer_ctx;
    AVFilter *abuffer;
    AVFilterContext *aformat_ctx;
    AVFilter *aformat;
    AVFilterContext *abuffersink_ctx;
    AVFilter *abuffersink;
    char options_str[1024];
    char ch_layout[64];
    int err;
    /* Create a new filtergraph, which will contain all the filters. */
    filter_graph = avfilter_graph_alloc();
    if (!filter_graph) {
        av_log(NULL, AV_LOG_ERROR, "Unable to create filter graph.\n");
        return AVERROR(ENOMEM);
    }
    /* Create the abuffer filter;
     * it will be used for feeding the data into the graph. */
    abuffer = avfilter_get_by_name("abuffer");
    if (!abuffer) {
        av_log(NULL, AV_LOG_ERROR, "Could not find the abuffer filter.\n");
        err = AVERROR_FILTER_NOT_FOUND;
        goto fail;
    }
    abuffer_ctx = avfilter_graph_alloc_filter(filter_graph, abuffer, "src");
    if (!abuffer_ctx) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not allocate the abuffer instance.\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }
    /* Set the filter options through the AVOptions API. */
    av_get_channel_layout_string(ch_layout, sizeof(ch_layout), (int) 0,
            audio_filter_src.channel_layout);
    av_opt_set(abuffer_ctx, "channel_layout", ch_layout,
            AV_OPT_SEARCH_CHILDREN);
    av_opt_set(abuffer_ctx, "sample_fmt",
            av_get_sample_fmt_name(audio_filter_src.fmt),
            AV_OPT_SEARCH_CHILDREN);
    av_opt_set_q(abuffer_ctx, "time_base",
            (AVRational ) { 1, audio_filter_src.freq },
            AV_OPT_SEARCH_CHILDREN);
    av_opt_set_int(abuffer_ctx, "sample_rate", audio_filter_src.freq,
            AV_OPT_SEARCH_CHILDREN);
    /* Now initialize the filter; we pass NULL options, since we have already
     * set all the options above. */
    err = avfilter_init_str(abuffer_ctx, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not initialize the abuffer filter.\n");
        goto fail;
    }
    /* Create the aformat filter;
     * it ensures that the output is of the format we want. */
    aformat = avfilter_get_by_name("aformat");
    if (!aformat) {
        av_log(NULL, AV_LOG_ERROR, "Could not find the aformat filter.\n");
        err = AVERROR_FILTER_NOT_FOUND;
        goto fail;
    }
    aformat_ctx = avfilter_graph_alloc_filter(filter_graph, aformat, "aformat");
    if (!aformat_ctx) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not allocate the aformat instance.\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }
    /* A third way of passing the options is in a string of the form
     * key1=value1:key2=value2....
     * channel_layout is 64-bit, so it must be widened and printed with %llx;
     * the original passed it to a plain %x (wrong vararg size, UB). */
    snprintf(options_str, sizeof(options_str),
            "sample_fmts=%s:sample_rates=%d:channel_layouts=0x%llx",
            av_get_sample_fmt_name(AV_SAMPLE_FMT_S16), audio_filter_src.freq,
            (unsigned long long) audio_filter_src.channel_layout);
    err = avfilter_init_str(aformat_ctx, options_str);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not initialize the aformat filter.\n");
        goto fail;
    }
    /* Finally create the abuffersink filter;
     * it will be used to get the filtered data out of the graph. */
    abuffersink = avfilter_get_by_name("abuffersink");
    if (!abuffersink) {
        av_log(NULL, AV_LOG_ERROR, "Could not find the abuffersink filter.\n");
        err = AVERROR_FILTER_NOT_FOUND;
        goto fail;
    }
    abuffersink_ctx = avfilter_graph_alloc_filter(filter_graph, abuffersink,
            "sink");
    if (!abuffersink_ctx) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not allocate the abuffersink instance.\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }
    /* This filter takes no options. */
    err = avfilter_init_str(abuffersink_ctx, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR,
                "Could not initialize the abuffersink instance.\n");
        goto fail;
    }
    /* Connect the filters;
     * in this simple case the filters just form a linear chain. */
    err = avfilter_link(abuffer_ctx, 0, aformat_ctx, 0);
    if (err >= 0) {
        err = avfilter_link(aformat_ctx, 0, abuffersink_ctx, 0);
    }
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error connecting filters\n");
        goto fail;
    }
    /* Configure the graph. */
    err = avfilter_graph_config(filter_graph, NULL);
    if (err < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error configuring the filter graph\n");
        goto fail;
    }
    *graph = filter_graph;
    *src = abuffer_ctx;
    *sink = abuffersink_ctx;
    return 0;

fail:
    /* Frees the graph together with every filter context added to it. */
    avfilter_graph_free(&filter_graph);
    return err;
}
// Returns channel_layout when it is consistent with the given channel count,
// otherwise 0 so the caller can fall back to a default layout.
static inline int64_t get_valid_channel_layout(int64_t channel_layout,
        int channels) {
    int layout_matches = (channel_layout != 0)
            && (av_get_channel_layout_nb_channels(channel_layout) == channels);
    return layout_matches ? channel_layout : 0;
}
// decode a new packet(not multi-frame)
// return decoded frame size, not decoded packet size
//
// Pulls packets from global_context.audio_queue, decodes them with
// avcodec_decode_audio4(), pushes the decoded frame through the audio
// filter graph (built lazily on the first frame; converts everything to
// AV_SAMPLE_FMT_S16 at the source sample rate / channel count), then copies
// the resulting PCM into audio_buf.
//
// NOTE(review): the static packet state makes this function usable from a
// single thread only (here: the OpenSL ES buffer-queue callback thread).
int audio_decode_frame(uint8_t *audio_buf, int buf_size) {
    static AVPacket pkt;                   // packet currently being consumed
    static uint8_t *audio_pkt_data = NULL; // read cursor inside pkt
    static int audio_pkt_size = 0;         // bytes of pkt still to decode
    int len1, data_size;
    int got_frame;
    AVFrame * frame = NULL;
    static int reconfigure = 1;            // build the filter graph exactly once
    int ret = -1;
    for (;;) {
        // Consume the current packet until it is exhausted or a frame is
        // produced and returned.
        while (audio_pkt_size > 0) {
            if (NULL == frame) {
                frame = av_frame_alloc();
            }
            data_size = buf_size;
            got_frame = 0;
            // len1 is decoded packet size
            len1 = avcodec_decode_audio4(global_context.acodec_ctx, frame,
                    &got_frame, &pkt);
            if (got_frame) {
                if (reconfigure) {
                    reconfigure = 0;
                    // Initialize the filter graph from the first decoded
                    // frame's real parameters rather than the codec context.
                    int64_t dec_channel_layout = get_valid_channel_layout(
                            frame->channel_layout,
                            av_frame_get_channels(frame));
                    // used by init_filter_graph()
                    audio_filter_src.fmt = (enum AVSampleFormat) frame->format;
                    audio_filter_src.channels = av_frame_get_channels(frame);
                    audio_filter_src.channel_layout = dec_channel_layout;
                    audio_filter_src.freq = frame->sample_rate;
                    // NOTE(review): return value ignored -- a graph build
                    // failure will surface as a crash/failure below; consider
                    // checking it.
                    init_filter_graph(&agraph, &in_audio_filter,
                            &out_audio_filter);
                }
                // Feed the decoded frame into the graph, then read the
                // converted (packed S16) frame back out, reusing `frame`.
                if ((ret = av_buffersrc_add_frame(in_audio_filter, frame))
                        < 0) {
                    av_log(NULL, AV_LOG_ERROR,
                            "av_buffersrc_add_frame : failure. \n");
                    return ret;
                }
                if ((ret = av_buffersink_get_frame(out_audio_filter, frame))
                        < 0) {
                    // Sink has no output yet; keep decoding this packet.
                    av_log(NULL, AV_LOG_ERROR,
                            "av_buffersink_get_frame : failure. \n");
                    continue;
                }
                data_size = av_samples_get_buffer_size(NULL, frame->channels,
                        frame->nb_samples, (enum AVSampleFormat) frame->format,
                        1);
                // len1 is decoded packet size
                // < 0 means failure or error,so break to get a new packet
                if (len1 < 0) {
                    audio_pkt_size = 0;
                    av_log(NULL, AV_LOG_ERROR,
                            "avcodec_decode_audio4 failure. \n");
                    break;
                }
                // decoded data to audio buf
                // The aformat filter guarantees packed S16, so all samples
                // live in data[0].
                memcpy(audio_buf, frame->data[0], data_size);
                audio_pkt_data += len1;
                audio_pkt_size -= len1;
                int n = 2 * global_context.acodec_ctx->channels;
                /*audio_clock += (double) data_size
                 / (double) (n * global_context.acodec_ctx->sample_rate); // add bytes offset */
                // NOTE(review): the packet is freed even when len1 bytes did
                // not exhaust it (remaining audio_pkt_size is abandoned on
                // the next outer iteration) -- verify this is intended.
                av_free_packet(&pkt);
                av_frame_free(&frame);
                return data_size;
            } else if (len1 < 0) {
                // NOTE(review): audio_pkt_size is not advanced here, so a
                // persistent decode error could loop tightly -- verify.
                char errbuf[64];
                av_strerror(ret, errbuf, 64);
                LOGV2("avcodec_decode_audio4 ret < 0, %s", errbuf);
            }
        }
        av_free_packet(&pkt);
        av_frame_free(&frame);
        // get a new packet
        if (packet_queue_get(&global_context.audio_queue, &pkt) < 0) {
            // Queue signalled quit/failure.
            return -1;
        }
        //LOGV2("pkt.size is %d", pkt.size);
        audio_pkt_data = pkt.data;
        audio_pkt_size = pkt.size;
    }
    return ret;
}