利用ffmpeg的filter混音

阿新 • • 發佈：2019-02-18

概述

一個簡單的混音demo，把檔案a和檔案b的音訊混為一個音訊流輸出並存為檔案，只處理每個檔案的第一個音訊流
注：原始碼是網友Larry_Liang(1085803139)寫的，我幫其除錯通過，存在這裡以免備忘

流程介紹

流程如下：

開啟輸入1->開啟輸入2->開啟輸出->初始化混音filter->開啟採集執行緒和編碼寫檔案迴圈

執行緒之間資料走向如下：

採集執行緒1採集到資料->解碼->寫入fifo1 |
採集執行緒2採集到資料->解碼->寫入fifo2 | ->主執行緒編碼迴圈讀取2個fifo->把資料壓人filter->從filter中讀取混音後資料->編碼->寫入檔案

程式碼

/*
*一個簡單的混音demo，把檔案a和檔案b的音訊混為一個音訊流輸出並存為檔案，只處理每個檔案的第一個音訊流
*原始碼是網友Larry_Liang(1085803139)寫的，我幫其除錯通過
*MK(821486004@qq.com)
*/


extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/avfiltergraph.h" 

#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
}

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib") 

#pragma comment(lib, "avfilter.lib")

//#pragma comment(lib, "avfilter.lib")
//#pragma comment(lib, "postproc.lib")
//#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")

#include <windows.h>
#include <conio.h>
#include <time.h>

enum CaptureState
{
    PREPARED,
    RUNNING,
    STOPPED,
    FINISHED
};

 typedef struct BufferSourceContext {
    const AVClass    *bscclass;
    AVFifoBuffer     *fifo;
    AVRational        time_base;     ///< time_base to set in the output link
    AVRational        frame_rate;    ///< frame_rate to set in the output link
    unsigned          nb_failed_requests;
    unsigned          warning_limit;

    /* video only */
    int               w, h;
    enum AVPixelFormat  pix_fmt;
    AVRational        pixel_aspect;
    char              *sws_param;

    AVBufferRef *hw_frames_ctx;

    /* audio only */
    int sample_rate;
    enum AVSampleFormat sample_fmt;
    int channels;
    uint64_t channel_layout;
    char    *channel_layout_str;

    int got_format_from_params;
    int eof;
 } BufferSourceContext;

AVFormatContext* _fmt_ctx_spk = NULL;
AVFormatContext* _fmt_ctx_mic = NULL;
AVFormatContext* _fmt_ctx_out = NULL;
int _index_spk = -1;
int _index_mic = -1;
int _index_a_out = -1;

AVFilterGraph* _filter_graph = NULL;
AVFilterContext* _filter_ctx_src_spk = NULL;
AVFilterContext* _filter_ctx_src_mic = NULL;
AVFilterContext* _filter_ctx_sink = NULL;

CaptureState _state = CaptureState::PREPARED;

CRITICAL_SECTION _section_spk;
CRITICAL_SECTION _section_mic;
AVAudioFifo* _fifo_spk = NULL;
AVAudioFifo* _fifo_mic = NULL;

void InitRecorder()
{
    av_register_all();
    avdevice_register_all();
    avfilter_register_all();
}

int OpenSpeakerInput(char* inputForamt, char* url)
{
    AVInputFormat* ifmt = av_find_input_format(inputForamt);
    AVDictionary* opt1 = NULL;
    av_dict_set(&opt1, "rtbufsize", "10M", 0);

    int ret = 0;
    ret = avformat_open_input(&_fmt_ctx_spk, url, ifmt, &opt1);
    if (ret < 0)
    {
        printf("Speaker: failed to call avformat_open_input\n");
        return -1;
    }
    ret = avformat_find_stream_info(_fmt_ctx_spk, NULL);
    if (ret < 0)
    {
        printf("Speaker: failed to call avformat_find_stream_info\n");
        return -1;
    }
    for (int i = 0; i < _fmt_ctx_spk->nb_streams; i++)
    {
        if (_fmt_ctx_spk->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            _index_spk = i;
            break;
        }
    }
    if (_index_spk < 0)
    {
        printf("Speaker: negative audio index\n");
        return -1;
    }
    AVCodecContext* codec_ctx = _fmt_ctx_spk->streams[_index_spk]->codec;
    AVCodec* codec = avcodec_find_decoder(codec_ctx->codec_id);
    if (codec == NULL)
    {
        printf("Speaker: null audio decoder\n");
        return -1;
    }
    ret = avcodec_open2(codec_ctx, codec, NULL);
    if (ret < 0)
    {
        printf("Speaker: failed to call avcodec_open2\n");
        return -1;
    }
    av_dump_format(_fmt_ctx_spk, _index_spk, url, 0);

    return 0;
}

int OpenMicrophoneInput(char* inputForamt, char* url)
{
    AVInputFormat* ifmt = av_find_input_format(inputForamt);
    AVDictionary* opt1 = NULL;
    av_dict_set(&opt1, "rtbufsize", "10M", 0);

    int ret = 0;
    ret = avformat_open_input(&_fmt_ctx_mic, url, ifmt, &opt1);
    if (ret < 0)
    {
        printf("Microphone: failed to call avformat_open_input\n");
        return -1;
    }
    ret = avformat_find_stream_info(_fmt_ctx_mic, NULL);
    if (ret < 0)
    {
        printf("Microphone: failed to call avformat_find_stream_info\n");
        return -1;
    }
    for (int i = 0; i < _fmt_ctx_mic->nb_streams; i++)
    {
        if (_fmt_ctx_mic->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            _index_mic = i;
            break;
        }
    }
    if (_index_mic < 0)
    {
        printf("Microphone: negative audio index\n");
        return -1;
    }
    AVCodecContext* codec_ctx = _fmt_ctx_mic->streams[_index_mic]->codec;
    AVCodec* codec = avcodec_find_decoder(codec_ctx->codec_id);
    if (codec == NULL)
    {
        printf("Microphone: null audio decoder\n");
        return -1;
    }
    ret = avcodec_open2(codec_ctx, codec, NULL);
    if (ret < 0)
    {
        printf("Microphone: failed to call avcodec_open2\n");
        return -1;
    }

    av_dump_format(_fmt_ctx_mic, _index_mic, url, 0);

    return 0;
}

int OpenFileOutput(char* fileName)
{
    int ret = 0;
    ret = avformat_alloc_output_context2(&_fmt_ctx_out, NULL, NULL, fileName);
    if (ret < 0)
    {
        printf("Mixer: failed to call avformat_alloc_output_context2\n");
        return -1;
    }
    AVStream* stream_a = NULL;
    stream_a = avformat_new_stream(_fmt_ctx_out, NULL);
    if (stream_a == NULL)
    {
        printf("Mixer: failed to call avformat_new_stream\n");
        return -1;
    }
    _index_a_out = 0;

    stream_a->codec->codec_type = AVMEDIA_TYPE_AUDIO;
    AVCodec* codec_mp3 = avcodec_find_encoder(AV_CODEC_ID_MP3);
    stream_a->codec->codec = codec_mp3;
    stream_a->codec->sample_rate = 44100;
    stream_a->codec->channels = 2;
    stream_a->codec->channel_layout = av_get_default_channel_layout(2);
    stream_a->codec->sample_fmt = codec_mp3->sample_fmts[0];
    stream_a->codec->bit_rate = 320000;
    stream_a->codec->time_base.num = 1;
    stream_a->codec->time_base.den = stream_a->codec->sample_rate;
    stream_a->codec->codec_tag = 0;


    if (_fmt_ctx_out->oformat->flags & AVFMT_GLOBALHEADER)
        stream_a->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;

    if (avcodec_open2(stream_a->codec, stream_a->codec->codec, NULL) < 0)
    {
        printf("Mixer: failed to call avcodec_open2\n");
        return -1; 
    }
    if (!(_fmt_ctx_out->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&_fmt_ctx_out->pb, fileName, AVIO_FLAG_WRITE) < 0)
        {
            printf("Mixer: failed to call avio_open\n");
            return -1;
        }
    }

    if (avformat_write_header(_fmt_ctx_out, NULL) < 0)
    {
        printf("Mixer: failed to call avformat_write_header\n");
        return -1;
    }

    bool b = (!_fmt_ctx_out->streams[0]->time_base.num && _fmt_ctx_out->streams[0]->codec->time_base.num);

    av_dump_format(_fmt_ctx_out, _index_a_out, fileName, 1);

    _fifo_spk = av_audio_fifo_alloc(_fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt, _fmt_ctx_spk->streams[_index_spk]->codec->channels, 30*_fmt_ctx_spk->streams[_index_spk]->codec->frame_size);
    _fifo_mic = av_audio_fifo_alloc(_fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt, _fmt_ctx_mic->streams[_index_mic]->codec->channels, 30*_fmt_ctx_mic->streams[_index_spk]->codec->frame_size);

    return 0;
}

int InitFilter(char* filter_desc)
{
    char args_spk[512];
    char* pad_name_spk = "in0";
    char args_mic[512];
    char* pad_name_mic = "in1";

    AVFilter* filter_src_spk = avfilter_get_by_name("abuffer");
    AVFilter* filter_src_mic = avfilter_get_by_name("abuffer");
    AVFilter* filter_sink = avfilter_get_by_name("abuffersink");
    AVFilterInOut* filter_output_spk = avfilter_inout_alloc();
    AVFilterInOut* filter_output_mic = avfilter_inout_alloc();
    AVFilterInOut* filter_input = avfilter_inout_alloc();
    _filter_graph = avfilter_graph_alloc();

    sprintf_s(args_spk, sizeof(args_spk), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x", 
        _fmt_ctx_spk->streams[_index_spk]->codec->time_base.num, 
        _fmt_ctx_spk->streams[_index_spk]->codec->time_base.den, 
        _fmt_ctx_spk->streams[_index_spk]->codec->sample_rate, 
        av_get_sample_fmt_name(_fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt), 
        _fmt_ctx_spk->streams[_index_spk]->codec->channel_layout);
    sprintf_s(args_mic, sizeof(args_mic), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x", 
        _fmt_ctx_mic->streams[_index_mic]->codec->time_base.num, 
        _fmt_ctx_mic->streams[_index_mic]->codec->time_base.den, 
        _fmt_ctx_mic->streams[_index_mic]->codec->sample_rate, 
        av_get_sample_fmt_name(_fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt), 
        _fmt_ctx_mic->streams[_index_mic]->codec->channel_layout);

    //sprintf_s(args_spk, sizeof(args_spk), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x", _fmt_ctx_out->streams[_index_a_out]->codec->time_base.num, _fmt_ctx_out->streams[_index_a_out]->codec->time_base.den, _fmt_ctx_out->streams[_index_a_out]->codec->sample_rate, av_get_sample_fmt_name(_fmt_ctx_out->streams[_index_a_out]->codec->sample_fmt), _fmt_ctx_out->streams[_index_a_out]->codec->channel_layout);
    //sprintf_s(args_mic, sizeof(args_mic), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x", _fmt_ctx_out->streams[_index_a_out]->codec->time_base.num, _fmt_ctx_out->streams[_index_a_out]->codec->time_base.den, _fmt_ctx_out->streams[_index_a_out]->codec->sample_rate, av_get_sample_fmt_name(_fmt_ctx_out->streams[_index_a_out]->codec->sample_fmt), _fmt_ctx_out->streams[_index_a_out]->codec->channel_layout);


    int ret = 0;
    ret = avfilter_graph_create_filter(&_filter_ctx_src_spk, filter_src_spk, pad_name_spk, args_spk, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- src spk\n");
        return -1;
    }
    ret = avfilter_graph_create_filter(&_filter_ctx_src_mic, filter_src_mic, pad_name_mic, args_mic, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- src mic\n");
        return -1;
    }

    ret = avfilter_graph_create_filter(&_filter_ctx_sink, filter_sink, "out", NULL, NULL, _filter_graph);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_create_filter -- sink\n");
        return -1;
    }
    AVCodecContext* encodec_ctx = _fmt_ctx_out->streams[_index_a_out]->codec;
    ret = av_opt_set_bin(_filter_ctx_sink, "sample_fmts", (uint8_t*)&encodec_ctx->sample_fmt, sizeof(encodec_ctx->sample_fmt), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- sample_fmts\n");
        return -1;
    }
    ret = av_opt_set_bin(_filter_ctx_sink, "channel_layouts", (uint8_t*)&encodec_ctx->channel_layout, sizeof(encodec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- channel_layouts\n");
        return -1;
    }
    ret = av_opt_set_bin(_filter_ctx_sink, "sample_rates", (uint8_t*)&encodec_ctx->sample_rate, sizeof(encodec_ctx->sample_rate), AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("Filter: failed to call av_opt_set_bin -- sample_rates\n");
        return -1;
    }

    filter_output_spk->name = av_strdup(pad_name_spk);
    filter_output_spk->filter_ctx = _filter_ctx_src_spk;
    filter_output_spk->pad_idx = 0;
    filter_output_spk->next = filter_output_mic;

    filter_output_mic->name = av_strdup(pad_name_mic);
    filter_output_mic->filter_ctx = _filter_ctx_src_mic;
    filter_output_mic->pad_idx = 0;
    filter_output_mic->next = NULL;

    filter_input->name = av_strdup("out");
    filter_input->filter_ctx = _filter_ctx_sink;
    filter_input->pad_idx = 0;
    filter_input->next = NULL;

    AVFilterInOut* filter_outputs[2];
    filter_outputs[0] = filter_output_spk;
    filter_outputs[1] = filter_output_mic;

    ret = avfilter_graph_parse_ptr(_filter_graph, filter_desc, &filter_input, filter_outputs, NULL);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_parse_ptr\n");
        return -1;
    }

    ret = avfilter_graph_config(_filter_graph, NULL);
    if (ret < 0)
    {
        printf("Filter: failed to call avfilter_graph_config\n");
        return -1;
    }

    avfilter_inout_free(&filter_input);
    av_free(filter_src_spk);
    av_free(filter_src_mic);
    avfilter_inout_free(filter_outputs);
    //av_free(filter_outputs);

    char* temp = avfilter_graph_dump(_filter_graph, NULL);
    printf("%s\n", temp);

    return 0;
}

DWORD WINAPI SpeakerCapThreadProc(LPVOID lpParam)
{
    AVFrame* pFrame = av_frame_alloc();
    AVPacket packet;
    av_init_packet(&packet);

    int got_sound;

    while (_state == CaptureState::RUNNING)
    {
        packet.data = NULL;
        packet.size = 0;

        if (av_read_frame(_fmt_ctx_spk, &packet) < 0)
        {
            continue;
        }
        if (packet.stream_index == _index_spk)
        {
            if (avcodec_decode_audio4(_fmt_ctx_spk->streams[_index_spk]->codec, pFrame, &got_sound, &packet) < 0)
            {
                break;
            }
            av_free_packet(&packet);

            if (!got_sound)
            {
                continue;
            }

            int fifo_spk_space = av_audio_fifo_space(_fifo_spk);
            while(fifo_spk_space < pFrame->nb_samples && _state == CaptureState::RUNNING)
            {
                Sleep(10);
                printf("_fifo_spk full !\n");
                fifo_spk_space = av_audio_fifo_space(_fifo_spk);
            }

            if (fifo_spk_space >= pFrame->nb_samples)
            {
                EnterCriticalSection(&_section_spk);
                int nWritten = av_audio_fifo_write(_fifo_spk, (void**)pFrame->data, pFrame->nb_samples);
                LeaveCriticalSection(&_section_spk);
            }
        }
    }
    av_frame_free(&pFrame);

    return 0;
}

DWORD WINAPI MicrophoneCapThreadProc(LPVOID lpParam)
{
    AVFrame* pFrame = av_frame_alloc();
    AVPacket packet;
    av_init_packet(&packet);

    int got_sound;

    while (_state == CaptureState::PREPARED)
    {

    }

    while (_state == CaptureState::RUNNING)
    {
        packet.data = NULL;
        packet.size = 0;

        if (av_read_frame(_fmt_ctx_mic, &packet) < 0)
        {
            continue;
        }
        if (packet.stream_index == _index_mic)
        {
            if (avcodec_decode_audio4(_fmt_ctx_mic->streams[_index_mic]->codec, pFrame, &got_sound, &packet) < 0)
            {
                break;
            }
            av_free_packet(&packet);

            if (!got_sound)
            {
                continue;
            }

            int fifo_mic_space = av_audio_fifo_space(_fifo_mic);
            while(fifo_mic_space < pFrame->nb_samples && _state == CaptureState::RUNNING)
            {
                Sleep(10);
                printf("_fifo_mic full !\n");
                fifo_mic_space = av_audio_fifo_space(_fifo_mic);
            }

            if (fifo_mic_space >= pFrame->nb_samples)
            {
                EnterCriticalSection(&_section_mic);
                int temp = av_audio_fifo_space(_fifo_mic);
                int temp2 = pFrame->nb_samples;
                int nWritten = av_audio_fifo_write(_fifo_mic, (void**)pFrame->data, pFrame->nb_samples);
                LeaveCriticalSection(&_section_mic);
            }
        }
    }
    av_frame_free(&pFrame);

    return 0;
}

int main()
{
    int ret = 0;

    InitRecorder();

    char fileName[128];
    char* outFileType = ".mp3";

    time_t rawtime;
    tm* timeInfo;
    time(&rawtime);
    timeInfo = localtime(&rawtime);
    sprintf_s(fileName, sizeof(fileName), "%d_%d_%d_%d_%d_%d%s",
        timeInfo->tm_year + 1900, timeInfo->tm_mon + 1, timeInfo->tm_mday,
        timeInfo->tm_hour, timeInfo->tm_min, timeInfo->tm_sec, outFileType);

    char* filter_desc = "[in0][in1]amix=inputs=2[out]";

    //ret = OpenSpeakerInput("dshow", "audio=virtual-audio-capturer");
    ret = OpenSpeakerInput(NULL, "故鄉.mp3");
    if (ret < 0)
    {
        goto Release;
    }
    //ret = OpenMicrophoneInput("dshow", "audio=External Microphone (Conexant S");
    ret = OpenMicrophoneInput(NULL, "旅行.mp3");
    if (ret < 0)
    {
        goto Release;
    }
    ret = OpenFileOutput(fileName);
    if (ret < 0)
    {
        goto Release;
    }
    ret = InitFilter(filter_desc);
    if (ret < 0)
    {
        goto Release;
    }

    _state = CaptureState::RUNNING;

    InitializeCriticalSection(&_section_spk);
    InitializeCriticalSection(&_section_mic);

    CreateThread(NULL, 0, SpeakerCapThreadProc, 0, 0, NULL);
    CreateThread(NULL, 0, MicrophoneCapThreadProc, 0, 0, NULL);

    int tmpFifoFailed = 0;
    int64_t frame_count = 0;

    while (_state != CaptureState::FINISHED)
    {
        if (_kbhit())
        {
            _state = CaptureState::STOPPED;
            break;
        }
        else
        {
            int ret = 0;
            AVFrame* pFrame_spk = av_frame_alloc();
            AVFrame* pFrame_mic = av_frame_alloc();


            AVPacket packet_out;

            int got_packet_ptr = 0;

            int fifo_spk_size = av_audio_fifo_size(_fifo_spk);
            int fifo_mic_size = av_audio_fifo_size(_fifo_mic);
            int frame_spk_min_size = _fmt_ctx_spk->streams[_index_spk]->codec->frame_size;
            int frame_mic_min_size = _fmt_ctx_mic->streams[_index_mic]->codec->frame_size;
            if (fifo_spk_size >= frame_spk_min_size && fifo_mic_size >= frame_mic_min_size)
            {
                tmpFifoFailed = 0;

                pFrame_spk->nb_samples = frame_spk_min_size;
                pFrame_spk->channel_layout = _fmt_ctx_spk->streams[_index_spk]->codec->channel_layout;
                pFrame_spk->format = _fmt_ctx_spk->streams[_index_spk]->codec->sample_fmt;
                pFrame_spk->sample_rate = _fmt_ctx_spk->streams[_index_spk]->codec->sample_rate;
                av_frame_get_buffer(pFrame_spk, 0);

                pFrame_mic->nb_samples = frame_mic_min_size;
                pFrame_mic->channel_layout = _fmt_ctx_mic->streams[_index_mic]->codec->channel_layout;
                pFrame_mic->format = _fmt_ctx_mic->streams[_index_mic]->codec->sample_fmt;
                pFrame_mic->sample_rate = _fmt_ctx_mic->streams[_index_mic]->codec->sample_rate;
                av_frame_get_buffer(pFrame_mic, 0);

                EnterCriticalSection(&_section_spk);
                ret = av_audio_fifo_read(_fifo_spk, (void**)pFrame_spk->data, frame_spk_min_size);
                LeaveCriticalSection(&_section_spk);

                EnterCriticalSection(&_section_mic);
                ret = av_audio_fifo_read(_fifo_mic, (void**)pFrame_mic->data, frame_mic_min_size);
                LeaveCriticalSection(&_section_mic);

                pFrame_spk->pts = av_frame_get_best_effort_timestamp(pFrame_spk);
                pFrame_mic->pts = av_frame_get_best_effort_timestamp(pFrame_mic);

                BufferSourceContext* s = (BufferSourceContext*)_filter_ctx_src_spk->priv;
                bool b1 = (s->sample_fmt != pFrame_spk->format);
                bool b2 = (s->sample_rate != pFrame_spk->sample_rate);
                bool b3 = (s->channel_layout != pFrame_spk->channel_layout);
                bool b4 = (s->channels != pFrame_spk->channels);

                ret = av_buffersrc_add_frame(_filter_ctx_src_spk, pFrame_spk);
                if (ret < 0)
                {
                    printf("Mixer: failed to call av_buffersrc_add_frame (speaker)\n");
                    break;
                }

                ret = av_buffersrc_add_frame(_filter_ctx_src_mic, pFrame_mic);
                if (ret < 0)
                {
                    printf("Mixer: failed to call av_buffersrc_add_frame (microphone)\n");
                    break;
                }

                while (1)
                {
                    AVFrame* pFrame_out = av_frame_alloc();

                    ret = av_buffersink_get_frame_flags(_filter_ctx_sink, pFrame_out, 0);
                    if (ret < 0)
                    {
                        printf("Mixer: failed to call av_buffersink_get_frame_flags\n");
                        break;
                    }
                    if (pFrame_out->data[0] != NULL)
                    {
                        av_init_packet(&packet_out);
                        packet_out.data = NULL;
                        packet_out.size = 0;

                        ret = avcodec_encode_audio2(_fmt_ctx_out->streams[_index_a_out]->codec, &packet_out, pFrame_out, &got_packet_ptr);
                        if (ret < 0)
                        {
                            printf("Mixer: failed to call avcodec_decode_audio4\n");
                            break;
                        }
                        if (got_packet_ptr)
                        {
                            packet_out.stream_index = _index_a_out;
                            packet_out.pts = frame_count * _fmt_ctx_out->streams[_index_a_out]->codec->frame_size;
                            packet_out.dts = packet_out.pts;
                            packet_out.duration = _fmt_ctx_out->streams[_index_a_out]->codec->frame_size;

                            packet_out.pts = av_rescale_q_rnd(packet_out.pts, 
                                _fmt_ctx_out->streams[_index_a_out]->codec->time_base,
                                _fmt_ctx_out->streams[_index_a_out]->time_base,
                                (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
                            packet_out.dts = packet_out.pts;
                            packet_out.duration = av_rescale_q_rnd(packet_out.duration,
                                _fmt_ctx_out->streams[_index_a_out]->codec->time_base,
                                _fmt_ctx_out->streams[_index_a_out]->time_base,
                                (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));

                            frame_count++;

                            ret = av_interleaved_write_frame(_fmt_ctx_out, &packet_out);
                            if (ret < 0)
                            {
                                printf("Mixer: failed to call av_interleaved_write_frame\n");
                            }
                            printf("Mixer: write frame to file\n");
                        }
                        av_free_packet(&packet_out);                    
                    }
                    av_frame_free(&pFrame_out);
                }
            }
            else
            {
                tmpFifoFailed++;
                Sleep(20);
                if (tmpFifoFailed > 300)
                {
                    _state = CaptureState::STOPPED;
                    Sleep(30);
                    break;
                }
            }
            av_frame_free(&pFrame_spk);
            av_frame_free(&pFrame_mic);
        }
    }

    av_write_trailer(_fmt_ctx_out);

Release:
    av_audio_fifo_free(_fifo_spk);
    av_audio_fifo_free(_fifo_mic);

    avfilter_free(_filter_ctx_src_spk);
    avfilter_free(_filter_ctx_src_mic);
    avfilter_free(_filter_ctx_sink);

    avfilter_graph_free(&_filter_graph);


    if (_fmt_ctx_out)
    {
        avio_close(_fmt_ctx_out->pb);
    }

    avformat_close_input(&_fmt_ctx_spk);
    avformat_close_input(&_fmt_ctx_mic);
    avformat_free_context(_fmt_ctx_out);

    return ret;
}

下載地址

利用ffmpeg的filter混音

概述程式碼下載地址概述一個簡單的混音demo，把檔案a和檔案b的音訊混為一個音訊流輸出並存為檔案，只處理每個檔案的第一個音訊流注：原始碼是網友Larry_Liang(1085803139)寫的，我幫其除錯通過，存在這裡以免備忘

在遊戲中何為音效和混音的區別！

記得看過一本書，書上說聲音是電影的50%.電影的視覺效果搭配上恰當的聲音就能夠營造出一部精彩的電影，醞釀最純的情緒，使故事飽滿而又深刻，那更何況如今火熱的遊戲。自然也少不了音效的功勞，可以說其發展前景不容小噓。據統計資料

Python3+疊加兩個音訊檔案，實現混音

將兩個單聲道的音訊檔案疊加成一個新的音訊檔案。實現：a + b = c(新) 同理，如果用 c - b 可以得到檔案a 同理，也可以將多個單聲道音訊檔案疊加到一起。注意：兩個檔案要是相同聲道的檔案，這裡的程式碼只適用1聲道的。兩個音訊檔案的rate也要是相同的。當兩個音訊檔案的

Python3+疊加兩個音頻文件，實現混音

_array int dir sam close ram sample array 疊加將兩個單聲道的音頻文件疊加成一個新的音頻文件。實現：a + b = c(新) 同理，如果用 c - b 可以得到文件a 同理，也可以將多個單聲道音頻文件疊加到一起。註意：兩

ffmpeg混音命令

ffmpeg -i INPUT1 -i INPUT2 -i INPUT3 -filter_complex amix=inputs=3:duration=first:dropout_transition=3 OUTPUT inputs The number of inputs. If u

PortAudio採集和播放音訊，實現一個雙路混音器

混音，顧名思義，就是把多個音源混合的過程，是一個很常見的應用。這兩天我也做了一個雙路混音器，當然，我沒有做多麼專業的音訊訊號處理，只是一個簡單的混音，調節各路音量，並實現了一些音效處理。主要功能有：採集硬體裝置，讀取wav檔案，播放，混音，音量調節，音訊節奏、音調的調節，wa

android 音訊編解碼混音 mp3編碼解碼 pcm編碼解碼

特別提示：這裡所提供的原始碼真實有效，並且只是出售原始碼，不提供專案整合服務。如果覺得效果是自己想要的歡迎使用。（感謝各位慷慨資助，謝謝）音訊編解碼，錄製鈴音，增加混音效果，目前只支援編解碼MP3格式檔案。將音訊檔案放到SDCard/RecordMixMp3/music下

實時音視訊直播新玩法中的混音技術

作者｜冼牛編輯｜覃雲最近半年，視訊直播領域中產生不少創新玩法，其中包括 K 歌直播和合唱直播。這些創新玩法都用到實時音訊混音技術。今天我們來聊一下混音技術的實現，及其在創新玩法中的應用。混音的應用場景混音，顧名思義，就是把兩路或者多路音訊流混合在一

FFMPEG調節音訊的音量大小，混音

找了很久，國內網站沒有，最後還是用google到ffmpeg論壇，找到了。 http://ffmpeg.gusari.org/viewtopic.php?f=25&t=517 防止論壇倒閉，在這裡備份下： This thread is OUTDATED

[RK3288][Android6.0] Audio中的混音過程小結

Platform: Rockchip OS: Android 6.0 Kernel: 3.10.92 bool AudioFlinger::PlaybackThread::threadLoop

一個新型的混音演算法

針對傳統經典的線性混音，路數多時音量變小的缺點；自創了一個新的混音演算法，解決該問題，聲音不會忽大忽小，不會溢位，而且該方法還能一定程度抑制噪聲，突出人聲，能實時計算量小，專利已經受理。對於混音方法，網上和文獻上流傳許多方法。 1.平均權重 2.隨幅值變化的權

使用OpenAL混音，新增音訊特效

1. OpenAL 支援的混音型別 const static int AUDIO_EFFECT_GENERIC = 0; const static int AUDIO_EFFECT_PADDED_CELL = 1;//軟墊室

Android視訊編輯器（五）音訊編解碼、從視訊中分離音訊、音訊混音、音訊音量調節等

/** * 歸一化混音 * */ public static byte[] normalizationMix(byte[][] allAudioBytes){ if (allAudioBytes == null || allAudioBytes.length

Python和C|C++的混編（二）：利用Cython進行混編

cde uil 有時當前 class def 將在 python 混編還能夠使用Cython來實現混編 1 下載Cython。用python setup.py install進行安裝 2 一個實例 ① 創建helloworld文件夾創建hellowor

利用NSAttributedString實現圖文混排

nes -s union ref and 代理方法 tag erl efault UILabel 和 UITextView 都能添加 NSAttributedString 屬性字符串,通過這一點,可以實現帶有屬性的文字和文字內包含圖片的文本內容展示. 效果如下: 1-

arm linux利用alsa驅動並使用usb音頻設備

lin 鏈接 fat32 arm開發板系列準備工作壓縮情況註意事項一.背景: arm linux的內核版本是3.13.0 二.準備工作添加alsa驅動到內核中,也就是在編譯內核的時候加入以下選項: 接下來就重新編譯內核即可三.交叉編譯alsa-lib和al

利用說話人嵌入實現混響環境下遠距離語音的魯棒說話人識別

Robust Speaker Recognition from Distant Speech under Real Reverberant Environments Using Speaker Embeddings 3.演講者識別系統在本節中，我們將描述為我們的實驗

利用css進行圖文混排

<!doctype html> <html> <head> <meta charset="utf-8"> <title>圖片及文字的混排</title> <style stype="text/css"

unity3d中利用AVPro video播放mkv視訊切換音軌，多音軌切換，實現ktv開啟或關閉原唱功能

unity3d利用外掛AVPro Video播放mkv格式視訊、切換音軌，實現ktv點歌伴唱功能（貌似只支援Win10和Android）一、播放mkv檔案首先將高版本(低版本好像不支援mkv)外掛A

iOS 上利用 fallback 機制為不同語言的文字 (script) 設定字型，從而使得文字混排更為優雅

如果您是一位 WEB 開發者，相信您對 CSS 的 font-family 屬性一定不會陌生。通常我們會為 font-family 屬性設定一長串的字型(家族)列表，就像這樣的： .text { font-family: Menlo, Monaco, Consolas, "Courier

利用ffmpeg的filter混音

概述

流程介紹

流程如下：

執行緒之間資料走向如下：

程式碼

下載地址

相關推薦