ffmpeg 音频转码

Posted 2020-07-26 王纲

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了ffmpeg 音频转码相关的知识，希望对你有一定的参考价值。

大多数厂家摄像机输出的音频流格式都是PCM，有一些场合（比如将音视频流保存成TS流）需要将PCM格式转成AAC格式。基本的思路是先解码得到音频帧，再将音频帧编码成AAC格式。解码和编码之间需要添加一个filter，filter起到适配的作用。

首先解码：

        /**
         * Decode one audio packet and pass the decoded frame through the filter graph.
         *
         * The decoded frame is pushed into buffersrc_ctx, then a single frame is
         * pulled from buffersink_ctx. If the pulled frame carries a pts, it is
         * rescaled from the sink's input time base into encoderContext->time_base
         * and offset by startTime (startTime is reset to 0 if it was AV_NOPTS_VALUE).
         *
         * @param sample  compressed input packet handed to avcodec_decode_audio4().
         * @return the filtered frame; it is not freed here, so the caller must
         *         release it. nullptr when the decoder produced no frame, when the
         *         sink has no frame available (EAGAIN / EOF), on a filter error, or
         *         when a frame could not be allocated.
         */
        AVFrame * decode(AVPacket* sample)
        {
            AVFrame* frame = av_frame_alloc();
            if (!frame)
                return nullptr;

            int gotframe = 0;
            auto length = avcodec_decode_audio4(decoderContext, frame, &gotframe, sample);
            if (length < 0 || gotframe == 0) {
                av_frame_free(&frame);
                return nullptr;
            }
            frame->pts = frame->pkt_pts;

            int ret = av_buffersrc_add_frame_flags(buffersrc_ctx, frame, AV_BUFFERSRC_FLAG_PUSH);
            /* The decoded frame is not used again after the push, successful or not. */
            av_frame_free(&frame);
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "Error while feeding the audio filtergraph\n");
                return nullptr;
            }

            /* pull filtered audio from the filtergraph */
            AVFrame *filt_frame = av_frame_alloc();
            if (!filt_frame)
                return nullptr;

            ret = av_buffersink_get_frame_flags(buffersink_ctx, filt_frame, AV_BUFFERSINK_FLAG_NO_REQUEST);
            if (ret < 0) {
                /* EAGAIN / EOF (nothing ready yet) and real errors all yield "no frame". */
                av_frame_free(&filt_frame);
                return nullptr;
            }

            if (filt_frame->pts != AV_NOPTS_VALUE) {
                if (startTime == AV_NOPTS_VALUE)
                    startTime = 0;
                /* startTime is expressed in 1/AV_TIME_BASE units. */
                AVRational av_time_base_q;
                av_time_base_q.num = 1;
                av_time_base_q.den = AV_TIME_BASE;
                filt_frame->pts =
                    av_rescale_q(filt_frame->pts, buffersink_ctx->inputs[0]->time_base, encoderContext->time_base)
                    - av_rescale_q(startTime, av_time_base_q, encoderContext->time_base);
            }
            return filt_frame;
        }

　　decode 得到AVFrame，也即音频帧。这个frame不能直接作为编码的源，要先经过filter。原因之一是有些摄像机输出的音频包每个packet只有320个字节，而AAC编码器每帧需要1024个采样点。

初始化Filter：

        int initFilters()
        {
                char args[512];
                int ret;
                AVFilter *abuffersrc  = avfilter_get_by_name("abuffer");
                AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
                AVFilterInOut *outputs = avfilter_inout_alloc();
                AVFilterInOut *inputs  = avfilter_inout_alloc();
                static const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE };
                static const int64_t out_channel_layouts[] = { AV_CH_LAYOUT_MONO, -1 };
                static const int out_sample_rates[] = {decoderContext->sample_rate , -1 };
                AVRational time_base = input->time_base;
                filter_graph = avfilter_graph_alloc();

                /* buffer audio source: the decoded frames from the decoder will be inserted here. */

                if (!decoderContext->channel_layout)
                    decoderContext->channel_layout = av_get_default_channel_layout(decoderContext->channels);
                sprintf_s(args, sizeof(args),
                    "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
                    time_base.num, time_base.den, decoderContext->sample_rate,
                    av_get_sample_fmt_name(decoderContext->sample_fmt), decoderContext->channel_layout);
                ret = avfilter_graph_create_filter(&buffersrc_ctx, abuffersrc, "in",
                    args, NULL, filter_graph);
                if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
                    return ret;
                }

                /* buffer audio sink: to terminate the filter chain. */
                ret = avfilter_graph_create_filter(&buffersink_ctx, abuffersink, "out",
                    NULL, NULL, filter_graph);
                if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
                    return ret;
                }

                ret = av_opt_set_int_list(buffersink_ctx, "sample_fmts", out_sample_fmts, -1,
                    AV_OPT_SEARCH_CHILDREN);
                if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
                    return ret;
                }

                ret = av_opt_set_int_list(buffersink_ctx, "channel_layouts", out_channel_layouts, -1,
                    AV_OPT_SEARCH_CHILDREN);
                if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
                    return ret;
                }

                ret = av_opt_set_int_list(buffersink_ctx, "sample_rates", out_sample_rates, -1,
                    AV_OPT_SEARCH_CHILDREN);
                if (ret < 0) {
                    av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
                    return ret;
                }

                /* Endpoints for the filter graph. */
                outputs->name       = av_strdup("in");
                outputs->filter_ctx = buffersrc_ctx;
                outputs->pad_idx    = 0;
                outputs->next       = NULL;

                inputs->name       = av_strdup("out");
                inputs->filter_ctx = buffersink_ctx;
                inputs->pad_idx    = 0;
                inputs->next       = NULL;

                if ((ret = avfilter_graph_parse_ptr(filter_graph, "anull",
                    &inputs, &outputs, nullptr)) < 0)
                    return ret;

                if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
                    return ret;

                av_buffersink_set_frame_size(buffersink_ctx, 1024);
            return 0;
        }

Filter可以简单理解为FIFO（当然实际上不是），输入是解码后的AVFrame，输出是编码的源头。AVFrame经过Filter以后就可以编码了。

        /**
         * Encode one filtered audio frame into a packet.
         *
         * Before encoding, the frame's nb_samples, format and channel_layout are
         * overwritten with the encoder's frame_size, sample_fmt and channel_layout.
         * On success the packet's pts, dts and duration are rescaled from
         * encoderContext->time_base to output->time_base.
         *
         * @param frame  frame to encode; a non-null frame is always freed by this
         *               function. A null frame is accepted and produces no packet.
         * @return the encoded packet, released by the shared_ptr deleter, or
         *         nullptr when frame is null, when the packet could not be
         *         allocated, when encoding failed, or when the encoder produced
         *         no packet for this frame.
         */
        shared_ptr<AVPacket> encode(AVFrame * frame)
        {
            /* decode() returns nullptr when no frame is ready; tolerate that here. */
            if (!frame)
                return nullptr;

            AVPacket *pkt = (AVPacket*)av_malloc(sizeof(AVPacket));
            if (!pkt) {
                av_frame_free(&frame);
                return nullptr;
            }
            /* Initialize before handing ownership over, so the deleter never sees garbage. */
            av_init_packet(pkt);
            pkt->data = nullptr;
            pkt->size = 0;
            shared_ptr<AVPacket> packet(pkt, [](AVPacket *p){av_free_packet(p);av_freep(&p);});

            frame->nb_samples = encoderContext->frame_size;
            frame->format = encoderContext->sample_fmt;
            frame->channel_layout = encoderContext->channel_layout;

            int gotpacket = 0;
            int hr = avcodec_encode_audio2(encoderContext.get(), pkt, frame, &gotpacket);
            av_frame_free(&frame);
            if (hr < 0) {
                av_log(NULL, AV_LOG_ERROR, "Error while encoding the audio frame\n");
                return nullptr;
            }
            if (!gotpacket)
                return nullptr;

            if (pkt->pts != AV_NOPTS_VALUE)
                pkt->pts      = av_rescale_q(pkt->pts,      encoderContext->time_base, output->time_base);
            if (pkt->dts != AV_NOPTS_VALUE)
                pkt->dts      = av_rescale_q(pkt->dts,      encoderContext->time_base, output->time_base);
            if (pkt->duration > 0)
                pkt->duration = int(av_rescale_q(pkt->duration, encoderContext->time_base, output->time_base));
            return packet;
        }

　　实际运用中我们用到了智能指针shared_ptr<AVPacket>，也可以不用，但是要注意内存泄漏问题。如果程序运行在多核上，建议将AVFilterGraph的nb_threads设置为1。以上代码久经考验，放心使用。如果有什么问题，可以联系我 350197870。

以上是关于ffmpeg 音频转码的主要内容，如果未能解决你的问题，请参考以下文章