FFMpeg SDK使用4调用FFmpeg SDK解析封装格式的视频为音频流和视频流

Posted 2021-08-02 叮咚咕噜

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了FFMpeg SDK使用4调用FFmpeg SDK解析封装格式的视频为音频流和视频流相关的知识，希望对你有一定的参考价值。

音视频文件=（音频流 + 视频流） * 同步信息
实现方式：音频和视频通过某种标准格式进行复用，生成某种封装格式
封装格式：文件扩展名——mp4/avi/flv/mkv
从底层考虑，我们可以使用的只有视频解码器、音频解码器，或者再加上一些附加的字幕解码等额外信息，却不存在所谓的mp4解码器或者avi解码器
为了可以正确播放视频文件，必须将封装格式的视频文件分离出视频和音频信息分别进行解码和播放
FFMpeg专门定义了一个库来处理涉及文件封装格式的功能，即libavformat

一、FFMpeg解复用-解码器所包含的结构

这一过程实际上包括了封装文件的解复用和音频/视频解码两个步骤，因此需要定义的结构体大致包括用于解码和解封装的部分。我们定义下面这样的一个结构体实现这个功能：

/**
 * DemuxingVideoAudioContex
 *
 * Holds the demuxer and decoder context objects used while splitting a
 * container file into its video and audio streams.
 */
typedef struct
{
    AVFormatContext *fmt_ctx;        /* container-format context of the input */
    AVCodecContext *video_dec_ctx;   /* decoder context of the video stream */
    AVCodecContext *audio_dec_ctx;   /* decoder context of the audio stream */
    AVStream *video_stream;          /* selected video stream */
    AVStream *audio_stream;          /* selected audio stream */
    AVFrame *frame;
    AVPacket pkt;

    int video_stream_idx;            /* index of video_stream in fmt_ctx->streams */
    int audio_stream_idx;            /* index of audio_stream in fmt_ctx->streams */
    int width;                       /* copied from video_dec_ctx->width */
    int height;                      /* copied from video_dec_ctx->height */

    uint8_t *video_dst_data[4];      /* image planes filled in by av_image_alloc() */
    int video_dst_linesize[4];       /* per-plane line sizes filled in by av_image_alloc() */
    int video_dst_bufsize;           /* value returned by av_image_alloc() */
    enum AVPixelFormat pix_fmt;      /* copied from video_dec_ctx->pix_fmt */
} DemuxingVideoAudioContex;

这个结构体中的大部分数据类型我们在前面做编码/解码等功能时已经见到过，另外几个是涉及到视频文件的复用的，其中有：

AVFormatContext：用于处理音视频封装格式的上下文信息。
AVStream：表示音频或者视频流的结构。
AVPixelFormat：枚举类型，表示图像像素的格式，最常用的是AV_PIX_FMT_YUV420P

二、FFMpeg解复用-解码的过程

1、相关结构的初始化

与使用FFMpeg进行其他操作一样，首先需注册FFMpeg组件：

/* register all formats and codecs */
av_register_all();

随后，我们需要打开待处理的音视频文件。然而在此我们不使用打开文件的fopen函数，而是使用avformat_open_input函数。该函数不但会打开输入文件，而且可以根据输入文件读取相应的格式信息。该函数的声明如下：

/*
 * Opens the input named by url and reads its format information.
 * *ps may be NULL, in which case the AVFormatContext is allocated by the
 * function. Returns 0 on success, a negative error code on failure.
 */
int avformat_open_input(AVFormatContext **ps, const char *url, AVInputFormat *fmt, AVDictionary **options);

该函数的各个参数的作用为：

ps：根据输入文件接收与格式相关的句柄信息；可以指向NULL，那么AVFormatContext类型的实例将由该函数进行分配。
url：视频url或者文件路径；
fmt：强制输入格式，可设置为NULL以自动检测；
options：传递给AVFormatContext及解复用器私有选项的字典；函数返回时，该字典会被替换为只包含未被识别的选项的字典；可设置为NULL；
返回值：成功返回0，失败则返回负的错误码；

该函数的调用方式为：

/* open input file, and allocate format context */
if (avformat_open_input(&(va_ctx.fmt_ctx), files.src_filename, NULL, NULL) < 0)
{
    /* "\n" (not "\\n") so the message ends with a real newline */
    fprintf(stderr, "Could not open source file %s\n", files.src_filename);
    return -1;
}

打开文件后，调用avformat_find_stream_info函数获取文件中的流信息。该函数的声明为：

/* Reads the stream information of an opened input; a negative return value means failure. */
int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options);

该函数的第一个参数即前面的文件句柄；第二个参数options是一个AVDictionary指针数组，第i个元素保存第i路流对应解码器的选项，函数返回时其中只保留未被识别的选项，通常可设为NULL。调用方式如：

/* retrieve stream information */
if (avformat_find_stream_info(va_ctx.fmt_ctx, NULL) < 0)
{
    /* "\n" (not "\\n") so the message ends with a real newline */
    fprintf(stderr, "Could not find stream information\n");
    return -1;
}

获取文件中的流信息后，下一步则是获取文件中的音频和视频流，并准备对音频和视频信息进行解码。获取文件中的流使用av_find_best_stream函数，其声明如：

/*
 * Selects a stream of the given media type from the input.
 * A non-negative return value is the index of the chosen stream in
 * ic->streams; a negative return value is an error code.
 */
int av_find_best_stream(AVFormatContext *ic,
                    enum AVMediaType type,
                    int wanted_stream_nb,
                    int related_stream,
                    AVCodec **decoder_ret,
                    int flags);

其中各个参数的意义：

ic：已打开的视频文件句柄（AVFormatContext）；其中每一路流都带有对应的AVCodecContext，里面保存了码流相关的信息，如宽、高、数据格式等解码需要的东西
type：表示数据的类型，常用的有AVMEDIA_TYPE_VIDEO表示视频，AVMEDIA_TYPE_AUDIO表示音频等；
wanted_stream_nb：我们期望获取的数据流的编号（索引），设置为-1则自动选择；
related_stream：获取相关的音视频流，如果没有则设为-1；
decoder_ret：返回这一路数据流的解码器；
flags：未定义；
返回值：函数执行成功返回所选流的编号（非负的流索引），失败则返回负的错误码；
函数执行成功后，便可调用avcodec_find_decoder和avcodec_open2打开解码器准备解码音视频流。该部分的代码实现如：

/**
 * Find the best stream of the requested media type in the already opened
 * input (va_ctx.fmt_ctx), open a decoder for it, and record the result in
 * va_ctx.
 *
 * @param files   supplies src_filename (used in messages) and refcount
 *                (selects the "refcounted_frames" decoder option)
 * @param va_ctx  on success receives the stream index, stream pointer and
 *                decoder context for the video or audio slot
 * @param type    AVMEDIA_TYPE_VIDEO or AVMEDIA_TYPE_AUDIO
 * @return 0 on success; a negative value on failure (the FFmpeg error code
 *         where one is available, -1 for an unsupported media type)
 */
static int open_codec_context(IOFileName &files, DemuxingVideoAudioContex &va_ctx, enum AVMediaType type)
{
    /* av_get_media_type_string() yields NULL for unknown types; never pass NULL to "%s" */
    const char *type_name = av_get_media_type_string(type);
    if (!type_name)
    {
        type_name = "unknown";
    }

    int stream_index = av_find_best_stream(va_ctx.fmt_ctx, type, -1, -1, NULL, 0);
    if (stream_index < 0)
    {
        fprintf(stderr, "Could not find %s stream in input file '%s'\n", type_name, files.src_filename);
        return stream_index;
    }

    /* Reject types we have no slot for BEFORE opening a decoder, so nothing is left open */
    if (type != AVMEDIA_TYPE_VIDEO && type != AVMEDIA_TYPE_AUDIO)
    {
        fprintf(stderr, "Error: unsupported MediaType: %s\n", type_name);
        return -1;
    }

    AVStream *st = va_ctx.fmt_ctx->streams[stream_index];

    /* find decoder for the stream */
    AVCodecContext *dec_ctx = st->codec;
    AVCodec *dec = avcodec_find_decoder(dec_ctx->codec_id);
    if (!dec)
    {
        fprintf(stderr, "Failed to find %s codec\n", type_name);
        return AVERROR(EINVAL);
    }

    /* Init the decoders, with or without reference counting */
    AVDictionary *opts = NULL;
    av_dict_set(&opts, "refcounted_frames", files.refcount ? "1" : "0", 0);
    int ret = avcodec_open2(dec_ctx, dec, &opts);
    /* The dictionary is ours whether or not the open succeeded; release it on every path */
    av_dict_free(&opts);
    if (ret < 0)
    {
        fprintf(stderr, "Failed to open %s codec\n", type_name);
        return ret;
    }

    if (type == AVMEDIA_TYPE_VIDEO)
    {
        va_ctx.video_stream_idx = stream_index;
        va_ctx.video_stream = st;
        va_ctx.video_dec_ctx = dec_ctx;
    }
    else
    {
        va_ctx.audio_stream_idx = stream_index;
        va_ctx.audio_stream = st;
        va_ctx.audio_dec_ctx = dec_ctx;
    }

    return 0;
}

之后打印码流解析出的信息：

/* Prints the information parsed from the stream(s) of ic; url is the name shown in the output. */
void av_dump_format(AVFormatContext *ic,
                    int index,
                    const char *url,
                    int is_output);

实际使用：

	/* dump input information to stderr */
	av_dump_format(va_ctx.fmt_ctx, 0, files.src_filename, 0);

打印到控制台（标准错误输出）：（打印出视频和音频的一些码流信息：编码格式、比特率、宽高、帧、创建时间等）
在这里插入图片描述

整体初始化的函数代码为：

/**
 * Open the input file, read its stream information, open a decoder for the
 * video and/or audio stream, and open the matching output files.
 *
 * @param files   src_filename is the input; video_dst_filename /
 *                audio_dst_filename are opened with "wb" and the resulting
 *                FILE pointers stored in video_dst_file / audio_dst_file
 * @param va_ctx  receives the format context, streams, decoder contexts and,
 *                for video, the raw image buffer allocated by av_image_alloc()
 * @return 0 on success, -1 on failure
 *
 * va_ctx.fmt_ctx is passed straight to avformat_open_input(), so it must be
 * NULL (or a context the caller allocated) on entry.
 *
 * This function does not release anything it acquired when it fails part-way.
 * NOTE(review): presumably the caller runs the teardown afterwards — confirm.
 */
int InitDemuxContext(IOFileName &files, DemuxingVideoAudioContex &va_ctx)
{
    int ret = 0;

    /*
     * Start from a known "no stream" state: the checks at the end of this
     * function read the stream pointers even when no stream was opened, and
     * -1 can never be mistaken for a real stream index.
     */
    va_ctx.video_stream = NULL;
    va_ctx.audio_stream = NULL;
    va_ctx.video_stream_idx = -1;
    va_ctx.audio_stream_idx = -1;

    /* register all formats and codecs */
    av_register_all();

    /* open input file, and allocate format context */
    if (avformat_open_input(&(va_ctx.fmt_ctx), files.src_filename, NULL, NULL) < 0)
    {
        fprintf(stderr, "Could not open source file %s\n", files.src_filename);
        return -1;
    }

    /* retrieve stream information */
    if (avformat_find_stream_info(va_ctx.fmt_ctx, NULL) < 0)
    {
        fprintf(stderr, "Could not find stream information\n");
        return -1;
    }

    /* A missing video stream is not fatal here; only "neither stream" is (checked below) */
    if (open_codec_context(files, va_ctx, AVMEDIA_TYPE_VIDEO) >= 0)
    {
        files.video_dst_file = fopen(files.video_dst_filename, "wb");
        if (!files.video_dst_file)
        {
            fprintf(stderr, "Could not open destination file %s\n", files.video_dst_filename);
            return -1;
        }

        /* allocate image where the decoded image will be put */
        va_ctx.width = va_ctx.video_dec_ctx->width;
        va_ctx.height = va_ctx.video_dec_ctx->height;
        va_ctx.pix_fmt = va_ctx.video_dec_ctx->pix_fmt;
        ret = av_image_alloc(va_ctx.video_dst_data, va_ctx.video_dst_linesize, va_ctx.width, va_ctx.height, va_ctx.pix_fmt, 1);
        if (ret < 0)
        {
            fprintf(stderr, "Could not allocate raw video buffer\n");
            return -1;
        }
        /* on success av_image_alloc() returned the buffer size */
        va_ctx.video_dst_bufsize = ret;
    }

    if (open_codec_context(files, va_ctx, AVMEDIA_TYPE_AUDIO) >= 0)
    {
        files.audio_dst_file = fopen(files.audio_dst_filename, "wb");
        if (!files.audio_dst_file)
        {
            fprintf(stderr, "Could not open destination file %s\n", files.audio_dst_filename);
            return -1;
        }
    }

    if (va_ctx.video_stream)
    {
        printf("Demuxing video from file '%s' into '%s'\n", files.src_filename, files.video_dst_filename);
    }

    if (va_ctx.audio_stream)
    {
        printf("Demuxing audio from file '%s' into '%s'\n", files.src_filename, files.audio_dst_filename);
    }

    /* dump input information to stderr */
    av_dump_format(va_ctx.fmt_ctx, 0, files.src_filename, 0);

    if (!va_ctx.audio_stream && !va_ctx.video_stream)
    {
        fprintf(stderr, "Could not find audio or video stream in the input, aborting\n");
        return -1;
    }

    return 0;
}

以上是关于FFMpeg SDK使用4调用FFmpeg SDK解析封装格式的视频为音频流和视频流的主要内容，如果未能解决你的问题，请参考以下文章