FFmpeg C API - 同步视频和音频

Posted

技术标签:

【中文标题】FFmpeg C API - 同步视频和音频【英文标题】:FFmpeg C API - syncing video and audio 【发布时间】:2015-03-30 17:21:45 【问题描述】:

我正在修剪视频，但很难让音频正确同步。下面的代码是我目前最接近正常工作的版本。我既尝试过对输出流重新编码，也尝试过不重新编码（直接复制数据包）。

视频正确修剪并写入输出容器。音频流也会正确修剪,但会写入输出容器的前面。例如,如果修剪长度为 10 秒 - 音频的正确部分播放 10 秒,然后视频的正确部分播放。

//////// audio stream ////////
const AVStream *input_stream_audio = input_container->streams[audio_stream_index];
const AVCodec *decoder_audio = avcodec_find_decoder(input_stream_audio->codec->codec_id);
if(!decoder_audio) 
    cleanup(decoded_packet, output_container, decoded_frame);
    avformat_close_input(&input_container);
    LOGE("=> Audio decoder not found");
    return -1;

if(avcodec_open2(input_stream_audio->codec, decoder_audio, NULL) < 0) 
    cleanup(decoded_packet, output_container, decoded_frame);
    avformat_close_input(&input_container);
    LOGE("=> Error opening audio decoder");
    return -1;


AVStream *output_stream_audio = avformat_new_stream(output_container, NULL);
if(avcodec_copy_context(output_stream_audio->codec, input_stream_audio->codec) != 0)
    LOGE("=> Failed to Copy audio Context ");
    return -1;

else 
    LOGI("=> Copied audio context ");
    output_stream_audio->codec->codec_id = input_stream_audio->codec->codec_id;
    output_stream_audio->codec->codec_tag = 0;
    output_stream_audio->pts = input_stream_audio->pts;
    output_stream_audio->time_base.num = input_stream_audio->time_base.num;
    output_stream_audio->time_base.den = input_stream_audio->time_base.den;



if(avio_open(&output_container->pb, output_file, AVIO_FLAG_WRITE) < 0) 
    cleanup(decoded_packet, output_container, decoded_frame);
    avformat_close_input(&input_container);
    LOGE("=> Error opening output file");
    return -1;


// allocate frame for conversion
decoded_frame = avcodec_alloc_frame();
if(!decoded_frame) 
    cleanup(decoded_packet, output_container, decoded_frame);
    avformat_close_input(&input_container);
    LOGE("=> Error allocating frame");
    return -1;


av_dump_format(input_container, 0, input_file, 0);
avformat_write_header(output_container, NULL);
av_init_packet(&decoded_packet);

decoded_packet.data = NULL;
decoded_packet.size = 0;
int current_frame_num = 1;
int current_frame_num_audio = 1;
int got_frame, len;

AVRational default_timebase;
default_timebase.num = 1;
default_timebase.den = AV_TIME_BASE;

int64_t starttime_int64 = av_rescale_q((int64_t)( 12.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream->time_base);
int64_t endtime_int64 = av_rescale_q((int64_t)( 18.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream->time_base);
LOGI("=> starttime_int64:     %" PRId64, starttime_int64);
LOGI("=> endtime_int64:       %" PRId64, endtime_int64);

int64_t starttime_int64_audio = av_rescale_q((int64_t)( 12.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream_audio->time_base);
int64_t endtime_int64_audio = av_rescale_q((int64_t)( 18.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream_audio->time_base);
LOGI("=> starttime_int64_audio:     %" PRId64, starttime_int64_audio);
LOGI("=> endtime_int64_audio:       %" PRId64, endtime_int64_audio);

// loop input container and decode frames
while(av_read_frame(input_container, &decoded_packet)>=0) 
    // video packets
    if (decoded_packet.stream_index == video_stream_index) 
        len = avcodec_decode_video2(input_stream->codec, decoded_frame, &got_frame, &decoded_packet);
        if(len < 0) 
            cleanup(decoded_packet, output_container, decoded_frame);
            avformat_close_input(&input_container);
            LOGE("=> No frames to decode");
            return -1;
        
        // this is the trim range we're looking for
        if(got_frame && decoded_frame->pkt_pts >= starttime_int64 && decoded_frame->pkt_pts <= endtime_int64) 
                av_init_packet(&encoded_packet);
                encoded_packet.data =  NULL;
                encoded_packet.size =  0;

                ret = avcodec_encode_video2(output_stream->codec, &encoded_packet, decoded_frame, &got_frame);
                if (ret < 0) 
                    cleanup(decoded_packet, output_container, decoded_frame);
                    avformat_close_input(&input_container);
                    LOGE("=> Error encoding frames");
                    return ret;
                
                if(got_frame) 
                    if (output_stream->codec->coded_frame->key_frame) 
                        encoded_packet.flags |= AV_PKT_FLAG_KEY;
                    

                    encoded_packet.stream_index = output_stream->index;
                    encoded_packet.pts = av_rescale_q(current_frame_num, output_stream->codec->time_base, output_stream->time_base);
                    encoded_packet.dts = av_rescale_q(current_frame_num, output_stream->codec->time_base, output_stream->time_base);

                    ret = av_interleaved_write_frame(output_container, &encoded_packet);
                    if (ret < 0) 
                        cleanup(decoded_packet, output_container, decoded_frame);
                        avformat_close_input(&input_container);
                        LOGE("=> Error encoding frames");
                        return ret;
                    
                    else 
                        current_frame_num +=1;
                    
                
            av_free_packet(&encoded_packet);
        
    
    // audio packets
    else if(decoded_packet.stream_index == audio_stream_index) 
        // this is the trim range we're looking for
        if(decoded_packet.pts >= starttime_int64_audio && decoded_packet.pts <= endtime_int64_audio) 
            av_init_packet(&encoded_packet);

            encoded_packet.data =  decoded_packet.data;
            encoded_packet.size =  decoded_packet.size;
            encoded_packet.stream_index = audio_stream_index;
            encoded_packet.pts = av_rescale_q(current_frame_num_audio, output_stream_audio->codec->time_base, output_stream_audio->time_base);
            encoded_packet.dts = av_rescale_q(current_frame_num_audio, output_stream_audio->codec->time_base, output_stream_audio->time_base);

            ret = av_interleaved_write_frame(output_container, &encoded_packet);
            if (ret < 0) 
                cleanup(decoded_packet, output_container, decoded_frame);
                avformat_close_input(&input_container);
                LOGE("=> Error encoding frames");
                return ret;
            
            else 
                current_frame_num_audio +=1;
            
           av_free_packet(&encoded_packet);
        
    

编辑

我对初始代码略有改进。音视频还没有完全同步,但是解决了原来先音频后视频的问题。

我现在将解码后的数据包写入输出容器,而不是重新编码。

最后虽然我有同样的问题 - 修剪后的视频的音频和视频流没有完全同步。

// audio packets
    else if(decoded_packet.stream_index == audio_stream_index) 
        // this is the trim range we're looking for
        if(decoded_packet.pts >= starttime_int64_audio && decoded_packet.pts <= endtime_int64_audio) 
            ret = av_interleaved_write_frame(output_container, &decoded_packet);
            if (ret < 0) 
                cleanup(decoded_packet, output_container, decoded_frame);
                avformat_close_input(&input_container);
                LOGE("=> Error writing audio frame (%s)", av_err2str(ret));
                return ret;
            
            else 
                current_frame_num_audio +=1;
            
        
        else if(decoded_frame->pkt_pts > endtime_int64_audio) 
            audio_copy_complete = true;
        
    

【问题讨论】:

【参考方案1】:

如果您正确设置了编解码器上下文（codec context）和流（stream）的时基，我相信这是可以实现的：在调用 avcodec_encode_video2 和 avcodec_encode_audio2 得到数据包之后，用对应的时基调用 av_packet_rescale_ts，将数据包的时间戳从编解码器时基转换到流时基。

【讨论】:

以上是关于FFmpeg C API - 同步视频和音频的主要内容,如果未能解决你的问题,请参考以下文章

FFmpeg学习6:视音频同步

ffmpeg音频和视频同步错误

与 ffmpeg 的音视频同步

使用 ffmpeg 剪切电影导致音频/视频不同步

将 wav 和 mp4 与 ffmpeg 合并时,音视频同步漂移缓慢

FFMPEG 将 image2pipe 中的视频与 RTMP 中的音频同步