[Libav-user] Example for recompressing a video?

jettoblack jettoblack at gmail.com
Tue Jun 5 03:00:06 CEST 2012


Hi Christian and Nicolas,
Thanks for the input.  I managed to get video encoding working and
everything looks good there.  The solution was to use
picture.best_effort_timestamp as the pts of the encoded frame instead of
pkt.pts (since I have now learned that a packet can contain zero or more
pictures, not necessarily one picture per packet).

However, I'm still having trouble with the audio, specifically resampling and
encoding.  If I simply pass the source audio through, it works ok.  The call
to avresample_convert() seems to work ok (it returns a valid sample count).  The
first call to avcodec_encode_audio2() succeeds, but every subsequent call
returns -22 (AVERROR(EINVAL), "Invalid argument").  I'm not sure which
argument is invalid, especially since the first call succeeds.

Does anyone have any idea where I went wrong in the audio encoding part?  I
have put the whole source code below.  I'm looking forward to any suggestions. :)
Thanks everyone!


#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswscale/swscale.h"
#include "libavresample/avresample.h"
#include "libavutil/opt.h"

int main (int argc, const char * argv[])
{
    const char *infile = argv[1];
    const char *outfile = argv[2];
    int r;
    int video_index = -1, audio_index = -1;     // stream index
    AVPacket pkt;
    AVFrame *srcpic = NULL, *srcaudio = NULL;
    AVFormatContext *in = NULL, *out = NULL;
    AVCodecContext *in_vcodec, *in_acodec, *out_vcodec, *out_acodec;
    AVStream *in_vstream, *in_astream, *out_vstream, *out_astream;
    AVAudioResampleContext *avr;
    AVCodec *h264, *aac;
    int got_picture, got_audio;
    int video_frames = 0, audio_samples = 0;
    char errbuf[128];
    struct SwsContext *img_convert_ctx;
    double framerate, samplerate;
    int got_packet_ptr = 0;
    int audio_bufsize = AVCODEC_MAX_AUDIO_FRAME_SIZE +
FF_INPUT_BUFFER_PADDING_SIZE;
    uint8_t *video_outbuf;
    int video_outbuf_size = 2000000;    // TODO: is there a defined max size
for encoded video buffer?
    uint8_t *picbuf;
    int picbuf_size;
    int64_t firstpts = -1;
    
    // init LAVF
    av_register_all();
    avformat_network_init();
    av_log_set_level(AV_LOG_VERBOSE);
    
    // Open input file
    printf("Open input file: %s\n", infile);
    r = avformat_open_input(&in, infile, NULL, NULL);
    if (r) {
        printf("err %x\n", r);
        return r;
    }
    r = avformat_find_stream_info(in, NULL);
    if (r) {
        printf("err %x\n", r);
        return r;
    }
    
    // iterate over input streams
    for (int i = 0; i < in->nb_streams; i++) {
        AVStream *inputStream = in->streams[i];
        if (inputStream->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
            inputStream->discard = AVDISCARD_NONE;
            video_index = i;
            in_vstream = inputStream;
            in_vcodec = inputStream->codec;
            
            if (!inputStream->codec->codec) {
                avcodec_open2(inputStream->codec,
avcodec_find_decoder(inputStream->codec->codec_id), NULL);
            }
            printf("Input video %s rate %d/%d width %d height %d\n",
in_vcodec->codec->name, inputStream->r_frame_rate.num,
inputStream->r_frame_rate.den, in_vcodec->width, in_vcodec->height);
            
        }        
        else if (inputStream->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            inputStream->discard = AVDISCARD_NONE;
            audio_index = i;
            in_astream = inputStream;
            in_acodec = inputStream->codec;
            
            if (!inputStream->codec->codec) {
                avcodec_open2(inputStream->codec,
avcodec_find_decoder(inputStream->codec->codec_id), NULL);
            }
            printf("Input audio %s rate %d channels %d sample_format %d\n",
in_acodec->codec->name, in_acodec->sample_rate, in_acodec->channels,
in_acodec->sample_fmt);
        }
        else {
            inputStream->discard = AVDISCARD_ALL;
        }
        
    }
    
    assert(in_vcodec && in_acodec);

    // Open output file for writing
    out = avformat_alloc_context();
    assert(out);
    out->oformat = av_guess_format(NULL, outfile, NULL);    // Guess output
container format based on file extension
    assert(out->oformat);

    // Output parameters
    // Video codec
    h264 = avcodec_find_encoder(CODEC_ID_H264);
    assert(h264);
    out_vcodec = avcodec_alloc_context3(h264);
    assert(out_vcodec);
    avcodec_get_context_defaults3(out_vcodec, h264);
    out_vcodec->bit_rate = 500000;
    out_vcodec->width = 640;
    out_vcodec->height = 360;
    out_vcodec->time_base.num = in_vstream->r_frame_rate.den;   // time_base
is 1/framerate
    out_vcodec->time_base.den = in_vstream->r_frame_rate.num;
    out_vcodec->gop_size = (int)round(av_q2d(in_vstream->r_frame_rate) / 2);   
// GOP size is framerate/2
    out_vcodec->max_b_frames = 0;
    out_vcodec->coder_type = FF_CODER_TYPE_VLC;
    out_vcodec->pix_fmt = PIX_FMT_YUV420P;
    out_vcodec->profile = FF_PROFILE_H264_BASELINE;
    // TODO: set other codec parameters
    r = avcodec_open2(out_vcodec, h264, NULL);
    assert(!r);

    // Video stream
    out_vstream = avformat_new_stream(out, out_vcodec->codec);
    assert(out_vstream);
    out_vstream->codec = out_vcodec;
    out_vstream->r_frame_rate = in_vstream->r_frame_rate;
    framerate = av_q2d(out_vstream->r_frame_rate);
    
    // Audio codec
    aac = avcodec_find_encoder(CODEC_ID_MP2);
    assert(aac);
    out_acodec = avcodec_alloc_context3(aac);
    avcodec_get_context_defaults3(out_acodec, aac);
    assert(out_acodec);
    out_acodec->codec_id = aac->id;
    out_acodec->codec_type = AVMEDIA_TYPE_AUDIO;
    out_acodec->bit_rate = 128000;
    out_acodec->channels = 2;
    out_acodec->sample_rate = 48000;
    samplerate = out_acodec->sample_rate;
    out_acodec->sample_fmt =  AV_SAMPLE_FMT_S16;
    out_acodec->channel_layout = av_get_channel_layout("stereo");
    out_acodec->time_base = in_acodec->time_base;
    // TODO: set other codec parameters
    
    r = avcodec_open2(out_acodec, aac, NULL);
    assert(!r);

    // Audio stream
    out_astream = avformat_new_stream(out, out_acodec->codec);
    assert(out_astream);
    out_astream->codec = out_acodec;
    
     
    // Begin writing output file
    printf("Open output file: %s\nOutput container: %s\n", outfile,
out->oformat->long_name);
    r = avio_open2(&out->pb, outfile, AVIO_FLAG_WRITE, NULL, NULL);
    if (r) {
        printf("err %x\n", r);
        return r;
    }
    printf("write out header\n");
    r = avformat_write_header(out, NULL);
    if (r) {
        printf("err %x\n", r);
        return r;
    }
    
    // show output streams
    for (int i = 0; i < out->nb_streams; i++) {
        AVStream *outputStream = out->streams[i];
        if (outputStream->codec && outputStream->codec->codec) {
            printf("Output stream %d: %s %d/%d ", i,
outputStream->codec->codec->name, outputStream->time_base.num,
outputStream->time_base.den);
            if (outputStream->codec->codec_type == AVMEDIA_TYPE_VIDEO)
                printf("width %d height %d bitrate %d\n",
outputStream->codec->width, outputStream->codec->height,
outputStream->codec->bit_rate);
            if (outputStream->codec->codec_type == AVMEDIA_TYPE_AUDIO)
                printf("channels %d sample_rate %d bitrate %d\n",
outputStream->codec->channels, outputStream->codec->sample_rate,
outputStream->codec->bit_rate);
        }
        else
            printf("Output stream %d: %d/%d\n", i,
outputStream->time_base.num, outputStream->time_base.den);
    }

    // buffer for encoded video data
    video_outbuf = (uint8_t*)av_malloc(video_outbuf_size);
    assert(video_outbuf);
    img_convert_ctx = sws_getContext(in_vcodec->width, in_vcodec->height,
                                     in_vcodec->pix_fmt,
                                     out_vcodec->width, out_vcodec->height,
                                     out_vcodec->pix_fmt,
                                     SWS_BICUBIC, NULL, NULL, NULL);
    assert(img_convert_ctx);

    // buffer for picture data
    picbuf_size = avpicture_get_size(out_vcodec->pix_fmt, out_vcodec->width,
out_vcodec->height);
    picbuf = (uint8_t*)av_malloc(picbuf_size);
    assert(picbuf);
    
    // setup resample context
    avr = avresample_alloc_context();
    av_opt_set_int(avr,  "in_channel_layout", in_acodec ->channel_layout,
0);
    av_opt_set_int(avr, "out_channel_layout", out_acodec->channel_layout,
0);
    av_opt_set_int(avr,  "in_sample_fmt",     in_acodec ->sample_fmt,    
0);
    av_opt_set_int(avr, "out_sample_fmt",     out_acodec->sample_fmt,    
0);
    av_opt_set_int(avr,  "in_sample_rate",    in_acodec ->sample_rate,   
0);
    av_opt_set_int(avr, "out_sample_rate",    out_acodec->sample_rate,   
0);
    av_opt_set_int(avr,  "in_channels",       in_acodec ->channels,    0);
    av_opt_set_int(avr, "out_channels",       out_acodec->channels,    0);
    r = avresample_open(avr);
    assert(!r);
    
    printf("begin input loop\n");
    while (1) {
        av_init_packet(&pkt);
        r = av_read_frame(in, &pkt);
        if (r) {
            if (r == AVERROR_EOF)
                printf("EOF\n");
            else
                printf("read error %x\n", r);
            break;
        }
        printf("src pkt stream %d, pts %"PRId64", dts %"PRId64"\n",
pkt.stream_index, pkt.pts, pkt.dts);
        
        if (firstpts == -1 && pkt.pts != AV_NOPTS_VALUE)
            firstpts = pkt.pts;
        
        if (pkt.stream_index == in_vstream->index) {
            srcpic = avcodec_alloc_frame();
            assert(srcpic);
            avcodec_get_frame_defaults(srcpic);
            got_picture = 0;

            r = avcodec_decode_video2(in_vcodec, srcpic, &got_picture,
&pkt);
            if (r < 0) {
                av_strerror(r, errbuf, 128);
                printf("video decode error %d %s\n", r, errbuf);
                break;
            }
            else if (got_picture) {
                AVPacket newpkt;
                AVFrame *destpic;
                av_init_packet(&newpkt);
                destpic = avcodec_alloc_frame();
                got_packet_ptr = 0;

                printf("got picture: best_effort_timestamp %"PRId64"\n",
srcpic->best_effort_timestamp);
                
                // convert picture to dest format
                avpicture_fill((AVPicture*)destpic, picbuf, 
                               out_vcodec->pix_fmt, out_vcodec->width,
out_vcodec->height);
                sws_scale(img_convert_ctx, (const uint8_t*
const*)srcpic->data, srcpic->linesize, 0, 
                          in_vcodec->height, destpic->data,
destpic->linesize);

                // set destpic PTS
                if (srcpic->best_effort_timestamp != AV_NOPTS_VALUE)
                    destpic->pts =
av_rescale_q(srcpic->best_effort_timestamp, in_vstream->time_base,
out_vstream->time_base);
                else
                    destpic->pts = (int)((double)video_frames *
(90000.0/framerate));   // TODO: not always 90k

                // encode picture
                r = avcodec_encode_video2(out_vcodec, &newpkt, destpic,
&got_packet_ptr);
                if (r < 0) {
                    av_strerror(r, errbuf, 128);
                    printf("video encode error %d %s\n", r, errbuf);
                }
                else if (got_packet_ptr) {
                    // write packet
                    newpkt.stream_index = out_vstream->index;
                    printf("write video pkt: stream %d, pts %"PRId64", dts
%"PRId64"\n", 
                           newpkt.stream_index, newpkt.pts, newpkt.dts);
                    r = av_interleaved_write_frame(out, &newpkt);
                    if (r && (r != AVERROR(EINVAL))) {
                        printf("video write error %x\n", r);
                    }
                    assert(!r);
                }
                av_free_packet(&newpkt);
                av_free(destpic);
                video_frames++;
                
            }
            av_free(srcpic);
        }
        else if (pkt.stream_index == in_astream->index) {
            // decode audio
            srcaudio = avcodec_alloc_frame();
            avcodec_get_frame_defaults(srcaudio);
            got_audio = 0;

            r = avcodec_decode_audio4(in_acodec, srcaudio, &got_audio,
&pkt);
            if (r < 0) {
                av_strerror(r, errbuf, 128);
                printf("audio decode error %d %s\n", r, errbuf);
                break;
            }
            else if (got_audio) {
                // convert audio
                AVPacket newpkt;
                AVFrame *destaudio;     // frame for resampled audio
                int nb_samples;
                av_init_packet(&newpkt);
                destaudio = avcodec_alloc_frame();
                avcodec_get_frame_defaults(destaudio);
                destaudio->extended_data = av_malloc(sizeof(uint8_t*));
                destaudio->extended_data[0] = av_malloc(audio_bufsize);
                got_packet_ptr = 0;

                printf("srcaudio linesize[0]=%d nb_samples=%d\n",
srcaudio->linesize[0], srcaudio->nb_samples);

                // resample to dest format
                nb_samples = avresample_convert(avr, 
                                (void**)destaudio->extended_data,
destaudio->linesize[0], audio_bufsize, 
                                (void**)srcaudio->extended_data,
srcaudio->linesize[0], srcaudio->nb_samples);
                if (nb_samples < 0) {
                    av_strerror(nb_samples, errbuf, 128);
                    printf("avr error %d %s\n", nb_samples, errbuf);
                }
                printf("avr ret len %d\n", nb_samples);

                if (srcaudio->best_effort_timestamp != AV_NOPTS_VALUE)
                    destaudio->pts =
av_rescale_q(srcaudio->best_effort_timestamp, in_astream->time_base,
out_astream->time_base);
                else
                    destaudio->pts = firstpts + (int)((double)audio_samples
* (90000.0/samplerate));

                printf("destaudio pts %"PRId64"\n", destaudio->pts);
                
                // why does this return -22 after the first successfull
call?
                r = avcodec_encode_audio2(out_acodec, &newpkt, destaudio,
&got_packet_ptr);

                if (r < 0) {
                    av_strerror(r, errbuf, 128);
                    printf("audio encode error %d %s\n", r, errbuf);
                }
                else if (got_packet_ptr) {
                    // write frame
                    newpkt.stream_index = out_astream->index;
                    newpkt.flags |= AV_PKT_FLAG_KEY;
                    printf("write audio pkt: stream %d, pts %"PRId64", dts
%"PRId64"\n", 
                           newpkt.stream_index, newpkt.pts, newpkt.dts);
                    r = av_interleaved_write_frame(out, &newpkt);
                    if (r && (r != AVERROR(EINVAL))) {
                        printf("audio write error %x\n", r);
                    }
                }
                
                av_free(destaudio->extended_data[0]);
                av_free(destaudio->extended_data);
                av_free(destaudio);
                av_free_packet(&newpkt);
                audio_samples += nb_samples;
            }
            
            av_free(srcaudio);
            
        }
        
        av_free_packet(&pkt);
        
    }
    
    // Flush any remaining encoded data
    // encode picture
    av_free_packet(&pkt);
    printf("Flush video packets\n");
    while (1) {
        av_init_packet(&pkt);
        got_packet_ptr = 0;
        r = avcodec_encode_video2(out_vcodec, &pkt, NULL, &got_packet_ptr);
        if (r < 0) {
            av_strerror(r, errbuf, 128);
            printf("video encode error %d %s\n", r, errbuf);
            break;
        }
        else if (got_packet_ptr) {
            // write packet
            pkt.stream_index = out_vstream->index;
            printf("write video pkt: stream %d, pts %"PRId64", dts
%"PRId64"\n", 
                   pkt.stream_index, pkt.pts, pkt.dts);
            r = av_interleaved_write_frame(out, &pkt);
            if (r && (r != AVERROR(EINVAL))) {
                printf("video write error %x\n", r);
            }
            assert(!r);
        }
        else if (r == 0) {
            break;
        }
        av_free_packet(&pkt);
    }
    av_free_packet(&pkt);
    
    // flush audio
    printf("Flush audio packets\n");
    while (1) {
        av_init_packet(&pkt);
        got_packet_ptr = 0;
        r = avcodec_encode_audio2(out_acodec, &pkt, NULL, &got_packet_ptr);
        if (r < 0) {
            av_strerror(r, errbuf, 128);
            printf("audio encode error %d %s\n", r, errbuf);
            break;
        }
        else if (got_packet_ptr) {
            // write packet
            pkt.stream_index = out_astream->index;
            printf("write audio pkt: stream %d, pts %"PRId64", dts
%"PRId64"\n", 
                   pkt.stream_index, pkt.pts, pkt.dts);
            r = av_interleaved_write_frame(out, &pkt);
            if (r && (r != AVERROR(EINVAL))) {
                printf("audio write error %x\n", r);
            }
            assert(!r);
        }
        else if (r == 0) {
            break;
        }
        av_free_packet(&pkt);
    }
    av_free_packet(&pkt);
    
    av_free(picbuf);
    av_free(video_outbuf);

    avcodec_close(in_vcodec);
    avcodec_close(in_acodec);
    avcodec_close(out_vcodec);
    avcodec_close(out_acodec);
    
    // TODO: anything else to free/close?

    r = av_write_trailer(out);
    if (r) {
        printf("error closing output %x\n", r);
    }
    
    avformat_close_input(&in);
    printf("Wrote output file: %s\n", outfile);

    return 0;

}


--
View this message in context: http://libav-users.943685.n4.nabble.com/Example-for-recompressing-a-video-tp4655098p4655124.html
Sent from the libav-users mailing list archive at Nabble.com.


More information about the Libav-user mailing list