[Libav-user] Last audio frame missing when transcoding to H264

M N assemblerx86 at yandex.com
Sat Oct 8 00:45:14 EEST 2016


Hi,

I am writing a program to transcode .mp4 files to H264, but I am running into a problem: the last audio frame is not being written to the output stream, and MediaInfo reports (Duration_LastFrame: -20 ms). I also don't know whether this is related, but Windows Media Player doesn't show the video of the generated mp4 file; it plays the sound, but the video is a black screen. (I checked the color space and chroma subsampling; it's yuv420p.)

Here is my code:

#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libavutil/avutil.h"
#include "libavutil/rational.h"
#include "libavutil/timestamp.h"

#include <stdio.h>
#include <stdlib.h>

/*
 * Print one line describing the timing of a packet.
 *
 * fmt_ctx  format context whose stream list contains the packet's stream
 * pkt      packet to describe
 * tag      label printed at the start of the line
 *
 * pts, dts and duration are each printed twice: once formatted by
 * av_ts2str() and once by av_ts2timestr() using the time base of the
 * stream selected by pkt->stream_index. The stream index closes the line.
 */
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt, const char *tag)
{
    const int idx = pkt->stream_index;
    AVRational *tb = &fmt_ctx->streams[idx]->time_base;

    printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n\n",
           tag,
           av_ts2str(pkt->pts),
           av_ts2timestr(pkt->pts, tb),
           av_ts2str(pkt->dts),
           av_ts2timestr(pkt->dts, tb),
           av_ts2str(pkt->duration),
           av_ts2timestr(pkt->duration, tb),
           idx);
}

int main()
{
    av_register_all();

    av_log_set_level(AV_LOG_FATAL);


    AVFormatContext *ps = avformat_alloc_context();

    AVFormatContext *ps2 = NULL;
    AVOutputFormat *oF = av_guess_format("mp4", NULL, "video/mp4");

    FILE *gSize = fopen("vid.mp4", "rb");
    fseek(gSize, 0, SEEK_END);
    size_t iSize = ftell(gSize);
    fclose(gSize);




    if(avformat_open_input(&ps, "vid.mp4", NULL, NULL) != 0)
    {
        printf("Failed to open input file.\n");
        return -1;
    }

    avformat_alloc_output_context2(&ps2, oF, NULL, "vid2.mp4");

    avformat_find_stream_info(ps, NULL);

    AVCodecContext **pC = (AVCodecContext**)malloc(ps->nb_streams), **p2C = (AVCodecContext**)malloc(ps->nb_streams);

    AVStream *oStream = NULL;
    AVStream *iStream = NULL;

    AVCodec *encoder = NULL;
    AVCodec *decoder = NULL;
    AVCodecContext *strCtx = NULL;

    unsigned int i;

    avio_open(&ps2->pb, "vid2.mp4", AVIO_FLAG_WRITE);

    for(i = 0; i < ps->nb_streams; i++)
    {
        printf("%d\n", i);

        iStream = ps->streams[i];

        pC[i] = iStream->codec;


        if(pC[i]->codec_type == AVMEDIA_TYPE_UNKNOWN)
        {
            printf("Skipping bad stream\n");
            continue;
        }

        if(pC[i]->codec_type == AVMEDIA_TYPE_VIDEO || pC[i]->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            encoder = avcodec_find_encoder(pC[i]->codec_id);
            if (!encoder)
            {
                av_log(NULL, AV_LOG_FATAL, "Necessary encoder not found\n");
                return AVERROR_INVALIDDATA;
            }

            oStream = avformat_new_stream(ps2, encoder);

            //av_dict_copy(&oStream->metadata, iStream->metadata, 0);

            strCtx = oStream->codec; //We have to set oStream->codec parameters for write_header to work,
                                    //since write_header only relies on the stream parameters.

            //avcodec_parameters_copy(oStream->codecpar, iStream->codecpar);
            //p2C[i] = oStream->codec;
            p2C[i] = avcodec_alloc_context3(encoder); //H264 codec context must be set using alloc_context

            //AVCodecParameters *pars = avcodec_parameters_alloc();
            //avcodec_parameters_from_context(pars, pC[i]);
            //avcodec_parameters_to_context(p2C[i], pars);

            AVDictionary *param = NULL;


            if (pC[i]->codec_type == AVMEDIA_TYPE_VIDEO)
            {
                p2C[i]->width = pC[i]->width;
                p2C[i]->height = pC[i]->height;

                if (encoder->pix_fmts)
                    p2C[i]->pix_fmt = encoder->pix_fmts[0];
                else
                    p2C[i]->pix_fmt = pC[i]->pix_fmt;

                p2C[i]->sample_rate = pC[i]->sample_rate;
                p2C[i]->sample_aspect_ratio = pC[i]->sample_aspect_ratio;
                //p2C[i]->bits_per_coded_sample = pC[i]->bits_per_coded_sample;
                //p2C[i]->bits_per_raw_sample = pC[i]->bits_per_raw_sample;
                //p2C[i]->flags = pC[i]->flags;
                //p2C[i]->flags2 = pC[i]->flags2;
                p2C[i]->time_base = pC[i]->time_base;
                //p2C[i]->bit_rate = pC[i]->bit_rate;
                //p2C[i]->bit_rate_tolerance = pC[i]->bit_rate_tolerance;
                free(p2C[i]->extradata);
                p2C[i]->extradata = (uint8_t*)malloc(pC[i]->extradata_size);
                p2C[i]->extradata = pC[i]->extradata;
                p2C[i]->extradata_size = pC[i]->extradata_size;
                p2C[i]->gop_size = pC[i]->gop_size;

                strCtx->width = pC[i]->width;
                strCtx->height = pC[i]->height;

                /*if (encoder->pix_fmts)
                    strCtx->pix_fmt = encoder->pix_fmts[0];
                else
                    strCtx->pix_fmt = pC[i]->pix_fmt;*/
                //strCtx->sample_rate = pC[i]->sample_rate;
                //strCtx->sample_aspect_ratio = pC[i]->sample_aspect_ratio;
                strCtx->time_base = pC[i]->time_base;
                free(strCtx->extradata);
                strCtx->extradata = (uint8_t*)malloc(pC[i]->extradata_size);
                strCtx->extradata = pC[i]->extradata;
                strCtx->extradata_size = pC[i]->extradata_size;

                //av_dict_set(&param, "qp", "23", 0);
                //av_opt_set(p2C[i]->priv_data, "profile", "high", (1 << 0));
                //av_opt_set(strCtx->priv_data, "profile", "high", (1 << 0));
                /*
                Change options to trade off compression efficiency against encoding speed. If you specify a preset, the changes it makes will be applied before all other parameters are applied.
                You should generally set this option to the slowest you can bear.
                Values available: ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, placebo.
                */
                //av_dict_set(&param, "preset", "placebo", 0);
                /*
                Tune options to further optimize them for your input content. If you specify a tuning, the changes will be applied after --preset but before all other parameters.
                If your source content matches one of the available tunings you can use this, otherwise leave unset.
                Values available: film, animation, grain, stillimage, psnr, ssim, fastdecode, zerolatency.
                */
                //av_dict_set(&param, "crf", "23", 0);
                //av_dict_set(&param, "coder", "1", 0);
                //av_dict_set(&param, "vprofile", "film", 0);
                //av_dict_set(&param, "tune", "zerolatency", 0);
                //av_dict_set(&param, "no-cabac", "0", 0);
                //av_dict_set(&param, "preset", "medium", 0);
            }
            else
            {
                //av_opt_set(p2C[i]->priv_data, "profile", "high", (1 << 0));
                p2C[i]->sample_rate = pC[i]->sample_rate;
                p2C[i]->sample_aspect_ratio = pC[i]->sample_aspect_ratio;
                p2C[i]->channel_layout = pC[i]->channel_layout;
                p2C[i]->channels = av_get_channel_layout_nb_channels(p2C[i]->channel_layout);
                // take first format from list of supported formats
                p2C[i]->sample_fmt = encoder->sample_fmts[0];
                p2C[i]->time_base = (AVRational){1, p2C[i]->sample_rate};
                p2C[i]->frame_size = pC[i]->frame_size;
                free(p2C[i]->extradata);
                p2C[i]->extradata = (uint8_t*)malloc(pC[i]->extradata_size);
                p2C[i]->extradata = pC[i]->extradata;
                p2C[i]->extradata_size = pC[i]->extradata_size;
                //p2C[i]->gop_size = pC[i]->gop_size;

                strCtx->sample_rate = pC[i]->sample_rate;
                strCtx->sample_aspect_ratio = pC[i]->sample_aspect_ratio;
                //strCtx->channel_layout = pC[i]->channel_layout;
                //strCtx->channels = av_get_channel_layout_nb_channels(strCtx->channel_layout);
                // take first format from list of supported formats
                //strCtx->sample_fmt = encoder->sample_fmts[0];
                strCtx->time_base = (AVRational){1, strCtx->sample_rate};
                strCtx->frame_size = pC[i]->frame_size;
                free(strCtx->extradata);
                strCtx->extradata = (uint8_t*)malloc(pC[i]->extradata_size);
                strCtx->extradata = pC[i]->extradata;
                strCtx->extradata_size = pC[i]->extradata_size;
            }

            //AVCodecParameters *par = avcodec_parameters_alloc();
            //avcodec_parameters_from_context(par, pC[i]);
            //avcodec_parameters_to_context(p2C[i], par);

            decoder = avcodec_find_decoder(pC[i]->codec_id);
            if(decoder == NULL) printf("Couldn't find decoder\n");

            int ret1 = avcodec_open2(pC[i], decoder, NULL);
            int ret2 = avcodec_open2(p2C[i], encoder, NULL);
            printf("Ret1: %d | Ret2: %d\n", ret1, ret2);

        }
        else if (pC[i]->codec_type == AVMEDIA_TYPE_UNKNOWN) {
            av_log(NULL, AV_LOG_FATAL, "Elementary stream #%d is of unknown type, cannot proceed\n", i);

        }
        else
        {
            //avcodec_copy_context(oStream->codec, iStream->codec);
            //printf("BUG\n");
        }
    }
    printf("done\n");

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(ps2->metadata, "", tag, AV_DICT_IGNORE_SUFFIX)))
        printf("%s=%s\n", tag->key, tag->value);

    int ret = avformat_write_header(ps2, NULL);
    char err[200];
    av_make_error_string(err, 200, ret);
    printf("Write header %d: %s\n", ret, err);
    printf("Frames in 0: %d\n", ps->streams[0]->nb_frames);
    printf("Frames in 1: %d\n", ps->streams[1]->nb_frames);
    int decoded_af = 0;
    int audio_frames = 0;
    int encoded_af = 0, encoded2_af = 0;

    int state = 0;
    int prevStream = 0;


    unsigned long long j = 0;
    for(;; ++j)
    {
        AVPacket *pkts = av_packet_alloc();
        av_init_packet(pkts);
        pkts->data = NULL;
        pkts->size = 0;
        AVPacket *pktr = av_packet_alloc();
        av_init_packet(pktr);
        pktr->data = NULL;
        pktr->size = 0;
        AVFrame *rawFrame = av_frame_alloc();




        if(av_read_frame(ps, pkts) == AVERROR_EOF)
        {
            //printf("END\n");

            if(state == 0)
            {
                state++;
                printf("Changed to state %d\n", state);
            }


        }


        int stream_index = pkts->stream_index;
        //if(prevStream != stream_index)
        prevStream = stream_index;

        if(!(ps2->flags & AVFMT_NOTIMESTAMPS))
        {
            pkts->dts = av_rescale_q(pkts->dts, ps->streams[stream_index]->time_base, ps2->streams[stream_index]->time_base);
            pkts->pts = av_rescale_q(pkts->pts, ps->streams[stream_index]->time_base, ps2->streams[stream_index]->time_base);
            pkts->duration = av_rescale_q(pkts->duration, ps->streams[stream_index]->time_base, ps2->streams[stream_index]->time_base);
            //pkts->pos = -1;
            //log_packet(ps2, pkts, "out");
        }
        else
        {
            pkts->dts = AV_NOPTS_VALUE;
            pkts->pts = AV_NOPTS_VALUE;
            printf("NO TIME STAMPS!\n");
        }


        //decoding
        int dret = 0, eret = 0;

        if(state == 0) avcodec_send_packet(pC[stream_index], pkts);
        else if(state == 1)
        {
            avcodec_send_packet(pC[pkts->stream_index], NULL);
            state++;
        }

        dret = avcodec_receive_frame(pC[stream_index], rawFrame);
        if(dret == 0 || state >= 3)
        {
            if(stream_index == 1) decoded_af++;
            //encoding
            if(state < 3)
            {
                rawFrame->pts = av_frame_get_best_effort_timestamp(rawFrame);
                int rets = avcodec_send_frame(p2C[stream_index], rawFrame);
                if(rets == 0 && stream_index == 1) encoded_af++;
                //if(stream_index == 1) printf("Frame: %d\n", p2C[stream_index]->frame_number);
            }
            else if (state == 3)
            {
                avcodec_send_frame(p2C[stream_index], NULL);
                state++;
            }

            eret = avcodec_receive_packet(p2C[stream_index], pktr);
            if(eret == 0)
            {
                if(stream_index == 1) encoded2_af++;

                while(eret == 0)
                {
                    pktr->stream_index = stream_index;
                    int retW = av_interleaved_write_frame(ps2, pktr);

                    if(retW != 0)
                    {
                        printf("Failed to write packet\n");
                        break;
                    }
                    else if(retW == 0 && stream_index == 1) audio_frames++;
                    eret = avcodec_receive_packet(p2C[stream_index], pktr);
                }
                //avcodec_flush_buffers(pC[stream_index]);
            }
            else if(eret == AVERROR_EOF)
            {
                if(stream_index == 1) printf("Audio frame failure at EOF\n");
                avcodec_flush_buffers(pC[stream_index]);
                printf("Finished\n");
                break;
            }
            else if(eret == AVERROR(EAGAIN))
            {
                if(stream_index == 1) printf("Audio frame failure at AVERROR(EAGAIN)\n");
                else printf("AVERROR(EAGAIN)\n");
                //continue;
                goto clean;
            }
            else
            {
                if(stream_index == 1) printf("Audio frame failure at other error.\n");
                printf("other error\n");
            }
        }
        else if(dret == AVERROR_EOF && state == 2)
        {
            state++;

            printf("Changed to state %d\n", state);
        }

clean:


        av_packet_free(&pkts);
        av_packet_free(&pktr);
        av_frame_free(&rawFrame);
        av_frame_unref(rawFrame);
    }

    printf("Written AF: %d\nDecoded AF: %d\nEncoded AF: %d\nEncoded2_AF: %d\n", audio_frames, decoded_af, encoded_af, encoded2_af);

    if(av_write_trailer(ps2) == 0) printf("Wrote trailer\n");

}
********************************************

Thanks!


More information about the Libav-user mailing list