[Libav-user] Audio transcoding problem: lost audio in the end

Andrew Sherepenko andrew.sherepenko at gmail.com
Thu Nov 14 19:40:43 CET 2013


Hello libav and ffmpeg users.

I am new to ffmpeg and I have a problem with audio transcoding. I've 
tried to implement almost
all the tutorials I could find, but it still doesn't work correctly.

I have an mpeg-ts container with h264 video and ac3 audio codecs.

I need to change the container from mpeg-ts to mp4, copying the video and 
changing the audio format from ac3 to aac or mp3.

I've already changed the container and copied the video, but my audio stream 
loses the last few seconds of audio after transcoding,
and I don't understand why.

For example: if I try to convert a 17 s video, I lose 3 seconds of 
audio at the end for mp3 and 5 s for aac.

If somebody has had this kind of problem, please advise what I did 
wrong. Or maybe someone has a complete example of
transcoding audio frame by frame. It would be very useful for me.

And this is my code:

#include <stdexcept>
#include <iostream>
#include <sstream>
#include <cstring>
#include <cstdlib>
#include <cassert>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/error.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libavutil/rational.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>

}

// Prints a one-line description of the tool followed by its command-line
// synopsis to standard output. argv[0] is used as the program name.
void usage(char *argv[]) {
    // The description is a single literal; a line break inside it would be
    // an unterminated string literal.
    std::cout << "Changes container of media file from any format (mostly mpeg-ts) to mp4" << std::endl;
    std::cout << "usage: " << argv[0] << " INFILE [OUTFILE]" << std::endl;
}

// Converts the timing fields of pkt from inTimeBase to outTimeBase.
// pts and dts are first shifted by startTime and then rescaled; fields equal
// to AV_NOPTS_VALUE are left untouched. The duration is rescaled only when
// it is positive.
void rescalePacket(AVPacket &pkt, int64_t startTime, AVRational inTimeBase, AVRational outTimeBase) {
    pkt.pts = (pkt.pts == AV_NOPTS_VALUE)
                  ? pkt.pts
                  : av_rescale_q(pkt.pts - startTime, inTimeBase, outTimeBase);

    pkt.dts = (pkt.dts == AV_NOPTS_VALUE)
                  ? pkt.dts
                  : av_rescale_q(pkt.dts - startTime, inTimeBase, outTimeBase);

    if (pkt.duration <= 0) {
        return;
    }

    pkt.duration = av_rescale_q(pkt.duration, inTimeBase, outTimeBase);
}

// Opens fileName for demuxing and probes its streams.
//
// Returns the opened AVFormatContext on success, or NULL after printing a
// message to standard output when the file cannot be opened or its stream
// information cannot be read. The caller is expected to release a non-NULL
// result with avformat_close_input().
AVFormatContext* initInputContext(const std::string& fileName) {
    assert(!fileName.empty());
    AVFormatContext* context = NULL;

    if (avformat_open_input(&context, fileName.c_str(), NULL, NULL) < 0) {
        std::cout << "Unable to get AVFormatContext from " << fileName << std::endl;
        return NULL;
    }

    if (avformat_find_stream_info(context, NULL) < 0) {
        std::cout << "Could not find stream information" << std::endl;
        // The context was opened successfully above, so it must be closed
        // here; otherwise it leaks because the caller only sees NULL.
        avformat_close_input(&context);
        return NULL;
    }

    return context;
}

// Allocates a muxing context for fileName.
//
// The output format is first deduced from the file name; if that fails, a
// second attempt is made with the format name "mp4". Returns the allocated
// context, or NULL (after printing a message) when no context was produced.
AVFormatContext* initOutputContext(const std::string& fileName) {
    assert(!fileName.empty());

    AVFormatContext* muxer = NULL;
    int status = avformat_alloc_output_context2(&muxer, NULL, NULL, fileName.c_str());

    if (status < 0) {
        // Fall back to an explicit mp4 muxer when deduction failed.
        status = avformat_alloc_output_context2(&muxer, NULL, "mp4", NULL);

        if (status < 0) {
            std::cout << "Could not deduce output format: " << fileName << std::endl;
        }
    }

    if (muxer) {
        return muxer;
    }

    std::cout << "Unable to init format context" << std::endl;
    return NULL;
}

// Looks up the best stream of the given media type in formatContext.
//
// Returns NULL (after printing a message) when av_find_best_stream() finds
// nothing. For audio streams a decoder is additionally looked up and opened
// on the stream's codec context; failures of that step are only reported,
// and the stream is returned regardless.
AVStream* getStream(AVFormatContext* formatContext, AVMediaType mediaType) {
    assert(formatContext != NULL);

    const int position = av_find_best_stream(formatContext, mediaType, -1, -1, NULL, 0);

    if (position < 0) {
        std::cout << "Stream index not found" << std::endl;
        return NULL;
    }

    AVStream* found = formatContext->streams[position];

    // Only audio needs a decoder here.
    if (found->codec->codec_type != AVMEDIA_TYPE_AUDIO) {
        return found;
    }

    AVCodec* audioDecoder = avcodec_find_decoder(found->codec->codec_id);

    if (!audioDecoder) {
        std::cout << "Could not find stream decoder with ID: " << found->codec->codec_id << std::endl;
        return found;
    }

    if (avcodec_open2(found->codec, audioDecoder, NULL) < 0) {
        std::cout << "Could not open codec: " << found->codec->codec_id << std::endl;
    }

    return found;
}

// File-wide audio conversion state, set up by initEncoder() and released in
// processVideo().
// Resampler that converts decoded input audio to the encoder's format.
SwrContext* swrContext = NULL;
// Buffer that receives converted samples (allocated with av_samples_alloc).
uint8_t* rawData = NULL;
// Line size reported by av_samples_alloc for rawData.
int rawDataSize = 0;

// Prepares the global audio conversion state (swrContext, rawData,
// rawDataSize) for converting audio decoded from inStream into the
// sample format, rate and layout of outStream's encoder.
//
// context and codecId are not used by this function.
// Returns true on success. On failure returns false and leaves swrContext
// released (NULL) so no half-initialised resampler stays behind.
bool initEncoder(AVFormatContext* context, AVStream* inStream, AVStream* outStream, CodecID codecId) {
    (void)context;
    (void)codecId;

    if (!inStream || !outStream) {
        return false;
    }

    AVCodecContext* decoder = inStream->codec;
    AVCodecContext* encoder = outStream->codec;

    // A decoder may leave channel_layout at 0; derive a default layout
    // from the channel count in that case.
    int64_t inLayout = decoder->channel_layout;
    if (!inLayout) {
        inLayout = av_get_default_channel_layout(decoder->channels);
    }

    swrContext = swr_alloc_set_opts(NULL,
                                    encoder->channel_layout, encoder->sample_fmt, encoder->sample_rate,
                                    inLayout, decoder->sample_fmt, decoder->sample_rate,
                                    0, NULL);

    if (!swrContext) {
        return false;
    }

    if (swr_init(swrContext) < 0) {
        swr_free(&swrContext);
        return false;
    }

    // The output buffer holds exactly one encoder frame; a non-positive
    // frame_size gives nothing sensible to allocate.
    if (encoder->frame_size <= 0 ||
        av_samples_alloc(&rawData, &rawDataSize, encoder->channels, encoder->frame_size,
                         encoder->sample_fmt, 1) < 0) {
        swr_free(&swrContext);
        return false;
    }

    return true;
}

// Adds a new stream for codecId to formatContext.
//
// When inStream is given and the new stream is an audio stream, the encoder
// is configured (192 kbit/s, 44100 Hz, packed float, stereo), timing hints
// are copied from inStream and the encoder is opened. A failure to open the
// encoder is reported but the stream is still returned.
//
// Returns the new stream, or NULL (after printing a message) when no encoder
// exists for codecId or the stream cannot be created.
AVStream* createStream(AVFormatContext* formatContext, CodecID codecId, AVStream* inStream = NULL) {
    AVCodec* encoder = avcodec_find_encoder(codecId);

    if (!encoder) {
        std::cout << "Could not find stream encoder with ID: " << codecId << std::endl;
        return NULL;
    }

    AVStream* stream = avformat_new_stream(formatContext, encoder);

    if (!stream) {
        std::cout << "Could not create output stream" << std::endl;
        return NULL;
    }

    if (inStream && stream->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
        stream->id = 1;
        stream->codec->bit_rate = 192000;
        stream->codec->sample_rate = 44100;
        stream->codec->sample_fmt = AV_SAMPLE_FMT_FLT;
        stream->codec->channel_layout = AV_CH_LAYOUT_STEREO;
        stream->codec->channels = av_get_channel_layout_nb_channels(stream->codec->channel_layout);
        stream->r_frame_rate = inStream->r_frame_rate;
        stream->avg_frame_rate = inStream->avg_frame_rate;
        stream->duration = inStream->duration;

        // The global-header flag only has an effect if it is set before
        // the encoder is opened, so it is applied here, not afterwards.
        if (formatContext->oformat && (formatContext->oformat->flags & AVFMT_GLOBALHEADER)) {
            stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
        }

        AVDictionary *options = NULL;
        av_dict_set(&options, "strict", "experimental", 0);

        if (avcodec_open2(stream->codec, encoder, &options) < 0) {
            std::cout << "Could not open codec: " << stream->codec->codec_id << std::endl;
        }

        // avcodec_open2 leaves unconsumed entries in the dictionary; free
        // it in every case so it does not leak.
        av_dict_free(&options);
    }

    return stream;
}

// Clones inStream's codec parameters and stream-level properties onto
// outStream so its packets can be written without re-encoding.
//
// Returns false (after printing a message) when the codec context cannot be
// copied; true otherwise.
bool copyStreamContext(AVStream* outStream, AVStream* inStream) {
    const int copied = avcodec_copy_context(outStream->codec, inStream->codec);

    if (copied < 0) {
        std::cout << "Could not copy codec context" << std::endl;
        return false;
    }

    AVCodecContext* target = outStream->codec;

    // Clear the container-specific tag and align the codec time base with
    // the output stream's.
    target->codec_tag = 0;
    target->time_base = outStream->time_base;

    outStream->sample_aspect_ratio = target->sample_aspect_ratio;
    outStream->r_frame_rate        = inStream->r_frame_rate;
    outStream->avg_frame_rate      = inStream->avg_frame_rate;
    outStream->duration            = inStream->duration;

    av_dict_copy(&outStream->metadata, inStream->metadata, 0);

    return true;
}

// Packet counters printed by processVideo() as "read"/"wrote" statistics.
int ra = 0;  // audio packets read from the input
int wa = 0;  // audio packets counted as written to the output
int rv = 0;  // video packets read from the input
int wv = 0;  // video packets counted as written to the output

*int decodeFromPacket(AVCodecContext* codecContext, AVPacket* packet, 
AVFrame*& frame) {**
**    if (!frame) {**
**        frame = avcodec_alloc_frame();**
**    }**
**
**    avcodec_get_frame_defaults(frame);**
**
**    int gotFrame = 0;**
**    int result = avcodec_decode_audio4(codecContext, frame, &gotFrame, 
packet);**
**
**    if (result < 0) {**
**        std::cout << "Could not decode audio frame" << std::endl;**
**        packet->size = 0;**
**        packet->data = NULL;**
**
**        return 0;**
**    }**
**
**    packet->size -= result;**
**    packet->data += result;**
**
**    return gotFrame;**
**}**
**
**int convertFrame(AVCodecContext* codecContext, AVFrame*& frame) {**
**    int result = swr_convert(swrContext, &rawData, 
codecContext->frame_size, (const uint8_t**)frame->extended_data, 
frame->nb_samples);**
**
**    if (result < 0) {**
**        std::cout << "Could not convert frame content: " << std::endl;**
**        return 0;**
**    }**
**
**    avcodec_get_frame_defaults(frame);**
**    frame->nb_samples = codecContext->frame_size;**
**    frame->format     = codecContext->sample_fmt;**
**
**    result = avcodec_fill_audio_frame(frame, codecContext->channels, 
codecContext->sample_fmt, (uint8_t *)rawData, rawDataSize, 1);**
**
**    if (result < 0) {**
**        std::cout << "Could not fill output frame" << std::endl;**
**        return 0;**
**    }**
**
**    return 1;**
**}**
**
**int encodeToPacket(AVCodecContext* codecContext, AVPacket* packet, 
AVFrame*& frame) {**
**    int gotPacket = 0;**
**
**    int result = avcodec_encode_audio2(codecContext, packet, frame, 
&gotPacket);**
**
**    if (result < 0) {**
**        std::cout << "Could not encode audio frame" << std::endl;**
**        return 0;**
**    }**
**
**    return gotPacket;**
**}**
**
**void writeFrame (AVFormatContext* formatContext, AVPacket* packet, int 
streamIndex) {**
**    packet->stream_index = streamIndex;**
**
**    if (av_interleaved_write_frame(formatContext, packet) < 0) {**
**        std::cout << "Could not write audio frame" << std::endl;**
**    }**
**
**    if (streamIndex == 1) {**
**        wa++;**
**    } else {**
**        wv++;**
**    }**
**}*

// NOTE(review): not referenced anywhere in the visible source — confirm
// whether it is still needed.
int64_t time_base = 0;

*void transcodeAudioPacket(AVFormatContext* formatContext, AVStream* 
inStream, AVStream* outStream, AVPacket* inPacket, AVFrame*& frame) {**
**    int result = 0;**
**
**    while(inPacket->size > 0) {**
**        result = decodeFromPacket(inStream->codec, inPacket, frame);**
**
**        if (!result) {**
**            continue;**
**        }**
**
**        result = convertFrame(outStream->codec, frame);**
**
**        if (!result) {**
**            continue;**
**        }**
**
**        AVPacket outPacket = {0};**
**        av_init_packet(&outPacket);**
**
**        result = encodeToPacket(outStream->codec, &outPacket, frame);**
**
**        if (result) {**
**            writeFrame(formatContext,&outPacket, outStream->index);**
**        }**
**
**        av_free_packet(&outPacket);**
**    }**
**}*

void processVideo(const std::string& inputFile, const std::string& 
outputFile) {
     AVFormatContext* inFormatContext  = initInputContext(inputFile);
     AVFormatContext* outFormatContext = initOutputContext(outputFile);

     outFormatContext->duration   = inFormatContext->duration;
     outFormatContext->bit_rate   = inFormatContext->bit_rate;
     outFormatContext->start_time = inFormatContext->start_time;

     AVStream* inVideoStream = getStream(inFormatContext, 
AVMEDIA_TYPE_VIDEO);
     AVStream* inAudioStream = getStream(inFormatContext, 
AVMEDIA_TYPE_AUDIO);

     AVOutputFormat* outFormat = outFormatContext->oformat;
     outFormat->codec_tag   = NULL;

     AVStream* outVideoStream = createStream(outFormatContext, 
outFormat->video_codec);
     AVStream* outAudioStream = createStream(outFormatContext, 
outFormat->audio_codec, inAudioStream);

     copyStreamContext(outVideoStream, inVideoStream);
     av_dict_copy(&outAudioStream->metadata, inAudioStream->metadata, 0);

     initEncoder(outFormatContext, inAudioStream, outAudioStream, 
outFormat->audio_codec);

     if (!(outFormat->flags & AVFMT_NOFILE)) {
          if (avio_open(&outFormatContext->pb, outputFile.c_str(), 
AVIO_FLAG_WRITE) < 0) {
              std::cout << "Could not open output file: " << outputFile 
<< std::endl;
              return;
          }
     }

     if (outFormat->flags & AVFMT_GLOBALHEADER) {
         outVideoStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
         outAudioStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
     }

     av_dump_format(inFormatContext, 0, inputFile.c_str(), 0);
     av_dump_format(outFormatContext, 0, outputFile.c_str(), 1);

     AVDictionary* options = NULL;
     av_dict_set(&options, "movflags", "frag_keyframe", 0);

     if (avformat_write_header(outFormatContext, &options) < 0) {
         std::cout << "Unable to write headers " << std::endl;
     }

     av_dict_free(&options);

     AVPacket packet  = {0};
     AVFrame* frame = avcodec_alloc_frame();
     int result    = 0;
     int gotPacket = 0;

*// rewrite video and audio packets**
**    while (av_read_frame(inFormatContext, &packet) >= 0) {**
**        if (packet.stream_index == outAudioStream->index) {**
**            ra++;**
**            //rescalePacket(packet, inFormatContext->start_time, 
inAudioStream->time_base, outAudioStream->time_base);**
**            transcodeAudioPacket(outFormatContext, inAudioStream, 
outAudioStream, &packet, frame);**
**        } else if (packet.stream_index == outVideoStream->index) {**
**            rv++;**
**            //rescalePacket(packet, inFormatContext->start_time, 
inVideoStream->time_base, outVideoStream->time_base);**
**            writeFrame(outFormatContext, &packet, 
outVideoStream->index);**
**        }**
**
**        av_free_packet(&packet);**
**    }**
*
     gotPacket = 1;

     std::cout << "Video => read = " << rv << "; wrote = " << wv << 
std::endl;
     std::cout << "Audio => read = " << ra << "; wrote = " << wa << 
std::endl;

     packet.size = 0;
     packet.data = NULL;

     while (gotPacket) {
         result = avcodec_encode_audio2(outAudioStream->codec, &packet, 
NULL, &gotPacket);

         if (result >= 0 && gotPacket) {
             packet.stream_index = outAudioStream->index;

             if (av_interleaved_write_frame(outFormatContext, &packet) < 
0) {
                 std::cout << "Could not write audio frame" << std::endl;
             } else {
                 wa++;
             }
         } else {
             std::cout << "Could not encode audio frame" << std::endl;
         }

         av_free_packet(&packet);
     }

     std::cout << "Video => read = " << rv << "; wrote = " << wv << 
std::endl;
     std::cout << "Audio => read = " << ra << "; wrote = " << wa << 
std::endl;

     av_free(frame);
     av_free(rawData);
     swr_free(&swrContext);

     if (av_write_trailer(outFormatContext) < 0) {
         std::cout << "Unable to write trailer" << std::endl;
         return;
     }

     avcodec_close(inAudioStream->codec);
     avcodec_close(outAudioStream->codec);

     // close input
     avformat_close_input(&inFormatContext);
     inFormatContext = NULL;

     // close output
     if (!(outFormat->flags & AVFMT_NOFILE)) {
         avio_close(outFormatContext->pb);
     }

     avformat_free_context(outFormatContext);
     outFormatContext = NULL;
}

int main(int argc, char *argv[]) {
     std::string inpFile, outFile;

     if (argc < 2) {
         std::cout << "error: you should specify input file for 
transcoding" << std::endl;
         usage(argv);
         return 1;
     }

     inpFile = argv[1];
     if (argc == 3) {
        outFile = argv[2];
     } else {
        outFile = "out.mp4";
     }

     // register all muxers and demuxers
     avcodec_register_all();
     av_register_all();

     processVideo(inpFile, outFile);

     return 0;
}

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://ffmpeg.org/pipermail/libav-user/attachments/20131114/88847ea1/attachment.html>


More information about the Libav-user mailing list