[Libav-user] Writing audio to mp4 file

Mon Mar 30 11:54:24 EEST 2020

Hello,

In a previous email I asked about the best way to write audio to a file.

I'm trying to write screen capture software that records the screen and
the audio from the microphone.

At the moment I'm struggling with the audio part of this project. I'm not
able to write any audio to a new mp4 file that contains no video stream.

I'm not really sure what the best way to post code is, but I'll do my best.

This is the code that I use to open the file. 

/**
 * Allocates the output (muxer) context for the given file name and looks up
 * the matching output format.
 *
 * On success outAVFormatContext and outputFormat are set.
 *
 * @param filename  Path of the file to be written; the container format is
 *                  guessed from it.
 * @return 0 on success. Every failure prints a message and terminates the
 *         process with exit(1), so no error code is ever returned.
 */
int Recorder::init_outputfile(QString filename)
{
    // Keep the std::string alive for as long as the raw pointer is in use.
    const std::string tmp = filename.toStdString();
    const char* output_file = tmp.c_str();

    avformat_alloc_output_context2(&outAVFormatContext, NULL, NULL, output_file);
    if (!outAVFormatContext)
    {
        std::cout << "\nerror in allocating av format output context";
        exit(1);
    }

    /* Returns the output format in the list of registered output formats
       which best matches the provided parameters, or returns NULL if there
       is no match. */
    outputFormat = av_guess_format(NULL, output_file, NULL);
    if (!outputFormat)
    {
        std::cout << "\nerror in guessing the video format. try with correct format";
        exit(1);
    }

    // The function is declared to return int; falling off the end was
    // undefined behaviour.
    return 0;
}

The part where I open the output stream for audio

int Recorder::openOutputStream_audio()

{

       int ret = 0;

       int codec_id = 0;

       stream = avformat_new_stream(outAVFormatContext, NULL);

       if (!stream)

       {

             cout << "\nerror in creating a av format new stream";

             exit(1);

       }

       outAVCodec = avcodec_find_encoder(AV_CODEC_ID_AAC);

       if (!outAVCodec)

       {

             cout << "\nerror in finding the av codecs. try again with
correct codec";

             exit(1);

       }

       outAVCodecContext = avcodec_alloc_context3(outAVCodec);

       if (!outAVCodecContext)

       {

             cout << "\nerror in allocating the codec contexts";

             exit(1);

       }

       /* set property of the video file */

       outAVCodecContext = stream->codec;

       outAVCodecContext->codec_id = AV_CODEC_ID_AAC;// AV_CODEC_ID_MPEG4;
// AV_CODEC_ID_H264 // AV_CODEC_ID_MPEG1VIDEO

       outAVCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;

       outAVCodecContext->channels = 2;

       outAVCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;

       outAVCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;

       outAVCodecContext->bit_rate = 128000;

       outAVCodecContext->sample_rate = 48000;

       outAVCodecContext->time_base.num = 1;

       outAVCodecContext->time_base.den = 48000; //30; //15fps

       /* Some container formats (like MP4) require global headers to be
present

          Mark the encoder so that it behaves accordingly. */

       if (outAVFormatContext->oformat->flags & AVFMT_GLOBALHEADER)

       {

             outAVCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

       }

       ret = avcodec_open2(outAVCodecContext, outAVCodec, NULL);

       if (ret < 0)

       {

             cout << "\nerror in opening the avcodec";

             exit(1);

       }

       return 0;

}

Open the input stream for audio

int Recorder::openAudio()

{

       inAVFormatContext = avformat_alloc_context();//Allocate an
AVFormatContext.

       AVInputFormat* inputFormat = av_find_input_format("dshow");

       int value = avformat_open_input(pFormatCtx, "audio=Mikrofon (Realtek
High Definition Audio)", inputFormat, NULL);

       if (value != 0)

       {

             std::cout << "\nerror in opening input device";

             exit(1);

       }

       if (value < 0)

       {

             cout << "\nunable to find the stream information";

             exit(1);

       }

       /* select the audio stream */

       int ret = av_find_best_stream(inAVFormatContext, AVMEDIA_TYPE_AUDIO,
-1, -1, &inAVCodec, 0);

       if (ret < 0) {

             av_log(NULL, AV_LOG_ERROR, "Cannot find a audio stream in the
input file\n");

             return ret;

       }

       videoSteamIndex = ret;

       // assign pAVFormatContext to VideoStreamIndx

       inAVCodecContext = avcodec_alloc_context3(inAVCodec);

       if (!inAVCodecContext)

             return AVERROR(ENOMEM);

       avcodec_parameters_to_context(inAVCodecContext,
inAVFormatContext->streams[videoSteamIndex]->codecpar);

       value = avcodec_open2(inAVCodecContext, inAVCodec, NULL);//Initialize
the AVCodecContext to use the given AVCodec.

       if (value < 0)

       {

             cout << "\nunable to open the av codec";

             exit(1);

       }

}

The actual magic should happen here, the transcoding and en/decode part.

This part is experimental so it isn't final.

void Recorder::decode_audio()

{

/* create empty video file */

       if (!(outAVFormatContext->flags & AVFMT_NOFILE))

       {

             if (avio_open2(&outAVFormatContext->pb, "test.mp4",
AVIO_FLAG_WRITE, NULL, NULL) < 0)

             {

                    cout << "\nerror in creating the video file";

                    exit(1);

             }

       }

       if (!outAVFormatContext->nb_streams)

       {

             cout << "\noutput file dose not contain any stream";

             exit(1);

       }

       /* imp: mp4 container or some advanced container file required header
information*/

       int ret = avformat_write_header(outAVFormatContext, &options);

       if (ret < 0)

       {

             cout << "\nerror in writing the header context";

             exit(1);

       }

       int frameFinished;

       int value;

       AVPacket* inPacket = av_packet_alloc();

       AVFrame* inFrame = av_frame_alloc();

       int got_frame;

       SwrContext* swrCtx_ = nullptr;

       while (threading)

       {

             if (av_read_frame(inAVFormatContext, inPacket) >= 0) {

                    if (inPacket->stream_index == videoSteamIndex)

                    {

                           int len = avcodec_decode_audio4(inAVCodecContext,
inFrame, &got_frame, inPacket);

                           if (len < 0) {

                                  fprintf(stderr, "Error while decoding\n");

                                  exit(1);

                           }

                           if (got_frame) {

                                  /* if a frame has been decoded, output it
*/

                                  int data_size =
av_get_bytes_per_sample(outAVCodecContext->sample_fmt);

                                  if (data_size < 0) {

                                        /* This should not occur, checking
just for paranoia */

                                        fprintf(stderr, "Failed to calculate
data size\n");

                                        exit(1);

                                  }

                                  if (swrCtx_ == nullptr) {

                                         swrCtx_ = swr_alloc_set_opts(NULL,

outAVCodecContext->channel_layout,

outAVCodecContext->sample_fmt,

outAVCodecContext->sample_rate,

                                               AV_CH_LAYOUT_STEREO,

                                               inAVCodecContext->sample_fmt,

inAVCodecContext->sample_rate,

                                               0, NULL

                                        );

                                        swr_init(swrCtx_);

                                  }

                                  AVFrame* outFrame =
av_frame_alloc();//Allocate an AVFrame and set its fields to default values.

                                  if (!outFrame)

                                  {

                                        cout << "\nunable to release the
avframe resources for outframe";

                                        exit(1);

                                  }

                                  av_frame_copy_props(outFrame, inFrame);

                                  outFrame->channel_layout =
outAVCodecContext->channel_layout;

                                  outFrame->format =
outAVCodecContext->sample_fmt;

                                  outFrame->sample_rate =
outAVCodecContext->sample_rate;

                                  swr_convert_frame(swrCtx_, outFrame,
inFrame);

                                  AVPacket* outPacket = av_packet_alloc();

                                  int got_packet_ptr;

                                  avcodec_encode_audio2(outAVCodecContext,
outPacket, outFrame, &got_packet_ptr);

                                  if (got_packet_ptr) {

                                        std::cout << outPacket->pts << "; "
<< outPacket->dts << "\n";

                                        outPacket->stream_index =
videoSteamIndex;

                                        if
(av_write_frame(outAVFormatContext, outPacket) != 0)

                                        {

                                               cout << "\nerror in writing
audio frame";

                                        }

                                  }

                           }

                    }

             }

       }// End of while-loop

int value = av_write_trailer(outAVFormatContext);

       if (value < 0)

       {

              cout << "\nerror in writing av trailer";

              exit(1);

       }

}

I'm sure there is a very big design flaw in this, but I can't find it, to be
honest.

The method init_outputfile(..) should work just fine, but this is the only
part I'm sure about.

I hope someone can help me as this should be a very basic flaw.

Sebastian Wichmann

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://ffmpeg.org/pipermail/libav-user/attachments/20200330/98808b7f/attachment.html>