[FFmpeg-trac] #3556(undetermined:new): bug when resampling stereo to stereo

FFmpeg trac at avcodec.org
Mon Apr 14 10:41:07 CEST 2014


#3556: bug when resampling stereo to stereo
-------------------------------------+-------------------------------------
             Reporter:  olegog       |                     Type:  defect
               Status:  new          |                 Priority:  normal
            Component:               |                  Version:
  undetermined                       |  unspecified
             Keywords:               |               Blocked By:
             Blocking:               |  Reproduced by developer:  0
Analyzed by developer:  0            |
-------------------------------------+-------------------------------------
 I found a bug in ffmpeg.

 {{{

 #include "stdafx.h"
 #include <iostream>

 extern "C"
 {
 #include "libavcodec/avcodec.h"
 #include "libavformat/avformat.h"
 //#include "swscale.h"
 #include "libswresample/swresample.h"
 };

 // Stream handles shared at file scope. ffmpeg_audio_decode() opens fout for
 // the converted output and closes it before returning; fin is never
 // referenced in the code shown here.
 FILE           *fin,
         *fout;

 int ffmpeg_audio_decode( const char * inFile, const char * outFile)
 {
         // Initialize FFmpeg
         av_register_all();

         AVFrame* frame = avcodec_alloc_frame();
         if (!frame)
         {
                 std::cout << "Error allocating the frame" << std::endl;
                 return 1;
         }

         // you can change the file name "01 Push Me to the Floor.wav" to
 whatever the file is you're reading, like "myFile.ogg" or
         // "someFile.webm" and this should still work
         AVFormatContext* formatContext = NULL;
         //if (avformat_open_input(&formatContext, "01 Push Me to the
 Floor.wav", NULL, NULL) != 0)
         if (avformat_open_input(&formatContext, inFile, NULL, NULL) != 0)
         {
                 av_free(frame);
                 std::cout << "Error opening the file" << std::endl;
                 return 1;
         }

         if (avformat_find_stream_info(formatContext, NULL) < 0)
         {
                 av_free(frame);
                 av_close_input_file(formatContext);
                 std::cout << "Error finding the stream info" << std::endl;
                 return 1;
         }

         AVStream* audioStream = NULL;
         // Find the audio stream (some container files can have multiple
 streams in them)
         for (unsigned int i = 0; i < formatContext->nb_streams; ++i)
         {
                 if (formatContext->streams[i]->codec->codec_type ==
 AVMEDIA_TYPE_AUDIO)
                 {
                         audioStream = formatContext->streams[i];
                         break;
                 }
         }

         if (audioStream == NULL)
         {
                 av_free(frame);
                 av_close_input_file(formatContext);
                 std::cout << "Could not find any audio stream in the file"
 << std::endl;
                 return 1;
         }

         AVCodecContext* codecContext = audioStream->codec;

         codecContext->codec =
 avcodec_find_decoder(codecContext->codec_id);
         if (codecContext->codec == NULL)
         {
                 av_free(frame);
                 av_close_input_file(formatContext);
                 std::cout << "Couldn't find a proper decoder" <<
 std::endl;
                 return 1;
         }
         else if (avcodec_open2(codecContext, codecContext->codec, NULL) !=
 0)
         {
                 av_free(frame);
                 av_close_input_file(formatContext);
                 std::cout << "Couldn't open the context with the decoder"
 << std::endl;
                 return 1;
         }

         std::cout << "This stream has " << codecContext->channels << "
 channels and a sample rate of " << codecContext->sample_rate << "Hz" <<
 std::endl;
         std::cout << "The data is in the format " <<
 av_get_sample_fmt_name(codecContext->sample_fmt) << std::endl;

         //codecContext->sample_fmt = AV_SAMPLE_FMT_S16;

         int64_t outChannelLayout =
 AV_CH_LAYOUT_STEREO;//AV_CH_LAYOUT_MONO; //AV_CH_LAYOUT_STEREO;
         AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_S16; // Packed
 audio, non-planar (this is the most common format, and probably what you
 want; also, WAV needs it)
         int outSampleRate = 44100;//8000;//44100;
 /*
         Wav wav;
         wav.sampleRate = outSampleRate;
         wav.sampleSize = av_get_bytes_per_sample(outSampleFormat);
         wav.channels =
 av_get_channel_layout_nb_channels(outChannelLayout);
 */
         // Note that AVCodecContext::channel_layout may or may not be set
 by libavcodec. Because of this,
         // we won't use it, and will instead try to guess the layout from
 the number of channels.
         SwrContext* swrContext = swr_alloc_set_opts(NULL,
                 outChannelLayout,
                 outSampleFormat,
                 outSampleRate,
                 av_get_default_channel_layout(codecContext->channels),
                 codecContext->sample_fmt,
                 codecContext->sample_rate,
                 0,
                 NULL);

         if (swrContext == NULL)
         {
                 av_free(frame);
                 avcodec_close(codecContext);
                 avformat_close_input(&formatContext);
                 std::cout << "Couldn't create the SwrContext" <<
 std::endl;
                 return 1;
         }

         if (swr_init(swrContext) != 0)
         {
                 av_free(frame);
                 avcodec_close(codecContext);
                 avformat_close_input(&formatContext);
                 swr_free(&swrContext);
                 std::cout << "Couldn't initialize the SwrContext" <<
 std::endl;
                 return 1;
         }

         fout = fopen(outFile, "wb+");

         AVPacket packet;
         av_init_packet(&packet);

         // Read the packets in a loop
         while (av_read_frame(formatContext, &packet) == 0)
         {
                 if (packet.stream_index == audioStream->index)
                 {
                         AVPacket decodingPacket = packet;

                         while (decodingPacket.size > 0)
                         {
                                 // Try to decode the packet into a frame
                                 int frameFinished = 0;
                                 int result =
 avcodec_decode_audio4(codecContext, frame, &frameFinished,
 &decodingPacket);

                                 if (result < 0 || frameFinished == 0)
                                 {
                                         break;
                                 }

                                 //std::vector<unsigned char>
 buffer(wav.channels * wav.sampleRate * wav.sampleSize);

                                 unsigned char buffer[100000] = {NULL};
                                 unsigned char* pointers[SWR_CH_MAX] =
 {NULL};
                                 pointers[0] = &buffer[0];

                                 int numSamplesOut =
 swr_convert(swrContext,
 pointers,
 outSampleRate, //wav.sampleRate,
 (const unsigned char**)frame->extended_data,
 //(const uint8_t**)frame->extended_data[0],
 frame->nb_samples);

                                 //processFrame(frame, swrContext, wav);

                                 //fwrite(  frame->data[0], sizeof(short),
 (size_t)(frame->nb_samples), fout);
                                 //fwrite(  frame->extended_data,
 sizeof(short), (size_t)(frame->nb_samples), fout);
                                 //uint16_t uiCnt_1 = (uint16_t
 )frame->extended_data[0];
                                 //uint16_t uiCnt_2 = (uint16_t
 )frame->extended_data[1];

                                 /*
                                 ReSampleContext *rs_ctx = NULL;
                                 // resample to 44100, stereo, s16
                                 rs_ctx = av_audio_resample_init(
                                         1, codecContext->channels,
                                         8000, codecContext->sample_rate,
                                         AV_SAMPLE_FMT_S16,
 codecContext->sample_fmt,
                                         16, 10, 0, 1);

                                 //outbuff =
 (uint8_t*)av_malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE);
                                 short bufferSh[100000] = {NULL};
                                 // resampling
                                 //int after_sampled_len =
 audio_resample(rs_ctx, (short *)buffer, (short *)frame->extended_data[0],
 frame->nb_samples);
                                 int after_sampled_len =
 audio_resample(rs_ctx, bufferSh, (short *)frame->extended_data,
 frame->nb_samples);
                                 */

                                 fwrite(  (short *)buffer, sizeof(short),
 (size_t)numSamplesOut, fout);

                                 decodingPacket.size -= result;
                                 decodingPacket.data += result;
                         }

                         /*
                         // Try to decode the packet into a frame
                         int frameFinished = 0;
                         avcodec_decode_audio4(codecContext, frame,
 &frameFinished, &packet);

                         // Some frames rely on multiple packets, so we
 have to make sure the frame is finished before
                         // we can use it
                         if (frameFinished)
                         {

                                 //fwrite(  (short *)&(frame->data[0]),
 sizeof(short), (size_t)(frame->nb_samples*2), fout);
                                 fwrite(  (short *)&(frame->data[0]),
 sizeof(short), (size_t)(frame->nb_samples*2), fout);

                                 // frame now has usable audio data in it.
 How it's stored in the frame depends on the format of
                                 // the audio. If it's packed audio, all
 the data will be in frame->data[0]. If it's in planar format,
                                 // the data will be in frame->data and
 possibly frame->extended_data. Look at frame->data, frame->nb_samples,
                                 // frame->linesize, and other related
 fields on the FFmpeg docs. I don't know how you're actually using
                                 // the audio data, so I won't add any junk
 here that might confuse you. Typically, if I want to find
                                 // documentation on an FFmpeg structure or
 function, I just type "<name> doxygen" into google (like
                                 // "AVFrame doxygen" for AVFrame's docs)
                         }
                         */
                 }

                 // You *must* call av_free_packet() after each call to
 av_read_frame() or else you'll leak memory
                 av_free_packet(&packet);
         }

         // Some codecs will cause frames to be buffered up in the decoding
 process. If the CODEC_CAP_DELAY flag
         // is set, there can be buffered up frames that need to be
 flushed, so we'll do that
         if (codecContext->codec->capabilities & CODEC_CAP_DELAY)
         {
                 av_init_packet(&packet);
                 // Decode all the remaining frames in the buffer, until
 the end is reached
                 int frameFinished = 0;
                 while (avcodec_decode_audio4(codecContext, frame,
 &frameFinished, &packet) >= 0 && frameFinished)
                 {
                 }
         }

         // Clean up!
         av_free(frame);
         avcodec_close(codecContext);
         av_close_input_file(formatContext);
         fclose(fout);
 }



 }}}


 When the file 02.mp3 is converted to 8000 Hz mono PCM, the result is correct.
     See file voice_01_sinus_8000_mono.raw.
 Mono output is converted correctly at any sample rate.

 ----

 Stereo output is converted incorrectly at any sample rate.
 Converting to 8000 Hz stereo PCM produces a wrong result.
        See file voice_01_ sinus_ 8000_stereo.raw.
 Converting to 44100 Hz stereo PCM is also incorrect.
        See file voice_01_ sinus_ 44100_stereo.raw. The shape of the sine
 wave is distorted.

--
Ticket URL: <https://trac.ffmpeg.org/ticket/3556>
FFmpeg <https://ffmpeg.org>
FFmpeg issue tracker


More information about the FFmpeg-trac mailing list