[FFmpeg-cvslog] lavc: use a separate field for exporting audio encoder padding

Anton Khirnov git at videolan.org
Tue Oct 14 02:45:36 CEST 2014


ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Sat Aug 23 12:40:50 2014 +0000| [2df0c32ea12ddfa72ba88309812bfb13b674130f] | committer: Anton Khirnov

lavc: use a separate field for exporting audio encoder padding

Currently, the amount of padding inserted at the beginning by some audio
encoders is exported through AVCodecContext.delay. However:
- the term 'delay' is heavily overloaded and can have multiple different
  meanings even in the case of audio encoding.
- this field has entirely different meanings, depending on whether the
  codec context is used for encoding or decoding (and has yet another
  different meaning for video), preventing generic handling of the codec
  context.

Therefore, add a new field -- AVCodecContext.initial_padding. It could
conceivably be used for decoding as well at a later point.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2df0c32ea12ddfa72ba88309812bfb13b674130f
---

 doc/APIchanges                 |    4 ++++
 libavcodec/aacenc.c            |    2 +-
 libavcodec/ac3enc.c            |    2 +-
 libavcodec/audio_frame_queue.c |    4 ++--
 libavcodec/avcodec.h           |   28 ++++++++++++++++++----------
 libavcodec/g722enc.c           |    4 ++--
 libavcodec/libfaac.c           |    2 +-
 libavcodec/libfdk-aacenc.c     |    2 +-
 libavcodec/libmp3lame.c        |    2 +-
 libavcodec/libopencore-amr.c   |    4 ++--
 libavcodec/libopusenc.c        |    4 ++--
 libavcodec/libspeexenc.c       |    2 +-
 libavcodec/libtwolame.c        |    4 ++--
 libavcodec/libvo-aacenc.c      |    2 +-
 libavcodec/libvo-amrwbenc.c    |    4 ++--
 libavcodec/libvorbis.c         |    4 ++--
 libavcodec/mpegaudioenc.c      |    4 ++--
 libavcodec/nellymoserenc.c     |    2 +-
 libavcodec/ra144enc.c          |    2 +-
 libavcodec/utils.c             |    9 +++++++++
 libavcodec/version.h           |    7 +++++--
 libavcodec/wmaenc.c            |    5 ++---
 22 files changed, 63 insertions(+), 40 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index ee31719..48b0ac8 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil:     2014-08-09
 
 API changes, most recent first:
 
+2014-10-13 - xxxxxxx - lavc 56.03.0 - avcodec.h
+  Add AVCodecContext.initial_padding. Deprecate the use of AVCodecContext.delay
+  for audio encoding.
+
 2014-09-xx - xxxxxxx - lavu 54.04.0 - pixdesc.h
   Add API to return the name of frame and context color properties.
 
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 55aa2f1..fa0ac00 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -777,7 +777,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     for (i = 0; i < 428; i++)
         ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
 
-    avctx->delay = 1024;
+    avctx->initial_padding = 1024;
     ff_af_queue_init(avctx, &s->afq);
 
     return 0;
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 5c02e7f..13666ef 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -2436,7 +2436,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
         return ret;
 
     avctx->frame_size = AC3_BLOCK_SIZE * s->num_blocks;
-    avctx->delay      = AC3_BLOCK_SIZE;
+    avctx->initial_padding = AC3_BLOCK_SIZE;
 
     s->bitstream_mode = avctx->audio_service_type;
     if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)
diff --git a/libavcodec/audio_frame_queue.c b/libavcodec/audio_frame_queue.c
index 0a8b25c..82c16a1 100644
--- a/libavcodec/audio_frame_queue.c
+++ b/libavcodec/audio_frame_queue.c
@@ -29,8 +29,8 @@ av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
 {
     afq->avctx             = avctx;
     afq->next_pts          = AV_NOPTS_VALUE;
-    afq->remaining_delay   = avctx->delay;
-    afq->remaining_samples = avctx->delay;
+    afq->remaining_delay   = avctx->initial_padding;
+    afq->remaining_samples = avctx->initial_padding;
     afq->frame_queue       = NULL;
 }
 
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index f0fa7a9..a24ce40 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1191,16 +1191,7 @@ typedef struct AVCodecContext {
      *   encoded input.
      *
      * Audio:
-     *   For encoding, this is the number of "priming" samples added by the
-     *   encoder to the beginning of the stream. The decoded output will be
-     *   delayed by this many samples relative to the input to the encoder (or
-     *   more, if the decoder adds its own padding).
-     *   The timestamps on the output packets are adjusted by the encoder so
-     *   that they always refer to the first sample of the data actually
-     *   contained in the packet, including any added padding.
-     *   E.g. if the timebase is 1/samplerate and the timestamp of the first
-     *   input sample is 0, the timestamp of the first output packet will be
-     *   -delay.
+     *   For encoding, this field is unused (see initial_padding).
      *
      *   For decoding, this is the number of samples the decoder needs to
      *   output before the decoder's output is valid. When seeking, you should
@@ -2780,6 +2771,23 @@ typedef struct AVCodecContext {
      * use AVOptions to set this field.
      */
     int side_data_only_packets;
+
+    /**
+     * Audio only. The number of "priming" samples (padding) inserted by the
+     * encoder at the beginning of the audio. I.e. this number of leading
+     * decoded samples must be discarded by the caller to get the original audio
+     * without leading padding.
+     *
+     * - decoding: unused
+     * - encoding: Set by libavcodec. The timestamps on the output packets are
+     *             adjusted by the encoder so that they always refer to the
+     *             first sample of the data actually contained in the packet,
+     *             including any added padding.  E.g. if the timebase is
+     *             1/samplerate and the timestamp of the first input sample is
+     *             0, the timestamp of the first output packet will be
+     *             -initial_padding.
+     */
+    int initial_padding;
 } AVCodecContext;
 
 /**
diff --git a/libavcodec/g722enc.c b/libavcodec/g722enc.c
index e7b67da..be43794 100644
--- a/libavcodec/g722enc.c
+++ b/libavcodec/g722enc.c
@@ -106,7 +106,7 @@ static av_cold int g722_encode_init(AVCodecContext * avctx)
            a common packet size for VoIP applications */
         avctx->frame_size = 320;
     }
-    avctx->delay = 22;
+    avctx->initial_padding = 22;
 
     if (avctx->trellis) {
         /* validate trellis */
@@ -375,7 +375,7 @@ static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
 
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     *got_packet_ptr = 1;
     return 0;
 }
diff --git a/libavcodec/libfaac.c b/libavcodec/libfaac.c
index 9b5b11a..ad51a03 100644
--- a/libavcodec/libfaac.c
+++ b/libavcodec/libfaac.c
@@ -157,7 +157,7 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx)
         goto error;
     }
 
-    avctx->delay = FAAC_DELAY_SAMPLES;
+    avctx->initial_padding = FAAC_DELAY_SAMPLES;
     ff_af_queue_init(avctx, &s->afq);
 
     return 0;
diff --git a/libavcodec/libfdk-aacenc.c b/libavcodec/libfdk-aacenc.c
index 34717d4..d45fad2 100644
--- a/libavcodec/libfdk-aacenc.c
+++ b/libavcodec/libfdk-aacenc.c
@@ -286,7 +286,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     }
 
     avctx->frame_size = info.frameLength;
-    avctx->delay      = info.encoderDelay;
+    avctx->initial_padding = info.encoderDelay;
     ff_af_queue_init(avctx, &s->afq);
 
     if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
index b7a323a..23f1581 100644
--- a/libavcodec/libmp3lame.c
+++ b/libavcodec/libmp3lame.c
@@ -137,7 +137,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
     }
 
     /* get encoder delay */
-    avctx->delay = lame_get_encoder_delay(s->gfp) + 528 + 1;
+    avctx->initial_padding = lame_get_encoder_delay(s->gfp) + 528 + 1;
     ff_af_queue_init(avctx, &s->afq);
 
     avctx->frame_size  = lame_get_framesize(s->gfp);
diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c
index 6b45959..0704e94 100644
--- a/libavcodec/libopencore-amr.c
+++ b/libavcodec/libopencore-amr.c
@@ -200,7 +200,7 @@ static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
     }
 
     avctx->frame_size  = 160;
-    avctx->delay       =  50;
+    avctx->initial_padding = 50;
     ff_af_queue_init(avctx, &s->afq);
 
     s->enc_state = Encoder_Interface_init(s->enc_dtx);
@@ -250,7 +250,7 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                 return AVERROR(ENOMEM);
             memcpy(flush_buf, samples, frame->nb_samples * sizeof(*flush_buf));
             samples = flush_buf;
-            if (frame->nb_samples < avctx->frame_size - avctx->delay)
+            if (frame->nb_samples < avctx->frame_size - avctx->initial_padding)
                 s->enc_last_frame = -1;
         }
         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) {
diff --git a/libavcodec/libopusenc.c b/libavcodec/libopusenc.c
index 9af8bcd..ee9655b 100644
--- a/libavcodec/libopusenc.c
+++ b/libavcodec/libopusenc.c
@@ -87,7 +87,7 @@ static void libopus_write_header(AVCodecContext *avctx, int stream_count,
     bytestream_put_buffer(&p, "OpusHead", 8);
     bytestream_put_byte(&p, 1); /* Version */
     bytestream_put_byte(&p, channels);
-    bytestream_put_le16(&p, avctx->delay); /* Lookahead samples at 48kHz */
+    bytestream_put_le16(&p, avctx->initial_padding); /* Lookahead samples at 48kHz */
     bytestream_put_le32(&p, avctx->sample_rate); /* Original sample rate */
     bytestream_put_le16(&p, 0); /* Gain of 0dB is recommended. */
 
@@ -277,7 +277,7 @@ static int av_cold libopus_encode_init(AVCodecContext *avctx)
         goto fail;
     }
 
-    ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->delay));
+    ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->initial_padding));
     if (ret != OPUS_OK)
         av_log(avctx, AV_LOG_WARNING,
                "Unable to get number of lookahead samples: %s\n",
diff --git a/libavcodec/libspeexenc.c b/libavcodec/libspeexenc.c
index 651d7ac..98f89b2 100644
--- a/libavcodec/libspeexenc.c
+++ b/libavcodec/libspeexenc.c
@@ -235,7 +235,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     s->header.frames_per_packet = s->frames_per_packet;
 
     /* set encoding delay */
-    speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
+    speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->initial_padding);
     ff_af_queue_init(avctx, &s->afq);
 
     /* create header packet bytes from header struct */
diff --git a/libavcodec/libtwolame.c b/libavcodec/libtwolame.c
index def5fee..400985a 100644
--- a/libavcodec/libtwolame.c
+++ b/libavcodec/libtwolame.c
@@ -60,7 +60,7 @@ static av_cold int twolame_encode_init(AVCodecContext *avctx)
     int ret;
 
     avctx->frame_size = TWOLAME_SAMPLES_PER_FRAME;
-    avctx->delay      = 512 - 32 + 1;
+    avctx->initial_padding = 512 - 32 + 1;
 
     s->glopts = twolame_init();
     if (!s->glopts)
@@ -151,7 +151,7 @@ static int twolame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
     if (frame) {
         if (frame->pts != AV_NOPTS_VALUE)
-            avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+            avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
     } else {
         avpkt->pts = s->next_pts;
     }
diff --git a/libavcodec/libvo-aacenc.c b/libavcodec/libvo-aacenc.c
index 9450792..6dd7117 100644
--- a/libavcodec/libvo-aacenc.c
+++ b/libavcodec/libvo-aacenc.c
@@ -61,7 +61,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     int index, ret;
 
     avctx->frame_size = FRAME_SIZE;
-    avctx->delay      = ENC_DELAY;
+    avctx->initial_padding = ENC_DELAY;
     s->last_frame     = 2;
     ff_af_queue_init(avctx, &s->afq);
 
diff --git a/libavcodec/libvo-amrwbenc.c b/libavcodec/libvo-amrwbenc.c
index b255ba5..da3941b 100644
--- a/libavcodec/libvo-amrwbenc.c
+++ b/libavcodec/libvo-amrwbenc.c
@@ -93,7 +93,7 @@ static av_cold int amr_wb_encode_init(AVCodecContext *avctx)
     s->last_bitrate    = avctx->bit_rate;
 
     avctx->frame_size  = 320;
-    avctx->delay       =  80;
+    avctx->initial_padding =  80;
 
     s->state     = E_IF_init();
 
@@ -131,7 +131,7 @@ static int amr_wb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
 
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 
     avpkt->size = size;
     *got_packet_ptr = 1;
diff --git a/libavcodec/libvorbis.c b/libavcodec/libvorbis.c
index 4b4caaa..07973e6 100644
--- a/libavcodec/libvorbis.c
+++ b/libavcodec/libvorbis.c
@@ -322,8 +322,8 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     if (duration > 0) {
         /* we do not know encoder delay until we get the first packet from
          * libvorbis, so we have to update the AudioFrameQueue counts */
-        if (!avctx->delay) {
-            avctx->delay              = duration;
+        if (!avctx->initial_padding) {
+            avctx->initial_padding    = duration;
             s->afq.remaining_delay   += duration;
             s->afq.remaining_samples += duration;
         }
diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc.c
index 51a6f5b..4e074a5 100644
--- a/libavcodec/mpegaudioenc.c
+++ b/libavcodec/mpegaudioenc.c
@@ -84,7 +84,7 @@ static av_cold int MPA_encode_init(AVCodecContext *avctx)
     bitrate = bitrate / 1000;
     s->nb_channels = channels;
     avctx->frame_size = MPA_FRAME_SIZE;
-    avctx->delay      = 512 - 32 + 1;
+    avctx->initial_padding = 512 - 32 + 1;
 
     /* encoding freq */
     s->lsf = 0;
@@ -735,7 +735,7 @@ static int MPA_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     encode_frame(s, bit_alloc, padding);
 
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 
     avpkt->size = put_bits_count(&s->pb) / 8;
     *got_packet_ptr = 1;
diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c
index 5732163..9a84591 100644
--- a/libavcodec/nellymoserenc.c
+++ b/libavcodec/nellymoserenc.c
@@ -165,7 +165,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     }
 
     avctx->frame_size = NELLY_SAMPLES;
-    avctx->delay      = NELLY_BUF_LEN;
+    avctx->initial_padding = NELLY_BUF_LEN;
     ff_af_queue_init(avctx, &s->afq);
     s->avctx = avctx;
     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index 7627adc..fd04766 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -56,7 +56,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
         return -1;
     }
     avctx->frame_size = NBLOCKS * BLOCKSIZE;
-    avctx->delay      = avctx->frame_size;
+    avctx->initial_padding = avctx->frame_size;
     avctx->bit_rate = 8000;
     ractx = avctx->priv_data;
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 89f249f..b28a659 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -1240,6 +1240,11 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         }
     }
 
+#if FF_API_AUDIOENC_DELAY
+    if (av_codec_is_encoder(avctx->codec))
+        avctx->delay = avctx->initial_padding;
+#endif
+
     if (av_codec_is_decoder(avctx->codec)) {
         /* validate channel layout from the decoder */
         if (avctx->channel_layout) {
@@ -1447,6 +1452,10 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
 end:
     av_frame_free(&padded_frame);
 
+#if FF_API_AUDIOENC_DELAY
+    avctx->delay = avctx->initial_padding;
+#endif
+
     return ret;
 }
 
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 90b1f10..c44686d 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,8 +29,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 56
-#define LIBAVCODEC_VERSION_MINOR  2
-#define LIBAVCODEC_VERSION_MICRO  2
+#define LIBAVCODEC_VERSION_MINOR  3
+#define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
@@ -153,5 +153,8 @@
 #ifndef FF_API_AFD
 #define FF_API_AFD               (LIBAVCODEC_VERSION_MAJOR < 57)
 #endif
+#ifndef FF_API_AUDIOENC_DELAY
+#define FF_API_AUDIOENC_DELAY    (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
 
 #endif /* AVCODEC_VERSION_H */
diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c
index 95fc199..e801663 100644
--- a/libavcodec/wmaenc.c
+++ b/libavcodec/wmaenc.c
@@ -92,8 +92,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     avctx->block_align = block_align;
     avctx->bit_rate    = avctx->block_align * 8LL * avctx->sample_rate /
                          s->frame_len;
-    avctx->frame_size  =
-    avctx->delay       = s->frame_len;
+    avctx->frame_size = avctx->initial_padding = s->frame_len;
 
     return 0;
 }
@@ -420,7 +419,7 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
     flush_put_bits(&s->pb);
 
     if (frame->pts != AV_NOPTS_VALUE)
-        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 
     avpkt->size     = avctx->block_align;
     *got_packet_ptr = 1;



More information about the ffmpeg-cvslog mailing list