[FFmpeg-devel] [PATCH] Add Lame info tag to the Xing/Info header of mp3 files

Fri May 23 17:38:06 CEST 2014

> Is this related to ticket #3577?

I wasn't aware of it, but yes, it fixes #3577.
I may submit a patch for #3576 some time in the future.

I am addressing the comment, splitting the patch and resubmitting.
Thanks!

Giovanni

On Fri, May 23, 2014 at 6:01 AM, Michael Niedermayer <michaelni at gmx.at>wrote:

> On Thu, May 22, 2014 at 05:31:09PM -0700, Giovanni Motta wrote:
> > Add Lame info tag (with delay, padding, replay gain, signal peak, etc..)
> > to the Xing/Info header of mp3 files.
> >
> > A few notes/comments:
> >
> > - Fate initially failed for lavf.mp3; this is expected.
> >   The test result has been updated with new CRC.
> >
> > - The Lame info tag is generated by libmp3lame and passed to the mp3enc
> via extradata.
> >
> > - To keep the size of the tag constant and simplify the code, vbr_scale
> is always added.
> >
> > - The Lame string vendor in the tag is fixed length, so vendor is trimmed
> >   to 9 bytes and padded with 0x20 if shorter.
> >   Note: the current vendor string in ffmpeg is too long. The lame tags
> >   currently generated doesn't work and the delay added after the string
> is
> >   never parsed correctly.
> >
> > - replay_gain and find_peak need a version of lame patched with
> >   libmp3lame/lame.c Revision 1.367 (patch tracker item #66):
> http://sourceforge.net/p/lame/patches/66/
> >   They have no effect otherwise.
> >
> > - find_peak_sample only works if Lame is configured with
> --enable-decoder.
> >   It has no effect otherwise.
> >
> > - Some fields in the Lame tag are not set because not accessible from
> >   the set/get API (preset and noise shaping, for example). I will bring
> this to
> >   the attention of the Lame developers and help there with any change if
> we
> >   decide to merge the patch.
> >
> > Thanks
> >
> > Giovanni
> >
> >
> >
> > ---
> >  libavcodec/libmp3lame.c | 256
> +++++++++++++++++++++++++++++++++++++++++++++++-
> >  libavformat/mp3enc.c    | 112 ++++++++++++++++-----
> >  tests/ref/lavf-fate/mp3 |   2 +-
> >  3 files changed, 344 insertions(+), 26 deletions(-)
>
> please split this patch between libavcodec and libavformat
>
>
> >
> > diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
> > index a8c39cc..8f8b5ee 100644
> > --- a/libavcodec/libmp3lame.c
> > +++ b/libavcodec/libmp3lame.c
> > @@ -26,6 +26,7 @@
> >
> >  #include <lame/lame.h>
> >
> > +#include "libavutil/avstring.h"
> >  #include "libavutil/channel_layout.h"
> >  #include "libavutil/common.h"
> >  #include "libavutil/float_dsp.h"
> > @@ -40,6 +41,9 @@
> >
> >  #define BUFFER_SIZE (7200 + 2 * MPA_FRAME_SIZE + MPA_FRAME_SIZE /
> 4+1000) // FIXME: Buffer size to small? Adding 1000 to make up for it.
> >
> > +/* size of the Lame Info tag added to the Xing/Info header */
> > +#define LAME_EXT_SIZE 40
> > +
> >  typedef struct LAMEContext {
> >      AVClass *class;
> >      AVCodecContext *avctx;
> > @@ -48,6 +52,8 @@ typedef struct LAMEContext {
> >      int buffer_index;
> >      int buffer_size;
> >      int reservoir;
> > +    int find_peak_sample;
> > +    int replay_gain;
> >      int joint_stereo;
> >      int abr;
> >      float *samples_flt[2];
>
> > @@ -93,6 +99,10 @@ static av_cold int mp3lame_encode_init(AVCodecContext
> *avctx)
> >
> >      s->avctx = avctx;
> >
> > +    /* extradata is used to store the Lame Info tag */
> > +    avctx->extradata = NULL;
> > +    avctx->extradata_size = 0;
>
> not needed
>
>
>
> > +
> >      /* initialize LAME and get defaults */
> >      if ((s->gfp = lame_init()) == NULL)
> >          return AVERROR(ENOMEM);
> > @@ -131,6 +141,12 @@ static av_cold int
> mp3lame_encode_init(AVCodecContext *avctx)
> >      /* bit reservoir usage */
> >      lame_set_disable_reservoir(s->gfp, !s->reservoir);
> >
> > +    /* find peak sample */
> > +    lame_set_decode_on_the_fly(s->gfp, s->find_peak_sample);
> > +
> > +    /* compute replay gain */
> > +    lame_set_findReplayGain(s->gfp, s->replay_gain);
> > +
> >      /* set specified parameters */
> >      if (lame_init_params(s->gfp) < 0) {
> >          ret = -1;
> > @@ -176,6 +192,226 @@ error:
> >                         s->buffer_size - s->buffer_index);
>     \
> >  } while (0)
> >
> > +/* flow and comments adapted from PutLameVBR() in Lame's VbrTag.c */
> > +static int GetLameInfoTag(lame_global_flags const *gfp, uint8_t *
> extradata)
> > +{
> > +    int nBytesWritten = 0;
> > +
> > +    int enc_delay = lame_get_encoder_delay(gfp);      // encoder delay
> > +    int enc_padding = lame_get_encoder_padding(gfp);  // encoder padding
> > +
> > +    /* recall: gfp->VBR_q is for example set by the switch -V */
> > +    /*         gfp->quality by -q, -h, -f, etc.               */
> > +    int nQuality = (100 - 10 * lame_get_VBR_q(gfp) -
> lame_get_quality(gfp));
> > +
> > +    const char *szVersion = get_lame_very_short_version();
> > +    uint8_t nVBR;
> > +    uint8_t nRevision = 0x00;
> > +    uint8_t nRevMethod;
> > +
> > +    /* numbering is different in vbr_mode vs. Lame tag */
> > +    uint8_t vbr_type_translator[] = { 1, 5, 3, 2, 4, 0, 3 };
> > +
> > +    uint8_t nLowpass =
> > +        (((lame_get_lowpassfreq(gfp) / 100.0) + .5) > 255 ?
> > +                255 : (lame_get_lowpassfreq(gfp) / 100.0) + .5);
> > +
> > +    uint32_t nPeakSignalAmplitude = 0;
> > +    uint16_t nRadioReplayGain = 0;
> > +    uint16_t nAudiophileReplayGain = 0;
> > +
> > +    uint8_t nNoiseShaping = 0;
> > +    uint8_t nStereoMode = 0;
> > +    int bNonOptimal = 0;
> > +    uint8_t nSourceFreq = 0;
> > +
> > +    uint8_t nMisc = 0;
> > +
> > +    size_t  nMusicLength = 0;
> > +    uint16_t nMusicCRC = 0;
> > +    uint16_t nTagCRC = 0;
> > +
> > +    /* psy model type: Gpsycho or NsPsytune */
> > +    unsigned char bExpNPsyTune = lame_get_exp_nspsytune(gfp) & 1;
> > +    unsigned char bSafeJoint = (lame_get_exp_nspsytune(gfp) & 2) != 0;
> > +
> > +    unsigned char bNoGapMore = 0;
> > +    unsigned char bNoGapPrevious = 0;
> > +
> > +    int nNoGapCount = lame_get_nogap_total(gfp);
> > +    int nNoGapCurr = lame_get_nogap_currentindex(gfp);
> > +
> > +    uint8_t nAthType = lame_get_ATHtype(gfp);  /* 4 bits */
> > +    int nInSampleRate = lame_get_in_samplerate(gfp);
> > +    uint8_t nFlags = 0;
> > +
> > +    /* if ABR, {store bitrate <= 255} else {store "-b"} */
> > +    int nABRBitrate;
> > +    switch (lame_get_VBR(gfp)) {
> > +    case vbr_abr:
> > +        nABRBitrate = lame_get_VBR_mean_bitrate_kbps(gfp);
> > +        break;
> > +    case vbr_off:
> > +        nABRBitrate = lame_get_brate(gfp);
> > +        break;
> > +    default:          // vbr modes
> > +        nABRBitrate = lame_get_VBR_mean_bitrate_kbps(gfp);
> > +    }
> > +
> > +    /* revision and vbr method */
> > +    if (lame_get_VBR(gfp) < sizeof(vbr_type_translator))
> > +        nVBR = vbr_type_translator[lame_get_VBR(gfp)];
> > +    else
> > +        nVBR = 0x00;    // unknown
> > +    nRevMethod = 0x10 * nRevision + nVBR;
> > +
> > +
> > +    /* ReplayGain */
> > +    if (lame_get_findReplayGain(gfp)) {
> > +        int nRadioGain = lame_get_RadioGain(gfp);
> > +        if (nRadioGain > 0x1FE)
> > +            nRadioGain = 0x1FE;
> > +        if (nRadioGain < -0x1FE)
> > +            nRadioGain = -0x1FE;
> > +
> > +        nRadioReplayGain = 0x2000;  // set name code
> > +        nRadioReplayGain |= 0xC00;  // set originator code to
> `determined automatically'
> > +
> > +        if (nRadioGain >= 0) {
> > +            nRadioReplayGain |= nRadioGain;  // set gain adjustment
> > +        } else {
> > +            nRadioReplayGain |= 0x200;       // set the sign bit
> > +            nRadioReplayGain |= -nRadioGain; // set gain adjustment
> > +        }
> > +    }
> > +
> > +    /* peak sample */
> > +    if (lame_get_decode_on_the_fly(gfp))
> > +        nPeakSignalAmplitude = abs((int) ((lame_get_PeakSample(gfp) /
> 32767.0) * pow(2, 23) + .5));
> > +
> > +    /* nogap */
> > +    if (nNoGapCount != -1) {
> > +        if (nNoGapCurr > 0)
> > +            bNoGapPrevious = 1;
> > +
> > +        if (nNoGapCurr < nNoGapCount - 1)
> > +            bNoGapMore = 1;
> > +    }
> > +
> > +    /* flags */
> > +    nFlags = nAthType + (bExpNPsyTune << 4)
> > +        + (bSafeJoint << 5)
> > +        + (bNoGapMore << 6)
> > +        + (bNoGapPrevious << 7);
> > +
> > +    if (nQuality < 0)
> > +        nQuality = 0;
> > +
> > +    /* stereo mode field... a bit ugly */
> > +    switch (lame_get_mode(gfp)) {
> > +    case MONO:
> > +        nStereoMode = 0;
> > +        break;
> > +    case STEREO:
> > +        nStereoMode = 1;
> > +        break;
> > +    case DUAL_CHANNEL:
> > +        nStereoMode = 2;
> > +        break;
> > +    case JOINT_STEREO:
> > +        if (lame_get_force_ms(gfp))
> > +            nStereoMode = 4;
> > +        else
> > +            nStereoMode = 3;
> > +        break;
> > +    case NOT_SET:
> > +        // FALLTHROUGH
> > +    default:
> > +        nStereoMode = 7;
> > +        break;
> > +    }
> > +
> > +    /* intensity stereo : nStereoMode = 6. IS is not implemented */
> > +    if (nInSampleRate <= 32000)
> > +        nSourceFreq = 0x00;
> > +    else if (nInSampleRate == 48000)
> > +        nSourceFreq = 0x02;
> > +    else if (nInSampleRate > 48000)
> > +        nSourceFreq = 0x03;
> > +    else
> > +        nSourceFreq = 0x01;   // default is 44100Hz
> > +
> > +    /* check if the user overrided the default LAME behaviour with some
> nasty options */
> > +    if ((lame_get_no_short_blocks(gfp) == 1) ||     // short blocks
> dispensed
> > +        (lame_get_force_short_blocks(gfp) == 1) ||  // short blocks
> forced
> > +        ((lame_get_lowpassfreq(gfp) == -1) &&
> (lame_get_highpassfreq(gfp) == -1)) ||    // "-k"
> > +        (lame_get_scale_left(gfp) < lame_get_scale_right(gfp)) ||
> > +        (lame_get_scale_left(gfp) > lame_get_scale_right(gfp)) ||
> > +        (lame_get_disable_reservoir(gfp) && lame_get_brate(gfp) < 320)
> ||
> > +        lame_get_noATH(gfp) || lame_get_ATHonly(gfp) || (nAthType == 0)
> ||
> > +        (nInSampleRate <= 32000))
> > +        bNonOptimal = 1;
> > +
> > +    nMisc = nNoiseShaping + (nStereoMode << 2)
> > +        + (bNonOptimal << 5)
> > +        + (nSourceFreq << 6);
> > +
> > +    /* write all this information to extradata */
> > +    AV_WB32(&extradata[nBytesWritten], nQuality);
> > +    nBytesWritten += 4;
> > +
> > +    av_strlcpy((char *) &extradata[nBytesWritten], szVersion, 9);
> > +    nBytesWritten += 9;
> > +
> > +    extradata[nBytesWritten] = nRevMethod;
> > +    nBytesWritten++;
> > +
> > +    extradata[nBytesWritten] = nLowpass;
> > +    nBytesWritten++;
> > +
> > +    AV_WB32(&extradata[nBytesWritten], nPeakSignalAmplitude);
> > +    nBytesWritten += 4;
> > +
> > +    AV_WB16(&extradata[nBytesWritten], nRadioReplayGain);
> > +    nBytesWritten += 2;
> > +
> > +    AV_WB16(&extradata[nBytesWritten], nAudiophileReplayGain);
> > +    nBytesWritten += 2;
> > +
> > +    extradata[nBytesWritten] = nFlags;
> > +    nBytesWritten++;
> > +
> > +    if (nABRBitrate >= 255)
> > +        extradata[nBytesWritten] = 0xFF;
> > +    else
> > +        extradata[nBytesWritten] = nABRBitrate;
> > +    nBytesWritten++;
> > +
> > +    extradata[nBytesWritten] = enc_delay >> 4;
> > +    extradata[nBytesWritten + 1] = (enc_delay << 4) + (enc_padding >>
> 8);
> > +    extradata[nBytesWritten + 2] = enc_padding;
> > +    nBytesWritten += 3;
> > +
> > +    extradata[nBytesWritten] = nMisc;
> > +    nBytesWritten++;
> > +
> > +    extradata[nBytesWritten++] = 0;   // unused in rev0
> > +
> > +    AV_WB16(&extradata[nBytesWritten], 0);
> > +    nBytesWritten += 2;
> > +
> > +    AV_WB32(&extradata[nBytesWritten], (int) nMusicLength);
> > +    nBytesWritten += 4;
> > +
> > +    AV_WB16(&extradata[nBytesWritten], nMusicCRC);
> > +    nBytesWritten += 2;
> > +
> > +    AV_WB16(&extradata[nBytesWritten], nTagCRC);
> > +    nBytesWritten += 2;
>
> see libavcodec/bytestream.h, above can be simplified using it
>
>
>
> > +
> > +    return nBytesWritten;
> > +}
> > +
> >  static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
> >                                  const AVFrame *frame, int
> *got_packet_ptr)
> >  {
> > @@ -213,6 +449,18 @@ static int mp3lame_encode_frame(AVCodecContext
> *avctx, AVPacket *avpkt,
> >      } else {
> >          lame_result = lame_encode_flush(s->gfp, s->buffer +
> s->buffer_index,
> >                                          s->buffer_size -
> s->buffer_index);
> > +        /* get Lame Info tag and store it in extradata */
> > +        if(avctx->extradata == NULL) {
> > +            avctx->extradata = av_malloc(LAME_EXT_SIZE);
> > +            avctx->extradata_size = GetLameInfoTag(s->gfp,
> avctx->extradata);
> > +            if (avctx->extradata_size != LAME_EXT_SIZE) {
> > +                av_log(avctx, AV_LOG_ERROR,
> > +                       "error storing Lame info tag (%d bytes written
> instead of %d)\n",
> > +                       avctx->extradata_size, LAME_EXT_SIZE);
> > +            } else {
> > +                av_log(avctx, AV_LOG_DEBUG, "lame info tag succesfully
> stored\n");
> > +            }
>
> that looks wrong, you set extradata_size to something else than the
> allocated size then you check it and print an error but dont
> handle it beyond that,
> if extradata_size is bigger than the allocated space this will
> be inconsistent and crash if accessed
>
>
> > +        }
> >      }
> >      if (lame_result < 0) {
> >          if (lame_result == -1) {
> > @@ -267,9 +515,11 @@ static int mp3lame_encode_frame(AVCodecContext
> *avctx, AVPacket *avpkt,
> >  #define OFFSET(x) offsetof(LAMEContext, x)
> >  #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
> >  static const AVOption options[] = {
> > -    { "reservoir",    "use bit reservoir", OFFSET(reservoir),
>  AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
> > -    { "joint_stereo", "use joint stereo",  OFFSET(joint_stereo),
> AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
> > -    { "abr",          "use ABR",           OFFSET(abr),
>  AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
> > +    { "reservoir",        "use bit reservoir",   OFFSET(reservoir),
>    AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
> > +    { "find_peak_sample", "find peak sample",
>  OFFSET(find_peak_sample), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
> > +    { "replay_gain",      "compute replay gain", OFFSET(replay_gain),
>    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
> > +    { "joint_stereo",     "use joint stereo",    OFFSET(joint_stereo),
>     AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
> > +    { "abr",              "use ABR",             OFFSET(abr),
>    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
>
> reformating existing options and adding new ones should be in
> seperate patches to keep git diff and blame readable
>
>
> [...]
>
> > +
> > +    avio_flush(s->pb);
> >      avio_seek(s->pb, 0, SEEK_END);
> >  }
> >
> > @@ -363,7 +431,7 @@ static int mp3_write_trailer(struct AVFormatContext
> *s)
> >          avio_write(s->pb, buf, ID3v1_TAG_SIZE);
> >      }
> >
> > -    if (mp3->xing_offset)
> > +    if (mp3->write_xing && mp3->xing_offset)
> >          mp3_update_xing(s);
> >
> >      return 0;
>
> > @@ -384,7 +452,7 @@ static int query_codec(enum AVCodecID id, int
> std_compliance)
> >  AVOutputFormat ff_mp2_muxer = {
> >      .name              = "mp2",
> >      .long_name         = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer
> 2)"),
> > -    .mime_type         = "audio/mpeg",
> > +    .mime_type         = "audio/x-mpeg",
>
> like ubitux already said, this doesnt belong in here, please
> read over your patch before submitting it to make sure it doesnt
> contain things that you didnt intent to be in it ... could be theres
> more that we dont recognize but you of course would know which
> changes you want to submit ...
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> In fact, the RIAA has been known to suggest that students drop out
> of college or go to community college in order to be able to afford
> settlements. -- The RIAA
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>