[FFmpeg-devel] [PATCH] avfilter: add loudnorm

Kyle Swanson k at ylo.ph
Wed Apr 6 02:01:14 CEST 2016


Here's another audio filter. I hinted at this a few months ago, but I found out that
finishing the last 5% took almost as long as the first 95%. This is an EBU R128
dynamic loudness normalization filter. This filter uses libebur128 v1.1.0[1] and must be
configured with `--enable-libebur128'. Please also see the accompanying blog post[2]
which has an algorithm description, as well as some usage instructions.

[1] https://github.com/jiixyj/libebur128
[2] http://k.ylo.ph/2016/04/04/loudnorm.html

Thanks!
Kyle

Signed-off-by: Kyle Swanson <k at ylo.ph>
---
 Changelog                 |   1 +
 MAINTAINERS               |   1 +
 configure                 |   5 +
 doc/filters.texi          |  42 +++
 libavfilter/Makefile      |   1 +
 libavfilter/af_loudnorm.c | 905 ++++++++++++++++++++++++++++++++++++++++++++++
 libavfilter/allfilters.c  |   1 +
 libavfilter/version.h     |   4 +-
 8 files changed, 958 insertions(+), 2 deletions(-)
 create mode 100644 libavfilter/af_loudnorm.c

diff --git a/Changelog b/Changelog
index fa4edd4..602f6a2 100644
--- a/Changelog
+++ b/Changelog
@@ -20,6 +20,7 @@ version <next>:
 - bitstream filter for extracting DTS core
 - ADPCM IMA DAT4 decoder
 - musx demuxer
+- loudnorm filter
 
 version 3.0:
 - Common Encryption (CENC) MP4 encoding and decoding support
diff --git a/MAINTAINERS b/MAINTAINERS
index a993a67..fb4b8fd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -358,6 +358,7 @@ Filters:
   af_compand.c                          Paul B Mahol
   af_firequalizer.c                     Muhammad Faiz
   af_ladspa.c                           Paul B Mahol
+  af_loudnorm.c                         Kyle Swanson
   af_pan.c                              Nicolas George
   af_sidechaincompress.c                Paul B Mahol
   af_silenceremove.c                    Paul B Mahol
diff --git a/configure b/configure
index 94a66d8..024568a 100755
--- a/configure
+++ b/configure
@@ -219,6 +219,8 @@ External library support:
   --enable-libcdio         enable audio CD grabbing with libcdio [no]
   --enable-libdc1394       enable IIDC-1394 grabbing using libdc1394
                            and libraw1394 [no]
+  --enable-libebur128      enable libebur128 for EBU R128 measurement,
+                           needed for loudnorm filter [no]
   --enable-libfaac         enable AAC encoding via libfaac [no]
   --enable-libfdk-aac      enable AAC de/encoding via libfdk-aac [no]
   --enable-libflite        enable flite (voice synthesis) support via libflite [no]
@@ -1467,6 +1469,7 @@ EXTERNAL_LIBRARY_LIST="
     libcdio
     libcelt
     libdc1394
+    libebur128
     libfaac
     libfdk_aac
     libflite
@@ -2948,6 +2951,7 @@ hwupload_cuda_filter_deps="cuda"
 interlace_filter_deps="gpl"
 kerndeint_filter_deps="gpl"
 ladspa_filter_deps="ladspa dlopen"
+loudnorm_filter_deps="libebur128"
 mcdeint_filter_deps="avcodec gpl"
 movie_filter_deps="avcodec avformat"
 mpdecimate_filter_deps="gpl"
@@ -5534,6 +5538,7 @@ enabled libcelt           && require libcelt celt/celt.h celt_decode -lcelt0 &&
                              { check_lib celt/celt.h celt_decoder_create_custom -lcelt0 ||
                                die "ERROR: libcelt must be installed and version must be >= 0.11.0."; }
 enabled libcaca           && require_pkg_config caca caca.h caca_create_canvas
+enabled libebur128        && require ebur128 ebur128.h ebur128_relative_threshold -lebur128
 enabled libfaac           && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
 enabled libfdk_aac        && { use_pkg_config fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
                                { require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&
diff --git a/doc/filters.texi b/doc/filters.texi
index 592fc24..6c563d9 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -2711,6 +2711,48 @@ Modify the @var{N}-th control value.
 If the specified value is not valid, it is ignored and prior one is kept.
 @end table
 
+@section loudnorm
+
+EBU R128 loudness normalization. Includes both dynamic and linear normalization modes.
+Support for both single pass (livestreams, files) and double pass (files) modes.
+This algorithm can target IL, LRA, and maximum true peak. Requires libebur128.
+
+The filter accepts the following options:
+
+@table @option
+@item I, i
+Set integrated loudness target
+
+@item LRA, lra
+Set loudness range target
+
+@item TP, tp
+Set maximum true peak
+
+@item measured_I, measured_i
+Measured IL of input file
+
+@item measured_LRA, measured_lra
+Measured LRA of input file
+
+@item measured_TP, measured_tp
+Measured true peak of input file
+
+@item measured_thresh
+Measured threshold of input file
+
+@item offset
+Set offset gain. Gain is applied before the true-peak limiter.
+
+@item linear
+Normalize linearly if possible.
+measured_I, measured_LRA, measured_TP, and measured_thresh also need
+to be specified to use this mode.
+
+@item print_format
+Set print format for stats. Options are summary, json, or none.
+@end table
+
 @section lowpass
 
 Apply a low-pass filter with 3dB point frequency.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index b6e1999..766a930 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -86,6 +86,7 @@ OBJS-$(CONFIG_FLANGER_FILTER)                += af_flanger.o generate_wave_table
 OBJS-$(CONFIG_HIGHPASS_FILTER)               += af_biquads.o
 OBJS-$(CONFIG_JOIN_FILTER)                   += af_join.o
 OBJS-$(CONFIG_LADSPA_FILTER)                 += af_ladspa.o
+OBJS-$(CONFIG_LOUDNORM_FILTER)               += af_loudnorm.o
 OBJS-$(CONFIG_LOWPASS_FILTER)                += af_biquads.o
 OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o
 OBJS-$(CONFIG_REPLAYGAIN_FILTER)             += af_replaygain.o
diff --git a/libavfilter/af_loudnorm.c b/libavfilter/af_loudnorm.c
new file mode 100644
index 0000000..a5adf2d
--- /dev/null
+++ b/libavfilter/af_loudnorm.c
@@ -0,0 +1,905 @@
+/*
+ * Copyright (c) 2016 Kyle Swanson <k at ylo.ph>.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* http://k.ylo.ph/2016/04/04/loudnorm.html */
+
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+#include <ebur128.h>
+
+typedef struct LoudNormContext { /* private state of one loudnorm filter instance */
+    const AVClass *class;
+    double target_i;          /* option I/i: integrated loudness target */
+    double target_lra;        /* option LRA/lra: loudness range target */
+    double target_tp;         /* option TP/tp; config_input replaces it with pow(10, x / 20) */
+    double measured_i;        /* option measured_I/measured_i */
+    double measured_lra;      /* option measured_LRA/measured_lra */
+    double measured_tp;       /* option measured_TP/measured_tp */
+    double measured_thresh;   /* option measured_thresh */
+    double offset;            /* option offset; config_input replaces it with pow(10, x / 20) */
+    int linear;               /* option linear (boolean) */
+    int print_format;         /* option print_format: NONE, JSON or SUMMARY */
+
+    double *buf;              /* ring buffer of raw input samples, interleaved */
+    int buf_size;             /* length of buf in doubles (3000 ms worth of samples * channels) */
+    int buf_index;            /* read position in buf */
+    int prev_buf_index;       /* write position in buf for newly arrived input */
+
+    double delta[30];         /* ring of per-frame linear gains, smoothed by gaussian_filter() */
+    double weights[21];       /* normalized gaussian kernel built by init_gaussian_filter() */
+    double prev_delta;        /* gain stored for the previous frame */
+    int index;                /* current slot in delta[] */
+
+    double gain_reduction[2]; /* limiter envelope endpoints: [0] = ramp start, [1] = ramp end */
+    double *limiter_buf;      /* ring buffer of gain-adjusted samples awaiting limiting */
+    double *prev_smp;         /* per-channel magnitude of the previous sample seen by detect_peak() */
+    int limiter_buf_index;    /* current position in limiter_buf */
+    int limiter_buf_size;     /* ring length in doubles (210 ms worth of samples * channels) */
+    int limiter_state;        /* OUT, ATTACK, SUSTAIN or RELEASE */
+    int peak_index;           /* limiter_buf position of the last peak reported by detect_peak() */
+    int env_index;            /* limiter_buf position the envelope is currently applied at */
+    int env_cnt;              /* samples already processed in the current envelope segment */
+    int attack_length;        /* attack ramp length in samples */
+    int release_length;       /* release ramp length in samples */
+
+    int64_t pts;              /* running output timestamp, advanced by the number of samples output */
+    int frame_type;           /* FIRST_FRAME, INNER_FRAME, FINAL_FRAME or LINEAR_MODE */
+    int above_threshold;      /* 0 until the signal has been judged loud enough for dynamic gain */
+    int prev_nb_samples;      /* sample count of the last INNER_FRAME input */
+
+    ebur128_state *r128_in;   /* libebur128 meter fed with the input samples */
+    ebur128_state *r128_out;  /* libebur128 meter fed with the output samples */
+} LoudNormContext;
+
+enum { /* values stored in LoudNormContext.frame_type */
+    FIRST_FRAME, /* initial state set by config_input */
+    INNER_FRAME, /* entered after the first frame has been processed */
+    FINAL_FRAME, /* set by request_frame when the input reports EOF */
+    LINEAR_MODE  /* filter_frame only multiplies samples by s->offset */
+};
+
+enum { /* values stored in LoudNormContext.limiter_state */
+    OUT,     /* no gain reduction active; searching for a peak */
+    ATTACK,  /* ramping from gain_reduction[0] to gain_reduction[1] */
+    SUSTAIN, /* holding gain_reduction[1] */
+    RELEASE  /* ramping back towards a gain of 1 */
+};
+
+enum { /* values of the print_format option */
+    NONE,
+    JSON,
+    SUMMARY
+};
+
+#define OFFSET(x) offsetof(LoudNormContext, x) /* byte offset of an option's backing field */
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM /* flags shared by every option below */
+
+static const AVOption loudnorm_options[] = { /* upper- and lower-case spellings write the same field */
+    { "I",                "set integrated loudness target",    OFFSET(target_i),         AV_OPT_TYPE_DOUBLE,  {.dbl = -24.},   -70.,      -5.,  FLAGS },
+    { "i",                "set integrated loudness target",    OFFSET(target_i),         AV_OPT_TYPE_DOUBLE,  {.dbl = -24.},   -70.,      -5.,  FLAGS },
+    { "LRA",              "set loudness range target",         OFFSET(target_lra),       AV_OPT_TYPE_DOUBLE,  {.dbl =  7.},     1.,       20.,  FLAGS },
+    { "lra",              "set loudness range target",         OFFSET(target_lra),       AV_OPT_TYPE_DOUBLE,  {.dbl =  7.},     1.,       20.,  FLAGS },
+    { "TP",               "set maximum true peak",             OFFSET(target_tp),        AV_OPT_TYPE_DOUBLE,  {.dbl = -2.},    -9.,        0.,  FLAGS },
+    { "tp",               "set maximum true peak",             OFFSET(target_tp),        AV_OPT_TYPE_DOUBLE,  {.dbl = -2.},    -9.,        0.,  FLAGS },
+    { "measured_I",       "measured IL of input file",         OFFSET(measured_i),       AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},    -99.,       0.,  FLAGS }, /* default 0 is read as "not supplied" by config_input */
+    { "measured_i",       "measured IL of input file",         OFFSET(measured_i),       AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},    -99.,       0.,  FLAGS },
+    { "measured_LRA",     "measured LRA of input file",        OFFSET(measured_lra),     AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},     0.,       99.,  FLAGS }, /* default 0 is read as "not supplied" by config_input */
+    { "measured_lra",     "measured LRA of input file",        OFFSET(measured_lra),     AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},     0.,       99.,  FLAGS },
+    { "measured_TP",      "measured true peak of input file",  OFFSET(measured_tp),      AV_OPT_TYPE_DOUBLE,  {.dbl =  99.},   -99.,      99.,  FLAGS }, /* default 99 is read as "not supplied" by config_input */
+    { "measured_tp",      "measured true peak of input file",  OFFSET(measured_tp),      AV_OPT_TYPE_DOUBLE,  {.dbl =  99.},   -99.,      99.,  FLAGS },
+    { "measured_thresh",  "measured threshold of input file",  OFFSET(measured_thresh),  AV_OPT_TYPE_DOUBLE,  {.dbl = -70.},   -99.,       0.,  FLAGS }, /* default -70 is read as "not supplied" by config_input */
+    { "offset",           "set offset gain",                   OFFSET(offset),           AV_OPT_TYPE_DOUBLE,  {.dbl =  0.},    -99.,      99.,  FLAGS },
+    { "linear",           "normalize linearly if possible",    OFFSET(linear),           AV_OPT_TYPE_BOOL,    {.i64 =  1},        0,        1,  FLAGS },
+    { "print_format",     "set print format for stats",        OFFSET(print_format),     AV_OPT_TYPE_INT,     {.i64 =  NONE},  NONE,  SUMMARY,  FLAGS, "print_format" },
+    {     "none",         0,                                   0,                        AV_OPT_TYPE_CONST,   {.i64 =  NONE},     0,        0,  FLAGS, "print_format" },
+    {     "json",         0,                                   0,                        AV_OPT_TYPE_CONST,   {.i64 =  JSON},     0,        0,  FLAGS, "print_format" },
+    {     "summary",      0,                                   0,                        AV_OPT_TYPE_CONST,   {.i64 =  SUMMARY},  0,        0,  FLAGS, "print_format" },
+    { NULL } /* table terminator */
+};
+
+AVFILTER_DEFINE_CLASS(loudnorm);
+
+static inline int frame_size(int sample_rate, int frame_len_msec) /* samples per channel in frame_len_msec milliseconds at sample_rate */
+{
+    const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0)); /* rounded to the nearest sample */
+    return frame_size + (frame_size % 2); /* an odd count is bumped by one so the result is even */
+}
+
+static void init_gaussian_filter(LoudNormContext *s) /* fills s->weights with a 21-tap gaussian kernel whose taps sum to 1 */
+{
+    double total_weight = 0.0;
+    const double sigma = 3.5;
+    double adjust;
+    int i;
+
+    const int offset = 21 / 2; /* centre tap (10) */
+    const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
+    const double c2 = 2.0 * pow(sigma, 2.0);
+
+    for (i = 0; i < 21; i++) {
+        const int x = i - offset; /* signed distance from the centre tap */
+        s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
+        total_weight += s->weights[i];
+    }
+
+    adjust = 1.0 / total_weight; /* normalization factor */
+    for (i = 0; i < 21; i++)
+        s->weights[i] *= adjust;
+}
+
+static double gaussian_filter(LoudNormContext *s, int index) /* weighted sum of 21 consecutive entries of the 30-slot ring s->delta, starting 10 slots before index */
+{
+    double result = 0.;
+    int i;
+
+    index = index - 10 > 0 ? index - 10 : index + 20; /* step back 10 slots modulo 30; index == 10 yields 30, which the wrap below maps to slot 0 */
+    for (i = 0; i < 21; i++)
+        result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
+
+    return result;
+}
+
+static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value) /* scans limiter_buf for the next local maximum above the ceiling; *peak_delta is its sample distance, or -1 if none */
+{
+    int n, c, i, index;
+    double ceiling;
+    double *buf;
+
+    *peak_delta = -1; /* -1 tells the caller that no peak was found */
+    buf = s->limiter_buf;
+    ceiling = s->target_tp;
+
+    index = s->limiter_buf_index + (offset * channels) + (1920 * channels); /* scanning starts 1920 samples (per channel) past the current position plus offset */
+    if (index >= s->limiter_buf_size)
+        index -= s->limiter_buf_size; /* wrap around the ring buffer */
+
+    if (s->frame_type == FIRST_FRAME) { /* seed prev_smp from the sample just before the scan start */
+        for (c = 0; c < channels; c++)
+            s->prev_smp[c] = fabs(buf[index + c - channels]);
+    }
+
+    for (n = 0; n < nb_samples; n++) {
+        for (c = 0; c < channels; c++) {
+            double this, next, max_peak;
+
+            this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
+            next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
+
+            if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) { /* candidate: not below either neighbour, above the ceiling, and not the first scanned sample */
+                int detected;
+
+                detected = 1;
+                for (i = 2; i < 12; i++) { /* reject the candidate if any of the samples 2..11 positions ahead is larger */
+                    next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
+                    if (next > this) {
+                        detected = 0;
+                        break;
+                    }
+                }
+
+                if (!detected)
+                    continue; /* NOTE(review): this also skips the prev_smp[c] update at the bottom of the channel loop */
+
+                for (c = 0; c < channels; c++) { /* reuses the outer loop variable; safe only because the function returns right after */
+                    if (c == 0 || fabs(buf[index + c]) > max_peak)
+                        max_peak = fabs(buf[index + c]); /* largest magnitude across channels at the peak position */
+
+                    s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
+                }
+
+                *peak_delta = n; /* samples between the scan start and the peak */
+                s->peak_index = index; /* ring position of the peak, read by true_peak_limiter */
+                *peak_value = max_peak;
+                return;
+            }
+
+            s->prev_smp[c] = this;
+        }
+
+        index += channels;
+        if (index >= s->limiter_buf_size)
+            index -= s->limiter_buf_size; /* wrap around the ring buffer */
+    }
+}
+
+static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels) /* applies the gain-reduction envelope to limiter_buf in place, then copies nb_samples (per channel) to out, hard-clipped at the ceiling */
+{
+    int n, c, index, peak_delta, smp_cnt;
+    double ceiling, peak_value;
+    double *buf;
+
+    buf = s->limiter_buf;
+    ceiling = s->target_tp;
+    index = s->limiter_buf_index; /* read position for the final copy to out */
+    smp_cnt = 0; /* samples of this call already accounted for by the state machine */
+
+    if (s->frame_type == FIRST_FRAME) { /* the first 1920 samples are handled here because detect_peak starts scanning after them */
+        double max;
+
+        max = 0.;
+        for (n = 0; n < 1920; n++) { /* largest magnitude in the first 1920 samples */
+            for (c = 0; c < channels; c++) {
+              max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
+            }
+            buf += channels;
+        }
+
+        if (max > ceiling) { /* already too loud: scale that region and start in SUSTAIN */
+            s->gain_reduction[1] = ceiling / max;
+            s->limiter_state = SUSTAIN;
+            buf = s->limiter_buf;
+
+            for (n = 0; n < 1920; n++) {
+                for (c = 0; c < channels; c++) {
+                    double env;
+                    env = s->gain_reduction[1];
+                    buf[c] *= env;
+                }
+                buf += channels;
+            }
+        }
+
+        buf = s->limiter_buf; /* restore the base pointer for the indexed accesses below */
+    }
+
+    do { /* run the state machine until nb_samples samples are accounted for */
+
+        switch(s->limiter_state) {
+        case OUT:
+            detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
+            if (peak_delta != -1) { /* peak ahead: start an attack ramp that ends at the peak */
+                s->env_cnt = 0;
+                smp_cnt += (peak_delta - s->attack_length);
+                s->gain_reduction[0] = 1.;
+                s->gain_reduction[1] = ceiling / peak_value;
+                s->limiter_state = ATTACK;
+
+                s->env_index = s->peak_index - (s->attack_length * channels); /* envelope starts attack_length samples before the peak */
+                if (s->env_index < 0)
+                    s->env_index += s->limiter_buf_size;
+
+                s->env_index += (s->env_cnt * channels); /* env_cnt was just set to 0, so this adds nothing */
+                if (s->env_index > s->limiter_buf_size)
+                    s->env_index -= s->limiter_buf_size;
+
+            } else {
+                smp_cnt = nb_samples; /* no peak in range: nothing to do for this call */
+            }
+            break;
+
+        case ATTACK:
+            for (; s->env_cnt < s->attack_length; s->env_cnt++) { /* resumes from env_cnt if a previous call was cut short */
+                for (c = 0; c < channels; c++) {
+                    double env;
+                    env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1])); /* linear ramp from gain_reduction[0] to gain_reduction[1] */
+                    buf[s->env_index + c] *= env;
+                }
+
+                s->env_index += channels;
+                if (s->env_index >= s->limiter_buf_size)
+                    s->env_index -= s->limiter_buf_size;
+
+                smp_cnt++;
+                if (smp_cnt >= nb_samples) { /* out of samples for this call; count this sample before leaving */
+                    s->env_cnt++;
+                    break;
+                }
+            }
+
+            if (smp_cnt < nb_samples) { /* ramp finished within this call */
+                s->env_cnt = 0;
+                s->attack_length = 1920; /* restore the default attack length */
+                s->limiter_state = SUSTAIN;
+            }
+            break;
+
+        case SUSTAIN:
+            detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
+            if (peak_delta == -1) { /* no further peak: ramp back up */
+                s->limiter_state = RELEASE;
+                s->gain_reduction[0] = s->gain_reduction[1];
+                s->gain_reduction[1] = 1.;
+                s->env_cnt = 0;
+                break;
+            } else {
+                double gain_reduction;
+                gain_reduction = ceiling / peak_value;
+
+                if (gain_reduction < s->gain_reduction[1]) { /* the new peak needs more reduction: ramp down further */
+                    s->limiter_state = ATTACK;
+
+                    s->attack_length = peak_delta; /* ramp must finish by the time the peak arrives */
+                    if (s->attack_length <= 1)
+                        s->attack_length =  2; /* keeps the (attack_length - 1) divisor in ATTACK non-zero */
+
+                    s->gain_reduction[0] = s->gain_reduction[1];
+                    s->gain_reduction[1] = gain_reduction;
+                    s->env_cnt = 0;
+                    break;
+                }
+
+                for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) { /* otherwise hold the current reduction up to the peak */
+                    for (c = 0; c < channels; c++) {
+                        double env;
+                        env = s->gain_reduction[1];
+                        buf[s->env_index + c] *= env;
+                    }
+
+                    s->env_index += channels;
+                    if (s->env_index >= s->limiter_buf_size)
+                        s->env_index -= s->limiter_buf_size;
+
+                    smp_cnt++;
+                    if (smp_cnt >= nb_samples) {
+                        s->env_cnt++;
+                        break;
+                    }
+                }
+            }
+            break;
+
+        case RELEASE:
+            for (; s->env_cnt < s->release_length; s->env_cnt++) { /* resumes from env_cnt if a previous call was cut short */
+                for (c = 0; c < channels; c++) {
+                    double env;
+                    env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0])); /* linear ramp from gain_reduction[0] up to gain_reduction[1] */
+                    buf[s->env_index + c] *= env;
+                }
+
+                s->env_index += channels;
+                if (s->env_index >= s->limiter_buf_size)
+                    s->env_index -= s->limiter_buf_size;
+
+                smp_cnt++;
+                if (smp_cnt >= nb_samples) {
+                    s->env_cnt++;
+                    break;
+                }
+            }
+
+            if (smp_cnt < nb_samples) { /* release finished within this call */
+                s->env_cnt = 0;
+                s->limiter_state = OUT;
+            }
+
+            break;
+        }
+
+    } while (smp_cnt < nb_samples);
+
+    for (n = 0; n < nb_samples; n++) { /* copy the processed samples out, starting at the position saved on entry */
+        for (c = 0; c < channels; c++) {
+            out[c] = buf[index + c];
+            if (fabs(out[c]) > ceiling) { /* hard clip anything still above the ceiling, keeping its sign */
+                out[c] = ceiling * (out[c] < 0 ? -1 : 1);
+            }
+        }
+        out += channels;
+        index += channels;
+        if (index >= s->limiter_buf_size)
+            index -= s->limiter_buf_size;
+    }
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in) /* processes one input frame according to s->frame_type and sends the result downstream; returns the ff_filter_frame result or AVERROR(ENOMEM) */
+{
+    AVFilterContext *ctx = inlink->dst;
+    LoudNormContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    const double *src;
+    double *dst;
+    double *buf;
+    double *limiter_buf;
+    int i, n, c, subframe_length, src_index;
+    double gain, gain_next, env_global, env_shortterm,
+    global, shortterm, lra, relative_threshold;
+
+    if (av_frame_is_writable(in)) { /* process in place when the input frame may be modified */
+        out = in;
+    } else {
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    out->pts = s->pts; /* output timestamps come from the filter's own running counter */
+    src = (const double *)in->data[0];
+    dst = (double *)out->data[0];
+    buf = s->buf;
+    limiter_buf = s->limiter_buf;
+
+    ebur128_add_frames_double(s->r128_in, src, in->nb_samples); /* feed the input-side meter */
+
+    if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) { /* first frame shorter than 3000 ms: fall back to one fixed gain */
+        double offset, offset_tp, true_peak;
+
+        ebur128_loudness_global(s->r128_in, &global);
+        for (c = 0; c < inlink->channels; c++) { /* largest true peak over all channels */
+            double tmp;
+            ebur128_true_peak(s->r128_in, c, &tmp);
+            if (c == 0 || tmp > true_peak)
+                true_peak = tmp;
+        }
+
+        offset    = s->target_i - global; /* gain in dB that reaches the loudness target */
+        offset_tp = 20. * log10(true_peak) + offset; /* resulting peak in dBTP; true_peak is a linear amplitude */
+        s->offset = offset_tp < 20. * log10(s->target_tp) ? offset : 20. * log10(s->target_tp / true_peak); /* s->target_tp is linear here (converted in config_input); if the peak would exceed it, use the gain that lands the peak on the ceiling */
+        s->offset = isfinite(s->offset) ? pow(10., s->offset / 20.) : 1.; /* dB to linear; unity gain when the measurements are not finite (e.g. silent input) */
+        s->frame_type = LINEAR_MODE;
+    }
+
+    switch (s->frame_type) {
+    case FIRST_FRAME:
+        for (n = 0; n < in->nb_samples; n++) { /* store the whole first frame in buf */
+            for (c = 0; c < inlink->channels; c++) {
+                buf[s->buf_index + c] = src[c];
+            }
+            src += inlink->channels;
+            s->buf_index += inlink->channels;
+        }
+
+        ebur128_loudness_shortterm(s->r128_in, &shortterm);
+
+        if (shortterm < s->measured_thresh) { /* below the measured threshold: start from the measured integrated loudness */
+            s->above_threshold = 0;
+            env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
+        } else {
+            s->above_threshold = 1;
+            env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
+        }
+
+        for (n = 0; n < 30; n++) /* seed every slot of the gain history with the initial gain */
+            s->delta[n] = pow(10., env_shortterm / 20.);
+        s->prev_delta = s->delta[s->index];
+
+        s->buf_index =
+        s->limiter_buf_index = 0;
+
+        for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) { /* fill limiter_buf with the first gain-adjusted samples */
+            for (c = 0; c < inlink->channels; c++) {
+                limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
+            }
+            s->limiter_buf_index += inlink->channels;
+            if (s->limiter_buf_index >= s->limiter_buf_size)
+                s->limiter_buf_index -= s->limiter_buf_size;
+
+            s->buf_index += inlink->channels;
+        }
+
+        subframe_length = frame_size(inlink->sample_rate, 100); /* only the first 100 ms are output for this frame */
+        true_peak_limiter(s, dst, subframe_length, inlink->channels);
+        ebur128_add_frames_double(s->r128_out, dst, subframe_length);
+
+        s->pts += /* advance pts and ask for 100 ms input frames from now on */
+        out->nb_samples =
+        inlink->min_samples =
+        inlink->max_samples =
+        inlink->partial_buf_size = subframe_length;
+
+        s->frame_type = INNER_FRAME;
+        break;
+
+    case INNER_FRAME:
+        gain      = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30); /* smoothed gain for this frame */
+        gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30); /* smoothed gain for the following frame */
+
+        for (n = 0; n < in->nb_samples; n++) {
+            for (c = 0; c < inlink->channels; c++) {
+                buf[s->prev_buf_index + c] = src[c]; /* store the new input sample */
+                limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset; /* buffered sample scaled by a gain interpolated linearly from gain to gain_next */
+            }
+            src += inlink->channels;
+
+            s->limiter_buf_index += inlink->channels;
+            if (s->limiter_buf_index >= s->limiter_buf_size)
+                s->limiter_buf_index -= s->limiter_buf_size;
+
+            s->prev_buf_index += inlink->channels;
+            if (s->prev_buf_index >= s->buf_size)
+                s->prev_buf_index -= s->buf_size;
+
+            s->buf_index += inlink->channels;
+            if (s->buf_index >= s->buf_size)
+                s->buf_index -= s->buf_size;
+        }
+
+        subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels; /* zero for a full 100 ms frame */
+        s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
+
+        true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
+        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
+
+        ebur128_loudness_range(s->r128_in, &lra);
+        ebur128_loudness_global(s->r128_in, &global);
+        ebur128_loudness_shortterm(s->r128_in, &shortterm);
+        ebur128_relative_threshold(s->r128_in, &relative_threshold);
+
+        if (s->above_threshold == 0) { /* still in the start-up phase */
+            double shortterm_out;
+
+            if (shortterm > s->measured_thresh)
+                s->prev_delta *= 1.0058; /* raise the held gain a little on each frame */
+
+            ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
+            if (shortterm_out >= s->target_i) /* output has reached the target: switch to dynamic gain */
+                s->above_threshold = 1;
+        }
+
+        if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) { /* quiet or start-up: keep the previous gain */
+            s->delta[s->index] = s->prev_delta;
+        } else {
+            env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1); /* short-term deviation from global, clamped to +/- target_lra / 2 */
+            env_shortterm = s->target_i - shortterm;
+            s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
+        }
+
+        s->prev_delta = s->delta[s->index];
+        s->index++;
+        if (s->index >= 30)
+            s->index -= 30; /* wrap the gain-history ring */
+        s->prev_nb_samples = in->nb_samples;
+        s->pts += in->nb_samples;
+        break;
+
+    case FINAL_FRAME: /* frame built by request_frame from the samples still buffered at EOF */
+        gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
+        s->limiter_buf_index = 0;
+        src_index = 0;
+
+        for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) { /* refill limiter_buf from the start of the final frame */
+            for (c = 0; c < inlink->channels; c++) {
+                s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
+            }
+            src_index += inlink->channels;
+
+            s->limiter_buf_index += inlink->channels;
+            if (s->limiter_buf_index >= s->limiter_buf_size)
+                s->limiter_buf_index -= s->limiter_buf_size;
+        }
+
+        subframe_length = frame_size(inlink->sample_rate, 100);
+        for (i = 0; i < in->nb_samples / subframe_length; i++) { /* limit and output the frame 100 ms at a time */
+            true_peak_limiter(s, dst, subframe_length, inlink->channels);
+
+            for (n = 0; n < subframe_length; n++) { /* top limiter_buf up with the next samples, or zeros once the source is exhausted */
+                for (c = 0; c < inlink->channels; c++) {
+                    if (src_index < (in->nb_samples * inlink->channels)) {
+                        limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
+                    } else {
+                        limiter_buf[s->limiter_buf_index + c] = 0.;
+                    }
+                }
+
+                if (src_index < (in->nb_samples * inlink->channels))
+                    src_index += inlink->channels;
+
+                s->limiter_buf_index += inlink->channels;
+                if (s->limiter_buf_index >= s->limiter_buf_size)
+                    s->limiter_buf_index -= s->limiter_buf_size;
+            }
+
+            dst += (subframe_length * inlink->channels);
+        }
+
+        dst = (double *)out->data[0]; /* rewind to the start of the output for metering */
+        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
+        break;
+
+    case LINEAR_MODE: /* plain scaling by the fixed linear gain s->offset */
+        for (n = 0; n < in->nb_samples; n++) {
+            for (c = 0; c < inlink->channels; c++) {
+                dst[c] = src[c] * s->offset;
+            }
+            src += inlink->channels;
+            dst += inlink->channels;
+        }
+
+        dst = (double *)out->data[0]; /* rewind to the start of the output for metering */
+        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
+        s->pts += in->nb_samples;
+        break;
+    }
+
+    if (in != out) /* the input is only freed when a separate output frame was allocated */
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static int request_frame(AVFilterLink *outlink) /* forwards the request upstream; at EOF during INNER_FRAME it flushes the still-buffered samples as one FINAL_FRAME */
+{
+    int ret;
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    LoudNormContext *s = ctx->priv;
+
+    ret = ff_request_frame(inlink);
+    if (ret == AVERROR_EOF && s->frame_type == INNER_FRAME) { /* input ended while samples are still held in s->buf */
+        double *src;
+        double *buf;
+        int nb_samples, n, c, offset;
+        AVFrame *frame;
+
+        nb_samples  = (s->buf_size / inlink->channels) - s->prev_nb_samples; /* with the next line: buffer length minus one 100 ms frame; prev_nb_samples cancels out */
+        nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);
+
+        frame = ff_get_audio_buffer(outlink, nb_samples);
+        if (!frame)
+            return AVERROR(ENOMEM);
+        frame->nb_samples = nb_samples;
+
+        buf = s->buf;
+        src = (double *)frame->data[0]; /* despite its name, src is the destination of the copy below */
+
+        offset  = ((s->limiter_buf_size / inlink->channels) - s->prev_nb_samples) * inlink->channels; /* with the next line: limiter_buf length minus one 100 ms frame, in doubles */
+        offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->channels;
+        s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset; /* rewind the read position by offset, wrapping around the ring */
+
+        for (n = 0; n < nb_samples; n++) { /* copy the buffered samples out of the ring into the new frame */
+            for (c = 0; c < inlink->channels; c++) {
+                src[c] = buf[s->buf_index + c];
+            }
+            src += inlink->channels;
+            s->buf_index += inlink->channels;
+            if (s->buf_index >= s->buf_size)
+                s->buf_index -= s->buf_size;
+        }
+
+        s->frame_type = FINAL_FRAME;
+        ret = filter_frame(inlink, frame); /* run the flushed samples through the FINAL_FRAME path */
+    }
+    return ret;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    static const int input_srate[] = {192000, -1};
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBL,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+
+    layouts = ff_all_channel_counts();
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_channel_layouts(ctx, layouts);
+    if (ret < 0)
+        return ret;
+
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
+
+		formats = ff_make_format_list(input_srate);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    ret = ff_formats_ref(formats, &inlink->out_samplerates);
+    if (ret < 0)
+        return ret;
+    ret = ff_formats_ref(formats, &outlink->in_samplerates);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+/**
+ * Set up the filter state once the input link has been negotiated.
+ *
+ * Creates the input and output EBU R128 measurement states, allocates the
+ * sample buffers, decides between linear and dynamic normalization and
+ * resets the running indices.
+ *
+ * @param inlink configured input link; its channel count and sample rate
+ *               size every buffer allocated here
+ * @return 0 on success, AVERROR(ENOMEM) if any allocation fails
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    LoudNormContext *s = ctx->priv;
+    const int r128_mode = EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_TRUE_PEAK;
+
+    /* ebur128_init() allocates and returns the state itself, so no separate
+     * allocation is made for the pointer (doing so would leak it). */
+    s->r128_in = ebur128_init(inlink->channels, inlink->sample_rate, r128_mode);
+    if (!s->r128_in)
+        return AVERROR(ENOMEM);
+
+    s->r128_out = ebur128_init(inlink->channels, inlink->sample_rate, r128_mode);
+    if (!s->r128_out)
+        return AVERROR(ENOMEM);
+
+    /* frame_size() is defined elsewhere in this file; its second argument
+     * appears to be a duration in milliseconds -- TODO confirm. */
+    s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->channels;
+    s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
+    if (!s->buf)
+        return AVERROR(ENOMEM);
+
+    s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->channels;
+    /* NOTE(review): allocated with buf_size, not limiter_buf_size. Kept as in
+     * the original because other code may rely on the larger allocation;
+     * confirm before shrinking. */
+    s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
+    if (!s->limiter_buf)
+        return AVERROR(ENOMEM);
+
+    s->prev_smp = av_malloc_array(inlink->channels, sizeof(*s->prev_smp));
+    if (!s->prev_smp)
+        return AVERROR(ENOMEM);
+
+    init_gaussian_filter(s);
+
+    s->frame_type = FIRST_FRAME;
+
+    if (s->linear) {
+        double offset, offset_tp;
+        offset    = s->target_i - s->measured_i;
+        offset_tp = s->measured_tp + offset;
+
+        /* Linear mode needs all four measured_* values to differ from
+         * 99 / -70 / 0 / 0 (presumably the option defaults, i.e. "not
+         * supplied" -- verify against the option table), and the shifted
+         * true peak and the measured LRA to stay within the targets. */
+        if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
+            if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
+                s->frame_type = LINEAR_MODE;
+                s->offset = offset;
+            }
+        }
+    }
+
+    /* Outside linear mode, pin the link's min/max sample counts and partial
+     * buffer size to frame_size(sample_rate, 3000). */
+    if (s->frame_type != LINEAR_MODE) {
+        inlink->min_samples =
+        inlink->max_samples =
+        inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
+    }
+
+    s->pts =
+    s->buf_index =
+    s->prev_buf_index =
+    s->limiter_buf_index = 0;
+    s->index = 1;
+    s->limiter_state = OUT;
+    /* Convert dB values to linear gain factors: 10^(x / 20). */
+    s->offset = pow(10., s->offset / 20.);
+    s->target_tp = pow(10., s->target_tp / 20.);
+    s->attack_length = frame_size(inlink->sample_rate, 10);
+    s->release_length = frame_size(inlink->sample_rate, 100);
+
+    return 0;
+}
+
+/**
+ * Print the loudness report (if requested) and release all filter state.
+ *
+ * Reads integrated loudness, loudness range, relative threshold and the
+ * maximum per-channel true peak from both measurement states, logs them in
+ * the format selected by s->print_format, then frees the states and buffers.
+ *
+ * The report is skipped when either measurement state is missing, so the
+ * function is safe to call when config_input() never ran or failed part-way.
+ *
+ * @param ctx filter context being torn down
+ */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    LoudNormContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out;
+    double tp_in = 0., tp_out = 0.;
+    int c;
+
+    /* Without both states there is nothing to report; outlink is only
+     * dereferenced past this point. */
+    if (!s->r128_in || !s->r128_out)
+        goto end;
+
+    ebur128_loudness_range(s->r128_in, &lra_in);
+    ebur128_loudness_global(s->r128_in, &i_in);
+    ebur128_relative_threshold(s->r128_in, &thresh_in);
+    /* Keep the largest true peak over all channels. */
+    for (c = 0; c < outlink->channels; c++) {
+        double tmp;
+        ebur128_true_peak(s->r128_in, c, &tmp);
+        if ((c == 0) || (tmp > tp_in))
+            tp_in = tmp;
+    }
+
+    ebur128_loudness_range(s->r128_out, &lra_out);
+    ebur128_loudness_global(s->r128_out, &i_out);
+    ebur128_relative_threshold(s->r128_out, &thresh_out);
+    for (c = 0; c < outlink->channels; c++) {
+        double tmp;
+        ebur128_true_peak(s->r128_out, c, &tmp);
+        if ((c == 0) || (tmp > tp_out))
+            tp_out = tmp;
+    }
+
+    switch(s->print_format) {
+    case NONE:
+        break;
+
+    case JSON:
+        av_log(ctx, AV_LOG_INFO,
+            "\n{\n"
+            "\t\"input_i\" : \"%.2f\",\n"
+            "\t\"input_tp\" : \"%.2f\",\n"
+            "\t\"input_lra\" : \"%.2f\",\n"
+            "\t\"input_thresh\" : \"%.2f\",\n"
+            "\t\"output_i\" : \"%.2f\",\n"
+            "\t\"output_tp\" : \"%+.2f\",\n"
+            "\t\"output_lra\" : \"%.2f\",\n"
+            "\t\"output_thresh\" : \"%.2f\",\n"
+            "\t\"normalization_type\" : \"%s\",\n"
+            "\t\"target_offset\" : \"%.2f\"\n"
+            "}\n",
+            i_in,
+            20. * log10(tp_in),     /* linear peak -> dB */
+            lra_in,
+            thresh_in,
+            i_out,
+            20. * log10(tp_out),
+            lra_out,
+            thresh_out,
+            s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
+            s->target_i - i_out
+        );
+        break;
+
+    case SUMMARY:
+        av_log(ctx, AV_LOG_INFO,
+            "\n"
+            "Input Integrated:   %+6.1f LUFS\n"
+            "Input True Peak:    %+6.1f dBTP\n"
+            "Input LRA:          %6.1f LU\n"
+            "Input Threshold:    %+6.1f LUFS\n"
+            "\n"
+            "Output Integrated:  %+6.1f LUFS\n"
+            "Output True Peak:   %+6.1f dBTP\n"
+            "Output LRA:         %6.1f LU\n"
+            "Output Threshold:   %+6.1f LUFS\n"
+            "\n"
+            "Normalization Type:   %s\n"
+            "Target Offset:      %+6.1f LU\n",
+            i_in,
+            20. * log10(tp_in),
+            lra_in,
+            thresh_in,
+            i_out,
+            20. * log10(tp_out),
+            lra_out,
+            thresh_out,
+            s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
+            s->target_i - i_out
+        );
+        break;
+    }
+
+end:
+    /* ebur128_destroy() is only called on a state that was actually created. */
+    if (s->r128_in)
+        ebur128_destroy(&s->r128_in);
+    if (s->r128_out)
+        ebur128_destroy(&s->r128_out);
+    av_freep(&s->limiter_buf);
+    av_freep(&s->prev_smp);
+    av_freep(&s->buf);
+}
+
+/* Input pads: a single audio pad, configured by config_input() and fed
+ * through filter_frame(). The list ends with a NULL entry. */
+static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+/* Output pads: a single audio pad whose frames are pulled through
+ * request_frame(). The list ends with a NULL entry. */
+static const AVFilterPad avfilter_af_loudnorm_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = request_frame,
+    },
+    { NULL }
+};
+
+/* Filter descriptor for "loudnorm": ties the private context, option class,
+ * callbacks and pad lists together. */
+AVFilter ff_af_loudnorm = {
+    .name          = "loudnorm",
+    .description   = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
+    /* private state */
+    .priv_class    = &loudnorm_class,
+    .priv_size     = sizeof(LoudNormContext),
+    /* callbacks */
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    /* pads */
+    .inputs        = avfilter_af_loudnorm_inputs,
+    .outputs       = avfilter_af_loudnorm_outputs,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 5c18fd1..f815fcc 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -107,6 +107,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER(HIGHPASS,       highpass,       af);
     REGISTER_FILTER(JOIN,           join,           af);
     REGISTER_FILTER(LADSPA,         ladspa,         af);
+    REGISTER_FILTER(LOUDNORM,       loudnorm,       af);
     REGISTER_FILTER(LOWPASS,        lowpass,        af);
     REGISTER_FILTER(PAN,            pan,            af);
     REGISTER_FILTER(REPLAYGAIN,     replaygain,     af);
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 94c6bdc..0dbb7e5 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,8 +30,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   6
-#define LIBAVFILTER_VERSION_MINOR  40
-#define LIBAVFILTER_VERSION_MICRO 102
+#define LIBAVFILTER_VERSION_MINOR  41
+#define LIBAVFILTER_VERSION_MICRO 100
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
-- 
2.5.4 (Apple Git-61)



More information about the ffmpeg-devel mailing list