[FFmpeg-devel] [PATCH 2/2] lavfi: add audio silencedetect filter.

Clément Bœsch ubitux at gmail.com
Sat Jan 7 15:41:05 CET 2012


On Thu, Jan 05, 2012 at 08:29:35PM +0100, Nicolas George wrote:
> Le sextidi 16 nivôse, an CCXX, Clément Bœsch a écrit :
> > ---
> >  Changelog                      |    1 +
> >  doc/filters.texi               |   23 ++++++
> >  libavfilter/Makefile           |    1 +
> >  libavfilter/af_silencedetect.c |  161 ++++++++++++++++++++++++++++++++++++++++
> >  libavfilter/allfilters.c       |    1 +
> >  libavfilter/avfilter.h         |    4 +-
> >  6 files changed, 189 insertions(+), 2 deletions(-)
> >  create mode 100644 libavfilter/af_silencedetect.c
> > 
> > diff --git a/Changelog b/Changelog
> > index 8b59ac8..80da97b 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -19,6 +19,7 @@ version next:
> >  - Avid 1:1 10-bit RGB Packer decoder
> >  - v308 Quicktime Uncompressed 4:4:4 encoder and decoder
> >  - yuv4 libquicktime packed 4:2:0 encoder and decoder
> > +- silencedetect audio filter
> >  
> >  
> >  version 0.9:
> > diff --git a/doc/filters.texi b/doc/filters.texi
> > index de73e3f..5349fd8 100644
> > --- a/doc/filters.texi
> > +++ b/doc/filters.texi
> > @@ -358,6 +358,29 @@ Note that @command{ffmpeg} integrates a default down-mix (and up-mix) system
> >  that should be preferred (see "-ac" option) unless you have very specific
> >  needs.
> >  
> 
> > +@section silencedetect
> > +
> > +Detect silence in an audio stream.
> 
> And do what with it? A few more words may be nice.
> 

It should be better now, although the wording may still need work.

> > +
> > +@table @option
> > +@item d, duration
> > +Set silence duration until notification (default is 2 seconds).
> > +
> > +@item noise, n
> > +Set noise tolerance. Can be specified in dB or amplitude ratio. Default is
> > +-60dB, or 0.001.
> > +@end table
> > +
> 
> > +Detect 5 seconds of silence with -50dB noise tolerance:
> > +@example
> > +silencedetect=n=-50dB:d=5
> > +@end example
> > +
> > +Detect silence with 0.0001 noise tolerance:
> > +@example
> > +silencedetect=noise=0.0001
> > +@end example
> 
> An example with a complete filtergraph may be nice, especially if the use
> requires something more complex than just inserting the filter.
> 

OK, just added an example with ffmpeg.

> > +
> >  @section volume
> >  
> >  Adjust the input audio volume.
> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> > index 0d8f120..bb5748b 100644
> > --- a/libavfilter/Makefile
> > +++ b/libavfilter/Makefile
> > @@ -35,6 +35,7 @@ OBJS-$(CONFIG_ASTREAMSYNC_FILTER)            += af_astreamsync.o
> >  OBJS-$(CONFIG_EARWAX_FILTER)                 += af_earwax.o
> >  OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o
> >  OBJS-$(CONFIG_VOLUME_FILTER)                 += af_volume.o
> > +OBJS-$(CONFIG_SILENCEDETECT_FILTER)          += af_silencedetect.o
> >  
> >  OBJS-$(CONFIG_ABUFFER_FILTER)                += asrc_abuffer.o
> >  OBJS-$(CONFIG_AEVALSRC_FILTER)               += asrc_aevalsrc.o
> > diff --git a/libavfilter/af_silencedetect.c b/libavfilter/af_silencedetect.c
> > new file mode 100644
> > index 0000000..8d68b63
> > --- /dev/null
> > +++ b/libavfilter/af_silencedetect.c
> > @@ -0,0 +1,161 @@
> 
> > +/*
> > + * This file is part of FFmpeg.
> > + *
> 
> The copyright line is missing.
> 

Is that really necessary? Added anyway.

> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * Audio silence detector
> > + */
> > +
> > +#include "libavutil/opt.h"
> > +#include "avfilter.h"
> > +
> > +typedef struct {
> > +    const AVClass *class;
> 
> > +    char *noise_str;
> 
> Too bad we can not let the options system parse the option instead of
> copying it.
> 

:(

> > +    double noise;
> > +    int duration;
> 
> > +    int nb_null_samples;
> 
> int64_t? If the silence is very long, it could overflow.
> 

Changed.

> > +    int silence;
> > +} SilenceDetectContext;
> > +
> > +#define OFFSET(x) offsetof(SilenceDetectContext, x)
> > +static const AVOption silencedetect_options[] = {
> > +    { "n",         "set noise tolerance",              OFFSET(noise_str), AV_OPT_TYPE_STRING, {.str="-60dB"}, CHAR_MIN, CHAR_MAX },
> > +    { "noise",     "set noise tolerance",              OFFSET(noise_str), AV_OPT_TYPE_STRING, {.str="-60dB"}, CHAR_MIN, CHAR_MAX },
> > +    { "d",         "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_INT,    {.dbl=2},    0, INT_MAX},
> > +    { "duration",  "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_INT,    {.dbl=2},    0, INT_MAX},
> > +    { NULL },
> > +};
> > +
> > +static const char *silencedetect_get_name(void *ctx)
> > +{
> > +    return "silencedetect";
> > +}
> > +
> > +static const AVClass silencedetect_class = {
> > +    .class_name = "SilenceDetectContext",
> > +    .item_name  = silencedetect_get_name,
> > +    .option     = silencedetect_options,
> > +};
> > +
> > +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> > +{
> > +    int ret;
> > +    char *tail;
> > +    SilenceDetectContext *silence = ctx->priv;
> > +
> > +    silence->class = &silencedetect_class;
> > +    av_opt_set_defaults(silence);
> > +
> > +    if ((ret = av_set_options_string(silence, args, "=", ":")) < 0) {
> > +        av_log(ctx, AV_LOG_ERROR, "Error parsing options string: '%s'\n", args);
> > +        return ret;
> > +    }
> > +
> > +    silence->noise = strtod(silence->noise_str, &tail);
> 
> > +    if (strcmp(tail, "dB") == 0)
> 
> Some people do not like == 0 around here.
> 

NOTed.

> > +        silence->noise = pow(10, silence->noise/20);
> > +
> > +    return 0;
> > +}
> > +
> > +static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
> > +{
> > +    int i;
> > +    SilenceDetectContext *silence = inlink->dst->priv;
> > +    const int nb_samples = insamples->audio->nb_samples *
> > +        av_get_channel_layout_nb_channels(insamples->audio->channel_layout);
> > +
> > +    // FIXME: sample rate change will break this
> 
> > +    int nbr_samples_notify = inlink->sample_rate * silence->duration;
> 
> It seems your code below counts samples per channel, but nbr_samples_notify
> is computed without taking the number of channels into account.
> 

Thanks, fixed.

> Oh, and nbr looks like a French abbreviation, not an English one.
> 

Mmh, ok, renamed.

> > +
> > +    switch (insamples->format) {
> > +        case AV_SAMPLE_FMT_DBL: {
> > +            double *p = (double *)insamples->data[0];
> > +            for (i = 0; i < nb_samples; i++, p++) {
> > +                if (*p < silence->noise && *p > -silence->noise) {
> 
> > +                    if (!silence->silence) {
> > +                        silence->nb_null_samples++;
> > +                        if (silence->nb_null_samples == nbr_samples_notify
> > +                            && !silence->silence) {
> 
> !silence->silence seems duplicated.

Removed the second one.

>                                     And possibly even useless: since you
> check on silence->nb_null_samples == nbr_samples_notify and not >=, and you
> increment each time, it can only be true once.
> 

Well, I'd like to avoid the unnecessary increment since the filter can be
used to monitor hours or days of audio, and I'm afraid of overflows in
that case.

> > +                            av_log(silence, AV_LOG_INFO, "Silence detected around %f sec\n",
> > +                                   insamples->pts * av_q2d(inlink->time_base));
> 
> I believe insamples->pts can be NOPTS?
> 

If so, there are a few other filters to fix; at least ashowinfo. Are you
sure the pts are not set at this point?

> > +                            silence->silence = 1;
> > +                        }
> > +                    }
> > +                } else {
> 
> > +                    if (silence->silence && silence->nb_null_samples)
> 
> Here again, the test could be made simpler: if silence->nb_null_samples >=
> nbr_samples_notify.
> 

Simplified differently.

> > +                        av_log(silence, AV_LOG_INFO, "Silence ended (%f sec)\n",
> > +                               insamples->pts * av_q2d(inlink->time_base));
> > +                    silence->nb_null_samples = silence->silence = 0;
> > +                }
> > +            }
> > +            break;
> > +        }
> > +    }
> > +
> > +    avfilter_filter_samples(inlink->dst->outputs[0], insamples);
> > +}
> > +
> > +static int query_formats(AVFilterContext *ctx)
> > +{
> > +    AVFilterFormats *formats = NULL;
> > +    enum AVSampleFormat sample_fmts[] = {
> > +        AV_SAMPLE_FMT_DBL,
> > +        AV_SAMPLE_FMT_NONE
> > +    };
> > +    int packing_fmts[] = { AVFILTER_PACKED, -1 };
> > +
> > +    formats = avfilter_make_all_channel_layouts();
> > +    if (!formats)
> > +        return AVERROR(ENOMEM);
> > +    avfilter_set_common_channel_layouts(ctx, formats);
> > +
> > +    formats = avfilter_make_format_list(sample_fmts);
> > +    if (!formats)
> > +        return AVERROR(ENOMEM);
> > +    avfilter_set_common_sample_formats(ctx, formats);
> > +
> > +    formats = avfilter_make_format_list(packing_fmts);
> > +    if (!formats)
> > +        return AVERROR(ENOMEM);
> > +    avfilter_set_common_packing_formats(ctx, formats);
> > +
> > +    return 0;
> > +}
> > +
> > +AVFilter avfilter_af_silencedetect = {
> > +    .name          = "silencedetect",
> > +    .description   = NULL_IF_CONFIG_SMALL("Detect silence."),
> > +    .priv_size     = sizeof(SilenceDetectContext),
> > +    .init          = init,
> > +    .query_formats = query_formats,
> > +
> > +    .inputs = (const AVFilterPad[]) {
> > +        { .name             = "default",
> > +          .type             = AVMEDIA_TYPE_AUDIO,
> > +          .get_audio_buffer = avfilter_null_get_audio_buffer,
> > +          .filter_samples   = filter_samples, },
> 
> > +        { .name = NULL}
> 
> Is it on purpose that most filters lack the space between the NULL and the
> closing brace?
> 

Heh, I don't remember from where I copy-pasted it :)

> > +    },
> > +    .outputs = (const AVFilterPad[]) {
> > +        { .name = "default",
> > +          .type = AVMEDIA_TYPE_AUDIO, },
> > +        { .name = NULL}
> > +    },
> 
> Did you consider making it a sink rather than a filter?
> 

blackframe is not a sink filter IIRC, so at least for consistency I'd like
to keep it this way. Also, playing the stream and seeing the notifications
in almost real time might be interesting.

[...]

Also changed from last version:
 - doxycommented the context struct
 - warn when the sample rate changes
 - more accurate and helpful notifications

-- 
Clément B.
-------------- next part --------------
From 2e7c6e75d225818b0eacf29f5599dfd308804406 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= <ubitux at gmail.com>
Date: Tue, 3 Jan 2012 23:47:09 +0100
Subject: [PATCH] lavfi: add audio silencedetect filter.

---
 Changelog                      |    1 +
 doc/filters.texi               |   24 ++++++
 libavfilter/Makefile           |    1 +
 libavfilter/af_silencedetect.c |  171 ++++++++++++++++++++++++++++++++++++++++
 libavfilter/allfilters.c       |    1 +
 libavfilter/avfilter.h         |    4 +-
 6 files changed, 200 insertions(+), 2 deletions(-)
 create mode 100644 libavfilter/af_silencedetect.c

diff --git a/Changelog b/Changelog
index db4c7b3..d4ec698 100644
--- a/Changelog
+++ b/Changelog
@@ -19,6 +19,7 @@ version next:
 - Avid 1:1 10-bit RGB Packer decoder
 - v308 Quicktime Uncompressed 4:4:4 encoder and decoder
 - yuv4 libquicktime packed 4:2:0 encoder and decoder
+- silencedetect audio filter
 
 
 version 0.9:
diff --git a/doc/filters.texi b/doc/filters.texi
index de73e3f..3bc117c 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -358,6 +358,30 @@ Note that @command{ffmpeg} integrates a default down-mix (and up-mix) system
 that should be preferred (see "-ac" option) unless you have very specific
 needs.
 
+@section silencedetect
+
+Print a notification line when silence is detected, or when sound is back in
+the given audio stream.
+
+@table @option
+@item d, duration
+Set silence duration until notification (default is 2 seconds).
+
+@item noise, n
+Set noise tolerance. Can be specified in dB or amplitude ratio. Default is
+-60dB, or 0.001.
+@end table
+
+Detect 5 seconds of silence with -50dB noise tolerance:
+@example
+silencedetect=n=-50dB:d=5
+@end example
+
+Complete example with ffmpeg to detect silence with 0.0001 noise tolerance:
+@example
+ffmpeg -f lavfi -i amovie=silence.mp3,silencedetect=noise=0.0001 -f null -
+@end example
+
 @section volume
 
 Adjust the input audio volume.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 0d8f120..bb5748b 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -35,6 +35,7 @@ OBJS-$(CONFIG_ASTREAMSYNC_FILTER)            += af_astreamsync.o
 OBJS-$(CONFIG_EARWAX_FILTER)                 += af_earwax.o
 OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o
 OBJS-$(CONFIG_VOLUME_FILTER)                 += af_volume.o
+OBJS-$(CONFIG_SILENCEDETECT_FILTER)          += af_silencedetect.o
 
 OBJS-$(CONFIG_ABUFFER_FILTER)                += asrc_abuffer.o
 OBJS-$(CONFIG_AEVALSRC_FILTER)               += asrc_aevalsrc.o
diff --git a/libavfilter/af_silencedetect.c b/libavfilter/af_silencedetect.c
new file mode 100644
index 0000000..fe798dc
--- /dev/null
+++ b/libavfilter/af_silencedetect.c
@@ -0,0 +1,171 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * Copyright (c) 2011 Clément Bœsch <ubitux at gmail.com>
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Audio silence detector
+ */
+
+#include "libavutil/opt.h"
+#include "avfilter.h"
+
+typedef struct {
+    const AVClass *class;
+    char *noise_str;            ///< noise option string
+    double noise;               ///< noise amplitude ratio
+    int duration;               ///< minimum duration to monitor in seconds
+    int64_t nb_null_samples;    ///< current number of continuous zero samples
+    double start;               ///< if silence is detected, this value contains the time of the first zero sample
+    int last_sample_rate;       ///< last sample rate to check for sample rate changes
+} SilenceDetectContext;
+
+#define OFFSET(x) offsetof(SilenceDetectContext, x)
+static const AVOption silencedetect_options[] = {
+    { "n",         "set noise tolerance",              OFFSET(noise_str), AV_OPT_TYPE_STRING, {.str="-60dB"}, CHAR_MIN, CHAR_MAX },
+    { "noise",     "set noise tolerance",              OFFSET(noise_str), AV_OPT_TYPE_STRING, {.str="-60dB"}, CHAR_MIN, CHAR_MAX },
+    { "d",         "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_INT,    {.dbl=2},    0, INT_MAX},
+    { "duration",  "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_INT,    {.dbl=2},    0, INT_MAX},
+    { NULL },
+};
+
+static const char *silencedetect_get_name(void *ctx)
+{
+    return "silencedetect";
+}
+
+static const AVClass silencedetect_class = {
+    .class_name = "SilenceDetectContext",
+    .item_name  = silencedetect_get_name,
+    .option     = silencedetect_options,
+};
+
+static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
+{
+    int ret;
+    char *tail;
+    SilenceDetectContext *silence = ctx->priv;
+
+    silence->class = &silencedetect_class;
+    av_opt_set_defaults(silence);
+
+    if ((ret = av_set_options_string(silence, args, "=", ":")) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Error parsing options string: '%s'\n", args);
+        return ret;
+    }
+
+    silence->noise = strtod(silence->noise_str, &tail);
+    if (!strcmp(tail, "dB"))
+        silence->noise = pow(10, silence->noise/20);
+
+    return 0;
+}
+
+static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
+{
+    int i;
+    SilenceDetectContext *silence = inlink->dst->priv;
+    const int     nb_channels       = av_get_channel_layout_nb_channels(inlink->channel_layout);
+    const int     nb_samples        = insamples->audio->nb_samples            * nb_channels;
+    const int64_t nb_samples_notify = inlink->sample_rate * silence->duration * nb_channels;
+
+    if (silence->last_sample_rate &&
+        silence->last_sample_rate != inlink->sample_rate) {
+        av_log(silence, AV_LOG_WARNING,
+               "Sample rate changed, detection might not be accurate\n");
+        silence->start = silence->nb_null_samples = 0;
+    }
+    silence->last_sample_rate = inlink->sample_rate;
+
+    // TODO: support more sample formats
+    if (insamples->format == AV_SAMPLE_FMT_DBL) {
+        double *p = (double *)insamples->data[0];
+
+        for (i = 0; i < nb_samples; i++, p++) {
+            if (*p < silence->noise && *p > -silence->noise) {
+                if (!silence->start) {
+                    silence->nb_null_samples++;
+                    if (silence->nb_null_samples == nb_samples_notify) {
+                        silence->start = insamples->pts * av_q2d(inlink->time_base) - silence->duration;
+                        av_log(silence, AV_LOG_INFO,
+                               "Silence detected at %f sec\n", silence->start);
+                    }
+                }
+            } else {
+                if (silence->start) {
+                    double end = insamples->pts * av_q2d(inlink->time_base);
+                    av_log(silence, AV_LOG_INFO,
+                           "Silence ended at %f sec (duration: %f sec)\n",
+                           end, end - silence->start);
+                }
+                silence->nb_null_samples = silence->start = 0;
+            }
+        }
+    }
+
+    avfilter_filter_samples(inlink->dst->outputs[0], insamples);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBL,
+        AV_SAMPLE_FMT_NONE
+    };
+    int packing_fmts[] = { AVFILTER_PACKED, -1 };
+
+    formats = avfilter_make_all_channel_layouts();
+    if (!formats)
+        return AVERROR(ENOMEM);
+    avfilter_set_common_channel_layouts(ctx, formats);
+
+    formats = avfilter_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    avfilter_set_common_sample_formats(ctx, formats);
+
+    formats = avfilter_make_format_list(packing_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    avfilter_set_common_packing_formats(ctx, formats);
+
+    return 0;
+}
+
+AVFilter avfilter_af_silencedetect = {
+    .name          = "silencedetect",
+    .description   = NULL_IF_CONFIG_SMALL("Detect silence."),
+    .priv_size     = sizeof(SilenceDetectContext),
+    .init          = init,
+    .query_formats = query_formats,
+
+    .inputs = (const AVFilterPad[]) {
+        { .name             = "default",
+          .type             = AVMEDIA_TYPE_AUDIO,
+          .get_audio_buffer = avfilter_null_get_audio_buffer,
+          .filter_samples   = filter_samples, },
+        { .name = NULL }
+    },
+    .outputs = (const AVFilterPad[]) {
+        { .name = "default",
+          .type = AVMEDIA_TYPE_AUDIO, },
+        { .name = NULL }
+    },
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 621568e..a863a93 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -45,6 +45,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER (EARWAX,      earwax,      af);
     REGISTER_FILTER (PAN,         pan,         af);
     REGISTER_FILTER (VOLUME,      volume,      af);
+    REGISTER_FILTER (SILENCEDETECT, silencedetect, af);
 
     REGISTER_FILTER (ABUFFER,     abuffer,     asrc);
     REGISTER_FILTER (AEVALSRC,    aevalsrc,    asrc);
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index d88d3ab..9c79597 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -30,8 +30,8 @@
 #include "libavcodec/avcodec.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  2
-#define LIBAVFILTER_VERSION_MINOR 57
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MINOR 58
+#define LIBAVFILTER_VERSION_MICRO 100
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
-- 
1.7.8.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20120107/f6b51335/attachment.asc>


More information about the ffmpeg-devel mailing list