FFmpeg: libavfilter/af_volumedetect.c Source File

00001 /*
00002  * Copyright (c) 2012 Nicolas George
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public License
00008  * as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public License
00017  * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
00018  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include "libavutil/audioconvert.h"
00022 #include "libavutil/avassert.h"
00023 #include "audio.h"
00024 #include "avfilter.h"
00025 #include "internal.h"
00026 
/**
 * Private state of the volumedetect filter.
 */
typedef struct {
    /*
     * Number of samples seen at each 16-bit PCM value: a sample of value s
     * is counted in histogram[s + 0x8000]. The array has one slot more than
     * the 0x10000 possible sample values; print_stats() reads index 0x10000
     * when it scans symmetrically around 0x8000 for the peak.
     */
    uint64_t histogram[0x10000 + 1];
} VolDetectContext;
00035 
00036 static int query_formats(AVFilterContext *ctx)
00037 {
00038     enum AVSampleFormat sample_fmts[] = {
00039         AV_SAMPLE_FMT_S16,
00040         AV_SAMPLE_FMT_S16P,
00041         AV_SAMPLE_FMT_NONE
00042     };
00043     AVFilterFormats *formats;
00044 
00045     if (!(formats = ff_make_format_list(sample_fmts)))
00046         return AVERROR(ENOMEM);
00047     ff_set_common_formats(ctx, formats);
00048 
00049     return 0;
00050 }
00051 
00052 static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samples)
00053 {
00054     AVFilterContext *ctx = inlink->dst;
00055     VolDetectContext *vd = ctx->priv;
00056     int64_t layout  = samples->audio->channel_layout;
00057     int nb_samples  = samples->audio->nb_samples;
00058     int nb_channels = av_get_channel_layout_nb_channels(layout);
00059     int nb_planes   = nb_planes;
00060     int plane, i;
00061     int16_t *pcm;
00062 
00063     if (!av_sample_fmt_is_planar(samples->format)) {
00064         nb_samples *= nb_channels;
00065         nb_planes = 1;
00066     }
00067     for (plane = 0; plane < nb_planes; plane++) {
00068         pcm = (int16_t *)samples->extended_data[plane];
00069         for (i = 0; i < nb_samples; i++)
00070             vd->histogram[pcm[i] + 0x8000]++;
00071     }
00072 
00073     return ff_filter_samples(inlink->dst->outputs[0], samples);
00074 }
00075 
00076 #define MAX_DB 91
00077 
00078 static inline double logdb(uint64_t v)
00079 {
00080     double d = v / (double)(0x8000 * 0x8000);
00081     if (!v)
00082         return MAX_DB;
00083     return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
00084 }
00085 
00086 static void print_stats(AVFilterContext *ctx)
00087 {
00088     VolDetectContext *vd = ctx->priv;
00089     int i, max_volume, shift;
00090     uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
00091     uint64_t histdb[MAX_DB + 1] = { 0 };
00092 
00093     for (i = 0; i < 0x10000; i++)
00094         nb_samples += vd->histogram[i];
00095     av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
00096     if (!nb_samples)
00097         return;
00098 
00099     /* If nb_samples > 1<<34, there is a risk of overflow in the
00100        multiplication or the sum: shift all histogram values to avoid that.
00101        The total number of samples must be recomputed to avoid rounding
00102        errors. */
00103     shift = av_log2(nb_samples >> 33);
00104     for (i = 0; i < 0x10000; i++) {
00105         nb_samples_shift += vd->histogram[i] >> shift;
00106         power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
00107     }
00108     if (!nb_samples_shift)
00109         return;
00110     power = (power + nb_samples_shift / 2) / nb_samples_shift;
00111     av_assert0(power <= 0x8000 * 0x8000);
00112     av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
00113 
00114     max_volume = 0x8000;
00115     while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
00116                              !vd->histogram[0x8000 - max_volume])
00117         max_volume--;
00118     av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
00119 
00120     for (i = 0; i < 0x10000; i++)
00121         histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
00122     for (i = 0; i <= MAX_DB && !histdb[i]; i++);
00123     for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
00124         av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
00125         sum += histdb[i];
00126     }
00127 }
00128 
00129 static int request_frame(AVFilterLink *outlink)
00130 {
00131     AVFilterContext *ctx = outlink->src;
00132     int ret = ff_request_frame(ctx->inputs[0]);
00133     if (ret == AVERROR_EOF)
00134         print_stats(ctx);
00135     return ret;
00136 }
00137 
00138 AVFilter avfilter_af_volumedetect = {
00139     .name          = "volumedetect",
00140     .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
00141 
00142     .priv_size     = sizeof(VolDetectContext),
00143     .query_formats = query_formats,
00144 
00145     .inputs    = (const AVFilterPad[]) {
00146         { .name             = "default",
00147           .type             = AVMEDIA_TYPE_AUDIO,
00148           .get_audio_buffer = ff_null_get_audio_buffer,
00149           .filter_samples   = filter_samples,
00150           .min_perms        = AV_PERM_READ, },
00151         { .name = NULL }
00152     },
00153     .outputs   = (const AVFilterPad[]) {
00154         { .name = "default",
00155           .type = AVMEDIA_TYPE_AUDIO,
00156           .request_frame = request_frame, },
00157         { .name = NULL }
00158     },
00159 };