[FFmpeg-devel] [PATCH 2/3] avfilter/af_volumedetect.c: Add 32bit float audio support

Tue Jun 18 12:16:55 EEST 2024

> On Jun 17, 2024, at 5:52 PM, Rémi Denis-Courmont <remi at remlab.net> wrote:
> 
> 
> 
> Le 17 juin 2024 13:18:11 GMT+02:00, Yigithan Yigit <yigithanyigitdevel at gmail.com <mailto:yigithanyigitdevel at gmail.com>> a écrit :
>> ---
>> libavfilter/af_volumedetect.c | 159 ++++++++++++++++++++++++++++------
>> 1 file changed, 133 insertions(+), 26 deletions(-)
>> 
>> diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
>> index 327801a7f9..dbbcd037a5 100644
>> --- a/libavfilter/af_volumedetect.c
>> +++ b/libavfilter/af_volumedetect.c
>> @@ -20,27 +20,51 @@
>> 
>> #include "libavutil/channel_layout.h"
>> #include "libavutil/avassert.h"
>> +#include "libavutil/mem.h"
>> #include "audio.h"
>> #include "avfilter.h"
>> #include "internal.h"
>> 
>> +#define MAX_DB_FLT 1024
>> #define MAX_DB 91
>> +#define HISTOGRAM_SIZE 0x10000
>> +#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2)
>> 
>> typedef struct VolDetectContext {
>> -    /**
>> -     * Number of samples at each PCM value.
>> -     * histogram[0x8000 + i] is the number of samples at value i.
>> -     * The extra element is there for symmetry.
>> -     */
>> -    uint64_t histogram[0x10001];
>> +    uint64_t* histogram; ///< for integer number of samples at each PCM value, for float number of samples at each dB
>> +    uint64_t nb_samples; ///< number of samples
>> +    double sum2;         ///< sum of the squares of the samples
>> +    double max;          ///< maximum sample value
>> +    int is_float;        ///< true if the input is in floating point
>> } VolDetectContext;
>> 
>> -static inline double logdb(uint64_t v)
>> +static inline double logdb(double v, enum AVSampleFormat sample_fmt)
>> {
>> -    double d = v / (double)(0x8000 * 0x8000);
>> -    if (!v)
>> -        return MAX_DB;
>> -    return -log10(d) * 10;
>> +    if (sample_fmt == AV_SAMPLE_FMT_FLT) {
>> +        if (!v)
>> +            return MAX_DB_FLT;
>> +        return -log10(v) * 10;
>> +    } else {
>> +        double d = v / (double)(0x8000 * 0x8000);
>> +        if (!v)
>> +            return MAX_DB;
>> +        return -log10(d) * 10;
>> +    }
>> +}
>> +
>> +static void update_float_stats(VolDetectContext *vd, float *audio_data)
>> +{
>> +    double sample;
>> +    int idx;
>> +    if(!isnormal(*audio_data))
>> +        return;
> 
> Do we really need to classify floats here? That's probably going to hurt perfs badly, and makes an otherwise very vectorisable function not so easily vectored.

You could be correct, we might consider checking NaN, Inf+/- values. Otherwise there is a risk of a crash if someone uses something like this “aelvalsrc=3.4e39”.

> 
>> +    sample = fabsf(*audio_data);
>> +    if (sample > vd->max)
>> +        vd->max = sample;
>> +    vd->sum2 += sample * sample;
>> +    idx = lrintf(floorf(logdb(sample * sample, AV_SAMPLE_FMT_FLT))) + MAX_DB_FLT;
> 
> You're recomputing the same value again, and you seem to be rounding twice in a row?

I missed, fixed.

> 
>> +    vd->histogram[idx]++;
>> +    vd->nb_samples++;
>> }
>> 
>> static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
>> @@ -51,18 +75,41 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
>>    int nb_channels = samples->ch_layout.nb_channels;
>>    int nb_planes   = nb_channels;
>>    int plane, i;
>> -    int16_t *pcm;
>> +    int planar = 0;
>> 
>> -    if (!av_sample_fmt_is_planar(samples->format)) {
>> -        nb_samples *= nb_channels;
>> +    planar = av_sample_fmt_is_planar(samples->format);
>> +    if (!planar)
>>        nb_planes = 1;
>> +    if (vd->is_float) {
>> +        float *audio_data;
>> +        for (plane = 0; plane < nb_planes; plane++) {
>> +            audio_data = (float *)samples->extended_data[plane];
>> +            for (i = 0; i < nb_samples; i++) {
>> +                if (planar) {
>> +                    update_float_stats(vd, &audio_data[i]);
>> +                } else {
>> +                    for (int j = 0; j < nb_channels; j++)
>> +                        update_float_stats(vd, &audio_data[i * nb_channels + j]);
>> +                }
>> +            }
>> +        }
>> +    } else {
>> +        int16_t *pcm;
>> +        for (plane = 0; plane < nb_planes; plane++) {
>> +            pcm = (int16_t *)samples->extended_data[plane];
>> +            for (i = 0; i < nb_samples; i++) {
>> +                if (planar) {
>> +                    vd->histogram[pcm[i] + 0x8000]++;
>> +                    vd->nb_samples++;
>> +                } else {
>> +                    for (int j = 0; j < nb_channels; j++) {
>> +                        vd->histogram[pcm[i * nb_channels + j] + 0x8000]++;
>> +                        vd->nb_samples++;
>> +                    }
>> +                }
>> +            }
>> +        }
> 
> Can't we pick the correct implementation (planar/packed and float/int) once and for all whilst configuring the filter?

Actually sounds good, I am going to try.
Thanks for the feedback!

> 
>>    }
>> -    for (plane = 0; plane < nb_planes; plane++) {
>> -        pcm = (int16_t *)samples->extended_data[plane];
>> -        for (i = 0; i < nb_samples; i++)
>> -            vd->histogram[pcm[i] + 0x8000]++;
>> -    }
>> -
>>    return ff_filter_frame(inlink->dst->outputs[0], samples);
>> }
>> 
>> @@ -73,6 +120,20 @@ static void print_stats(AVFilterContext *ctx)
>>    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
>>    uint64_t histdb[MAX_DB + 1] = { 0 };
>> 
>> +    if (!vd->nb_samples)
>> +        return;
>> +    if (vd->is_float) {
>> +        av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", vd->nb_samples);
>> +        av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(vd->sum2 / vd->nb_samples, AV_SAMPLE_FMT_FLT));
>> +        av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -2.0*logdb(vd->max, AV_SAMPLE_FMT_FLT));
>> +        for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++);
>> +        for (; i >= 0 && sum < vd->nb_samples / 1000; i++) {
>> +            if (!vd->histogram[i])
>> +                continue;
>> +            av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", MAX_DB_FLT - i, vd->histogram[i]);
>> +            sum += vd->histogram[i];
>> +        }
>> +    } else {
>>    for (i = 0; i < 0x10000; i++)
>>        nb_samples += vd->histogram[i];
>>    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
>> @@ -92,26 +153,61 @@ static void print_stats(AVFilterContext *ctx)
>>        return;
>>    power = (power + nb_samples_shift / 2) / nb_samples_shift;
>>    av_assert0(power <= 0x8000 * 0x8000);
>> -    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
>> +    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb((double)power, AV_SAMPLE_FMT_S16));
>> 
>>    max_volume = 0x8000;
>>    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
>>                             !vd->histogram[0x8000 - max_volume])
>>        max_volume--;
>> -    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
>> +    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16));
>> 
>>    for (i = 0; i < 0x10000; i++)
>> -        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
>> +        histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), AV_SAMPLE_FMT_S16)] += vd->histogram[i];
>>    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
>>    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
>> -        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
>> +        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, histdb[i]);
>>        sum += histdb[i];
>>    }
>> +    }
>> +}
>> +
>> +static int config_output(AVFilterLink *outlink)
>> +{
>> +    AVFilterContext *ctx = outlink->src;
>> +    VolDetectContext *vd = ctx->priv;
>> +    size_t histogram_size;
>> +
>> +    vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT ||
>> +                   outlink->format == AV_SAMPLE_FMT_FLTP;
>> +
>> +    if (!vd->is_float) {
>> +        /*
>> +        * Number of samples at each PCM value.
>> +        * Only used for integer formats.
>> +        * For 16 bit signed PCM there are 65536.
>> +        * histogram[0x8000 + i] is the number of samples at value i.
>> +        * The extra element is there for symmetry.
>> +        */
>> +        histogram_size = HISTOGRAM_SIZE + 1;
>> +    } else {
>> +        /*
>> +        * The histogram is used to store the number of samples at each dB
>> +        * instead of the number of samples at each PCM value.
>> +        */
>> +        histogram_size = HISTOGRAM_SIZE_FLT + 1;
>> +    }
>> +    vd->histogram = av_calloc(histogram_size, sizeof(uint64_t));
>> +    if (!vd->histogram)
>> +        return AVERROR(ENOMEM);
>> +    return 0;
>> }
>> 
>> static av_cold void uninit(AVFilterContext *ctx)
>> {
>> +    VolDetectContext *vd = ctx->priv;
>>    print_stats(ctx);
>> +    if (vd->histogram)
>> +        av_freep(&vd->histogram);
>> }
>> 
>> static const AVFilterPad volumedetect_inputs[] = {
>> @@ -122,6 +218,14 @@ static const AVFilterPad volumedetect_inputs[] = {
>>    },
>> };
>> 
>> +static const AVFilterPad volumedetect_outputs[] = {
>> +    {
>> +        .name         = "default",
>> +        .type         = AVMEDIA_TYPE_AUDIO,
>> +        .config_props = config_output,
>> +    },
>> +};
>> +
>> const AVFilter ff_af_volumedetect = {
>>    .name          = "volumedetect",
>>    .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
>> @@ -129,6 +233,9 @@ const AVFilter ff_af_volumedetect = {
>>    .uninit        = uninit,
>>    .flags         = AVFILTER_FLAG_METADATA_ONLY,
>>    FILTER_INPUTS(volumedetect_inputs),
>> -    FILTER_OUTPUTS(ff_audio_default_filterpad),
>> -    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P),
>> +    FILTER_OUTPUTS(volumedetect_outputs),
>> +    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16,
>> +                      AV_SAMPLE_FMT_S16P,
>> +                      AV_SAMPLE_FMT_FLT,
>> +                      AV_SAMPLE_FMT_FLTP),
>> };
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org <mailto:ffmpeg-devel at ffmpeg.org>
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org <mailto:ffmpeg-devel-request at ffmpeg.org> with subject "unsubscribe".