[FFmpeg-devel] [PATCH][BULK][again] new multimedia filter avf_showcqt.c

Wed May 28 09:03:39 CEST 2014

On Wed, May 28, 2014 at 10:59:20AM +0700, Muhammad Faiz wrote:
> Hi,
> this filter is same as showspectrum but with constant Q transform,
> so frequency is spaced logarithmically

> diff -ruN a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> --- a/libavfilter/allfilters.c	2014-05-19 13:35:09.327627603 +0700
> +++ b/libavfilter/allfilters.c	2014-05-19 11:07:44.977770955 +0700

Would it be possible to provide a git-format-patch instead?

> @@ -233,6 +233,7 @@
>      REGISTER_FILTER(CONCAT,         concat,         avf);
>      REGISTER_FILTER(SHOWSPECTRUM,   showspectrum,   avf);
>      REGISTER_FILTER(SHOWWAVES,      showwaves,      avf);
> +    REGISTER_FILTER(SHOWCQT,        showcqt,        avf);
>  
>      /* multimedia sources */
>      REGISTER_FILTER(AMOVIE,         amovie,         avsrc);
> diff -ruN a/libavfilter/avf_showcqt.c b/libavfilter/avf_showcqt.c
> --- a/libavfilter/avf_showcqt.c	1970-01-01 07:00:00.000000000 +0700
> +++ b/libavfilter/avf_showcqt.c	2014-05-28 10:41:20.000000000 +0700
> @@ -0,0 +1,880 @@
> +/*
> + * Copyright (c) 2014 Muhammad Faiz <mfcc64 at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavcodec/avfft.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/channel_layout.h"
> +#include "libavutil/opt.h"
> +#include "avfilter.h"
> +#include "internal.h"
> +
> +#include <math.h>
> +#include <stdlib.h>
> +
> +/* this filter is designed to do 16 bins/semitones constant Q transform with Brown-Puckette algorithm*/
> +/* start from E0 to D#10 (10 octaves) */
> +/* so there are 16 bins/semitones * 12 semitones/octaves * 10 octaves = 1920 bins */
> +/* match with full HD resolution */
> +
> +#define VIDEO_WIDTH 1920
> +#define VIDEO_HEIGHT 1080
> +#define FONT_HEIGHT 24
> +#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)
> +#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT)
> +#define BASE_FREQ 20.051392800492
> +#define COEFF_CLAMP 1.0e-4
> +
> +/* image 'A BC D EF G ' from font 'Nimbus Mono L, Bold' */
> +static uint8_t abcdefg_string[] = {

Could you use avpriv_vga16_font or avpriv_cga_font instead? See in
libavfilter/f_ebur128.c for example

[...]

> +typedef struct {
> +    FFTSample value;
> +    int index;
> +} SparseCoeff;
> +
> +
> +static double window_function(double x)
> +{
> +    double a0 = 0.355768;
> +    double a1 = 0.487396/a0;
> +    double a2 = 0.144232/a0;
> +    double a3 = 0.012604/a0;
> +    double c1, c2, c3, val;
> +    if(fabs(x) >= 0.5)

nit: "if ("

Please fix this style issue in all your code, it's present is several
places.

> +        return 0.0;
> +    c1 = a1 * cos(2.0*M_PI*x);
> +    c2 = a2 * cos(4.0*M_PI*x);
> +    c3 = a3 * cos(6.0*M_PI*x);
> +    val = 1.0 + c1 + c2 + c3;
> +    return val;
> +}
> +
> +static int qsort_sparsecoeff(const void *a, const void *b)
> +{
> +    const SparseCoeff *x = (SparseCoeff*) a;
> +    const SparseCoeff *y = (SparseCoeff*) b;
> +    if(fabsf(x->value) > fabsf(y->value))
> +        return 1;
> +    if(fabsf(x->value) < fabsf(y->value))
> +        return -1;

> +    if((x->index) > (y->index))

Pointless braces

> +        return 1;
> +    return 0;
> +}
> +
> +
> +typedef struct {
> +    const AVClass *class;
> +    AVFrame *outpicref;
> +    FFTContext *fft_context;
> +    FFTComplex *fft_data;
> +    FFTComplex *fft_result_left;
> +    FFTComplex *fft_result_right;
> +    SparseCoeff *coeff_sort;
> +    SparseCoeff *coeffs[VIDEO_WIDTH];
> +    int coeffs_len[VIDEO_WIDTH];
> +    float color[VIDEO_WIDTH];
> +    float alpha[FONT_HEIGHT][VIDEO_WIDTH];
> +    uint8_t spectogram[SPECTOGRAM_HEIGHT][VIDEO_WIDTH][3];
> +    int64_t frame_count;
> +    int spectogram_count;
> +    int spectogram_index;
> +    int fft_bits;
> +    int req_fullfilled;
> +    int remaining_fill;
> +    double volume;
> +    double timeclamp;   /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/
> +    float coeffclamp;   /* lower coeffclamp, more precise, higher coeffclamp, faster */
> +    float gamma;        /* lower gamma, more contrast, higher gamma, more range */
> +    int fps;            /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */
> +    int count;          /* fps * count = transform rate */
> +} ShowCQTContext;
> +
> +#define OFFSET(x) offsetof(ShowCQTContext, x)
> +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
> +
> +static const AVOption showcqt_options[] = {
> +    { "volume", "set volume", OFFSET(volume), AV_OPT_TYPE_DOUBLE, {.dbl = 16}, 0.1, 100, FLAGS },
> +    { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, {.dbl = 0.17 }, 0.1, 1.0, FLAGS},
> +    { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, {.dbl = 1}, 0.1, 10, FLAGS},
> +    { "gamma", "set gamma", OFFSET(gamma), AV_OPT_TYPE_FLOAT, { .dbl = 3}, 1, 7, FLAGS },
> +    { "fps", "set video fps", OFFSET(fps), AV_OPT_TYPE_INT, { .i64 = 25 }, 10, 100, FLAGS},
> +    { "count", "set number of transform per frame", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS},
> +    { NULL }

stylenit: some lines ends up without a space before '}'; make it
consistent please (yes I know the issue is present in several other
places)

> +};
> +
> +AVFILTER_DEFINE_CLASS(showcqt);
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    int k;
> +    ShowCQTContext *s = ctx->priv;
> +    av_fft_end(s->fft_context);
> +    s->fft_context = NULL;
> +    for (k = 0; k < VIDEO_WIDTH; k++)
> +        av_freep(&s->coeffs[k]);
> +    av_freep(&s->fft_data);
> +    av_freep(&s->fft_result_left);
> +    av_freep(&s->fft_result_right);
> +    av_freep(&s->coeff_sort);
> +    av_frame_free(&s->outpicref);
> +}
> +
> +
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    AVFilterFormats *formats = NULL;
> +    AVFilterChannelLayouts *layouts = NULL;
> +    AVFilterLink *inlink = ctx->inputs[0];
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE };
> +    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE };
> +    static const int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1};
> +    static const int samplerates[] = { 44100, 48000, -1};

> +    

Trailing whitespace here and several times later in the whole file. The
patch won't be pushable. At least Git can show them.

> +    /* set input audio formats */
> +    formats = ff_make_format_list(sample_fmts);
> +    if (!formats)
> +        return AVERROR(ENOMEM);
> +    ff_formats_ref(formats, &inlink->out_formats);
> +    
> +    layouts = avfilter_make_format64_list(channel_layouts);
> +    if (!layouts)
> +        return AVERROR(ENOMEM);
> +    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
> +    
> +    formats = ff_make_format_list(samplerates);
> +    if (!formats)
> +        return AVERROR(ENOMEM);
> +    ff_formats_ref(formats, &inlink->out_samplerates);
> +    
> +    /* set output video format */
> +    formats = ff_make_format_list(pix_fmts);
> +    if (!formats)
> +        return AVERROR(ENOMEM);
> +    ff_formats_ref(formats, &outlink->in_formats);
> +    
> +    return 0;
> +}
> +
> +
> +
> +static int config_output(AVFilterLink *outlink)
> +{
> +    AVFilterContext *ctx = outlink->src;
> +    AVFilterLink *inlink = ctx->inputs[0];
> +    ShowCQTContext *s = ctx->priv;
> +    int fft_len, k, x, y;
> +    int num_coeffs = 0;
> +    int rate = inlink->sample_rate;
> +    double max_len = rate * (double) s->timeclamp;
> +    s->fft_bits = ceil(log2(max_len));
> +    fft_len = 1 << s->fft_bits;
> +    
> +    if((rate % (s->fps * s->count)))
> +    {

wrong style, { on the same line. same several time below

> +        av_log(ctx, AV_LOG_ERROR, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate, s->fps, s->count);
> +        return AVERROR(EINVAL);
> +    }
> +    

> +    s->fft_data = av_malloc_array(sizeof(FFTComplex), fft_len);

Arguments are swapped (semantically speaking)

Also please use sizeof(*var) form

> +    if (!s->fft_data)
> +        return AVERROR(ENOMEM);
> +    
> +    s->coeff_sort = av_malloc_array(sizeof(SparseCoeff), fft_len);
> +    if (!s->coeff_sort)
> +        return AVERROR(ENOMEM);
> +    
> +    s->fft_result_left = av_malloc_array(sizeof(FFTComplex), fft_len);
> +    if (!s->fft_result_left)
> +        return AVERROR(ENOMEM);
> +    
> +    s->fft_result_right = av_malloc_array(sizeof(FFTComplex), fft_len);
> +    if (!s->fft_result_right)
> +        return AVERROR(ENOMEM);

    s->fft_data         = av_malloc_array(fft_len, sizeof(*s->fft_data));
    s->coeff_sort       = av_malloc_array(fft_len, sizeof(*s->coeff_sort));
    s->fft_result_left  = av_malloc_array(fft_len, sizeof(*s->fft_result_left));
    s->fft_result_right = av_malloc_array(fft_len, sizeof(*s->fft_result_right));
    if (!s->fft_result_left || !s->coeff_sort || !s->fft_data || !s->fft_result_right)
        return AVERROR(ENOMEM);

...is probably more compact and readable

> +    
> +    s->fft_context = av_fft_init(s->fft_bits, 0);
> +    if (!s->fft_context)
> +        return AVERROR(ENOMEM);
> +    
> +    /* initializing font */
> +    for (x = 0; x < VIDEO_WIDTH; x++)
> +    {
> +        if((x >= (12*3+8)*16) && (x < (12*4+8)*16))
> +        {
> +            float fx = (x - (12*3+8)*16) * (1.0f/192.0f);
> +            float sv = sinf(M_PI*fx);
> +            s->color[x] = sv*sv*255.0f; 
> +        }
> +        else
> +            s->color[x] = 0.0f;
> +        
> +        for (y = 0; y < FONT_HEIGHT; y++)
> +        {
> +            int u = (x + 7*16) % (12*16);
> +            s->alpha[y][x] = (1.0f/255.0f) * abcdefg_string[y*(12*16) + u];
> +        }
> +    }
> +    
> +    av_log(ctx, AV_LOG_INFO, "Calculating spectral kernel, please wait\n");
> +    for (k = 0; k < VIDEO_WIDTH; k++)
> +    {
> +        int x, hlen = fft_len >> 1;
> +        float total = 0;
> +        float partial = 0;
> +        double freq = BASE_FREQ * exp2(k * (1.0/192.0));
> +        double tlen = rate * (24.0 * 16.0) /freq;
> +        tlen = tlen * max_len / (tlen + max_len);
> +        s->fft_data[0].re = 0;
> +        s->fft_data[0].im = 0;
> +        s->fft_data[hlen].re = window_function(0) * (1.0/tlen) * s->volume * (1.0/fft_len);
> +        s->fft_data[hlen].im = 0;
> +        for (x = 1; x < hlen; x++)
> +        {
> +            double sv, cv, w;
> +            sv = sin(2.0*M_PI*freq*x*(1.0/rate));
> +            cv = cos(2.0*M_PI*freq*x*(1.0/rate));

I think you might be able here to use that trick:
http://iquilezles.org/www/articles/sincos/sincos.htm

> +            w = window_function(x * (1.0/tlen)) * (1.0/tlen) * s->volume * (1.0/fft_len);
> +            s->fft_data[hlen + x].re = w * cv;
> +            s->fft_data[hlen + x].im = w * sv;
> +            s->fft_data[hlen - x].re = s->fft_data[hlen + x].re;
> +            s->fft_data[hlen - x].im = -s->fft_data[hlen + x].im;
> +        }
> +        av_fft_permute(s->fft_context, s->fft_data);
> +        av_fft_calc(s->fft_context, s->fft_data);
> +        
> +        for (x = 0; x < fft_len; x++)
> +        {
> +            s->coeff_sort[x].index = x;
> +            s->coeff_sort[x].value = s->fft_data[x].re;
> +        }
> +        
> +        qsort(s->coeff_sort, fft_len, sizeof(SparseCoeff), qsort_sparsecoeff);
> +        for (x = 0; x < fft_len; x++)
> +            total += fabsf(s->coeff_sort[x].value);
> +        
> +        for (x = 0; x < fft_len; x++)
> +        {
> +            partial += fabsf(s->coeff_sort[x].value);
> +            if (partial > (total * s->coeffclamp * COEFF_CLAMP))
> +            {
> +                int y;
> +                s->coeffs_len[k] = fft_len - x;
> +                num_coeffs += s->coeffs_len[k];
> +                s->coeffs[k] = av_malloc_array(sizeof(SparseCoeff), s->coeffs_len[k]);
> +                if (!s->coeffs[k])
> +                    return AVERROR(ENOMEM);
> +                for (y = 0; y < (s->coeffs_len[k]); y++)
> +                {
> +                    s->coeffs[k][y] = s->coeff_sort[x+y];
> +                }
> +                break;
> +            }
> +        }
> +    }
> +    
> +    outlink->w = VIDEO_WIDTH;
> +    outlink->h = VIDEO_HEIGHT;
> +    
> +    s->req_fullfilled = 0;
> +    s->spectogram_index = 0;
> +    s->frame_count = 0;
> +    s->spectogram_count = 0;
> +    s->remaining_fill = fft_len >> 1;
> +    memset(s->spectogram, 0, VIDEO_WIDTH*SPECTOGRAM_HEIGHT);
> +    memset(s->fft_data, 0, fft_len * sizeof(FFTComplex));
> +    
> +    s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    if (!s->outpicref)
> +        return AVERROR(ENOMEM);
> +    

> +    outlink->sample_aspect_ratio = (AVRational){1,1};
> +    outlink->time_base = (AVRational){ 1, s->fps };
> +    outlink->frame_rate = (AVRational){s->fps, 1};

You can use av_make_q() here, but it's at your own discretion

[...]
> diff -ruN a/libavfilter/Makefile b/libavfilter/Makefile
> --- a/libavfilter/Makefile	2014-05-19 13:35:09.329627630 +0700
> +++ b/libavfilter/Makefile	2014-05-19 11:06:27.353131460 +0700
> @@ -226,6 +226,7 @@
>  OBJS-$(CONFIG_CONCAT_FILTER)                 += avf_concat.o
>  OBJS-$(CONFIG_SHOWSPECTRUM_FILTER)           += avf_showspectrum.o
>  OBJS-$(CONFIG_SHOWWAVES_FILTER)              += avf_showwaves.o
> +OBJS-$(CONFIG_SHOWCQT_FILTER)                += avf_showcqt.o

'C' comes before 'W' and 'S', please keep alphabetical order when
possible.

[...]

Please also add the documentation and all the little remaining bits you
can find at the end of the doc/writing_filters.txt document
("Finalization" step).

Thank you.

-- 
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140528/fc464288/attachment.asc>