[FFmpeg-soc] [PATCH] Audio visualization initial draft (incomplete) and question on callbacks

Fri Aug 6 13:47:11 CEST 2010

On date Friday 2010-08-06 02:57:55 -0700, S.N. Hemanth Meenakshisundaram encoded:
> 
> This filter will need the value of delay (calculated in ffplay based on
> how much SDL has consumed at the moment. What would be the best way to
> get this information to the filter? I was thinking of using a callback
> but am not sure if its right to call an ffplay.c function from lavfi.
> Please let me know if this is ok or if there are better ways.

You can put the callback in the opaque, then it would be a job of the
framework to correctly fill the opaque when inserting the filter.

Also ideally we should have a unique filterchain mixing audio and
video, since that requires much more framework changes than doesn't
look like a viable solution right now.

One problem that I see with this approach is that the function
callback may need to access the ffplay context, which is defined
statically in ffplay.c.

> I already get audio info through filter_samples and other info through
> init. Only changing information from video side is a problem.
> 
> Regards,
> Hemanth
> 
> ---
>  libavfilter/af_aviz.c |  235 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 235 insertions(+), 0 deletions(-)
>  create mode 100644 libavfilter/af_aviz.c
> 
> 
> 

> diff --git a/libavfilter/af_aviz.c b/libavfilter/af_aviz.c
> new file mode 100644
> index 0000000..1dd77ca
> --- /dev/null
> +++ b/libavfilter/af_aviz.c
> @@ -0,0 +1,235 @@
> +/*
> + * copyright (c) 2010 S.N. Hemanth Meenakshisundaram <smeenaks at ucsd.edu>

> + * based on code in libavcodec/aviz.c by Fabrice Bellard

libavcodec/aviz.c, uh? There has ever been such a thing in libavcodec?

> + * and libavcodec/audioconvert.c by Michael Neidermayer
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * aviz audio filter
> + */
> +
> +#include "avfilter.h"
> +#include "libavcodec/audioconvert.h"
> +
> +typedef struct {
> +    short *sample_array;        ///< array of recent audio samples to be used for visualization
> +    int sample_array_index;     ///< index of data being used
> +    RDFTContext *rdft;          ///< DFT context for converting data into frequency domain
> +    int rdft_bits;              ///< DFT bits
> +    FFTSample *rdft_data;       ///< frequency domain data
> +    int nb_channels;            ///< number of channels of audio data
> +    int screen_width;           ///< width of screen
> +    int screen_height;          ///< height of screen
> +    int hsub, vsub;             ///< chroma subsampling values of required output frames
> +    int viz_type;               ///< visualize frequency or time domain data
> +    AVFilterBufferRef *viz;     ///< buffer that stores the visualized picture data
> +} AVizContext;
> +
> +/* callback to get delay value from app and update other properties */
> +int (*app_callback) (int *delay; int *screen_w, int *screen_h, int *viz_type);
> +
> +#define SAMPLE_ARRAY_SIZE (2*65536)
> +
> +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> +{
> +    AVizContext *aviz = ctx->priv;
> +    int rdft_bits = 0, nb_freq = 0;
> +
> +    aviz->sample_array = av_malloc(SAMPLE_ARRAY_SIZE * sizeof(short));
> +    if (args){
> +        sscanf(args, "%d:%d:%d", &aviz->screen_width, &aviz->screen_height, &aviz->viz_type);
> +    }
> +
> +    for(rdft_bits=1; (1<<rdft_bits)<2*aviz->height; rdft_bits++)
> +        ;
> +    nb_freq= 1<<(rdft_bits-1);
> +
> +    aviz->rdft = av_rdft_init(rdft_bits, DFT_R2C);
> +    aviz->rdft_bits = rdft_bits;
> +    aviz->rdft_data = av_malloc(4*nb_freq*sizeof(*s->rdft_data));
> +
> +    /* TODO: Add error checking and configure callback function if there is going to be one */
> +
> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    AVizContext *aviz = ctx->priv;
> +    av_free(aviz->sample_array);
> +    av_rdft_end(aviz->rdft);
> +    av_free(aviz->rdft_data);
> +    if (aviz->viz)
> +        avfilter_unref_buffer(aviz->viz);
> +}
> +
> +static int input_config_props(AVFilterLink *link)
> +{
> +    AVFilterContext *ctx = link->dst;
> +    AVizContext *aviz = ctx->priv;
> +
> +    aviz->nb_channels = avcodec_channel_layout_num_channels(link->channel_layout);
> +
> +    /* We expect framework to insert appropriate resample filter when using the aviz filter */
> +    if (link->format != SAMPLE_FMT_S16) {
> +        av_log(ctx, AV_LOG_ERROR, "Input samples must be in S16 format\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +static int output_config_props(AVFilterLink *link)
> +{
> +    AVFilterContext *ctx = link->dst;
> +    AVizContext *aviz = ctx->priv;
> +
> +    /**
> +     * Store chroma subsampling values so we can generate visualization frames
> +     * in the required format.
> +     */
> +    const AVPixFmtDescriptor *pix_desc = &av_pix_fmt_descriptors[link->format];
> +    aviz->hsub = pix_desc->log2_chroma_w;
> +    aviz->vsub = pix_desc->log2_chroma_h;
> +}
> +
> +static void filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref)
> +{
> +    AVizContext *aviz = link->dst->priv;
> +    AVFilterLink *outlink = link->dst->outputs[0];
> +    AVFilterBufferRefAudioProps *sample_props;
> +    short samples_nb_changed = 0;
> +    int size, len, channels;
> +    uint8_t *audio_data = samplesref->data[0];
> +    AVFILTER_GET_BUFREF_AUDIO_PROPS(sample_props, samplesref);
> +
> +    /* We update this since this frame may have a different number of channels */
> +    aviz->channels = avcodec_channel_layout_num_channels(samplesref->channel_layout);
> +
> +    size = sample_props->size / sizeof(short);
> +    while (size > 0) {
> +        len = SAMPLE_ARRAY_SIZE - aviz->sample_array_index;
> +        if (len > size)
> +            len = size;
> +        /* We definitely need a copy of data since we keep old audio data around for visualization */
> +        memcpy(aviz->sample_array + aviz->sample_array_index, audio_data, len * sizeof(short));
> +        audio_data += len;
> +        is->sample_array_index += len;
> +        if (is->sample_array_index >= SAMPLE_ARRAY_SIZE)
> +            is->sample_array_index = 0;
> +        size -= len;
> +    }
> +    avfilter_unref_buffer(samplesref);
> +}

> +#define SET_PIXEL(pic_ref, yuv_color, x, y, hsub, vsub) { \
> +    luma_pos    = ((x)          ) + ((y)          ) * picref->linesize[0]; \
> +    chroma_pos1 = ((x) >> (hsub)) + ((y) >> (vsub)) * picref->linesize[1]; \
> +    chroma_pos2 = ((x) >> (hsub)) + ((y) >> (vsub)) * picref->linesize[2]; \
> +    picref->data[0][luma_pos   ] = (yuv_color[3] * yuv_color[0] + (255 - yuv_color[3]) * picref->data[0][luma_pos   ]) >> 8; \
> +    picref->data[1][chroma_pos1] = (yuv_color[3] * yuv_color[1] + (255 - yuv_color[3]) * picref->data[1][chroma_pos1]) >> 8; \
> +    picref->data[2][chroma_pos2] = (yuv_color[3] * yuv_color[2] + (255 - yuv_color[3]) * picref->data[2][chroma_pos2]) >> 8; \
> +}
> +
> +static inline void fillrect(AVFilterBufferRef *picref, unsigned int x, unsigned int y,
> +                            unsigned int width, unsigned int height,
> +                            unsigned char yuv_color[4], int hsub, int vsub)
> +{
> +    int i, plane;
> +    uint8_t *p;
> +
> +    if (yuv_color[3] != 0xFF) {
> +        unsigned int j, luma_pos, chroma_pos1, chroma_pos2;
> +
> +        for (j = 0; j < height; j++)
> +            for (i = 0; i < width; i++)
> +                SET_PIXEL(pic_ref, yuv_color, (i+x), (y+j), hsub, vsub);
> +
> +    } else {
> +        for (plane = 0; plane < 3 && pic_ref->data[plane]; plane++) {
> +            int hsub1 = plane == 1 || plane == 2 ? hsub : 0;
> +            int vsub1 = plane == 1 || plane == 2 ? vsub : 0;
> +
> +            p = pic_ref->data[plane] + (y >> vsub1) * pic_ref->linesize[plane] + (x >> hsub1);
> +            for (i = 0; i < (height >> vsub1); i++) {
> +                memset(p, yuv_color[plane], (width >> hsub1));
> +                p += pic_ref->linesize[plane];
> +            }
> +        }
> +    }
> +}

These are shared with drawtext, ideally they should be moved to a
common file, not an high priority though.

> +
> +/* Both these will use the code from ffplay or any other method and the util functions above from
> + * vf_drawtext. The visualization frame will thus be in YUV format. */
> +
> +AVFilterBufferRef *time_domain_visualize()
> +{
> +}
> +
> +AVFilterBufferRef *frequency_domain_visualize()
> +{
> +}
> +
> +/* The output visualization filter calls this when it needs or is ready to receive new picture data */
> +static int request_frame(AVFilterLink *link)
> +{
> +    AVFilterBufferRef *picref;
> +    int update_config, delay = 0;
> +   /**
> +    * Here we may need to use callback or in some other way get information on changed parameters.
> +    * Changed parameters may include audio position reached by playback, screen dimensions or
> +    * visualization type.
> +    */
> +    // update_config = app_callback(&delay, &aviz->width, &aviz->height, &aviz->viz_type);
> +
> +    if (update_config)
> +        reconfig_aviz(); // This will reallocate the DFT context based on new height etc.

> +    if (aviz->viz_type == 1)
> +        picref = time_domain_visualize();
> +    else if (aviz->viz_type == 2)
> +        picref = freq_domain_visualiza();

use enum/defines here.

Regards.