[FFmpeg-soc] [PATCH] Add af sox - lavfi wrapper for libsox

Fri Sep 3 13:51:01 CEST 2010

On date Thursday 2010-09-02 01:11:21 -0700, S.N. Hemanth Meenakshisundaram encoded:
> Hi All,
> 
> I need some advice on the lavfi audio sox wrapper filter.
> 
> The sox API that starts off the filtering of data - sox_flow_effects() -
> keeps trying to pull data from the input effect defined in lavfi until
> it sends EOF, otherwise doesn't return. So I changed the af_sox input
> effect behavior in the patch below to always send EOF after giving every
> buffer to the sox chain. This means the sox chain would be reinitialized
> when calling sox_flow_effects for each audio buffer. This causes sox to
> crash after playing out a few buffers.
> 
> Instead, would it be ok to call sox_flow_effects once in a new thread
> and let it fetch and output its buffers from/to the FIFOs at the input
> and output? Please let me know.

If this is the supposed way to use libsox then it's OK. For maximum
portability you may use pthreads.

> Regards,
> 
> ---
>  libavfilter/Makefile     |    1 +
>  libavfilter/af_sox.c     |  351 ++++++++++++++++++++++++++++++++++++++++++++++
>  libavfilter/allfilters.c |    1 +

Missing configure magic (yes it should go in the same patch).

>  3 files changed, 353 insertions(+), 0 deletions(-)
>  create mode 100644 libavfilter/af_sox.c

> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index fe8b4db..749fbcc 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -16,6 +16,7 @@ OBJS = allfilters.o                                                     \
>  
>  OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
>  OBJS-$(CONFIG_RESAMPLE_FILTER)               += af_resample.o
> +OBJS-$(CONFIG_SOX_FILTER)                    += af_sox.o
>  
>  OBJS-$(CONFIG_ASPECT_FILTER)                 += vf_aspect.o
>  OBJS-$(CONFIG_CROP_FILTER)                   += vf_crop.o
> diff --git a/libavfilter/af_sox.c b/libavfilter/af_sox.c
> new file mode 100644
> index 0000000..383958a
> --- /dev/null
> +++ b/libavfilter/af_sox.c
> @@ -0,0 +1,351 @@
[...]
> +/**
> + * @file
> + * Sox Filter
> + */
> +
> +#include "avfilter.h"
> +#include "parseutils.h"
> +#include "libavcodec/audioconvert.h"
> +#include "libavutil/fifo.h"
> +#include <assert.h>
> +#include <sox.h>
> +#include <string.h>
> +
> +typedef struct {
> +    char *sox_args;              ///< filter arguments;
> +    sox_effects_chain_t * chain; ///< handle to sox effects chain.

Nit: *chain

> +    AVFifoBuffer *in_fifo;       ///< fifo buffer of input audio frame pointers
> +    AVFifoBuffer *out_fifo;      ///< fifo buffer of output audio data from sox
> +    int64_t ch_layout;           ///< channel layout of data handled
> +    int64_t sample_rate;         ///< sample rate of data handled
> +    int nb_channels;             ///< number of channels in our channel layout

Nit: channels_nb

I prefer the _nb at the end because I read it as:
channels number
rather than
number of channels

also having _nb at the end permits alignment tricks like:
args
args_nb

> +    int out_size;                ///< desired size of each output audio buffer
> +} SoxContext;
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    // Sox effects only operate on signed 32-bit integer audio data.
> +    enum SampleFormat sample_fmts[] = {
> +        SAMPLE_FMT_S32, SAMPLE_FMT_NONE
> +    };
> +
> +    avfilter_set_common_formats(ctx, avfilter_make_format_list(sample_fmts));
> +    return 0;
> +}
> +
> +typedef struct {
> +    SoxContext *lavfi_ctx;
> +} sox_inout_ctx;
> +
> +// configures the sox effect for sample input.
> +static int inout_config_opts(sox_effect_t *effect, int argc, char **argv)
> +{
> +    SoxContext *sox;
> +    if (argc < 2) {
> +        lsx_fail("lavfi context not supplied");

> +        return (SOX_EOF);

unnecessary ()

> +    }
> +    sscanf(argv[1], "%ld", (long int *)&sox);
> +    ((sox_inout_ctx *)effect->priv)->lavfi_ctx = sox;
> +    return SOX_SUCCESS;
> +}
> +
> +/**
> + * a sox effect handler to handle input of samples to the effects chain.

Please describe what the function *does*, not what the function
*is*. That is, use a predicate (start with a verb) rather than a
nominal form.

> + * The function that will be called to input samples into the effects chain.
> + */
> +static int input_drain(sox_effect_t *effect,
> +                       sox_sample_t *o_samples, size_t *o_samples_size)
> +{
> +
> +    SoxContext *sox = ((sox_inout_ctx *)effect->priv)->lavfi_ctx;
> +    AVFilterBufferRef *samplesref;
> +    int input_nb_samples = 0;
> +
> +    if (av_fifo_size(sox->in_fifo)) {
> +
> +        // read first audio frame from queued input buffers and give it to sox.
> +        av_fifo_generic_read(sox->in_fifo, &samplesref, sizeof(samplesref), NULL);
> +
> +        /**
> +         * inside lavfi, nb_samples is number of samples in each channel, while in sox
> +         * number of samples refers to the total number over all channels
> +         */
> +        input_nb_samples = samplesref->audio->samples_nb * sox->nb_channels;

Nit: input_nb_samples => input_samples_nb

> +        // ensure that *o_samples_size is a multiple of the number of channels.
> +        *o_samples_size -= *o_samples_size % sox->nb_channels;
> +
> +        /**
> +         * FIXME: Right now, if sox chain accepts fewer samples than in one buffer, we drop
> +         * remaining data. We should be taking the required data and preserving the rest.
> +         * Luckily, this is highly unlikely.
> +         */
> +        if (*o_samples_size < input_nb_samples)
> +            input_nb_samples = *o_samples_size;

"o_" and "input" are lexically inconsistent, either "output"/"input"
or "o_"/"i_" should be more readable.

> +        memcpy(o_samples, samplesref->data[0], input_nb_samples*sizeof(int));

> +        *o_samples_size = input_nb_samples;

is o_samples_size supposed to contain a buffer size or a number of
samples?

Also the logic here is confused, you do:
*o_samples_size -= ...
input_nb_samples = *o_samples_size
use input_nb_samples
*o_samples_size = input_nb_samples

maybe input_nb_samples use can be safely removed.

> +
> +        avfilter_unref_buffer(samplesref);
> +    }
> +    return SOX_EOF;
> +}
> +
> +/**
> + * a sox effect handler to handle output of samples to the effects chain.
> + * The function that will be called to output samples from the effects chain.
> + */
> +static int output_flow(sox_effect_t *effect UNUSED,
> +                       sox_sample_t const *i_samples,
> +                       sox_sample_t *o_samples UNUSED, size_t *i_samples_size,
> +                       size_t *o_samples_size)
> +{
> +    SoxContext *sox = ((sox_inout_ctx *)effect->priv)->lavfi_ctx;
> +
> +    // If our fifo runs out of space, we just drop this frame and keep going.
> +    if (*i_samples_size > 0) {
> +        if (av_fifo_space(sox->out_fifo) < *i_samples_size * sizeof(int)) {
> +            av_log(NULL, AV_LOG_ERROR,
> +                   "Buffering limit reached. Sox output data being dropped.\n");
> +            return SOX_SUCCESS;
> +        }
> +
> +        av_fifo_generic_write(sox->out_fifo, (void *)i_samples, *i_samples_size, NULL);
> +    }
> +
> +    // Set *o_samples_size to 0 since this is the last effect of the sox chain.
> +    *o_samples_size = 0;
> +
> +    return SOX_SUCCESS; // All samples output successfully.
> +}
> +
> +static sox_effect_handler_t const * input_handler(void)
> +{
> +    static sox_effect_handler_t handler = {
> +        "input", NULL, SOX_EFF_MCHAN, inout_config_opts, NULL, NULL,
> +        input_drain, NULL, NULL, sizeof(SoxContext *)
> +    };
> +    return &handler;
> +}
> +
> +// a sox effect handler to handle output of samples from the effects chain.
> +static sox_effect_handler_t const *output_handler(void)
> +{
> +    static sox_effect_handler_t handler = {
> +        "output", NULL, SOX_EFF_MCHAN, inout_config_opts, NULL,
> +        output_flow, NULL, NULL, NULL, sizeof(SoxContext *)
> +    };
> +    return &handler;
> +}
> +
> +#define INFIFO_SIZE 8
> +#define OUTFIFO_SIZE 8192
> +#define OUT_FRAME_SIZE 2048
> +
> +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> +{
> +    SoxContext *sox = ctx->priv;
> +
> +    sox->sox_args = av_strdup(args);
> +    sox->in_fifo = av_fifo_alloc(INFIFO_SIZE*sizeof(AVFilterBufferRef*));
> +    // the output data fifo stores samples in sox's native s32 integer format.
> +    sox->out_size = OUT_FRAME_SIZE; // FIXME: Make this configurable;
> +    sox->out_fifo = av_fifo_alloc(OUTFIFO_SIZE*sizeof(int));

would be possible to use uint32_t instead of int? Should be safer /
more portable.

> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    SoxContext *sox = ctx->priv;
> +    sox_delete_effects_chain(sox->chain);
> +    sox_quit();
> +    av_fifo_free(sox->in_fifo);
> +    av_fifo_free(sox->out_fifo);
> +    av_free(sox->sox_args);
> +}
> +
> +#define MAX_EFFECT_ARGS 10
> +
> +static inline int add_effect_and_setopts(AVFilterContext *ctx, char *effect_str, sox_signalinfo_t *signal)
> +{
> +    SoxContext *sox = ctx->priv;
> +    int nb_args = -1, err = 0;

Nit: args_nb

> +    char *args[MAX_EFFECT_ARGS];
> +    sox_effect_t *effect = NULL;
> +
> +    effect_str = strtok(effect_str, " ");

Note: strtok() is not thread safe, OK to fix that later anyway.

> +    effect = sox_create_effect(sox_find_effect(effect_str));
> +    if (!effect) {
> +        av_log(ctx, AV_LOG_ERROR, "No such sox effect: '%s'.\n", effect_str);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    while (args[++nb_args] = strtok(NULL, " ")) {
> +        if  (nb_args >= MAX_EFFECT_ARGS-1) {
> +            av_log(ctx, AV_LOG_ERROR, "Too many arguments for sox effect'%s'.\n", effect_str);
> +            return AVERROR(EINVAL);
> +        }
> +    }
> +    if ((err = sox_add_effect(sox->chain, effect, signal, signal)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +    if (nb_args >= 0)
> +        err = sox_effect_options(effect, nb_args, args);
> +    else
> +        err = sox_effect_options(effect, nb_args, NULL);

nb_args >= 0 ? args : NULL

> +    if (err != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +static int config_input(AVFilterLink *link)
> +{

I find "inlink" in place of "link" usually much more useful when
reading/debugging code.

> +    AVFilterContext *ctx = link->dst;
> +    SoxContext *sox = ctx->priv;

> +    sox_effects_chain_t * chain;

Nit++: *chain

> +    sox_effect_t *e = NULL;
> +    sox_encodinginfo_t *enc = av_malloc (sizeof(sox_encodinginfo_t));
> +    sox_signalinfo_t *in_signal_info = av_malloc (sizeof(sox_signalinfo_t));
> +
> +    char *token = NULL, param[32], *ioargs[] = {param};
> +    char *cpargs = av_strdup(sox->sox_args);
> +    int err = 0;
> +
> +    memset(enc, 0, sizeof(sox_encodinginfo_t));
> +    memset(in_signal_info, 0, sizeof(sox_signalinfo_t));
> +
> +    enc->encoding = SOX_DEFAULT_ENCODING;
> +    enc->bits_per_sample = 32;
> +

> +    if (link->format != SAMPLE_FMT_S32) {
> +        av_log(link->dst, AV_LOG_ERROR,
> +               "Sox needs signed 32-bit input samples, insert resample filter.");
> +        return AVERROR(EINVAL);
> +    }

can this happen, assuming that the negotiation already took place? If
not then it's safe to remove this check.

> +    // store channel layout and number of channels, insert resample filter to keep this constant.
> +    sox->ch_layout   = link->channel_layout;
> +    sox->sample_rate = link->sample_rate;
> +    sox->nb_channels = in_signal_info->channels =
> +                       avcodec_channel_layout_num_channels(link->channel_layout);
> +    in_signal_info->rate = (double) sox->sample_rate;
> +    in_signal_info->precision = 32;
> +
> +    if ((err = sox_init()) != SOX_SUCCESS) {

> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);

all these warnings and return can be factorized:
if (err)
    goto fail;

fail:
    ...

(that's one of the cases where the goto construct is considered
useful, in spite of Dijkstra and computer science professors).

> +    }
> +
> +    chain = sox_create_effects_chain(enc, enc);
> +    sox->chain = chain;
> +
> +    snprintf(param, sizeof(param), "%ld", (long int)sox);
> +    // set up the audio buffer source as first effect of the chain.
> +    e = sox_create_effect(input_handler());
> +    if ((err = sox_add_effect(sox->chain, e, in_signal_info, in_signal_info)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +    if ((err = sox_effect_options(e, 1, ioargs)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    token = strtok (cpargs, ":");
> +    while (token) {
> +        if ((err = add_effect_and_setopts(ctx, token, in_signal_info))) {
> +            av_log(ctx, AV_LOG_ERROR, "Invalid sox argument: '%s'.\n", token);
> +            return err;
> +        }
> +        token = strtok (NULL, ":");
> +    }
> +
> +    e = sox_create_effect(output_handler());
> +    if ((err = sox_add_effect(sox->chain, e, in_signal_info, in_signal_info)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +    if ((err = sox_effect_options(e, 1, ioargs)) != SOX_SUCCESS) {
> +        av_log(ctx, AV_LOG_ERROR, "Sox error: '%s'.\n", sox_strerror(err));
> +        return AVERROR(EINVAL);
> +    }
> +    av_free(cpargs);
> +    return 0;
> +}

[...]

Regards.