[FFmpeg-devel] [PATCH] libavfilter: add atempo filter (revised patch v4)

Stefano Sabatini stefasab at gmail.com
Tue Jun 12 01:15:51 CEST 2012


On date Sunday 2012-06-10 16:49:17 -0600, Pavel Koshevoy encoded:
> Add atempo audio filter for adjusting audio tempo without affecting
> pitch. This filter implements WSOLA algorithm with fast cross
> correlation calculation in frequency domain.
> 
> Signed-off-by: Pavel Koshevoy <pavel at homestead.aragog.com>
> ---
>  Changelog                |    1 +
>  MAINTAINERS              |    1 +
>  configure                |    1 +
>  doc/filters.texi         |   18 +
>  libavfilter/Makefile     |    2 +
>  libavfilter/af_atempo.c  | 1158 ++++++++++++++++++++++++++++++++++++++++++++++
>  libavfilter/allfilters.c |    1 +
>  7 files changed, 1182 insertions(+), 0 deletions(-)
>  create mode 100644 libavfilter/af_atempo.c
> 
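For the archives: the "fast cross correlation calculation in frequency
domain" mentioned above amounts to multiplying one spectrum by the
complex conjugate of the other and inverse-transforming the product;
the fragments are zero-padded to 2*window samples, so the correlation
is linear rather than circular. A minimal sketch of the idea (the
function name is mine, not the patch's):

    // xa, xb: forward FFTs of the two zero-padded fragments;
    // xc receives FFT(a) * conj(FFT(b)), whose inverse FFT is the
    // cross-correlation used to align the fragments:
    static void xcorrelate(const FFTComplex *xa, const FFTComplex *xb,
                           FFTComplex *xc, int n)
    {
        int i;
        for (i = 0; i < n; i++) {
            xc[i].re = xa[i].re * xb[i].re + xa[i].im * xb[i].im;
            xc[i].im = xa[i].im * xb[i].re - xa[i].re * xb[i].im;
        }
    }
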
> diff --git a/Changelog b/Changelog
> index 41b0bdc..a639c71 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -5,6 +5,7 @@ version next:
>  - INI and flat output in ffprobe
>  - Scene detection in libavfilter
>  - Indeo Audio decoder
> +- atempo filter
>  
[...]
> +/**
> + * A fragment of audio waveform
> + */
> +typedef struct {
> +    // index of the first sample of this fragment in the overall waveform;
> +    // 0: input sample position
> +    // 1: output sample position
> +    int64_t position[2];
> +
> +    // original packed multi-channel samples:
> +    uint8_t *data;
> +
> +    // number of samples in this fragment:
> +    int nsamples;
> +
> +    // FFT transform of the down-mixed mono fragment, used for
> +    // fast waveform alignment via correlation in frequency domain:
> +    FFTComplex *xdat;
> +
> +} AudioFragment;

Nit++: the empty line before the end of the block looks weird to my eyes

> +
> +/**
> + * Filter state machine states
> + */
> +typedef enum {
> +    YAE_LOAD_FRAGMENT,
> +    YAE_ADJUST_POSITION,
> +    YAE_RELOAD_FRAGMENT,
> +    YAE_OUTPUT_OVERLAP_ADD,
> +    YAE_FLUSH_OUTPUT,
> +
> +} FilterState;

Ditto.

[...]
> +/**
> + * Prepare filter for processing audio data of given format,
> + * sample rate and number of channels.
> + */
> +static int yae_reset(ATempoContext *atempo,
> +                     enum AVSampleFormat format,
> +                     int sample_rate,
> +                     int channels)
> +{
> +    const int sample_size = av_get_bytes_per_sample(format);
> +    uint32_t nlevels  = 0;
> +    uint32_t pot;
> +    int i;
> +
> +    atempo->format   = format;
> +    atempo->channels = channels;
> +    atempo->stride   = sample_size * channels;
> +
> +    // pick a segment window size:
> +    atempo->window = sample_rate / 24;
> +
> +    // adjust window size to be a power-of-two integer:
> +    nlevels = av_log2(atempo->window);
> +    pot = 1 << nlevels;
> +    av_assert0(pot <= atempo->window);
> +
> +    if (pot < atempo->window) {
> +        atempo->window = pot * 2;
> +        nlevels++;
> +    }
> +
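
A worked example, for clarity: at 44100 Hz this gives window = 44100/24
= 1837; av_log2(1837) = 10 and 1<<10 = 1024 < 1837, so the window is
rounded up to pot*2 = 2048 with nlevels = 11.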

> +    // initialize audio fragment buffers:
> +    atempo->frag[0].data = av_realloc(atempo->frag[0].data,
> +                                      atempo->window * atempo->stride);
> +    if (!atempo->frag[0].data) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    atempo->frag[1].data = av_realloc(atempo->frag[1].data,
> +                                      atempo->window * atempo->stride);
> +    if (!atempo->frag[1].data) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    atempo->frag[0].xdat = av_realloc(atempo->frag[0].xdat,
> +                                      atempo->window * 2 *
> +                                      sizeof(FFTComplex));
> +    if (!atempo->frag[0].xdat) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    atempo->frag[1].xdat = av_realloc(atempo->frag[1].xdat,
> +                                      atempo->window * 2 *
> +                                      sizeof(FFTComplex));
> +    if (!atempo->frag[1].xdat) {
> +        return AVERROR(ENOMEM);
> +    }

Suggestion:
#define REALLOC_FIELD_OR_FAIL(field, size)               \
    atempo->field = av_realloc(atempo->field, size);     \
    if (!atempo->field)                                  \
        return AVERROR(ENOMEM)
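
which would make the seven av_realloc() blocks in yae_reset() collapse
to e.g.:

    REALLOC_FIELD_OR_FAIL(frag[0].data, atempo->window * atempo->stride);
    REALLOC_FIELD_OR_FAIL(frag[0].xdat, atempo->window * 2 * sizeof(FFTComplex));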

> +
> +    // initialize FFT contexts:
> +    av_fft_end(atempo->fft_forward);
> +    av_fft_end(atempo->fft_inverse);
> +
> +    atempo->fft_forward = av_fft_init(nlevels + 1, 0);
> +    if (!atempo->fft_forward) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    atempo->fft_inverse = av_fft_init(nlevels + 1, 1);
> +    if (!atempo->fft_inverse) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    atempo->correlation = av_realloc(atempo->correlation,
> +                                     atempo->window * 2 *
> +                                     sizeof(FFTComplex));
> +    if (!atempo->correlation) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    atempo->ring = atempo->window * 3;
> +    atempo->buffer = av_realloc(atempo->buffer, atempo->ring * atempo->stride);
> +    if (!atempo->buffer) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    // initialize the Hann window function:
> +    atempo->hann = av_realloc(atempo->hann, atempo->window * sizeof(float));
> +    if (!atempo->hann) {
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    for (i = 0; i < atempo->window; i++) {
> +        double t = (double)i / (double)(atempo->window - 1);
> +        double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
> +        atempo->hann[i] = (float)h;
> +    }
> +
> +    yae_clear(atempo);
> +    return 0;
> +}
> +
> +static int yae_set_tempo(AVFilterContext *ctx, const char *arg_tempo)
> +{
> +    ATempoContext *atempo = ctx->priv;
> +    char   *tail = NULL;
> +    double tempo = av_strtod(arg_tempo, &tail);
> +
> +    if (tail && *tail) {
> +        av_log(ctx, AV_LOG_ERROR, "Invalid tempo value '%s'\n", arg_tempo);
> +        return AVERROR(EINVAL);
> +    }
> +

> +    if (tempo < 0.5 || tempo > 2.0) {
> +        av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [0.5, 2.0] range\n",
> +               tempo);
> +        return AVERROR(EINVAL);
> +    }

Just out of curiosity: can you briefly explain what happens with
out-of-range values? (In other words, why can't the algorithm work
well outside this range?)

> +
> +    atempo->tempo = tempo;
> +    return 0;
> +}
> +
> +inline static AudioFragment *yae_curr_frag(ATempoContext *atempo)
> +{
> +    return &atempo->frag[atempo->nfrag % 2];
> +}
> +
> +inline static AudioFragment *yae_prev_frag(ATempoContext *atempo)
> +{
> +    return &atempo->frag[(atempo->nfrag + 1) % 2];
> +}
> +
> +inline static void yae_transform(FFTComplex *xdat, FFTContext *fft)
> +{
> +    av_fft_permute(fft, xdat);
> +    av_fft_calc(fft, xdat);
> +}
> +

> +/**
> + * A helper macro for initializing complex data buffer with scalar data
> + * of a given type.
> + */
> +#define yae_init_xdat(scalar_type, scalar_max)                          \
> +    do {                                                                \
> +        const uint8_t *src_end =                                        \
> +            src + frag->nsamples * atempo->channels * sizeof(scalar_type); \
> +                                                                        \
> +        FFTComplex *xdat = frag->xdat;                                  \
> +        scalar_type tmp;                                                \
> +                                                                        \
> +        if (atempo->channels == 1) {                                    \
> +            for (; src < src_end; blend++) {                            \
> +                tmp = *(const scalar_type *)src;                        \
> +                src += sizeof(scalar_type);                             \
> +                                                                        \
> +                xdat->re = (FFTSample)tmp;                              \
> +                xdat->im = 0;                                           \
> +                xdat++;                                                 \
> +            }                                                           \
> +        } else {                                                        \
> +            FFTSample s, max, ti, si;                                   \
> +            int i;                                                      \
> +                                                                        \
> +            for (; src < src_end; blend++) {                            \
> +                tmp = *(const scalar_type *)src;                        \
> +                src += sizeof(scalar_type);                             \
> +                                                                        \
> +                max = (FFTSample)tmp;                                   \
> +                s = FFMIN((FFTSample)scalar_max,                        \
> +                          (FFTSample)fabsf(max));                       \
> +                                                                        \
> +                for (i = 1; i < atempo->channels; i++) {                \
> +                    tmp = *(const scalar_type *)src;                    \
> +                    src += sizeof(scalar_type);                         \
> +                                                                        \
> +                    ti = (FFTSample)tmp;                                \
> +                    si = FFMIN((FFTSample)scalar_max,                   \
> +                               (FFTSample)fabsf(ti));                   \
> +                                                                        \
> +                    if (s < si) {                                       \
> +                        s   = si;                                       \
> +                        max = ti;                                       \
> +                    }                                                   \
> +                }                                                       \
> +                                                                        \
> +                xdat->re = max;                                         \
> +                xdat->im = 0;                                           \
> +                xdat++;                                                 \
> +            }                                                           \
> +        }                                                               \
> +    } while (0)
> +
> +/**
> + * Initialize complex data buffer of a given audio fragment
> + * with down-mixed mono data of appropriate scalar type.
> + */
> +static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
> +{
> +    // shortcuts:
> +    const uint8_t *src = frag->data;
> +    const float *blend = atempo->hann;
> +
> +    // init complex data buffer used for FFT and Correlation:
> +    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window * 2);
> +
> +    if (atempo->format == AV_SAMPLE_FMT_U8) {
> +        yae_init_xdat(uint8_t, 127);
> +    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
> +        yae_init_xdat(int16_t, 32767);
> +    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
> +        yae_init_xdat(int, 2147483647);
> +    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
> +        yae_init_xdat(float, 1);
> +    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
> +        yae_init_xdat(double, 1);
> +    }
> +}
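
Minor doc nit: "down-mixed mono" is slightly misleading here; per
sample the macro keeps the channel with the largest (clamped)
magnitude rather than averaging the channels.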

I suspect we may already have functions for these utilities in
libavcodec, but I don't have much expertise with the code and maths
behind this, so I don't want to hold this up; I'd rather foster
comments from someone with more in-depth knowledge of our DSP utils.

[...]

> +/**
> + * Blend the overlap region of previous and current audio fragment
> + * and output the results to the given destination buffer.
> + *

> + * @return 0 if the overlap region was completely stored in the dst buffer.
> + * @return AVERROR(EAGAIN) if more destination buffer space is required.

nit++: double return

> + */
> +static int yae_overlap_add(ATempoContext *atempo,
> +                           uint8_t **dst_ref,
> +                           uint8_t *dst_end)
> +{
> +    // shortcuts:
> +    const AudioFragment *prev = yae_prev_frag(atempo);
> +    const AudioFragment *frag = yae_curr_frag(atempo);
> +
> +    const int64_t start_here = FFMAX(atempo->position[1],
> +                                     frag->position[1]);
> +
> +    const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
> +                                    frag->position[1] + frag->nsamples);
> +
> +    const int64_t overlap = stop_here - start_here;
> +
> +    const int64_t ia = start_here - prev->position[1];
> +    const int64_t ib = start_here - frag->position[1];
> +
> +    const float *wa = atempo->hann + ia;
> +    const float *wb = atempo->hann + ib;
> +
> +    const uint8_t *a = prev->data + ia * atempo->stride;
> +    const uint8_t *b = frag->data + ib * atempo->stride;
> +
> +    uint8_t *dst = *dst_ref;
> +
> +    av_assert0(start_here <= stop_here &&
> +               frag->position[1] <= start_here &&
> +               overlap <= frag->nsamples);
> +
> +    if (atempo->format == AV_SAMPLE_FMT_U8) {
> +        yae_blend(uint8_t);
> +    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
> +        yae_blend(int16_t);
> +    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
> +        yae_blend(int);
> +    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
> +        yae_blend(float);
> +    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
> +        yae_blend(double);
> +    }
> +
> +    // pass-back the updated destination buffer pointer:
> +    *dst_ref = dst;
> +
> +    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
> +}
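
For the archives: the actual per-format blending is done by the
yae_blend() macro (elided above); as I read it, the blend is a plain
Hann-window cross-fade over the overlap region, schematically (my
illustration for float samples, not the patch code):

    // cross-fade 'overlap' samples of fragments a and b into dst;
    // wa ramps down over the tail of the previous fragment while
    // wb ramps up over the head of the current one:
    static void blend_floats(float *dst,
                             const float *a, const float *b,
                             const float *wa, const float *wb,
                             int overlap)
    {
        int i;
        for (i = 0; i < overlap; i++)
            dst[i] = wa[i] * a[i] + wb[i] * b[i];
    }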
> +
> +/**
> + * Feed as much data to the filter as it is able to consume
> + * and receive as much processed data in the destination buffer
> + * as it is able to produce or store.
> + */
> +static void
> +yae_apply(ATempoContext *atempo,
> +          const uint8_t **src_ref,
> +          const uint8_t *src_end,
> +          uint8_t **dst_ref,
> +          uint8_t *dst_end)
> +{
> +    while (1) {
> +        if (atempo->state == YAE_LOAD_FRAGMENT) {
> +            // load additional data for the current fragment:
> +            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
> +                break;
> +            }
> +
> +            // build a multi-resolution pyramid for fragment alignment:
> +            yae_downmix(atempo, yae_curr_frag(atempo));
> +
> +            // apply FFT:
> +            yae_transform(yae_curr_frag(atempo)->xdat, atempo->fft_forward);
> +
> +            // must load the second fragment before alignment can start:
> +            if (!atempo->nfrag) {
> +                yae_advance_to_next_frag(atempo);
> +                continue;
> +            }
> +
> +            atempo->state = YAE_ADJUST_POSITION;
> +        }
> +
> +        if (atempo->state == YAE_ADJUST_POSITION) {
> +            // adjust position for better alignment:
> +            if (yae_adjust_position(atempo)) {
> +                // reload the fragment at the corrected position, so that the
> +                // Hann window blending would not require normalization:
> +                atempo->state = YAE_RELOAD_FRAGMENT;
> +            } else {
> +                atempo->state = YAE_OUTPUT_OVERLAP_ADD;
> +            }
> +        }
> +
> +        if (atempo->state == YAE_RELOAD_FRAGMENT) {
> +            // load additional data if necessary due to position adjustment:
> +            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
> +                break;
> +            }
> +
> +            // build a multi-resolution pyramid for fragment alignment:
> +            yae_downmix(atempo, yae_curr_frag(atempo));
> +
> +            // apply FFT:
> +            yae_transform(yae_curr_frag(atempo)->xdat, atempo->fft_forward);
> +
> +            atempo->state = YAE_OUTPUT_OVERLAP_ADD;
> +        }
> +
> +        if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
> +            // overlap-add and output the result:
> +            if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
> +                break;
> +            }
> +
> +            // advance to the next fragment, repeat:
> +            yae_advance_to_next_frag(atempo);
> +            atempo->state = YAE_LOAD_FRAGMENT;
> +        }
> +    }
> +}
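
The state machine reads nicely: as I understand it, each pass of the
loop is one WSOLA iteration: load a fragment at the nominal
tempo-scaled position, refine that position by correlating against
the previous fragment, reload at the refined position so that the
Hann windows still sum to one in the overlap, then overlap-add and
advance.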
> +
> +/**
> + * Flush any buffered data from the filter.
> + *
> + * @return 0 if all data was completely stored in the dst buffer.
> + * @return AVERROR(EAGAIN) if more destination buffer space is required.
> + */
> +static int yae_flush(ATempoContext *atempo,
> +                     uint8_t **dst_ref,
> +                     uint8_t *dst_end)
> +{
> +    AudioFragment *frag = yae_curr_frag(atempo);
> +    int64_t overlap_end;
> +    int64_t start_here;
> +    int64_t stop_here;
> +    int64_t offset;
> +
> +    const uint8_t *src;
> +    uint8_t *dst;
> +
> +    int src_size;
> +    int dst_size;
> +    int nbytes;
> +
> +    atempo->state = YAE_FLUSH_OUTPUT;
> +
> +    if (atempo->position[0] == frag->position[0] + frag->nsamples &&
> +        atempo->position[1] == frag->position[1] + frag->nsamples) {
> +        // the current fragment is already flushed:
> +        return 0;
> +    }
> +
> +    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
> +        // finish loading the current (possibly partial) fragment:
> +        yae_load_frag(atempo, NULL, NULL);
> +
> +        if (atempo->nfrag) {
> +            // build a multi-resolution pyramid for fragment alignment:
> +            yae_downmix(atempo, frag);
> +
> +            // apply FFT:
> +            yae_transform(frag->xdat, atempo->fft_forward);
> +
> +            // align current fragment to previous fragment:
> +            if (yae_adjust_position(atempo)) {
> +                // reload the current fragment due to adjusted position:
> +                yae_load_frag(atempo, NULL, NULL);
> +            }
> +        }
> +    }
> +
> +    // flush the overlap region:
> +    overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
> +                                            frag->nsamples);
> +
> +    while (atempo->position[1] < overlap_end) {
> +        if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
> +            return AVERROR(EAGAIN);
> +        }
> +    }
> +
> +    // flush the remainder of the current fragment:
> +    start_here = FFMAX(atempo->position[1], overlap_end);
> +    stop_here  = frag->position[1] + frag->nsamples;
> +    offset     = start_here - frag->position[1];
> +    av_assert0(start_here <= stop_here && frag->position[1] <= start_here);
> +
> +    src = frag->data + offset * atempo->stride;
> +    dst = (uint8_t *)*dst_ref;
> +
> +    src_size = (int)(stop_here - start_here) * atempo->stride;
> +    dst_size = dst_end - dst;
> +    nbytes = FFMIN(src_size, dst_size);
> +
> +    memcpy(dst, src, nbytes);
> +    dst += nbytes;
> +
> +    atempo->position[1] += (nbytes / atempo->stride);
> +
> +    // pass-back the updated destination buffer pointer:
> +    *dst_ref = (uint8_t *)dst;
> +
> +    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
> +}
> +
> +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> +{
> +    ATempoContext *atempo = ctx->priv;
> +
> +    // NOTE: this assumes that the caller has memset ctx->priv to 0:
> +    atempo->format = AV_SAMPLE_FMT_NONE;
> +    atempo->tempo  = 1.0;
> +    atempo->state  = YAE_LOAD_FRAGMENT;
> +
> +    return args ? yae_set_tempo(ctx, args) : 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    ATempoContext *atempo = ctx->priv;
> +    yae_clear(atempo);
> +
> +    av_freep(&atempo->frag[0].data);
> +    av_freep(&atempo->frag[1].data);
> +    av_freep(&atempo->frag[0].xdat);
> +    av_freep(&atempo->frag[1].xdat);
> +
> +    av_freep(&atempo->buffer);
> +    av_freep(&atempo->hann);
> +    av_freep(&atempo->correlation);
> +
> +    av_fft_end(atempo->fft_forward);
> +    atempo->fft_forward = NULL;
> +
> +    av_fft_end(atempo->fft_inverse);
> +    atempo->fft_inverse = NULL;
> +}
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    AVFilterChannelLayouts *layouts = NULL;
> +    AVFilterFormats        *formats = NULL;
> +
> +    // WSOLA necessitates an internal sliding window ring buffer
> +    // for incoming audio stream.
> +    //
> +    // Planar sample formats are too cumbersome to store in a ring buffer,
> +    // therefore planar sample formats are not supported.
> +    //
> +    enum AVSampleFormat sample_fmts[] = {
> +        AV_SAMPLE_FMT_U8,
> +        AV_SAMPLE_FMT_S16,
> +        AV_SAMPLE_FMT_S32,
> +        AV_SAMPLE_FMT_FLT,
> +        AV_SAMPLE_FMT_DBL,
> +        AV_SAMPLE_FMT_NONE
> +    };

Note: we may want to add a function returning a meaningful subset of
the formats (e.g. only the packed, or only the planar ones).
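
Something like this hypothetical helper (name and placement are mine):

    static AVFilterFormats *all_packed_sample_formats(void)
    {
        enum AVSampleFormat fmts[AV_SAMPLE_FMT_NB + 1];
        int i, n = 0;

        for (i = 0; i < AV_SAMPLE_FMT_NB; i++)
            if (!av_sample_fmt_is_planar(i))
                fmts[n++] = i;
        fmts[n] = AV_SAMPLE_FMT_NONE;

        return ff_make_format_list(fmts);
    }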

> +
> +    layouts = ff_all_channel_layouts();
> +    if (!layouts) {
> +        return AVERROR(ENOMEM);
> +    }
> +    ff_set_common_channel_layouts(ctx, layouts);
> +
> +    formats = ff_make_format_list(sample_fmts);
> +    if (!formats) {
> +        return AVERROR(ENOMEM);
> +    }
> +    ff_set_common_formats(ctx, formats);
> +
> +    formats = ff_all_samplerates();
> +    if (!formats) {
> +        return AVERROR(ENOMEM);
> +    }
> +    ff_set_common_samplerates(ctx, formats);
> +
> +    return 0;
> +}
> +
> +static int config_props(AVFilterLink *inlink)
> +{
> +    AVFilterContext  *ctx = inlink->dst;
> +    ATempoContext *atempo = ctx->priv;
> +
> +    enum AVSampleFormat format = inlink->format;
> +    int sample_rate = (int)inlink->sample_rate;
> +    int channels = av_get_channel_layout_nb_channels(inlink->channel_layout);
> +
> +    return yae_reset(atempo, format, sample_rate, channels);
> +}
> +
> +static void filter_samples(AVFilterLink *inlink,
> +                           AVFilterBufferRef *src_buffer)
> +{
> +    AVFilterContext  *ctx = inlink->dst;
> +    ATempoContext *atempo = ctx->priv;
> +    AVFilterLink *outlink = ctx->outputs[0];
> +
> +    int n_in = src_buffer->audio->nb_samples;
> +    int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);
> +
> +    const uint8_t *src = src_buffer->data[0];
> +    const uint8_t *src_end = src + n_in * atempo->stride;
> +
> +    while (src < src_end) {
> +        if (!atempo->dst_buffer) {
> +            atempo->dst_buffer = ff_get_audio_buffer(outlink,
> +                                                     AV_PERM_WRITE,
> +                                                     n_out);
> +            avfilter_copy_buffer_ref_props(atempo->dst_buffer, src_buffer);
> +
> +            atempo->dst = atempo->dst_buffer->data[0];
> +            atempo->dst_end = atempo->dst + n_out * atempo->stride;
> +        }
> +
> +        yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);
> +
> +        if (atempo->dst == atempo->dst_end) {

> +            atempo->dst_buffer->audio->sample_rate = outlink->sample_rate;
> +            atempo->dst_buffer->audio->nb_samples  = n_out;
> +
> +            // adjust the PTS:
> +            atempo->dst_buffer->pts =
> +                av_rescale_q(atempo->nsamples_out,
> +                             (AVRational){ 1, outlink->sample_rate },
> +                             outlink->time_base);
> +
> +            ff_filter_samples(outlink, atempo->dst_buffer);
> +            atempo->dst_buffer = NULL;
> +            atempo->dst        = NULL;
> +            atempo->dst_end    = NULL;
> +
> +            atempo->nsamples_out += n_out;
> +            atempo->request_fulfilled = 1;

Maybe this can be factored out together with the similar code below
(push_samples()?)
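
Something like (rough, untested sketch; the request_fulfilled update
would stay at the call site):

    static void push_samples(ATempoContext *atempo,
                             AVFilterLink *outlink,
                             int n_out)
    {
        atempo->dst_buffer->audio->sample_rate = outlink->sample_rate;
        atempo->dst_buffer->audio->nb_samples  = n_out;

        // adjust the PTS:
        atempo->dst_buffer->pts =
            av_rescale_q(atempo->nsamples_out,
                         (AVRational){ 1, outlink->sample_rate },
                         outlink->time_base);

        ff_filter_samples(outlink, atempo->dst_buffer);
        atempo->dst_buffer = NULL;
        atempo->dst        = NULL;
        atempo->dst_end    = NULL;

        atempo->nsamples_out += n_out;
    }

(note the two hunks compute the same PTS, one via av_rescale_q() and
one via av_rescale(), so both can share this version)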

> +        }
> +    }
> +
> +    atempo->nsamples_in += n_in;
> +    avfilter_unref_bufferp(&src_buffer);
> +}
> +
> +static int request_frame(AVFilterLink *outlink)
> +{
> +    AVFilterContext  *ctx = outlink->src;
> +    ATempoContext *atempo = ctx->priv;
> +    int ret;
> +
> +    atempo->request_fulfilled = 0;
> +    do {
> +        ret = avfilter_request_frame(ctx->inputs[0]);
> +    }
> +    while (!atempo->request_fulfilled && ret >= 0);
> +
> +    if (ret == AVERROR_EOF) {
> +        // flush the filter:
> +        int n_max = atempo->ring;
> +        int n_out;
> +        int err = AVERROR(EAGAIN);
> +
> +        while (err == AVERROR(EAGAIN)) {
> +            if (!atempo->dst_buffer) {
> +                atempo->dst_buffer = ff_get_audio_buffer(outlink,
> +                                                         AV_PERM_WRITE,
> +                                                         n_max);
> +
> +                atempo->dst = atempo->dst_buffer->data[0];
> +                atempo->dst_end = atempo->dst + n_max * atempo->stride;
> +            }
> +
> +            err = yae_flush(atempo, &atempo->dst, atempo->dst_end);
> +
> +            n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
> +                     atempo->stride);
> +
> +            if (n_out) {

> +                atempo->dst_buffer->audio->sample_rate = outlink->sample_rate;
> +                atempo->dst_buffer->audio->nb_samples  = n_out;
> +
> +                // adjust the PTS:
> +                atempo->dst_buffer->pts =
> +                    av_rescale(outlink->time_base.den,
> +                               atempo->nsamples_out,
> +                               outlink->time_base.num * outlink->sample_rate);
> +
> +                ff_filter_samples(outlink, atempo->dst_buffer);
> +                atempo->dst_buffer = NULL;
> +                atempo->dst        = NULL;
> +                atempo->dst_end    = NULL;
> +
> +                atempo->nsamples_out += n_out;

This one can be factorized with the previously marked code.

[...]

No more comments from me, but I'd like someone with more DSP expertise
to have a look at it (Michael?), or maybe we can commit it already and
refine/optimize it later.

Thanks.
-- 
FFmpeg = Free and Fostering Mythic Purposeless Extended God

