FFmpeg
af_atempo.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * tempo scaling audio filter -- an implementation of WSOLA algorithm
24  *
25  * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h
26  * from Apprentice Video player by Pavel Koshevoy.
27  * https://sourceforge.net/projects/apprenticevideo/
28  *
29  * An explanation of SOLA algorithm is available at
30  * http://www.surina.net/article/time-and-pitch-scaling.html
31  *
32  * WSOLA is very similar to SOLA, only one major difference exists between
33  * these algorithms. SOLA shifts audio fragments along the output stream,
34  * whereas WSOLA shifts audio fragments along the input stream.
35  *
36  * The advantage of WSOLA algorithm is that the overlap region size is
37  * always the same, therefore the blending function is constant and
38  * can be precomputed.
39  */
40 
41 #include <float.h>
42 #include "libavutil/avassert.h"
44 #include "libavutil/mem.h"
45 #include "libavutil/opt.h"
46 #include "libavutil/samplefmt.h"
47 #include "libavutil/tx.h"
48 #include "avfilter.h"
49 #include "audio.h"
50 #include "internal.h"
51 
52 /**
53  * A fragment of audio waveform
54  */
55 typedef struct AudioFragment {
56  // index of the first sample of this fragment in the overall waveform;
57  // 0: input sample position
58  // 1: output sample position
60 
61  // original packed multi-channel samples:
62  uint8_t *data;
63 
64  // number of samples in this fragment:
65  int nsamples;
66 
67  // rDFT transform of the down-mixed mono fragment, used for
68  // fast waveform alignment via correlation in frequency domain:
69  float *xdat_in;
70  float *xdat;
72 
73 /**
74  * Filter state machine states
75  */
76 typedef enum {
82 } FilterState;
83 
84 /**
85  * Filter state machine
86  */
87 typedef struct ATempoContext {
88  const AVClass *class;
89 
90  // ring-buffer of input samples, necessary because sometimes
91  // input fragment position may be adjusted backwards:
92  uint8_t *buffer;
93 
94  // ring-buffer maximum capacity, expressed in sample rate time base:
95  int ring;
96 
97  // ring-buffer house keeping:
98  int size;
99  int head;
100  int tail;
101 
102  // 0: input sample position corresponding to the ring buffer tail
103  // 1: output sample position
105 
106  // first input timestamp, all other timestamps are offset by this one
108 
109  // sample format:
111 
112  // number of channels:
113  int channels;
114 
115  // row of bytes to skip from one sample to next, across multiple channels;
116  // stride = (number-of-channels * bits-per-sample-per-channel) / 8
117  int stride;
118 
119  // fragment window size, power-of-two integer:
120  int window;
121 
122  // Hann window coefficients, for feathering
123  // (blending) the overlapping fragment region:
124  float *hann;
125 
126  // tempo scaling factor:
127  double tempo;
128 
129  // a snapshot of previous fragment input and output position values
130  // captured when the tempo scale factor was set most recently:
132 
133  // current/previous fragment ring-buffer:
135 
136  // current fragment index:
137  uint64_t nfrag;
138 
139  // current state:
141 
142  // for fast correlation calculation in frequency domain:
147  float *correlation;
148 
149  // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
151  uint8_t *dst;
152  uint8_t *dst_end;
153  uint64_t nsamples_in;
154  uint64_t nsamples_out;
155 } ATempoContext;
156 
157 #define YAE_ATEMPO_MIN 0.5
158 #define YAE_ATEMPO_MAX 100.0
159 
160 #define OFFSET(x) offsetof(ATempoContext, x)
161 
162 static const AVOption atempo_options[] = {
163  { "tempo", "set tempo scale factor",
164  OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 },
168  { NULL }
169 };
170 
171 AVFILTER_DEFINE_CLASS(atempo);
172 
174 {
175  return &atempo->frag[atempo->nfrag % 2];
176 }
177 
179 {
180  return &atempo->frag[(atempo->nfrag + 1) % 2];
181 }
182 
183 /**
184  * Reset filter to initial state, do not deallocate existing local buffers.
185  */
186 static void yae_clear(ATempoContext *atempo)
187 {
188  atempo->size = 0;
189  atempo->head = 0;
190  atempo->tail = 0;
191 
192  atempo->nfrag = 0;
193  atempo->state = YAE_LOAD_FRAGMENT;
194  atempo->start_pts = AV_NOPTS_VALUE;
195 
196  atempo->position[0] = 0;
197  atempo->position[1] = 0;
198 
199  atempo->origin[0] = 0;
200  atempo->origin[1] = 0;
201 
202  atempo->frag[0].position[0] = 0;
203  atempo->frag[0].position[1] = 0;
204  atempo->frag[0].nsamples = 0;
205 
206  atempo->frag[1].position[0] = 0;
207  atempo->frag[1].position[1] = 0;
208  atempo->frag[1].nsamples = 0;
209 
210  // shift left position of 1st fragment by half a window
211  // so that no re-normalization would be required for
212  // the left half of the 1st fragment:
213  atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2);
214  atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2);
215 
216  av_frame_free(&atempo->dst_buffer);
217  atempo->dst = NULL;
218  atempo->dst_end = NULL;
219 
220  atempo->nsamples_in = 0;
221  atempo->nsamples_out = 0;
222 }
223 
224 /**
225  * Reset filter to initial state and deallocate all buffers.
226  */
227 static void yae_release_buffers(ATempoContext *atempo)
228 {
229  yae_clear(atempo);
230 
231  av_freep(&atempo->frag[0].data);
232  av_freep(&atempo->frag[1].data);
233  av_freep(&atempo->frag[0].xdat_in);
234  av_freep(&atempo->frag[1].xdat_in);
235  av_freep(&atempo->frag[0].xdat);
236  av_freep(&atempo->frag[1].xdat);
237 
238  av_freep(&atempo->buffer);
239  av_freep(&atempo->hann);
240  av_freep(&atempo->correlation_in);
241  av_freep(&atempo->correlation);
242 
243  av_tx_uninit(&atempo->real_to_complex);
244  av_tx_uninit(&atempo->complex_to_real);
245 }
246 
247 /**
248  * Prepare filter for processing audio data of given format,
249  * sample rate and number of channels.
250  */
251 static int yae_reset(ATempoContext *atempo,
252  enum AVSampleFormat format,
253  int sample_rate,
254  int channels)
255 {
256  const int sample_size = av_get_bytes_per_sample(format);
257  uint32_t nlevels = 0;
258  float scale = 1.f, iscale = 1.f;
259  uint32_t pot;
260  int ret;
261  int i;
262 
263  atempo->format = format;
264  atempo->channels = channels;
265  atempo->stride = sample_size * channels;
266 
267  // pick a segment window size:
268  atempo->window = sample_rate / 24;
269 
270  // adjust window size to be a power-of-two integer:
271  nlevels = av_log2(atempo->window);
272  pot = 1 << nlevels;
273  av_assert0(pot <= atempo->window);
274 
275  if (pot < atempo->window) {
276  atempo->window = pot * 2;
277  nlevels++;
278  }
279 
280  /* av_realloc is not aligned enough, so simply discard all the old buffers
281  * (fortunately, their data does not need to be preserved) */
282  yae_release_buffers(atempo);
283 
284  // initialize audio fragment buffers:
285  if (!(atempo->frag[0].data = av_calloc(atempo->window, atempo->stride)) ||
286  !(atempo->frag[1].data = av_calloc(atempo->window, atempo->stride)) ||
287  !(atempo->frag[0].xdat_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) ||
288  !(atempo->frag[1].xdat_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) ||
289  !(atempo->frag[0].xdat = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) ||
290  !(atempo->frag[1].xdat = av_calloc(atempo->window + 1, sizeof(AVComplexFloat)))) {
291  ret = AVERROR(ENOMEM);
292  goto fail;
293  }
294 
295  // initialize rDFT contexts:
296  ret = av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn,
297  AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
298  if (ret < 0)
299  goto fail;
300 
301  ret = av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn,
302  AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
303  if (ret < 0)
304  goto fail;
305 
306  if (!(atempo->correlation_in = av_calloc(atempo->window + 1, sizeof(AVComplexFloat))) ||
307  !(atempo->correlation = av_calloc(atempo->window, sizeof(AVComplexFloat)))) {
308  ret = AVERROR(ENOMEM);
309  goto fail;
310  }
311 
312  atempo->ring = atempo->window * 3;
313  atempo->buffer = av_calloc(atempo->ring, atempo->stride);
314  if (!atempo->buffer) {
315  ret = AVERROR(ENOMEM);
316  goto fail;
317  }
318 
319  // initialize the Hann window function:
320  atempo->hann = av_malloc_array(atempo->window, sizeof(float));
321  if (!atempo->hann) {
322  ret = AVERROR(ENOMEM);
323  goto fail;
324  }
325 
326  for (i = 0; i < atempo->window; i++) {
327  double t = (double)i / (double)(atempo->window - 1);
328  double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
329  atempo->hann[i] = (float)h;
330  }
331 
332  return 0;
333 fail:
334  yae_release_buffers(atempo);
335  return ret;
336 }
337 
339 {
340  const AudioFragment *prev;
341  ATempoContext *atempo = ctx->priv;
342 
343  prev = yae_prev_frag(atempo);
344  atempo->origin[0] = prev->position[0] + atempo->window / 2;
345  atempo->origin[1] = prev->position[1] + atempo->window / 2;
346  return 0;
347 }
348 
/**
 * A helper macro that fills frag->xdat_in with one float per sample frame
 * read from src, interpreting src as packed samples of scalar_type.
 *
 * Mono input is copied as-is. For multi-channel input each frame is reduced
 * to the channel value with the largest magnitude, where magnitudes are
 * capped at scalar_max for the comparison.
 *
 * Expects atempo, frag and src to be in scope at the point of expansion;
 * src is advanced past the consumed samples.
 */
#define yae_init_xdat(scalar_type, scalar_max)                          \
    do {                                                                \
        const uint8_t *src_end = src +                                  \
            frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                        \
        float *xdat = frag->xdat_in;                                    \
                                                                        \
        if (atempo->channels == 1) {                                    \
            while (src < src_end) {                                     \
                const scalar_type mono = *(const scalar_type *)src;     \
                src += sizeof(scalar_type);                             \
                                                                        \
                *xdat++ = (float)mono;                                  \
            }                                                           \
        } else {                                                        \
            while (src < src_end) {                                     \
                scalar_type raw = *(const scalar_type *)src;            \
                float picked = (float)raw;                              \
                float peak = FFMIN((float)scalar_max,                   \
                                   (float)fabsf(picked));               \
                int ch;                                                 \
                                                                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                for (ch = 1; ch < atempo->channels; ch++) {             \
                    float value, mag;                                   \
                                                                        \
                    raw = *(const scalar_type *)src;                    \
                    src += sizeof(scalar_type);                         \
                                                                        \
                    value = (float)raw;                                 \
                    mag = FFMIN((float)scalar_max,                      \
                                (float)fabsf(value));                   \
                                                                        \
                    if (peak < mag) {                                   \
                        peak = mag;                                     \
                        picked = value;                                 \
                    }                                                   \
                }                                                       \
                                                                        \
                *xdat++ = picked;                                       \
            }                                                           \
        }                                                               \
    } while (0)
398 
399 /**
400  * Initialize complex data buffer of a given audio fragment
401  * with down-mixed mono data of appropriate scalar type.
402  */
403 static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
404 {
405  // shortcuts:
406  const uint8_t *src = frag->data;
407 
408  // init complex data buffer used for FFT and Correlation:
409  memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * (atempo->window + 1));
410 
411  if (atempo->format == AV_SAMPLE_FMT_U8) {
412  yae_init_xdat(uint8_t, 127);
413  } else if (atempo->format == AV_SAMPLE_FMT_S16) {
414  yae_init_xdat(int16_t, 32767);
415  } else if (atempo->format == AV_SAMPLE_FMT_S32) {
416  yae_init_xdat(int, 2147483647);
417  } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
418  yae_init_xdat(float, 1);
419  } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
420  yae_init_xdat(double, 1);
421  }
422 }
423 
424 /**
425  * Populate the internal data buffer on as-needed basis.
426  *
427  * @return
428  * 0 if requested data was already available or was successfully loaded,
429  * AVERROR(EAGAIN) if more input data is required.
430  */
431 static int yae_load_data(ATempoContext *atempo,
432  const uint8_t **src_ref,
433  const uint8_t *src_end,
434  int64_t stop_here)
435 {
436  // shortcut:
437  const uint8_t *src = *src_ref;
438  const int read_size = stop_here - atempo->position[0];
439 
440  if (stop_here <= atempo->position[0]) {
441  return 0;
442  }
443 
444  // samples are not expected to be skipped, unless tempo is greater than 2:
445  av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0);
446 
447  while (atempo->position[0] < stop_here && src < src_end) {
448  int src_samples = (src_end - src) / atempo->stride;
449 
450  // load data piece-wise, in order to avoid complicating the logic:
451  int nsamples = FFMIN(read_size, src_samples);
452  int na;
453  int nb;
454 
455  nsamples = FFMIN(nsamples, atempo->ring);
456  na = FFMIN(nsamples, atempo->ring - atempo->tail);
457  nb = FFMIN(nsamples - na, atempo->ring);
458 
459  if (na) {
460  uint8_t *a = atempo->buffer + atempo->tail * atempo->stride;
461  memcpy(a, src, na * atempo->stride);
462 
463  src += na * atempo->stride;
464  atempo->position[0] += na;
465 
466  atempo->size = FFMIN(atempo->size + na, atempo->ring);
467  atempo->tail = (atempo->tail + na) % atempo->ring;
468  atempo->head =
469  atempo->size < atempo->ring ?
470  atempo->tail - atempo->size :
471  atempo->tail;
472  }
473 
474  if (nb) {
475  uint8_t *b = atempo->buffer;
476  memcpy(b, src, nb * atempo->stride);
477 
478  src += nb * atempo->stride;
479  atempo->position[0] += nb;
480 
481  atempo->size = FFMIN(atempo->size + nb, atempo->ring);
482  atempo->tail = (atempo->tail + nb) % atempo->ring;
483  atempo->head =
484  atempo->size < atempo->ring ?
485  atempo->tail - atempo->size :
486  atempo->tail;
487  }
488  }
489 
490  // pass back the updated source buffer pointer:
491  *src_ref = src;
492 
493  // sanity check:
494  av_assert0(atempo->position[0] <= stop_here);
495 
496  return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
497 }
498 
499 /**
500  * Populate current audio fragment data buffer.
501  *
502  * @return
503  * 0 when the fragment is ready,
504  * AVERROR(EAGAIN) if more input data is required.
505  */
506 static int yae_load_frag(ATempoContext *atempo,
507  const uint8_t **src_ref,
508  const uint8_t *src_end)
509 {
510  // shortcuts:
511  AudioFragment *frag = yae_curr_frag(atempo);
512  uint8_t *dst;
513  int64_t missing, start, zeros;
514  uint32_t nsamples;
515  const uint8_t *a, *b;
516  int i0, i1, n0, n1, na, nb;
517 
518  int64_t stop_here = frag->position[0] + atempo->window;
519  if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
520  return AVERROR(EAGAIN);
521  }
522 
523  // calculate the number of samples we don't have:
524  missing =
525  stop_here > atempo->position[0] ?
526  stop_here - atempo->position[0] : 0;
527 
528  nsamples =
529  missing < (int64_t)atempo->window ?
530  (uint32_t)(atempo->window - missing) : 0;
531 
532  // setup the output buffer:
533  frag->nsamples = nsamples;
534  dst = frag->data;
535 
536  start = atempo->position[0] - atempo->size;
537 
538  // what we don't have we substitute with zeros:
539  zeros =
540  frag->position[0] < start ?
541  FFMIN(start - frag->position[0], (int64_t)nsamples) : 0;
542 
543  if (zeros == nsamples) {
544  return 0;
545  }
546 
547  if (frag->position[0] < start) {
548  memset(dst, 0, zeros * atempo->stride);
549  dst += zeros * atempo->stride;
550  }
551 
552  // get the remaining data from the ring buffer:
553  na = (atempo->head < atempo->tail ?
554  atempo->tail - atempo->head :
555  atempo->ring - atempo->head);
556 
557  nb = atempo->head < atempo->tail ? 0 : atempo->tail;
558 
559  // sanity check:
560  av_assert0(nsamples <= zeros + na + nb);
561 
562  a = atempo->buffer + atempo->head * atempo->stride;
563  b = atempo->buffer;
564 
565  i0 = frag->position[0] + zeros - start;
566  i1 = i0 < na ? 0 : i0 - na;
567 
568  n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
569  n1 = nsamples - zeros - n0;
570 
571  if (n0) {
572  memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
573  dst += n0 * atempo->stride;
574  }
575 
576  if (n1) {
577  memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
578  }
579 
580  return 0;
581 }
582 
583 /**
584  * Prepare for loading next audio fragment.
585  */
587 {
588  const double fragment_step = atempo->tempo * (double)(atempo->window / 2);
589 
590  const AudioFragment *prev;
591  AudioFragment *frag;
592 
593  atempo->nfrag++;
594  prev = yae_prev_frag(atempo);
595  frag = yae_curr_frag(atempo);
596 
597  frag->position[0] = prev->position[0] + (int64_t)fragment_step;
598  frag->position[1] = prev->position[1] + atempo->window / 2;
599  frag->nsamples = 0;
600 }
601 
602 /**
603  * Calculate cross-correlation via rDFT.
604  *
605  * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
606  * and transform back via complex_to_real rDFT.
607  */
608 static void yae_xcorr_via_rdft(float *xcorr_in,
609  float *xcorr,
610  AVTXContext *complex_to_real,
611  av_tx_fn c2r_fn,
612  const AVComplexFloat *xa,
613  const AVComplexFloat *xb,
614  const int window)
615 {
616  AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
617  int i;
618 
619  for (i = 0; i <= window; i++, xa++, xb++, xc++) {
620  xc->re = (xa->re * xb->re + xa->im * xb->im);
621  xc->im = (xa->im * xb->re - xa->re * xb->im);
622  }
623 
624  // apply inverse rDFT:
625  c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(*xc));
626 }
627 
628 /**
629  * Calculate alignment offset for given fragment
630  * relative to the previous fragment.
631  *
632  * @return alignment offset of current fragment relative to previous.
633  */
634 static int yae_align(AudioFragment *frag,
635  const AudioFragment *prev,
636  const int window,
637  const int delta_max,
638  const int drift,
639  float *correlation_in,
640  float *correlation,
641  AVTXContext *complex_to_real,
642  av_tx_fn c2r_fn)
643 {
644  int best_offset = -drift;
645  float best_metric = -FLT_MAX;
646  float *xcorr;
647 
648  int i0;
649  int i1;
650  int i;
651 
652  yae_xcorr_via_rdft(correlation_in,
653  correlation,
654  complex_to_real,
655  c2r_fn,
656  (const AVComplexFloat *)prev->xdat,
657  (const AVComplexFloat *)frag->xdat,
658  window);
659 
660  // identify search window boundaries:
661  i0 = FFMAX(window / 2 - delta_max - drift, 0);
662  i0 = FFMIN(i0, window);
663 
664  i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
665  i1 = FFMAX(i1, 0);
666 
667  // identify cross-correlation peaks within search window:
668  xcorr = correlation + i0;
669 
670  for (i = i0; i < i1; i++, xcorr++) {
671  float metric = *xcorr;
672 
673  // normalize:
674  float drifti = (float)(drift + i);
675  metric *= drifti * (float)(i - i0) * (float)(i1 - i);
676 
677  if (metric > best_metric) {
678  best_metric = metric;
679  best_offset = i - window / 2;
680  }
681  }
682 
683  return best_offset;
684 }
685 
686 /**
687  * Adjust current fragment position for better alignment
688  * with previous fragment.
689  *
690  * @return alignment correction.
691  */
693 {
694  const AudioFragment *prev = yae_prev_frag(atempo);
695  AudioFragment *frag = yae_curr_frag(atempo);
696 
697  const double prev_output_position =
698  (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2) *
699  atempo->tempo;
700 
701  const double ideal_output_position =
702  (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2);
703 
704  const int drift = (int)(prev_output_position - ideal_output_position);
705 
706  const int delta_max = atempo->window / 2;
707  const int correction = yae_align(frag,
708  prev,
709  atempo->window,
710  delta_max,
711  drift,
712  atempo->correlation_in,
713  atempo->correlation,
714  atempo->complex_to_real,
715  atempo->c2r_fn);
716 
717  if (correction) {
718  // adjust fragment position:
719  frag->position[0] -= correction;
720 
721  // clear so that the fragment can be reloaded:
722  frag->nsamples = 0;
723  }
724 
725  return correction;
726 }
727 
/**
 * A helper macro for blending the overlap region of previous
 * and current audio fragment, for samples of type scalar_type.
 *
 * Expects atempo, frag, a, b, wa, wb, overlap, dst and dst_end to be in
 * scope at the point of expansion. Output stops at the end of the overlap
 * or when dst is full, whichever comes first; atempo->position[1], wa, wb
 * and dst are advanced accordingly. Samples located before input position
 * zero are copied from the previous fragment without blending.
 */
#define yae_blend(scalar_type)                                          \
    do {                                                                \
        const scalar_type *aaa = (const scalar_type *)a;                \
        const scalar_type *bbb = (const scalar_type *)b;                \
                                                                        \
        scalar_type *out     = (scalar_type *)dst;                      \
        scalar_type *out_end = (scalar_type *)dst_end;                  \
        int64_t i = 0;                                                  \
                                                                        \
        while (i < overlap && out < out_end) {                          \
            const float w0 = *wa;                                       \
            const float w1 = *wb;                                       \
            int j;                                                      \
                                                                        \
            for (j = 0; j < atempo->channels; j++) {                    \
                const float t0 = (float)*aaa;                           \
                const float t1 = (float)*bbb;                           \
                                                                        \
                if (frag->position[0] + i < 0)                          \
                    *out = *aaa;                                        \
                else                                                    \
                    *out = (scalar_type)(t0 * w0 + t1 * w1);            \
                                                                        \
                aaa++;                                                  \
                bbb++;                                                  \
                out++;                                                  \
            }                                                           \
                                                                        \
            i++;                                                        \
            atempo->position[1]++;                                      \
            wa++;                                                       \
            wb++;                                                       \
        }                                                               \
        dst = (uint8_t *)out;                                           \
    } while (0)
760 
761 /**
762  * Blend the overlap region of previous and current audio fragment
763  * and output the results to the given destination buffer.
764  *
765  * @return
766  * 0 if the overlap region was completely stored in the dst buffer,
767  * AVERROR(EAGAIN) if more destination buffer space is required.
768  */
769 static int yae_overlap_add(ATempoContext *atempo,
770  uint8_t **dst_ref,
771  uint8_t *dst_end)
772 {
773  // shortcuts:
774  const AudioFragment *prev = yae_prev_frag(atempo);
775  const AudioFragment *frag = yae_curr_frag(atempo);
776 
777  const int64_t start_here = FFMAX(atempo->position[1],
778  frag->position[1]);
779 
780  const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
781  frag->position[1] + frag->nsamples);
782 
783  const int64_t overlap = stop_here - start_here;
784 
785  const int64_t ia = start_here - prev->position[1];
786  const int64_t ib = start_here - frag->position[1];
787 
788  const float *wa = atempo->hann + ia;
789  const float *wb = atempo->hann + ib;
790 
791  const uint8_t *a = prev->data + ia * atempo->stride;
792  const uint8_t *b = frag->data + ib * atempo->stride;
793 
794  uint8_t *dst = *dst_ref;
795 
796  av_assert0(start_here <= stop_here &&
797  frag->position[1] <= start_here &&
798  overlap <= frag->nsamples);
799 
800  if (atempo->format == AV_SAMPLE_FMT_U8) {
801  yae_blend(uint8_t);
802  } else if (atempo->format == AV_SAMPLE_FMT_S16) {
803  yae_blend(int16_t);
804  } else if (atempo->format == AV_SAMPLE_FMT_S32) {
805  yae_blend(int);
806  } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
807  yae_blend(float);
808  } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
809  yae_blend(double);
810  }
811 
812  // pass-back the updated destination buffer pointer:
813  *dst_ref = dst;
814 
815  return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
816 }
817 
818 /**
819  * Feed as much data to the filter as it is able to consume
820  * and receive as much processed data in the destination buffer
821  * as it is able to produce or store.
822  */
823 static void
825  const uint8_t **src_ref,
826  const uint8_t *src_end,
827  uint8_t **dst_ref,
828  uint8_t *dst_end)
829 {
830  while (1) {
831  if (atempo->state == YAE_LOAD_FRAGMENT) {
832  // load additional data for the current fragment:
833  if (yae_load_frag(atempo, src_ref, src_end) != 0) {
834  break;
835  }
836 
837  // down-mix to mono:
838  yae_downmix(atempo, yae_curr_frag(atempo));
839 
840  // apply rDFT:
841  atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
842 
843  // must load the second fragment before alignment can start:
844  if (!atempo->nfrag) {
845  yae_advance_to_next_frag(atempo);
846  continue;
847  }
848 
849  atempo->state = YAE_ADJUST_POSITION;
850  }
851 
852  if (atempo->state == YAE_ADJUST_POSITION) {
853  // adjust position for better alignment:
854  if (yae_adjust_position(atempo)) {
855  // reload the fragment at the corrected position, so that the
856  // Hann window blending would not require normalization:
857  atempo->state = YAE_RELOAD_FRAGMENT;
858  } else {
859  atempo->state = YAE_OUTPUT_OVERLAP_ADD;
860  }
861  }
862 
863  if (atempo->state == YAE_RELOAD_FRAGMENT) {
864  // load additional data if necessary due to position adjustment:
865  if (yae_load_frag(atempo, src_ref, src_end) != 0) {
866  break;
867  }
868 
869  // down-mix to mono:
870  yae_downmix(atempo, yae_curr_frag(atempo));
871 
872  // apply rDFT:
873  atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
874 
875  atempo->state = YAE_OUTPUT_OVERLAP_ADD;
876  }
877 
878  if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
879  // overlap-add and output the result:
880  if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
881  break;
882  }
883 
884  // advance to the next fragment, repeat:
885  yae_advance_to_next_frag(atempo);
886  atempo->state = YAE_LOAD_FRAGMENT;
887  }
888  }
889 }
890 
891 /**
892  * Flush any buffered data from the filter.
893  *
894  * @return
895  * 0 if all data was completely stored in the dst buffer,
896  * AVERROR(EAGAIN) if more destination buffer space is required.
897  */
898 static int yae_flush(ATempoContext *atempo,
899  uint8_t **dst_ref,
900  uint8_t *dst_end)
901 {
902  AudioFragment *frag = yae_curr_frag(atempo);
903  int64_t overlap_end;
904  int64_t start_here;
905  int64_t stop_here;
906  int64_t offset;
907 
908  const uint8_t *src;
909  uint8_t *dst;
910 
911  int src_size;
912  int dst_size;
913  int nbytes;
914 
915  atempo->state = YAE_FLUSH_OUTPUT;
916 
917  if (!atempo->nfrag) {
918  // there is nothing to flush:
919  return 0;
920  }
921 
922  if (atempo->position[0] == frag->position[0] + frag->nsamples &&
923  atempo->position[1] == frag->position[1] + frag->nsamples) {
924  // the current fragment is already flushed:
925  return 0;
926  }
927 
928  if (frag->position[0] + frag->nsamples < atempo->position[0]) {
929  // finish loading the current (possibly partial) fragment:
930  yae_load_frag(atempo, NULL, NULL);
931 
932  if (atempo->nfrag) {
933  // down-mix to mono:
934  yae_downmix(atempo, frag);
935 
936  // apply rDFT:
937  atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float));
938 
939  // align current fragment to previous fragment:
940  if (yae_adjust_position(atempo)) {
941  // reload the current fragment due to adjusted position:
942  yae_load_frag(atempo, NULL, NULL);
943  }
944  }
945  }
946 
947  // flush the overlap region:
948  overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
949  frag->nsamples);
950 
951  while (atempo->position[1] < overlap_end) {
952  if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
953  return AVERROR(EAGAIN);
954  }
955  }
956 
957  // check whether all of the input samples have been consumed:
958  if (frag->position[0] + frag->nsamples < atempo->position[0]) {
959  yae_advance_to_next_frag(atempo);
960  return AVERROR(EAGAIN);
961  }
962 
963  // flush the remainder of the current fragment:
964  start_here = FFMAX(atempo->position[1], overlap_end);
965  stop_here = frag->position[1] + frag->nsamples;
966  offset = start_here - frag->position[1];
967  av_assert0(start_here <= stop_here && frag->position[1] <= start_here);
968 
969  src = frag->data + offset * atempo->stride;
970  dst = (uint8_t *)*dst_ref;
971 
972  src_size = (int)(stop_here - start_here) * atempo->stride;
973  dst_size = dst_end - dst;
974  nbytes = FFMIN(src_size, dst_size);
975 
976  memcpy(dst, src, nbytes);
977  dst += nbytes;
978 
979  atempo->position[1] += (nbytes / atempo->stride);
980 
981  // pass-back the updated destination buffer pointer:
982  *dst_ref = (uint8_t *)dst;
983 
984  return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
985 }
986 
988 {
989  ATempoContext *atempo = ctx->priv;
990  atempo->format = AV_SAMPLE_FMT_NONE;
991  atempo->state = YAE_LOAD_FRAGMENT;
992  return 0;
993 }
994 
996 {
997  ATempoContext *atempo = ctx->priv;
998  yae_release_buffers(atempo);
999 }
1000 
1001 // WSOLA necessitates an internal sliding window ring buffer
1002 // for incoming audio stream.
1003 //
1004 // Planar sample formats are too cumbersome to store in a ring buffer,
1005 // therefore planar sample formats are not supported.
1006 //
1007 static const enum AVSampleFormat sample_fmts[] = {
1014 };
1015 
1017 {
1018  AVFilterContext *ctx = inlink->dst;
1019  ATempoContext *atempo = ctx->priv;
1020 
1021  enum AVSampleFormat format = inlink->format;
1022  int sample_rate = (int)inlink->sample_rate;
1023 
1024  return yae_reset(atempo, format, sample_rate, inlink->ch_layout.nb_channels);
1025 }
1026 
1027 static int push_samples(ATempoContext *atempo,
1028  AVFilterLink *outlink,
1029  int n_out)
1030 {
1031  int ret;
1032 
1033  atempo->dst_buffer->sample_rate = outlink->sample_rate;
1034  atempo->dst_buffer->nb_samples = n_out;
1035 
1036  // adjust the PTS:
1037  atempo->dst_buffer->pts = atempo->start_pts +
1038  av_rescale_q(atempo->nsamples_out,
1039  (AVRational){ 1, outlink->sample_rate },
1040  outlink->time_base);
1041 
1042  ret = ff_filter_frame(outlink, atempo->dst_buffer);
1043  atempo->dst_buffer = NULL;
1044  atempo->dst = NULL;
1045  atempo->dst_end = NULL;
1046  if (ret < 0)
1047  return ret;
1048 
1049  atempo->nsamples_out += n_out;
1050  return 0;
1051 }
1052 
1053 static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
1054 {
1055  AVFilterContext *ctx = inlink->dst;
1056  ATempoContext *atempo = ctx->priv;
1057  AVFilterLink *outlink = ctx->outputs[0];
1058 
1059  int ret = 0;
1060  int n_in = src_buffer->nb_samples;
1061  int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);
1062 
1063  const uint8_t *src = src_buffer->data[0];
1064  const uint8_t *src_end = src + n_in * atempo->stride;
1065 
1066  if (atempo->start_pts == AV_NOPTS_VALUE)
1067  atempo->start_pts = av_rescale_q(src_buffer->pts,
1068  inlink->time_base,
1069  outlink->time_base);
1070 
1071  while (src < src_end) {
1072  if (!atempo->dst_buffer) {
1073  atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out);
1074  if (!atempo->dst_buffer) {
1075  av_frame_free(&src_buffer);
1076  return AVERROR(ENOMEM);
1077  }
1078  av_frame_copy_props(atempo->dst_buffer, src_buffer);
1079 
1080  atempo->dst = atempo->dst_buffer->data[0];
1081  atempo->dst_end = atempo->dst + n_out * atempo->stride;
1082  }
1083 
1084  yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);
1085 
1086  if (atempo->dst == atempo->dst_end) {
1087  int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) /
1088  atempo->stride);
1089  ret = push_samples(atempo, outlink, n_samples);
1090  if (ret < 0)
1091  goto end;
1092  }
1093  }
1094 
1095  atempo->nsamples_in += n_in;
1096 end:
1097  av_frame_free(&src_buffer);
1098  return ret;
1099 }
1100 
1101 static int request_frame(AVFilterLink *outlink)
1102 {
1103  AVFilterContext *ctx = outlink->src;
1104  ATempoContext *atempo = ctx->priv;
1105  int ret;
1106 
1107  ret = ff_request_frame(ctx->inputs[0]);
1108 
1109  if (ret == AVERROR_EOF) {
1110  // flush the filter:
1111  int n_max = atempo->ring;
1112  int n_out;
1113  int err = AVERROR(EAGAIN);
1114 
1115  while (err == AVERROR(EAGAIN)) {
1116  if (!atempo->dst_buffer) {
1117  atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max);
1118  if (!atempo->dst_buffer)
1119  return AVERROR(ENOMEM);
1120 
1121  atempo->dst = atempo->dst_buffer->data[0];
1122  atempo->dst_end = atempo->dst + n_max * atempo->stride;
1123  }
1124 
1125  err = yae_flush(atempo, &atempo->dst, atempo->dst_end);
1126 
1127  n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
1128  atempo->stride);
1129 
1130  if (n_out) {
1131  ret = push_samples(atempo, outlink, n_out);
1132  if (ret < 0)
1133  return ret;
1134  }
1135  }
1136 
1137  av_frame_free(&atempo->dst_buffer);
1138  atempo->dst = NULL;
1139  atempo->dst_end = NULL;
1140 
1141  return AVERROR_EOF;
1142  }
1143 
1144  return ret;
1145 }
1146 
1148  const char *cmd,
1149  const char *arg,
1150  char *res,
1151  int res_len,
1152  int flags)
1153 {
1154  int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);
1155 
1156  if (ret < 0)
1157  return ret;
1158 
1159  return yae_update(ctx);
1160 }
1161 
1162 static const AVFilterPad atempo_inputs[] = {
1163  {
1164  .name = "default",
1165  .type = AVMEDIA_TYPE_AUDIO,
1166  .filter_frame = filter_frame,
1167  .config_props = config_props,
1168  },
1169 };
1170 
1171 static const AVFilterPad atempo_outputs[] = {
1172  {
1173  .name = "default",
1174  .request_frame = request_frame,
1175  .type = AVMEDIA_TYPE_AUDIO,
1176  },
1177 };
1178 
1180  .name = "atempo",
1181  .description = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
1182  .init = init,
1183  .uninit = uninit,
1184  .process_command = process_command,
1185  .priv_size = sizeof(ATempoContext),
1186  .priv_class = &atempo_class,
1190 };
ff_get_audio_buffer
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:97
yae_update
static int yae_update(AVFilterContext *ctx)
Definition: af_atempo.c:338
ATempoContext::stride
int stride
Definition: af_atempo.c:117
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
push_samples
static int push_samples(ATempoContext *atempo, AVFilterLink *outlink, int n_out)
Definition: af_atempo.c:1027
ATempoContext::channels
int channels
Definition: af_atempo.c:113
config_props
static int config_props(AVFilterLink *inlink)
Definition: af_atempo.c:1016
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1015
AVERROR_EOF
#define AVERROR_EOF
End of file.
Definition: error.h:57
ATempoContext::size
int size
Definition: af_atempo.c:98
AVTXContext
Definition: tx_priv.h:235
ATempoContext::dst_end
uint8_t * dst_end
Definition: af_atempo.c:152
int64_t
long long int64_t
Definition: coverity.c:34
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
yae_downmix
static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
Initialize complex data buffer of a given audio fragment with down-mixed mono data of appropriate scalar type.
Definition: af_atempo.c:403
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:160
YAE_FLUSH_OUTPUT
@ YAE_FLUSH_OUTPUT
Definition: af_atempo.c:81
yae_load_data
static int yae_load_data(ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end, int64_t stop_here)
Populate the internal data buffer on as-needed basis.
Definition: af_atempo.c:431
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:374
AVFrame::pts
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:486
AVOption
AVOption.
Definition: opt.h:357
b
#define b
Definition: input.c:41
ATempoContext::position
int64_t position[2]
Definition: af_atempo.c:104
YAE_RELOAD_FRAGMENT
@ YAE_RELOAD_FRAGMENT
Definition: af_atempo.c:79
ff_request_frame
int ff_request_frame(AVFilterLink *link)
Request an input frame from the filter at the other end of the link.
Definition: avfilter.c:463
ATempoContext::nsamples_out
uint64_t nsamples_out
Definition: af_atempo.c:154
float.h
AVComplexFloat
Definition: tx.h:27
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
AudioFragment::xdat_in
float * xdat_in
Definition: af_atempo.c:69
OFFSET
#define OFFSET(x)
Definition: af_atempo.c:160
ATempoContext::frag
AudioFragment frag[2]
Definition: af_atempo.c:134
ATempoContext::tail
int tail
Definition: af_atempo.c:100
sample_rate
sample_rate
Definition: ffmpeg_filter.c:424
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration. (i)MDCTs with an odd length are currently not supported.
Definition: tx.c:903
init
static av_cold int init(AVFilterContext *ctx)
Definition: af_atempo.c:987
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:395
ATempoContext::c2r_fn
av_tx_fn c2r_fn
Definition: af_atempo.c:145
ATempoContext
Filter state machine.
Definition: af_atempo.c:87
AVComplexFloat::im
float im
Definition: tx.h:28
window
static SDL_Window * window
Definition: ffplay.c:361
ATempoContext::complex_to_real
AVTXContext * complex_to_real
Definition: af_atempo.c:144
fail
#define fail()
Definition: checkasm.h:186
YAE_ADJUST_POSITION
@ YAE_ADJUST_POSITION
Definition: af_atempo.c:78
samplefmt.h
ATempoContext::state
FilterState state
Definition: af_atempo.c:140
ATempoContext::origin
int64_t origin[2]
Definition: af_atempo.c:131
atempo_outputs
static const AVFilterPad atempo_outputs[]
Definition: af_atempo.c:1171
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:33
AV_OPT_FLAG_AUDIO_PARAM
#define AV_OPT_FLAG_AUDIO_PARAM
Definition: opt.h:285
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
yae_apply
static void yae_apply(ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end, uint8_t **dst_ref, uint8_t *dst_end)
Feed as much data to the filter as it is able to consume and receive as much processed data in the destination buffer as it is able to produce or store.
Definition: af_atempo.c:824
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:151
float
float
Definition: af_crystalizer.c:121
ATempoContext::dst
uint8_t * dst
Definition: af_atempo.c:151
format
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample format(the sample packing is implied by the sample format) and sample rate. The lists are not just lists
yae_init_xdat
#define yae_init_xdat(scalar_type, scalar_max)
A helper macro for initializing complex data buffer with scalar data of a given type.
Definition: af_atempo.c:353
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:247
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
YAE_ATEMPO_MAX
#define YAE_ATEMPO_MAX
Definition: af_atempo.c:158
YAE_OUTPUT_OVERLAP_ADD
@ YAE_OUTPUT_OVERLAP_ADD
Definition: af_atempo.c:80
ctx
AVFormatContext * ctx
Definition: movenc.c:49
channels
channels
Definition: aptx.h:31
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
ff_af_atempo
const AVFilter ff_af_atempo
Definition: af_atempo.c:1179
ATempoContext::buffer
uint8_t * buffer
Definition: af_atempo.c:92
ATempoContext::ring
int ring
Definition: af_atempo.c:95
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:182
arg
const char * arg
Definition: jacosubdec.c:67
ATempoContext::tempo
double tempo
Definition: af_atempo.c:127
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:709
AudioFragment::position
int64_t position[2]
Definition: af_atempo.c:59
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
ATempoContext::head
int head
Definition: af_atempo.c:99
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(atempo)
filter_frame
static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
Definition: af_atempo.c:1053
yae_xcorr_via_rdft
static void yae_xcorr_via_rdft(float *xcorr_in, float *xcorr, AVTXContext *complex_to_real, av_tx_fn c2r_fn, const AVComplexFloat *xa, const AVComplexFloat *xb, const int window)
Calculate cross-correlation via rDFT.
Definition: af_atempo.c:608
double
double
Definition: af_crystalizer.c:131
yae_curr_frag
static AudioFragment * yae_curr_frag(ATempoContext *atempo)
Definition: af_atempo.c:173
yae_reset
static int yae_reset(ATempoContext *atempo, enum AVSampleFormat format, int sample_rate, int channels)
Prepare filter for processing audio data of given format, sample rate and number of channels.
Definition: af_atempo.c:251
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Definition: af_atempo.c:1147
yae_overlap_add
static int yae_overlap_add(ATempoContext *atempo, uint8_t **dst_ref, uint8_t *dst_end)
Blend the overlap region of previous and current audio fragment and output the results to the given destination buffer.
Definition: af_atempo.c:769
ATempoContext::nsamples_in
uint64_t nsamples_in
Definition: af_atempo.c:153
AV_OPT_FLAG_FILTERING_PARAM
#define AV_OPT_FLAG_FILTERING_PARAM
A generic parameter which can be set by the user for filtering.
Definition: opt.h:309
yae_load_frag
static int yae_load_frag(ATempoContext *atempo, const uint8_t **src_ref, const uint8_t *src_end)
Populate current audio fragment data buffer.
Definition: af_atempo.c:506
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
yae_prev_frag
static AudioFragment * yae_prev_frag(ATempoContext *atempo)
Definition: af_atempo.c:178
AVFrame::sample_rate
int sample_rate
Sample rate of the audio data.
Definition: frame.h:573
AV_SAMPLE_FMT_NONE
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:56
AVComplexFloat::re
float re
Definition: tx.h:28
AV_NOPTS_VALUE
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248
FILTER_SAMPLEFMTS_ARRAY
#define FILTER_SAMPLEFMTS_ARRAY(array)
Definition: internal.h:165
ff_filter_process_command
int ff_filter_process_command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
Generic processing of user supplied commands that are set in the same way as the filter options.
Definition: avfilter.c:887
ATempoContext::hann
float * hann
Definition: af_atempo.c:124
AudioFragment::nsamples
int nsamples
Definition: af_atempo.c:65
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AudioFragment::data
uint8_t * data
Definition: af_atempo.c:62
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
yae_blend
#define yae_blend(scalar_type)
A helper macro for blending the overlap region of previous and current audio fragment.
Definition: af_atempo.c:732
M_PI
#define M_PI
Definition: mathematics.h:67
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:295
internal.h
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:454
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
av_get_bytes_per_sample
int av_get_bytes_per_sample(enum AVSampleFormat sample_fmt)
Return number of bytes per sample.
Definition: samplefmt.c:108
YAE_ATEMPO_MIN
#define YAE_ATEMPO_MIN
Definition: af_atempo.c:157
AV_SAMPLE_FMT_U8
@ AV_SAMPLE_FMT_U8
unsigned 8 bits
Definition: samplefmt.h:57
yae_advance_to_next_frag
static void yae_advance_to_next_frag(ATempoContext *atempo)
Prepare for loading next audio fragment.
Definition: af_atempo.c:586
ATempoContext::dst_buffer
AVFrame * dst_buffer
Definition: af_atempo.c:150
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:31
sample_fmts
static enum AVSampleFormat sample_fmts[]
Definition: af_atempo.c:1007
yae_clear
static void yae_clear(ATempoContext *atempo)
Reset filter to initial state, do not deallocate existing local buffers.
Definition: af_atempo.c:186
ATempoContext::r2c_fn
av_tx_fn r2c_fn
Definition: af_atempo.c:145
AVSampleFormat
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:55
ATempoContext::window
int window
Definition: af_atempo.c:120
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_SAMPLE_FMT_S16
@ AV_SAMPLE_FMT_S16
signed 16 bits
Definition: samplefmt.h:58
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:39
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
atempo_inputs
static const AVFilterPad atempo_inputs[]
Definition: af_atempo.c:1162
yae_flush
static int yae_flush(ATempoContext *atempo, uint8_t **dst_ref, uint8_t *dst_end)
Flush any buffered data from the filter.
Definition: af_atempo.c:898
AVFilter
Filter definition.
Definition: avfilter.h:166
ret
ret
Definition: filter_design.txt:187
atempo_options
static const AVOption atempo_options[]
Definition: af_atempo.c:162
ATempoContext::correlation_in
float * correlation_in
Definition: af_atempo.c:146
ATempoContext::nfrag
uint64_t nfrag
Definition: af_atempo.c:137
request_frame
static int request_frame(AVFilterLink *outlink)
Definition: af_atempo.c:1101
ATempoContext::format
enum AVSampleFormat format
Definition: af_atempo.c:110
AV_TX_FLOAT_RDFT
@ AV_TX_FLOAT_RDFT
Real to complex and complex to real DFTs.
Definition: tx.h:90
ATempoContext::real_to_complex
AVTXContext * real_to_complex
Definition: af_atempo.c:143
channel_layout.h
yae_adjust_position
static int yae_adjust_position(ATempoContext *atempo)
Adjust current fragment position for better alignment with previous fragment.
Definition: af_atempo.c:692
AV_OPT_FLAG_RUNTIME_PARAM
#define AV_OPT_FLAG_RUNTIME_PARAM
A generic parameter which can be set by the user at runtime.
Definition: opt.h:305
avfilter.h
FilterState
FilterState
Filter state machine states.
Definition: af_atempo.c:76
ATempoContext::correlation
float * correlation
Definition: af_atempo.c:147
correlation
static void correlation(int32_t *corr, int32_t *ener, const int16_t *buffer, int16_t lag, int16_t blen, int16_t srange, int16_t scale)
Definition: ilbcdec.c:917
AVFilterContext
An instance of a filter.
Definition: avfilter.h:407
mem.h
audio.h
ib
#define ib(width, name)
Definition: cbs_h2645.c:258
yae_align
static int yae_align(AudioFragment *frag, const AudioFragment *prev, const int window, const int delta_max, const int drift, float *correlation_in, float *correlation, AVTXContext *complex_to_real, av_tx_fn c2r_fn)
Calculate alignment offset for given fragment relative to the previous fragment.
Definition: af_atempo.c:634
ATempoContext::start_pts
int64_t start_pts
Definition: af_atempo.c:107
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:291
yae_release_buffers
static void yae_release_buffers(ATempoContext *atempo)
Reset filter to initial state and deallocate all buffers.
Definition: af_atempo.c:227
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:183
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_atempo.c:995
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
h
h
Definition: vp9dsp_template.c:2038
AV_SAMPLE_FMT_DBL
@ AV_SAMPLE_FMT_DBL
double
Definition: samplefmt.h:61
int
int
Definition: ffmpeg_filter.c:424
AV_SAMPLE_FMT_S32
@ AV_SAMPLE_FMT_S32
signed 32 bits
Definition: samplefmt.h:59
AudioFragment
A fragment of audio waveform.
Definition: af_atempo.c:55
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:60
YAE_LOAD_FRAGMENT
@ YAE_LOAD_FRAGMENT
Definition: af_atempo.c:77
AudioFragment::xdat
float * xdat
Definition: af_atempo.c:70
tx.h