/*
 * DECL_INIT_FN(basis, interleave) generates a static init function whose
 * name is formed by token pasting: b<basis>_i<interleave>.
 *
 * The generated body calls ff_tx_init_tabs_float(len) and then picks one of
 * two map generators:
 *   - cd->max_len == 2: returns ff_tx_gen_ptwo_revtab(s, opts);
 *   - otherwise:        returns ff_tx_gen_split_radix_parity_revtab(s, len,
 *                       inv, opts, ...).
 *
 * NOTE(review): presumably the basis/interleave macro arguments are the
 * trailing arguments of the split-radix call -- confirm against the full
 * macro body.
 */
71 #define DECL_INIT_FN(basis, interleave) \
72 static av_cold int b ##basis## _i ##interleave(AVTXContext *s, \
73 const FFTXCodelet *cd, \
75 FFTXCodeletOptions *opts, \
79 ff_tx_init_tabs_float(len); \
80 if (cd->max_len == 2) \
81 return ff_tx_gen_ptwo_revtab(s, opts); \
83 return ff_tx_gen_split_radix_parity_revtab(s, len, inv, opts, \
/*
 * In-place reordering of the first 15 entries of s->map.
 * tmp holds a snapshot of those entries; cnt is the destination index used
 * by the three passes below.
 * NOTE(review): cnt is presumably advanced once per store inside each loop
 * body -- confirm.
 */
112 int cnt = 0,
tmp[15];
/* Snapshot entries 0..14 so every pass reads the unmodified values while
 * s->map itself is being overwritten. */
115 memcpy(
tmp,
s->map, 15*
sizeof(*
tmp));
/* Pass 1: source indices 1, 4, 7, 10, 13 (i % 3 == 1). */
116 for (
int i = 1;
i < 15;
i += 3) {
117 s->map[cnt] =
tmp[
i];
/* Pass 2: source indices 2, 5, 8, 11, 14 (i % 3 == 2). */
120 for (
int i = 2;
i < 15;
i += 3) {
121 s->map[cnt] =
tmp[
i];
/* Pass 3: source indices 0, 3, 6, 9, 12 (i % 3 == 0). */
124 for (
int i = 0;
i < 15;
i += 3) {
125 s->map[cnt] =
tmp[
i];
/* Shift the four entries at 6..9 up by one slot to 7..10. The ranges
 * overlap, hence memmove rather than memcpy.
 * NOTE(review): the size is spelled sizeof(int) here, unlike the
 * sizeof(*tmp) used for the snapshot; presumably s->map elements are int --
 * confirm. */
128 memmove(&
s->map[7], &
s->map[6], 4*
sizeof(
int));
/* Shift the four entries at 1..4 up by two slots to 3..6 (overlapping). */
129 memmove(&
s->map[3], &
s->map[1], 4*
sizeof(
int));
/* scale is an untyped pointer; the body below reads it as a SCALE_TYPE. */
139 int len,
int inv,
const void *
scale)
/* Load the scale factor through the opaque pointer as SCALE_TYPE into
 * scale_d, then assign scale_f from scale_d. */
144 s->scale_d = *((SCALE_TYPE *)
scale);
145 s->scale_f =
s->scale_d;
/* Copy the first len/2 entries of the sub-context's map into s->map. */
160 memcpy(
s->map,
s->sub->map, (
len >> 1)*
sizeof(*
s->map));
/* Fill the second half of s->map: for each i in [0, len/2), slot
 * (len/2 + s->map[i]) receives i. Assuming the first half is a permutation
 * of 0..len/2-1, the second half ends up holding its inverse.
 * NOTE(review): s->map must have room for at least len entries -- confirm
 * where it is allocated. */
162 for (
int i = 0;
i < (
len >> 1);
i++)
163 s->map[(
len >> 1) +
s->map[
i]] =
i;
188 sub_len, inv,
scale)))
/*
 * Reorder s->map in consecutive groups of 15 entries; group k occupies
 * s->map[k*15 .. k*15 + 14] and there are s->sub[0].len groups.
 * NOTE(review): cnt is the write offset within the current group and tmp is
 * a 15-entry scratch buffer; cnt presumably restarts at 0 for every k and
 * advances once per store -- confirm.
 */
201 for (
int k = 0; k <
s->sub[0].len; k++) {
/* Snapshot group k so the passes below read unmodified values. */
203 memcpy(
tmp, &
s->map[k*15], 15*
sizeof(*
tmp));
/* Pass 1: source indices 1, 4, 7, 10, 13 (i % 3 == 1). */
204 for (
int i = 1;
i < 15;
i += 3) {
205 s->map[k*15 + cnt] =
tmp[
i];
/* Pass 2: source indices 2, 5, 8, 11, 14 (i % 3 == 2). */
208 for (
int i = 2;
i < 15;
i += 3) {
209 s->map[k*15 + cnt] =
tmp[
i];
/* Pass 3: source indices 0, 3, 6, 9, 12 (i % 3 == 0). */
212 for (
int i = 0;
i < 15;
i += 3) {
213 s->map[k*15 + cnt] =
tmp[
i];
/* Within the group, shift entries 6..9 up by one slot to 7..10
 * (overlapping ranges, hence memmove). */
216 memmove(&
s->map[k*15 + 7], &
s->map[k*15 + 6], 4*
sizeof(
int));
/* Within the group, shift entries 1..4 up by two slots to 3..6. */
217 memmove(&
s->map[k*15 + 3], &
s->map[k*15 + 1], 4*
sizeof(
int));
/* Overwrite slots 1 and 2 of the group with the original (snapshot)
 * entries 2 and 0 respectively. */
218 s->map[k*15 + 1] =
tmp[2];
219 s->map[k*15 + 2] =
tmp[0];
232 TX_DEF(fft2, FFT, 2, 2, 2, 0, 128,
NULL, sse3, SSE3,
AV_TX_INPLACE, 0),
233 TX_DEF(fft2_asm, FFT, 2, 2, 2, 0, 192, b8_i0, sse3, SSE3,
235 TX_DEF(fft2, FFT, 2, 2, 2, 0, 192, b8_i0, sse3, SSE3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
236 TX_DEF(fft4_fwd, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_FORWARD_ONLY, 0),
237 TX_DEF(fft4_fwd_asm, FFT, 4, 4, 2, 0, 192, b8_i0, sse2, SSE2,
239 TX_DEF(fft4_inv_asm, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
241 TX_DEF(fft4_fwd, FFT, 4, 4, 2, 0, 192, b8_i0, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
242 TX_DEF(fft4_inv, FFT, 4, 4, 2, 0, 128,
NULL, sse2, SSE2,
AV_TX_INPLACE |
FF_TX_INVERSE_ONLY, 0),
243 TX_DEF(fft8, FFT, 8, 8, 2, 0, 128, b8_i0, sse3, SSE3,
AV_TX_INPLACE, 0),
244 TX_DEF(fft8_asm, FFT, 8, 8, 2, 0, 192, b8_i0, sse3, SSE3,
246 TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 192, b8_i0, sse3, SSE3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE, 0),
247 TX_DEF(fft8, FFT, 8, 8, 2, 0, 256, b8_i0, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
248 TX_DEF(fft8_asm, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX,
250 TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
252 TX_DEF(fft16, FFT, 16, 16, 2, 0, 256, b8_i2, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
253 TX_DEF(fft16_asm, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX,
255 TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
257 TX_DEF(fft16, FFT, 16, 16, 2, 0, 288, b8_i2, fma3, FMA3,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
258 TX_DEF(fft16_asm, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3,
260 TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
264 TX_DEF(fft32, FFT, 32, 32, 2, 0, 256, b8_i2, avx, AVX,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
265 TX_DEF(fft32_asm, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX,
267 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
269 TX_DEF(fft32, FFT, 32, 32, 2, 0, 288, b8_i2, fma3, FMA3,
AV_TX_INPLACE,
AV_CPU_FLAG_AVXSLOW),
270 TX_DEF(fft32_asm, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3,
272 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
274 TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 256, b8_i2, avx, AVX, 0,
AV_CPU_FLAG_AVXSLOW),
275 TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,
277 TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
279 TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 288, b8_i2, fma3, FMA3, 0,
AV_CPU_FLAG_AVXSLOW),
280 TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3, FMA3,
282 TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3, FMA3,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
285 #if HAVE_AVX2_EXTERNAL
286 TX_DEF(
fft15, FFT, 15, 15, 15, 0, 320,
factor_init, avx2, AVX2,
288 TX_DEF(fft15_ns, FFT, 15, 15, 15, 0, 384,
factor_init, avx2, AVX2,
291 TX_DEF(fft_sr, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx2, AVX2, 0,
293 TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2,
295 TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2,
AV_TX_INPLACE |
FF_TX_PRESHUFFLE,
298 TX_DEF(fft_pfa_15xM, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 320,
fft_pfa_init, avx2, AVX2,
300 TX_DEF(fft_pfa_15xM_asm, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 384,
fft_pfa_init, avx2, AVX2,
302 TX_DEF(fft_pfa_15xM_ns, FFT, 60,
TX_LEN_UNLIMITED, 15, 2, 384,
fft_pfa_init, avx2, AVX2,
305 TX_DEF(mdct_inv, MDCT, 16,
TX_LEN_UNLIMITED, 2,
TX_FACTOR_ANY, 384,
m_inv_init, avx2, AVX2,