34 #define SAMPLE_FORMAT float
37 #define ctype AVComplexFloat
39 #define TX_TYPE AV_TX_FLOAT_RDFT
43 #define SAMPLE_FORMAT double
46 #define ctype AVComplexDouble
48 #define TX_TYPE AV_TX_DOUBLE_RDFT
53 #define fn3(a,b) a##_##b
54 #define fn2(a,b) fn3(a,b)
55 #define fn(a) fn2(a, SAMPLE_FORMAT)
58 int cur_nb_taps,
const ftype *time)
60 ftype ch_gain, sum = 0;
62 if (
s->ir_norm < 0.f) {
64 }
else if (
s->ir_norm == 0.f) {
65 for (
int i = 0;
i < cur_nb_taps;
i++)
69 ftype ir_norm =
s->ir_norm;
71 for (
int i = 0;
i < cur_nb_taps;
i++)
73 ch_gain = 1. /
POW(sum, 1. / ir_norm);
80 int cur_nb_taps,
int ch,
83 if (ch_gain != 1. ||
s->ir_gain != 1.) {
84 ftype gain = ch_gain *
s->ir_gain;
88 s->fdsp->vector_fmul_scalar(time, time, gain,
FFALIGN(cur_nb_taps, 4));
90 s->fdsp->vector_dmul_scalar(time, time, gain,
FFALIGN(cur_nb_taps, 8));
98 const int coffset = coeff_partition * seg->coeff_size;
99 const int nb_taps =
s->nb_taps[selir];
100 ftype *time = (
ftype *)
s->norm_ir[selir]->extended_data[ch];
101 ftype *tempin = (
ftype *)seg->tempin->extended_data[ch];
102 ftype *tempout = (
ftype *)seg->tempout->extended_data[ch];
104 const int remaining = nb_taps - (seg->input_offset + coeff_partition * seg->part_size);
105 const int size = remaining >= seg->part_size ? seg->part_size : remaining;
107 memset(tempin +
size, 0,
sizeof(*tempin) * (seg->block_size -
size));
108 memcpy(tempin, time + seg->input_offset + coeff_partition * seg->part_size,
109 size *
sizeof(*tempin));
110 seg->ctx_fn(seg->ctx[ch], tempout, tempin,
sizeof(*tempin));
111 memcpy(
coeff + coffset, tempout, seg->coeff_size *
sizeof(*
coeff));
125 if ((nb_samples & 15) == 0 && nb_samples >= 8) {
127 s->fdsp->vector_fmac_scalar(
dst,
src, 1.
f, nb_samples);
129 s->fdsp->vector_dmac_scalar(
dst,
src, 1.0, nb_samples);
132 for (
int n = 0; n < nb_samples; n++)
140 const ftype *in = (
const ftype *)
s->in->extended_data[ch] + ioffset;
142 const int min_part_size =
s->min_part_size;
143 const int nb_samples =
FFMIN(min_part_size,
out->nb_samples -
offset);
144 const int nb_segments =
s->nb_segments[selir];
145 const float dry_gain =
s->dry_gain;
146 const float wet_gain =
s->wet_gain;
163 if (dry_gain == 1.
f) {
164 memcpy(
src + input_offset, in, nb_samples *
sizeof(*
src));
165 }
else if (min_part_size >= 8) {
167 s->fdsp->vector_fmul_scalar(
src + input_offset, in, dry_gain,
FFALIGN(nb_samples, 4));
169 s->fdsp->vector_dmul_scalar(
src + input_offset, in, dry_gain,
FFALIGN(nb_samples, 8));
173 for (
int n = 0; n < nb_samples; n++)
174 src2[n] = in[n] * dry_gain;
177 output_offset[0] += min_part_size;
178 if (output_offset[0] >= part_size) {
179 output_offset[0] = 0;
183 dst += output_offset[0];
188 memset(sumin, 0,
sizeof(*sumin) * seg->
fft_length);
191 memset(tempin + part_size, 0,
sizeof(*tempin) * (seg->
block_size - part_size));
192 memcpy(tempin,
src,
sizeof(*
src) * part_size);
193 seg->
tx_fn(seg->
tx[ch], blockout, tempin,
sizeof(
ftype));
196 for (
int i = 0;
i < nb_partitions;
i++) {
197 const int input_partition = j;
198 const int coeff_partition =
i;
199 const int coffset = coeff_partition * seg->
coeff_size;
208 s->afirdsp.fcmul_add(sumin, blockout, (
const ftype *)
coeff, part_size);
210 s->afirdsp.dcmul_add(sumin, blockout, (
const ftype *)
coeff, part_size);
217 memcpy(
dst, buf, part_size *
sizeof(*
dst));
218 memcpy(buf, sumout + part_size, part_size *
sizeof(*buf));
222 if (part_size != min_part_size)
231 if (min_part_size >= 8) {
233 s->fdsp->vector_fmul_scalar(ptr, ptr, wet_gain,
FFALIGN(nb_samples, 4));
235 s->fdsp->vector_dmul_scalar(ptr, ptr, wet_gain,
FFALIGN(nb_samples, 8));
238 for (
int n = 0; n < nb_samples; n++)
246 int min_part_size,
int ch,
int offset,
247 int prev_selir,
int selir)
249 if (
ctx->is_disabled ||
s->prev_is_disabled) {
251 const ftype *xfade0 = (
const ftype *)
s->xfade[0]->extended_data[ch];
252 const ftype *xfade1 = (
const ftype *)
s->xfade[1]->extended_data[ch];
257 if (
ctx->is_disabled && !
s->prev_is_disabled) {
258 memset(
src0, 0, min_part_size *
sizeof(
ftype));
260 for (
int n = 0; n < min_part_size; n++)
261 dst[n] = xfade1[n] *
src0[n] + xfade0[n] * in[n];
262 }
else if (!
ctx->is_disabled &&
s->prev_is_disabled) {
263 memset(
src1, 0, min_part_size *
sizeof(
ftype));
265 for (
int n = 0; n < min_part_size; n++)
266 dst[n] = xfade1[n] * in[n] + xfade0[n] *
src1[n];
268 memcpy(
dst, in,
sizeof(
ftype) * min_part_size);
270 }
else if (prev_selir != selir &&
s->loading[ch] != 0) {
271 const ftype *xfade0 = (
const ftype *)
s->xfade[0]->extended_data[ch];
272 const ftype *xfade1 = (
const ftype *)
s->xfade[1]->extended_data[ch];
277 memset(
src0, 0, min_part_size *
sizeof(
ftype));
278 memset(
src1, 0, min_part_size *
sizeof(
ftype));
283 if (
s->loading[ch] >
s->max_offset[selir]) {
284 for (
int n = 0; n < min_part_size; n++)
285 dst[n] = xfade1[n] *
src0[n] + xfade0[n] *
src1[n];