FFmpeg
float_dsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include <float.h>
20 #include <stdint.h>
21 
22 #include "libavutil/float_dsp.h"
23 #include "libavutil/internal.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/mem_internal.h"
26 
27 #include "checkasm.h"
28 
/* Number of elements in every test vector. Must stay even: randomize_buffer()
 * writes two elements per loop iteration. */
#define LEN 256

/*
 * Fill the first LEN elements of buf with pseudo-random values.
 *
 * Each av_bmg_get() call on checkasm_lfg yields two values; each one is
 * multiplied by stddev (10.0) and offset by mean (0.0) before being stored.
 * buf may be any expression that can be indexed and assigned floating-point
 * values (the callers in this file pass float and double arrays).
 *
 * The expansion is a single statement without a trailing semicolon, so
 * "randomize_buffer(x);" is safe inside an unbraced if/else.
 */
#define randomize_buffer(buf)                                   \
do {                                                            \
    int i;                                                      \
    double bmg[2], stddev = 10.0, mean = 0.0;                   \
                                                                \
    for (i = 0; i < LEN; i += 2) {                              \
        av_bmg_get(&checkasm_lfg, bmg);                         \
        (buf)[i]     = bmg[0] * stddev + mean;                  \
        (buf)[i + 1] = bmg[1] * stddev + mean;                  \
    }                                                           \
} while (0)
42 
43 static void test_vector_fmul(const float *src0, const float *src1)
44 {
45  LOCAL_ALIGNED_32(float, cdst, [LEN]);
46  LOCAL_ALIGNED_32(float, odst, [LEN]);
47  int i;
48 
49  declare_func(void, float *dst, const float *src0, const float *src1,
50  int len);
51 
52  call_ref(cdst, src0, src1, LEN);
53  call_new(odst, src0, src1, LEN);
54  for (i = 0; i < LEN; i++) {
55  double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
56  if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
57  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
58  i, cdst[i], odst[i], cdst[i] - odst[i]);
59  fail();
60  break;
61  }
62  }
63  bench_new(odst, src0, src1, LEN);
64 }
65 
66 static void test_vector_dmul(const double *src0, const double *src1)
67 {
68  LOCAL_ALIGNED_32(double, cdst, [LEN]);
69  LOCAL_ALIGNED_32(double, odst, [LEN]);
70  int i;
71 
72  declare_func(void, double *dst, const double *src0, const double *src1,
73  int len);
74 
75  call_ref(cdst, src0, src1, LEN);
76  call_new(odst, src0, src1, LEN);
77  for (i = 0; i < LEN; i++) {
78  double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
79  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
80  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
81  i, cdst[i], odst[i], cdst[i] - odst[i]);
82  fail();
83  break;
84  }
85  }
86  bench_new(odst, src0, src1, LEN);
87 }
88 
89 #define ARBITRARY_FMUL_ADD_CONST 0.005
90 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
91 {
92  LOCAL_ALIGNED_32(float, cdst, [LEN]);
93  LOCAL_ALIGNED_32(float, odst, [LEN]);
94  int i;
95 
96  declare_func(void, float *dst, const float *src0, const float *src1,
97  const float *src2, int len);
98 
99  call_ref(cdst, src0, src1, src2, LEN);
100  call_new(odst, src0, src1, src2, LEN);
101  for (i = 0; i < LEN; i++) {
102  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
103  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
104  i, cdst[i], odst[i], cdst[i] - odst[i]);
105  fail();
106  break;
107  }
108  }
109  bench_new(odst, src0, src1, src2, LEN);
110 }
111 
112 static void test_vector_fmul_scalar(const float *src0, const float *src1)
113 {
114  LOCAL_ALIGNED_16(float, cdst, [LEN]);
115  LOCAL_ALIGNED_16(float, odst, [LEN]);
116  int i;
117 
118  declare_func(void, float *dst, const float *src, float mul, int len);
119 
120  call_ref(cdst, src0, src1[0], LEN);
121  call_new(odst, src0, src1[0], LEN);
122  for (i = 0; i < LEN; i++) {
123  double t = fabs(src0[i]) + fabs(src1[0]) + fabs(src0[i] * src1[0]) + 1.0;
124  if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
125  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
126  i, cdst[i], odst[i], cdst[i] - odst[i]);
127  fail();
128  break;
129  }
130  }
131  bench_new(odst, src0, src1[0], LEN);
132 }
133 
134 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
135 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
136 {
137  LOCAL_ALIGNED_16(float, cdst, [LEN]);
138  LOCAL_ALIGNED_16(float, odst, [LEN]);
139  int i;
140 
141  declare_func(void, float *dst, const float *src0, const float *src1,
142  const float *win, int len);
143 
144  call_ref(cdst, src0, src1, win, LEN / 2);
145  call_new(odst, src0, src1, win, LEN / 2);
146  for (i = 0; i < LEN; i++) {
147  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
148  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
149  i, cdst[i], odst[i], cdst[i] - odst[i]);
150  fail();
151  break;
152  }
153  }
154  bench_new(odst, src0, src1, win, LEN / 2);
155 }
156 
157 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
158 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
159 {
160  LOCAL_ALIGNED_32(float, cdst, [LEN]);
161  LOCAL_ALIGNED_32(float, odst, [LEN]);
162  int i;
163 
164  declare_func(void, float *dst, const float *src, float mul, int len);
165 
166  memcpy(cdst, src2, LEN * sizeof(*src2));
167  memcpy(odst, src2, LEN * sizeof(*src2));
168 
169  call_ref(cdst, src0, src1[0], LEN);
170  call_new(odst, src0, src1[0], LEN);
171  for (i = 0; i < LEN; i++) {
172  if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
173  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
174  i, cdst[i], odst[i], cdst[i] - odst[i]);
175  fail();
176  break;
177  }
178  }
179  memcpy(odst, src2, LEN * sizeof(*src2));
180  bench_new(odst, src0, src1[0], LEN);
181 }
182 
183 static void test_vector_dmul_scalar(const double *src0, const double *src1)
184 {
185  LOCAL_ALIGNED_32(double, cdst, [LEN]);
186  LOCAL_ALIGNED_32(double, odst, [LEN]);
187  int i;
188 
189  declare_func(void, double *dst, const double *src, double mul, int len);
190 
191  call_ref(cdst, src0, src1[0], LEN);
192  call_new(odst, src0, src1[0], LEN);
193  for (i = 0; i < LEN; i++) {
194  double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
195  if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
196  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
197  cdst[i], odst[i], cdst[i] - odst[i]);
198  fail();
199  break;
200  }
201  }
202  bench_new(odst, src0, src1[0], LEN);
203 }
204 
205 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
206 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
207 {
208  LOCAL_ALIGNED_32(double, cdst, [LEN]);
209  LOCAL_ALIGNED_32(double, odst, [LEN]);
210  int i;
211 
212  declare_func(void, double *dst, const double *src, double mul, int len);
213 
214  memcpy(cdst, src2, LEN * sizeof(*src2));
215  memcpy(odst, src2, LEN * sizeof(*src2));
216  call_ref(cdst, src0, src1[0], LEN);
217  call_new(odst, src0, src1[0], LEN);
218  for (i = 0; i < LEN; i++) {
219  if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
220  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
221  i, cdst[i], odst[i], cdst[i] - odst[i]);
222  fail();
223  break;
224  }
225  }
226  memcpy(odst, src2, LEN * sizeof(*src2));
227  bench_new(odst, src0, src1[0], LEN);
228 }
229 
230 static void test_butterflies_float(const float *src0, const float *src1)
231 {
232  LOCAL_ALIGNED_16(float, cdst, [LEN]);
233  LOCAL_ALIGNED_16(float, odst, [LEN]);
234  LOCAL_ALIGNED_16(float, cdst1, [LEN]);
235  LOCAL_ALIGNED_16(float, odst1, [LEN]);
236  int i;
237 
238  declare_func(void, float *restrict src0, float *restrict src1,
239  int len);
240 
241  memcpy(cdst, src0, LEN * sizeof(*src0));
242  memcpy(cdst1, src1, LEN * sizeof(*src1));
243  memcpy(odst, src0, LEN * sizeof(*src0));
244  memcpy(odst1, src1, LEN * sizeof(*src1));
245 
246  call_ref(cdst, cdst1, LEN);
247  call_new(odst, odst1, LEN);
248  for (i = 0; i < LEN; i++) {
249  if (!float_near_abs_eps(cdst[i], odst[i], FLT_EPSILON) ||
250  !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
251  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
252  i, cdst[i], odst[i], cdst[i] - odst[i]);
253  fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
254  i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
255  fail();
256  break;
257  }
258  }
259  memcpy(odst, src0, LEN * sizeof(*src0));
260  memcpy(odst1, src1, LEN * sizeof(*src1));
261  bench_new(odst, odst1, LEN);
262 }
263 
264 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
265 static void test_scalarproduct_float(const float *src0, const float *src1)
266 {
267  float cprod, oprod;
268 
269  declare_func_float(float, const float *src0, const float *src1, int len);
270 
271  cprod = call_ref(src0, src1, LEN);
272  oprod = call_new(src0, src1, LEN);
274  fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
275  cprod, oprod, cprod - oprod);
276  fail();
277  }
278  bench_new(src0, src1, LEN);
279 }
280 
281 static void test_scalarproduct_double(const double *src0, const double *src1)
282 {
283  double cprod, oprod;
284 
285  declare_func_float(double, const double *, const double *, size_t);
286 
287  cprod = call_ref(src0, src1, LEN);
288  oprod = call_new(src0, src1, LEN);
290  fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
291  cprod, oprod, cprod - oprod);
292  fail();
293  }
294  bench_new(src0, src1, LEN);
295 }
296 
298 {
299  LOCAL_ALIGNED_32(float, src0, [LEN]);
300  LOCAL_ALIGNED_32(float, src1, [LEN]);
301  LOCAL_ALIGNED_32(float, src2, [LEN]);
302  LOCAL_ALIGNED_16(float, src3, [LEN]);
303  LOCAL_ALIGNED_16(float, src4, [LEN]);
304  LOCAL_ALIGNED_16(float, src5, [LEN]);
305  LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
306  LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
307  LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
309 
310  if (!fdsp) {
311  fprintf(stderr, "floatdsp: Out of memory error\n");
312  return;
313  }
314 
318  randomize_buffer(src3);
319  randomize_buffer(src4);
320  randomize_buffer(src5);
321  randomize_buffer(dbl_src0);
322  randomize_buffer(dbl_src1);
323  randomize_buffer(dbl_src2);
324 
325  if (check_func(fdsp->vector_fmul, "vector_fmul"))
327  if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
329  if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
330  test_vector_fmul_scalar(src3, src4);
331  if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
333  if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
334  test_vector_fmul_window(src3, src4, src5);
335  report("vector_fmul");
336  if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
338  report("vector_fmac");
339  if (check_func(fdsp->vector_dmul, "vector_dmul"))
340  test_vector_dmul(dbl_src0, dbl_src1);
341  if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
342  test_vector_dmul_scalar(dbl_src0, dbl_src1);
343  report("vector_dmul");
344  if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
345  test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
346  report("vector_dmac");
347  if (check_func(fdsp->butterflies_float, "butterflies_float"))
348  test_butterflies_float(src3, src4);
349  report("butterflies_float");
350  if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
351  test_scalarproduct_float(src3, src4);
352  report("scalarproduct_float");
353  if (check_func(fdsp->scalarproduct_double, "scalarproduct_double"))
354  test_scalarproduct_double(dbl_src0, dbl_src1);
355  report("scalarproduct_double");
356 
357  av_freep(&fdsp);
358 }
AVFloatDSPContext::butterflies_float
void(* butterflies_float)(float *restrict v1, float *restrict v2, int len)
Calculate the sum and difference of two vectors of floats.
Definition: float_dsp.h:164
mem_internal.h
src1
const pixel * src1
Definition: h264pred_template.c:421
AVFloatDSPContext::vector_fmul_reverse
void(* vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats, and store the result in a vector of floats...
Definition: float_dsp.h:154
randomize_buffer
#define randomize_buffer(buf)
Definition: float_dsp.c:31
float_near_abs_eps
int float_near_abs_eps(float a, float b, float eps)
Definition: checkasm.c:438
check_func
#define check_func(func,...)
Definition: checkasm.h:179
test_scalarproduct_double
static void test_scalarproduct_double(const double *src0, const double *src1)
Definition: float_dsp.c:281
float.h
declare_func_float
#define declare_func_float(ret,...)
Definition: checkasm.h:184
test_vector_dmac_scalar
static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
Definition: float_dsp.c:206
AVFloatDSPContext::vector_dmul
void(* vector_dmul)(double *dst, const double *src0, const double *src1, int len)
Calculate the entry wise product of two vectors of doubles and store the result in a vector of double...
Definition: float_dsp.h:190
call_ref
#define call_ref(...)
Definition: checkasm.h:194
win
static float win(SuperEqualizerContext *s, float n, int N)
Definition: af_superequalizer.c:119
ARBITRARY_FMUL_ADD_CONST
#define ARBITRARY_FMUL_ADD_CONST
Definition: float_dsp.c:89
double_near_abs_eps
int double_near_abs_eps(double a, double b, double eps)
Definition: checkasm.c:478
fail
#define fail()
Definition: checkasm.h:188
checkasm.h
checkasm_check_float_dsp
void checkasm_check_float_dsp(void)
Definition: float_dsp.c:297
AVFloatDSPContext::scalarproduct_float
float(* scalarproduct_float)(const float *v1, const float *v2, int len)
Calculate the scalar product of two vectors of floats.
Definition: float_dsp.h:175
test_butterflies_float
static void test_butterflies_float(const float *src0, const float *src1)
Definition: float_dsp.c:230
test_vector_fmul_scalar
static void test_vector_fmul_scalar(const float *src0, const float *src1)
Definition: float_dsp.c:112
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:150
call_new
#define call_new(...)
Definition: checkasm.h:297
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
test_vector_fmul
static void test_vector_fmul(const float *src0, const float *src1)
Definition: float_dsp.c:43
AVFloatDSPContext::vector_fmul_scalar
void(* vector_fmul_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float.
Definition: float_dsp.h:85
ARBITRARY_FMAC_SCALAR_CONST
#define ARBITRARY_FMAC_SCALAR_CONST
Definition: float_dsp.c:157
AVFloatDSPContext::scalarproduct_double
double(* scalarproduct_double)(const double *v1, const double *v2, size_t len)
Calculate the scalar product of two vectors of doubles.
Definition: float_dsp.h:205
float_dsp.h
AVFloatDSPContext::vector_fmul
void(* vector_fmul)(float *dst, const float *src0, const float *src1, int len)
Calculate the entry wise product of two vectors of floats and store the result in a vector of floats.
Definition: float_dsp.h:38
ARBITRARY_FMUL_WINDOW_CONST
#define ARBITRARY_FMUL_WINDOW_CONST
Definition: float_dsp.c:134
test_vector_dmul_scalar
static void test_vector_dmul_scalar(const double *src0, const double *src1)
Definition: float_dsp.c:183
AVFloatDSPContext
Definition: float_dsp.h:24
test_vector_fmul_add
static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:90
LEN
#define LEN
Definition: float_dsp.c:29
ARBITRARY_SCALARPRODUCT_CONST
#define ARBITRARY_SCALARPRODUCT_CONST
Definition: float_dsp.c:264
report
#define report
Definition: checkasm.h:191
bench_new
#define bench_new(...)
Definition: checkasm.h:368
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AVFloatDSPContext::vector_fmac_scalar
void(* vector_fmac_scalar)(float *dst, const float *src, float mul, int len)
Multiply a vector of floats by a scalar float and add to destination vector.
Definition: float_dsp.h:54
internal.h
src2
const pixel * src2
Definition: h264pred_template.c:422
AVFloatDSPContext::vector_fmul_add
void(* vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len)
Calculate the entry wise product of two vectors of floats, add a third vector of floats and store the...
Definition: float_dsp.h:137
len
int len
Definition: vorbis_enc_data.h:426
ARBITRARY_DMAC_SCALAR_CONST
#define ARBITRARY_DMAC_SCALAR_CONST
Definition: float_dsp.c:205
test_vector_fmul_window
static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
Definition: float_dsp.c:135
AVFloatDSPContext::vector_dmul_scalar
void(* vector_dmul_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of double by a scalar double.
Definition: float_dsp.h:100
test_scalarproduct_float
static void test_scalarproduct_float(const float *src0, const float *src1)
Definition: float_dsp.c:265
src0
const pixel *const src0
Definition: h264pred_template.c:420
mem.h
AVFloatDSPContext::vector_fmul_window
void(* vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len)
Overlap/add with window function.
Definition: float_dsp.h:119
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:183
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
avpriv_float_dsp_alloc
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:146
AVFloatDSPContext::vector_dmac_scalar
void(* vector_dmac_scalar)(double *dst, const double *src, double mul, int len)
Multiply a vector of doubles by a scalar double and add to destination vector.
Definition: float_dsp.h:70
test_vector_dmul
static void test_vector_dmul(const double *src0, const double *src1)
Definition: float_dsp.c:66
test_vector_fmac_scalar
static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
Definition: float_dsp.c:158