FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dct-test.c
Go to the documentation of this file.
1 /*
2  * (c) 2001 Fabrice Bellard
3  * 2007 Marc Hoffman <marc.hoffman@analog.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * DCT test (c) 2001 Fabrice Bellard
25  * Started from sample code by Juan J. Sierralta P.
26  */
27 
28 #include "config.h"
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <math.h>
36 
37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
41 
42 #include "dct.h"
43 #include "simple_idct.h"
44 #include "aandcttab.h"
45 #include "faandct.h"
46 #include "faanidct.h"
47 #include "x86/idct_xvid.h"
48 #include "dctref.h"
49 
50 #undef printf
51 
52 // BFIN
53 void ff_bfin_idct(int16_t *block);
54 void ff_bfin_fdct(int16_t *block);
55 
56 // ALTIVEC
57 void ff_fdct_altivec(int16_t *block);
58 
59 // ARM
60 void ff_j_rev_dct_arm(int16_t *data);
61 void ff_simple_idct_arm(int16_t *data);
62 void ff_simple_idct_armv5te(int16_t *data);
63 void ff_simple_idct_armv6(int16_t *data);
64 void ff_simple_idct_neon(int16_t *data);
65 
66 void ff_simple_idct_axp(int16_t *data);
67 
/**
 * Description of one (I)DCT implementation under test.
 *
 * The tables below initialize entries positionally, so the member order
 * (name, func, format, mm_support, nonspec) must not change.
 */
struct algo {
    const char *name;               ///< label used in the reports; NULL ends a table
    void (*func)(int16_t *block);   ///< transform, run in place on 64 coefficients
    /**
     * Input layout expected by func. permute() reorders the test block
     * accordingly; SCALE_PERM additionally makes dct_error() rescale the
     * output with ff_aanscales.
     */
    enum formattag {
        NO_PERM,
        MMX_PERM,
        MMX_SIMPLE_PERM,
        SCALE_PERM,
        SSE2_PERM,
        PARTTRANS_PERM,
        TRANSPOSE_PERM
    } format;
    int mm_support;                 ///< AV_CPU_FLAG_* bits that must all be present (0: none)
    int nonspec;                    ///< nonzero: exceeding the accuracy limits is not an error
};
76 
77 static int cpu_flags;
78 
79 static const struct algo fdct_tab[] = {
80  { "REF-DBL", ff_ref_fdct, NO_PERM },
81  { "FAAN", ff_faandct, NO_PERM },
82  { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM },
83  { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
84 
85 #if HAVE_MMX_INLINE
86  { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
89 #endif
90 
91 #if HAVE_ALTIVEC
92  { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
93 #endif
94 
95 #if ARCH_BFIN
96  { "BFINfdct", ff_bfin_fdct, NO_PERM },
97 #endif
98 
99  { 0 }
100 };
101 
102 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
103 void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
104  int16_t *block, int16_t *qmat);
105 
106 static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
107  DECLARE_ALIGNED(16, static int16_t, qmat)[64];
108  DECLARE_ALIGNED(16, static int16_t, tmp)[64];
109  int i;
110 
111  for(i=0; i<64; i++){
112  qmat[i]=4;
113  tmp[i]= dst[i];
114  }
115  ff_prores_idct_put_10_sse2(dst, 16, tmp, qmat);
116 }
117 #endif
118 
119 static const struct algo idct_tab[] = {
120  { "FAANI", ff_faanidct, NO_PERM },
121  { "REF-DBL", ff_ref_idct, NO_PERM },
122  { "INT", ff_j_rev_dct, MMX_PERM },
123  { "SIMPLE-C", ff_simple_idct_8, NO_PERM },
124 
125 #if HAVE_MMX_INLINE
127  { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
128  { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
129  { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
130 #if ARCH_X86_64 && HAVE_YASM
131  { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
132 #endif
133 #endif
134 
135 #if ARCH_BFIN
136  { "BFINidct", ff_bfin_idct, NO_PERM },
137 #endif
138 
139 #if ARCH_ARM
140  { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
141  { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
142 #endif
143 #if HAVE_ARMV5TE
144  { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE },
145 #endif
146 #if HAVE_ARMV6
147  { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 },
148 #endif
149 #if HAVE_NEON
151 #endif
152 
153 #if ARCH_ALPHA
154  { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
155 #endif
156 
157  { 0 }
158 };
159 
160 #define AANSCALE_BITS 12
161 
162 #define NB_ITS 20000
163 #define NB_ITS_SPEED 50000
164 
165 static short idct_mmx_perm[64];
166 
/*
 * Destination index for each source coefficient when a transform uses
 * MMX_SIMPLE_PERM (see permute()). The table is only ever read, so it is
 * const.
 */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
177 
178 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
179 
180 static void idct_mmx_init(void)
181 {
182  int i;
183 
184  /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
185  for (i = 0; i < 64; i++) {
186  idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
187  }
188 }
189 
190 DECLARE_ALIGNED(16, static int16_t, block)[64];
191 DECLARE_ALIGNED(8, static int16_t, block1)[64];
192 
193 static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
194 {
195  int i, j;
196 
197  memset(block, 0, 64 * sizeof(*block));
198 
199  switch (test) {
200  case 0:
201  for (i = 0; i < 64; i++)
202  block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
203  if (is_idct) {
204  ff_ref_fdct(block);
205  for (i = 0; i < 64; i++)
206  block[i] >>= 3;
207  }
208  break;
209  case 1:
210  j = av_lfg_get(prng) % 10 + 1;
211  for (i = 0; i < j; i++) {
212  int idx = av_lfg_get(prng) % 64;
213  block[idx] = av_lfg_get(prng) % (2*vals) -vals;
214  }
215  break;
216  case 2:
217  block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
218  block[63] = (block[0] & 1) ^ 1;
219  break;
220  }
221 }
222 
223 static void permute(int16_t dst[64], const int16_t src[64], int perm)
224 {
225  int i;
226 
227  if (perm == MMX_PERM) {
228  for (i = 0; i < 64; i++)
229  dst[idct_mmx_perm[i]] = src[i];
230  } else if (perm == MMX_SIMPLE_PERM) {
231  for (i = 0; i < 64; i++)
232  dst[idct_simple_mmx_perm[i]] = src[i];
233  } else if (perm == SSE2_PERM) {
234  for (i = 0; i < 64; i++)
235  dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
236  } else if (perm == PARTTRANS_PERM) {
237  for (i = 0; i < 64; i++)
238  dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
239  } else if (perm == TRANSPOSE_PERM) {
240  for (i = 0; i < 64; i++)
241  dst[(i>>3) | ((i<<3)&0x38)] = src[i];
242  } else {
243  for (i = 0; i < 64; i++)
244  dst[i] = src[i];
245  }
246 }
247 
248 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
249 {
250  void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
251  int it, i, scale;
252  int err_inf, v;
253  int64_t err2, ti, ti1, it1, err_sum = 0;
254  int64_t sysErr[64], sysErrMax = 0;
255  int maxout = 0;
256  int blockSumErrMax = 0, blockSumErr;
257  AVLFG prng;
258  const int vals=1<<bits;
259  double omse, ome;
260  int spec_err;
261 
262  av_lfg_init(&prng, 1);
263 
264  err_inf = 0;
265  err2 = 0;
266  for (i = 0; i < 64; i++)
267  sysErr[i] = 0;
268  for (it = 0; it < NB_ITS; it++) {
269  init_block(block1, test, is_idct, &prng, vals);
270  permute(block, block1, dct->format);
271 
272  dct->func(block);
273  emms_c();
274 
275  if (dct->format == SCALE_PERM) {
276  for (i = 0; i < 64; i++) {
277  scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
278  block[i] = (block[i] * scale) >> AANSCALE_BITS;
279  }
280  }
281 
282  ref(block1);
283 
284  blockSumErr = 0;
285  for (i = 0; i < 64; i++) {
286  int err = block[i] - block1[i];
287  err_sum += err;
288  v = abs(err);
289  if (v > err_inf)
290  err_inf = v;
291  err2 += v * v;
292  sysErr[i] += block[i] - block1[i];
293  blockSumErr += v;
294  if (abs(block[i]) > maxout)
295  maxout = abs(block[i]);
296  }
297  if (blockSumErrMax < blockSumErr)
298  blockSumErrMax = blockSumErr;
299  }
300  for (i = 0; i < 64; i++)
301  sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
302 
303  for (i = 0; i < 64; i++) {
304  if (i % 8 == 0)
305  printf("\n");
306  printf("%7d ", (int) sysErr[i]);
307  }
308  printf("\n");
309 
310  omse = (double) err2 / NB_ITS / 64;
311  ome = (double) err_sum / NB_ITS / 64;
312 
313  spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
314 
315  printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
316  is_idct ? "IDCT" : "DCT", dct->name, err_inf,
317  omse, ome, (double) sysErrMax / NB_ITS,
318  maxout, blockSumErrMax);
319 
320  if (spec_err && !dct->nonspec)
321  return 1;
322 
323  if (!speed)
324  return 0;
325 
326  /* speed test */
327 
328  init_block(block, test, is_idct, &prng, vals);
329  permute(block1, block, dct->format);
330 
331  ti = av_gettime();
332  it1 = 0;
333  do {
334  for (it = 0; it < NB_ITS_SPEED; it++) {
335  memcpy(block, block1, sizeof(block));
336  dct->func(block);
337  }
338  emms_c();
339  it1 += NB_ITS_SPEED;
340  ti1 = av_gettime() - ti;
341  } while (ti1 < 1000000);
342 
343  printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
344  (double) it1 * 1000.0 / (double) ti1);
345 
346  return 0;
347 }
348 
351 
352 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
353 {
354  static int init;
355  static double c8[8][8];
356  static double c4[4][4];
357  double block1[64], block2[64], block3[64];
358  double s, sum, v;
359  int i, j, k;
360 
361  if (!init) {
362  init = 1;
363 
364  for (i = 0; i < 8; i++) {
365  sum = 0;
366  for (j = 0; j < 8; j++) {
367  s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
368  c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
369  sum += c8[i][j] * c8[i][j];
370  }
371  }
372 
373  for (i = 0; i < 4; i++) {
374  sum = 0;
375  for (j = 0; j < 4; j++) {
376  s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
377  c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
378  sum += c4[i][j] * c4[i][j];
379  }
380  }
381  }
382 
383  /* butterfly */
384  s = 0.5 * sqrt(2.0);
385  for (i = 0; i < 4; i++) {
386  for (j = 0; j < 8; j++) {
387  block1[8 * (2 * i) + j] =
388  (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
389  block1[8 * (2 * i + 1) + j] =
390  (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
391  }
392  }
393 
394  /* idct8 on lines */
395  for (i = 0; i < 8; i++) {
396  for (j = 0; j < 8; j++) {
397  sum = 0;
398  for (k = 0; k < 8; k++)
399  sum += c8[k][j] * block1[8 * i + k];
400  block2[8 * i + j] = sum;
401  }
402  }
403 
404  /* idct4 */
405  for (i = 0; i < 8; i++) {
406  for (j = 0; j < 4; j++) {
407  /* top */
408  sum = 0;
409  for (k = 0; k < 4; k++)
410  sum += c4[k][j] * block2[8 * (2 * k) + i];
411  block3[8 * (2 * j) + i] = sum;
412 
413  /* bottom */
414  sum = 0;
415  for (k = 0; k < 4; k++)
416  sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
417  block3[8 * (2 * j + 1) + i] = sum;
418  }
419  }
420 
421  /* clamp and store the result */
422  for (i = 0; i < 8; i++) {
423  for (j = 0; j < 8; j++) {
424  v = block3[8 * i + j];
425  if (v < 0) v = 0;
426  else if (v > 255) v = 255;
427  dest[i * linesize + j] = (int) rint(v);
428  }
429  }
430 }
431 
432 static void idct248_error(const char *name,
433  void (*idct248_put)(uint8_t *dest, int line_size,
434  int16_t *block),
435  int speed)
436 {
437  int it, i, it1, ti, ti1, err_max, v;
438  AVLFG prng;
439 
440  av_lfg_init(&prng, 1);
441 
442  /* just one test to see if code is correct (precision is less
443  important here) */
444  err_max = 0;
445  for (it = 0; it < NB_ITS; it++) {
446  /* XXX: use forward transform to generate values */
447  for (i = 0; i < 64; i++)
448  block1[i] = av_lfg_get(&prng) % 256 - 128;
449  block1[0] += 1024;
450 
451  for (i = 0; i < 64; i++)
452  block[i] = block1[i];
453  idct248_ref(img_dest1, 8, block);
454 
455  for (i = 0; i < 64; i++)
456  block[i] = block1[i];
457  idct248_put(img_dest, 8, block);
458 
459  for (i = 0; i < 64; i++) {
460  v = abs((int) img_dest[i] - (int) img_dest1[i]);
461  if (v == 255)
462  printf("%d %d\n", img_dest[i], img_dest1[i]);
463  if (v > err_max)
464  err_max = v;
465  }
466 #if 0
467  printf("ref=\n");
468  for(i=0;i<8;i++) {
469  int j;
470  for(j=0;j<8;j++) {
471  printf(" %3d", img_dest1[i*8+j]);
472  }
473  printf("\n");
474  }
475 
476  printf("out=\n");
477  for(i=0;i<8;i++) {
478  int j;
479  for(j=0;j<8;j++) {
480  printf(" %3d", img_dest[i*8+j]);
481  }
482  printf("\n");
483  }
484 #endif
485  }
486  printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
487 
488  if (!speed)
489  return;
490 
491  ti = av_gettime();
492  it1 = 0;
493  do {
494  for (it = 0; it < NB_ITS_SPEED; it++) {
495  for (i = 0; i < 64; i++)
496  block[i] = block1[i];
497  idct248_put(img_dest, 8, block);
498  }
499  emms_c();
500  it1 += NB_ITS_SPEED;
501  ti1 = av_gettime() - ti;
502  } while (ti1 < 1000000);
503 
504  printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
505  (double) it1 * 1000.0 / (double) ti1);
506 }
507 
/**
 * Print the command line summary. The synopsis lists every option that
 * main() passes to getopt().
 */
static void help(void)
{
    printf("dct-test [-i] [-4] [-t] [-h] [<test-number>] [<bits>]\n"
           "test-number 0 -> test with random matrices\n"
           "            1 -> test with random sparse matrices (default)\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "bits        Number of time domain bits to use, 8 is default\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n"
           "-t          speed test\n"
           "-h          show this help\n");
}
519 
520 #if !HAVE_GETOPT
521 #include "compat/getopt.c"
522 #endif
523 
524 int main(int argc, char **argv)
525 {
526  int test_idct = 0, test_248_dct = 0;
527  int c, i;
528  int test = 1;
529  int speed = 0;
530  int err = 0;
531  int bits=8;
532 
534 
535  ff_ref_dct_init();
536  idct_mmx_init();
537 
538  for (;;) {
539  c = getopt(argc, argv, "ih4t");
540  if (c == -1)
541  break;
542  switch (c) {
543  case 'i':
544  test_idct = 1;
545  break;
546  case '4':
547  test_248_dct = 1;
548  break;
549  case 't':
550  speed = 1;
551  break;
552  default:
553  case 'h':
554  help();
555  return 0;
556  }
557  }
558 
559  if (optind < argc)
560  test = atoi(argv[optind]);
561  if(optind+1 < argc) bits= atoi(argv[optind+1]);
562 
563  printf("ffmpeg DCT/IDCT test\n");
564 
565  if (test_248_dct) {
566  idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
567  } else {
568  const struct algo *algos = test_idct ? idct_tab : fdct_tab;
569  for (i = 0; algos[i].name; i++)
570  if (!(~cpu_flags & algos[i].mm_support)) {
571  err |= dct_error(&algos[i], test, test_idct, speed, bits);
572  }
573  }
574 
575  return err;
576 }