102 #if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
104 int16_t *
block, int16_t *qmat);
106 static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
130 #if ARCH_X86_64 && HAVE_YASM
160 #define AANSCALE_BITS 12
163 #define NB_ITS_SPEED 50000
168 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
169 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
170 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
171 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
172 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
173 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
174 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
175 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
185 for (i = 0; i < 64; i++) {
186 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
197 memset(block, 0, 64 *
sizeof(*block));
201 for (i = 0; i < 64; i++)
202 block[i] = (
av_lfg_get(prng) % (2*vals)) -vals;
205 for (i = 0; i < 64; i++)
211 for (i = 0; i < j; i++) {
213 block[idx] =
av_lfg_get(prng) % (2*vals) -vals;
217 block[ 0] =
av_lfg_get(prng) % (16*vals) - (8*vals);
218 block[63] = (block[0] & 1) ^ 1;
228 for (i = 0; i < 64; i++)
229 dst[idct_mmx_perm[i]] = src[i];
231 for (i = 0; i < 64; i++)
232 dst[idct_simple_mmx_perm[i]] = src[i];
234 for (i = 0; i < 64; i++)
235 dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
237 for (i = 0; i < 64; i++)
238 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
240 for (i = 0; i < 64; i++)
241 dst[(i>>3) | ((i<<3)&0x38)] = src[i];
243 for (i = 0; i < 64; i++)
253 int64_t err2, ti, ti1, it1, err_sum = 0;
254 int64_t sysErr[64], sysErrMax = 0;
256 int blockSumErrMax = 0, blockSumErr;
258 const int vals=1<<
bits;
266 for (i = 0; i < 64; i++)
268 for (it = 0; it <
NB_ITS; it++) {
276 for (i = 0; i < 64; i++) {
285 for (i = 0; i < 64; i++) {
292 sysErr[i] +=
block[i] - block1[i];
294 if (abs(
block[i]) > maxout)
295 maxout = abs(
block[i]);
297 if (blockSumErrMax < blockSumErr)
298 blockSumErrMax = blockSumErr;
300 for (i = 0; i < 64; i++)
301 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
303 for (i = 0; i < 64; i++) {
306 printf(
"%7d ", (
int) sysErr[i]);
310 omse = (double) err2 / NB_ITS / 64;
311 ome = (double) err_sum / NB_ITS / 64;
313 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
315 printf(
"%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
316 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
317 omse, ome, (
double) sysErrMax / NB_ITS,
318 maxout, blockSumErrMax);
341 }
while (ti1 < 1000000);
343 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
344 (
double) it1 * 1000.0 / (
double) ti1);
355 static double c8[8][8];
356 static double c4[4][4];
357 double block1[64], block2[64], block3[64];
364 for (i = 0; i < 8; i++) {
366 for (j = 0; j < 8; j++) {
367 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
368 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
369 sum += c8[i][j] * c8[i][j];
373 for (i = 0; i < 4; i++) {
375 for (j = 0; j < 4; j++) {
376 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
377 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
378 sum += c4[i][j] * c4[i][j];
385 for (i = 0; i < 4; i++) {
386 for (j = 0; j < 8; j++) {
387 block1[8 * (2 * i) + j] =
388 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) *
s;
389 block1[8 * (2 * i + 1) + j] =
390 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) *
s;
395 for (i = 0; i < 8; i++) {
396 for (j = 0; j < 8; j++) {
398 for (k = 0; k < 8; k++)
399 sum += c8[k][j] * block1[8 * i + k];
400 block2[8 * i + j] = sum;
405 for (i = 0; i < 8; i++) {
406 for (j = 0; j < 4; j++) {
409 for (k = 0; k < 4; k++)
410 sum += c4[k][j] * block2[8 * (2 * k) + i];
411 block3[8 * (2 * j) + i] = sum;
415 for (k = 0; k < 4; k++)
416 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
417 block3[8 * (2 * j + 1) + i] = sum;
422 for (i = 0; i < 8; i++) {
423 for (j = 0; j < 8; j++) {
424 v = block3[8 * i + j];
426 else if (v > 255) v = 255;
427 dest[i * linesize + j] = (int)
rint(v);
433 void (*idct248_put)(
uint8_t *dest,
int line_size,
437 int it, i, it1, ti, ti1, err_max,
v;
445 for (it = 0; it <
NB_ITS; it++) {
447 for (i = 0; i < 64; i++)
451 for (i = 0; i < 64; i++)
455 for (i = 0; i < 64; i++)
459 for (i = 0; i < 64; i++) {
486 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
495 for (i = 0; i < 64; i++)
502 }
while (ti1 < 1000000);
504 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
505 (
double) it1 * 1000.0 / (
double) ti1);
510 printf(
"dct-test [-i] [<test-number>] [<bits>]\n"
511 "test-number 0 -> test with random matrixes\n"
512 " 1 -> test with random sparse matrixes\n"
513 " 2 -> do 3. test from mpeg4 std\n"
514 "bits Number of time domain bits to use, 8 is default\n"
515 "-i test IDCT implementations\n"
516 "-4 test IDCT248 implementations\n"
524 int main(
int argc,
char **argv)
526 int test_idct = 0, test_248_dct = 0;
539 c =
getopt(argc, argv,
"ih4t");
560 test = atoi(argv[
optind]);
561 if(optind+1 < argc) bits= atoi(argv[optind+1]);
563 printf(
"ffmpeg DCT/IDCT test\n");
568 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
569 for (i = 0; algos[i].
name; i++)
571 err |=
dct_error(&algos[i], test, test_idct, speed, bits);