/*
 * Masks applied to random 32-bit words before they are stored in the test
 * buffers, indexed by (bit_depth - 8). Entry 0 keeps all 32 bits; entries
 * 1..4 keep the low 9, 10, 11 and 12 bits of each 16-bit half of the word.
 */
29 static const uint32_t
pixel_mask[] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
/*
 * Masks for the random data written through ref0/ref1, indexed by
 * (bit_depth - 8). Every entry limits each 16-bit half of the word: to the
 * low 8 bits for entry 0, then 9, 10, 11 and 12 bits for entries 1..4.
 */
30 static const uint32_t
pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
/*
 * Block dimensions exercised by the tests. sizes[size] is passed as both the
 * height and the width argument of the functions under test.
 * NOTE(review): entry 0 is -1, presumably a placeholder so that real sizes
 * start at index 1 -- confirm against the loop that iterates `size`.
 */
31 static const int sizes[] = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 };
/*
 * Parameter sets for the weighted tests: weights feeds the wx argument(s),
 * denoms the denom argument and offsets the ox argument(s). Each list ends
 * with a -1 sentinel; the test loops walk a pointer through the list and stop
 * at the first negative value.
 */
32 static const int weights[] = { 0, 128, 255, -1 };
33 static const int denoms[] = {0, 7, 12, -1 };
34 static const int offsets[] = {0, 255, -1 };
/*
 * Bytes per pixel for the current bit depth, rounded up to whole bytes.
 * Expands to an expression that reads a `bit_depth` variable, which must be
 * in scope wherever the macro is used.
 */
36 #define SIZEOF_PIXEL ((bit_depth + 7) / 8)
/*
 * Extent used as the bound of the buffer fill loops. Expanded, this is
 * 2 * MAX_PB_SIZE * MAX_PB_SIZE plus twice the 2 * 4 * MAX_PB_SIZE offset by
 * which src0/src1 are advanced from the start of buf0/buf1.
 */
37 #define BUF_SIZE (2 * MAX_PB_SIZE * (2 * 4 + MAX_PB_SIZE))
/*
 * Fill the test buffers with random 32-bit words, stepping k by 4 up to
 * BUF_SIZE + SRC_EXTRA. Words for buf0/buf1 are masked with
 * pixel_mask[bit_depth - 8]. buf0 and buf1 always receive the same word, and
 * so do dst0 and dst1, so each pair starts out with identical contents.
 * Requires `bit_depth`, buf0/buf1 and dst0/dst1 to be in scope at the use
 * site.
 */
39 #define randomize_buffers() \
41 uint32_t mask = pixel_mask[bit_depth - 8]; \
43 for (k = 0; k < BUF_SIZE + SRC_EXTRA; k += 4) { \
44 uint32_t r = rnd() & mask; \
45 AV_WN32A(buf0 + k, r); \
46 AV_WN32A(buf1 + k, r); \
50 AV_WN32A(dst0 + k, r); \
51 AV_WN32A(dst1 + k, r); \
/*
 * Run randomize_buffers(), then fill ref0 and ref1 with identical random
 * words masked with pixel_mask16[bit_depth - 8].
 * NOTE(review): k advances by 2 per 32-bit store, so ref0/ref1 are presumably
 * pointers to 16-bit elements -- confirm against their declarations.
 * NOTE(review): the expansion begins with a complete `randomize_buffers();`
 * statement, so this macro expands to more than one statement; do not use it
 * as the body of an unbraced if/else or loop.
 */
55 #define randomize_buffers_ref() \
56 randomize_buffers(); \
58 uint32_t mask = pixel_mask16[bit_depth - 8]; \
60 for (k = 0; k < BUF_SIZE; k += 2) { \
61 uint32_t r = rnd() & mask; \
62 AV_WN32A(ref0 + k, r); \
63 AV_WN32A(ref1 + k, r); \
/*
 * Source pointers handed to the reference (src0) and tested (src1) functions:
 * buf0/buf1 advanced by 2 * 4 * MAX_PB_SIZE.
 * NOTE(review): presumably the skipped region gives the functions under test
 * valid memory to read before the start of the block -- confirm.
 */
67 #define src0 (buf0 + 2 * 4 * MAX_PB_SIZE)
68 #define src1 (buf1 + 2 * 4 * MAX_PB_SIZE)
88 for (
i = 0;
i < 2;
i++) {
89 for (j = 0; j < 2; j++) {
92 switch ((j << 1) |
i) {
93 case 0:
type =
"pel_pixels";
break;
94 case 1:
type =
"qpel_h";
break;
95 case 2:
type =
"qpel_v";
break;
96 case 3:
type =
"qpel_hv";
break;
100 int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
/* Iterate over the sizes[size] rows of the current block size. */
104 for (row = 0; row < sizes[size]; row++) {
132 for (
i = 0;
i < 2;
i++) {
133 for (j = 0; j < 2; j++) {
136 switch ((j << 1) |
i) {
137 case 0:
type =
"pel_uni_pixels";
break;
138 case 1:
type =
"qpel_uni_h";
break;
139 case 2:
type =
"qpel_uni_v";
break;
140 case 3:
type =
"qpel_uni_hv";
break;
167 const int *denom, *wx, *ox;
169 int height,
int denom,
int wx,
int ox, intptr_t mx, intptr_t my,
int width);
174 for (
i = 0;
i < 2;
i++) {
175 for (j = 0; j < 2; j++) {
178 switch ((j << 1) |
i) {
179 case 0:
type =
"pel_uni_w_pixels";
break;
180 case 1:
type =
"qpel_uni_w_h";
break;
181 case 2:
type =
"qpel_uni_w_v";
break;
182 case 3:
type =
"qpel_uni_w_hv";
break;
186 for (denom =
denoms; *denom >= 0; denom++) {
187 for (wx =
weights; *wx >= 0; wx++) {
188 for (ox =
offsets; *ox >= 0; ox++) {
190 call_ref(dst0,
sizes[
size] *
SIZEOF_PIXEL,
src0,
sizes[
size] *
SIZEOF_PIXEL,
sizes[
size], *denom, *wx, *ox,
i, j,
sizes[
size]);
191 call_new(dst1,
sizes[
size] *
SIZEOF_PIXEL,
src1,
sizes[
size] *
SIZEOF_PIXEL,
sizes[
size], *denom, *wx, *ox,
i, j,
sizes[
size]);
194 bench_new(dst1,
sizes[
size] *
SIZEOF_PIXEL,
src1,
sizes[
size] *
SIZEOF_PIXEL,
sizes[
size], *denom, *wx, *ox,
i, j,
sizes[
size]);
224 for (
i = 0;
i < 2;
i++) {
225 for (j = 0; j < 2; j++) {
228 switch ((j << 1) |
i) {
229 case 0:
type =
"pel_bi_pixels";
break;
230 case 1:
type =
"qpel_bi_h";
break;
231 case 2:
type =
"qpel_bi_v";
break;
232 case 3:
type =
"qpel_bi_hv";
break;
261 const int *denom, *wx, *ox;
264 int height,
int denom,
int wx0,
int wx1,
265 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width);
270 for (
i = 0;
i < 2;
i++) {
271 for (j = 0; j < 2; j++) {
274 switch ((j << 1) |
i) {
275 case 0:
type =
"pel_bi_w_pixels";
break;
276 case 1:
type =
"qpel_bi_w_h";
break;
277 case 2:
type =
"qpel_bi_w_v";
break;
278 case 3:
type =
"qpel_bi_w_hv";
break;
282 for (denom =
denoms; *denom >= 0; denom++) {
283 for (wx =
weights; *wx >= 0; wx++) {
284 for (ox =
offsets; *ox >= 0; ox++) {
286 call_ref(dst0,
sizes[
size] *
SIZEOF_PIXEL,
src0,
sizes[
size] *
SIZEOF_PIXEL, ref0,
sizes[
size], *denom, *wx, *wx, *ox, *ox,
i, j,
sizes[
size]);
287 call_new(dst1,
sizes[
size] *
SIZEOF_PIXEL,
src1,
sizes[
size] *
SIZEOF_PIXEL, ref1,
sizes[
size], *denom, *wx, *wx, *ox, *ox,
i, j,
sizes[
size]);
290 bench_new(dst1,
sizes[
size] *
SIZEOF_PIXEL,
src1,
sizes[
size] *
SIZEOF_PIXEL, ref1,
sizes[
size], *denom, *wx, *wx, *ox, *ox,
i, j,
sizes[
size]);
320 for (
i = 0;
i < 2;
i++) {
321 for (j = 0; j < 2; j++) {
324 switch ((j << 1) |
i) {
325 case 0:
type =
"pel_pixels";
break;
326 case 1:
type =
"epel_h";
break;
327 case 2:
type =
"epel_v";
break;
328 case 3:
type =
"epel_hv";
break;
332 int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
/* Iterate over the sizes[size] rows of the current block size. */
336 for (row = 0; row < sizes[size]; row++) {
364 for (
i = 0;
i < 2;
i++) {
365 for (j = 0; j < 2; j++) {
368 switch ((j << 1) |
i) {
369 case 0:
type =
"pel_uni_pixels";
break;
370 case 1:
type =
"epel_uni_h";
break;
371 case 2:
type =
"epel_uni_v";
break;
372 case 3:
type =
"epel_uni_hv";
break;
399 const int *denom, *wx, *ox;
401 int height,
int denom,
int wx,
int ox, intptr_t mx, intptr_t my,
int width);
406 for (
i = 0;
i < 2;
i++) {
407 for (j = 0; j < 2; j++) {
410 switch ((j << 1) |
i) {
411 case 0:
type =
"pel_uni_w_pixels";
break;
412 case 1:
type =
"epel_uni_w_h";
break;
413 case 2:
type =
"epel_uni_w_v";
break;
414 case 3:
type =
"epel_uni_w_hv";
break;
418 for (denom =
denoms; *denom >= 0; denom++) {
419 for (wx =
weights; *wx >= 0; wx++) {
420 for (ox =
offsets; *ox >= 0; ox++) {
422 call_ref(dst0,
sizes[
size] *
SIZEOF_PIXEL,
src0,
sizes[
size] *
SIZEOF_PIXEL,
sizes[
size], *denom, *wx, *ox,
i, j,
sizes[
size]);
423 call_new(dst1,
sizes[
size] *
SIZEOF_PIXEL,
src1,
sizes[
size] *
SIZEOF_PIXEL,
sizes[
size], *denom, *wx, *ox,
i, j,
sizes[
size]);
426 bench_new(dst1,
sizes[
size] *
SIZEOF_PIXEL,
src1,
sizes[
size] *
SIZEOF_PIXEL,
sizes[
size], *denom, *wx, *ox,
i, j,
sizes[
size]);
456 for (
i = 0;
i < 2;
i++) {
457 for (j = 0; j < 2; j++) {
460 switch ((j << 1) |
i) {
461 case 0:
type =
"pel_bi_pixels";
break;
462 case 1:
type =
"epel_bi_h";
break;
463 case 2:
type =
"epel_bi_v";
break;
464 case 3:
type =
"epel_bi_hv";
break;
493 const int *denom, *wx, *ox;
496 int height,
int denom,
int wx0,
int wx1,
497 int ox0,
int ox1, intptr_t mx, intptr_t my,
int width);
502 for (
i = 0;
i < 2;
i++) {
503 for (j = 0; j < 2; j++) {
506 switch ((j << 1) |
i) {
507 case 0:
type =
"pel_bi_w_pixels";
break;
508 case 1:
type =
"epel_bi_w_h";
break;
509 case 2:
type =
"epel_bi_w_v";
break;
510 case 3:
type =
"epel_bi_w_hv";
break;
514 for (denom =
denoms; *denom >= 0; denom++) {
515 for (wx =
weights; *wx >= 0; wx++) {
516 for (ox =
offsets; *ox >= 0; ox++) {
518 call_ref(dst0,
sizes[
size] *
SIZEOF_PIXEL,
src0,
sizes[
size] *
SIZEOF_PIXEL, ref0,
sizes[
size], *denom, *wx, *wx, *ox, *ox,
i, j,
sizes[
size]);
519 call_new(dst1,
sizes[
size] *
SIZEOF_PIXEL,
src1,
sizes[
size] *
SIZEOF_PIXEL, ref1,
sizes[
size], *denom, *wx, *wx, *ox, *ox,
i, j,
sizes[
size]);
522 bench_new(dst1,
sizes[
size] *
SIZEOF_PIXEL,
src1,
sizes[
size] *
SIZEOF_PIXEL, ref1,
sizes[
size], *denom, *wx, *wx, *ox, *ox,
i, j,
sizes[
size]);