/* Byte-replicated constants. ~0UL/255 is the unsigned long whose every byte
 * is 0x01, so multiplying it by 0x7f (or 0x80) yields an unsigned long with
 * 0x7f (or 0x80) in every byte. The width follows the platform's
 * unsigned long. */
56 #define pb_7f (~0UL/255 * 0x7f)
57 #define pb_80 (~0UL/255 * 0x80)
62 0, 8, 1, 9, 16, 24, 2, 10,
63 17, 25, 32, 40, 48, 56, 33, 41,
64 18, 26, 3, 11, 4, 12, 19, 27,
65 34, 42, 49, 57, 50, 58, 35, 43,
66 20, 28, 5, 13, 6, 14, 21, 29,
67 36, 44, 51, 59, 52, 60, 37, 45,
68 22, 30, 7, 15, 23, 31, 38, 46,
69 53, 61, 54, 62, 39, 47, 55, 63,
76 0, 1, 2, 3, 8, 9, 16, 17,
77 10, 11, 4, 5, 6, 7, 15, 14,
78 13, 12, 19, 18, 24, 25, 32, 33,
79 26, 27, 20, 21, 22, 23, 28, 29,
80 30, 31, 34, 35, 40, 41, 48, 49,
81 42, 43, 36, 37, 38, 39, 44, 45,
82 46, 47, 50, 51, 56, 57, 58, 59,
83 52, 53, 54, 55, 60, 61, 62, 63,
87 0, 8, 16, 24, 1, 9, 2, 10,
88 17, 25, 32, 40, 48, 56, 57, 49,
89 41, 33, 26, 18, 3, 11, 4, 12,
90 19, 27, 34, 42, 50, 58, 35, 43,
91 51, 59, 20, 28, 5, 13, 6, 14,
92 21, 29, 36, 44, 52, 60, 37, 45,
93 53, 61, 22, 30, 7, 15, 23, 31,
94 38, 46, 54, 62, 39, 47, 55, 63,
99 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
100 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
101 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
102 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
103 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
104 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
105 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
106 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
121 j = src_scantable[i];
135 int idct_permutation_type)
139 switch(idct_permutation_type){
142 idct_permutation[i]= i;
146 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
154 idct_permutation[i]= ((i&7)<<3) | (i>>3);
158 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
174 for (i = 0; i < 16; i++) {
175 for (j = 0; j < 16; j += 8) {
186 pix += line_size - 16;
197 for (i = 0; i < 16; i++) {
198 for (j = 0; j < 16; j += 8) {
210 register uint64_t x=*(uint64_t*)pix;
212 s += sq[(x>>8)&0xff];
213 s += sq[(x>>16)&0xff];
214 s += sq[(x>>24)&0xff];
215 s += sq[(x>>32)&0xff];
216 s += sq[(x>>40)&0xff];
217 s += sq[(x>>48)&0xff];
218 s += sq[(x>>56)&0xff];
220 register uint32_t x=*(uint32_t*)pix;
222 s += sq[(x>>8)&0xff];
223 s += sq[(x>>16)&0xff];
224 s += sq[(x>>24)&0xff];
225 x=*(uint32_t*)(pix+4);
227 s += sq[(x>>8)&0xff];
228 s += sq[(x>>16)&0xff];
229 s += sq[(x>>24)&0xff];
234 pix += line_size - 16;
242 for(i=0; i+8<=w; i+=8){
269 for (i = 0; i < h; i++) {
270 s += sq[pix1[0] - pix2[0]];
271 s += sq[pix1[1] - pix2[1]];
272 s += sq[pix1[2] - pix2[2]];
273 s += sq[pix1[3] - pix2[3]];
286 for (i = 0; i < h; i++) {
287 s += sq[pix1[0] - pix2[0]];
288 s += sq[pix1[1] - pix2[1]];
289 s += sq[pix1[2] - pix2[2]];
290 s += sq[pix1[3] - pix2[3]];
291 s += sq[pix1[4] - pix2[4]];
292 s += sq[pix1[5] - pix2[5]];
293 s += sq[pix1[6] - pix2[6]];
294 s += sq[pix1[7] - pix2[7]];
307 for (i = 0; i < h; i++) {
308 s += sq[pix1[ 0] - pix2[ 0]];
309 s += sq[pix1[ 1] - pix2[ 1]];
310 s += sq[pix1[ 2] - pix2[ 2]];
311 s += sq[pix1[ 3] - pix2[ 3]];
312 s += sq[pix1[ 4] - pix2[ 4]];
313 s += sq[pix1[ 5] - pix2[ 5]];
314 s += sq[pix1[ 6] - pix2[ 6]];
315 s += sq[pix1[ 7] - pix2[ 7]];
316 s += sq[pix1[ 8] - pix2[ 8]];
317 s += sq[pix1[ 9] - pix2[ 9]];
318 s += sq[pix1[10] - pix2[10]];
319 s += sq[pix1[11] - pix2[11]];
320 s += sq[pix1[12] - pix2[12]];
321 s += sq[pix1[13] - pix2[13]];
322 s += sq[pix1[14] - pix2[14]];
323 s += sq[pix1[15] - pix2[15]];
337 block[0] = s1[0] - s2[0];
338 block[1] = s1[1] - s2[1];
339 block[2] = s1[2] - s2[2];
340 block[3] = s1[3] - s2[3];
341 block[4] = s1[4] - s2[4];
342 block[5] = s1[5] - s2[5];
343 block[6] = s1[6] - s2[6];
344 block[7] = s1[7] - s2[7];
358 pixels[0] = av_clip_uint8(block[0]);
359 pixels[1] = av_clip_uint8(block[1]);
360 pixels[2] = av_clip_uint8(block[2]);
361 pixels[3] = av_clip_uint8(block[3]);
362 pixels[4] = av_clip_uint8(block[4]);
363 pixels[5] = av_clip_uint8(block[5]);
364 pixels[6] = av_clip_uint8(block[6]);
365 pixels[7] = av_clip_uint8(block[7]);
379 pixels[0] = av_clip_uint8(block[0]);
380 pixels[1] = av_clip_uint8(block[1]);
381 pixels[2] = av_clip_uint8(block[2]);
382 pixels[3] = av_clip_uint8(block[3]);
396 pixels[0] = av_clip_uint8(block[0]);
397 pixels[1] = av_clip_uint8(block[1]);
410 for (i = 0; i < 8; i++) {
411 for (j = 0; j < 8; j++) {
414 else if (*block > 127)
417 *pixels = (
uint8_t)(*block + 128);
421 pixels += (line_size - 8);
432 pixels[0] += block[0];
433 pixels[1] += block[1];
434 pixels[2] += block[2];
435 pixels[3] += block[3];
436 pixels[4] += block[4];
437 pixels[5] += block[5];
438 pixels[6] += block[6];
439 pixels[7] += block[7];
452 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
453 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
454 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
455 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
456 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
457 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
458 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
459 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
472 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
473 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
474 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
475 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
488 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
489 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
499 sum+=
FFABS(block[i]);
507 for (i = 0; i < h; i++) {
508 memset(block, value, 16);
517 for (i = 0; i < h; i++) {
518 memset(block, value, 8);
/* avg2: adds 1 to the sum of two values and shifts right by one
 * (a round-half-up mean for non-negative operands).
 * avg4: adds 2 to the sum of four values and shifts right by two.
 * Every argument is parenthesised so that operands containing operators of
 * lower precedence than '+' (e.g. x|y, x>>n, c ? a : b) are combined as
 * written by the caller. Arguments are still evaluated exactly once each. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
528 const int A=(16-x16)*(16-y16);
529 const int B=( x16)*(16-y16);
530 const int C=(16-x16)*( y16);
531 const int D=( x16)*( y16);
536 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] +
rounder)>>8;
537 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] +
rounder)>>8;
538 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] +
rounder)>>8;
539 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] +
rounder)>>8;
540 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] +
rounder)>>8;
541 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] +
rounder)>>8;
542 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] +
rounder)>>8;
543 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] +
rounder)>>8;
564 int src_x, src_y, frac_x, frac_y,
index;
573 if((
unsigned)src_x <
width){
574 if((
unsigned)src_y <
height){
575 index= src_x + src_y*
stride;
576 dst[y*stride + x]= ( ( src[
index ]*(s-frac_x)
577 + src[index +1]* frac_x )*(s-frac_y)
578 + ( src[index+stride ]*(s-frac_x)
579 + src[index+stride+1]* frac_x )* frac_y
582 index= src_x + av_clip(src_y, 0, height)*
stride;
583 dst[y*stride + x]= ( ( src[
index ]*(s-frac_x)
584 + src[index +1]* frac_x )*s
588 if((
unsigned)src_y <
height){
589 index= av_clip(src_x, 0, width) + src_y*
stride;
590 dst[y*stride + x]= ( ( src[
index ]*(s-frac_y)
591 + src[index+stride ]* frac_y )*s
594 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*
stride;
595 dst[y*stride + x]= src[
index ];
609 case 2: put_pixels2_8_c (dst, src, stride, height);
break;
610 case 4: put_pixels4_8_c (dst, src, stride, height);
break;
611 case 8: put_pixels8_8_c (dst, src, stride, height);
break;
612 case 16:put_pixels16_8_c(dst, src, stride, height);
break;
618 for (i=0; i <
height; i++) {
619 for (j=0; j <
width; j++) {
620 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
629 for (i=0; i <
height; i++) {
630 for (j=0; j <
width; j++) {
631 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
640 for (i=0; i <
height; i++) {
641 for (j=0; j <
width; j++) {
642 dst[j] = (683*(2*src[j] + src[j+
stride] + 1)) >> 11;
651 for (i=0; i <
height; i++) {
652 for (j=0; j <
width; j++) {
653 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+
stride] + 2*src[j+stride+1] + 6)) >> 15;
662 for (i=0; i <
height; i++) {
663 for (j=0; j <
width; j++) {
664 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15;
673 for (i=0; i <
height; i++) {
674 for (j=0; j <
width; j++) {
675 dst[j] = (683*(src[j] + 2*src[j+
stride] + 1)) >> 11;
684 for (i=0; i <
height; i++) {
685 for (j=0; j <
width; j++) {
686 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15;
695 for (i=0; i <
height; i++) {
696 for (j=0; j <
width; j++) {
697 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+
stride] + 4*src[j+stride+1] + 6)) >> 15;
706 case 2: avg_pixels2_8_c (dst, src, stride, height);
break;
707 case 4: avg_pixels4_8_c (dst, src, stride, height);
break;
708 case 8: avg_pixels8_8_c (dst, src, stride, height);
break;
709 case 16:avg_pixels16_8_c(dst, src, stride, height);
break;
715 for (i=0; i <
height; i++) {
716 for (j=0; j <
width; j++) {
717 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
726 for (i=0; i <
height; i++) {
727 for (j=0; j <
width; j++) {
728 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
737 for (i=0; i <
height; i++) {
738 for (j=0; j <
width; j++) {
739 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+
stride] + 1)) >> 11) + 1) >> 1;
748 for (i=0; i <
height; i++) {
749 for (j=0; j <
width; j++) {
750 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+
stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
759 for (i=0; i <
height; i++) {
760 for (j=0; j <
width; j++) {
761 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
770 for (i=0; i <
height; i++) {
771 for (j=0; j <
width; j++) {
772 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+
stride] + 1)) >> 11) + 1) >> 1;
781 for (i=0; i <
height; i++) {
782 for (j=0; j <
width; j++) {
783 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+
stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
792 for (i=0; i <
height; i++) {
793 for (j=0; j <
width; j++) {
794 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+
stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
801 #define QPEL_MC(r, OPNAME, RND, OP) \
802 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
803 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
807 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
808 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
809 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
810 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
811 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
812 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
813 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
814 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
820 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
822 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
826 const int src0= src[0*srcStride];\
827 const int src1= src[1*srcStride];\
828 const int src2= src[2*srcStride];\
829 const int src3= src[3*srcStride];\
830 const int src4= src[4*srcStride];\
831 const int src5= src[5*srcStride];\
832 const int src6= src[6*srcStride];\
833 const int src7= src[7*srcStride];\
834 const int src8= src[8*srcStride];\
835 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
836 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
837 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
838 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
839 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
840 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
841 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
842 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
848 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
849 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
854 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
855 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
856 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
857 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
858 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
859 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
860 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
861 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
862 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
863 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
864 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
865 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
866 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
867 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
868 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
869 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
875 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
876 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
881 const int src0= src[0*srcStride];\
882 const int src1= src[1*srcStride];\
883 const int src2= src[2*srcStride];\
884 const int src3= src[3*srcStride];\
885 const int src4= src[4*srcStride];\
886 const int src5= src[5*srcStride];\
887 const int src6= src[6*srcStride];\
888 const int src7= src[7*srcStride];\
889 const int src8= src[8*srcStride];\
890 const int src9= src[9*srcStride];\
891 const int src10= src[10*srcStride];\
892 const int src11= src[11*srcStride];\
893 const int src12= src[12*srcStride];\
894 const int src13= src[13*srcStride];\
895 const int src14= src[14*srcStride];\
896 const int src15= src[15*srcStride];\
897 const int src16= src[16*srcStride];\
898 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
899 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
900 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
901 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
902 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
903 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
904 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
905 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
906 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
907 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
908 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
909 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
910 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
911 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
912 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
913 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
919 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
922 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
923 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
926 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
928 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
931 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
934 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
935 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
938 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
942 copy_block9(full, src, 16, stride, 9);\
943 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
944 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
947 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
950 copy_block9(full, src, 16, stride, 9);\
951 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
954 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
958 copy_block9(full, src, 16, stride, 9);\
959 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
960 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
962 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
968 copy_block9(full, src, 16, stride, 9);\
969 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
970 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
971 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
972 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
974 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
979 copy_block9(full, src, 16, stride, 9);\
980 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
981 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
982 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
983 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
985 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
991 copy_block9(full, src, 16, stride, 9);\
992 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
993 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
994 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
995 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
997 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1001 uint8_t halfHV[64];\
1002 copy_block9(full, src, 16, stride, 9);\
1003 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1004 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1005 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1006 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1008 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1010 uint8_t full[16*9];\
1013 uint8_t halfHV[64];\
1014 copy_block9(full, src, 16, stride, 9);\
1015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1016 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1017 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1018 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1020 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1022 uint8_t full[16*9];\
1024 uint8_t halfHV[64];\
1025 copy_block9(full, src, 16, stride, 9);\
1026 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1027 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1028 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1029 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1031 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1033 uint8_t full[16*9];\
1036 uint8_t halfHV[64];\
1037 copy_block9(full, src, 16, stride, 9);\
1038 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1039 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1040 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1041 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1043 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1045 uint8_t full[16*9];\
1047 uint8_t halfHV[64];\
1048 copy_block9(full, src, 16, stride, 9);\
1049 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1050 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1051 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1052 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1054 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1057 uint8_t halfHV[64];\
1058 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1059 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1060 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1062 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1065 uint8_t halfHV[64];\
1066 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1067 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1068 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1070 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1072 uint8_t full[16*9];\
1075 uint8_t halfHV[64];\
1076 copy_block9(full, src, 16, stride, 9);\
1077 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1078 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1079 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1080 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1082 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1084 uint8_t full[16*9];\
1086 copy_block9(full, src, 16, stride, 9);\
1087 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1088 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1089 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1091 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1093 uint8_t full[16*9];\
1096 uint8_t halfHV[64];\
1097 copy_block9(full, src, 16, stride, 9);\
1098 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1099 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1100 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1101 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1103 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1105 uint8_t full[16*9];\
1107 copy_block9(full, src, 16, stride, 9);\
1108 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1109 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1110 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1112 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1115 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1116 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1119 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1122 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1123 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1126 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1128 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1131 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1134 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1135 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1138 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1140 uint8_t full[24*17];\
1142 copy_block17(full, src, 24, stride, 17);\
1143 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1144 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1147 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1149 uint8_t full[24*17];\
1150 copy_block17(full, src, 24, stride, 17);\
1151 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1154 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1156 uint8_t full[24*17];\
1158 copy_block17(full, src, 24, stride, 17);\
1159 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1160 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1162 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1164 uint8_t full[24*17];\
1165 uint8_t halfH[272];\
1166 uint8_t halfV[256];\
1167 uint8_t halfHV[256];\
1168 copy_block17(full, src, 24, stride, 17);\
1169 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1170 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1171 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1172 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1174 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1176 uint8_t full[24*17];\
1177 uint8_t halfH[272];\
1178 uint8_t halfHV[256];\
1179 copy_block17(full, src, 24, stride, 17);\
1180 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1181 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1182 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1183 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1185 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1187 uint8_t full[24*17];\
1188 uint8_t halfH[272];\
1189 uint8_t halfV[256];\
1190 uint8_t halfHV[256];\
1191 copy_block17(full, src, 24, stride, 17);\
1192 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1193 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1194 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1195 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1197 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1199 uint8_t full[24*17];\
1200 uint8_t halfH[272];\
1201 uint8_t halfHV[256];\
1202 copy_block17(full, src, 24, stride, 17);\
1203 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1204 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1205 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1206 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1208 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1210 uint8_t full[24*17];\
1211 uint8_t halfH[272];\
1212 uint8_t halfV[256];\
1213 uint8_t halfHV[256];\
1214 copy_block17(full, src, 24, stride, 17);\
1215 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1216 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1217 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1218 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1220 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1222 uint8_t full[24*17];\
1223 uint8_t halfH[272];\
1224 uint8_t halfHV[256];\
1225 copy_block17(full, src, 24, stride, 17);\
1226 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1227 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1228 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1229 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1231 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1233 uint8_t full[24*17];\
1234 uint8_t halfH[272];\
1235 uint8_t halfV[256];\
1236 uint8_t halfHV[256];\
1237 copy_block17(full, src, 24, stride, 17);\
1238 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1239 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1240 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1241 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1243 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1245 uint8_t full[24*17];\
1246 uint8_t halfH[272];\
1247 uint8_t halfHV[256];\
1248 copy_block17(full, src, 24, stride, 17);\
1249 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1250 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1251 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1252 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1254 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1256 uint8_t halfH[272];\
1257 uint8_t halfHV[256];\
1258 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1259 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1260 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1262 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1264 uint8_t halfH[272];\
1265 uint8_t halfHV[256];\
1266 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1267 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1268 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1270 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1272 uint8_t full[24*17];\
1273 uint8_t halfH[272];\
1274 uint8_t halfV[256];\
1275 uint8_t halfHV[256];\
1276 copy_block17(full, src, 24, stride, 17);\
1277 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1278 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1279 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1280 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1282 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1284 uint8_t full[24*17];\
1285 uint8_t halfH[272];\
1286 copy_block17(full, src, 24, stride, 17);\
1287 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1288 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1289 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1291 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1293 uint8_t full[24*17];\
1294 uint8_t halfH[272];\
1295 uint8_t halfV[256];\
1296 uint8_t halfHV[256];\
1297 copy_block17(full, src, 24, stride, 17);\
1298 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1299 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1300 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1301 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1303 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1305 uint8_t full[24*17];\
1306 uint8_t halfH[272];\
1307 copy_block17(full, src, 24, stride, 17);\
1308 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1309 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1310 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1312 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1314 uint8_t halfH[272];\
1315 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1316 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Store operators intended for use as the OP argument of QPEL_MC (op_avg is
 * passed that way in the instantiation visible right after these lines; the
 * others are presumably used likewise - confirm).
 *
 * `b` is the raw filter sum. ((b) + 16) >> 5 scales it down by 32 with
 * rounding; the no_rnd forms add 15 instead of 16. The scaled value indexes
 * `cm`, which is NOT a macro parameter: a variable of that name must be in
 * scope wherever the macro expands.
 *
 * op_put / op_put_no_rnd overwrite `a` with the looked-up value.
 * op_avg / op_avg_no_rnd combine the looked-up value with the current `a`
 * and shift right by one; op_avg adds 1 before that shift, op_avg_no_rnd
 * does not.
 *
 * `a` is an assignment target and appears twice in the avg forms, so it must
 * be an lvalue expression without side effects. */
1319 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1320 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1321 #define op_put(a, b) a = cm[((b) + 16)>>5]
1322 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1326 QPEL_MC(0, avg_ , _ ,
op_avg)
1329 #undef op_avg_no_rnd
1331 #undef op_put_no_rnd
/*
 * Bodies of four fixed-size wrappers (their signatures are outside the
 * visible lines): each forwards dst/src/stride to the generic put/avg
 * helper with the height fixed to the block width (8 or 16).
 */
1335 put_pixels8_8_c(dst, src, stride, 8);
1339 avg_pixels8_8_c(dst, src, stride, 8);
1343 put_pixels16_8_c(dst, src, stride, 16);
1347 avg_pixels16_8_c(dst, src, stride, 16);
/*
 * The mc00 entries of the qpel tables are plain aliases for the ff_*_pixels
 * NxN functions; the put_no_rnd variants map to the same functions as the
 * ordinary put variants.
 */
1350 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1351 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1352 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1353 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1354 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1355 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
/*
 * Horizontal 4-tap filter for one row of 8 outputs (enclosing function and
 * row loop are not visible). Output k is
 *   cm[(9*(src[k] + src[k+1]) - (src[k-1] + src[k+2]) + 8) >> 4]
 * so the row reads src[-1] through src[9]. The +8 rounds before the
 * divide-by-16 shift; cm is indexed with the result (presumably a clip table).
 */
1362 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1363 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1364 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1365 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1366 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1367 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1368 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1369 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
/*
 * Compiled only when the RV40 decoder is enabled. The four visible
 * statements are bodies of wrappers whose signatures are not shown; each
 * forwards to the xy2 put/avg helper with the height fixed to 16 or 8.
 */
1375 #if CONFIG_RV40_DECODER
1378 put_pixels16_xy2_8_c(dst, src, stride, 16);
1382 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1386 put_pixels8_xy2_8_c(dst, src, stride, 8);
1390 avg_pixels8_xy2_8_c(dst, src, stride, 8);
/*
 * DIRAC_MC(OPNAME) generates nine ff_<OPNAME>_dirac_pixels* functions:
 * widths 8/16/32, each in a one-source, two-source (_l2) and four-source
 * (_l4) form. Every function takes a five-entry source pointer array, but
 * the visible bodies only use src[0]..src[3]. The same stride is passed for
 * dst and for every source. The 32-wide forms are two 16-wide calls, the
 * second offset by 16 bytes.
 */
1394 #if CONFIG_DIRAC_DECODER
1395 #define DIRAC_MC(OPNAME)\
1396 void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1398 OPNAME ## _pixels8_8_c(dst, src[0], stride, h);/* one source, width 8 */\
1400 void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1402 OPNAME ## _pixels16_8_c(dst, src[0], stride, h);/* one source, width 16 */\
1404 void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1406 OPNAME ## _pixels16_8_c(dst , src[0] , stride, h);/* left 16 columns */\
1407 OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);/* right 16 columns */\
1409 void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1411 OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);/* two sources, width 8 */\
1413 void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1415 OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);/* two sources, width 16 */\
1417 void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1419 OPNAME ## _pixels16_l2_8(dst , src[0] , src[1] , stride, stride, stride, h);/* left 16 columns */\
1420 OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);/* right 16 columns */\
1422 void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1424 OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);/* four sources, width 8 */\
1426 void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1428 OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);/* four sources, width 16 */\
1430 void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1432 OPNAME ## _pixels16_l4_8(dst , src[0] , src[1] , src[2] , src[3] , stride, stride, stride, stride, stride, h);/* left 16 columns */\
1433 OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);/* right 16 columns */\
/*
 * Vertical 4-tap filter for one column of 8 outputs (enclosing function and
 * column loop are not visible). The eleven samples from one row above
 * (src[-srcStride]) to nine rows below are loaded first, then output k is
 *   cm[(9*(s[k] + s[k+1]) - (s[k-1] + s[k+2]) + 8) >> 4]
 * written to dst[k*dstStride].
 */
1444 const int src_1= src[ -srcStride];
1445 const int src0 = src[0 ];
1446 const int src1 = src[ srcStride];
1447 const int src2 = src[2*srcStride];
1448 const int src3 = src[3*srcStride];
1449 const int src4 = src[4*srcStride];
1450 const int src5 = src[5*srcStride];
1451 const int src6 = src[6*srcStride];
1452 const int src7 = src[7*srcStride];
1453 const int src8 = src[8*srcStride];
1454 const int src9 = src[9*srcStride];
1455 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1456 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1457 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1458 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1459 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1460 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1461 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1462 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/*
 * Final statements of four separate 8x8 functions (signatures and the code
 * that fills half/halfV/halfHV are not visible). Each writes dst as the
 * two-source combination of its inputs.
 */
/* src combined with a packed (stride 8) half-filtered block */
1472 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
/* same, but with src shifted one pixel to the right */
1484 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
/* two packed intermediates combined; both fragments below issue the same call */
1500 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1510 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
/*
 * Loop-filter fragment across a horizontal edge (it writes rows x-2*stride
 * and x+stride). Active only when an H.263 decoder or encoder is configured.
 * The loads of p0..p3 and the updates of p1/p2 and ad1 are outside the
 * visible lines.
 */
1520 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
/* edge gradient estimate from the four samples straddling the edge */
1530 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
/* d1 follows d for |d| < strength, ramps back to zero up to 2*strength,
 * and is zero below -2*strength (the final else branch is not visible) */
1532 if (d<-2*strength) d1= 0;
1533 else if(d<- strength) d1=-2*strength - d;
1534 else if(d< strength) d1= d;
1535 else if(d< 2*strength) d1= 2*strength - d;
/* bit 8 set means the value left 0..255: negative values become 0, positive
 * overflow becomes ~0 (255 once stored in 8 bits); relies on >> of a
 * negative int being an arithmetic shift */
1540 if(p1&256) p1= ~(p1>>31);
1541 if(p2&256) p2= ~(p2>>31);
/* correction for the outer samples, limited to +-ad1 */
1548 d2= av_clip((p0-p3)/4, -ad1, ad1);
1550 src[x-2*
stride] = p0 - d2;
1551 src[x+
stride] = p3 + d2;
/*
 * Loop-filter fragment across a vertical edge: for row y it works on the
 * four samples at columns -2..+1 around the edge. Same arithmetic as the
 * row-oriented variant; the computation of ad1 and the p1/p2 updates are
 * outside the visible lines.
 */
1557 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1563 int p0= src[y*stride-2];
1564 int p1= src[y*stride-1];
1565 int p2= src[y*stride+0];
1566 int p3= src[y*stride+1];
/* edge gradient estimate */
1567 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
/* piecewise-linear response: identity near zero, ramping back to zero
 * towards 2*strength (the final else branch is not visible) */
1569 if (d<-2*strength) d1= 0;
1570 else if(d<- strength) d1=-2*strength - d;
1571 else if(d< strength) d1= d;
1572 else if(d< 2*strength) d1= 2*strength - d;
/* saturate to 0..255: bit 8 flags an out-of-range value; negative -> 0,
 * positive -> ~0 (255 after the 8-bit store) */
1577 if(p1&256) p1= ~(p1>>31);
1578 if(p2&256) p2= ~(p2>>31);
1580 src[y*stride-1] = p1;
1581 src[y*stride+0] = p2;
/* outer samples get a smaller correction limited to +-ad1 */
1585 d2= av_clip((p0-p3)/4, -ad1, ad1);
1587 src[y*stride-2] = p0 - d2;
1588 src[y*stride+1] = p3 + d2;
/*
 * One row of a 16-wide sum of absolute differences: adds |pix1[k] - pix2[k]|
 * for k = 0..15 to s. The row loop, pointer advance and return are outside
 * the visible lines.
 */
1599 s += abs(pix1[0] - pix2[0]);
1600 s += abs(pix1[1] - pix2[1]);
1601 s += abs(pix1[2] - pix2[2]);
1602 s += abs(pix1[3] - pix2[3]);
1603 s += abs(pix1[4] - pix2[4]);
1604 s += abs(pix1[5] - pix2[5]);
1605 s += abs(pix1[6] - pix2[6]);
1606 s += abs(pix1[7] - pix2[7]);
1607 s += abs(pix1[8] - pix2[8]);
1608 s += abs(pix1[9] - pix2[9]);
1609 s += abs(pix1[10] - pix2[10]);
1610 s += abs(pix1[11] - pix2[11]);
1611 s += abs(pix1[12] - pix2[12]);
1612 s += abs(pix1[13] - pix2[13]);
1613 s += abs(pix1[14] - pix2[14]);
1614 s += abs(pix1[15] - pix2[15]);
/*
 * One row of a 16-wide SAD against a horizontally interpolated reference:
 * pix1[k] is compared with avg2(pix2[k], pix2[k+1]), so the row reads
 * pix2[0] through pix2[16]. avg2 is defined outside the visible lines.
 */
1627 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1628 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1629 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1630 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1631 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1632 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1633 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1634 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1635 s += abs(pix1[8] -
avg2(pix2[8], pix2[9]));
1636 s += abs(pix1[9] -
avg2(pix2[9], pix2[10]));
1637 s += abs(pix1[10] -
avg2(pix2[10], pix2[11]));
1638 s += abs(pix1[11] -
avg2(pix2[11], pix2[12]));
1639 s += abs(pix1[12] -
avg2(pix2[12], pix2[13]));
1640 s += abs(pix1[13] -
avg2(pix2[13], pix2[14]));
1641 s += abs(pix1[14] -
avg2(pix2[14], pix2[15]));
1642 s += abs(pix1[15] -
avg2(pix2[15], pix2[16]));
/*
 * One row of a 16-wide SAD against a vertically interpolated reference:
 * pix1[k] is compared with avg2(pix2[k], pix3[k]) where pix3 is the row
 * directly below pix2.
 */
/* pix3: reference row one line_size below pix2 */
1652 uint8_t *pix3 = pix2 + line_size;
1656 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1657 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1658 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1659 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1660 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1661 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1662 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1663 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1664 s += abs(pix1[8] -
avg2(pix2[8], pix3[8]));
1665 s += abs(pix1[9] -
avg2(pix2[9], pix3[9]));
1666 s += abs(pix1[10] -
avg2(pix2[10], pix3[10]));
1667 s += abs(pix1[11] -
avg2(pix2[11], pix3[11]));
1668 s += abs(pix1[12] -
avg2(pix2[12], pix3[12]));
1669 s += abs(pix1[13] -
avg2(pix2[13], pix3[13]));
1670 s += abs(pix1[14] -
avg2(pix2[14], pix3[14]));
1671 s += abs(pix1[15] -
avg2(pix2[15], pix3[15]));
/*
 * One row of a 16-wide SAD against a reference interpolated in both
 * directions: pix1[k] is compared with avg4 of the 2x2 neighbourhood
 * pix2[k], pix2[k+1], pix3[k], pix3[k+1]. Reads columns 0..16 of both
 * reference rows. avg4 is defined outside the visible lines.
 */
/* pix3: reference row one line_size below pix2 */
1682 uint8_t *pix3 = pix2 + line_size;
1686 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1687 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1688 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1689 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1690 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1691 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1692 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1693 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1694 s += abs(pix1[8] -
avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1695 s += abs(pix1[9] -
avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1696 s += abs(pix1[10] -
avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1697 s += abs(pix1[11] -
avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1698 s += abs(pix1[12] -
avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1699 s += abs(pix1[13] -
avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1700 s += abs(pix1[14] -
avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1701 s += abs(pix1[15] -
avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/*
 * One row of an 8-wide sum of absolute differences: adds |pix1[k] - pix2[k]|
 * for k = 0..7 to s. The row loop and return are outside the visible lines.
 */
1715 s += abs(pix1[0] - pix2[0]);
1716 s += abs(pix1[1] - pix2[1]);
1717 s += abs(pix1[2] - pix2[2]);
1718 s += abs(pix1[3] - pix2[3]);
1719 s += abs(pix1[4] - pix2[4]);
1720 s += abs(pix1[5] - pix2[5]);
1721 s += abs(pix1[6] - pix2[6]);
1722 s += abs(pix1[7] - pix2[7]);
/*
 * One row of an 8-wide SAD against a horizontally interpolated reference:
 * pix1[k] versus avg2(pix2[k], pix2[k+1]); reads pix2[0] through pix2[8].
 */
1735 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1736 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1737 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1738 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1739 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1740 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1741 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1742 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
/*
 * One row of an 8-wide SAD against a vertically interpolated reference:
 * pix1[k] versus avg2(pix2[k], pix3[k]), with pix3 the row below pix2.
 */
/* pix3: reference row one line_size below pix2 */
1752 uint8_t *pix3 = pix2 + line_size;
1756 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1757 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1758 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1759 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1760 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1761 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1762 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1763 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
/*
 * One row of an 8-wide SAD against a reference interpolated in both
 * directions: pix1[k] versus avg4 of pix2[k], pix2[k+1], pix3[k], pix3[k+1].
 * Reads columns 0..8 of both reference rows.
 */
/* pix3: reference row one line_size below pix2 */
1774 uint8_t *pix3 = pix2 + line_size;
1778 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1779 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1780 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1781 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1782 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1783 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1784 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1785 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
/*
 * Fragment of a 16-wide two-part score (row loop, declarations and the
 * first return branch are not visible).
 */
/* score1: sum of squared differences between s1 and s2 over 16 columns */
1800 for(x=0; x<16; x++){
1801 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
/* score2: difference between the two blocks' absolute 2x2 cross gradients;
 * only 15 columns because each term also reads column x+1 and the row below */
1804 for(x=0; x<15; x++){
1805 score2+=
FFABS( s1[x ] - s1[x +stride]
1806 - s1[x+1] + s1[x+1+stride])
1807 -
FFABS( s2[x ] - s2[x +stride]
1808 - s2[x+1] + s2[x+1+stride]);
/* fallback branch: gradient mismatch weighted by a fixed factor of 8 */
1816 else return score1 +
FFABS(score2)*8;
/*
 * Fragment of the narrower counterpart of the 16-wide score: same two
 * accumulators and same fallback weighting. The loop headers that fix the
 * width are not visible.
 */
/* squared difference term */
1827 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
/* difference of the absolute 2x2 cross gradients of s1 and s2 */
1831 score2+=
FFABS( s1[x ] - s1[x +stride]
1832 - s1[x+1] + s1[x+1+stride])
1833 -
FFABS( s2[x ] - s2[x +stride]
1834 - s2[x+1] + s2[x+1+stride]);
/* fallback branch: gradient mismatch weighted by a fixed factor of 8 */
1842 else return score1 +
FFABS(score2)*8;
/* Loop over the 64 coefficients of an 8x8 block (the computation of w and b
 * is on lines not shown). */
1849 for(i=0; i<8*8; i++){
/* * binds tighter than >>, so this adds ((w*b)*(w*b)) >> 4 */
1855 sum += (w*
b)*(w*b)>>4;
/* Second 64-coefficient loop; it belongs to a different function whose body
 * is not visible. */
1863 for(i=0; i<8*8; i++){
/* Clear six pointer-sized slots at cmp before they are assigned
 * (the assignments are outside the visible lines). */
1875 memset(cmp, 0,
sizeof(
void*)*6);
/*
 * Adds src to dst over w bytes (visible tail statement: dst[i] += src[i]).
 * The first loop handles sizeof(long) bytes per iteration: it loads one
 * machine word from each buffer; the statement that combines and stores
 * them is on lines outside this view.
 *
 * Fix: sizeof yields size_t, so the old bound `w-sizeof(long)` was computed
 * in unsigned arithmetic. For w < sizeof(long) it wrapped to a huge value
 * and the word loop read and wrote past the end of both buffers. Casting to
 * int keeps the bound signed, so short inputs skip the word loop entirely.
 * NOTE(review): assumes w is a signed integer; its declaration is not visible.
 */
1934 for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
1935 long a = *(long*)(src+i);
1936 long b = *(long*)(dst+i);
/* byte-wise remainder */
1940 dst[i+0] += src[i+0];
/*
 * Computes dst[i] = src1[i] - src2[i] over w bytes.
 *
 * Without fast unaligned access, a src2 pointer that is not aligned to
 * sizeof(long) takes a byte-wise loop unrolled eight times; otherwise the
 * word loop loads sizeof(long) bytes from each source per iteration (the
 * combining store is on lines outside this view). The last visible
 * statement is the byte-wise remainder.
 *
 * Fix: the word-loop bound `w-sizeof(long)` was evaluated as size_t, so for
 * w < sizeof(long) it wrapped around and the loop overran all three buffers.
 * Casting sizeof to int keeps the comparison signed.
 * NOTE(review): assumes w is a signed integer; its declaration is not visible.
 */
1945 #if !HAVE_FAST_UNALIGNED
1946 if((long)src2 & (sizeof(long)-1)){
1947 for(i=0; i+7<w; i+=8){
1948 dst[i+0] = src1[i+0]-src2[i+0];
1949 dst[i+1] = src1[i+1]-src2[i+1];
1950 dst[i+2] = src1[i+2]-src2[i+2];
1951 dst[i+3] = src1[i+3]-src2[i+3];
1952 dst[i+4] = src1[i+4]-src2[i+4];
1953 dst[i+5] = src1[i+5]-src2[i+5];
1954 dst[i+6] = src1[i+6]-src2[i+6];
1955 dst[i+7] = src1[i+7]-src2[i+7];
/* word-at-a-time path; signed bound so that w < sizeof(long) runs zero iterations */
1959 for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
1960 long a = *(long*)(src1+i);
1961 long b = *(long*)(src2+i);
/* byte-wise remainder */
1965 dst[i+0] = src1[i+0]-src2[i+0];
/*
 * Median-prediction fragments from separate functions (loops and
 * signatures are not visible). The predictor is mid_pred() of the left value
 * l, the value src1[i], and their gradient combination (l + src1[i] - lt)
 * wrapped to 8 bits.
 */
/* reconstruct: predictor plus the stored residual diff[i] */
1976 l=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
/* same predictor, kept in pred for use on lines not shown */
1993 const int pred=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
/* stops one element short of w; the loop body is outside the visible lines */
2006 for(i=0; i<w-1; i++){
/* Butterfly helper macros. The bodies of BUTTERFLY2 (two outputs o1/o2 from
 * two inputs i1/i2) and BUTTERFLY1 (operating on x/y) are on continuation
 * lines that are not visible here. */
2063 #define BUTTERFLY2(o1,o2,i1,i2) \
2067 #define BUTTERFLY1(x,y) \
2076 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y))) /* |x+y| + |x-y|; evaluates each argument twice */
/*
 * First butterfly stage for row i of an 8x8 block of differences: each call
 * pairs two adjacent columns of src - dst and writes the two results to the
 * matching slots of temp (row-major, 8 entries per row). The row loop and
 * later stages are outside the visible lines.
 */
2087 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2088 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2089 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2090 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
/*
 * Same first butterfly stage as the difference variant, but fed directly
 * from src (no second block is subtracted).
 */
2132 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2133 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2134 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2135 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
/* removes the term built from temp[0] and temp[32] from the total
 * NOTE(review): presumably this discards the block mean (DC) - confirm */
2166 sum -=
FFABS(temp[8*0] + temp[8*4]);
2184 const int s07 = SRC(0) + SRC(7);/* body of an 8-point 1-D transform macro (its #define line is not visible): sums of mirrored input pairs */\
2185 const int s16 = SRC(1) + SRC(6);\
2186 const int s25 = SRC(2) + SRC(5);\
2187 const int s34 = SRC(3) + SRC(4);\
2188 const int a0 = s07 + s34;/* a0..a3: second stage on the sums, feeding the even outputs */\
2189 const int a1 = s16 + s25;\
2190 const int a2 = s07 - s34;\
2191 const int a3 = s16 - s25;\
2192 const int d07 = SRC(0) - SRC(7);/* differences of the same mirrored pairs, feeding the odd outputs */\
2193 const int d16 = SRC(1) - SRC(6);\
2194 const int d25 = SRC(2) - SRC(5);\
2195 const int d34 = SRC(3) - SRC(4);\
2196 const int a4 = d16 + d25 + (d07 + (d07>>1));/* x + (x>>1) scales by 1.5 using shifts only */\
2197 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2198 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2199 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2201 DST(1, a4 + (a7>>2)) ;/* outputs 0 and 4 are emitted on lines not shown */\
2202 DST(2, a2 + (a3>>1)) ;\
2203 DST(3, a5 + (a6>>2)) ;\
2205 DST(5, a6 - (a5>>2)) ;\
2206 DST(6, (a2>>1) - a3 ) ;\
2207 DST(7, (a4>>2) - a7 ) ;\
/* First pass: SRC/DST address dct[i][x], so the 1-D transform runs in place
 * along the second index for each i. The loop body is not visible. */
2218 #define SRC(x) dct[i][x]
2219 #define DST(x,v) dct[i][x]= v
2220 for( i = 0; i < 8; i++ )
/* Second pass: SRC reads along the first index (dct[x][i]) and DST no longer
 * stores anything; it adds the absolute value of each output to sum. */
2225 #define
SRC(x) dct[x][i]
2226 #define DST(x,v) sum += FFABS(v)
2227 for( i = 0; i < 8; i++ )
/* bak aliases the 64 entries following the first 64 of temp, so temp must
 * provide at least 128 int16_t elements */
2254 int16_t *
const bak =
temp+64;
/* save the 64 current coefficients before temp is modified on lines not shown */
2262 memcpy(bak,
temp, 64*
sizeof(int16_t));
/* squared difference between the processed coefficients and the saved copy */
2269 sum+= (
temp[i]-bak[i])*(
temp[i]-bak[i]);
/*
 * Fragment of a rate-distortion score for an 8x8 block (declarations and
 * the bit-counting statements are outside the visible lines).
 */
/* walk the coefficients in scan order from start_i up to, but not
 * including, last */
2309 for(i=start_i; i<last; i++){
2310 int j= scantable[i];
/* true exactly when 0 <= level <= 127 */
2315 if((level&(~127)) == 0){
/* level for the coefficient at position i, biased by +64 so the same
 * 0..127 range test applies */
2325 level=
temp[i] + 64;
2329 if((level&(~127)) == 0){
/* distortion from the context's sse[1] comparison of two 8x8 blocks */
2345 distortion= s->
dsp.
sse[1](NULL, lsrc2, lsrc1, 8, 8);
/* total: distortion plus bits * qscale^2 * 109 / 128, rounded by the +64 */
2347 return distortion + ((bits*s->
qscale*s->
qscale*109 + 64)>>7);
/*
 * Fragment with the same scan-order walk and 0..127 level tests as the
 * rate-distortion variant; what is accumulated is on lines not shown.
 */
2380 for(i=start_i; i<last; i++){
2381 int j= scantable[i];
/* true exactly when 0 <= level <= 127 */
2386 if((level&(~127)) == 0){
/* coefficient at position i biased by +64 before the range test */
2396 level=
temp[i] + 64;
2400 if((level&(~127)) == 0){
/*
 * VSAD_INTRA(size) generates vsad_intra<size>_c: a sum of absolute
 * differences between vertically adjacent samples of one block s, size
 * columns wide. The dummy parameter is not used in the visible lines.
 */
2409 #define VSAD_INTRA(size) \
2410 static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){/* c and dummy are not referenced in the visible lines */ \
2414 for(y=1; y<h; y++){/* starts at 1: h rows give h-1 vertical pairs */ \
2415 for(x=0; x<size; x+=4){/* four columns per iteration, so size must be a multiple of 4 */ \
2416 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2417 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
/* 16-wide two-block variant: absolute vertical gradient of the difference
 * s1 - s2, i.e. |(s1[x] - s2[x]) - (s1[x+stride] - s2[x+stride])| */
2432 for(x=0; x<16; x++){
2433 score+=
FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* Square of a; the argument is evaluated twice, so it must be free of side
 * effects. */
2442 #define SQ(a) ((a)*(a))
/*
 * VSSE_INTRA(size) generates vsse_intra<size>_c: like the absolute-value
 * variant but summing squared differences between vertically adjacent
 * samples of one block.
 */
2443 #define VSSE_INTRA(size) \
2444 static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){/* c and dummy are not referenced in the visible lines */ \
2448 for(y=1; y<h; y++){/* h rows give h-1 vertical pairs */ \
2449 for(x=0; x<size; x+=4){/* four columns per iteration */ \
2450 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2451 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
/* 16-wide two-block variant: squared vertical gradient of the difference
 * s1 - s2 */
2466 for(x=0; x<16; x++){
2467 score+=
SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* Sum of squared element-wise differences between pix1 and pix2 over size
 * elements (the element types are declared outside the visible lines). */
2480 for(i=0; i<
size; i++)
2481 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
/*
 * WRAPPER8_16_SQ(name8, name16) defines name16 on top of an existing 8x8
 * comparison name8 by summing its score over 8x8 sub-blocks. The visible
 * lines show two pairs of calls on the left (offset 0) and right (offset 8)
 * halves; the code between the pairs is not visible.
 */
2485 #define WRAPPER8_16_SQ(name8, name16)\
2486 static int name16(void *s, uint8_t *dst, uint8_t *src, int stride, int h){/* h is not passed on: name8 is always called with height 8 */\
2488 score +=name8(s, dst , src , stride, 8);\
2489 score +=name8(s, dst+8 , src+8 , stride, 8);\
2493 score +=name8(s, dst , src , stride, 8);/* second pair; presumably for the lower half after advancing dst/src - confirm */\
2494 score +=name8(s, dst+8 , src+8 , stride, 8);\
2511 uint32_t maxi, uint32_t maxisign)
/*
 * Clips one value using unsigned integer comparisons only.
 * NOTE(review): a, mini and maxi are presumably raw float bit patterns, and
 * the scheme appears to rely on mini being negative and maxi positive -
 * confirm against the caller. XOR with 1U<<31 flips the top (sign) bit
 * before the upper-bound comparison. The fall-through return is on a line
 * not shown.
 */
2514 if(a > mini)
return mini;
2515 else if((a^(1
U<<31)) > maxisign)
return maxi;
/*
 * Clips len values from src into dst using the integer-comparison helper.
 * min, max, src and dst are all accessed through uint32_t pointers, i.e.
 * their bit patterns are used rather than their numeric values.
 */
2521 uint32_t mini = *(uint32_t*)min;
2522 uint32_t maxi = *(uint32_t*)max;
/* precompute the sign-flipped upper bound once for the whole vector */
2523 uint32_t maxisign = maxi ^ (1
U<<31);
2524 uint32_t *dsti = (uint32_t*)dst;
2525 const uint32_t *srci = (
const uint32_t*)src;
/* eight elements per iteration with no remainder loop: if len is not a
 * multiple of 8 the last iteration touches elements beyond len */
2526 for(i=0; i<
len; i+=8) {
2527 dsti[i + 0] =
clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2528 dsti[i + 1] =
clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2529 dsti[i + 2] =
clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2530 dsti[i + 3] =
clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2531 dsti[i + 4] =
clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2532 dsti[i + 5] =
clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2533 dsti[i + 6] =
clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2534 dsti[i + 7] =
clipf_c_one(srci[i + 7], mini, maxi, maxisign);
/* Bounds of opposite sign take a separate branch whose body is not visible;
 * the loop below is the generic path using av_clipf(). */
2539 if(min < 0 && max > 0) {
/* eight elements per iteration with no remainder loop: if len is not a
 * multiple of 8 the last iteration touches elements beyond len */
2542 for(i=0; i <
len; i+=8) {
2543 dst[i ] = av_clipf(src[i ], min, max);
2544 dst[i + 1] = av_clipf(src[i + 1], min, max);
2545 dst[i + 2] = av_clipf(src[i + 2], min, max);
2546 dst[i + 3] = av_clipf(src[i + 3], min, max);
2547 dst[i + 4] = av_clipf(src[i + 4], min, max);
2548 dst[i + 5] = av_clipf(src[i + 5], min, max);
2549 dst[i + 6] = av_clipf(src[i + 6], min, max);
2550 dst[i + 7] = av_clipf(src[i + 7], min, max);
/* dot-product step: multiply the current elements of v1 and v2, accumulate
 * into res, and advance both pointers */
2560 res += *v1++ * *v2++;
/* multiply-add step from a different function: adds mul times the current
 * element of v3 to the current element of v1, advancing both pointers */
2570 *v1++ += mul * *v3++;
2576 const int16_t *window,
unsigned int len)
/*
 * Applies a symmetric window: only the first len/2 coefficients are read
 * from window, and coefficient i scales both input[i] and its mirror
 * input[len-i-1]. Adding 1 << 14 (half of 1 << 15) rounds the result before
 * the >> 15 shift. For odd len the middle element is not written by the
 * visible loop.
 */
2579 int len2 = len >> 1;
2581 for (i = 0; i < len2; i++) {
2582 int16_t w = window[i];
2583 output[i] = (
MUL16(input[i], w) + (1 << 14)) >> 15;
2584 output[len-i-1] = (
MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
/* Eight clipped copies in a row: each reads the next src element, limits it
 * to [min, max] with av_clip() and stores it at the next dst position. The
 * surrounding loop is outside the visible lines. */
2592 *dst++ = av_clip(*src++, min, max);
2593 *dst++ = av_clip(*src++, min, max);
2594 *dst++ = av_clip(*src++, min, max);
2595 *dst++ = av_clip(*src++, min, max);
2596 *dst++ = av_clip(*src++, min, max);
2597 *dst++ = av_clip(*src++, min, max);
2598 *dst++ = av_clip(*src++, min, max);
2599 *dst++ = av_clip(*src++, min, max);
/* single-sample "put": block[0] divided by 8 with rounding (+4 before >>3),
 * clipped to 0..255 */
2639 dest[0] = av_clip_uint8((block[0] + 4)>>3);
/* single-sample "add": the same rounded value added to the existing sample */
2643 dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
/* 512-iteration loop; its body is not visible */
2651 for(i=0;i<512;i++) {
/* function-level static flag; presumably used so the warning is emitted only
 * once (the code that sets it is not visible) */
2659 static int did_fail=0;
/* non-zero low four address bits: aligned is not on a 16-byte boundary */
2662 if((intptr_t)aligned & 15){
/* the message is compiled in only on builds with MMX or AltiVec support */
2664 #if HAVE_MMX || HAVE_ALTIVEC
2666 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2667 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2668 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2669 "Do not report crashes to FFmpeg developers.\n");
2700 #endif //CONFIG_ENCODERS
/* branches of an if/else chain on avctx->lowres; the values 2 and 3 are
 * handled here, and the branch bodies are outside the visible lines */
2707 }
else if(avctx->
lowres==2){
2712 }
else if(avctx->
lowres==3){
/*
 * dspfunc(PFX, IDX, NUM) fills the 16 entries of c-><PFX>_pixels_tab[IDX]
 * with the functions <PFX><NUM>_mcXY_c. The table index is X + 4*Y
 * (mc10 -> 1, mc01 -> 4, mc33 -> 15). The last line has no trailing
 * semicolon, so each use supplies its own.
 */
2786 #define dspfunc(PFX, IDX, NUM) \
2787 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2788 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2789 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2790 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2791 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2792 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2793 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2794 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2795 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2796 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2797 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2798 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2799 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2800 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2801 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2802 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
/* table row 0 holds the 16x16 functions, row 1 the 8x8 functions */
2805 dspfunc(put_no_rnd_qpel, 0, 16);
2811 dspfunc(put_no_rnd_qpel, 1, 8);
/* SET_CMP_FUNC(name) installs the two size variants of a comparison
 * function: slot 0 gets <name>16_c and slot 1 gets <name>8x8_c. */
2827 #define SET_CMP_FUNC(name) \
2828 c->name[0]= name ## 16_c;\
2829 c->name[1]= name ## 8x8_c;
/* slots 4 and 5 of vsad and vsse hold the single-block (intra) variants,
 * 16 wide and 8 wide respectively */
2848 c->
vsad[4]= vsad_intra16_c;
2849 c->
vsad[5]= vsad_intra8_c;
2851 c->
vsse[4]= vsse_intra16_c;
2852 c->
vsse[5]= vsse_intra8_c;
/* compile-time guard: Snow decoder or encoder enabled (guarded lines not visible) */
2855 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
/* H.263 guard written as a plain C condition on the configuration constants
 * (guarded statements not visible) */
2870 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
/* Name builders for bit-depth specific functions: FUNC(f, 8) gives f_8,
 * FUNCC(f, 8) gives f_8_c. */
2893 #define FUNC(f, depth) f ## _ ## depth
2894 #define FUNCC(f, depth) f ## _ ## depth ## _c
/* Selects the get_pixels implementation for the given bit depth. */
2900 #define BIT_DEPTH_FUNCS(depth) \
2901 c->get_pixels = FUNCC(get_pixels, depth);