00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026 #include "libavutil/x86_cpu.h"
00027
/* Immediate operand for an "and" instruction: clears the low three bits of a
 * register, i.e. rounds an address down to a multiple of 8. */
00028 #define ALIGN_MASK "$-8"
00029
/* Drop any earlier definitions so the variants selected by the HAVE_* tests
 * below can be (re)defined. */
00030 #undef REAL_PAVGB
00031 #undef PAVGB
00032 #undef PMINUB
00033 #undef PMAXUB
00034
/* REAL_PAVGB(a,b): asm text for one packed byte-average instruction, "pavgb"
 * with MMX2 or "pavgusb" with 3DNow!. The asm is in AT&T operand order, so b
 * is the destination. Left undefined when neither extension is enabled. */
00035 #if HAVE_MMX2
00036 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
00037 #elif HAVE_AMD3DNOW
00038 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
00039 #endif
/* PAVGB forwards to REAL_PAVGB; the extra level lets macro arguments be
 * expanded before REAL_PAVGB stringifies them with #. */
00040 #define PAVGB(a,b) REAL_PAVGB(a,b)
00041
/* PMINUB(x,y,t): y = min(x,y) per unsigned byte.
 * MMX2 uses "pminub" and ignores t. The plain-MMX variant computes
 * t = saturate(y - x); y -= t, clobbering the scratch register t.
 * Note that the plain-MMX variant names its first two parameters (b,a), so
 * the destination is still the second argument at the call site. */
00042 #if HAVE_MMX2
00043 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
00044 #elif HAVE_MMX
00045 #define PMINUB(b,a,t) \
00046 "movq " #a ", " #t " \n\t"\
00047 "psubusb " #b ", " #t " \n\t"\
00048 "psubb " #t ", " #a " \n\t"
00049 #endif
00050
/* PMAXUB(a,b): b = max(a,b) per unsigned byte.
 * MMX2 uses "pmaxub"; the plain-MMX variant computes
 * b = saturate(b - a); b += a. */
00051 #if HAVE_MMX2
00052 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
00053 #elif HAVE_MMX
00054 #define PMAXUB(a,b) \
00055 "psubusb " #a ", " #b " \n\t"\
00056 "paddb " #a ", " #b " \n\t"
00057 #endif
00058
00059
00060 #if HAVE_MMX
00061
/**
 * Classify the 8-byte-wide, 8-row area that starts 4 rows below src
 * (MMX implementation).
 *
 * The asm counts, over the 7 adjacent row pairs and 8 columns, the bytes for
 * which (row[i] - row[i+1] + mmxDcOffset) > mmxDcThreshold in a signed byte
 * compare, and computes per column the difference max - min over the 8 rows.
 *
 * @param src    pixel data; 8 bytes are read from each of rows 4..11 relative
 *               to this pointer
 * @param stride distance in bytes between consecutive rows
 * @param c      context supplying mmxDcOffset / mmxDcThreshold (both indexed
 *               by nonBQP), pQPb and ppMode.flatnessThreshold
 * @return 2 if the count is not above ppMode.flatnessThreshold; otherwise
 *         1 if no column has max - min above twice pQPb (saturating byte
 *         arithmetic), and 0 if at least one column does
 */
00064 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
00065 int numEq= 0, dcOk;
00066 src+= stride*4;
// First asm statement only preloads mm7 (offset) and mm6 (threshold); the
// second statement relies on both registers still holding these values.
00067 __asm__ volatile(
00068 "movq %0, %%mm7 \n\t"
00069 "movq %1, %%mm6 \n\t"
00070 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
00071 );
00072
00073 __asm__ volatile(
00074 "lea (%2, %3), %%"REG_a" \n\t" // REG_a = src + stride (row 1)
00075 // Register roles below: mm0 accumulates the compare results (-1 per hit),
00076 // mm4 tracks the per-column maximum, mm3 the per-column minimum,
00077 // mm1/mm2 alternate as "current row" / "next row".
00078 "movq (%2), %%mm0 \n\t" // row 0
00079 "movq (%%"REG_a"), %%mm1 \n\t" // row 1
00080 "movq %%mm0, %%mm3 \n\t"
00081 "movq %%mm0, %%mm4 \n\t"
00082 PMAXUB(%%mm1, %%mm4)
00083 PMINUB(%%mm1, %%mm3, %%mm5)
00084 "psubb %%mm1, %%mm0 \n\t" // row0 - row1
00085 "paddb %%mm7, %%mm0 \n\t" // + offset
00086 "pcmpgtb %%mm6, %%mm0 \n\t" // 0xFF where result > threshold (signed)
00087
00088 "movq (%%"REG_a",%3), %%mm2 \n\t" // row 2
00089 PMAXUB(%%mm2, %%mm4)
00090 PMINUB(%%mm2, %%mm3, %%mm5)
00091 "psubb %%mm2, %%mm1 \n\t"
00092 "paddb %%mm7, %%mm1 \n\t"
00093 "pcmpgtb %%mm6, %%mm1 \n\t"
00094 "paddb %%mm1, %%mm0 \n\t"
00095
00096 "movq (%%"REG_a", %3, 2), %%mm1 \n\t" // row 3
00097 PMAXUB(%%mm1, %%mm4)
00098 PMINUB(%%mm1, %%mm3, %%mm5)
00099 "psubb %%mm1, %%mm2 \n\t"
00100 "paddb %%mm7, %%mm2 \n\t"
00101 "pcmpgtb %%mm6, %%mm2 \n\t"
00102 "paddb %%mm2, %%mm0 \n\t"
00103
00104 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t" // REG_a = src + 5*stride
00105
00106 "movq (%2, %3, 4), %%mm2 \n\t" // row 4
00107 PMAXUB(%%mm2, %%mm4)
00108 PMINUB(%%mm2, %%mm3, %%mm5)
00109 "psubb %%mm2, %%mm1 \n\t"
00110 "paddb %%mm7, %%mm1 \n\t"
00111 "pcmpgtb %%mm6, %%mm1 \n\t"
00112 "paddb %%mm1, %%mm0 \n\t"
00113
00114 "movq (%%"REG_a"), %%mm1 \n\t" // row 5
00115 PMAXUB(%%mm1, %%mm4)
00116 PMINUB(%%mm1, %%mm3, %%mm5)
00117 "psubb %%mm1, %%mm2 \n\t"
00118 "paddb %%mm7, %%mm2 \n\t"
00119 "pcmpgtb %%mm6, %%mm2 \n\t"
00120 "paddb %%mm2, %%mm0 \n\t"
00121
00122 "movq (%%"REG_a", %3), %%mm2 \n\t" // row 6
00123 PMAXUB(%%mm2, %%mm4)
00124 PMINUB(%%mm2, %%mm3, %%mm5)
00125 "psubb %%mm2, %%mm1 \n\t"
00126 "paddb %%mm7, %%mm1 \n\t"
00127 "pcmpgtb %%mm6, %%mm1 \n\t"
00128 "paddb %%mm1, %%mm0 \n\t"
00129
00130 "movq (%%"REG_a", %3, 2), %%mm1 \n\t" // row 7
00131 PMAXUB(%%mm1, %%mm4)
00132 PMINUB(%%mm1, %%mm3, %%mm5)
00133 "psubb %%mm1, %%mm2 \n\t"
00134 "paddb %%mm7, %%mm2 \n\t"
00135 "pcmpgtb %%mm6, %%mm2 \n\t"
00136 "paddb %%mm2, %%mm0 \n\t"
00137 "psubusb %%mm3, %%mm4 \n\t" // mm4 = per-column max - min
00138
00139 " \n\t"
// Horizontal sum of the 8 accumulator bytes into the low byte of mm0.
00140 #if HAVE_MMX2
00141 "pxor %%mm7, %%mm7 \n\t"
00142 "psadbw %%mm7, %%mm0 \n\t"
00143 #else
00144 "movq %%mm0, %%mm1 \n\t"
00145 "psrlw $8, %%mm0 \n\t"
00146 "paddb %%mm1, %%mm0 \n\t"
00147 "movq %%mm0, %%mm1 \n\t"
00148 "psrlq $16, %%mm0 \n\t"
00149 "paddb %%mm1, %%mm0 \n\t"
00150 "movq %%mm0, %%mm1 \n\t"
00151 "psrlq $32, %%mm0 \n\t"
00152 "paddb %%mm1, %%mm0 \n\t"
00153 #endif
00154 "movq %4, %%mm7 \n\t" // mm7 = pQPb
00155 "paddusb %%mm7, %%mm7 \n\t" // doubled, saturating
00156 "psubusb %%mm7, %%mm4 \n\t" // non-zero bytes only where max-min > 2*pQPb
00157 "packssdw %%mm4, %%mm4 \n\t" // squeeze all 8 bytes into the low 32 bits
00158 "movd %%mm0, %0 \n\t"
00159 "movd %%mm4, %1 \n\t"
00160
00161 : "=r" (numEq), "=r" (dcOk)
00162 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00163 : "%"REG_a
00164 );
00165
// Each hit was accumulated as -1, so negate; only the low byte is meaningful.
00166 numEq= (-numEq) &0xFF;
00167 if(numEq > c->ppMode.flatnessThreshold){
// Despite its name, a non-zero dcOk means some column exceeded the range limit.
00168 if(dcOk) return 0;
00169 else return 1;
00170 }else{
00171 return 2;
00172 }
00173 }
00174 #endif //HAVE_MMX
00175
00180 #if !HAVE_ALTIVEC
/**
 * Vertical low-pass filter, applied in place.
 *
 * Counting rows from src + 3*stride, rows 1..8 are rewritten; rows 0 and 9
 * are only read, to choose the edge values "first" and "last".
 * The MMX2/3DNow! path processes 8 bytes per row with cascaded byte averages;
 * the C path processes BLOCK_SIZE columns with exact 16-weight sums.
 *
 * @param src    pixel data (filtered in place)
 * @param stride distance in bytes between consecutive rows
 * @param c      context; the asm path reads c->pQPb, the C path reads c->QP
 */
00181 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
00182 {
00183 #if HAVE_MMX2 || HAVE_AMD3DNOW
00184 src+= stride*3;
00185 __asm__ volatile(
00186 "movq %2, %%mm0 \n\t" // mm0 = pQPb
00187 "pxor %%mm4, %%mm4 \n\t" // mm4 = 0
00188
// mm6 = "first": row 0 where |row0 - row1| <= pQPb, otherwise row 1.
00189 "movq (%0), %%mm6 \n\t"
00190 "movq (%0, %1), %%mm5 \n\t"
00191 "movq %%mm5, %%mm1 \n\t"
00192 "movq %%mm6, %%mm2 \n\t"
00193 "psubusb %%mm6, %%mm5 \n\t"
00194 "psubusb %%mm1, %%mm2 \n\t"
00195 "por %%mm5, %%mm2 \n\t" // |row0 - row1|
00196 "psubusb %%mm0, %%mm2 \n\t"
00197 "pcmpeqb %%mm4, %%mm2 \n\t" // 0xFF where the difference is <= pQPb
00198
00199 "pand %%mm2, %%mm6 \n\t"
00200 "pandn %%mm1, %%mm2 \n\t"
00201 "por %%mm2, %%mm6 \n\t"
00202
// mm7 = "last": row 9 where |row8 - row9| <= pQPb, otherwise row 8.
00203 "movq (%0, %1, 8), %%mm5 \n\t" // row 8
00204 "lea (%0, %1, 4), %%"REG_a" \n\t" // REG_a = row 4
00205 "lea (%0, %1, 8), %%"REG_c" \n\t"
00206 "sub %1, %%"REG_c" \n\t" // REG_c = row 7
// NOTE(review): %0 is declared as an input-only operand but is modified here;
// it is restored by the "sub %1, %0" at the end of the asm.
00207 "add %1, %0 \n\t" // %0 = row 1 from here on
00208 "movq (%0, %1, 8), %%mm7 \n\t" // row 9
00209 "movq %%mm5, %%mm1 \n\t"
00210 "movq %%mm7, %%mm2 \n\t"
00211 "psubusb %%mm7, %%mm5 \n\t"
00212 "psubusb %%mm1, %%mm2 \n\t"
00213 "por %%mm5, %%mm2 \n\t"
00214 "psubusb %%mm0, %%mm2 \n\t"
00215 "pcmpeqb %%mm4, %%mm2 \n\t"
00216
00217 "pand %%mm2, %%mm7 \n\t"
00218 "pandn %%mm1, %%mm2 \n\t"
00219 "por %%mm2, %%mm7 \n\t"
00220
00221 // From here on the eight output rows are built from cascaded PAVGB
00222 // averages of "first" (mm6), "last" (mm7) and the input rows, and
00223 // stored in the order row 1, 2, 3, 4, 5, 6, 7, 8.
00224 // NOTE(review): presumably this approximates the weighted sums of the
00225 // C fallback below; the rounding of the cascade has not been verified.
00226
00227
00228 "movq (%0, %1), %%mm0 \n\t"
00229 "movq %%mm0, %%mm1 \n\t"
00230 PAVGB(%%mm6, %%mm0)
00231 PAVGB(%%mm6, %%mm0)
00232
00233 "movq (%0, %1, 4), %%mm2 \n\t"
00234 "movq %%mm2, %%mm5 \n\t"
00235 PAVGB((%%REGa), %%mm2)
00236 PAVGB((%0, %1, 2), %%mm2)
00237 "movq %%mm2, %%mm3 \n\t"
00238 "movq (%0), %%mm4 \n\t"
00239 PAVGB(%%mm4, %%mm3)
00240 PAVGB(%%mm0, %%mm3)
00241 "movq %%mm3, (%0) \n\t" // store row 1
00242
00243 "movq %%mm1, %%mm0 \n\t"
00244 PAVGB(%%mm6, %%mm0)
00245 "movq %%mm4, %%mm3 \n\t"
00246 PAVGB((%0,%1,2), %%mm3)
00247 PAVGB((%%REGa,%1,2), %%mm5)
00248 PAVGB((%%REGa), %%mm5)
00249 PAVGB(%%mm5, %%mm3)
00250 PAVGB(%%mm0, %%mm3)
00251 "movq %%mm3, (%0,%1) \n\t" // store row 2
00252
00253 PAVGB(%%mm4, %%mm6)
00254 "movq (%%"REG_c"), %%mm0 \n\t"
00255 PAVGB((%%REGa, %1, 2), %%mm0)
00256 "movq %%mm0, %%mm3 \n\t"
00257 PAVGB(%%mm1, %%mm0)
00258 PAVGB(%%mm6, %%mm0)
00259 PAVGB(%%mm2, %%mm0)
00260 "movq (%0, %1, 2), %%mm2 \n\t"
00261 "movq %%mm0, (%0, %1, 2) \n\t" // store row 3
00262
00263 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00264 PAVGB((%%REGc), %%mm0)
00265 PAVGB(%%mm0, %%mm6)
00266 PAVGB(%%mm1, %%mm4)
00267 PAVGB(%%mm2, %%mm1)
00268 PAVGB(%%mm1, %%mm6)
00269 PAVGB(%%mm5, %%mm6)
00270 "movq (%%"REG_a"), %%mm5 \n\t"
00271 "movq %%mm6, (%%"REG_a") \n\t" // store row 4
00272
00273 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00274 PAVGB(%%mm7, %%mm6)
00275 PAVGB(%%mm4, %%mm6)
00276 PAVGB(%%mm3, %%mm6)
00277 PAVGB(%%mm5, %%mm2)
00278 "movq (%0, %1, 4), %%mm4 \n\t"
00279 PAVGB(%%mm4, %%mm2)
00280 PAVGB(%%mm2, %%mm6)
00281 "movq %%mm6, (%0, %1, 4) \n\t" // store row 5
00282
00283 PAVGB(%%mm7, %%mm1)
00284 PAVGB(%%mm4, %%mm5)
00285 PAVGB(%%mm5, %%mm0)
00286 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00287 PAVGB(%%mm6, %%mm1)
00288 PAVGB(%%mm0, %%mm1)
00289 "movq %%mm1, (%%"REG_a", %1, 2) \n\t" // store row 6
00290
00291 PAVGB((%%REGc), %%mm2)
00292 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00293 PAVGB(%%mm0, %%mm6)
00294 PAVGB(%%mm7, %%mm6)
00295 PAVGB(%%mm2, %%mm6)
00296 "movq %%mm6, (%%"REG_c") \n\t" // store row 7
00297
00298 PAVGB(%%mm7, %%mm5)
00299 PAVGB(%%mm7, %%mm5)
00300
00301 PAVGB(%%mm3, %%mm0)
00302 PAVGB(%%mm0, %%mm5)
00303 "movq %%mm5, (%%"REG_a", %1, 4) \n\t" // store row 8
00304 "sub %1, %0 \n\t" // undo the earlier "add %1, %0"
00305
00306 :
00307 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00308 : "%"REG_a, "%"REG_c
00309 );
00310 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// lN is the byte offset of row N.
00311 const int l1= stride;
00312 const int l2= stride + l1;
00313 const int l3= stride + l2;
00314 const int l4= stride + l3;
00315 const int l5= stride + l4;
00316 const int l6= stride + l5;
00317 const int l7= stride + l6;
00318 const int l8= stride + l7;
00319 const int l9= stride + l8;
00320 int x;
00321 src+= stride*3;
00322 for(x=0; x<BLOCK_SIZE; x++){
// Edge values: use the outer row (0 or 9) only when it is within QP of its
// inner neighbour, otherwise repeat the inner row (1 or 8).
00323 const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
00324 const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
00325
// Sliding window sums; each step drops one term and adds the next one.
// All sums are taken from the unmodified input before any row is written.
// The +4 in sums[0] propagates to every sum, giving +8 rounding before >>4.
00326 int sums[10];
00327 sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
00328 sums[1] = sums[0] - first + src[l4];
00329 sums[2] = sums[1] - first + src[l5];
00330 sums[3] = sums[2] - first + src[l6];
00331 sums[4] = sums[3] - first + src[l7];
00332 sums[5] = sums[4] - src[l1] + src[l8];
00333 sums[6] = sums[5] - src[l2] + last;
00334 sums[7] = sums[6] - src[l3] + last;
00335 sums[8] = sums[7] - src[l4] + last;
00336 sums[9] = sums[8] - src[l5] + last;
00337
// Each output combines two window sums plus twice the centre pixel; the
// weights add up to 16, hence the >>4.
00338 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
00339 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
00340 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
00341 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
00342 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
00343 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
00344 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
00345 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
00346
00347 src++;
00348 }
00349 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
00350 }
00351 #endif //HAVE_ALTIVEC
00352
/**
 * Vertical "X1" filter, applied in place.
 *
 * Counting rows from src + 3*stride, the filter looks at rows 3..6 and, when
 * its condition holds, adjusts rows 2..7: rows 4/5 get the largest correction,
 * rows 3/6 a smaller one and rows 2/7 the smallest, with opposite signs on
 * the two sides of the row 4 / row 5 boundary.
 * The MMX2/3DNow! path handles 8 bytes per row; the C path handles
 * BLOCK_SIZE columns.
 *
 * @param src    pixel data (filtered in place)
 * @param stride distance in bytes between consecutive rows
 * @param co     context; the asm path reads co->pQPb, the C path reads co->QP
 */
00360 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
00361 {
00362 #if HAVE_MMX2 || HAVE_AMD3DNOW
00363 src+= stride*3;
00364
00365 __asm__ volatile(
00366 "pxor %%mm7, %%mm7 \n\t" // mm7 = 0
00367 "lea (%0, %1), %%"REG_a" \n\t" // REG_a = row 1
00368 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" // REG_c = row 5
00369
00370
00371 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // row 3
00372 "movq (%0, %1, 4), %%mm1 \n\t" // row 4
00373 "movq %%mm1, %%mm2 \n\t"
00374 "psubusb %%mm0, %%mm1 \n\t"
00375 "psubusb %%mm2, %%mm0 \n\t"
00376 "por %%mm1, %%mm0 \n\t" // |row3 - row4|
00377 "movq (%%"REG_c"), %%mm3 \n\t" // row 5
00378 "movq (%%"REG_c", %1), %%mm4 \n\t" // row 6
00379 "movq %%mm3, %%mm5 \n\t"
00380 "psubusb %%mm4, %%mm3 \n\t"
00381 "psubusb %%mm5, %%mm4 \n\t"
00382 "por %%mm4, %%mm3 \n\t" // |row5 - row6|
00383 PAVGB(%%mm3, %%mm0)
00384 "movq %%mm2, %%mm1 \n\t"
00385 "psubusb %%mm5, %%mm2 \n\t"
00386 "movq %%mm2, %%mm4 \n\t"
00387 "pcmpeqb %%mm7, %%mm2 \n\t" // mm2 = 0xFF where row4 <= row5 (direction mask)
00388 "psubusb %%mm1, %%mm5 \n\t"
00389 "por %%mm5, %%mm4 \n\t" // |row4 - row5|
00390 "psubusb %%mm0, %%mm4 \n\t" // d = max(|row4-row5| - avg of outer diffs, 0)
00391 "movq %%mm4, %%mm3 \n\t"
00392 "movq %2, %%mm0 \n\t"
00393 "paddusb %%mm0, %%mm0 \n\t" // 2*pQPb, saturating
00394 "psubusb %%mm0, %%mm4 \n\t"
00395 "pcmpeqb %%mm7, %%mm4 \n\t" // 0xFF where d <= 2*pQPb
00396 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00397 "pand %%mm4, %%mm3 \n\t" // correction is zeroed where the test failed
00398
00399 PAVGB(%%mm7, %%mm3)
00400 "movq %%mm3, %%mm1 \n\t"
00401 PAVGB(%%mm7, %%mm3)
00402 PAVGB(%%mm1, %%mm3)
00403
// Each update below xors with the direction mask mm2 before and after a
// saturating subtract/add, so the same unsigned operation moves the pixel
// in the direction selected by the mask.
00404 "movq (%0, %1, 4), %%mm0 \n\t" // row 4
00405 "pxor %%mm2, %%mm0 \n\t"
00406 "psubusb %%mm3, %%mm0 \n\t"
00407 "pxor %%mm2, %%mm0 \n\t"
00408 "movq %%mm0, (%0, %1, 4) \n\t"
00409
00410 "movq (%%"REG_c"), %%mm0 \n\t" // row 5
00411 "pxor %%mm2, %%mm0 \n\t"
00412 "paddusb %%mm3, %%mm0 \n\t"
00413 "pxor %%mm2, %%mm0 \n\t"
00414 "movq %%mm0, (%%"REG_c") \n\t"
00415
00416 PAVGB(%%mm7, %%mm1)
00417
00418 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // row 3
00419 "pxor %%mm2, %%mm0 \n\t"
00420 "psubusb %%mm1, %%mm0 \n\t"
00421 "pxor %%mm2, %%mm0 \n\t"
00422 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00423
00424 "movq (%%"REG_c", %1), %%mm0 \n\t" // row 6
00425 "pxor %%mm2, %%mm0 \n\t"
00426 "paddusb %%mm1, %%mm0 \n\t"
00427 "pxor %%mm2, %%mm0 \n\t"
00428 "movq %%mm0, (%%"REG_c", %1) \n\t"
00429
00430 PAVGB(%%mm7, %%mm1)
00431
00432 "movq (%%"REG_a", %1), %%mm0 \n\t" // row 2
00433 "pxor %%mm2, %%mm0 \n\t"
00434 "psubusb %%mm1, %%mm0 \n\t"
00435 "pxor %%mm2, %%mm0 \n\t"
00436 "movq %%mm0, (%%"REG_a", %1) \n\t"
00437
00438 "movq (%%"REG_c", %1, 2), %%mm0 \n\t" // row 7
00439 "pxor %%mm2, %%mm0 \n\t"
00440 "paddusb %%mm1, %%mm0 \n\t"
00441 "pxor %%mm2, %%mm0 \n\t"
00442 "movq %%mm0, (%%"REG_c", %1, 2) \n\t"
00443
00444 :
00445 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
00446 : "%"REG_a, "%"REG_c
00447 );
00448 #else //HAVE_MMX2 || HAVE_AMD3DNOW
00449
// lN is the byte offset of row N; l1 is defined but not used by the loop.
00450 const int l1= stride;
00451 const int l2= stride + l1;
00452 const int l3= stride + l2;
00453 const int l4= stride + l3;
00454 const int l5= stride + l4;
00455 const int l6= stride + l5;
00456 const int l7= stride + l6;
00457
00458
00459 int x;
00460
00461 src+= stride*3;
00462 for(x=0; x<BLOCK_SIZE; x++){
// a and c are the steps just outside the row 4 / row 5 boundary, b is the
// step across it.
00463 int a= src[l3] - src[l4];
00464 int b= src[l4] - src[l5];
00465 int c= src[l5] - src[l6];
00466
// d: how much the boundary step exceeds the mean of its neighbours, clamped at 0.
00467 int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1);
00468 d= FFMAX(d, 0);
00469
00470 if(d < co->QP*2){
00471 int v = d * FFSIGN(-b);
00472
// Spread the correction over three rows on each side: v/8, v/4, 3v/8.
00473 src[l2] +=v>>3;
00474 src[l3] +=v>>2;
00475 src[l4] +=(3*v)>>3;
00476 src[l5] -=(3*v)>>3;
00477 src[l6] -=v>>2;
00478 src[l7] -=v>>3;
00479 }
00480 src++;
00481 }
00482 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
00483 }
00484
00485 #if !HAVE_ALTIVEC
/**
 * Default vertical deblocking filter, applied in place.
 *
 * Three implementations are selected at compile time:
 *  - MMX2 / 3DNow!: works on bytes with PAVGB-based approximations
 *    (a second, disabled variant is kept under "#if 0");
 *  - plain MMX: widens to 16-bit words and evaluates the same expressions as
 *    the C code, using scratch memory below the stack pointer;
 *  - C: reference implementation over BLOCK_SIZE columns.
 * In every variant only the two rows next to the boundary are written
 * (src[l4] and src[l5] in the C code, i.e. rows 3 and 4 counted from
 * src + 4*stride in the asm code).
 *
 * @param src    pixel data (filtered in place)
 * @param stride distance in bytes between consecutive rows
 * @param c      context; the asm paths read c->pQPb, the C path reads c->QP
 */
00486 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
00487 {
00488 #if HAVE_MMX2 || HAVE_AMD3DNOW
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503 src+= stride*4;
00504 __asm__ volatile(
00505
00506 #if 0 //slightly more accurate and slightly slower
00507 "pxor %%mm7, %%mm7 \n\t"
00508 "lea (%0, %1), %%"REG_a" \n\t"
00509 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00510
00511
00512
00513
00514
00515 "movq (%0, %1, 2), %%mm0 \n\t"
00516 "movq (%0), %%mm1 \n\t"
00517 "movq %%mm0, %%mm2 \n\t"
00518 PAVGB(%%mm7, %%mm0)
00519 PAVGB(%%mm1, %%mm0)
00520 PAVGB(%%mm2, %%mm0)
00521
00522 "movq (%%"REG_a"), %%mm1 \n\t"
00523 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
00524 "movq %%mm1, %%mm4 \n\t"
00525 PAVGB(%%mm7, %%mm1)
00526 PAVGB(%%mm3, %%mm1)
00527 PAVGB(%%mm4, %%mm1)
00528
00529 "movq %%mm0, %%mm4 \n\t"
00530 "psubusb %%mm1, %%mm0 \n\t"
00531 "psubusb %%mm4, %%mm1 \n\t"
00532 "por %%mm0, %%mm1 \n\t"
00533
00534
00535 "movq (%0, %1, 4), %%mm0 \n\t"
00536 "movq %%mm0, %%mm4 \n\t"
00537 PAVGB(%%mm7, %%mm0)
00538 PAVGB(%%mm2, %%mm0)
00539 PAVGB(%%mm4, %%mm0)
00540
00541 "movq (%%"REG_c"), %%mm2 \n\t"
00542 "movq %%mm3, %%mm5 \n\t"
00543 PAVGB(%%mm7, %%mm3)
00544 PAVGB(%%mm2, %%mm3)
00545 PAVGB(%%mm5, %%mm3)
00546
00547 "movq %%mm0, %%mm6 \n\t"
00548 "psubusb %%mm3, %%mm0 \n\t"
00549 "psubusb %%mm6, %%mm3 \n\t"
00550 "por %%mm0, %%mm3 \n\t"
00551 "pcmpeqb %%mm7, %%mm0 \n\t"
00552
00553
00554 "movq (%%"REG_c", %1), %%mm6 \n\t"
00555 "movq %%mm6, %%mm5 \n\t"
00556 PAVGB(%%mm7, %%mm6)
00557 PAVGB(%%mm4, %%mm6)
00558 PAVGB(%%mm5, %%mm6)
00559
00560 "movq (%%"REG_c", %1, 2), %%mm5 \n\t"
00561 "movq %%mm2, %%mm4 \n\t"
00562 PAVGB(%%mm7, %%mm2)
00563 PAVGB(%%mm5, %%mm2)
00564 PAVGB(%%mm4, %%mm2)
00565
00566 "movq %%mm6, %%mm4 \n\t"
00567 "psubusb %%mm2, %%mm6 \n\t"
00568 "psubusb %%mm4, %%mm2 \n\t"
00569 "por %%mm6, %%mm2 \n\t"
00570
00571
00572
00573 PMINUB(%%mm2, %%mm1, %%mm4)
00574 "movq %2, %%mm4 \n\t"
00575 "paddusb "MANGLE(b01)", %%mm4 \n\t"
00576 "pcmpgtb %%mm3, %%mm4 \n\t"
00577 "psubusb %%mm1, %%mm3 \n\t"
00578 "pand %%mm4, %%mm3 \n\t"
00579
00580 "movq %%mm3, %%mm1 \n\t"
00581
00582 PAVGB(%%mm7, %%mm3)
00583 PAVGB(%%mm7, %%mm3)
00584 "paddusb %%mm1, %%mm3 \n\t"
00585
00586
00587 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00588 "movq (%0, %1, 4), %%mm5 \n\t"
00589 "movq (%0, %1, 4), %%mm4 \n\t"
00590 "psubusb %%mm6, %%mm5 \n\t"
00591 "psubusb %%mm4, %%mm6 \n\t"
00592 "por %%mm6, %%mm5 \n\t"
00593 "pcmpeqb %%mm7, %%mm6 \n\t"
00594 "pxor %%mm6, %%mm0 \n\t"
00595 "pand %%mm0, %%mm3 \n\t"
00596 PMINUB(%%mm5, %%mm3, %%mm0)
00597
00598 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00599 PAVGB(%%mm7, %%mm3)
00600
00601 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00602 "movq (%0, %1, 4), %%mm2 \n\t"
00603 "pxor %%mm6, %%mm0 \n\t"
00604 "pxor %%mm6, %%mm2 \n\t"
00605 "psubb %%mm3, %%mm0 \n\t"
00606 "paddb %%mm3, %%mm2 \n\t"
00607 "pxor %%mm6, %%mm0 \n\t"
00608 "pxor %%mm6, %%mm2 \n\t"
00609 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00610 "movq %%mm2, (%0, %1, 4) \n\t"
00611 #endif //0
00612
// Active MMX2/3DNow! variant starts here.
00613 "lea (%0, %1), %%"REG_a" \n\t" // REG_a = row 1
00614 "pcmpeqb %%mm6, %%mm6 \n\t" // mm6 = all ones, used to complement bytes
00615
00616 // Signed quantities are kept as biased bytes: avg(~x, y) equals
00617 // 128 + floor((y - x) / 2).
00618
00619
00620 "movq (%%"REG_a", %1, 2), %%mm1 \n\t" // row 3
00621 "movq (%0, %1, 4), %%mm0 \n\t" // row 4
00622 "pxor %%mm6, %%mm1 \n\t" // ~row3
00623 PAVGB(%%mm1, %%mm0)
00624
00625
00626 "movq (%%"REG_a", %1, 4), %%mm2 \n\t" // row 5
00627 "movq (%%"REG_a", %1), %%mm3 \n\t" // row 2
00628 "pxor %%mm6, %%mm2 \n\t"
00629 "movq %%mm2, %%mm5 \n\t"
00630 "movq "MANGLE(b80)", %%mm4 \n\t"
00631 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" // REG_c = row 5
00632 PAVGB(%%mm3, %%mm2)
00633 PAVGB(%%mm0, %%mm4)
00634 PAVGB(%%mm2, %%mm4)
00635 PAVGB(%%mm0, %%mm4)
00636
00637
00638 "movq (%%"REG_a"), %%mm2 \n\t" // row 1
00639 "pxor %%mm6, %%mm2 \n\t"
00640 PAVGB(%%mm3, %%mm2)
00641 PAVGB((%0), %%mm1)
00642 "movq "MANGLE(b80)", %%mm3 \n\t"
00643 PAVGB(%%mm2, %%mm3)
00644 PAVGB(%%mm1, %%mm3)
00645 PAVGB(%%mm2, %%mm3)
00646
00647
00648 PAVGB((%%REGc, %1), %%mm5)
00649 "movq (%%"REG_c", %1, 2), %%mm1 \n\t" // row 7
00650 "pxor %%mm6, %%mm1 \n\t"
00651 PAVGB((%0, %1, 4), %%mm1)
00652 "movq "MANGLE(b80)", %%mm2 \n\t"
00653 PAVGB(%%mm5, %%mm2)
00654 PAVGB(%%mm1, %%mm2)
00655 PAVGB(%%mm5, %%mm2)
00656
00657
// |x| as max(x, 0 - x) for mm2 and mm3, then mm3 = min of the two magnitudes.
00658 "movq "MANGLE(b00)", %%mm1 \n\t"
00659 "movq "MANGLE(b00)", %%mm5 \n\t"
00660 "psubb %%mm2, %%mm1 \n\t"
00661 "psubb %%mm3, %%mm5 \n\t"
00662 PMAXUB(%%mm1, %%mm2)
00663 PMAXUB(%%mm5, %%mm3)
00664 PMINUB(%%mm2, %%mm3, %%mm1)
00665
00666
00667
00668 "movq "MANGLE(b00)", %%mm7 \n\t"
00669 "movq %2, %%mm2 \n\t"
00670 PAVGB(%%mm6, %%mm2)
00671 "psubb %%mm6, %%mm2 \n\t"
00672
00673 "movq %%mm4, %%mm1 \n\t"
00674 "pcmpgtb %%mm7, %%mm1 \n\t" // mm1 = sign mask of mm4
00675 "pxor %%mm1, %%mm4 \n\t"
00676 "psubb %%mm1, %%mm4 \n\t"
00677 "pcmpgtb %%mm4, %%mm2 \n\t" // mm2 = threshold mask derived from pQPb
00678 "psubusb %%mm3, %%mm4 \n\t"
00679
00680
00681 "movq %%mm4, %%mm3 \n\t"
00682 "psubusb "MANGLE(b01)", %%mm4 \n\t"
00683 PAVGB(%%mm7, %%mm4)
00684 PAVGB(%%mm7, %%mm4)
00685 "paddb %%mm3, %%mm4 \n\t"
00686 "pand %%mm2, %%mm4 \n\t"
00687
00688 "movq "MANGLE(b80)", %%mm5 \n\t"
00689 "psubb %%mm0, %%mm5 \n\t"
00690 "paddsb %%mm6, %%mm5 \n\t"
00691 "pcmpgtb %%mm5, %%mm7 \n\t"
00692 "pxor %%mm7, %%mm5 \n\t"
00693
00694 PMINUB(%%mm5, %%mm4, %%mm3)
00695 "pxor %%mm1, %%mm7 \n\t"
00696
// Apply the correction mm4 to rows 3 and 4 with opposite signs; the xor with
// the sign mask mm1 selects the direction.
00697 "pand %%mm7, %%mm4 \n\t"
00698 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00699 "movq (%0, %1, 4), %%mm2 \n\t"
00700 "pxor %%mm1, %%mm0 \n\t"
00701 "pxor %%mm1, %%mm2 \n\t"
00702 "paddb %%mm4, %%mm0 \n\t"
00703 "psubb %%mm4, %%mm2 \n\t"
00704 "pxor %%mm1, %%mm0 \n\t"
00705 "pxor %%mm1, %%mm2 \n\t"
00706 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00707 "movq %%mm2, (%0, %1, 4) \n\t"
00708
00709 :
00710 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
00711 : "%"REG_a, "%"REG_c
00712 );
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769 #elif HAVE_MMX
00770 src+= stride*4;
00771 __asm__ volatile(
00772 "pxor %%mm7, %%mm7 \n\t" // mm7 = 0, used to widen bytes to words
// NOTE(review): REG_c points at scratch memory below the stack pointer,
// (SP - 40) rounded down to a multiple of 8. Four 8-byte slots at offsets
// 0, 8, 16 and 24 are written and read back. This memory is not backed by a C
// object and the asm has no "memory" clobber; confirm this is safe on the
// target ABI.
00773 "lea -40(%%"REG_SP"), %%"REG_c" \n\t"
00774 "and "ALIGN_MASK", %%"REG_c" \n\t"
00775
00776 // Rows are named L0..L7 counted from src; each row is split into a
00777 // low-half / high-half pair of word registers.
00778
00779 "movq (%0), %%mm0 \n\t" // L0
00780 "movq %%mm0, %%mm1 \n\t"
00781 "punpcklbw %%mm7, %%mm0 \n\t"
00782 "punpckhbw %%mm7, %%mm1 \n\t"
00783
00784 "movq (%0, %1), %%mm2 \n\t" // L1
00785 "lea (%0, %1, 2), %%"REG_a" \n\t" // REG_a = row 2
00786 "movq %%mm2, %%mm3 \n\t"
00787 "punpcklbw %%mm7, %%mm2 \n\t"
00788 "punpckhbw %%mm7, %%mm3 \n\t"
00789
00790 "movq (%%"REG_a"), %%mm4 \n\t" // L2
00791 "movq %%mm4, %%mm5 \n\t"
00792 "punpcklbw %%mm7, %%mm4 \n\t"
00793 "punpckhbw %%mm7, %%mm5 \n\t"
00794
00795 "paddw %%mm0, %%mm0 \n\t" // 2*L0
00796 "paddw %%mm1, %%mm1 \n\t"
00797 "psubw %%mm4, %%mm2 \n\t" // L1 - L2
00798 "psubw %%mm5, %%mm3 \n\t"
00799 "psubw %%mm2, %%mm0 \n\t"
00800 "psubw %%mm3, %%mm1 \n\t"
00801
00802 "psllw $2, %%mm2 \n\t"
00803 "psllw $2, %%mm3 \n\t"
00804 "psubw %%mm2, %%mm0 \n\t" // 2*L0 - 5*L1 + 5*L2
00805 "psubw %%mm3, %%mm1 \n\t"
00806
00807 "movq (%%"REG_a", %1), %%mm2 \n\t" // L3
00808 "movq %%mm2, %%mm3 \n\t"
00809 "punpcklbw %%mm7, %%mm2 \n\t"
00810 "punpckhbw %%mm7, %%mm3 \n\t"
00811
00812 "psubw %%mm2, %%mm0 \n\t"
00813 "psubw %%mm3, %%mm1 \n\t"
00814 "psubw %%mm2, %%mm0 \n\t" // 2*L0 - 5*L1 + 5*L2 - 2*L3 ("leftEnergy" in the C code)
00815 "psubw %%mm3, %%mm1 \n\t"
00816 "movq %%mm0, (%%"REG_c") \n\t" // scratch slots 0/1: left energy
00817 "movq %%mm1, 8(%%"REG_c") \n\t"
00818
00819 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // L4
00820 "movq %%mm0, %%mm1 \n\t"
00821 "punpcklbw %%mm7, %%mm0 \n\t"
00822 "punpckhbw %%mm7, %%mm1 \n\t"
00823
00824 "psubw %%mm0, %%mm2 \n\t" // L3 - L4
00825 "psubw %%mm1, %%mm3 \n\t"
00826 "movq %%mm2, 16(%%"REG_c") \n\t" // scratch slots 2/3: L3 - L4
00827 "movq %%mm3, 24(%%"REG_c") \n\t"
00828 "paddw %%mm4, %%mm4 \n\t" // 2*L2
00829 "paddw %%mm5, %%mm5 \n\t"
00830 "psubw %%mm2, %%mm4 \n\t"
00831 "psubw %%mm3, %%mm5 \n\t"
00832
00833 "lea (%%"REG_a", %1), %0 \n\t" // %0 = row 3 from here on (src is a "+r" operand)
00834 "psllw $2, %%mm2 \n\t"
00835 "psllw $2, %%mm3 \n\t"
00836 "psubw %%mm2, %%mm4 \n\t" // 2*L2 - 5*L3 + 5*L4
00837 "psubw %%mm3, %%mm5 \n\t"
00838
00839 "movq (%0, %1, 2), %%mm2 \n\t" // L5
00840 "movq %%mm2, %%mm3 \n\t"
00841 "punpcklbw %%mm7, %%mm2 \n\t"
00842 "punpckhbw %%mm7, %%mm3 \n\t"
00843 "psubw %%mm2, %%mm4 \n\t"
00844 "psubw %%mm3, %%mm5 \n\t"
00845 "psubw %%mm2, %%mm4 \n\t" // mm4/mm5 = 2*L2 - 5*L3 + 5*L4 - 2*L5 ("middleEnergy")
00846 "psubw %%mm3, %%mm5 \n\t"
00847
00848 "movq (%%"REG_a", %1, 4), %%mm6 \n\t" // L6, low half
00849 "punpcklbw %%mm7, %%mm6 \n\t"
00850 "psubw %%mm6, %%mm2 \n\t" // L5 - L6
00851 "movq (%%"REG_a", %1, 4), %%mm6 \n\t" // L6 again, high half
00852 "punpckhbw %%mm7, %%mm6 \n\t"
00853 "psubw %%mm6, %%mm3 \n\t"
00854
00855 "paddw %%mm0, %%mm0 \n\t" // 2*L4
00856 "paddw %%mm1, %%mm1 \n\t"
00857 "psubw %%mm2, %%mm0 \n\t"
00858 "psubw %%mm3, %%mm1 \n\t"
00859
00860 "psllw $2, %%mm2 \n\t"
00861 "psllw $2, %%mm3 \n\t"
00862 "psubw %%mm2, %%mm0 \n\t" // 2*L4 - 5*L5 + 5*L6
00863 "psubw %%mm3, %%mm1 \n\t"
00864
00865 "movq (%0, %1, 4), %%mm2 \n\t" // L7
00866 "movq %%mm2, %%mm3 \n\t"
00867 "punpcklbw %%mm7, %%mm2 \n\t"
00868 "punpckhbw %%mm7, %%mm3 \n\t"
00869
00870 "paddw %%mm2, %%mm2 \n\t"
00871 "paddw %%mm3, %%mm3 \n\t"
00872 "psubw %%mm2, %%mm0 \n\t" // mm0/mm1 = 2*L4 - 5*L5 + 5*L6 - 2*L7 ("rightEnergy")
00873 "psubw %%mm3, %%mm1 \n\t"
00874
00875 "movq (%%"REG_c"), %%mm2 \n\t" // reload left energy
00876 "movq 8(%%"REG_c"), %%mm3 \n\t"
00877
// Absolute values of right (mm0/mm1) and left (mm2/mm3) energy.
00878 #if HAVE_MMX2
00879 "movq %%mm7, %%mm6 \n\t"
00880 "psubw %%mm0, %%mm6 \n\t"
00881 "pmaxsw %%mm6, %%mm0 \n\t"
00882 "movq %%mm7, %%mm6 \n\t"
00883 "psubw %%mm1, %%mm6 \n\t"
00884 "pmaxsw %%mm6, %%mm1 \n\t"
00885 "movq %%mm7, %%mm6 \n\t"
00886 "psubw %%mm2, %%mm6 \n\t"
00887 "pmaxsw %%mm6, %%mm2 \n\t"
00888 "movq %%mm7, %%mm6 \n\t"
00889 "psubw %%mm3, %%mm6 \n\t"
00890 "pmaxsw %%mm6, %%mm3 \n\t"
00891 #else
00892 "movq %%mm7, %%mm6 \n\t"
00893 "pcmpgtw %%mm0, %%mm6 \n\t"
00894 "pxor %%mm6, %%mm0 \n\t"
00895 "psubw %%mm6, %%mm0 \n\t"
00896 "movq %%mm7, %%mm6 \n\t"
00897 "pcmpgtw %%mm1, %%mm6 \n\t"
00898 "pxor %%mm6, %%mm1 \n\t"
00899 "psubw %%mm6, %%mm1 \n\t"
00900 "movq %%mm7, %%mm6 \n\t"
00901 "pcmpgtw %%mm2, %%mm6 \n\t"
00902 "pxor %%mm6, %%mm2 \n\t"
00903 "psubw %%mm6, %%mm2 \n\t"
00904 "movq %%mm7, %%mm6 \n\t"
00905 "pcmpgtw %%mm3, %%mm6 \n\t"
00906 "pxor %%mm6, %%mm3 \n\t"
00907 "psubw %%mm6, %%mm3 \n\t"
00908 #endif
00909
// mm0/mm1 = min(|left|, |right|).
00910 #if HAVE_MMX2
00911 "pminsw %%mm2, %%mm0 \n\t"
00912 "pminsw %%mm3, %%mm1 \n\t"
00913 #else
00914 "movq %%mm0, %%mm6 \n\t"
00915 "psubusw %%mm2, %%mm6 \n\t"
00916 "psubw %%mm6, %%mm0 \n\t"
00917 "movq %%mm1, %%mm6 \n\t"
00918 "psubusw %%mm3, %%mm6 \n\t"
00919 "psubw %%mm6, %%mm1 \n\t"
00920 #endif
00921
00922 "movd %2, %%mm2 \n\t" // low 4 bytes of pQPb
00923 "punpcklbw %%mm7, %%mm2 \n\t" // widened to 4 words
00924
// |middle energy| in mm4/mm5; mm6/mm7 keep its sign masks.
00925 "movq %%mm7, %%mm6 \n\t"
00926 "pcmpgtw %%mm4, %%mm6 \n\t"
00927 "pxor %%mm6, %%mm4 \n\t"
00928 "psubw %%mm6, %%mm4 \n\t"
00929 "pcmpgtw %%mm5, %%mm7 \n\t"
00930 "pxor %%mm7, %%mm5 \n\t"
00931 "psubw %%mm7, %%mm5 \n\t"
00932
00933 "psllw $3, %%mm2 \n\t" // 8 * (pQPb bytes)
00934 "movq %%mm2, %%mm3 \n\t"
00935 "pcmpgtw %%mm4, %%mm2 \n\t" // mask: |middle| < 8*QP
00936 "pcmpgtw %%mm5, %%mm3 \n\t"
00937 "pand %%mm2, %%mm4 \n\t"
00938 "pand %%mm3, %%mm5 \n\t"
00939
00940
00941 "psubusw %%mm0, %%mm4 \n\t" // d = max(|middle| - min(|left|,|right|), 0)
00942 "psubusw %%mm1, %%mm5 \n\t"
00943
00944
// Mirrors "d= (5*d + 32) >> 6" of the C code.
// NOTE(review): presumably w05 holds words of 5 and w20 words of 0x20 — confirm.
00945 "movq "MANGLE(w05)", %%mm2 \n\t"
00946 "pmullw %%mm2, %%mm4 \n\t"
00947 "pmullw %%mm2, %%mm5 \n\t"
00948 "movq "MANGLE(w20)", %%mm2 \n\t"
00949 "paddw %%mm2, %%mm4 \n\t"
00950 "paddw %%mm2, %%mm5 \n\t"
00951 "psrlw $6, %%mm4 \n\t"
00952 "psrlw $6, %%mm5 \n\t"
00953
00954 "movq 16(%%"REG_c"), %%mm0 \n\t" // reload L3 - L4
00955 "movq 24(%%"REG_c"), %%mm1 \n\t"
00956
00957 "pxor %%mm2, %%mm2 \n\t"
00958 "pxor %%mm3, %%mm3 \n\t"
00959
00960 "pcmpgtw %%mm0, %%mm2 \n\t" // sign masks of L3 - L4
00961 "pcmpgtw %%mm1, %%mm3 \n\t"
00962 "pxor %%mm2, %%mm0 \n\t"
00963 "pxor %%mm3, %%mm1 \n\t"
00964 "psubw %%mm2, %%mm0 \n\t"
00965 "psubw %%mm3, %%mm1 \n\t"
00966 "psrlw $1, %%mm0 \n\t" // |L3 - L4| / 2
00967 "psrlw $1, %%mm1 \n\t"
00968
// Keep d only where the sign of (L3 - L4) differs from the sign of the
// middle energy.
00969 "pxor %%mm6, %%mm2 \n\t"
00970 "pxor %%mm7, %%mm3 \n\t"
00971 "pand %%mm2, %%mm4 \n\t"
00972 "pand %%mm3, %%mm5 \n\t"
00973
// Limit d to |L3 - L4| / 2.
00974 #if HAVE_MMX2
00975 "pminsw %%mm0, %%mm4 \n\t"
00976 "pminsw %%mm1, %%mm5 \n\t"
00977 #else
00978 "movq %%mm4, %%mm2 \n\t"
00979 "psubusw %%mm0, %%mm2 \n\t"
00980 "psubw %%mm2, %%mm4 \n\t"
00981 "movq %%mm5, %%mm2 \n\t"
00982 "psubusw %%mm1, %%mm2 \n\t"
00983 "psubw %%mm2, %%mm5 \n\t"
00984 #endif
// Give d the sign of the middle energy, pack back to bytes, then add it to
// row 3 and subtract it from row 4.
00985 "pxor %%mm6, %%mm4 \n\t"
00986 "pxor %%mm7, %%mm5 \n\t"
00987 "psubw %%mm6, %%mm4 \n\t"
00988 "psubw %%mm7, %%mm5 \n\t"
00989 "packsswb %%mm5, %%mm4 \n\t"
00990 "movq (%0), %%mm0 \n\t"
00991 "paddb %%mm4, %%mm0 \n\t"
00992 "movq %%mm0, (%0) \n\t"
00993 "movq (%0, %1), %%mm0 \n\t"
00994 "psubb %%mm4, %%mm0 \n\t"
00995 "movq %%mm0, (%0, %1) \n\t"
00996
00997 : "+r" (src)
00998 : "r" ((x86_reg)stride), "m" (c->pQPb)
00999 : "%"REG_a, "%"REG_c
01000 );
01001 #else //HAVE_MMX2 || HAVE_AMD3DNOW
// lN is the byte offset of row N.
01002 const int l1= stride;
01003 const int l2= stride + l1;
01004 const int l3= stride + l2;
01005 const int l4= stride + l3;
01006 const int l5= stride + l4;
01007 const int l6= stride + l5;
01008 const int l7= stride + l6;
01009 const int l8= stride + l7;
01010
01011 int x;
01012 src+= stride*3;
01013 for(x=0; x<BLOCK_SIZE; x++){
// 4-tap measure (weights 2,-5,5,-2) centred on the l4/l5 boundary.
01014 const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
01015 if(FFABS(middleEnergy) < 8*c->QP){
// q is half the step across the boundary; it bounds the correction below.
01016 const int q=(src[l4] - src[l5])/2;
// The same measure, shifted two rows up (left) and two rows down (right).
01017 const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
01018 const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
01019
01020 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
01021 d= FFMAX(d, 0);
01022
// Scale by 5/64 with rounding, then take the sign opposite to middleEnergy.
01023 d= (5*d + 32) >> 6;
01024 d*= FFSIGN(-middleEnergy);
01025
// Clamp d to the range between 0 and q.
01026 if(q>0){
01027 d= d<0 ? 0 : d;
01028 d= d>q ? q : d;
01029 }else{
01030 d= d>0 ? 0 : d;
01031 d= d<q ? q : d;
01032 }
01033
01034 src[l4]-= d;
01035 src[l5]+= d;
01036 }
01037 src++;
01038 }
01039 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01040 }
01041 #endif //HAVE_ALTIVEC
01042
01043 #if !HAVE_ALTIVEC
01044 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
01045 {
01046 #if HAVE_MMX2 || HAVE_AMD3DNOW
01047 __asm__ volatile(
01048 "pxor %%mm6, %%mm6 \n\t"
01049 "pcmpeqb %%mm7, %%mm7 \n\t"
01050 "movq %2, %%mm0 \n\t"
01051 "punpcklbw %%mm6, %%mm0 \n\t"
01052 "psrlw $1, %%mm0 \n\t"
01053 "psubw %%mm7, %%mm0 \n\t"
01054 "packuswb %%mm0, %%mm0 \n\t"
01055 "movq %%mm0, %3 \n\t"
01056
01057 "lea (%0, %1), %%"REG_a" \n\t"
01058 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01059
01060
01061
01062
01063 #undef REAL_FIND_MIN_MAX
01064 #undef FIND_MIN_MAX
01065 #if HAVE_MMX2
01066 #define REAL_FIND_MIN_MAX(addr)\
01067 "movq " #addr ", %%mm0 \n\t"\
01068 "pminub %%mm0, %%mm7 \n\t"\
01069 "pmaxub %%mm0, %%mm6 \n\t"
01070 #else
01071 #define REAL_FIND_MIN_MAX(addr)\
01072 "movq " #addr ", %%mm0 \n\t"\
01073 "movq %%mm7, %%mm1 \n\t"\
01074 "psubusb %%mm0, %%mm6 \n\t"\
01075 "paddb %%mm0, %%mm6 \n\t"\
01076 "psubusb %%mm0, %%mm1 \n\t"\
01077 "psubb %%mm1, %%mm7 \n\t"
01078 #endif
01079 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr)
01080
01081 FIND_MIN_MAX((%%REGa))
01082 FIND_MIN_MAX((%%REGa, %1))
01083 FIND_MIN_MAX((%%REGa, %1, 2))
01084 FIND_MIN_MAX((%0, %1, 4))
01085 FIND_MIN_MAX((%%REGd))
01086 FIND_MIN_MAX((%%REGd, %1))
01087 FIND_MIN_MAX((%%REGd, %1, 2))
01088 FIND_MIN_MAX((%0, %1, 8))
01089
01090 "movq %%mm7, %%mm4 \n\t"
01091 "psrlq $8, %%mm7 \n\t"
01092 #if HAVE_MMX2
01093 "pminub %%mm4, %%mm7 \n\t"
01094 "pshufw $0xF9, %%mm7, %%mm4 \n\t"
01095 "pminub %%mm4, %%mm7 \n\t"
01096 "pshufw $0xFE, %%mm7, %%mm4 \n\t"
01097 "pminub %%mm4, %%mm7 \n\t"
01098 #else
01099 "movq %%mm7, %%mm1 \n\t"
01100 "psubusb %%mm4, %%mm1 \n\t"
01101 "psubb %%mm1, %%mm7 \n\t"
01102 "movq %%mm7, %%mm4 \n\t"
01103 "psrlq $16, %%mm7 \n\t"
01104 "movq %%mm7, %%mm1 \n\t"
01105 "psubusb %%mm4, %%mm1 \n\t"
01106 "psubb %%mm1, %%mm7 \n\t"
01107 "movq %%mm7, %%mm4 \n\t"
01108 "psrlq $32, %%mm7 \n\t"
01109 "movq %%mm7, %%mm1 \n\t"
01110 "psubusb %%mm4, %%mm1 \n\t"
01111 "psubb %%mm1, %%mm7 \n\t"
01112 #endif
01113
01114
01115 "movq %%mm6, %%mm4 \n\t"
01116 "psrlq $8, %%mm6 \n\t"
01117 #if HAVE_MMX2
01118 "pmaxub %%mm4, %%mm6 \n\t"
01119 "pshufw $0xF9, %%mm6, %%mm4 \n\t"
01120 "pmaxub %%mm4, %%mm6 \n\t"
01121 "pshufw $0xFE, %%mm6, %%mm4 \n\t"
01122 "pmaxub %%mm4, %%mm6 \n\t"
01123 #else
01124 "psubusb %%mm4, %%mm6 \n\t"
01125 "paddb %%mm4, %%mm6 \n\t"
01126 "movq %%mm6, %%mm4 \n\t"
01127 "psrlq $16, %%mm6 \n\t"
01128 "psubusb %%mm4, %%mm6 \n\t"
01129 "paddb %%mm4, %%mm6 \n\t"
01130 "movq %%mm6, %%mm4 \n\t"
01131 "psrlq $32, %%mm6 \n\t"
01132 "psubusb %%mm4, %%mm6 \n\t"
01133 "paddb %%mm4, %%mm6 \n\t"
01134 #endif
01135 "movq %%mm6, %%mm0 \n\t"
01136 "psubb %%mm7, %%mm6 \n\t"
01137 "movd %%mm6, %%ecx \n\t"
01138 "cmpb "MANGLE(deringThreshold)", %%cl \n\t"
01139 " jb 1f \n\t"
01140 "lea -24(%%"REG_SP"), %%"REG_c" \n\t"
01141 "and "ALIGN_MASK", %%"REG_c" \n\t"
01142 PAVGB(%%mm0, %%mm7)
01143 "punpcklbw %%mm7, %%mm7 \n\t"
01144 "punpcklbw %%mm7, %%mm7 \n\t"
01145 "punpcklbw %%mm7, %%mm7 \n\t"
01146 "movq %%mm7, (%%"REG_c") \n\t"
01147
01148 "movq (%0), %%mm0 \n\t"
01149 "movq %%mm0, %%mm1 \n\t"
01150 "movq %%mm0, %%mm2 \n\t"
01151 "psllq $8, %%mm1 \n\t"
01152 "psrlq $8, %%mm2 \n\t"
01153 "movd -4(%0), %%mm3 \n\t"
01154 "movd 8(%0), %%mm4 \n\t"
01155 "psrlq $24, %%mm3 \n\t"
01156 "psllq $56, %%mm4 \n\t"
01157 "por %%mm3, %%mm1 \n\t"
01158 "por %%mm4, %%mm2 \n\t"
01159 "movq %%mm1, %%mm3 \n\t"
01160 PAVGB(%%mm2, %%mm1)
01161 PAVGB(%%mm0, %%mm1)
01162 "psubusb %%mm7, %%mm0 \n\t"
01163 "psubusb %%mm7, %%mm2 \n\t"
01164 "psubusb %%mm7, %%mm3 \n\t"
01165 "pcmpeqb "MANGLE(b00)", %%mm0 \n\t"
01166 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01167 "pcmpeqb "MANGLE(b00)", %%mm3 \n\t"
01168 "paddb %%mm2, %%mm0 \n\t"
01169 "paddb %%mm3, %%mm0 \n\t"
01170
01171 "movq (%%"REG_a"), %%mm2 \n\t"
01172 "movq %%mm2, %%mm3 \n\t"
01173 "movq %%mm2, %%mm4 \n\t"
01174 "psllq $8, %%mm3 \n\t"
01175 "psrlq $8, %%mm4 \n\t"
01176 "movd -4(%%"REG_a"), %%mm5 \n\t"
01177 "movd 8(%%"REG_a"), %%mm6 \n\t"
01178 "psrlq $24, %%mm5 \n\t"
01179 "psllq $56, %%mm6 \n\t"
01180 "por %%mm5, %%mm3 \n\t"
01181 "por %%mm6, %%mm4 \n\t"
01182 "movq %%mm3, %%mm5 \n\t"
01183 PAVGB(%%mm4, %%mm3)
01184 PAVGB(%%mm2, %%mm3)
01185 "psubusb %%mm7, %%mm2 \n\t"
01186 "psubusb %%mm7, %%mm4 \n\t"
01187 "psubusb %%mm7, %%mm5 \n\t"
01188 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01189 "pcmpeqb "MANGLE(b00)", %%mm4 \n\t"
01190 "pcmpeqb "MANGLE(b00)", %%mm5 \n\t"
01191 "paddb %%mm4, %%mm2 \n\t"
01192 "paddb %%mm5, %%mm2 \n\t"
01193
01194 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01195 "movq " #src ", " #sx " \n\t" \
01196 "movq " #sx ", " #lx " \n\t" \
01197 "movq " #sx ", " #t0 " \n\t" \
01198 "psllq $8, " #lx " \n\t"\
01199 "psrlq $8, " #t0 " \n\t"\
01200 "movd -4" #src ", " #t1 " \n\t"\
01201 "psrlq $24, " #t1 " \n\t"\
01202 "por " #t1 ", " #lx " \n\t" \
01203 "movd 8" #src ", " #t1 " \n\t"\
01204 "psllq $56, " #t1 " \n\t"\
01205 "por " #t1 ", " #t0 " \n\t" \
01206 "movq " #lx ", " #t1 " \n\t" \
01207 PAVGB(t0, lx) \
01208 PAVGB(sx, lx) \
01209 PAVGB(lx, pplx) \
01210 "movq " #lx ", 8(%%"REG_c") \n\t"\
01211 "movq (%%"REG_c"), " #lx " \n\t"\
01212 "psubusb " #lx ", " #t1 " \n\t"\
01213 "psubusb " #lx ", " #t0 " \n\t"\
01214 "psubusb " #lx ", " #sx " \n\t"\
01215 "movq "MANGLE(b00)", " #lx " \n\t"\
01216 "pcmpeqb " #lx ", " #t1 " \n\t" \
01217 "pcmpeqb " #lx ", " #t0 " \n\t" \
01218 "pcmpeqb " #lx ", " #sx " \n\t" \
01219 "paddb " #t1 ", " #t0 " \n\t"\
01220 "paddb " #t0 ", " #sx " \n\t"\
01221 \
01222 PAVGB(plx, pplx) \
01223 "movq " #dst ", " #t0 " \n\t" \
01224 "movq " #t0 ", " #t1 " \n\t" \
01225 "psubusb %3, " #t0 " \n\t"\
01226 "paddusb %3, " #t1 " \n\t"\
01227 PMAXUB(t0, pplx)\
01228 PMINUB(t1, pplx, t0)\
01229 "paddb " #sx ", " #ppsx " \n\t"\
01230 "paddb " #psx ", " #ppsx " \n\t"\
01231 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\
01232 "pand "MANGLE(b08)", " #ppsx " \n\t"\
01233 "pcmpeqb " #lx ", " #ppsx " \n\t"\
01234 "pand " #ppsx ", " #pplx " \n\t"\
01235 "pandn " #dst ", " #ppsx " \n\t"\
01236 "por " #pplx ", " #ppsx " \n\t"\
01237 "movq " #ppsx ", " #dst " \n\t"\
01238 "movq 8(%%"REG_c"), " #lx " \n\t"
01239
01240 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01241 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258 DERING_CORE((%%REGa) ,(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01259 DERING_CORE((%%REGa, %1) ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01260 DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01261 DERING_CORE((%0, %1, 4) ,(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01262 DERING_CORE((%%REGd) ,(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01263 DERING_CORE((%%REGd, %1) ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01264 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01265 DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01266
01267 "1: \n\t"
01268 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2)
01269 : "%"REG_a, "%"REG_d, "%"REG_c
01270 );
01271 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01272 int y;
01273 int min=255;
01274 int max=0;
01275 int avg;
01276 uint8_t *p;
01277 int s[10];
01278 const int QP2= c->QP/2 + 1;
01279
01280 for(y=1; y<9; y++){
01281 int x;
01282 p= src + stride*y;
01283 for(x=1; x<9; x++){
01284 p++;
01285 if(*p > max) max= *p;
01286 if(*p < min) min= *p;
01287 }
01288 }
01289 avg= (min + max + 1)>>1;
01290
01291 if(max - min <deringThreshold) return;
01292
01293 for(y=0; y<10; y++){
01294 int t = 0;
01295
01296 if(src[stride*y + 0] > avg) t+= 1;
01297 if(src[stride*y + 1] > avg) t+= 2;
01298 if(src[stride*y + 2] > avg) t+= 4;
01299 if(src[stride*y + 3] > avg) t+= 8;
01300 if(src[stride*y + 4] > avg) t+= 16;
01301 if(src[stride*y + 5] > avg) t+= 32;
01302 if(src[stride*y + 6] > avg) t+= 64;
01303 if(src[stride*y + 7] > avg) t+= 128;
01304 if(src[stride*y + 8] > avg) t+= 256;
01305 if(src[stride*y + 9] > avg) t+= 512;
01306
01307 t |= (~t)<<16;
01308 t &= (t<<1) & (t>>1);
01309 s[y] = t;
01310 }
01311
01312 for(y=1; y<9; y++){
01313 int t = s[y-1] & s[y] & s[y+1];
01314 t|= t>>16;
01315 s[y-1]= t;
01316 }
01317
01318 for(y=1; y<9; y++){
01319 int x;
01320 int t = s[y-1];
01321
01322 p= src + stride*y;
01323 for(x=1; x<9; x++){
01324 p++;
01325 if(t & (1<<x)){
01326 int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
01327 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
01328 +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
01329 f= (f + 8)>>4;
01330
01331 #ifdef DEBUG_DERING_THRESHOLD
01332 __asm__ volatile("emms\n\t":);
01333 {
01334 static long long numPixels=0;
01335 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
01336
01337
01338
01339 if(max-min < 20){
01340 static int numSkipped=0;
01341 static int errorSum=0;
01342 static int worstQP=0;
01343 static int worstRange=0;
01344 static int worstDiff=0;
01345 int diff= (f - *p);
01346 int absDiff= FFABS(diff);
01347 int error= diff*diff;
01348
01349 if(x==1 || x==8 || y==1 || y==8) continue;
01350
01351 numSkipped++;
01352 if(absDiff > worstDiff){
01353 worstDiff= absDiff;
01354 worstQP= QP;
01355 worstRange= max-min;
01356 }
01357 errorSum+= error;
01358
01359 if(1024LL*1024LL*1024LL % numSkipped == 0){
01360 av_log(c, AV_LOG_INFO, "sum:%1.3f, skip:%d, wQP:%d, "
01361 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
01362 (float)errorSum/numSkipped, numSkipped, worstQP, worstRange,
01363 worstDiff, (float)numSkipped/numPixels);
01364 }
01365 }
01366 }
01367 #endif
01368 if (*p + QP2 < f) *p= *p + QP2;
01369 else if(*p - QP2 > f) *p= *p - QP2;
01370 else *p=f;
01371 }
01372 }
01373 }
01374 #ifdef DEBUG_DERING_THRESHOLD
01375 if(max-min < 20){
01376 for(y=1; y<9; y++){
01377 int x;
01378 int t = 0;
01379 p= src + stride*y;
01380 for(x=1; x<9; x++){
01381 p++;
01382 *p = FFMIN(*p + 20, 255);
01383 }
01384 }
01385
01386 }
01387 #endif
01388 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01389 }
01390 #endif //HAVE_ALTIVEC
01391
01398 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
01399 {
01400 #if HAVE_MMX2 || HAVE_AMD3DNOW
01401 src+= 4*stride;
01402 __asm__ volatile(
01403 "lea (%0, %1), %%"REG_a" \n\t"
01404 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
01405
01406
01407
01408 "movq (%0), %%mm0 \n\t"
01409 "movq (%%"REG_a", %1), %%mm1 \n\t"
01410 PAVGB(%%mm1, %%mm0)
01411 "movq %%mm0, (%%"REG_a") \n\t"
01412 "movq (%0, %1, 4), %%mm0 \n\t"
01413 PAVGB(%%mm0, %%mm1)
01414 "movq %%mm1, (%%"REG_a", %1, 2) \n\t"
01415 "movq (%%"REG_c", %1), %%mm1 \n\t"
01416 PAVGB(%%mm1, %%mm0)
01417 "movq %%mm0, (%%"REG_c") \n\t"
01418 "movq (%0, %1, 8), %%mm0 \n\t"
01419 PAVGB(%%mm0, %%mm1)
01420 "movq %%mm1, (%%"REG_c", %1, 2) \n\t"
01421
01422 : : "r" (src), "r" ((x86_reg)stride)
01423 : "%"REG_a, "%"REG_c
01424 );
01425 #else
01426 int a, b, x;
01427 src+= 4*stride;
01428
01429 for(x=0; x<2; x++){
01430 a= *(uint32_t*)&src[stride*0];
01431 b= *(uint32_t*)&src[stride*2];
01432 *(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01433 a= *(uint32_t*)&src[stride*4];
01434 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01435 b= *(uint32_t*)&src[stride*6];
01436 *(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01437 a= *(uint32_t*)&src[stride*8];
01438 *(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01439 src += 4;
01440 }
01441 #endif
01442 }
01443
01451 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
01452 {
01453 #if HAVE_MMX2 || HAVE_AMD3DNOW
01454 src+= stride*3;
01455 __asm__ volatile(
01456 "lea (%0, %1), %%"REG_a" \n\t"
01457 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01458 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
01459 "add %1, %%"REG_c" \n\t"
01460 "pxor %%mm7, %%mm7 \n\t"
01461
01462
01463
01464 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
01465 "movq " #a ", %%mm0 \n\t"\
01466 "movq " #b ", %%mm1 \n\t"\
01467 "movq " #d ", %%mm2 \n\t"\
01468 "movq " #e ", %%mm3 \n\t"\
01469 PAVGB(%%mm2, %%mm1) \
01470 PAVGB(%%mm3, %%mm0) \
01471 "movq %%mm0, %%mm2 \n\t"\
01472 "punpcklbw %%mm7, %%mm0 \n\t"\
01473 "punpckhbw %%mm7, %%mm2 \n\t"\
01474 "movq %%mm1, %%mm3 \n\t"\
01475 "punpcklbw %%mm7, %%mm1 \n\t"\
01476 "punpckhbw %%mm7, %%mm3 \n\t"\
01477 "psubw %%mm1, %%mm0 \n\t" \
01478 "psubw %%mm3, %%mm2 \n\t" \
01479 "psraw $3, %%mm0 \n\t" \
01480 "psraw $3, %%mm2 \n\t" \
01481 "psubw %%mm0, %%mm1 \n\t" \
01482 "psubw %%mm2, %%mm3 \n\t" \
01483 "packuswb %%mm3, %%mm1 \n\t"\
01484 "movq %%mm1, " #c " \n\t"
01485 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
01486
01487 DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
01488 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%0, %1, 8))
01489 DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
01490 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
01491
01492 : : "r" (src), "r" ((x86_reg)stride)
01493 : "%"REG_a, "%"REG_d, "%"REG_c
01494 );
01495 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01496 int x;
01497 src+= stride*3;
01498 for(x=0; x<8; x++){
01499 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
01500 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
01501 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
01502 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
01503 src++;
01504 }
01505 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01506 }
01507
01515 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
01516 {
01517 #if HAVE_MMX2 || HAVE_AMD3DNOW
01518 src+= stride*4;
01519 __asm__ volatile(
01520 "lea (%0, %1), %%"REG_a" \n\t"
01521 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01522 "pxor %%mm7, %%mm7 \n\t"
01523 "movq (%2), %%mm0 \n\t"
01524
01525
01526
01527 #define REAL_DEINT_FF(a,b,c,d)\
01528 "movq " #a ", %%mm1 \n\t"\
01529 "movq " #b ", %%mm2 \n\t"\
01530 "movq " #c ", %%mm3 \n\t"\
01531 "movq " #d ", %%mm4 \n\t"\
01532 PAVGB(%%mm3, %%mm1) \
01533 PAVGB(%%mm4, %%mm0) \
01534 "movq %%mm0, %%mm3 \n\t"\
01535 "punpcklbw %%mm7, %%mm0 \n\t"\
01536 "punpckhbw %%mm7, %%mm3 \n\t"\
01537 "movq %%mm1, %%mm4 \n\t"\
01538 "punpcklbw %%mm7, %%mm1 \n\t"\
01539 "punpckhbw %%mm7, %%mm4 \n\t"\
01540 "psllw $2, %%mm1 \n\t"\
01541 "psllw $2, %%mm4 \n\t"\
01542 "psubw %%mm0, %%mm1 \n\t"\
01543 "psubw %%mm3, %%mm4 \n\t"\
01544 "movq %%mm2, %%mm5 \n\t"\
01545 "movq %%mm2, %%mm0 \n\t"\
01546 "punpcklbw %%mm7, %%mm2 \n\t"\
01547 "punpckhbw %%mm7, %%mm5 \n\t"\
01548 "paddw %%mm2, %%mm1 \n\t"\
01549 "paddw %%mm5, %%mm4 \n\t"\
01550 "psraw $2, %%mm1 \n\t"\
01551 "psraw $2, %%mm4 \n\t"\
01552 "packuswb %%mm4, %%mm1 \n\t"\
01553 "movq %%mm1, " #b " \n\t"\
01554
01555 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)
01556
01557 DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2))
01558 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01559 DEINT_FF((%0, %1, 4) , (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))
01560 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01561
01562 "movq %%mm0, (%2) \n\t"
01563 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
01564 : "%"REG_a, "%"REG_d
01565 );
01566 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01567 int x;
01568 src+= stride*4;
01569 for(x=0; x<8; x++){
01570 int t1= tmp[x];
01571 int t2= src[stride*1];
01572
01573 src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
01574 t1= src[stride*4];
01575 src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
01576 t2= src[stride*6];
01577 src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
01578 t1= src[stride*8];
01579 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
01580 tmp[x]= t1;
01581
01582 src++;
01583 }
01584 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01585 }
01586
01594 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
01595 {
01596 #if HAVE_MMX2 || HAVE_AMD3DNOW
01597 src+= stride*4;
01598 __asm__ volatile(
01599 "lea (%0, %1), %%"REG_a" \n\t"
01600 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01601 "pxor %%mm7, %%mm7 \n\t"
01602 "movq (%2), %%mm0 \n\t"
01603 "movq (%3), %%mm1 \n\t"
01604
01605
01606
01607 #define REAL_DEINT_L5(t1,t2,a,b,c)\
01608 "movq " #a ", %%mm2 \n\t"\
01609 "movq " #b ", %%mm3 \n\t"\
01610 "movq " #c ", %%mm4 \n\t"\
01611 PAVGB(t2, %%mm3) \
01612 PAVGB(t1, %%mm4) \
01613 "movq %%mm2, %%mm5 \n\t"\
01614 "movq %%mm2, " #t1 " \n\t"\
01615 "punpcklbw %%mm7, %%mm2 \n\t"\
01616 "punpckhbw %%mm7, %%mm5 \n\t"\
01617 "movq %%mm2, %%mm6 \n\t"\
01618 "paddw %%mm2, %%mm2 \n\t"\
01619 "paddw %%mm6, %%mm2 \n\t"\
01620 "movq %%mm5, %%mm6 \n\t"\
01621 "paddw %%mm5, %%mm5 \n\t"\
01622 "paddw %%mm6, %%mm5 \n\t"\
01623 "movq %%mm3, %%mm6 \n\t"\
01624 "punpcklbw %%mm7, %%mm3 \n\t"\
01625 "punpckhbw %%mm7, %%mm6 \n\t"\
01626 "paddw %%mm3, %%mm3 \n\t"\
01627 "paddw %%mm6, %%mm6 \n\t"\
01628 "paddw %%mm3, %%mm2 \n\t"\
01629 "paddw %%mm6, %%mm5 \n\t"\
01630 "movq %%mm4, %%mm6 \n\t"\
01631 "punpcklbw %%mm7, %%mm4 \n\t"\
01632 "punpckhbw %%mm7, %%mm6 \n\t"\
01633 "psubw %%mm4, %%mm2 \n\t"\
01634 "psubw %%mm6, %%mm5 \n\t"\
01635 "psraw $2, %%mm2 \n\t"\
01636 "psraw $2, %%mm5 \n\t"\
01637 "packuswb %%mm5, %%mm2 \n\t"\
01638 "movq %%mm2, " #a " \n\t"\
01639
01640 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)
01641
01642 DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) )
01643 DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2))
01644 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) )
01645 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01646 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) )
01647 DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2))
01648 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )
01649 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01650
01651 "movq %%mm0, (%2) \n\t"
01652 "movq %%mm1, (%3) \n\t"
01653 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
01654 : "%"REG_a, "%"REG_d
01655 );
01656 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01657 int x;
01658 src+= stride*4;
01659 for(x=0; x<8; x++){
01660 int t1= tmp[x];
01661 int t2= tmp2[x];
01662 int t3= src[0];
01663
01664 src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
01665 t1= src[stride*1];
01666 src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
01667 t2= src[stride*2];
01668 src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
01669 t3= src[stride*3];
01670 src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
01671 t1= src[stride*4];
01672 src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
01673 t2= src[stride*5];
01674 src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
01675 t3= src[stride*6];
01676 src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
01677 t1= src[stride*7];
01678 src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
01679
01680 tmp[x]= t3;
01681 tmp2[x]= t1;
01682
01683 src++;
01684 }
01685 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01686 }
01687
01695 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
01696 {
01697 #if HAVE_MMX2 || HAVE_AMD3DNOW
01698 src+= 4*stride;
01699 __asm__ volatile(
01700 "lea (%0, %1), %%"REG_a" \n\t"
01701 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01702
01703
01704
01705 "movq (%2), %%mm0 \n\t"
01706 "movq (%%"REG_a"), %%mm1 \n\t"
01707 PAVGB(%%mm1, %%mm0)
01708 "movq (%0), %%mm2 \n\t"
01709 PAVGB(%%mm2, %%mm0)
01710 "movq %%mm0, (%0) \n\t"
01711 "movq (%%"REG_a", %1), %%mm0 \n\t"
01712 PAVGB(%%mm0, %%mm2)
01713 PAVGB(%%mm1, %%mm2)
01714 "movq %%mm2, (%%"REG_a") \n\t"
01715 "movq (%%"REG_a", %1, 2), %%mm2 \n\t"
01716 PAVGB(%%mm2, %%mm1)
01717 PAVGB(%%mm0, %%mm1)
01718 "movq %%mm1, (%%"REG_a", %1) \n\t"
01719 "movq (%0, %1, 4), %%mm1 \n\t"
01720 PAVGB(%%mm1, %%mm0)
01721 PAVGB(%%mm2, %%mm0)
01722 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
01723 "movq (%%"REG_d"), %%mm0 \n\t"
01724 PAVGB(%%mm0, %%mm2)
01725 PAVGB(%%mm1, %%mm2)
01726 "movq %%mm2, (%0, %1, 4) \n\t"
01727 "movq (%%"REG_d", %1), %%mm2 \n\t"
01728 PAVGB(%%mm2, %%mm1)
01729 PAVGB(%%mm0, %%mm1)
01730 "movq %%mm1, (%%"REG_d") \n\t"
01731 "movq (%%"REG_d", %1, 2), %%mm1 \n\t"
01732 PAVGB(%%mm1, %%mm0)
01733 PAVGB(%%mm2, %%mm0)
01734 "movq %%mm0, (%%"REG_d", %1) \n\t"
01735 "movq (%0, %1, 8), %%mm0 \n\t"
01736 PAVGB(%%mm0, %%mm2)
01737 PAVGB(%%mm1, %%mm2)
01738 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01739 "movq %%mm1, (%2) \n\t"
01740
01741 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
01742 : "%"REG_a, "%"REG_d
01743 );
01744 #else //HAVE_MMX2 || HAVE_AMD3DNOW
01745 int a, b, c, x;
01746 src+= 4*stride;
01747
01748 for(x=0; x<2; x++){
01749 a= *(uint32_t*)&tmp[stride*0];
01750 b= *(uint32_t*)&src[stride*0];
01751 c= *(uint32_t*)&src[stride*1];
01752 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01753 *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01754
01755 a= *(uint32_t*)&src[stride*2];
01756 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01757 *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01758
01759 b= *(uint32_t*)&src[stride*3];
01760 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01761 *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01762
01763 c= *(uint32_t*)&src[stride*4];
01764 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01765 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01766
01767 a= *(uint32_t*)&src[stride*5];
01768 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01769 *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01770
01771 b= *(uint32_t*)&src[stride*6];
01772 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01773 *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01774
01775 c= *(uint32_t*)&src[stride*7];
01776 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01777 *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01778
01779 a= *(uint32_t*)&src[stride*8];
01780 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01781 *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01782
01783 *(uint32_t*)&tmp[stride*0]= c;
01784 src += 4;
01785 tmp += 4;
01786 }
01787 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
01788 }
01789
01796 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
01797 {
01798 #if HAVE_MMX
01799 src+= 4*stride;
01800 #if HAVE_MMX2
01801 __asm__ volatile(
01802 "lea (%0, %1), %%"REG_a" \n\t"
01803 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01804
01805
01806
01807 "movq (%0), %%mm0 \n\t"
01808 "movq (%%"REG_a", %1), %%mm2 \n\t"
01809 "movq (%%"REG_a"), %%mm1 \n\t"
01810 "movq %%mm0, %%mm3 \n\t"
01811 "pmaxub %%mm1, %%mm0 \n\t"
01812 "pminub %%mm3, %%mm1 \n\t"
01813 "pmaxub %%mm2, %%mm1 \n\t"
01814 "pminub %%mm1, %%mm0 \n\t"
01815 "movq %%mm0, (%%"REG_a") \n\t"
01816
01817 "movq (%0, %1, 4), %%mm0 \n\t"
01818 "movq (%%"REG_a", %1, 2), %%mm1 \n\t"
01819 "movq %%mm2, %%mm3 \n\t"
01820 "pmaxub %%mm1, %%mm2 \n\t"
01821 "pminub %%mm3, %%mm1 \n\t"
01822 "pmaxub %%mm0, %%mm1 \n\t"
01823 "pminub %%mm1, %%mm2 \n\t"
01824 "movq %%mm2, (%%"REG_a", %1, 2) \n\t"
01825
01826 "movq (%%"REG_d"), %%mm2 \n\t"
01827 "movq (%%"REG_d", %1), %%mm1 \n\t"
01828 "movq %%mm2, %%mm3 \n\t"
01829 "pmaxub %%mm0, %%mm2 \n\t"
01830 "pminub %%mm3, %%mm0 \n\t"
01831 "pmaxub %%mm1, %%mm0 \n\t"
01832 "pminub %%mm0, %%mm2 \n\t"
01833 "movq %%mm2, (%%"REG_d") \n\t"
01834
01835 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
01836 "movq (%0, %1, 8), %%mm0 \n\t"
01837 "movq %%mm2, %%mm3 \n\t"
01838 "pmaxub %%mm0, %%mm2 \n\t"
01839 "pminub %%mm3, %%mm0 \n\t"
01840 "pmaxub %%mm1, %%mm0 \n\t"
01841 "pminub %%mm0, %%mm2 \n\t"
01842 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01843
01844
01845 : : "r" (src), "r" ((x86_reg)stride)
01846 : "%"REG_a, "%"REG_d
01847 );
01848
01849 #else // MMX without MMX2
01850 __asm__ volatile(
01851 "lea (%0, %1), %%"REG_a" \n\t"
01852 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01853
01854
01855 "pxor %%mm7, %%mm7 \n\t"
01856
01857 #define REAL_MEDIAN(a,b,c)\
01858 "movq " #a ", %%mm0 \n\t"\
01859 "movq " #b ", %%mm2 \n\t"\
01860 "movq " #c ", %%mm1 \n\t"\
01861 "movq %%mm0, %%mm3 \n\t"\
01862 "movq %%mm1, %%mm4 \n\t"\
01863 "movq %%mm2, %%mm5 \n\t"\
01864 "psubusb %%mm1, %%mm3 \n\t"\
01865 "psubusb %%mm2, %%mm4 \n\t"\
01866 "psubusb %%mm0, %%mm5 \n\t"\
01867 "pcmpeqb %%mm7, %%mm3 \n\t"\
01868 "pcmpeqb %%mm7, %%mm4 \n\t"\
01869 "pcmpeqb %%mm7, %%mm5 \n\t"\
01870 "movq %%mm3, %%mm6 \n\t"\
01871 "pxor %%mm4, %%mm3 \n\t"\
01872 "pxor %%mm5, %%mm4 \n\t"\
01873 "pxor %%mm6, %%mm5 \n\t"\
01874 "por %%mm3, %%mm1 \n\t"\
01875 "por %%mm4, %%mm2 \n\t"\
01876 "por %%mm5, %%mm0 \n\t"\
01877 "pand %%mm2, %%mm0 \n\t"\
01878 "pand %%mm1, %%mm0 \n\t"\
01879 "movq %%mm0, " #b " \n\t"
01880 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)
01881
01882 MEDIAN((%0) , (%%REGa) , (%%REGa, %1))
01883 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
01884 MEDIAN((%0, %1, 4) , (%%REGd) , (%%REGd, %1))
01885 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
01886
01887 : : "r" (src), "r" ((x86_reg)stride)
01888 : "%"REG_a, "%"REG_d
01889 );
01890 #endif //HAVE_MMX2
01891 #else //HAVE_MMX
01892 int x, y;
01893 src+= 4*stride;
01894
01895 for(x=0; x<8; x++){
01896 uint8_t *colsrc = src;
01897 for (y=0; y<4; y++){
01898 int a, b, c, d, e, f;
01899 a = colsrc[0 ];
01900 b = colsrc[stride ];
01901 c = colsrc[stride*2];
01902 d = (a-b)>>31;
01903 e = (b-c)>>31;
01904 f = (c-a)>>31;
01905 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
01906 colsrc += stride*2;
01907 }
01908 src++;
01909 }
01910 #endif //HAVE_MMX
01911 }
01912
01913 #if HAVE_MMX
01914
01917 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
01918 {
01919 __asm__(
01920 "lea (%0, %1), %%"REG_a" \n\t"
01921
01922
01923 "movq (%0), %%mm0 \n\t"
01924 "movq (%%"REG_a"), %%mm1 \n\t"
01925 "movq %%mm0, %%mm2 \n\t"
01926 "punpcklbw %%mm1, %%mm0 \n\t"
01927 "punpckhbw %%mm1, %%mm2 \n\t"
01928
01929 "movq (%%"REG_a", %1), %%mm1 \n\t"
01930 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
01931 "movq %%mm1, %%mm4 \n\t"
01932 "punpcklbw %%mm3, %%mm1 \n\t"
01933 "punpckhbw %%mm3, %%mm4 \n\t"
01934
01935 "movq %%mm0, %%mm3 \n\t"
01936 "punpcklwd %%mm1, %%mm0 \n\t"
01937 "punpckhwd %%mm1, %%mm3 \n\t"
01938 "movq %%mm2, %%mm1 \n\t"
01939 "punpcklwd %%mm4, %%mm2 \n\t"
01940 "punpckhwd %%mm4, %%mm1 \n\t"
01941
01942 "movd %%mm0, 128(%2) \n\t"
01943 "psrlq $32, %%mm0 \n\t"
01944 "movd %%mm0, 144(%2) \n\t"
01945 "movd %%mm3, 160(%2) \n\t"
01946 "psrlq $32, %%mm3 \n\t"
01947 "movd %%mm3, 176(%2) \n\t"
01948 "movd %%mm3, 48(%3) \n\t"
01949 "movd %%mm2, 192(%2) \n\t"
01950 "movd %%mm2, 64(%3) \n\t"
01951 "psrlq $32, %%mm2 \n\t"
01952 "movd %%mm2, 80(%3) \n\t"
01953 "movd %%mm1, 96(%3) \n\t"
01954 "psrlq $32, %%mm1 \n\t"
01955 "movd %%mm1, 112(%3) \n\t"
01956
01957 "lea (%%"REG_a", %1, 4), %%"REG_a" \n\t"
01958
01959 "movq (%0, %1, 4), %%mm0 \n\t"
01960 "movq (%%"REG_a"), %%mm1 \n\t"
01961 "movq %%mm0, %%mm2 \n\t"
01962 "punpcklbw %%mm1, %%mm0 \n\t"
01963 "punpckhbw %%mm1, %%mm2 \n\t"
01964
01965 "movq (%%"REG_a", %1), %%mm1 \n\t"
01966 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
01967 "movq %%mm1, %%mm4 \n\t"
01968 "punpcklbw %%mm3, %%mm1 \n\t"
01969 "punpckhbw %%mm3, %%mm4 \n\t"
01970
01971 "movq %%mm0, %%mm3 \n\t"
01972 "punpcklwd %%mm1, %%mm0 \n\t"
01973 "punpckhwd %%mm1, %%mm3 \n\t"
01974 "movq %%mm2, %%mm1 \n\t"
01975 "punpcklwd %%mm4, %%mm2 \n\t"
01976 "punpckhwd %%mm4, %%mm1 \n\t"
01977
01978 "movd %%mm0, 132(%2) \n\t"
01979 "psrlq $32, %%mm0 \n\t"
01980 "movd %%mm0, 148(%2) \n\t"
01981 "movd %%mm3, 164(%2) \n\t"
01982 "psrlq $32, %%mm3 \n\t"
01983 "movd %%mm3, 180(%2) \n\t"
01984 "movd %%mm3, 52(%3) \n\t"
01985 "movd %%mm2, 196(%2) \n\t"
01986 "movd %%mm2, 68(%3) \n\t"
01987 "psrlq $32, %%mm2 \n\t"
01988 "movd %%mm2, 84(%3) \n\t"
01989 "movd %%mm1, 100(%3) \n\t"
01990 "psrlq $32, %%mm1 \n\t"
01991 "movd %%mm1, 116(%3) \n\t"
01992
01993
01994 :: "r" (src), "r" ((x86_reg)srcStride), "r" (dst1), "r" (dst2)
01995 : "%"REG_a
01996 );
01997 }
01998
02002 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
02003 {
02004 __asm__(
02005 "lea (%0, %1), %%"REG_a" \n\t"
02006 "lea (%%"REG_a",%1,4), %%"REG_d" \n\t"
02007
02008
02009 "movq (%2), %%mm0 \n\t"
02010 "movq 16(%2), %%mm1 \n\t"
02011 "movq %%mm0, %%mm2 \n\t"
02012 "punpcklbw %%mm1, %%mm0 \n\t"
02013 "punpckhbw %%mm1, %%mm2 \n\t"
02014
02015 "movq 32(%2), %%mm1 \n\t"
02016 "movq 48(%2), %%mm3 \n\t"
02017 "movq %%mm1, %%mm4 \n\t"
02018 "punpcklbw %%mm3, %%mm1 \n\t"
02019 "punpckhbw %%mm3, %%mm4 \n\t"
02020
02021 "movq %%mm0, %%mm3 \n\t"
02022 "punpcklwd %%mm1, %%mm0 \n\t"
02023 "punpckhwd %%mm1, %%mm3 \n\t"
02024 "movq %%mm2, %%mm1 \n\t"
02025 "punpcklwd %%mm4, %%mm2 \n\t"
02026 "punpckhwd %%mm4, %%mm1 \n\t"
02027
02028 "movd %%mm0, (%0) \n\t"
02029 "psrlq $32, %%mm0 \n\t"
02030 "movd %%mm0, (%%"REG_a") \n\t"
02031 "movd %%mm3, (%%"REG_a", %1) \n\t"
02032 "psrlq $32, %%mm3 \n\t"
02033 "movd %%mm3, (%%"REG_a", %1, 2) \n\t"
02034 "movd %%mm2, (%0, %1, 4) \n\t"
02035 "psrlq $32, %%mm2 \n\t"
02036 "movd %%mm2, (%%"REG_d") \n\t"
02037 "movd %%mm1, (%%"REG_d", %1) \n\t"
02038 "psrlq $32, %%mm1 \n\t"
02039 "movd %%mm1, (%%"REG_d", %1, 2) \n\t"
02040
02041
02042 "movq 64(%2), %%mm0 \n\t"
02043 "movq 80(%2), %%mm1 \n\t"
02044 "movq %%mm0, %%mm2 \n\t"
02045 "punpcklbw %%mm1, %%mm0 \n\t"
02046 "punpckhbw %%mm1, %%mm2 \n\t"
02047
02048 "movq 96(%2), %%mm1 \n\t"
02049 "movq 112(%2), %%mm3 \n\t"
02050 "movq %%mm1, %%mm4 \n\t"
02051 "punpcklbw %%mm3, %%mm1 \n\t"
02052 "punpckhbw %%mm3, %%mm4 \n\t"
02053
02054 "movq %%mm0, %%mm3 \n\t"
02055 "punpcklwd %%mm1, %%mm0 \n\t"
02056 "punpckhwd %%mm1, %%mm3 \n\t"
02057 "movq %%mm2, %%mm1 \n\t"
02058 "punpcklwd %%mm4, %%mm2 \n\t"
02059 "punpckhwd %%mm4, %%mm1 \n\t"
02060
02061 "movd %%mm0, 4(%0) \n\t"
02062 "psrlq $32, %%mm0 \n\t"
02063 "movd %%mm0, 4(%%"REG_a") \n\t"
02064 "movd %%mm3, 4(%%"REG_a", %1) \n\t"
02065 "psrlq $32, %%mm3 \n\t"
02066 "movd %%mm3, 4(%%"REG_a", %1, 2) \n\t"
02067 "movd %%mm2, 4(%0, %1, 4) \n\t"
02068 "psrlq $32, %%mm2 \n\t"
02069 "movd %%mm2, 4(%%"REG_d") \n\t"
02070 "movd %%mm1, 4(%%"REG_d", %1) \n\t"
02071 "psrlq $32, %%mm1 \n\t"
02072 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t"
02073
02074 :: "r" (dst), "r" ((x86_reg)dstStride), "r" (src)
02075 : "%"REG_a, "%"REG_d
02076 );
02077 }
02078 #endif //HAVE_MMX
02079
02080
02081 #if !HAVE_ALTIVEC
02082 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
02083 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
02084 {
02085
02086 tempBlurredPast[127]= maxNoise[0];
02087 tempBlurredPast[128]= maxNoise[1];
02088 tempBlurredPast[129]= maxNoise[2];
02089
02090 #define FAST_L2_DIFF
02091
02092 #if HAVE_MMX2 || HAVE_AMD3DNOW
02093 __asm__ volatile(
02094 "lea (%2, %2, 2), %%"REG_a" \n\t"
02095 "lea (%2, %2, 4), %%"REG_d" \n\t"
02096 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02097
02098
02099
02100 #ifdef L1_DIFF //needs mmx2
02101 "movq (%0), %%mm0 \n\t"
02102 "psadbw (%1), %%mm0 \n\t"
02103 "movq (%0, %2), %%mm1 \n\t"
02104 "psadbw (%1, %2), %%mm1 \n\t"
02105 "movq (%0, %2, 2), %%mm2 \n\t"
02106 "psadbw (%1, %2, 2), %%mm2 \n\t"
02107 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02108 "psadbw (%1, %%"REG_a"), %%mm3 \n\t"
02109
02110 "movq (%0, %2, 4), %%mm4 \n\t"
02111 "paddw %%mm1, %%mm0 \n\t"
02112 "psadbw (%1, %2, 4), %%mm4 \n\t"
02113 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02114 "paddw %%mm2, %%mm0 \n\t"
02115 "psadbw (%1, %%"REG_d"), %%mm5 \n\t"
02116 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02117 "paddw %%mm3, %%mm0 \n\t"
02118 "psadbw (%1, %%"REG_a", 2), %%mm6 \n\t"
02119 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02120 "paddw %%mm4, %%mm0 \n\t"
02121 "psadbw (%1, %%"REG_c"), %%mm7 \n\t"
02122 "paddw %%mm5, %%mm6 \n\t"
02123 "paddw %%mm7, %%mm6 \n\t"
02124 "paddw %%mm6, %%mm0 \n\t"
02125 #else //L1_DIFF
02126 #if defined (FAST_L2_DIFF)
02127 "pcmpeqb %%mm7, %%mm7 \n\t"
02128 "movq "MANGLE(b80)", %%mm6 \n\t"
02129 "pxor %%mm0, %%mm0 \n\t"
02130 #define REAL_L2_DIFF_CORE(a, b)\
02131 "movq " #a ", %%mm5 \n\t"\
02132 "movq " #b ", %%mm2 \n\t"\
02133 "pxor %%mm7, %%mm2 \n\t"\
02134 PAVGB(%%mm2, %%mm5)\
02135 "paddb %%mm6, %%mm5 \n\t"\
02136 "movq %%mm5, %%mm2 \n\t"\
02137 "psllw $8, %%mm5 \n\t"\
02138 "pmaddwd %%mm5, %%mm5 \n\t"\
02139 "pmaddwd %%mm2, %%mm2 \n\t"\
02140 "paddd %%mm2, %%mm5 \n\t"\
02141 "psrld $14, %%mm5 \n\t"\
02142 "paddd %%mm5, %%mm0 \n\t"
02143
02144 #else //defined (FAST_L2_DIFF)
02145 "pxor %%mm7, %%mm7 \n\t"
02146 "pxor %%mm0, %%mm0 \n\t"
02147 #define REAL_L2_DIFF_CORE(a, b)\
02148 "movq " #a ", %%mm5 \n\t"\
02149 "movq " #b ", %%mm2 \n\t"\
02150 "movq %%mm5, %%mm1 \n\t"\
02151 "movq %%mm2, %%mm3 \n\t"\
02152 "punpcklbw %%mm7, %%mm5 \n\t"\
02153 "punpckhbw %%mm7, %%mm1 \n\t"\
02154 "punpcklbw %%mm7, %%mm2 \n\t"\
02155 "punpckhbw %%mm7, %%mm3 \n\t"\
02156 "psubw %%mm2, %%mm5 \n\t"\
02157 "psubw %%mm3, %%mm1 \n\t"\
02158 "pmaddwd %%mm5, %%mm5 \n\t"\
02159 "pmaddwd %%mm1, %%mm1 \n\t"\
02160 "paddd %%mm1, %%mm5 \n\t"\
02161 "paddd %%mm5, %%mm0 \n\t"
02162
02163 #endif //defined (FAST_L2_DIFF)
02164
02165 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b)
02166
02167 L2_DIFF_CORE((%0) , (%1))
02168 L2_DIFF_CORE((%0, %2) , (%1, %2))
02169 L2_DIFF_CORE((%0, %2, 2) , (%1, %2, 2))
02170 L2_DIFF_CORE((%0, %%REGa) , (%1, %%REGa))
02171 L2_DIFF_CORE((%0, %2, 4) , (%1, %2, 4))
02172 L2_DIFF_CORE((%0, %%REGd) , (%1, %%REGd))
02173 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
02174 L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
02175
02176 #endif //L1_DIFF
02177
02178 "movq %%mm0, %%mm4 \n\t"
02179 "psrlq $32, %%mm0 \n\t"
02180 "paddd %%mm0, %%mm4 \n\t"
02181 "movd %%mm4, %%ecx \n\t"
02182 "shll $2, %%ecx \n\t"
02183 "mov %3, %%"REG_d" \n\t"
02184 "addl -4(%%"REG_d"), %%ecx \n\t"
02185 "addl 4(%%"REG_d"), %%ecx \n\t"
02186 "addl -1024(%%"REG_d"), %%ecx \n\t"
02187 "addl $4, %%ecx \n\t"
02188 "addl 1024(%%"REG_d"), %%ecx \n\t"
02189 "shrl $3, %%ecx \n\t"
02190 "movl %%ecx, (%%"REG_d") \n\t"
02191
02192
02193
02194
02195 "cmpl 512(%%"REG_d"), %%ecx \n\t"
02196 " jb 2f \n\t"
02197 "cmpl 516(%%"REG_d"), %%ecx \n\t"
02198 " jb 1f \n\t"
02199
02200 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02201 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02202 "movq (%0), %%mm0 \n\t"
02203 "movq (%0, %2), %%mm1 \n\t"
02204 "movq (%0, %2, 2), %%mm2 \n\t"
02205 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02206 "movq (%0, %2, 4), %%mm4 \n\t"
02207 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02208 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02209 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02210 "movq %%mm0, (%1) \n\t"
02211 "movq %%mm1, (%1, %2) \n\t"
02212 "movq %%mm2, (%1, %2, 2) \n\t"
02213 "movq %%mm3, (%1, %%"REG_a") \n\t"
02214 "movq %%mm4, (%1, %2, 4) \n\t"
02215 "movq %%mm5, (%1, %%"REG_d") \n\t"
02216 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02217 "movq %%mm7, (%1, %%"REG_c") \n\t"
02218 "jmp 4f \n\t"
02219
02220 "1: \n\t"
02221 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02222 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02223 "movq (%0), %%mm0 \n\t"
02224 PAVGB((%1), %%mm0)
02225 "movq (%0, %2), %%mm1 \n\t"
02226 PAVGB((%1, %2), %%mm1)
02227 "movq (%0, %2, 2), %%mm2 \n\t"
02228 PAVGB((%1, %2, 2), %%mm2)
02229 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02230 PAVGB((%1, %%REGa), %%mm3)
02231 "movq (%0, %2, 4), %%mm4 \n\t"
02232 PAVGB((%1, %2, 4), %%mm4)
02233 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02234 PAVGB((%1, %%REGd), %%mm5)
02235 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02236 PAVGB((%1, %%REGa, 2), %%mm6)
02237 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02238 PAVGB((%1, %%REGc), %%mm7)
02239 "movq %%mm0, (%1) \n\t"
02240 "movq %%mm1, (%1, %2) \n\t"
02241 "movq %%mm2, (%1, %2, 2) \n\t"
02242 "movq %%mm3, (%1, %%"REG_a") \n\t"
02243 "movq %%mm4, (%1, %2, 4) \n\t"
02244 "movq %%mm5, (%1, %%"REG_d") \n\t"
02245 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02246 "movq %%mm7, (%1, %%"REG_c") \n\t"
02247 "movq %%mm0, (%0) \n\t"
02248 "movq %%mm1, (%0, %2) \n\t"
02249 "movq %%mm2, (%0, %2, 2) \n\t"
02250 "movq %%mm3, (%0, %%"REG_a") \n\t"
02251 "movq %%mm4, (%0, %2, 4) \n\t"
02252 "movq %%mm5, (%0, %%"REG_d") \n\t"
02253 "movq %%mm6, (%0, %%"REG_a", 2) \n\t"
02254 "movq %%mm7, (%0, %%"REG_c") \n\t"
02255 "jmp 4f \n\t"
02256
02257 "2: \n\t"
02258 "cmpl 508(%%"REG_d"), %%ecx \n\t"
02259 " jb 3f \n\t"
02260
02261 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02262 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02263 "movq (%0), %%mm0 \n\t"
02264 "movq (%0, %2), %%mm1 \n\t"
02265 "movq (%0, %2, 2), %%mm2 \n\t"
02266 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02267 "movq (%1), %%mm4 \n\t"
02268 "movq (%1, %2), %%mm5 \n\t"
02269 "movq (%1, %2, 2), %%mm6 \n\t"
02270 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02271 PAVGB(%%mm4, %%mm0)
02272 PAVGB(%%mm5, %%mm1)
02273 PAVGB(%%mm6, %%mm2)
02274 PAVGB(%%mm7, %%mm3)
02275 PAVGB(%%mm4, %%mm0)
02276 PAVGB(%%mm5, %%mm1)
02277 PAVGB(%%mm6, %%mm2)
02278 PAVGB(%%mm7, %%mm3)
02279 "movq %%mm0, (%1) \n\t"
02280 "movq %%mm1, (%1, %2) \n\t"
02281 "movq %%mm2, (%1, %2, 2) \n\t"
02282 "movq %%mm3, (%1, %%"REG_a") \n\t"
02283 "movq %%mm0, (%0) \n\t"
02284 "movq %%mm1, (%0, %2) \n\t"
02285 "movq %%mm2, (%0, %2, 2) \n\t"
02286 "movq %%mm3, (%0, %%"REG_a") \n\t"
02287
02288 "movq (%0, %2, 4), %%mm0 \n\t"
02289 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02290 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02291 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02292 "movq (%1, %2, 4), %%mm4 \n\t"
02293 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02294 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02295 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02296 PAVGB(%%mm4, %%mm0)
02297 PAVGB(%%mm5, %%mm1)
02298 PAVGB(%%mm6, %%mm2)
02299 PAVGB(%%mm7, %%mm3)
02300 PAVGB(%%mm4, %%mm0)
02301 PAVGB(%%mm5, %%mm1)
02302 PAVGB(%%mm6, %%mm2)
02303 PAVGB(%%mm7, %%mm3)
02304 "movq %%mm0, (%1, %2, 4) \n\t"
02305 "movq %%mm1, (%1, %%"REG_d") \n\t"
02306 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02307 "movq %%mm3, (%1, %%"REG_c") \n\t"
02308 "movq %%mm0, (%0, %2, 4) \n\t"
02309 "movq %%mm1, (%0, %%"REG_d") \n\t"
02310 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02311 "movq %%mm3, (%0, %%"REG_c") \n\t"
02312 "jmp 4f \n\t"
02313
02314 "3: \n\t"
02315 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02316 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02317 "movq (%0), %%mm0 \n\t"
02318 "movq (%0, %2), %%mm1 \n\t"
02319 "movq (%0, %2, 2), %%mm2 \n\t"
02320 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02321 "movq (%1), %%mm4 \n\t"
02322 "movq (%1, %2), %%mm5 \n\t"
02323 "movq (%1, %2, 2), %%mm6 \n\t"
02324 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02325 PAVGB(%%mm4, %%mm0)
02326 PAVGB(%%mm5, %%mm1)
02327 PAVGB(%%mm6, %%mm2)
02328 PAVGB(%%mm7, %%mm3)
02329 PAVGB(%%mm4, %%mm0)
02330 PAVGB(%%mm5, %%mm1)
02331 PAVGB(%%mm6, %%mm2)
02332 PAVGB(%%mm7, %%mm3)
02333 PAVGB(%%mm4, %%mm0)
02334 PAVGB(%%mm5, %%mm1)
02335 PAVGB(%%mm6, %%mm2)
02336 PAVGB(%%mm7, %%mm3)
02337 "movq %%mm0, (%1) \n\t"
02338 "movq %%mm1, (%1, %2) \n\t"
02339 "movq %%mm2, (%1, %2, 2) \n\t"
02340 "movq %%mm3, (%1, %%"REG_a") \n\t"
02341 "movq %%mm0, (%0) \n\t"
02342 "movq %%mm1, (%0, %2) \n\t"
02343 "movq %%mm2, (%0, %2, 2) \n\t"
02344 "movq %%mm3, (%0, %%"REG_a") \n\t"
02345
02346 "movq (%0, %2, 4), %%mm0 \n\t"
02347 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02348 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02349 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02350 "movq (%1, %2, 4), %%mm4 \n\t"
02351 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02352 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02353 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02354 PAVGB(%%mm4, %%mm0)
02355 PAVGB(%%mm5, %%mm1)
02356 PAVGB(%%mm6, %%mm2)
02357 PAVGB(%%mm7, %%mm3)
02358 PAVGB(%%mm4, %%mm0)
02359 PAVGB(%%mm5, %%mm1)
02360 PAVGB(%%mm6, %%mm2)
02361 PAVGB(%%mm7, %%mm3)
02362 PAVGB(%%mm4, %%mm0)
02363 PAVGB(%%mm5, %%mm1)
02364 PAVGB(%%mm6, %%mm2)
02365 PAVGB(%%mm7, %%mm3)
02366 "movq %%mm0, (%1, %2, 4) \n\t"
02367 "movq %%mm1, (%1, %%"REG_d") \n\t"
02368 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02369 "movq %%mm3, (%1, %%"REG_c") \n\t"
02370 "movq %%mm0, (%0, %2, 4) \n\t"
02371 "movq %%mm1, (%0, %%"REG_d") \n\t"
02372 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02373 "movq %%mm3, (%0, %%"REG_c") \n\t"
02374
02375 "4: \n\t"
02376
02377 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
02378 : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
02379 );
02380 #else //HAVE_MMX2 || HAVE_AMD3DNOW
02381 {
02382 int y;
02383 int d=0;
02384
02385 int i;
02386
02387 for(y=0; y<8; y++){
02388 int x;
02389 for(x=0; x<8; x++){
02390 int ref= tempBlurred[ x + y*stride ];
02391 int cur= src[ x + y*stride ];
02392 int d1=ref - cur;
02393
02394
02395
02396 d+= d1*d1;
02397
02398 }
02399 }
02400 i=d;
02401 d= (
02402 4*d
02403 +(*(tempBlurredPast-256))
02404 +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
02405 +(*(tempBlurredPast+256))
02406 +4)>>3;
02407 *tempBlurredPast=i;
02408
02409
02410
02411
02412
02413
02414
02415
02416
02417 if(d > maxNoise[1]){
02418 if(d < maxNoise[2]){
02419 for(y=0; y<8; y++){
02420 int x;
02421 for(x=0; x<8; x++){
02422 int ref= tempBlurred[ x + y*stride ];
02423 int cur= src[ x + y*stride ];
02424 tempBlurred[ x + y*stride ]=
02425 src[ x + y*stride ]=
02426 (ref + cur + 1)>>1;
02427 }
02428 }
02429 }else{
02430 for(y=0; y<8; y++){
02431 int x;
02432 for(x=0; x<8; x++){
02433 tempBlurred[ x + y*stride ]= src[ x + y*stride ];
02434 }
02435 }
02436 }
02437 }else{
02438 if(d < maxNoise[0]){
02439 for(y=0; y<8; y++){
02440 int x;
02441 for(x=0; x<8; x++){
02442 int ref= tempBlurred[ x + y*stride ];
02443 int cur= src[ x + y*stride ];
02444 tempBlurred[ x + y*stride ]=
02445 src[ x + y*stride ]=
02446 (ref*7 + cur + 4)>>3;
02447 }
02448 }
02449 }else{
02450 for(y=0; y<8; y++){
02451 int x;
02452 for(x=0; x<8; x++){
02453 int ref= tempBlurred[ x + y*stride ];
02454 int cur= src[ x + y*stride ];
02455 tempBlurred[ x + y*stride ]=
02456 src[ x + y*stride ]=
02457 (ref*3 + cur + 2)>>2;
02458 }
02459 }
02460 }
02461 }
02462 }
02463 #endif //HAVE_MMX2 || HAVE_AMD3DNOW
02464 }
02465 #endif //HAVE_ALTIVEC
02466
02467 #if HAVE_MMX
02468
/**
 * MMX deblocking filter that classifies 8 byte columns at once and applies
 * either a low-pass filter or a default (edge-correcting) filter per column.
 *
 * In the comments below "line k" means the 8 bytes at src + k*step, where src
 * is the pointer after the initial src += step*3 adjustment.
 *
 * @param src    pointer into the pixel data; advanced by 3*step before use
 * @param step   byte distance between consecutive lines that are filtered
 *               (the vertical-pass caller in this file passes the image stride)
 * @param stride not referenced anywhere in this function body
 * @param c      context; this function reads mmxDcOffset[], mmxDcThreshold[],
 *               nonBQP, pQPb and ppMode.flatnessThreshold from it
 */
02471 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
// dc_mask / eq_mask: per-column byte masks (0xFF or 0x00) produced by the classification asm.
02472 int64_t dc_mask, eq_mask, both_masks;
// Scratch for the sliding-window sums; the asm below writes ten 16-byte entries (offsets 0..159).
02473 int64_t sums[10*8*2];
02474 src+= step*3;
02475
// Preload the per-byte offset (mm7) and threshold (mm6) used by the pair tests below.
// The following asm statement relies on mm6/mm7 still holding these values.
02476 __asm__ volatile(
02477 "movq %0, %%mm7 \n\t"
02478 "movq %1, %%mm6 \n\t"
02479 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
02480 );
02481
// Classification pass over lines 0..9.
//  - For each adjacent pair (k, k+1) the byte-wise value (line k - line k+1 + mm7)
//    is compared (signed) against mm6; pcmpgtb yields 0xFF (-1) where it is greater.
//    These results are summed into mm0, so mm0 ends up as minus the number of
//    passing pairs per column (9 pairs in total).
//  - mm4 / mm3 track the per-column maximum / minimum of lines 1..8.
02482 __asm__ volatile(
// REG_a = address of line 1
02483 "lea (%2, %3), %%"REG_a" \n\t"
02484
02485
02486
02487 "movq (%2), %%mm0 \n\t"
02488 "movq (%%"REG_a"), %%mm1 \n\t"
// seed min (mm3) and max (mm4) with line 1
02489 "movq %%mm1, %%mm3 \n\t"
02490 "movq %%mm1, %%mm4 \n\t"
// pair (0,1)
02491 "psubb %%mm1, %%mm0 \n\t"
02492 "paddb %%mm7, %%mm0 \n\t"
02493 "pcmpgtb %%mm6, %%mm0 \n\t"
02494
// line 2, pair (1,2)
02495 "movq (%%"REG_a",%3), %%mm2 \n\t"
02496 PMAXUB(%%mm2, %%mm4)
02497 PMINUB(%%mm2, %%mm3, %%mm5)
02498 "psubb %%mm2, %%mm1 \n\t"
02499 "paddb %%mm7, %%mm1 \n\t"
02500 "pcmpgtb %%mm6, %%mm1 \n\t"
02501 "paddb %%mm1, %%mm0 \n\t"
02502
// line 3, pair (2,3)
02503 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02504 PMAXUB(%%mm1, %%mm4)
02505 PMINUB(%%mm1, %%mm3, %%mm5)
02506 "psubb %%mm1, %%mm2 \n\t"
02507 "paddb %%mm7, %%mm2 \n\t"
02508 "pcmpgtb %%mm6, %%mm2 \n\t"
02509 "paddb %%mm2, %%mm0 \n\t"
02510
// REG_a now addresses line 5
02511 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t"
02512
// line 4, pair (3,4)
02513 "movq (%2, %3, 4), %%mm2 \n\t"
02514 PMAXUB(%%mm2, %%mm4)
02515 PMINUB(%%mm2, %%mm3, %%mm5)
02516 "psubb %%mm2, %%mm1 \n\t"
02517 "paddb %%mm7, %%mm1 \n\t"
02518 "pcmpgtb %%mm6, %%mm1 \n\t"
02519 "paddb %%mm1, %%mm0 \n\t"
02520
// line 5, pair (4,5)
02521 "movq (%%"REG_a"), %%mm1 \n\t"
02522 PMAXUB(%%mm1, %%mm4)
02523 PMINUB(%%mm1, %%mm3, %%mm5)
02524 "psubb %%mm1, %%mm2 \n\t"
02525 "paddb %%mm7, %%mm2 \n\t"
02526 "pcmpgtb %%mm6, %%mm2 \n\t"
02527 "paddb %%mm2, %%mm0 \n\t"
02528
// line 6, pair (5,6)
02529 "movq (%%"REG_a", %3), %%mm2 \n\t"
02530 PMAXUB(%%mm2, %%mm4)
02531 PMINUB(%%mm2, %%mm3, %%mm5)
02532 "psubb %%mm2, %%mm1 \n\t"
02533 "paddb %%mm7, %%mm1 \n\t"
02534 "pcmpgtb %%mm6, %%mm1 \n\t"
02535 "paddb %%mm1, %%mm0 \n\t"
02536
// line 7, pair (6,7)
02537 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02538 PMAXUB(%%mm1, %%mm4)
02539 PMINUB(%%mm1, %%mm3, %%mm5)
02540 "psubb %%mm1, %%mm2 \n\t"
02541 "paddb %%mm7, %%mm2 \n\t"
02542 "pcmpgtb %%mm6, %%mm2 \n\t"
02543 "paddb %%mm2, %%mm0 \n\t"
02544
// line 8, pair (7,8)
02545 "movq (%2, %3, 8), %%mm2 \n\t"
02546 PMAXUB(%%mm2, %%mm4)
02547 PMINUB(%%mm2, %%mm3, %%mm5)
02548 "psubb %%mm2, %%mm1 \n\t"
02549 "paddb %%mm7, %%mm1 \n\t"
02550 "pcmpgtb %%mm6, %%mm1 \n\t"
02551 "paddb %%mm1, %%mm0 \n\t"
02552
// line 9, pair (8,9); line 9 does not take part in the min/max tracking
02553 "movq (%%"REG_a", %3, 4), %%mm1 \n\t"
02554 "psubb %%mm1, %%mm2 \n\t"
02555 "paddb %%mm7, %%mm2 \n\t"
02556 "pcmpgtb %%mm6, %%mm2 \n\t"
02557 "paddb %%mm2, %%mm0 \n\t"
// mm4 = max - min of lines 1..8 (per column)
02558 "psubusb %%mm3, %%mm4 \n\t"
02559
// dc_mask: 0xFF where (max - min) < 2*pQPb (saturating arithmetic), else 0.
// The first pcmpeqb marks columns where 2*pQPb - range saturated to 0,
// the second pcmpeqb against zero inverts that mask.
02560 "pxor %%mm6, %%mm6 \n\t"
02561 "movq %4, %%mm7 \n\t"
02562 "paddusb %%mm7, %%mm7 \n\t"
02563 "psubusb %%mm4, %%mm7 \n\t"
02564 "pcmpeqb %%mm6, %%mm7 \n\t"
02565 "pcmpeqb %%mm6, %%mm7 \n\t"
02566 "movq %%mm7, %1 \n\t"
02567
// eq_mask: broadcast the low byte of flatnessThreshold to all 8 bytes, negate mm0
// to get the positive pass count, and set 0xFF where count > threshold (signed bytes).
02568 "movq %5, %%mm7 \n\t"
02569 "punpcklbw %%mm7, %%mm7 \n\t"
02570 "punpcklbw %%mm7, %%mm7 \n\t"
02571 "punpcklbw %%mm7, %%mm7 \n\t"
02572 "psubb %%mm0, %%mm6 \n\t"
02573 "pcmpgtb %%mm7, %%mm6 \n\t"
02574 "movq %%mm6, %0 \n\t"
02575
02576 : "=m" (eq_mask), "=m" (dc_mask)
02577 : "r" (src), "r" ((x86_reg)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
02578 : "%"REG_a
02579 );
02580
// Columns that pass both tests get the low-pass filter.
02581 both_masks = dc_mask & eq_mask;
02582
02583 if(both_masks){
// offset runs from -8*step up to 0 in the blend loop further down.
02584 x86_reg offset= -8*step;
02585 int64_t *temp_sums= sums;
02586
// Build the sliding-window sums used by the low-pass filter.
// Operands: %0 = src (modified, restored at the end from %4), %1 = step,
// %2 = pQPb, %3 = sums, %4 = original src.
02587 __asm__ volatile(
02588 "movq %2, %%mm0 \n\t"
02589 "pxor %%mm4, %%mm4 \n\t"
02590
// "first" (mm6): per column, line 0 where |line0 - line1| < pQPb, otherwise line 1.
02591 "movq (%0), %%mm6 \n\t"
02592 "movq (%0, %1), %%mm5 \n\t"
02593 "movq %%mm5, %%mm1 \n\t"
02594 "movq %%mm6, %%mm2 \n\t"
02595 "psubusb %%mm6, %%mm5 \n\t"
02596 "psubusb %%mm1, %%mm2 \n\t"
02597 "por %%mm5, %%mm2 \n\t"
02598 "psubusb %%mm2, %%mm0 \n\t"
02599 "pcmpeqb %%mm4, %%mm0 \n\t"
02600
// xor/and/xor select: mm6 becomes line 1 where the mask in mm0 is set
02601 "pxor %%mm6, %%mm1 \n\t"
02602 "pand %%mm0, %%mm1 \n\t"
02603 "pxor %%mm1, %%mm6 \n\t"
02604
02605
// "last" (mm7): per column, line 9 where |line8 - line9| < pQPb, otherwise line 8.
// %0 is advanced to line 1 here.
02606 "movq (%0, %1, 8), %%mm5 \n\t"
02607 "add %1, %0 \n\t"
02608 "movq (%0, %1, 8), %%mm7 \n\t"
02609 "movq %%mm5, %%mm1 \n\t"
02610 "movq %%mm7, %%mm2 \n\t"
02611 "psubusb %%mm7, %%mm5 \n\t"
02612 "psubusb %%mm1, %%mm2 \n\t"
02613 "por %%mm5, %%mm2 \n\t"
02614 "movq %2, %%mm0 \n\t"
02615 "psubusb %%mm2, %%mm0 \n\t"
02616 "pcmpeqb %%mm4, %%mm0 \n\t"
02617
02618 "pxor %%mm7, %%mm1 \n\t"
02619 "pand %%mm0, %%mm1 \n\t"
02620 "pxor %%mm1, %%mm7 \n\t"
02621
// widen "first" to 16-bit words: mm5 = low four columns, mm6 = high four columns
02622 "movq %%mm6, %%mm5 \n\t"
02623 "punpckhbw %%mm4, %%mm6 \n\t"
02624 "punpcklbw %%mm4, %%mm5 \n\t"
02625
02626
// running sum (mm0 low half, mm1 high half) starts as 4*first + w04
// (w04 is defined outside this chunk; presumably a per-word rounding constant - confirm)
02627 "movq %%mm5, %%mm0 \n\t"
02628 "movq %%mm6, %%mm1 \n\t"
02629 "psllw $2, %%mm0 \n\t"
02630 "psllw $2, %%mm1 \n\t"
02631 "paddw "MANGLE(w04)", %%mm0 \n\t"
02632 "paddw "MANGLE(w04)", %%mm1 \n\t"
02633
// NEXT: add the line at %0 to the running sum and advance %0 by one step.
02634 #define NEXT\
02635 "movq (%0), %%mm2 \n\t"\
02636 "movq (%0), %%mm3 \n\t"\
02637 "add %1, %0 \n\t"\
02638 "punpcklbw %%mm4, %%mm2 \n\t"\
02639 "punpckhbw %%mm4, %%mm3 \n\t"\
02640 "paddw %%mm2, %%mm0 \n\t"\
02641 "paddw %%mm3, %%mm1 \n\t"
02642
// PREV: subtract the line at %0 from the running sum and advance %0 by one step.
02643 #define PREV\
02644 "movq (%0), %%mm2 \n\t"\
02645 "movq (%0), %%mm3 \n\t"\
02646 "add %1, %0 \n\t"\
02647 "punpcklbw %%mm4, %%mm2 \n\t"\
02648 "punpckhbw %%mm4, %%mm3 \n\t"\
02649 "psubw %%mm2, %%mm0 \n\t"\
02650 "psubw %%mm3, %%mm1 \n\t"
02651
02652
// entry 0: 4*first + w04 + lines 1..3
02653 NEXT
02654 NEXT
02655 NEXT
02656 "movq %%mm0, (%3) \n\t"
02657 "movq %%mm1, 8(%3) \n\t"
02658
// entries 1..4: each adds the next line (4..7) and removes one copy of "first"
02659 NEXT
02660 "psubw %%mm5, %%mm0 \n\t"
02661 "psubw %%mm6, %%mm1 \n\t"
02662 "movq %%mm0, 16(%3) \n\t"
02663 "movq %%mm1, 24(%3) \n\t"
02664
02665 NEXT
02666 "psubw %%mm5, %%mm0 \n\t"
02667 "psubw %%mm6, %%mm1 \n\t"
02668 "movq %%mm0, 32(%3) \n\t"
02669 "movq %%mm1, 40(%3) \n\t"
02670
02671 NEXT
02672 "psubw %%mm5, %%mm0 \n\t"
02673 "psubw %%mm6, %%mm1 \n\t"
02674 "movq %%mm0, 48(%3) \n\t"
02675 "movq %%mm1, 56(%3) \n\t"
02676
02677 NEXT
02678 "psubw %%mm5, %%mm0 \n\t"
02679 "psubw %%mm6, %%mm1 \n\t"
02680 "movq %%mm0, 64(%3) \n\t"
02681 "movq %%mm1, 72(%3) \n\t"
02682
// widen "last" to words: mm6 = low half, mm7 = high half ("first" is no longer needed)
02683 "movq %%mm7, %%mm6 \n\t"
02684 "punpckhbw %%mm4, %%mm7 \n\t"
02685 "punpcklbw %%mm4, %%mm6 \n\t"
02686
// add line 8, then rewind %0 to line 1 so PREV can drop the oldest lines again
02687 NEXT
02688 "mov %4, %0 \n\t"
02689 "add %1, %0 \n\t"
02690 PREV
02691 "movq %%mm0, 80(%3) \n\t"
02692 "movq %%mm1, 88(%3) \n\t"
02693
// entries 6..9: each removes the next oldest line and adds one copy of "last"
02694 PREV
02695 "paddw %%mm6, %%mm0 \n\t"
02696 "paddw %%mm7, %%mm1 \n\t"
02697 "movq %%mm0, 96(%3) \n\t"
02698 "movq %%mm1, 104(%3) \n\t"
02699
02700 PREV
02701 "paddw %%mm6, %%mm0 \n\t"
02702 "paddw %%mm7, %%mm1 \n\t"
02703 "movq %%mm0, 112(%3) \n\t"
02704 "movq %%mm1, 120(%3) \n\t"
02705
02706 PREV
02707 "paddw %%mm6, %%mm0 \n\t"
02708 "paddw %%mm7, %%mm1 \n\t"
02709 "movq %%mm0, 128(%3) \n\t"
02710 "movq %%mm1, 136(%3) \n\t"
02711
02712 PREV
02713 "paddw %%mm6, %%mm0 \n\t"
02714 "paddw %%mm7, %%mm1 \n\t"
02715 "movq %%mm0, 144(%3) \n\t"
02716 "movq %%mm1, 152(%3) \n\t"
02717
// restore %0 so the C variable src keeps its value from before this asm
02718 "mov %4, %0 \n\t"
02719
02720 : "+&r"(src)
02721 : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src)
02722 );
02723
// src now addresses line 1, the first line that is rewritten.
02724 src+= step;
02725
// Blend loop: 8 iterations (offset goes from -8*step to 0), one output line each.
// Operands: %0 = offset, %1 = temp_sums, %2 = step, %3 = src - offset, %4 = both_masks.
// Per line: filtered = (sums[k] + sums[k+2] + 2*pixel) >> 4, and the stored value is
// (filtered & both_masks) | (pixel & ~both_masks), so unmasked columns are unchanged.
02726 __asm__ volatile(
// mm6 = both_masks, mm5 = its complement, mm7 = 0
02727 "movq %4, %%mm6 \n\t"
02728 "pcmpeqb %%mm5, %%mm5 \n\t"
02729 "pxor %%mm6, %%mm5 \n\t"
02730 "pxor %%mm7, %%mm7 \n\t"
02731
02732 "1: \n\t"
02733 "movq (%1), %%mm0 \n\t"
02734 "movq 8(%1), %%mm1 \n\t"
// +32 bytes = two 16-byte entries further on
02735 "paddw 32(%1), %%mm0 \n\t"
02736 "paddw 40(%1), %%mm1 \n\t"
02737 "movq (%0, %3), %%mm2 \n\t"
02738 "movq %%mm2, %%mm3 \n\t"
02739 "movq %%mm2, %%mm4 \n\t"
02740 "punpcklbw %%mm7, %%mm2 \n\t"
02741 "punpckhbw %%mm7, %%mm3 \n\t"
02742 "paddw %%mm2, %%mm0 \n\t"
02743 "paddw %%mm3, %%mm1 \n\t"
02744 "paddw %%mm2, %%mm0 \n\t"
02745 "paddw %%mm3, %%mm1 \n\t"
02746 "psrlw $4, %%mm0 \n\t"
02747 "psrlw $4, %%mm1 \n\t"
02748 "packuswb %%mm1, %%mm0 \n\t"
02749 "pand %%mm6, %%mm0 \n\t"
02750 "pand %%mm5, %%mm4 \n\t"
02751 "por %%mm4, %%mm0 \n\t"
02752 "movq %%mm0, (%0, %3) \n\t"
02753 "add $16, %1 \n\t"
02754 "add %2, %0 \n\t"
// loop while offset is still negative
02755 " js 1b \n\t"
02756
02757 : "+r"(offset), "+r"(temp_sums)
02758 : "r" ((x86_reg)step), "r"(src - offset), "m"(both_masks)
02759 );
02760 }else
// keep src consistent with the branch above (both paths end with src at line 1)
02761 src+= step;
02762
// Default filter: skipped only when every column has its eq_mask byte set.
// In this asm "row k" means the 8 bytes at temp_src + k*step; only rows 3 and 4 are written,
// and only in columns whose eq_mask byte is clear.
02763 if(eq_mask != -1LL){
02764 uint8_t *temp_src= src;
02765 __asm__ volatile(
02766 "pxor %%mm7, %%mm7 \n\t"
// REG_c = 8-byte aligned scratch address computed from (stack pointer - 40);
// 32 bytes at (REG_c)..31(REG_c) are used as temporary storage below.
02767 "lea -40(%%"REG_SP"), %%"REG_c" \n\t"
02768 "and "ALIGN_MASK", %%"REG_c" \n\t"
02769
02770
02771
// row 0 -> mm0 (low words) / mm1 (high words)
02772 "movq (%0), %%mm0 \n\t"
02773 "movq %%mm0, %%mm1 \n\t"
02774 "punpcklbw %%mm7, %%mm0 \n\t"
02775 "punpckhbw %%mm7, %%mm1 \n\t"
02776
// row 1 -> mm2/mm3, REG_a = address of row 2
02777 "movq (%0, %1), %%mm2 \n\t"
02778 "lea (%0, %1, 2), %%"REG_a" \n\t"
02779 "movq %%mm2, %%mm3 \n\t"
02780 "punpcklbw %%mm7, %%mm2 \n\t"
02781 "punpckhbw %%mm7, %%mm3 \n\t"
02782
// row 2 -> mm4/mm5
02783 "movq (%%"REG_a"), %%mm4 \n\t"
02784 "movq %%mm4, %%mm5 \n\t"
02785 "punpcklbw %%mm7, %%mm4 \n\t"
02786 "punpckhbw %%mm7, %%mm5 \n\t"
02787
// mm0/mm1 = 2*row0 - (row1 - row2)
02788 "paddw %%mm0, %%mm0 \n\t"
02789 "paddw %%mm1, %%mm1 \n\t"
02790 "psubw %%mm4, %%mm2 \n\t"
02791 "psubw %%mm5, %%mm3 \n\t"
02792 "psubw %%mm2, %%mm0 \n\t"
02793 "psubw %%mm3, %%mm1 \n\t"
02794
// ... - 4*(row1 - row2)  => 2*row0 - 5*row1 + 5*row2
02795 "psllw $2, %%mm2 \n\t"
02796 "psllw $2, %%mm3 \n\t"
02797 "psubw %%mm2, %%mm0 \n\t"
02798 "psubw %%mm3, %%mm1 \n\t"
02799
// row 3 -> mm2/mm3
02800 "movq (%%"REG_a", %1), %%mm2 \n\t"
02801 "movq %%mm2, %%mm3 \n\t"
02802 "punpcklbw %%mm7, %%mm2 \n\t"
02803 "punpckhbw %%mm7, %%mm3 \n\t"
02804
// left term = 2*row0 - 5*row1 + 5*row2 - 2*row3, saved in scratch[0..15]
02805 "psubw %%mm2, %%mm0 \n\t"
02806 "psubw %%mm3, %%mm1 \n\t"
02807 "psubw %%mm2, %%mm0 \n\t"
02808 "psubw %%mm3, %%mm1 \n\t"
02809 "movq %%mm0, (%%"REG_c") \n\t"
02810 "movq %%mm1, 8(%%"REG_c") \n\t"
02811
// row 4 -> mm0/mm1
02812 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
02813 "movq %%mm0, %%mm1 \n\t"
02814 "punpcklbw %%mm7, %%mm0 \n\t"
02815 "punpckhbw %%mm7, %%mm1 \n\t"
02816
// (row3 - row4) saved in scratch[16..31] for the final clamp
02817 "psubw %%mm0, %%mm2 \n\t"
02818 "psubw %%mm1, %%mm3 \n\t"
02819 "movq %%mm2, 16(%%"REG_c") \n\t"
02820 "movq %%mm3, 24(%%"REG_c") \n\t"
// mm4/mm5 = 2*row2 - (row3 - row4)
02821 "paddw %%mm4, %%mm4 \n\t"
02822 "paddw %%mm5, %%mm5 \n\t"
02823 "psubw %%mm2, %%mm4 \n\t"
02824 "psubw %%mm3, %%mm5 \n\t"
02825
// %0 now addresses row 3; ... - 4*(row3 - row4) => 2*row2 - 5*row3 + 5*row4
02826 "lea (%%"REG_a", %1), %0 \n\t"
02827 "psllw $2, %%mm2 \n\t"
02828 "psllw $2, %%mm3 \n\t"
02829 "psubw %%mm2, %%mm4 \n\t"
02830 "psubw %%mm3, %%mm5 \n\t"
02831
// row 5 -> mm2/mm3; middle term = 2*row2 - 5*row3 + 5*row4 - 2*row5 in mm4/mm5
02832 "movq (%0, %1, 2), %%mm2 \n\t"
02833 "movq %%mm2, %%mm3 \n\t"
02834 "punpcklbw %%mm7, %%mm2 \n\t"
02835 "punpckhbw %%mm7, %%mm3 \n\t"
02836 "psubw %%mm2, %%mm4 \n\t"
02837 "psubw %%mm3, %%mm5 \n\t"
02838 "psubw %%mm2, %%mm4 \n\t"
02839 "psubw %%mm3, %%mm5 \n\t"
02840
// row 6 is loaded twice (low and high half) because only mm6 is free; mm2/mm3 = row5 - row6
02841 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
02842 "punpcklbw %%mm7, %%mm6 \n\t"
02843 "psubw %%mm6, %%mm2 \n\t"
02844 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
02845 "punpckhbw %%mm7, %%mm6 \n\t"
02846 "psubw %%mm6, %%mm3 \n\t"
02847
// mm0/mm1 = 2*row4 - (row5 - row6)
02848 "paddw %%mm0, %%mm0 \n\t"
02849 "paddw %%mm1, %%mm1 \n\t"
02850 "psubw %%mm2, %%mm0 \n\t"
02851 "psubw %%mm3, %%mm1 \n\t"
02852
// ... - 4*(row5 - row6) => 2*row4 - 5*row5 + 5*row6
02853 "psllw $2, %%mm2 \n\t"
02854 "psllw $2, %%mm3 \n\t"
02855 "psubw %%mm2, %%mm0 \n\t"
02856 "psubw %%mm3, %%mm1 \n\t"
02857
// row 7 -> mm2/mm3
02858 "movq (%0, %1, 4), %%mm2 \n\t"
02859 "movq %%mm2, %%mm3 \n\t"
02860 "punpcklbw %%mm7, %%mm2 \n\t"
02861 "punpckhbw %%mm7, %%mm3 \n\t"
02862
// right term = 2*row4 - 5*row5 + 5*row6 - 2*row7 in mm0/mm1
02863 "paddw %%mm2, %%mm2 \n\t"
02864 "paddw %%mm3, %%mm3 \n\t"
02865 "psubw %%mm2, %%mm0 \n\t"
02866 "psubw %%mm3, %%mm1 \n\t"
02867
// reload the left term into mm2/mm3
02868 "movq (%%"REG_c"), %%mm2 \n\t"
02869 "movq 8(%%"REG_c"), %%mm3 \n\t"
02870
// absolute values of the right (mm0/mm1) and left (mm2/mm3) terms
02871 #if HAVE_MMX2
// abs(x) = max(x, 0 - x)
02872 "movq %%mm7, %%mm6 \n\t"
02873 "psubw %%mm0, %%mm6 \n\t"
02874 "pmaxsw %%mm6, %%mm0 \n\t"
02875 "movq %%mm7, %%mm6 \n\t"
02876 "psubw %%mm1, %%mm6 \n\t"
02877 "pmaxsw %%mm6, %%mm1 \n\t"
02878 "movq %%mm7, %%mm6 \n\t"
02879 "psubw %%mm2, %%mm6 \n\t"
02880 "pmaxsw %%mm6, %%mm2 \n\t"
02881 "movq %%mm7, %%mm6 \n\t"
02882 "psubw %%mm3, %%mm6 \n\t"
02883 "pmaxsw %%mm6, %%mm3 \n\t"
02884 #else
// abs(x) via sign mask: (x ^ sign) - sign
02885 "movq %%mm7, %%mm6 \n\t"
02886 "pcmpgtw %%mm0, %%mm6 \n\t"
02887 "pxor %%mm6, %%mm0 \n\t"
02888 "psubw %%mm6, %%mm0 \n\t"
02889 "movq %%mm7, %%mm6 \n\t"
02890 "pcmpgtw %%mm1, %%mm6 \n\t"
02891 "pxor %%mm6, %%mm1 \n\t"
02892 "psubw %%mm6, %%mm1 \n\t"
02893 "movq %%mm7, %%mm6 \n\t"
02894 "pcmpgtw %%mm2, %%mm6 \n\t"
02895 "pxor %%mm6, %%mm2 \n\t"
02896 "psubw %%mm6, %%mm2 \n\t"
02897 "movq %%mm7, %%mm6 \n\t"
02898 "pcmpgtw %%mm3, %%mm6 \n\t"
02899 "pxor %%mm6, %%mm3 \n\t"
02900 "psubw %%mm6, %%mm3 \n\t"
02901 #endif
02902
// mm0/mm1 = min(|left|, |right|)
02903 #if HAVE_MMX2
02904 "pminsw %%mm2, %%mm0 \n\t"
02905 "pminsw %%mm3, %%mm1 \n\t"
02906 #else
// min(a, b) = a - saturating(a - b)
02907 "movq %%mm0, %%mm6 \n\t"
02908 "psubusw %%mm2, %%mm6 \n\t"
02909 "psubw %%mm6, %%mm0 \n\t"
02910 "movq %%mm1, %%mm6 \n\t"
02911 "psubusw %%mm3, %%mm6 \n\t"
02912 "psubw %%mm6, %%mm1 \n\t"
02913 #endif
02914
// mm2 = low four bytes of pQPb widened to words
02915 "movd %2, %%mm2 \n\t"
02916 "punpcklbw %%mm7, %%mm2 \n\t"
02917
// |middle| in mm4/mm5; its sign masks are kept in mm6 (low) and mm7 (high).
// Note mm7 stops being zero from here on.
02918 "movq %%mm7, %%mm6 \n\t"
02919 "pcmpgtw %%mm4, %%mm6 \n\t"
02920 "pxor %%mm6, %%mm4 \n\t"
02921 "psubw %%mm6, %%mm4 \n\t"
02922 "pcmpgtw %%mm5, %%mm7 \n\t"
02923 "pxor %%mm7, %%mm5 \n\t"
02924 "psubw %%mm7, %%mm5 \n\t"
02925
// zero |middle| wherever it is not below 8*QP
02926 "psllw $3, %%mm2 \n\t"
02927 "movq %%mm2, %%mm3 \n\t"
02928 "pcmpgtw %%mm4, %%mm2 \n\t"
02929 "pcmpgtw %%mm5, %%mm3 \n\t"
02930 "pand %%mm2, %%mm4 \n\t"
02931 "pand %%mm3, %%mm5 \n\t"
02932
02933
// d = saturating(|middle| - min(|left|, |right|))
02934 "psubusw %%mm0, %%mm4 \n\t"
02935 "psubusw %%mm1, %%mm5 \n\t"
02936
02937
// d = (d * w05 + w20) >> 6; w05/w20 are constants defined outside this chunk
// (presumably 5 and 0x20 per word, i.e. scale by 5/64 with rounding - confirm)
02938 "movq "MANGLE(w05)", %%mm2 \n\t"
02939 "pmullw %%mm2, %%mm4 \n\t"
02940 "pmullw %%mm2, %%mm5 \n\t"
02941 "movq "MANGLE(w20)", %%mm2 \n\t"
02942 "paddw %%mm2, %%mm4 \n\t"
02943 "paddw %%mm2, %%mm5 \n\t"
02944 "psrlw $6, %%mm4 \n\t"
02945 "psrlw $6, %%mm5 \n\t"
02946
// reload (row3 - row4)
02947 "movq 16(%%"REG_c"), %%mm0 \n\t"
02948 "movq 24(%%"REG_c"), %%mm1 \n\t"
02949
02950 "pxor %%mm2, %%mm2 \n\t"
02951 "pxor %%mm3, %%mm3 \n\t"
02952
// mm2/mm3 = sign masks of (row3 - row4); mm0/mm1 = |row3 - row4| >> 1
02953 "pcmpgtw %%mm0, %%mm2 \n\t"
02954 "pcmpgtw %%mm1, %%mm3 \n\t"
02955 "pxor %%mm2, %%mm0 \n\t"
02956 "pxor %%mm3, %%mm1 \n\t"
02957 "psubw %%mm2, %%mm0 \n\t"
02958 "psubw %%mm3, %%mm1 \n\t"
02959 "psrlw $1, %%mm0 \n\t"
02960 "psrlw $1, %%mm1 \n\t"
02961
// keep d only where the sign of (row3 - row4) differs from the sign of the middle term
02962 "pxor %%mm6, %%mm2 \n\t"
02963 "pxor %%mm7, %%mm3 \n\t"
02964 "pand %%mm2, %%mm4 \n\t"
02965 "pand %%mm3, %%mm5 \n\t"
02966
// d = min(d, |row3 - row4| >> 1)
02967 #if HAVE_MMX2
02968 "pminsw %%mm0, %%mm4 \n\t"
02969 "pminsw %%mm1, %%mm5 \n\t"
02970 #else
02971 "movq %%mm4, %%mm2 \n\t"
02972 "psubusw %%mm0, %%mm2 \n\t"
02973 "psubw %%mm2, %%mm4 \n\t"
02974 "movq %%mm5, %%mm2 \n\t"
02975 "psubusw %%mm1, %%mm2 \n\t"
02976 "psubw %%mm2, %%mm5 \n\t"
02977 #endif
// give d the sign of the middle term, pack to signed bytes
02978 "pxor %%mm6, %%mm4 \n\t"
02979 "pxor %%mm7, %%mm5 \n\t"
02980 "psubw %%mm6, %%mm4 \n\t"
02981 "psubw %%mm7, %%mm5 \n\t"
02982 "packsswb %%mm5, %%mm4 \n\t"
// mm1 = d & ~eq_mask: columns already classified as flat get a zero correction
02983 "movq %3, %%mm1 \n\t"
02984 "pandn %%mm4, %%mm1 \n\t"
// row 3 += d, row 4 -= d (byte-wise, wrapping)
02985 "movq (%0), %%mm0 \n\t"
02986 "paddb %%mm1, %%mm0 \n\t"
02987 "movq %%mm0, (%0) \n\t"
02988 "movq (%0, %1), %%mm0 \n\t"
02989 "psubb %%mm1, %%mm0 \n\t"
02990 "movq %%mm0, (%0, %1) \n\t"
02991
02992 : "+r" (temp_src)
02993 : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask)
02994 : "%"REG_a, "%"REG_c
02995 );
02996 }
02997
02998
02999
03000
03001
03002 }
03003 #endif //HAVE_MMX
03004
// Forward declaration of the per-plane entry point; the definition follows further down in this file.
03005 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03006 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
03007
03012 #undef REAL_SCALED_CPY
03013 #undef SCALED_CPY
03014
/**
 * Copies an 8-row block from src to dst, 8 bytes per row in the MMX paths and
 * BLOCK_SIZE bytes per row in the C fallback.
 *
 * When levelFix is non-zero and MMX is available, every pixel is rescaled with
 * the offset/scale pair found at packedOffsetAndScale[0] / packedOffsetAndScale[1].
 * Without MMX the block is copied unmodified in both cases.
 *
 * @param dst                  destination block (top-left byte)
 * @param dstStride            byte distance between destination rows
 * @param src                  source block (top-left byte)
 * @param srcStride            byte distance between source rows
 * @param levelFix             non-zero selects the scaled copy (MMX builds only)
 * @param packedOffsetAndScale two consecutive 64-bit values: offset, then scale;
 *                             the caller in this file passes &c.packedYOffset, so
 *                             presumably packedYScale directly follows it in
 *                             PPContext - confirm against the struct definition
 */
03015 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
03016 int levelFix, int64_t *packedOffsetAndScale)
03017 {
03018 #if !HAVE_MMX
// row counter, only needed by the memcpy fallbacks
03019 int i;
03020 #endif
03021 if(levelFix){
03022 #if HAVE_MMX
03023 __asm__ volatile(
// REG_a enters holding packedOffsetAndScale ("=&a" tied to input "0"):
// mm2 = offset words, mm3 = scale words. REG_a is then reused as a row pointer.
03024 "movq (%%"REG_a"), %%mm2 \n\t"
03025 "movq 8(%%"REG_a"), %%mm3 \n\t"
// REG_a = src + srcStride, REG_d = dst + dstStride, mm4 = 0
03026 "lea (%2,%4), %%"REG_a" \n\t"
03027 "lea (%3,%5), %%"REG_d" \n\t"
03028 "pxor %%mm4, %%mm4 \n\t"
03029 #if HAVE_MMX2
// MMX2 variant, two rows per invocation: each byte is duplicated into both halves
// of a 16-bit word (punpck with itself), multiplied by the scale keeping the high
// 16 bits (pmulhuw), the offset is subtracted and the result is packed back to
// bytes with unsigned saturation.
03030 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03031 "movq " #src1 ", %%mm0 \n\t"\
03032 "movq " #src1 ", %%mm5 \n\t"\
03033 "movq " #src2 ", %%mm1 \n\t"\
03034 "movq " #src2 ", %%mm6 \n\t"\
03035 "punpcklbw %%mm0, %%mm0 \n\t"\
03036 "punpckhbw %%mm5, %%mm5 \n\t"\
03037 "punpcklbw %%mm1, %%mm1 \n\t"\
03038 "punpckhbw %%mm6, %%mm6 \n\t"\
03039 "pmulhuw %%mm3, %%mm0 \n\t"\
03040 "pmulhuw %%mm3, %%mm5 \n\t"\
03041 "pmulhuw %%mm3, %%mm1 \n\t"\
03042 "pmulhuw %%mm3, %%mm6 \n\t"\
03043 "psubw %%mm2, %%mm0 \n\t"\
03044 "psubw %%mm2, %%mm5 \n\t"\
03045 "psubw %%mm2, %%mm1 \n\t"\
03046 "psubw %%mm2, %%mm6 \n\t"\
03047 "packuswb %%mm5, %%mm0 \n\t"\
03048 "packuswb %%mm6, %%mm1 \n\t"\
03049 "movq %%mm0, " #dst1 " \n\t"\
03050 "movq %%mm1, " #dst2 " \n\t"\
03051
03052 #else //HAVE_MMX2
// Plain MMX variant, two rows per invocation: bytes are zero-extended to words,
// the offset is subtracted first, the result is shifted left by 6 and multiplied
// by the scale keeping the signed high 16 bits (pmulhw), then packed back to
// bytes with unsigned saturation.
03053 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03054 "movq " #src1 ", %%mm0 \n\t"\
03055 "movq " #src1 ", %%mm5 \n\t"\
03056 "punpcklbw %%mm4, %%mm0 \n\t"\
03057 "punpckhbw %%mm4, %%mm5 \n\t"\
03058 "psubw %%mm2, %%mm0 \n\t"\
03059 "psubw %%mm2, %%mm5 \n\t"\
03060 "movq " #src2 ", %%mm1 \n\t"\
03061 "psllw $6, %%mm0 \n\t"\
03062 "psllw $6, %%mm5 \n\t"\
03063 "pmulhw %%mm3, %%mm0 \n\t"\
03064 "movq " #src2 ", %%mm6 \n\t"\
03065 "pmulhw %%mm3, %%mm5 \n\t"\
03066 "punpcklbw %%mm4, %%mm1 \n\t"\
03067 "punpckhbw %%mm4, %%mm6 \n\t"\
03068 "psubw %%mm2, %%mm1 \n\t"\
03069 "psubw %%mm2, %%mm6 \n\t"\
03070 "psllw $6, %%mm1 \n\t"\
03071 "psllw $6, %%mm6 \n\t"\
03072 "pmulhw %%mm3, %%mm1 \n\t"\
03073 "pmulhw %%mm3, %%mm6 \n\t"\
03074 "packuswb %%mm5, %%mm0 \n\t"\
03075 "packuswb %%mm6, %%mm1 \n\t"\
03076 "movq %%mm0, " #dst1 " \n\t"\
03077 "movq %%mm1, " #dst2 " \n\t"\
03078
03079 #endif //HAVE_MMX2
// Extra macro level so that arguments such as REGa are macro-expanded before being stringified.
03080 #define SCALED_CPY(src1, src2, dst1, dst2)\
03081 REAL_SCALED_CPY(src1, src2, dst1, dst2)
03082
// rows 0 and 1
03083 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
// rows 2 and 3 (REG_a/REG_d point at row 1)
03084 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
// rows 4 and 5
03085 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
// advance REG_a/REG_d to row 5, then handle rows 6 and 7
03086 "lea (%%"REG_a",%4,4), %%"REG_a" \n\t"
03087 "lea (%%"REG_d",%5,4), %%"REG_d" \n\t"
03088 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
03089
03090
03091 : "=&a" (packedOffsetAndScale)
03092 : "0" (packedOffsetAndScale),
03093 "r"(src),
03094 "r"(dst),
03095 "r" ((x86_reg)srcStride),
03096 "r" ((x86_reg)dstStride)
03097 : "%"REG_d
03098 );
03099 #else //HAVE_MMX
// C fallback: plain row copy; the offset/scale pair is not applied here.
03100 for(i=0; i<8; i++)
03101 memcpy( &(dst[dstStride*i]),
03102 &(src[srcStride*i]), BLOCK_SIZE);
03103 #endif //HAVE_MMX
03104 }else{
03105 #if HAVE_MMX
// Unscaled copy of 8 rows, two rows per SIMPLE_CPY, using the same row addressing as above.
03106 __asm__ volatile(
// REG_a = src + srcStride, REG_d = dst + dstStride
03107 "lea (%0,%2), %%"REG_a" \n\t"
03108 "lea (%1,%3), %%"REG_d" \n\t"
03109
03110 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
03111 "movq " #src1 ", %%mm0 \n\t"\
03112 "movq " #src2 ", %%mm1 \n\t"\
03113 "movq %%mm0, " #dst1 " \n\t"\
03114 "movq %%mm1, " #dst2 " \n\t"\
03115
03116 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
03117 REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
03118
03119 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
03120 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
03121 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
03122 "lea (%%"REG_a",%2,4), %%"REG_a" \n\t"
03123 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t"
03124 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
03125
03126 : : "r" (src),
03127 "r" (dst),
03128 "r" ((x86_reg)srcStride),
03129 "r" ((x86_reg)dstStride)
03130 : "%"REG_a, "%"REG_d
03131 );
03132 #else //HAVE_MMX
03133 for(i=0; i<8; i++)
03134 memcpy( &(dst[dstStride*i]),
03135 &(src[srcStride*i]), BLOCK_SIZE);
03136 #endif //HAVE_MMX
03137 }
03138 }
03139
/**
 * Replicates the 8 bytes at src into the three rows directly above it
 * (src - stride, src - 2*stride and src - 3*stride).
 *
 * @param src    row to duplicate; the rows above it are overwritten
 * @param stride byte distance between rows
 */
03143 static inline void RENAME(duplicate)(uint8_t src[], int stride)
03144 {
03145 #if HAVE_MMX
03146 __asm__ volatile(
// %1 holds -stride, so each "add %1, %0" / indexed store moves one row upwards
03147 "movq (%0), %%mm0 \n\t"
03148 "add %1, %0 \n\t"
03149 "movq %%mm0, (%0) \n\t"
03150 "movq %%mm0, (%0, %1) \n\t"
03151 "movq %%mm0, (%0, %1, 2) \n\t"
03152 : "+r" (src)
03153 : "r" ((x86_reg)-stride)
03154 );
03155 #else
// C fallback: step p upwards three times, copying the original row each time
03156 int i;
03157 uint8_t *p=src;
03158 for(i=0; i<3; i++){
03159 p-= stride;
03160 memcpy(p, src, 8);
03161 }
03162 #endif
03163 }
03164
03168 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03169 const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
03170 {
03171 DECLARE_ALIGNED(8, PPContext, c)= *c2;
03172 int x,y;
03173 #ifdef COMPILE_TIME_MODE
03174 const int mode= COMPILE_TIME_MODE;
03175 #else
03176 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
03177 #endif
03178 int black=0, white=255;
03179 int QPCorrecture= 256*256;
03180
03181 int copyAhead;
03182 #if HAVE_MMX
03183 int i;
03184 #endif
03185
03186 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
03187 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
03188
03189
03190 uint64_t * const yHistogram= c.yHistogram;
03191 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
03192 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
03193
03194
03195 #if HAVE_MMX
03196 for(i=0; i<57; i++){
03197 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
03198 int threshold= offset*2 + 1;
03199 c.mmxDcOffset[i]= 0x7F - offset;
03200 c.mmxDcThreshold[i]= 0x7F - threshold;
03201 c.mmxDcOffset[i]*= 0x0101010101010101LL;
03202 c.mmxDcThreshold[i]*= 0x0101010101010101LL;
03203 }
03204 #endif
03205
03206 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
03207 else if( (mode & LINEAR_BLEND_DEINT_FILTER)
03208 || (mode & FFMPEG_DEINT_FILTER)
03209 || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
03210 else if( (mode & V_DEBLOCK)
03211 || (mode & LINEAR_IPOL_DEINT_FILTER)
03212 || (mode & MEDIAN_DEINT_FILTER)
03213 || (mode & V_A_DEBLOCK)) copyAhead=13;
03214 else if(mode & V_X1_FILTER) copyAhead=11;
03215
03216 else if(mode & DERING) copyAhead=9;
03217 else copyAhead=8;
03218
03219 copyAhead-= 8;
03220
03221 if(!isColor){
03222 uint64_t sum= 0;
03223 int i;
03224 uint64_t maxClipped;
03225 uint64_t clipped;
03226 double scale;
03227
03228 c.frameNum++;
03229
03230 if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
03231
03232 for(i=0; i<256; i++){
03233 sum+= yHistogram[i];
03234 }
03235
03236
03237 maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
03238
03239 clipped= sum;
03240 for(black=255; black>0; black--){
03241 if(clipped < maxClipped) break;
03242 clipped-= yHistogram[black];
03243 }
03244
03245 clipped= sum;
03246 for(white=0; white<256; white++){
03247 if(clipped < maxClipped) break;
03248 clipped-= yHistogram[white];
03249 }
03250
03251 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
03252
03253 #if HAVE_MMX2
03254 c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
03255 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
03256 #else
03257 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
03258 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
03259 #endif
03260
03261 c.packedYOffset|= c.packedYOffset<<32;
03262 c.packedYOffset|= c.packedYOffset<<16;
03263
03264 c.packedYScale|= c.packedYScale<<32;
03265 c.packedYScale|= c.packedYScale<<16;
03266
03267 if(mode & LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5);
03268 else QPCorrecture= 256*256;
03269 }else{
03270 c.packedYScale= 0x0100010001000100LL;
03271 c.packedYOffset= 0;
03272 QPCorrecture= 256*256;
03273 }
03274
03275
03276 y=-BLOCK_SIZE;
03277 {
03278 const uint8_t *srcBlock= &(src[y*srcStride]);
03279 uint8_t *dstBlock= tempDst + dstStride;
03280
03281
03282
03283
03284 for(x=0; x<width; x+=BLOCK_SIZE){
03285
03286 #if HAVE_MMX2
03287
03288
03289
03290
03291
03292
03293
03294 __asm__(
03295 "mov %4, %%"REG_a" \n\t"
03296 "shr $2, %%"REG_a" \n\t"
03297 "and $6, %%"REG_a" \n\t"
03298 "add %5, %%"REG_a" \n\t"
03299 "mov %%"REG_a", %%"REG_d" \n\t"
03300 "imul %1, %%"REG_a" \n\t"
03301 "imul %3, %%"REG_d" \n\t"
03302 "prefetchnta 32(%%"REG_a", %0) \n\t"
03303 "prefetcht0 32(%%"REG_d", %2) \n\t"
03304 "add %1, %%"REG_a" \n\t"
03305 "add %3, %%"REG_d" \n\t"
03306 "prefetchnta 32(%%"REG_a", %0) \n\t"
03307 "prefetcht0 32(%%"REG_d", %2) \n\t"
03308 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
03309 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
03310 : "%"REG_a, "%"REG_d
03311 );
03312
03313 #elif HAVE_AMD3DNOW
03314
03315
03316
03317
03318
03319
03320 #endif
03321
03322 RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
03323 srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03324
03325 RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
03326
03327 if(mode & LINEAR_IPOL_DEINT_FILTER)
03328 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03329 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03330 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03331 else if(mode & MEDIAN_DEINT_FILTER)
03332 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03333 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03334 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03335 else if(mode & FFMPEG_DEINT_FILTER)
03336 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03337 else if(mode & LOWPASS5_DEINT_FILTER)
03338 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03339
03340
03341
03342 dstBlock+=8;
03343 srcBlock+=8;
03344 }
03345 if(width==FFABS(dstStride))
03346 linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
03347 else{
03348 int i;
03349 for(i=0; i<copyAhead; i++){
03350 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
03351 }
03352 }
03353 }
03354
03355 for(y=0; y<height; y+=BLOCK_SIZE){
03356
03357 const uint8_t *srcBlock= &(src[y*srcStride]);
03358 uint8_t *dstBlock= &(dst[y*dstStride]);
03359 #if HAVE_MMX
03360 uint8_t *tempBlock1= c.tempBlocks;
03361 uint8_t *tempBlock2= c.tempBlocks + 8;
03362 #endif
03363 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
03364 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
03365 int QP=0;
03366
03367
03368 if(y+15 >= height){
03369 int i;
03370
03371
03372 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
03373 FFMAX(height-y-copyAhead, 0), srcStride);
03374
03375
03376 for(i=FFMAX(height-y, 8); i<copyAhead+8; i++)
03377 memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), FFABS(srcStride));
03378
03379
03380 linecpy(tempDst, dstBlock - dstStride, FFMIN(height-y+1, copyAhead+1), dstStride);
03381
03382
03383 for(i=height-y+1; i<=copyAhead; i++)
03384 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), FFABS(dstStride));
03385
03386 dstBlock= tempDst + dstStride;
03387 srcBlock= tempSrc;
03388 }
03389
03390
03391
03392
03393 for(x=0; x<width; x+=BLOCK_SIZE){
03394 const int stride= dstStride;
03395 #if HAVE_MMX
03396 uint8_t *tmpXchg;
03397 #endif
03398 if(isColor){
03399 QP= QPptr[x>>qpHShift];
03400 c.nonBQP= nonBQPptr[x>>qpHShift];
03401 }else{
03402 QP= QPptr[x>>4];
03403 QP= (QP* QPCorrecture + 256*128)>>16;
03404 c.nonBQP= nonBQPptr[x>>4];
03405 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
03406 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
03407 }
03408 c.QP= QP;
03409 #if HAVE_MMX
03410 __asm__ volatile(
03411 "movd %1, %%mm7 \n\t"
03412 "packuswb %%mm7, %%mm7 \n\t"
03413 "packuswb %%mm7, %%mm7 \n\t"
03414 "packuswb %%mm7, %%mm7 \n\t"
03415 "movq %%mm7, %0 \n\t"
03416 : "=m" (c.pQPb)
03417 : "r" (QP)
03418 );
03419 #endif
03420
03421
03422 #if HAVE_MMX2
03423
03424
03425
03426
03427
03428
03429
03430 __asm__(
03431 "mov %4, %%"REG_a" \n\t"
03432 "shr $2, %%"REG_a" \n\t"
03433 "and $6, %%"REG_a" \n\t"
03434 "add %5, %%"REG_a" \n\t"
03435 "mov %%"REG_a", %%"REG_d" \n\t"
03436 "imul %1, %%"REG_a" \n\t"
03437 "imul %3, %%"REG_d" \n\t"
03438 "prefetchnta 32(%%"REG_a", %0) \n\t"
03439 "prefetcht0 32(%%"REG_d", %2) \n\t"
03440 "add %1, %%"REG_a" \n\t"
03441 "add %3, %%"REG_d" \n\t"
03442 "prefetchnta 32(%%"REG_a", %0) \n\t"
03443 "prefetcht0 32(%%"REG_d", %2) \n\t"
03444 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
03445 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
03446 : "%"REG_a, "%"REG_d
03447 );
03448
03449 #elif HAVE_AMD3DNOW
03450
03451
03452
03453
03454
03455
03456 #endif
03457
03458 RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
03459 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03460
03461 if(mode & LINEAR_IPOL_DEINT_FILTER)
03462 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03463 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03464 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03465 else if(mode & MEDIAN_DEINT_FILTER)
03466 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03467 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03468 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03469 else if(mode & FFMPEG_DEINT_FILTER)
03470 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03471 else if(mode & LOWPASS5_DEINT_FILTER)
03472 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03473
03474
03475
03476
03477
03478 if(y + 8 < height){
03479 if(mode & V_X1_FILTER)
03480 RENAME(vertX1Filter)(dstBlock, stride, &c);
03481 else if(mode & V_DEBLOCK){
03482 const int t= RENAME(vertClassify)(dstBlock, stride, &c);
03483
03484 if(t==1)
03485 RENAME(doVertLowPass)(dstBlock, stride, &c);
03486 else if(t==2)
03487 RENAME(doVertDefFilter)(dstBlock, stride, &c);
03488 }else if(mode & V_A_DEBLOCK){
03489 RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
03490 }
03491 }
03492
03493 #if HAVE_MMX
03494 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
03495 #endif
03496
03497 if(x - 8 >= 0){
03498 #if HAVE_MMX
03499 if(mode & H_X1_FILTER)
03500 RENAME(vertX1Filter)(tempBlock1, 16, &c);
03501 else if(mode & H_DEBLOCK){
03502
03503 const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
03504
03505 if(t==1)
03506 RENAME(doVertLowPass)(tempBlock1, 16, &c);
03507 else if(t==2)
03508 RENAME(doVertDefFilter)(tempBlock1, 16, &c);
03509 }else if(mode & H_A_DEBLOCK){
03510 RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
03511 }
03512
03513 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
03514
03515 #else
03516 if(mode & H_X1_FILTER)
03517 horizX1Filter(dstBlock-4, stride, QP);
03518 else if(mode & H_DEBLOCK){
03519 #if HAVE_ALTIVEC
03520 DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
03521 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
03522
03523 const int t=vertClassify_altivec(tempBlock-48, 16, &c);
03524 if(t==1) {
03525 doVertLowPass_altivec(tempBlock-48, 16, &c);
03526 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03527 }
03528 else if(t==2) {
03529 doVertDefFilter_altivec(tempBlock-48, 16, &c);
03530 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03531 }
03532 #else
03533 const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
03534
03535 if(t==1)
03536 RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
03537 else if(t==2)
03538 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
03539 #endif
03540 }else if(mode & H_A_DEBLOCK){
03541 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
03542 }
03543 #endif //HAVE_MMX
03544 if(mode & DERING){
03545
03546 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
03547 }
03548
03549 if(mode & TEMP_NOISE_FILTER)
03550 {
03551 RENAME(tempNoiseReducer)(dstBlock-8, stride,
03552 c.tempBlurred[isColor] + y*dstStride + x,
03553 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
03554 c.ppMode.maxTmpNoise);
03555 }
03556 }
03557
03558 dstBlock+=8;
03559 srcBlock+=8;
03560
03561 #if HAVE_MMX
03562 tmpXchg= tempBlock1;
03563 tempBlock1= tempBlock2;
03564 tempBlock2 = tmpXchg;
03565 #endif
03566 }
03567
03568 if(mode & DERING){
03569 if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
03570 }
03571
03572 if((mode & TEMP_NOISE_FILTER)){
03573 RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
03574 c.tempBlurred[isColor] + y*dstStride + x,
03575 c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
03576 c.ppMode.maxTmpNoise);
03577 }
03578
03579
03580 if(y+15 >= height){
03581 uint8_t *dstBlock= &(dst[y*dstStride]);
03582 if(width==FFABS(dstStride))
03583 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
03584 else{
03585 int i;
03586 for(i=0; i<height-y; i++){
03587 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
03588 }
03589 }
03590 }
03591
03592
03593
03594
03595
03596
03597
03598
03599
03600 }
03601 #if HAVE_AMD3DNOW
03602 __asm__ volatile("femms");
03603 #elif HAVE_MMX
03604 __asm__ volatile("emms");
03605 #endif
03606
03607 #ifdef DEBUG_BRIGHTNESS
03608 if(!isColor){
03609 int max=1;
03610 int i;
03611 for(i=0; i<256; i++)
03612 if(yHistogram[i] > max) max=yHistogram[i];
03613
03614 for(i=1; i<256; i++){
03615 int x;
03616 int start=yHistogram[i-1]/(max/256+1);
03617 int end=yHistogram[i]/(max/256+1);
03618 int inc= end > start ? 1 : -1;
03619 for(x=start; x!=end+inc; x+=inc)
03620 dst[ i*dstStride + x]+=128;
03621 }
03622
03623 for(i=0; i<100; i+=2){
03624 dst[ (white)*dstStride + i]+=128;
03625 dst[ (black)*dstStride + i]+=128;
03626 }
03627 }
03628 #endif
03629
03630 *c2= c;
03631
03632 }