00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <stddef.h>
00028
00029 #undef PREFETCH
00030 #undef MOVNTQ
00031 #undef EMMS
00032 #undef SFENCE
00033 #undef MMREG_SIZE
00034 #undef PAVGB
00035
00036 #if HAVE_SSE2
00037 #define MMREG_SIZE 16
00038 #else
00039 #define MMREG_SIZE 8
00040 #endif
00041
00042 #if HAVE_AMD3DNOW
00043 #define PREFETCH "prefetch"
00044 #define PAVGB "pavgusb"
00045 #elif HAVE_MMX2
00046 #define PREFETCH "prefetchnta"
00047 #define PAVGB "pavgb"
00048 #else
00049 #define PREFETCH " # nop"
00050 #endif
00051
00052 #if HAVE_AMD3DNOW
00053
00054 #define EMMS "femms"
00055 #else
00056 #define EMMS "emms"
00057 #endif
00058
00059 #if HAVE_MMX2
00060 #define MOVNTQ "movntq"
00061 #define SFENCE "sfence"
00062 #else
00063 #define MOVNTQ "movq"
00064 #define SFENCE " # nop"
00065 #endif
00066
/**
 * Expand packed 24bpp pixels to 32bpp, inserting an opaque alpha byte (255).
 *
 * @param src       input buffer of 3-byte pixels
 * @param dst       output buffer of 4-byte pixels (must hold src_size/3*4 bytes)
 * @param src_size  number of input bytes to consume
 *
 * MMX path: each iteration loads eight 3-byte pixels (24 bytes), widens them
 * to dwords with movd/punpckldq, ORs in mask32a (alpha fill constant defined
 * elsewhere in the file) and stores 32 bytes. The scalar tail handles the
 * remainder; on big-endian targets the byte order is mirrored and alpha
 * leads the pixel.
 */
static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* stop the vector loop while at least one full 24-byte group remains */
    mm_end = end - 23;
    __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "punpckldq 3%1, %%mm0 \n\t"
            "movd 6%1, %%mm1 \n\t"
            "punpckldq 9%1, %%mm1 \n\t"
            "movd 12%1, %%mm2 \n\t"
            "punpckldq 15%1, %%mm2 \n\t"
            "movd 18%1, %%mm3 \n\t"
            "punpckldq 21%1, %%mm3 \n\t"
            "por %%mm7, %%mm0 \n\t"   /* set alpha bytes from mask32a */
            "por %%mm7, %%mm1 \n\t"
            "por %%mm7, %%mm2 \n\t"
            "por %%mm7, %%mm3 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm1, 8%0 \n\t"
            MOVNTQ" %%mm2, 16%0 \n\t"
            MOVNTQ" %%mm3, 24%0"
            :"=m"(*dest)
            :"m"(*s)
            :"memory");
        dest += 32;
        s += 24;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* scalar tail: copy 3 bytes, append alpha = 255 */
    while (s < end) {
#if HAVE_BIGENDIAN
        /* big-endian stores alpha first and mirrors the triplet */
        *dest++ = 255;
        *dest++ = s[2];
        *dest++ = s[1];
        *dest++ = s[0];
        s+=3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = 255;
#endif
    }
}
00124
/*
 * STORE_BGR24_MMX: compact eight 32bpp pixels held in mm0..mm7 into
 * 24 packed bytes and store them at %0 (three MOVNTQ stores of 8 bytes).
 *
 * Register contract on entry: mm0/mm2, mm1/mm3, mm4/mm6, mm5/mm7 each hold
 * a duplicated pair of two-pixel quads. mask24l/mask24h/mask24hh/mask24hhh/
 * mask24hhhh are byte-select constants defined elsewhere in the file;
 * MANGLE() applies the platform symbol prefix. Clobbers mm0-mm7.
 */
#define STORE_BGR24_MMX \
    "psrlq $8, %%mm2 \n\t" \
    "psrlq $8, %%mm3 \n\t" \
    "psrlq $8, %%mm6 \n\t" \
    "psrlq $8, %%mm7 \n\t" \
    "pand "MANGLE(mask24l)", %%mm0\n\t" \
    "pand "MANGLE(mask24l)", %%mm1\n\t" \
    "pand "MANGLE(mask24l)", %%mm4\n\t" \
    "pand "MANGLE(mask24l)", %%mm5\n\t" \
    "pand "MANGLE(mask24h)", %%mm2\n\t" \
    "pand "MANGLE(mask24h)", %%mm3\n\t" \
    "pand "MANGLE(mask24h)", %%mm6\n\t" \
    "pand "MANGLE(mask24h)", %%mm7\n\t" \
    "por %%mm2, %%mm0 \n\t" \
    "por %%mm3, %%mm1 \n\t" \
    "por %%mm6, %%mm4 \n\t" \
    "por %%mm7, %%mm5 \n\t" \
    \
    "movq %%mm1, %%mm2 \n\t" \
    "movq %%mm4, %%mm3 \n\t" \
    "psllq $48, %%mm2 \n\t" \
    "psllq $32, %%mm3 \n\t" \
    "pand "MANGLE(mask24hh)", %%mm2\n\t" \
    "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
    "por %%mm2, %%mm0 \n\t" \
    "psrlq $16, %%mm1 \n\t" \
    "psrlq $32, %%mm4 \n\t" \
    "psllq $16, %%mm5 \n\t" \
    "por %%mm3, %%mm1 \n\t" \
    "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
    "por %%mm5, %%mm4 \n\t" \
    \
    MOVNTQ" %%mm0, %0 \n\t" \
    MOVNTQ" %%mm1, 8%0 \n\t" \
    MOVNTQ" %%mm4, 16%0"
00160
00161
/**
 * Drop the alpha byte of packed 32bpp pixels, producing 24bpp output.
 *
 * @param src       input buffer of 4-byte pixels
 * @param dst       output buffer of 3-byte pixels (must hold src_size/4*3 bytes)
 * @param src_size  number of input bytes to consume
 *
 * MMX path: loads eight 32bpp pixels (32 bytes), duplicates them into the
 * register layout expected by STORE_BGR24_MMX, and stores 24 packed bytes.
 * Scalar tail: little-endian copies the first 3 bytes and skips the 4th;
 * big-endian skips the leading byte and mirrors the remaining triplet.
 */
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    uint8_t *dest = dst;
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* stop while at least one full 32-byte group remains */
    mm_end = end - 31;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 16%1, %%mm4 \n\t"
            "movq 24%1, %%mm5 \n\t"
            "movq %%mm0, %%mm2 \n\t"   /* duplicate for STORE_BGR24_MMX */
            "movq %%mm1, %%mm3 \n\t"
            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            STORE_BGR24_MMX
            :"=m"(*dest)
            :"m"(*s)
            :"memory");
        dest += 24;
        s += 32;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
#if HAVE_BIGENDIAN
        /* big-endian: alpha leads the source pixel; output is mirrored */
        s++;
        dest[2] = *s++;
        dest[1] = *s++;
        dest[0] = *s++;
        dest += 3;
#else
        *dest++ = *s++;
        *dest++ = *s++;
        *dest++ = *s++;
        s++;
#endif
    }
}
00211
00212
00213
00214
00215
00216
00217
/**
 * Convert 15bpp (x1:5:5:5) pixels to 16bpp (5:6:5) by shifting the two
 * upper fields left one bit; the low 5-bit field stays in place, and the
 * new green LSB is 0.
 *
 * The scalar form (x & 0x7FFF) + (x & 0x7FE0) adds the upper-10-bit portion
 * to itself, which equals shifting those bits left by one.
 *
 * @param src       input buffer of 16-bit pixels
 * @param dst       output buffer, same byte size as consumed input
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    register const uint8_t* s=src;
    register uint8_t* d=dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s));
    /* mask15s (defined elsewhere) selects the bits to be doubled */
    __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
    mm_end = end - 15;
    while (s<mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "pand %%mm4, %%mm0 \n\t"
            "pand %%mm4, %%mm2 \n\t"
            "paddw %%mm1, %%mm0 \n\t"   /* x + (x & mask): shift upper fields */
            "paddw %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
        );
        d+=16;
        s+=16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* scalar tail: two pixels at a time, then at most one leftover pixel */
    mm_end = end - 3;
    while (s < mm_end) {
        register unsigned x= *((const uint32_t *)s);
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
        d+=4;
        s+=4;
    }
    if (s < end) {
        register unsigned short x= *((const uint16_t *)s);
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
    }
}
00263
/**
 * Convert 16bpp (5:6:5) pixels to 15bpp (x1:5:5:5): the two upper fields
 * shift right one bit (dropping the green LSB), the low 5-bit field is kept.
 *
 * Scalar form: ((x >> 1) & 0x7FE0) | (x & 0x001F).
 *
 * @param src       input buffer of 16-bit pixels
 * @param dst       output buffer, same byte size as consumed input
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    register const uint8_t* s=src;
    register uint8_t* d=dst;
    register const uint8_t *end;
    const uint8_t *mm_end;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s));
    /* mask15rg / mask15b are field-select constants defined elsewhere */
    __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
    __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
    mm_end = end - 15;
    while (s<mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $1, %%mm0 \n\t"     /* shift upper fields down one bit */
            "psrlq $1, %%mm2 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm6, %%mm1 \n\t"   /* keep the untouched low field */
            "pand %%mm6, %%mm3 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm2, 8%0"
            :"=m"(*d)
            :"m"(*s)
        );
        d+=16;
        s+=16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* scalar tail: two pixels at a time, then at most one leftover pixel */
    mm_end = end - 3;
    while (s < mm_end) {
        register uint32_t x= *((const uint32_t*)s);
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
        s+=4;
        d+=4;
    }
    if (s < end) {
        register uint16_t x= *((const uint16_t*)s);
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
    }
}
00314
/**
 * Convert packed 32bpp pixels to 16bpp (5:6:5).
 *
 * Scalar reference (little-endian dword x):
 *   out = ((x & 0xFF) >> 3) + ((x & 0xFC00) >> 5) + ((x & 0xF80000) >> 8)
 *
 * Two MMX variants exist; the first (enabled by "#if 1") uses a pmaddwd
 * multiply trick with constants mask3216g/mask3216br/mul3216 (defined
 * elsewhere) to combine two field shifts per pixel in one instruction.
 * The alternative path does the three shift-and-mask steps explicitly.
 *
 * @param src       input buffer of 4-byte pixels
 * @param dst       output buffer of 16-bit pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
    /* single asm loop: %0 = d, %1 = s, both advanced in-asm; exits when s >= mm_end */
    __asm__ volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"   /* shift-and-add two fields via multiply */
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $5, %%mm0 \n\t"
        "pslld $11, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
    );
#else
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* scalar tail, one pixel per iteration */
    while (s < end) {
        register int rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
    }
}
00410
/**
 * Convert packed 32bpp pixels to 16bpp (5:6:5) with the two 5-bit end
 * fields swapped relative to rgb32to16.
 *
 * Scalar reference (little-endian dword x):
 *   out = ((x & 0xF8) << 8) + ((x & 0xFC00) >> 5) + ((x & 0xF80000) >> 19)
 *
 * red_16mask/green_16mask/blue_16mask are field-select constants defined
 * elsewhere in the file.
 *
 * @param src       input buffer of 4-byte pixels
 * @param dst       output buffer of 16-bit pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    mm_end = end - 15;
    while (s < mm_end) {
        /* 4 pixels per iteration: 3 shifted/masked copies ORed per pixel pair */
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        register int rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
    }
}
00469
/**
 * Convert packed 32bpp pixels to 15bpp (x1:5:5:5).
 *
 * Scalar reference (little-endian dword x):
 *   out = ((x & 0xFF) >> 3) + ((x & 0xF800) >> 6) + ((x & 0xF80000) >> 9)
 *
 * Mirrors rgb32to16: the default path uses the pmaddwd multiply trick with
 * constants mask3215g/mask3216br/mul3215 (defined elsewhere); the disabled
 * alternative does explicit shift-and-mask steps per field.
 *
 * @param src       input buffer of 4-byte pixels
 * @param dst       output buffer of 16-bit pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    mm_end = end - 15;
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
    /* single asm loop: %0 = d, %1 = s, both advanced in-asm; exits when s >= mm_end */
    __asm__ volatile(
        "movq %3, %%mm5 \n\t"
        "movq %4, %%mm6 \n\t"
        "movq %5, %%mm7 \n\t"
        "jmp 2f \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1) \n\t"
        "movd (%1), %%mm0 \n\t"
        "movd 4(%1), %%mm3 \n\t"
        "punpckldq 8(%1), %%mm0 \n\t"
        "punpckldq 12(%1), %%mm3 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pmaddwd %%mm7, %%mm0 \n\t"   /* shift-and-add two fields via multiply */
        "pmaddwd %%mm7, %%mm3 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm5, %%mm4 \n\t"
        "por %%mm1, %%mm0 \n\t"
        "por %%mm4, %%mm3 \n\t"
        "psrld $6, %%mm0 \n\t"
        "pslld $10, %%mm3 \n\t"
        "por %%mm3, %%mm0 \n\t"
        MOVNTQ" %%mm0, (%0) \n\t"
        "add $16, %1 \n\t"
        "add $8, %0 \n\t"
        "2: \n\t"
        "cmp %2, %1 \n\t"
        " jb 1b \n\t"
        : "+r" (d), "+r"(s)
        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
    );
#else
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $9, %%mm2 \n\t"
            "psrlq $9, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 16;
    }
#endif
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    /* scalar tail, one pixel per iteration */
    while (s < end) {
        register int rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
    }
}
00565
/**
 * Convert packed 32bpp pixels to 15bpp (x1:5:5:5) with the two 5-bit end
 * fields swapped relative to rgb32to15.
 *
 * Scalar reference (little-endian dword x):
 *   out = ((x & 0xF8) << 7) + ((x & 0xF800) >> 6) + ((x & 0xF80000) >> 19)
 *
 * red_15mask/green_15mask/blue_15mask are field-select constants defined
 * elsewhere in the file.
 *
 * @param src       input buffer of 4-byte pixels
 * @param dst       output buffer of 16-bit pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    mm_end = end - 15;
    while (s < mm_end) {
        /* 4 pixels per iteration: 3 shifted/masked copies ORed per pixel pair */
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 4%1, %%mm3 \n\t"
            "punpckldq 8%1, %%mm0 \n\t"
            "punpckldq 12%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $7, %%mm0 \n\t"
            "psllq $7, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 16;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        register int rgb = *(const uint32_t*)s; s += 4;
        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
    }
}
00624
/**
 * Convert packed 24bpp pixels to 16bpp (5:6:5).
 *
 * Scalar reference: for each source triplet (b, g, r in read order)
 *   out = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8)
 *
 * MMX path loads 4 pixels (12 bytes, via overlapping 3-byte movd/punpckldq
 * loads) and packs them with shift/mask/OR steps.
 *
 * @param src       input buffer of 3-byte pixels
 * @param dst       output buffer of 16-bit pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    /* stop while one full 12-byte group remains; movd reads overrun by 1 byte,
       hence the -11 bound instead of -12 + 1 exact group */
    mm_end = end - 11;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "psrlq $8, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 12;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
}
00685
/**
 * Convert packed 24bpp pixels to 16bpp (5:6:5), with the first source byte
 * ending in the high 5-bit field (mirror of rgb24tobgr16's byte order).
 *
 * Scalar reference: for each source triplet (r, g, b in read order)
 *   out = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8)
 *
 * NOTE(review): MMX loop bound here is end - 15 while the mirror function
 * rgb24tobgr16 uses end - 11 for the same 12-byte stride — presumably a
 * conservative bound; verify against upstream before changing.
 *
 * @param src       input buffer of 3-byte pixels
 * @param dst       output buffer of 16-bit pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_16mask),"m"(green_16mask));
    mm_end = end - 15;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $8, %%mm0 \n\t"
            "psllq $8, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $5, %%mm1 \n\t"
            "psrlq $5, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
        d += 4;
        s += 12;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
    }
}
00746
/**
 * Convert packed 24bpp pixels to 15bpp (x1:5:5:5).
 *
 * Scalar reference: for each source triplet (b, g, r in read order)
 *   out = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7)
 *
 * Same structure as rgb24tobgr16 but with 15bpp shift amounts and the
 * red_15mask/green_15mask/blue_15mask constants (defined elsewhere).
 *
 * @param src       input buffer of 3-byte pixels
 * @param dst       output buffer of 16-bit pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    mm_end = end - 11;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psrlq $3, %%mm0 \n\t"
            "psrlq $3, %%mm3 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %2, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $9, %%mm2 \n\t"
            "psrlq $9, %%mm5 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 12;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        const int b = *s++;
        const int g = *s++;
        const int r = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
}
00807
/**
 * Convert packed 24bpp pixels to 15bpp (x1:5:5:5), with the first source
 * byte ending in the high 5-bit field (mirror of rgb24tobgr15's byte order).
 *
 * Scalar reference: for each source triplet (r, g, b in read order)
 *   out = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7)
 *
 * NOTE(review): MMX loop bound is end - 15 while the mirror function
 * rgb24tobgr15 uses end - 11 for the same 12-byte stride — presumably a
 * conservative bound; verify against upstream before changing.
 *
 * @param src       input buffer of 3-byte pixels
 * @param dst       output buffer of 16-bit pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *s = src;
    const uint8_t *end;
#if HAVE_MMX
    const uint8_t *mm_end;
#endif
    uint16_t *d = (uint16_t *)dst;
    end = s + src_size;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
    __asm__ volatile(
        "movq %0, %%mm7 \n\t"
        "movq %1, %%mm6 \n\t"
        ::"m"(red_15mask),"m"(green_15mask));
    mm_end = end - 15;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movd %1, %%mm0 \n\t"
            "movd 3%1, %%mm3 \n\t"
            "punpckldq 6%1, %%mm0 \n\t"
            "punpckldq 9%1, %%mm3 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "psllq $7, %%mm0 \n\t"
            "psllq $7, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "psrlq $6, %%mm1 \n\t"
            "psrlq $6, %%mm4 \n\t"
            "pand %%mm6, %%mm1 \n\t"
            "pand %%mm6, %%mm4 \n\t"
            "psrlq $19, %%mm2 \n\t"
            "psrlq $19, %%mm5 \n\t"
            "pand %2, %%mm2 \n\t"
            "pand %2, %%mm5 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "por %%mm5, %%mm3 \n\t"
            "psllq $16, %%mm3 \n\t"
            "por %%mm3, %%mm0 \n\t"
            MOVNTQ" %%mm0, %0 \n\t"
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
        d += 4;
        s += 12;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        const int r = *s++;
        const int g = *s++;
        const int b = *s++;
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
    }
}
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
/**
 * Expand 15bpp (x1:5:5:5) pixels to packed 24bpp.
 *
 * Scalar reference, per 16-bit pixel bgr:
 *   out[0] = (bgr & 0x1F)   << 3
 *   out[1] = (bgr & 0x3E0)  >> 2
 *   out[2] = (bgr & 0x7C00) >> 7
 *
 * MMX path processes 8 pixels per iteration: each half (4 pixels) is split
 * into its three fields with mask15b/mask15g/mask15r (defined elsewhere),
 * scaled to 8 bits, widened with punpcklwd/punpckhwd against mmx_null, and
 * recombined; STORE_BGR24_MMX then packs the 8 expanded pixels to 24 bytes.
 *
 * NOTE(review): the conversion is split across two __asm__ statements that
 * rely on mm0-mm7 contents surviving between them — fragile, since the
 * compiler treats MMX state as untouched here; verify before reordering.
 *
 * @param src       input buffer of 16-bit pixels
 * @param dst       output buffer of 3-byte pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
#if HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t*)src;
    end = s + src_size/2;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    mm_end = end - 7;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            /* first 4 pixels: isolate and scale the three fields */
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $2, %%mm1 \n\t"
            "psrlq $7, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            /* widen words to dwords against zero (mmx_null) */
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"

            /* stash first 4 expanded pixels in mm6/mm7 */
            "movq %%mm0, %%mm6 \n\t"
            "movq %%mm3, %%mm7 \n\t"

            /* second 4 pixels: same field split and widening */
            "movq 8%1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $2, %%mm1 \n\t"
            "psrlq $7, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"

            :"=m"(*d)
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
            :"memory");

        /* rearrange registers into STORE_BGR24_MMX's expected layout */
        __asm__ volatile(
            "movq %%mm0, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "movq %%mm6, %%mm0 \n\t"
            "movq %%mm7, %%mm1 \n\t"

            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"

            STORE_BGR24_MMX

            :"=m"(*d)
            :"m"(*s)
            :"memory");
        d += 24;
        s += 8;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        register uint16_t bgr;
        bgr = *s++;
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x3E0)>>2;
        *d++ = (bgr&0x7C00)>>7;
    }
}
00996
/**
 * Expand 16bpp (5:6:5) pixels to packed 24bpp.
 *
 * Scalar reference, per 16-bit pixel bgr:
 *   out[0] = (bgr & 0x1F)   << 3
 *   out[1] = (bgr & 0x7E0)  >> 3
 *   out[2] = (bgr & 0xF800) >> 8
 *
 * Same structure as rgb15tobgr24, but with the 5:6:5 masks
 * (mask16b/mask16g/mask16r, defined elsewhere) and shift amounts.
 *
 * NOTE(review): as in rgb15tobgr24, the two __asm__ statements depend on
 * mm0-mm7 surviving between them — verify before reordering.
 *
 * @param src       input buffer of 16-bit pixels
 * @param dst       output buffer of 3-byte pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
#if HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = (uint8_t *)dst;
    const uint16_t *s = (const uint16_t *)src;
    end = s + src_size/2;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    mm_end = end - 7;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            /* first 4 pixels: isolate and scale the three fields */
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $3, %%mm1 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            /* widen words to dwords against zero (mmx_null) */
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"

            /* stash first 4 expanded pixels in mm6/mm7 */
            "movq %%mm0, %%mm6 \n\t"
            "movq %%mm3, %%mm7 \n\t"

            /* second 4 pixels: same field split and widening */
            "movq 8%1, %%mm0 \n\t"
            "movq 8%1, %%mm1 \n\t"
            "movq 8%1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $3, %%mm1 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            "punpcklwd %5, %%mm0 \n\t"
            "punpcklwd %5, %%mm1 \n\t"
            "punpcklwd %5, %%mm2 \n\t"
            "punpckhwd %5, %%mm3 \n\t"
            "punpckhwd %5, %%mm4 \n\t"
            "punpckhwd %5, %%mm5 \n\t"
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"
            :"=m"(*d)
            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
            :"memory");

        /* rearrange registers into STORE_BGR24_MMX's expected layout */
        __asm__ volatile(
            "movq %%mm0, %%mm4 \n\t"
            "movq %%mm3, %%mm5 \n\t"
            "movq %%mm6, %%mm0 \n\t"
            "movq %%mm7, %%mm1 \n\t"

            "movq %%mm4, %%mm6 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"

            STORE_BGR24_MMX

            :"=m"(*d)
            :"m"(*s)
            :"memory");
        d += 24;
        s += 8;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        register uint16_t bgr;
        bgr = *s++;
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x7E0)>>3;
        *d++ = (bgr&0xF800)>>8;
    }
}
01102
01103
01104
01105
01106
01107
01108
01109
/*
 * PACK_RGB32: interleave three per-field word vectors (mm0, mm1, mm2,
 * 4 pixels each) into four 32bpp pixels and store 16 bytes at %0/8%0.
 *
 * Register contract on entry: mm7 must be zero (packuswb filler) and
 * mm6 all-ones (supplies the 0xFF alpha bytes). Clobbers mm0-mm3.
 */
#define PACK_RGB32 \
    "packuswb %%mm7, %%mm0 \n\t" \
    "packuswb %%mm7, %%mm1 \n\t" \
    "packuswb %%mm7, %%mm2 \n\t" \
    "punpcklbw %%mm1, %%mm0 \n\t" \
    "punpcklbw %%mm6, %%mm2 \n\t" \
    "movq %%mm0, %%mm3 \n\t" \
    "punpcklwd %%mm2, %%mm0 \n\t" \
    "punpckhwd %%mm2, %%mm3 \n\t" \
    MOVNTQ" %%mm0, %0 \n\t" \
    MOVNTQ" %%mm3, 8%0 \n\t" \

/**
 * Expand 15bpp (x1:5:5:5) pixels to 32bpp with an opaque alpha byte (255).
 *
 * Scalar reference (little-endian), per 16-bit pixel bgr:
 *   out = { (bgr&0x1F)<<3, (bgr&0x3E0)>>2, (bgr&0x7C00)>>7, 255 }
 * (mirrored with alpha first on big-endian targets).
 *
 * MMX path: 4 pixels per iteration — the three fields are isolated via
 * mask15b/mask15g/mask15r (defined elsewhere), scaled to 8 bits, then
 * PACK_RGB32 interleaves them (mm7 = 0, mm6 = all-ones for alpha).
 *
 * @param src       input buffer of 16-bit pixels
 * @param dst       output buffer of 4-byte pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
#if HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t *)src;
    end = s + src_size/2;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");     /* PACK_RGB32: zero */
    __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");  /* PACK_RGB32: alpha */
    mm_end = end - 3;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $2, %%mm1 \n\t"
            "psrlq $7, %%mm2 \n\t"
            PACK_RGB32
            :"=m"(*d)
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        register uint16_t bgr;
        bgr = *s++;
#if HAVE_BIGENDIAN
        *d++ = 255;
        *d++ = (bgr&0x7C00)>>7;
        *d++ = (bgr&0x3E0)>>2;
        *d++ = (bgr&0x1F)<<3;
#else
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x3E0)>>2;
        *d++ = (bgr&0x7C00)>>7;
        *d++ = 255;
#endif
    }
}
01174
/**
 * Expand 16bpp (5:6:5) pixels to 32bpp with an opaque alpha byte (255).
 *
 * Scalar reference (little-endian), per 16-bit pixel bgr:
 *   out = { (bgr&0x1F)<<3, (bgr&0x7E0)>>3, (bgr&0xF800)>>8, 255 }
 * (mirrored with alpha first on big-endian targets).
 *
 * Same structure as rgb15to32 but with the 5:6:5 masks
 * (mask16b/mask16g/mask16r, defined elsewhere) and shift amounts.
 *
 * @param src       input buffer of 16-bit pixels
 * @param dst       output buffer of 4-byte pixels
 * @param src_size  number of input bytes
 */
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
#if HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t*)src;
    end = s + src_size/2;
#if HAVE_MMX
    __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
    __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");     /* PACK_RGB32: zero */
    __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");  /* PACK_RGB32: alpha */
    mm_end = end - 3;
    while (s < mm_end) {
        __asm__ volatile(
            PREFETCH" 32%1 \n\t"
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            "psllq $3, %%mm0 \n\t"
            "psrlq $3, %%mm1 \n\t"
            "psrlq $8, %%mm2 \n\t"
            PACK_RGB32
            :"=m"(*d)
            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");
#endif
    while (s < end) {
        register uint16_t bgr;
        bgr = *s++;
#if HAVE_BIGENDIAN
        *d++ = 255;
        *d++ = (bgr&0xF800)>>8;
        *d++ = (bgr&0x7E0)>>3;
        *d++ = (bgr&0x1F)<<3;
#else
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x7E0)>>3;
        *d++ = (bgr&0xF800)>>8;
        *d++ = 255;
#endif
    }
}
01227
/* Swap the R and B channels of 32 bpp pixels in place order (RGBA <-> BGRA).
 * The loop counter idx starts negative and counts up to 0, so the asm loop
 * runs while idx is negative; the scalar tail then covers up to 15 leftover
 * bytes (4 bytes per pixel, hence idx += 4). */
static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    x86_reg idx = 15 - src_size;
    /* bias the pointers so (base + idx) walks the buffers from the start */
    const uint8_t *s = src-idx;
    uint8_t *d = dst-idx;
#if HAVE_MMX
    __asm__ volatile(
        "test %0, %0 \n\t"
        "jns 2f \n\t" /* fewer than 16 bytes: skip the MMX loop */
        PREFETCH" (%1, %0) \n\t"
        /* mm7 = mask of bytes kept in place, mm6 = mask of bytes swapped;
         * built from mask32b ^ mask32r and mmx_one */
        "movq %3, %%mm7 \n\t"
        "pxor %4, %%mm7 \n\t"
        "movq %%mm7, %%mm6 \n\t"
        "pxor %5, %%mm7 \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1, %0) \n\t"
        "movq (%1, %0), %%mm0 \n\t"
        "movq 8(%1, %0), %%mm1 \n\t"
# if HAVE_MMX2
        /* $177 = 0b10110001: swap adjacent words within each dword,
         * which exchanges the R and B byte positions */
        "pshufw $177, %%mm0, %%mm3 \n\t"
        "pshufw $177, %%mm1, %%mm5 \n\t"
        "pand %%mm7, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pand %%mm7, %%mm1 \n\t"
        "pand %%mm6, %%mm5 \n\t"
        "por %%mm3, %%mm0 \n\t"
        "por %%mm5, %%mm1 \n\t"
# else
        /* no pshufw on plain MMX: shift copies by 16 bits both ways and
         * recombine through the masks */
        "movq %%mm0, %%mm2 \n\t"
        "movq %%mm1, %%mm4 \n\t"
        "pand %%mm7, %%mm0 \n\t"
        "pand %%mm6, %%mm2 \n\t"
        "pand %%mm7, %%mm1 \n\t"
        "pand %%mm6, %%mm4 \n\t"
        "movq %%mm2, %%mm3 \n\t"
        "movq %%mm4, %%mm5 \n\t"
        "pslld $16, %%mm2 \n\t"
        "psrld $16, %%mm3 \n\t"
        "pslld $16, %%mm4 \n\t"
        "psrld $16, %%mm5 \n\t"
        "por %%mm2, %%mm0 \n\t"
        "por %%mm4, %%mm1 \n\t"
        "por %%mm3, %%mm0 \n\t"
        "por %%mm5, %%mm1 \n\t"
# endif
        MOVNTQ" %%mm0, (%2, %0) \n\t"
        MOVNTQ" %%mm1, 8(%2, %0) \n\t"
        "add $16, %0 \n\t" /* 4 pixels per iteration */
        "js 1b \n\t"
        SFENCE" \n\t"
        EMMS" \n\t"
        "2: \n\t"
        : "+&r"(idx)
        : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
        : "memory");
#endif
    /* scalar tail: swap the two 0x00FF00FF channels of each dword */
    for (; idx<15; idx+=4) {
        register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
        v &= 0xff00ff;
        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
    }
}
01291
/* Swap the R and B bytes of packed 24 bpp pixels (RGB24 <-> BGR24).
 * The MMX loop handles 24 bytes (8 pixels) per iteration using the
 * mask24r/g/b channel masks rotated across three output quadwords;
 * the scalar loop swaps bytes 0 and 2 of each remaining 3-byte pixel. */
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
{
    unsigned i;
#if HAVE_MMX
    x86_reg mmx_size= 23 - src_size; /* negative count, stepped by +24 */
    __asm__ volatile (
        "test %%"REG_a", %%"REG_a" \n\t"
        "jns 2f \n\t" /* fewer than 24 bytes: skip the MMX loop */
        "movq "MANGLE(mask24r)", %%mm5 \n\t"
        "movq "MANGLE(mask24g)", %%mm6 \n\t"
        "movq "MANGLE(mask24b)", %%mm7 \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1, %%"REG_a") \n\t"
        /* first output quadword: R shifted into B slots, G kept, B into R */
        "movq (%1, %%"REG_a"), %%mm0 \n\t"
        "movq (%1, %%"REG_a"), %%mm1 \n\t"
        "movq 2(%1, %%"REG_a"), %%mm2 \n\t"
        "psllq $16, %%mm0 \n\t"
        "pand %%mm5, %%mm0 \n\t"
        "pand %%mm6, %%mm1 \n\t"
        "pand %%mm7, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        "movq 6(%1, %%"REG_a"), %%mm0 \n\t"
        MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t"
        /* second output quadword (masks rotate with the 3-byte phase) */
        "movq 8(%1, %%"REG_a"), %%mm1 \n\t"
        "movq 10(%1, %%"REG_a"), %%mm2 \n\t"
        "pand %%mm7, %%mm0 \n\t"
        "pand %%mm5, %%mm1 \n\t"
        "pand %%mm6, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        "movq 14(%1, %%"REG_a"), %%mm0 \n\t"
        MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t"
        /* third output quadword */
        "movq 16(%1, %%"REG_a"), %%mm1 \n\t"
        "movq 18(%1, %%"REG_a"), %%mm2 \n\t"
        "pand %%mm6, %%mm0 \n\t"
        "pand %%mm7, %%mm1 \n\t"
        "pand %%mm5, %%mm2 \n\t"
        "por %%mm0, %%mm1 \n\t"
        "por %%mm2, %%mm1 \n\t"
        MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t"
        "add $24, %%"REG_a" \n\t"
        " js 1b \n\t"
        "2: \n\t"
        : "+a" (mmx_size)
        : "r" (src-mmx_size), "r"(dst-mmx_size)
        /* NOTE(review): no "memory" clobber here even though the asm writes
         * through %2 — appears to rely on the surrounding volatile asm
         * statements for ordering; verify before reordering code. */
    );

    __asm__ volatile(SFENCE:::"memory");
    __asm__ volatile(EMMS:::"memory");

    if (mmx_size==23) return; /* everything was consumed by the MMX loop */

    /* rewind by the leftover byte count and fall through to the C loop */
    src+= src_size;
    dst+= src_size;
    src_size= 23-mmx_size;
    src-= src_size;
    dst-= src_size;
#endif
    for (i=0; i<src_size; i+=3) {
        register uint8_t x;
        x = src[i + 2];
        dst[i + 1] = src[i + 1];
        dst[i + 2] = src[i + 0];
        dst[i + 0] = x;
    }
}
01360
/* Interleave planar YUV into packed YUY2 (Y0 U Y1 V per pixel pair).
 * vertLumPerChroma is the number of luma lines per chroma line
 * (2 for 4:2:0 input, 1 for 4:2:2) and must be a power of two because
 * the chroma-advance test uses y & (vertLumPerChroma-1).
 * NOTE(review): the MMX loop processes 8 chroma (16 luma) samples per
 * iteration and assumes chromWidth is a multiple of 8 — confirm callers
 * guarantee this. */
static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                           long width, long height,
                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
{
    long y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y++) {
#if HAVE_MMX
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t"
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
            PREFETCH" 32(%2, %%"REG_a") \n\t"
            PREFETCH" 32(%3, %%"REG_a") \n\t"
            /* interleave U and V bytes: mm0/mm2 = U0 V0 U1 V1 ... */
            "movq (%2, %%"REG_a"), %%mm0 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq (%3, %%"REG_a"), %%mm1 \n\t"
            "punpcklbw %%mm1, %%mm0 \n\t"
            "punpckhbw %%mm1, %%mm2 \n\t"

            /* interleave Y with the UV pairs to get Y U Y V ... */
            "movq (%1, %%"REG_a",2), %%mm3 \n\t"
            "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
            "movq %%mm3, %%mm4 \n\t"
            "movq %%mm5, %%mm6 \n\t"
            "punpcklbw %%mm0, %%mm3 \n\t"
            "punpckhbw %%mm0, %%mm4 \n\t"
            "punpcklbw %%mm2, %%mm5 \n\t"
            "punpckhbw %%mm2, %%mm6 \n\t"

            MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t"
            MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
            MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t"
            MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"

            "add $8, %%"REG_a" \n\t"
            "cmp %4, %%"REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
            : "%"REG_a
        );
#else

#if ARCH_ALPHA && HAVE_MVI
/* Alpha/MVI path: expand bytes with unpkbw/unpkbl and merge two rows at
 * once. Each pl2yuy2(n) emits one 64-bit group per destination row. */
#define pl2yuy2(n) \
    y1 = yc[n]; \
    y2 = yc2[n]; \
    u = uc[n]; \
    v = vc[n]; \
    __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
    __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
    __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
    __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
    yuv1 = (u << 8) + (v << 24); \
    yuv2 = yuv1 + y2; \
    yuv1 += y1; \
    qdst[n] = yuv1; \
    qdst2[n] = yuv2;

        int i;
        uint64_t *qdst = (uint64_t *) dst;
        uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
        const uint32_t *yc = (uint32_t *) ysrc;
        const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
        const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
        for (i = 0; i < chromWidth; i += 8) {
            uint64_t y1, y2, yuv1, yuv2;
            uint64_t u, v;

            /* ldq $31 = prefetch into the zero register */
            __asm__("ldq $31,64(%0)" :: "r"(yc));
            __asm__("ldq $31,64(%0)" :: "r"(yc2));
            __asm__("ldq $31,64(%0)" :: "r"(uc));
            __asm__("ldq $31,64(%0)" :: "r"(vc));

            pl2yuy2(0);
            pl2yuy2(1);
            pl2yuy2(2);
            pl2yuy2(3);

            yc += 4;
            yc2 += 4;
            uc += 4;
            vc += 4;
            qdst += 4;
            qdst2 += 4;
        }
        /* two luma rows were consumed, so advance an extra row here */
        y++;
        ysrc += lumStride;
        dst += dstStride;

#elif HAVE_FAST_64BIT
        int i;
        uint64_t *ldst = (uint64_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k, l;
            k = yc[0] + (uc[0] << 8) +
                (yc[1] << 16) + (vc[0] << 24);
            l = yc[2] + (uc[1] << 8) +
                (yc[3] << 16) + (vc[1] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }

#else
        int i, *idst = (int32_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
                      (yc[1] << 8) + (vc[0] << 0);
#else
            *idst++ = yc[0] + (uc[0] << 8) +
                      (yc[1] << 16) + (vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
#endif
        /* advance chroma only every vertLumPerChroma luma lines */
        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst += dstStride;
    }
#if HAVE_MMX
    __asm__(EMMS" \n\t"
            SFENCE" \n\t"
            :::"memory");
#endif
}
01498
/* Convert planar YV12 (4:2:0) to packed YUY2: one chroma line serves
 * two luma lines, hence vertLumPerChroma = 2. */
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                      long width, long height,
                                      long lumStride, long chromStride, long dstStride)
{
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
}
01510
/* Interleave planar YUV into packed UYVY (U Y0 V Y1 per pixel pair).
 * Same structure as yuvPlanartoyuy2 above with the byte order flipped:
 * here chroma is unpacked *into* the low bytes (Y merged in second).
 * vertLumPerChroma must be a power of two (see the y & (v-1) test).
 * NOTE(review): the MMX loop assumes chromWidth is a multiple of 8 —
 * confirm callers guarantee this. */
static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                           long width, long height,
                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
{
    long y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y++) {
#if HAVE_MMX
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t"
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
            PREFETCH" 32(%2, %%"REG_a") \n\t"
            PREFETCH" 32(%3, %%"REG_a") \n\t"
            /* mm0/mm2 = interleaved U V pairs */
            "movq (%2, %%"REG_a"), %%mm0 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq (%3, %%"REG_a"), %%mm1 \n\t"
            "punpcklbw %%mm1, %%mm0 \n\t"
            "punpckhbw %%mm1, %%mm2 \n\t"

            /* merge Y above UV to get U Y V Y ... */
            "movq (%1, %%"REG_a",2), %%mm3 \n\t"
            "movq 8(%1, %%"REG_a",2), %%mm5 \n\t"
            "movq %%mm0, %%mm4 \n\t"
            "movq %%mm2, %%mm6 \n\t"
            "punpcklbw %%mm3, %%mm0 \n\t"
            "punpckhbw %%mm3, %%mm4 \n\t"
            "punpcklbw %%mm5, %%mm2 \n\t"
            "punpckhbw %%mm5, %%mm6 \n\t"

            MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t"
            MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
            MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t"
            MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"

            "add $8, %%"REG_a" \n\t"
            "cmp %4, %%"REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
            : "%"REG_a
        );
#else

#if HAVE_FAST_64BIT
        int i;
        uint64_t *ldst = (uint64_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k, l;
            k = uc[0] + (yc[0] << 8) +
                (vc[0] << 16) + (yc[1] << 24);
            l = uc[1] + (yc[2] << 8) +
                (vc[1] << 16) + (yc[3] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }

#else
        int i, *idst = (int32_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
                      (vc[0] << 8) + (yc[1] << 0);
#else
            *idst++ = uc[0] + (yc[0] << 8) +
                      (vc[0] << 16) + (yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
#endif
        /* advance chroma only every vertLumPerChroma luma lines */
        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst += dstStride;
    }
#if HAVE_MMX
    __asm__(EMMS" \n\t"
            SFENCE" \n\t"
            :::"memory");
#endif
}
01602
/* Convert planar YV12 (4:2:0) to packed UYVY: one chroma line serves
 * two luma lines, hence vertLumPerChroma = 2. */
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                      long width, long height,
                                      long lumStride, long chromStride, long dstStride)
{
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
}
01614
/* Convert planar 4:2:2 YUV to packed UYVY: chroma has full vertical
 * resolution, hence vertLumPerChroma = 1. */
static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                         long width, long height,
                                         long lumStride, long chromStride, long dstStride)
{
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
}
01624
/* Convert planar 4:2:2 YUV to packed YUY2: chroma has full vertical
 * resolution, hence vertLumPerChroma = 1. */
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                         long width, long height,
                                         long lumStride, long chromStride, long dstStride)
{
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
}
01634
/* Deinterleave packed YUY2 (Y0 U Y1 V) into planar YV12. Two input rows
 * are consumed per outer iteration: the first yields Y plus the (vertically
 * subsampled) U/V line, the second yields Y only.
 * NOTE(review): the second asm block uses %%mm7 but only the first block
 * sets it (0x00FF words) — this relies on MMX register state surviving
 * across separate __asm__ statements, which the compiler does not
 * guarantee; it works in practice because compilers do not touch MMX
 * registers here. The MMX loops assume chromWidth is a multiple of 8. */
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                                      long width, long height,
                                      long lumStride, long chromStride, long srcStride)
{
    long y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y+=2) {
#if HAVE_MMX
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t"
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t" /* mm7 = 0x00FF words (even-byte mask) */
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
            /* split 8 pixels: psrlw keeps the odd (chroma) bytes,
             * pand mm7 keeps the even (luma) bytes */
            "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
            "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t" /* mm0 = U V U V ... */
            "packuswb %%mm3, %%mm2 \n\t" /* mm2 = 8 luma bytes */

            MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"

            "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
            "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "movq %%mm2, %%mm4 \n\t"
            "psrlw $8, %%mm1 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "pand %%mm7, %%mm4 \n\t"
            "packuswb %%mm2, %%mm1 \n\t" /* mm1 = U V U V ... */
            "packuswb %%mm4, %%mm3 \n\t" /* mm3 = 8 luma bytes */

            MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"

            /* second split of the UV stream into separate U and V bytes */
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t" /* 8 V bytes */
            "packuswb %%mm3, %%mm2 \n\t" /* 8 U bytes */

            MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
            MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"

            "add $8, %%"REG_a" \n\t"
            "cmp %4, %%"REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
            : "memory", "%"REG_a
        );

        ydst += lumStride;
        src += srcStride;

        /* second row: luma only (chroma of this row is discarded) */
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t"
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
            "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
            "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
            "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
            "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
            /* mm7 still holds 0x00FF from the block above (see NOTE) */
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"

            MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"

            "add $8, %%"REG_a" \n\t"
            "cmp %4, %%"REG_a" \n\t"
            " jb 1b \n\t"

            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
            : "memory", "%"REG_a
        );
#else
        long i;
        for (i=0; i<chromWidth; i++) {
            ydst[2*i+0] = src[4*i+0];
            udst[i] = src[4*i+1];
            ydst[2*i+1] = src[4*i+2];
            vdst[i] = src[4*i+3];
        }
        ydst += lumStride;
        src += srcStride;

        for (i=0; i<chromWidth; i++) {
            ydst[2*i+0] = src[4*i+0];
            ydst[2*i+1] = src[4*i+2];
        }
#endif
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src += srcStride;
    }
#if HAVE_MMX
    __asm__ volatile(EMMS" \n\t"
                     SFENCE" \n\t"
                     :::"memory");
#endif
}
01755
/* YVU9 -> YV12: currently only the luma plane is handled.
 * FIXME(review): usrc/vsrc are read by no one and udst/vdst are never
 * written, so the destination chroma planes are left untouched (likely
 * garbage). Chroma upscaling (YVU9 chroma is 1/4 x 1/4 resolution,
 * YV12 needs 1/2 x 1/2) is unimplemented; lumStride/chromStride are
 * also unused. Confirm against callers before relying on this. */
static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
                                      uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                                      long width, long height, long lumStride, long chromStride)
{
    /* Y plane: copied verbatim (assumes stride == width for both buffers
     * — TODO confirm, since lumStride is ignored here) */
    memcpy(ydst, ysrc, width*height);
}
01765
/* Upscale one plane by 2x in both directions using bilinear-style
 * (3*a + b)/4 weighting. The first and last output lines are produced
 * from a single source line; interior line pairs blend two source lines.
 * The doubled PAVGB sequences compute avg(b, avg(a, b)) = (a + 3b)/4
 * (with pavgb rounding) on 8 pixels at a time. */
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
{
    long x,y;

    dst[0]= src[0];

    /* first output line: horizontal interpolation only */
    for (x=0; x<srcWidth-1; x++) {
        dst[2*x+1]= (3*src[x] + src[x+1])>>2;
        dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
    }
    dst[2*srcWidth-1]= src[srcWidth-1];

    dst+= dstStride;

    for (y=1; y<srcHeight; y++) {
#if HAVE_MMX2 || HAVE_AMD3DNOW
        /* process the bulk with MMX; the scalar loop below finishes the
         * right edge. Counter runs from -mmxSize up to 0, 8 pixels/iter. */
        const x86_reg mmxSize= srcWidth&~15;
        __asm__ volatile(
            "mov %4, %%"REG_a" \n\t"
            "1: \n\t"
            "movq (%0, %%"REG_a"), %%mm0 \n\t" /* row0[x]   */
            "movq (%1, %%"REG_a"), %%mm1 \n\t" /* row1[x]   */
            "movq 1(%0, %%"REG_a"), %%mm2 \n\t" /* row0[x+1] */
            "movq 1(%1, %%"REG_a"), %%mm3 \n\t" /* row1[x+1] */
            "movq -1(%0, %%"REG_a"), %%mm4 \n\t" /* row0[x-1] */
            "movq -1(%1, %%"REG_a"), %%mm5 \n\t" /* row1[x-1] */
            /* repeated pavg = 1:3 weighted blends of the two rows */
            PAVGB" %%mm0, %%mm5 \n\t"
            PAVGB" %%mm0, %%mm3 \n\t"
            PAVGB" %%mm0, %%mm5 \n\t"
            PAVGB" %%mm0, %%mm3 \n\t"
            PAVGB" %%mm1, %%mm4 \n\t"
            PAVGB" %%mm1, %%mm2 \n\t"
            PAVGB" %%mm1, %%mm4 \n\t"
            PAVGB" %%mm1, %%mm2 \n\t"
            "movq %%mm5, %%mm7 \n\t"
            "movq %%mm4, %%mm6 \n\t"
            /* interleave left/right-weighted samples into 2x-wide output */
            "punpcklbw %%mm3, %%mm5 \n\t"
            "punpckhbw %%mm3, %%mm7 \n\t"
            "punpcklbw %%mm2, %%mm4 \n\t"
            "punpckhbw %%mm2, %%mm6 \n\t"
#if 1
            MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t"
#else
            "movq %%mm5, (%2, %%"REG_a", 2) \n\t"
            "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t"
            "movq %%mm4, (%3, %%"REG_a", 2) \n\t"
            "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t"
#endif
            "add $8, %%"REG_a" \n\t"
            " js 1b \n\t"
            :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
               "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
               "g" (-mmxSize)
            : "%"REG_a
        );
#else
        const x86_reg mmxSize=1; /* no MMX: scalar loop covers everything */
#endif
        /* left edge of both destination lines */
        dst[0 ]= (3*src[0] + src[srcStride])>>2;
        dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;

        for (x=mmxSize-1; x<srcWidth-1; x++) {
            dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
            dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
            dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
            dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
        }
        /* right edge of both destination lines */
        dst[srcWidth*2 -1 ]= (3*src[srcWidth-1] + src[srcWidth-1 + srcStride])>>2;
        dst[srcWidth*2 -1 + dstStride]= ( src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;

        dst+=dstStride*2;
        src+=srcStride;
    }

    /* last output line: horizontal interpolation only */
#if 1
    dst[0]= src[0];

    for (x=0; x<srcWidth-1; x++) {
        dst[2*x+1]= (3*src[x] + src[x+1])>>2;
        dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
    }
    dst[2*srcWidth-1]= src[srcWidth-1];
#else
    /* simpler nearest-neighbour alternative, kept disabled */
    for (x=0; x<srcWidth; x++) {
        dst[2*x+0]=
        dst[2*x+1]= src[x];
    }
#endif

#if HAVE_MMX
    __asm__ volatile(EMMS" \n\t"
                     SFENCE" \n\t"
                     :::"memory");
#endif
}
01867
/* Deinterleave packed UYVY (U Y0 V Y1) into planar YV12. Mirror image of
 * yuy2toyv12 above: here luma sits in the odd bytes (extracted with
 * psrlw $8) and chroma in the even bytes (extracted with pand mm7).
 * Two input rows per outer iteration; the second contributes luma only.
 * The MMX loops assume chromWidth is a multiple of 8. */
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                                      long width, long height,
                                      long lumStride, long chromStride, long srcStride)
{
    long y;
    const x86_reg chromWidth= width>>1;
    for (y=0; y<height; y+=2) {
#if HAVE_MMX
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t"
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t" /* mm7 = 0x00FF words (even-byte mask) */
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
            /* pand keeps even bytes (chroma), psrlw keeps odd bytes (luma) */
            "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
            "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "psrlw $8, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t" /* mm0 = U V U V ... */
            "packuswb %%mm3, %%mm2 \n\t" /* mm2 = 8 luma bytes */

            MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"

            "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t"
            "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "movq %%mm2, %%mm4 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "psrlw $8, %%mm3 \n\t"
            "psrlw $8, %%mm4 \n\t"
            "packuswb %%mm2, %%mm1 \n\t" /* mm1 = U V U V ... */
            "packuswb %%mm4, %%mm3 \n\t" /* mm3 = 8 luma bytes */

            MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"

            /* second split of the UV stream into separate U and V bytes */
            "movq %%mm0, %%mm2 \n\t"
            "movq %%mm1, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t" /* 8 V bytes */
            "packuswb %%mm3, %%mm2 \n\t" /* 8 U bytes */

            MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
            MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"

            "add $8, %%"REG_a" \n\t"
            "cmp %4, %%"REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
            : "memory", "%"REG_a
        );

        ydst += lumStride;
        src += srcStride;

        /* second row: luma only (odd bytes), chroma of this row dropped */
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a" \n\t"
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
            "movq (%0, %%"REG_a", 4), %%mm0 \n\t"
            "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t"
            "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t"
            "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm1 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "psrlw $8, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"

            MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"

            "add $8, %%"REG_a" \n\t"
            "cmp %4, %%"REG_a" \n\t"
            " jb 1b \n\t"

            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
            : "memory", "%"REG_a
        );
#else
        long i;
        for (i=0; i<chromWidth; i++) {
            udst[i] = src[4*i+0];
            ydst[2*i+0] = src[4*i+1];
            vdst[i] = src[4*i+2];
            ydst[2*i+1] = src[4*i+3];
        }
        ydst += lumStride;
        src += srcStride;

        for (i=0; i<chromWidth; i++) {
            ydst[2*i+0] = src[4*i+1];
            ydst[2*i+1] = src[4*i+3];
        }
#endif
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src += srcStride;
    }
#if HAVE_MMX
    __asm__ volatile(EMMS" \n\t"
                     SFENCE" \n\t"
                     :::"memory");
#endif
}
01990
/* Convert packed 24 bpp pixels to planar YV12. Despite the function name,
 * the scalar code reads b = src[0], g = src[1], r = src[2], i.e. BGR byte
 * order, and the MMX path uses the ff_bgr2* coefficient tables — so the
 * expected input is BGR24 (the name follows the template's historical
 * convention). Chroma is averaged over each 2x2 pixel block.
 * The MMX path processes rows in pairs and leaves the last (up to two)
 * rows, plus any leftover rows, to the scalar loop via the shared y index. */
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                                       long width, long height,
                                       long lumStride, long chromStride, long srcStride)
{
    long y;
    const x86_reg chromWidth= width>>1;
#if HAVE_MMX
    for (y=0; y<height-2; y+=2) {
        long i;
        /* luma for two consecutive rows */
        for (i=0; i<2; i++) {
            __asm__ volatile(
                "mov %2, %%"REG_a" \n\t"
                "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
                "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
                "pxor %%mm7, %%mm7 \n\t"
                /* REG_d = 3*REG_a: byte offset into the 3-byte pixels */
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
                ASMALIGN(4)
                "1: \n\t"
                PREFETCH" 64(%0, %%"REG_d") \n\t"
                /* load 4 pixels, widen bytes to words */
                "movd (%0, %%"REG_d"), %%mm0 \n\t"
                "movd 3(%0, %%"REG_d"), %%mm1 \n\t"
                "punpcklbw %%mm7, %%mm0 \n\t"
                "punpcklbw %%mm7, %%mm1 \n\t"
                "movd 6(%0, %%"REG_d"), %%mm2 \n\t"
                "movd 9(%0, %%"REG_d"), %%mm3 \n\t"
                "punpcklbw %%mm7, %%mm2 \n\t"
                "punpcklbw %%mm7, %%mm3 \n\t"
                /* dot product with the Y coefficients */
                "pmaddwd %%mm6, %%mm0 \n\t"
                "pmaddwd %%mm6, %%mm1 \n\t"
                "pmaddwd %%mm6, %%mm2 \n\t"
                "pmaddwd %%mm6, %%mm3 \n\t"
#ifndef FAST_BGR2YV12
                "psrad $8, %%mm0 \n\t"
                "psrad $8, %%mm1 \n\t"
                "psrad $8, %%mm2 \n\t"
                "psrad $8, %%mm3 \n\t"
#endif
                "packssdw %%mm1, %%mm0 \n\t"
                "packssdw %%mm3, %%mm2 \n\t"
                "pmaddwd %%mm5, %%mm0 \n\t" /* horizontal add via w1111 */
                "pmaddwd %%mm5, %%mm2 \n\t"
                "packssdw %%mm2, %%mm0 \n\t"
                "psraw $7, %%mm0 \n\t"

                /* next 4 pixels, same pipeline */
                "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
                "movd 15(%0, %%"REG_d"), %%mm1 \n\t"
                "punpcklbw %%mm7, %%mm4 \n\t"
                "punpcklbw %%mm7, %%mm1 \n\t"
                "movd 18(%0, %%"REG_d"), %%mm2 \n\t"
                "movd 21(%0, %%"REG_d"), %%mm3 \n\t"
                "punpcklbw %%mm7, %%mm2 \n\t"
                "punpcklbw %%mm7, %%mm3 \n\t"
                "pmaddwd %%mm6, %%mm4 \n\t"
                "pmaddwd %%mm6, %%mm1 \n\t"
                "pmaddwd %%mm6, %%mm2 \n\t"
                "pmaddwd %%mm6, %%mm3 \n\t"
#ifndef FAST_BGR2YV12
                "psrad $8, %%mm4 \n\t"
                "psrad $8, %%mm1 \n\t"
                "psrad $8, %%mm2 \n\t"
                "psrad $8, %%mm3 \n\t"
#endif
                "packssdw %%mm1, %%mm4 \n\t"
                "packssdw %%mm3, %%mm2 \n\t"
                "pmaddwd %%mm5, %%mm4 \n\t"
                "pmaddwd %%mm5, %%mm2 \n\t"
                "add $24, %%"REG_d" \n\t"
                "packssdw %%mm2, %%mm4 \n\t"
                "psraw $7, %%mm4 \n\t"

                "packuswb %%mm4, %%mm0 \n\t"
                "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"

                MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
                "add $8, %%"REG_a" \n\t"
                " js 1b \n\t"
                : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
                : "%"REG_a, "%"REG_d
            );
            ydst += lumStride;
            src += srcStride;
        }
        src -= srcStride*2; /* rewind: chroma pass re-reads both rows */
        /* chroma: average 2x2 blocks across the two rows, then apply the
         * U (mm6) and V (loaded per-block) coefficient tables */
        __asm__ volatile(
            "mov %4, %%"REG_a" \n\t"
            "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
            "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
            "pxor %%mm7, %%mm7 \n\t"
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
            "add %%"REG_d", %%"REG_d" \n\t" /* REG_d = 6*REG_a (2 pixels) */
            ASMALIGN(4)
            "1: \n\t"
            PREFETCH" 64(%0, %%"REG_d") \n\t"
            PREFETCH" 64(%1, %%"REG_d") \n\t"
#if HAVE_MMX2 || HAVE_AMD3DNOW
            /* pavg-based 2x2 average of two pixel pairs */
            "movq (%0, %%"REG_d"), %%mm0 \n\t"
            "movq (%1, %%"REG_d"), %%mm1 \n\t"
            "movq 6(%0, %%"REG_d"), %%mm2 \n\t"
            "movq 6(%1, %%"REG_d"), %%mm3 \n\t"
            PAVGB" %%mm1, %%mm0 \n\t"
            PAVGB" %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $24, %%mm0 \n\t" /* align the horizontally adjacent pixel */
            "psrlq $24, %%mm2 \n\t"
            PAVGB" %%mm1, %%mm0 \n\t"
            PAVGB" %%mm3, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm0 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
#else
            /* plain MMX: widen to words and sum the four samples, then /4 */
            "movd (%0, %%"REG_d"), %%mm0 \n\t"
            "movd (%1, %%"REG_d"), %%mm1 \n\t"
            "movd 3(%0, %%"REG_d"), %%mm2 \n\t"
            "movd 3(%1, %%"REG_d"), %%mm3 \n\t"
            "punpcklbw %%mm7, %%mm0 \n\t"
            "punpcklbw %%mm7, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm3 \n\t"
            "paddw %%mm1, %%mm0 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            "paddw %%mm2, %%mm0 \n\t"
            "movd 6(%0, %%"REG_d"), %%mm4 \n\t"
            "movd 6(%1, %%"REG_d"), %%mm1 \n\t"
            "movd 9(%0, %%"REG_d"), %%mm2 \n\t"
            "movd 9(%1, %%"REG_d"), %%mm3 \n\t"
            "punpcklbw %%mm7, %%mm4 \n\t"
            "punpcklbw %%mm7, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm3 \n\t"
            "paddw %%mm1, %%mm4 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            "paddw %%mm4, %%mm2 \n\t"
            "psrlw $2, %%mm0 \n\t"
            "psrlw $2, %%mm2 \n\t"
#endif
            "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
            "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"

            /* mm1/mm3 = V dot products, mm0/mm2 = U dot products */
            "pmaddwd %%mm0, %%mm1 \n\t"
            "pmaddwd %%mm2, %%mm3 \n\t"
            "pmaddwd %%mm6, %%mm0 \n\t"
            "pmaddwd %%mm6, %%mm2 \n\t"
#ifndef FAST_BGR2YV12
            "psrad $8, %%mm0 \n\t"
            "psrad $8, %%mm1 \n\t"
            "psrad $8, %%mm2 \n\t"
            "psrad $8, %%mm3 \n\t"
#endif
            "packssdw %%mm2, %%mm0 \n\t"
            "packssdw %%mm3, %%mm1 \n\t"
            "pmaddwd %%mm5, %%mm0 \n\t"
            "pmaddwd %%mm5, %%mm1 \n\t"
            "packssdw %%mm1, %%mm0 \n\t" /* U and V for 2 output samples */
            "psraw $7, %%mm0 \n\t"

#if HAVE_MMX2 || HAVE_AMD3DNOW
            /* next two 2x2 blocks, same averaging as above */
            "movq 12(%0, %%"REG_d"), %%mm4 \n\t"
            "movq 12(%1, %%"REG_d"), %%mm1 \n\t"
            "movq 18(%0, %%"REG_d"), %%mm2 \n\t"
            "movq 18(%1, %%"REG_d"), %%mm3 \n\t"
            PAVGB" %%mm1, %%mm4 \n\t"
            PAVGB" %%mm3, %%mm2 \n\t"
            "movq %%mm4, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlq $24, %%mm4 \n\t"
            "psrlq $24, %%mm2 \n\t"
            PAVGB" %%mm1, %%mm4 \n\t"
            PAVGB" %%mm3, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm4 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
#else
            "movd 12(%0, %%"REG_d"), %%mm4 \n\t"
            "movd 12(%1, %%"REG_d"), %%mm1 \n\t"
            "movd 15(%0, %%"REG_d"), %%mm2 \n\t"
            "movd 15(%1, %%"REG_d"), %%mm3 \n\t"
            "punpcklbw %%mm7, %%mm4 \n\t"
            "punpcklbw %%mm7, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm3 \n\t"
            "paddw %%mm1, %%mm4 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            "paddw %%mm2, %%mm4 \n\t"
            "movd 18(%0, %%"REG_d"), %%mm5 \n\t"
            "movd 18(%1, %%"REG_d"), %%mm1 \n\t"
            "movd 21(%0, %%"REG_d"), %%mm2 \n\t"
            "movd 21(%1, %%"REG_d"), %%mm3 \n\t"
            "punpcklbw %%mm7, %%mm5 \n\t"
            "punpcklbw %%mm7, %%mm1 \n\t"
            "punpcklbw %%mm7, %%mm2 \n\t"
            "punpcklbw %%mm7, %%mm3 \n\t"
            "paddw %%mm1, %%mm5 \n\t"
            "paddw %%mm3, %%mm2 \n\t"
            "paddw %%mm5, %%mm2 \n\t"
            /* mm5 was clobbered as a scratch register: reload w1111 */
            "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
            "psrlw $2, %%mm4 \n\t"
            "psrlw $2, %%mm2 \n\t"
#endif
            "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
            "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"

            "pmaddwd %%mm4, %%mm1 \n\t"
            "pmaddwd %%mm2, %%mm3 \n\t"
            "pmaddwd %%mm6, %%mm4 \n\t"
            "pmaddwd %%mm6, %%mm2 \n\t"
#ifndef FAST_BGR2YV12
            "psrad $8, %%mm4 \n\t"
            "psrad $8, %%mm1 \n\t"
            "psrad $8, %%mm2 \n\t"
            "psrad $8, %%mm3 \n\t"
#endif
            "packssdw %%mm2, %%mm4 \n\t"
            "packssdw %%mm3, %%mm1 \n\t"
            "pmaddwd %%mm5, %%mm4 \n\t"
            "pmaddwd %%mm5, %%mm1 \n\t"
            "add $24, %%"REG_d" \n\t"
            "packssdw %%mm1, %%mm4 \n\t"
            "psraw $7, %%mm4 \n\t"

            /* separate the interleaved U/V results and bias to unsigned */
            "movq %%mm0, %%mm1 \n\t"
            "punpckldq %%mm4, %%mm0 \n\t"
            "punpckhdq %%mm4, %%mm1 \n\t"
            "packsswb %%mm1, %%mm0 \n\t"
            "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
            "movd %%mm0, (%2, %%"REG_a") \n\t"
            "punpckhdq %%mm0, %%mm0 \n\t"
            "movd %%mm0, (%3, %%"REG_a") \n\t"
            "add $4, %%"REG_a" \n\t"
            " js 1b \n\t"
            : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
            : "%"REG_a, "%"REG_d
        );

        udst += chromStride;
        vdst += chromStride;
        src += srcStride*2;
    }

    __asm__ volatile(EMMS" \n\t"
                     SFENCE" \n\t"
                     :::"memory");
#else
    y=0;
#endif
    /* scalar path: also finishes the rows the MMX loop left over */
    for (; y<height; y+=2) {
        long i;
        for (i=0; i<chromWidth; i++) {
            unsigned int b = src[6*i+0];
            unsigned int g = src[6*i+1];
            unsigned int r = src[6*i+2];

            unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
            unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
            unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;

            /* NOTE(review): chroma here is taken from the top-left pixel
             * only, not averaged over the 2x2 block like the MMX path */
            udst[i] = U;
            vdst[i] = V;
            ydst[2*i] = Y;

            b = src[6*i+3];
            g = src[6*i+4];
            r = src[6*i+5];

            Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
            ydst[2*i+1] = Y;
        }
        ydst += lumStride;
        src += srcStride;

        /* second row: luma only */
        for (i=0; i<chromWidth; i++) {
            unsigned int b = src[6*i+0];
            unsigned int g = src[6*i+1];
            unsigned int r = src[6*i+2];

            unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;

            ydst[2*i] = Y;

            b = src[6*i+3];
            g = src[6*i+4];
            r = src[6*i+5];

            Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
            ydst[2*i+1] = Y;
        }
        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src += srcStride;
    }
}
02288
/* Interleave two byte planes: for every row, dest[2*w+0] = src1[w] and
 * dest[2*w+1] = src2[w] for w in [0, width); each pointer advances by its
 * own stride per row (see the scalar fallback loop for the exact contract). */
static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
                                    long width, long height, long src1Stride,
                                    long src2Stride, long dstStride)
{
    long h;

    for (h=0; h < height; h++) {
        long w;

#if HAVE_MMX
#if HAVE_SSE2
        /* SSE2 path: consumes 16 bytes of each source per iteration; both
         * xmm0 and xmm1 load the same src1 quadword on purpose, then the
         * low/high halves are interleaved with src2 via punpcklbw/punpckhbw
         * and written with non-temporal stores.
         * NOTE(review): movdqa/movntdq need 16-byte-aligned src1, src2 and
         * dest — confirm callers guarantee this.
         * NOTE(review): the bound is (x86_reg)width-15 compared with an
         * unsigned 'jb'; a width < 16 would wrap the bound and overrun —
         * confirm callers never pass such widths. */
        __asm__(
            "xor %%"REG_a", %%"REG_a" \n\t"
            "1: \n\t"
            PREFETCH" 64(%1, %%"REG_a") \n\t"
            PREFETCH" 64(%2, %%"REG_a") \n\t"
            "movdqa (%1, %%"REG_a"), %%xmm0 \n\t"
            "movdqa (%1, %%"REG_a"), %%xmm1 \n\t"
            "movdqa (%2, %%"REG_a"), %%xmm2 \n\t"
            "punpcklbw %%xmm2, %%xmm0 \n\t"
            "punpckhbw %%xmm2, %%xmm1 \n\t"
            "movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t"
            "movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t"
            "add $16, %%"REG_a" \n\t"
            "cmp %3, %%"REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
            : "memory", "%"REG_a""
        );
#else
        /* MMX path: 16 bytes of each source per iteration using four
         * 8-byte unpacks; same loop-bound caveat as the SSE2 path above. */
        __asm__(
            "xor %%"REG_a", %%"REG_a" \n\t"
            "1: \n\t"
            PREFETCH" 64(%1, %%"REG_a") \n\t"
            PREFETCH" 64(%2, %%"REG_a") \n\t"
            "movq (%1, %%"REG_a"), %%mm0 \n\t"
            "movq 8(%1, %%"REG_a"), %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "movq (%2, %%"REG_a"), %%mm4 \n\t"
            "movq 8(%2, %%"REG_a"), %%mm5 \n\t"
            "punpcklbw %%mm4, %%mm0 \n\t"
            "punpckhbw %%mm4, %%mm1 \n\t"
            "punpcklbw %%mm5, %%mm2 \n\t"
            "punpckhbw %%mm5, %%mm3 \n\t"
            MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t"
            MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t"
            "add $16, %%"REG_a" \n\t"
            "cmp %3, %%"REG_a" \n\t"
            " jb 1b \n\t"
            ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
            : "memory", "%"REG_a
        );
#endif
        /* Scalar tail: finish the last width&15 elements of the row. */
        for (w= (width&(~15)); w < width; w++) {
            dest[2*w+0] = src1[w];
            dest[2*w+1] = src2[w];
        }
#else
        /* Pure C fallback: defines the reference behavior of this function. */
        for (w=0; w < width; w++) {
            dest[2*w+0] = src1[w];
            dest[2*w+1] = src2[w];
        }
#endif
        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
#if HAVE_MMX
    /* Leave MMX state clean and drain the non-temporal write buffers. */
    __asm__(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02367
/* Upsample two chroma planes by 2x in both directions:
 * dst[2*x] = dst[2*x+1] = src[x] horizontally, and each source line is used
 * for two destination lines (srcStride * (y>>1)). src1/dst1 and src2/dst2
 * are processed identically, one after the other. */
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                       uint8_t *dst1, uint8_t *dst2,
                                       long width, long height,
                                       long srcStride1, long srcStride2,
                                       long dstStride1, long dstStride2)
{
    x86_reg y;
    long x,w,h;
    w=width/2; h=height/2;   /* operate on half-resolution chroma dimensions */
#if HAVE_MMX
    __asm__ volatile(
        PREFETCH" %0 \n\t"
        PREFETCH" %1 \n\t"
        ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
#endif
    for (y=0;y<h;y++) {
        const uint8_t* s1=src1+srcStride1*(y>>1);  /* same source line feeds two output lines */
        uint8_t* d=dst1+dstStride1*y;
        x=0;
#if HAVE_MMX
        /* 32 source bytes -> 64 destination bytes per iteration: punpcklbw/
         * punpckhbw with a register against itself duplicates each byte. */
        for (;x<w-31;x+=32) {
            __asm__ volatile(
                PREFETCH" 32%1 \n\t"
                "movq %1, %%mm0 \n\t"
                "movq 8%1, %%mm2 \n\t"
                "movq 16%1, %%mm4 \n\t"
                "movq 24%1, %%mm6 \n\t"
                "movq %%mm0, %%mm1 \n\t"
                "movq %%mm2, %%mm3 \n\t"
                "movq %%mm4, %%mm5 \n\t"
                "movq %%mm6, %%mm7 \n\t"
                "punpcklbw %%mm0, %%mm0 \n\t"
                "punpckhbw %%mm1, %%mm1 \n\t"
                "punpcklbw %%mm2, %%mm2 \n\t"
                "punpckhbw %%mm3, %%mm3 \n\t"
                "punpcklbw %%mm4, %%mm4 \n\t"
                "punpckhbw %%mm5, %%mm5 \n\t"
                "punpcklbw %%mm6, %%mm6 \n\t"
                "punpckhbw %%mm7, %%mm7 \n\t"
                MOVNTQ" %%mm0, %0 \n\t"
                MOVNTQ" %%mm1, 8%0 \n\t"
                MOVNTQ" %%mm2, 16%0 \n\t"
                MOVNTQ" %%mm3, 24%0 \n\t"
                MOVNTQ" %%mm4, 32%0 \n\t"
                MOVNTQ" %%mm5, 40%0 \n\t"
                MOVNTQ" %%mm6, 48%0 \n\t"
                MOVNTQ" %%mm7, 56%0"
                :"=m"(d[2*x])
                :"m"(s1[x])
                :"memory");
        }
#endif
        /* Scalar tail / fallback: horizontal byte duplication. */
        for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
    }
    /* Second plane: identical processing for src2 -> dst2. */
    for (y=0;y<h;y++) {
        const uint8_t* s2=src2+srcStride2*(y>>1);
        uint8_t* d=dst2+dstStride2*y;
        x=0;
#if HAVE_MMX
        for (;x<w-31;x+=32) {
            __asm__ volatile(
                PREFETCH" 32%1 \n\t"
                "movq %1, %%mm0 \n\t"
                "movq 8%1, %%mm2 \n\t"
                "movq 16%1, %%mm4 \n\t"
                "movq 24%1, %%mm6 \n\t"
                "movq %%mm0, %%mm1 \n\t"
                "movq %%mm2, %%mm3 \n\t"
                "movq %%mm4, %%mm5 \n\t"
                "movq %%mm6, %%mm7 \n\t"
                "punpcklbw %%mm0, %%mm0 \n\t"
                "punpckhbw %%mm1, %%mm1 \n\t"
                "punpcklbw %%mm2, %%mm2 \n\t"
                "punpckhbw %%mm3, %%mm3 \n\t"
                "punpcklbw %%mm4, %%mm4 \n\t"
                "punpckhbw %%mm5, %%mm5 \n\t"
                "punpcklbw %%mm6, %%mm6 \n\t"
                "punpckhbw %%mm7, %%mm7 \n\t"
                MOVNTQ" %%mm0, %0 \n\t"
                MOVNTQ" %%mm1, 8%0 \n\t"
                MOVNTQ" %%mm2, 16%0 \n\t"
                MOVNTQ" %%mm3, 24%0 \n\t"
                MOVNTQ" %%mm4, 32%0 \n\t"
                MOVNTQ" %%mm5, 40%0 \n\t"
                MOVNTQ" %%mm6, 48%0 \n\t"
                MOVNTQ" %%mm7, 56%0"
                :"=m"(d[2*x])
                :"m"(s2[x])
                :"memory");
        }
#endif
        for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];
    }
#if HAVE_MMX
    /* Leave MMX state clean and drain the non-temporal write buffers. */
    __asm__(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02469
/* Pack planar Y + two quarter-vertical-resolution chroma planes into a
 * packed Y/U/Y/V byte stream. Per the scalar loop, each output group of 8
 * bytes is { yp[4x], up[x], yp[4x+1], vp[x], yp[4x+2], up[x], yp[4x+3],
 * vp[x] }: four consecutive luma samples share one U and one V sample, and
 * chroma lines repeat for four output lines (srcStride * (y>>2)). */
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                        uint8_t *dst,
                                        long width, long height,
                                        long srcStride1, long srcStride2,
                                        long srcStride3, long dstStride)
{
    x86_reg x;
    long y,w,h;
    w=width/2; h=height;
    for (y=0;y<h;y++) {
        const uint8_t* yp=src1+srcStride1*y;
        const uint8_t* up=src2+srcStride2*(y>>2);  /* chroma reused across 4 lines */
        const uint8_t* vp=src3+srcStride3*(y>>2);
        uint8_t* d=dst+dstStride*y;
        x=0;
#if HAVE_MMX
        /* 8 chroma samples (32 luma bytes) -> 64 output bytes per iteration:
         * build interleaved U/V pairs in mm1/mm6/mm4, then unpack them
         * against 8-byte luma chunks and stream out with MOVNTQ. */
        for (;x<w-7;x+=8) {
            __asm__ volatile(
                PREFETCH" 32(%1, %0) \n\t"
                PREFETCH" 32(%2, %0) \n\t"
                PREFETCH" 32(%3, %0) \n\t"
                "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
                "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */
                "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */
                "movq %%mm0, %%mm3 \n\t"
                "movq %%mm1, %%mm4 \n\t"
                "movq %%mm2, %%mm5 \n\t"
                "punpcklbw %%mm1, %%mm1 \n\t" /* duplicate each low chroma byte */
                "punpcklbw %%mm2, %%mm2 \n\t"
                "punpckhbw %%mm4, %%mm4 \n\t" /* duplicate each high chroma byte */
                "punpckhbw %%mm5, %%mm5 \n\t"

                "movq %%mm1, %%mm6 \n\t"
                "punpcklbw %%mm2, %%mm1 \n\t" /* mm1 = U0V0U0V0U1V1U1V1 */
                "punpcklbw %%mm1, %%mm0 \n\t" /* interleave Y with U/V pairs */
                "punpckhbw %%mm1, %%mm3 \n\t"
                MOVNTQ" %%mm0, (%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t"

                "punpckhbw %%mm2, %%mm6 \n\t" /* mm6 = U2V2U2V2U3V3U3V3 */
                "movq 8(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm6, %%mm0 \n\t"
                "punpckhbw %%mm6, %%mm3 \n\t"
                MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t"

                "movq %%mm4, %%mm6 \n\t"
                "movq 16(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm5, %%mm4 \n\t" /* mm4 = U4V4U4V4U5V5U5V5 */
                "punpcklbw %%mm4, %%mm0 \n\t"
                "punpckhbw %%mm4, %%mm3 \n\t"
                MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t"

                "punpckhbw %%mm5, %%mm6 \n\t" /* mm6 = U6V6U6V6U7V7U7V7 */
                "movq 24(%1, %0, 4), %%mm0 \n\t"
                "movq %%mm0, %%mm3 \n\t"
                "punpcklbw %%mm6, %%mm0 \n\t"
                "punpckhbw %%mm6, %%mm3 \n\t"
                MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t"
                MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t"

                : "+r" (x)
                : "r"(yp), "r" (up), "r"(vp), "r"(d)
                :"memory");
        }
#endif
        /* Scalar tail / fallback: defines the exact output byte layout. */
        for (; x<w; x++) {
            const long x2 = x<<2;
            d[8*x+0] = yp[x2];
            d[8*x+1] = up[x];
            d[8*x+2] = yp[x2+1];
            d[8*x+3] = vp[x];
            d[8*x+4] = yp[x2+2];
            d[8*x+5] = up[x];
            d[8*x+6] = yp[x2+3];
            d[8*x+7] = vp[x];
        }
    }
#if HAVE_MMX
    /* Leave MMX state clean and drain the non-temporal write buffers. */
    __asm__(
        EMMS" \n\t"
        SFENCE" \n\t"
        ::: "memory"
    );
#endif
}
02559
/* dst[i] = src[2*i] for i in [0, count): keep every even-indexed byte.
 * Pointers are pre-biased by count and the index runs from -count up to 0,
 * so both loops can terminate on a simple sign test. */
static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
{
    dst += count;
    src += 2*count;
    count= - count;

#if HAVE_MMX
    /* SIMD path handles all but the last 15 elements (count +=/-= 15
     * bracketing); the scalar loop below finishes the tail. */
    if(count <= -16) {
        count += 15;
        __asm__ volatile(
            /* mm7 = 0x00FF per word: mask keeping the low (even) byte */
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t"
            "1: \n\t"
            "movq -30(%1, %0, 2), %%mm0 \n\t"
            "movq -22(%1, %0, 2), %%mm1 \n\t"
            "movq -14(%1, %0, 2), %%mm2 \n\t"
            "movq -6(%1, %0, 2), %%mm3 \n\t"
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            MOVNTQ" %%mm0,-15(%2, %0) \n\t"
            MOVNTQ" %%mm2,- 7(%2, %0) \n\t"
            "add $16, %0 \n\t"
            " js 1b \n\t"
            /* NOTE(review): stores to dst but declares no "memory" clobber —
             * confirm nothing around the call depends on compiler-visible
             * ordering of these writes. */
            : "+r"(count)
            : "r"(src), "r"(dst)
        );
        count -= 15;
    }
#endif
    while(count<0) {
        dst[count]= src[2*count];
        count++;
    }
}
02598
/* De-interleave two byte streams out of a 4-byte-grouped source:
 * dst0[i] = src[4*i+0], dst1[i] = src[4*i+2] for i in [0, count).
 * Same biased-pointer / negative-index idiom as extract_even. */
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0+= count;
    dst1+= count;
    src += 4*count;
    count= - count;
#if HAVE_MMX
    /* SIMD path handles all but the last 7 elements. */
    if(count <= -8) {
        count += 7;
        __asm__ volatile(
            /* mm7 = 0x00FF per word mask */
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t"
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            /* keep bytes 0 and 2 of each dword, pack to bytes */
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            /* split the interleaved result: high byte of each word -> dst1,
             * low byte -> dst0 */
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"
            "packuswb %%mm3, %%mm1 \n\t"
            MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
#endif
    while(count<0) {
        dst0[count]= src[4*count+0];
        dst1[count]= src[4*count+2];
        count++;
    }
}
02646
/* Like extract_even2, but averaging two source rows first:
 * dst0[i] = avg(src0[4*i+0], src1[4*i+0]),
 * dst1[i] = avg(src0[4*i+2], src1[4*i+2]).
 * NOTE(review): the SIMD path uses PAVGB (rounds the average up) while the
 * scalar fallback computes (a+b)>>1 (truncates) — results can differ by 1
 * LSB between the two paths; confirm this is acceptable. */
static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0 += count;
    dst1 += count;
    src0 += 4*count;
    src1 += 4*count;
    count= - count;
#ifdef PAVGB
    /* SIMD path handles all but the last 7 elements. */
    if(count <= -8) {
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t"
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            /* average each byte with the second source row */
            PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
            PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
            PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
            PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
            /* then extract bytes 0 and 2 of each dword as in extract_even2 */
            "pand %%mm7, %%mm0 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm2 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"
            "packuswb %%mm3, %%mm1 \n\t"
            MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
#endif
    while(count<0) {
        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
        count++;
    }
}
02699
/* De-interleave the odd-offset bytes of a 4-byte-grouped source:
 * dst0[i] = src[4*i+1], dst1[i] = src[4*i+3] for i in [0, count).
 * The SIMD path selects the odd bytes with psrlw $8; the scalar tail gets
 * the same effect from the src++ bias before the loop. */
static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0+= count;
    dst1+= count;
    src += 4*count;
    count= - count;
#if HAVE_MMX
    /* SIMD path handles all but the last 7 elements. */
    if(count <= -8) {
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t"
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            /* shift the odd byte of each word into the low position */
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm1 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "psrlw $8, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            /* split the interleaved pair: bytes 4i+3 -> dst1, 4i+1 -> dst0 */
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"
            "packuswb %%mm3, %%mm1 \n\t"
            MOVNTQ" %%mm0,- 7(%3, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%2, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
#endif
    src++;   /* bias so the scalar loop below reads the odd offsets */
    while(count<0) {
        dst0[count]= src[4*count+0];
        dst1[count]= src[4*count+2];
        count++;
    }
}
02748
/* Like extract_odd2, but averaging two source rows first:
 * dst0[i] = avg(src0[4*i+1], src1[4*i+1]),
 * dst1[i] = avg(src0[4*i+3], src1[4*i+3]).
 * NOTE(review): as in extract_even2avg, PAVGB rounds up while the scalar
 * fallback truncates — the two paths can differ by 1 LSB; confirm intended. */
static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
    dst0 += count;
    dst1 += count;
    src0 += 4*count;
    src1 += 4*count;
    count= - count;
#ifdef PAVGB
    /* SIMD path handles all but the last 7 elements. */
    if(count <= -8) {
        count += 7;
        __asm__ volatile(
            "pcmpeqw %%mm7, %%mm7 \n\t"
            "psrlw $8, %%mm7 \n\t"
            "1: \n\t"
            "movq -28(%1, %0, 4), %%mm0 \n\t"
            "movq -20(%1, %0, 4), %%mm1 \n\t"
            "movq -12(%1, %0, 4), %%mm2 \n\t"
            "movq -4(%1, %0, 4), %%mm3 \n\t"
            /* average each byte with the second source row */
            PAVGB" -28(%2, %0, 4), %%mm0 \n\t"
            PAVGB" -20(%2, %0, 4), %%mm1 \n\t"
            PAVGB" -12(%2, %0, 4), %%mm2 \n\t"
            PAVGB" - 4(%2, %0, 4), %%mm3 \n\t"
            /* keep the odd byte of each word, then split as in extract_odd2 */
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm1 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "psrlw $8, %%mm3 \n\t"
            "packuswb %%mm1, %%mm0 \n\t"
            "packuswb %%mm3, %%mm2 \n\t"
            "movq %%mm0, %%mm1 \n\t"
            "movq %%mm2, %%mm3 \n\t"
            "psrlw $8, %%mm0 \n\t"
            "psrlw $8, %%mm2 \n\t"
            "pand %%mm7, %%mm1 \n\t"
            "pand %%mm7, %%mm3 \n\t"
            "packuswb %%mm2, %%mm0 \n\t"
            "packuswb %%mm3, %%mm1 \n\t"
            MOVNTQ" %%mm0,- 7(%4, %0) \n\t"
            MOVNTQ" %%mm1,- 7(%3, %0) \n\t"
            "add $8, %0 \n\t"
            " js 1b \n\t"
            : "+r"(count)
            : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
        );
        count -= 7;
    }
#endif
    src0++;  /* bias so the scalar loop reads the odd offsets */
    src1++;
    while(count<0) {
        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
        count++;
    }
}
02803
02804 static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02805 long width, long height,
02806 long lumStride, long chromStride, long srcStride)
02807 {
02808 long y;
02809 const long chromWidth= -((-width)>>1);
02810
02811 for (y=0; y<height; y++) {
02812 RENAME(extract_even)(src, ydst, width);
02813 if(y&1) {
02814 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
02815 udst+= chromStride;
02816 vdst+= chromStride;
02817 }
02818
02819 src += srcStride;
02820 ydst+= lumStride;
02821 }
02822 #if HAVE_MMX
02823 __asm__(
02824 EMMS" \n\t"
02825 SFENCE" \n\t"
02826 ::: "memory"
02827 );
02828 #endif
02829 }
02830
02831 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02832 long width, long height,
02833 long lumStride, long chromStride, long srcStride)
02834 {
02835 long y;
02836 const long chromWidth= -((-width)>>1);
02837
02838 for (y=0; y<height; y++) {
02839 RENAME(extract_even)(src, ydst, width);
02840 RENAME(extract_odd2)(src, udst, vdst, chromWidth);
02841
02842 src += srcStride;
02843 ydst+= lumStride;
02844 udst+= chromStride;
02845 vdst+= chromStride;
02846 }
02847 #if HAVE_MMX
02848 __asm__(
02849 EMMS" \n\t"
02850 SFENCE" \n\t"
02851 ::: "memory"
02852 );
02853 #endif
02854 }
02855
02856 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02857 long width, long height,
02858 long lumStride, long chromStride, long srcStride)
02859 {
02860 long y;
02861 const long chromWidth= -((-width)>>1);
02862
02863 for (y=0; y<height; y++) {
02864 RENAME(extract_even)(src+1, ydst, width);
02865 if(y&1) {
02866 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
02867 udst+= chromStride;
02868 vdst+= chromStride;
02869 }
02870
02871 src += srcStride;
02872 ydst+= lumStride;
02873 }
02874 #if HAVE_MMX
02875 __asm__(
02876 EMMS" \n\t"
02877 SFENCE" \n\t"
02878 ::: "memory"
02879 );
02880 #endif
02881 }
02882
02883 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
02884 long width, long height,
02885 long lumStride, long chromStride, long srcStride)
02886 {
02887 long y;
02888 const long chromWidth= -((-width)>>1);
02889
02890 for (y=0; y<height; y++) {
02891 RENAME(extract_even)(src+1, ydst, width);
02892 RENAME(extract_even2)(src, udst, vdst, chromWidth);
02893
02894 src += srcStride;
02895 ydst+= lumStride;
02896 udst+= chromStride;
02897 vdst+= chromStride;
02898 }
02899 #if HAVE_MMX
02900 __asm__(
02901 EMMS" \n\t"
02902 SFENCE" \n\t"
02903 ::: "memory"
02904 );
02905 #endif
02906 }
02907
02908 static inline void RENAME(rgb2rgb_init)(void)
02909 {
02910 rgb15to16 = RENAME(rgb15to16);
02911 rgb15tobgr24 = RENAME(rgb15tobgr24);
02912 rgb15to32 = RENAME(rgb15to32);
02913 rgb16tobgr24 = RENAME(rgb16tobgr24);
02914 rgb16to32 = RENAME(rgb16to32);
02915 rgb16to15 = RENAME(rgb16to15);
02916 rgb24tobgr16 = RENAME(rgb24tobgr16);
02917 rgb24tobgr15 = RENAME(rgb24tobgr15);
02918 rgb24tobgr32 = RENAME(rgb24tobgr32);
02919 rgb32to16 = RENAME(rgb32to16);
02920 rgb32to15 = RENAME(rgb32to15);
02921 rgb32tobgr24 = RENAME(rgb32tobgr24);
02922 rgb24to15 = RENAME(rgb24to15);
02923 rgb24to16 = RENAME(rgb24to16);
02924 rgb24tobgr24 = RENAME(rgb24tobgr24);
02925 rgb32tobgr32 = RENAME(rgb32tobgr32);
02926 rgb32tobgr16 = RENAME(rgb32tobgr16);
02927 rgb32tobgr15 = RENAME(rgb32tobgr15);
02928 yv12toyuy2 = RENAME(yv12toyuy2);
02929 yv12touyvy = RENAME(yv12touyvy);
02930 yuv422ptoyuy2 = RENAME(yuv422ptoyuy2);
02931 yuv422ptouyvy = RENAME(yuv422ptouyvy);
02932 yuy2toyv12 = RENAME(yuy2toyv12);
02933
02934 planar2x = RENAME(planar2x);
02935 rgb24toyv12 = RENAME(rgb24toyv12);
02936 interleaveBytes = RENAME(interleaveBytes);
02937 vu9_to_vu12 = RENAME(vu9_to_vu12);
02938 yvu9_to_yuy2 = RENAME(yvu9_to_yuy2);
02939
02940 uyvytoyuv420 = RENAME(uyvytoyuv420);
02941 uyvytoyuv422 = RENAME(uyvytoyuv422);
02942 yuyvtoyuv420 = RENAME(yuyvtoyuv420);
02943 yuyvtoyuv422 = RENAME(yuyvtoyuv422);
02944 }