/* NOTE(review): fragment - the enclosing function's signature, loop headers
 * and pointer increments are on lines that are not visible here. */
86 register const uint8_t *s = src;
/* mm_end is 3 bytes short of `end` (defined on a line not shown); presumably
 * it bounds the 32-bit-at-a-time path so a 4-byte access stays inside the
 * buffer - confirm against the hidden loop condition. */
88 const uint8_t *mm_end = end - 3;
/* 32-bit path: load four bytes (two 16-bit values) from s. */
91 register unsigned x = *((
const uint32_t *)s);
/* Per 16-bit half: bits 0-4 are kept, bits 5-14 are added to themselves and
 * therefore move up to bits 6-15, result bit 5 is 0 and input bit 15 is
 * discarded.  Each half sums to at most 0xFFDF, so no carry crosses into the
 * other half. */
92 *((uint32_t *)d) = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
/* 16-bit path: the same transformation applied to a single 16-bit value. */
97 register unsigned short x = *((
const uint16_t *)s);
98 *((uint16_t *)d) = (x & 0x7FFF) + (x & 0x7FE0);
/* NOTE(review): fragment - signature, loop headers and pointer increments
 * are not visible here. */
105 register const uint8_t *s = src;
/* mm_end is 3 bytes short of `end` (defined on a line not shown). */
107 const uint8_t *mm_end = end - 3;
/* 32-bit path: load four bytes (two 16-bit values) from s. */
110 register uint32_t x = *((
const uint32_t *)s);
/* Per 16-bit half: bits 6-15 move down to bits 5-14, bits 0-4 are kept,
 * input bit 5 is dropped and result bit 15 is 0.  The 0x7FE07FE0 mask also
 * clears bit 15 of the low half, so the bit shifted in from the high half
 * does not leak across. */
111 *((uint32_t *)d) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
/* 16-bit path: the same transformation applied to a single 16-bit value. */
116 register uint16_t x = *((
const uint16_t *)s);
117 *((uint16_t *)d) = ((x >> 1) & 0x7FE0) | (x & 0x001F);
/* NOTE(review): fragment - the loop header and the advance of s are not
 * visible here. */
123 uint16_t *d = (uint16_t *)dst;
/* Load one 32-bit value from s. */
128 register int rgb = *(
const uint32_t *)s;
/* Pack into 16 bits: input bits 3-7 -> output bits 0-4, input bits 10-15 ->
 * output bits 5-10, input bits 19-23 -> output bits 11-15.  Input bits 24-31
 * are ignored.  The three fields do not overlap, so `+` behaves like OR. */
130 *d++ = ((rgb & 0xFF) >> 3) +
131 ((rgb & 0xFC00) >> 5) +
132 ((rgb & 0xF80000) >> 8);
/* NOTE(review): fragment - the loop header and the advance of s are not
 * visible here. */
139 uint16_t *d = (uint16_t *)dst;
/* Load one 32-bit value from s. */
144 register int rgb = *(
const uint32_t *)s;
/* Pack into 16 bits with the outer fields in the opposite order to a plain
 * downshift: input bits 3-7 -> output bits 11-15, input bits 10-15 -> output
 * bits 5-10, input bits 19-23 -> output bits 0-4.  Input bits 24-31 are
 * ignored; the fields do not overlap, so `+` behaves like OR. */
146 *d++ = ((rgb & 0xF8) << 8) +
147 ((rgb & 0xFC00) >> 5) +
148 ((rgb & 0xF80000) >> 19);
/* NOTE(review): fragment - the loop header and the advance of s are not
 * visible here. */
154 uint16_t *d = (uint16_t *)dst;
/* Load one 32-bit value from s. */
159 register int rgb = *(
const uint32_t *)s;
/* Pack into 15 bits: input bits 3-7 -> output bits 0-4, input bits 11-15 ->
 * output bits 5-9, input bits 19-23 -> output bits 10-14.  Output bit 15 is
 * always 0 and input bits 24-31 are ignored. */
161 *d++ = ((rgb & 0xFF) >> 3) +
162 ((rgb & 0xF800) >> 6) +
163 ((rgb & 0xF80000) >> 9);
/* NOTE(review): fragment - the loop header and the advance of s are not
 * visible here. */
170 uint16_t *d = (uint16_t *)dst;
/* Load one 32-bit value from s. */
175 register int rgb = *(
const uint32_t *)s;
/* Pack into 15 bits with the outer fields swapped: input bits 3-7 -> output
 * bits 10-14, input bits 11-15 -> output bits 5-9, input bits 19-23 ->
 * output bits 0-4.  Output bit 15 is always 0. */
177 *d++ = ((rgb & 0xF8) << 7) +
178 ((rgb & 0xF800) >> 6) +
179 ((rgb & 0xF80000) >> 19);
/* NOTE(review): four short fragments, each apparently from a separate loop.
 * b, g and r are assigned on lines that are not visible here; the bit
 * positions described below assume each holds a value in 0..255 - confirm. */
186 uint16_t *d = (uint16_t *)dst;
/* 16-bit pack: top 5 bits of b -> bits 0-4, top 6 bits of g -> bits 5-10,
 * top 5 bits of r -> bits 11-15. */
194 *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
200 uint16_t *d = (uint16_t *)dst;
/* Same packing expression as at original line 194; presumably the hidden
 * lines load b, g and r in a different order - TODO confirm. */
208 *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
215 uint16_t *d = (uint16_t *)dst;
/* 15-bit pack: top 5 bits of b -> bits 0-4, top 5 bits of g -> bits 5-9,
 * top 5 bits of r -> bits 10-14; bit 15 stays 0. */
223 *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
229 uint16_t *d = (uint16_t *)dst;
/* Same packing expression as at original line 223; presumably the hidden
 * lines load b, g and r in a different order - TODO confirm. */
237 *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
/* NOTE(review): fragment - the declaration of d and the loop header are not
 * visible here. */
245 const uint16_t *s = (
const uint16_t *)src;
/* end lies src_size / 2 16-bit elements past s. */
246 const uint16_t *
end = s + src_size / 2;
/* Fetch one 16-bit value and advance. */
249 register uint16_t bgr = *s++;
/* Expand each 5-bit field to 8 bits: the field is placed in bits 3-7 and its
 * top 3 bits are copied into bits 0-2.  Fields are emitted lowest first
 * (bits 0-4, then 5-9, then 10-14); input bit 15 is unused. */
250 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
251 *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
252 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
/* NOTE(review): fragment - the declaration of d and the loop header are not
 * visible here. */
260 const uint16_t *s = (
const uint16_t *)src;
/* end lies src_size / 2 16-bit elements past s. */
261 const uint16_t *
end = s + src_size / 2;
/* Fetch one 16-bit value and advance. */
264 register uint16_t bgr = *s++;
/* Expand the 5-6-5 fields to 8 bits each, lowest field first.  The 5-bit
 * fields (bits 0-4 and 11-15) land in bits 3-7 with their top 3 bits copied
 * into bits 0-2; the 6-bit field (bits 5-10) lands in bits 2-7 with its top
 * 2 bits copied into bits 0-1. */
265 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
266 *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
267 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
/* NOTE(review): fragment - the declaration of d, the loop header and original
 * lines 279-280, 284 and 288 are not visible here.  Two emission orders appear
 * below; the construct that selects between them is hidden (presumably a
 * preprocessor conditional), and further stores may exist on the hidden
 * lines - confirm. */
274 const uint16_t *s = (
const uint16_t *)src;
/* end lies src_size / 2 16-bit elements past s. */
275 const uint16_t *
end = s + src_size / 2;
/* Fetch one 16-bit value and advance. */
278 register uint16_t bgr = *s++;
/* First order: highest 5-bit field (bits 10-14) first, lowest last.  Each
 * field is widened to 8 bits by placing it in bits 3-7 and copying its top
 * 3 bits into bits 0-2. */
281 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
282 *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
283 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
/* Second order: the same three values, lowest field first. */
285 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
286 *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
287 *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
/* NOTE(review): fragment - the declaration of d, the loop header and original
 * lines 301-302, 306 and 310 are not visible here.  Two emission orders appear
 * below; the construct that selects between them is hidden (presumably a
 * preprocessor conditional), and further stores may exist on the hidden
 * lines - confirm. */
296 const uint16_t *s = (
const uint16_t *)src;
/* end lies src_size / 2 16-bit elements past s. */
297 const uint16_t *
end = s + src_size / 2;
/* Fetch one 16-bit value and advance. */
300 register uint16_t bgr = *s++;
/* First order: highest field (bits 11-15) first, lowest last.  5-bit fields
 * are widened by copying their top 3 bits into bits 0-2, the 6-bit field
 * (bits 5-10) by copying its top 2 bits into bits 0-1. */
303 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
304 *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
305 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
/* Second order: the same three values, lowest field first. */
307 *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
308 *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
309 *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
/* NOTE(review): fragment - the definitions of s and d (original lines
 * 319-320) and a statement between the load and the store (original line 324)
 * are not visible here. */
/* idx starts at 15 - src_size and the loop below runs while idx < 15, in
 * steps of 4.  Presumably s and d are biased by idx so that s[idx] begins at
 * the start of the input - confirm against the hidden definitions. */
318 int idx = 15 - src_size;
322 for (; idx < 15; idx += 4) {
/* v is one 32-bit word; g keeps bytes 1 and 3 of it (mask 0xff00ff00). */
323 register int v = *(
const uint32_t *)&s[idx],
g = v & 0xff00ff00;
/* (v >> 16) + (v << 16) exchanges the two 16-bit halves of v, and g is added
 * back in.  Presumably the hidden line 324 first reduces v to bytes 0 and 2 so
 * that only those two bytes trade places - confirm.
 * NOTE(review): v is a signed int, so v << 16 can exceed INT_MAX and v >> 16
 * of a negative value is implementation-defined; an unsigned type would avoid
 * both. */
325 *(uint32_t *)&d[idx] = (v >> 16) +
g + (v << 16);
/* NOTE(review): fragment - the rest of the loop body (original line 337
 * onward) is not visible here. */
/* Walk the buffers in 3-byte groups. */
333 for (i = 0; i < src_size; i += 3) {
/* Save byte 2 of the group before dst[i + 2] is written; presumably it is
 * stored to dst[i + 0] on the hidden line, so the group ends up with bytes 0
 * and 2 exchanged and remains correct if dst and src alias - confirm. */
334 register uint8_t x = src[i + 2];
/* The middle byte is copied unchanged; byte 0 goes to position 2. */
335 dst[i + 1] = src[i + 1];
336 dst[i + 2] = src[i + 0];
/* NOTE(review): fragment - the function name, the leading parameters, the
 * declaration of idst, all pointer advances and the constructs that select
 * between the code paths below are on lines that are not visible here. */
344 int lumStride,
int chromStride,
345 int dstStride,
int vertLumPerChroma)
/* Half the width; one iteration unit covers two luma samples. */
348 const int chromWidth = width >> 1;
/* One pass per output row. */
350 for (y = 0; y <
height; y++) {
/* 64-bit path: write eight output bytes per iteration. */
352 uint64_t *ldst = (uint64_t *)dst;
353 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
354 for (i = 0; i < chromWidth; i += 2) {
/* k holds yc[0], uc[0], yc[1], vc[0] in bytes 0..3 (least significant first).
 * NOTE(review): vc[0] << 24 is evaluated in int before the cast to unsigned;
 * for vc[0] >= 128 the result does not fit in a 32-bit int. */
355 uint64_t k = yc[0] + (uc[0] << 8) +
356 (yc[1] << 16) + (unsigned)(vc[0] << 24);
/* l holds the next group: yc[2], uc[1], yc[3], vc[1]. */
357 uint64_t l = yc[2] + (uc[1] << 8) +
358 (yc[3] << 16) + (unsigned)(vc[1] << 24);
/* k occupies the low 32 bits of the stored word, l the high 32 bits. */
359 *ldst++ = k + (l << 32);
/* 32-bit path: one group of four samples per iteration. */
367 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
369 for (i = 0; i < chromWidth; i++) {
/* Variant A: yc[0] in the most significant byte, then uc[0], yc[1], vc[0]. */
371 *idst++ = (yc[0] << 24) + (uc[0] << 16) +
372 (yc[1] << 8) + (vc[0] << 0);
/* Variant B: the same four samples with yc[0] in the least significant byte.
 * Presumably A and B are selected by byte order so that memory always reads
 * Y, U, Y, V - confirm against the hidden conditional. */
374 *idst++ = yc[0] + (uc[0] << 8) +
375 (yc[1] << 16) + (vc[0] << 24);
/* True on the last row of every group of vertLumPerChroma rows (relies on
 * vertLumPerChroma being a power of two); the body is not visible -
 * presumably it advances the chroma pointers. */
382 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
/* NOTE(review): fragment - only the last two parameters of this function and
 * the tail of one call are visible.  The call forwards chromStride and
 * dstStride and passes the constant 2 as its final argument; the callee is
 * on a hidden line (presumably 2 is a vertLumPerChroma value - confirm). */
398 int chromStride,
int dstStride)
402 chromStride, dstStride, 2);
/* NOTE(review): fragment - the function name, the leading parameters, the
 * declaration of idst, all pointer advances and the constructs that select
 * between the code paths below are on lines that are not visible here. */
408 int lumStride,
int chromStride,
409 int dstStride,
int vertLumPerChroma)
/* Half the width; one iteration unit covers two luma samples. */
412 const int chromWidth = width >> 1;
/* One pass per output row. */
414 for (y = 0; y <
height; y++) {
/* 64-bit path: write eight output bytes per iteration. */
416 uint64_t *ldst = (uint64_t *)dst;
417 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
418 for (i = 0; i < chromWidth; i += 2) {
/* k holds uc[0], yc[0], vc[0], yc[1] in bytes 0..3 (least significant first).
 * NOTE(review): yc[1] << 24 is evaluated in int before the cast to unsigned;
 * for yc[1] >= 128 the result does not fit in a 32-bit int. */
419 uint64_t k = uc[0] + (yc[0] << 8) +
420 (vc[0] << 16) + (unsigned)(yc[1] << 24);
/* l holds the next group: uc[1], yc[2], vc[1], yc[3]. */
421 uint64_t l = uc[1] + (yc[2] << 8) +
422 (vc[1] << 16) + (unsigned)(yc[3] << 24);
/* k occupies the low 32 bits of the stored word, l the high 32 bits. */
423 *ldst++ = k + (l << 32);
/* 32-bit path: one group of four samples per iteration. */
431 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
433 for (i = 0; i < chromWidth; i++) {
/* Variant A: uc[0] in the most significant byte, then yc[0], vc[0], yc[1]. */
435 *idst++ = (uc[0] << 24) + (yc[0] << 16) +
436 (vc[0] << 8) + (yc[1] << 0);
/* Variant B: the same four samples with uc[0] in the least significant byte.
 * Presumably A and B are selected by byte order so that memory always reads
 * U, Y, V, Y - confirm against the hidden conditional. */
438 *idst++ = uc[0] + (yc[0] << 8) +
439 (vc[0] << 16) + (yc[1] << 24);
/* True on the last row of every group of vertLumPerChroma rows (relies on
 * vertLumPerChroma being a power of two); the body is not visible -
 * presumably it advances the chroma pointers. */
446 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
/* NOTE(review): three fragments, each showing only the last two parameters
 * of a function and the tail of one call; the callees are on hidden lines.
 * The final argument is 2 in the first call and 1 in the other two
 * (presumably a vertLumPerChroma value - confirm). */
462 int chromStride,
int dstStride)
466 chromStride, dstStride, 2);
475 int chromStride,
int dstStride)
478 chromStride, dstStride, 1);
487 int chromStride,
int dstStride)
490 chromStride, dstStride, 1);
/* NOTE(review): fragment - the function name, the leading parameters and the
 * pointer advances between and after the two inner loops are not visible. */
500 int chromStride,
int srcStride)
/* Half the width: the number of 4-byte input groups handled per row. */
503 const int chromWidth = width >> 1;
/* Rows are handled in pairs (y advances by 2). */
505 for (y = 0; y <
height; y += 2) {
/* First row of the pair: split each 4-byte group into two ydst samples
 * (bytes 0 and 2), one udst sample (byte 1) and one vdst sample (byte 3). */
507 for (i = 0; i < chromWidth; i++) {
508 ydst[2 * i + 0] = src[4 * i + 0];
509 udst[i] = src[4 * i + 1];
510 ydst[2 * i + 1] = src[4 * i + 2];
511 vdst[i] = src[4 * i + 3];
/* Second loop: only bytes 0 and 2 are copied to ydst; bytes 1 and 3 of these
 * groups are not read.  Presumably src/ydst have been advanced to the next
 * row on the hidden lines - confirm. */
516 for (i = 0; i < chromWidth; i++) {
517 ydst[2 * i + 0] = src[4 * i + 0];
518 ydst[2 * i + 1] = src[4 * i + 2];
/* NOTE(review): fragment - the function name, the leading parameters, the
 * writes to dst[0] for the first and last rows, and the advance of src
 * inside the row loop are on lines that are not visible here. */
528 int srcHeight,
int srcStride,
int dstStride)
/* First output row: horizontal-only interpolation.  Each pair of neighbouring
 * source samples yields two outputs weighted 3:1 and 1:3. */
535 for (x = 0; x < srcWidth - 1; x++) {
536 dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
537 dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
/* The last output sample of the row is a plain copy of the last source sample. */
539 dst[2 * srcWidth - 1] = src[srcWidth - 1];
/* Each iteration produces two output rows (dst and dst + dstStride) from the
 * source rows at src and src + srcStride. */
543 for (y = 1; y < srcHeight; y++) {
/* With mmxSize fixed at 1 the inner loop below starts at x = 0. */
544 const int mmxSize = 1;
/* Left edge: vertical-only blend, 3:1 for the upper output row and 1:3 for
 * the lower one. */
546 dst[0] = (src[0] * 3 + src[srcStride]) >> 2;
547 dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
/* Interior: each output blends two diagonally opposite samples of the 2x2
 * source neighbourhood with weights 3:1; outputs on the upper row weight the
 * upper source row by 3, outputs on the lower row weight the lower one by 3. */
549 for (x = mmxSize - 1; x < srcWidth - 1; x++) {
550 dst[2 * x + 1] = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2;
551 dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2;
552 dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride]) >> 2;
553 dst[2 * x + 2] = (src[x + 1] * 3 + src[x + srcStride]) >> 2;
/* Right edge: vertical-only blend of the last column. */
555 dst[srcWidth * 2 - 1] = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2;
556 dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2;
/* Move down two output rows. */
558 dst += dstStride * 2;
/* Final output row: horizontal-only interpolation, same form as the first row. */
565 for (x = 0; x < srcWidth - 1; x++) {
566 dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
567 dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
569 dst[2 * srcWidth - 1] = src[srcWidth - 1];
/* NOTE(review): fragment - the function name, the leading parameters and the
 * pointer advances between and after the two inner loops are not visible. */
581 int chromStride,
int srcStride)
/* Half the width: the number of 4-byte input groups handled per row. */
584 const int chromWidth = width >> 1;
/* Rows are handled in pairs (y advances by 2). */
586 for (y = 0; y <
height; y += 2) {
/* First row of the pair: split each 4-byte group into one udst sample
 * (byte 0), one vdst sample (byte 2) and two ydst samples (bytes 1 and 3). */
588 for (i = 0; i < chromWidth; i++) {
589 udst[i] = src[4 * i + 0];
590 ydst[2 * i + 0] = src[4 * i + 1];
591 vdst[i] = src[4 * i + 2];
592 ydst[2 * i + 1] = src[4 * i + 3];
/* Second loop: only bytes 1 and 3 are copied to ydst; bytes 0 and 2 of these
 * groups are not read.  Presumably src/ydst have been advanced to the next
 * row on the hidden lines - confirm. */
597 for (i = 0; i < chromWidth; i++) {
598 ydst[2 * i + 0] = src[4 * i + 1];
599 ydst[2 * i + 1] = src[4 * i + 3];
/* NOTE(review): fragment - the function name, the leading parameters and
 * everything that consumes b, g and r are on lines that are not visible. */
617 int chromStride,
int srcStride)
/* Half the width: the number of 6-byte input groups handled per row. */
620 const int chromWidth = width >> 1;
/* Rows are handled in pairs (y advances by 2). */
622 for (y = 0; y <
height; y += 2) {
/* Read bytes 0, 1 and 2 of each 6-byte group as b, g and r.  Bytes 3-5 of
 * the group are presumably read on the hidden lines - confirm. */
624 for (i = 0; i < chromWidth; i++) {
625 unsigned int b = src[6 * i + 0];
626 unsigned int g = src[6 * i + 1];
627 unsigned int r = src[6 * i + 2];
/* Second loop with the same leading reads; presumably it handles the other
 * row of the pair - confirm. */
650 for (i = 0; i < chromWidth; i++) {
651 unsigned int b = src[6 * i + 0];
652 unsigned int g = src[6 * i + 1];
653 unsigned int r = src[6 * i + 2];
/* NOTE(review): fragment - the function name, the leading parameters and the
 * per-row pointer advances are not visible here. */
675 int src1Stride,
int src2Stride,
int dstStride)
/* One pass per row. */
679 for (h = 0; h <
height; h++) {
/* Interleave the two inputs element by element: even output positions take
 * src1, odd output positions take src2. */
681 for (w = 0; w <
width; w++) {
682 dest[2 * w + 0] = src1[w];
683 dest[2 * w + 1] = src2[w];
/* NOTE(review): fragment - the function name, the leading parameters and the
 * definitions of w, h, x and y are on lines that are not visible here. */
694 int srcStride1,
int srcStride2,
695 int dstStride1,
int dstStride2)
/* First plane: output row y is built from source row y >> 1, so every source
 * row feeds two consecutive output rows. */
701 for (y = 0; y < h; y++) {
702 const uint8_t *
s1 = src1 + srcStride1 * (y >> 1);
703 uint8_t *d = dst1 + dstStride1 * y;
/* Each source sample is written to two adjacent output positions. */
704 for (x = 0; x < w; x++)
705 d[2 * x] = d[2 * x + 1] = s1[x];
/* Second plane: identical replication from src2 into dst2. */
707 for (y = 0; y < h; y++) {
708 const uint8_t *
s2 = src2 + srcStride2 * (y >> 1);
709 uint8_t *d = dst2 + dstStride2 * y;
710 for (x = 0; x < w; x++)
711 d[2 * x] = d[2 * x + 1] = s2[x];
/* NOTE(review): fragment - the function name, the leading parameters and the
 * definitions of w, h, x and y are on lines that are not visible here. */
718 int srcStride1,
int srcStride2,
719 int srcStride3,
int dstStride)
/* Per output row: src1 is read at row y, src2 and src3 at row y >> 2, so each
 * src2/src3 row is shared by four consecutive output rows. */
725 for (y = 0; y < h; y++) {
726 const uint8_t *yp = src1 + srcStride1 * y;
727 const uint8_t *up = src2 + srcStride2 * (y >> 2);
728 const uint8_t *vp = src3 + srcStride3 * (y >> 2);
729 uint8_t *d = dst + dstStride * y;
/* Each iteration consumes four yp samples plus one up and one vp sample and
 * emits eight bytes in the order Y U Y V Y U Y V, reusing the same up[x] and
 * vp[x] for both halves. */
730 for (x = 0; x < w; x++) {
731 const int x2 = x << 2;
732 d[8 * x + 0] = yp[x2];
733 d[8 * x + 1] = up[x];
734 d[8 * x + 2] = yp[x2 + 1];
735 d[8 * x + 3] = vp[x];
736 d[8 * x + 4] = yp[x2 + 2];
737 d[8 * x + 5] = up[x];
738 d[8 * x + 6] = yp[x2 + 3];
739 d[8 * x + 7] = vp[x];
/* NOTE(review): isolated statements from what appear to be five separate
 * loops; the loop headers, the range of count and any pointer adjustments
 * made beforehand are on lines that are not visible here. */
/* Copy every second source element. */
750 dst[count] = src[2 * count];
/* Split each 4-element source group: element 0 to dst0, element 2 to dst1. */
763 dst0[count] = src[4 * count + 0];
764 dst1[count] = src[4 * count + 2];
/* Same split, but each output is the truncating average of the matching
 * elements of two sources (src0 and src1). */
778 dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
779 dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
/* Same expressions as at original lines 763-764; presumably src has been
 * offset on a hidden line so that different elements are selected - confirm. */
793 dst0[count] = src[4 * count + 0];
794 dst1[count] = src[4 * count + 2];
/* Same expressions as at original lines 778-779; presumably src0/src1 have
 * been offset on hidden lines - confirm. */
810 dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
811 dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
/* NOTE(review): four fragments with identical visible text, apparently the
 * openings of four separate functions; their names, leading parameters and
 * loop bodies are on lines that are not visible here. */
818 int lumStride,
int chromStride,
int srcStride)
/* Negating, shifting right and negating again yields width / 2 rounded up
 * (rather than down) when >> on a negative int is an arithmetic shift, which
 * is implementation-defined. */
821 const int chromWidth = -((-
width) >> 1);
/* One pass per row. */
823 for (y = 0; y <
height; y++) {
838 int lumStride,
int chromStride,
int srcStride)
/* width / 2 rounded up, as above. */
841 const int chromWidth = -((-
width) >> 1);
843 for (y = 0; y <
height; y++) {
856 int lumStride,
int chromStride,
int srcStride)
/* width / 2 rounded up, as above. */
859 const int chromWidth = -((-
width) >> 1);
861 for (y = 0; y <
height; y++) {
876 int lumStride,
int chromStride,
int srcStride)
/* width / 2 rounded up, as above. */
879 const int chromWidth = -((-
width) >> 1);
881 for (y = 0; y <
height; y++) {