00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088 #include <stdio.h>
00089 #include <stdlib.h>
00090 #include <string.h>
00091 #include <inttypes.h>
00092 #include <assert.h>
00093
00094 #include "config.h"
00095 #include "libswscale/rgb2rgb.h"
00096 #include "libswscale/swscale.h"
00097 #include "libswscale/swscale_internal.h"
00098 #include "libavutil/cpu.h"
00099 #include "libavutil/pixdesc.h"
00100 #include "yuv2rgb_altivec.h"
00101
/* Short aliases for 8-bit sample types. */
typedef signed char   sbyte;
typedef unsigned char ubyte;

/*
 * Explicitly undefine these two build switches.  Neither of them is tested
 * anywhere in the visible part of this file.
 */
#undef INC_SCALING
#undef PROFILE_THE_BEAST
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144 static const vector unsigned char
00145 perm_rgb_0 = { 0x00, 0x01, 0x10, 0x02, 0x03, 0x11, 0x04, 0x05,
00146 0x12, 0x06, 0x07, 0x13, 0x08, 0x09, 0x14, 0x0a },
00147 perm_rgb_1 = { 0x0b, 0x15, 0x0c, 0x0d, 0x16, 0x0e, 0x0f, 0x17,
00148 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
00149 perm_rgb_2 = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
00150 0x00, 0x01, 0x18, 0x02, 0x03, 0x19, 0x04, 0x05 },
00151 perm_rgb_3 = { 0x1a, 0x06, 0x07, 0x1b, 0x08, 0x09, 0x1c, 0x0a,
00152 0x0b, 0x1d, 0x0c, 0x0d, 0x1e, 0x0e, 0x0f, 0x1f };
00153
/*
 * Interleave three 16-byte vectors into three output vectors holding the
 * byte triplets (x0[i], x1[i], x2[i]) for i = 0..15, in that order.
 *
 * Note the parameter order: the inputs are listed as x2, x1, x0.
 * y0, y1 and y2 must be assignable lvalues; y0 receives the first 16 output
 * bytes, y1 the next 16 and y2 the last 16.
 */
#define vec_merge3(x2, x1, x0, y0, y1, y2)                                  \
    do {                                                                    \
        __typeof__(x0) m3_pairs_hi, m3_pairs_lo, m3_rest;                   \
        /* (x0, x1) byte pairs for elements 0-7 */                          \
        m3_pairs_hi = vec_mergeh(x0, x1);                                   \
        /* triplets 0-4 plus the first byte of triplet 5 */                 \
        y0          = vec_perm(m3_pairs_hi, x2, perm_rgb_0);                \
        /* leftovers of elements 5-7, followed by x2[8..15] */              \
        m3_rest     = vec_perm(m3_pairs_hi, x2, perm_rgb_1);                \
        /* (x0, x1) byte pairs for elements 8-15 */                         \
        m3_pairs_lo = vec_mergel(x0, x1);                                   \
        y1          = vec_perm(m3_pairs_lo, m3_rest, perm_rgb_2);           \
        y2          = vec_perm(m3_pairs_lo, m3_rest, perm_rgb_3);           \
    } while (0)
00164
/*
 * Store 16 pixels as packed 3-byte triplets in the byte order x2, x1, x0
 * (B, G, R when called with x0 = R, x1 = G, x2 = B).
 *
 * ptr must be a modifiable pointer to 16-byte vectors, as it is at every call
 * site in this file; it is advanced past the three vectors written.
 */
#define vec_mstbgr24(x0, x1, x2, ptr)                                       \
    do {                                                                    \
        __typeof__(x0) bgr_first, bgr_second, bgr_third;                    \
        vec_merge3(x0, x1, x2, bgr_first, bgr_second, bgr_third);           \
        vec_st(bgr_first,   0, ptr);                                        \
        vec_st(bgr_second, 16, ptr);                                        \
        vec_st(bgr_third,  32, ptr);                                        \
        ptr += 3;                                                           \
    } while (0)
00173
/*
 * Store 16 pixels as packed 3-byte triplets in the byte order x0, x1, x2
 * (R, G, B when called with x0 = R, x1 = G, x2 = B).
 *
 * ptr must be a modifiable pointer to 16-byte vectors, as it is at every call
 * site in this file; it is advanced past the three vectors written.
 */
#define vec_mstrgb24(x0, x1, x2, ptr)                                       \
    do {                                                                    \
        __typeof__(x0) rgb_first, rgb_second, rgb_third;                    \
        vec_merge3(x2, x1, x0, rgb_first, rgb_second, rgb_third);           \
        vec_st(rgb_first,   0, ptr);                                        \
        vec_st(rgb_second, 16, ptr);                                        \
        vec_st(rgb_third,  32, ptr);                                        \
        ptr += 3;                                                           \
    } while (0)
00182
00183
00184
00185
00186
/*
 * Store 16 pixels as packed 4-byte groups (x0[i], x1[i], x2[i], x3[i]).
 *
 * T is the vector type of the four inputs.  Bytes are first merged into
 * (x0, x1) and (x2, x3) pairs, then the pairs are merged as 16-bit units to
 * form the 4-byte pixels.  Four vectors (64 bytes) are written and ptr, which
 * must be a modifiable pointer to 16-byte vectors, is advanced past them.
 */
#define vec_mstrgb32(T, x0, x1, x2, x3, ptr)                                \
    do {                                                                    \
        T px_01, px_23, quad_even, quad_odd;                                \
                                                                            \
        /* pixels 0-7 */                                                    \
        px_01     = vec_mergeh(x0, x1);                                     \
        px_23     = vec_mergeh(x2, x3);                                     \
        quad_even = (T) vec_mergeh((vector unsigned short) px_01,           \
                                   (vector unsigned short) px_23);          \
        quad_odd  = (T) vec_mergel((vector unsigned short) px_01,           \
                                   (vector unsigned short) px_23);          \
        vec_st(quad_even,  0, (T *) ptr);                                   \
        vec_st(quad_odd,  16, (T *) ptr);                                   \
                                                                            \
        /* pixels 8-15 */                                                   \
        px_01     = vec_mergel(x0, x1);                                     \
        px_23     = vec_mergel(x2, x3);                                     \
        quad_even = (T) vec_mergeh((vector unsigned short) px_01,           \
                                   (vector unsigned short) px_23);          \
        quad_odd  = (T) vec_mergel((vector unsigned short) px_01,           \
                                   (vector unsigned short) px_23);          \
        vec_st(quad_even, 32, (T *) ptr);                                   \
        vec_st(quad_odd,  48, (T *) ptr);                                   \
                                                                            \
        ptr += 4;                                                           \
    } while (0)
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
/*
 * Widen bytes 0-7 of x to eight 16-bit lanes.
 *
 * Permute index 0x10 selects byte 0 of the all-zero second operand, so every
 * output halfword is a zero byte followed by one source byte (a zero-extended
 * value in AltiVec's big-endian lane layout).
 */
#define vec_unh(x)                                                          \
    ((vector signed short)                                                  \
         vec_perm((x), (__typeof__(x)) { 0 },                               \
                  ((vector unsigned char) {                                 \
                      0x10, 0x00, 0x10, 0x01, 0x10, 0x02, 0x10, 0x03,       \
                      0x10, 0x04, 0x10, 0x05, 0x10, 0x06, 0x10, 0x07 })))
00228
/*
 * Widen bytes 8-15 of x to eight 16-bit lanes.
 *
 * Permute index 0x10 selects byte 0 of the all-zero second operand, so every
 * output halfword is a zero byte followed by one source byte (a zero-extended
 * value in AltiVec's big-endian lane layout).
 */
#define vec_unl(x)                                                          \
    ((vector signed short)                                                  \
         vec_perm((x), (__typeof__(x)) { 0 },                               \
                  ((vector unsigned char) {                                 \
                      0x10, 0x08, 0x10, 0x09, 0x10, 0x0A, 0x10, 0x0B,       \
                      0x10, 0x0C, 0x10, 0x0D, 0x10, 0x0E, 0x10, 0x0F })))
00235
/*
 * Clamp every signed 16-bit lane of x to the range [16, 235].
 * (Raising to the lower bound first and then capping gives the same result
 * as capping first, because the lower bound is below the upper bound.)
 */
#define vec_clip_s16(x)                                                     \
    vec_min(vec_max((x), ((vector signed short) {                           \
                        16, 16, 16, 16, 16, 16, 16, 16 })),                 \
            ((vector signed short) {                                        \
                235, 235, 235, 235, 235, 235, 235, 235 }))
00240
/*
 * Pack two vectors of eight signed 16-bit lanes into one vector of sixteen
 * unsigned bytes (x supplies bytes 0-7, y bytes 8-15).
 *
 * Negative lanes are raised to 0 by vec_max(); the lanes are then handed to
 * vec_packs() as unsigned shorts, whose saturating pack handles the upper
 * bound of the byte range.
 */
#define vec_packclp(x, y)                                                   \
    ((vector unsigned char)                                                 \
         vec_packs((vector unsigned short)                                  \
                       vec_max((x), ((vector signed short) { 0 })),         \
                   (vector unsigned short)                                  \
                       vec_max((y), ((vector signed short) { 0 }))))
00247
00248
00249
00250 static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y,
00251 vector signed short U, vector signed short V,
00252 vector signed short *R, vector signed short *G,
00253 vector signed short *B)
00254 {
00255 vector signed short vx, ux, uvx;
00256
00257 Y = vec_mradds(Y, c->CY, c->OY);
00258 U = vec_sub(U, (vector signed short)
00259 vec_splat((vector signed short) { 128 }, 0));
00260 V = vec_sub(V, (vector signed short)
00261 vec_splat((vector signed short) { 128 }, 0));
00262
00263
00264 ux = vec_sl(U, c->CSHIFT);
00265 *B = vec_mradds(ux, c->CBU, Y);
00266
00267
00268 vx = vec_sl(V, c->CSHIFT);
00269 *R = vec_mradds(vx, c->CRV, Y);
00270
00271
00272 uvx = vec_mradds(U, c->CGU, Y);
00273 *G = vec_mradds(V, c->CGV, uvx);
00274 }
00275
00276
00277
00278
00279
00280
00281
/*
 * DEFCSP420_CVT(name, out_pixels) defines the slice converter
 *
 *     static int altivec_<name>(SwsContext *c, const unsigned char **in,
 *                               int *instrides, int srcSliceY, int srcSliceH,
 *                               unsigned char **oplanes, int *outstrides)
 *
 * which reads luma from in[0] and half-width chroma from in[1] / in[2],
 * converts two luma rows at a time (both rows share one chroma row) and
 * writes packed pixels to oplanes[0] through out_pixels(R, G, B, ptr).
 * The function returns srcSliceH.
 *
 * Each inner iteration handles 16 pixels on both rows, so only w / 16 full
 * groups per row and h / 2 row pairs are converted.
 *
 * The output pointers for every row pair are derived from outstrides[0].
 * Earlier revisions advanced them by "pixels written + one stride", which is
 * only correct when the stride equals the packed row width.
 */
#define DEFCSP420_CVT(name, out_pixels) \
static int altivec_ ## name(SwsContext *c, const unsigned char **in, \
                            int *instrides, int srcSliceY, int srcSliceH, \
                            unsigned char **oplanes, int *outstrides) \
{ \
    int w = c->srcW; \
    int h = srcSliceH; \
    int i, j; \
    int instrides_scl[3]; \
    vector unsigned char y0, y1; \
 \
    vector signed char u, v; \
 \
    vector signed short Y0, Y1, Y2, Y3; \
    vector signed short U, V; \
    vector signed short vx, ux, uvx; \
    vector signed short vx0, ux0, uvx0; \
    vector signed short vx1, ux1, uvx1; \
    vector signed short R0, G0, B0; \
    vector signed short R1, G1, B1; \
    vector unsigned char R, G, B; \
 \
    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \
    vector unsigned char align_perm; \
 \
    /* local copies of the conversion coefficients */ \
    vector signed short lCY = c->CY; \
    vector signed short lOY = c->OY; \
    vector signed short lCRV = c->CRV; \
    vector signed short lCBU = c->CBU; \
    vector signed short lCGU = c->CGU; \
    vector signed short lCGV = c->CGV; \
    vector unsigned short lCSHIFT = c->CSHIFT; \
 \
    const unsigned char *y1i = in[0]; \
    const unsigned char *y2i = in[0] + instrides[0]; \
    const unsigned char *ui = in[1]; \
    const unsigned char *vi = in[2]; \
 \
    /* first output row of the current row pair */ \
    unsigned char *dst_row = oplanes[0] + srcSliceY * outstrides[0]; \
    vector unsigned char *oute, *outo; \
 \
    /* luma: skip to the row after next, minus the w bytes consumed */ \
    instrides_scl[0] = instrides[0] * 2 - w; \
    /* chroma: next row, minus the w / 2 bytes consumed */ \
    instrides_scl[1] = instrides[1] - w / 2; \
    instrides_scl[2] = instrides[2] - w / 2; \
 \
    for (i = 0; i < h / 2; i++) { \
        /* even and odd output rows of this pair */ \
        oute = (vector unsigned char *) dst_row; \
        outo = (vector unsigned char *) (dst_row + outstrides[0]); \
 \
        /* data-stream prefetch hints for the two output rows */ \
        vec_dstst(outo, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 0); \
        vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 1); \
 \
        for (j = 0; j < w / 16; j++) { \
            y1ivP = (const vector unsigned char *) y1i; \
            y2ivP = (const vector unsigned char *) y2i; \
            uivP = (const vector unsigned char *) ui; \
            vivP = (const vector unsigned char *) vi; \
 \
            /* unaligned loads: fetch two vectors and realign with lvsl */ \
            align_perm = vec_lvsl(0, y1i); \
            y0 = (vector unsigned char) \
                     vec_perm(y1ivP[0], y1ivP[1], align_perm); \
 \
            align_perm = vec_lvsl(0, y2i); \
            y1 = (vector unsigned char) \
                     vec_perm(y2ivP[0], y2ivP[1], align_perm); \
 \
            align_perm = vec_lvsl(0, ui); \
            u = (vector signed char) \
                    vec_perm(uivP[0], uivP[1], align_perm); \
 \
            align_perm = vec_lvsl(0, vi); \
            v = (vector signed char) \
                    vec_perm(vivP[0], vivP[1], align_perm); \
 \
            /* centre chroma; { 128 } only sets lane 0, hence the splat */ \
            u = (vector signed char) \
                    vec_sub(u, \
                            (vector signed char) \
                                vec_splat((vector signed char) { 128 }, 0)); \
            v = (vector signed char) \
                    vec_sub(v, \
                            (vector signed char) \
                                vec_splat((vector signed char) { 128 }, 0)); \
 \
            /* only the first 8 chroma samples are needed for 16 pixels */ \
            U = vec_unpackh(u); \
            V = vec_unpackh(v); \
 \
            Y0 = vec_unh(y0); \
            Y1 = vec_unl(y0); \
            Y2 = vec_unh(y1); \
            Y3 = vec_unl(y1); \
 \
            Y0 = vec_mradds(Y0, lCY, lOY); \
            Y1 = vec_mradds(Y1, lCY, lOY); \
            Y2 = vec_mradds(Y2, lCY, lOY); \
            Y3 = vec_mradds(Y3, lCY, lOY); \
 \
            /* blue contribution, duplicated for each pair of pixels */ \
            ux = vec_sl(U, lCSHIFT); \
            ux = vec_mradds(ux, lCBU, (vector signed short) { 0 }); \
            ux0 = vec_mergeh(ux, ux); \
            ux1 = vec_mergel(ux, ux); \
 \
            /* red contribution, duplicated for each pair of pixels */ \
            vx = vec_sl(V, lCSHIFT); \
            vx = vec_mradds(vx, lCRV, (vector signed short) { 0 }); \
            vx0 = vec_mergeh(vx, vx); \
            vx1 = vec_mergel(vx, vx); \
 \
            /* green contribution, duplicated for each pair of pixels */ \
            uvx = vec_mradds(U, lCGU, (vector signed short) { 0 }); \
            uvx = vec_mradds(V, lCGV, uvx); \
            uvx0 = vec_mergeh(uvx, uvx); \
            uvx1 = vec_mergel(uvx, uvx); \
 \
            /* even row */ \
            R0 = vec_add(Y0, vx0); \
            G0 = vec_add(Y0, uvx0); \
            B0 = vec_add(Y0, ux0); \
            R1 = vec_add(Y1, vx1); \
            G1 = vec_add(Y1, uvx1); \
            B1 = vec_add(Y1, ux1); \
 \
            R = vec_packclp(R0, R1); \
            G = vec_packclp(G0, G1); \
            B = vec_packclp(B0, B1); \
 \
            out_pixels(R, G, B, oute); \
 \
            /* odd row, reusing the same chroma contributions */ \
            R0 = vec_add(Y2, vx0); \
            G0 = vec_add(Y2, uvx0); \
            B0 = vec_add(Y2, ux0); \
            R1 = vec_add(Y3, vx1); \
            G1 = vec_add(Y3, uvx1); \
            B1 = vec_add(Y3, ux1); \
            R = vec_packclp(R0, R1); \
            G = vec_packclp(G0, G1); \
            B = vec_packclp(B0, B1); \
 \
            out_pixels(R, G, B, outo); \
 \
            y1i += 16; \
            y2i += 16; \
            ui += 8; \
            vi += 8; \
        } \
 \
        /* step to the next pair of output rows */ \
        dst_row += 2 * outstrides[0]; \
 \
        ui += instrides_scl[1]; \
        vi += instrides_scl[2]; \
        y1i += instrides_scl[0]; \
        y2i += instrides_scl[0]; \
    } \
    return srcSliceH; \
}
00442
00443 #define out_abgr(a, b, c, ptr) \
00444 vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), c, b, a, ptr)
00445 #define out_bgra(a, b, c, ptr) \
00446 vec_mstrgb32(__typeof__(a), c, b, a, ((__typeof__(a)) { 255 }), ptr)
00447 #define out_rgba(a, b, c, ptr) \
00448 vec_mstrgb32(__typeof__(a), a, b, c, ((__typeof__(a)) { 255 }), ptr)
00449 #define out_argb(a, b, c, ptr) \
00450 vec_mstrgb32(__typeof__(a), ((__typeof__(a)) { 255 }), a, b, c, ptr)
00451 #define out_rgb24(a, b, c, ptr) vec_mstrgb24(a, b, c, ptr)
00452 #define out_bgr24(a, b, c, ptr) vec_mstbgr24(a, b, c, ptr)
00453
00454 DEFCSP420_CVT(yuv2_abgr, out_abgr)
00455 DEFCSP420_CVT(yuv2_bgra, out_bgra)
00456 DEFCSP420_CVT(yuv2_rgba, out_rgba)
00457 DEFCSP420_CVT(yuv2_argb, out_argb)
00458 DEFCSP420_CVT(yuv2_rgb24, out_rgb24)
00459 DEFCSP420_CVT(yuv2_bgr24, out_bgr24)
00460
00461
00462
00463 static const vector unsigned char
00464 demux_u = { 0x10, 0x00, 0x10, 0x00,
00465 0x10, 0x04, 0x10, 0x04,
00466 0x10, 0x08, 0x10, 0x08,
00467 0x10, 0x0c, 0x10, 0x0c },
00468 demux_v = { 0x10, 0x02, 0x10, 0x02,
00469 0x10, 0x06, 0x10, 0x06,
00470 0x10, 0x0A, 0x10, 0x0A,
00471 0x10, 0x0E, 0x10, 0x0E },
00472 demux_y = { 0x10, 0x01, 0x10, 0x03,
00473 0x10, 0x05, 0x10, 0x07,
00474 0x10, 0x09, 0x10, 0x0B,
00475 0x10, 0x0D, 0x10, 0x0F };
00476
00477
00478
00479
00480 static int altivec_uyvy_rgb32(SwsContext *c, const unsigned char **in,
00481 int *instrides, int srcSliceY, int srcSliceH,
00482 unsigned char **oplanes, int *outstrides)
00483 {
00484 int w = c->srcW;
00485 int h = srcSliceH;
00486 int i, j;
00487 vector unsigned char uyvy;
00488 vector signed short Y, U, V;
00489 vector signed short R0, G0, B0, R1, G1, B1;
00490 vector unsigned char R, G, B;
00491 vector unsigned char *out;
00492 const ubyte *img;
00493
00494 img = in[0];
00495 out = (vector unsigned char *) (oplanes[0] + srcSliceY * outstrides[0]);
00496
00497 for (i = 0; i < h; i++)
00498 for (j = 0; j < w / 16; j++) {
00499 uyvy = vec_ld(0, img);
00500
00501 U = (vector signed short)
00502 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u);
00503 V = (vector signed short)
00504 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v);
00505 Y = (vector signed short)
00506 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y);
00507
00508 cvtyuvtoRGB(c, Y, U, V, &R0, &G0, &B0);
00509
00510 uyvy = vec_ld(16, img);
00511
00512 U = (vector signed short)
00513 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_u);
00514 V = (vector signed short)
00515 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_v);
00516 Y = (vector signed short)
00517 vec_perm(uyvy, (vector unsigned char) { 0 }, demux_y);
00518
00519 cvtyuvtoRGB(c, Y, U, V, &R1, &G1, &B1);
00520
00521 R = vec_packclp(R0, R1);
00522 G = vec_packclp(G0, G1);
00523 B = vec_packclp(B0, B1);
00524
00525
00526 out_rgba(R, G, B, out);
00527
00528 img += 32;
00529 }
00530 return srcSliceH;
00531 }
00532
00533
00534
00535
00536
00537
00538
00539 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
00540 {
00541 if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
00542 return NULL;
00543
00544
00545
00546
00547
00548
00549
00550
00551 if ((c->srcW & 0xf) != 0)
00552 return NULL;
00553
00554 switch (c->srcFormat) {
00555 case PIX_FMT_YUV410P:
00556 case PIX_FMT_YUV420P:
00557
00558 case PIX_FMT_GRAY8:
00559 case PIX_FMT_NV12:
00560 case PIX_FMT_NV21:
00561 if ((c->srcH & 0x1) != 0)
00562 return NULL;
00563
00564 switch (c->dstFormat) {
00565 case PIX_FMT_RGB24:
00566 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
00567 return altivec_yuv2_rgb24;
00568 case PIX_FMT_BGR24:
00569 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
00570 return altivec_yuv2_bgr24;
00571 case PIX_FMT_ARGB:
00572 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
00573 return altivec_yuv2_argb;
00574 case PIX_FMT_ABGR:
00575 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
00576 return altivec_yuv2_abgr;
00577 case PIX_FMT_RGBA:
00578 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
00579 return altivec_yuv2_rgba;
00580 case PIX_FMT_BGRA:
00581 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
00582 return altivec_yuv2_bgra;
00583 default: return NULL;
00584 }
00585 break;
00586
00587 case PIX_FMT_UYVY422:
00588 switch (c->dstFormat) {
00589 case PIX_FMT_BGR32:
00590 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
00591 return altivec_uyvy_rgb32;
00592 default: return NULL;
00593 }
00594 break;
00595 }
00596 return NULL;
00597 }
00598
00599 void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4],
00600 int brightness, int contrast,
00601 int saturation)
00602 {
00603 union {
00604 DECLARE_ALIGNED(16, signed short, tmp)[8];
00605 vector signed short vec;
00606 } buf;
00607
00608 buf.tmp[0] = ((0xffffLL) * contrast >> 8) >> 9;
00609 buf.tmp[1] = -256 * brightness;
00610 buf.tmp[2] = (inv_table[0] >> 3) * (contrast >> 16) * (saturation >> 16);
00611 buf.tmp[3] = (inv_table[1] >> 3) * (contrast >> 16) * (saturation >> 16);
00612 buf.tmp[4] = -((inv_table[2] >> 1) * (contrast >> 16) * (saturation >> 16));
00613 buf.tmp[5] = -((inv_table[3] >> 1) * (contrast >> 16) * (saturation >> 16));
00614
00615 c->CSHIFT = (vector unsigned short) vec_splat_u16(2);
00616 c->CY = vec_splat((vector signed short) buf.vec, 0);
00617 c->OY = vec_splat((vector signed short) buf.vec, 1);
00618 c->CRV = vec_splat((vector signed short) buf.vec, 2);
00619 c->CBU = vec_splat((vector signed short) buf.vec, 3);
00620 c->CGU = vec_splat((vector signed short) buf.vec, 4);
00621 c->CGV = vec_splat((vector signed short) buf.vec, 5);
00622 return;
00623 }
00624
00625 static av_always_inline void ff_yuv2packedX_altivec(SwsContext *c,
00626 const int16_t *lumFilter,
00627 const int16_t **lumSrc,
00628 int lumFilterSize,
00629 const int16_t *chrFilter,
00630 const int16_t **chrUSrc,
00631 const int16_t **chrVSrc,
00632 int chrFilterSize,
00633 const int16_t **alpSrc,
00634 uint8_t *dest,
00635 int dstW, int dstY,
00636 enum PixelFormat target)
00637 {
00638 int i, j;
00639 vector signed short X, X0, X1, Y0, U0, V0, Y1, U1, V1, U, V;
00640 vector signed short R0, G0, B0, R1, G1, B1;
00641
00642 vector unsigned char R, G, B;
00643 vector unsigned char *out, *nout;
00644
00645 vector signed short RND = vec_splat_s16(1 << 3);
00646 vector unsigned short SCL = vec_splat_u16(4);
00647 DECLARE_ALIGNED(16, unsigned int, scratch)[16];
00648
00649 vector signed short *YCoeffs, *CCoeffs;
00650
00651 YCoeffs = c->vYCoeffsBank + dstY * lumFilterSize;
00652 CCoeffs = c->vCCoeffsBank + dstY * chrFilterSize;
00653
00654 out = (vector unsigned char *) dest;
00655
00656 for (i = 0; i < dstW; i += 16) {
00657 Y0 = RND;
00658 Y1 = RND;
00659
00660 for (j = 0; j < lumFilterSize; j++) {
00661 X0 = vec_ld(0, &lumSrc[j][i]);
00662 X1 = vec_ld(16, &lumSrc[j][i]);
00663 Y0 = vec_mradds(X0, YCoeffs[j], Y0);
00664 Y1 = vec_mradds(X1, YCoeffs[j], Y1);
00665 }
00666
00667 U = RND;
00668 V = RND;
00669
00670 for (j = 0; j < chrFilterSize; j++) {
00671 X = vec_ld(0, &chrUSrc[j][i / 2]);
00672 U = vec_mradds(X, CCoeffs[j], U);
00673 X = vec_ld(0, &chrVSrc[j][i / 2]);
00674 V = vec_mradds(X, CCoeffs[j], V);
00675 }
00676
00677
00678 Y0 = vec_sra(Y0, SCL);
00679 Y1 = vec_sra(Y1, SCL);
00680 U = vec_sra(U, SCL);
00681 V = vec_sra(V, SCL);
00682
00683 Y0 = vec_clip_s16(Y0);
00684 Y1 = vec_clip_s16(Y1);
00685 U = vec_clip_s16(U);
00686 V = vec_clip_s16(V);
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697 U0 = vec_mergeh(U, U);
00698 V0 = vec_mergeh(V, V);
00699
00700 U1 = vec_mergel(U, U);
00701 V1 = vec_mergel(V, V);
00702
00703 cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0);
00704 cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1);
00705
00706 R = vec_packclp(R0, R1);
00707 G = vec_packclp(G0, G1);
00708 B = vec_packclp(B0, B1);
00709
00710 switch (target) {
00711 case PIX_FMT_ABGR:
00712 out_abgr(R, G, B, out);
00713 break;
00714 case PIX_FMT_BGRA:
00715 out_bgra(R, G, B, out);
00716 break;
00717 case PIX_FMT_RGBA:
00718 out_rgba(R, G, B, out);
00719 break;
00720 case PIX_FMT_ARGB:
00721 out_argb(R, G, B, out);
00722 break;
00723 case PIX_FMT_RGB24:
00724 out_rgb24(R, G, B, out);
00725 break;
00726 case PIX_FMT_BGR24:
00727 out_bgr24(R, G, B, out);
00728 break;
00729 default:
00730 {
00731
00732
00733 static int printed_error_message;
00734 if (!printed_error_message) {
00735 av_log(c, AV_LOG_ERROR,
00736 "altivec_yuv2packedX doesn't support %s output\n",
00737 av_get_pix_fmt_name(c->dstFormat));
00738 printed_error_message = 1;
00739 }
00740 return;
00741 }
00742 }
00743 }
00744
00745 if (i < dstW) {
00746 i -= 16;
00747
00748 Y0 = RND;
00749 Y1 = RND;
00750
00751 for (j = 0; j < lumFilterSize; j++) {
00752 X0 = vec_ld(0, &lumSrc[j][i]);
00753 X1 = vec_ld(16, &lumSrc[j][i]);
00754 Y0 = vec_mradds(X0, YCoeffs[j], Y0);
00755 Y1 = vec_mradds(X1, YCoeffs[j], Y1);
00756 }
00757
00758 U = RND;
00759 V = RND;
00760
00761 for (j = 0; j < chrFilterSize; j++) {
00762 X = vec_ld(0, &chrUSrc[j][i / 2]);
00763 U = vec_mradds(X, CCoeffs[j], U);
00764 X = vec_ld(0, &chrVSrc[j][i / 2]);
00765 V = vec_mradds(X, CCoeffs[j], V);
00766 }
00767
00768
00769 Y0 = vec_sra(Y0, SCL);
00770 Y1 = vec_sra(Y1, SCL);
00771 U = vec_sra(U, SCL);
00772 V = vec_sra(V, SCL);
00773
00774 Y0 = vec_clip_s16(Y0);
00775 Y1 = vec_clip_s16(Y1);
00776 U = vec_clip_s16(U);
00777 V = vec_clip_s16(V);
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788 U0 = vec_mergeh(U, U);
00789 V0 = vec_mergeh(V, V);
00790
00791 U1 = vec_mergel(U, U);
00792 V1 = vec_mergel(V, V);
00793
00794 cvtyuvtoRGB(c, Y0, U0, V0, &R0, &G0, &B0);
00795 cvtyuvtoRGB(c, Y1, U1, V1, &R1, &G1, &B1);
00796
00797 R = vec_packclp(R0, R1);
00798 G = vec_packclp(G0, G1);
00799 B = vec_packclp(B0, B1);
00800
00801 nout = (vector unsigned char *) scratch;
00802 switch (target) {
00803 case PIX_FMT_ABGR:
00804 out_abgr(R, G, B, nout);
00805 break;
00806 case PIX_FMT_BGRA:
00807 out_bgra(R, G, B, nout);
00808 break;
00809 case PIX_FMT_RGBA:
00810 out_rgba(R, G, B, nout);
00811 break;
00812 case PIX_FMT_ARGB:
00813 out_argb(R, G, B, nout);
00814 break;
00815 case PIX_FMT_RGB24:
00816 out_rgb24(R, G, B, nout);
00817 break;
00818 case PIX_FMT_BGR24:
00819 out_bgr24(R, G, B, nout);
00820 break;
00821 default:
00822
00823 av_log(c, AV_LOG_ERROR,
00824 "altivec_yuv2packedX doesn't support %s output\n",
00825 av_get_pix_fmt_name(c->dstFormat));
00826 return;
00827 }
00828
00829 memcpy(&((uint32_t *) dest)[i], scratch, (dstW - i) / 4);
00830 }
00831 }
00832
00833 #define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
00834 void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, \
00835 const int16_t *lumFilter, \
00836 const int16_t **lumSrc, \
00837 int lumFilterSize, \
00838 const int16_t *chrFilter, \
00839 const int16_t **chrUSrc, \
00840 const int16_t **chrVSrc, \
00841 int chrFilterSize, \
00842 const int16_t **alpSrc, \
00843 uint8_t *dest, int dstW, int dstY) \
00844 { \
00845 ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
00846 chrFilter, chrUSrc, chrVSrc, \
00847 chrFilterSize, alpSrc, \
00848 dest, dstW, dstY, pixfmt); \
00849 }
00850
00851 YUV2PACKEDX_WRAPPER(abgr, PIX_FMT_ABGR);
00852 YUV2PACKEDX_WRAPPER(bgra, PIX_FMT_BGRA);
00853 YUV2PACKEDX_WRAPPER(argb, PIX_FMT_ARGB);
00854 YUV2PACKEDX_WRAPPER(rgba, PIX_FMT_RGBA);
00855 YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
00856 YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);