00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088 #include <stdio.h>
00089 #include <stdlib.h>
00090 #include <string.h>
00091 #include <inttypes.h>
00092 #include <assert.h>
00093 #include "config.h"
00094 #include "libswscale/rgb2rgb.h"
00095 #include "libswscale/swscale.h"
00096 #include "libswscale/swscale_internal.h"
00097
00098 #undef PROFILE_THE_BEAST /* forced off; nothing in the visible listing tests this switch */
00099 #undef INC_SCALING /* forced off; nothing in the visible listing tests this switch */
00100 /* Short aliases for 8-bit sample types; ubyte is used below for the raw input plane cursors. */
00101 typedef unsigned char ubyte;
00102 typedef signed char sbyte;
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133 /* Permute tables for vec_merge3().  In a vec_perm() pattern, index bytes 0x00-0x0f pick */
00134 /* from the first vector operand and 0x10-0x1f from the second. */
00135 /* perm_rgb_0: applied to (mergeh(x0,x1), x2); yields the interleaved triples of pixels */
00136 /*             0-4 plus the first byte of pixel 5, i.e. output bytes 0-15. */
00137 /* perm_rgb_1: applied to (mergeh(x0,x1), x2); yields a temporary holding the remaining */
00138 /*             bytes of pixels 5-7 in bytes 0-7 and x2[8..15] in bytes 8-15. */
00139 /* perm_rgb_2: applied to (mergel(x0,x1), that temporary); yields output bytes 16-31. */
00140 /* perm_rgb_3: applied to (mergel(x0,x1), that temporary); yields output bytes 32-47. */
00141 static
00142 const vector unsigned char
00143 perm_rgb_0 = {0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
00144 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a},
00145 perm_rgb_1 = {0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
00146 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f},
00147 perm_rgb_2 = {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
00148 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05},
00149 perm_rgb_3 = {0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
00150 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f};
00151 /* vec_merge3(x2,x1,x0, y0,y1,y2): interleave three 16-byte vectors into 48 bytes laid out as x0[i],x1[i],x2[i] for i = 0..15; y0,y1,y2 receive the result in memory order. Note that the macro's THIRD argument supplies the first byte of every triple. */
00152 #define vec_merge3(x2,x1,x0,y0,y1,y2) \
00153 do { \
00154 __typeof__(x0) o0,o2,o3; \
00155 o0 = vec_mergeh (x0,x1); /* x0[i],x1[i] pairs for i = 0..7 */ \
00156 y0 = vec_perm (o0, x2, perm_rgb_0); /* output bytes 0-15 */ \
00157 o2 = vec_perm (o0, x2, perm_rgb_1); /* rest of pixels 5-7, then x2[8..15] */ \
00158 o3 = vec_mergel (x0,x1); /* x0[i],x1[i] pairs for i = 8..15 */ \
00159 y1 = vec_perm (o3,o2,perm_rgb_2); /* output bytes 16-31 */ \
00160 y2 = vec_perm (o3,o2,perm_rgb_3); /* output bytes 32-47 */ \
00161 } while(0)
00162 /* vec_mstbgr24(x0,x1,x2,ptr): interleave and store 16 three-byte pixels at ptr, each pixel written as x2,x1,x0 (arguments R,G,B therefore give B,G,R in memory). ptr must be an lvalue vector pointer: it is post-incremented three times, i.e. advanced by 48 bytes. */
00163 #define vec_mstbgr24(x0,x1,x2,ptr) \
00164 do { \
00165 __typeof__(x0) _0,_1,_2; \
00166 vec_merge3 (x0,x1,x2,_0,_1,_2); /* vec_merge3 emits its third argument first */ \
00167 vec_st (_0, 0, ptr++); \
00168 vec_st (_1, 0, ptr++); \
00169 vec_st (_2, 0, ptr++); \
00170 } while (0)
00171 /* vec_mstrgb24(x0,x1,x2,ptr): same as vec_mstbgr24 but each pixel is written as x0,x1,x2 (arguments R,G,B give R,G,B in memory). ptr is post-incremented three times, i.e. advanced by 48 bytes. */
00172 #define vec_mstrgb24(x0,x1,x2,ptr) \
00173 do { \
00174 __typeof__(x0) _0,_1,_2; \
00175 vec_merge3 (x2,x1,x0,_0,_1,_2); /* arguments reversed so that x0 is emitted first */ \
00176 vec_st (_0, 0, ptr++); \
00177 vec_st (_1, 0, ptr++); \
00178 vec_st (_2, 0, ptr++); \
00179 } while (0)
00180 /* vec_mstrgb32(T,x0,x1,x2,x3,ptr): interleave four 16-byte vectors into 16 four-byte */
00181 /* pixels, each written as x0[i],x1[i],x2[i],x3[i], and store them as four vectors at ptr. */
00182 /* T is the vector type used for the temporaries and for the pointer casts. */
00183 /* ptr must be an lvalue; it is advanced by 4 elements at the end (64 bytes when it */
00184 /* points to 16-byte vectors, as it does at every use in this listing). */
00185 #define vec_mstrgb32(T,x0,x1,x2,x3,ptr) \
00186 do { \
00187 T _0,_1,_2,_3; \
00188 _0 = vec_mergeh (x0,x1); /* x0,x1 byte pairs of pixels 0-7 */ \
00189 _1 = vec_mergeh (x2,x3); /* x2,x3 byte pairs of pixels 0-7 */ \
00190 _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); /* pixels 0-3 */ \
00191 _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); /* pixels 4-7 */ \
00192 vec_st (_2, 0*16, (T *)ptr); \
00193 vec_st (_3, 1*16, (T *)ptr); \
00194 _0 = vec_mergel (x0,x1); /* x0,x1 byte pairs of pixels 8-15 */ \
00195 _1 = vec_mergel (x2,x3); /* x2,x3 byte pairs of pixels 8-15 */ \
00196 _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); /* pixels 8-11 */ \
00197 _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); /* pixels 12-15 */ \
00198 vec_st (_2, 2*16, (T *)ptr); \
00199 vec_st (_3, 3*16, (T *)ptr); \
00200 ptr += 4; \
00201 } while (0)
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216 /* vec_unh(x) / vec_unl(x): widen bytes 0-7 (vec_unh) or 8-15 (vec_unl) of x to eight */
00217 /* 16-bit lanes.  Each lane is built from a zero byte (index 0x10 selects from the all-zero */
00218 /* second operand) followed by the source byte, which is a zero-extension when the first */
00219 /* byte of a lane is the most significant one - presumably big-endian AltiVec, TODO confirm. */
00220 #define vec_unh(x) \
00221 (vector signed short) \
00222 vec_perm(x,(__typeof__(x)){0}, \
00223 ((vector unsigned char){0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
00224 0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07}))
00225 #define vec_unl(x) \
00226 (vector signed short) \
00227 vec_perm(x,(__typeof__(x)){0}, \
00228 ((vector unsigned char){0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
00229 0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F}))
00230 /* vec_clip_s16(x): clamp every signed 16-bit lane of x to the range 16..235. */
00231 #define vec_clip_s16(x) \
00232 vec_max (vec_min (x, ((vector signed short){235,235,235,235,235,235,235,235})), \
00233 ((vector signed short){ 16, 16, 16, 16, 16, 16, 16, 16}))
00234 /* vec_packclp(x,y): raise negative lanes of x and y to 0, then pack the 2 x 8 16-bit lanes into 16 unsigned bytes with vec_packs (x fills bytes 0-7, y fills bytes 8-15). */
00235 #define vec_packclp(x,y) \
00236 (vector unsigned char)vec_packs \
00237 ((vector unsigned short)vec_max (x,((vector signed short) {0})), \
00238 (vector unsigned short)vec_max (y,((vector signed short) {0})))
00239
00240
00241
00242
00243 static inline void cvtyuvtoRGB (SwsContext *c,
00244 vector signed short Y, vector signed short U, vector signed short V,
00245 vector signed short *R, vector signed short *G, vector signed short *B)
00246 {
00247 vector signed short vx,ux,uvx;
00248
00249 Y = vec_mradds (Y, c->CY, c->OY);
00250 U = vec_sub (U,(vector signed short)
00251 vec_splat((vector signed short){128},0));
00252 V = vec_sub (V,(vector signed short)
00253 vec_splat((vector signed short){128},0));
00254
00255
00256 ux = vec_sl (U, c->CSHIFT);
00257 *B = vec_mradds (ux, c->CBU, Y);
00258
00259
00260 vx = vec_sl (V, c->CSHIFT);
00261 *R = vec_mradds (vx, c->CRV, Y);
00262
00263
00264 uvx = vec_mradds (U, c->CGU, Y);
00265 *G = vec_mradds (V, c->CGV, uvx);
00266 }
00267 /* DEFCSP420_CVT(name, out_pixels): defines static int altivec_<name>(), a converter */
00268 /* from planar YUV with 2x2-subsampled chroma to packed RGB. */
00269 /* in[0..2] / instrides[0..2] are the Y, U and V planes; output goes to oplanes[0] */
00270 /* starting at row srcSliceY.  out_pixels(R,G,B,ptr) must store 16 pixels and advance ptr. */
00271 /* Each outer iteration handles two luma rows that share one chroma row; each inner */
00272 /* iteration handles 16 pixels, so columns beyond a multiple of 16 and an odd last */
00273 /* row are not converted.  The generated function returns srcSliceH. */
00274 /* NOTE(review): the row-end arithmetic on oute/outo appears to assume that the pixels */
00275 /* written per row fill exactly outstrides[0] bytes - confirm against the callers. */
00276 #define DEFCSP420_CVT(name,out_pixels) \
00277 static int altivec_##name (SwsContext *c, \
00278 unsigned char **in, int *instrides, \
00279 int srcSliceY, int srcSliceH, \
00280 unsigned char **oplanes, int *outstrides) \
00281 { \
00282 int w = c->srcW; \
00283 int h = srcSliceH; \
00284 int i,j; \
00285 int instrides_scl[3]; \
00286 vector unsigned char y0,y1; \
00287 /* y0/y1 above: 16 luma bytes of the upper/lower row; u/v below: raw chroma bytes */ \
00288 vector signed char u,v; \
00289 /* 16-bit working lanes: Y0,Y1 = upper row, Y2,Y3 = lower row */ \
00290 vector signed short Y0,Y1,Y2,Y3; \
00291 vector signed short U,V; \
00292 vector signed short vx,ux,uvx; \
00293 vector signed short vx0,ux0,uvx0; \
00294 vector signed short vx1,ux1,uvx1; \
00295 vector signed short R0,G0,B0; \
00296 vector signed short R1,G1,B1; \
00297 vector unsigned char R,G,B; \
00298 /* input cursors viewed as vector pointers for the two-vector unaligned loads */ \
00299 vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \
00300 vector unsigned char align_perm; \
00301 /* local copies of the coefficient vectors held in the context */ \
00302 vector signed short \
00303 lCY = c->CY, \
00304 lOY = c->OY, \
00305 lCRV = c->CRV, \
00306 lCBU = c->CBU, \
00307 lCGU = c->CGU, \
00308 lCGV = c->CGV; \
00309 \
00310 vector unsigned short lCSHIFT = c->CSHIFT; \
00311 /* byte cursors: y1i/y2i walk two consecutive luma rows, ui/vi the chroma rows */ \
00312 ubyte *y1i = in[0]; \
00313 ubyte *y2i = in[0]+instrides[0]; \
00314 ubyte *ui = in[1]; \
00315 ubyte *vi = in[2]; \
00316 /* oute/outo: output cursors for the upper/lower row of each row pair */ \
00317 vector unsigned char *oute \
00318 = (vector unsigned char *) \
00319 (oplanes[0]+srcSliceY*outstrides[0]); \
00320 vector unsigned char *outo \
00321 = (vector unsigned char *) \
00322 (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); \
00323 /* Row-end adjustments: the inner loop moves the luma cursors by 16*(w/16) bytes and */ \
00324 /* the chroma cursors by half of that; these add the rest of two luma / one chroma stride. */ \
00325 instrides_scl[0] = instrides[0]*2-w; \
00326 instrides_scl[1] = instrides[1]-w/2; \
00327 instrides_scl[2] = instrides[2]-w/2; \
00328 \
00329 /* vec_dstst: data-stream (prefetch) hints on the two output rows about to be written */ \
00330 for (i=0;i<h/2;i++) { \
00331 vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0); \
00332 vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1); \
00333 \
00334 for (j=0;j<w/16;j++) { \
00335 /* unaligned loads: permute two consecutive vectors with the vec_lvsl pattern */ \
00336 y1ivP = (vector unsigned char *)y1i; \
00337 y2ivP = (vector unsigned char *)y2i; \
00338 uivP = (vector unsigned char *)ui; \
00339 vivP = (vector unsigned char *)vi; \
00340 \
00341 align_perm = vec_lvsl (0, y1i); \
00342 y0 = (vector unsigned char) \
00343 vec_perm (y1ivP[0], y1ivP[1], align_perm); \
00344 \
00345 align_perm = vec_lvsl (0, y2i); \
00346 y1 = (vector unsigned char) \
00347 vec_perm (y2ivP[0], y2ivP[1], align_perm); \
00348 \
00349 align_perm = vec_lvsl (0, ui); \
00350 u = (vector signed char) \
00351 vec_perm (uivP[0], uivP[1], align_perm); \
00352 \
00353 align_perm = vec_lvsl (0, vi); \
00354 v = (vector signed char) \
00355 vec_perm (vivP[0], vivP[1], align_perm); \
00356 /* subtract the splatted {128} constant from every chroma byte */ \
00357 u = (vector signed char) \
00358 vec_sub (u,(vector signed char) \
00359 vec_splat((vector signed char){128},0)); \
00360 v = (vector signed char) \
00361 vec_sub (v,(vector signed char) \
00362 vec_splat((vector signed char){128},0)); \
00363 /* widen chroma bytes 0-7 to signed 16 bit; bytes 8-15 of u/v are not used */ \
00364 U = vec_unpackh (u); \
00365 V = vec_unpackh (v); \
00366 \
00367 /* widen the luma bytes to 16-bit lanes */ \
00368 Y0 = vec_unh (y0); \
00369 Y1 = vec_unl (y0); \
00370 Y2 = vec_unh (y1); \
00371 Y3 = vec_unl (y1); \
00372 /* scale and offset luma with the CY / OY vectors */ \
00373 Y0 = vec_mradds (Y0, lCY, lOY); \
00374 Y1 = vec_mradds (Y1, lCY, lOY); \
00375 Y2 = vec_mradds (Y2, lCY, lOY); \
00376 Y3 = vec_mradds (Y3, lCY, lOY); \
00377 \
00378 /* blue term from U; mergeh/mergel duplicate each lane for two adjacent pixels */ \
00379 ux = vec_sl (U, lCSHIFT); \
00380 ux = vec_mradds (ux, lCBU, (vector signed short){0}); \
00381 ux0 = vec_mergeh (ux,ux); \
00382 ux1 = vec_mergel (ux,ux); \
00383 \
00384 /* red term from V, duplicated the same way */ \
00385 vx = vec_sl (V, lCSHIFT); \
00386 vx = vec_mradds (vx, lCRV, (vector signed short){0}); \
00387 vx0 = vec_mergeh (vx,vx); \
00388 vx1 = vec_mergel (vx,vx); \
00389 \
00390 /* green term from U and V (no shift), duplicated the same way */ \
00391 uvx = vec_mradds (U, lCGU, (vector signed short){0}); \
00392 uvx = vec_mradds (V, lCGV, uvx); \
00393 uvx0 = vec_mergeh (uvx,uvx); \
00394 uvx1 = vec_mergel (uvx,uvx); \
00395 /* upper row: add the chroma terms to luma */ \
00396 R0 = vec_add (Y0,vx0); \
00397 G0 = vec_add (Y0,uvx0); \
00398 B0 = vec_add (Y0,ux0); \
00399 R1 = vec_add (Y1,vx1); \
00400 G1 = vec_add (Y1,uvx1); \
00401 B1 = vec_add (Y1,ux1); \
00402 /* clamp at 0 and pack 2 x 8 lanes into 16 bytes per channel */ \
00403 R = vec_packclp (R0,R1); \
00404 G = vec_packclp (G0,G1); \
00405 B = vec_packclp (B0,B1); \
00406 \
00407 out_pixels(R,G,B,oute); \
00408 /* lower row: same chroma terms, luma from Y2/Y3 */ \
00409 R0 = vec_add (Y2,vx0); \
00410 G0 = vec_add (Y2,uvx0); \
00411 B0 = vec_add (Y2,ux0); \
00412 R1 = vec_add (Y3,vx1); \
00413 G1 = vec_add (Y3,uvx1); \
00414 B1 = vec_add (Y3,ux1); \
00415 R = vec_packclp (R0,R1); \
00416 G = vec_packclp (G0,G1); \
00417 B = vec_packclp (B0,B1); \
00418 \
00419 \
00420 out_pixels(R,G,B,outo); \
00421 /* advance by 16 luma and 8 chroma samples */ \
00422 y1i += 16; \
00423 y2i += 16; \
00424 ui += 8; \
00425 vi += 8; \
00426 \
00427 } \
00428 /* add one output stride (in 16-byte vectors) on top of what out_pixels advanced */ \
00429 outo += (outstrides[0])>>4; \
00430 oute += (outstrides[0])>>4; \
00431 /* move the input cursors to the next row pair / next chroma row */ \
00432 ui += instrides_scl[1]; \
00433 vi += instrides_scl[2]; \
00434 y1i += instrides_scl[0]; \
00435 y2i += instrides_scl[0]; \
00436 } \
00437 return srcSliceH; \
00438 }
00439
00440
/*
 * Output adapters: out_<order>(a,b,c,ptr) stores 16 pixels at ptr and
 * advances ptr; the callers in this file pass a,b,c = R,G,B.  The name gives
 * the byte order written to memory.
 *
 * out_opaque(a) builds the constant alpha channel.  A brace literal such as
 * (vector unsigned char){255} sets only element 0 and zero-fills the rest,
 * so element 0 is splatted to give every pixel an alpha of 255.
 */
#define out_opaque(a) ((__typeof__ (a))vec_splat(((__typeof__ (a)){255}),0))
#define out_abgr(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),out_opaque(a),c,b,a,ptr)
#define out_bgra(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),c,b,a,out_opaque(a),ptr)
#define out_rgba(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),a,b,c,out_opaque(a),ptr)
#define out_argb(a,b,c,ptr)  vec_mstrgb32(__typeof__(a),out_opaque(a),a,b,c,ptr)
#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
#define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
00447
00448 DEFCSP420_CVT (yuv2_abgr, out_abgr)
00449 #if 1
00450 DEFCSP420_CVT (yuv2_bgra, out_bgra)
00451 #else
00452 static int altivec_yuv2_bgra32 (SwsContext *c,
00453 unsigned char **in, int *instrides,
00454 int srcSliceY, int srcSliceH,
00455 unsigned char **oplanes, int *outstrides)
00456 {
00457 int w = c->srcW;
00458 int h = srcSliceH;
00459 int i,j;
00460 int instrides_scl[3];
00461 vector unsigned char y0,y1;
00462
00463 vector signed char u,v;
00464
00465 vector signed short Y0,Y1,Y2,Y3;
00466 vector signed short U,V;
00467 vector signed short vx,ux,uvx;
00468 vector signed short vx0,ux0,uvx0;
00469 vector signed short vx1,ux1,uvx1;
00470 vector signed short R0,G0,B0;
00471 vector signed short R1,G1,B1;
00472 vector unsigned char R,G,B;
00473
00474 vector unsigned char *uivP, *vivP;
00475 vector unsigned char align_perm;
00476
00477 vector signed short
00478 lCY = c->CY,
00479 lOY = c->OY,
00480 lCRV = c->CRV,
00481 lCBU = c->CBU,
00482 lCGU = c->CGU,
00483 lCGV = c->CGV;
00484
00485 vector unsigned short lCSHIFT = c->CSHIFT;
00486
00487 ubyte *y1i = in[0];
00488 ubyte *y2i = in[0]+w;
00489 ubyte *ui = in[1];
00490 ubyte *vi = in[2];
00491
00492 vector unsigned char *oute
00493 = (vector unsigned char *)
00494 (oplanes[0]+srcSliceY*outstrides[0]);
00495 vector unsigned char *outo
00496 = (vector unsigned char *)
00497 (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);
00498
00499
00500 instrides_scl[0] = instrides[0];
00501 instrides_scl[1] = instrides[1]-w/2;
00502 instrides_scl[2] = instrides[2]-w/2;
00503
00504
00505 for (i=0;i<h/2;i++) {
00506 vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
00507 vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);
00508
00509 for (j=0;j<w/16;j++) {
00510
00511 y0 = vec_ldl (0,y1i);
00512 y1 = vec_ldl (0,y2i);
00513 uivP = (vector unsigned char *)ui;
00514 vivP = (vector unsigned char *)vi;
00515
00516 align_perm = vec_lvsl (0, ui);
00517 u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);
00518
00519 align_perm = vec_lvsl (0, vi);
00520 v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);
00521 u = (vector signed char)
00522 vec_sub (u,(vector signed char)
00523 vec_splat((vector signed char){128},0));
00524
00525 v = (vector signed char)
00526 vec_sub (v, (vector signed char)
00527 vec_splat((vector signed char){128},0));
00528
00529 U = vec_unpackh (u);
00530 V = vec_unpackh (v);
00531
00532
00533 Y0 = vec_unh (y0);
00534 Y1 = vec_unl (y0);
00535 Y2 = vec_unh (y1);
00536 Y3 = vec_unl (y1);
00537
00538 Y0 = vec_mradds (Y0, lCY, lOY);
00539 Y1 = vec_mradds (Y1, lCY, lOY);
00540 Y2 = vec_mradds (Y2, lCY, lOY);
00541 Y3 = vec_mradds (Y3, lCY, lOY);
00542
00543
00544 ux = vec_sl (U, lCSHIFT);
00545 ux = vec_mradds (ux, lCBU, (vector signed short){0});
00546 ux0 = vec_mergeh (ux,ux);
00547 ux1 = vec_mergel (ux,ux);
00548
00549
00550 vx = vec_sl (V, lCSHIFT);
00551 vx = vec_mradds (vx, lCRV, (vector signed short){0});
00552 vx0 = vec_mergeh (vx,vx);
00553 vx1 = vec_mergel (vx,vx);
00554
00555 uvx = vec_mradds (U, lCGU, (vector signed short){0});
00556 uvx = vec_mradds (V, lCGV, uvx);
00557 uvx0 = vec_mergeh (uvx,uvx);
00558 uvx1 = vec_mergel (uvx,uvx);
00559 R0 = vec_add (Y0,vx0);
00560 G0 = vec_add (Y0,uvx0);
00561 B0 = vec_add (Y0,ux0);
00562 R1 = vec_add (Y1,vx1);
00563 G1 = vec_add (Y1,uvx1);
00564 B1 = vec_add (Y1,ux1);
00565 R = vec_packclp (R0,R1);
00566 G = vec_packclp (G0,G1);
00567 B = vec_packclp (B0,B1);
00568
00569 out_argb(R,G,B,oute);
00570 R0 = vec_add (Y2,vx0);
00571 G0 = vec_add (Y2,uvx0);
00572 B0 = vec_add (Y2,ux0);
00573 R1 = vec_add (Y3,vx1);
00574 G1 = vec_add (Y3,uvx1);
00575 B1 = vec_add (Y3,ux1);
00576 R = vec_packclp (R0,R1);
00577 G = vec_packclp (G0,G1);
00578 B = vec_packclp (B0,B1);
00579
00580 out_argb(R,G,B,outo);
00581 y1i += 16;
00582 y2i += 16;
00583 ui += 8;
00584 vi += 8;
00585
00586 }
00587
00588 outo += (outstrides[0])>>4;
00589 oute += (outstrides[0])>>4;
00590
00591 ui += instrides_scl[1];
00592 vi += instrides_scl[2];
00593 y1i += instrides_scl[0];
00594 y2i += instrides_scl[0];
00595 }
00596 return srcSliceH;
00597 }
00598
00599 #endif
00600
00601
00602 DEFCSP420_CVT (yuv2_rgba, out_rgba)
00603 DEFCSP420_CVT (yuv2_argb, out_argb)
00604 DEFCSP420_CVT (yuv2_rgb24, out_rgb24)
00605 DEFCSP420_CVT (yuv2_bgr24, out_bgr24)
00606 /* Permute tables that split one 16-byte group of packed 4:2:2 data into 16-bit lanes. */
00607 /* Index 0x10 selects a byte of the second vec_perm operand (all zero at every use below), */
00608 /* so each lane is a zero byte followed by one source byte.  demux_u takes source bytes */
00609 /* 0,4,8,12 and demux_v bytes 2,6,10,14, each used for two lanes; demux_y takes bytes 1,3,...,15. */
00610 static
00611 const vector unsigned char
00612 demux_u = {0x10,0x00,0x10,0x00,
00613 0x10,0x04,0x10,0x04,
00614 0x10,0x08,0x10,0x08,
00615 0x10,0x0c,0x10,0x0c},
00616 demux_v = {0x10,0x02,0x10,0x02,
00617 0x10,0x06,0x10,0x06,
00618 0x10,0x0A,0x10,0x0A,
00619 0x10,0x0E,0x10,0x0E},
00620 demux_y = {0x10,0x01,0x10,0x03,
00621 0x10,0x05,0x10,0x07,
00622 0x10,0x09,0x10,0x0B,
00623 0x10,0x0D,0x10,0x0F};
00624
00625
00626
00627
00628 static int altivec_uyvy_rgb32 (SwsContext *c,
00629 unsigned char **in, int *instrides,
00630 int srcSliceY, int srcSliceH,
00631 unsigned char **oplanes, int *outstrides)
00632 {
00633 int w = c->srcW;
00634 int h = srcSliceH;
00635 int i,j;
00636 vector unsigned char uyvy;
00637 vector signed short Y,U,V;
00638 vector signed short R0,G0,B0,R1,G1,B1;
00639 vector unsigned char R,G,B;
00640 vector unsigned char *out;
00641 ubyte *img;
00642
00643 img = in[0];
00644 out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
00645
00646 for (i=0;i<h;i++) {
00647 for (j=0;j<w/16;j++) {
00648 uyvy = vec_ld (0, img);
00649 U = (vector signed short)
00650 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00651
00652 V = (vector signed short)
00653 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00654
00655 Y = (vector signed short)
00656 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00657
00658 cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
00659
00660 uyvy = vec_ld (16, img);
00661 U = (vector signed short)
00662 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00663
00664 V = (vector signed short)
00665 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00666
00667 Y = (vector signed short)
00668 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00669
00670 cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
00671
00672 R = vec_packclp (R0,R1);
00673 G = vec_packclp (G0,G1);
00674 B = vec_packclp (B0,B1);
00675
00676
00677 out_rgba (R,G,B,out);
00678
00679 img += 32;
00680 }
00681 }
00682 return srcSliceH;
00683 }
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
00694 {
00695 if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
00696 return NULL;
00697
00698
00699
00700
00701
00702
00703
00704
00705 if ((c->srcW & 0xf) != 0) return NULL;
00706
00707 switch (c->srcFormat) {
00708 case PIX_FMT_YUV410P:
00709 case PIX_FMT_YUV420P:
00710
00711 case PIX_FMT_GRAY8:
00712 case PIX_FMT_NV12:
00713 case PIX_FMT_NV21:
00714 if ((c->srcH & 0x1) != 0)
00715 return NULL;
00716
00717 switch(c->dstFormat) {
00718 case PIX_FMT_RGB24:
00719 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
00720 return altivec_yuv2_rgb24;
00721 case PIX_FMT_BGR24:
00722 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
00723 return altivec_yuv2_bgr24;
00724 case PIX_FMT_ARGB:
00725 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
00726 return altivec_yuv2_argb;
00727 case PIX_FMT_ABGR:
00728 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
00729 return altivec_yuv2_abgr;
00730 case PIX_FMT_RGBA:
00731 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
00732 return altivec_yuv2_rgba;
00733 case PIX_FMT_BGRA:
00734 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
00735 return altivec_yuv2_bgra;
00736 default: return NULL;
00737 }
00738 break;
00739
00740 case PIX_FMT_UYVY422:
00741 switch(c->dstFormat) {
00742 case PIX_FMT_BGR32:
00743 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
00744 return altivec_uyvy_rgb32;
00745 default: return NULL;
00746 }
00747 break;
00748
00749 }
00750 return NULL;
00751 }
00752
00753 void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int brightness, int contrast, int saturation)
00754 {
00755 union {
00756 DECLARE_ALIGNED(16, signed short, tmp)[8];
00757 vector signed short vec;
00758 } buf;
00759
00760 buf.tmp[0] = ((0xffffLL) * contrast>>8)>>9;
00761 buf.tmp[1] = -256*brightness;
00762 buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16);
00763 buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16);
00764 buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16));
00765 buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16));
00766
00767
00768 c->CSHIFT = (vector unsigned short)vec_splat_u16(2);
00769 c->CY = vec_splat ((vector signed short)buf.vec, 0);
00770 c->OY = vec_splat ((vector signed short)buf.vec, 1);
00771 c->CRV = vec_splat ((vector signed short)buf.vec, 2);
00772 c->CBU = vec_splat ((vector signed short)buf.vec, 3);
00773 c->CGU = vec_splat ((vector signed short)buf.vec, 4);
00774 c->CGV = vec_splat ((vector signed short)buf.vec, 5);
00775 return;
00776 }
00777
00778
00779 void
00780 ff_yuv2packedX_altivec(SwsContext *c,
00781 const int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
00782 const int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
00783 uint8_t *dest, int dstW, int dstY)
00784 {
00785 int i,j;
00786 vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
00787 vector signed short R0,G0,B0,R1,G1,B1;
00788
00789 vector unsigned char R,G,B;
00790 vector unsigned char *out,*nout;
00791
00792 vector signed short RND = vec_splat_s16(1<<3);
00793 vector unsigned short SCL = vec_splat_u16(4);
00794 DECLARE_ALIGNED(16, unsigned long, scratch)[16];
00795
00796 vector signed short *YCoeffs, *CCoeffs;
00797
00798 YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
00799 CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
00800
00801 out = (vector unsigned char *)dest;
00802
00803 for (i=0; i<dstW; i+=16) {
00804 Y0 = RND;
00805 Y1 = RND;
00806
00807 for (j=0; j<lumFilterSize; j++) {
00808 X0 = vec_ld (0, &lumSrc[j][i]);
00809 X1 = vec_ld (16, &lumSrc[j][i]);
00810 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00811 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00812 }
00813
00814 U = RND;
00815 V = RND;
00816
00817 for (j=0; j<chrFilterSize; j++) {
00818 X = vec_ld (0, &chrSrc[j][i/2]);
00819 U = vec_mradds (X, CCoeffs[j], U);
00820 X = vec_ld (0, &chrSrc[j][i/2+2048]);
00821 V = vec_mradds (X, CCoeffs[j], V);
00822 }
00823
00824
00825 Y0 = vec_sra (Y0, SCL);
00826 Y1 = vec_sra (Y1, SCL);
00827 U = vec_sra (U, SCL);
00828 V = vec_sra (V, SCL);
00829
00830 Y0 = vec_clip_s16 (Y0);
00831 Y1 = vec_clip_s16 (Y1);
00832 U = vec_clip_s16 (U);
00833 V = vec_clip_s16 (V);
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844 U0 = vec_mergeh (U,U);
00845 V0 = vec_mergeh (V,V);
00846
00847 U1 = vec_mergel (U,U);
00848 V1 = vec_mergel (V,V);
00849
00850 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00851 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00852
00853 R = vec_packclp (R0,R1);
00854 G = vec_packclp (G0,G1);
00855 B = vec_packclp (B0,B1);
00856
00857 switch(c->dstFormat) {
00858 case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
00859 case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
00860 case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
00861 case PIX_FMT_ARGB: out_argb (R,G,B,out); break;
00862 case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
00863 case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
00864 default:
00865 {
00866
00867
00868 static int printed_error_message;
00869 if (!printed_error_message) {
00870 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00871 sws_format_name(c->dstFormat));
00872 printed_error_message=1;
00873 }
00874 return;
00875 }
00876 }
00877 }
00878
00879 if (i < dstW) {
00880 i -= 16;
00881
00882 Y0 = RND;
00883 Y1 = RND;
00884
00885 for (j=0; j<lumFilterSize; j++) {
00886 X0 = vec_ld (0, &lumSrc[j][i]);
00887 X1 = vec_ld (16, &lumSrc[j][i]);
00888 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00889 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00890 }
00891
00892 U = RND;
00893 V = RND;
00894
00895 for (j=0; j<chrFilterSize; j++) {
00896 X = vec_ld (0, &chrSrc[j][i/2]);
00897 U = vec_mradds (X, CCoeffs[j], U);
00898 X = vec_ld (0, &chrSrc[j][i/2+2048]);
00899 V = vec_mradds (X, CCoeffs[j], V);
00900 }
00901
00902
00903 Y0 = vec_sra (Y0, SCL);
00904 Y1 = vec_sra (Y1, SCL);
00905 U = vec_sra (U, SCL);
00906 V = vec_sra (V, SCL);
00907
00908 Y0 = vec_clip_s16 (Y0);
00909 Y1 = vec_clip_s16 (Y1);
00910 U = vec_clip_s16 (U);
00911 V = vec_clip_s16 (V);
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922 U0 = vec_mergeh (U,U);
00923 V0 = vec_mergeh (V,V);
00924
00925 U1 = vec_mergel (U,U);
00926 V1 = vec_mergel (V,V);
00927
00928 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00929 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00930
00931 R = vec_packclp (R0,R1);
00932 G = vec_packclp (G0,G1);
00933 B = vec_packclp (B0,B1);
00934
00935 nout = (vector unsigned char *)scratch;
00936 switch(c->dstFormat) {
00937 case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break;
00938 case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break;
00939 case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break;
00940 case PIX_FMT_ARGB: out_argb (R,G,B,nout); break;
00941 case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break;
00942 case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break;
00943 default:
00944
00945 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00946 sws_format_name(c->dstFormat));
00947 return;
00948 }
00949
00950 memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
00951 }
00952
00953 }