/*
 * YUV2RGB_LOOP(depth): common row-loop setup.
 *
 * Expands in a scope that supplies c, src, srcStride, srcSliceY, srcSliceH,
 * dst and dstStride plus the locals y, h_size and vshift.
 *
 *  - h_size is c->dstW rounded up to the next multiple of 8.
 *  - h_size * depth is compared with FFABS(dstStride[0]); depth is
 *    presumably the number of bytes per output pixel -- confirm against the
 *    expansion sites.
 *  - vshift is 0 when the source format is AV_PIX_FMT_YUV422P and 1
 *    otherwise, so the chroma row used for source row y is y >> vshift.
 *  - For every row y of the slice the loop derives the destination row
 *    (image, at row y + srcSliceY of dst[0]), the luma row (py), the two
 *    chroma rows (pu, pv) and index = -h_size / 2.
 */
43 #define YUV2RGB_LOOP(depth) \
44 h_size = (c->dstW + 7) & ~7; \
45 if (h_size * depth > FFABS(dstStride[0])) \
48 vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
50 for (y = 0; y < srcSliceH; y++) { \
51 uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \
52 const uint8_t *py = src[0] + y * srcStride[0]; \
53 const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
54 const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
55 x86_reg index = -h_size / 2; \
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). By its name this is the rgb24 variant.
 *
 * Going by the parameter names, pu_index and pv_index are chroma pointers
 * biased by index and py_2index is a luma pointer biased by 2 * index;
 * pointer_c_dither points at 64-bit values. TODO: confirm against the caller.
 */
57 extern void ff_yuv_420_rgb24_ssse3(
x86_reg index, uint8_t *image,
const uint8_t *pu_index,
58 const uint8_t *pv_index,
const uint64_t *pointer_c_dither,
59 const uint8_t *py_2index);
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). By its name this is the bgr24 variant.
 *
 * Going by the parameter names, pu_index and pv_index are chroma pointers
 * biased by index and py_2index is a luma pointer biased by 2 * index;
 * pointer_c_dither points at 64-bit values. TODO: confirm against the caller.
 */
60 extern void ff_yuv_420_bgr24_ssse3(
x86_reg index, uint8_t *image,
const uint8_t *pu_index,
61 const uint8_t *pv_index,
const uint64_t *pointer_c_dither,
62 const uint8_t *py_2index);
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). By its name this is the rgb15 variant.
 *
 * Going by the parameter names, pu_index and pv_index are chroma pointers
 * biased by index and py_2index is a luma pointer biased by 2 * index;
 * pointer_c_dither points at 64-bit values. TODO: confirm against the caller.
 */
64 extern void ff_yuv_420_rgb15_ssse3(
x86_reg index, uint8_t *image,
const uint8_t *pu_index,
65 const uint8_t *pv_index,
const uint64_t *pointer_c_dither,
66 const uint8_t *py_2index);
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). By its name this is the rgb16 variant.
 *
 * Going by the parameter names, pu_index and pv_index are chroma pointers
 * biased by index and py_2index is a luma pointer biased by 2 * index;
 * pointer_c_dither points at 64-bit values. TODO: confirm against the caller.
 */
67 extern void ff_yuv_420_rgb16_ssse3(
x86_reg index, uint8_t *image,
const uint8_t *pu_index,
68 const uint8_t *pv_index,
const uint64_t *pointer_c_dither,
69 const uint8_t *py_2index);
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). By its name this is the rgb32 variant
 * without a separate alpha input.
 *
 * Going by the parameter names, pu_index and pv_index are chroma pointers
 * biased by index and py_2index is a luma pointer biased by 2 * index;
 * pointer_c_dither points at 64-bit values. TODO: confirm against the caller.
 */
70 extern void ff_yuv_420_rgb32_ssse3(
x86_reg index, uint8_t *image,
const uint8_t *pu_index,
71 const uint8_t *pv_index,
const uint64_t *pointer_c_dither,
72 const uint8_t *py_2index);
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). By its name this is the bgr32 variant
 * without a separate alpha input.
 *
 * Going by the parameter names, pu_index and pv_index are chroma pointers
 * biased by index and py_2index is a luma pointer biased by 2 * index;
 * pointer_c_dither points at 64-bit values. TODO: confirm against the caller.
 */
73 extern void ff_yuv_420_bgr32_ssse3(
x86_reg index, uint8_t *image,
const uint8_t *pu_index,
74 const uint8_t *pv_index,
const uint64_t *pointer_c_dither,
75 const uint8_t *py_2index);
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). By its name this is the rgb32 variant
 * for sources that carry an extra plane; compared with the non-"yuva"
 * declarations it takes one more read-only pointer, pa_2index.
 *
 * Going by the parameter names, pu_index and pv_index are chroma pointers
 * biased by index, while py_2index and pa_2index are biased by 2 * index;
 * pa is presumably the alpha plane. TODO: confirm against the caller.
 */
76 extern void ff_yuva_420_rgb32_ssse3(
x86_reg index, uint8_t *image,
const uint8_t *pu_index,
77 const uint8_t *pv_index,
const uint64_t *pointer_c_dither,
78 const uint8_t *py_2index,
const uint8_t *pa_2index);
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). By its name this is the bgr32 variant
 * for sources that carry an extra plane; compared with the non-"yuva"
 * declarations it takes one more read-only pointer, pa_2index.
 *
 * Going by the parameter names, pu_index and pv_index are chroma pointers
 * biased by index, while py_2index and pa_2index are biased by 2 * index;
 * pa is presumably the alpha plane. TODO: confirm against the caller.
 */
79 extern void ff_yuva_420_bgr32_ssse3(
x86_reg index, uint8_t *image,
const uint8_t *pu_index,
80 const uint8_t *pv_index,
const uint64_t *pointer_c_dither,
81 const uint8_t *py_2index,
const uint8_t *pa_2index);
/*
 * Declared here, defined elsewhere (the _ssse3 suffix suggests an SSSE3
 * assembly implementation -- confirm). Unlike the packed-output declarations
 * it takes three writable destination pointers: image, dst_b and dst_r.
 * yuv420_gbrp_ssse3 passes the G, B and R plane rows in that order.
 *
 * That caller also passes pu - index, pv - index and py - 2 * index for the
 * three source pointers and &(c->redDither) for pointer_c_dither.
 */
83 extern void ff_yuv_420_gbrp24_ssse3(
x86_reg index, uint8_t *image, uint8_t *dst_b, uint8_t *dst_r,
84 const uint8_t *pu_index,
const uint8_t *pv_index,
85 const uint64_t *pointer_c_dither,
86 const uint8_t *py_2index);
/*
 * yuv420_rgb15_ssse3: one of the converters returned by the dstFormat switch
 * later in this file.
 *
 * c is the SwsContext, src and dst are arrays of plane pointers and dstStride
 * holds the destination strides. srcSliceY and srcSliceH are read here as the
 * slice's first row and row count, which is how YUV2RGB_LOOP uses those
 * names -- confirm. The locals y, h_size and vshift are the ones that macro
 * assigns.
 */
89 static inline int yuv420_rgb15_ssse3(
SwsContext *
c,
const uint8_t *
src[],
91 int srcSliceY,
int srcSliceH,
92 uint8_t *dst[],
int dstStride[])
94 int y, h_size, vshift;
/*
 * yuv420_rgb16_ssse3: one of the converters returned by the dstFormat switch
 * later in this file.
 *
 * c is the SwsContext, src and dst are arrays of plane pointers and dstStride
 * holds the destination strides. srcSliceY and srcSliceH are read here as the
 * slice's first row and row count, which is how YUV2RGB_LOOP uses those
 * names -- confirm. The locals y, h_size and vshift are the ones that macro
 * assigns.
 */
107 static inline
int yuv420_rgb16_ssse3(
SwsContext *
c, const uint8_t *
src[],
109 int srcSliceY,
int srcSliceH,
110 uint8_t *dst[],
int dstStride[])
112 int y, h_size, vshift;
/*
 * yuv420_rgb32_ssse3: one of the converters returned by the dstFormat switch
 * later in this file (the return that is not the "yuva" one).
 *
 * c is the SwsContext, src and dst are arrays of plane pointers and dstStride
 * holds the destination strides. srcSliceY and srcSliceH are read here as the
 * slice's first row and row count, which is how YUV2RGB_LOOP uses those
 * names -- confirm. The locals y, h_size and vshift are the ones that macro
 * assigns.
 */
125 static inline
int yuv420_rgb32_ssse3(
SwsContext *
c, const uint8_t *
src[],
127 int srcSliceY,
int srcSliceH,
128 uint8_t *dst[],
int dstStride[])
130 int y, h_size, vshift;
/*
 * yuv420_bgr32_ssse3: one of the converters returned by the dstFormat switch
 * later in this file (the return that is not the "yuva" one).
 *
 * c is the SwsContext, src and dst are arrays of plane pointers and dstStride
 * holds the destination strides. srcSliceY and srcSliceH are read here as the
 * slice's first row and row count, which is how YUV2RGB_LOOP uses those
 * names -- confirm. The locals y, h_size and vshift are the ones that macro
 * assigns.
 */
139 static inline
int yuv420_bgr32_ssse3(
SwsContext *
c, const uint8_t *
src[],
141 int srcSliceY,
int srcSliceH,
142 uint8_t *dst[],
int dstStride[])
144 int y, h_size, vshift;
/*
 * yuva420_rgb32_ssse3: converter returned by the dstFormat switch later in
 * this file under the CONFIG_SWSCALE_ALPHA guard.
 *
 * c is the SwsContext, src and dst are arrays of plane pointers and dstStride
 * holds the destination strides. srcSliceY and srcSliceH are read here as the
 * slice's first row and row count, which is how YUV2RGB_LOOP uses those
 * names -- confirm. The locals y, h_size and vshift are the ones that macro
 * assigns.
 */
153 static inline
int yuva420_rgb32_ssse3(
SwsContext *
c, const uint8_t *
src[],
155 int srcSliceY,
int srcSliceH,
156 uint8_t *dst[],
int dstStride[])
158 int y, h_size, vshift;
/* Row y of the fourth source plane, src[3]; presumably alpha -- confirm. */
161 const uint8_t *pa =
src[3] + y * srcStride[3];
/*
 * yuva420_bgr32_ssse3: converter returned by the dstFormat switch later in
 * this file under the CONFIG_SWSCALE_ALPHA guard.
 *
 * c is the SwsContext, src and dst are arrays of plane pointers and dstStride
 * holds the destination strides. srcSliceY and srcSliceH are read here as the
 * slice's first row and row count, which is how YUV2RGB_LOOP uses those
 * names -- confirm. The locals y, h_size and vshift are the ones that macro
 * assigns.
 */
167 static inline
int yuva420_bgr32_ssse3(
SwsContext *
c, const uint8_t *
src[],
169 int srcSliceY,
int srcSliceH,
170 uint8_t *dst[],
int dstStride[])
172 int y, h_size, vshift;
/* Row y of the fourth source plane, src[3]; presumably alpha -- confirm. */
176 const uint8_t *pa =
src[3] + y * srcStride[3];
/*
 * yuv420_rgb24_ssse3: one of the converters returned by the dstFormat switch
 * later in this file.
 *
 * c is the SwsContext, src and dst are arrays of plane pointers and dstStride
 * holds the destination strides. srcSliceY and srcSliceH are read here as the
 * slice's first row and row count, which is how YUV2RGB_LOOP uses those
 * names -- confirm. The locals y, h_size and vshift are the ones that macro
 * assigns.
 */
182 static inline
int yuv420_rgb24_ssse3(
SwsContext *
c, const uint8_t *
src[],
184 int srcSliceY,
int srcSliceH,
185 uint8_t *dst[],
int dstStride[])
187 int y, h_size, vshift;
/*
 * yuv420_bgr24_ssse3: one of the converters returned by the dstFormat switch
 * later in this file.
 *
 * c is the SwsContext, src and dst are arrays of plane pointers and dstStride
 * holds the destination strides. srcSliceY and srcSliceH are read here as the
 * slice's first row and row count, which is how YUV2RGB_LOOP uses those
 * names -- confirm. The locals y, h_size and vshift are the ones that macro
 * assigns.
 */
196 static inline
int yuv420_bgr24_ssse3(
SwsContext *
c, const uint8_t *
src[],
198 int srcSliceY,
int srcSliceH,
199 uint8_t *dst[],
int dstStride[])
201 int y, h_size, vshift;
/*
 * yuv420_gbrp_ssse3: converter writing three separate destination planes
 * (dst[0], dst[1], dst[2], used as G, B and R) instead of one packed image.
 * It is one of the functions returned by the dstFormat switch later in this
 * file.
 *
 * c is the SwsContext; src and dst are arrays of plane pointers; dstStride
 * holds one stride per destination plane; srcSliceY is added to the row
 * number when addressing the destination and srcSliceH bounds the row loop.
 */
211 static inline int yuv420_gbrp_ssse3(
SwsContext *
c,
const uint8_t *
src[],
213 int srcSliceY,
int srcSliceH,
214 uint8_t *dst[],
int dstStride[])
216 int y, h_size, vshift;
/* Destination width rounded up to the next multiple of 8. */
218 h_size = (
c->dstW + 7) & ~7;
/* Guard comparing h_size * 3 with FFABS(dstStride[0]). */
219 if (h_size * 3 >
FFABS(dstStride[0]))
/* One iteration per source row of the slice. */
224 for (y = 0; y < srcSliceH; y++) {
/* Output rows: G from dst[0], B from dst[1], R from dst[2]. */
225 uint8_t *dst_g = dst[0] + (y + srcSliceY) * dstStride[0];
226 uint8_t *dst_b = dst[1] + (y + srcSliceY) * dstStride[1];
227 uint8_t *dst_r = dst[2] + (y + srcSliceY) * dstStride[2];
/* Luma row y, and the chroma rows at y >> vshift. */
228 const uint8_t *py =
src[0] + y * srcStride[0];
229 const uint8_t *pu =
src[1] + (y >> vshift) * srcStride[1];
230 const uint8_t *pv =
src[2] + (y >> vshift) * srcStride[2];
/*
 * The chroma pointers are passed biased by -index and the luma pointer by
 * -2 * index; the dither argument is the address of c->redDither.
 */
233 ff_yuv_420_gbrp24_ssse3(
index, dst_g, dst_b, dst_r, pu -
index, pv -
index, &(
c->redDither), py - 2 *
index);
/* Choose the converter function according to the destination pixel format. */
247 switch (
c->dstFormat) {
/* The "yuva" rgb32 converter is only compiled in with CONFIG_SWSCALE_ALPHA. */
250 #if CONFIG_SWSCALE_ALPHA
251 return yuva420_rgb32_ssse3;
255 return yuv420_rgb32_ssse3;
/* The "yuva" bgr32 converter is only compiled in with CONFIG_SWSCALE_ALPHA. */
258 #if CONFIG_SWSCALE_ALPHA
259 return yuva420_bgr32_ssse3;
263 return yuv420_bgr32_ssse3;
265 return yuv420_rgb24_ssse3;
267 return yuv420_bgr24_ssse3;
269 return yuv420_rgb16_ssse3;
271 return yuv420_rgb15_ssse3;
/* Planar output converter (separate G, B and R destination planes). */
274 return yuv420_gbrp_ssse3;