FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/mem_internal.h"
31 #include "libavutil/pixdesc.h"
32 
/* Two 64-bit dither patterns, declared via DECLARE_ALIGNED(8, ...).
 * Within this file the entry is selected with dstY & 1 and stored in
 * c->greenDither when the destination format is neither RGB555 nor BGR555. */
33 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
34  0x0103010301030103LL,
35  0x0200020002000200LL,};
36 
/* Two 64-bit dither patterns, declared via DECLARE_ALIGNED(8, ...).
 * Within this file: c->blueDither takes entry dstY & 1, c->redDither takes
 * entry (dstY + 1) & 1, and c->greenDither takes entry dstY & 1 when the
 * destination format is RGB555 or BGR555. */
37 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
38  0x0602060206020602LL,
39  0x0004000400040004LL,};
40 
41 #if HAVE_INLINE_ASM
42 
/* Defined with no value and not tested anywhere in the visible part of this
 * file; presumably consumed by the included template -- TODO confirm. */
43 #define DITHER1XBPP
44 
/* Byte masks: every byte is 0xF8 (bF8) or 0xFC (bFC). */
45 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
46 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
47 
/* Three disjoint byte masks, each with 0xFF in every third byte; OR-ed
 * together they cover all eight bytes of the 64-bit word. */
48 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
49 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
50 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
51 
/* Replicated constants: 0x10 in every byte, 0x80 in every byte, and 1 in
 * every 16-bit word. Their users are not visible in this file. */
52 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
53 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
54 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
55 
56 
57 // MMXEXT versions
/* When HAVE_MMXEXT_INLINE is set, include swscale_template.c once with
 * COMPILE_TEMPLATE_MMXEXT forced to 1 and RENAME(a) pasting the _mmxext
 * suffix onto a. RENAME and COMPILE_TEMPLATE_MMXEXT are #undef'd first so any
 * earlier definition does not leak into the template. */
58 #if HAVE_MMXEXT_INLINE
59 #undef RENAME
60 #undef COMPILE_TEMPLATE_MMXEXT
61 #define COMPILE_TEMPLATE_MMXEXT 1
62 #define RENAME(a) a ## _mmxext
63 #include "swscale_template.c"
64 #endif
65 
67 {
68  const int dstH= c->dstH;
69  const int flags= c->flags;
70 
71  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
72  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
73  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
74 
75  int hasAlpha = c->needAlpha;
76  int32_t *vLumFilterPos= c->vLumFilterPos;
77  int32_t *vChrFilterPos= c->vChrFilterPos;
78  int16_t *vLumFilter= c->vLumFilter;
79  int16_t *vChrFilter= c->vChrFilter;
80  int32_t *lumMmxFilter= c->lumMmxFilter;
81  int32_t *chrMmxFilter= c->chrMmxFilter;
82  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
83  const int vLumFilterSize= c->vLumFilterSize;
84  const int vChrFilterSize= c->vChrFilterSize;
85  const int chrDstY= dstY>>c->chrDstVSubSample;
86  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
87  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
88 
89  c->blueDither= ff_dither8[dstY&1];
90  if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
91  c->greenDither= ff_dither8[dstY&1];
92  else
93  c->greenDither= ff_dither4[dstY&1];
94  c->redDither= ff_dither8[(dstY+1)&1];
95  if (dstY < dstH - 2) {
96  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
97  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
98  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
99 
100  int i;
101  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
102  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
103 
104  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
105  for (i = 0; i < neg; i++)
106  tmpY[i] = lumSrcPtr[neg];
107  for ( ; i < end; i++)
108  tmpY[i] = lumSrcPtr[i];
109  for ( ; i < vLumFilterSize; i++)
110  tmpY[i] = tmpY[i-1];
111  lumSrcPtr = tmpY;
112 
113  if (alpSrcPtr) {
114  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
115  for (i = 0; i < neg; i++)
116  tmpA[i] = alpSrcPtr[neg];
117  for ( ; i < end; i++)
118  tmpA[i] = alpSrcPtr[i];
119  for ( ; i < vLumFilterSize; i++)
120  tmpA[i] = tmpA[i - 1];
121  alpSrcPtr = tmpA;
122  }
123  }
124  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
125  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
126  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
127  for (i = 0; i < neg; i++) {
128  tmpU[i] = chrUSrcPtr[neg];
129  }
130  for ( ; i < end; i++) {
131  tmpU[i] = chrUSrcPtr[i];
132  }
133  for ( ; i < vChrFilterSize; i++) {
134  tmpU[i] = tmpU[i - 1];
135  }
136  chrUSrcPtr = tmpU;
137  }
138 
139  if (flags & SWS_ACCURATE_RND) {
140  int s= APCK_SIZE / 8;
141  for (i=0; i<vLumFilterSize; i+=2) {
142  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
143  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
146  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
147  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
148  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
149  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
152  }
153  }
154  for (i=0; i<vChrFilterSize; i+=2) {
155  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
156  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
159  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
160  }
161  } else {
162  for (i=0; i<vLumFilterSize; i++) {
163  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
164  lumMmxFilter[4*i+2]=
165  lumMmxFilter[4*i+3]=
166  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
167  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
168  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
169  alpMmxFilter[4*i+2]=
170  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
171  }
172  }
173  for (i=0; i<vChrFilterSize; i++) {
174  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
175  chrMmxFilter[4*i+2]=
176  chrMmxFilter[4*i+3]=
177  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
178  }
179  }
180  }
181 }
182 #endif /* HAVE_INLINE_ASM */
183 
/*
 * YUV2YUVX_FUNC_MMX(opt, step)
 *
 * Declares the external routine ff_yuv2yuvX_<opt>() and defines the static
 * wrapper yuv2yuvX_<opt>() around it.
 *
 * The wrapper does nothing when dstW <= 0. Otherwise it forwards the whole
 * row in one call, passing filterSize - 1, a srcOffset of 0, dest moved back
 * by offset and dstW increased by offset. The wrapper's src argument is not
 * forwarded, and the macro parameter step is unused in this variant.
 */
184 #define YUV2YUVX_FUNC_MMX(opt, step) \
185 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
186  uint8_t *dest, int dstW, \
187  const uint8_t *dither, int offset); \
188 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
189  const int16_t **src, uint8_t *dest, int dstW, \
190  const uint8_t *dither, int offset) \
191 { \
192  if(dstW > 0) \
193  ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \
194  return; \
195 }
196 
/*
 * YUV2YUVX_FUNC(opt, step)
 *
 * Declares the external routine ff_yuv2yuvX_<opt>() and defines the static
 * wrapper yuv2yuvX_<opt>(), which splits a row as follows:
 *   - dest not 16-byte aligned: the whole row is handed to yuv2yuvX_mmxext().
 *   - otherwise the largest multiple of step pixels (pixelsProcessed) goes to
 *     ff_yuv2yuvX_<opt>(), and the remaining dstW % step pixels go to
 *     ff_yuv2yuvX_mmxext() with srcOffset = pixelsProcessed.
 *
 * The expansion references yuv2yuvX_mmxext and ff_yuv2yuvX_mmxext, which this
 * file only provides through YUV2YUVX_FUNC_MMX(mmxext, ...) under
 * HAVE_MMXEXT_EXTERNAL. step is substituted unparenthesized into
 * "dstW % step", so callers should pass a plain constant.
 */
197 #define YUV2YUVX_FUNC(opt, step) \
198 void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
199  uint8_t *dest, int dstW, \
200  const uint8_t *dither, int offset); \
201 static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
202  const int16_t **src, uint8_t *dest, int dstW, \
203  const uint8_t *dither, int offset) \
204 { \
205  int remainder = (dstW % step); \
206  int pixelsProcessed = dstW - remainder; \
207  if(((uintptr_t)dest) & 15){ \
208  yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); \
209  return; \
210  } \
211  if(pixelsProcessed > 0) \
212  ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
213  if(remainder > 0){ \
214  ff_yuv2yuvX_mmxext(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
215  } \
216  return; \
217 }
218 
/* Instantiate the yuv2yuvX wrappers, each guarded by its HAVE_*_EXTERNAL
 * flag: mmxext through the _MMX variant (its step argument, 16, is unused),
 * sse3 with a step of 32 pixels and avx2 with a step of 64 pixels. */
219 #if HAVE_MMXEXT_EXTERNAL
220 YUV2YUVX_FUNC_MMX(mmxext, 16)
221 #endif
222 #if HAVE_SSE3_EXTERNAL
223 YUV2YUVX_FUNC(sse3, 32)
224 #endif
225 #if HAVE_AVX2_EXTERNAL
226 YUV2YUVX_FUNC(avx2, 64)
227 #endif
228 
/*
 * SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
 *
 * Expands to the prototype of
 *     ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>()
 * The expansion has no trailing semicolon; the invocation supplies it.
 */
229 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
230 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
231  SwsContext *c, int16_t *data, \
232  int dstW, const uint8_t *src, \
233  const int16_t *filter, \
234  const int32_t *filterPos, int filterSize)
235 
/*
 * SCALE_FUNCS(filter_n, opt)
 *
 * Declares twelve prototypes for one filter_n/opt pair: from_bpc values
 * 8, 9, 10, 12, 14 and 16, each once with to_bpc 15 and once with to_bpc 19.
 * The last declaration is left without a semicolon for the caller to add.
 */
236 #define SCALE_FUNCS(filter_n, opt) \
237  SCALE_FUNC(filter_n, 8, 15, opt); \
238  SCALE_FUNC(filter_n, 9, 15, opt); \
239  SCALE_FUNC(filter_n, 10, 15, opt); \
240  SCALE_FUNC(filter_n, 12, 15, opt); \
241  SCALE_FUNC(filter_n, 14, 15, opt); \
242  SCALE_FUNC(filter_n, 16, 15, opt); \
243  SCALE_FUNC(filter_n, 8, 19, opt); \
244  SCALE_FUNC(filter_n, 9, 19, opt); \
245  SCALE_FUNC(filter_n, 10, 19, opt); \
246  SCALE_FUNC(filter_n, 12, 19, opt); \
247  SCALE_FUNC(filter_n, 14, 19, opt); \
248  SCALE_FUNC(filter_n, 16, 19, opt)
249 
/* SCALE_FUNCS_MMX(opt): SCALE_FUNCS for the filter_n names 4, 8 and X.
 * Not invoked in the visible part of this file. */
250 #define SCALE_FUNCS_MMX(opt) \
251  SCALE_FUNCS(4, opt); \
252  SCALE_FUNCS(8, opt); \
253  SCALE_FUNCS(X, opt)
254 
/* SCALE_FUNCS_SSE(opt): SCALE_FUNCS for the filter_n names 4, 8, X4 and X8. */
255 #define SCALE_FUNCS_SSE(opt) \
256  SCALE_FUNCS(4, opt); \
257  SCALE_FUNCS(8, opt); \
258  SCALE_FUNCS(X4, opt); \
259  SCALE_FUNCS(X8, opt)
260 
/* Full prototype sets for sse2, ssse3 and sse4. */
261 SCALE_FUNCS_SSE(sse2);
262 SCALE_FUNCS_SSE(ssse3);
263 SCALE_FUNCS_SSE(sse4);
264 
/* For avx2 only the 8->15 variants with filter_n 4 and X4 are declared. */
265 SCALE_FUNC(4, 8, 15, avx2);
266 SCALE_FUNC(X4, 8, 15, avx2);
267 
/* VSCALEX_FUNC(size, opt): prototype of ff_yuv2planeX_<size>_<opt>().
 * No trailing semicolon; the invocation supplies it. */
268 #define VSCALEX_FUNC(size, opt) \
269 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
270  const int16_t **src, uint8_t *dest, int dstW, \
271  const uint8_t *dither, int offset)
/* VSCALEX_FUNCS(opt): the size 8, 9 and 10 prototypes for one opt. */
272 #define VSCALEX_FUNCS(opt) \
273  VSCALEX_FUNC(8, opt); \
274  VSCALEX_FUNC(9, opt); \
275  VSCALEX_FUNC(10, opt)
276 
/* Declared here: size 8 only for mmxext; sizes 8/9/10 for sse2, sse4 and avx;
 * additionally size 16 for sse4. */
277 VSCALEX_FUNC(8, mmxext);
278 VSCALEX_FUNCS(sse2);
279 VSCALEX_FUNCS(sse4);
280 VSCALEX_FUNC(16, sse4);
281 VSCALEX_FUNCS(avx);
282 
/* VSCALE_FUNC(size, opt): prototype of ff_yuv2plane1_<size>_<opt>().
 * No trailing semicolon; the invocation supplies it. */
283 #define VSCALE_FUNC(size, opt) \
284 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
285  const uint8_t *dither, int offset)
/* VSCALE_FUNCS(opt1, opt2): sizes 8 and 16 are declared with the opt1 suffix,
 * sizes 9 and 10 with the opt2 suffix. */
286 #define VSCALE_FUNCS(opt1, opt2) \
287  VSCALE_FUNC(8, opt1); \
288  VSCALE_FUNC(9, opt2); \
289  VSCALE_FUNC(10, opt2); \
290  VSCALE_FUNC(16, opt1)
291 
/* Declared here: all four sizes for sse2 and avx, plus size 16 for sse4. */
292 VSCALE_FUNCS(sse2, sse2);
293 VSCALE_FUNC(16, sse4);
294 VSCALE_FUNCS(avx, avx);
295 
/* INPUT_Y_FUNC(fmt, opt): prototype of ff_<fmt>ToY_<opt>(). */
296 #define INPUT_Y_FUNC(fmt, opt) \
297 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
298  const uint8_t *unused1, const uint8_t *unused2, \
299  int w, uint32_t *unused, void *opq)
/* INPUT_UV_FUNC(fmt, opt): prototype of ff_<fmt>ToUV_<opt>(). */
300 #define INPUT_UV_FUNC(fmt, opt) \
301 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
302  const uint8_t *unused0, \
303  const uint8_t *src1, \
304  const uint8_t *src2, \
305  int w, uint32_t *unused, void *opq)
/* INPUT_FUNC(fmt, opt): both the ToY and the ToUV prototype for fmt. */
306 #define INPUT_FUNC(fmt, opt) \
307  INPUT_Y_FUNC(fmt, opt); \
308  INPUT_UV_FUNC(fmt, opt)
/* INPUT_FUNCS(opt): ToY + ToUV for uyvy, yuyv, rgba, bgra, argb, abgr, rgb24
 * and bgr24; ToUV only for nv12 and nv21. */
309 #define INPUT_FUNCS(opt) \
310  INPUT_FUNC(uyvy, opt); \
311  INPUT_FUNC(yuyv, opt); \
312  INPUT_UV_FUNC(nv12, opt); \
313  INPUT_UV_FUNC(nv21, opt); \
314  INPUT_FUNC(rgba, opt); \
315  INPUT_FUNC(bgra, opt); \
316  INPUT_FUNC(argb, opt); \
317  INPUT_FUNC(abgr, opt); \
318  INPUT_FUNC(rgb24, opt); \
319  INPUT_FUNC(bgr24, opt)
320 
/* The full set is declared for sse2, ssse3 and avx; for avx2 only the six
 * packed RGB formats are declared. */
321 INPUT_FUNCS(sse2);
322 INPUT_FUNCS(ssse3);
323 INPUT_FUNCS(avx);
324 INPUT_FUNC(rgba, avx2);
325 INPUT_FUNC(bgra, avx2);
326 INPUT_FUNC(argb, avx2);
327 INPUT_FUNC(abgr, avx2);
328 INPUT_FUNC(rgb24, avx2);
329 INPUT_FUNC(bgr24, avx2);
330 
331 #if ARCH_X86_64
/* YUV2NV_DECL(fmt, opt): prototype of ff_yuv2<fmt>cX_<opt>(). Declared below
 * for nv12 and nv21 with the avx2 suffix. This sits inside the ARCH_X86_64
 * conditional opened just above. */
332 #define YUV2NV_DECL(fmt, opt) \
333 void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
334  const int16_t *filter, int filterSize, \
335  const int16_t **u, const int16_t **v, \
336  uint8_t *dst, int dstWidth)
337 
338 YUV2NV_DECL(nv12, avx2);
339 YUV2NV_DECL(nv21, avx2);
340 
/* YUV2GBRP_FN_DECL(fmt, opt): prototype of ff_yuv2<fmt>_full_X_<opt>(). */
341 #define YUV2GBRP_FN_DECL(fmt, opt) \
342 void ff_yuv2##fmt##_full_X_ ##opt(SwsContext *c, const int16_t *lumFilter, \
343  const int16_t **lumSrcx, int lumFilterSize, \
344  const int16_t *chrFilter, const int16_t **chrUSrcx, \
345  const int16_t **chrVSrcx, int chrFilterSize, \
346  const int16_t **alpSrcx, uint8_t **dest, \
347  int dstW, int y)
348 
/* YUV2GBRP_DECL(opt): the prototype above for 22 format names: gbrp and
 * gbrap, followed by the little-endian and then the big-endian variants of
 * gbrp9, gbrp10, gbrap10, gbrp12, gbrap12, gbrp14, gbrp16, gbrap16, gbrpf32
 * and gbrapf32. */
349 #define YUV2GBRP_DECL(opt) \
350 YUV2GBRP_FN_DECL(gbrp, opt); \
351 YUV2GBRP_FN_DECL(gbrap, opt); \
352 YUV2GBRP_FN_DECL(gbrp9le, opt); \
353 YUV2GBRP_FN_DECL(gbrp10le, opt); \
354 YUV2GBRP_FN_DECL(gbrap10le, opt); \
355 YUV2GBRP_FN_DECL(gbrp12le, opt); \
356 YUV2GBRP_FN_DECL(gbrap12le, opt); \
357 YUV2GBRP_FN_DECL(gbrp14le, opt); \
358 YUV2GBRP_FN_DECL(gbrp16le, opt); \
359 YUV2GBRP_FN_DECL(gbrap16le, opt); \
360 YUV2GBRP_FN_DECL(gbrpf32le, opt); \
361 YUV2GBRP_FN_DECL(gbrapf32le, opt); \
362 YUV2GBRP_FN_DECL(gbrp9be, opt); \
363 YUV2GBRP_FN_DECL(gbrp10be, opt); \
364 YUV2GBRP_FN_DECL(gbrap10be, opt); \
365 YUV2GBRP_FN_DECL(gbrp12be, opt); \
366 YUV2GBRP_FN_DECL(gbrap12be, opt); \
367 YUV2GBRP_FN_DECL(gbrp14be, opt); \
368 YUV2GBRP_FN_DECL(gbrp16be, opt); \
369 YUV2GBRP_FN_DECL(gbrap16be, opt); \
370 YUV2GBRP_FN_DECL(gbrpf32be, opt); \
371 YUV2GBRP_FN_DECL(gbrapf32be, opt)
372 
/* Declared for the sse2, sse4 and avx2 suffixes. */
373 YUV2GBRP_DECL(sse2);
374 YUV2GBRP_DECL(sse4);
375 YUV2GBRP_DECL(avx2);
376 
/* Prototype of ff_planar_<fmt>_to_y_<opt>(): one destination buffer. */
377 #define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \
378 void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \
379  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
380  void *opq)
381 
/* Prototype of ff_planar_<fmt>_to_uv_<opt>(): two destination buffers. */
382 #define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \
383 void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \
384  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
385  void *opq)
386 
/* Prototype of ff_planar_<fmt>_to_a_<opt>(): same parameter list as the
 * _to_y_ variant. */
387 #define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \
388 void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \
389  const uint8_t *src[4], int w, int32_t *rgb2yuv, \
390  void *opq)
391 
392 
/* The three *_RGBXX_{A,Y,UV}_DECL macros each declare the little-endian
 * (fmt##le) and big-endian (fmt##be) variant of one reader kind. */
393 #define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \
394 INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \
395 INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)
396 
397 #define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \
398 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \
399 INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)
400 
401 #define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \
402 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt); \
403 INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt)
404 
/* Combinations of the above (Y+UV+A, Y+UV, UV+A). None of these three is
 * invoked in the visible part of this file. */
405 #define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt) \
406 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
407 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
408 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
409 
410 #define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt) \
411 INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
412 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)
413 
414 #define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt) \
415 INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
416 INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)
417 
/* Alpha readers: rgb plus the le/be pairs of rgb10, rgb12, rgb16 and rgbf32
 * (there are no rgb9 or rgb14 alpha readers in this list). */
418 #define INPUT_PLANAR_RGB_A_ALL_DECL(opt) \
419 INPUT_PLANAR_RGB_A_FN_DECL(rgb, opt); \
420 INPUT_PLANAR_RGBXX_A_DECL(rgb10, opt); \
421 INPUT_PLANAR_RGBXX_A_DECL(rgb12, opt); \
422 INPUT_PLANAR_RGBXX_A_DECL(rgb16, opt); \
423 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, opt)
424 
/* Luma readers: rgb plus the le/be pairs of rgb9, rgb10, rgb12, rgb14, rgb16
 * and rgbf32. */
425 #define INPUT_PLANAR_RGB_Y_ALL_DECL(opt) \
426 INPUT_PLANAR_RGB_Y_FN_DECL(rgb, opt); \
427 INPUT_PLANAR_RGBXX_Y_DECL(rgb9, opt); \
428 INPUT_PLANAR_RGBXX_Y_DECL(rgb10, opt); \
429 INPUT_PLANAR_RGBXX_Y_DECL(rgb12, opt); \
430 INPUT_PLANAR_RGBXX_Y_DECL(rgb14, opt); \
431 INPUT_PLANAR_RGBXX_Y_DECL(rgb16, opt); \
432 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, opt)
433 
/* Chroma readers: same format list as the luma readers. */
434 #define INPUT_PLANAR_RGB_UV_ALL_DECL(opt) \
435 INPUT_PLANAR_RGB_UV_FN_DECL(rgb, opt); \
436 INPUT_PLANAR_RGBXX_UV_DECL(rgb9, opt); \
437 INPUT_PLANAR_RGBXX_UV_DECL(rgb10, opt); \
438 INPUT_PLANAR_RGBXX_UV_DECL(rgb12, opt); \
439 INPUT_PLANAR_RGBXX_UV_DECL(rgb14, opt); \
440 INPUT_PLANAR_RGBXX_UV_DECL(rgb16, opt); \
441 INPUT_PLANAR_RGBXX_UV_DECL(rgbf32, opt)
442 
/* sse2: luma only for rgbf32; chroma and alpha for every listed format. */
443 INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2);
444 INPUT_PLANAR_RGB_UV_ALL_DECL(sse2);
445 INPUT_PLANAR_RGB_A_ALL_DECL(sse2);
446 
/* sse4: luma and chroma for every listed format; alpha only for rgbf32. */
447 INPUT_PLANAR_RGB_Y_ALL_DECL(sse4);
448 INPUT_PLANAR_RGB_UV_ALL_DECL(sse4);
449 INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4);
450 
/* avx2: luma, chroma and alpha for every listed format. */
451 INPUT_PLANAR_RGB_Y_ALL_DECL(avx2);
452 INPUT_PLANAR_RGB_UV_ALL_DECL(avx2);
453 INPUT_PLANAR_RGB_A_ALL_DECL(avx2);
454 #endif
455 
/*
 * RANGE_CONVERT_FUNCS(opt)
 *
 * Statement macro (do { } while (0)) that expects a context pointer named c
 * in scope. When c->dstBpc <= 14 it sets c->lumConvertRange and
 * c->chrConvertRange to the ff_*RangeFromJpeg_<opt> pair if c->srcRange is
 * non-zero, otherwise to the ff_*RangeToJpeg_<opt> pair. For dstBpc > 14
 * nothing is assigned.
 */
456 #define RANGE_CONVERT_FUNCS(opt) do { \
457  if (c->dstBpc <= 14) { \
458  if (c->srcRange) { \
459  c->lumConvertRange = ff_lumRangeFromJpeg_ ##opt; \
460  c->chrConvertRange = ff_chrRangeFromJpeg_ ##opt; \
461  } else { \
462  c->lumConvertRange = ff_lumRangeToJpeg_ ##opt; \
463  c->chrConvertRange = ff_chrRangeToJpeg_ ##opt; \
464  } \
465  } \
466 } while (0)
467 
/*
 * RANGE_CONVERT_FUNCS_DECL(opt)
 *
 * Declares the four range-conversion routines with the given suffix:
 * ff_lumRangeFromJpeg_<opt>, ff_chrRangeFromJpeg_<opt>,
 * ff_lumRangeToJpeg_<opt> and ff_chrRangeToJpeg_<opt>. These are the names
 * RANGE_CONVERT_FUNCS(opt) assigns to the context.
 *
 * The expansion keeps its own trailing semicolon, so existing invocations
 * with or without a following ';' still work. The definition deliberately
 * ends WITHOUT a line-continuation backslash so that the line following the
 * macro is never absorbed into its body.
 */
#define RANGE_CONVERT_FUNCS_DECL(opt)                                     \
void ff_lumRangeFromJpeg_ ##opt(int16_t *dst, int width);                 \
void ff_chrRangeFromJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width); \
void ff_lumRangeToJpeg_ ##opt(int16_t *dst, int width);                   \
void ff_chrRangeToJpeg_ ##opt(int16_t *dstU, int16_t *dstV, int width);
473 
476 
478 {
479  if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
480  int cpu_flags = av_get_cpu_flags();
482  RANGE_CONVERT_FUNCS(avx2);
483  } else if (EXTERNAL_SSE2(cpu_flags)) {
484  RANGE_CONVERT_FUNCS(sse2);
485  }
486  }
487 }
488 
490 {
491  int cpu_flags = av_get_cpu_flags();
492 
493 #if HAVE_MMXEXT_INLINE
495  sws_init_swscale_mmxext(c);
496 #endif
497  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
498 #if HAVE_MMXEXT_EXTERNAL
500  c->yuv2planeX = yuv2yuvX_mmxext;
501 #endif
502 #if HAVE_SSE3_EXTERNAL
504  c->yuv2planeX = yuv2yuvX_sse3;
505 #endif
506 #if HAVE_AVX2_EXTERNAL
508  c->yuv2planeX = yuv2yuvX_avx2;
509 #endif
510  }
511 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
512  // The better yuv2planeX_8 functions need aligned stack on x86-32,
513  // so we use MMXEXT in this case if they are not available.
514  if (EXTERNAL_MMXEXT(cpu_flags)) {
515  if (c->dstBpc == 8 && !c->use_mmx_vfilter)
516  c->yuv2planeX = ff_yuv2planeX_8_mmxext;
517  }
518 #endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
519 
/*
 * ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2)
 *
 * Statement macro that stores a horizontal scaler in hscalefn, chosen from
 * c->srcBpc (a context pointer named c must be in scope). Within each branch
 * the ...to15 routine with suffix opt2 is used when c->dstBpc <= 14, and the
 * ...to19 routine with suffix opt1 otherwise.
 *
 * Branch order: srcBpc 8, 9, 10 and 12 are matched first. The 14-bit routines
 * are used for srcBpc == 14 and also for PAL8 / any-RGB sources whose first
 * component depth is below 16. Everything else is asserted to be 16 bits.
 */
520 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
521  if (c->srcBpc == 8) { \
522  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
523  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
524  } else if (c->srcBpc == 9) { \
525  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
526  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
527  } else if (c->srcBpc == 10) { \
528  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
529  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
530  } else if (c->srcBpc == 12) { \
531  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
532  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
533  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
534  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
535  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
536  } else { /* c->srcBpc == 16 */ \
537  av_assert0(c->srcBpc == 16);\
538  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
539  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
540  } \
541 } while (0)
/*
 * ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
 *
 * Switch on c->dstBpc:
 *   16: runs the caller-supplied statement do_16_case (may be empty).
 *   10: assigns ff_yuv2planeX_10_<opt> unless the destination format is
 *       big-endian or semi-planar YUV.
 *    9: assigns ff_yuv2planeX_9_<opt> unless the destination is big-endian.
 *    8: assigns ff_yuv2planeX_8_<opt> when condition_8bit holds and
 *       c->use_mmx_vfilter is zero.
 * Any other depth leaves vscalefn untouched (there is no default label).
 */
542 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
543 switch(c->dstBpc){ \
544  case 16: do_16_case; break; \
545  case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
546  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
547  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
548  }
/*
 * ASSIGN_VSCALE_FUNC(vscalefn, opt)
 *
 * Switch on c->dstBpc:
 *   16: assigns ff_yuv2plane1_16_<opt> unless the destination is big-endian.
 *   10: assigns ff_yuv2plane1_10_<opt> unless the destination is big-endian
 *       or semi-planar YUV.
 *    9: assigns ff_yuv2plane1_9_<opt> unless the destination is big-endian.
 *    8: assigns ff_yuv2plane1_8_<opt> unconditionally.
 * Any other depth only asserts that dstBpc is greater than 8.
 */
549 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
550  switch(c->dstBpc){ \
551  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt; break; \
552  case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) vscalefn = ff_yuv2plane1_10_ ## opt; break; \
553  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_9_ ## opt; break; \
554  case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
555  default: av_assert0(c->dstBpc>8); \
556  }
/*
 * case_rgb(x, X, opt)
 *
 * Expands to a complete switch arm for AV_PIX_FMT_<X>: c->lumToYV12 is set to
 * ff_<x>ToY_<opt>, and c->chrToYV12 is set to ff_<x>ToUV_<opt> only when
 * c->chrSrcHSubSample is zero. The arm ends with "break" without a semicolon;
 * the invocation supplies it.
 */
557 #define case_rgb(x, X, opt) \
558  case AV_PIX_FMT_ ## X: \
559  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
560  if (!c->chrSrcHSubSample) \
561  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
562  break
/*
 * ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
 *
 * Dispatches on the run-time filter size to ASSIGN_SCALE_FUNC2: sizes 4 and 8
 * use the routines named _4_ and _8_. Any other size uses the _X4_ routines
 * when bit 2 of filtersize is set (filtersize & 4) and the _X8_ routines
 * otherwise.
 */
563 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
564  switch (filtersize) { \
565  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
566  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
567  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
568  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
569  break; \
570  }
571  if (EXTERNAL_SSE2(cpu_flags)) {
572  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
573  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
574  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
575  HAVE_ALIGNED_STACK || ARCH_X86_64);
576  if (!(c->flags & SWS_ACCURATE_RND))
577  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2);
578 
579  switch (c->srcFormat) {
580  case AV_PIX_FMT_YA8:
581  c->lumToYV12 = ff_yuyvToY_sse2;
582  if (c->needAlpha)
583  c->alpToYV12 = ff_uyvyToY_sse2;
584  break;
585  case AV_PIX_FMT_YUYV422:
586  c->lumToYV12 = ff_yuyvToY_sse2;
587  c->chrToYV12 = ff_yuyvToUV_sse2;
588  break;
589  case AV_PIX_FMT_UYVY422:
590  c->lumToYV12 = ff_uyvyToY_sse2;
591  c->chrToYV12 = ff_uyvyToUV_sse2;
592  break;
593  case AV_PIX_FMT_NV12:
594  c->chrToYV12 = ff_nv12ToUV_sse2;
595  break;
596  case AV_PIX_FMT_NV21:
597  c->chrToYV12 = ff_nv21ToUV_sse2;
598  break;
599  case_rgb(rgb24, RGB24, sse2);
600  case_rgb(bgr24, BGR24, sse2);
601  case_rgb(bgra, BGRA, sse2);
602  case_rgb(rgba, RGBA, sse2);
603  case_rgb(abgr, ABGR, sse2);
604  case_rgb(argb, ARGB, sse2);
605  default:
606  break;
607  }
608  }
609  if (EXTERNAL_SSSE3(cpu_flags)) {
610  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
611  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
612  switch (c->srcFormat) {
613  case_rgb(rgb24, RGB24, ssse3);
614  case_rgb(bgr24, BGR24, ssse3);
615  default:
616  break;
617  }
618  }
619  if (EXTERNAL_SSE4(cpu_flags)) {
620  /* Xto15 don't need special sse4 functions */
621  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
622  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
623  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
624  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
625  HAVE_ALIGNED_STACK || ARCH_X86_64);
626  if (c->dstBpc == 16 && !isBE(c->dstFormat) && !(c->flags & SWS_ACCURATE_RND))
627  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
628  }
629 
630  if (EXTERNAL_AVX(cpu_flags)) {
631  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
632  HAVE_ALIGNED_STACK || ARCH_X86_64);
633  if (!(c->flags & SWS_ACCURATE_RND))
634  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx);
635 
636  switch (c->srcFormat) {
637  case AV_PIX_FMT_YUYV422:
638  c->chrToYV12 = ff_yuyvToUV_avx;
639  break;
640  case AV_PIX_FMT_UYVY422:
641  c->chrToYV12 = ff_uyvyToUV_avx;
642  break;
643  case AV_PIX_FMT_NV12:
644  c->chrToYV12 = ff_nv12ToUV_avx;
645  break;
646  case AV_PIX_FMT_NV21:
647  c->chrToYV12 = ff_nv21ToUV_avx;
648  break;
649  case_rgb(rgb24, RGB24, avx);
650  case_rgb(bgr24, BGR24, avx);
651  case_rgb(bgra, BGRA, avx);
652  case_rgb(rgba, RGBA, avx);
653  case_rgb(abgr, ABGR, avx);
654  case_rgb(argb, ARGB, avx);
655  default:
656  break;
657  }
658  }
659 
660 #if ARCH_X86_64
/*
 * ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize)
 *
 * Stores the AVX2 8->15 horizontal scaler in hscalefn:
 * ff_hscale8to15_4_avx2 when filtersize is exactly 4, and
 * ff_hscale8to15_X4_avx2 for every other size.
 *
 * Each arm ends in exactly one break; the former second break after the
 * default arm was unreachable and has been dropped.
 */
#define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
    switch (filtersize) { \
    case 4:  hscalefn = ff_hscale8to15_4_avx2;  break; \
    default: hscalefn = ff_hscale8to15_X4_avx2; break; \
    }
667 
669  if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
670  ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
671  ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
672  }
673  }
674 
676  if (ARCH_X86_64)
677  switch (c->srcFormat) {
678  case_rgb(rgb24, RGB24, avx2);
679  case_rgb(bgr24, BGR24, avx2);
680  case_rgb(bgra, BGRA, avx2);
681  case_rgb(rgba, RGBA, avx2);
682  case_rgb(abgr, ABGR, avx2);
683  case_rgb(argb, ARGB, avx2);
684  }
685  switch (c->dstFormat) {
686  case AV_PIX_FMT_NV12:
687  case AV_PIX_FMT_NV24:
688  c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
689  break;
690  case AV_PIX_FMT_NV21:
691  case AV_PIX_FMT_NV42:
692  c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
693  break;
694  default:
695  break;
696  }
697  }
698 
699 
/* Switch arm for fmt that sets c->readAlpPlanar and, having no break, falls
 * through into whatever case label follows it. */
700 #define INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(fmt, name, opt) \
701  case fmt: \
702  c->readAlpPlanar = ff_planar_##name##_to_a_##opt;
703 
/* Shared switch arm for an RGBA format and its RGB counterpart: sets the luma
 * and chroma planar readers (not the alpha reader), then breaks. */
704 #define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
705  case rgba_fmt: \
706  case rgb_fmt: \
707  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
708  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
709  break;
710 
/* Switch arm for a single format: sets the luma and chroma planar readers,
 * then breaks. */
711 #define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
712  case fmt: \
713  c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
714  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
715  break;
716 
/* Switch arm for a single format: sets only the chroma planar reader, then
 * breaks. */
717 #define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
718  case fmt: \
719  c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
720  break;
721 
/* LE and BE arms where the RGBA label sets the alpha reader and then falls
 * through into the RGB label, which sets the luma and chroma readers. */
722 #define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
723  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
724  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
725  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
726  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
727 
/* Same fall-through layout, but the RGB label sets only the chroma reader. */
728 #define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
729  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##LE, name##le, opt) \
730  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
731  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(rgba_fmt##BE, name##be, opt) \
732  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
733 
/* LE and BE arms where RGBA and RGB share luma/chroma readers; the alpha
 * reader is not touched. */
734 #define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
735  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
736  INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)
737 
/* LE and BE luma+chroma arms for a format without an alpha counterpart. */
738 #define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
739  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
740  INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
741 
/* LE and BE chroma-only arms for a format without an alpha counterpart. */
742 #define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
743  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
744  INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)
745 
/*
 * INPUT_PLANER_RGB_YUVA_ALL_CASES(opt)
 *
 * Emits the complete list of switch arms for planar RGB sources: GBRAP falls
 * through into GBRP; GBRP9 and GBRP14 get luma+chroma readers only; the
 * 10-, 12-, 16-bit and float formats get alpha (for the GBRAP variant) plus
 * luma+chroma readers. Used below with the avx2 suffix.
 */
746 #define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
747  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, opt) \
748  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \
749  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \
750  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \
751  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \
752  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \
753  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \
754  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)
755 
756 
757  if (EXTERNAL_SSE2(cpu_flags)) {
758  switch (c->srcFormat) {
759  INPUT_PLANER_RGB_A_FUNC_CASE_NOBREAK(AV_PIX_FMT_GBRAP, rgb, sse2);
760  INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2);
761  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2);
762  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2);
763  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2);
764  INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2);
765  INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2);
766  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
767  default:
768  break;
769  }
770  }
771 
772  if (EXTERNAL_SSE4(cpu_flags)) {
773  switch (c->srcFormat) {
774  case AV_PIX_FMT_GBRAP:
775  INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4);
776  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4);
777  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4);
778  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4);
779  INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4);
780  INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4);
781  INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
782  default:
783  break;
784  }
785  }
786 
788  switch (c->srcFormat) {
789  INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
790  default:
791  break;
792  }
793  }
794 
795  if(c->flags & SWS_FULL_CHR_H_INT) {
796 
797  /* yuv2gbrp uses the SwsContext for yuv coefficients
798  if struct offsets change the asm needs to be updated too */
799  av_assert0(offsetof(SwsContext, yuv2rgb_y_offset) == 40292);
800 
/* Switch arm for fmt: sets c->yuv2anyX to ff_yuv2<name>_full_X_<opt>, then
 * breaks. */
801 #define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
802  case fmt: \
803  c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
804  break;
805 
/* The arm above for 22 destination formats: GBRP and GBRAP, followed by the
 * LE and then the BE variants of GBRP9, GBRP10, GBRAP10, GBRP12, GBRAP12,
 * GBRP14, GBRP16, GBRAP16, GBRPF32 and GBRAPF32. */
806 #define YUV2ANYX_GBRAP_CASES(opt) \
807  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP, gbrp, opt) \
808  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP, gbrap, opt) \
809  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE, gbrp9le, opt) \
810  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE, gbrp10le, opt) \
811  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE, gbrap10le, opt) \
812  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE, gbrp12le, opt) \
813  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE, gbrap12le, opt) \
814  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE, gbrp14le, opt) \
815  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE, gbrp16le, opt) \
816  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE, gbrap16le, opt) \
817  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE, gbrpf32le, opt) \
818  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
819  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE, gbrp9be, opt) \
820  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE, gbrp10be, opt) \
821  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE, gbrap10be, opt) \
822  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE, gbrp12be, opt) \
823  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE, gbrap12be, opt) \
824  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE, gbrp14be, opt) \
825  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE, gbrp16be, opt) \
826  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE, gbrap16be, opt) \
827  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE, gbrpf32be, opt) \
828  YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)
829 
830  if (EXTERNAL_SSE2(cpu_flags)) {
831  switch (c->dstFormat) {
832  YUV2ANYX_GBRAP_CASES(sse2)
833  default:
834  break;
835  }
836  }
837 
838  if (EXTERNAL_SSE4(cpu_flags)) {
839  switch (c->dstFormat) {
840  YUV2ANYX_GBRAP_CASES(sse4)
841  default:
842  break;
843  }
844  }
845 
847  switch (c->dstFormat) {
848  YUV2ANYX_GBRAP_CASES(avx2)
849  default:
850  break;
851  }
852  }
853  }
854 
855 #endif
856 
858 }
SwsContext::vLumFilterSize
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
Definition: swscale_internal.h:420
AV_PIX_FMT_GBRAP16
#define AV_PIX_FMT_GBRAP16
Definition: pixfmt.h:501
DECLARE_ASM_ALIGNED
#define DECLARE_ASM_ALIGNED(n, t, v)
Definition: mem_internal.h:86
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:62
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:1057
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:140
mem_internal.h
SwsContext::dstY
int dstY
Last destination vertical line output from last slice.
Definition: swscale_internal.h:432
av_unused
#define av_unused
Definition: attributes.h:131
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
pixdesc.h
SwsContext::vChrFilter
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
Definition: swscale_internal.h:413
SwsContext::lumMmxFilter
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:514
SwsContext::vLumFilter
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
Definition: swscale_internal.h:412
RANGE_CONVERT_FUNCS
#define RANGE_CONVERT_FUNCS(opt)
Definition: swscale.c:456
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem_internal.h:87
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
INPUT_FUNC
#define INPUT_FUNC(fmt, opt)
Definition: swscale.c:306
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:309
rgb
Definition: rpzaenc.c:60
AV_PIX_FMT_GBRP14
#define AV_PIX_FMT_GBRP14
Definition: pixfmt.h:496
AV_PIX_FMT_GBRAP
@ AV_PIX_FMT_GBRAP
planar GBRA 4:4:4:4 32bpp
Definition: pixfmt.h:212
AV_PIX_FMT_GBRP10
#define AV_PIX_FMT_GBRP10
Definition: pixfmt.h:494
AV_CPU_FLAG_SLOW_GATHER
#define AV_CPU_FLAG_SLOW_GATHER
CPU has slow gathers.
Definition: cpu.h:58
avassert.h
ff_sws_init_range_convert_x86
av_cold void ff_sws_init_range_convert_x86(SwsContext *c)
Definition: swscale.c:477
av_cold
#define av_cold
Definition: attributes.h:90
AV_PIX_FMT_GBRAP10
#define AV_PIX_FMT_GBRAP10
Definition: pixfmt.h:498
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_PIX_FMT_GBRAP12
#define AV_PIX_FMT_GBRAP12
Definition: pixfmt.h:499
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:63
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:1058
SCALE_FUNC
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt)
Definition: swscale.c:229
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:286
SwsContext::vLumFilterPos
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
Definition: swscale_internal.h:416
SwsContext::yuv2rgb_y_offset
int yuv2rgb_y_offset
Definition: swscale_internal.h:469
if
if(ret)
Definition: filter_design.txt:179
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:268
AV_PIX_FMT_GBRP16
#define AV_PIX_FMT_GBRP16
Definition: pixfmt.h:497
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
Definition: swscale.c:489
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
EXTERNAL_SSE3
#define EXTERNAL_SSE3(flags)
Definition: cpu.h:62
SwsPlane
Slice plane.
Definition: swscale_internal.h:1052
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
SwsContext::alpMmxFilter
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:522
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
AV_PIX_FMT_GBRP9
#define AV_PIX_FMT_GBRP9
Definition: pixfmt.h:493
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that some (invalid) inputs can trigger overflows (undefined behavior). In these cases, often the output of the computation does not matter (as it is from invalid input).
Definition: undefined.txt:32
SwsContext::vChrFilterPos
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
Definition: swscale_internal.h:417
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:729
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:109
cpu.h
isAnyRGB
static av_always_inline int isAnyRGB(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:837
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
SWS_FULL_CHR_H_INT
#define SWS_FULL_CHR_H_INT
Definition: swscale.h:86
AV_PIX_FMT_GBRPF32
#define AV_PIX_FMT_GBRPF32
Definition: pixfmt.h:508
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:471
attributes.h
SwsContext::vChrFilterSize
int vChrFilterSize
Vertical filter size for chroma pixels.
Definition: swscale_internal.h:421
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:90
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AV_PIX_FMT_GBRP12
#define AV_PIX_FMT_GBRP12
Definition: pixfmt.h:495
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:371
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:466
swscale_internal.h
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
same as AV_PIX_FMT_NV12, but U and V bytes are swapped
Definition: pixfmt.h:97
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
same as AV_PIX_FMT_NV24, but U and V bytes are swapped
Definition: pixfmt.h:372
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:96
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:88
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
U
#define U(x)
Definition: vpx_arith.h:37
AV_PIX_FMT_GBRAPF32
#define AV_PIX_FMT_GBRAPF32
Definition: pixfmt.h:509
YUV2YUVX_FUNC_MMX
#define YUV2YUVX_FUNC_MMX(opt, step)
Definition: swscale.c:184
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:88
SwsContext::chrMmxFilter
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:515
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
RANGE_CONVERT_FUNCS_DECL
#define RANGE_CONVERT_FUNCS_DECL(opt)
Definition: swscale.c:468
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:1055
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:283
case_rgb
#define case_rgb(x, X, opt)
int32_t
int32_t
Definition: audioconvert.c:56
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:42
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:474
YUV2YUVX_FUNC
#define YUV2YUVX_FUNC(opt, step)
Definition: swscale.c:197
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SwsContext
Definition: swscale_internal.h:301
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:255
SwsContext::dstH
int dstH
Height of destination luma/alpha planes.
Definition: swscale_internal.h:325
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:64
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:272
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
swscale.h
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33