FFmpeg
swscale_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdint.h>
22 
23 #include "libavutil/x86/asm.h"
25 
/*
 * Store helpers used by the asm templates in this file.
 * Any previous definitions of these names (and of PREFETCH) are dropped first.
 * MOVNTQ(a,b) expands to the instruction string "movntq a, b"; it is routed
 * through REAL_MOVNTQ so that macro arguments are expanded before they are
 * stringified with '#'. MOVNTQ2 is just the bare mnemonic string.
 */
26 #undef REAL_MOVNTQ
27 #undef MOVNTQ
28 #undef MOVNTQ2
29 #undef PREFETCH
30 
31 
32 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
33 #define MOVNTQ2 "movntq "
34 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
35 
/*
 * Opens an __asm__ volatile statement (deliberately left unterminated; the
 * operand list is supplied by a separate macro) and emits the chroma pass of
 * the vertical filter.
 *
 * FF_REG_a is zeroed and used as the running index; label 1 is the head of the
 * outer per-group loop that the WRITE* macros jump back to. The inner loop at
 * label 2 walks a list of 16-byte entries starting at CHR_MMX_FILTER_OFFSET(%0):
 * a source pointer at offset 0 and a coefficient vector at offset 8. The walk
 * ends when the next source pointer is zero. U data is read at (ptr, index) and
 * V data at the same index after adding operand %6 to the pointer.
 * Products (pmulhw) are summed into mm3 (U) and mm4 (V), both seeded from
 * VROUNDER_OFFSET(%0). Uses FF_REG_d, FF_REG_S, mm0, mm2 and mm5 as scratch.
 */
36 #define YSCALEYUV2PACKEDX_UV \
37  __asm__ volatile(\
38  "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
39  ".p2align 4 \n\t"\
40  "nop \n\t"\
41  "1: \n\t"\
42  "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
43  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
44  "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
45  "movq %%mm3, %%mm4 \n\t"\
46  ".p2align 4 \n\t"\
47  "2: \n\t"\
48  "movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* filterCoeff */\
49  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* UsrcData */\
50  "add %6, %%"FF_REG_S" \n\t" \
51  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm5 \n\t" /* VsrcData */\
52  "add $16, %%"FF_REG_d" \n\t"\
53  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
54  "pmulhw %%mm0, %%mm2 \n\t"\
55  "pmulhw %%mm0, %%mm5 \n\t"\
56  "paddw %%mm2, %%mm3 \n\t"\
57  "paddw %%mm5, %%mm4 \n\t"\
58  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
59  " jnz 2b \n\t"\
60 
/*
 * Parameterised filter-list walk for two adjacent 8-byte groups of one plane
 * (the inline comments call them Y1/Y2; callers in this file also pass the
 * alpha filter offset).
 *
 *   offset      - string: displacement of the filter list relative to %0
 *   coeff       - MMX register that receives each coefficient vector
 *   src1, src2  - MMX scratch registers for the two source loads
 *   dst1, dst2  - MMX accumulators, both seeded from VROUNDER_OFFSET(%0)
 *
 * List layout is the same as in the chroma pass: 16-byte entries with a source
 * pointer at offset 0 and coefficients at offset 8, terminated by a zero
 * pointer. Sources are read at (ptr, index, 2) and 8(ptr, index, 2) with
 * FF_REG_a as index. Reuses local label 2 and clobbers FF_REG_d / FF_REG_S.
 */
61 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
62  "lea "offset"(%0), %%"FF_REG_d" \n\t"\
63  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
64  "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
65  "movq "#dst1", "#dst2" \n\t"\
66  ".p2align 4 \n\t"\
67  "2: \n\t"\
68  "movq 8(%%"FF_REG_d"), "#coeff" \n\t" /* filterCoeff */\
69  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
70  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
71  "add $16, %%"FF_REG_d" \n\t"\
72  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
73  "pmulhw "#coeff", "#src1" \n\t"\
74  "pmulhw "#coeff", "#src2" \n\t"\
75  "paddw "#src1", "#dst1" \n\t"\
76  "paddw "#src2", "#dst2" \n\t"\
77  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
78  " jnz 2b \n\t"\
79 
/*
 * Chroma pass followed by the pass over LUM_MMX_FILTER_OFFSET.
 * After expansion mm3/mm4 hold the chroma sums and mm1/mm7 the two sums of the
 * second pass; mm0, mm2 and mm5 are scratch.
 */
80 #define YSCALEYUV2PACKEDX \
81  YSCALEYUV2PACKEDX_UV \
82  YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
83 
/*
 * Operand list and terminator for an asm statement opened by one of the
 * *_UV macros. There are no outputs. Inputs:
 *   %0 = &c->redDither (base address for all the *_OFFSET displacements)
 *   %1..%3 = dummy (memory operands)
 *   %4 = dest, %5 = dstW_reg (memory), %6 = uv_off (memory)
 * plus the named constraints bF8 and bFC.
 * Declares FF_REG_a, FF_REG_d and FF_REG_S as clobbered; no MMX register and
 * no "memory" clobber is listed.
 * The expansion site must have c, dummy, dest, dstW_reg and uv_off in scope.
 */
84 #define YSCALEYUV2PACKEDX_END \
85  :: "r" (&c->redDither), \
86  "m" (dummy), "m" (dummy), "m" (dummy),\
87  "r" (dest), "m" (dstW_reg), "m"(uv_off) \
88  NAMED_CONSTRAINTS_ADD(bF8,bFC) \
89  : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S \
90  );
91 
/*
 * Variant of the chroma pass that handles two filter taps per iteration and
 * keeps 32-bit intermediate sums. Like YSCALEYUV2PACKEDX_UV it opens an
 * __asm__ volatile statement, zeroes FF_REG_a and defines label 1.
 *
 * Each list entry (starting at CHR_MMX_FILTER_OFFSET(%0)) is APCK_SIZE bytes:
 * first source pointer at offset 0, second at APCK_PTR2, and the coefficient
 * pair at APCK_COEF. Words from the two sources are interleaved
 * (punpcklwd/punpckhwd) and multiplied-and-added with pmaddwd, accumulating
 * into mm4/mm5 (U) and mm6/mm7 (V). V data is read after adding operand %6 to
 * the source pointer. The loop ends when the next entry's first pointer is 0.
 *
 * After the loop the sums are shifted right by 16, packed to signed words,
 * offset by VROUNDER_OFFSET(%0) and stored to U_TEMP(%0) and V_TEMP(%0).
 */
92 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
93  __asm__ volatile(\
94  "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
95  ".p2align 4 \n\t"\
96  "nop \n\t"\
97  "1: \n\t"\
98  "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
99  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
100  "pxor %%mm4, %%mm4 \n\t"\
101  "pxor %%mm5, %%mm5 \n\t"\
102  "pxor %%mm6, %%mm6 \n\t"\
103  "pxor %%mm7, %%mm7 \n\t"\
104  ".p2align 4 \n\t"\
105  "2: \n\t"\
106  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm0 \n\t" /* UsrcData */\
107  "add %6, %%"FF_REG_S" \n\t" \
108  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* VsrcData */\
109  "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
110  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm1 \n\t" /* UsrcData */\
111  "movq %%mm0, %%mm3 \n\t"\
112  "punpcklwd %%mm1, %%mm0 \n\t"\
113  "punpckhwd %%mm1, %%mm3 \n\t"\
114  "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1 \n\t" /* filterCoeff */\
115  "pmaddwd %%mm1, %%mm0 \n\t"\
116  "pmaddwd %%mm1, %%mm3 \n\t"\
117  "paddd %%mm0, %%mm4 \n\t"\
118  "paddd %%mm3, %%mm5 \n\t"\
119  "add %6, %%"FF_REG_S" \n\t" \
120  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm3 \n\t" /* VsrcData */\
121  "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
122  "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
123  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
124  "movq %%mm2, %%mm0 \n\t"\
125  "punpcklwd %%mm3, %%mm2 \n\t"\
126  "punpckhwd %%mm3, %%mm0 \n\t"\
127  "pmaddwd %%mm1, %%mm2 \n\t"\
128  "pmaddwd %%mm1, %%mm0 \n\t"\
129  "paddd %%mm2, %%mm6 \n\t"\
130  "paddd %%mm0, %%mm7 \n\t"\
131  " jnz 2b \n\t"\
132  "psrad $16, %%mm4 \n\t"\
133  "psrad $16, %%mm5 \n\t"\
134  "psrad $16, %%mm6 \n\t"\
135  "psrad $16, %%mm7 \n\t"\
136  "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
137  "packssdw %%mm5, %%mm4 \n\t"\
138  "packssdw %%mm7, %%mm6 \n\t"\
139  "paddw %%mm0, %%mm4 \n\t"\
140  "paddw %%mm0, %%mm6 \n\t"\
141  "movq %%mm4, "U_TEMP"(%0) \n\t"\
142  "movq %%mm6, "V_TEMP"(%0) \n\t"\
143 
/*
 * Two-taps-per-iteration, 32-bit-accumulating counterpart of
 * YSCALEYUV2PACKEDX_YA for the filter list at `offset`(%0) (a string).
 *
 * Entry layout matches the accurate chroma pass (APCK_SIZE bytes, second
 * pointer at APCK_PTR2, coefficients at APCK_COEF). Sources are read at
 * (ptr, index, 2) and 8(ptr, index, 2). Sums accumulate in mm1/mm5 for the
 * first group and mm7/mm6 for the second.
 *
 * On exit: mm1 and mm7 hold the packed word results with VROUNDER_OFFSET(%0)
 * added, and mm3/mm4 are reloaded from U_TEMP(%0)/V_TEMP(%0). mm0, mm2, mm5
 * and mm6 are left holding intermediate values.
 */
144 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
145  "lea "offset"(%0), %%"FF_REG_d" \n\t"\
146  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
147  "pxor %%mm1, %%mm1 \n\t"\
148  "pxor %%mm5, %%mm5 \n\t"\
149  "pxor %%mm7, %%mm7 \n\t"\
150  "pxor %%mm6, %%mm6 \n\t"\
151  ".p2align 4 \n\t"\
152  "2: \n\t"\
153  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
154  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
155  "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
156  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
157  "movq %%mm0, %%mm3 \n\t"\
158  "punpcklwd %%mm4, %%mm0 \n\t"\
159  "punpckhwd %%mm4, %%mm3 \n\t"\
160  "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4 \n\t" /* filterCoeff */\
161  "pmaddwd %%mm4, %%mm0 \n\t"\
162  "pmaddwd %%mm4, %%mm3 \n\t"\
163  "paddd %%mm0, %%mm1 \n\t"\
164  "paddd %%mm3, %%mm5 \n\t"\
165  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
166  "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
167  "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
168  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
169  "movq %%mm2, %%mm0 \n\t"\
170  "punpcklwd %%mm3, %%mm2 \n\t"\
171  "punpckhwd %%mm3, %%mm0 \n\t"\
172  "pmaddwd %%mm4, %%mm2 \n\t"\
173  "pmaddwd %%mm4, %%mm0 \n\t"\
174  "paddd %%mm2, %%mm7 \n\t"\
175  "paddd %%mm0, %%mm6 \n\t"\
176  " jnz 2b \n\t"\
177  "psrad $16, %%mm1 \n\t"\
178  "psrad $16, %%mm5 \n\t"\
179  "psrad $16, %%mm7 \n\t"\
180  "psrad $16, %%mm6 \n\t"\
181  "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
182  "packssdw %%mm5, %%mm1 \n\t"\
183  "packssdw %%mm6, %%mm7 \n\t"\
184  "paddw %%mm0, %%mm1 \n\t"\
185  "paddw %%mm0, %%mm7 \n\t"\
186  "movq "U_TEMP"(%0), %%mm3 \n\t"\
187  "movq "V_TEMP"(%0), %%mm4 \n\t"\
188 
/*
 * Accurate chroma pass followed by the accurate pass over
 * LUM_MMX_FILTER_OFFSET. Leaves the same register contract as
 * YSCALEYUV2PACKEDX: mm3/mm4 = chroma results, mm1/mm7 = results of the
 * second pass.
 */
189 #define YSCALEYUV2PACKEDX_ACCURATE \
190  YSCALEYUV2PACKEDX_ACCURATE_UV \
191  YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
192 
/*
 * Colour-space conversion step applied to the filtered values.
 * Input registers: mm3 = U, mm4 = V, mm1 and mm7 = the two Y vectors.
 * The offsets U_OFFSET/V_OFFSET/Y_OFFSET are subtracted and the results are
 * multiplied (pmulhw) by the UG/VG/UB/VR/Y coefficient vectors, all addressed
 * relative to operand %0. Each chroma term is duplicated per pair of pixels
 * with punpck{l,h}wd before being added to the Y vectors.
 * Output registers (unsigned-saturated bytes): mm2 = B, mm4 = G, mm5 = R.
 * mm0, mm3 and mm6 are overwritten with intermediate values.
 */
193 #define YSCALEYUV2RGBX \
194  "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
195  "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
196  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
197  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
198  "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
199  "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
200  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
201  "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
202  "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
203  "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
204  "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
205  "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
206  "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
207  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
208  "paddw %%mm3, %%mm4 \n\t"\
209  "movq %%mm2, %%mm0 \n\t"\
210  "movq %%mm5, %%mm6 \n\t"\
211  "movq %%mm4, %%mm3 \n\t"\
212  "punpcklwd %%mm2, %%mm2 \n\t"\
213  "punpcklwd %%mm5, %%mm5 \n\t"\
214  "punpcklwd %%mm4, %%mm4 \n\t"\
215  "paddw %%mm1, %%mm2 \n\t"\
216  "paddw %%mm1, %%mm5 \n\t"\
217  "paddw %%mm1, %%mm4 \n\t"\
218  "punpckhwd %%mm0, %%mm0 \n\t"\
219  "punpckhwd %%mm6, %%mm6 \n\t"\
220  "punpckhwd %%mm3, %%mm3 \n\t"\
221  "paddw %%mm7, %%mm0 \n\t"\
222  "paddw %%mm7, %%mm6 \n\t"\
223  "paddw %%mm7, %%mm3 \n\t"\
224  /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
225  "packuswb %%mm0, %%mm2 \n\t"\
226  "packuswb %%mm6, %%mm5 \n\t"\
227  "packuswb %%mm3, %%mm4 \n\t"\
228 
/*
 * Interleaves four vectors of 8 bytes (b, g, r, a) into eight 4-byte pixels
 * and stores them, then closes the outer loop.
 *
 *   dst, index - base and index registers; stores go to dst + index*4 + {0,8,16,24}
 *   dstw       - string naming the operand holding the width to compare against
 *   b, g, r, a - MMX registers with the channel bytes; b and r are overwritten
 *   q0, q2, q3, t - MMX scratch registers
 *
 * In memory each pixel is laid out as the bytes b, g, r, a in ascending
 * address order. After the four MOVNTQ stores, index is advanced by 8 and
 * control returns to label 1 while index < dstw (unsigned "jb").
 * WRITEBGR32 is the argument-expanding wrapper to use at call sites.
 */
229 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
230  "movq "#b", "#q2" \n\t" /* B */\
231  "movq "#r", "#t" \n\t" /* R */\
232  "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
233  "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
234  "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
235  "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
236  "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
237  "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
238  "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
239  "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
240  "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
241  "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
242 \
243  MOVNTQ( q0, (dst, index, 4))\
244  MOVNTQ( b, 8(dst, index, 4))\
245  MOVNTQ( q2, 16(dst, index, 4))\
246  MOVNTQ( q3, 24(dst, index, 4))\
247 \
248  "add $8, "#index" \n\t"\
249  "cmp "dstw", "#index" \n\t"\
250  " jb 1b \n\t"
251 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
252 
/*
 * 32-bit-per-pixel output routine built from the asm templates above.
 *
 * Visible behaviour: sets up the locals the operand-list macro refers to
 * (dummy, dstW_reg, uv_off = c->uv_offx2). When CONFIG_SWSCALE_ALPHA is set
 * and c->needAlpha is non-zero, the colour bytes in mm2/mm4/mm5 are spilled to
 * U_TEMP/V_TEMP/Y_TEMP(%0), mm5 is reloaded from Y_TEMP, mm1/mm7 are shifted
 * right by 3 and packed into mm1, and WRITEBGR32 is invoked with
 * b=mm3, g=mm4, r=mm5, a=mm1. Otherwise mm7 is set to all ones (pcmpeqd) and
 * used as the fourth channel with b=mm2, g=mm4, r=mm5.
 *
 * The filter/source parameters are not referenced by name in the visible body;
 * presumably the asm reaches that data through fields addressed relative to
 * &c->redDither - TODO confirm.
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (e.g. 264 -> 267, 269 -> 271, 275 -> 277) and the string literals are not
 * preceded by anything that opens an asm statement, so lines appear to be
 * missing from this listing (presumably bare macro invocations). Restore them
 * from the upstream file before building.
 */
253 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
254  const int16_t **lumSrc, int lumFilterSize,
255  const int16_t *chrFilter, const int16_t **chrUSrc,
256  const int16_t **chrVSrc,
257  int chrFilterSize, const int16_t **alpSrc,
258  uint8_t *dest, int dstW, int dstY)
259 {
260  x86_reg dummy=0;
261  x86_reg dstW_reg = dstW;
262  x86_reg uv_off = c->uv_offx2;
263 
264  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
267  "movq %%mm2, "U_TEMP"(%0) \n\t"
268  "movq %%mm4, "V_TEMP"(%0) \n\t"
269  "movq %%mm5, "Y_TEMP"(%0) \n\t"
271  "movq "Y_TEMP"(%0), %%mm5 \n\t"
272  "psraw $3, %%mm1 \n\t"
273  "psraw $3, %%mm7 \n\t"
274  "packuswb %%mm7, %%mm1 \n\t"
275  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
277  } else {
280  "pcmpeqd %%mm7, %%mm7 \n\t"
281  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
283  }
284 }
285 
/*
 * 32-bit-per-pixel output routine built from the non-"accurate" templates.
 *
 * Visible behaviour: sets up dummy, dstW_reg and uv_off = c->uv_offx2 for the
 * operand-list macro. With alpha enabled (CONFIG_SWSCALE_ALPHA && c->needAlpha)
 * it runs YSCALEYUV2PACKEDX_YA over ALP_MMX_FILTER_OFFSET into mm1/mm7, shifts
 * both right by 3, packs them into mm1 and calls WRITEBGR32 with
 * b=mm2, g=mm4, r=mm5, a=mm1. Without alpha, mm7 is set to all ones and used
 * as the fourth channel.
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (e.g. 297 -> 300, 304 -> 306) and nothing visible opens or closes the asm
 * statement, so lines appear to be missing from this listing (presumably bare
 * macro invocations). Restore them from the upstream file before building.
 */
286 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
287  const int16_t **lumSrc, int lumFilterSize,
288  const int16_t *chrFilter, const int16_t **chrUSrc,
289  const int16_t **chrVSrc,
290  int chrFilterSize, const int16_t **alpSrc,
291  uint8_t *dest, int dstW, int dstY)
292 {
293  x86_reg dummy=0;
294  x86_reg dstW_reg = dstW;
295  x86_reg uv_off = c->uv_offx2;
296 
297  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
300  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
301  "psraw $3, %%mm1 \n\t"
302  "psraw $3, %%mm7 \n\t"
303  "packuswb %%mm7, %%mm1 \n\t"
304  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
306  } else {
309  "pcmpeqd %%mm7, %%mm7 \n\t"
310  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
312  }
313 }
314 
/*
 * Same structure as yuv2rgb32_X, but the first and third channel registers
 * passed to WRITEBGR32 are exchanged (b=mm5, r=mm2), which swaps the first and
 * third byte of every stored pixel.
 *
 * With alpha enabled (CONFIG_SWSCALE_ALPHA && c->needAlpha) the fourth channel
 * comes from YSCALEYUV2PACKEDX_YA over ALP_MMX_FILTER_OFFSET (shifted right by
 * 3 and packed into mm1); otherwise it is all ones (mm7 after pcmpeqd).
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (e.g. 326 -> 329, 333 -> 335) and nothing visible opens or closes the asm
 * statement, so lines appear to be missing from this listing (presumably bare
 * macro invocations). Restore them from the upstream file before building.
 */
315 static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
316  const int16_t **lumSrc, int lumFilterSize,
317  const int16_t *chrFilter, const int16_t **chrUSrc,
318  const int16_t **chrVSrc,
319  int chrFilterSize, const int16_t **alpSrc,
320  uint8_t *dest, int dstW, int dstY)
321 {
322  x86_reg dummy=0;
323  x86_reg dstW_reg = dstW;
324  x86_reg uv_off = c->uv_offx2;
325 
326  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
329  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
330  "psraw $3, %%mm1 \n\t"
331  "psraw $3, %%mm7 \n\t"
332  "packuswb %%mm7, %%mm1 \n\t"
333  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
335  } else {
338  "pcmpeqd %%mm7, %%mm7 \n\t"
339  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
341  }
342 }
343 
/*
 * Packs byte vectors mm2 (B), mm4 (G) and mm5 (R) into eight 16-bit pixels
 * and stores them, then closes the outer loop.
 *
 * B and R are masked with bF8 and G with bFC; B is shifted right by 3, G is
 * widened against mm7 and shifted left by 3, and the parts are OR-ed together.
 * mm7 must be zero on entry (the callers in this file clear it with pxor).
 * The two result vectors are stored with MOVNTQ at dst + index*2 and
 * dst + index*2 + 8; index is then advanced by 8 and control returns to label 1
 * while index < dstw (unsigned). mm1 and mm3 are scratch.
 * WRITERGB16 is the argument-expanding wrapper to use at call sites.
 */
344 #define REAL_WRITERGB16(dst, dstw, index) \
345  "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
346  "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
347  "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
348  "psrlq $3, %%mm2 \n\t"\
349 \
350  "movq %%mm2, %%mm1 \n\t"\
351  "movq %%mm4, %%mm3 \n\t"\
352 \
353  "punpcklbw %%mm7, %%mm3 \n\t"\
354  "punpcklbw %%mm5, %%mm2 \n\t"\
355  "punpckhbw %%mm7, %%mm4 \n\t"\
356  "punpckhbw %%mm5, %%mm1 \n\t"\
357 \
358  "psllq $3, %%mm3 \n\t"\
359  "psllq $3, %%mm4 \n\t"\
360 \
361  "por %%mm3, %%mm2 \n\t"\
362  "por %%mm4, %%mm1 \n\t"\
363 \
364  MOVNTQ(%%mm2, (dst, index, 2))\
365  MOVNTQ(%%mm1, 8(dst, index, 2))\
366 \
367  "add $8, "#index" \n\t"\
368  "cmp "dstw", "#index" \n\t"\
369  " jb 1b \n\t"
370 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
371 
/*
 * 16-bit-per-pixel output routine using WRITERGB16.
 *
 * Visible behaviour: sets up dummy, dstW_reg and uv_off = c->uv_offx2, clears
 * mm7, adds the dither vectors at BLUE_DITHER/GREEN_DITHER/RED_DITHER(%0) to
 * mm2/mm4/mm5 with unsigned saturation, and invokes WRITERGB16 with operand %4
 * as destination, "%5" as width and FF_REGa as index.
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (382 -> 385, 390 -> 392) and nothing visible opens or closes the asm
 * statement, so lines appear to be missing from this listing (presumably bare
 * macro invocations). Restore them from the upstream file before building.
 */
372 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
373  const int16_t **lumSrc, int lumFilterSize,
374  const int16_t *chrFilter, const int16_t **chrUSrc,
375  const int16_t **chrVSrc,
376  int chrFilterSize, const int16_t **alpSrc,
377  uint8_t *dest, int dstW, int dstY)
378 {
379  x86_reg dummy=0;
380  x86_reg dstW_reg = dstW;
381  x86_reg uv_off = c->uv_offx2;
382 
385  "pxor %%mm7, %%mm7 \n\t"
386  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
387  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
388  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
389  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
390  WRITERGB16(%4, "%5", %%FF_REGa)
392 }
393 
/*
 * 16-bit-per-pixel output routine using WRITERGB16; the visible part is the
 * same as in yuv2rgb565_X_ar.
 *
 * Visible behaviour: sets up dummy, dstW_reg and uv_off = c->uv_offx2, clears
 * mm7, adds the dither vectors at BLUE_DITHER/GREEN_DITHER/RED_DITHER(%0) to
 * mm2/mm4/mm5 with unsigned saturation, and invokes WRITERGB16 with operand %4
 * as destination, "%5" as width and FF_REGa as index.
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (404 -> 407, 412 -> 414) and nothing visible opens or closes the asm
 * statement, so lines appear to be missing from this listing (presumably bare
 * macro invocations). Restore them from the upstream file before building.
 */
394 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
395  const int16_t **lumSrc, int lumFilterSize,
396  const int16_t *chrFilter, const int16_t **chrUSrc,
397  const int16_t **chrVSrc,
398  int chrFilterSize, const int16_t **alpSrc,
399  uint8_t *dest, int dstW, int dstY)
400 {
401  x86_reg dummy=0;
402  x86_reg dstW_reg = dstW;
403  x86_reg uv_off = c->uv_offx2;
404 
407  "pxor %%mm7, %%mm7 \n\t"
408  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
409  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
410  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
411  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
412  WRITERGB16(%4, "%5", %%FF_REGa)
414 }
415 
/*
 * Variant of REAL_WRITERGB16 with a narrower middle field: all three channels
 * are masked with bF8, R (mm5) is additionally shifted right by 1, and the
 * widened G is shifted left by 2 instead of 3.
 *
 * Inputs are mm2 (B), mm4 (G), mm5 (R) and a zero mm7 (the callers in this
 * file clear it with pxor). Two result vectors are stored with MOVNTQ at
 * dst + index*2 and dst + index*2 + 8; index is advanced by 8 and control
 * returns to label 1 while index < dstw (unsigned). mm1 and mm3 are scratch.
 * WRITERGB15 is the argument-expanding wrapper to use at call sites.
 */
416 #define REAL_WRITERGB15(dst, dstw, index) \
417  "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
418  "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
419  "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
420  "psrlq $3, %%mm2 \n\t"\
421  "psrlq $1, %%mm5 \n\t"\
422 \
423  "movq %%mm2, %%mm1 \n\t"\
424  "movq %%mm4, %%mm3 \n\t"\
425 \
426  "punpcklbw %%mm7, %%mm3 \n\t"\
427  "punpcklbw %%mm5, %%mm2 \n\t"\
428  "punpckhbw %%mm7, %%mm4 \n\t"\
429  "punpckhbw %%mm5, %%mm1 \n\t"\
430 \
431  "psllq $2, %%mm3 \n\t"\
432  "psllq $2, %%mm4 \n\t"\
433 \
434  "por %%mm3, %%mm2 \n\t"\
435  "por %%mm4, %%mm1 \n\t"\
436 \
437  MOVNTQ(%%mm2, (dst, index, 2))\
438  MOVNTQ(%%mm1, 8(dst, index, 2))\
439 \
440  "add $8, "#index" \n\t"\
441  "cmp "dstw", "#index" \n\t"\
442  " jb 1b \n\t"
443 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
444 
/*
 * 16-bit-per-pixel output routine using WRITERGB15.
 *
 * Visible behaviour: sets up dummy, dstW_reg and uv_off = c->uv_offx2, clears
 * mm7, adds the dither vectors at BLUE_DITHER/GREEN_DITHER/RED_DITHER(%0) to
 * mm2/mm4/mm5 with unsigned saturation, and invokes WRITERGB15 with operand %4
 * as destination, "%5" as width and FF_REGa as index.
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (455 -> 458, 463 -> 465) and nothing visible opens or closes the asm
 * statement, so lines appear to be missing from this listing (presumably bare
 * macro invocations). Restore them from the upstream file before building.
 */
445 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
446  const int16_t **lumSrc, int lumFilterSize,
447  const int16_t *chrFilter, const int16_t **chrUSrc,
448  const int16_t **chrVSrc,
449  int chrFilterSize, const int16_t **alpSrc,
450  uint8_t *dest, int dstW, int dstY)
451 {
452  x86_reg dummy=0;
453  x86_reg dstW_reg = dstW;
454  x86_reg uv_off = c->uv_offx2;
455 
458  "pxor %%mm7, %%mm7 \n\t"
459  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
460  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
461  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
462  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
463  WRITERGB15(%4, "%5", %%FF_REGa)
465 }
466 
/*
 * 16-bit-per-pixel output routine using WRITERGB15; the visible part is the
 * same as in yuv2rgb555_X_ar.
 *
 * Visible behaviour: sets up dummy, dstW_reg and uv_off = c->uv_offx2, clears
 * mm7, adds the dither vectors at BLUE_DITHER/GREEN_DITHER/RED_DITHER(%0) to
 * mm2/mm4/mm5 with unsigned saturation, and invokes WRITERGB15 with operand %4
 * as destination, "%5" as width and FF_REGa as index.
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (477 -> 480, 485 -> 487) and nothing visible opens or closes the asm
 * statement, so lines appear to be missing from this listing (presumably bare
 * macro invocations). Restore them from the upstream file before building.
 */
467 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
468  const int16_t **lumSrc, int lumFilterSize,
469  const int16_t *chrFilter, const int16_t **chrUSrc,
470  const int16_t **chrVSrc,
471  int chrFilterSize, const int16_t **alpSrc,
472  uint8_t *dest, int dstW, int dstY)
473 {
474  x86_reg dummy=0;
475  x86_reg dstW_reg = dstW;
476  x86_reg uv_off = c->uv_offx2;
477 
480  "pxor %%mm7, %%mm7 \n\t"
481  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
482  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
483  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
484  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
485  WRITERGB15(%4, "%5", %%FF_REGa)
487 }
488 
/*
 * Packs byte vectors mm2 (B), mm4 (G), mm5 (R) into eight 3-byte pixels using
 * only shift/unpack/or operations, with mm7 expected to be zero on entry.
 *
 * The channels are first expanded to four "0RGB0RGB" quadwords, then squeezed
 * into three quadwords which are stored with MOVNTQ at (dst), 8(dst) and
 * 16(dst). Afterwards dst is advanced by 24 bytes, index by 8, and control
 * returns to label 1 while index < dstw (unsigned).
 * All of mm0..mm7 are overwritten. Unlike the 16/32-bit writers, dst itself is
 * modified, so it must be a register the asm statement is allowed to change.
 */
489 #define WRITEBGR24MMX(dst, dstw, index) \
490  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
491  "movq %%mm2, %%mm1 \n\t" /* B */\
492  "movq %%mm5, %%mm6 \n\t" /* R */\
493  "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
494  "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
495  "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
496  "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
497  "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
498  "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
499  "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
500  "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
501  "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
502  "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
503 \
504  "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
505  "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
506  "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
507  "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
508 \
509  "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
510  "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
511  "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
512  "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
513 \
514  "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
515  "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
516  "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
517  "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
518 \
519  "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
520  "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
521  "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
522  "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
523  MOVNTQ(%%mm0, (dst))\
524 \
525  "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
526  "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
527  "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
528  "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
529  MOVNTQ(%%mm6, 8(dst))\
530 \
531  "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
532  "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
533  "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
534  MOVNTQ(%%mm5, 16(dst))\
535 \
536  "add $24, "#dst" \n\t"\
537 \
538  "add $8, "#index" \n\t"\
539  "cmp "dstw", "#index" \n\t"\
540  " jb 1b \n\t"
541 
/*
 * 24-bit writer that uses pshufw and the byte masks ff_M24A, ff_M24B and
 * ff_M24C to place the B (mm2), G (mm4) and R (mm5) bytes directly into their
 * final positions.
 *
 * Three quadwords are assembled in mm6 one after another and stored with
 * MOVNTQ at (dst), 8(dst) and 16(dst); dst is then advanced by 24 bytes and
 * index by 8, and control returns to label 1 while index < dstw (unsigned).
 * mm0 and mm7 are loaded with masks at the start, so the incoming value of
 * mm7 is not used here despite the inherited register comment. mm4 is shifted
 * right by 8 bits part-way through; mm1, mm3 and mm6 are scratch.
 *
 * WRITEBGR24 is (re)bound to this variant below.
 */
542 #define WRITEBGR24MMXEXT(dst, dstw, index) \
543  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
544  "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
545  "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
546  "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
547  "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
548  "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
549 \
550  "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
551  "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
552  "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
553 \
554  "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
555  "por %%mm1, %%mm6 \n\t"\
556  "por %%mm3, %%mm6 \n\t"\
557  MOVNTQ(%%mm6, (dst))\
558 \
559  "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
560  "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
561  "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
562  "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
563 \
564  "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
565  "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
566  "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
567 \
568  "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
569  "por %%mm3, %%mm6 \n\t"\
570  MOVNTQ(%%mm6, 8(dst))\
571 \
572  "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\
573  "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
574  "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
575 \
576  "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
577  "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
578  "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
579 \
580  "por %%mm1, %%mm3 \n\t"\
581  "por %%mm3, %%mm6 \n\t"\
582  MOVNTQ(%%mm6, 16(dst))\
583 \
584  "add $24, "#dst" \n\t"\
585 \
586  "add $8, "#index" \n\t"\
587  "cmp "dstw", "#index" \n\t"\
588  " jb 1b \n\t"
589 
590 #undef WRITEBGR24
591 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
592 
593 #if HAVE_6REGS
/*
 * 24-bit-per-pixel output routine using WRITEBGR24 (only built when
 * HAVE_6REGS is set, since it needs FF_REG_c in addition to a/d/S).
 *
 * Visible behaviour: clears mm7, computes FF_REG_c = index*3 + dest (operand
 * %4) as the byte address for the current group, and invokes WRITEBGR24 with
 * that register as destination, "%5" (dstW_reg) as width and FF_REGa as index.
 * The operand list is spelled out here rather than taken from
 * YSCALEYUV2PACKEDX_END because it needs the ff_M24A/ff_M24C/ff_M24B named
 * constraints and the extra FF_REG_c clobber.
 *
 * NOTE(review): the embedded line numbering skips 605-606 and nothing visible
 * opens the asm statement before the first string literal, so lines appear to
 * be missing from this listing (presumably bare macro invocations). Restore
 * them from the upstream file before building.
 */
594 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
595  const int16_t **lumSrc, int lumFilterSize,
596  const int16_t *chrFilter, const int16_t **chrUSrc,
597  const int16_t **chrVSrc,
598  int chrFilterSize, const int16_t **alpSrc,
599  uint8_t *dest, int dstW, int dstY)
600 {
601  x86_reg dummy=0;
602  x86_reg dstW_reg = dstW;
603  x86_reg uv_off = c->uv_offx2;
604 
607  "pxor %%mm7, %%mm7 \n\t"
608  "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c"\n\t" //FIXME optimize
609  "add %4, %%"FF_REG_c" \n\t"
610  WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
611  :: "r" (&c->redDither),
612  "m" (dummy), "m" (dummy), "m" (dummy),
613  "r" (dest), "m" (dstW_reg), "m"(uv_off)
614  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
615  : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
616  );
617 }
618 
/*
 * 24-bit-per-pixel output routine using WRITEBGR24; the visible part is the
 * same as in yuv2bgr24_X_ar.
 *
 * Visible behaviour: clears mm7, computes FF_REG_c = index*3 + dest (operand
 * %4) as the byte address for the current group, and invokes WRITEBGR24 with
 * that register as destination, "%5" (dstW_reg) as width and FF_REGa as index.
 * Inputs: %0 = &c->redDither, %1..%3 = dummy, %4 = dest, %5 = dstW_reg,
 * %6 = uv_off, plus the ff_M24A/ff_M24C/ff_M24B named constraints.
 * Clobbers FF_REG_a, FF_REG_c, FF_REG_d and FF_REG_S.
 *
 * NOTE(review): the embedded line numbering skips 630-631 and nothing visible
 * opens the asm statement before the first string literal, so lines appear to
 * be missing from this listing (presumably bare macro invocations). Restore
 * them from the upstream file before building.
 */
619 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
620  const int16_t **lumSrc, int lumFilterSize,
621  const int16_t *chrFilter, const int16_t **chrUSrc,
622  const int16_t **chrVSrc,
623  int chrFilterSize, const int16_t **alpSrc,
624  uint8_t *dest, int dstW, int dstY)
625 {
626  x86_reg dummy=0;
627  x86_reg dstW_reg = dstW;
628  x86_reg uv_off = c->uv_offx2;
629 
632  "pxor %%mm7, %%mm7 \n\t"
633  "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize
634  "add %4, %%"FF_REG_c" \n\t"
635  WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
636  :: "r" (&c->redDither),
637  "m" (dummy), "m" (dummy), "m" (dummy),
638  "r" (dest), "m" (dstW_reg), "m"(uv_off)
639  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
640  : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
641  );
642 }
643 #endif /* HAVE_6REGS */
644 
/*
 * Packs four word vectors into 16 bytes of interleaved output and stores them.
 *
 * mm3 and mm4 are each packed to bytes and interleaved with each other
 * (mm3 byte first); mm1 and mm7 are packed together into eight bytes in mm1.
 * Those eight bytes are then interleaved with the mm3/mm4 pairs so that every
 * byte from mm1/mm7 is followed by one byte from the mm3/mm4 stream.
 * Going by the macro name, mm1/mm7 carry luma and mm3/mm4 carry U and V -
 * TODO confirm against the callers' register comments.
 *
 * The two result vectors are stored with MOVNTQ at dst + index*2 and
 * dst + index*2 + 8; index is advanced by 8 and control returns to label 1
 * while index < dstw (unsigned). WRITEYUY2 is the argument-expanding wrapper.
 */
645 #define REAL_WRITEYUY2(dst, dstw, index) \
646  "packuswb %%mm3, %%mm3 \n\t"\
647  "packuswb %%mm4, %%mm4 \n\t"\
648  "packuswb %%mm7, %%mm1 \n\t"\
649  "punpcklbw %%mm4, %%mm3 \n\t"\
650  "movq %%mm1, %%mm7 \n\t"\
651  "punpcklbw %%mm3, %%mm1 \n\t"\
652  "punpckhbw %%mm3, %%mm7 \n\t"\
653 \
654  MOVNTQ(%%mm1, (dst, index, 2))\
655  MOVNTQ(%%mm7, 8(dst, index, 2))\
656 \
657  "add $8, "#index" \n\t"\
658  "cmp "dstw", "#index" \n\t"\
659  " jb 1b \n\t"
660 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
661 
/*
 * Packed 4:2:2 output routine using WRITEYUY2.
 *
 * Visible behaviour: sets up dummy, dstW_reg and uv_off = c->uv_offx2, shifts
 * the word vectors in mm3, mm4, mm1 and mm7 right (arithmetically) by 3 and
 * invokes WRITEYUY2 with operand %4 as destination, "%5" as width and FF_REGa
 * as index. No colour-space conversion step is visible in this routine.
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (672 -> 674, 679 -> 681) and nothing visible opens or closes the asm
 * statement, so lines appear to be missing from this listing (presumably bare
 * macro invocations). Restore them from the upstream file before building.
 */
662 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
663  const int16_t **lumSrc, int lumFilterSize,
664  const int16_t *chrFilter, const int16_t **chrUSrc,
665  const int16_t **chrVSrc,
666  int chrFilterSize, const int16_t **alpSrc,
667  uint8_t *dest, int dstW, int dstY)
668 {
669  x86_reg dummy=0;
670  x86_reg dstW_reg = dstW;
671  x86_reg uv_off = c->uv_offx2;
672 
674  /* mm3/mm4 and mm1/mm7 are scaled down here, then packed by WRITEYUY2 */
675  "psraw $3, %%mm3 \n\t"
676  "psraw $3, %%mm4 \n\t"
677  "psraw $3, %%mm1 \n\t"
678  "psraw $3, %%mm7 \n\t"
679  WRITEYUY2(%4, "%5", %%FF_REGa)
681 }
682 
/*
 * Packed 4:2:2 output routine using WRITEYUY2; the visible part is the same
 * as in yuv2yuyv422_X_ar.
 *
 * Visible behaviour: sets up dummy, dstW_reg and uv_off = c->uv_offx2, shifts
 * the word vectors in mm3, mm4, mm1 and mm7 right (arithmetically) by 3 and
 * invokes WRITEYUY2 with operand %4 as destination, "%5" as width and FF_REGa
 * as index. No colour-space conversion step is visible in this routine.
 *
 * NOTE(review): the embedded line numbering skips values inside the body
 * (693 -> 695, 700 -> 702) and nothing visible opens or closes the asm
 * statement, so lines appear to be missing from this listing (presumably bare
 * macro invocations). Restore them from the upstream file before building.
 */
683 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
684  const int16_t **lumSrc, int lumFilterSize,
685  const int16_t *chrFilter, const int16_t **chrUSrc,
686  const int16_t **chrVSrc,
687  int chrFilterSize, const int16_t **alpSrc,
688  uint8_t *dest, int dstW, int dstY)
689 {
690  x86_reg dummy=0;
691  x86_reg dstW_reg = dstW;
692  x86_reg uv_off = c->uv_offx2;
693 
695  /* mm3/mm4 and mm1/mm7 are scaled down here, then packed by WRITEYUY2 */
696  "psraw $3, %%mm3 \n\t"
697  "psraw $3, %%mm4 \n\t"
698  "psraw $3, %%mm1 \n\t"
699  "psraw $3, %%mm7 \n\t"
700  WRITEYUY2(%4, "%5", %%FF_REGa)
702 }
703 
/*
 * Chroma half of the two-line blend used by the *_2 routines.
 *
 *   index - register used as the running byte index (zeroed here)
 *   c     - operand/register holding the base address for the *_OFFSET fields
 *
 * Defines label 1 (the loop head the WRITE* macros jump back to). Reads four
 * chroma words from each of the two buffers in operands %2 and %3 at `index`,
 * and four more at `index` + UV_OFF_BYTE(c); the index is temporarily advanced
 * and then restored. Each pair is blended as
 *   (buf0 - buf1) * coeff (high 16 bits of the product) + (buf1 >> 4)
 * with the coefficient taken from CHR_MMX_FILTER_OFFSET+8(c).
 * The blended values then have U_OFFSET/V_OFFSET subtracted and are copied and
 * multiplied by UG_COEFF/VG_COEFF, leaving
 *   mm2 = U - offset, mm5 = V - offset, mm3 = ug term, mm4 = vg term.
 */
704 #define REAL_YSCALEYUV2RGB_UV(index, c) \
705  "xor "#index", "#index" \n\t"\
706  ".p2align 4 \n\t"\
707  "1: \n\t"\
708  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
709  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
710  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
711  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
712  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
713  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
714  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
715  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
716  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
717  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
718  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
719  "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
720  "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
721  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
722  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
723  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
724  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
725  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
726  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
727  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
728  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
729  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
730 
/*
 * Two-line blend of one full-resolution plane (used for luma and, by
 * yuv2rgb32_2, for alpha).
 *
 *   index  - running index register; data is read at (b, index, 2) and 8(b, index, 2)
 *   c      - base address for LUM_MMX_FILTER_OFFSET
 *   b1, b2 - registers pointing at the two source lines
 *
 * Computes (b1 - b2) * coeff (high 16 bits of the product) + (b2 >> 4) for two
 * groups of four words, with the coefficient taken from
 * LUM_MMX_FILTER_OFFSET+8(c). Results are left in mm1 and mm7; mm0 and mm6
 * are scratch.
 */
731 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
732  "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
733  "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
734  "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
735  "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
736  "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
737  "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
738  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
739  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
740  "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
741  "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
742  "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
743  "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
744 
/*
 * Final colour-space conversion step for the two-line path, with the
 * coefficient base address given by `c` instead of being fixed to %0.
 *
 * Expects mm2 = U - offset, mm5 = V - offset, mm3/mm4 = ug/vg terms and
 * mm1/mm7 = the two Y vectors. Multiplies by UB_COEFF/VR_COEFF, subtracts
 * Y_OFFSET and scales Y by Y_COEFF, duplicates each chroma term per pair of
 * pixels and adds it to Y.
 * Output (unsigned-saturated bytes): mm2 = B, mm4 = G, mm5 = R.
 * mm0, mm3 and mm6 are overwritten with intermediate values.
 */
745 #define REAL_YSCALEYUV2RGB_COEFF(c) \
746  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
747  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
748  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
749  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
750  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
751  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
752  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
753  "paddw %%mm3, %%mm4 \n\t"\
754  "movq %%mm2, %%mm0 \n\t"\
755  "movq %%mm5, %%mm6 \n\t"\
756  "movq %%mm4, %%mm3 \n\t"\
757  "punpcklwd %%mm2, %%mm2 \n\t"\
758  "punpcklwd %%mm5, %%mm5 \n\t"\
759  "punpcklwd %%mm4, %%mm4 \n\t"\
760  "paddw %%mm1, %%mm2 \n\t"\
761  "paddw %%mm1, %%mm5 \n\t"\
762  "paddw %%mm1, %%mm4 \n\t"\
763  "punpckhwd %%mm0, %%mm0 \n\t"\
764  "punpckhwd %%mm6, %%mm6 \n\t"\
765  "punpckhwd %%mm3, %%mm3 \n\t"\
766  "paddw %%mm7, %%mm0 \n\t"\
767  "paddw %%mm7, %%mm6 \n\t"\
768  "paddw %%mm7, %%mm3 \n\t"\
769  /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
770  "packuswb %%mm0, %%mm2 \n\t"\
771  "packuswb %%mm6, %%mm5 \n\t"\
772  "packuswb %%mm3, %%mm4 \n\t"\
773 
/*
 * Call-site wrappers. YSCALEYUV2RGB_YA forwards to REAL_YSCALEYUV2RGB_YA so
 * that its arguments are macro-expanded before being stringified.
 * YSCALEYUV2RGB(index, c) chains the chroma blend, the blend of the two lines
 * in operands %0 and %1, and the colour conversion, leaving B/G/R bytes in
 * mm2/mm4/mm5.
 */
774 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
775 
776 #define YSCALEYUV2RGB(index, c) \
777  REAL_YSCALEYUV2RGB_UV(index, c) \
778  REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
779  REAL_YSCALEYUV2RGB_COEFF(c)
780 
781 /**
782  * vertical bilinear scale YV12 to RGB
783  */
/*
 * Blends two input lines and writes 32-bit pixels via WRITEBGR32.
 *
 * Only element 0 and 1 of buf, ubuf and (when alpha is needed) abuf are read
 * here; vbuf, dstW, yalpha, uvalpha and y are not referenced in this body.
 * The asm takes the loop bound from DSTW_OFFSET(%5) and the blend
 * coefficients from fields relative to &c->redDither.
 *
 * Alpha path on x86-64: r8 is the index register and abuf0/abuf1 are passed as
 * operands %6/%7; the blended alpha is shifted right by 3 and packed into mm1.
 * Alpha path otherwise: abuf0/abuf1 are stashed in c->u_temp/c->v_temp,
 * FF_REG_b is saved to ESP_OFFSET(%5) and reused as the destination pointer,
 * FF_REG_BP is pushed and reused as the index, and operands %0/%1 are pushed,
 * temporarily replaced by the alpha pointers, and popped again.
 * Without alpha the fourth channel is all ones (mm7 after pcmpeqd).
 *
 * Only the x86-64 asm statement declares a clobber ("%r8"); the other two
 * statements save and restore FF_REG_b and FF_REG_BP by hand and list none.
 */
784 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
785  const int16_t *ubuf[2], const int16_t *vbuf[2],
786  const int16_t *abuf[2], uint8_t *dest,
787  int dstW, int yalpha, int uvalpha, int y)
788 {
789  const int16_t *buf0 = buf[0], *buf1 = buf[1],
790  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
791 
792  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
793  const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
794 #if ARCH_X86_64
795  __asm__ volatile(
796  YSCALEYUV2RGB(%%r8, %5)
797  YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
798  "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
799  "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
800  "packuswb %%mm7, %%mm1 \n\t"
801  WRITEBGR32(%4, DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
802  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
803  "a" (&c->redDither),
804  "r" (abuf0), "r" (abuf1)
805  : "%r8"
806  );
807 #else
808  c->u_temp=(intptr_t)abuf0;
809  c->v_temp=(intptr_t)abuf1;
810  __asm__ volatile(
811  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
812  "mov %4, %%"FF_REG_b" \n\t"
813  "push %%"FF_REG_BP" \n\t"
814  YSCALEYUV2RGB(%%FF_REGBP, %5)
815  "push %0 \n\t"
816  "push %1 \n\t"
817  "mov "U_TEMP"(%5), %0 \n\t"
818  "mov "V_TEMP"(%5), %1 \n\t"
819  YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
820  "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
821  "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
822  "packuswb %%mm7, %%mm1 \n\t"
823  "pop %1 \n\t"
824  "pop %0 \n\t"
825  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
826  "pop %%"FF_REG_BP" \n\t"
827  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
828  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
829  "a" (&c->redDither)
830  );
831 #endif
832  } else {
833  __asm__ volatile(
834  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
835  "mov %4, %%"FF_REG_b" \n\t"
836  "push %%"FF_REG_BP" \n\t"
837  YSCALEYUV2RGB(%%FF_REGBP, %5)
838  "pcmpeqd %%mm7, %%mm7 \n\t"
839  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
840  "pop %%"FF_REG_BP" \n\t"
841  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
842  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
843  "a" (&c->redDither)
844  );
845  }
846 }
847 
/*
 * Blends two input lines and writes 24-bit pixels via WRITEBGR24.
 *
 * Only buf[0..1] and ubuf[0..1] are read here; vbuf, abuf, dstW, yalpha,
 * uvalpha and y are not referenced in this body. The asm takes the loop bound
 * from DSTW_OFFSET(%5) and the blend coefficients from fields relative to
 * &c->redDither.
 *
 * FF_REG_b is saved to ESP_OFFSET(%5) and reused as the destination pointer
 * (loaded from memory operand %4; WRITEBGR24 advances it by 24 per group).
 * FF_REG_BP is pushed and reused as the index. Both are restored at the end.
 * mm7 is cleared before the writer. The statement lists the
 * ff_M24A/ff_M24C/ff_M24B named constraints and no clobbers.
 */
848 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
849  const int16_t *ubuf[2], const int16_t *vbuf[2],
850  const int16_t *abuf[2], uint8_t *dest,
851  int dstW, int yalpha, int uvalpha, int y)
852 {
853  const int16_t *buf0 = buf[0], *buf1 = buf[1],
854  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
855 
856  __asm__ volatile(
857  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
858  "mov %4, %%"FF_REG_b" \n\t"
859  "push %%"FF_REG_BP" \n\t"
860  YSCALEYUV2RGB(%%FF_REGBP, %5)
861  "pxor %%mm7, %%mm7 \n\t"
862  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
863  "pop %%"FF_REG_BP" \n\t"
864  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
865  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
866  "a" (&c->redDither)
867  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
868  );
869 }
870 
/*
 * Blends two input lines and writes 16-bit pixels via WRITERGB15.
 *
 * Only buf[0..1] and ubuf[0..1] are read here; vbuf, abuf, dstW, yalpha,
 * uvalpha and y are not referenced in this body. FF_REG_b is saved to
 * ESP_OFFSET(%5) and reused as the destination pointer; FF_REG_BP is pushed
 * and reused as the index; both are restored at the end. After the colour
 * conversion mm7 is cleared and the dither vectors at
 * BLUE_DITHER/GREEN_DITHER/RED_DITHER(%5) are added with unsigned saturation.
 *
 * NOTE(review): the embedded line numbering skips 894, and unlike
 * yuv2rgb565_2 no NAMED_CONSTRAINTS_ADD line is visible here even though
 * WRITERGB15 references MANGLE(bF8). A line appears to be missing from this
 * listing - verify against the upstream file.
 */
871 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
872  const int16_t *ubuf[2], const int16_t *vbuf[2],
873  const int16_t *abuf[2], uint8_t *dest,
874  int dstW, int yalpha, int uvalpha, int y)
875 {
876  const int16_t *buf0 = buf[0], *buf1 = buf[1],
877  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
878 
879  __asm__ volatile(
880  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
881  "mov %4, %%"FF_REG_b" \n\t"
882  "push %%"FF_REG_BP" \n\t"
883  YSCALEYUV2RGB(%%FF_REGBP, %5)
884  "pxor %%mm7, %%mm7 \n\t"
885  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
886  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
887  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
888  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
889  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
890  "pop %%"FF_REG_BP" \n\t"
891  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
892  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
893  "a" (&c->redDither)
895  );
896 }
897 
/*
 * Blends two input lines and writes 16-bit pixels via WRITERGB16.
 *
 * Only buf[0..1] and ubuf[0..1] are read here; vbuf, abuf, dstW, yalpha,
 * uvalpha and y are not referenced in this body. FF_REG_b is saved to
 * ESP_OFFSET(%5) and reused as the destination pointer; FF_REG_BP is pushed
 * and reused as the index; both are restored at the end. After the colour
 * conversion mm7 is cleared and the dither vectors at
 * BLUE_DITHER/GREEN_DITHER/RED_DITHER(%5) are added with unsigned saturation.
 * The statement lists the bF8/bFC named constraints and no clobbers.
 */
898 static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
899  const int16_t *ubuf[2], const int16_t *vbuf[2],
900  const int16_t *abuf[2], uint8_t *dest,
901  int dstW, int yalpha, int uvalpha, int y)
902 {
903  const int16_t *buf0 = buf[0], *buf1 = buf[1],
904  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
905 
906  __asm__ volatile(
907  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
908  "mov %4, %%"FF_REG_b" \n\t"
909  "push %%"FF_REG_BP" \n\t"
910  YSCALEYUV2RGB(%%FF_REGBP, %5)
911  "pxor %%mm7, %%mm7 \n\t"
912  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
913  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
914  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
915  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
916  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
917  "pop %%"FF_REG_BP" \n\t"
918  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
919  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
920  "a" (&c->redDither)
921  NAMED_CONSTRAINTS_ADD(bF8,bFC)
922  );
923 }
924 
/*
 * Asm fragment: head of the two-line packed-output loop.
 *   index - register used as the loop offset (zeroed here)
 *   c     - register holding the context base used for the *_OFFSET accesses
 * Expects operands %0/%1 = the two luma lines and %2/%3 = the two chroma lines.
 *
 * Before the loop, the coefficients stored at CHR_MMX_FILTER_OFFSET+8(c) and
 * LUM_MMX_FILTER_OFFSET+8(c) are arithmetically shifted right by 3 and written
 * back in place, i.e. every expansion modifies those context fields.
 *
 * Per iteration, for each input pair (line0, line1) the fragment computes
 *   ((line0 - line1) * coeff >> 16) + (line1 >> 7)
 * Results: mm3 = chroma read at index, mm4 = chroma read at
 * index + UV_OFF_BYTE(c), mm1 = first 4 luma words, mm7 = next 4 luma words.
 *
 * The fragment opens label "1:" but contains no branch back to it.
 * NOTE(review): presumably the WRITE* macro expanded after it closes the loop.
 */
925 #define REAL_YSCALEYUV2PACKED(index, c) \
926  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
927  "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
928  "psraw $3, %%mm0 \n\t"\
929  "psraw $3, %%mm1 \n\t"\
930  "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
931  "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
932  "xor "#index", "#index" \n\t"\
933  ".p2align 4 \n\t"\
934  "1: \n\t"\
935  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
936  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
937  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
938  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
939  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
940  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
941  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
942  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
943  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
944  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
945  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
946  "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7 */\
947  "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+2048] >>7 */\
948  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
949  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
950  "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
951  "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
952  "movq 8(%0, "#index", 2), %%mm6 \n\t" /* next 4 words of buf0 (+8 bytes) */\
953  "movq 8(%1, "#index", 2), %%mm7 \n\t" /* next 4 words of buf1 (+8 bytes) */\
954  "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
955  "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
956  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
957  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
958  "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7 */\
959  "psraw $7, %%mm7 \n\t" /* buf1 (next 4 words) >>7 */\
960  "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
961  "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
962 
/* Extra expansion level so that macro arguments are expanded before being stringized. */
963 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
964 
/**
 * Two-line packed output writer that expands YSCALEYUV2PACKED followed by
 * WRITEYUY2 inside a single inline-asm statement.
 *
 * Asm operands: %0 = buf[0], %1 = buf[1], %2 = ubuf[0], %3 = ubuf[1]
 * (fixed registers c, d, S, D), %4 = dest (memory operand) and
 * %5 = &c->redDither (register a), the base for the *_OFFSET accesses.
 *
 * YSCALEYUV2PACKED rewrites the coefficients stored at
 * CHR_MMX_FILTER_OFFSET+8 and LUM_MMX_FILTER_OFFSET+8 (shifted right by 3),
 * so this call modifies the context.
 *
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced by name in this
 * body; the width is read from DSTW_OFFSET(%5).
 * NOTE(review): presumably the caller stores the width and blend weights in
 * the context before calling -- confirm against the call site.
 */
965 static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
966  const int16_t *ubuf[2], const int16_t *vbuf[2],
967  const int16_t *abuf[2], uint8_t *dest,
968  int dstW, int yalpha, int uvalpha, int y)
969 {
970  const int16_t *buf0 = buf[0], *buf1 = buf[1],
971  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
972 
973  __asm__ volatile(
     /* save register b in the ESP_OFFSET slot, then reuse it as the dest pointer */
974  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
975  "mov %4, %%"FF_REG_b" \n\t"
     /* BP is handed to the macros below as their index register, so preserve it */
976  "push %%"FF_REG_BP" \n\t"
977  YSCALEYUV2PACKED(%%FF_REGBP, %5)
978  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
     /* restore BP and register b */
979  "pop %%"FF_REG_BP" \n\t"
980  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
981  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
982  "a" (&c->redDither)
983  );
984 }
985 
/*
 * Asm fragment: head of the single-line YUV -> RGB loop; chroma is taken from
 * operand %2 only (operands %1 and %3 are not read here).
 *   index - register used as the loop offset (zeroed here)
 *   c     - register holding the context base used for the *_OFFSET/*_COEFF accesses
 *
 * Per iteration: loads 4 U words at (%2, index), 4 V words at
 * (%2, index + UV_OFF_BYTE(c)) and 8 luma words at (%0, index, 2); shifts all
 * of them right by 4, subtracts U_OFFSET/V_OFFSET/Y_OFFSET and multiplies by
 * the UG/VG/UB/VR/Y coefficients with pmulhw. Each chroma product is then
 * duplicated across two adjacent luma samples (punpck*wd) and added to luma.
 *
 * Results, packed to unsigned bytes with saturation: mm2 = B, mm4 = G, mm5 = R.
 * mm0, mm1, mm3, mm6 and mm7 are used as scratch.
 *
 * The fragment opens label "1:" but contains no branch back to it.
 * NOTE(review): presumably the WRITE* macro expanded after it closes the loop.
 */
986 #define REAL_YSCALEYUV2RGB1(index, c) \
987  "xor "#index", "#index" \n\t"\
988  ".p2align 4 \n\t"\
989  "1: \n\t"\
990  "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
991  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
992  "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
993  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
994  "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4 */\
995  "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4 */\
996  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
997  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
998  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
999  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1000  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1001  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1002  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1003  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1004  "movq 8(%0, "#index", 2), %%mm7 \n\t" /* next 4 words of buf0 (+8 bytes) */\
1005  "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4 */\
1006  "psraw $4, %%mm7 \n\t" /* buf0 (next 4 words) >>4 */\
1007  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1008  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1009  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1010  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1011  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1012  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1013  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1014  "paddw %%mm3, %%mm4 \n\t"\
1015  "movq %%mm2, %%mm0 \n\t"\
1016  "movq %%mm5, %%mm6 \n\t"\
1017  "movq %%mm4, %%mm3 \n\t"\
1018  "punpcklwd %%mm2, %%mm2 \n\t"\
1019  "punpcklwd %%mm5, %%mm5 \n\t"\
1020  "punpcklwd %%mm4, %%mm4 \n\t"\
1021  "paddw %%mm1, %%mm2 \n\t"\
1022  "paddw %%mm1, %%mm5 \n\t"\
1023  "paddw %%mm1, %%mm4 \n\t"\
1024  "punpckhwd %%mm0, %%mm0 \n\t"\
1025  "punpckhwd %%mm6, %%mm6 \n\t"\
1026  "punpckhwd %%mm3, %%mm3 \n\t"\
1027  "paddw %%mm7, %%mm0 \n\t"\
1028  "paddw %%mm7, %%mm6 \n\t"\
1029  "paddw %%mm7, %%mm3 \n\t"\
1030  /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = added to Y1, 2 = added to Y2) */\
1031  "packuswb %%mm0, %%mm2 \n\t"\
1032  "packuswb %%mm6, %%mm5 \n\t"\
1033  "packuswb %%mm3, %%mm4 \n\t"\
1034 
/* Extra expansion level so that macro arguments are expanded before being stringized. */
1035 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
1036 
1037 // do vertical chrominance interpolation
/*
 * Asm fragment: head of the single-luma-line YUV -> RGB loop where chroma is
 * the sum of the two chroma lines in operands %2 and %3 (operand %1 is not
 * read here).
 *   index - register used as the loop offset (zeroed here)
 *   c     - register holding the context base used for the *_OFFSET/*_COEFF accesses
 *
 * Per iteration: adds the chroma words of both lines and shifts the sum right
 * by 5 with a logical shift (see the FIXME about overflow), shifts luma from
 * (%0, index, 2) right by 4, then applies the same offset subtraction,
 * coefficient multiplication and chroma duplication as YSCALEYUV2RGB1.
 *
 * Results, packed to unsigned bytes with saturation: mm2 = B, mm4 = G, mm5 = R.
 * mm0, mm1, mm3, mm6 and mm7 are used as scratch.
 *
 * The fragment opens label "1:" but contains no branch back to it.
 * NOTE(review): presumably the WRITE* macro expanded after it closes the loop.
 */
1038 #define REAL_YSCALEYUV2RGB1b(index, c) \
1039  "xor "#index", "#index" \n\t"\
1040  ".p2align 4 \n\t"\
1041  "1: \n\t"\
1042  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
1043  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
1044  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1045  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
1046  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1047  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1048  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1049  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1050  "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
1051  "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
1052  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
1053  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
1054  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
1055  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1056  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1057  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1058  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1059  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1060  "movq 8(%0, "#index", 2), %%mm7 \n\t" /* next 4 words of buf0 (+8 bytes) */\
1061  "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4 */\
1062  "psraw $4, %%mm7 \n\t" /* buf0 (next 4 words) >>4 */\
1063  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1064  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1065  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1066  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1067  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1068  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1069  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1070  "paddw %%mm3, %%mm4 \n\t"\
1071  "movq %%mm2, %%mm0 \n\t"\
1072  "movq %%mm5, %%mm6 \n\t"\
1073  "movq %%mm4, %%mm3 \n\t"\
1074  "punpcklwd %%mm2, %%mm2 \n\t"\
1075  "punpcklwd %%mm5, %%mm5 \n\t"\
1076  "punpcklwd %%mm4, %%mm4 \n\t"\
1077  "paddw %%mm1, %%mm2 \n\t"\
1078  "paddw %%mm1, %%mm5 \n\t"\
1079  "paddw %%mm1, %%mm4 \n\t"\
1080  "punpckhwd %%mm0, %%mm0 \n\t"\
1081  "punpckhwd %%mm6, %%mm6 \n\t"\
1082  "punpckhwd %%mm3, %%mm3 \n\t"\
1083  "paddw %%mm7, %%mm0 \n\t"\
1084  "paddw %%mm7, %%mm6 \n\t"\
1085  "paddw %%mm7, %%mm3 \n\t"\
1086  /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = added to Y1, 2 = added to Y2) */\
1087  "packuswb %%mm0, %%mm2 \n\t"\
1088  "packuswb %%mm6, %%mm5 \n\t"\
1089  "packuswb %%mm3, %%mm4 \n\t"\
1090 
/* Extra expansion level so that macro arguments are expanded before being stringized. */
1091 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
1092 
/*
 * Asm fragment: loads 8 alpha words from operand %1 at (%1, index, 2), shifts
 * each right by 7 and packs them to 8 unsigned bytes (with saturation) in mm7.
 * mm1 is overwritten as scratch. index is the loop-offset register.
 */
1093 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
1094  "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\
1095  "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\
1096  "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\
1097  "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\
1098  "packuswb %%mm1, %%mm7 \n\t"
/* Extra expansion level so that the macro argument is expanded before being stringized. */
1099 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
1100 
1101 /**
1102  * Single-luma-line packed output writer that expands WRITEBGR32.
 *
 * Four inline-asm variants are selected at run time:
 *  - uvalpha < 2048: YSCALEYUV2RGB1, which reads chroma from ubuf[0] only
 *    (ubuf1 is set to ubuf[0] as well);
 *  - otherwise: YSCALEYUV2RGB1b, which sums the chroma of ubuf[0] and ubuf[1];
 *  - CONFIG_SWSCALE_ALPHA && c->needAlpha: abuf0 is passed as operand %1 and
 *    YSCALEYUV2RGB1_ALPHA loads the alpha bytes into mm7;
 *  - otherwise: mm7 is set to all ones with pcmpeqd and buf1 (an alias of
 *    buf0) is passed as operand %1.
 *
 * Asm operands: %0 = buf0, %1 = abuf0 or buf1, %2 = ubuf0, %3 = ubuf1
 * (fixed registers c, d, S, D), %4 = dest (memory operand) and
 * %5 = &c->redDither (register a), the base for ESP_OFFSET and DSTW_OFFSET.
 *
 * vbuf, dstW and y are not referenced by name in this body; the width is read
 * from DSTW_OFFSET(%5).
 * NOTE(review): presumably the caller stores the width in the context before
 * calling -- confirm against the call site.
1103  */
1104 static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
1105  const int16_t *ubuf[2], const int16_t *vbuf[2],
1106  const int16_t *abuf0, uint8_t *dest,
1107  int dstW, int uvalpha, int y)
1108 {
1109  const int16_t *ubuf0 = ubuf[0];
1110  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1111 
1112  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1113  const int16_t *ubuf1 = ubuf[0];
1114  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
1115  __asm__ volatile(
         /* save register b in the ESP_OFFSET slot, then reuse it as the dest pointer */
1116  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1117  "mov %4, %%"FF_REG_b" \n\t"
1118  "push %%"FF_REG_BP" \n\t"
1119  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1120  YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1121  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1122  "pop %%"FF_REG_BP" \n\t"
1123  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
         /* operand %1 is the alpha line here */
1124  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1125  "a" (&c->redDither)
1126  );
1127  } else {
1128  __asm__ volatile(
1129  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1130  "mov %4, %%"FF_REG_b" \n\t"
1131  "push %%"FF_REG_BP" \n\t"
1132  YSCALEYUV2RGB1(%%FF_REGBP, %5)
         /* no alpha line: mm7 = all ones, passed to WRITEBGR32 as the alpha argument */
1133  "pcmpeqd %%mm7, %%mm7 \n\t"
1134  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1135  "pop %%"FF_REG_BP" \n\t"
1136  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1137  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1138  "a" (&c->redDither)
1139  );
1140  }
1141  } else {
     /* chroma is taken from both lines in this branch */
1142  const int16_t *ubuf1 = ubuf[1];
1143  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
1144  __asm__ volatile(
1145  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1146  "mov %4, %%"FF_REG_b" \n\t"
1147  "push %%"FF_REG_BP" \n\t"
1148  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1149  YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1150  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1151  "pop %%"FF_REG_BP" \n\t"
1152  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
         /* operand %1 is the alpha line here */
1153  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1154  "a" (&c->redDither)
1155  );
1156  } else {
1157  __asm__ volatile(
1158  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1159  "mov %4, %%"FF_REG_b" \n\t"
1160  "push %%"FF_REG_BP" \n\t"
1161  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
         /* no alpha line: mm7 = all ones, passed to WRITEBGR32 as the alpha argument */
1162  "pcmpeqd %%mm7, %%mm7 \n\t"
1163  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1164  "pop %%"FF_REG_BP" \n\t"
1165  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1166  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1167  "a" (&c->redDither)
1168  );
1169  }
1170  }
1171 }
1172 
/**
 * Single-luma-line packed output writer that expands WRITEBGR24.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma read from ubuf[0] only, with
 * ubuf1 also set to ubuf[0]); otherwise YSCALEYUV2RGB1b is used with
 * ubuf1 = ubuf[1], summing the chroma of both lines.
 *
 * Asm operands: %0 = buf0, %1 = buf1 (an alias of buf0), %2 = ubuf0,
 * %3 = ubuf1 (fixed registers c, d, S, D), %4 = dest (memory operand) and
 * %5 = &c->redDither (register a), the base for ESP_OFFSET and DSTW_OFFSET.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body; the width
 * is read from DSTW_OFFSET(%5).
 * NOTE(review): presumably the caller stores the width in the context before
 * calling -- confirm against the call site.
 */
1173 static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
1174  const int16_t *ubuf[2], const int16_t *vbuf[2],
1175  const int16_t *abuf0, uint8_t *dest,
1176  int dstW, int uvalpha, int y)
1177 {
1178  const int16_t *ubuf0 = ubuf[0];
1179  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1180 
1181  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1182  const int16_t *ubuf1 = ubuf[0];
1183  __asm__ volatile(
     /* save register b in the ESP_OFFSET slot, then reuse it as the dest pointer */
1184  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1185  "mov %4, %%"FF_REG_b" \n\t"
1186  "push %%"FF_REG_BP" \n\t"
1187  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1188  "pxor %%mm7, %%mm7 \n\t"
1189  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1190  "pop %%"FF_REG_BP" \n\t"
1191  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1192  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1193  "a" (&c->redDither)
1194  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1195  );
1196  } else {
     /* chroma is taken from both lines in this branch */
1197  const int16_t *ubuf1 = ubuf[1];
1198  __asm__ volatile(
1199  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1200  "mov %4, %%"FF_REG_b" \n\t"
1201  "push %%"FF_REG_BP" \n\t"
1202  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1203  "pxor %%mm7, %%mm7 \n\t"
1204  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1205  "pop %%"FF_REG_BP" \n\t"
1206  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1207  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1208  "a" (&c->redDither)
1209  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1210  );
1211  }
1212 }
1213 
/**
 * Single-luma-line packed output writer that adds the per-channel dither
 * values from the context and expands WRITERGB15.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma read from ubuf[0] only, with
 * ubuf1 also set to ubuf[0]); otherwise YSCALEYUV2RGB1b is used with
 * ubuf1 = ubuf[1], summing the chroma of both lines.
 *
 * Asm operands: %0 = buf0, %1 = buf1 (an alias of buf0), %2 = ubuf0,
 * %3 = ubuf1 (fixed registers c, d, S, D), %4 = dest (memory operand) and
 * %5 = &c->redDither (register a), the base for ESP_OFFSET, DSTW_OFFSET and
 * the BLUE_/GREEN_/RED_DITHER accesses.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body; the width
 * is read from DSTW_OFFSET(%5).
 * NOTE(review): presumably the caller stores the width in the context before
 * calling -- confirm against the call site.
 * NOTE(review): the sibling yuv2rgb565_1 passes NAMED_CONSTRAINTS_ADD(bF8,bFC)
 * after its operand lists; nothing equivalent is passed here -- confirm
 * whether WRITERGB15 references a named constant.
 */
1214 static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
1215  const int16_t *ubuf[2], const int16_t *vbuf[2],
1216  const int16_t *abuf0, uint8_t *dest,
1217  int dstW, int uvalpha, int y)
1218 {
1219  const int16_t *ubuf0 = ubuf[0];
1220  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1221 
1222  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1223  const int16_t *ubuf1 = ubuf[0];
1224  __asm__ volatile(
     /* save register b in the ESP_OFFSET slot, then reuse it as the dest pointer */
1225  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1226  "mov %4, %%"FF_REG_b" \n\t"
1227  "push %%"FF_REG_BP" \n\t"
1228  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1229  "pxor %%mm7, %%mm7 \n\t"
1230  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
     /* saturating unsigned byte add of the dither values onto B, G and R */
1231  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1232  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1233  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1234  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1235  "pop %%"FF_REG_BP" \n\t"
1236  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1237  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1238  "a" (&c->redDither)
1240  );
1241  } else {
     /* chroma is taken from both lines in this branch */
1242  const int16_t *ubuf1 = ubuf[1];
1243  __asm__ volatile(
1244  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1245  "mov %4, %%"FF_REG_b" \n\t"
1246  "push %%"FF_REG_BP" \n\t"
1247  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1248  "pxor %%mm7, %%mm7 \n\t"
1249  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1250  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1251  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1252  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1253  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1254  "pop %%"FF_REG_BP" \n\t"
1255  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1256  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1257  "a" (&c->redDither)
1259  );
1260  }
1261 }
1262 
/**
 * Single-luma-line packed output writer that adds the per-channel dither
 * values from the context and expands WRITERGB16.
 *
 * uvalpha < 2048 selects YSCALEYUV2RGB1 (chroma read from ubuf[0] only, with
 * ubuf1 also set to ubuf[0]); otherwise YSCALEYUV2RGB1b is used with
 * ubuf1 = ubuf[1], summing the chroma of both lines.
 *
 * Asm operands: %0 = buf0, %1 = buf1 (an alias of buf0), %2 = ubuf0,
 * %3 = ubuf1 (fixed registers c, d, S, D), %4 = dest (memory operand) and
 * %5 = &c->redDither (register a), the base for ESP_OFFSET, DSTW_OFFSET and
 * the BLUE_/GREEN_/RED_DITHER accesses. bF8 and bFC are passed through
 * NAMED_CONSTRAINTS_ADD.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body; the width
 * is read from DSTW_OFFSET(%5).
 * NOTE(review): presumably the caller stores the width in the context before
 * calling -- confirm against the call site.
 */
1263 static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
1264  const int16_t *ubuf[2], const int16_t *vbuf[2],
1265  const int16_t *abuf0, uint8_t *dest,
1266  int dstW, int uvalpha, int y)
1267 {
1268  const int16_t *ubuf0 = ubuf[0];
1269  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1270 
1271  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1272  const int16_t *ubuf1 = ubuf[0];
1273  __asm__ volatile(
     /* save register b in the ESP_OFFSET slot, then reuse it as the dest pointer */
1274  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1275  "mov %4, %%"FF_REG_b" \n\t"
1276  "push %%"FF_REG_BP" \n\t"
1277  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1278  "pxor %%mm7, %%mm7 \n\t"
1279  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
     /* saturating unsigned byte add of the dither values onto B, G and R */
1280  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1281  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1282  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1283  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1284  "pop %%"FF_REG_BP" \n\t"
1285  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1286  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1287  "a" (&c->redDither)
1288  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1289  );
1290  } else {
     /* chroma is taken from both lines in this branch */
1291  const int16_t *ubuf1 = ubuf[1];
1292  __asm__ volatile(
1293  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1294  "mov %4, %%"FF_REG_b" \n\t"
1295  "push %%"FF_REG_BP" \n\t"
1296  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1297  "pxor %%mm7, %%mm7 \n\t"
1298  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1299  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1300  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1301  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1302  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1303  "pop %%"FF_REG_BP" \n\t"
1304  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1305  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1306  "a" (&c->redDither)
1307  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1308  );
1309  }
1310 }
1311 
/*
 * Asm fragment: head of the single-line packed-output loop; chroma is taken
 * from operand %2 only (operands %1 and %3 are not read here).
 *   index - register used as the loop offset (zeroed here)
 *   c     - register holding the context base used for UV_OFF_BYTE
 *
 * Per iteration: mm3 = 4 chroma words at (%2, index) >> 7,
 * mm4 = 4 chroma words at (%2, index + UV_OFF_BYTE(c)) >> 7,
 * mm1 / mm7 = first / next 4 luma words at (%0, index, 2) >> 7.
 *
 * The fragment opens label "1:" but contains no branch back to it.
 * NOTE(review): presumably the WRITE* macro expanded after it closes the loop.
 */
1312 #define REAL_YSCALEYUV2PACKED1(index, c) \
1313  "xor "#index", "#index" \n\t"\
1314  ".p2align 4 \n\t"\
1315  "1: \n\t"\
1316  "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
1317  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1318  "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
1319  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1320  "psraw $7, %%mm3 \n\t" \
1321  "psraw $7, %%mm4 \n\t" \
1322  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1323  "movq 8(%0, "#index", 2), %%mm7 \n\t" /* next 4 words of buf0 (+8 bytes) */\
1324  "psraw $7, %%mm1 \n\t" \
1325  "psraw $7, %%mm7 \n\t" \
1326 
/* Extra expansion level so that macro arguments are expanded before being stringized. */
1327 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
1328 
/*
 * Asm fragment: head of the single-luma-line packed-output loop where chroma
 * is the sum of the two chroma lines in operands %2 and %3 (operand %1 is not
 * read here).
 *   index - register used as the loop offset (zeroed here)
 *   c     - register holding the context base used for UV_OFF_BYTE
 *
 * Per iteration: mm3 / mm4 = (line0 + line1) >> 8 (logical shift) for the
 * chroma words at index / index + UV_OFF_BYTE(c); mm1 / mm7 = first / next
 * 4 luma words at (%0, index, 2) >> 7. mm2 and mm5 are used as scratch.
 *
 * The fragment opens label "1:" but contains no branch back to it.
 * NOTE(review): presumably the WRITE* macro expanded after it closes the loop.
 */
1329 #define REAL_YSCALEYUV2PACKED1b(index, c) \
1330  "xor "#index", "#index" \n\t"\
1331  ".p2align 4 \n\t"\
1332  "1: \n\t"\
1333  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
1334  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
1335  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1336  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
1337  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1338  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1339  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1340  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1341  "psrlw $8, %%mm3 \n\t" \
1342  "psrlw $8, %%mm4 \n\t" \
1343  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1344  "movq 8(%0, "#index", 2), %%mm7 \n\t" /* next 4 words of buf0 (+8 bytes) */\
1345  "psraw $7, %%mm1 \n\t" \
1346  "psraw $7, %%mm7 \n\t"
/* Extra expansion level so that macro arguments are expanded before being stringized. */
1347 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
1348 
/**
 * Single-luma-line packed output writer that expands WRITEYUY2.
 *
 * uvalpha < 2048 selects YSCALEYUV2PACKED1 (chroma read from ubuf[0] only,
 * with ubuf1 also set to ubuf[0]); otherwise YSCALEYUV2PACKED1b is used with
 * ubuf1 = ubuf[1], summing the chroma of both lines.
 *
 * Asm operands: %0 = buf0, %1 = buf1 (an alias of buf0), %2 = ubuf0,
 * %3 = ubuf1 (fixed registers c, d, S, D), %4 = dest (memory operand) and
 * %5 = &c->redDither (register a), the base for ESP_OFFSET and DSTW_OFFSET.
 *
 * vbuf, abuf0, dstW and y are not referenced by name in this body; the width
 * is read from DSTW_OFFSET(%5).
 * NOTE(review): presumably the caller stores the width in the context before
 * calling -- confirm against the call site.
 */
1349 static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
1350  const int16_t *ubuf[2], const int16_t *vbuf[2],
1351  const int16_t *abuf0, uint8_t *dest,
1352  int dstW, int uvalpha, int y)
1353 {
1354  const int16_t *ubuf0 = ubuf[0];
1355  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1356 
1357  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1358  const int16_t *ubuf1 = ubuf[0];
1359  __asm__ volatile(
     /* save register b in the ESP_OFFSET slot, then reuse it as the dest pointer */
1360  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1361  "mov %4, %%"FF_REG_b" \n\t"
     /* BP is handed to the macros below as their index register, so preserve it */
1362  "push %%"FF_REG_BP" \n\t"
1363  YSCALEYUV2PACKED1(%%FF_REGBP, %5)
1364  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1365  "pop %%"FF_REG_BP" \n\t"
1366  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1367  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1368  "a" (&c->redDither)
1369  );
1370  } else {
     /* chroma is taken from both lines in this branch */
1371  const int16_t *ubuf1 = ubuf[1];
1372  __asm__ volatile(
1373  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1374  "mov %4, %%"FF_REG_b" \n\t"
1375  "push %%"FF_REG_BP" \n\t"
1376  YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
1377  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1378  "pop %%"FF_REG_BP" \n\t"
1379  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1380  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1381  "a" (&c->redDither)
1382  );
1383  }
1384 }
/*
 * Body of the function-pointer setup routine for this template: it selects
 * the yuv2packedX / yuv2packed1 / yuv2packed2 writers and the fast horizontal
 * scalers stored in the context `c`, based on c->dstFormat and c->flags.
 *
 * NOTE(review): the declarator line for this body is not present in this
 * listing; the body dereferences `c` as a context pointer -- confirm the
 * signature against the upstream file.
 */
1386 {
1387  enum AVPixelFormat dstFormat = c->dstFormat;
1388 
     /* the MMX vertical filter is off unless the non-accurate path below enables it */
1389  c->use_mmx_vfilter= 0;
     /* packed writers are only installed for formats that are not 16 bits per
      * sample, not N bits per sample, not semi-planar YUV and not float gray,
      * and only when SWS_BITEXACT is not set */
1390  if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat)
1391  && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE
1392  && !(c->flags & SWS_BITEXACT)) {
1393  if (c->flags & SWS_ACCURATE_RND) {
         /* accurate rounding: *_X_ar writers; there is no BGR32 case on this path */
1394  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1395  switch (c->dstFormat) {
1396  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
1397 #if HAVE_6REGS
1398  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
1399 #endif
1400  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
1401  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
1402  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
1403  default: break;
1404  }
1405  }
1406  } else {
         /* non-accurate path: enable the MMX vertical filter and use the plain *_X writers */
1407  c->use_mmx_vfilter= 1;
1408  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1409  switch (c->dstFormat) {
1410  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
1411  case AV_PIX_FMT_BGR32: c->yuv2packedX = RENAME(yuv2bgr32_X); break;
1412 #if HAVE_6REGS
1413  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
1414 #endif
1415  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
1416  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
1417  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
1418  default: break;
1419  }
1420  }
1421  }
     /* one-line and two-line writers are chosen independently of SWS_ACCURATE_RND;
      * the BGR24 case here is not guarded by HAVE_6REGS */
1422  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1423  switch (c->dstFormat) {
1424  case AV_PIX_FMT_RGB32:
1425  c->yuv2packed1 = RENAME(yuv2rgb32_1);
1426  c->yuv2packed2 = RENAME(yuv2rgb32_2);
1427  break;
1428  case AV_PIX_FMT_BGR24:
1429  c->yuv2packed1 = RENAME(yuv2bgr24_1);
1430  c->yuv2packed2 = RENAME(yuv2bgr24_2);
1431  break;
1432  case AV_PIX_FMT_RGB555:
1433  c->yuv2packed1 = RENAME(yuv2rgb555_1);
1434  c->yuv2packed2 = RENAME(yuv2rgb555_2);
1435  break;
1436  case AV_PIX_FMT_RGB565:
1437  c->yuv2packed1 = RENAME(yuv2rgb565_1);
1438  c->yuv2packed2 = RENAME(yuv2rgb565_2);
1439  break;
1440  case AV_PIX_FMT_YUYV422:
1441  c->yuv2packed1 = RENAME(yuv2yuyv422_1);
1442  c->yuv2packed2 = RENAME(yuv2yuyv422_2);
1443  break;
1444  default:
1445  break;
1446  }
1447  }
1448  }
1449 
1450  if (c->srcBpc == 8 && c->dstBpc <= 14) {
1451  // Install the MMXEXT fast bilinear horizontal scalers when SWS_FAST_BILINEAR is set and canMMXEXTBeUsed is non-zero; otherwise clear both fast-scaler pointers.
1452  if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
1453  c->hyscale_fast = ff_hyscale_fast_mmxext;
1454  c->hcscale_fast = ff_hcscale_fast_mmxext;
1455  } else {
1456  c->hyscale_fast = NULL;
1457  c->hcscale_fast = NULL;
1458  }
1459  }
1460 }
WRITEBGR32
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
Definition: swscale_template.c:251
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
YSCALEYUV2PACKEDX_ACCURATE
#define YSCALEYUV2PACKEDX_ACCURATE
Definition: swscale_template.c:189
ALP_MMX_FILTER_OFFSET
#define ALP_MMX_FILTER_OFFSET
Definition: swscale_internal.h:493
YSCALEYUV2RGB1
#define YSCALEYUV2RGB1(index, c)
Definition: swscale_template.c:1035
YSCALEYUV2PACKEDX_YA
#define YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
Definition: swscale_template.c:61
AV_PIX_FMT_BGR32
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:453
x86_reg
int x86_reg
Definition: asm.h:72
YSCALEYUV2RGB
#define YSCALEYUV2RGB(index, c)
Definition: swscale_template.c:776
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
AV_PIX_FMT_GRAYF32LE
@ AV_PIX_FMT_GRAYF32LE
IEEE-754 single precision Y, 32bpp, little-endian.
Definition: pixfmt.h:364
SWS_FAST_BILINEAR
#define SWS_FAST_BILINEAR
Definition: swscale.h:65
is16BPS
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:706
SWS_BITEXACT
#define SWS_BITEXACT
Definition: swscale.h:115
DSTW_OFFSET
#define DSTW_OFFSET
Definition: swscale_internal.h:487
dummy
int dummy
Definition: motion.c:66
isNBPS
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:720
ff_hcscale_fast_mmxext
void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc)
av_cold
#define av_cold
Definition: attributes.h:90
sws_init_swscale
static av_cold void sws_init_swscale(SwsContext *c)
Definition: swscale.c:559
NAMED_CONSTRAINTS_ADD
#define NAMED_CONSTRAINTS_ADD(...)
Definition: asm.h:145
BLUE_DITHER
#define BLUE_DITHER
Definition: swscale_internal.h:476
YSCALEYUV2RGB1b
#define YSCALEYUV2RGB1b(index, c)
Definition: swscale_template.c:1091
WRITERGB15
#define WRITERGB15(dst, dstw, index)
Definition: swscale_template.c:443
WRITEBGR24
#define WRITEBGR24(dst, dstw, index)
Definition: swscale_template.c:591
isSemiPlanarYUV
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:752
NULL
#define NULL
Definition: coverity.c:32
YSCALEYUV2PACKEDX
#define YSCALEYUV2PACKEDX
Definition: swscale_template.c:80
asm.h
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
U_TEMP
#define U_TEMP
Definition: swscale_internal.h:490
GREEN_DITHER
#define GREEN_DITHER
Definition: swscale_internal.h:475
YSCALEYUV2RGB1_ALPHA
#define YSCALEYUV2RGB1_ALPHA(index)
Definition: swscale_template.c:1099
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SWS_FULL_CHR_H_INT
#define SWS_FULL_CHR_H_INT
Perform full chroma upsampling when upscaling to RGB.
Definition: swscale.h:97
RED_DITHER
#define RED_DITHER
Definition: swscale_internal.h:474
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:451
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:114
YSCALEYUV2RGB_YA
#define YSCALEYUV2RGB_YA(index, c, b1, b2)
Definition: swscale_template.c:774
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:466
swscale_internal.h
YSCALEYUV2PACKED
#define YSCALEYUV2PACKED(index, c)
Definition: swscale_template.c:963
AV_PIX_FMT_RGB565
#define AV_PIX_FMT_RGB565
Definition: pixfmt.h:465
V_TEMP
#define V_TEMP
Definition: swscale_internal.h:491
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
AV_PIX_FMT_GRAYF32BE
@ AV_PIX_FMT_GRAYF32BE
IEEE-754 single precision Y, 32bpp, big-endian.
Definition: pixfmt.h:363
WRITEYUY2
#define WRITEYUY2(dst, dstw, index)
Definition: swscale_template.c:660
RENAME
#define RENAME(element)
Definition: ac3enc_template.c:44
Y_TEMP
#define Y_TEMP
Definition: swscale_internal.h:492
YSCALEYUV2PACKEDX_ACCURATE_YA
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
Definition: swscale_template.c:144
YSCALEYUV2RGBX
#define YSCALEYUV2RGBX
Definition: swscale_template.c:193
ff_hyscale_fast_mmxext
void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc)
ESP_OFFSET
#define ESP_OFFSET
Definition: swscale_internal.h:488
YSCALEYUV2PACKEDX_END
#define YSCALEYUV2PACKEDX_END
Definition: swscale_template.c:84
WRITERGB16
#define WRITERGB16(dst, dstw, index)
Definition: swscale_template.c:370
YSCALEYUV2PACKED1
#define YSCALEYUV2PACKED1(index, c)
Definition: swscale_template.c:1327
SwsContext
Definition: swscale_internal.h:299
YSCALEYUV2PACKED1b
#define YSCALEYUV2PACKED1b(index, c)
Definition: swscale_template.c:1347