output_lsx.c
/*
 * Copyright (C) 2023 Loongson Technology Corporation Limited
 * Contributed by Lu Wang <wanglu@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "swscale_loongarch.h"
#include "libavutil/loongarch/loongson_intrinsics.h"


/* Copied from libswscale/output.c */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
              unsigned A1, unsigned A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum AVPixelFormat target, int hasAlpha)
{
    if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA ||
        target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) {
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

#if CONFIG_SMALL
        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
#else
#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
        int sh = (target == AV_PIX_FMT_RGB32_1 ||
                  target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
        av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF);
#endif
        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
#endif
    } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

#define r_b ((target == AV_PIX_FMT_RGB24) ? r : b)
#define b_r ((target == AV_PIX_FMT_RGB24) ? b : r)

        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] = g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] = g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
#undef r_b
#undef b_r
    } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 ||
               target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
               target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) {
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) {
            dr1 = ff_dither_2x2_8[ y & 1     ][0];
            dg1 = ff_dither_2x2_4[ y & 1     ][0];
            db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = ff_dither_2x2_8[ y & 1     ][1];
            dg2 = ff_dither_2x2_4[ y & 1     ][1];
            db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) {
            dr1 = ff_dither_2x2_8[ y & 1     ][0];
            dg1 = ff_dither_2x2_8[ y & 1     ][1];
            db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = ff_dither_2x2_8[ y & 1     ][1];
            dg2 = ff_dither_2x2_8[ y & 1     ][0];
            db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
        } else {
            dr1 = ff_dither_4x4_16[ y & 3     ][0];
            dg1 = ff_dither_4x4_16[ y & 3     ][1];
            db1 = ff_dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = ff_dither_4x4_16[ y & 3     ][1];
            dg2 = ff_dither_4x4_16[ y & 3     ][0];
            db2 = ff_dither_4x4_16[(y & 3) ^ 3][1];
        }

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else { /* 8/4 bits */
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) {
            const uint8_t * const d64 = ff_dither_8x8_73[y & 7];
            const uint8_t * const d32 = ff_dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 =       d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 =       d64[(i * 2 + 1) & 7];
        } else {
            const uint8_t * const d64  = ff_dither_8x8_73 [y & 7];
            const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 =        d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
            dg2 =        d64[(i * 2 + 1) & 7];
        }

        if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) {
            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
                      ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
        } else {
            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
        }
    }
}
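
/*
 * Editorial sketch (not part of the upstream file): the packed writes above
 * are branch-free per pixel because the per-channel tables (c->table_rV,
 * c->table_gU, c->table_gV, c->table_bU) store each component already
 * shifted into its packed bit position, so one pixel is simply the sum of
 * three table lookups. Assuming RGB565-style tables:
 */
#if 0
static uint16_t pack_rgb565_sketch(const uint16_t *r, const uint16_t *g,
                                   const uint16_t *b, int Y)
{
    /* r[Y] occupies bits 11..15, g[Y] bits 5..10, b[Y] bits 0..4, so the
     * additions below can never carry across channel boundaries. */
    return r[Y] + g[Y] + b[Y];
}
#endif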

#define WRITE_YUV2RGB_LSX(vec_y1, vec_y2, vec_u, vec_v, t1, t2, t3, t4) \
{                                                                       \
    Y1 = __lsx_vpickve2gr_w(vec_y1, t1);                                \
    Y2 = __lsx_vpickve2gr_w(vec_y2, t2);                                \
    U  = __lsx_vpickve2gr_w(vec_u, t3);                                 \
    V  = __lsx_vpickve2gr_w(vec_v, t4);                                 \
    r  = c->table_rV[V];                                                \
    g  = (c->table_gU[U] + c->table_gV[V]);                             \
    b  = c->table_bU[U];                                                \
    yuv2rgb_write(dest, count, Y1, Y2, 0, 0,                            \
                  r, g, b, y, target, 0);                               \
    count++;                                                            \
}
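
/*
 * Editorial note: __lsx_vmaddwev_w_h / __lsx_vmaddwod_w_h accumulate the
 * even and odd 16-bit lanes into separate 32-bit vectors, so the pixel
 * pair (2n, 2n + 1) lives at lane n of an (ev, od) accumulator pair.
 * That is why the WRITE_YUV2RGB_LSX invocations below pass the same lane
 * index for Y1 and Y2, and one chroma lane per two luma samples
 * (4:2:2-style horizontal subsampling).
 */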

static void
yuv2rgb_X_template_lsx(SwsInternal *c, const int16_t *lumFilter,
                       const int16_t **lumSrc, int lumFilterSize,
                       const int16_t *chrFilter, const int16_t **chrUSrc,
                       const int16_t **chrVSrc, int chrFilterSize,
                       const int16_t **alpSrc, uint8_t *dest, int dstW,
                       int y, enum AVPixelFormat target, int hasAlpha)
{
    int i, j;
    int count = 0;
    int t = 1 << 18;
    int len = dstW >> 5;
    int res = dstW & 31;
    int len_count = (dstW + 1) >> 1;
    const void *r, *g, *b;
    int head = YUVRGB_TABLE_HEADROOM;
    __m128i headroom = __lsx_vreplgr2vr_w(head);

    for (i = 0; i < len; i++) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, l_src2, l_src3, l_src4, u_src1, u_src2, v_src1, v_src2;
        __m128i yl_ev, yl_ev1, yl_ev2, yl_od1, yl_od2, yh_ev1, yh_ev2, yh_od1, yh_od2;
        __m128i u_ev1, u_ev2, u_od1, u_od2, v_ev1, v_ev2, v_od1, v_od2, temp;

        yl_ev  = __lsx_vldrepl_w(&t, 0);
        yl_ev1 = yl_ev;
        yl_od1 = yl_ev;
        yh_ev1 = yl_ev;
        yh_od1 = yl_ev;
        u_ev1  = yl_ev;
        v_ev1  = yl_ev;
        u_od1  = yl_ev;
        v_od1  = yl_ev;
        yl_ev2 = yl_ev;
        yl_od2 = yl_ev;
        yh_ev2 = yl_ev;
        yh_od2 = yl_ev;
        u_ev2  = yl_ev;
        v_ev2  = yl_ev;
        u_od2  = yl_ev;
        v_od2  = yl_ev;

        for (j = 0; j < lumFilterSize; j++) {
            temp = __lsx_vldrepl_h((lumFilter + j), 0);
            DUP2_ARG2(__lsx_vld, lumSrc[j] + count_lum, 0, lumSrc[j] + count_lum,
                      16, l_src1, l_src2);
            DUP2_ARG2(__lsx_vld, lumSrc[j] + count_lum, 32, lumSrc[j] + count_lum,
                      48, l_src3, l_src4);
            yl_ev1 = __lsx_vmaddwev_w_h(yl_ev1, temp, l_src1);
            yl_od1 = __lsx_vmaddwod_w_h(yl_od1, temp, l_src1);
            yh_ev1 = __lsx_vmaddwev_w_h(yh_ev1, temp, l_src3);
            yh_od1 = __lsx_vmaddwod_w_h(yh_od1, temp, l_src3);
            yl_ev2 = __lsx_vmaddwev_w_h(yl_ev2, temp, l_src2);
            yl_od2 = __lsx_vmaddwod_w_h(yl_od2, temp, l_src2);
            yh_ev2 = __lsx_vmaddwev_w_h(yh_ev2, temp, l_src4);
            yh_od2 = __lsx_vmaddwod_w_h(yh_od2, temp, l_src4);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src1, v_src1);
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 16, chrVSrc[j] + count, 16,
                      u_src2, v_src2);
            temp  = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev1 = __lsx_vmaddwev_w_h(u_ev1, temp, u_src1);
            u_od1 = __lsx_vmaddwod_w_h(u_od1, temp, u_src1);
            v_ev1 = __lsx_vmaddwev_w_h(v_ev1, temp, v_src1);
            v_od1 = __lsx_vmaddwod_w_h(v_od1, temp, v_src1);
            u_ev2 = __lsx_vmaddwev_w_h(u_ev2, temp, u_src2);
            u_od2 = __lsx_vmaddwod_w_h(u_od2, temp, u_src2);
            v_ev2 = __lsx_vmaddwev_w_h(v_ev2, temp, v_src2);
            v_od2 = __lsx_vmaddwod_w_h(v_od2, temp, v_src2);
        }
        yl_ev1 = __lsx_vsrai_w(yl_ev1, 19);
        yh_ev1 = __lsx_vsrai_w(yh_ev1, 19);
        yl_od1 = __lsx_vsrai_w(yl_od1, 19);
        yh_od1 = __lsx_vsrai_w(yh_od1, 19);
        u_ev1  = __lsx_vsrai_w(u_ev1, 19);
        v_ev1  = __lsx_vsrai_w(v_ev1, 19);
        u_od1  = __lsx_vsrai_w(u_od1, 19);
        v_od1  = __lsx_vsrai_w(v_od1, 19);
        yl_ev2 = __lsx_vsrai_w(yl_ev2, 19);
        yh_ev2 = __lsx_vsrai_w(yh_ev2, 19);
        yl_od2 = __lsx_vsrai_w(yl_od2, 19);
        yh_od2 = __lsx_vsrai_w(yh_od2, 19);
        u_ev2  = __lsx_vsrai_w(u_ev2, 19);
        v_ev2  = __lsx_vsrai_w(v_ev2, 19);
        u_od2  = __lsx_vsrai_w(u_od2, 19);
        v_od2  = __lsx_vsrai_w(v_od2, 19);
        u_ev1  = __lsx_vadd_w(u_ev1, headroom);
        v_ev1  = __lsx_vadd_w(v_ev1, headroom);
        u_od1  = __lsx_vadd_w(u_od1, headroom);
        v_od1  = __lsx_vadd_w(v_od1, headroom);
        u_ev2  = __lsx_vadd_w(u_ev2, headroom);
        v_ev2  = __lsx_vadd_w(v_ev2, headroom);
        u_od2  = __lsx_vadd_w(u_od2, headroom);
        v_od2  = __lsx_vadd_w(v_od2, headroom);

        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_ev1, v_ev1, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_od1, v_od1, 1, 1, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_ev1, v_ev1, 2, 2, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_od1, v_od1, 3, 3, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_ev1, v_ev1, 0, 0, 2, 2);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_od1, v_od1, 1, 1, 2, 2);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_ev1, v_ev1, 2, 2, 3, 3);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_od1, v_od1, 3, 3, 3, 3);
        WRITE_YUV2RGB_LSX(yh_ev1, yh_od1, u_ev2, v_ev2, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yh_ev1, yh_od1, u_od2, v_od2, 1, 1, 0, 0);
        WRITE_YUV2RGB_LSX(yh_ev1, yh_od1, u_ev2, v_ev2, 2, 2, 1, 1);
        WRITE_YUV2RGB_LSX(yh_ev1, yh_od1, u_od2, v_od2, 3, 3, 1, 1);
        WRITE_YUV2RGB_LSX(yh_ev2, yh_od2, u_ev2, v_ev2, 0, 0, 2, 2);
        WRITE_YUV2RGB_LSX(yh_ev2, yh_od2, u_od2, v_od2, 1, 1, 2, 2);
        WRITE_YUV2RGB_LSX(yh_ev2, yh_od2, u_ev2, v_ev2, 2, 2, 3, 3);
        WRITE_YUV2RGB_LSX(yh_ev2, yh_od2, u_od2, v_od2, 3, 3, 3, 3);
    }

    if (res >= 16) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, l_src2, u_src1, v_src1;
        __m128i yl_ev, yl_ev1, yl_ev2, yl_od1, yl_od2;
        __m128i u_ev1, u_od1, v_ev1, v_od1, temp;

        yl_ev  = __lsx_vldrepl_w(&t, 0);
        yl_ev1 = yl_ev;
        yl_od1 = yl_ev;
        u_ev1  = yl_ev;
        v_ev1  = yl_ev;
        u_od1  = yl_ev;
        v_od1  = yl_ev;
        yl_ev2 = yl_ev;
        yl_od2 = yl_ev;

        for (j = 0; j < lumFilterSize; j++) {
            temp = __lsx_vldrepl_h((lumFilter + j), 0);
            DUP2_ARG2(__lsx_vld, lumSrc[j] + count_lum, 0, lumSrc[j] + count_lum,
                      16, l_src1, l_src2);
            yl_ev1 = __lsx_vmaddwev_w_h(yl_ev1, temp, l_src1);
            yl_od1 = __lsx_vmaddwod_w_h(yl_od1, temp, l_src1);
            yl_ev2 = __lsx_vmaddwev_w_h(yl_ev2, temp, l_src2);
            yl_od2 = __lsx_vmaddwod_w_h(yl_od2, temp, l_src2);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src1, v_src1);
            temp  = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev1 = __lsx_vmaddwev_w_h(u_ev1, temp, u_src1);
            u_od1 = __lsx_vmaddwod_w_h(u_od1, temp, u_src1);
            v_ev1 = __lsx_vmaddwev_w_h(v_ev1, temp, v_src1);
            v_od1 = __lsx_vmaddwod_w_h(v_od1, temp, v_src1);
        }
        yl_ev1 = __lsx_vsrai_w(yl_ev1, 19);
        yl_od1 = __lsx_vsrai_w(yl_od1, 19);
        u_ev1  = __lsx_vsrai_w(u_ev1, 19);
        v_ev1  = __lsx_vsrai_w(v_ev1, 19);
        u_od1  = __lsx_vsrai_w(u_od1, 19);
        v_od1  = __lsx_vsrai_w(v_od1, 19);
        yl_ev2 = __lsx_vsrai_w(yl_ev2, 19);
        yl_od2 = __lsx_vsrai_w(yl_od2, 19);
        u_ev1  = __lsx_vadd_w(u_ev1, headroom);
        v_ev1  = __lsx_vadd_w(v_ev1, headroom);
        u_od1  = __lsx_vadd_w(u_od1, headroom);
        v_od1  = __lsx_vadd_w(v_od1, headroom);

        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_ev1, v_ev1, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_od1, v_od1, 1, 1, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_ev1, v_ev1, 2, 2, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev1, yl_od1, u_od1, v_od1, 3, 3, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_ev1, v_ev1, 0, 0, 2, 2);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_od1, v_od1, 1, 1, 2, 2);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_ev1, v_ev1, 2, 2, 3, 3);
        WRITE_YUV2RGB_LSX(yl_ev2, yl_od2, u_od1, v_od1, 3, 3, 3, 3);
        res -= 16;
    }

    if (res >= 8) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, u_src, v_src;
        __m128i yl_ev, yl_od;
        __m128i u_ev, u_od, v_ev, v_od, temp;

        yl_ev = __lsx_vldrepl_w(&t, 0);
        yl_od = yl_ev;
        u_ev  = yl_ev;
        v_ev  = yl_ev;
        u_od  = yl_ev;
        v_od  = yl_ev;
        for (j = 0; j < lumFilterSize; j++) {
            temp   = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src1 = __lsx_vld(lumSrc[j] + count_lum, 0);
            yl_ev  = __lsx_vmaddwev_w_h(yl_ev, temp, l_src1);
            yl_od  = __lsx_vmaddwod_w_h(yl_od, temp, l_src1);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src, v_src);
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev = __lsx_vmaddwev_w_h(u_ev, temp, u_src);
            u_od = __lsx_vmaddwod_w_h(u_od, temp, u_src);
            v_ev = __lsx_vmaddwev_w_h(v_ev, temp, v_src);
            v_od = __lsx_vmaddwod_w_h(v_od, temp, v_src);
        }
        yl_ev = __lsx_vsrai_w(yl_ev, 19);
        yl_od = __lsx_vsrai_w(yl_od, 19);
        u_ev  = __lsx_vsrai_w(u_ev, 19);
        v_ev  = __lsx_vsrai_w(v_ev, 19);
        u_od  = __lsx_vsrai_w(u_od, 19);
        v_od  = __lsx_vsrai_w(v_od, 19);
        u_ev  = __lsx_vadd_w(u_ev, headroom);
        v_ev  = __lsx_vadd_w(v_ev, headroom);
        u_od  = __lsx_vadd_w(u_od, headroom);
        v_od  = __lsx_vadd_w(v_od, headroom);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_ev, v_ev, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_od, v_od, 1, 1, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_ev, v_ev, 2, 2, 1, 1);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_od, v_od, 3, 3, 1, 1);
        res -= 8;
    }

    if (res >= 4) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, u_src, v_src;
        __m128i yl_ev, yl_od;
        __m128i u_ev, u_od, v_ev, v_od, temp;

        yl_ev = __lsx_vldrepl_w(&t, 0);
        yl_od = yl_ev;
        u_ev  = yl_ev;
        v_ev  = yl_ev;
        u_od  = yl_ev;
        v_od  = yl_ev;
        for (j = 0; j < lumFilterSize; j++) {
            temp   = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src1 = __lsx_vld(lumSrc[j] + count_lum, 0);
            yl_ev  = __lsx_vmaddwev_w_h(yl_ev, temp, l_src1);
            yl_od  = __lsx_vmaddwod_w_h(yl_od, temp, l_src1);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src, v_src);
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev = __lsx_vmaddwev_w_h(u_ev, temp, u_src);
            u_od = __lsx_vmaddwod_w_h(u_od, temp, u_src);
            v_ev = __lsx_vmaddwev_w_h(v_ev, temp, v_src);
            v_od = __lsx_vmaddwod_w_h(v_od, temp, v_src);
        }
        yl_ev = __lsx_vsrai_w(yl_ev, 19);
        yl_od = __lsx_vsrai_w(yl_od, 19);
        u_ev  = __lsx_vsrai_w(u_ev, 19);
        v_ev  = __lsx_vsrai_w(v_ev, 19);
        u_od  = __lsx_vsrai_w(u_od, 19);
        v_od  = __lsx_vsrai_w(v_od, 19);
        u_ev  = __lsx_vadd_w(u_ev, headroom);
        v_ev  = __lsx_vadd_w(v_ev, headroom);
        u_od  = __lsx_vadd_w(u_od, headroom);
        v_od  = __lsx_vadd_w(v_od, headroom);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_ev, v_ev, 0, 0, 0, 0);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_od, v_od, 1, 1, 0, 0);
        res -= 4;
    }

    if (res >= 2) {
        int Y1, Y2, U, V, count_lum = count << 1;
        __m128i l_src1, u_src, v_src;
        __m128i yl_ev, yl_od;
        __m128i u_ev, u_od, v_ev, v_od, temp;

        yl_ev = __lsx_vldrepl_w(&t, 0);
        yl_od = yl_ev;
        u_ev  = yl_ev;
        v_ev  = yl_ev;
        u_od  = yl_ev;
        v_od  = yl_ev;
        for (j = 0; j < lumFilterSize; j++) {
            temp   = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src1 = __lsx_vld(lumSrc[j] + count_lum, 0);
            yl_ev  = __lsx_vmaddwev_w_h(yl_ev, temp, l_src1);
            yl_od  = __lsx_vmaddwod_w_h(yl_od, temp, l_src1);
        }
        for (j = 0; j < chrFilterSize; j++) {
            DUP2_ARG2(__lsx_vld, chrUSrc[j] + count, 0, chrVSrc[j] + count, 0,
                      u_src, v_src);
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            u_ev = __lsx_vmaddwev_w_h(u_ev, temp, u_src);
            u_od = __lsx_vmaddwod_w_h(u_od, temp, u_src);
            v_ev = __lsx_vmaddwev_w_h(v_ev, temp, v_src);
            v_od = __lsx_vmaddwod_w_h(v_od, temp, v_src);
        }
        yl_ev = __lsx_vsrai_w(yl_ev, 19);
        yl_od = __lsx_vsrai_w(yl_od, 19);
        u_ev  = __lsx_vsrai_w(u_ev, 19);
        v_ev  = __lsx_vsrai_w(v_ev, 19);
        u_od  = __lsx_vsrai_w(u_od, 19);
        v_od  = __lsx_vsrai_w(v_od, 19);
        u_ev  = __lsx_vadd_w(u_ev, headroom);
        v_ev  = __lsx_vadd_w(v_ev, headroom);
        u_od  = __lsx_vadd_w(u_od, headroom);
        v_od  = __lsx_vadd_w(v_od, headroom);
        WRITE_YUV2RGB_LSX(yl_ev, yl_od, u_ev, v_ev, 0, 0, 0, 0);
        res -= 2;
    }

    for (; count < len_count; count++) {
        int Y1 = 1 << 18;
        int Y2 = Y1;
        int U  = Y1;
        int V  = Y1;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][count * 2]     * lumFilter[j];
            Y2 += lumSrc[j][count * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][count] * chrFilter[j];
            V += chrVSrc[j][count] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
        g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] +
             c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
        b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

        yuv2rgb_write(dest, count, Y1, Y2, 0, 0,
                      r, g, b, y, target, 0);
    }
}
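
/*
 * Editorial worked example: for a single-tap vertical filter with
 * lumFilter[0] == 4096 (1.0 in Q12), the scalar tail above computes
 *     Y = ((1 << 18) + 4096 * buf0[x]) >> 19 == (buf0[x] + 64) >> 7,
 * i.e. the 1 << 18 bias is exactly half an output LSB, which matches the
 * (v + 64) >> 7 rounding used by the unscaled path in
 * yuv2rgb_1_template_lsx below.
 */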

static void
yuv2rgb_2_template_lsx(SwsInternal *c, const int16_t *buf[2],
                       const int16_t *ubuf[2], const int16_t *vbuf[2],
                       const int16_t *abuf[2], uint8_t *dest, int dstW,
                       int yalpha, int uvalpha, int y,
                       enum AVPixelFormat target, int hasAlpha)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int i, count = 0;
    int len = dstW - 7;
    int len_count = (dstW + 1) >> 1;
    const void *r, *g, *b;
    int head = YUVRGB_TABLE_HEADROOM;
    __m128i v_yalpha1  = __lsx_vreplgr2vr_w(yalpha1);
    __m128i v_uvalpha1 = __lsx_vreplgr2vr_w(uvalpha1);
    __m128i v_yalpha   = __lsx_vreplgr2vr_w(yalpha);
    __m128i v_uvalpha  = __lsx_vreplgr2vr_w(uvalpha);
    __m128i headroom   = __lsx_vreplgr2vr_w(head);
    __m128i zero       = __lsx_vldi(0);

    for (i = 0; i < len; i += 8) {
        int Y1, Y2, U, V;
        int i_dex = i << 1;
        int c_dex = count << 1;
        __m128i y0_h, y0_l, y0, u0, v0;
        __m128i y1_h, y1_l, y1, u1, v1;
        __m128i y_l, y_h, u, v;

        DUP4_ARG2(__lsx_vldx, buf0, i_dex, ubuf0, c_dex, vbuf0, c_dex,
                  buf1, i_dex, y0, u0, v0, y1);
        DUP2_ARG2(__lsx_vldx, ubuf1, c_dex, vbuf1, c_dex, u1, v1);
        DUP2_ARG2(__lsx_vsllwil_w_h, y0, 0, y1, 0, y0_l, y1_l);
        DUP2_ARG1(__lsx_vexth_w_h, y0, y1, y0_h, y1_h);
        DUP4_ARG2(__lsx_vilvl_h, zero, u0, zero, u1, zero, v0, zero, v1,
                  u0, u1, v0, v1);
        y0_l = __lsx_vmul_w(y0_l, v_yalpha1);
        y0_h = __lsx_vmul_w(y0_h, v_yalpha1);
        u0   = __lsx_vmul_w(u0, v_uvalpha1);
        v0   = __lsx_vmul_w(v0, v_uvalpha1);
        y_l  = __lsx_vmadd_w(y0_l, v_yalpha, y1_l);
        y_h  = __lsx_vmadd_w(y0_h, v_yalpha, y1_h);
        u    = __lsx_vmadd_w(u0, v_uvalpha, u1);
        v    = __lsx_vmadd_w(v0, v_uvalpha, v1);
        y_l  = __lsx_vsrai_w(y_l, 19);
        y_h  = __lsx_vsrai_w(y_h, 19);
        u    = __lsx_vsrai_w(u, 19);
        v    = __lsx_vsrai_w(v, 19);
        u    = __lsx_vadd_w(u, headroom);
        v    = __lsx_vadd_w(v, headroom);
        WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 0, 1, 0, 0);
        WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 2, 3, 1, 1);
        WRITE_YUV2RGB_LSX(y_h, y_h, u, v, 0, 1, 2, 2);
        WRITE_YUV2RGB_LSX(y_h, y_h, u, v, 2, 3, 3, 3);
    }
    if (dstW - i >= 4) {
        int Y1, Y2, U, V;
        int i_dex = i << 1;
        __m128i y0_l, y0, u0, v0;
        __m128i y1_l, y1, u1, v1;
        __m128i y_l, u, v;

        y0 = __lsx_vldx(buf0, i_dex);
        u0 = __lsx_vldrepl_d((ubuf0 + count), 0);
        v0 = __lsx_vldrepl_d((vbuf0 + count), 0);
        y1 = __lsx_vldx(buf1, i_dex);
        u1 = __lsx_vldrepl_d((ubuf1 + count), 0);
        v1 = __lsx_vldrepl_d((vbuf1 + count), 0);
        DUP2_ARG2(__lsx_vilvl_h, zero, y0, zero, y1, y0_l, y1_l);
        DUP4_ARG2(__lsx_vilvl_h, zero, u0, zero, u1, zero, v0, zero, v1,
                  u0, u1, v0, v1);
        y0_l = __lsx_vmul_w(y0_l, v_yalpha1);
        u0   = __lsx_vmul_w(u0, v_uvalpha1);
        v0   = __lsx_vmul_w(v0, v_uvalpha1);
        y_l  = __lsx_vmadd_w(y0_l, v_yalpha, y1_l);
        u    = __lsx_vmadd_w(u0, v_uvalpha, u1);
        v    = __lsx_vmadd_w(v0, v_uvalpha, v1);
        y_l  = __lsx_vsrai_w(y_l, 19);
        u    = __lsx_vsrai_w(u, 19);
        v    = __lsx_vsrai_w(v, 19);
        u    = __lsx_vadd_w(u, headroom);
        v    = __lsx_vadd_w(v, headroom);
        WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 0, 1, 0, 0);
        WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 2, 3, 1, 1);
        i += 4;
    }
    for (; count < len_count; count++) {
        int Y1 = (buf0[count * 2]     * yalpha1 +
                  buf1[count * 2]     * yalpha) >> 19;
        int Y2 = (buf0[count * 2 + 1] * yalpha1 +
                  buf1[count * 2 + 1] * yalpha) >> 19;
        int U  = (ubuf0[count] * uvalpha1 + ubuf1[count] * uvalpha) >> 19;
        int V  = (vbuf0[count] * uvalpha1 + vbuf1[count] * uvalpha) >> 19;

        r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
        g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] +
             c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
        b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

        yuv2rgb_write(dest, count, Y1, Y2, 0, 0,
                      r, g, b, y, target, 0);
    }
}

static void
yuv2rgb_1_template_lsx(SwsInternal *c, const int16_t *buf0,
                       const int16_t *ubuf[2], const int16_t *vbuf[2],
                       const int16_t *abuf0, uint8_t *dest, int dstW,
                       int uvalpha, int y, enum AVPixelFormat target,
                       int hasAlpha)
{
    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
    int i;
    int len = (dstW - 7);
    int len_count = (dstW + 1) >> 1;
    const void *r, *g, *b;

    if (uvalpha < 2048) {
        int count = 0;
        int head = YUVRGB_TABLE_HEADROOM;
        __m128i headroom = __lsx_vreplgr2vr_h(head);

        for (i = 0; i < len; i += 8) {
            int Y1, Y2, U, V;
            int i_dex = i << 1;
            int c_dex = count << 1;
            __m128i src_y, src_u, src_v;
            __m128i u, v, uv, y_l, y_h;

            src_y = __lsx_vldx(buf0, i_dex);
            DUP2_ARG2(__lsx_vldx, ubuf0, c_dex, vbuf0, c_dex, src_u, src_v);
            src_y = __lsx_vsrari_h(src_y, 7);
            src_u = __lsx_vsrari_h(src_u, 7);
            src_v = __lsx_vsrari_h(src_v, 7);
            y_l   = __lsx_vsllwil_w_h(src_y, 0);
            y_h   = __lsx_vexth_w_h(src_y);
            uv    = __lsx_vilvl_h(src_v, src_u);
            u     = __lsx_vaddwev_w_h(uv, headroom);
            v     = __lsx_vaddwod_w_h(uv, headroom);
            WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 0, 1, 0, 0);
            WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 2, 3, 1, 1);
            WRITE_YUV2RGB_LSX(y_h, y_h, u, v, 0, 1, 2, 2);
            WRITE_YUV2RGB_LSX(y_h, y_h, u, v, 2, 3, 3, 3);
        }
        if (dstW - i >= 4) {
            int Y1, Y2, U, V;
            int i_dex = i << 1;
            __m128i src_y, src_u, src_v;
            __m128i y_l, u, v, uv;

            src_y = __lsx_vldx(buf0, i_dex);
            src_u = __lsx_vldrepl_d((ubuf0 + count), 0);
            src_v = __lsx_vldrepl_d((vbuf0 + count), 0);
            y_l   = __lsx_vsrari_h(src_y, 7);
            y_l   = __lsx_vsllwil_w_h(y_l, 0);
            uv    = __lsx_vilvl_h(src_v, src_u);
            uv    = __lsx_vsrari_h(uv, 7);
            u     = __lsx_vaddwev_w_h(uv, headroom);
            v     = __lsx_vaddwod_w_h(uv, headroom);
            WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 0, 1, 0, 0);
            WRITE_YUV2RGB_LSX(y_l, y_l, u, v, 2, 3, 1, 1);
            i += 4;
        }
        for (; count < len_count; count++) {
            int Y1 = (buf0[count * 2]     + 64) >> 7;
            int Y2 = (buf0[count * 2 + 1] + 64) >> 7;
            int U  = (ubuf0[count] + 64) >> 7;
            int V  = (vbuf0[count] + 64) >> 7;

            r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
            g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] +
                 c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
            b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

            yuv2rgb_write(dest, count, Y1, Y2, 0, 0,
                          r, g, b, y, target, 0);
        }
    } else {
        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
        int count = 0;
        int HEADROOM = YUVRGB_TABLE_HEADROOM;
        __m128i headroom = __lsx_vreplgr2vr_w(HEADROOM);

        for (i = 0; i < len; i += 8) {
            int Y1, Y2, U, V;
            int i_dex = i << 1;
            int c_dex = count << 1;
            __m128i src_y, src_u0, src_v0, src_u1, src_v1;
            __m128i y_l, y_h, u1, u2, v1, v2;

            DUP4_ARG2(__lsx_vldx, buf0, i_dex, ubuf0, c_dex, vbuf0, c_dex,
                      ubuf1, c_dex, src_y, src_u0, src_v0, src_u1);
            src_v1 = __lsx_vldx(vbuf1, c_dex);
            src_y  = __lsx_vsrari_h(src_y, 7);
            /* u1/u2 hold the even/odd U sums, v1/v2 the even/odd V sums,
             * matching the lane usage of the writes below. */
            u1     = __lsx_vaddwev_w_h(src_u0, src_u1);
            u2     = __lsx_vaddwod_w_h(src_u0, src_u1);
            v1     = __lsx_vaddwev_w_h(src_v0, src_v1);
            v2     = __lsx_vaddwod_w_h(src_v0, src_v1);
            y_l    = __lsx_vsllwil_w_h(src_y, 0);
            y_h    = __lsx_vexth_w_h(src_y);
            u1     = __lsx_vsrari_w(u1, 8);
            v1     = __lsx_vsrari_w(v1, 8);
            u2     = __lsx_vsrari_w(u2, 8);
            v2     = __lsx_vsrari_w(v2, 8);
            u1     = __lsx_vadd_w(u1, headroom);
            v1     = __lsx_vadd_w(v1, headroom);
            u2     = __lsx_vadd_w(u2, headroom);
            v2     = __lsx_vadd_w(v2, headroom);
            WRITE_YUV2RGB_LSX(y_l, y_l, u1, v1, 0, 1, 0, 0);
            WRITE_YUV2RGB_LSX(y_l, y_l, u2, v2, 2, 3, 0, 0);
            WRITE_YUV2RGB_LSX(y_h, y_h, u1, v1, 0, 1, 1, 1);
            WRITE_YUV2RGB_LSX(y_h, y_h, u2, v2, 2, 3, 1, 1);
        }
        if (dstW - i >= 4) {
            int Y1, Y2, U, V;
            int i_dex = i << 1;
            __m128i src_y, src_u0, src_v0, src_u1, src_v1;
            __m128i uv;

            src_y  = __lsx_vldx(buf0, i_dex);
            src_u0 = __lsx_vldrepl_d((ubuf0 + count), 0);
            src_v0 = __lsx_vldrepl_d((vbuf0 + count), 0);
            src_u1 = __lsx_vldrepl_d((ubuf1 + count), 0);
            src_v1 = __lsx_vldrepl_d((vbuf1 + count), 0);

            src_u0 = __lsx_vilvl_h(src_u1, src_u0);
            src_v0 = __lsx_vilvl_h(src_v1, src_v0);
            src_y  = __lsx_vsrari_h(src_y, 7);
            src_y  = __lsx_vsllwil_w_h(src_y, 0);
            uv     = __lsx_vilvl_h(src_v0, src_u0);
            uv     = __lsx_vhaddw_w_h(uv, uv);
            uv     = __lsx_vsrari_w(uv, 8);
            uv     = __lsx_vadd_w(uv, headroom);
            WRITE_YUV2RGB_LSX(src_y, src_y, uv, uv, 0, 1, 0, 1);
            WRITE_YUV2RGB_LSX(src_y, src_y, uv, uv, 2, 3, 2, 3);
            i += 4;
        }
        for (; count < len_count; count++) {
            int Y1 = (buf0[count * 2]     + 64) >> 7;
            int Y2 = (buf0[count * 2 + 1] + 64) >> 7;
            int U  = (ubuf0[count] + ubuf1[count] + 128) >> 8;
            int V  = (vbuf0[count] + vbuf1[count] + 128) >> 8;

            r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
            g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] +
                 c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
            b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];

            yuv2rgb_write(dest, count, Y1, Y2, 0, 0,
                          r, g, b, y, target, 0);
        }
    }
}

#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha)                        \
static void name ## ext ## _X_lsx(SwsInternal *c, const int16_t *lumFilter,   \
                                  const int16_t **lumSrc, int lumFilterSize,  \
                                  const int16_t *chrFilter,                   \
                                  const int16_t **chrUSrc,                    \
                                  const int16_t **chrVSrc, int chrFilterSize, \
                                  const int16_t **alpSrc, uint8_t *dest,      \
                                  int dstW, int y)                            \
{                                                                             \
    name ## base ## _X_template_lsx(c, lumFilter, lumSrc, lumFilterSize,      \
                                    chrFilter, chrUSrc, chrVSrc,              \
                                    chrFilterSize, alpSrc, dest, dstW, y,     \
                                    fmt, hasAlpha);                           \
}

#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha)                       \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha)                                \
static void name ## ext ## _2_lsx(SwsInternal *c, const int16_t *buf[2],      \
                                  const int16_t *ubuf[2],                     \
                                  const int16_t *vbuf[2],                     \
                                  const int16_t *abuf[2], uint8_t *dest,      \
                                  int dstW, int yalpha, int uvalpha, int y)   \
{                                                                             \
    name ## base ## _2_template_lsx(c, buf, ubuf, vbuf, abuf, dest,           \
                                    dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
}

#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha)                         \
YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha)                               \
static void name ## ext ## _1_lsx(SwsInternal *c, const int16_t *buf0,        \
                                  const int16_t *ubuf[2],                     \
                                  const int16_t *vbuf[2],                     \
                                  const int16_t *abuf0, uint8_t *dest,        \
                                  int dstW, int uvalpha, int y)               \
{                                                                             \
    name ## base ## _1_template_lsx(c, buf0, ubuf, vbuf, abuf0, dest,         \
                                    dstW, uvalpha, y, fmt, hasAlpha);         \
}
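
/*
 * Editorial sketch (not upstream code): each wrapper is a thin shim around
 * the matching template; e.g. YUV2RGBWRAPPER(yuv2, rgb, rgb24,
 * AV_PIX_FMT_RGB24, 0) below expands, among others, to roughly:
 */
#if 0
static void yuv2rgb24_1_lsx(SwsInternal *c, const int16_t *buf0,
                            const int16_t *ubuf[2], const int16_t *vbuf[2],
                            const int16_t *abuf0, uint8_t *dest, int dstW,
                            int uvalpha, int y)
{
    yuv2rgb_1_template_lsx(c, buf0, ubuf, vbuf, abuf0, dest,
                           dstW, uvalpha, y, AV_PIX_FMT_RGB24, 0);
}
#endif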

#if CONFIG_SMALL
#else
#if CONFIG_SWSCALE_ALPHA
#endif
YUV2RGBWRAPPER(yuv2rgb,, x32_1, AV_PIX_FMT_RGB32_1, 0)
YUV2RGBWRAPPER(yuv2rgb,, x32, AV_PIX_FMT_RGB32, 0)
#endif
YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24, 0)
YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24, 0)

// This function is copied from libswscale/output.c
static av_always_inline void yuv2rgb_write_full(SwsInternal *c,
    uint8_t *dest, int i, int R, int A, int G, int B,
    int y, enum AVPixelFormat target, int hasAlpha, int err[4])
{
    int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8;

    if ((R | G | B) & 0xC0000000) {
        R = av_clip_uintp2(R, 30);
        G = av_clip_uintp2(G, 30);
        B = av_clip_uintp2(B, 30);
    }

    switch (target) {
    case AV_PIX_FMT_ARGB:
        dest[0] = hasAlpha ? A : 255;
        dest[1] = R >> 22;
        dest[2] = G >> 22;
        dest[3] = B >> 22;
        break;
    case AV_PIX_FMT_RGB24:
        dest[0] = R >> 22;
        dest[1] = G >> 22;
        dest[2] = B >> 22;
        break;
    case AV_PIX_FMT_RGBA:
        dest[0] = R >> 22;
        dest[1] = G >> 22;
        dest[2] = B >> 22;
        dest[3] = hasAlpha ? A : 255;
        break;
    case AV_PIX_FMT_ABGR:
        dest[0] = hasAlpha ? A : 255;
        dest[1] = B >> 22;
        dest[2] = G >> 22;
        dest[3] = R >> 22;
        break;
    case AV_PIX_FMT_BGR24:
        dest[0] = B >> 22;
        dest[1] = G >> 22;
        dest[2] = R >> 22;
        break;
    case AV_PIX_FMT_BGRA:
        dest[0] = B >> 22;
        dest[1] = G >> 22;
        dest[2] = R >> 22;
        dest[3] = hasAlpha ? A : 255;
        break;
    case AV_PIX_FMT_BGR4_BYTE:
    case AV_PIX_FMT_RGB4_BYTE:
    case AV_PIX_FMT_BGR8:
    case AV_PIX_FMT_RGB8:
    {
        int r, g, b;

        switch (c->dither) {
        default:
        case SWS_DITHER_AUTO:
        case SWS_DITHER_ED:
            R >>= 22;
            G >>= 22;
            B >>= 22;
            R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2]) >> 4;
            G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2]) >> 4;
            B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2]) >> 4;
            c->dither_error[0][i] = err[0];
            c->dither_error[1][i] = err[1];
            c->dither_error[2][i] = err[2];
            r = R >> (isrgb8 ? 5 : 7);
            g = G >> (isrgb8 ? 5 : 6);
            b = B >> (isrgb8 ? 6 : 7);
            r = av_clip(r, 0, isrgb8 ? 7 : 1);
            g = av_clip(g, 0, isrgb8 ? 7 : 3);
            b = av_clip(b, 0, isrgb8 ? 3 : 1);
            err[0] = R - r*(isrgb8 ? 36 : 255);
            err[1] = G - g*(isrgb8 ? 36 : 85);
            err[2] = B - b*(isrgb8 ? 85 : 255);
            break;
        case SWS_DITHER_A_DITHER:
            if (isrgb8) {
                /* see http://pippin.gimp.org/a_dither/ for details/origin */
#define A_DITHER(u,v)   (((((u)+((v)*236))*119)&0xff))
                r = (((R >> 19) + A_DITHER(i, y)        - 96) >> 8);
                g = (((G >> 19) + A_DITHER(i + 17, y)   - 96) >> 8);
                b = (((B >> 20) + A_DITHER(i + 17*2, y) - 96) >> 8);
                r = av_clip_uintp2(r, 3);
                g = av_clip_uintp2(g, 3);
                b = av_clip_uintp2(b, 2);
            } else {
                r = (((R >> 21) + A_DITHER(i, y)        - 256) >> 8);
                g = (((G >> 19) + A_DITHER(i + 17, y)   - 256) >> 8);
                b = (((B >> 21) + A_DITHER(i + 17*2, y) - 256) >> 8);
                r = av_clip_uintp2(r, 1);
                g = av_clip_uintp2(g, 2);
                b = av_clip_uintp2(b, 1);
            }
            break;
        case SWS_DITHER_X_DITHER:
            if (isrgb8) {
                /* see http://pippin.gimp.org/a_dither/ for details/origin */
#define X_DITHER(u,v)   (((((u)^((v)*237))*181)&0x1ff)/2)
                r = (((R >> 19) + X_DITHER(i, y)        - 96) >> 8);
                g = (((G >> 19) + X_DITHER(i + 17, y)   - 96) >> 8);
                b = (((B >> 20) + X_DITHER(i + 17*2, y) - 96) >> 8);
                r = av_clip_uintp2(r, 3);
                g = av_clip_uintp2(g, 3);
                b = av_clip_uintp2(b, 2);
            } else {
                r = (((R >> 21) + X_DITHER(i, y)        - 256) >> 8);
                g = (((G >> 19) + X_DITHER(i + 17, y)   - 256) >> 8);
                b = (((B >> 21) + X_DITHER(i + 17*2, y) - 256) >> 8);
                r = av_clip_uintp2(r, 1);
                g = av_clip_uintp2(g, 2);
                b = av_clip_uintp2(b, 1);
            }
            break;
        }

        if (target == AV_PIX_FMT_BGR4_BYTE) {
            dest[0] = r + 2*g + 8*b;
        } else if (target == AV_PIX_FMT_RGB4_BYTE) {
            dest[0] = b + 2*g + 8*r;
        } else if (target == AV_PIX_FMT_BGR8) {
            dest[0] = r + 8*g + 64*b;
        } else if (target == AV_PIX_FMT_RGB8) {
            dest[0] = b + 4*g + 32*r;
        } else
            av_assert2(0);
        break;
    }
    }
}
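
/*
 * Editorial note: the SWS_DITHER_ED branch above is Floyd-Steinberg-style
 * error diffusion; the neighbour errors are weighted 7 (pixel to the left,
 * carried in err[]) and 1/5/3 (previous row, c->dither_error[][]), which
 * sum to 16, hence the final >> 4.
 */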

#define YUVTORGB_SETUP_LSX                              \
    int y_offset = c->yuv2rgb_y_offset;                 \
    int y_coeff  = c->yuv2rgb_y_coeff;                  \
    int v2r_coe  = c->yuv2rgb_v2r_coeff;                \
    int v2g_coe  = c->yuv2rgb_v2g_coeff;                \
    int u2g_coe  = c->yuv2rgb_u2g_coeff;                \
    int u2b_coe  = c->yuv2rgb_u2b_coeff;                \
    __m128i offset = __lsx_vreplgr2vr_w(y_offset);      \
    __m128i coeff  = __lsx_vreplgr2vr_w(y_coeff);       \
    __m128i v2r    = __lsx_vreplgr2vr_w(v2r_coe);       \
    __m128i v2g    = __lsx_vreplgr2vr_w(v2g_coe);       \
    __m128i u2g    = __lsx_vreplgr2vr_w(u2g_coe);       \
    __m128i u2b    = __lsx_vreplgr2vr_w(u2b_coe);

#define YUVTORGB_LSX(y, u, v, R, G, B, offset, coeff,   \
                     y_temp, v2r, v2g, u2g, u2b)        \
{                                                       \
    y = __lsx_vsub_w(y, offset);                        \
    y = __lsx_vmul_w(y, coeff);                         \
    y = __lsx_vadd_w(y, y_temp);                        \
    R = __lsx_vmadd_w(y, v, v2r);                       \
    v = __lsx_vmadd_w(y, v, v2g);                       \
    G = __lsx_vmadd_w(v, u, u2g);                       \
    B = __lsx_vmadd_w(y, u, u2b);                       \
}
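
/*
 * Editorial sketch: per 32-bit lane, YUVTORGB_LSX computes
 *     y' = (y - offset) * coeff + y_temp      (y_temp = 1 << 21 bias)
 *     R  = y' + v * v2r
 *     G  = y' + v * v2g + u * u2g
 *     B  = y' + u * u2b
 * Note that the y and v arguments are clobbered (v holds the partial G sum
 * on exit), so callers must not reuse them after the macro.
 */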

#define WRITE_FULL_A_LSX(r, g, b, a, t1, s)                                   \
{                                                                             \
    R = __lsx_vpickve2gr_w(r, t1);                                            \
    G = __lsx_vpickve2gr_w(g, t1);                                            \
    B = __lsx_vpickve2gr_w(b, t1);                                            \
    A = __lsx_vpickve2gr_w(a, t1);                                            \
    if (A & 0x100)                                                            \
        A = av_clip_uint8(A);                                                 \
    yuv2rgb_write_full(c, dest, i + s, R, A, G, B, y, target, hasAlpha, err); \
    dest += step;                                                             \
}

#define WRITE_FULL_LSX(r, g, b, t1, s)                                        \
{                                                                             \
    R = __lsx_vpickve2gr_w(r, t1);                                            \
    G = __lsx_vpickve2gr_w(g, t1);                                            \
    B = __lsx_vpickve2gr_w(b, t1);                                            \
    yuv2rgb_write_full(c, dest, i + s, R, 0, G, B, y, target, hasAlpha, err); \
    dest += step;                                                             \
}

static void
yuv2rgb_full_X_template_lsx(SwsInternal *c, const int16_t *lumFilter,
                            const int16_t **lumSrc, int lumFilterSize,
                            const int16_t *chrFilter, const int16_t **chrUSrc,
                            const int16_t **chrVSrc, int chrFilterSize,
                            const int16_t **alpSrc, uint8_t *dest,
                            int dstW, int y, enum AVPixelFormat target,
                            int hasAlpha)
{
    int i, j, B, G, R, A;
    int step = (target == AV_PIX_FMT_RGB24 ||
                target == AV_PIX_FMT_BGR24) ? 3 : 4;
    int err[4] = {0};
    int a_temp = 1 << 18;
    int templ  = 1 << 9;
    int tempc  = templ - (128 << 19);
    int ytemp  = 1 << 21;
    int len    = dstW - 7;
    __m128i y_temp = __lsx_vreplgr2vr_w(ytemp);
    YUVTORGB_SETUP_LSX

    if (target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE ||
        target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8)
        step = 1;

    for (i = 0; i < len; i += 8) {
        __m128i l_src, u_src, v_src;
        __m128i y_ev, y_od, u_ev, u_od, v_ev, v_od, temp;
        __m128i R_ev, R_od, G_ev, G_od, B_ev, B_od;
        int n = i << 1;

        y_ev = y_od = __lsx_vreplgr2vr_w(templ);
        u_ev = u_od = v_ev = v_od = __lsx_vreplgr2vr_w(tempc);
        for (j = 0; j < lumFilterSize; j++) {
            temp  = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src = __lsx_vldx(lumSrc[j], n);
            y_ev  = __lsx_vmaddwev_w_h(y_ev, l_src, temp);
            y_od  = __lsx_vmaddwod_w_h(y_od, l_src, temp);
        }
        for (j = 0; j < chrFilterSize; j++) {
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            DUP2_ARG2(__lsx_vldx, chrUSrc[j], n, chrVSrc[j], n,
                      u_src, v_src);
            DUP2_ARG3(__lsx_vmaddwev_w_h, u_ev, u_src, temp, v_ev,
                      v_src, temp, u_ev, v_ev);
            DUP2_ARG3(__lsx_vmaddwod_w_h, u_od, u_src, temp, v_od,
                      v_src, temp, u_od, v_od);
        }
        y_ev = __lsx_vsrai_w(y_ev, 10);
        y_od = __lsx_vsrai_w(y_od, 10);
        u_ev = __lsx_vsrai_w(u_ev, 10);
        u_od = __lsx_vsrai_w(u_od, 10);
        v_ev = __lsx_vsrai_w(v_ev, 10);
        v_od = __lsx_vsrai_w(v_od, 10);
        YUVTORGB_LSX(y_ev, u_ev, v_ev, R_ev, G_ev, B_ev, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);
        YUVTORGB_LSX(y_od, u_od, v_od, R_od, G_od, B_od, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);

        if (hasAlpha) {
            __m128i a_src, a_ev, a_od;

            a_ev = a_od = __lsx_vreplgr2vr_w(a_temp);
            for (j = 0; j < lumFilterSize; j++) {
                temp  = __lsx_vldrepl_h(lumFilter + j, 0);
                a_src = __lsx_vldx(alpSrc[j], n);
                a_ev  = __lsx_vmaddwev_w_h(a_ev, a_src, temp);
                a_od  = __lsx_vmaddwod_w_h(a_od, a_src, temp);
            }
            a_ev = __lsx_vsrai_w(a_ev, 19);
            a_od = __lsx_vsrai_w(a_od, 19);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 0, 0);
            WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 0, 1);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 1, 2);
            WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 1, 3);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 2, 4);
            WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 2, 5);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 3, 6);
            WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 3, 7);
        } else {
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 0, 0);
            WRITE_FULL_LSX(R_od, G_od, B_od, 0, 1);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 1, 2);
            WRITE_FULL_LSX(R_od, G_od, B_od, 1, 3);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 2, 4);
            WRITE_FULL_LSX(R_od, G_od, B_od, 2, 5);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 3, 6);
            WRITE_FULL_LSX(R_od, G_od, B_od, 3, 7);
        }
    }
    if (dstW - i >= 4) {
        __m128i l_src, u_src, v_src;
        __m128i y_ev, u_ev, v_ev, uv, temp;
        __m128i R_ev, G_ev, B_ev;
        int n = i << 1;

        y_ev = __lsx_vreplgr2vr_w(templ);
        u_ev = v_ev = __lsx_vreplgr2vr_w(tempc);
        for (j = 0; j < lumFilterSize; j++) {
            temp  = __lsx_vldrepl_h((lumFilter + j), 0);
            l_src = __lsx_vldx(lumSrc[j], n);
            l_src = __lsx_vilvl_h(l_src, l_src);
            y_ev  = __lsx_vmaddwev_w_h(y_ev, l_src, temp);
        }
        for (j = 0; j < chrFilterSize; j++) {
            temp = __lsx_vldrepl_h((chrFilter + j), 0);
            DUP2_ARG2(__lsx_vldx, chrUSrc[j], n, chrVSrc[j], n, u_src, v_src);
            uv   = __lsx_vilvl_h(v_src, u_src);
            u_ev = __lsx_vmaddwev_w_h(u_ev, uv, temp);
            v_ev = __lsx_vmaddwod_w_h(v_ev, uv, temp);
        }
        y_ev = __lsx_vsrai_w(y_ev, 10);
        u_ev = __lsx_vsrai_w(u_ev, 10);
        v_ev = __lsx_vsrai_w(v_ev, 10);
        YUVTORGB_LSX(y_ev, u_ev, v_ev, R_ev, G_ev, B_ev, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);

        if (hasAlpha) {
            __m128i a_src, a_ev;

            a_ev = __lsx_vreplgr2vr_w(a_temp);
            for (j = 0; j < lumFilterSize; j++) {
                temp  = __lsx_vldrepl_h(lumFilter + j, 0);
                a_src = __lsx_vldx(alpSrc[j], n);
                a_src = __lsx_vilvl_h(a_src, a_src);
                a_ev  = __lsx_vmaddwev_w_h(a_ev, a_src, temp);
            }
            a_ev = __lsx_vsrai_w(a_ev, 19);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 0, 0);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 1, 1);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 2, 2);
            WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 3, 3);
        } else {
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 0, 0);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 1, 1);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 2, 2);
            WRITE_FULL_LSX(R_ev, G_ev, B_ev, 3, 3);
        }
        i += 4;
    }
    for (; i < dstW; i++) {
        int Y = templ;
        int V, U = V = tempc;

        A = 0;
        for (j = 0; j < lumFilterSize; j++) {
            Y += lumSrc[j][i] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y >>= 10;
        U >>= 10;
        V >>= 10;
        if (hasAlpha) {
            A = 1 << 18;
            for (j = 0; j < lumFilterSize; j++) {
                A += alpSrc[j][i] * lumFilter[j];
            }
            A >>= 19;
            if (A & 0x100)
                A = av_clip_uint8(A);
        }
        Y -= y_offset;
        Y *= y_coeff;
        Y += ytemp;
        R = (unsigned)Y + V * v2r_coe;
        G = (unsigned)Y + V * v2g_coe + U * u2g_coe;
        B = (unsigned)Y + U * u2b_coe;
        yuv2rgb_write_full(c, dest, i, R, A, G, B, y, target, hasAlpha, err);
        dest += step;
    }
    c->dither_error[0][i] = err[0];
    c->dither_error[1][i] = err[1];
    c->dither_error[2][i] = err[2];
}

static void
yuv2rgb_full_2_template_lsx(SwsInternal *c, const int16_t *buf[2],
                            const int16_t *ubuf[2], const int16_t *vbuf[2],
                            const int16_t *abuf[2], uint8_t *dest, int dstW,
                            int yalpha, int uvalpha, int y,
                            enum AVPixelFormat target, int hasAlpha)
{
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1  = 4096 - yalpha;
    int uvalpha1 = 4096 - uvalpha;
    int uvtemp   = 128 << 19;
    int atemp    = 1 << 18;
    int err[4]   = {0};
    int ytemp    = 1 << 21;
    int len      = dstW - 7;
    int i, R, G, B, A;
    int step = (target == AV_PIX_FMT_RGB24 ||
                target == AV_PIX_FMT_BGR24) ? 3 : 4;
    __m128i v_uvalpha1 = __lsx_vreplgr2vr_w(uvalpha1);
    __m128i v_yalpha1  = __lsx_vreplgr2vr_w(yalpha1);
    __m128i v_uvalpha  = __lsx_vreplgr2vr_w(uvalpha);
    __m128i v_yalpha   = __lsx_vreplgr2vr_w(yalpha);
    __m128i uv         = __lsx_vreplgr2vr_w(uvtemp);
    __m128i a_bias     = __lsx_vreplgr2vr_w(atemp);
    __m128i y_temp     = __lsx_vreplgr2vr_w(ytemp);
    YUVTORGB_SETUP_LSX

    av_assert2(yalpha  <= 4096U);
    av_assert2(uvalpha <= 4096U);

    if (target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE ||
        target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8)
        step = 1;

    for (i = 0; i < len; i += 8) {
        __m128i b0, b1, ub0, ub1, vb0, vb1;
        __m128i y0_l, y0_h, y1_l, y1_h, u0_l, u0_h;
        __m128i v0_l, v0_h, u1_l, u1_h, v1_l, v1_h;
        __m128i y_l, y_h, v_l, v_h, u_l, u_h;
        __m128i R_l, R_h, G_l, G_h, B_l, B_h;
        int n = i << 1;

        DUP4_ARG2(__lsx_vldx, buf0, n, buf1, n, ubuf0,
                  n, ubuf1, n, b0, b1, ub0, ub1);
        DUP2_ARG2(__lsx_vldx, vbuf0, n, vbuf1, n, vb0, vb1);
        DUP2_ARG2(__lsx_vsllwil_w_h, b0, 0, b1, 0, y0_l, y1_l);
        DUP4_ARG2(__lsx_vsllwil_w_h, ub0, 0, ub1, 0, vb0, 0, vb1, 0,
                  u0_l, u1_l, v0_l, v1_l);
        DUP2_ARG1(__lsx_vexth_w_h, b0, b1, y0_h, y1_h);
        DUP4_ARG1(__lsx_vexth_w_h, ub0, ub1, vb0, vb1,
                  u0_h, u1_h, v0_h, v1_h);
        y0_l = __lsx_vmul_w(y0_l, v_yalpha1);
        y0_h = __lsx_vmul_w(y0_h, v_yalpha1);
        u0_l = __lsx_vmul_w(u0_l, v_uvalpha1);
        u0_h = __lsx_vmul_w(u0_h, v_uvalpha1);
        v0_l = __lsx_vmul_w(v0_l, v_uvalpha1);
        v0_h = __lsx_vmul_w(v0_h, v_uvalpha1);
        y_l  = __lsx_vmadd_w(y0_l, v_yalpha, y1_l);
        y_h  = __lsx_vmadd_w(y0_h, v_yalpha, y1_h);
        u_l  = __lsx_vmadd_w(u0_l, v_uvalpha, u1_l);
        u_h  = __lsx_vmadd_w(u0_h, v_uvalpha, u1_h);
        v_l  = __lsx_vmadd_w(v0_l, v_uvalpha, v1_l);
        v_h  = __lsx_vmadd_w(v0_h, v_uvalpha, v1_h);
        u_l  = __lsx_vsub_w(u_l, uv);
        u_h  = __lsx_vsub_w(u_h, uv);
        v_l  = __lsx_vsub_w(v_l, uv);
        v_h  = __lsx_vsub_w(v_h, uv);
        y_l  = __lsx_vsrai_w(y_l, 10);
        y_h  = __lsx_vsrai_w(y_h, 10);
        u_l  = __lsx_vsrai_w(u_l, 10);
        u_h  = __lsx_vsrai_w(u_h, 10);
        v_l  = __lsx_vsrai_w(v_l, 10);
        v_h  = __lsx_vsrai_w(v_h, 10);
        YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);
        YUVTORGB_LSX(y_h, u_h, v_h, R_h, G_h, B_h, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);

        if (hasAlpha) {
            __m128i a0, a1, a0_l, a0_h;
            __m128i a_l, a_h, a1_l, a1_h;

            DUP2_ARG2(__lsx_vldx, abuf0, n, abuf1, n, a0, a1);
            DUP2_ARG2(__lsx_vsllwil_w_h, a0, 0, a1, 0, a0_l, a1_l);
            DUP2_ARG1(__lsx_vexth_w_h, a0, a1, a0_h, a1_h);
            a_l = __lsx_vmadd_w(a_bias, a0_l, v_yalpha1);
            a_h = __lsx_vmadd_w(a_bias, a0_h, v_yalpha1);
            a_l = __lsx_vmadd_w(a_l, v_yalpha, a1_l);
            a_h = __lsx_vmadd_w(a_h, v_yalpha, a1_h);
            a_l = __lsx_vsrai_w(a_l, 19);
            a_h = __lsx_vsrai_w(a_h, 19);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
            WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 0, 4);
            WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 1, 5);
            WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 2, 6);
            WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 3, 7);
        } else {
            WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
            WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
            WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
            WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
            WRITE_FULL_LSX(R_h, G_h, B_h, 0, 4);
            WRITE_FULL_LSX(R_h, G_h, B_h, 1, 5);
            WRITE_FULL_LSX(R_h, G_h, B_h, 2, 6);
            WRITE_FULL_LSX(R_h, G_h, B_h, 3, 7);
        }
    }
    if (dstW - i >= 4) {
        __m128i b0, b1, ub0, ub1, vb0, vb1;
        __m128i y0_l, y1_l, u0_l;
        __m128i v0_l, u1_l, v1_l;
        __m128i y_l, u_l, v_l;
        __m128i R_l, G_l, B_l;
        int n = i << 1;

        DUP4_ARG2(__lsx_vldx, buf0, n, buf1, n, ubuf0, n,
                  ubuf1, n, b0, b1, ub0, ub1);
        DUP2_ARG2(__lsx_vldx, vbuf0, n, vbuf1, n, vb0, vb1);
        DUP2_ARG2(__lsx_vsllwil_w_h, b0, 0, b1, 0, y0_l, y1_l);
        DUP4_ARG2(__lsx_vsllwil_w_h, ub0, 0, ub1, 0, vb0, 0, vb1, 0,
                  u0_l, u1_l, v0_l, v1_l);
        y0_l = __lsx_vmul_w(y0_l, v_yalpha1);
        u0_l = __lsx_vmul_w(u0_l, v_uvalpha1);
        v0_l = __lsx_vmul_w(v0_l, v_uvalpha1);
        y_l  = __lsx_vmadd_w(y0_l, v_yalpha, y1_l);
        u_l  = __lsx_vmadd_w(u0_l, v_uvalpha, u1_l);
        v_l  = __lsx_vmadd_w(v0_l, v_uvalpha, v1_l);
        u_l  = __lsx_vsub_w(u_l, uv);
        v_l  = __lsx_vsub_w(v_l, uv);
        y_l  = __lsx_vsrai_w(y_l, 10);
        u_l  = __lsx_vsrai_w(u_l, 10);
        v_l  = __lsx_vsrai_w(v_l, 10);
        YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
                     y_temp, v2r, v2g, u2g, u2b);

        if (hasAlpha) {
            __m128i a0, a1, a0_l;
            __m128i a_l, a1_l;

            DUP2_ARG2(__lsx_vldx, abuf0, n, abuf1, n, a0, a1);
            DUP2_ARG2(__lsx_vsllwil_w_h, a0, 0, a1, 0, a0_l, a1_l);
            a_l = __lsx_vmadd_w(a_bias, a0_l, v_yalpha1);
            a_l = __lsx_vmadd_w(a_l, v_yalpha, a1_l);
            a_l = __lsx_vsrai_w(a_l, 19);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
            WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
        } else {
            WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
            WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
            WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
            WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
        }
        i += 4;
    }
    for (; i < dstW; i++) {
        int Y = ( buf0[i] * yalpha1  +  buf1[i] * yalpha          ) >> 10;
        int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha - uvtemp) >> 10;
        int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha - uvtemp) >> 10;

        A = 0;
        if (hasAlpha) {
            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + atemp) >> 19;
            if (A & 0x100)
                A = av_clip_uint8(A);
        }

        Y -= y_offset;
        Y *= y_coeff;
        Y += ytemp;
        R = (unsigned)Y + V * v2r_coe;
        G = (unsigned)Y + V * v2g_coe + U * u2g_coe;
        B = (unsigned)Y + U * u2b_coe;
        yuv2rgb_write_full(c, dest, i, R, A, G, B, y, target, hasAlpha, err);
        dest += step;
    }
    c->dither_error[0][i] = err[0];
    c->dither_error[1][i] = err[1];
    c->dither_error[2][i] = err[2];
}
1333 
1334 static void
1336  const int16_t *ubuf[2], const int16_t *vbuf[2],
1337  const int16_t *abuf0, uint8_t *dest, int dstW,
1338  int uvalpha, int y, enum AVPixelFormat target,
1339  int hasAlpha)
1340 {
1341  const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
1342  int i, B, G, R, A;
1343  int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
1344  int err[4] = {0};
1345  int ytemp = 1 << 21;
1346  int bias_int = 64;
1347  int len = dstW - 7;
1348  __m128i y_temp = __lsx_vreplgr2vr_w(ytemp);
1350 
1351  if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
1352  || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8)
1353  step = 1;
1354  if (uvalpha < 2048) {
1355  int uvtemp = 128 << 7;
1356  __m128i uv = __lsx_vreplgr2vr_w(uvtemp);
1357  __m128i bias = __lsx_vreplgr2vr_w(bias_int);
1358 
1359  for (i = 0; i < len; i += 8) {
1360  __m128i b, ub, vb, ub_l, ub_h, vb_l, vb_h;
1361  __m128i y_l, y_h, u_l, u_h, v_l, v_h;
1362  __m128i R_l, R_h, G_l, G_h, B_l, B_h;
1363  int n = i << 1;
1364 
1365  DUP2_ARG2(__lsx_vldx, buf0, n, ubuf0, n, b, ub);
1366  vb = __lsx_vldx(vbuf0, n);
1367  y_l = __lsx_vsllwil_w_h(b, 2);
1368  y_h = __lsx_vexth_w_h(b);
1369  DUP2_ARG2(__lsx_vsllwil_w_h, ub, 0, vb, 0, ub_l, vb_l);
1370  DUP2_ARG1(__lsx_vexth_w_h, ub, vb, ub_h, vb_h);
1371  y_h = __lsx_vslli_w(y_h, 2);
1372  u_l = __lsx_vsub_w(ub_l, uv);
1373  u_h = __lsx_vsub_w(ub_h, uv);
1374  v_l = __lsx_vsub_w(vb_l, uv);
1375  v_h = __lsx_vsub_w(vb_h, uv);
1376  u_l = __lsx_vslli_w(u_l, 2);
1377  u_h = __lsx_vslli_w(u_h, 2);
1378  v_l = __lsx_vslli_w(v_l, 2);
1379  v_h = __lsx_vslli_w(v_h, 2);
1380  YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
1381  y_temp, v2r, v2g, u2g, u2b);
1382  YUVTORGB_LSX(y_h, u_h, v_h, R_h, G_h, B_h, offset, coeff,
1383  y_temp, v2r, v2g, u2g, u2b);
1384 
1385  if(hasAlpha) {
1386  __m128i a_src;
1387  __m128i a_l, a_h;
1388 
1389  a_src = __lsx_vld(abuf0 + i, 0);
1390  a_l = __lsx_vsllwil_w_h(a_src, 0);
1391  a_h = __lsx_vexth_w_h(a_src);
1392  a_l = __lsx_vadd_w(a_l, bias);
1393  a_h = __lsx_vadd_w(a_h, bias);
1394  a_l = __lsx_vsrai_w(a_l, 7);
1395  a_h = __lsx_vsrai_w(a_h, 7);
1396  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
1397  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
1398  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
1399  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
1400  WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 0, 4);
1401  WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 1, 5);
1402  WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 2, 6);
1403  WRITE_FULL_A_LSX(R_h, G_h, B_h, a_h, 3, 7);
1404  } else {
1405  WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
1406  WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
1407  WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
1408  WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
1409  WRITE_FULL_LSX(R_h, G_h, B_h, 0, 4);
1410  WRITE_FULL_LSX(R_h, G_h, B_h, 1, 5);
1411  WRITE_FULL_LSX(R_h, G_h, B_h, 2, 6);
1412  WRITE_FULL_LSX(R_h, G_h, B_h, 3, 7);
1413  }
1414  }
1415  if (dstW - i >= 4) {
1416  __m128i b, ub, vb, ub_l, vb_l;
1417  __m128i y_l, u_l, v_l;
1418  __m128i R_l, G_l, B_l;
1419  int n = i << 1;
1420 
1421  DUP2_ARG2(__lsx_vldx, buf0, n, ubuf0, n, b, ub);
1422  vb = __lsx_vldx(vbuf0, n);
1423  y_l = __lsx_vsllwil_w_h(b, 0);
1424  DUP2_ARG2(__lsx_vsllwil_w_h, ub, 0, vb, 0, ub_l, vb_l);
1425  y_l = __lsx_vslli_w(y_l, 2);
1426  u_l = __lsx_vsub_w(ub_l, uv);
1427  v_l = __lsx_vsub_w(vb_l, uv);
1428  u_l = __lsx_vslli_w(u_l, 2);
1429  v_l = __lsx_vslli_w(v_l, 2);
1430  YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
1431  y_temp, v2r, v2g, u2g, u2b);
1432 
1433  if(hasAlpha) {
1434  __m128i a_src, a_l;
1435 
1436  a_src = __lsx_vldx(abuf0, n);
1437  a_src = __lsx_vsllwil_w_h(a_src, 0);
1438  a_l = __lsx_vadd_w(bias, a_src);
1439  a_l = __lsx_vsrai_w(a_l, 7);
1440  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
1441  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
1442  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
1443  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
1444  } else {
1445  WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
1446  WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
1447  WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
1448  WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
1449  }
1450  i += 4;
1451  }
1452  for (; i < dstW; i++) {
1453  int Y = buf0[i] << 2;
1454  int U = (ubuf0[i] - uvtemp) << 2;
1455  int V = (vbuf0[i] - uvtemp) << 2;
1456 
1457  A = 0;
1458  if(hasAlpha) {
1459  A = (abuf0[i] + 64) >> 7;
1460  if (A & 0x100)
1461  A = av_clip_uint8(A);
1462  }
1463  Y -= y_offset;
1464  Y *= y_coeff;
1465  Y += ytemp;
1466  R = (unsigned)Y + V * v2r_coe;
1467  G = (unsigned)Y + V * v2g_coe + U * u2g_coe;
1468  B = (unsigned)Y + U * u2b_coe;
1469  yuv2rgb_write_full(c, dest, i, R, A, G, B, y, target, hasAlpha, err);
1470  dest += step;
1471  }
1472  } else {
1473  const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
1474  int uvtemp = 128 << 8;
1475  __m128i uv = __lsx_vreplgr2vr_w(uvtemp);
1476  __m128i zero = __lsx_vldi(0);
1477  __m128i bias = __lsx_vreplgr2vr_h(bias_int);
1478 
1479  for (i = 0; i < len; i += 8) {
1480  __m128i b, ub0, ub1, vb0, vb1;
1481  __m128i y_ev, y_od, u_ev, u_od, v_ev, v_od;
1482  __m128i R_ev, R_od, G_ev, G_od, B_ev, B_od;
1483  int n = i << 1;
1484 
1485  DUP4_ARG2(__lsx_vldx, buf0, n, ubuf0, n, vbuf0, n,
1486  ubuf1, n, b, ub0, vb0, ub1);
1487  vb1 = __lsx_vldx(vbuf, n);
1488  y_ev = __lsx_vaddwev_w_h(b, zero);
1489  y_od = __lsx_vaddwod_w_h(b, zero);
1490  DUP2_ARG2(__lsx_vaddwev_w_h, ub0, vb0, ub1, vb1, u_ev, v_ev);
1491  DUP2_ARG2(__lsx_vaddwod_w_h, ub0, vb0, ub1, vb1, u_od, v_od);
1492  DUP2_ARG2(__lsx_vslli_w, y_ev, 2, y_od, 2, y_ev, y_od);
1493  DUP4_ARG2(__lsx_vsub_w, u_ev, uv, u_od, uv, v_ev, uv, v_od, uv,
1494  u_ev, u_od, v_ev, v_od);
1495  DUP4_ARG2(__lsx_vslli_w, u_ev, 1, u_od, 1, v_ev, 1, v_od, 1,
1496  u_ev, u_od, v_ev, v_od);
1497  YUVTORGB_LSX(y_ev, u_ev, v_ev, R_ev, G_ev, B_ev, offset, coeff,
1498  y_temp, v2r, v2g, u2g, u2b);
1499  YUVTORGB_LSX(y_od, u_od, v_od, R_od, G_od, B_od, offset, coeff,
1500  y_temp, v2r, v2g, u2g, u2b);
1501 
1502  if(hasAlpha) {
1503  __m128i a_src;
1504  __m128i a_ev, a_od;
1505 
1506  a_src = __lsx_vld(abuf0 + i, 0);
1507  a_ev = __lsx_vaddwev_w_h(bias, a_src);
1508  a_od = __lsx_vaddwod_w_h(bias, a_src);
1509  a_ev = __lsx_vsrai_w(a_ev, 7);
1510  a_od = __lsx_vsrai_w(a_od, 7);
1511  WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 0, 0);
1512  WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 0, 1);
1513  WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 1, 2);
1514  WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 1, 3);
1515  WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 2, 4);
1516  WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 2, 5);
1517  WRITE_FULL_A_LSX(R_ev, G_ev, B_ev, a_ev, 3, 6);
1518  WRITE_FULL_A_LSX(R_od, G_od, B_od, a_od, 3, 7);
1519  } else {
1520  WRITE_FULL_LSX(R_ev, G_ev, B_ev, 0, 0);
1521  WRITE_FULL_LSX(R_od, G_od, B_od, 0, 1);
1522  WRITE_FULL_LSX(R_ev, G_ev, B_ev, 1, 2);
1523  WRITE_FULL_LSX(R_od, G_od, B_od, 1, 3);
1524  WRITE_FULL_LSX(R_ev, G_ev, B_ev, 2, 4);
1525  WRITE_FULL_LSX(R_od, G_od, B_od, 2, 5);
1526  WRITE_FULL_LSX(R_ev, G_ev, B_ev, 3, 6);
1527  WRITE_FULL_LSX(R_od, G_od, B_od, 3, 7);
1528  }
1529  }
1530  if (dstW - i >= 4) {
1531  __m128i b, ub0, ub1, vb0, vb1;
1532  __m128i y_l, u_l, v_l;
1533  __m128i R_l, G_l, B_l;
1534  int n = i << 1;
1535 
1536  DUP4_ARG2(__lsx_vldx, buf0, n, ubuf0, n, vbuf0, n,
1537  ubuf1, n, b, ub0, vb0, ub1);
1538  vb1 = __lsx_vldx(vbuf1, n);
1539  y_l = __lsx_vsllwil_w_h(b, 0);
1540  y_l = __lsx_vslli_w(y_l, 2);
1541  DUP4_ARG2(__lsx_vsllwil_w_h, ub0, 0, vb0, 0, ub1, 0, vb1, 0,
1542  ub0, vb0, ub1, vb1);
1543  DUP2_ARG2(__lsx_vadd_w, ub0, ub1, vb0, vb1, u_l, v_l);
1544  u_l = __lsx_vsub_w(u_l, uv);
1545  v_l = __lsx_vsub_w(v_l, uv);
1546  u_l = __lsx_vslli_w(u_l, 1);
1547  v_l = __lsx_vslli_w(v_l, 1);
1548  YUVTORGB_LSX(y_l, u_l, v_l, R_l, G_l, B_l, offset, coeff,
1549  y_temp, v2r, v2g, u2g, u2b);
1550 
1551  if(hasAlpha) {
1552  __m128i a_src;
1553  __m128i a_l;
1554 
1555  a_src = __lsx_vld(abuf0 + i, 0);
1556  a_src = __lsx_vilvl_h(a_src, a_src);
1557  a_l = __lsx_vaddwev_w_h(bias, a_l);
1558  a_l = __lsx_vsrai_w(a_l, 7);
1559  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 0, 0);
1560  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 1, 1);
1561  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 2, 2);
1562  WRITE_FULL_A_LSX(R_l, G_l, B_l, a_l, 3, 3);
1563  } else {
1564  WRITE_FULL_LSX(R_l, G_l, B_l, 0, 0);
1565  WRITE_FULL_LSX(R_l, G_l, B_l, 1, 1);
1566  WRITE_FULL_LSX(R_l, G_l, B_l, 2, 2);
1567  WRITE_FULL_LSX(R_l, G_l, B_l, 3, 3);
1568  }
1569  i += 4;
1570  }
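 /* Scalar tail: the same fixed-point conversion, one pixel at a time.
  * yuv2rgb_write_full() clips R/G/B and applies the selected dither;
  * err[] carries the error-diffusion state that is stored back into
  * c->dither_error[] after the loop. */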
1571  for (; i < dstW; i++) {
1572  int Y = buf0[i] << 2;
1573  int U = (ubuf0[i] + ubuf1[i] - uvtemp) << 1;
1574  int V = (vbuf0[i] + vbuf1[i] - uvtemp) << 1;
1575 
1576  A = 0;
1577  if (hasAlpha) {
1578  A = (abuf0[i] + 64) >> 7;
1579  if (A & 0x100)
1580  A = av_clip_uint8(A);
1581  }
1582  Y -= y_offset;
1583  Y *= y_coeff;
1584  Y += ytemp;
1585  R = (unsigned)Y + V * v2r_coe;
1586  G = (unsigned)Y + V * v2g_coe + U * u2g_coe;
1587  B = (unsigned)Y + U * u2b_coe;
1588  yuv2rgb_write_full(c, dest, i, R, A, G, B, y, target, hasAlpha, err);
1589  dest += step;
1590  }
1591  }
1592  c->dither_error[0][i] = err[0];
1593  c->dither_error[1][i] = err[1];
1594  c->dither_error[2][i] = err[2];
1595 }
1596 
1597 #if CONFIG_SMALL
1598 YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,
1599  CONFIG_SWSCALE_ALPHA && c->needAlpha)
1600 YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,
1601  CONFIG_SWSCALE_ALPHA && c->needAlpha)
1602 YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,
1603  CONFIG_SWSCALE_ALPHA && c->needAlpha)
1604 YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,
1605  CONFIG_SWSCALE_ALPHA && c->needAlpha)
1606 #else
1607 #if CONFIG_SWSCALE_ALPHA
1608 YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1)
1609 YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1)
1610 YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1)
1611 YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1)
1612 #endif
1613 YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0)
1614 YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0)
1615 YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0)
1616 YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0)
1617 #endif
1618 YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0)
1619 YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0)
1620 
1621 YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full, AV_PIX_FMT_BGR4_BYTE, 0)
1622 YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full, AV_PIX_FMT_RGB4_BYTE, 0)
1623 YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0)
1624 YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0)
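/* Each YUV2RGBWRAPPER(yuv2, rgb_full, ext, fmt, hasAlpha) invocation above
 * instantiates the _1/_2/_X entry points for one destination format by
 * binding the yuv2rgb_full_*_template_lsx templates (defined earlier in
 * this file) to a fixed target format and hasAlpha condition. */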
1625 
1626 
1627 av_cold void ff_sws_init_output_lsx(SwsInternal *c,
1628  yuv2planar1_fn *yuv2plane1,
1629  yuv2planarX_fn *yuv2planeX,
1630  yuv2interleavedX_fn *yuv2nv12cX,
1631  yuv2packed1_fn *yuv2packed1,
1632  yuv2packed2_fn *yuv2packed2,
1633  yuv2packedX_fn *yuv2packedX,
1634  yuv2anyX_fn *yuv2anyX)
1635 {
1636  enum AVPixelFormat dstFormat = c->dstFormat;
1637 
1638  /* Add initializations for these destination formats once optimized versions exist. */
1639  if (isSemiPlanarYUV(dstFormat) && isDataInHighBits(dstFormat)) {
1640  } else if (is16BPS(dstFormat)) {
1641  } else if (isNBPS(dstFormat)) {
1642  } else if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
1643  } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
1644  } else {
1645  *yuv2plane1 = yuv2plane1_8_lsx;
1646  *yuv2planeX = yuv2planeX_8_lsx;
1647  }
1648 
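 /* Packed-pixel writer selection: with SWS_FULL_CHR_H_INT (full chroma
  * interpolation when converting to RGB) the *_full_* writers are used;
  * otherwise the writers for horizontally subsampled chroma are chosen. */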
1649  if (c->flags & SWS_FULL_CHR_H_INT) {
1650  switch (c->dstFormat) {
1651  case AV_PIX_FMT_RGBA:
1652 #if CONFIG_SMALL
1653  c->yuv2packedX = yuv2rgba32_full_X_lsx;
1654  c->yuv2packed2 = yuv2rgba32_full_2_lsx;
1655  c->yuv2packed1 = yuv2rgba32_full_1_lsx;
1656 #else
1657 #if CONFIG_SWSCALE_ALPHA
1658  if (c->needAlpha) {
1659  c->yuv2packedX = yuv2rgba32_full_X_lsx;
1660  c->yuv2packed2 = yuv2rgba32_full_2_lsx;
1661  c->yuv2packed1 = yuv2rgba32_full_1_lsx;
1662  } else
1663 #endif /* CONFIG_SWSCALE_ALPHA */
1664  {
1665  c->yuv2packedX = yuv2rgbx32_full_X_lsx;
1666  c->yuv2packed2 = yuv2rgbx32_full_2_lsx;
1667  c->yuv2packed1 = yuv2rgbx32_full_1_lsx;
1668  }
1669 #endif /* !CONFIG_SMALL */
1670  break;
1671  case AV_PIX_FMT_ARGB:
1672 #if CONFIG_SMALL
1673  c->yuv2packedX = yuv2argb32_full_X_lsx;
1674  c->yuv2packed2 = yuv2argb32_full_2_lsx;
1675  c->yuv2packed1 = yuv2argb32_full_1_lsx;
1676 #else
1677 #if CONFIG_SWSCALE_ALPHA
1678  if (c->needAlpha) {
1679  c->yuv2packedX = yuv2argb32_full_X_lsx;
1680  c->yuv2packed2 = yuv2argb32_full_2_lsx;
1681  c->yuv2packed1 = yuv2argb32_full_1_lsx;
1682  } else
1683 #endif /* CONFIG_SWSCALE_ALPHA */
1684  {
1685  c->yuv2packedX = yuv2xrgb32_full_X_lsx;
1686  c->yuv2packed2 = yuv2xrgb32_full_2_lsx;
1687  c->yuv2packed1 = yuv2xrgb32_full_1_lsx;
1688  }
1689 #endif /* !CONFIG_SMALL */
1690  break;
1691  case AV_PIX_FMT_BGRA:
1692 #if CONFIG_SMALL
1693  c->yuv2packedX = yuv2bgra32_full_X_lsx;
1694  c->yuv2packed2 = yuv2bgra32_full_2_lsx;
1695  c->yuv2packed1 = yuv2bgra32_full_1_lsx;
1696 #else
1697 #if CONFIG_SWSCALE_ALPHA
1698  if (c->needAlpha) {
1699  c->yuv2packedX = yuv2bgra32_full_X_lsx;
1700  c->yuv2packed2 = yuv2bgra32_full_2_lsx;
1701  c->yuv2packed1 = yuv2bgra32_full_1_lsx;
1702  } else
1703 #endif /* CONFIG_SWSCALE_ALPHA */
1704  {
1705  c->yuv2packedX = yuv2bgrx32_full_X_lsx;
1706  c->yuv2packed2 = yuv2bgrx32_full_2_lsx;
1707  c->yuv2packed1 = yuv2bgrx32_full_1_lsx;
1708  }
1709 #endif /* !CONFIG_SMALL */
1710  break;
1711  case AV_PIX_FMT_ABGR:
1712 #if CONFIG_SMALL
1713  c->yuv2packedX = yuv2abgr32_full_X_lsx;
1714  c->yuv2packed2 = yuv2abgr32_full_2_lsx;
1715  c->yuv2packed1 = yuv2abgr32_full_1_lsx;
1716 #else
1717 #if CONFIG_SWSCALE_ALPHA
1718  if (c->needAlpha) {
1719  c->yuv2packedX = yuv2abgr32_full_X_lsx;
1720  c->yuv2packed2 = yuv2abgr32_full_2_lsx;
1721  c->yuv2packed1 = yuv2abgr32_full_1_lsx;
1722  } else
1723 #endif /* CONFIG_SWSCALE_ALPHA */
1724  {
1725  c->yuv2packedX = yuv2xbgr32_full_X_lsx;
1726  c->yuv2packed2 = yuv2xbgr32_full_2_lsx;
1727  c->yuv2packed1 = yuv2xbgr32_full_1_lsx;
1728  }
1729 #endif /* !CONFIG_SMALL */
1730  break;
1731  case AV_PIX_FMT_RGB24:
1732  c->yuv2packedX = yuv2rgb24_full_X_lsx;
1733  c->yuv2packed2 = yuv2rgb24_full_2_lsx;
1734  c->yuv2packed1 = yuv2rgb24_full_1_lsx;
1735  break;
1736  case AV_PIX_FMT_BGR24:
1737  c->yuv2packedX = yuv2bgr24_full_X_lsx;
1738  c->yuv2packed2 = yuv2bgr24_full_2_lsx;
1739  c->yuv2packed1 = yuv2bgr24_full_1_lsx;
1740  break;
1741  case AV_PIX_FMT_BGR4_BYTE:
1742  c->yuv2packedX = yuv2bgr4_byte_full_X_lsx;
1743  c->yuv2packed2 = yuv2bgr4_byte_full_2_lsx;
1744  c->yuv2packed1 = yuv2bgr4_byte_full_1_lsx;
1745  break;
1746  case AV_PIX_FMT_RGB4_BYTE:
1747  c->yuv2packedX = yuv2rgb4_byte_full_X_lsx;
1748  c->yuv2packed2 = yuv2rgb4_byte_full_2_lsx;
1749  c->yuv2packed1 = yuv2rgb4_byte_full_1_lsx;
1750  break;
1751  case AV_PIX_FMT_BGR8:
1752  c->yuv2packedX = yuv2bgr8_full_X_lsx;
1753  c->yuv2packed2 = yuv2bgr8_full_2_lsx;
1754  c->yuv2packed1 = yuv2bgr8_full_1_lsx;
1755  break;
1756  case AV_PIX_FMT_RGB8:
1757  c->yuv2packedX = yuv2rgb8_full_X_lsx;
1758  c->yuv2packed2 = yuv2rgb8_full_2_lsx;
1759  c->yuv2packed1 = yuv2rgb8_full_1_lsx;
1760  break;
1761  }
1762  } else {
1763  switch (c->dstFormat) {
1764  case AV_PIX_FMT_RGB32:
1765  case AV_PIX_FMT_BGR32:
1766 #if CONFIG_SMALL
1767 #else
1768 #if CONFIG_SWSCALE_ALPHA
1769  if (c->needAlpha) {
1770  } else
1771 #endif /* CONFIG_SWSCALE_ALPHA */
1772  {
1773  c->yuv2packed1 = yuv2rgbx32_1_lsx;
1774  c->yuv2packed2 = yuv2rgbx32_2_lsx;
1775  c->yuv2packedX = yuv2rgbx32_X_lsx;
1776  }
1777 #endif /* !CONFIG_SMALL */
1778  break;
1779  case AV_PIX_FMT_RGB32_1:
1780  case AV_PIX_FMT_BGR32_1:
1781 #if CONFIG_SMALL
1782 #else
1783 #if CONFIG_SWSCALE_ALPHA
1784  if (c->needAlpha) {
1785  } else
1786 #endif /* CONFIG_SWSCALE_ALPHA */
1787  {
1788  c->yuv2packed1 = yuv2rgbx32_1_1_lsx;
1789  c->yuv2packed2 = yuv2rgbx32_1_2_lsx;
1790  c->yuv2packedX = yuv2rgbx32_1_X_lsx;
1791  }
1792 #endif /* !CONFIG_SMALL */
1793  break;
1794  case AV_PIX_FMT_RGB24:
1795  c->yuv2packed1 = yuv2rgb24_1_lsx;
1796  c->yuv2packed2 = yuv2rgb24_2_lsx;
1797  c->yuv2packedX = yuv2rgb24_X_lsx;
1798  break;
1799  case AV_PIX_FMT_BGR24:
1800  c->yuv2packed1 = yuv2bgr24_1_lsx;
1801  c->yuv2packed2 = yuv2bgr24_2_lsx;
1802  c->yuv2packedX = yuv2bgr24_X_lsx;
1803  break;
1804  case AV_PIX_FMT_RGB565LE:
1805  case AV_PIX_FMT_RGB565BE:
1806  case AV_PIX_FMT_BGR565LE:
1807  case AV_PIX_FMT_BGR565BE:
1808  c->yuv2packed1 = yuv2rgb16_1_lsx;
1809  c->yuv2packed2 = yuv2rgb16_2_lsx;
1810  c->yuv2packedX = yuv2rgb16_X_lsx;
1811  break;
1812  case AV_PIX_FMT_RGB555LE:
1813  case AV_PIX_FMT_RGB555BE:
1814  case AV_PIX_FMT_BGR555LE:
1815  case AV_PIX_FMT_BGR555BE:
1816  c->yuv2packed1 = yuv2rgb15_1_lsx;
1817  c->yuv2packed2 = yuv2rgb15_2_lsx;
1818  c->yuv2packedX = yuv2rgb15_X_lsx;
1819  break;
1820  case AV_PIX_FMT_RGB444LE:
1821  case AV_PIX_FMT_RGB444BE:
1822  case AV_PIX_FMT_BGR444LE:
1823  case AV_PIX_FMT_BGR444BE:
1824  c->yuv2packed1 = yuv2rgb12_1_lsx;
1825  c->yuv2packed2 = yuv2rgb12_2_lsx;
1826  c->yuv2packedX = yuv2rgb12_X_lsx;
1827  break;
1828  case AV_PIX_FMT_RGB8:
1829  case AV_PIX_FMT_BGR8:
1830  c->yuv2packed1 = yuv2rgb8_1_lsx;
1831  c->yuv2packed2 = yuv2rgb8_2_lsx;
1832  c->yuv2packedX = yuv2rgb8_X_lsx;
1833  break;
1834  case AV_PIX_FMT_RGB4:
1835  case AV_PIX_FMT_BGR4:
1836  c->yuv2packed1 = yuv2rgb4_1_lsx;
1837  c->yuv2packed2 = yuv2rgb4_2_lsx;
1838  c->yuv2packedX = yuv2rgb4_X_lsx;
1839  break;
1840  case AV_PIX_FMT_RGB4_BYTE:
1841  case AV_PIX_FMT_BGR4_BYTE:
1842  c->yuv2packed1 = yuv2rgb4b_1_lsx;
1843  c->yuv2packed2 = yuv2rgb4b_2_lsx;
1844  c->yuv2packedX = yuv2rgb4b_X_lsx;
1845  break;
1846  }
1847  }
1848 }
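
/* Usage sketch (hypothetical; the actual call site lives in the generic
 * swscale output init and may differ): on LoongArch the output function
 * pointers are expected to be overridden roughly like this:
 *
 *     if (have_lsx(av_get_cpu_flags()))
 *         ff_sws_init_output_lsx(c, &c->yuv2plane1, &c->yuv2planeX,
 *                                &c->yuv2nv12cX, &c->yuv2packed1,
 *                                &c->yuv2packed2, &c->yuv2packedX,
 *                                &c->yuv2anyX);
 */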