/*
 * Copyright (c) 2021 Loongson Technology Corporation Limited
 * Contributed by Hao Chen <chenhao@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "vc1dsp_loongarch.h"
#include "libavutil/loongarch/loongson_intrinsics.h"

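/*
 * LASX (256-bit LoongArch SIMD) versions of the VC-1 inverse transforms
 * and quarter-sample ("mspel") motion compensation.  The 8x8 inverse
 * transform below operates in place on a block of 16-bit coefficients.
 */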
void ff_vc1_inv_trans_8x8_lasx(int16_t block[64])
{
    int32_t con_4 = 4;
    int32_t con_64 = 64;
    __m256i in0, in1, in2, in3;
    __m256i temp0, temp1, temp2, temp3, t1, t2, t3, t4, t5, t6, t7, t8;
    __m256i const_1 = {0x000c000c000c000c, 0x000c000c000c000c,
                       0x000c000c000c000c, 0x000c000c000c000c};
    __m256i const_2 = {0xfff4000cfff4000c, 0xfff4000cfff4000c,
                       0xfff4000cfff4000c, 0xfff4000cfff4000c};
    __m256i const_3 = {0x0006001000060010, 0x0006001000060010,
                       0x0006001000060010, 0x0006001000060010};
    __m256i const_4 = {0xfff00006fff00006, 0xfff00006fff00006,
                       0xfff00006fff00006, 0xfff00006fff00006};
    __m256i const_5 = {0x000f0010000f0010, 0x000f0010000f0010,
                       0x000f0010000f0010, 0x000f0010000f0010};
    __m256i const_6 = {0x0004000900040009, 0x0004000900040009,
                       0x0004000900040009, 0x0004000900040009};
    __m256i const_7 = {0xfffc000ffffc000f, 0xfffc000ffffc000f,
                       0xfffc000ffffc000f, 0xfffc000ffffc000f};
    __m256i const_8 = {0xfff7fff0fff7fff0, 0xfff7fff0fff7fff0,
                       0xfff7fff0fff7fff0, 0xfff7fff0fff7fff0};
    __m256i const_9 = {0xfff00009fff00009, 0xfff00009fff00009,
                       0xfff00009fff00009, 0xfff00009fff00009};
    __m256i const_10 = {0x000f0004000f0004, 0x000f0004000f0004,
                        0x000f0004000f0004, 0x000f0004000f0004};
    __m256i const_11 = {0xfff70004fff70004, 0xfff70004fff70004,
                        0xfff70004fff70004, 0xfff70004fff70004};
    __m256i const_12 = {0xfff0000ffff0000f, 0xfff0000ffff0000f,
                        0xfff0000ffff0000f, 0xfff0000ffff0000f};
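
    /*
     * Each const_N packs a pair of VC-1 8-point transform taps into the
     * int16 lanes of a 256-bit vector so that a single __lasx_xvdp2_w_h
     * yields a two-tap dot product per 32-bit lane: const_1 = (12, 12),
     * const_2 = (12, -12) (0xfff4 == -12), const_3 = (16, 6) and
     * const_4 = (6, -16) for the even part; const_5..const_12 pack
     * pairs of the odd-part taps {16, 15, 9, 4} with appropriate signs.
     */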

    DUP4_ARG2(__lasx_xvld, block, 0, block, 32, block, 64, block, 96,
              in0, in1, in2, in3);
    DUP4_ARG2(__lasx_xvpermi_d, in0, 0xD8, in1, 0xD8, in2, 0xD8, in3, 0xD8,
              in0, in1, in2, in3);
    /* first pass */
    DUP2_ARG2(__lasx_xvilvl_h, in2, in0, in3, in1, temp0, temp1);
    t2 = __lasx_xvreplgr2vr_w(con_4);
    DUP2_ARG3(__lasx_xvdp2add_w_h, t2, temp0, const_1, t2, temp0,
              const_2, t1, t2);
    DUP2_ARG2(__lasx_xvdp2_w_h, temp1, const_3, temp1, const_4, t3, t4);

    t5 = __lasx_xvadd_w(t1, t3);
    t6 = __lasx_xvadd_w(t2, t4);
    t7 = __lasx_xvsub_w(t2, t4);
    t8 = __lasx_xvsub_w(t1, t3);

    DUP2_ARG2(__lasx_xvilvh_h, in1, in0, in3, in2, temp0, temp1);
    temp2 = __lasx_xvdp2_w_h(const_5, temp0);
    t1 = __lasx_xvdp2add_w_h(temp2, temp1, const_6);
    temp2 = __lasx_xvdp2_w_h(const_7, temp0);
    t2 = __lasx_xvdp2add_w_h(temp2, temp1, const_8);
    temp2 = __lasx_xvdp2_w_h(const_9, temp0);
    t3 = __lasx_xvdp2add_w_h(temp2, temp1, const_10);
    temp2 = __lasx_xvdp2_w_h(const_11, temp0);
    t4 = __lasx_xvdp2add_w_h(temp2, temp1, const_12);

    DUP4_ARG2(__lasx_xvadd_w, t1, t5, t6, t2, t7, t3, t8, t4,
              temp0, temp1, temp2, temp3);
    DUP4_ARG2(__lasx_xvsub_w, t8, t4, t7, t3, t6, t2, t5, t1,
              in0, in1, in2, in3);
    DUP4_ARG2(__lasx_xvsrai_w, temp0, 3, temp1, 3, temp2, 3, temp3, 3,
              temp0, temp1, temp2, temp3);
    DUP4_ARG2(__lasx_xvsrai_w, in0, 3, in1, 3, in2, 3, in3, 3,
              in0, in1, in2, in3);

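    /*
     * The xvpackev_h / xvilvl_w / xvilvh_w / xvpermi_q sequence below is
     * effectively a register transpose of the first-pass result, so the
     * second pass can reuse the same dot-product layout down the other
     * dimension.
     */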
    /* second pass */
    DUP4_ARG2(__lasx_xvpackev_h, temp1, temp0, temp3, temp2, in1, in0,
              in3, in2, temp0, temp1, temp2, temp3);
    DUP2_ARG2(__lasx_xvilvl_w, temp1, temp0, temp3, temp2, t1, t3);
    DUP2_ARG2(__lasx_xvilvh_w, temp1, temp0, temp3, temp2, t2, t4);
    DUP4_ARG3(__lasx_xvpermi_q, t3, t1, 0x20, t3, t1, 0x31, t4, t2, 0x20,
              t4, t2, 0x31, in0, in1, in2, in3);
    DUP2_ARG2(__lasx_xvilvl_h, in1, in0, in3, in2, temp0, temp1);
    t3 = __lasx_xvreplgr2vr_w(con_64);
    DUP2_ARG3(__lasx_xvdp2add_w_h, t3, temp0, const_1, t3, temp0,
              const_2, t1, t2);
    DUP2_ARG2(__lasx_xvdp2_w_h, temp1, const_3, temp1, const_4, t3, t4);

    t5 = __lasx_xvadd_w(t1, t3);
    t6 = __lasx_xvadd_w(t2, t4);
    t7 = __lasx_xvsub_w(t2, t4);
    t8 = __lasx_xvsub_w(t1, t3);

    DUP2_ARG2(__lasx_xvilvh_h, in2, in0, in3, in1, temp0, temp1);
    temp2 = __lasx_xvdp2_w_h(const_5, temp0);
    t1 = __lasx_xvdp2add_w_h(temp2, temp1, const_6);
    temp2 = __lasx_xvdp2_w_h(const_7, temp0);
    t2 = __lasx_xvdp2add_w_h(temp2, temp1, const_8);
    temp2 = __lasx_xvdp2_w_h(const_9, temp0);
    t3 = __lasx_xvdp2add_w_h(temp2, temp1, const_10);
    temp2 = __lasx_xvdp2_w_h(const_11, temp0);
    t4 = __lasx_xvdp2add_w_h(temp2, temp1, const_12);

    DUP4_ARG2(__lasx_xvadd_w, t5, t1, t6, t2, t7, t3, t8, t4,
              temp0, temp1, temp2, temp3);
    DUP4_ARG2(__lasx_xvsub_w, t8, t4, t7, t3, t6, t2, t5, t1,
              in0, in1, in2, in3);
    DUP4_ARG2(__lasx_xvaddi_wu, in0, 1, in1, 1, in2, 1, in3, 1,
              in0, in1, in2, in3);
    DUP4_ARG3(__lasx_xvsrani_h_w, temp1, temp0, 7, temp3, temp2, 7,
              in1, in0, 7, in3, in2, 7, t1, t2, t3, t4);
    DUP4_ARG2(__lasx_xvpermi_d, t1, 0xD8, t2, 0xD8, t3, 0xD8, t4, 0xD8,
              in0, in1, in2, in3);
    __lasx_xvst(in0, block, 0);
    __lasx_xvst(in1, block, 32);
    __lasx_xvst(in2, block, 64);
    __lasx_xvst(in3, block, 96);
}

void ff_vc1_inv_trans_8x8_dc_lasx(uint8_t *dest, ptrdiff_t stride,
                                  int16_t *block)
{
    int dc = block[0];
    ptrdiff_t stride2 = stride << 1;
    ptrdiff_t stride3 = stride2 + stride;
    uint8_t *dst = dest + (stride2 << 1);
    __m256i in0, in1, in2, in3, in4, in5, in6, in7;
    __m256i const_dc, temp0, temp1, temp2, temp3;
    __m256i reg0, reg1, reg2, reg3;

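    /*
     * DC-only shortcut: both transform passes reduce to scaling dc by the
     * even tap 12 with the per-pass shifts folded in, since
     * (3 * dc + 1) >> 1 == (12 * dc + 4) >> 3 and
     * (3 * dc + 16) >> 5 == (12 * dc + 64) >> 7.
     */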
    dc = (3 * dc + 1) >> 1;
    dc = (3 * dc + 16) >> 5;

    const_dc = __lasx_xvreplgr2vr_h(dc);
    DUP4_ARG2(__lasx_xvldrepl_d, dest, 0, dest + stride, 0, dest + stride2,
              0, dest + stride3, 0, in0, in1, in2, in3);
    DUP4_ARG2(__lasx_xvldrepl_d, dst, 0, dst + stride, 0, dst + stride2,
              0, dst + stride3, 0, in4, in5, in6, in7);

    DUP4_ARG2(__lasx_xvilvl_d, in1, in0, in3, in2, in5, in4, in7, in6,
              temp0, temp1, temp2, temp3);
    DUP4_ARG1(__lasx_vext2xv_hu_bu, temp0, temp1, temp2, temp3,
              temp0, temp1, temp2, temp3);

    DUP4_ARG2(__lasx_xvadd_h, temp0, const_dc, temp1, const_dc, temp2,
              const_dc, temp3, const_dc, reg0, reg1, reg2, reg3);
    DUP2_ARG3(__lasx_xvssrarni_bu_h, reg1, reg0, 0, reg3, reg2, 0,
              temp0, temp1);
    __lasx_xvstelm_d(temp0, dest, 0, 0);
    __lasx_xvstelm_d(temp0, dest + stride, 0, 2);
    __lasx_xvstelm_d(temp0, dest + stride2, 0, 1);
    __lasx_xvstelm_d(temp0, dest + stride3, 0, 3);
    __lasx_xvstelm_d(temp1, dst, 0, 0);
    __lasx_xvstelm_d(temp1, dst + stride, 0, 2);
    __lasx_xvstelm_d(temp1, dst + stride2, 0, 1);
    __lasx_xvstelm_d(temp1, dst + stride3, 0, 3);
}

void ff_vc1_inv_trans_8x4_lasx(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    ptrdiff_t stride2 = stride << 1;
    ptrdiff_t stride3 = stride2 + stride;
    __m256i shift = {0x0000000400000000, 0x0000000500000001,
                     0x0000000600000002, 0x0000000700000003};
    __m256i const_64 = {0x0000004000000040, 0x0000004000000040,
                        0x0000004000000040, 0x0000004000000040};
    __m256i const_1 = {0x00060010000C000C, 0x00060010000C000C,
                       0x00060010000C000C, 0x00060010000C000C};
    __m256i const_2 = {0xFFF00006FFF4000C, 0xFFF00006FFF4000C,
                       0xFFF00006FFF4000C, 0xFFF00006FFF4000C};
    __m256i const_3 = {0x0004000F00090010, 0x0004000F00090010,
                       0x0004000F00090010, 0x0004000F00090010};
    __m256i const_4 = {0xFFF7FFFCFFF0000F, 0xFFF7FFFCFFF0000F,
                       0xFFF7FFFCFFF0000F, 0xFFF7FFFCFFF0000F};
    __m256i const_5 = {0x000FFFF000040009, 0x000FFFF000040009,
                       0x000FFFF000040009, 0x000FFFF000040009};
    __m256i const_6 = {0xFFF0FFF7000F0004, 0xFFF0FFF7000F0004,
                       0xFFF0FFF7000F0004, 0xFFF0FFF7000F0004};
    __m256i const_7 = {0x0000000000000004, 0x0000000000000004,
                       0x0000000000000004, 0x0000000000000004};
    __m256i const_8 = {0x0011001100110011, 0x0011001100110011,
                       0x0011001100110011, 0x0011001100110011};
    __m256i const_9 = {0xFFEF0011FFEF0011, 0xFFEF0011FFEF0011,
                       0xFFEF0011FFEF0011, 0xFFEF0011FFEF0011};
    __m256i const_10 = {0x000A0016000A0016, 0x000A0016000A0016,
                        0x000A0016000A0016, 0x000A0016000A0016};
    __m256i const_11 = {0x0016FFF60016FFF6, 0x0016FFF60016FFF6,
                        0x0016FFF60016FFF6, 0x0016FFF60016FFF6};
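
    /*
     * const_1/const_2 pack even-part tap pairs (12, +-12) and (16, +-6)
     * for __lasx_xvdp2_w_h; const_3..const_6 pack all four odd-part taps
     * {16, 15, 9, 4} per 64-bit group for the __lasx_xvdp4_d_h row pass;
     * const_8..const_11 pack the 4-point column taps {17, 22, 10}
     * (0xFFEF == -17, 0xFFF6 == -10).
     */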
    __m256i in0, in1;
    __m256i temp0, temp1, temp2, temp3, t1, t2, t3, t4;

    DUP2_ARG2(__lasx_xvld, block, 0, block, 32, in0, in1);
    /* first pass */
    temp0 = __lasx_xvpermi_d(in0, 0xB1);
    temp1 = __lasx_xvpermi_d(in1, 0xB1);
    DUP2_ARG2(__lasx_xvilvl_h, temp0, in0, temp1, in1, temp0, temp1);
    temp2 = __lasx_xvpickev_w(temp1, temp0);
    temp3 = __lasx_xvpickod_w(temp1, temp0);

    DUP2_ARG2(__lasx_xvdp2_w_h, temp2, const_1, temp2, const_2, temp0, temp1);
    t1 = __lasx_xvadd_w(temp0, const_7);
    t2 = __lasx_xvadd_w(temp1, const_7);
    temp0 = __lasx_xvpickev_w(t2, t1);
    temp1 = __lasx_xvpickod_w(t2, t1);
    t3 = __lasx_xvadd_w(temp0, temp1);
    t4 = __lasx_xvsub_w(temp0, temp1);
    t4 = __lasx_xvpermi_d(t4, 0xB1);

    DUP4_ARG2(__lasx_xvdp4_d_h, temp3, const_3, temp3, const_4, temp3,
              const_5, temp3, const_6, t1, t2, temp0, temp1);
    temp2 = __lasx_xvpickev_w(t2, t1);
    temp3 = __lasx_xvpickev_w(temp1, temp0);

    t1 = __lasx_xvadd_w(temp2, t3);
    t2 = __lasx_xvadd_w(temp3, t4);
    temp0 = __lasx_xvsub_w(t4, temp3);
    temp1 = __lasx_xvsub_w(t3, temp2);
    /* second pass */
    DUP2_ARG3(__lasx_xvsrani_h_w, t2, t1, 3, temp1, temp0, 3, temp2, temp3);
    temp3 = __lasx_xvshuf4i_h(temp3, 0x4E);
    temp0 = __lasx_xvpermi_q(temp3, temp2, 0x20);
    temp1 = __lasx_xvpermi_q(temp3, temp2, 0x31);
    DUP2_ARG3(__lasx_xvdp2add_w_h, const_64, temp0, const_8, const_64, temp0,
              const_9, t1, t2);
    DUP2_ARG2(__lasx_xvdp2_w_h, temp1, const_10, temp1, const_11, t3, t4);
    temp0 = __lasx_xvadd_w(t1, t3);
    temp1 = __lasx_xvsub_w(t2, t4);
    temp2 = __lasx_xvadd_w(t2, t4);
    temp3 = __lasx_xvsub_w(t1, t3);
    DUP4_ARG2(__lasx_xvsrai_w, temp0, 7, temp1, 7, temp2, 7, temp3, 7,
              t1, t2, t3, t4);

    DUP4_ARG2(__lasx_xvldrepl_d, dest, 0, dest + stride, 0, dest + stride2, 0,
              dest + stride3, 0, temp0, temp1, temp2, temp3);
    DUP4_ARG1(__lasx_vext2xv_wu_bu, temp0, temp1, temp2, temp3,
              temp0, temp1, temp2, temp3);
    DUP4_ARG2(__lasx_xvadd_w, temp0, t1, temp1, t2, temp2, t3, temp3, t4,
              t1, t2, t3, t4);
    DUP4_ARG1(__lasx_xvclip255_w, t1, t2, t3, t4, t1, t2, t3, t4);
    DUP2_ARG2(__lasx_xvpickev_h, t2, t1, t4, t3, temp0, temp1);
    temp2 = __lasx_xvpickev_b(temp1, temp0);
    temp0 = __lasx_xvperm_w(temp2, shift);
    __lasx_xvstelm_d(temp0, dest, 0, 0);
    __lasx_xvstelm_d(temp0, dest + stride, 0, 1);
    __lasx_xvstelm_d(temp0, dest + stride2, 0, 2);
    __lasx_xvstelm_d(temp0, dest + stride3, 0, 3);
}

void ff_vc1_inv_trans_8x4_dc_lasx(uint8_t *dest, ptrdiff_t stride,
                                  int16_t *block)
{
    int dc = block[0];
    ptrdiff_t stride2 = stride << 1;
    ptrdiff_t stride3 = stride2 + stride;
    __m256i in0, in1, in2, in3;
    __m256i const_dc, temp0, temp1, reg0, reg1;

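    /*
     * DC path of the 8x4 transform: the 8-point row pass scales dc by 12
     * ((3 * dc + 1) >> 1 == (12 * dc + 4) >> 3), the 4-point column pass
     * scales by the tap 17 with final rounding, (17 * dc + 64) >> 7.
     */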
    dc = (3 * dc + 1) >> 1;
    dc = (17 * dc + 64) >> 7;
    const_dc = __lasx_xvreplgr2vr_h(dc);

    DUP4_ARG2(__lasx_xvldrepl_d, dest, 0, dest + stride, 0, dest + stride2,
              0, dest + stride3, 0, in0, in1, in2, in3);
    DUP2_ARG2(__lasx_xvilvl_d, in1, in0, in3, in2, temp0, temp1);
    DUP2_ARG1(__lasx_vext2xv_hu_bu, temp0, temp1, temp0, temp1);
    DUP2_ARG2(__lasx_xvadd_h, temp0, const_dc, temp1, const_dc, reg0, reg1);
    temp0 = __lasx_xvssrarni_bu_h(reg1, reg0, 0);
    __lasx_xvstelm_d(temp0, dest, 0, 0);
    __lasx_xvstelm_d(temp0, dest + stride, 0, 2);
    __lasx_xvstelm_d(temp0, dest + stride2, 0, 1);
    __lasx_xvstelm_d(temp0, dest + stride3, 0, 3);
}

void ff_vc1_inv_trans_4x8_dc_lasx(uint8_t *dest, ptrdiff_t stride,
                                  int16_t *block)
{
    int dc = block[0];
    ptrdiff_t stride2 = stride << 1;
    ptrdiff_t stride3 = stride2 + stride;
    uint8_t *dst = dest + (stride2 << 1);
    __m256i in0, in1, in2, in3, in4, in5, in6, in7;
    __m256i const_dc, temp0, temp1, temp2, temp3, reg0, reg1;

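    /*
     * DC path of the 4x8 transform: the 4-point row pass scales dc by 17,
     * (17 * dc + 4) >> 3, the 8-point column pass scales by 12 with final
     * rounding, (12 * dc + 64) >> 7.
     */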
    dc = (17 * dc + 4) >> 3;
    dc = (12 * dc + 64) >> 7;
    const_dc = __lasx_xvreplgr2vr_h(dc);

    DUP4_ARG2(__lasx_xvldrepl_w, dest, 0, dest + stride, 0, dest + stride2,
              0, dest + stride3, 0, in0, in1, in2, in3);
    DUP4_ARG2(__lasx_xvldrepl_w, dst, 0, dst + stride, 0, dst + stride2,
              0, dst + stride3, 0, in4, in5, in6, in7);

    DUP4_ARG2(__lasx_xvilvl_w, in1, in0, in3, in2, in5, in4, in7, in6,
              temp0, temp1, temp2, temp3);
    DUP2_ARG2(__lasx_xvilvl_d, temp1, temp0, temp3, temp2, reg0, reg1);
    DUP2_ARG1(__lasx_vext2xv_hu_bu, reg0, reg1, temp0, temp1);
    DUP2_ARG2(__lasx_xvadd_h, temp0, const_dc, temp1, const_dc, reg0, reg1);
    temp0 = __lasx_xvssrarni_bu_h(reg1, reg0, 0);
    __lasx_xvstelm_w(temp0, dest, 0, 0);
    __lasx_xvstelm_w(temp0, dest + stride, 0, 1);
    __lasx_xvstelm_w(temp0, dest + stride2, 0, 4);
    __lasx_xvstelm_w(temp0, dest + stride3, 0, 5);
    __lasx_xvstelm_w(temp0, dst, 0, 2);
    __lasx_xvstelm_w(temp0, dst + stride, 0, 3);
    __lasx_xvstelm_w(temp0, dst + stride2, 0, 6);
    __lasx_xvstelm_w(temp0, dst + stride3, 0, 7);
}

void ff_vc1_inv_trans_4x8_lasx(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    ptrdiff_t stride2 = stride << 1;
    ptrdiff_t stride3 = stride2 + stride;
    uint8_t *dst = dest + (stride2 << 1);
    __m256i in0, in1, in2, in3;
    __m256i temp0, temp1, temp2, temp3, t1, t2, t3, t4;

    __m256i const_1 = {0x0011001100110011, 0x0011001100110011,
                       0x0011001100110011, 0x0011001100110011};
    __m256i const_2 = {0xFFEF0011FFEF0011, 0xFFEF0011FFEF0011,
                       0xFFEF0011FFEF0011, 0xFFEF0011FFEF0011};
    __m256i const_3 = {0x000A0016000A0016, 0x000A0016000A0016,
                       0x000A0016000A0016, 0x000A0016000A0016};
    __m256i const_4 = {0x0016FFF60016FFF6, 0x0016FFF60016FFF6,
                       0x0016FFF60016FFF6, 0x0016FFF60016FFF6};
    __m256i const_5 = {0x0000000400000004, 0x0000000400000004,
                       0x0000000400000004, 0x0000000400000004};
    __m256i const_6 = {0x0000004000000040, 0x0000004000000040,
                       0x0000004000000040, 0x0000004000000040};
    __m256i const_7 = {0x000C000C000C000C, 0x000C000C000C000C,
                       0xFFF4000CFFF4000C, 0xFFF4000CFFF4000C};
    __m256i const_8 = {0x0006001000060010, 0x0006001000060010,
                       0xFFF00006FFF00006, 0xFFF00006FFF00006};
    __m256i const_9 = {0x0009001000090010, 0x0009001000090010,
                       0x0004000F0004000F, 0x0004000F0004000F};
    __m256i const_10 = {0xFFF0000FFFF0000F, 0xFFF0000FFFF0000F,
                        0xFFF7FFFCFFF7FFFC, 0xFFF7FFFCFFF7FFFC};
    __m256i const_11 = {0x0004000900040009, 0x0004000900040009,
                        0x000FFFF0000FFFF0, 0x000FFFF0000FFFF0};
    __m256i const_12 = {0x000F0004000F0004, 0x000F0004000F0004,
                        0xFFF0FFF7FFF0FFF7, 0xFFF0FFF7FFF0FFF7};
    __m256i shift = {0x0000000400000000, 0x0000000600000002,
                     0x0000000500000001, 0x0000000700000003};
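
    /*
     * const_1..const_4 pack the 4-point row taps {17, 22, 10};
     * const_7..const_12 hold different 8-point tap pairs in the low and
     * high 128-bit halves, since the column pass keeps two groups of four
     * columns in the two lanes of each register.
     */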

    /* first pass */
    DUP4_ARG2(__lasx_xvld, block, 0, block, 32, block, 64, block, 96,
              in0, in1, in2, in3);
    in0 = __lasx_xvilvl_d(in1, in0);
    in1 = __lasx_xvilvl_d(in3, in2);
    temp0 = __lasx_xvpickev_h(in1, in0);
    temp1 = __lasx_xvpickod_h(in1, in0);
    temp0 = __lasx_xvperm_w(temp0, shift);
    temp1 = __lasx_xvperm_w(temp1, shift);

    DUP2_ARG3(__lasx_xvdp2add_w_h, const_5, temp0, const_1, const_5, temp0,
              const_2, t1, t2);
    DUP2_ARG2(__lasx_xvdp2_w_h, temp1, const_3, temp1, const_4, t3, t4);

    temp0 = __lasx_xvadd_w(t1, t3);
    temp1 = __lasx_xvsub_w(t2, t4);
    temp2 = __lasx_xvadd_w(t2, t4);
    temp3 = __lasx_xvsub_w(t1, t3);
    DUP4_ARG2(__lasx_xvsrai_w, temp0, 3, temp1, 3, temp2, 3, temp3, 3,
              temp0, temp1, temp2, temp3);

    /* second pass */
    t1 = __lasx_xvpickev_w(temp1, temp0);
    t2 = __lasx_xvpickev_w(temp3, temp2);
    t1 = __lasx_xvpickev_h(t2, t1);
    t3 = __lasx_xvpickod_w(temp1, temp0);
    t4 = __lasx_xvpickod_w(temp3, temp2);
    temp1 = __lasx_xvpickev_h(t4, t3);
    temp2 = __lasx_xvpermi_q(t1, t1, 0x00);
    temp3 = __lasx_xvpermi_q(t1, t1, 0x11);
    t1 = __lasx_xvdp2add_w_h(const_6, temp2, const_7);
    t2 = __lasx_xvdp2_w_h(temp3, const_8);
    t3 = __lasx_xvadd_w(t1, t2);
    t4 = __lasx_xvsub_w(t1, t2);
    t4 = __lasx_xvpermi_d(t4, 0x4E);

    DUP4_ARG2(__lasx_xvdp2_w_h, temp1, const_9, temp1, const_10, temp1,
              const_11, temp1, const_12, t1, t2, temp2, temp3);

    temp0 = __lasx_xvpermi_q(t2, t1, 0x20);
    temp1 = __lasx_xvpermi_q(t2, t1, 0x31);
    t1 = __lasx_xvadd_w(temp0, temp1);
    temp0 = __lasx_xvpermi_q(temp3, temp2, 0x20);
    temp1 = __lasx_xvpermi_q(temp3, temp2, 0x31);
    t2 = __lasx_xvadd_w(temp1, temp0);
    temp0 = __lasx_xvadd_w(t1, t3);
    temp1 = __lasx_xvadd_w(t2, t4);
    temp2 = __lasx_xvsub_w(t4, t2);
    temp3 = __lasx_xvsub_w(t3, t1);
    temp2 = __lasx_xvaddi_wu(temp2, 1);
    temp3 = __lasx_xvaddi_wu(temp3, 1);
    DUP4_ARG2(__lasx_xvsrai_w, temp0, 7, temp1, 7, temp2, 7, temp3, 7,
              temp0, temp1, temp2, temp3);

    /* const_1..const_8 are reused below as plain scratch registers for the
     * destination pixels; their tap values are no longer needed. */
    DUP4_ARG2(__lasx_xvldrepl_w, dest, 0, dest + stride, 0, dest + stride2, 0,
              dest + stride3, 0, const_1, const_2, const_3, const_4);
    DUP4_ARG2(__lasx_xvldrepl_w, dst, 0, dst + stride, 0, dst + stride2, 0,
              dst + stride3, 0, const_5, const_6, const_7, const_8);

    DUP4_ARG2(__lasx_xvilvl_w, const_2, const_1, const_4, const_3, const_5,
              const_6, const_7, const_8, const_1, const_2, const_3, const_4);
    DUP4_ARG1(__lasx_vext2xv_wu_bu, const_1, const_2, const_3, const_4,
              const_1, const_2, const_3, const_4);
    DUP4_ARG2(__lasx_xvadd_w, temp0, const_1, temp1, const_2, temp2, const_3,
              temp3, const_4, temp0, temp1, temp2, temp3);
    DUP4_ARG1(__lasx_xvclip255_w, temp0, temp1, temp2, temp3,
              temp0, temp1, temp2, temp3);
    DUP2_ARG2(__lasx_xvpickev_h, temp1, temp0, temp3, temp2, temp0, temp1);
    temp0 = __lasx_xvpickev_b(temp1, temp0);
    __lasx_xvstelm_w(temp0, dest, 0, 0);
    __lasx_xvstelm_w(temp0, dest + stride, 0, 4);
    __lasx_xvstelm_w(temp0, dest + stride2, 0, 1);
    __lasx_xvstelm_w(temp0, dest + stride3, 0, 5);
    __lasx_xvstelm_w(temp0, dst, 0, 6);
    __lasx_xvstelm_w(temp0, dst + stride, 0, 2);
    __lasx_xvstelm_w(temp0, dst + stride2, 0, 7);
    __lasx_xvstelm_w(temp0, dst + stride3, 0, 3);
}

void ff_vc1_inv_trans_4x4_dc_lasx(uint8_t *dest, ptrdiff_t stride,
                                  int16_t *block)
{
    int dc = block[0];
    uint8_t *dst1 = dest + stride;
    uint8_t *dst2 = dst1 + stride;
    uint8_t *dst3 = dst2 + stride;
    __m256i in0, in1, in2, in3, temp0, temp1, const_dc;
    __m256i zero = {0};

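    /*
     * DC path of the 4x4 transform: both passes scale dc by the 4-point
     * tap 17, (17 * dc + 4) >> 3 then (17 * dc + 64) >> 7.
     */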
    dc = (17 * dc + 4) >> 3;
    dc = (17 * dc + 64) >> 7;
    const_dc = __lasx_xvreplgr2vr_h(dc);

    DUP4_ARG2(__lasx_xvldrepl_w, dest, 0, dst1, 0, dst2, 0, dst3, 0,
              in0, in1, in2, in3);
    DUP2_ARG2(__lasx_xvilvl_w, in1, in0, in3, in2, temp0, temp1);
    in0 = __lasx_xvpermi_q(temp1, temp0, 0x20);
    temp0 = __lasx_xvilvl_b(zero, in0);
    in0 = __lasx_xvadd_h(temp0, const_dc);
    temp0 = __lasx_xvssrarni_bu_h(in0, in0, 0);
    __lasx_xvstelm_w(temp0, dest, 0, 0);
    __lasx_xvstelm_w(temp0, dst1, 0, 1);
    __lasx_xvstelm_w(temp0, dst2, 0, 4);
    __lasx_xvstelm_w(temp0, dst3, 0, 5);
}

void ff_vc1_inv_trans_4x4_lasx(uint8_t *dest, ptrdiff_t stride, int16_t *block)
{
    uint8_t *dst1 = dest + stride;
    uint8_t *dst2 = dst1 + stride;
    uint8_t *dst3 = dst2 + stride;
    __m256i in0, in1, in2, in3;
    __m256i temp0, temp1, temp2, temp3, t1, t2;

    __m256i const_1 = {0x0011001100110011, 0xFFEF0011FFEF0011,
                       0x0011001100110011, 0xFFEF0011FFEF0011};
    __m256i const_2 = {0x000A0016000A0016, 0x0016FFF60016FFF6,
                       0x000A0016000A0016, 0x0016FFF60016FFF6};
    __m256i const_64 = {0x0000004000000040, 0x0000004000000040,
                        0x0000004000000040, 0x0000004000000040};
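
    /*
     * const_1/const_2 pack the 4-point taps {17, 22, 10} with the (17, 17)
     * and (17, -17) pairs in alternating 64-bit groups, so one register
     * covers both butterflies of the 4-point transform.
     */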

    DUP2_ARG2(__lasx_xvld, block, 0, block, 32, in0, in1);
    /* first pass */
    temp0 = __lasx_xvilvl_d(in1, in0);
    temp1 = __lasx_xvpickev_h(temp0, temp0);
    temp2 = __lasx_xvpickod_h(temp0, temp0);
    DUP2_ARG2(__lasx_xvdp2_w_h, temp1, const_1, temp2, const_2, t1, t2);
    t1 = __lasx_xvaddi_wu(t1, 4);
    in0 = __lasx_xvadd_w(t1, t2);
    in1 = __lasx_xvsub_w(t1, t2);
    DUP2_ARG2(__lasx_xvsrai_w, in0, 3, in1, 3, in0, in1);
    /* second pass */
    temp0 = __lasx_xvpickev_h(in1, in0);
    temp1 = __lasx_xvpermi_q(temp0, temp0, 0x00);
    temp2 = __lasx_xvpermi_q(temp0, temp0, 0x11);
    const_1 = __lasx_xvpermi_d(const_1, 0xD8);
    const_2 = __lasx_xvpermi_d(const_2, 0xD8);
    t1 = __lasx_xvdp2add_w_h(const_64, temp1, const_1);
    t2 = __lasx_xvdp2_w_h(temp2, const_2);
    in0 = __lasx_xvadd_w(t1, t2);
    in1 = __lasx_xvsub_w(t1, t2);
    DUP2_ARG2(__lasx_xvsrai_w, in0, 7, in1, 7, in0, in1);
    temp0 = __lasx_xvshuf4i_w(in0, 0x9C);
    temp1 = __lasx_xvshuf4i_w(in1, 0x9C);

    DUP4_ARG2(__lasx_xvldrepl_w, dest, 0, dst1, 0, dst2, 0, dst3, 0,
              in0, in1, in2, in3);
    temp2 = __lasx_xvilvl_w(in2, in0);
    temp2 = __lasx_vext2xv_wu_bu(temp2);
    temp3 = __lasx_xvilvl_w(in1, in3);
    temp3 = __lasx_vext2xv_wu_bu(temp3);
    temp0 = __lasx_xvadd_w(temp0, temp2);
    temp1 = __lasx_xvadd_w(temp1, temp3);
    DUP2_ARG1(__lasx_xvclip255_w, temp0, temp1, temp0, temp1);
    temp1 = __lasx_xvpickev_h(temp1, temp0);
    temp0 = __lasx_xvpickev_b(temp1, temp1);
    __lasx_xvstelm_w(temp0, dest, 0, 0);
    __lasx_xvstelm_w(temp0, dst1, 0, 5);
    __lasx_xvstelm_w(temp0, dst2, 0, 4);
    __lasx_xvstelm_w(temp0, dst3, 0, 1);
}

static void put_vc1_mspel_mc_h_v_lasx(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride, int hmode, int vmode,
                                      int rnd)
{
    __m256i in0, in1, in2, in3;
    __m256i t0, t1, t2, t3, t4, t5, t6, t7;
    __m256i temp0, temp1, const_para1_2, const_para0_3;
    __m256i const_r, const_sh;
    __m256i sh = {0x0000000400000000, 0x0000000500000001,
                  0x0000000600000002, 0x0000000700000003};
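    /*
     * VC-1 quarter-pel bicubic filters.  Row m of para_value stores the
     * taps of filter mode m + 1 as {|tap0|, |tap3|, tap1, tap2}: the two
     * inner taps are accumulated with __lasx_xvdp2_h_bu and the two
     * (negative) outer taps are removed with __lasx_xvdp2sub_h_bu, giving
     * e.g. -4*p0 + 53*p1 + 18*p2 - 3*p3 for mode 1.
     */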
    static const uint8_t para_value[][4] = {{4, 3, 53, 18},
                                            {1, 1, 9, 9},
                                            {3, 4, 18, 53}};
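    /*
     * shift_value[mode] is the total normalisation shift of each filter
     * mode; the vertical and horizontal passes each take half of it.  The
     * first pass rounds with (1 << (shift - 1)) + rnd - 1, the second
     * with 64 - rnd before its fixed >> 7.
     */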
    static const int shift_value[] = {0, 5, 1, 5};
    int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;
    int r = (1 << (shift - 1)) + rnd - 1;
    const uint8_t *para_v = para_value[vmode - 1];
    ptrdiff_t stride2 = stride << 1;
    ptrdiff_t stride4 = stride << 2;
    ptrdiff_t stride3 = stride2 + stride;

    const_r = __lasx_xvreplgr2vr_h(r);
    const_sh = __lasx_xvreplgr2vr_h(shift);
    src -= 1, src -= stride;
    const_para0_3 = __lasx_xvldrepl_h(para_v, 0);
    const_para1_2 = __lasx_xvldrepl_h(para_v, 2);
    DUP4_ARG2(__lasx_xvld, src, 0, src + stride, 0, src + stride2, 0,
              src + stride3, 0, in0, in1, in2, in3);
    DUP4_ARG2(__lasx_xvpermi_d, in0, 0xD8, in1, 0xD8, in2, 0xD8, in3, 0xD8,
              in0, in1, in2, in3);
    DUP2_ARG2(__lasx_xvilvl_b, in2, in1, in3, in0, temp0, temp1);
    t0 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
    t0 = __lasx_xvdp2sub_h_bu(t0, temp1, const_para0_3);
    src += stride4;
    in0 = __lasx_xvld(src, 0);
    in0 = __lasx_xvpermi_d(in0, 0xD8);
    DUP2_ARG2(__lasx_xvilvl_b, in3, in2, in0, in1, temp0, temp1);
    t1 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
    t1 = __lasx_xvdp2sub_h_bu(t1, temp1, const_para0_3);
    src += stride;
    in1 = __lasx_xvld(src, 0);
    in1 = __lasx_xvpermi_d(in1, 0xD8);
    DUP2_ARG2(__lasx_xvilvl_b, in0, in3, in1, in2, temp0, temp1);
    t2 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
    t2 = __lasx_xvdp2sub_h_bu(t2, temp1, const_para0_3);
    src += stride;
    in2 = __lasx_xvld(src, 0);
    in2 = __lasx_xvpermi_d(in2, 0xD8);
    DUP2_ARG2(__lasx_xvilvl_b, in1, in0, in2, in3, temp0, temp1);
    t3 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
    t3 = __lasx_xvdp2sub_h_bu(t3, temp1, const_para0_3);
    src += stride;
    in3 = __lasx_xvld(src, 0);
    in3 = __lasx_xvpermi_d(in3, 0xD8);
    DUP2_ARG2(__lasx_xvilvl_b, in2, in1, in3, in0, temp0, temp1);
    t4 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
    t4 = __lasx_xvdp2sub_h_bu(t4, temp1, const_para0_3);
    src += stride;
    in0 = __lasx_xvld(src, 0);
    in0 = __lasx_xvpermi_d(in0, 0xD8);
    DUP2_ARG2(__lasx_xvilvl_b, in3, in2, in0, in1, temp0, temp1);
    t5 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
    t5 = __lasx_xvdp2sub_h_bu(t5, temp1, const_para0_3);
    src += stride;
    in1 = __lasx_xvld(src, 0);
    in1 = __lasx_xvpermi_d(in1, 0xD8);
    DUP2_ARG2(__lasx_xvilvl_b, in0, in3, in1, in2, temp0, temp1);
    t6 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
    t6 = __lasx_xvdp2sub_h_bu(t6, temp1, const_para0_3);
    src += stride;
    in2 = __lasx_xvld(src, 0);
    in2 = __lasx_xvpermi_d(in2, 0xD8);
    DUP2_ARG2(__lasx_xvilvl_b, in1, in0, in2, in3, temp0, temp1);
    t7 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
    t7 = __lasx_xvdp2sub_h_bu(t7, temp1, const_para0_3);
    DUP4_ARG2(__lasx_xvadd_h, t0, const_r, t1, const_r, t2, const_r, t3,
              const_r, t0, t1, t2, t3);
    DUP4_ARG2(__lasx_xvadd_h, t4, const_r, t5, const_r, t6, const_r, t7,
              const_r, t4, t5, t6, t7);
    DUP4_ARG2(__lasx_xvsra_h, t0, const_sh, t1, const_sh, t2, const_sh,
              t3, const_sh, t0, t1, t2, t3);
    DUP4_ARG2(__lasx_xvsra_h, t4, const_sh, t5, const_sh, t6, const_sh,
              t7, const_sh, t4, t5, t6, t7);
    LASX_TRANSPOSE8x8_H(t0, t1, t2, t3, t4, t5, t6, t7, t0,
                        t1, t2, t3, t4, t5, t6, t7);
    para_v = para_value[hmode - 1];
    const_para0_3 = __lasx_xvldrepl_h(para_v, 0);
    const_para1_2 = __lasx_xvldrepl_h(para_v, 2);
    const_para0_3 = __lasx_vext2xv_h_b(const_para0_3);
    const_para1_2 = __lasx_vext2xv_h_b(const_para1_2);
    r = 64 - rnd;
    const_r = __lasx_xvreplgr2vr_w(r);
    DUP4_ARG2(__lasx_xvpermi_d, t0, 0x72, t1, 0x72, t2, 0x72, t0, 0xD8,
              in0, in1, in2, t0);
    DUP4_ARG2(__lasx_xvpermi_d, t1, 0xD8, t2, 0xD8, t3, 0xD8, t4, 0xD8,
              t1, t2, t3, t4);
    DUP2_ARG2(__lasx_xvpermi_d, t5, 0xD8, t6, 0xD8, t5, t6);
    t7 = __lasx_xvpermi_d(t7, 0xD8);
    DUP2_ARG2(__lasx_xvilvl_h, t2, t1, t3, t0, temp0, temp1);
    t0 = __lasx_xvdp2_w_h(temp0, const_para1_2);
    t0 = __lasx_xvdp2sub_w_h(t0, temp1, const_para0_3);
    DUP2_ARG2(__lasx_xvilvl_h, t3, t2, t4, t1, temp0, temp1);
    t1 = __lasx_xvdp2_w_h(temp0, const_para1_2);
    t1 = __lasx_xvdp2sub_w_h(t1, temp1, const_para0_3);
    DUP2_ARG2(__lasx_xvilvl_h, t4, t3, t5, t2, temp0, temp1);
    t2 = __lasx_xvdp2_w_h(temp0, const_para1_2);
    t2 = __lasx_xvdp2sub_w_h(t2, temp1, const_para0_3);
    DUP2_ARG2(__lasx_xvilvl_h, t5, t4, t6, t3, temp0, temp1);
    t3 = __lasx_xvdp2_w_h(temp0, const_para1_2);
    t3 = __lasx_xvdp2sub_w_h(t3, temp1, const_para0_3);
    DUP2_ARG2(__lasx_xvilvl_h, t6, t5, t7, t4, temp0, temp1);
    t4 = __lasx_xvdp2_w_h(temp0, const_para1_2);
    t4 = __lasx_xvdp2sub_w_h(t4, temp1, const_para0_3);
    DUP2_ARG2(__lasx_xvilvl_h, t7, t6, in0, t5, temp0, temp1);
    t5 = __lasx_xvdp2_w_h(temp0, const_para1_2);
    t5 = __lasx_xvdp2sub_w_h(t5, temp1, const_para0_3);
    DUP2_ARG2(__lasx_xvilvl_h, in0, t7, in1, t6, temp0, temp1);
    t6 = __lasx_xvdp2_w_h(temp0, const_para1_2);
    t6 = __lasx_xvdp2sub_w_h(t6, temp1, const_para0_3);
    DUP2_ARG2(__lasx_xvilvl_h, in1, in0, in2, t7, temp0, temp1);
    t7 = __lasx_xvdp2_w_h(temp0, const_para1_2);
    t7 = __lasx_xvdp2sub_w_h(t7, temp1, const_para0_3);
    DUP4_ARG2(__lasx_xvadd_w, t0, const_r, t1, const_r, t2, const_r,
              t3, const_r, t0, t1, t2, t3);
    DUP4_ARG2(__lasx_xvadd_w, t4, const_r, t5, const_r, t6, const_r,
              t7, const_r, t4, t5, t6, t7);
    DUP4_ARG2(__lasx_xvsrai_w, t0, 7, t1, 7, t2, 7, t3, 7, t0, t1, t2, t3);
    DUP4_ARG2(__lasx_xvsrai_w, t4, 7, t5, 7, t6, 7, t7, 7, t4, t5, t6, t7);
    LASX_TRANSPOSE8x8_W(t0, t1, t2, t3, t4, t5, t6, t7,
                        t0, t1, t2, t3, t4, t5, t6, t7);
    DUP4_ARG1(__lasx_xvclip255_w, t0, t1, t2, t3, t0, t1, t2, t3);
    DUP4_ARG1(__lasx_xvclip255_w, t4, t5, t6, t7, t4, t5, t6, t7);
    DUP4_ARG2(__lasx_xvpickev_h, t1, t0, t3, t2, t5, t4, t7, t6,
              t0, t1, t2, t3);
    DUP2_ARG2(__lasx_xvpickev_b, t1, t0, t3, t2, t0, t1);
    t0 = __lasx_xvperm_w(t0, sh);
    t1 = __lasx_xvperm_w(t1, sh);
    __lasx_xvstelm_d(t0, dst, 0, 0);
    __lasx_xvstelm_d(t0, dst + stride, 0, 1);
    __lasx_xvstelm_d(t0, dst + stride2, 0, 2);
    __lasx_xvstelm_d(t0, dst + stride3, 0, 3);
    dst += stride4;
    __lasx_xvstelm_d(t1, dst, 0, 0);
    __lasx_xvstelm_d(t1, dst + stride, 0, 1);
    __lasx_xvstelm_d(t1, dst + stride2, 0, 2);
    __lasx_xvstelm_d(t1, dst + stride3, 0, 3);
}

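/*
 * Instantiates ff_put_vc1_mspel_mcXY_lasx (8x8) and
 * ff_put_vc1_mspel_mcXY_16_lasx (16x16, done as four 8x8 tiles) for each
 * combination of horizontal mode X and vertical mode Y.
 */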
#define PUT_VC1_MSPEL_MC_LASX(hmode, vmode)                                 \
void ff_put_vc1_mspel_mc ## hmode ## vmode ## _lasx(uint8_t *dst,           \
                                                    const uint8_t *src,     \
                                                    ptrdiff_t stride,       \
                                                    int rnd)                \
{                                                                           \
    put_vc1_mspel_mc_h_v_lasx(dst, src, stride, hmode, vmode, rnd);         \
}                                                                           \
void ff_put_vc1_mspel_mc ## hmode ## vmode ## _16_lasx(uint8_t *dst,        \
                                                       const uint8_t *src,  \
                                                       ptrdiff_t stride,    \
                                                       int rnd)             \
{                                                                           \
    put_vc1_mspel_mc_h_v_lasx(dst, src, stride, hmode, vmode, rnd);         \
    put_vc1_mspel_mc_h_v_lasx(dst + 8, src + 8, stride, hmode, vmode, rnd); \
    dst += 8 * stride, src += 8 * stride;                                   \
    put_vc1_mspel_mc_h_v_lasx(dst, src, stride, hmode, vmode, rnd);         \
    put_vc1_mspel_mc_h_v_lasx(dst + 8, src + 8, stride, hmode, vmode, rnd); \
}

PUT_VC1_MSPEL_MC_LASX(1, 1);
PUT_VC1_MSPEL_MC_LASX(1, 2);
PUT_VC1_MSPEL_MC_LASX(1, 3);

PUT_VC1_MSPEL_MC_LASX(2, 1);
PUT_VC1_MSPEL_MC_LASX(2, 2);
PUT_VC1_MSPEL_MC_LASX(2, 3);

PUT_VC1_MSPEL_MC_LASX(3, 1);
PUT_VC1_MSPEL_MC_LASX(3, 2);
PUT_VC1_MSPEL_MC_LASX(3, 3);

void ff_put_no_rnd_vc1_chroma_mc8_lasx(uint8_t *dst /* align 8 */,
                                       uint8_t *src /* align 1 */,
                                       ptrdiff_t stride, int h, int x, int y)
{
    const int intA = (8 - x) * (8 - y);
    const int intB = (x) * (8 - y);
    const int intC = (8 - x) * (y);
    const int intD = (x) * (y);
    __m256i src00, src01, src10, src11;
    __m256i A, B, C, D;
    int i;

    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);

    A = __lasx_xvreplgr2vr_h(intA);
    B = __lasx_xvreplgr2vr_h(intB);
    C = __lasx_xvreplgr2vr_h(intC);
    D = __lasx_xvreplgr2vr_h(intD);
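
    /*
     * Bilinear chroma MC: every output pixel is
     * (A*p[0] + B*p[1] + C*p[stride] + D*p[stride + 1] + 28) >> 6,
     * where the bias of 28 instead of 32 gives the "no rounding" variant.
     */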
    for (i = 0; i < h; i++) {
        DUP2_ARG2(__lasx_xvld, src, 0, src, 1, src00, src01);
        src += stride;
        DUP2_ARG2(__lasx_xvld, src, 0, src, 1, src10, src11);

        DUP4_ARG1(__lasx_vext2xv_hu_bu, src00, src01, src10, src11,
                  src00, src01, src10, src11);
        DUP4_ARG2(__lasx_xvmul_h, src00, A, src01, B, src10, C, src11, D,
                  src00, src01, src10, src11);
        src00 = __lasx_xvadd_h(src00, src01);
        src10 = __lasx_xvadd_h(src10, src11);
        src00 = __lasx_xvadd_h(src00, src10);
        src00 = __lasx_xvaddi_hu(src00, 28);
        src00 = __lasx_xvsrli_h(src00, 6);
        src00 = __lasx_xvpickev_b(src00, src00);
        __lasx_xvstelm_d(src00, dst, 0, 0);
        dst += stride;
    }
}

static void put_vc1_mspel_mc_v_lasx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride, int vmode, int rnd)
{
    __m256i in0, in1, in2, in3, temp0, temp1, t0;
    __m256i const_para0_3, const_para1_2, const_r, const_sh;
    static const uint16_t para_value[][2] = {{0x0304, 0x1235},
                                             {0x0101, 0x0909},
                                             {0x0403, 0x3512}};
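    /*
     * Same bicubic taps as in put_vc1_mspel_mc_h_v_lasx, pre-packed as
     * little-endian byte pairs: 0x0304 = bytes (4, 3) for the outer taps,
     * 0x1235 = bytes (53, 18) for the inner taps of filter mode 1.
     */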
    const uint16_t *para_v = para_value[vmode - 1];
    static const int shift_value[] = {0, 6, 4, 6};
    int add_value[3];
    ptrdiff_t stride_2x = stride << 1;
    int i = 0;
    add_value[2] = add_value[0] = 31 + rnd, add_value[1] = 7 + rnd;

    const_r = __lasx_xvreplgr2vr_h(add_value[vmode - 1]);
    const_sh = __lasx_xvreplgr2vr_h(shift_value[vmode]);
    const_para0_3 = __lasx_xvreplgr2vr_h(*para_v);
    const_para1_2 = __lasx_xvreplgr2vr_h(*(para_v + 1));

    DUP2_ARG2(__lasx_xvld, src - stride, 0, src, 0, in0, in1);
    in2 = __lasx_xvld(src + stride, 0);
    in0 = __lasx_xvpermi_d(in0, 0xD8);
    in1 = __lasx_xvpermi_d(in1, 0xD8);
    in2 = __lasx_xvpermi_d(in2, 0xD8);
    for (; i < 16; i++) {
        in3 = __lasx_xvld(src + stride_2x, 0);
        in3 = __lasx_xvpermi_d(in3, 0xD8);
        DUP2_ARG2(__lasx_xvilvl_b, in2, in1, in3, in0, temp0, temp1);
        t0 = __lasx_xvdp2_h_bu(temp0, const_para1_2);
        t0 = __lasx_xvdp2sub_h_bu(t0, temp1, const_para0_3);
        t0 = __lasx_xvadd_h(t0, const_r);
        t0 = __lasx_xvsra_h(t0, const_sh);
        t0 = __lasx_xvclip255_h(t0);
        t0 = __lasx_xvpickev_b(t0, t0);
        __lasx_xvstelm_d(t0, dst, 0, 0);
        __lasx_xvstelm_d(t0, dst, 8, 2);
        dst += stride;
        src += stride;
        in0 = in1;
        in1 = in2;
        in2 = in3;
    }
}

#define PUT_VC1_MSPEL_MC_V_LASX(vmode)                              \
void ff_put_vc1_mspel_mc0 ## vmode ## _16_lasx(uint8_t *dst,        \
                                               const uint8_t *src,  \
                                               ptrdiff_t stride,    \
                                               int rnd)             \
{                                                                   \
    put_vc1_mspel_mc_v_lasx(dst, src, stride, vmode, rnd);          \
}

PUT_VC1_MSPEL_MC_V_LASX(1);
PUT_VC1_MSPEL_MC_V_LASX(2);
PUT_VC1_MSPEL_MC_V_LASX(3);

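/*
 * Applies one 4-tap bicubic filter step to a transposed row: the inner
 * taps accumulate via xvdp2_h_bu, the outer taps are subtracted, then the
 * result is rounded, shifted, clamped to 8 bits and repacked.
 */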
#define ROW_LASX(in0, in1, in2, in3, out0)                          \
    DUP2_ARG2(__lasx_xvilvl_b, in2, in1, in3, in0, tmp0_m, tmp1_m); \
    out0 = __lasx_xvdp2_h_bu(tmp0_m, const_para1_2);                \
    out0 = __lasx_xvdp2sub_h_bu(out0, tmp1_m, const_para0_3);       \
    out0 = __lasx_xvadd_h(out0, const_r);                           \
    out0 = __lasx_xvsra_h(out0, const_sh);                          \
    out0 = __lasx_xvclip255_h(out0);                                \
    out0 = __lasx_xvpickev_b(out0, out0);                           \
    out0 = __lasx_xvpermi_d(out0, 0xd8);

static void put_vc1_mspel_mc_h_lasx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride, int hmode, int rnd)
{
    __m256i in0, in1, in2, in3, in4, in5, in6, in7,
            in8, in9, in10, in11, in12, in13, in14, in15;
    __m256i out0, out1, out2, out3, out4, out5, out6, out7, out8, out9,
            out10, out11, out12, out13, out14, out15, out16, out17, out18;
    __m256i const_para0_3, const_para1_2, const_r, const_sh;
    __m256i tmp0_m, tmp1_m, tmp2_m, tmp3_m;
    __m256i tmp4_m, tmp5_m, tmp6_m, tmp7_m;
    __m256i t0, t1, t2, t3, t4, t5, t6, t7;
    ptrdiff_t stride2 = stride << 1;
    ptrdiff_t stride4 = stride << 2;
    ptrdiff_t stride3 = stride2 + stride;
    static const uint16_t para_value[][2] = {{0x0304, 0x1235},
                                             {0x0101, 0x0909},
                                             {0x0403, 0x3512}};
    const uint16_t *para_v = para_value[hmode - 1];
    static const int shift_value[] = {0, 6, 4, 6};
    int add_value[3];
    uint8_t *_src = (uint8_t *)src - 1;
    add_value[2] = add_value[0] = 32 - rnd, add_value[1] = 8 - rnd;

    const_r = __lasx_xvreplgr2vr_h(add_value[hmode - 1]);
    const_sh = __lasx_xvreplgr2vr_h(shift_value[hmode]);
    const_para0_3 = __lasx_xvreplgr2vr_h(*para_v);
    const_para1_2 = __lasx_xvreplgr2vr_h(*(para_v + 1));

    in0 = __lasx_xvld(_src, 0);
    DUP2_ARG2(__lasx_xvldx, _src, stride, _src, stride2, in1, in2);
    in3 = __lasx_xvldx(_src, stride3);
    _src += stride4;
    in4 = __lasx_xvld(_src, 0);
    DUP2_ARG2(__lasx_xvldx, _src, stride, _src, stride2, in5, in6);
    in7 = __lasx_xvldx(_src, stride3);
    _src += stride4;
    in8 = __lasx_xvld(_src, 0);
    DUP2_ARG2(__lasx_xvldx, _src, stride, _src, stride2, in9, in10);
    in11 = __lasx_xvldx(_src, stride3);
    _src += stride4;
    in12 = __lasx_xvld(_src, 0);
    DUP2_ARG2(__lasx_xvldx, _src, stride, _src, stride2, in13, in14);
    in15 = __lasx_xvldx(_src, stride3);
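    /*
     * The xvilvl/xvilvh ladders below effectively transpose the 16x16
     * block of source bytes, so the horizontal filter can be evaluated
     * vertically across registers; the result is transposed back before
     * the stores.
     */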
    DUP4_ARG2(__lasx_xvilvl_b, in2, in0, in3, in1, in6, in4, in7, in5,
              tmp0_m, tmp1_m, tmp2_m, tmp3_m);
    DUP4_ARG2(__lasx_xvilvl_b, in10, in8, in11, in9, in14, in12, in15, in13,
              tmp4_m, tmp5_m, tmp6_m, tmp7_m);
    DUP4_ARG2(__lasx_xvilvl_b, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, t0, t2, t4, t6);
    DUP4_ARG2(__lasx_xvilvh_b, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, t1, t3, t5, t7);
    DUP4_ARG2(__lasx_xvilvl_w, t2, t0, t3, t1, t6, t4, t7, t5, tmp0_m, tmp4_m,
              tmp1_m, tmp5_m);
    DUP4_ARG2(__lasx_xvilvh_w, t2, t0, t3, t1, t6, t4, t7, t5, tmp2_m, tmp6_m,
              tmp3_m, tmp7_m);
    DUP4_ARG2(__lasx_xvilvl_d, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, out0, out2, out4, out6);
    DUP4_ARG2(__lasx_xvilvh_d, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, out1, out3, out5, out7);

    DUP4_ARG2(__lasx_xvilvh_b, in2, in0, in3, in1, in6, in4, in7, in5,
              tmp0_m, tmp1_m, tmp2_m, tmp3_m);
    DUP4_ARG2(__lasx_xvilvh_b, in10, in8, in11, in9, in14, in12, in15, in13,
              tmp4_m, tmp5_m, tmp6_m, tmp7_m);
    DUP4_ARG2(__lasx_xvilvl_b, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, t0, t2, t4, t6);
    DUP4_ARG2(__lasx_xvilvh_b, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, t1, t3, t5, t7);
    DUP4_ARG2(__lasx_xvilvl_w, t2, t0, t3, t1, t6, t4, t7, t5, tmp0_m, tmp4_m,
              tmp1_m, tmp5_m);
    DUP4_ARG2(__lasx_xvilvh_w, t2, t0, t3, t1, t6, t4, t7, t5, tmp2_m, tmp6_m,
              tmp3_m, tmp7_m);
    DUP4_ARG2(__lasx_xvilvl_d, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, out8, out10, out12, out14);
    DUP4_ARG2(__lasx_xvilvh_d, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, out9, out11, out13, out15);
    DUP2_ARG3(__lasx_xvpermi_q, out0, out0, 0x31, out1, out1, 0x31,
              out16, out17);
    out18 = __lasx_xvpermi_q(out2, out2, 0x31);

    DUP4_ARG2(__lasx_xvpermi_d, out0, 0xD8, out1, 0xD8, out2, 0xD8, out3, 0xD8,
              out0, out1, out2, out3);
    DUP4_ARG2(__lasx_xvpermi_d, out4, 0xD8, out5, 0xD8, out6, 0xD8, out7, 0xD8,
              out4, out5, out6, out7);
    DUP4_ARG2(__lasx_xvpermi_d, out8, 0xD8, out9, 0xD8, out10, 0xD8, out11,
              0xD8, out8, out9, out10, out11);
    DUP4_ARG2(__lasx_xvpermi_d, out12, 0xD8, out13, 0xD8, out14, 0xD8, out15,
              0xD8, out12, out13, out14, out15);
    out16 = __lasx_xvpermi_d(out16, 0xD8);
    out17 = __lasx_xvpermi_d(out17, 0xD8);
    out18 = __lasx_xvpermi_d(out18, 0xD8);

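    /*
     * Each ROW_LASX call filters one output group from four neighbouring
     * input columns; out16..out18 extend past column 15 so the last three
     * calls still see a full 4-tap window.
     */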
    ROW_LASX(out0, out1, out2, out3, in0);
    ROW_LASX(out1, out2, out3, out4, in1);
    ROW_LASX(out2, out3, out4, out5, in2);
    ROW_LASX(out3, out4, out5, out6, in3);
    ROW_LASX(out4, out5, out6, out7, in4);
    ROW_LASX(out5, out6, out7, out8, in5);
    ROW_LASX(out6, out7, out8, out9, in6);
    ROW_LASX(out7, out8, out9, out10, in7);
    ROW_LASX(out8, out9, out10, out11, in8);
    ROW_LASX(out9, out10, out11, out12, in9);
    ROW_LASX(out10, out11, out12, out13, in10);
    ROW_LASX(out11, out12, out13, out14, in11);
    ROW_LASX(out12, out13, out14, out15, in12);
    ROW_LASX(out13, out14, out15, out16, in13);
    ROW_LASX(out14, out15, out16, out17, in14);
    ROW_LASX(out15, out16, out17, out18, in15);

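    /* Transpose the filtered rows back into raster order before storing. */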
    DUP4_ARG2(__lasx_xvilvl_b, in2, in0, in3, in1, in6, in4, in7, in5,
              tmp0_m, tmp1_m, tmp2_m, tmp3_m);
    DUP4_ARG2(__lasx_xvilvl_b, in10, in8, in11, in9, in14, in12, in15, in13,
              tmp4_m, tmp5_m, tmp6_m, tmp7_m);
    DUP4_ARG2(__lasx_xvilvl_b, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, t0, t2, t4, t6);
    DUP4_ARG2(__lasx_xvilvh_b, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, t1, t3, t5, t7);
    DUP4_ARG2(__lasx_xvilvl_w, t2, t0, t3, t1, t6, t4, t7, t5, tmp0_m, tmp4_m,
              tmp1_m, tmp5_m);
    DUP4_ARG2(__lasx_xvilvh_w, t2, t0, t3, t1, t6, t4, t7, t5, tmp2_m, tmp6_m,
              tmp3_m, tmp7_m);
    DUP4_ARG2(__lasx_xvilvl_d, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, out0, out2, out4, out6);
    DUP4_ARG2(__lasx_xvilvh_d, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, out1, out3, out5, out7);

    DUP4_ARG2(__lasx_xvilvh_b, in2, in0, in3, in1, in6, in4, in7, in5,
              tmp0_m, tmp1_m, tmp2_m, tmp3_m);
    DUP4_ARG2(__lasx_xvilvh_b, in10, in8, in11, in9, in14, in12, in15, in13,
              tmp4_m, tmp5_m, tmp6_m, tmp7_m);
    DUP4_ARG2(__lasx_xvilvl_b, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, t0, t2, t4, t6);
    DUP4_ARG2(__lasx_xvilvh_b, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, t1, t3, t5, t7);
    DUP4_ARG2(__lasx_xvilvl_w, t2, t0, t3, t1, t6, t4, t7, t5, tmp0_m, tmp4_m,
              tmp1_m, tmp5_m);
    DUP4_ARG2(__lasx_xvilvh_w, t2, t0, t3, t1, t6, t4, t7, t5, tmp2_m, tmp6_m,
              tmp3_m, tmp7_m);
    DUP4_ARG2(__lasx_xvilvl_d, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, out8, out10, out12, out14);
    DUP4_ARG2(__lasx_xvilvh_d, tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp5_m, tmp4_m,
              tmp7_m, tmp6_m, out9, out11, out13, out15);
    __lasx_xvstelm_d(out0, dst, 0, 0);
    __lasx_xvstelm_d(out0, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out1, dst, 0, 0);
    __lasx_xvstelm_d(out1, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out2, dst, 0, 0);
    __lasx_xvstelm_d(out2, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out3, dst, 0, 0);
    __lasx_xvstelm_d(out3, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out4, dst, 0, 0);
    __lasx_xvstelm_d(out4, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out5, dst, 0, 0);
    __lasx_xvstelm_d(out5, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out6, dst, 0, 0);
    __lasx_xvstelm_d(out6, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out7, dst, 0, 0);
    __lasx_xvstelm_d(out7, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out8, dst, 0, 0);
    __lasx_xvstelm_d(out8, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out9, dst, 0, 0);
    __lasx_xvstelm_d(out9, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out10, dst, 0, 0);
    __lasx_xvstelm_d(out10, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out11, dst, 0, 0);
    __lasx_xvstelm_d(out11, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out12, dst, 0, 0);
    __lasx_xvstelm_d(out12, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out13, dst, 0, 0);
    __lasx_xvstelm_d(out13, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out14, dst, 0, 0);
    __lasx_xvstelm_d(out14, dst, 8, 1);
    dst += stride;
    __lasx_xvstelm_d(out15, dst, 0, 0);
    __lasx_xvstelm_d(out15, dst, 8, 1);
}

#define PUT_VC1_MSPEL_MC_H_LASX(hmode)                              \
void ff_put_vc1_mspel_mc ## hmode ## 0_16_lasx(uint8_t *dst,        \
                                               const uint8_t *src,  \
                                               ptrdiff_t stride,    \
                                               int rnd)             \
{                                                                   \
    put_vc1_mspel_mc_h_lasx(dst, src, stride, hmode, rnd);          \
}

PUT_VC1_MSPEL_MC_H_LASX(1);
PUT_VC1_MSPEL_MC_H_LASX(2);
PUT_VC1_MSPEL_MC_H_LASX(3);