FFmpeg
vp9_mc_mmi.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 gxw <guxiwei-hf@loongson.cn>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavcodec/vp9dsp.h"
23 #include "vp9dsp_mips.h"
24 
/*
 * GET_DATA_H_MMI - asm fragment: horizontal 8-tap dot products for four
 * adjacent output pixels.
 *
 * Register contract, as set up by the horizontal convolve functions in this
 * file:
 *   in:  ftmp4/ftmp5    pixels p+0..p+3 / p+4..p+7 widened to 16-bit lanes
 *        ftmp6/ftmp7    same window shifted by one pixel
 *        ftmp8/ftmp9    same window shifted by two pixels
 *        ftmp10/ftmp11  same window shifted by three pixels
 *        filter1        taps 0..3, filter2 taps 4..7 (16-bit lanes)
 *        ftmp0          all zero
 *   out: srcl           32-bit sums for output pixels 0 and 1
 *        srch           32-bit sums for output pixels 2 and 3
 *   clobbers: ftmp4..ftmp11
 *
 * For each window, pmaddhw produces two partial sums per register; the two
 * registers are added, then the upper word is folded onto the lower one
 * (punpckhwd against zero + paddw) so the full 8-tap sum sits in the low
 * word. punpcklwd finally pairs the low words of two windows.
 */
#define GET_DATA_H_MMI \
    "pmaddhw %[ftmp4], %[ftmp4], %[filter1] \n\t" \
    "pmaddhw %[ftmp5], %[ftmp5], %[filter2] \n\t" \
    "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
    "punpckhwd %[ftmp5], %[ftmp4], %[ftmp0] \n\t" \
    "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
    "pmaddhw %[ftmp6], %[ftmp6], %[filter1] \n\t" \
    "pmaddhw %[ftmp7], %[ftmp7], %[filter2] \n\t" \
    "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \
    "punpckhwd %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \
    "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \
    "punpcklwd %[srcl], %[ftmp4], %[ftmp6] \n\t" \
    "pmaddhw %[ftmp8], %[ftmp8], %[filter1] \n\t" \
    "pmaddhw %[ftmp9], %[ftmp9], %[filter2] \n\t" \
    "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
    "punpckhwd %[ftmp9], %[ftmp8], %[ftmp0] \n\t" \
    "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
    "pmaddhw %[ftmp10], %[ftmp10], %[filter1] \n\t" \
    "pmaddhw %[ftmp11], %[ftmp11], %[filter2] \n\t" \
    "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \
    "punpckhwd %[ftmp11], %[ftmp10], %[ftmp0] \n\t" \
    "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \
    "punpcklwd %[srch], %[ftmp8], %[ftmp10] \n\t"
48 
/*
 * GET_DATA_V_MMI - asm fragment: vertical 8-tap dot products for four
 * adjacent output pixels.
 *
 * Register contract, as set up by the vertical convolve functions in this
 * file:
 *   in:  ftmp4..ftmp11  eight consecutive source rows, four pixels each,
 *                       widened to 16-bit lanes
 *        filter10/32/54/76  tap pairs (0,1), (2,3), (4,5), (6,7), each pair
 *                       replicated in both 32-bit halves
 *   out: srcl           32-bit sums for columns 0 and 1
 *        srch           32-bit sums for columns 2 and 3
 *   clobbers: ftmp12
 *
 * Rows are interleaved two at a time (punpcklhw for columns 0-1, punpckhhw
 * for columns 2-3) so that one pmaddhw applies a tap pair to a row pair; the
 * four partial results are accumulated with paddw.
 */
#define GET_DATA_V_MMI \
    "punpcklhw %[srcl], %[ftmp4], %[ftmp5] \n\t" \
    "pmaddhw %[srcl], %[srcl], %[filter10] \n\t" \
    "punpcklhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \
    "punpcklhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \
    "punpcklhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \
    "punpckhhw %[srch], %[ftmp4], %[ftmp5] \n\t" \
    "pmaddhw %[srch], %[srch], %[filter10] \n\t" \
    "punpckhhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \
    "paddw %[srch], %[srch], %[ftmp12] \n\t" \
    "punpckhhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \
    "paddw %[srch], %[srch], %[ftmp12] \n\t" \
    "punpckhhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \
    "paddw %[srch], %[srch], %[ftmp12] \n\t"
72 
73 static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride,
74  uint8_t *dst, int32_t dst_stride,
75  const uint16_t *filter_x, int32_t w,
76  int32_t h)
77 {
78  double ftmp[15];
79  uint32_t tmp[2];
80  src -= 3;
81  src_stride -= w;
82  dst_stride -= w;
83  __asm__ volatile (
84  "move %[tmp1], %[width] \n\t"
85  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
86  "gsldlc1 %[filter1], 0x03(%[filter]) \n\t"
87  "gsldrc1 %[filter1], 0x00(%[filter]) \n\t"
88  "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t"
89  "gsldrc1 %[filter2], 0x08(%[filter]) \n\t"
90  "li %[tmp0], 0x07 \n\t"
91  "dmtc1 %[tmp0], %[ftmp13] \n\t"
92  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
93  "1: \n\t"
94  /* Get 8 data per row */
95  "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
96  "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
97  "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t"
98  "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t"
99  "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t"
100  "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t"
101  "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t"
102  "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t"
103  "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
104  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
105  "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
106  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
107  "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
108  "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
109  "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
110  "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
111  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
112  /* Get raw data */
114  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
115  %[ftmp6], %[tmp0])
116  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
117  %[ftmp6], %[tmp0])
118  "packsswh %[srcl], %[srcl], %[srch] \n\t"
119  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
120  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
121  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
122  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
123  /* Loop count */
124  "bnez %[width], 1b \n\t"
125  "move %[width], %[tmp1] \n\t"
126  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
127  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
128  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
129  "bnez %[height], 1b \n\t"
130  : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
131  [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
132  [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
133  [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
134  [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
135  [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
136  [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
137  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
138  [src]"+&r"(src), [width]"+&r"(w),
139  [dst]"+&r"(dst), [height]"+&r"(h),
140  [ftmp13]"=&f"(ftmp[14])
141  : [filter]"r"(filter_x),
142  [src_stride]"r"((mips_reg)src_stride),
143  [dst_stride]"r"((mips_reg)dst_stride)
144  : "memory"
145  );
146 }
147 
148 static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
149  uint8_t *dst, int32_t dst_stride,
150  const int16_t *filter_y, int32_t w,
151  int32_t h)
152 {
153  double ftmp[17];
154  uint32_t tmp[1];
155  ptrdiff_t addr = src_stride;
156  src_stride -= w;
157  dst_stride -= w;
158 
159  __asm__ volatile (
160  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
161  "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t"
162  "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t"
163  "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t"
164  "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t"
165  "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t"
166  "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t"
167  "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t"
168  "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t"
169  "li %[tmp0], 0x07 \n\t"
170  "dmtc1 %[tmp0], %[ftmp13] \n\t"
171  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
172  "1: \n\t"
173  /* Get 8 data per column */
174  "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t"
175  "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t"
176  PTR_ADDU "%[tmp0], %[src], %[addr] \n\t"
177  "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t"
178  "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t"
179  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
180  "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t"
181  "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t"
182  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
183  "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t"
184  "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t"
185  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
186  "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t"
187  "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t"
188  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
189  "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t"
190  "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t"
191  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
192  "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t"
193  "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t"
194  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
195  "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t"
196  "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t"
197  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
198  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
199  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
200  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
201  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
202  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
203  "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
204  "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
205  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
206  /* Get raw data */
208  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
209  %[ftmp6], %[tmp0])
210  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
211  %[ftmp6], %[tmp0])
212  "packsswh %[srcl], %[srcl], %[srch] \n\t"
213  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
214  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
215  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
216  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
217  /* Loop count */
218  "bnez %[width], 1b \n\t"
219  PTR_SUBU "%[width], %[addr], %[src_stride] \n\t"
220  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
221  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
222  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
223  "bnez %[height], 1b \n\t"
224  : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
225  [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
226  [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
227  [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]),
228  [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]),
229  [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]),
230  [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]),
231  [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]),
232  [src]"+&r"(src), [dst]"+&r"(dst),
233  [width]"+&r"(w), [height]"+&r"(h),
234  [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16])
235  : [filter]"r"(filter_y),
236  [src_stride]"r"((mips_reg)src_stride),
237  [dst_stride]"r"((mips_reg)dst_stride),
238  [addr]"r"((mips_reg)addr)
239  : "memory"
240  );
241 }
242 
243 static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
244  uint8_t *dst, int32_t dst_stride,
245  const uint16_t *filter_x, int32_t w,
246  int32_t h)
247 {
248  double ftmp[15];
249  uint32_t tmp[2];
250  src -= 3;
251  src_stride -= w;
252  dst_stride -= w;
253 
254  __asm__ volatile (
255  "move %[tmp1], %[width] \n\t"
256  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
257  "gsldlc1 %[filter1], 0x03(%[filter]) \n\t"
258  "gsldrc1 %[filter1], 0x00(%[filter]) \n\t"
259  "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t"
260  "gsldrc1 %[filter2], 0x08(%[filter]) \n\t"
261  "li %[tmp0], 0x07 \n\t"
262  "dmtc1 %[tmp0], %[ftmp13] \n\t"
263  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
264  "1: \n\t"
265  /* Get 8 data per row */
266  "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
267  "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
268  "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t"
269  "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t"
270  "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t"
271  "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t"
272  "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t"
273  "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t"
274  "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
275  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
276  "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
277  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
278  "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
279  "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
280  "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
281  "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
282  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
283  /* Get raw data */
285  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
286  %[ftmp6], %[tmp0])
287  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
288  %[ftmp6], %[tmp0])
289  "packsswh %[srcl], %[srcl], %[srch] \n\t"
290  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
291  "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
292  "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
293  "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
294  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
295  "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
296  "li %[tmp0], 0x10001 \n\t"
297  "dmtc1 %[tmp0], %[ftmp5] \n\t"
298  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
299  "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
300  "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
301  "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
302  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
303  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
304  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
305  /* Loop count */
306  "bnez %[width], 1b \n\t"
307  "move %[width], %[tmp1] \n\t"
308  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
309  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
310  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
311  "bnez %[height], 1b \n\t"
312  : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
313  [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
314  [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
315  [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
316  [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
317  [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
318  [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
319  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
320  [src]"+&r"(src), [width]"+&r"(w),
321  [dst]"+&r"(dst), [height]"+&r"(h),
322  [ftmp13]"=&f"(ftmp[14])
323  : [filter]"r"(filter_x),
324  [src_stride]"r"((mips_reg)src_stride),
325  [dst_stride]"r"((mips_reg)dst_stride)
326  : "memory"
327  );
328 }
329 
330 static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
331  uint8_t *dst, int32_t dst_stride,
332  const int16_t *filter_y, int32_t w,
333  int32_t h)
334 {
335  double ftmp[17];
336  uint32_t tmp[1];
337  ptrdiff_t addr = src_stride;
338  src_stride -= w;
339  dst_stride -= w;
340 
341  __asm__ volatile (
342  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
343  "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t"
344  "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t"
345  "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t"
346  "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t"
347  "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t"
348  "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t"
349  "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t"
350  "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t"
351  "li %[tmp0], 0x07 \n\t"
352  "dmtc1 %[tmp0], %[ftmp13] \n\t"
353  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
354  "1: \n\t"
355  /* Get 8 data per column */
356  "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t"
357  "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t"
358  PTR_ADDU "%[tmp0], %[src], %[addr] \n\t"
359  "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t"
360  "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t"
361  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
362  "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t"
363  "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t"
364  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
365  "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t"
366  "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t"
367  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
368  "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t"
369  "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t"
370  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
371  "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t"
372  "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t"
373  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
374  "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t"
375  "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t"
376  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
377  "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t"
378  "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t"
379  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
380  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
381  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
382  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
383  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
384  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
385  "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
386  "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
387  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
388  /* Get raw data */
390  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
391  %[ftmp6], %[tmp0])
392  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
393  %[ftmp6], %[tmp0])
394  "packsswh %[srcl], %[srcl], %[srch] \n\t"
395  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
396  "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
397  "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
398  "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
399  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
400  "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
401  "li %[tmp0], 0x10001 \n\t"
402  "dmtc1 %[tmp0], %[ftmp5] \n\t"
403  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
404  "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
405  "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
406  "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
407  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
408  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
409  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
410  /* Loop count */
411  "bnez %[width], 1b \n\t"
412  PTR_SUBU "%[width], %[addr], %[src_stride] \n\t"
413  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
414  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
415  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
416  "bnez %[height], 1b \n\t"
417  : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
418  [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
419  [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
420  [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]),
421  [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]),
422  [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]),
423  [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]),
424  [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]),
425  [src]"+&r"(src), [dst]"+&r"(dst),
426  [width]"+&r"(w), [height]"+&r"(h),
427  [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16])
428  : [filter]"r"(filter_y),
429  [src_stride]"r"((mips_reg)src_stride),
430  [dst_stride]"r"((mips_reg)dst_stride),
431  [addr]"r"((mips_reg)addr)
432  : "memory"
433  );
434 }
435 
436 static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride,
437  uint8_t *dst, int32_t dst_stride,
438  int32_t w, int32_t h)
439 {
440  double ftmp[4];
441  uint32_t tmp[2];
442  src_stride -= w;
443  dst_stride -= w;
444 
445  __asm__ volatile (
446  "move %[tmp1], %[width] \n\t"
447  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
448  "li %[tmp0], 0x10001 \n\t"
449  "dmtc1 %[tmp0], %[ftmp3] \n\t"
450  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
451  "1: \n\t"
452  "gslwlc1 %[ftmp1], 0x07(%[src]) \n\t"
453  "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t"
454  "gslwlc1 %[ftmp2], 0x07(%[dst]) \n\t"
455  "gslwrc1 %[ftmp2], 0x00(%[dst]) \n\t"
456  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
457  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
458  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
459  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
460  "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
461  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
462  "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
463  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
464  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
465  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
466  "bnez %[width], 1b \n\t"
467  "move %[width], %[tmp1] \n\t"
468  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
469  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
470  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
471  "bnez %[height], 1b \n\t"
472  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
473  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
474  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
475  [src]"+&r"(src), [dst]"+&r"(dst),
476  [width]"+&r"(w), [height]"+&r"(h)
477  : [src_stride]"r"((mips_reg)src_stride),
478  [dst_stride]"r"((mips_reg)dst_stride)
479  : "memory"
480  );
481 }
482 
483 static const int16_t vp9_subpel_filters_mmi[3][15][8] = {
484  [FILTER_8TAP_REGULAR] = {
485  {0, 1, -5, 126, 8, -3, 1, 0},
486  {-1, 3, -10, 122, 18, -6, 2, 0},
487  {-1, 4, -13, 118, 27, -9, 3, -1},
488  {-1, 4, -16, 112, 37, -11, 4, -1},
489  {-1, 5, -18, 105, 48, -14, 4, -1},
490  {-1, 5, -19, 97, 58, -16, 5, -1},
491  {-1, 6, -19, 88, 68, -18, 5, -1},
492  {-1, 6, -19, 78, 78, -19, 6, -1},
493  {-1, 5, -18, 68, 88, -19, 6, -1},
494  {-1, 5, -16, 58, 97, -19, 5, -1},
495  {-1, 4, -14, 48, 105, -18, 5, -1},
496  {-1, 4, -11, 37, 112, -16, 4, -1},
497  {-1, 3, -9, 27, 118, -13, 4, -1},
498  {0, 2, -6, 18, 122, -10, 3, -1},
499  {0, 1, -3, 8, 126, -5, 1, 0},
500  }, [FILTER_8TAP_SHARP] = {
501  {-1, 3, -7, 127, 8, -3, 1, 0},
502  {-2, 5, -13, 125, 17, -6, 3, -1},
503  {-3, 7, -17, 121, 27, -10, 5, -2},
504  {-4, 9, -20, 115, 37, -13, 6, -2},
505  {-4, 10, -23, 108, 48, -16, 8, -3},
506  {-4, 10, -24, 100, 59, -19, 9, -3},
507  {-4, 11, -24, 90, 70, -21, 10, -4},
508  {-4, 11, -23, 80, 80, -23, 11, -4},
509  {-4, 10, -21, 70, 90, -24, 11, -4},
510  {-3, 9, -19, 59, 100, -24, 10, -4},
511  {-3, 8, -16, 48, 108, -23, 10, -4},
512  {-2, 6, -13, 37, 115, -20, 9, -4},
513  {-2, 5, -10, 27, 121, -17, 7, -3},
514  {-1, 3, -6, 17, 125, -13, 5, -2},
515  {0, 1, -3, 8, 127, -7, 3, -1},
516  }, [FILTER_8TAP_SMOOTH] = {
517  {-3, -1, 32, 64, 38, 1, -3, 0},
518  {-2, -2, 29, 63, 41, 2, -3, 0},
519  {-2, -2, 26, 63, 43, 4, -4, 0},
520  {-2, -3, 24, 62, 46, 5, -4, 0},
521  {-2, -3, 21, 60, 49, 7, -4, 0},
522  {-1, -4, 18, 59, 51, 9, -4, 0},
523  {-1, -4, 16, 57, 53, 12, -4, -1},
524  {-1, -4, 14, 55, 55, 14, -4, -1},
525  {-1, -4, 12, 53, 57, 16, -4, -1},
526  {0, -4, 9, 51, 59, 18, -4, -1},
527  {0, -4, 7, 49, 60, 21, -3, -2},
528  {0, -4, 5, 46, 62, 24, -3, -2},
529  {0, -4, 4, 43, 63, 26, -2, -2},
530  {0, -3, 2, 41, 63, 29, -2, -2},
531  {0, -3, 1, 38, 64, 32, -1, -3},
532  }
533 };
534 
/*
 * VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)
 *
 * Generates the six public 8-tap motion-compensation entry points for one
 * block width and one filter type:
 *   ff_{put,avg}_8tap_<TYPE>_<SIZE>{h,v,hv}_mmi
 *
 * SIZE      block width in pixels, passed to the convolve helpers as w
 * TYPE      name fragment used in the generated function names
 * TYPE_IDX  first index into vp9_subpel_filters_mmi
 *
 * mx / my select the filter row as mx - 1 / my - 1, so they must be in
 * 1..15 for the direction(s) a variant filters in; the other one is unused.
 *
 * The vertical helpers expect src to point at the first of the eight tap
 * rows, hence the src -= 3 * srcstride before calling them. The hv variants
 * first filter h + 7 rows horizontally into a 64-byte-stride scratch buffer
 * (64 * 71 bytes, i.e. room for h up to 64) and then filter that buffer
 * vertically; the avg hv variant goes through a second 64 x 64 scratch
 * buffer and averages it into dst.
 *
 * The filter table is int16_t while the horizontal helpers take
 * const uint16_t *; the locals are kept as const int16_t * and cast
 * explicitly at those call sites so no implicit conversion between
 * differently-signed pointer types is needed.
 */
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX) \
void ff_put_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                        const uint8_t *src, \
                                        ptrdiff_t srcstride, \
                                        int h, int mx, int my) \
{ \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \
 \
    convolve_horiz_mmi(src, srcstride, dst, dststride, \
                       (const uint16_t *)filter, SIZE, h); \
} \
 \
void ff_put_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                        const uint8_t *src, \
                                        ptrdiff_t srcstride, \
                                        int h, int mx, int my) \
{ \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \
 \
    src -= (3 * srcstride); \
    convolve_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \
} \
 \
void ff_put_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                         const uint8_t *src, \
                                         ptrdiff_t srcstride, \
                                         int h, int mx, int my) \
{ \
    const int16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \
    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \
 \
    /* 7 extra rows feed the 8-tap vertical pass. */ \
    int tmp_h = h + 7; \
    uint8_t temp[64 * 71]; \
    src -= (3 * srcstride); \
    convolve_horiz_mmi(src, srcstride, temp, 64, \
                       (const uint16_t *)hfilter, SIZE, tmp_h); \
    convolve_vert_mmi(temp, 64, dst, dststride, vfilter, SIZE, h); \
} \
 \
void ff_avg_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                        const uint8_t *src, \
                                        ptrdiff_t srcstride, \
                                        int h, int mx, int my) \
{ \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \
 \
    convolve_avg_horiz_mmi(src, srcstride, dst, dststride, \
                           (const uint16_t *)filter, SIZE, h); \
} \
 \
void ff_avg_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                        const uint8_t *src, \
                                        ptrdiff_t srcstride, \
                                        int h, int mx, int my) \
{ \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \
 \
    src -= (3 * srcstride); \
    convolve_avg_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h); \
} \
 \
void ff_avg_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride, \
                                         const uint8_t *src, \
                                         ptrdiff_t srcstride, \
                                         int h, int mx, int my) \
{ \
    const int16_t *hfilter = vp9_subpel_filters_mmi[TYPE_IDX][mx-1]; \
    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my-1]; \
 \
    uint8_t temp1[64 * 64]; \
    uint8_t temp2[64 * 71]; \
    /* 7 extra rows feed the 8-tap vertical pass. */ \
    int tmp_h = h + 7; \
    src -= (3 * srcstride); \
    convolve_horiz_mmi(src, srcstride, temp2, 64, \
                       (const uint16_t *)hfilter, SIZE, tmp_h); \
    convolve_vert_mmi(temp2, 64, temp1, 64, vfilter, SIZE, h); \
    convolve_avg_mmi(temp1, 64, dst, dststride, SIZE, h); \
}
609 
615 
621 
627 
628 #undef VP9_8TAP_MIPS_MMI_FUNC
convolve_avg_vert_mmi
static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:330
filter1
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
Definition: dcadsp.c:358
convolve_horiz_mmi
static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:73
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
w
uint8_t w
Definition: llviddspenc.c:38
convolve_vert_mmi
static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:148
GET_DATA_H_MMI
#define GET_DATA_H_MMI
Definition: vp9_mc_mmi.c:25
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
convolve_avg_horiz_mmi
static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:243
mips_reg
#define mips_reg
Definition: asmdefs.h:44
vp9_subpel_filters_mmi
static const int16_t vp9_subpel_filters_mmi[3][15][8]
Definition: vp9_mc_mmi.c:483
mmiutils.h
FILTER_8TAP_SHARP
@ FILTER_8TAP_SHARP
Definition: vp9.h:67
width
#define width
vp9dsp_mips.h
int32_t
int32_t
Definition: audio_convert.c:194
src
#define src
Definition: vp8dsp.c:254
vp9dsp.h
ROUND_POWER_OF_TWO_MMI
#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0)
brief: (((value) + (1 << ((n) - 1))) >> (n)) fr_i0: src & dst fr_i1: Operand number fr_t0,...
Definition: mmiutils.h:355
FILTER_8TAP_REGULAR
@ FILTER_8TAP_REGULAR
Definition: vp9.h:66
GET_DATA_V_MMI
#define GET_DATA_V_MMI
Definition: vp9_mc_mmi.c:49
height
#define height
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:50
FILTER_8TAP_SMOOTH
@ FILTER_8TAP_SMOOTH
Definition: vp9.h:65
uint8_t
uint8_t
Definition: audio_convert.c:194
smooth
static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifoBuffer *values)
Definition: vf_deshake_opencl.c:903
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:47
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:48
convolve_avg_mmi
static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:436
VP9_8TAP_MIPS_MMI_FUNC
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)
Definition: vp9_mc_mmi.c:535
h
h
Definition: vp9dsp_template.c:2038