FFmpeg
vp9_mc_mmi.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 gxw <guxiwei-hf@loongson.cn>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include "libavcodec/vp9dsp.h"
#include "libavutil/mips/mmiutils.h"
#include "vp9dsp_mips.h"
24 
/*
 * Horizontal 8-tap accumulation for four neighbouring output pixels.
 *
 * Register contract (operand names of the enclosing asm statement):
 *   in : ftmp4/ftmp5, ftmp6/ftmp7, ftmp8/ftmp9, ftmp10/ftmp11
 *        four (low, high) pairs of pixels already widened to 16 bit; pair k
 *        holds the eight source pixels feeding output pixel k.
 *        filter1 / filter2: filter taps 0..3 and 4..7.
 *        ftmp0: must be zero (used to move a high word into the low word).
 *   out: srcl = 32-bit sums for outputs 0 and 1,
 *        srch = 32-bit sums for outputs 2 and 3 (not yet rounded).
 *   clobbers: ftmp4 .. ftmp11.
 *
 * For each pair the two pmaddhw results are added together, then the high
 * word is folded onto the low word so that the low word carries the sum of
 * all eight products.
 */
#define GET_DATA_H_MMI                                  \
    "pmaddhw %[ftmp4], %[ftmp4], %[filter1] \n\t"       \
    "pmaddhw %[ftmp5], %[ftmp5], %[filter2] \n\t"       \
    "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"           \
    "punpckhwd %[ftmp5], %[ftmp4], %[ftmp0] \n\t"       \
    "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"           \
    "pmaddhw %[ftmp6], %[ftmp6], %[filter1] \n\t"       \
    "pmaddhw %[ftmp7], %[ftmp7], %[filter2] \n\t"       \
    "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t"           \
    "punpckhwd %[ftmp7], %[ftmp6], %[ftmp0] \n\t"       \
    "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t"           \
    "punpcklwd %[srcl], %[ftmp4], %[ftmp6] \n\t"        \
    "pmaddhw %[ftmp8], %[ftmp8], %[filter1] \n\t"       \
    "pmaddhw %[ftmp9], %[ftmp9], %[filter2] \n\t"       \
    "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t"           \
    "punpckhwd %[ftmp9], %[ftmp8], %[ftmp0] \n\t"       \
    "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t"           \
    "pmaddhw %[ftmp10], %[ftmp10], %[filter1] \n\t"     \
    "pmaddhw %[ftmp11], %[ftmp11], %[filter2] \n\t"     \
    "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t"        \
    "punpckhwd %[ftmp11], %[ftmp10], %[ftmp0] \n\t"     \
    "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t"        \
    "punpcklwd %[srch], %[ftmp8], %[ftmp10] \n\t"
48 
/*
 * Vertical 8-tap accumulation for four horizontally adjacent output pixels.
 *
 * Register contract (operand names of the enclosing asm statement):
 *   in : ftmp4 .. ftmp11: eight consecutive source rows, each holding four
 *        pixels widened to 16 bit.
 *        filter10 / filter32 / filter54 / filter76: tap pairs (0,1), (2,3),
 *        (4,5), (6,7), each pair replicated into both 32-bit halves.
 *   out: srcl = 32-bit sums for pixels 0 and 1,
 *        srch = 32-bit sums for pixels 2 and 3 (not yet rounded).
 *   clobbers: ftmp12 (scratch). The row registers are left untouched.
 *
 * Two rows are interleaved per step so that one pmaddhw multiplies a pixel
 * from each row by its own tap and adds the two products.
 */
#define GET_DATA_V_MMI                                  \
    "punpcklhw %[srcl], %[ftmp4], %[ftmp5] \n\t"        \
    "pmaddhw %[srcl], %[srcl], %[filter10] \n\t"        \
    "punpcklhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t"      \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t"    \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t"            \
    "punpcklhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t"      \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t"    \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t"            \
    "punpcklhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t"    \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t"    \
    "paddw %[srcl], %[srcl], %[ftmp12] \n\t"            \
    "punpckhhw %[srch], %[ftmp4], %[ftmp5] \n\t"        \
    "pmaddhw %[srch], %[srch], %[filter10] \n\t"        \
    "punpckhhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t"      \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t"    \
    "paddw %[srch], %[srch], %[ftmp12] \n\t"            \
    "punpckhhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t"      \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t"    \
    "paddw %[srch], %[srch], %[ftmp12] \n\t"            \
    "punpckhhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t"    \
    "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t"    \
    "paddw %[srch], %[srch], %[ftmp12] \n\t"
72 
73 static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride,
74  uint8_t *dst, int32_t dst_stride,
75  const uint16_t *filter_x, int32_t w,
76  int32_t h)
77 {
78  double ftmp[15];
79  uint32_t tmp[2];
81  src -= 3;
82  src_stride -= w;
83  dst_stride -= w;
84  __asm__ volatile (
85  "move %[tmp1], %[width] \n\t"
86  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
87  MMI_ULDC1(%[filter1], %[filter], 0x00)
88  MMI_ULDC1(%[filter2], %[filter], 0x08)
89  "li %[tmp0], 0x07 \n\t"
90  "dmtc1 %[tmp0], %[ftmp13] \n\t"
91  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
92  "1: \n\t"
93  /* Get 8 data per row */
94  MMI_ULDC1(%[ftmp5], %[src], 0x00)
95  MMI_ULDC1(%[ftmp7], %[src], 0x01)
96  MMI_ULDC1(%[ftmp9], %[src], 0x02)
97  MMI_ULDC1(%[ftmp11], %[src], 0x03)
98  "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
99  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
100  "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
101  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
102  "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
103  "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
104  "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
105  "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
106  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
107  /* Get raw data */
109  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
110  %[ftmp6], %[tmp0])
111  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
112  %[ftmp6], %[tmp0])
113  "packsswh %[srcl], %[srcl], %[srch] \n\t"
114  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
115  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
116  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
117  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
118  /* Loop count */
119  "bnez %[width], 1b \n\t"
120  "move %[width], %[tmp1] \n\t"
121  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
122  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
123  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
124  "bnez %[height], 1b \n\t"
126  [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
127  [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
128  [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
129  [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
130  [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
131  [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
132  [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
133  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
134  [src]"+&r"(src), [width]"+&r"(w),
135  [dst]"+&r"(dst), [height]"+&r"(h),
136  [ftmp13]"=&f"(ftmp[14])
137  : [filter]"r"(filter_x),
138  [src_stride]"r"((mips_reg)src_stride),
139  [dst_stride]"r"((mips_reg)dst_stride)
140  : "memory"
141  );
142 }
143 
144 static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
145  uint8_t *dst, int32_t dst_stride,
146  const int16_t *filter_y, int32_t w,
147  int32_t h)
148 {
149  double ftmp[17];
150  uint32_t tmp[1];
151  ptrdiff_t addr = src_stride;
153  src_stride -= w;
154  dst_stride -= w;
155 
156  __asm__ volatile (
157  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
158  MMI_ULDC1(%[ftmp4], %[filter], 0x00)
159  MMI_ULDC1(%[ftmp5], %[filter], 0x08)
160  "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t"
161  "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t"
162  "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t"
163  "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t"
164  "li %[tmp0], 0x07 \n\t"
165  "dmtc1 %[tmp0], %[ftmp13] \n\t"
166  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
167  "1: \n\t"
168  /* Get 8 data per column */
169  MMI_ULDC1(%[ftmp4], %[src], 0x0)
170  PTR_ADDU "%[tmp0], %[src], %[addr] \n\t"
171  MMI_ULDC1(%[ftmp5], %[tmp0], 0x0)
172  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
173  MMI_ULDC1(%[ftmp6], %[tmp0], 0x0)
174  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
175  MMI_ULDC1(%[ftmp7], %[tmp0], 0x0)
176  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
177  MMI_ULDC1(%[ftmp8], %[tmp0], 0x0)
178  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
179  MMI_ULDC1(%[ftmp9], %[tmp0], 0x0)
180  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
181  MMI_ULDC1(%[ftmp10], %[tmp0], 0x0)
182  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
183  MMI_ULDC1(%[ftmp11], %[tmp0], 0x0)
184  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
185  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
186  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
187  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
188  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
189  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
190  "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
191  "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
192  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
193  /* Get raw data */
195  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
196  %[ftmp6], %[tmp0])
197  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
198  %[ftmp6], %[tmp0])
199  "packsswh %[srcl], %[srcl], %[srch] \n\t"
200  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
201  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
202  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
203  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
204  /* Loop count */
205  "bnez %[width], 1b \n\t"
206  PTR_SUBU "%[width], %[addr], %[src_stride] \n\t"
207  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
208  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
209  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
210  "bnez %[height], 1b \n\t"
212  [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
213  [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
214  [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
215  [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]),
216  [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]),
217  [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]),
218  [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]),
219  [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]),
220  [src]"+&r"(src), [dst]"+&r"(dst),
221  [width]"+&r"(w), [height]"+&r"(h),
222  [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16])
223  : [filter]"r"(filter_y),
224  [src_stride]"r"((mips_reg)src_stride),
225  [dst_stride]"r"((mips_reg)dst_stride),
226  [addr]"r"((mips_reg)addr)
227  : "memory"
228  );
229 }
230 
231 static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
232  uint8_t *dst, int32_t dst_stride,
233  const uint16_t *filter_x, int32_t w,
234  int32_t h)
235 {
236  double ftmp[15];
237  uint32_t tmp[2];
239  src -= 3;
240  src_stride -= w;
241  dst_stride -= w;
242 
243  __asm__ volatile (
244  "move %[tmp1], %[width] \n\t"
245  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
246  MMI_ULDC1(%[filter1], %[filter], 0x00)
247  MMI_ULDC1(%[filter2], %[filter], 0x08)
248  "li %[tmp0], 0x07 \n\t"
249  "dmtc1 %[tmp0], %[ftmp13] \n\t"
250  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
251  "1: \n\t"
252  /* Get 8 data per row */
253  MMI_ULDC1(%[ftmp5], %[src], 0x00)
254  MMI_ULDC1(%[ftmp7], %[src], 0x01)
255  MMI_ULDC1(%[ftmp9], %[src], 0x02)
256  MMI_ULDC1(%[ftmp11], %[src], 0x03)
257  "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
258  "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
259  "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
260  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
261  "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
262  "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
263  "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
264  "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
265  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
266  /* Get raw data */
268  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
269  %[ftmp6], %[tmp0])
270  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
271  %[ftmp6], %[tmp0])
272  "packsswh %[srcl], %[srcl], %[srch] \n\t"
273  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
274  "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
275  MMI_ULDC1(%[ftmp4], %[dst], 0x0)
276  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
277  "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
278  "li %[tmp0], 0x10001 \n\t"
279  "dmtc1 %[tmp0], %[ftmp5] \n\t"
280  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
281  "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
282  "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
283  "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
284  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
285  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
286  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
287  /* Loop count */
288  "bnez %[width], 1b \n\t"
289  "move %[width], %[tmp1] \n\t"
290  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
291  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
292  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
293  "bnez %[height], 1b \n\t"
295  [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
296  [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
297  [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
298  [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
299  [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
300  [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
301  [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
302  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
303  [src]"+&r"(src), [width]"+&r"(w),
304  [dst]"+&r"(dst), [height]"+&r"(h),
305  [ftmp13]"=&f"(ftmp[14])
306  : [filter]"r"(filter_x),
307  [src_stride]"r"((mips_reg)src_stride),
308  [dst_stride]"r"((mips_reg)dst_stride)
309  : "memory"
310  );
311 }
312 
313 static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
314  uint8_t *dst, int32_t dst_stride,
315  const int16_t *filter_y, int32_t w,
316  int32_t h)
317 {
318  double ftmp[17];
319  uint32_t tmp[1];
320  ptrdiff_t addr = src_stride;
322  src_stride -= w;
323  dst_stride -= w;
324 
325  __asm__ volatile (
326  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
327  MMI_ULDC1(%[ftmp4], %[filter], 0x00)
328  MMI_ULDC1(%[ftmp5], %[filter], 0x08)
329  "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t"
330  "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t"
331  "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t"
332  "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t"
333  "li %[tmp0], 0x07 \n\t"
334  "dmtc1 %[tmp0], %[ftmp13] \n\t"
335  "punpcklwd %[ftmp13], %[ftmp13], %[ftmp13] \n\t"
336  "1: \n\t"
337  /* Get 8 data per column */
338  MMI_ULDC1(%[ftmp4], %[src], 0x0)
339  PTR_ADDU "%[tmp0], %[src], %[addr] \n\t"
340  MMI_ULDC1(%[ftmp5], %[tmp0], 0x0)
341  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
342  MMI_ULDC1(%[ftmp6], %[tmp0], 0x0)
343  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
344  MMI_ULDC1(%[ftmp7], %[tmp0], 0x0)
345  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
346  MMI_ULDC1(%[ftmp8], %[tmp0], 0x0)
347  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
348  MMI_ULDC1(%[ftmp9], %[tmp0], 0x0)
349  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
350  MMI_ULDC1(%[ftmp10], %[tmp0], 0x0)
351  PTR_ADDU "%[tmp0], %[tmp0], %[addr] \n\t"
352  MMI_ULDC1(%[ftmp11], %[tmp0], 0x0)
353  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
354  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
355  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
356  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
357  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
358  "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
359  "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t"
360  "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
361  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
362  /* Get raw data */
364  ROUND_POWER_OF_TWO_MMI(%[srcl], %[ftmp13], %[ftmp5],
365  %[ftmp6], %[tmp0])
366  ROUND_POWER_OF_TWO_MMI(%[srch], %[ftmp13], %[ftmp5],
367  %[ftmp6], %[tmp0])
368  "packsswh %[srcl], %[srcl], %[srch] \n\t"
369  "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t"
370  "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
371  MMI_ULDC1(%[ftmp4], %[dst], 0x00)
372  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
373  "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
374  "li %[tmp0], 0x10001 \n\t"
375  "dmtc1 %[tmp0], %[ftmp5] \n\t"
376  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
377  "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
378  "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
379  "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
380  "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
381  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
382  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
383  /* Loop count */
384  "bnez %[width], 1b \n\t"
385  PTR_SUBU "%[width], %[addr], %[src_stride] \n\t"
386  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
387  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
388  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
389  "bnez %[height], 1b \n\t"
391  [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
392  [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]),
393  [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]),
394  [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]),
395  [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]),
396  [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]),
397  [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]),
398  [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]),
399  [src]"+&r"(src), [dst]"+&r"(dst),
400  [width]"+&r"(w), [height]"+&r"(h),
401  [tmp0]"=&r"(tmp[0]), [ftmp13]"=&f"(ftmp[16])
402  : [filter]"r"(filter_y),
403  [src_stride]"r"((mips_reg)src_stride),
404  [dst_stride]"r"((mips_reg)dst_stride),
405  [addr]"r"((mips_reg)addr)
406  : "memory"
407  );
408 }
409 
410 static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride,
411  uint8_t *dst, int32_t dst_stride,
412  int32_t w, int32_t h)
413 {
414  double ftmp[4];
415  uint32_t tmp[2];
417  src_stride -= w;
418  dst_stride -= w;
419 
420  __asm__ volatile (
421  "move %[tmp1], %[width] \n\t"
422  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
423  "li %[tmp0], 0x10001 \n\t"
424  "dmtc1 %[tmp0], %[ftmp3] \n\t"
425  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
426  "1: \n\t"
427  MMI_ULDC1(%[ftmp1], %[src], 0x00)
428  MMI_ULDC1(%[ftmp2], %[dst], 0x00)
429  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
430  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
431  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
432  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
433  "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
434  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
435  "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
436  PTR_ADDIU "%[width], %[width], -0x04 \n\t"
437  PTR_ADDIU "%[dst], %[dst], 0x04 \n\t"
438  PTR_ADDIU "%[src], %[src], 0x04 \n\t"
439  "bnez %[width], 1b \n\t"
440  "move %[width], %[tmp1] \n\t"
441  PTR_ADDU "%[dst], %[dst], %[dst_stride] \n\t"
442  PTR_ADDU "%[src], %[src], %[src_stride] \n\t"
443  PTR_ADDIU "%[height], %[height], -0x01 \n\t"
444  "bnez %[height], 1b \n\t"
446  [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
447  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
448  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
449  [src]"+&r"(src), [dst]"+&r"(dst),
450  [width]"+&r"(w), [height]"+&r"(h)
451  : [src_stride]"r"((mips_reg)src_stride),
452  [dst_stride]"r"((mips_reg)dst_stride)
453  : "memory"
454  );
455 }
456 
457 static const int16_t vp9_subpel_filters_mmi[3][15][8] = {
458  [FILTER_8TAP_REGULAR] = {
459  {0, 1, -5, 126, 8, -3, 1, 0},
460  {-1, 3, -10, 122, 18, -6, 2, 0},
461  {-1, 4, -13, 118, 27, -9, 3, -1},
462  {-1, 4, -16, 112, 37, -11, 4, -1},
463  {-1, 5, -18, 105, 48, -14, 4, -1},
464  {-1, 5, -19, 97, 58, -16, 5, -1},
465  {-1, 6, -19, 88, 68, -18, 5, -1},
466  {-1, 6, -19, 78, 78, -19, 6, -1},
467  {-1, 5, -18, 68, 88, -19, 6, -1},
468  {-1, 5, -16, 58, 97, -19, 5, -1},
469  {-1, 4, -14, 48, 105, -18, 5, -1},
470  {-1, 4, -11, 37, 112, -16, 4, -1},
471  {-1, 3, -9, 27, 118, -13, 4, -1},
472  {0, 2, -6, 18, 122, -10, 3, -1},
473  {0, 1, -3, 8, 126, -5, 1, 0},
474  }, [FILTER_8TAP_SHARP] = {
475  {-1, 3, -7, 127, 8, -3, 1, 0},
476  {-2, 5, -13, 125, 17, -6, 3, -1},
477  {-3, 7, -17, 121, 27, -10, 5, -2},
478  {-4, 9, -20, 115, 37, -13, 6, -2},
479  {-4, 10, -23, 108, 48, -16, 8, -3},
480  {-4, 10, -24, 100, 59, -19, 9, -3},
481  {-4, 11, -24, 90, 70, -21, 10, -4},
482  {-4, 11, -23, 80, 80, -23, 11, -4},
483  {-4, 10, -21, 70, 90, -24, 11, -4},
484  {-3, 9, -19, 59, 100, -24, 10, -4},
485  {-3, 8, -16, 48, 108, -23, 10, -4},
486  {-2, 6, -13, 37, 115, -20, 9, -4},
487  {-2, 5, -10, 27, 121, -17, 7, -3},
488  {-1, 3, -6, 17, 125, -13, 5, -2},
489  {0, 1, -3, 8, 127, -7, 3, -1},
490  }, [FILTER_8TAP_SMOOTH] = {
491  {-3, -1, 32, 64, 38, 1, -3, 0},
492  {-2, -2, 29, 63, 41, 2, -3, 0},
493  {-2, -2, 26, 63, 43, 4, -4, 0},
494  {-2, -3, 24, 62, 46, 5, -4, 0},
495  {-2, -3, 21, 60, 49, 7, -4, 0},
496  {-1, -4, 18, 59, 51, 9, -4, 0},
497  {-1, -4, 16, 57, 53, 12, -4, -1},
498  {-1, -4, 14, 55, 55, 14, -4, -1},
499  {-1, -4, 12, 53, 57, 16, -4, -1},
500  {0, -4, 9, 51, 59, 18, -4, -1},
501  {0, -4, 7, 49, 60, 21, -3, -2},
502  {0, -4, 5, 46, 62, 24, -3, -2},
503  {0, -4, 4, 43, 63, 26, -2, -2},
504  {0, -3, 2, 41, 63, 29, -2, -2},
505  {0, -3, 1, 38, 64, 32, -1, -3},
506  }
507 };
508 
/*
 * Generates the six public 8-tap motion-compensation entry points for one
 * block width and one filter type:
 *
 *   ff_{put,avg}_8tap_<TYPE>_<SIZE>{h,v,hv}_mmi(dst, dststride,
 *                                               src, srcstride, h, mx, my)
 *
 * SIZE     block width in pixels (passed on as w, so it has to be a
 *          positive multiple of 4).
 * TYPE     token pasted into the function names.
 * TYPE_IDX first index into vp9_subpel_filters_mmi.
 *
 * mx / my select the filter row with "- 1"; the variants that read them
 * therefore expect values in 1..15. No range check is done here.
 * NOTE(review): presumably guaranteed by the dispatch code - confirm.
 *
 * The table holds int16_t taps while the horizontal helpers take
 * const uint16_t *; that conversion is spelled out once per function instead
 * of relying on an implicit conversion between incompatible pointer types.
 *
 * The hv variants filter h + 7 rows horizontally into a 64-byte-pitch
 * scratch buffer (the vertical filter needs 3 rows above and 4 below), then
 * filter that buffer vertically. The scratch buffers carry 8 extra bytes
 * because the helpers fetch pixels with MMI_ULDC1 while consuming four per
 * step; for a 64-wide, 64-high block the last fetch would otherwise extend
 * past the end of an exactly-sized buffer (assuming MMI_ULDC1 is an 8-byte
 * load - see mmiutils.h).
 */
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)                           \
void ff_put_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,     \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my)                 \
{                                                                              \
    const uint16_t *filter =                                                   \
        (const uint16_t *)vp9_subpel_filters_mmi[TYPE_IDX][mx - 1];            \
                                                                               \
    convolve_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h);       \
}                                                                              \
                                                                               \
void ff_put_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,     \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my)                 \
{                                                                              \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my - 1];          \
                                                                               \
    /* Tap 0 of the vertical filter sits 3 rows above the current one. */      \
    src -= (3 * srcstride);                                                    \
    convolve_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h);        \
}                                                                              \
                                                                               \
void ff_put_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,    \
                                         const uint8_t *src,                   \
                                         ptrdiff_t srcstride,                  \
                                         int h, int mx, int my)                \
{                                                                              \
    const uint16_t *hfilter =                                                  \
        (const uint16_t *)vp9_subpel_filters_mmi[TYPE_IDX][mx - 1];            \
    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my - 1];         \
                                                                               \
    int tmp_h = h + 7;                                                         \
    uint8_t temp[64 * 71 + 8];                                                 \
                                                                               \
    src -= (3 * srcstride);                                                    \
    convolve_horiz_mmi(src, srcstride, temp, 64, hfilter, SIZE, tmp_h);        \
    convolve_vert_mmi(temp, 64, dst, dststride, vfilter, SIZE, h);             \
}                                                                              \
                                                                               \
void ff_avg_8tap_##TYPE##_##SIZE##h_mmi(uint8_t *dst, ptrdiff_t dststride,     \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my)                 \
{                                                                              \
    const uint16_t *filter =                                                   \
        (const uint16_t *)vp9_subpel_filters_mmi[TYPE_IDX][mx - 1];            \
                                                                               \
    convolve_avg_horiz_mmi(src, srcstride, dst, dststride, filter, SIZE, h);   \
}                                                                              \
                                                                               \
void ff_avg_8tap_##TYPE##_##SIZE##v_mmi(uint8_t *dst, ptrdiff_t dststride,     \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my)                 \
{                                                                              \
    const int16_t *filter = vp9_subpel_filters_mmi[TYPE_IDX][my - 1];          \
                                                                               \
    /* Tap 0 of the vertical filter sits 3 rows above the current one. */      \
    src -= (3 * srcstride);                                                    \
    convolve_avg_vert_mmi(src, srcstride, dst, dststride, filter, SIZE, h);    \
}                                                                              \
                                                                               \
void ff_avg_8tap_##TYPE##_##SIZE##hv_mmi(uint8_t *dst, ptrdiff_t dststride,    \
                                         const uint8_t *src,                   \
                                         ptrdiff_t srcstride,                  \
                                         int h, int mx, int my)                \
{                                                                              \
    const uint16_t *hfilter =                                                  \
        (const uint16_t *)vp9_subpel_filters_mmi[TYPE_IDX][mx - 1];            \
    const int16_t *vfilter = vp9_subpel_filters_mmi[TYPE_IDX][my - 1];         \
                                                                               \
    uint8_t temp1[64 * 64 + 8];                                                \
    uint8_t temp2[64 * 71 + 8];                                                \
    int tmp_h = h + 7;                                                         \
                                                                               \
    src -= (3 * srcstride);                                                    \
    convolve_horiz_mmi(src, srcstride, temp2, 64, hfilter, SIZE, tmp_h);       \
    convolve_vert_mmi(temp2, 64, temp1, 64, vfilter, SIZE, h);                 \
    /* The filtered block is blended into dst in a separate pass. */           \
    convolve_avg_mmi(temp1, 64, dst, dststride, SIZE, h);                      \
}
583 
589 
595 
601 
602 #undef VP9_8TAP_MIPS_MMI_FUNC
convolve_avg_vert_mmi
static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:313
filter1
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
Definition: dcadsp.c:360
convolve_horiz_mmi
static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:73
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
w
uint8_t w
Definition: llviddspenc.c:38
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
convolve_vert_mmi
static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:144
GET_DATA_H_MMI
#define GET_DATA_H_MMI
Definition: vp9_mc_mmi.c:25
convolve_avg_horiz_mmi
static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const uint16_t *filter_x, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:231
mips_reg
#define mips_reg
Definition: asmdefs.h:46
vp9_subpel_filters_mmi
static const int16_t vp9_subpel_filters_mmi[3][15][8]
Definition: vp9_mc_mmi.c:457
mmiutils.h
FILTER_8TAP_SHARP
@ FILTER_8TAP_SHARP
Definition: vp9.h:67
width
#define width
vp9dsp_mips.h
vp9dsp.h
ROUND_POWER_OF_TWO_MMI
#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0)
brief: (((value) + (1 << ((n) - 1))) >> (n)) fr_i0: src & dst fr_i1: Operand number fr_t0,...
Definition: mmiutils.h:383
FILTER_8TAP_REGULAR
@ FILTER_8TAP_REGULAR
Definition: vp9.h:66
GET_DATA_V_MMI
#define GET_DATA_V_MMI
Definition: vp9_mc_mmi.c:49
height
#define height
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:52
DECLARE_VAR_ALL64
#define DECLARE_VAR_ALL64
Definition: mmiutils.h:39
FILTER_8TAP_SMOOTH
@ FILTER_8TAP_SMOOTH
Definition: vp9.h:65
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
smooth
static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifo *values)
Definition: vf_deshake_opencl.c:887
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
convolve_avg_mmi
static void convolve_avg_mmi(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t w, int32_t h)
Definition: vp9_mc_mmi.c:410
int32_t
int32_t
Definition: audioconvert.c:56
VP9_8TAP_MIPS_MMI_FUNC
#define VP9_8TAP_MIPS_MMI_FUNC(SIZE, TYPE, TYPE_IDX)
Definition: vp9_mc_mmi.c:509
h
h
Definition: vp9dsp_template.c:2070
RESTRICT_ASM_ALL64
#define RESTRICT_ASM_ALL64
Definition: mmiutils.h:40