Go to the documentation of this file.
/*
 * Scratch operands shared by the assembly helper macros in this file.
 *
 * DECLARE_*      expand to a local variable declaration (db_1, db_2, it_1).
 * RESTRICT_ASM_* expand to the matching named output operand for an
 *                extended asm statement.  The "=&f" / "=&r" constraints mark
 *                the operand as write-only and early-clobber, placed in a
 *                floating-point register ("f") or a general register ("r").
 *
 * Any asm statement that expands MMI_PCMPGTUB or MMI_BTOH references
 * %[db_1] and %[db_2], so it needs both DOUBLE operands in its output list.
 */
31 #define DECLARE_DOUBLE_1 double db_1
32 #define DECLARE_DOUBLE_2 double db_2
33 #define DECLARE_UINT32_T uint32_t it_1
34 #define RESTRICT_ASM_DOUBLE_1 [db_1]"=&f"(db_1)
35 #define RESTRICT_ASM_DOUBLE_2 [db_2]"=&f"(db_2)
36 #define RESTRICT_ASM_UINT32_T [it_1]"=&r"(it_1)
/*
 * Per-byte unsigned "greater than" compare: dst = (src1 > src2).
 *
 * Built from four instructions:
 *   db_1 = (src1 == src2)
 *   db_2 = (max(src1, src2) == src1)      i.e. src1 >= src2
 *   dst  = db_2 XOR db_1                  i.e. src1 >  src2
 * (assuming the compare instructions yield all-ones / all-zero byte lanes).
 *
 * Clobbers %[db_1] and %[db_2].  dst is written only by the last
 * instruction, so it may name the same register as src1 or src2.
 * The arguments are stringified, so they must be asm operand references
 * such as %[mask].
 */
38 #define MMI_PCMPGTUB(dst, src1, src2) \
39 "pcmpeqb %[db_1], "#src1", "#src2" \n\t" \
40 "pmaxub %[db_2], "#src1", "#src2" \n\t" \
41 "pcmpeqb %[db_2], %[db_2], "#src1" \n\t" \
42 "pxor "#dst", %[db_2], %[db_1] \n\t"
/*
 * Widen the eight signed bytes of src to signed halfwords.
 *
 *   db_1  = 0
 *   db_2  = (0 > src) per signed byte, i.e. a sign mask for each byte
 *   dst_r = low  four bytes of src interleaved with their sign mask
 *   dst_l = high four bytes of src interleaved with their sign mask
 *
 * Clobbers %[db_1] and %[db_2].  dst_r is written before src is read for
 * the last time, so dst_r must not be the same register as src.
 */
44 #define MMI_BTOH(dst_l, dst_r, src) \
45 "pxor %[db_1], %[db_1], %[db_1] \n\t" \
46 "pcmpgtb %[db_2], %[db_1], "#src" \n\t" \
47 "punpcklbh "#dst_r", "#src", %[db_2] \n\t" \
48 "punpckhbh "#dst_l", "#src", %[db_2] \n\t"
/*
 * Loop-filter core operating on eight pixels at once; expands to a
 * sequence of asm strings meant to be pasted inside an extended asm body.
 *
 * Operands expected from the enclosing asm statement:
 *   inputs         : %[thresh], %[e], %[i]  (general registers, moved to FP
 *                    registers with dmtc1 and replicated across the lanes)
 *   pixel rows     : %[p3] %[p2] %[p1] %[p0] %[q0] %[q1] %[q2] %[q3]
 *                    (p2..q2 are modified in place; p3 and q3 are only read)
 *   scratch        : %[ftmp0]..%[ftmp7], %[hev], %[mask], %[tmp0],
 *                    plus %[db_1]/%[db_2] via MMI_PCMPGTUB and MMI_BTOH
 *
 * Steps, in order:
 *  1. hev  = max(|p1-p0|, |q1-q0|) > thresh.
 *  2. mask: ftmp1 = sat(2*|p0-q0|) + shifted |p1-q1| (the shift is done by
 *     PSRLB_MMI, defined elsewhere; presumably >> 1 - TODO confirm), compared
 *     against e; the result is max-merged with |p1-p0|, |q1-q0|, |p3-p2|,
 *     |p2-p1|, |q3-q2|, |q2-q1|, compared against i, and finally inverted,
 *     so mask lanes are set where no limit is exceeded.
 *  3. p2..q2 are XORed with 0x80 in every byte (unsigned -> signed domain).
 *  4. w = saturating byte pack of (p1 - q1) + 3*(q0 - p0), computed in
 *     halfwords, then ANDed with mask.
 *  5. Lanes where hev is set: f = w & hev;
 *     q0 -= (f + 4) shifted, p0 += (f + 3) shifted (saturating adds; the
 *     shifts go through PSRAB_MMI, defined elsewhere; presumably >> 3 -
 *     TODO confirm).
 *  6. Lanes where hev is clear: w &= ~hev, then with 16-bit arithmetic
 *       a = (27*w + 63) >> 7  ->  q0 -= a, p0 += a
 *       a = (18*w + 63) >> 7  ->  q1 -= a, p1 += a
 *       a = ( 9*w + 63) >> 7  ->  q2 -= a, p2 += a   (9*w as (w << 3) + w)
 *     each followed by XOR with 0x80 to return to the unsigned domain.
 *
 * The instruction order matters: scratch registers are reused heavily
 * (for example ftmp0 holds the rounding constant 0x003f from step 6 on,
 * and ftmp7 holds the 0x80 bias from step 3 to the end).
 */
50 #define MMI_VP8_LOOP_FILTER \
52 "dmtc1 %[thresh], %[ftmp3] \n\t" \
53 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
54 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
55 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
56 "pasubub %[ftmp0], %[p1], %[p0] \n\t" \
57 "pasubub %[ftmp1], %[q1], %[q0] \n\t" \
58 "pmaxub %[ftmp0], %[ftmp0], %[ftmp1] \n\t" \
59 MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3]) \
61 "pasubub %[ftmp1], %[p0], %[q0] \n\t" \
62 "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
63 "pasubub %[ftmp2], %[p1], %[q1] \n\t" \
64 "li %[tmp0], 0x09 \n\t" \
65 "dmtc1 %[tmp0], %[ftmp3] \n\t" \
66 PSRLB_MMI(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5], %[ftmp2]) \
67 "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
68 "dmtc1 %[e], %[ftmp3] \n\t" \
69 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
70 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
71 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
72 MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3]) \
73 "pmaxub %[mask], %[mask], %[ftmp0] \n\t" \
74 "pasubub %[ftmp1], %[p3], %[p2] \n\t" \
75 "pasubub %[ftmp2], %[p2], %[p1] \n\t" \
76 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
77 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
78 "pasubub %[ftmp1], %[q3], %[q2] \n\t" \
79 "pasubub %[ftmp2], %[q2], %[q1] \n\t" \
80 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
81 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
82 "dmtc1 %[i], %[ftmp3] \n\t" \
83 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
84 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
85 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
86 MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3]) \
87 "pcmpeqw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
88 "pxor %[mask], %[mask], %[ftmp3] \n\t" \
90 "li %[tmp0], 0x80808080 \n\t" \
91 "dmtc1 %[tmp0], %[ftmp7] \n\t" \
92 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \
93 "pxor %[p2], %[p2], %[ftmp7] \n\t" \
94 "pxor %[p1], %[p1], %[ftmp7] \n\t" \
95 "pxor %[p0], %[p0], %[ftmp7] \n\t" \
96 "pxor %[q0], %[q0], %[ftmp7] \n\t" \
97 "pxor %[q1], %[q1], %[ftmp7] \n\t" \
98 "pxor %[q2], %[q2], %[ftmp7] \n\t" \
99 "psubsb %[ftmp4], %[p1], %[q1] \n\t" \
100 "psubb %[ftmp5], %[q0], %[p0] \n\t" \
101 MMI_BTOH(%[ftmp1], %[ftmp0], %[ftmp5]) \
102 MMI_BTOH(%[ftmp3], %[ftmp2], %[ftmp4]) \
104 "paddh %[ftmp5], %[ftmp0], %[ftmp0] \n\t" \
105 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" \
106 "paddh %[ftmp0], %[ftmp2], %[ftmp0] \n\t" \
108 "paddh %[ftmp5], %[ftmp1], %[ftmp1] \n\t" \
109 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \
110 "paddh %[ftmp1], %[ftmp3], %[ftmp1] \n\t" \
112 "packsshb %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \
113 "pand %[ftmp1], %[ftmp1], %[mask] \n\t" \
114 "pand %[ftmp2], %[ftmp1], %[hev] \n\t" \
115 "li %[tmp0], 0x04040404 \n\t" \
116 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
117 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
118 "paddsb %[ftmp3], %[ftmp2], %[ftmp0] \n\t" \
119 "li %[tmp0], 0x0B \n\t" \
120 "dmtc1 %[tmp0], %[ftmp4] \n\t" \
121 PSRAB_MMI(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], %[ftmp3]) \
122 "li %[tmp0], 0x03030303 \n\t" \
123 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
124 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
125 "paddsb %[ftmp4], %[ftmp2], %[ftmp0] \n\t" \
126 "li %[tmp0], 0x0B \n\t" \
127 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
128 PSRAB_MMI(%[ftmp4], %[ftmp2], %[ftmp5], %[ftmp6], %[ftmp4]) \
129 "psubsb %[q0], %[q0], %[ftmp3] \n\t" \
130 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
132 "pcmpeqw %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
133 "pxor %[hev], %[hev], %[ftmp0] \n\t" \
134 "pand %[ftmp1], %[ftmp1], %[hev] \n\t" \
135 MMI_BTOH(%[ftmp5], %[ftmp6], %[ftmp1]) \
136 "li %[tmp0], 0x07 \n\t" \
137 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
138 "li %[tmp0], 0x001b001b \n\t" \
139 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
140 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
141 "li %[tmp0], 0x003f003f \n\t" \
142 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
143 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
145 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
146 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
147 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
149 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
150 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
151 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
153 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
154 "psubsb %[q0], %[q0], %[ftmp4] \n\t" \
155 "pxor %[q0], %[q0], %[ftmp7] \n\t" \
156 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
157 "pxor %[p0], %[p0], %[ftmp7] \n\t" \
158 "li %[tmp0], 0x00120012 \n\t" \
159 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
160 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
162 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
163 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
164 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
166 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
167 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
168 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
170 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
171 "psubsb %[q1], %[q1], %[ftmp4] \n\t" \
172 "pxor %[q1], %[q1], %[ftmp7] \n\t" \
173 "paddsb %[p1], %[p1], %[ftmp4] \n\t" \
174 "pxor %[p1], %[p1], %[ftmp7] \n\t" \
175 "li %[tmp0], 0x03 \n\t" \
176 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
178 "psllh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
179 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" \
180 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
181 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
183 "psllh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
184 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
185 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
186 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
188 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
189 "psubsb %[q2], %[q2], %[ftmp4] \n\t" \
190 "pxor %[q2], %[q2], %[ftmp7] \n\t" \
191 "paddsb %[p2], %[p2], %[ftmp4] \n\t" \
192 "pxor %[p2], %[p2], %[ftmp7] \n\t"
/*
 * Horizontal 6-tap filter for one row of four pixels.
 *
 * Per output pixel, in 16-bit lanes with saturating adds/subtracts:
 *   filter2*src[0] - filter1*src[-1] + filter0*src[-2]
 *   + filter3*src[1] - filter4*src[2] + filter5*src[3] + ff_pw_64
 * then an arithmetic right shift by %[ftmp4], an unsigned-saturating pack
 * to bytes and a store to dst.  This is the same tap layout as FILTER_6TAP.
 *
 * src / dst are asm operand references for the source and destination
 * addresses.  Expected from the enclosing asm statement:
 *   %[ftmp0]  used as the zero-extension operand when unpacking bytes, so
 *             it presumably must hold zero on entry - TODO confirm at callers
 *   %[ftmp4]  shift count (presumably 7, matching the +64 rounding)
 *   %[filter0]..%[filter5], %[ff_pw_64]  halfword constants
 * Clobbers %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5].
 * MMI_ULWC1 / MMI_SWC1 are defined elsewhere (presumably a 32-bit unaligned
 * load and a 32-bit store).
 */
194 #define PUT_VP8_EPEL4_H6_MMI(src, dst) \
195 MMI_ULWC1(%[ftmp1], src, 0x00) \
196 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
197 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
199 MMI_ULWC1(%[ftmp1], src, -0x01) \
200 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
201 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
202 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
204 MMI_ULWC1(%[ftmp1], src, -0x02) \
205 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
206 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
207 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
209 MMI_ULWC1(%[ftmp1], src, 0x01) \
210 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
211 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
213 MMI_ULWC1(%[ftmp1], src, 0x02) \
214 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
215 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
216 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
218 MMI_ULWC1(%[ftmp1], src, 0x03) \
219 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
220 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
221 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
223 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
224 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
225 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
226 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
228 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Horizontal 4-tap filter for one row of four pixels.
 *
 * Per output pixel, in 16-bit lanes:
 *   filter2*src[0] - filter1*src[-1] + filter3*src[1] - filter4*src[2]
 *   + ff_pw_64
 * then an arithmetic right shift by %[ftmp4], an unsigned-saturating pack
 * to bytes and a store to dst.  This is the same tap layout as FILTER_4TAP.
 *
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[filter1]..%[filter4], %[ff_pw_64].
 * Clobbers %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5].
 *
 * NOTE(review): the filter4 term is subtracted with the non-saturating
 * "psubh", whereas every sibling macro (H6, V4, V6 and the 8-wide variants)
 * uses the saturating "psubsh" at the same step.  Confirm whether this is
 * intentional.
 */
231 #define PUT_VP8_EPEL4_H4_MMI(src, dst) \
232 MMI_ULWC1(%[ftmp1], src, 0x00) \
233 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
234 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
236 MMI_ULWC1(%[ftmp1], src, -0x01) \
237 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
238 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
239 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
241 MMI_ULWC1(%[ftmp1], src, 0x01) \
242 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
243 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
245 MMI_ULWC1(%[ftmp1], src, 0x02) \
246 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
247 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
248 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
250 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
252 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
253 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
255 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
256 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Vertical 6-tap filter for one row of four pixels.
 *
 * Per output pixel, in 16-bit lanes with saturating adds/subtracts:
 *   filter2*src[0] - filter1*src[-srcstride] + filter0*src[-2*srcstride]
 *   + filter3*src[srcstride] - filter4*src[2*srcstride]
 *   + filter5*src[3*srcstride] + ff_pw_64
 * then an arithmetic right shift by %[ftmp4], an unsigned-saturating pack
 * to bytes and a store to dst.
 *
 * src1 is a scratch address register: it is overwritten with
 * src -/+ k*srcstride before each neighbouring row is loaded, while src
 * itself is left unchanged.
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[filter0]..%[filter5], %[ff_pw_64].
 * Clobbers %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5] and src1.
 */
259 #define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride) \
260 MMI_ULWC1(%[ftmp1], src, 0x00) \
261 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
262 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
264 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
265 MMI_ULWC1(%[ftmp1], src1, 0x00) \
266 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
267 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
268 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
270 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
271 MMI_ULWC1(%[ftmp1], src1, 0x00) \
272 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
273 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
274 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
276 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
277 MMI_ULWC1(%[ftmp1], src1, 0x00) \
278 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
279 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
281 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
282 MMI_ULWC1(%[ftmp1], src1, 0x00) \
283 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
284 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
285 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
287 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
288 MMI_ULWC1(%[ftmp1], src1, 0x00) \
289 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
290 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
291 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
293 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
295 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
296 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
297 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
299 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Vertical 4-tap filter for one row of four pixels.
 *
 * Per output pixel, in 16-bit lanes with saturating adds/subtracts:
 *   filter2*src[0] - filter1*src[-srcstride]
 *   + filter3*src[srcstride] - filter4*src[2*srcstride] + ff_pw_64
 * then an arithmetic right shift by %[ftmp4], an unsigned-saturating pack
 * to bytes and a store to dst.
 *
 * src1 is a scratch address register recomputed from src for each
 * neighbouring row; src itself is left unchanged.
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[filter1]..%[filter4], %[ff_pw_64].
 * Clobbers %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5] and src1.
 */
302 #define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride) \
303 MMI_ULWC1(%[ftmp1], src, 0x00) \
304 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
305 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
307 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
308 MMI_ULWC1(%[ftmp1], src1, 0x00) \
309 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
310 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
311 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
313 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
314 MMI_ULWC1(%[ftmp1], src1, 0x00) \
315 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
316 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
318 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
319 MMI_ULWC1(%[ftmp1], src1, 0x00) \
320 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
321 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
322 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
324 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
326 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
327 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
328 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
330 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Horizontal 6-tap filter for one row of eight pixels.
 *
 * Same arithmetic as the 4-pixel H6 variant, but each 8-byte load is split
 * into low and high halves (punpcklbh / punpckhbh) that are filtered in
 * parallel: %[ftmp5] / %[ftmp6] accumulate the two halves and
 * %[ftmp7] / %[ftmp8] hold the partial sum of taps 0..2.
 *   filter2*src[0] - filter1*src[-1] + filter0*src[-2]
 *   + filter3*src[1] - filter4*src[2] + filter5*src[3] + ff_pw_64
 * then an arithmetic right shift by %[ftmp4]; both halves are packed with
 * unsigned saturation into %[ftmp1] and stored to dst.
 *
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[filter0]..%[filter5], %[ff_pw_64].
 * Clobbers %[ftmp1]..%[ftmp3] and %[ftmp5]..%[ftmp8].
 * MMI_ULDC1 / MMI_SDC1 are defined elsewhere (presumably a 64-bit unaligned
 * load and a 64-bit store).
 */
333 #define PUT_VP8_EPEL8_H6_MMI(src, dst) \
334 MMI_ULDC1(%[ftmp1], src, 0x00) \
335 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
336 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
337 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
338 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
340 MMI_ULDC1(%[ftmp1], src, -0x01) \
341 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
342 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
343 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
344 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
345 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
346 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
348 MMI_ULDC1(%[ftmp1], src, -0x02) \
349 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
350 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
351 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
352 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
353 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
354 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
356 MMI_ULDC1(%[ftmp1], src, 0x01) \
357 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
358 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
359 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
360 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
362 MMI_ULDC1(%[ftmp1], src, 0x02) \
363 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
364 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
365 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
366 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
367 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
368 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
370 MMI_ULDC1(%[ftmp1], src, 0x03) \
371 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
372 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
373 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
374 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
375 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
376 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
378 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
379 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
381 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
382 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
383 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
384 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
385 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
387 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Horizontal 4-tap filter for one row of eight pixels.
 *
 * Each 8-byte load is split into low and high halves that are filtered in
 * parallel (%[ftmp5] / %[ftmp6]); %[ftmp7] / %[ftmp8] hold the partial sum
 * filter2*src[0] - filter1*src[-1].
 *   filter2*src[0] - filter1*src[-1] + filter3*src[1] - filter4*src[2]
 *   + ff_pw_64
 * then an arithmetic right shift by %[ftmp4]; both halves are packed with
 * unsigned saturation into %[ftmp1] and stored to dst.
 *
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[filter1]..%[filter4], %[ff_pw_64].
 * Clobbers %[ftmp1]..%[ftmp3] and %[ftmp5]..%[ftmp8].
 */
390 #define PUT_VP8_EPEL8_H4_MMI(src, dst) \
391 MMI_ULDC1(%[ftmp1], src, 0x00) \
392 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
393 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
394 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
395 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
397 MMI_ULDC1(%[ftmp1], src, -0x01) \
398 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
399 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
400 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
401 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
402 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
403 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
405 MMI_ULDC1(%[ftmp1], src, 0x01) \
406 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
407 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
408 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
409 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
411 MMI_ULDC1(%[ftmp1], src, 0x02) \
412 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
413 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
414 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
415 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
416 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
417 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
419 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
420 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
422 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
423 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
424 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
425 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
427 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
428 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Vertical 6-tap filter for one row of eight pixels.
 *
 * Each 8-byte row is split into low and high halves that are filtered in
 * parallel (%[ftmp5] / %[ftmp6]); %[ftmp7] / %[ftmp8] hold the partial sum
 * of the three rows at and above src.
 *   filter2*src[0] - filter1*src[-srcstride] + filter0*src[-2*srcstride]
 *   + filter3*src[srcstride] - filter4*src[2*srcstride]
 *   + filter5*src[3*srcstride] + ff_pw_64
 * then an arithmetic right shift by %[ftmp4]; both halves are packed with
 * unsigned saturation into %[ftmp1] and stored to dst.
 *
 * src1 is a scratch address register recomputed from src for each
 * neighbouring row; src itself is left unchanged.
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[filter0]..%[filter5], %[ff_pw_64].
 * Clobbers %[ftmp1]..%[ftmp3], %[ftmp5]..%[ftmp8] and src1.
 */
431 #define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride) \
432 MMI_ULDC1(%[ftmp1], src, 0x00) \
433 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
434 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
435 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
436 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
438 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
439 MMI_ULDC1(%[ftmp1], src1, 0x00) \
440 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
441 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
442 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
443 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
444 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
445 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
447 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
448 MMI_ULDC1(%[ftmp1], src1, 0x00) \
449 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
450 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
451 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
452 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
453 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
454 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
456 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
457 MMI_ULDC1(%[ftmp1], src1, 0x00) \
458 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
459 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
460 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
461 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
463 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
464 MMI_ULDC1(%[ftmp1], src1, 0x00) \
465 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
466 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
467 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
468 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
469 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
470 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
472 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
473 MMI_ULDC1(%[ftmp1], src1, 0x00) \
474 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
475 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
476 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
477 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
478 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
479 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
481 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
482 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
484 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
485 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
486 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
487 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
488 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
490 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Vertical 4-tap filter for one row of eight pixels.
 *
 * Each 8-byte row is split into low and high halves that are filtered in
 * parallel (%[ftmp5] / %[ftmp6]); %[ftmp7] / %[ftmp8] hold the partial sum
 * filter2*src[0] - filter1*src[-srcstride].
 *   filter2*src[0] - filter1*src[-srcstride]
 *   + filter3*src[srcstride] - filter4*src[2*srcstride] + ff_pw_64
 * then an arithmetic right shift by %[ftmp4]; both halves are packed with
 * unsigned saturation into %[ftmp1] and stored to dst.
 *
 * src1 is a scratch address register recomputed from src for each
 * neighbouring row; src itself is left unchanged.
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[filter1]..%[filter4], %[ff_pw_64].
 * Clobbers %[ftmp1]..%[ftmp3], %[ftmp5]..%[ftmp8] and src1.
 */
493 #define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride) \
494 MMI_ULDC1(%[ftmp1], src, 0x00) \
495 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
496 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
497 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
498 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
500 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
501 MMI_ULDC1(%[ftmp1], src1, 0x00) \
502 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
503 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
504 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
505 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
506 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
507 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
509 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
510 MMI_ULDC1(%[ftmp1], src1, 0x00) \
511 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
512 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
513 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
514 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
516 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
517 MMI_ULDC1(%[ftmp1], src1, 0x00) \
518 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
519 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
520 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
521 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
522 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
523 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
525 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
526 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
528 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
529 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
530 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
531 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
532 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
534 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Horizontal bilinear interpolation for one row of eight pixels.
 *
 * Per output pixel, in 16-bit lanes:
 *   (a*src[0] + b*src[1] + ff_pw_4) >> %[ftmp4]
 * The 8-byte loads are split into low and high halves (%[ftmp5] / %[ftmp6]),
 * which are packed with unsigned saturation into %[ftmp1] and stored to dst.
 *
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count,
 * presumably 3 to match the +4 rounding), %[a], %[b], %[ff_pw_4].
 * Clobbers %[ftmp1]..%[ftmp3], %[ftmp5], %[ftmp6].
 */
537 #define PUT_VP8_BILINEAR8_H_MMI(src, dst) \
538 MMI_ULDC1(%[ftmp1], src, 0x00) \
539 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
540 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
541 "pmullh %[ftmp5], %[ftmp2], %[a] \n\t" \
542 "pmullh %[ftmp6], %[ftmp3], %[a] \n\t" \
544 MMI_ULDC1(%[ftmp1], src, 0x01) \
545 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
546 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
547 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
548 "pmullh %[ftmp3], %[ftmp3], %[b] \n\t" \
549 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
550 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
552 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
553 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
554 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
555 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
557 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
558 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Horizontal bilinear interpolation for one row of four pixels.
 *
 * Per output pixel, in 16-bit lanes:
 *   (a*src[0] + b*src[1] + ff_pw_4) >> %[ftmp4]
 * The result is packed with unsigned saturation into %[ftmp1] and stored
 * to dst.
 *
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[a], %[b], %[ff_pw_4].
 * Clobbers %[ftmp1], %[ftmp2], %[ftmp3].
 */
561 #define PUT_VP8_BILINEAR4_H_MMI(src, dst) \
562 MMI_ULWC1(%[ftmp1], src, 0x00) \
563 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
564 "pmullh %[ftmp3], %[ftmp2], %[a] \n\t" \
566 MMI_ULWC1(%[ftmp1], src, 0x01) \
567 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
568 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
569 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
571 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
572 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
574 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
575 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Vertical bilinear interpolation for one row of eight pixels.
 *
 * Per output pixel, in 16-bit lanes:
 *   (c*src[0] + d*src[sstride] + ff_pw_4) >> %[ftmp4]
 * The 8-byte rows are split into low and high halves (%[ftmp5] / %[ftmp6]),
 * which are packed with unsigned saturation into %[ftmp1] and stored to dst.
 *
 * src1 is a scratch address register set to src + sstride; src itself is
 * left unchanged.
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[c], %[d], %[ff_pw_4].
 * Clobbers %[ftmp1]..%[ftmp3], %[ftmp5], %[ftmp6] and src1.
 */
578 #define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride) \
579 MMI_ULDC1(%[ftmp1], src, 0x00) \
580 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
581 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
582 "pmullh %[ftmp5], %[ftmp2], %[c] \n\t" \
583 "pmullh %[ftmp6], %[ftmp3], %[c] \n\t" \
585 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
586 MMI_ULDC1(%[ftmp1], src1, 0x00) \
587 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
588 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
589 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
590 "pmullh %[ftmp3], %[ftmp3], %[d] \n\t" \
591 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
592 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
594 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
595 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
596 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
597 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
599 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
600 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Vertical bilinear interpolation for one row of four pixels.
 *
 * Per output pixel, in 16-bit lanes:
 *   (c*src[0] + d*src[sstride] + ff_pw_4) >> %[ftmp4]
 * The result is packed with unsigned saturation into %[ftmp1] and stored
 * to dst.
 *
 * src1 is a scratch address register set to src + sstride; src itself is
 * left unchanged.
 * Expected from the enclosing asm statement: %[ftmp0] (zero-extension
 * operand, presumably zero on entry - TODO confirm), %[ftmp4] (shift count),
 * %[c], %[d], %[ff_pw_4].
 * Clobbers %[ftmp1], %[ftmp2], %[ftmp3] and src1.
 */
603 #define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride) \
604 MMI_ULWC1(%[ftmp1], src, 0x00) \
605 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
606 "pmullh %[ftmp3], %[ftmp2], %[c] \n\t" \
608 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
609 MMI_ULWC1(%[ftmp1], src1, 0x00) \
610 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
611 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
612 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
614 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
615 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
617 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
618 MMI_SWC1(%[ftmp1], dst, 0x00)
622 {0x0000000000000000, 0x0006000600060006, 0x007b007b007b007b,
623 0x000c000c000c000c, 0x0001000100010001, 0x0000000000000000},
625 {0x0002000200020002, 0x000b000b000b000b, 0x006c006c006c006c,
626 0x0024002400240024, 0x0008000800080008, 0x0001000100010001},
628 {0x0000000000000000, 0x0009000900090009, 0x005d005d005d005d,
629 0x0032003200320032, 0x0006000600060006, 0x0000000000000000},
631 {0x0003000300030003, 0x0010001000100010, 0x004d004d004d004d,
632 0x004d004d004d004d, 0x0010001000100010, 0x0003000300030003},
634 {0x0000000000000000, 0x0006000600060006, 0x0032003200320032,
635 0x005d005d005d005d, 0x0009000900090009, 0x0000000000000000},
637 {0x0001000100010001, 0x0008000800080008, 0x0024002400240024,
638 0x006c006c006c006c, 0x000b000b000b000b, 0x0002000200020002},
640 {0x0000000000000000, 0x0001000100010001, 0x000c000c000c000c,
641 0x007b007b007b007b, 0x0006000600060006, 0x0000000000000000}
/*
 * Scalar 6-tap filter for the sample at index x along the given stride.
 *
 * Evaluates
 *   (F[2]*s[x] - F[1]*s[x-stride] + F[0]*s[x-2*stride]
 *    + F[3]*s[x+stride] - F[4]*s[x+2*stride] + F[5]*s[x+3*stride] + 64) >> 7
 * and uses the result as an index into `cm`.
 *
 * `x` and `cm` are not parameters: both must be in scope where the macro is
 * expanded (cm is presumably a clipping lookup table - TODO confirm).
 * `stride` is pasted unparenthesised next to `*`, so pass a simple
 * identifier or constant rather than an expression.
 */
645 #define FILTER_6TAP(src, F, stride) \
646 cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
647 F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \
648 F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]
/*
 * Scalar 4-tap filter for the sample at index x along the given stride.
 *
 * Evaluates
 *   (F[2]*s[x] - F[1]*s[x-stride] + F[3]*s[x+stride]
 *    - F[4]*s[x+2*stride] + 64) >> 7
 * and uses the result as an index into `cm`.  F[0] and F[5] are not read.
 *
 * `x` and `cm` are not parameters: both must be in scope where the macro is
 * expanded (cm is presumably a clipping lookup table - TODO confirm).
 * `stride` is pasted unparenthesised next to `*`, so pass a simple
 * identifier or constant rather than an expression.
 */
650 #define FILTER_4TAP(src, F, stride) \
651 cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
652 F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
655 { 0, 6, 123, 12, 1, 0 },
656 { 2, 11, 108, 36, 8, 1 },
657 { 0, 9, 93, 50, 6, 0 },
658 { 3, 16, 77, 77, 16, 3 },
659 { 0, 6, 50, 93, 9, 0 },
660 { 1, 8, 36, 108, 11, 2 },
661 { 0, 1, 12, 123, 6, 0 },
/*
 * Fixed-point multipliers with a 16-bit fractional part.
 *
 * MUL_20091(a) = ((a * 20091) >> 16) + a, i.e. a scaled by (1 + 20091/65536).
 * MUL_35468(a) =  (a * 35468) >> 16,      i.e. a scaled by 35468/65536.
 *
 * MUL_20091 evaluates its argument twice, so the argument must be free of
 * side effects.  The product is computed in the promoted type of `a`; the
 * result of >> on a negative product is implementation-defined in C.
 * (Presumably these are the transform constants of the inverse DCT -
 * TODO confirm against the callers.)
 */
664 #define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
665 #define MUL_35468(a) (((a) * 35468) >> 16)
/*
 * Looks up n + 0x80 in `cm` and subtracts 0x80 from the table entry.
 * `cm` is not a parameter and must be in scope where the macro is expanded
 * (presumably a table that clamps to 0..255, which would make this a clamp
 * of n to the signed 8-bit range - TODO confirm).
 */
668 #define clip_int8(n) (cm[(n) + 0x80] - 0x80)
685 f1 =
FFMIN(
a + 4, 127) >> 3;
686 f2 =
FFMIN(
a + 3, 127) >> 3;
709 f1 =
FFMIN(
a + 4, 127) >> 3;
710 f2 =
FFMIN(
a + 3, 127) >> 3;
757 a0 = (27 *
w + 63) >> 7;
758 a1 = (18 *
w + 63) >> 7;
759 a2 = (9 *
w + 63) >> 7;
788 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
799 MMI_ULDC1(%[
q0], %[
dst], 0x0)
800 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
801 MMI_ULDC1(%[p0], %[tmp0], 0x0)
802 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
803 MMI_ULDC1(%[p1], %[tmp0], 0x0)
804 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
805 MMI_ULDC1(%[p2], %[tmp0], 0x0)
806 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
807 MMI_ULDC1(%[p3], %[tmp0], 0x0)
808 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
809 MMI_ULDC1(%[
q1], %[tmp0], 0x0)
810 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
811 MMI_ULDC1(%[q2], %[tmp0], 0x0)
812 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
813 MMI_ULDC1(%[q3], %[tmp0], 0x0)
816 MMI_USDC1(%[
q0], %[
dst], 0x0)
817 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
818 MMI_USDC1(%[p0], %[tmp0], 0x0)
819 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
820 MMI_USDC1(%[p1], %[tmp0], 0x0)
821 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
822 MMI_USDC1(%[p2], %[tmp0], 0x0)
823 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
824 MMI_USDC1(%[
q1], %[tmp0], 0x0)
825 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
826 MMI_USDC1(%[q2], %[tmp0], 0x0)
828 [p3]
"=&f"(ftmp[0]), [p2]
"=&f"(ftmp[1]),
829 [p1]
"=&f"(ftmp[2]), [p0]
"=&f"(ftmp[3]),
830 [
q0]
"=&f"(ftmp[4]), [
q1]
"=&f"(ftmp[5]),
831 [q2]
"=&f"(ftmp[6]), [q3]
"=&f"(ftmp[7]),
832 [ftmp0]
"=&f"(ftmp[8]), [ftmp1]
"=&f"(ftmp[9]),
833 [ftmp2]
"=&f"(ftmp[10]), [ftmp3]
"=&f"(ftmp[11]),
834 [
hev]
"=&f"(ftmp[12]), [
mask]
"=&f"(ftmp[13]),
835 [ftmp4]
"=&f"(ftmp[14]), [ftmp5]
"=&f"(ftmp[15]),
836 [ftmp6]
"=&f"(ftmp[16]), [ftmp7]
"=&f"(ftmp[17]),
847 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
851 for (
i = 0;
i < 8;
i++)
862 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
873 MMI_ULDC1(%[p3], %[
dst], -0x04)
874 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
875 MMI_ULDC1(%[p2], %[tmp0], -0x04)
876 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
877 MMI_ULDC1(%[p1], %[tmp0], -0x04)
878 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
879 MMI_ULDC1(%[p0], %[tmp0], -0x04)
880 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
881 MMI_ULDC1(%[
q0], %[tmp0], -0x04)
882 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
883 MMI_ULDC1(%[
q1], %[tmp0], -0x04)
884 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
885 MMI_ULDC1(%[q2], %[tmp0], -0x04)
886 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
887 MMI_ULDC1(%[q3], %[tmp0], -0x04)
890 %[
q0], %[
q1], %[q2], %[q3],
891 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
895 %[
q0], %[
q1], %[q2], %[q3],
896 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
898 MMI_USDC1(%[p3], %[
dst], -0x04)
899 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
900 MMI_USDC1(%[p2], %[
dst], -0x04)
901 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
902 MMI_USDC1(%[p1], %[
dst], -0x04)
903 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
904 MMI_USDC1(%[p0], %[
dst], -0x04)
905 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
906 MMI_USDC1(%[
q0], %[
dst], -0x04)
907 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
908 MMI_USDC1(%[
q1], %[
dst], -0x04)
909 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
910 MMI_USDC1(%[q2], %[
dst], -0x04)
911 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
912 MMI_USDC1(%[q3], %[
dst], -0x04)
914 [p3]
"=&f"(ftmp[0]), [p2]
"=&f"(ftmp[1]),
915 [p1]
"=&f"(ftmp[2]), [p0]
"=&f"(ftmp[3]),
916 [
q0]
"=&f"(ftmp[4]), [
q1]
"=&f"(ftmp[5]),
917 [q2]
"=&f"(ftmp[6]), [q3]
"=&f"(ftmp[7]),
918 [ftmp0]
"=&f"(ftmp[8]), [ftmp1]
"=&f"(ftmp[9]),
919 [ftmp2]
"=&f"(ftmp[10]), [ftmp3]
"=&f"(ftmp[11]),
920 [
hev]
"=&f"(ftmp[12]), [
mask]
"=&f"(ftmp[13]),
921 [ftmp4]
"=&f"(ftmp[14]), [ftmp5]
"=&f"(ftmp[15]),
922 [ftmp6]
"=&f"(ftmp[16]), [ftmp7]
"=&f"(ftmp[17]),
933 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
937 for (
i = 0;
i < 8;
i++)
954 MMI_LDC1(%[ftmp0], %[
dc], 0x00)
955 MMI_LDC1(%[ftmp1], %[
dc], 0x08)
956 MMI_LDC1(%[ftmp2], %[
dc], 0x10)
957 MMI_LDC1(%[ftmp3], %[
dc], 0x18)
958 "paddsh %[ftmp4], %[ftmp0], %[ftmp3] \n\t"
959 "psubsh %[ftmp5], %[ftmp0], %[ftmp3] \n\t"
960 "paddsh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
961 "psubsh %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
962 "paddsh %[ftmp0], %[ftmp4], %[ftmp6] \n\t"
963 "paddsh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
964 "psubsh %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
965 "psubsh %[ftmp3], %[ftmp5], %[ftmp7] \n\t"
966 MMI_SDC1(%[ftmp0], %[
dc], 0x00)
967 MMI_SDC1(%[ftmp1], %[
dc], 0x08)
968 MMI_SDC1(%[ftmp2], %[
dc], 0x10)
969 MMI_SDC1(%[ftmp3], %[
dc], 0x18)
970 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
971 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
972 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
973 [ftmp6]
"=&f"(ftmp[6]),
975 [ftmp7]
"=&f"(ftmp[7])
976 : [
dc]
"r"((uint8_t*)
dc)
995 block[3][0][0] = (
dc[12] +
dc[15] + 3 +
dc[13] +
dc[14]) >> 3;
996 block[3][1][0] = (
dc[12] -
dc[15] + 3 +
dc[13] -
dc[14]) >> 3;
997 block[3][2][0] = (
dc[12] +
dc[15] + 3 -
dc[13] -
dc[14]) >> 3;
998 block[3][3][0] = (
dc[12] -
dc[15] + 3 -
dc[13] +
dc[14]) >> 3;
1001 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1002 MMI_SDC1(%[ftmp0], %[
dc], 0x00)
1003 MMI_SDC1(%[ftmp0], %[
dc], 0x08)
1004 MMI_SDC1(%[ftmp0], %[
dc], 0x10)
1005 MMI_SDC1(%[ftmp0], %[
dc], 0x18)
1007 [ftmp0]
"=&f"(ftmp[0])
1008 : [
dc]
"r"((uint8_t *)
dc)
1012 int t00, t01, t02, t03, t10, t11, t12, t13, t20, t21, t22, t23, t30, t31, t32, t33;
1014 t00 =
dc[0] +
dc[12];
1015 t10 =
dc[1] +
dc[13];
1016 t20 =
dc[2] +
dc[14];
1017 t30 =
dc[3] +
dc[15];
1019 t03 =
dc[0] -
dc[12];
1020 t13 =
dc[1] -
dc[13];
1021 t23 =
dc[2] -
dc[14];
1022 t33 =
dc[3] -
dc[15];
1024 t01 =
dc[4] +
dc[ 8];
1025 t11 =
dc[5] +
dc[ 9];
1026 t21 =
dc[6] +
dc[10];
1027 t31 =
dc[7] +
dc[11];
1029 t02 =
dc[4] -
dc[ 8];
1030 t12 =
dc[5] -
dc[ 9];
1031 t22 =
dc[6] -
dc[10];
1032 t32 =
dc[7] -
dc[11];
1064 block[2][0][0] = (
dc[8] +
dc[11] + 3 +
dc[9] +
dc[10]) >> 3;
1065 block[2][1][0] = (
dc[8] -
dc[11] + 3 +
dc[9] -
dc[10]) >> 3;
1066 block[2][2][0] = (
dc[8] +
dc[11] + 3 -
dc[9] -
dc[10]) >> 3;
1067 block[2][3][0] = (
dc[8] -
dc[11] + 3 -
dc[9] +
dc[10]) >> 3;
1069 block[3][0][0] = (
dc[12] +
dc[15] + 3 +
dc[13] +
dc[14]) >> 3;
1070 block[3][1][0] = (
dc[12] -
dc[15] + 3 +
dc[13] -
dc[14]) >> 3;
1071 block[3][2][0] = (
dc[12] +
dc[15] + 3 -
dc[13] -
dc[14]) >> 3;
1072 block[3][3][0] = (
dc[12] -
dc[15] + 3 -
dc[13] +
dc[14]) >> 3;
1083 int val = (
dc[0] + 3) >> 3;
1114 ff_ph_4e7b_u.
i = 0x4e7b4e7b4e7b4e7bULL;
1115 ff_ph_22a3_u.
i = 0x22a322a322a322a3ULL;
1118 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1119 MMI_LDC1(%[ftmp1], %[
block], 0x00)
1120 MMI_LDC1(%[ftmp2], %[
block], 0x08)
1121 MMI_LDC1(%[ftmp3], %[
block], 0x10)
1122 MMI_LDC1(%[ftmp4], %[
block], 0x18)
1124 "li %[tmp0], 0x02 \n\t"
1125 "mtc1 %[tmp0], %[ftmp11] \n\t"
1128 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1130 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1132 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1133 "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t"
1135 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1136 "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t"
1138 "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t"
1139 "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
1141 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1142 "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t"
1145 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
1146 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
1148 "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
1149 "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
1151 "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
1152 "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
1154 "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t"
1155 "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1157 MMI_SDC1(%[ftmp0], %[
block], 0x00)
1158 MMI_SDC1(%[ftmp0], %[
block], 0x08)
1159 MMI_SDC1(%[ftmp0], %[
block], 0x10)
1160 MMI_SDC1(%[ftmp0], %[
block], 0x18)
1163 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1166 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1168 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1170 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1171 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1172 "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t"
1173 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1174 "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1176 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1177 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1178 "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t"
1179 "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t"
1180 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1182 "li %[tmp0], 0x03 \n\t"
1183 "mtc1 %[tmp0], %[ftmp11] \n\t"
1184 "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t"
1185 "paddh %[ftmp1], %[ftmp1], %[ff_pw_4] \n\t"
1186 "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
1187 "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t"
1188 "paddh %[ftmp2], %[ftmp2], %[ff_pw_4] \n\t"
1189 "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
1190 "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t"
1191 "paddh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t"
1192 "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
1193 "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
1194 "paddh %[ftmp4], %[ftmp4], %[ff_pw_4] \n\t"
1195 "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
1198 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1200 MMI_LWC1(%[ftmp5], %[dst0], 0x00)
1201 MMI_LWC1(%[ftmp6], %[dst1], 0x00)
1202 MMI_LWC1(%[ftmp7], %[dst2], 0x00)
1203 MMI_LWC1(%[ftmp8], %[dst3], 0x00)
1205 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1206 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1207 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1208 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1210 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1211 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1212 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1213 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1215 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1216 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1217 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1218 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1220 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1221 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1222 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1223 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1224 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1225 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1226 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1227 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1228 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1229 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1236 [ff_ph_4e7b]
"f"(ff_ph_4e7b_u.
f), [ff_ph_22a3]
"f"(ff_ph_22a3_u.
f)
1240 int i, t0, t1, t2, t3;
1243 for (
i = 0;
i < 4;
i++) {
1246 t2 = MUL_35468(
block[4 +
i]) - MUL_20091(
block[12 +
i]);
1247 t3 = MUL_20091(
block[4 +
i]) + MUL_35468(
block[12 +
i]);
1253 tmp[
i * 4 + 0] = t0 + t3;
1254 tmp[
i * 4 + 1] = t1 + t2;
1255 tmp[
i * 4 + 2] = t1 - t2;
1256 tmp[
i * 4 + 3] = t0 - t3;
1259 for (
i = 0;
i < 4;
i++) {
1262 t2 = MUL_35468(
tmp[4 +
i]) - MUL_20091(
tmp[12 +
i]);
1263 t3 = MUL_20091(
tmp[4 +
i]) + MUL_35468(
tmp[12 +
i]);
1284 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1285 "mtc1 %[dc], %[ftmp5] \n\t"
1286 MMI_LWC1(%[ftmp1], %[dst0], 0x00)
1287 MMI_LWC1(%[ftmp2], %[dst1], 0x00)
1288 MMI_LWC1(%[ftmp3], %[dst2], 0x00)
1289 MMI_LWC1(%[ftmp4], %[dst3], 0x00)
1290 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1291 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1292 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1293 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1294 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1295 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1296 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1297 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1298 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1299 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1300 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1301 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1302 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1303 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1304 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1305 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1306 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1307 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1308 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1309 [ftmp4]
"=&f"(ftmp[4]),
1311 [ftmp5]
"=&f"(ftmp[5])
1322 for (
i = 0;
i < 4;
i++) {
1352 int flim_I,
int hev_thresh)
1359 int flim_I,
int hev_thresh)
1367 int flim_E,
int flim_I,
int hev_thresh)
1374 int flim_E,
int flim_I,
int hev_thresh)
1382 int flim_E,
int flim_I,
int hev_thresh)
1386 for (
i = 0;
i < 16;
i++)
1397 int flim_E,
int flim_I,
int hev_thresh)
1401 for (
i = 0;
i < 16;
i++)
1412 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
1419 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
1429 for (
i = 0;
i < 16;
i++)
1438 for (
i = 0;
i < 16;
i++)
1454 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1455 MMI_ULDC1(%[ftmp0], %[
src], 0x00)
1456 "ldl %[tmp0], 0x0f(%[src]) \n\t"
1457 "ldr %[tmp0], 0x08(%[src]) \n\t"
1458 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
1459 "ldl %[tmp1], 0x0f(%[addr0]) \n\t"
1460 "ldr %[tmp1], 0x08(%[addr0]) \n\t"
1461 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1462 MMI_SDC1(%[ftmp0], %[
dst], 0x00)
1463 "sdl %[tmp0], 0x0f(%[dst]) \n\t"
1464 "sdr %[tmp0], 0x08(%[dst]) \n\t"
1465 "addiu %[h], %[h], -0x02 \n\t"
1466 MMI_SDC1(%[ftmp1], %[addr1], 0x00)
1467 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1468 "sdl %[tmp1], 0x0f(%[addr1]) \n\t"
1469 "sdr %[tmp1], 0x08(%[addr1]) \n\t"
1470 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1471 "bnez %[h], 1b \n\t"
1472 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1473 [tmp0]
"=&r"(
tmp[0]), [tmp1]
"=&r"(
tmp[1]),
1475 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1478 : [dststride]
"r"((
mips_reg)dststride),
1501 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1502 MMI_ULDC1(%[ftmp0], %[
src], 0x00)
1503 "ldl %[tmp0], 0x07(%[addr0]) \n\t"
1504 "ldr %[tmp0], 0x00(%[addr0]) \n\t"
1505 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1506 MMI_SDC1(%[ftmp0], %[
dst], 0x00)
1507 "addiu %[h], %[h], -0x02 \n\t"
1508 "sdl %[tmp0], 0x07(%[addr1]) \n\t"
1509 "sdr %[tmp0], 0x00(%[addr1]) \n\t"
1510 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1511 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1512 "bnez %[h], 1b \n\t"
1513 : [ftmp0]
"=&f"(ftmp[0]), [tmp0]
"=&r"(
tmp[0]),
1515 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1518 : [dststride]
"r"((
mips_reg)dststride),
1541 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1542 MMI_LWC1(%[ftmp0], %[
src], 0x00)
1543 "lwl %[tmp0], 0x03(%[addr0]) \n\t"
1544 "lwr %[tmp0], 0x00(%[addr0]) \n\t"
1545 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1546 MMI_SWC1(%[ftmp0], %[
dst], 0x00)
1547 "addiu %[h], %[h], -0x02 \n\t"
1548 "swl %[tmp0], 0x03(%[addr1]) \n\t"
1549 "swr %[tmp0], 0x00(%[addr1]) \n\t"
1550 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1551 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1552 "bnez %[h], 1b \n\t"
1553 : [ftmp0]
"=&f"(ftmp[0]), [tmp0]
"=&r"(
tmp[0]),
1555 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1558 : [dststride]
"r"((
mips_reg)dststride),
1608 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1609 "li %[tmp0], 0x07 \n\t"
1610 "mtc1 %[tmp0], %[ftmp4] \n\t"
1620 "addiu %[h], %[h], -0x01 \n\t"
1621 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1622 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1623 "bnez %[h], 1b \n\t"
1624 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1625 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1626 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1627 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1628 [ftmp8]
"=&f"(ftmp[8]),
1629 [tmp0]
"=&r"(
tmp[0]),
1631 [dst1]
"=&r"(dst1), [
src1]
"=&r"(
src1),
1636 [dststride]
"r"((
mips_reg)dststride),
1638 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
1646 for (y = 0; y <
h; y++) {
1647 for (x = 0; x < 16; x++)
1684 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1685 "li %[tmp0], 0x07 \n\t"
1686 "mtc1 %[tmp0], %[ftmp4] \n\t"
1691 "addiu %[h], %[h], -0x01 \n\t"
1692 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1693 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1694 "bnez %[h], 1b \n\t"
1695 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1696 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1697 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1698 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1699 [ftmp8]
"=&f"(ftmp[8]),
1700 [tmp0]
"=&r"(
tmp[0]),
1706 [dststride]
"r"((
mips_reg)dststride),
1708 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
1716 for (y = 0; y <
h; y++) {
1717 for (x = 0; x < 8; x++)
1749 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1750 "li %[tmp0], 0x07 \n\t"
1751 "mtc1 %[tmp0], %[ftmp4] \n\t"
1756 "addiu %[h], %[h], -0x01 \n\t"
1757 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1758 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1759 "bnez %[h], 1b \n\t"
1760 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1761 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1762 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1763 [tmp0]
"=&r"(
tmp[0]),
1769 [dststride]
"r"((
mips_reg)dststride),
1771 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
1779 for (y = 0; y <
h; y++) {
1780 for (x = 0; x < 4; x++)
1830 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1831 "li %[tmp0], 0x07 \n\t"
1832 "mtc1 %[tmp0], %[ftmp4] \n\t"
1842 "addiu %[h], %[h], -0x01 \n\t"
1843 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1844 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1845 "bnez %[h], 1b \n\t"
1846 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1847 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1848 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1849 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1850 [ftmp8]
"=&f"(ftmp[8]),
1851 [tmp0]
"=&r"(
tmp[0]),
1853 [dst1]
"=&r"(dst1), [
src1]
"=&r"(
src1),
1858 [dststride]
"r"((
mips_reg)dststride),
1860 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
1861 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
1869 for (y = 0; y <
h; y++) {
1870 for (x = 0; x < 16; x++)
1910 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1911 "li %[tmp0], 0x07 \n\t"
1912 "mtc1 %[tmp0], %[ftmp4] \n\t"
1917 "addiu %[h], %[h], -0x01 \n\t"
1918 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1919 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1920 "bnez %[h], 1b \n\t"
1921 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1922 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1923 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1924 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1925 [ftmp8]
"=&f"(ftmp[8]),
1926 [tmp0]
"=&r"(
tmp[0]),
1932 [dststride]
"r"((
mips_reg)dststride),
1934 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
1935 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
1943 for (y = 0; y <
h; y++) {
1944 for (x = 0; x < 8; x++)
1980 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1981 "li %[tmp0], 0x07 \n\t"
1982 "mtc1 %[tmp0], %[ftmp4] \n\t"
1987 "addiu %[h], %[h], -0x01 \n\t"
1988 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1989 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1990 "bnez %[h], 1b \n\t"
1991 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1992 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1993 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1994 [tmp0]
"=&r"(
tmp[0]),
2000 [dststride]
"r"((
mips_reg)dststride),
2002 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
2003 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
2011 for (y = 0; y <
h; y++) {
2012 for (x = 0; x < 4; x++)
2058 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2059 "li %[tmp0], 0x07 \n\t"
2060 "mtc1 %[tmp0], %[ftmp4] \n\t"
2070 "addiu %[h], %[h], -0x01 \n\t"
2071 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2072 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2073 "bnez %[h], 1b \n\t"
2074 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2075 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2076 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2077 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2078 [ftmp8]
"=&f"(ftmp[8]),
2079 [tmp0]
"=&r"(
tmp[0]),
2081 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2087 [dststride]
"r"((
mips_reg)dststride),
2089 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
2097 for (y = 0; y <
h; y++) {
2098 for (x = 0; x < 16; x++)
2135 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2136 "li %[tmp0], 0x07 \n\t"
2137 "mtc1 %[tmp0], %[ftmp4] \n\t"
2142 "addiu %[h], %[h], -0x01 \n\t"
2143 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2144 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2145 "bnez %[h], 1b \n\t"
2146 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2147 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2148 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2149 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2150 [ftmp8]
"=&f"(ftmp[8]),
2151 [tmp0]
"=&r"(
tmp[0]),
2158 [dststride]
"r"((
mips_reg)dststride),
2160 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
2168 for (y = 0; y <
h; y++) {
2169 for (x = 0; x < 8; x++)
2202 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2203 "li %[tmp0], 0x07 \n\t"
2204 "mtc1 %[tmp0], %[ftmp4] \n\t"
2209 "addiu %[h], %[h], -0x01 \n\t"
2210 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2211 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2212 "bnez %[h], 1b \n\t"
2213 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2214 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2215 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2216 [tmp0]
"=&r"(
tmp[0]),
2223 [dststride]
"r"((
mips_reg)dststride),
2225 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
2233 for (y = 0; y <
h; y++) {
2234 for (x = 0; x < 4; x++)
2284 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2285 "li %[tmp0], 0x07 \n\t"
2286 "mtc1 %[tmp0], %[ftmp4] \n\t"
2296 "addiu %[h], %[h], -0x01 \n\t"
2297 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2298 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2299 "bnez %[h], 1b \n\t"
2300 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2301 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2302 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2303 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2304 [ftmp8]
"=&f"(ftmp[8]),
2305 [tmp0]
"=&r"(
tmp[0]),
2307 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2313 [dststride]
"r"((
mips_reg)dststride),
2315 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
2316 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
2324 for (y = 0; y <
h; y++) {
2325 for (x = 0; x < 16; x++)
2366 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2367 "li %[tmp0], 0x07 \n\t"
2368 "mtc1 %[tmp0], %[ftmp4] \n\t"
2373 "addiu %[h], %[h], -0x01 \n\t"
2374 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2375 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2376 "bnez %[h], 1b \n\t"
2377 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2378 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2379 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2380 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2381 [ftmp8]
"=&f"(ftmp[8]),
2382 [tmp0]
"=&r"(
tmp[0]),
2389 [dststride]
"r"((
mips_reg)dststride),
2391 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
2392 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
2400 for (y = 0; y <
h; y++) {
2401 for (x = 0; x < 8; x++)
2438 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2439 "li %[tmp0], 0x07 \n\t"
2440 "mtc1 %[tmp0], %[ftmp4] \n\t"
2445 "addiu %[h], %[h], -0x01 \n\t"
2446 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2447 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2448 "bnez %[h], 1b \n\t"
2449 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2450 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2451 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2452 [tmp0]
"=&r"(
tmp[0]),
2459 [dststride]
"r"((
mips_reg)dststride),
2461 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
2462 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
2470 for (y = 0; y <
h; y++) {
2471 for (x = 0; x < 4; x++)
2484 uint8_t *
tmp = tmp_array;
2488 tmp = tmp_array + 16;
2494 uint8_t tmp_array[560];
2495 uint8_t *
tmp = tmp_array;
2499 for (y = 0; y <
h + 3; y++) {
2500 for (x = 0; x < 16; x++)
2506 tmp = tmp_array + 16;
2509 for (y = 0; y <
h; y++) {
2510 for (x = 0; x < 16; x++)
2523 uint8_t *
tmp = tmp_array;
2527 tmp = tmp_array + 8;
2533 uint8_t tmp_array[152];
2534 uint8_t *
tmp = tmp_array;
2538 for (y = 0; y <
h + 3; y++) {
2539 for (x = 0; x < 8; x++)
2545 tmp = tmp_array + 8;
2548 for (y = 0; y <
h; y++) {
2549 for (x = 0; x < 8; x++)
2562 uint8_t *
tmp = tmp_array;
2566 tmp = tmp_array + 4;
2572 uint8_t tmp_array[44];
2573 uint8_t *
tmp = tmp_array;
2577 for (y = 0; y <
h + 3; y++) {
2578 for (x = 0; x < 4; x++)
2583 tmp = tmp_array + 4;
2586 for (y = 0; y <
h; y++) {
2587 for (x = 0; x < 4; x++)
2600 uint8_t *
tmp = tmp_array;
2604 tmp = tmp_array + 32;
2610 uint8_t tmp_array[592];
2611 uint8_t *
tmp = tmp_array;
2615 for (y = 0; y <
h + 5; y++) {
2616 for (x = 0; x < 16; x++)
2622 tmp = tmp_array + 32;
2625 for (y = 0; y <
h; y++) {
2626 for (x = 0; x < 16; x++)
2639 uint8_t *
tmp = tmp_array;
2643 tmp = tmp_array + 16;
2649 uint8_t tmp_array[168];
2650 uint8_t *
tmp = tmp_array;
2654 for (y = 0; y <
h + 5; y++) {
2655 for (x = 0; x < 8; x++)
2661 tmp = tmp_array + 16;
2664 for (y = 0; y <
h; y++) {
2665 for (x = 0; x < 8; x++)
2678 uint8_t *
tmp = tmp_array;
2682 tmp = tmp_array + 8;
2688 uint8_t tmp_array[52];
2689 uint8_t *
tmp = tmp_array;
2693 for (y = 0; y <
h + 5; y++) {
2694 for (x = 0; x < 4; x++)
2700 tmp = tmp_array + 8;
2703 for (y = 0; y <
h; y++) {
2704 for (x = 0; x < 4; x++)
2717 uint8_t *
tmp = tmp_array;
2721 tmp = tmp_array + 16;
2727 uint8_t tmp_array[560];
2728 uint8_t *
tmp = tmp_array;
2732 for (y = 0; y <
h + 3; y++) {
2733 for (x = 0; x < 16; x++)
2739 tmp = tmp_array + 16;
2742 for (y = 0; y <
h; y++) {
2743 for (x = 0; x < 16; x++)
2756 uint8_t *
tmp = tmp_array;
2760 tmp = tmp_array + 8;
2766 uint8_t tmp_array[152];
2767 uint8_t *
tmp = tmp_array;
2771 for (y = 0; y <
h + 3; y++) {
2772 for (x = 0; x < 8; x++)
2778 tmp = tmp_array + 8;
2781 for (y = 0; y <
h; y++) {
2782 for (x = 0; x < 8; x++)
2795 uint8_t *
tmp = tmp_array;
2799 tmp = tmp_array + 4;
2805 uint8_t tmp_array[44];
2806 uint8_t *
tmp = tmp_array;
2810 for (y = 0; y <
h + 3; y++) {
2811 for (x = 0; x < 4; x++)
2817 tmp = tmp_array + 4;
2820 for (y = 0; y <
h; y++) {
2821 for (x = 0; x < 4; x++)
2834 uint8_t *
tmp = tmp_array;
2838 tmp = tmp_array + 32;
2844 uint8_t tmp_array[592];
2845 uint8_t *
tmp = tmp_array;
2849 for (y = 0; y <
h + 5; y++) {
2850 for (x = 0; x < 16; x++)
2856 tmp = tmp_array + 32;
2859 for (y = 0; y <
h; y++) {
2860 for (x = 0; x < 16; x++)
2873 uint8_t *
tmp = tmp_array;
2877 tmp = tmp_array + 16;
2883 uint8_t tmp_array[168];
2884 uint8_t *
tmp = tmp_array;
2888 for (y = 0; y <
h + 5; y++) {
2889 for (x = 0; x < 8; x++)
2895 tmp = tmp_array + 16;
2898 for (y = 0; y <
h; y++) {
2899 for (x = 0; x < 8; x++)
2912 uint8_t *
tmp = tmp_array;
2916 tmp = tmp_array + 8;
2922 uint8_t tmp_array[52];
2923 uint8_t *
tmp = tmp_array;
2927 for (y = 0; y <
h + 5; y++) {
2928 for (x = 0; x < 4; x++)
2934 tmp = tmp_array + 8;
2937 for (y = 0; y <
h; y++) {
2938 for (x = 0; x < 4; x++)
2947 ptrdiff_t sstride,
int h,
int mx,
int my)
2978 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2979 "li %[tmp0], 0x03 \n\t"
2980 "mtc1 %[tmp0], %[ftmp4] \n\t"
2981 "pshufh %[a], %[a], %[ftmp0] \n\t"
2982 "pshufh %[b], %[b], %[ftmp0] \n\t"
2992 "addiu %[h], %[h], -0x01 \n\t"
2993 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
2994 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
2995 "bnez %[h], 1b \n\t"
2996 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2997 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2998 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2999 [ftmp6]
"=&f"(ftmp[6]),
3000 [tmp0]
"=&r"(
tmp[0]),
3002 [dst0]
"=&r"(dst0), [
src0]
"=&r"(
src0),
3005 [
a]
"+&f"(
a.f), [
b]
"+&f"(
b.f)
3015 for (y = 0; y <
h; y++) {
3016 for (x = 0; x < 16; x++)
3025 ptrdiff_t sstride,
int h,
int mx,
int my)
3047 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3048 "li %[tmp0], 0x03 \n\t"
3049 "mtc1 %[tmp0], %[ftmp4] \n\t"
3050 "pshufh %[c], %[c], %[ftmp0] \n\t"
3051 "pshufh %[d], %[d], %[ftmp0] \n\t"
3061 "addiu %[h], %[h], -0x01 \n\t"
3062 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3063 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3064 "bnez %[h], 1b \n\t"
3065 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3066 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3067 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3068 [ftmp6]
"=&f"(ftmp[6]),
3069 [tmp0]
"=&r"(
tmp[0]),
3071 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
3075 [
c]
"+&f"(
c.f), [d]
"+&f"(d.
f)
3082 int c = 8 -
my, d =
my;
3085 for (y = 0; y <
h; y++) {
3086 for (x = 0; x < 16; x++)
3087 dst[x] = (
c *
src[x] + d *
src[x + sstride] + 4) >> 3;
3095 ptrdiff_t sstride,
int h,
int mx,
int my)
3099 uint8_t *
tmp = tmp_array;
3105 int c = 8 -
my, d =
my;
3107 uint8_t tmp_array[528];
3108 uint8_t *
tmp = tmp_array;
3110 for (y = 0; y <
h + 1; y++) {
3111 for (x = 0; x < 16; x++)
3119 for (y = 0; y <
h; y++) {
3120 for (x = 0; x < 16; x++)
3121 dst[x] = (
c *
tmp[x] + d *
tmp[x + 16] + 4) >> 3;
3129 ptrdiff_t sstride,
int h,
int mx,
int my)
3150 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3151 "li %[tmp0], 0x03 \n\t"
3152 "mtc1 %[tmp0], %[ftmp4] \n\t"
3153 "pshufh %[a], %[a], %[ftmp0] \n\t"
3154 "pshufh %[b], %[b], %[ftmp0] \n\t"
3159 "addiu %[h], %[h], -0x01 \n\t"
3160 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3161 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3162 "bnez %[h], 1b \n\t"
3163 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3164 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3165 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3166 [ftmp6]
"=&f"(ftmp[6]),
3167 [tmp0]
"=&r"(
tmp[0]),
3171 [
a]
"+&f"(
a.f), [
b]
"+&f"(
b.f)
3181 for (y = 0; y <
h; y++) {
3182 for (x = 0; x < 8; x++)
3191 ptrdiff_t sstride,
int h,
int mx,
int my)
3213 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3214 "li %[tmp0], 0x03 \n\t"
3215 "mtc1 %[tmp0], %[ftmp4] \n\t"
3216 "pshufh %[c], %[c], %[ftmp0] \n\t"
3217 "pshufh %[d], %[d], %[ftmp0] \n\t"
3222 "addiu %[h], %[h], -0x01 \n\t"
3223 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3224 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3225 "bnez %[h], 1b \n\t"
3226 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3227 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3228 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3229 [ftmp6]
"=&f"(ftmp[6]),
3230 [tmp0]
"=&r"(
tmp[0]),
3235 [
c]
"+&f"(
c.f), [d]
"+&f"(d.
f)
3242 int c = 8 -
my, d =
my;
3245 for (y = 0; y <
h; y++) {
3246 for (x = 0; x < 8; x++)
3247 dst[x] = (
c *
src[x] + d *
src[x + sstride] + 4) >> 3;
3255 ptrdiff_t sstride,
int h,
int mx,
int my)
3259 uint8_t *
tmp = tmp_array;
3265 int c = 8 -
my, d =
my;
3267 uint8_t tmp_array[136];
3268 uint8_t *
tmp = tmp_array;
3270 for (y = 0; y <
h + 1; y++) {
3271 for (x = 0; x < 8; x++)
3279 for (y = 0; y <
h; y++) {
3280 for (x = 0; x < 8; x++)
3281 dst[x] = (
c *
tmp[x] + d *
tmp[x + 8] + 4) >> 3;
3289 ptrdiff_t sstride,
int h,
int mx,
int my)
3307 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3308 "li %[tmp0], 0x03 \n\t"
3309 "mtc1 %[tmp0], %[ftmp4] \n\t"
3310 "pshufh %[a], %[a], %[ftmp0] \n\t"
3311 "pshufh %[b], %[b], %[ftmp0] \n\t"
3316 "addiu %[h], %[h], -0x01 \n\t"
3317 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3318 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3319 "bnez %[h], 1b \n\t"
3320 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3321 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3322 [ftmp4]
"=&f"(ftmp[4]),
3323 [tmp0]
"=&r"(
tmp[0]),
3328 [
a]
"+&f"(
a.f), [
b]
"+&f"(
b.f)
3338 for (y = 0; y <
h; y++) {
3339 for (x = 0; x < 4; x++)
3348 ptrdiff_t sstride,
int h,
int mx,
int my)
3367 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3368 "li %[tmp0], 0x03 \n\t"
3369 "mtc1 %[tmp0], %[ftmp4] \n\t"
3370 "pshufh %[c], %[c], %[ftmp0] \n\t"
3371 "pshufh %[d], %[d], %[ftmp0] \n\t"
3376 "addiu %[h], %[h], -0x01 \n\t"
3377 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3378 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3379 "bnez %[h], 1b \n\t"
3380 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3381 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3382 [ftmp4]
"=&f"(ftmp[4]),
3383 [tmp0]
"=&r"(
tmp[0]),
3389 [
c]
"+&f"(
c.f), [d]
"+&f"(d.
f)
3396 int c = 8 -
my, d =
my;
3399 for (y = 0; y <
h; y++) {
3400 for (x = 0; x < 4; x++)
3401 dst[x] = (
c *
src[x] + d *
src[x + sstride] + 4) >> 3;
3409 ptrdiff_t sstride,
int h,
int mx,
int my)
3413 uint8_t *
tmp = tmp_array;
3419 int c = 8 -
my, d =
my;
3421 uint8_t tmp_array[36];
3422 uint8_t *
tmp = tmp_array;
3424 for (y = 0; y <
h + 1; y++) {
3425 for (x = 0; x < 4; x++)
3433 for (y = 0; y <
h; y++) {
3434 for (x = 0; x < 4; x++)
3435 dst[x] = (
c *
tmp[x] + d *
tmp[x + 4] + 4) >> 3;
#define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride)
static const uint8_t q1[256]
void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define FILTER_4TAP(src, F, stride)
static av_always_inline void vp8_filter_common_isnot4tap(uint8_t *p, ptrdiff_t stride)
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
const union av_intfloat64 ff_pw_4
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
#define DECLARE_VAR_LOW32
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void(* filter)(uint8_t *src, int stride, int qscale)
void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define RESTRICT_ASM_DOUBLE_1
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
#define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride)
void ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16])
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
const union av_intfloat64 ff_pw_64
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t mx
#define PUT_VP8_BILINEAR4_H_MMI(src, dst)
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
Brief: transpose 4x4 halfword packed data.
void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
static av_always_inline void vp8_filter_common_is4tap(uint8_t *p, ptrdiff_t stride)
void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static double val(void *priv, double ch)
static const uint64_t fourtap_subpel_filters[7][6]
static double a2(void *priv, double x, double y)
void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride)
#define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride)
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint8_t q0[256]
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t my
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
#define FILTER_6TAP(src, F, stride)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t const uint8_t ptrdiff_t srcstride
void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR8_H_MMI(src, dst)
#define RESTRICT_ASM_UINT32_T
#define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride)
void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define MMI_VP8_LOOP_FILTER
void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL4_H4_MMI(src, dst)
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
#define RESTRICT_ASM_DOUBLE_2
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define DECLARE_ALIGNED(n, t, v)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
The reader does not expect b to be semantically unsigned here, and if the code is changed by maybe adding a cast, a division or other operation, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a ...
static double a0(void *priv, double x, double y)
#define DECLARE_VAR_ALL64
static av_always_inline int vp8_normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define i(width, name, range_min, range_max)
void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride)
void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_H4_MMI(src, dst)
void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
static av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
#define RESTRICT_ASM_LOW32
void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 8x8 byte-packed data.
#define PUT_VP8_EPEL4_H6_MMI(src, dst)
void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
The exact code depends on how similar the blocks are and how related they are to the block
static double a1(void *priv, double x, double y)
#define PUT_VP8_EPEL8_H6_MMI(src, dst)
#define RESTRICT_ASM_ALL64
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static const uint8_t subpel_filters[7][6]