30 int dstStride,
int srcStride,
int h)
37 "uld %[low32], 0x00(%[src]) \n\t"
38 "mtc1 %[low32], %[ftmp0] \n\t"
39 "gsswlc1 %[ftmp0], 0x03(%[dst]) \n\t"
40 "gsswrc1 %[ftmp0], 0x00(%[dst]) \n\t"
41 "addi %[h], %[h], -0x01 \n\t"
42 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
43 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
45 : [ftmp0]
"=&f"(ftmp[0]),
46 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
49 : [dstStride]
"r"((
mips_reg)dstStride),
56 int dstStride,
int srcStride,
int h)
62 "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t"
63 "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t"
64 "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
65 "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
66 "addi %[h], %[h], -0x01 \n\t"
67 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
68 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
70 : [ftmp0]
"=&f"(ftmp[0]),
71 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
73 : [dstStride]
"r"((
mips_reg)dstStride),
80 int dstStride,
int srcStride,
int h)
87 "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t"
88 "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t"
89 "ldl %[tmp0], 0x0f(%[src]) \n\t"
90 "ldr %[tmp0], 0x08(%[src]) \n\t"
91 "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
92 "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
93 "sdl %[tmp0], 0x0f(%[dst]) \n\t"
94 "sdr %[tmp0], 0x08(%[dst]) \n\t"
95 "addi %[h], %[h], -0x01 \n\t"
96 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
97 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
99 : [ftmp0]
"=&f"(ftmp[0]),
101 [dst]
"+&r"(dst), [src]
"+&r"(src),
103 : [dstStride]
"r"((
mips_reg)dstStride),
/*
 * op2_avg(a, b): store into `a` the rounded-up mean of its current value and
 * the scaled, clipped intermediate `b`.
 *
 *   scaled = CLIP((b + 512) >> 10)   -- 512 is half of (1 << 10), so the bias
 *                                       is applied before the shift by 10
 *   a      = (a + scaled + 1) >> 1   -- the +1 makes the halving round up
 *
 * CLIP is defined elsewhere; presumably it clamps to the pixel range --
 * TODO confirm against its definition.
 *
 * NOTE: the expansion is a bare assignment (no enclosing parentheses or
 * do/while) and `a` appears on both sides, so `a` must be a
 * side-effect-free lvalue and the macro should be used as a statement.
 */
109 #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
/*
 * op2_put(a, b): store into `a` the scaled, clipped intermediate `b`, i.e.
 * CLIP((b + 512) >> 10), overwriting the previous value of `a`.
 * Same usage caveats as op2_avg: bare assignment, `a` must be an lvalue.
 */
110 #define op2_put(a, b) a = CLIP(((b) + 512)>>10)
112 int dstStride,
int srcStride)
119 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
120 "dli %[tmp0], 0x04 \n\t"
122 "uld %[low32], -0x02(%[src]) \n\t"
123 "mtc1 %[low32], %[ftmp1] \n\t"
124 "uld %[low32], -0x01(%[src]) \n\t"
125 "mtc1 %[low32], %[ftmp2] \n\t"
126 "uld %[low32], 0x00(%[src]) \n\t"
127 "mtc1 %[low32], %[ftmp3] \n\t"
128 "uld %[low32], 0x01(%[src]) \n\t"
129 "mtc1 %[low32], %[ftmp4] \n\t"
130 "uld %[low32], 0x02(%[src]) \n\t"
131 "mtc1 %[low32], %[ftmp5] \n\t"
132 "uld %[low32], 0x03(%[src]) \n\t"
133 "mtc1 %[low32], %[ftmp6] \n\t"
134 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
135 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
136 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
137 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
138 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
139 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
140 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
141 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
142 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
143 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
144 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
145 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
146 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
147 "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
148 "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
149 "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
150 "gsswlc1 %[ftmp9], 0x03(%[dst]) \n\t"
151 "gsswrc1 %[ftmp9], 0x00(%[dst]) \n\t"
152 "daddi %[tmp0], %[tmp0], -0x01 \n\t"
153 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
154 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
155 "bnez %[tmp0], 1b \n\t"
156 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
157 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
158 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
159 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
160 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
162 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
164 : [dstStride]
"r"((
mips_reg)dstStride),
165 [srcStride]
"r"((
mips_reg)srcStride),
173 int dstStride,
int srcStride)
179 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
180 "dli %[tmp0], 0x08 \n\t"
182 "gsldlc1 %[ftmp1], 0x05(%[src]) \n\t"
183 "gsldrc1 %[ftmp1], -0x02(%[src]) \n\t"
184 "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
185 "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
186 "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t"
187 "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t"
188 "gsldlc1 %[ftmp4], 0x08(%[src]) \n\t"
189 "gsldrc1 %[ftmp4], 0x01(%[src]) \n\t"
190 "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
191 "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
192 "gsldlc1 %[ftmp6], 0x0a(%[src]) \n\t"
193 "gsldrc1 %[ftmp6], 0x03(%[src]) \n\t"
194 "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
195 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
196 "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
197 "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
198 "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
199 "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
200 "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
201 "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
202 "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
203 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
204 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
205 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
206 "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
207 "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
208 "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
209 "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
210 "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
211 "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
212 "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
213 "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
214 "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
215 "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
216 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
217 "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
218 "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
219 "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
220 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
221 "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
222 "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
223 "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
224 "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
225 "gssdlc1 %[ftmp9], 0x07(%[dst]) \n\t"
226 "gssdrc1 %[ftmp9], 0x00(%[dst]) \n\t"
227 "daddi %[tmp0], %[tmp0], -0x01 \n\t"
228 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
229 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
230 "bnez %[tmp0], 1b \n\t"
231 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
232 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
233 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
234 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
235 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
236 [ftmp10]
"=&f"(ftmp[10]),
238 [dst]
"+&r"(dst), [src]
"+&r"(src)
239 : [dstStride]
"r"((
mips_reg)dstStride),
240 [srcStride]
"r"((
mips_reg)srcStride),
248 int dstStride,
int srcStride)
259 int dstStride,
int srcStride)
266 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
267 "dli %[tmp0], 0x04 \n\t"
269 "uld %[low32], -0x02(%[src]) \n\t"
270 "mtc1 %[low32], %[ftmp1] \n\t"
271 "uld %[low32], -0x01(%[src]) \n\t"
272 "mtc1 %[low32], %[ftmp2] \n\t"
273 "uld %[low32], 0x00(%[src]) \n\t"
274 "mtc1 %[low32], %[ftmp3] \n\t"
275 "uld %[low32], 0x01(%[src]) \n\t"
276 "mtc1 %[low32], %[ftmp4] \n\t"
277 "uld %[low32], 0x02(%[src]) \n\t"
278 "mtc1 %[low32], %[ftmp5] \n\t"
279 "uld %[low32], 0x03(%[src]) \n\t"
280 "mtc1 %[low32], %[ftmp6] \n\t"
281 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
282 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
283 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
284 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
285 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
286 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
287 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
288 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
289 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
290 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
291 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
292 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
293 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
294 "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
295 "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
296 "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
297 "lwc1 %[ftmp10], 0x00(%[dst]) \n\t"
298 "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
299 "gsswlc1 %[ftmp9], 0x03(%[dst]) \n\t"
300 "gsswrc1 %[ftmp9], 0x00(%[dst]) \n\t"
301 "daddi %[tmp0], %[tmp0], -0x01 \n\t"
302 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
303 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
304 "bnez %[tmp0], 1b \n\t"
305 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
306 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
307 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
308 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
309 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
310 [ftmp10]
"=&f"(ftmp[10]),
312 [dst]
"+&r"(dst), [src]
"+&r"(src),
314 : [dstStride]
"r"((
mips_reg)dstStride),
315 [srcStride]
"r"((
mips_reg)srcStride),
323 int dstStride,
int srcStride)
329 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
330 "dli %[tmp0], 0x08 \n\t"
332 "gsldlc1 %[ftmp1], 0x05(%[src]) \n\t"
333 "gsldrc1 %[ftmp1], -0x02(%[src]) \n\t"
334 "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
335 "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
336 "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t"
337 "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t"
338 "gsldlc1 %[ftmp4], 0x08(%[src]) \n\t"
339 "gsldrc1 %[ftmp4], 0x01(%[src]) \n\t"
340 "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
341 "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
342 "gsldlc1 %[ftmp6], 0x0a(%[src]) \n\t"
343 "gsldrc1 %[ftmp6], 0x03(%[src]) \n\t"
344 "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
345 "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
346 "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
347 "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
348 "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
349 "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
350 "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
351 "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
352 "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
353 "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
354 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
355 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
356 "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
357 "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
358 "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
359 "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
360 "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
361 "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
362 "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
363 "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
364 "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
365 "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
366 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
367 "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
368 "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
369 "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
370 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
371 "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
372 "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
373 "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
374 "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
375 "ldc1 %[ftmp10], 0x00(%[dst]) \n\t"
376 "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
377 "sdc1 %[ftmp9], 0x00(%[dst]) \n\t"
378 "daddi %[tmp0], %[tmp0], -0x01 \n\t"
379 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
380 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
381 "bnez %[tmp0], 1b \n\t"
382 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
383 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
384 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
385 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
386 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
387 [ftmp10]
"=&f"(ftmp[10]),
389 [dst]
"+&r"(dst), [src]
"+&r"(src)
390 : [dstStride]
"r"((
mips_reg)dstStride),
391 [srcStride]
"r"((
mips_reg)srcStride),
399 int dstStride,
int srcStride)
410 int dstStride,
int srcStride)
416 src -= 2 * srcStride;
420 ".set noreorder \n\t"
421 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
422 "dli %[tmp0], 0x02 \n\t"
423 "uld %[low32], 0x00(%[src]) \n\t"
424 "mtc1 %[low32], %[ftmp1] \n\t"
425 "mtc1 %[tmp0], %[ftmp10] \n\t"
426 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
427 "dli %[tmp0], 0x05 \n\t"
428 "uld %[low32], 0x00(%[src]) \n\t"
429 "mtc1 %[low32], %[ftmp2] \n\t"
430 "mtc1 %[tmp0], %[ftmp11] \n\t"
431 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
432 "uld %[low32], 0x00(%[src]) \n\t"
433 "mtc1 %[low32], %[ftmp3] \n\t"
434 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
435 "uld %[low32], 0x00(%[src]) \n\t"
436 "mtc1 %[low32], %[ftmp4] \n\t"
437 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
438 "uld %[low32], 0x00(%[src]) \n\t"
439 "mtc1 %[low32], %[ftmp5] \n\t"
440 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
441 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
442 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
443 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
444 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
445 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
446 "uld %[low32], 0x00(%[src]) \n\t"
447 "mtc1 %[low32], %[ftmp6] \n\t"
448 "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
449 "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
450 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
451 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
452 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
453 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
454 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
455 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
456 "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
457 "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
458 "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
459 "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
460 "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
461 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
462 "uld %[low32], 0x00(%[src]) \n\t"
463 "mtc1 %[low32], %[ftmp1] \n\t"
464 "paddh %[ftmp7], %[ftmp4], %[ftmp5] \n\t"
465 "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
466 "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
467 "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
468 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
469 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
470 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
471 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
472 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
473 "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
474 "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
475 "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
476 "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
477 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
478 "uld %[low32], 0x00(%[src]) \n\t"
479 "mtc1 %[low32], %[ftmp2] \n\t"
480 "paddh %[ftmp7], %[ftmp5], %[ftmp6] \n\t"
481 "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
482 "psubh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
483 "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
484 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
485 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
486 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
487 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
488 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
489 "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
490 "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
491 "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
492 "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
493 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
494 "uld %[low32], 0x00(%[src]) \n\t"
495 "mtc1 %[low32], %[ftmp3] \n\t"
496 "paddh %[ftmp7], %[ftmp6], %[ftmp1] \n\t"
497 "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
498 "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
499 "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
500 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
501 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
502 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
503 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
504 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
505 "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
506 "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
507 "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
508 "swc1 %[ftmp7], 0x00(%[dst]) \n\t"
509 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
511 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
512 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
513 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
514 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
515 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
516 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
518 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
520 : [dstStride]
"r"((
mips_reg)dstStride),
521 [srcStride]
"r"((
mips_reg)srcStride),
528 int dstStride,
int srcStride)
536 src -= 2 * srcStride;
541 ".set noreorder \n\t"
542 "dli %[tmp0], 0x02 \n\t"
543 "uld %[low32], 0x00(%[src]) \n\t"
544 "mtc1 %[low32], %[ftmp0] \n\t"
545 "mtc1 %[tmp0], %[ftmp8] \n\t"
546 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
547 "dli %[tmp0], 0x05 \n\t"
548 "uld %[low32], 0x00(%[src]) \n\t"
549 "mtc1 %[low32], %[ftmp1] \n\t"
550 "mtc1 %[tmp0], %[ftmp9] \n\t"
551 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
552 "uld %[low32], 0x00(%[src]) \n\t"
553 "mtc1 %[low32], %[ftmp2] \n\t"
554 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
555 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
556 "uld %[low32], 0x00(%[src]) \n\t"
557 "mtc1 %[low32], %[ftmp3] \n\t"
558 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
559 "uld %[low32], 0x00(%[src]) \n\t"
560 "mtc1 %[low32], %[ftmp4] \n\t"
561 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
562 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
563 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
564 "uld %[low32], 0x00(%[src]) \n\t"
565 "mtc1 %[low32], %[ftmp5] \n\t"
566 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
567 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
568 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
569 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
570 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
571 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
572 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
573 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
574 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
575 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
576 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
577 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
578 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
579 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
580 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
581 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
582 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
583 "uld %[low32], 0x00(%[src]) \n\t"
584 "mtc1 %[low32], %[ftmp0] \n\t"
585 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
586 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
587 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
588 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
589 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
590 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
591 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
592 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
593 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
594 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
595 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
596 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
597 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
598 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
599 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
600 "uld %[low32], 0x00(%[src]) \n\t"
601 "mtc1 %[low32], %[ftmp1] \n\t"
602 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
603 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
604 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
605 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
606 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
607 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
608 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
609 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
610 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
611 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
612 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
613 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
614 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
615 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
616 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
617 "uld %[low32], 0x00(%[src]) \n\t"
618 "mtc1 %[low32], %[ftmp2] \n\t"
619 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
620 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
621 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
622 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
623 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
624 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
625 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
626 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
627 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
628 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
629 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
630 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
631 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
632 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
633 "uld %[low32], 0x00(%[src]) \n\t"
634 "mtc1 %[low32], %[ftmp3] \n\t"
635 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
636 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
637 "punpcklbh %[ftmp3] , %[ftmp3], %[ftmp7] \n\t"
638 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
639 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
640 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
641 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
642 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
643 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
644 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
645 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
646 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
647 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
648 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
649 "uld %[low32], 0x00(%[src]) \n\t"
650 "mtc1 %[low32], %[ftmp4] \n\t"
651 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
652 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
653 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
654 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
655 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
656 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
657 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
658 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
659 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
660 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
661 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
662 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
663 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
664 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
665 "uld %[low32], 0x00(%[src]) \n\t"
666 "mtc1 %[low32], %[ftmp5] \n\t"
667 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
668 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
669 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
670 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
671 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
672 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
673 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
674 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
675 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
676 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
677 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
678 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
679 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
680 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
681 "uld %[low32], 0x00(%[src]) \n\t"
682 "mtc1 %[low32], %[ftmp0] \n\t"
683 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
684 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
685 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
686 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
687 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
688 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
689 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
690 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
691 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
692 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
693 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
694 "bne %[h], 0x10, 2f \n\t"
695 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
696 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
697 "uld %[low32], 0x00(%[src]) \n\t"
698 "mtc1 %[low32], %[ftmp1] \n\t"
699 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
700 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
701 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
702 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
703 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
704 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
705 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
706 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
707 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
708 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
709 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
710 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
711 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
712 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
713 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
714 "uld %[low32], 0x00(%[src]) \n\t"
715 "mtc1 %[low32], %[ftmp2] \n\t"
716 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
717 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
718 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
719 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
720 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
721 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
722 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
723 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
724 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
725 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
726 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
727 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
728 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
729 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
730 "uld %[low32], 0x00(%[src]) \n\t"
731 "mtc1 %[low32], %[ftmp3] \n\t"
732 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
733 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
734 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
735 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
736 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
737 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
738 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
739 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
740 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
741 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
742 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
743 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
744 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
745 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
746 "uld %[low32], 0x00(%[src]) \n\t"
747 "mtc1 %[low32], %[ftmp4] \n\t"
748 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
749 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
750 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
751 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
752 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
753 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
754 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
755 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
756 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
757 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
758 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
759 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
760 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
761 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
762 "uld %[low32], 0x00(%[src]) \n\t"
763 "mtc1 %[low32], %[ftmp5] \n\t"
764 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
765 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
766 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
767 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
768 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
769 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
770 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
771 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
772 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
773 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
774 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
775 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
776 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
777 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
778 "uld %[low32], 0x00(%[src]) \n\t"
779 "mtc1 %[low32], %[ftmp0] \n\t"
780 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
781 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
782 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
783 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
784 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
785 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
786 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
787 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
788 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
789 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
790 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
791 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
792 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
793 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
794 "uld %[low32], 0x00(%[src]) \n\t"
795 "mtc1 %[low32], %[ftmp1] \n\t"
796 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
797 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
798 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
799 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
800 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
801 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
802 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
803 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
804 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
805 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
806 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
807 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
808 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
809 "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
810 "uld %[low32], 0x00(%[src]) \n\t"
811 "mtc1 %[low32], %[ftmp2] \n\t"
812 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
813 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
814 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
815 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
816 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
817 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
818 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
819 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
820 "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
821 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
822 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
823 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
826 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
827 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
828 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
829 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
830 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
832 [src]
"+&r"(src), [dst]
"+&r"(dst),
835 : [dstStride]
"r"((
mips_reg)dstStride),
836 [srcStride]
"r"((
mips_reg)srcStride),
841 src += 4 - (h + 5) * srcStride;
842 dst += 4 - h * dstStride;
847 int dstStride,
int srcStride)
858 int dstStride,
int srcStride)
863 src -= 2 * srcStride;
867 ".set noreorder \n\t"
868 "dli %[tmp0], 0x02 \n\t"
869 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
870 "mtc1 %[tmp0], %[ftmp9] \n\t"
871 "dli %[tmp0], 0x05 \n\t"
872 "lwc1 %[ftmp0], 0x00(%[src]) \n\t"
873 "mtc1 %[tmp0], %[ftmp8] \n\t"
874 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
875 "lwc1 %[ftmp1], 0x00(%[src]) \n\t"
876 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
877 "lwc1 %[ftmp2], 0x00(%[src]) \n\t"
878 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
879 "lwc1 %[ftmp3], 0x00(%[src]) \n\t"
880 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
881 "lwc1 %[ftmp4], 0x00(%[src]) \n\t"
882 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
883 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
884 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
885 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
886 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
887 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
888 "lwc1 %[ftmp5], 0x00(%[src]) \n\t"
889 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
890 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
891 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
892 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
893 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
894 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
895 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
896 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
897 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
898 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
899 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
900 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
901 "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
902 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
903 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
904 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
905 "lwc1 %[ftmp0], 0x00(%[src]) \n\t"
906 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
907 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
908 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
909 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
910 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
911 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
912 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
913 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
914 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
915 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
916 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
917 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
918 "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
919 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
920 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
921 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
922 "lwc1 %[ftmp1], 0x00(%[src]) \n\t"
923 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
924 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
925 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
926 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
927 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
928 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
929 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
930 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
931 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
932 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
933 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
934 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
935 "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
936 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
937 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
938 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
939 "lwc1 %[ftmp2], 0x00(%[src]) \n\t"
940 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
941 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
942 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
943 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
944 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
945 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
946 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
947 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
948 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
949 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
950 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
951 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
952 "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
953 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
954 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
955 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
957 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
958 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
959 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
960 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
961 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
963 [src]
"+&r"(src), [dst]
"+&r"(dst)
964 : [dstStride]
"r"((
mips_reg)dstStride),
965 [srcStride]
"r"((
mips_reg)srcStride),
972 int dstStride,
int srcStride)
/*
 * Vertical 6-tap lowpass over a 4-pixel-wide column, averaged into dst.
 *
 * Each asm pass loads 4 source bytes per row, widens them to halfwords,
 * filters down the column and blends the packed result with the bytes
 * already in dst (pavgb) before storing 4 bytes back.
 * Eight rows are always produced; eight more follow when h == 16.
 *
 * NOTE(review): the function name, the declarations of h/ftmp/tmp/low32 and
 * any enclosing column loop are not visible in this view - confirm against
 * the full file.
 */
/* Start two rows above the target row: the filter needs rows -2..+3. */
980 src -= 2 * srcStride;
985 ".set noreorder \n\t"
/* ftmp9 = 2 (left-shift count), ftmp8 = 5 (right-shift count), ftmp7 = 0
 * (zero operand for byte -> halfword unpacking). */
986 "dli %[tmp0], 0x02 \n\t"
987 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
988 "mtc1 %[tmp0], %[ftmp9] \n\t"
989 "dli %[tmp0], 0x05 \n\t"
/* Prime the pipeline: five consecutive rows go into ftmp0..ftmp4. */
990 "uld %[low32], 0x00(%[src]) \n\t"
991 "mtc1 %[low32], %[ftmp0] \n\t"
992 "mtc1 %[tmp0], %[ftmp8] \n\t"
993 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
994 "uld %[low32], 0x00(%[src]) \n\t"
995 "mtc1 %[low32], %[ftmp1] \n\t"
996 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
997 "uld %[low32], 0x00(%[src]) \n\t"
998 "mtc1 %[low32], %[ftmp2] \n\t"
999 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1000 "uld %[low32], 0x00(%[src]) \n\t"
1001 "mtc1 %[low32], %[ftmp3] \n\t"
1002 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1003 "uld %[low32], 0x00(%[src]) \n\t"
1004 "mtc1 %[low32], %[ftmp4] \n\t"
1005 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1006 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1007 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1008 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1009 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1010 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
/*
 * One output row. With the six rows of the window named a..f (oldest first):
 *   ftmp6 = (((c + d) << 2) - b - e) * ff_pw_5 + (a + ff_pw_16 + f)
 *   ftmp6 >>= 5, packed to unsigned bytes with saturation,
 *   averaged with the current dst bytes, stored as 4 bytes.
 * Presumably ff_pw_5 / ff_pw_16 are packed halfword constants 5 and 16,
 * giving (20*(c+d) - 5*(b+e) + a + f + 16) >> 5 - confirm at their definition.
 * The register that held 'a' is reused for the dst load and then for the
 * next incoming source row, so register roles rotate by one on every row.
 */
1011 "uld %[low32], 0x00(%[src]) \n\t"
1012 "mtc1 %[low32], %[ftmp5] \n\t"
1013 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1014 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1015 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1016 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1017 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1018 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1019 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1020 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1021 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1022 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1023 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1024 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1025 "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
1026 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1027 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1028 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1029 "uld %[low32], 0x00(%[src]) \n\t"
1030 "mtc1 %[low32], %[ftmp0] \n\t"
1031 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1032 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1033 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1034 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1035 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1036 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1037 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1038 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1039 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1040 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1041 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1042 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1043 "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
1044 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1045 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1046 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1047 "uld %[low32], 0x00(%[src]) \n\t"
1048 "mtc1 %[low32], %[ftmp1] \n\t"
1049 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1050 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1051 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1052 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1053 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1054 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1055 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1056 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1057 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1058 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1059 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1060 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1061 "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
1062 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1063 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1064 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1065 "uld %[low32], 0x00(%[src]) \n\t"
1066 "mtc1 %[low32], %[ftmp2] \n\t"
1067 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1068 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1069 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1070 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1071 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1072 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1073 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1074 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1075 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1076 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1077 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1078 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1079 "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
1080 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1081 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1082 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1083 "uld %[low32], 0x00(%[src]) \n\t"
1084 "mtc1 %[low32], %[ftmp3] \n\t"
1085 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1086 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1087 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1088 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1089 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1090 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1091 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1092 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1093 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1094 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1095 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1096 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1097 "lwc1 %[ftmp4], 0x00(%[dst]) \n\t"
1098 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1099 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1100 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1101 "uld %[low32], 0x00(%[src]) \n\t"
1102 "mtc1 %[low32], %[ftmp4] \n\t"
1103 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1104 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1105 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1106 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1107 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1108 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1109 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1110 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1111 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1112 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1113 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1114 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1115 "lwc1 %[ftmp5], 0x00(%[dst]) \n\t"
1116 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1117 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1118 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1119 "uld %[low32], 0x00(%[src]) \n\t"
1120 "mtc1 %[low32], %[ftmp5] \n\t"
1121 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1122 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1123 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1124 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1125 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1126 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1127 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1128 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1129 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1130 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1131 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1132 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1133 "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
1134 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1135 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1136 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1137 "uld %[low32], 0x00(%[src]) \n\t"
1138 "mtc1 %[low32], %[ftmp0] \n\t"
1139 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1140 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1141 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1142 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1143 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1144 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1145 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1146 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1147 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1148 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1149 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1150 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1151 "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
1152 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1153 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
/*
 * Eight rows are done. Leave for label 2 unless h == 16. Because
 * .set noreorder is in effect, the dst advance that follows sits in the
 * branch delay slot and executes whether or not the branch is taken.
 */
1154 "bne %[h], 0x10, 2f \n\t"
1155 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1156 "uld %[low32], 0x00(%[src]) \n\t"
1157 "mtc1 %[low32], %[ftmp1] \n\t"
1158 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1159 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1160 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1161 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1162 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1163 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1164 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1165 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1166 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1167 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1168 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1169 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1170 "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
1171 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1172 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1173 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1174 "uld %[low32], 0x00(%[src]) \n\t"
1175 "mtc1 %[low32], %[ftmp2] \n\t"
1176 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1177 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1178 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1179 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1180 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1181 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1182 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1183 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1184 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1185 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1186 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1187 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1188 "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
1189 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1190 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1191 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1192 "uld %[low32], 0x00(%[src]) \n\t"
1193 "mtc1 %[low32], %[ftmp3] \n\t"
1194 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1195 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1196 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1197 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1198 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1199 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1200 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1201 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1202 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1203 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1204 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1205 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1206 "lwc1 %[ftmp4], 0x00(%[dst]) \n\t"
1207 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1208 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1209 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1210 "uld %[low32], 0x00(%[src]) \n\t"
1211 "mtc1 %[low32], %[ftmp4] \n\t"
1212 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1213 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1214 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1215 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1216 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1217 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1218 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1219 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1220 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1221 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1222 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1223 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1224 "lwc1 %[ftmp5], 0x00(%[dst]) \n\t"
1225 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1226 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1227 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1228 "uld %[low32], 0x00(%[src]) \n\t"
1229 "mtc1 %[low32], %[ftmp5] \n\t"
1230 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1231 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1232 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1233 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1234 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1235 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1236 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1237 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1238 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1239 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1240 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1241 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1242 "lwc1 %[ftmp0], 0x00(%[dst]) \n\t"
1243 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1244 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1245 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1246 "uld %[low32], 0x00(%[src]) \n\t"
1247 "mtc1 %[low32], %[ftmp0] \n\t"
1248 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1249 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1250 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1251 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1252 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1253 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1254 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1255 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1256 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1257 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1258 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1259 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1260 "lwc1 %[ftmp1], 0x00(%[dst]) \n\t"
1261 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1262 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1263 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1264 "uld %[low32], 0x00(%[src]) \n\t"
1265 "mtc1 %[low32], %[ftmp1] \n\t"
1266 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1267 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1268 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1269 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1270 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1271 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1272 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1273 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1274 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1275 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1276 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1277 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1278 "lwc1 %[ftmp2], 0x00(%[dst]) \n\t"
1279 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1280 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1281 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1282 "uld %[low32], 0x00(%[src]) \n\t"
1283 "mtc1 %[low32], %[ftmp2] \n\t"
1284 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1285 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1286 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1287 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1288 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1289 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1290 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1291 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1292 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1293 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1294 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1295 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1296 "lwc1 %[ftmp3], 0x00(%[dst]) \n\t"
1297 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1298 "swc1 %[ftmp6], 0x00(%[dst]) \n\t"
1299 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Outputs: ten FP scratch registers (early-clobber), one GPR scratch, and
 * src/dst as read-write pointers that the asm advances row by row. */
1302 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1303 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1304 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1305 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1306 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1307 [tmp0]
"=&r"(tmp[0]),
1308 [src]
"+&r"(src), [dst]
"+&r"(dst),
1311 : [dstStride]
"r"((
mips_reg)dstStride),
1312 [srcStride]
"r"((
mips_reg)srcStride),
/*
 * The asm advanced src by h + 5 rows (5 priming rows plus one per output
 * row) and dst by h rows. Rewind both and step 4 bytes to the right,
 * presumably for the next 4-pixel column of an enclosing loop that is not
 * visible here.
 */
1317 src += 4 - (h + 5) * srcStride;
1318 dst += 4 - h * dstStride;
1323 int dstStride,
int srcStride)
1334 int dstStride,
int srcStride)
/*
 * 4x4 two-dimensional (horizontal, then vertical) 6-tap lowpass.
 *
 * Pass 1 (asm): filter 9 source rows horizontally and keep the unrounded
 * 16-bit results in tmp, 4 values (8 bytes) per row.
 * Pass 2 (C): filter those intermediates vertically and write the final
 * pixels with op2_put, which rounds with +512, shifts right by 10 and clips.
 *
 * NOTE(review): the function name, the remaining parameters and several
 * statements between the two passes are not visible in this view.
 */
1339 int16_t *
tmp = _tmp;
/* ftmp0 = 0 (unpack operand); tmp0 = 9 is the row counter for the loop. */
1347 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1348 "dli %[tmp0], 0x09 \n\t"
/* Load six overlapping 4-pixel groups starting at src-2 .. src+3 and widen
 * each to halfwords: these are taps a..f for four adjacent output pixels. */
1350 "uld %[low32], -0x02(%[src]) \n\t"
1351 "mtc1 %[low32], %[ftmp1] \n\t"
1352 "uld %[low32], -0x01(%[src]) \n\t"
1353 "mtc1 %[low32], %[ftmp2] \n\t"
1354 "uld %[low32], 0x00(%[src]) \n\t"
1355 "mtc1 %[low32], %[ftmp3] \n\t"
1356 "uld %[low32], 0x01(%[src]) \n\t"
1357 "mtc1 %[low32], %[ftmp4] \n\t"
1358 "uld %[low32], 0x02(%[src]) \n\t"
1359 "mtc1 %[low32], %[ftmp5] \n\t"
1360 "uld %[low32], 0x03(%[src]) \n\t"
1361 "mtc1 %[low32], %[ftmp6] \n\t"
1362 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1363 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1364 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1365 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1366 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1367 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
/* ftmp9 = (c + d) * ff_pw_20 - (b + e) * ff_pw_5 + (a + f); no rounding or
 * shift is applied here - the 16-bit result is stored as-is into tmp. */
1368 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1369 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1370 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1371 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1372 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1373 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1374 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
1375 "sdc1 %[ftmp9], 0x00(%[tmp]) \n\t"
/* Next row: count down, advance src by one row and tmp by tmpStride. */
1376 "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1377 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1378 PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1379 "bnez %[tmp0], 1b \n\t"
1380 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1381 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1382 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1383 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1384 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1386 [tmp]
"+&r"(tmp), [
src]
"+&r"(
src),
/* tmpStride is the constant 8 bytes, i.e. four int16_t per row. */
1388 : [tmpStride]
"r"(8),
1389 [srcStride]
"r"((
mips_reg)srcStride),
/*
 * Vertical pass, one column per iteration. Rows of tmp are 4 int16_t apart,
 * so tmp[-8] .. tmp[24] are nine vertically adjacent intermediates.
 * NOTE(review): tmp must have been repositioned after the asm for the
 * negative indices to be valid, and tmp/dst must step per column; those
 * statements are not visible here - confirm.
 */
1396 for (i=0; i<4; i++) {
1397 const int16_t tmpB= tmp[-8];
1398 const int16_t tmpA= tmp[-4];
1399 const int16_t tmp0= tmp[ 0];
1400 const int16_t tmp1= tmp[ 4];
1401 const int16_t tmp2= tmp[ 8];
1402 const int16_t tmp3= tmp[12];
1403 const int16_t tmp4= tmp[16];
1404 const int16_t tmp5= tmp[20];
1405 const int16_t tmp6= tmp[24];
/* Same 20/-5/1 taps applied vertically; op2_put rounds, shifts and clips. */
1406 op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1407 op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1408 op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1409 op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1416 const uint8_t *
src, ptrdiff_t tmpStride, ptrdiff_t srcStride,
int size)
/*
 * First (vertical) pass of the two-dimensional lowpass for size 8 or 16.
 *
 * For one 4-pixel-wide column the asm filters down the rows and stores the
 * biased but unshifted 16-bit results into tmp, one 8-byte store per row at
 * a fixed row pitch of 0x30 bytes. Eight rows are always produced; eight
 * more follow when size == 16.
 *
 * NOTE(review): the function name, the first parameter(s), the column loop
 * that consumes w and the per-column tmp advance are not visible here.
 */
/* (size + 8) >> 2 gives 4 for size 8 and 6 for size 16; presumably the
 * number of 4-wide columns to process - confirm where w is consumed. */
1418 int w = (size + 8) >> 2;
/* Back up two rows and two pixels from the position passed in. */
1423 src -= 2 * srcStride + 2;
/* ftmp10 = 2 (left-shift count), ftmp7 = 0 (unpack operand); then prime the
 * window with five rows in ftmp0..ftmp4, widened to halfwords. */
1427 "dli %[tmp0], 0x02 \n\t"
1428 "uld %[low32], 0x00(%[src]) \n\t"
1429 "mtc1 %[low32], %[ftmp0] \n\t"
1430 "mtc1 %[tmp0], %[ftmp10] \n\t"
1431 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1432 "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1433 "uld %[low32], 0x00(%[src]) \n\t"
1434 "mtc1 %[low32], %[ftmp1] \n\t"
1435 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1436 "uld %[low32], 0x00(%[src]) \n\t"
1437 "mtc1 %[low32], %[ftmp2] \n\t"
1438 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1439 "uld %[low32], 0x00(%[src]) \n\t"
1440 "mtc1 %[low32], %[ftmp3] \n\t"
1441 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1442 "uld %[low32], 0x00(%[src]) \n\t"
1443 "mtc1 %[low32], %[ftmp4] \n\t"
1444 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1445 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1446 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1447 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1448 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1449 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
/*
 * One output row. With the six rows of the window named a..f (oldest first):
 *   ftmp6 = (((c + d) << 2) - b - e) * ff_pw_5 + (a + ff_pw_16 + f)
 * stored to tmp as four halfwords with no right shift. The ff_pw_16 term is
 * added on every row; presumably it is the rounding bias for a later stage -
 * confirm against the consumer of tmp.
 * The register that held 'a' is reloaded with the next source row, so the
 * register roles rotate by one on every row.
 */
1450 "uld %[low32], 0x00(%[src]) \n\t"
1451 "mtc1 %[low32], %[ftmp5] \n\t"
1452 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1453 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1454 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1455 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1456 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1457 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1458 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1459 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1460 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1461 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1462 "sdc1 %[ftmp6], 0x00(%[tmp]) \n\t"
1463 "uld %[low32], 0x00(%[src]) \n\t"
1464 "mtc1 %[low32], %[ftmp0] \n\t"
1465 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1466 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1467 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1468 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1469 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1470 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1471 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1472 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1473 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1474 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1475 "sdc1 %[ftmp6], 0x30(%[tmp]) \n\t"
1476 "uld %[low32], 0x00(%[src]) \n\t"
1477 "mtc1 %[low32], %[ftmp1] \n\t"
1478 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1479 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1480 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1481 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1482 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1483 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1484 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1485 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1486 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1487 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1488 "sdc1 %[ftmp6], 0x60(%[tmp]) \n\t"
1489 "uld %[low32], 0x00(%[src]) \n\t"
1490 "mtc1 %[low32], %[ftmp2] \n\t"
1491 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1492 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1493 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1494 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1495 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1496 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1497 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1498 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1499 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1500 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1501 "sdc1 %[ftmp6], 0x90(%[tmp]) \n\t"
1502 "uld %[low32], 0x00(%[src]) \n\t"
1503 "mtc1 %[low32], %[ftmp3] \n\t"
1504 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1505 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1506 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1507 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1508 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1509 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1510 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1511 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1512 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1513 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1514 "sdc1 %[ftmp6], 0xc0(%[tmp]) \n\t"
1515 "uld %[low32], 0x00(%[src]) \n\t"
1516 "mtc1 %[low32], %[ftmp4] \n\t"
1517 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1518 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1519 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1520 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1521 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1522 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1523 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1524 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1525 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1526 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1527 "sdc1 %[ftmp6], 0xf0(%[tmp]) \n\t"
1528 "uld %[low32], 0x00(%[src]) \n\t"
1529 "mtc1 %[low32], %[ftmp5] \n\t"
1530 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1531 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1532 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1533 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1534 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1535 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1536 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1537 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1538 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1539 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1540 "sdc1 %[ftmp6], 0x120(%[tmp]) \n\t"
1541 "uld %[low32], 0x00(%[src]) \n\t"
1542 "mtc1 %[low32], %[ftmp0] \n\t"
1543 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1544 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1545 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1546 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1547 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1548 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1549 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1550 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1551 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1552 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1553 "sdc1 %[ftmp6], 0x150(%[tmp]) \n\t"
/* Eight rows stored (offsets 0x00..0x150). Branch to label 2 unless
 * size == 16, in which case eight more rows (0x180..0x2d0) follow. */
1554 "bne %[size], 0x10, 2f \n\t"
1556 "uld %[low32], 0x00(%[src]) \n\t"
1557 "mtc1 %[low32], %[ftmp1] \n\t"
1558 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1559 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1560 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1561 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1562 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1563 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1564 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1565 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1566 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1567 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1568 "sdc1 %[ftmp6], 0x180(%[tmp]) \n\t"
1569 "uld %[low32], 0x00(%[src]) \n\t"
1570 "mtc1 %[low32], %[ftmp2] \n\t"
1571 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1572 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1573 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1574 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1575 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1576 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1577 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1578 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1579 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1580 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1581 "sdc1 %[ftmp6], 0x1b0(%[tmp]) \n\t"
1582 "uld %[low32], 0x00(%[src]) \n\t"
1583 "mtc1 %[low32], %[ftmp3] \n\t"
1584 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1585 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1586 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1587 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1588 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1589 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1590 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1591 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1592 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1593 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1594 "sdc1 %[ftmp6], 0x1e0(%[tmp]) \n\t"
1595 "uld %[low32], 0x00(%[src]) \n\t"
1596 "mtc1 %[low32], %[ftmp4] \n\t"
1597 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1598 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1599 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1600 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1601 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1602 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1603 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1604 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1605 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1606 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1607 "sdc1 %[ftmp6], 0x210(%[tmp]) \n\t"
1608 "uld %[low32], 0x00(%[src]) \n\t"
1609 "mtc1 %[low32], %[ftmp5] \n\t"
1610 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1611 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1612 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1613 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1614 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1615 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1616 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1617 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1618 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1619 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1620 "sdc1 %[ftmp6], 0x240(%[tmp]) \n\t"
1621 "uld %[low32], 0x00(%[src]) \n\t"
1622 "mtc1 %[low32], %[ftmp0] \n\t"
1623 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1624 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1625 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1626 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1627 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1628 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1629 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1630 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1631 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1632 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1633 "sdc1 %[ftmp6], 0x270(%[tmp]) \n\t"
1634 "uld %[low32], 0x00(%[src]) \n\t"
1635 "mtc1 %[low32], %[ftmp1] \n\t"
1636 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1637 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1638 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1639 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1640 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1641 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1642 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1643 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1644 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1645 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1646 "sdc1 %[ftmp6], 0x2a0(%[tmp]) \n\t"
1647 "uld %[low32], 0x00(%[src]) \n\t"
1648 "mtc1 %[low32], %[ftmp2] \n\t"
1649 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1650 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1651 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1652 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1653 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1654 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1655 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1656 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1657 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1658 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1659 "sdc1 %[ftmp6], 0x2d0(%[tmp]) \n\t"
/* ftmp8 and ftmp9 are bound as outputs but are not referenced by any of the
 * instructions visible above. */
1661 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1662 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1663 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1664 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1665 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1666 [ftmp10]
"=&f"(ftmp[10]),
/* tmp is an input only: the asm addresses rows by constant offset and does
 * not advance the pointer itself. */
1670 : [
tmp]
"r"(
tmp), [size]
"r"(size),
1671 [srcStride]
"r"((
mips_reg)srcStride),
/* The asm advanced src by size + 5 rows (5 priming rows plus one per stored
 * row); rewind and step 4 bytes right to the next column. */
1677 src += 4 - (size + 5) * srcStride;
1682 int16_t *
tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride,
int size)
/*
 * Second (horizontal) pass of the two-dimensional lowpass for size 8 or 16.
 *
 * Each loop iteration reads one row of 16-bit intermediates from tmp,
 * applies the 6-tap filter across it for 8 adjacent outputs, packs them to
 * unsigned bytes with saturation and stores 8 bytes to dst.
 *
 * NOTE(review): the function name, the first parameter(s), the loop label,
 * the initialisation of h and the per-row advance of tmp are not visible in
 * this view - confirm against the full file.
 */
/* Shift counts: ftmp8 = 2, ftmp9 = 6. */
1692 "dli %[tmp0], 0x02 \n\t"
1693 "mtc1 %[tmp0], %[ftmp8] \n\t"
1694 "dli %[tmp0], 0x06 \n\t"
1695 "mtc1 %[tmp0], %[ftmp9] \n\t"
/*
 * Load overlapping windows of four halfwords. The aligned loads fetch byte
 * offsets 0x00, 0x08 and 0x10; each gsldlc1/gsldrc1 pair fetches 8 bytes
 * from an unaligned offset (0x02, 0x0a, 0x12, then 0x04, 0x06, 0x0c, 0x0e).
 * With taps a..f at consecutive halfwords, lanes 0-3 use ftmp0/1/2 and
 * lanes 4-7 use ftmp3/4/5:
 *   ftmp0, ftmp3 = a + f     ftmp1, ftmp4 = b + e     ftmp2, ftmp5 = c + d
 */
1697 "ldc1 %[ftmp0], 0x00(%[tmp]) \n\t"
1698 "ldc1 %[ftmp3], 0x08(%[tmp]) \n\t"
1699 "ldc1 %[ftmp6], 0x10(%[tmp]) \n\t"
1700 "gsldlc1 %[ftmp1], 0x09(%[tmp]) \n\t"
1701 "gsldrc1 %[ftmp1], 0x02(%[tmp]) \n\t"
1702 "gsldlc1 %[ftmp4], 0x11(%[tmp]) \n\t"
1703 "gsldrc1 %[ftmp4], 0x0a(%[tmp]) \n\t"
1704 "gsldlc1 %[ftmp5], 0x19(%[tmp]) \n\t"
1705 "gsldrc1 %[ftmp5], 0x12(%[tmp]) \n\t"
1706 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1707 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1708 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1709 "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1710 "gsldlc1 %[ftmp2], 0x0b(%[tmp]) \n\t"
1711 "gsldrc1 %[ftmp2], 0x04(%[tmp]) \n\t"
1712 "gsldlc1 %[ftmp6], 0x0d(%[tmp]) \n\t"
1713 "gsldrc1 %[ftmp6], 0x06(%[tmp]) \n\t"
1714 "gsldlc1 %[ftmp5], 0x13(%[tmp]) \n\t"
1715 "gsldrc1 %[ftmp5], 0x0c(%[tmp]) \n\t"
1716 "gsldlc1 %[ftmp7], 0x15(%[tmp]) \n\t"
1717 "gsldrc1 %[ftmp7], 0x0e(%[tmp]) \n\t"
1718 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1719 "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
/*
 * Evaluate the filter with shifts instead of multiplies, keeping the
 * intermediate values within 16 bits:
 *   x = (((((a+f) - (b+e)) >> 2) - (b+e) + (c+d)) >> 2) + (c+d);  x >>= 6
 * which approximates ((a+f) - 5*(b+e) + 20*(c+d)) >> 10.
 * No rounding constant is added here; presumably the bias is already folded
 * into the values stored in tmp - confirm against the producer of tmp.
 */
1720 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1721 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1722 "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
1723 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1724 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1725 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1726 "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1727 "paddsh %[ftmp3] , %[ftmp3], %[ftmp5] \n\t"
1728 "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
1729 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1730 "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1731 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1732 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1733 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
/* Pack both halves to 8 unsigned bytes and store them to dst with an
 * unaligned-capable store pair; h counts the remaining rows. */
1734 "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
1735 "addi %[h], %[h], -0x01 \n\t"
1736 "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t"
1737 "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t"
1739 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1740 "bnez %[h], 1b \n\t"
1741 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1742 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1743 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1744 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1745 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1747 [tmp]
"+&r"(tmp), [dst]
"+&r"(dst),
1749 : [dstStride]
"r"((
mips_reg)dstStride)
/*
 * Rewind size rows and step 8 pixels right for the next 8-wide column.
 * The tmp rewind uses a row pitch of 24 int16_t.
 * NOTE(review): this assumes the loop ran size times and advanced tmp by
 * 24 int16_t per row in a line not shown here - confirm.
 */
1753 tmp += 8 - size * 24;
1754 dst += 8 - size * dstStride;
1759 const uint8_t *
src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1760 ptrdiff_t srcStride,
int size)
1767 const uint8_t *
src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1768 ptrdiff_t srcStride)
1775 const uint8_t *
src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1776 ptrdiff_t srcStride)
1783 const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1791 "dli %[tmp0], 0x02 \n\t"
1792 "mtc1 %[tmp0], %[ftmp7] \n\t"
1793 "dli %[tmp0], 0x05 \n\t"
1794 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1795 "mtc1 %[tmp0], %[ftmp8] \n\t"
1797 "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
1798 "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
1799 "gsldlc1 %[ftmp3], 0x08(%[src]) \n\t"
1800 "gsldrc1 %[ftmp3], 0x01(%[src]) \n\t"
1801 "punpckhbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
1802 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1803 "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
1804 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1805 "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1806 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1807 "psllh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1808 "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1809 "gsldlc1 %[ftmp3], 0x06(%[src]) \n\t"
1810 "gsldrc1 %[ftmp3], -0x01(%[src]) \n\t"
1811 "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
1812 "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
1813 "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
1814 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1815 "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1816 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1817 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1818 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1819 "psubh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1820 "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1821 "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
1822 "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
1823 "uld %[low32], -0x02(%[src]) \n\t"
1824 "mtc1 %[low32], %[ftmp3] \n\t"
1825 "uld %[low32], 0x07(%[src]) \n\t"
1826 "mtc1 %[low32], %[ftmp6] \n\t"
1827 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1828 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1829 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1830 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1831 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1832 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1833 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1834 "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1835 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
1836 "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1837 "gsldlc1 %[ftmp5], 0x07(%[src2]) \n\t"
1838 "gsldrc1 %[ftmp5], 0x00(%[src2]) \n\t"
1839 "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1840 PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
1841 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1843 "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
1844 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1845 PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
1846 "bgtz %[h], 1b \n\t"
1847 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1848 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1849 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1850 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1851 [ftmp8]
"=&f"(ftmp[8]),
1852 [tmp0]
"=&r"(tmp[0]),
1853 [
src]
"+&r"(
src), [dst]
"+&r"(dst),
1854 [src2]
"+&r"(src2), [h]
"+&r"(h),
1856 : [src2Stride]
"r"((
mips_reg)src2Stride),
1857 [dstStride]
"r"((
mips_reg)dstStride),
1864 const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride,
int h)
1871 "dli %[tmp0], 0x05 \n\t"
1872 "gsldlc1 %[ftmp0], 0x07(%[src16]) \n\t"
1873 "gsldrc1 %[ftmp0], 0x00(%[src16]) \n\t"
1874 "mtc1 %[tmp0], %[ftmp6] \n\t"
1875 "gsldlc1 %[ftmp1], 0x0f(%[src16]) \n\t"
1876 "gsldrc1 %[ftmp1], 0x08(%[src16]) \n\t"
1877 "gsldlc1 %[ftmp2], 0x37(%[src16]) \n\t"
1878 "gsldrc1 %[ftmp2], 0x30(%[src16]) \n\t"
1879 "gsldlc1 %[ftmp3], 0x3f(%[src16]) \n\t"
1880 "gsldrc1 %[ftmp3], 0x38(%[src16]) \n\t"
1881 "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1882 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1883 "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1884 "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1885 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1886 "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1887 "ldc1 %[ftmp5], 0x00(%[src8]) \n\t"
1888 "gsldxc1 %[ftmp4], 0x00(%[src8], %[src8Stride]) \n\t"
1889 "pavgb %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1890 "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1891 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
1892 "gssdxc1 %[ftmp2], 0x00(%[dst], %[dstStride]) \n\t"
1893 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1894 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1895 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1896 [ftmp6]
"=&f"(ftmp[6]),
1898 : [src8]
"r"(src8), [src16]
"r"(src16),
1900 [src8Stride]
"r"((
mips_reg)src8Stride),
1901 [dstStride]
"r"((
mips_reg)dstStride)
1905 src8 += 2 * src8Stride;
1907 dst += 2 * dstStride;
1912 const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1918 src += 8 * dstStride;
1919 dst += 8 * dstStride;
1920 src2 += 8 * src2Stride;
1928 const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride,
int h)
1936 int dstStride,
int srcStride)
1941 int16_t *
tmp = _tmp;
1949 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1950 "dli %[tmp0], 0x09 \n\t"
1952 "uld %[low32], -0x02(%[src]) \n\t"
1953 "mtc1 %[low32], %[ftmp1] \n\t"
1954 "uld %[low32], -0x01(%[src]) \n\t"
1955 "mtc1 %[low32], %[ftmp2] \n\t"
1956 "uld %[low32], 0x00(%[src]) \n\t"
1957 "mtc1 %[low32], %[ftmp3] \n\t"
1958 "uld %[low32], 0x01(%[src]) \n\t"
1959 "mtc1 %[low32], %[ftmp4] \n\t"
1960 "uld %[low32], 0x02(%[src]) \n\t"
1961 "mtc1 %[low32], %[ftmp5] \n\t"
1962 "uld %[low32], 0x03(%[src]) \n\t"
1963 "mtc1 %[low32], %[ftmp6] \n\t"
1964 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1965 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1966 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1967 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1968 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1969 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1970 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1971 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1972 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1973 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1974 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1975 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1976 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
1977 "sdc1 %[ftmp9], 0x00(%[tmp]) \n\t"
1978 "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1979 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1980 PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1981 "bnez %[tmp0], 1b \n\t"
1982 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1983 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1984 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1985 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1986 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1988 [tmp]
"+&r"(tmp), [
src]
"+&r"(
src),
1990 : [tmpStride]
"r"(8),
1991 [srcStride]
"r"((
mips_reg)srcStride),
1998 for (i=0; i<4; i++) {
1999 const int16_t tmpB= tmp[-8];
2000 const int16_t tmpA= tmp[-4];
2001 const int16_t tmp0= tmp[ 0];
2002 const int16_t tmp1= tmp[ 4];
2003 const int16_t tmp2= tmp[ 8];
2004 const int16_t tmp3= tmp[12];
2005 const int16_t tmp4= tmp[16];
2006 const int16_t tmp5= tmp[20];
2007 const int16_t tmp6= tmp[24];
2008 op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
2009 op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
2010 op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
2011 op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
2018 int16_t *
tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride,
int size)
2027 "dli %[tmp0], 0x02 \n\t"
2028 "mtc1 %[tmp0], %[ftmp9] \n\t"
2029 "dli %[tmp0], 0x06 \n\t"
2030 "mtc1 %[tmp0], %[ftmp10] \n\t"
2032 "ldc1 %[ftmp0], 0x00(%[tmp]) \n\t"
2033 "ldc1 %[ftmp3], 0x08(%[tmp]) \n\t"
2034 "gsldlc1 %[ftmp1], 0x09(%[tmp]) \n\t"
2035 "gsldrc1 %[ftmp1], 0x02(%[tmp]) \n\t"
2036 "gsldlc1 %[ftmp4], 0x11(%[tmp]) \n\t"
2037 "gsldrc1 %[ftmp4], 0x0a(%[tmp]) \n\t"
2038 "ldc1 %[ftmp7], 0x10(%[tmp]) \n\t"
2039 "gsldlc1 %[ftmp8], 0x19(%[tmp]) \n\t"
2040 "gsldrc1 %[ftmp8], 0x12(%[tmp]) \n\t"
2041 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2042 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2043 "paddh %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2044 "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
2045 "gsldlc1 %[ftmp2], 0x0b(%[tmp]) \n\t"
2046 "gsldrc1 %[ftmp2], 0x04(%[tmp]) \n\t"
2047 "gsldlc1 %[ftmp5], 0x13(%[tmp]) \n\t"
2048 "gsldrc1 %[ftmp5], 0x0c(%[tmp]) \n\t"
2049 "gsldlc1 %[ftmp7], 0x0d(%[tmp]) \n\t"
2050 "gsldrc1 %[ftmp7], 0x06(%[tmp]) \n\t"
2051 "gsldlc1 %[ftmp8], 0x15(%[tmp]) \n\t"
2052 "gsldrc1 %[ftmp8], 0x0e(%[tmp]) \n\t"
2053 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2054 "paddh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2055 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2056 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2057 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
2058 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
2059 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2060 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2061 "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2062 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2063 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
2064 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
2065 "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2066 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2067 "psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
2068 "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
2069 "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
2070 "ldc1 %[ftmp6], 0x00(%[dst]) \n\t"
2071 "pavgb %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
2072 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
2073 "addi %[h], %[h], -0x01 \n\t"
2074 PTR_ADDI "%[tmp], %[tmp], 0x30 \n\t"
2075 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
2076 "bnez %[h], 1b \n\t"
2077 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2078 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2079 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2080 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2081 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
2082 [ftmp10]
"=&f"(ftmp[10]),
2084 [
tmp]
"+&r"(
tmp), [dst]
"+&r"(dst),
2086 : [dstStride]
"r"((
mips_reg)dstStride)
2090 tmp += 8 - size * 24;
2091 dst += 8 - size * dstStride;
2096 const uint8_t *
src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
2097 ptrdiff_t srcStride,
int size)
2104 const uint8_t *
src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
2105 ptrdiff_t srcStride)
2112 const uint8_t *
src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
2113 ptrdiff_t srcStride)
2120 const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
2127 "dli %[tmp1], 0x02 \n\t"
2128 "ori %[tmp0], $0, 0x8 \n\t"
2129 "mtc1 %[tmp1], %[ftmp7] \n\t"
2130 "dli %[tmp1], 0x05 \n\t"
2131 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2132 "mtc1 %[tmp1], %[ftmp8] \n\t"
2134 "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
2135 "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
2136 "gsldlc1 %[ftmp2], 0x08(%[src]) \n\t"
2137 "gsldrc1 %[ftmp2], 0x01(%[src]) \n\t"
2138 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
2139 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2140 "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2141 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2142 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2143 "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2144 "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2145 "psllh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
2146 "gsldlc1 %[ftmp2], 0x06(%[src]) \n\t"
2147 "gsldrc1 %[ftmp2], -0x01(%[src]) \n\t"
2148 "gsldlc1 %[ftmp5], 0x09(%[src]) \n\t"
2149 "gsldrc1 %[ftmp5], 0x02(%[src]) \n\t"
2150 "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2151 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2152 "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
2153 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
2154 "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2155 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
2156 "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2157 "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
2158 "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
2159 "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
2160 "uld %[low32], -0x02(%[src]) \n\t"
2161 "mtc1 %[low32], %[ftmp2] \n\t"
2162 "uld %[low32], 0x07(%[src]) \n\t"
2163 "mtc1 %[low32], %[ftmp6] \n\t"
2164 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2165 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2166 "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2167 "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
2168 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
2169 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
2170 "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2171 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2172 "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
2173 "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2174 "gsldlc1 %[ftmp5], 0x07(%[src2]) \n\t"
2175 "gsldrc1 %[ftmp5], 0x00(%[src2]) \n\t"
2176 "packushb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2177 "ldc1 %[ftmp9], 0x00(%[dst]) \n\t"
2178 "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2179 "pavgb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
2180 PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
2181 "sdc1 %[ftmp1], 0x00(%[dst]) \n\t"
2182 "daddi %[tmp0], %[tmp0], -0x01 \n\t"
2183 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
2184 PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
2185 "bgtz %[tmp0], 1b \n\t"
2186 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2187 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2188 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2189 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2190 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
2191 [tmp0]
"=&r"(tmp[0]), [tmp1]
"=&r"(tmp[1]),
2192 [dst]
"+&r"(dst), [src]
"+&r"(src),
2195 : [dstStride]
"r"((
mips_reg)dstStride),
2196 [src2Stride]
"r"((
mips_reg)src2Stride),
2203 const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
2209 src += 8 * dstStride;
2210 dst += 8 * dstStride;
2211 src2 += 8 * src2Stride;
2219 const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride,
int b)
2226 "dli %[tmp0], 0x05 \n\t"
2227 "gsldlc1 %[ftmp0], 0x07(%[src16]) \n\t"
2228 "gsldrc1 %[ftmp0], 0x00(%[src16]) \n\t"
2229 "mtc1 %[tmp0], %[ftmp6] \n\t"
2230 "gsldlc1 %[ftmp1], 0x0f(%[src16]) \n\t"
2231 "gsldrc1 %[ftmp1], 0x08(%[src16]) \n\t"
2232 "gsldlc1 %[ftmp2], 0x37(%[src16]) \n\t"
2233 "gsldrc1 %[ftmp2], 0x30(%[src16]) \n\t"
2234 "gsldlc1 %[ftmp3], 0x3f(%[src16]) \n\t"
2235 "gsldrc1 %[ftmp3], 0x38(%[src16]) \n\t"
2236 "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
2237 "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2238 "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2239 "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
2240 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2241 "ldc1 %[ftmp4], 0x00(%[src8]) \n\t"
2242 "gsldxc1 %[ftmp5], 0x00(%[src8], %[src8Stride]) \n\t"
2243 "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2244 "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2245 "pavgb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2246 "ldc1 %[ftmp7], 0x00(%[dst]) \n\t"
2247 "pavgb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
2248 "sdc1 %[ftmp0], 0x00(%[dst]) \n\t"
2249 "gsldxc1 %[ftmp7], 0x00(%[dst], %[dstStride]) \n\t"
2250 "pavgb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2251 "gssdxc1 %[ftmp2], 0x00(%[dst], %[dstStride]) \n\t"
2252 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2253 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2254 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2255 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2257 : [src8]
"r"(src8), [src16]
"r"(src16),
2259 [src8Stride]
"r"((
mips_reg)src8Stride),
2260 [dstStride]
"r"((
mips_reg)dstStride)
2264 src8 += 2 * src8Stride;
2266 dst += 2 * dstStride;
2271 const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride,
int b)
2311 uint8_t *
const full_mid= full + 8;
2322 uint8_t *
const full_mid= full + 8;
2331 uint8_t *
const full_mid= full + 8;
2342 uint8_t *
const full_mid= full + 8;
2355 uint8_t *
const full_mid= full + 8;
2368 uint8_t *
const full_mid= full + 8;
2381 uint8_t *
const full_mid= full + 8;
2420 uint8_t *
const full_mid= full + 8;
2433 uint8_t *
const full_mid= full + 8;
2475 uint8_t *
const full_mid= full + 8;
2486 uint8_t *
const full_mid= full + 8;
2495 uint8_t *
const full_mid= full + 8;
2506 uint8_t *
const full_mid= full + 8;
2519 uint8_t *
const full_mid= full + 8;
2532 uint8_t *
const full_mid= full + 8;
2545 uint8_t *
const full_mid= full + 8;
2584 uint8_t *
const full_mid= full + 8;
2597 uint8_t *
const full_mid= full + 8;
2639 uint8_t *
const full_mid= full + 16;
2650 uint8_t *
const full_mid= full + 16;
2659 uint8_t *
const full_mid= full + 16;
2670 uint8_t *
const full_mid= full + 16;
2683 uint8_t *
const full_mid= full + 16;
2696 uint8_t *
const full_mid= full + 16;
2709 uint8_t *
const full_mid= full + 16;
2721 uint16_t __attribute__ ((aligned(8)))
temp[192];
2731 int16_t *
const halfV = (int16_t *) (temp + 64);
2742 int16_t *
const halfV = (int16_t *) (temp + 64);
2753 int16_t *
const halfV = (int16_t *) (temp + 64);
2764 int16_t *
const halfV = (int16_t *) (temp + 64);
2803 uint8_t *
const full_mid= full + 16;
2814 uint8_t *
const full_mid= full + 16;
2823 uint8_t *
const full_mid= full + 16;
2834 uint8_t *
const full_mid= full + 16;
2847 uint8_t *
const full_mid= full + 16;
2860 uint8_t *
const full_mid= full + 16;
2873 uint8_t *
const full_mid= full + 16;
2885 uint16_t __attribute__ ((aligned(8)))
temp[192];
2895 int16_t *
const halfV = (int16_t *) (temp + 64);
2906 int16_t *
const halfV = (int16_t *) (temp + 64);
2917 int16_t *
const halfV = (int16_t *) (temp + 64);
2928 int16_t *
const halfV = (int16_t *) (temp + 64);
2967 uint8_t *
const full_mid= full + 32;
2978 uint8_t *
const full_mid= full + 32;
2987 uint8_t *
const full_mid= full + 32;
2998 uint8_t *
const full_mid= full + 32;
3011 uint8_t *
const full_mid= full + 32;
3024 uint8_t *
const full_mid= full + 32;
3037 uint8_t *
const full_mid= full + 32;
3049 uint16_t __attribute__ ((aligned(8)))
temp[384];
3059 int16_t *
const halfV = (int16_t *) (temp + 256);
3070 int16_t *
const halfV = (int16_t *) (temp + 256);
3081 int16_t *
const halfV = (int16_t *) (temp + 256);
3092 int16_t *
const halfV = (int16_t *) (temp + 256);
3131 uint8_t *
const full_mid= full + 32;
3142 uint8_t *
const full_mid= full + 32;
3151 uint8_t *
const full_mid= full + 32;
3162 uint8_t *
const full_mid= full + 32;
3175 uint8_t *
const full_mid= full + 32;
3188 uint8_t *
const full_mid= full + 32;
3201 uint8_t *
const full_mid= full + 32;
3213 uint16_t __attribute__ ((aligned(8)))
temp[384];
3223 int16_t *
const halfV = (int16_t *) (temp + 256);
3234 int16_t *
const halfV = (int16_t *) (temp + 256);
3245 int16_t *
const halfV = (int16_t *) (temp + 256);
3256 int16_t *
const halfV = (int16_t *) (temp + 256);
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from w...
void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp, const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
static void copy_block16_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void copy_block8_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
GLint GLenum GLboolean GLsizei stride
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
static void copy_block4_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)