00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "libavcodec/dsputil.h"
00023 #include "libavcodec/simple_idct.h"
00024 #include "libavutil/mem.h"
00025 #include "dsputil_mmx.h"
00026
00027 #if HAVE_INLINE_ASM
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
// Fixed-point cosine factors used to build the coeffs table:
//   Ci = cos(i*M_PI/16) * sqrt(2) * (1<<14), converted to an integer after
//   adding the rounding term shown on each line.
// Every value fits in a signed 16-bit word, as required by the int16_t table.
00039 #define C0 23170 // cos(0*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00040 #define C1 22725 // cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00041 #define C2 21407 // cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00042 #define C3 19266 // cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00043 #define C4 16383 // cos(4*M_PI/16)*sqrt(2)*(1<<14) - 0.5; note the MINUS: the exact value is 1<<14 = 16384, stored as 16383
00044 #define C5 12873 // cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00045 #define C6 8867 // cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00046 #define C7 4520 // cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00047
// Right-shift amounts of the two passes.
// ROW_SHIFT sizes the row rounder 1<<(ROW_SHIFT-1) stored at the start of coeffs.
// The asm macro invocations in idct() pass the shifts as the literals 11 and 20
// rather than through these macros, so the two must be kept in sync by hand.
00048 #define ROW_SHIFT 11
00049 #define COL_SHIFT 20 // 6 -- NOTE(review): the meaning of this trailing "6" is not evident from the visible code
00050
// 64-bit constants that the inline asm in idct() references via MANGLE().
//
// wm1010: word mask that keeps 16-bit words 1 and 3 of a quadword (bits 16-31
// and 48-63) and clears words 0 and 2. DC_COND_IDCT ANDs it with the first
// source quadword before OR-ing in the other three, so words 0 and 2 of that
// first quadword do not take part in its "everything else is zero" test.
00051 DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL;
// d40000: 0x40000 in the low 32-bit lane, 0 in the high lane. It is added with
// paddd in the shortcut path of DC_COND_IDCT, between "pslld $16" and
// "psrad $13", so only the low lane receives this bias.
00052 DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL;
00053
// Constant table for the MMX IDCT: rounders followed by cosine pairs.
// Each row below is four int16_t = 8 bytes; the asm in idct() reads the rows at
// fixed byte offsets (0, 8, 16 ... 104) from operand %2, so the row ORDER is
// load-bearing and must not change.
// NOTE(review): the asm operand list is outside this view; %2 is presumably
// bound to this table -- confirm.
// Cosine rows are arranged as repeated (a, b) word pairs so that one pmaddwd
// yields a*x0 + b*x1 in each 32-bit lane.
00054 DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
00055 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, // offset 0: row rounder, 1<<(ROW_SHIFT-1) in each 32-bit lane; passed as "paddd (%2)" to the Z_COND_IDCT rows
00056
00057
00058 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0, // offset 8: rounder passed as "paddd 8(%2)" to the first row (DC_COND_IDCT); the extra 1 in word 1 makes the low little-endian lane (1<<16) + (1<<(ROW_SHIFT-1)). NOTE(review): presumably folds column-pass rounding into the DC term -- confirm
00059
00060
00061
00062
00063 C4, C4, C4, C4, // offset 16
00064 C4, -C4, C4, -C4, // offset 24
00065
00066 C2, C6, C2, C6, // offset 32
00067 C6, -C2, C6, -C2, // offset 40
00068
00069 C1, C3, C1, C3, // offset 48
00070 C5, C7, C5, C7, // offset 56
00071
00072 C3, -C7, C3, -C7, // offset 64
00073 -C1, -C5, -C1, -C5, // offset 72
00074
00075 C5, -C1, C5, -C1, // offset 80
00076 C7, C3, C7, C3, // offset 88
00077
00078 C7, -C5, C7, -C5, // offset 96
00079 C3, -C1, C3, -C1 // offset 104 (last row; table is 112 bytes)
00080 };
00081
00082 static inline void idct(int16_t *block)
00083 {
00084 DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
00085 int16_t * const temp= (int16_t*)align_tmp;
00086
00087 __asm__ volatile(
00088 #if 0 //Alternative, simpler variant
00089
00090 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00091 "movq " #src0 ", %%mm0 \n\t" \
00092 "movq " #src4 ", %%mm1 \n\t" \
00093 "movq " #src1 ", %%mm2 \n\t" \
00094 "movq " #src5 ", %%mm3 \n\t" \
00095 "movq 16(%2), %%mm4 \n\t" \
00096 "pmaddwd %%mm0, %%mm4 \n\t" \
00097 "movq 24(%2), %%mm5 \n\t" \
00098 "pmaddwd %%mm5, %%mm0 \n\t" \
00099 "movq 32(%2), %%mm5 \n\t" \
00100 "pmaddwd %%mm1, %%mm5 \n\t" \
00101 "movq 40(%2), %%mm6 \n\t" \
00102 "pmaddwd %%mm6, %%mm1 \n\t" \
00103 "movq 48(%2), %%mm7 \n\t" \
00104 "pmaddwd %%mm2, %%mm7 \n\t" \
00105 #rounder ", %%mm4 \n\t"\
00106 "movq %%mm4, %%mm6 \n\t" \
00107 "paddd %%mm5, %%mm4 \n\t" \
00108 "psubd %%mm5, %%mm6 \n\t" \
00109 "movq 56(%2), %%mm5 \n\t" \
00110 "pmaddwd %%mm3, %%mm5 \n\t" \
00111 #rounder ", %%mm0 \n\t"\
00112 "paddd %%mm0, %%mm1 \n\t" \
00113 "paddd %%mm0, %%mm0 \n\t" \
00114 "psubd %%mm1, %%mm0 \n\t" \
00115 "pmaddwd 64(%2), %%mm2 \n\t" \
00116 "paddd %%mm5, %%mm7 \n\t" \
00117 "movq 72(%2), %%mm5 \n\t" \
00118 "pmaddwd %%mm3, %%mm5 \n\t" \
00119 "paddd %%mm4, %%mm7 \n\t" \
00120 "paddd %%mm4, %%mm4 \n\t" \
00121 "psubd %%mm7, %%mm4 \n\t" \
00122 "paddd %%mm2, %%mm5 \n\t" \
00123 "psrad $" #shift ", %%mm7 \n\t"\
00124 "psrad $" #shift ", %%mm4 \n\t"\
00125 "movq %%mm1, %%mm2 \n\t" \
00126 "paddd %%mm5, %%mm1 \n\t" \
00127 "psubd %%mm5, %%mm2 \n\t" \
00128 "psrad $" #shift ", %%mm1 \n\t"\
00129 "psrad $" #shift ", %%mm2 \n\t"\
00130 "packssdw %%mm1, %%mm7 \n\t" \
00131 "packssdw %%mm4, %%mm2 \n\t" \
00132 "movq %%mm7, " #dst " \n\t"\
00133 "movq " #src1 ", %%mm1 \n\t" \
00134 "movq 80(%2), %%mm4 \n\t" \
00135 "movq %%mm2, 24+" #dst " \n\t"\
00136 "pmaddwd %%mm1, %%mm4 \n\t" \
00137 "movq 88(%2), %%mm7 \n\t" \
00138 "pmaddwd 96(%2), %%mm1 \n\t" \
00139 "pmaddwd %%mm3, %%mm7 \n\t" \
00140 "movq %%mm0, %%mm2 \n\t" \
00141 "pmaddwd 104(%2), %%mm3 \n\t" \
00142 "paddd %%mm7, %%mm4 \n\t" \
00143 "paddd %%mm4, %%mm2 \n\t" \
00144 "psubd %%mm4, %%mm0 \n\t" \
00145 "psrad $" #shift ", %%mm2 \n\t"\
00146 "psrad $" #shift ", %%mm0 \n\t"\
00147 "movq %%mm6, %%mm4 \n\t" \
00148 "paddd %%mm1, %%mm3 \n\t" \
00149 "paddd %%mm3, %%mm6 \n\t" \
00150 "psubd %%mm3, %%mm4 \n\t" \
00151 "psrad $" #shift ", %%mm6 \n\t"\
00152 "packssdw %%mm6, %%mm2 \n\t" \
00153 "movq %%mm2, 8+" #dst " \n\t"\
00154 "psrad $" #shift ", %%mm4 \n\t"\
00155 "packssdw %%mm0, %%mm4 \n\t" \
00156 "movq %%mm4, 16+" #dst " \n\t"\
00157
00158 #define COL_IDCT(src0, src4, src1, src5, dst, shift) \
00159 "movq " #src0 ", %%mm0 \n\t" \
00160 "movq " #src4 ", %%mm1 \n\t" \
00161 "movq " #src1 ", %%mm2 \n\t" \
00162 "movq " #src5 ", %%mm3 \n\t" \
00163 "movq 16(%2), %%mm4 \n\t" \
00164 "pmaddwd %%mm0, %%mm4 \n\t" \
00165 "movq 24(%2), %%mm5 \n\t" \
00166 "pmaddwd %%mm5, %%mm0 \n\t" \
00167 "movq 32(%2), %%mm5 \n\t" \
00168 "pmaddwd %%mm1, %%mm5 \n\t" \
00169 "movq 40(%2), %%mm6 \n\t" \
00170 "pmaddwd %%mm6, %%mm1 \n\t" \
00171 "movq %%mm4, %%mm6 \n\t" \
00172 "movq 48(%2), %%mm7 \n\t" \
00173 "pmaddwd %%mm2, %%mm7 \n\t" \
00174 "paddd %%mm5, %%mm4 \n\t" \
00175 "psubd %%mm5, %%mm6 \n\t" \
00176 "movq %%mm0, %%mm5 \n\t" \
00177 "paddd %%mm1, %%mm0 \n\t" \
00178 "psubd %%mm1, %%mm5 \n\t" \
00179 "movq 56(%2), %%mm1 \n\t" \
00180 "pmaddwd %%mm3, %%mm1 \n\t" \
00181 "pmaddwd 64(%2), %%mm2 \n\t" \
00182 "paddd %%mm1, %%mm7 \n\t" \
00183 "movq 72(%2), %%mm1 \n\t" \
00184 "pmaddwd %%mm3, %%mm1 \n\t" \
00185 "paddd %%mm4, %%mm7 \n\t" \
00186 "paddd %%mm4, %%mm4 \n\t" \
00187 "psubd %%mm7, %%mm4 \n\t" \
00188 "paddd %%mm2, %%mm1 \n\t" \
00189 "psrad $" #shift ", %%mm7 \n\t"\
00190 "psrad $" #shift ", %%mm4 \n\t"\
00191 "movq %%mm0, %%mm2 \n\t" \
00192 "paddd %%mm1, %%mm0 \n\t" \
00193 "psubd %%mm1, %%mm2 \n\t" \
00194 "psrad $" #shift ", %%mm0 \n\t"\
00195 "psrad $" #shift ", %%mm2 \n\t"\
00196 "packssdw %%mm7, %%mm7 \n\t" \
00197 "movd %%mm7, " #dst " \n\t"\
00198 "packssdw %%mm0, %%mm0 \n\t" \
00199 "movd %%mm0, 16+" #dst " \n\t"\
00200 "packssdw %%mm2, %%mm2 \n\t" \
00201 "movd %%mm2, 96+" #dst " \n\t"\
00202 "packssdw %%mm4, %%mm4 \n\t" \
00203 "movd %%mm4, 112+" #dst " \n\t"\
00204 "movq " #src1 ", %%mm0 \n\t" \
00205 "movq 80(%2), %%mm4 \n\t" \
00206 "pmaddwd %%mm0, %%mm4 \n\t" \
00207 "movq 88(%2), %%mm7 \n\t" \
00208 "pmaddwd 96(%2), %%mm0 \n\t" \
00209 "pmaddwd %%mm3, %%mm7 \n\t" \
00210 "movq %%mm5, %%mm2 \n\t" \
00211 "pmaddwd 104(%2), %%mm3 \n\t" \
00212 "paddd %%mm7, %%mm4 \n\t" \
00213 "paddd %%mm4, %%mm2 \n\t" \
00214 "psubd %%mm4, %%mm5 \n\t" \
00215 "psrad $" #shift ", %%mm2 \n\t"\
00216 "psrad $" #shift ", %%mm5 \n\t"\
00217 "movq %%mm6, %%mm4 \n\t" \
00218 "paddd %%mm0, %%mm3 \n\t" \
00219 "paddd %%mm3, %%mm6 \n\t" \
00220 "psubd %%mm3, %%mm4 \n\t" \
00221 "psrad $" #shift ", %%mm6 \n\t"\
00222 "psrad $" #shift ", %%mm4 \n\t"\
00223 "packssdw %%mm2, %%mm2 \n\t" \
00224 "packssdw %%mm6, %%mm6 \n\t" \
00225 "movd %%mm2, 32+" #dst " \n\t"\
00226 "packssdw %%mm4, %%mm4 \n\t" \
00227 "packssdw %%mm5, %%mm5 \n\t" \
00228 "movd %%mm6, 48+" #dst " \n\t"\
00229 "movd %%mm4, 64+" #dst " \n\t"\
00230 "movd %%mm5, 80+" #dst " \n\t"\
00231
00232
00233 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00234 "movq " #src0 ", %%mm0 \n\t" \
00235 "movq " #src4 ", %%mm1 \n\t" \
00236 "movq " #src1 ", %%mm2 \n\t" \
00237 "movq " #src5 ", %%mm3 \n\t" \
00238 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
00239 "pand %%mm0, %%mm4 \n\t"\
00240 "por %%mm1, %%mm4 \n\t"\
00241 "por %%mm2, %%mm4 \n\t"\
00242 "por %%mm3, %%mm4 \n\t"\
00243 "packssdw %%mm4,%%mm4 \n\t"\
00244 "movd %%mm4, %%eax \n\t"\
00245 "orl %%eax, %%eax \n\t"\
00246 "jz 1f \n\t"\
00247 "movq 16(%2), %%mm4 \n\t" \
00248 "pmaddwd %%mm0, %%mm4 \n\t" \
00249 "movq 24(%2), %%mm5 \n\t" \
00250 "pmaddwd %%mm5, %%mm0 \n\t" \
00251 "movq 32(%2), %%mm5 \n\t" \
00252 "pmaddwd %%mm1, %%mm5 \n\t" \
00253 "movq 40(%2), %%mm6 \n\t" \
00254 "pmaddwd %%mm6, %%mm1 \n\t" \
00255 "movq 48(%2), %%mm7 \n\t" \
00256 "pmaddwd %%mm2, %%mm7 \n\t" \
00257 #rounder ", %%mm4 \n\t"\
00258 "movq %%mm4, %%mm6 \n\t" \
00259 "paddd %%mm5, %%mm4 \n\t" \
00260 "psubd %%mm5, %%mm6 \n\t" \
00261 "movq 56(%2), %%mm5 \n\t" \
00262 "pmaddwd %%mm3, %%mm5 \n\t" \
00263 #rounder ", %%mm0 \n\t"\
00264 "paddd %%mm0, %%mm1 \n\t" \
00265 "paddd %%mm0, %%mm0 \n\t" \
00266 "psubd %%mm1, %%mm0 \n\t" \
00267 "pmaddwd 64(%2), %%mm2 \n\t" \
00268 "paddd %%mm5, %%mm7 \n\t" \
00269 "movq 72(%2), %%mm5 \n\t" \
00270 "pmaddwd %%mm3, %%mm5 \n\t" \
00271 "paddd %%mm4, %%mm7 \n\t" \
00272 "paddd %%mm4, %%mm4 \n\t" \
00273 "psubd %%mm7, %%mm4 \n\t" \
00274 "paddd %%mm2, %%mm5 \n\t" \
00275 "psrad $" #shift ", %%mm7 \n\t"\
00276 "psrad $" #shift ", %%mm4 \n\t"\
00277 "movq %%mm1, %%mm2 \n\t" \
00278 "paddd %%mm5, %%mm1 \n\t" \
00279 "psubd %%mm5, %%mm2 \n\t" \
00280 "psrad $" #shift ", %%mm1 \n\t"\
00281 "psrad $" #shift ", %%mm2 \n\t"\
00282 "packssdw %%mm1, %%mm7 \n\t" \
00283 "packssdw %%mm4, %%mm2 \n\t" \
00284 "movq %%mm7, " #dst " \n\t"\
00285 "movq " #src1 ", %%mm1 \n\t" \
00286 "movq 80(%2), %%mm4 \n\t" \
00287 "movq %%mm2, 24+" #dst " \n\t"\
00288 "pmaddwd %%mm1, %%mm4 \n\t" \
00289 "movq 88(%2), %%mm7 \n\t" \
00290 "pmaddwd 96(%2), %%mm1 \n\t" \
00291 "pmaddwd %%mm3, %%mm7 \n\t" \
00292 "movq %%mm0, %%mm2 \n\t" \
00293 "pmaddwd 104(%2), %%mm3 \n\t" \
00294 "paddd %%mm7, %%mm4 \n\t" \
00295 "paddd %%mm4, %%mm2 \n\t" \
00296 "psubd %%mm4, %%mm0 \n\t" \
00297 "psrad $" #shift ", %%mm2 \n\t"\
00298 "psrad $" #shift ", %%mm0 \n\t"\
00299 "movq %%mm6, %%mm4 \n\t" \
00300 "paddd %%mm1, %%mm3 \n\t" \
00301 "paddd %%mm3, %%mm6 \n\t" \
00302 "psubd %%mm3, %%mm4 \n\t" \
00303 "psrad $" #shift ", %%mm6 \n\t"\
00304 "packssdw %%mm6, %%mm2 \n\t" \
00305 "movq %%mm2, 8+" #dst " \n\t"\
00306 "psrad $" #shift ", %%mm4 \n\t"\
00307 "packssdw %%mm0, %%mm4 \n\t" \
00308 "movq %%mm4, 16+" #dst " \n\t"\
00309 "jmp 2f \n\t"\
00310 "1: \n\t"\
00311 "pslld $16, %%mm0 \n\t"\
00312 "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
00313 "psrad $13, %%mm0 \n\t"\
00314 "packssdw %%mm0, %%mm0 \n\t"\
00315 "movq %%mm0, " #dst " \n\t"\
00316 "movq %%mm0, 8+" #dst " \n\t"\
00317 "movq %%mm0, 16+" #dst " \n\t"\
00318 "movq %%mm0, 24+" #dst " \n\t"\
00319 "2: \n\t"
00320
00321
00322
00323 ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
00324
00325
00326
00327
00328 DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
00329 DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
00330 DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
00331
00332
00333
00334 COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00335 COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00336 COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00337 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00338
00339 #else
00340
00341 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00342 "movq " #src0 ", %%mm0 \n\t" \
00343 "movq " #src4 ", %%mm1 \n\t" \
00344 "movq " #src1 ", %%mm2 \n\t" \
00345 "movq " #src5 ", %%mm3 \n\t" \
00346 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
00347 "pand %%mm0, %%mm4 \n\t"\
00348 "por %%mm1, %%mm4 \n\t"\
00349 "por %%mm2, %%mm4 \n\t"\
00350 "por %%mm3, %%mm4 \n\t"\
00351 "packssdw %%mm4,%%mm4 \n\t"\
00352 "movd %%mm4, %%eax \n\t"\
00353 "orl %%eax, %%eax \n\t"\
00354 "jz 1f \n\t"\
00355 "movq 16(%2), %%mm4 \n\t" \
00356 "pmaddwd %%mm0, %%mm4 \n\t" \
00357 "movq 24(%2), %%mm5 \n\t" \
00358 "pmaddwd %%mm5, %%mm0 \n\t" \
00359 "movq 32(%2), %%mm5 \n\t" \
00360 "pmaddwd %%mm1, %%mm5 \n\t" \
00361 "movq 40(%2), %%mm6 \n\t" \
00362 "pmaddwd %%mm6, %%mm1 \n\t" \
00363 "movq 48(%2), %%mm7 \n\t" \
00364 "pmaddwd %%mm2, %%mm7 \n\t" \
00365 #rounder ", %%mm4 \n\t"\
00366 "movq %%mm4, %%mm6 \n\t" \
00367 "paddd %%mm5, %%mm4 \n\t" \
00368 "psubd %%mm5, %%mm6 \n\t" \
00369 "movq 56(%2), %%mm5 \n\t" \
00370 "pmaddwd %%mm3, %%mm5 \n\t" \
00371 #rounder ", %%mm0 \n\t"\
00372 "paddd %%mm0, %%mm1 \n\t" \
00373 "paddd %%mm0, %%mm0 \n\t" \
00374 "psubd %%mm1, %%mm0 \n\t" \
00375 "pmaddwd 64(%2), %%mm2 \n\t" \
00376 "paddd %%mm5, %%mm7 \n\t" \
00377 "movq 72(%2), %%mm5 \n\t" \
00378 "pmaddwd %%mm3, %%mm5 \n\t" \
00379 "paddd %%mm4, %%mm7 \n\t" \
00380 "paddd %%mm4, %%mm4 \n\t" \
00381 "psubd %%mm7, %%mm4 \n\t" \
00382 "paddd %%mm2, %%mm5 \n\t" \
00383 "psrad $" #shift ", %%mm7 \n\t"\
00384 "psrad $" #shift ", %%mm4 \n\t"\
00385 "movq %%mm1, %%mm2 \n\t" \
00386 "paddd %%mm5, %%mm1 \n\t" \
00387 "psubd %%mm5, %%mm2 \n\t" \
00388 "psrad $" #shift ", %%mm1 \n\t"\
00389 "psrad $" #shift ", %%mm2 \n\t"\
00390 "packssdw %%mm1, %%mm7 \n\t" \
00391 "packssdw %%mm4, %%mm2 \n\t" \
00392 "movq %%mm7, " #dst " \n\t"\
00393 "movq " #src1 ", %%mm1 \n\t" \
00394 "movq 80(%2), %%mm4 \n\t" \
00395 "movq %%mm2, 24+" #dst " \n\t"\
00396 "pmaddwd %%mm1, %%mm4 \n\t" \
00397 "movq 88(%2), %%mm7 \n\t" \
00398 "pmaddwd 96(%2), %%mm1 \n\t" \
00399 "pmaddwd %%mm3, %%mm7 \n\t" \
00400 "movq %%mm0, %%mm2 \n\t" \
00401 "pmaddwd 104(%2), %%mm3 \n\t" \
00402 "paddd %%mm7, %%mm4 \n\t" \
00403 "paddd %%mm4, %%mm2 \n\t" \
00404 "psubd %%mm4, %%mm0 \n\t" \
00405 "psrad $" #shift ", %%mm2 \n\t"\
00406 "psrad $" #shift ", %%mm0 \n\t"\
00407 "movq %%mm6, %%mm4 \n\t" \
00408 "paddd %%mm1, %%mm3 \n\t" \
00409 "paddd %%mm3, %%mm6 \n\t" \
00410 "psubd %%mm3, %%mm4 \n\t" \
00411 "psrad $" #shift ", %%mm6 \n\t"\
00412 "packssdw %%mm6, %%mm2 \n\t" \
00413 "movq %%mm2, 8+" #dst " \n\t"\
00414 "psrad $" #shift ", %%mm4 \n\t"\
00415 "packssdw %%mm0, %%mm4 \n\t" \
00416 "movq %%mm4, 16+" #dst " \n\t"\
00417 "jmp 2f \n\t"\
00418 "1: \n\t"\
00419 "pslld $16, %%mm0 \n\t"\
00420 "paddd "MANGLE(d40000)", %%mm0 \n\t"\
00421 "psrad $13, %%mm0 \n\t"\
00422 "packssdw %%mm0, %%mm0 \n\t"\
00423 "movq %%mm0, " #dst " \n\t"\
00424 "movq %%mm0, 8+" #dst " \n\t"\
00425 "movq %%mm0, 16+" #dst " \n\t"\
00426 "movq %%mm0, 24+" #dst " \n\t"\
00427 "2: \n\t"
00428
00429 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
00430 "movq " #src0 ", %%mm0 \n\t" \
00431 "movq " #src4 ", %%mm1 \n\t" \
00432 "movq " #src1 ", %%mm2 \n\t" \
00433 "movq " #src5 ", %%mm3 \n\t" \
00434 "movq %%mm0, %%mm4 \n\t"\
00435 "por %%mm1, %%mm4 \n\t"\
00436 "por %%mm2, %%mm4 \n\t"\
00437 "por %%mm3, %%mm4 \n\t"\
00438 "packssdw %%mm4,%%mm4 \n\t"\
00439 "movd %%mm4, %%eax \n\t"\
00440 "orl %%eax, %%eax \n\t"\
00441 "jz " #bt " \n\t"\
00442 "movq 16(%2), %%mm4 \n\t" \
00443 "pmaddwd %%mm0, %%mm4 \n\t" \
00444 "movq 24(%2), %%mm5 \n\t" \
00445 "pmaddwd %%mm5, %%mm0 \n\t" \
00446 "movq 32(%2), %%mm5 \n\t" \
00447 "pmaddwd %%mm1, %%mm5 \n\t" \
00448 "movq 40(%2), %%mm6 \n\t" \
00449 "pmaddwd %%mm6, %%mm1 \n\t" \
00450 "movq 48(%2), %%mm7 \n\t" \
00451 "pmaddwd %%mm2, %%mm7 \n\t" \
00452 #rounder ", %%mm4 \n\t"\
00453 "movq %%mm4, %%mm6 \n\t" \
00454 "paddd %%mm5, %%mm4 \n\t" \
00455 "psubd %%mm5, %%mm6 \n\t" \
00456 "movq 56(%2), %%mm5 \n\t" \
00457 "pmaddwd %%mm3, %%mm5 \n\t" \
00458 #rounder ", %%mm0 \n\t"\
00459 "paddd %%mm0, %%mm1 \n\t" \
00460 "paddd %%mm0, %%mm0 \n\t" \
00461 "psubd %%mm1, %%mm0 \n\t" \
00462 "pmaddwd 64(%2), %%mm2 \n\t" \
00463 "paddd %%mm5, %%mm7 \n\t" \
00464 "movq 72(%2), %%mm5 \n\t" \
00465 "pmaddwd %%mm3, %%mm5 \n\t" \
00466 "paddd %%mm4, %%mm7 \n\t" \
00467 "paddd %%mm4, %%mm4 \n\t" \
00468 "psubd %%mm7, %%mm4 \n\t" \
00469 "paddd %%mm2, %%mm5 \n\t" \
00470 "psrad $" #shift ", %%mm7 \n\t"\
00471 "psrad $" #shift ", %%mm4 \n\t"\
00472 "movq %%mm1, %%mm2 \n\t" \
00473 "paddd %%mm5, %%mm1 \n\t" \
00474 "psubd %%mm5, %%mm2 \n\t" \
00475 "psrad $" #shift ", %%mm1 \n\t"\
00476 "psrad $" #shift ", %%mm2 \n\t"\
00477 "packssdw %%mm1, %%mm7 \n\t" \
00478 "packssdw %%mm4, %%mm2 \n\t" \
00479 "movq %%mm7, " #dst " \n\t"\
00480 "movq " #src1 ", %%mm1 \n\t" \
00481 "movq 80(%2), %%mm4 \n\t" \
00482 "movq %%mm2, 24+" #dst " \n\t"\
00483 "pmaddwd %%mm1, %%mm4 \n\t" \
00484 "movq 88(%2), %%mm7 \n\t" \
00485 "pmaddwd 96(%2), %%mm1 \n\t" \
00486 "pmaddwd %%mm3, %%mm7 \n\t" \
00487 "movq %%mm0, %%mm2 \n\t" \
00488 "pmaddwd 104(%2), %%mm3 \n\t" \
00489 "paddd %%mm7, %%mm4 \n\t" \
00490 "paddd %%mm4, %%mm2 \n\t" \
00491 "psubd %%mm4, %%mm0 \n\t" \
00492 "psrad $" #shift ", %%mm2 \n\t"\
00493 "psrad $" #shift ", %%mm0 \n\t"\
00494 "movq %%mm6, %%mm4 \n\t" \
00495 "paddd %%mm1, %%mm3 \n\t" \
00496 "paddd %%mm3, %%mm6 \n\t" \
00497 "psubd %%mm3, %%mm4 \n\t" \
00498 "psrad $" #shift ", %%mm6 \n\t"\
00499 "packssdw %%mm6, %%mm2 \n\t" \
00500 "movq %%mm2, 8+" #dst " \n\t"\
00501 "psrad $" #shift ", %%mm4 \n\t"\
00502 "packssdw %%mm0, %%mm4 \n\t" \
00503 "movq %%mm4, 16+" #dst " \n\t"\
00504
00505 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00506 "movq " #src0 ", %%mm0 \n\t" \
00507 "movq " #src4 ", %%mm1 \n\t" \
00508 "movq " #src1 ", %%mm2 \n\t" \
00509 "movq " #src5 ", %%mm3 \n\t" \
00510 "movq 16(%2), %%mm4 \n\t" \
00511 "pmaddwd %%mm0, %%mm4 \n\t" \
00512 "movq 24(%2), %%mm5 \n\t" \
00513 "pmaddwd %%mm5, %%mm0 \n\t" \
00514 "movq 32(%2), %%mm5 \n\t" \
00515 "pmaddwd %%mm1, %%mm5 \n\t" \
00516 "movq 40(%2), %%mm6 \n\t" \
00517 "pmaddwd %%mm6, %%mm1 \n\t" \
00518 "movq 48(%2), %%mm7 \n\t" \
00519 "pmaddwd %%mm2, %%mm7 \n\t" \
00520 #rounder ", %%mm4 \n\t"\
00521 "movq %%mm4, %%mm6 \n\t" \
00522 "paddd %%mm5, %%mm4 \n\t" \
00523 "psubd %%mm5, %%mm6 \n\t" \
00524 "movq 56(%2), %%mm5 \n\t" \
00525 "pmaddwd %%mm3, %%mm5 \n\t" \
00526 #rounder ", %%mm0 \n\t"\
00527 "paddd %%mm0, %%mm1 \n\t" \
00528 "paddd %%mm0, %%mm0 \n\t" \
00529 "psubd %%mm1, %%mm0 \n\t" \
00530 "pmaddwd 64(%2), %%mm2 \n\t" \
00531 "paddd %%mm5, %%mm7 \n\t" \
00532 "movq 72(%2), %%mm5 \n\t" \
00533 "pmaddwd %%mm3, %%mm5 \n\t" \
00534 "paddd %%mm4, %%mm7 \n\t" \
00535 "paddd %%mm4, %%mm4 \n\t" \
00536 "psubd %%mm7, %%mm4 \n\t" \
00537 "paddd %%mm2, %%mm5 \n\t" \
00538 "psrad $" #shift ", %%mm7 \n\t"\
00539 "psrad $" #shift ", %%mm4 \n\t"\
00540 "movq %%mm1, %%mm2 \n\t" \
00541 "paddd %%mm5, %%mm1 \n\t" \
00542 "psubd %%mm5, %%mm2 \n\t" \
00543 "psrad $" #shift ", %%mm1 \n\t"\
00544 "psrad $" #shift ", %%mm2 \n\t"\
00545 "packssdw %%mm1, %%mm7 \n\t" \
00546 "packssdw %%mm4, %%mm2 \n\t" \
00547 "movq %%mm7, " #dst " \n\t"\
00548 "movq " #src1 ", %%mm1 \n\t" \
00549 "movq 80(%2), %%mm4 \n\t" \
00550 "movq %%mm2, 24+" #dst " \n\t"\
00551 "pmaddwd %%mm1, %%mm4 \n\t" \
00552 "movq 88(%2), %%mm7 \n\t" \
00553 "pmaddwd 96(%2), %%mm1 \n\t" \
00554 "pmaddwd %%mm3, %%mm7 \n\t" \
00555 "movq %%mm0, %%mm2 \n\t" \
00556 "pmaddwd 104(%2), %%mm3 \n\t" \
00557 "paddd %%mm7, %%mm4 \n\t" \
00558 "paddd %%mm4, %%mm2 \n\t" \
00559 "psubd %%mm4, %%mm0 \n\t" \
00560 "psrad $" #shift ", %%mm2 \n\t"\
00561 "psrad $" #shift ", %%mm0 \n\t"\
00562 "movq %%mm6, %%mm4 \n\t" \
00563 "paddd %%mm1, %%mm3 \n\t" \
00564 "paddd %%mm3, %%mm6 \n\t" \
00565 "psubd %%mm3, %%mm4 \n\t" \
00566 "psrad $" #shift ", %%mm6 \n\t"\
00567 "packssdw %%mm6, %%mm2 \n\t" \
00568 "movq %%mm2, 8+" #dst " \n\t"\
00569 "psrad $" #shift ", %%mm4 \n\t"\
00570 "packssdw %%mm0, %%mm4 \n\t" \
00571 "movq %%mm4, 16+" #dst " \n\t"\
00572
00573
00574 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
00575 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
00576 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
00577 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
00578
00579 #undef IDCT
00580 #define IDCT(src0, src4, src1, src5, dst, shift) \
00581 "movq " #src0 ", %%mm0 \n\t" \
00582 "movq " #src4 ", %%mm1 \n\t" \
00583 "movq " #src1 ", %%mm2 \n\t" \
00584 "movq " #src5 ", %%mm3 \n\t" \
00585 "movq 16(%2), %%mm4 \n\t" \
00586 "pmaddwd %%mm0, %%mm4 \n\t" \
00587 "movq 24(%2), %%mm5 \n\t" \
00588 "pmaddwd %%mm5, %%mm0 \n\t" \
00589 "movq 32(%2), %%mm5 \n\t" \
00590 "pmaddwd %%mm1, %%mm5 \n\t" \
00591 "movq 40(%2), %%mm6 \n\t" \
00592 "pmaddwd %%mm6, %%mm1 \n\t" \
00593 "movq %%mm4, %%mm6 \n\t" \
00594 "movq 48(%2), %%mm7 \n\t" \
00595 "pmaddwd %%mm2, %%mm7 \n\t" \
00596 "paddd %%mm5, %%mm4 \n\t" \
00597 "psubd %%mm5, %%mm6 \n\t" \
00598 "movq %%mm0, %%mm5 \n\t" \
00599 "paddd %%mm1, %%mm0 \n\t" \
00600 "psubd %%mm1, %%mm5 \n\t" \
00601 "movq 56(%2), %%mm1 \n\t" \
00602 "pmaddwd %%mm3, %%mm1 \n\t" \
00603 "pmaddwd 64(%2), %%mm2 \n\t" \
00604 "paddd %%mm1, %%mm7 \n\t" \
00605 "movq 72(%2), %%mm1 \n\t" \
00606 "pmaddwd %%mm3, %%mm1 \n\t" \
00607 "paddd %%mm4, %%mm7 \n\t" \
00608 "paddd %%mm4, %%mm4 \n\t" \
00609 "psubd %%mm7, %%mm4 \n\t" \
00610 "paddd %%mm2, %%mm1 \n\t" \
00611 "psrad $" #shift ", %%mm7 \n\t"\
00612 "psrad $" #shift ", %%mm4 \n\t"\
00613 "movq %%mm0, %%mm2 \n\t" \
00614 "paddd %%mm1, %%mm0 \n\t" \
00615 "psubd %%mm1, %%mm2 \n\t" \
00616 "psrad $" #shift ", %%mm0 \n\t"\
00617 "psrad $" #shift ", %%mm2 \n\t"\
00618 "packssdw %%mm7, %%mm7 \n\t" \
00619 "movd %%mm7, " #dst " \n\t"\
00620 "packssdw %%mm0, %%mm0 \n\t" \
00621 "movd %%mm0, 16+" #dst " \n\t"\
00622 "packssdw %%mm2, %%mm2 \n\t" \
00623 "movd %%mm2, 96+" #dst " \n\t"\
00624 "packssdw %%mm4, %%mm4 \n\t" \
00625 "movd %%mm4, 112+" #dst " \n\t"\
00626 "movq " #src1 ", %%mm0 \n\t" \
00627 "movq 80(%2), %%mm4 \n\t" \
00628 "pmaddwd %%mm0, %%mm4 \n\t" \
00629 "movq 88(%2), %%mm7 \n\t" \
00630 "pmaddwd 96(%2), %%mm0 \n\t" \
00631 "pmaddwd %%mm3, %%mm7 \n\t" \
00632 "movq %%mm5, %%mm2 \n\t" \
00633 "pmaddwd 104(%2), %%mm3 \n\t" \
00634 "paddd %%mm7, %%mm4 \n\t" \
00635 "paddd %%mm4, %%mm2 \n\t" \
00636 "psubd %%mm4, %%mm5 \n\t" \
00637 "psrad $" #shift ", %%mm2 \n\t"\
00638 "psrad $" #shift ", %%mm5 \n\t"\
00639 "movq %%mm6, %%mm4 \n\t" \
00640 "paddd %%mm0, %%mm3 \n\t" \
00641 "paddd %%mm3, %%mm6 \n\t" \
00642 "psubd %%mm3, %%mm4 \n\t" \
00643 "psrad $" #shift ", %%mm6 \n\t"\
00644 "psrad $" #shift ", %%mm4 \n\t"\
00645 "packssdw %%mm2, %%mm2 \n\t" \
00646 "packssdw %%mm6, %%mm6 \n\t" \
00647 "movd %%mm2, 32+" #dst " \n\t"\
00648 "packssdw %%mm4, %%mm4 \n\t" \
00649 "packssdw %%mm5, %%mm5 \n\t" \
00650 "movd %%mm6, 48+" #dst " \n\t"\
00651 "movd %%mm4, 64+" #dst " \n\t"\
00652 "movd %%mm5, 80+" #dst " \n\t"
00653
00654
00655
00656 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00657 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00658 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00659 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00660 "jmp 9f \n\t"
00661
00662 "# .p2align 4 \n\t"\
00663 "4: \n\t"
00664 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
00665 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
00666
00667 #undef IDCT
00668 #define IDCT(src0, src4, src1, src5, dst, shift) \
00669 "movq " #src0 ", %%mm0 \n\t" \
00670 "movq " #src4 ", %%mm1 \n\t" \
00671 "movq " #src5 ", %%mm3 \n\t" \
00672 "movq 16(%2), %%mm4 \n\t" \
00673 "pmaddwd %%mm0, %%mm4 \n\t" \
00674 "movq 24(%2), %%mm5 \n\t" \
00675 "pmaddwd %%mm5, %%mm0 \n\t" \
00676 "movq 32(%2), %%mm5 \n\t" \
00677 "pmaddwd %%mm1, %%mm5 \n\t" \
00678 "movq 40(%2), %%mm6 \n\t" \
00679 "pmaddwd %%mm6, %%mm1 \n\t" \
00680 "movq %%mm4, %%mm6 \n\t" \
00681 "paddd %%mm5, %%mm4 \n\t" \
00682 "psubd %%mm5, %%mm6 \n\t" \
00683 "movq %%mm0, %%mm5 \n\t" \
00684 "paddd %%mm1, %%mm0 \n\t" \
00685 "psubd %%mm1, %%mm5 \n\t" \
00686 "movq 56(%2), %%mm1 \n\t" \
00687 "pmaddwd %%mm3, %%mm1 \n\t" \
00688 "movq 72(%2), %%mm7 \n\t" \
00689 "pmaddwd %%mm3, %%mm7 \n\t" \
00690 "paddd %%mm4, %%mm1 \n\t" \
00691 "paddd %%mm4, %%mm4 \n\t" \
00692 "psubd %%mm1, %%mm4 \n\t" \
00693 "psrad $" #shift ", %%mm1 \n\t"\
00694 "psrad $" #shift ", %%mm4 \n\t"\
00695 "movq %%mm0, %%mm2 \n\t" \
00696 "paddd %%mm7, %%mm0 \n\t" \
00697 "psubd %%mm7, %%mm2 \n\t" \
00698 "psrad $" #shift ", %%mm0 \n\t"\
00699 "psrad $" #shift ", %%mm2 \n\t"\
00700 "packssdw %%mm1, %%mm1 \n\t" \
00701 "movd %%mm1, " #dst " \n\t"\
00702 "packssdw %%mm0, %%mm0 \n\t" \
00703 "movd %%mm0, 16+" #dst " \n\t"\
00704 "packssdw %%mm2, %%mm2 \n\t" \
00705 "movd %%mm2, 96+" #dst " \n\t"\
00706 "packssdw %%mm4, %%mm4 \n\t" \
00707 "movd %%mm4, 112+" #dst " \n\t"\
00708 "movq 88(%2), %%mm1 \n\t" \
00709 "pmaddwd %%mm3, %%mm1 \n\t" \
00710 "movq %%mm5, %%mm2 \n\t" \
00711 "pmaddwd 104(%2), %%mm3 \n\t" \
00712 "paddd %%mm1, %%mm2 \n\t" \
00713 "psubd %%mm1, %%mm5 \n\t" \
00714 "psrad $" #shift ", %%mm2 \n\t"\
00715 "psrad $" #shift ", %%mm5 \n\t"\
00716 "movq %%mm6, %%mm1 \n\t" \
00717 "paddd %%mm3, %%mm6 \n\t" \
00718 "psubd %%mm3, %%mm1 \n\t" \
00719 "psrad $" #shift ", %%mm6 \n\t"\
00720 "psrad $" #shift ", %%mm1 \n\t"\
00721 "packssdw %%mm2, %%mm2 \n\t" \
00722 "packssdw %%mm6, %%mm6 \n\t" \
00723 "movd %%mm2, 32+" #dst " \n\t"\
00724 "packssdw %%mm1, %%mm1 \n\t" \
00725 "packssdw %%mm5, %%mm5 \n\t" \
00726 "movd %%mm6, 48+" #dst " \n\t"\
00727 "movd %%mm1, 64+" #dst " \n\t"\
00728 "movd %%mm5, 80+" #dst " \n\t"
00729
00730
00731 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00732 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00733 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00734 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00735 "jmp 9f \n\t"
00736
00737 "# .p2align 4 \n\t"\
00738 "6: \n\t"
00739 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
00740
00741 #undef IDCT
00742 #define IDCT(src0, src4, src1, src5, dst, shift) \
00743 "movq " #src0 ", %%mm0 \n\t" \
00744 "movq " #src5 ", %%mm3 \n\t" \
00745 "movq 16(%2), %%mm4 \n\t" \
00746 "pmaddwd %%mm0, %%mm4 \n\t" \
00747 "movq 24(%2), %%mm5 \n\t" \
00748 "pmaddwd %%mm5, %%mm0 \n\t" \
00749 "movq %%mm4, %%mm6 \n\t" \
00750 "movq %%mm0, %%mm5 \n\t" \
00751 "movq 56(%2), %%mm1 \n\t" \
00752 "pmaddwd %%mm3, %%mm1 \n\t" \
00753 "movq 72(%2), %%mm7 \n\t" \
00754 "pmaddwd %%mm3, %%mm7 \n\t" \
00755 "paddd %%mm4, %%mm1 \n\t" \
00756 "paddd %%mm4, %%mm4 \n\t" \
00757 "psubd %%mm1, %%mm4 \n\t" \
00758 "psrad $" #shift ", %%mm1 \n\t"\
00759 "psrad $" #shift ", %%mm4 \n\t"\
00760 "movq %%mm0, %%mm2 \n\t" \
00761 "paddd %%mm7, %%mm0 \n\t" \
00762 "psubd %%mm7, %%mm2 \n\t" \
00763 "psrad $" #shift ", %%mm0 \n\t"\
00764 "psrad $" #shift ", %%mm2 \n\t"\
00765 "packssdw %%mm1, %%mm1 \n\t" \
00766 "movd %%mm1, " #dst " \n\t"\
00767 "packssdw %%mm0, %%mm0 \n\t" \
00768 "movd %%mm0, 16+" #dst " \n\t"\
00769 "packssdw %%mm2, %%mm2 \n\t" \
00770 "movd %%mm2, 96+" #dst " \n\t"\
00771 "packssdw %%mm4, %%mm4 \n\t" \
00772 "movd %%mm4, 112+" #dst " \n\t"\
00773 "movq 88(%2), %%mm1 \n\t" \
00774 "pmaddwd %%mm3, %%mm1 \n\t" \
00775 "movq %%mm5, %%mm2 \n\t" \
00776 "pmaddwd 104(%2), %%mm3 \n\t" \
00777 "paddd %%mm1, %%mm2 \n\t" \
00778 "psubd %%mm1, %%mm5 \n\t" \
00779 "psrad $" #shift ", %%mm2 \n\t"\
00780 "psrad $" #shift ", %%mm5 \n\t"\
00781 "movq %%mm6, %%mm1 \n\t" \
00782 "paddd %%mm3, %%mm6 \n\t" \
00783 "psubd %%mm3, %%mm1 \n\t" \
00784 "psrad $" #shift ", %%mm6 \n\t"\
00785 "psrad $" #shift ", %%mm1 \n\t"\
00786 "packssdw %%mm2, %%mm2 \n\t" \
00787 "packssdw %%mm6, %%mm6 \n\t" \
00788 "movd %%mm2, 32+" #dst " \n\t"\
00789 "packssdw %%mm1, %%mm1 \n\t" \
00790 "packssdw %%mm5, %%mm5 \n\t" \
00791 "movd %%mm6, 48+" #dst " \n\t"\
00792 "movd %%mm1, 64+" #dst " \n\t"\
00793 "movd %%mm5, 80+" #dst " \n\t"
00794
00795
00796
00797 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00798 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00799 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00800 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00801 "jmp 9f \n\t"
00802
00803 "# .p2align 4 \n\t"\
00804 "2: \n\t"
00805 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
00806
00807 #undef IDCT
00808 #define IDCT(src0, src4, src1, src5, dst, shift) \
00809 "movq " #src0 ", %%mm0 \n\t" \
00810 "movq " #src1 ", %%mm2 \n\t" \
00811 "movq " #src5 ", %%mm3 \n\t" \
00812 "movq 16(%2), %%mm4 \n\t" \
00813 "pmaddwd %%mm0, %%mm4 \n\t" \
00814 "movq 24(%2), %%mm5 \n\t" \
00815 "pmaddwd %%mm5, %%mm0 \n\t" \
00816 "movq %%mm4, %%mm6 \n\t" \
00817 "movq 48(%2), %%mm7 \n\t" \
00818 "pmaddwd %%mm2, %%mm7 \n\t" \
00819 "movq %%mm0, %%mm5 \n\t" \
00820 "movq 56(%2), %%mm1 \n\t" \
00821 "pmaddwd %%mm3, %%mm1 \n\t" \
00822 "pmaddwd 64(%2), %%mm2 \n\t" \
00823 "paddd %%mm1, %%mm7 \n\t" \
00824 "movq 72(%2), %%mm1 \n\t" \
00825 "pmaddwd %%mm3, %%mm1 \n\t" \
00826 "paddd %%mm4, %%mm7 \n\t" \
00827 "paddd %%mm4, %%mm4 \n\t" \
00828 "psubd %%mm7, %%mm4 \n\t" \
00829 "paddd %%mm2, %%mm1 \n\t" \
00830 "psrad $" #shift ", %%mm7 \n\t"\
00831 "psrad $" #shift ", %%mm4 \n\t"\
00832 "movq %%mm0, %%mm2 \n\t" \
00833 "paddd %%mm1, %%mm0 \n\t" \
00834 "psubd %%mm1, %%mm2 \n\t" \
00835 "psrad $" #shift ", %%mm0 \n\t"\
00836 "psrad $" #shift ", %%mm2 \n\t"\
00837 "packssdw %%mm7, %%mm7 \n\t" \
00838 "movd %%mm7, " #dst " \n\t"\
00839 "packssdw %%mm0, %%mm0 \n\t" \
00840 "movd %%mm0, 16+" #dst " \n\t"\
00841 "packssdw %%mm2, %%mm2 \n\t" \
00842 "movd %%mm2, 96+" #dst " \n\t"\
00843 "packssdw %%mm4, %%mm4 \n\t" \
00844 "movd %%mm4, 112+" #dst " \n\t"\
00845 "movq " #src1 ", %%mm0 \n\t" \
00846 "movq 80(%2), %%mm4 \n\t" \
00847 "pmaddwd %%mm0, %%mm4 \n\t" \
00848 "movq 88(%2), %%mm7 \n\t" \
00849 "pmaddwd 96(%2), %%mm0 \n\t" \
00850 "pmaddwd %%mm3, %%mm7 \n\t" \
00851 "movq %%mm5, %%mm2 \n\t" \
00852 "pmaddwd 104(%2), %%mm3 \n\t" \
00853 "paddd %%mm7, %%mm4 \n\t" \
00854 "paddd %%mm4, %%mm2 \n\t" \
00855 "psubd %%mm4, %%mm5 \n\t" \
00856 "psrad $" #shift ", %%mm2 \n\t"\
00857 "psrad $" #shift ", %%mm5 \n\t"\
00858 "movq %%mm6, %%mm4 \n\t" \
00859 "paddd %%mm0, %%mm3 \n\t" \
00860 "paddd %%mm3, %%mm6 \n\t" \
00861 "psubd %%mm3, %%mm4 \n\t" \
00862 "psrad $" #shift ", %%mm6 \n\t"\
00863 "psrad $" #shift ", %%mm4 \n\t"\
00864 "packssdw %%mm2, %%mm2 \n\t" \
00865 "packssdw %%mm6, %%mm6 \n\t" \
00866 "movd %%mm2, 32+" #dst " \n\t"\
00867 "packssdw %%mm4, %%mm4 \n\t" \
00868 "packssdw %%mm5, %%mm5 \n\t" \
00869 "movd %%mm6, 48+" #dst " \n\t"\
00870 "movd %%mm4, 64+" #dst " \n\t"\
00871 "movd %%mm5, 80+" #dst " \n\t"
00872
00873
00874 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00875 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00876 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00877 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00878 "jmp 9f \n\t"
00879
00880 "# .p2align 4 \n\t"\
00881 "3: \n\t"
00882 #undef IDCT
00883 #define IDCT(src0, src4, src1, src5, dst, shift) \
00884 "movq " #src0 ", %%mm0 \n\t" \
00885 "movq " #src1 ", %%mm2 \n\t" \
00886 "movq 16(%2), %%mm4 \n\t" \
00887 "pmaddwd %%mm0, %%mm4 \n\t" \
00888 "movq 24(%2), %%mm5 \n\t" \
00889 "pmaddwd %%mm5, %%mm0 \n\t" \
00890 "movq %%mm4, %%mm6 \n\t" \
00891 "movq 48(%2), %%mm7 \n\t" \
00892 "pmaddwd %%mm2, %%mm7 \n\t" \
00893 "movq %%mm0, %%mm5 \n\t" \
00894 "movq 64(%2), %%mm3 \n\t"\
00895 "pmaddwd %%mm2, %%mm3 \n\t" \
00896 "paddd %%mm4, %%mm7 \n\t" \
00897 "paddd %%mm4, %%mm4 \n\t" \
00898 "psubd %%mm7, %%mm4 \n\t" \
00899 "psrad $" #shift ", %%mm7 \n\t"\
00900 "psrad $" #shift ", %%mm4 \n\t"\
00901 "movq %%mm0, %%mm1 \n\t" \
00902 "paddd %%mm3, %%mm0 \n\t" \
00903 "psubd %%mm3, %%mm1 \n\t" \
00904 "psrad $" #shift ", %%mm0 \n\t"\
00905 "psrad $" #shift ", %%mm1 \n\t"\
00906 "packssdw %%mm7, %%mm7 \n\t" \
00907 "movd %%mm7, " #dst " \n\t"\
00908 "packssdw %%mm0, %%mm0 \n\t" \
00909 "movd %%mm0, 16+" #dst " \n\t"\
00910 "packssdw %%mm1, %%mm1 \n\t" \
00911 "movd %%mm1, 96+" #dst " \n\t"\
00912 "packssdw %%mm4, %%mm4 \n\t" \
00913 "movd %%mm4, 112+" #dst " \n\t"\
00914 "movq 80(%2), %%mm4 \n\t" \
00915 "pmaddwd %%mm2, %%mm4 \n\t" \
00916 "pmaddwd 96(%2), %%mm2 \n\t" \
00917 "movq %%mm5, %%mm1 \n\t" \
00918 "paddd %%mm4, %%mm1 \n\t" \
00919 "psubd %%mm4, %%mm5 \n\t" \
00920 "psrad $" #shift ", %%mm1 \n\t"\
00921 "psrad $" #shift ", %%mm5 \n\t"\
00922 "movq %%mm6, %%mm4 \n\t" \
00923 "paddd %%mm2, %%mm6 \n\t" \
00924 "psubd %%mm2, %%mm4 \n\t" \
00925 "psrad $" #shift ", %%mm6 \n\t"\
00926 "psrad $" #shift ", %%mm4 \n\t"\
00927 "packssdw %%mm1, %%mm1 \n\t" \
00928 "packssdw %%mm6, %%mm6 \n\t" \
00929 "movd %%mm1, 32+" #dst " \n\t"\
00930 "packssdw %%mm4, %%mm4 \n\t" \
00931 "packssdw %%mm5, %%mm5 \n\t" \
00932 "movd %%mm6, 48+" #dst " \n\t"\
00933 "movd %%mm4, 64+" #dst " \n\t"\
00934 "movd %%mm5, 80+" #dst " \n\t"
00935
00936
00937
00938 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
00939 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
00940 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
00941 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
00942 "jmp 9f \n\t"
00943
00944 "# .p2align 4 \n\t"\
00945 "5: \n\t"
00946 #undef IDCT
00947 #define IDCT(src0, src4, src1, src5, dst, shift) \
00948 "movq " #src0 ", %%mm0 \n\t" \
00949 "movq " #src4 ", %%mm1 \n\t" \
00950 "movq 16(%2), %%mm4 \n\t" \
00951 "pmaddwd %%mm0, %%mm4 \n\t" \
00952 "movq 24(%2), %%mm5 \n\t" \
00953 "pmaddwd %%mm5, %%mm0 \n\t" \
00954 "movq 32(%2), %%mm5 \n\t" \
00955 "pmaddwd %%mm1, %%mm5 \n\t" \
00956 "movq 40(%2), %%mm6 \n\t" \
00957 "pmaddwd %%mm6, %%mm1 \n\t" \
00958 "movq %%mm4, %%mm6 \n\t" \
00959 "paddd %%mm5, %%mm4 \n\t" \
00960 "psubd %%mm5, %%mm6 \n\t" \
00961 "movq %%mm0, %%mm5 \n\t" \
00962 "paddd %%mm1, %%mm0 \n\t" \
00963 "psubd %%mm1, %%mm5 \n\t" \
00964 "movq 8+" #src0 ", %%mm2 \n\t" \
00965 "movq 8+" #src4 ", %%mm3 \n\t" \
00966 "movq 16(%2), %%mm1 \n\t" \
00967 "pmaddwd %%mm2, %%mm1 \n\t" \
00968 "movq 24(%2), %%mm7 \n\t" \
00969 "pmaddwd %%mm7, %%mm2 \n\t" \
00970 "movq 32(%2), %%mm7 \n\t" \
00971 "pmaddwd %%mm3, %%mm7 \n\t" \
00972 "pmaddwd 40(%2), %%mm3 \n\t" \
00973 "paddd %%mm1, %%mm7 \n\t" \
00974 "paddd %%mm1, %%mm1 \n\t" \
00975 "psubd %%mm7, %%mm1 \n\t" \
00976 "paddd %%mm2, %%mm3 \n\t" \
00977 "paddd %%mm2, %%mm2 \n\t" \
00978 "psubd %%mm3, %%mm2 \n\t" \
00979 "psrad $" #shift ", %%mm4 \n\t"\
00980 "psrad $" #shift ", %%mm7 \n\t"\
00981 "psrad $" #shift ", %%mm3 \n\t"\
00982 "packssdw %%mm7, %%mm4 \n\t" \
00983 "movq %%mm4, " #dst " \n\t"\
00984 "psrad $" #shift ", %%mm0 \n\t"\
00985 "packssdw %%mm3, %%mm0 \n\t" \
00986 "movq %%mm0, 16+" #dst " \n\t"\
00987 "movq %%mm0, 96+" #dst " \n\t"\
00988 "movq %%mm4, 112+" #dst " \n\t"\
00989 "psrad $" #shift ", %%mm5 \n\t"\
00990 "psrad $" #shift ", %%mm6 \n\t"\
00991 "psrad $" #shift ", %%mm2 \n\t"\
00992 "packssdw %%mm2, %%mm5 \n\t" \
00993 "movq %%mm5, 32+" #dst " \n\t"\
00994 "psrad $" #shift ", %%mm1 \n\t"\
00995 "packssdw %%mm1, %%mm6 \n\t" \
00996 "movq %%mm6, 48+" #dst " \n\t"\
00997 "movq %%mm6, 64+" #dst " \n\t"\
00998 "movq %%mm5, 80+" #dst " \n\t"
00999
01000
01001
01002 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01003
01004 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01005
01006 "jmp 9f \n\t"
01007
01008
01009 "# .p2align 4 \n\t"\
01010 "1: \n\t"
01011 #undef IDCT
01012 #define IDCT(src0, src4, src1, src5, dst, shift) \
01013 "movq " #src0 ", %%mm0 \n\t" \
01014 "movq " #src4 ", %%mm1 \n\t" \
01015 "movq " #src1 ", %%mm2 \n\t" \
01016 "movq 16(%2), %%mm4 \n\t" \
01017 "pmaddwd %%mm0, %%mm4 \n\t" \
01018 "movq 24(%2), %%mm5 \n\t" \
01019 "pmaddwd %%mm5, %%mm0 \n\t" \
01020 "movq 32(%2), %%mm5 \n\t" \
01021 "pmaddwd %%mm1, %%mm5 \n\t" \
01022 "movq 40(%2), %%mm6 \n\t" \
01023 "pmaddwd %%mm6, %%mm1 \n\t" \
01024 "movq %%mm4, %%mm6 \n\t" \
01025 "movq 48(%2), %%mm7 \n\t" \
01026 "pmaddwd %%mm2, %%mm7 \n\t" \
01027 "paddd %%mm5, %%mm4 \n\t" \
01028 "psubd %%mm5, %%mm6 \n\t" \
01029 "movq %%mm0, %%mm5 \n\t" \
01030 "paddd %%mm1, %%mm0 \n\t" \
01031 "psubd %%mm1, %%mm5 \n\t" \
01032 "movq 64(%2), %%mm1 \n\t"\
01033 "pmaddwd %%mm2, %%mm1 \n\t" \
01034 "paddd %%mm4, %%mm7 \n\t" \
01035 "paddd %%mm4, %%mm4 \n\t" \
01036 "psubd %%mm7, %%mm4 \n\t" \
01037 "psrad $" #shift ", %%mm7 \n\t"\
01038 "psrad $" #shift ", %%mm4 \n\t"\
01039 "movq %%mm0, %%mm3 \n\t" \
01040 "paddd %%mm1, %%mm0 \n\t" \
01041 "psubd %%mm1, %%mm3 \n\t" \
01042 "psrad $" #shift ", %%mm0 \n\t"\
01043 "psrad $" #shift ", %%mm3 \n\t"\
01044 "packssdw %%mm7, %%mm7 \n\t" \
01045 "movd %%mm7, " #dst " \n\t"\
01046 "packssdw %%mm0, %%mm0 \n\t" \
01047 "movd %%mm0, 16+" #dst " \n\t"\
01048 "packssdw %%mm3, %%mm3 \n\t" \
01049 "movd %%mm3, 96+" #dst " \n\t"\
01050 "packssdw %%mm4, %%mm4 \n\t" \
01051 "movd %%mm4, 112+" #dst " \n\t"\
01052 "movq 80(%2), %%mm4 \n\t" \
01053 "pmaddwd %%mm2, %%mm4 \n\t" \
01054 "pmaddwd 96(%2), %%mm2 \n\t" \
01055 "movq %%mm5, %%mm3 \n\t" \
01056 "paddd %%mm4, %%mm3 \n\t" \
01057 "psubd %%mm4, %%mm5 \n\t" \
01058 "psrad $" #shift ", %%mm3 \n\t"\
01059 "psrad $" #shift ", %%mm5 \n\t"\
01060 "movq %%mm6, %%mm4 \n\t" \
01061 "paddd %%mm2, %%mm6 \n\t" \
01062 "psubd %%mm2, %%mm4 \n\t" \
01063 "psrad $" #shift ", %%mm6 \n\t"\
01064 "packssdw %%mm3, %%mm3 \n\t" \
01065 "movd %%mm3, 32+" #dst " \n\t"\
01066 "psrad $" #shift ", %%mm4 \n\t"\
01067 "packssdw %%mm6, %%mm6 \n\t" \
01068 "movd %%mm6, 48+" #dst " \n\t"\
01069 "packssdw %%mm4, %%mm4 \n\t" \
01070 "packssdw %%mm5, %%mm5 \n\t" \
01071 "movd %%mm4, 64+" #dst " \n\t"\
01072 "movd %%mm5, 80+" #dst " \n\t"
01073
01074
01075
01076 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01077 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
01078 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01079 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
01080 "jmp 9f \n\t"
01081
01082
01083 "# .p2align 4 \n\t"
01084 "7: \n\t"
01085 #undef IDCT
01086 #define IDCT(src0, src4, src1, src5, dst, shift) \
01087 "movq " #src0 ", %%mm0 \n\t" \
01088 "movq 16(%2), %%mm4 \n\t" \
01089 "pmaddwd %%mm0, %%mm4 \n\t" \
01090 "movq 24(%2), %%mm5 \n\t" \
01091 "pmaddwd %%mm5, %%mm0 \n\t" \
01092 "psrad $" #shift ", %%mm4 \n\t"\
01093 "psrad $" #shift ", %%mm0 \n\t"\
01094 "movq 8+" #src0 ", %%mm2 \n\t" \
01095 "movq 16(%2), %%mm1 \n\t" \
01096 "pmaddwd %%mm2, %%mm1 \n\t" \
01097 "movq 24(%2), %%mm7 \n\t" \
01098 "pmaddwd %%mm7, %%mm2 \n\t" \
01099 "movq 32(%2), %%mm7 \n\t" \
01100 "psrad $" #shift ", %%mm1 \n\t"\
01101 "packssdw %%mm1, %%mm4 \n\t" \
01102 "movq %%mm4, " #dst " \n\t"\
01103 "psrad $" #shift ", %%mm2 \n\t"\
01104 "packssdw %%mm2, %%mm0 \n\t" \
01105 "movq %%mm0, 16+" #dst " \n\t"\
01106 "movq %%mm0, 96+" #dst " \n\t"\
01107 "movq %%mm4, 112+" #dst " \n\t"\
01108 "movq %%mm0, 32+" #dst " \n\t"\
01109 "movq %%mm4, 48+" #dst " \n\t"\
01110 "movq %%mm4, 64+" #dst " \n\t"\
01111 "movq %%mm0, 80+" #dst " \n\t"
01112
01113
01114 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
01115
01116 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
01117
01118
01119
01120 #endif
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144 "9: \n\t"
01145 :: "r" (block), "r" (temp), "r" (coeffs)
01146 : "%eax"
01147 );
01148 }
01149
/**
 * Public MMX entry point for the inverse DCT implemented in this file.
 *
 * Hands @p block straight to the file-local idct() routine; idct()
 * stores its output back through the same pointer, so the caller's
 * buffer is overwritten with the transformed data.
 *
 * @param block coefficient buffer, transformed in place
 */
void ff_simple_idct_mmx(int16_t *block)
{
    idct(block);
}
01154
01155
01156
01157 void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
01158 {
01159 idct(block);
01160 ff_put_pixels_clamped_mmx(block, dest, line_size);
01161 }
01162 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
01163 {
01164 idct(block);
01165 ff_add_pixels_clamped_mmx(block, dest, line_size);
01166 }
01167
01168 #endif