00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavcodec/dsputil.h"
00024 #include "dsputil_sh4.h"
00025 #include "sh4.h"
00026
/*
 * Scaled cosine constants, COSk = sqrt(2) * cos(k * pi / 16), so COS4 is
 * exactly 1.  They are only needed to spell out the two tables below and
 * are undefined again right after them.
 */
#define COS1 1.38703984532214752434
#define COS2 1.30656296487637657577
#define COS3 1.17587560241935884520
#define COS4 1.00000000000000000000
#define COS5 0.78569495838710234903
#define COS6 0.54119610014619712324
#define COS7 0.27589937928294311353

/*
 * 4x4 matrix applied to the even-indexed coefficients (0, 2, 4, 6).
 * Each group of four values holds the weights with which ONE input
 * contributes to the four outputs (see ftrv_() for the exact indexing).
 * 8-byte aligned because load_matrix() fetches the table through the
 * paired xd registers.
 */
static const float even_table[] __attribute__ ((aligned(8))) = {
     COS4,  COS4,  COS4,  COS4,
     COS2,  COS6, -COS6, -COS2,
     COS4, -COS4, -COS4,  COS4,
     COS6, -COS2,  COS2, -COS6
};

/*
 * 4x4 matrix applied to the odd-indexed coefficients (1, 3, 5, 7);
 * same layout and alignment as even_table.
 */
static const float odd_table[] __attribute__ ((aligned(8))) = {
     COS1,  COS3,  COS5,  COS7,
     COS3, -COS7, -COS1, -COS5,
     COS5, -COS1,  COS7,  COS3,
     COS7, -COS5,  COS3, -COS1
};

#undef COS1
#undef COS2
#undef COS3
#undef COS4
#undef COS5
#undef COS6
#undef COS7
00056
00057 #if 1
00058
/*
 * load_matrix(table)
 *
 * Streams the matrix at `table` into the registers xd0, xd2, ... xd14
 * with eight post-increment fmov loads.  The loads are bracketed by a
 * pair of fschg instructions (on SH-4 fschg toggles the FPU transfer
 * size, so each fmov moves one register pair; the second fschg restores
 * the previous setting).  These are the registers ftrv() later reads as
 * "xmtrx".
 *
 * The pointer is an in/out ("+r") operand because the post-increment
 * addressing advances it; a private copy is used so the caller's
 * expression is never modified.
 */
#define load_matrix(table)                          \
    do {                                            \
        const float *mtx_ = (table);                \
        __asm__ volatile(                           \
            " fschg\n"                              \
            " fmov @%0+,xd0\n"                      \
            " fmov @%0+,xd2\n"                      \
            " fmov @%0+,xd4\n"                      \
            " fmov @%0+,xd6\n"                      \
            " fmov @%0+,xd8\n"                      \
            " fmov @%0+,xd10\n"                     \
            " fmov @%0+,xd12\n"                     \
            " fmov @%0+,xd14\n"                     \
            " fschg\n"                              \
            : "+r"(mtx_)                            \
        );                                          \
    } while (0)
00076
/*
 * ftrv()
 *
 * Issues the SH-4 "ftrv xmtrx,fv0" instruction: the vector held in
 * fr0..fr3 is multiplied by the matrix previously installed with
 * load_matrix() and the result replaces fr0..fr3.  All four registers
 * are read-write ("+f") operands so the compiler keeps the values live
 * across the instruction.
 *
 * The expansion deliberately carries NO trailing semicolon, matching the
 * C fallback definition of ftrv(); call sites write `ftrv();`.
 */
#define ftrv()                                                      \
    __asm__ volatile("ftrv xmtrx,fv0"                               \
                     : "+f"(fr0),"+f"(fr1),"+f"(fr2),"+f"(fr3))

/*
 * DEFREG
 *
 * Declares the four local variables ftrv() operates on and pins each of
 * them to the hardware register of the same name.  Use as `DEFREG;` at
 * the top of a function body.
 */
#define DEFREG                              \
    register float fr0 __asm__("fr0");      \
    register float fr1 __asm__("fr1");      \
    register float fr2 __asm__("fr2");      \
    register float fr3 __asm__("fr3")
00086
00087 #else
00088
00089
00090
/*
 * Plain-C stand-in for the ftrv instruction: fv = M * fv for a 4x4
 * matrix M stored in xf[16].
 *
 * xf[] is laid out so that the four weights applied to input j sit at
 * xf[4*j .. 4*j+3]; output `row` therefore picks xf[row], xf[row+4],
 * xf[row+8] and xf[row+12].
 *
 * xf  16 matrix elements (read only)
 * fv  4-element vector, transformed in place
 */
static void ftrv_(const float xf[],float fv[])
{
    /* Snapshot the inputs first: fv[] is overwritten row by row. */
    const float in0 = fv[0];
    const float in1 = fv[1];
    const float in2 = fv[2];
    const float in3 = fv[3];
    int row;

    for (row = 0; row < 4; row++) {
        fv[row] = xf[row]*in0 + xf[row + 4]*in1 + xf[row + 8]*in2 + xf[row + 12]*in3;
    }
}
00103
/*
 * Plain-C stand-in for load_matrix(): copies the 16 floats of `table`
 * into the emulated matrix registers xf[0..15].
 */
static void load_matrix_(float xf[],const float table[])
{
    const float *src = table;
    const float *const end = table + 16;
    float *dst = xf;

    while (src != end) {
        *dst++ = *src++;
    }
}
00109
/*
 * Bindings for the plain-C fallback: the same names the asm variant
 * provides, routed to ftrv_() / load_matrix_() and to ordinary arrays.
 * xf and fv are the locals introduced by DEFREG.
 */
#define ftrv()              ftrv_(xf, fv)
#define load_matrix(table)  load_matrix_(xf, (table))

/* fv[4] is the working vector, xf[16] the emulated matrix registers. */
#define DEFREG \
    float fv[4], xf[16]

/* The "registers" are simply the four slots of the working vector. */
#define fr0 (fv[0])
#define fr1 (fv[1])
#define fr2 (fv[2])
#define fr3 (fv[3])
00120
00121 #endif
00122
/*
 * DESCALE(x, n): scale x down by 2^n.
 *
 * Active form   - multiplies by the float reciprocal 1/2^n; the result is
 *                 a float and is converted by whatever the caller assigns
 *                 it to.
 * Disabled form - converts x to int and does a rounding right shift.
 *
 * Both expansions are fully parenthesised so the macro behaves as a
 * single operand inside larger expressions (e.g. 1.0f / DESCALE(a, 3)).
 */
#if 1
#define DESCALE(x,n) ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n) (((int)(x)+(1<<((n)-1)))>>(n))
#endif
00128
00129
00130
00131
00132 #if 1
00133
00134
00135 void idct_sh4(DCTELEM *block)
00136 {
00137 DEFREG;
00138
00139 int i;
00140 float tblock[8*8],*fblock;
00141 int ofs1,ofs2,ofs3;
00142 int fpscr;
00143
00144 fp_single_enter(fpscr);
00145
00146
00147
00148
00149 load_matrix(even_table);
00150
00151 fblock = tblock+4;
00152 i = 8;
00153 do {
00154 fr0 = block[0];
00155 fr1 = block[2];
00156 fr2 = block[4];
00157 fr3 = block[6];
00158 block+=8;
00159 ftrv();
00160 *--fblock = fr3;
00161 *--fblock = fr2;
00162 *--fblock = fr1;
00163 *--fblock = fr0;
00164 fblock+=8+4;
00165 } while(--i);
00166 block-=8*8;
00167 fblock-=8*8+4;
00168
00169 load_matrix(odd_table);
00170
00171 i = 8;
00172
00173 do {
00174 float t0,t1,t2,t3;
00175 fr0 = block[1];
00176 fr1 = block[3];
00177 fr2 = block[5];
00178 fr3 = block[7];
00179 block+=8;
00180 ftrv();
00181 t0 = *fblock++;
00182 t1 = *fblock++;
00183 t2 = *fblock++;
00184 t3 = *fblock++;
00185 fblock+=4;
00186 *--fblock = t0 - fr0;
00187 *--fblock = t1 - fr1;
00188 *--fblock = t2 - fr2;
00189 *--fblock = t3 - fr3;
00190 *--fblock = t3 + fr3;
00191 *--fblock = t2 + fr2;
00192 *--fblock = t1 + fr1;
00193 *--fblock = t0 + fr0;
00194 fblock+=8;
00195 } while(--i);
00196 block-=8*8;
00197 fblock-=8*8;
00198
00199
00200
00201
00202 load_matrix(even_table);
00203
00204 ofs1 = sizeof(float)*2*8;
00205 ofs2 = sizeof(float)*4*8;
00206 ofs3 = sizeof(float)*6*8;
00207
00208 i = 8;
00209
00210 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
00211
00212 do {
00213 fr0 = OA(fblock, 0);
00214 fr1 = OA(fblock,ofs1);
00215 fr2 = OA(fblock,ofs2);
00216 fr3 = OA(fblock,ofs3);
00217 ftrv();
00218 OA(fblock,0 ) = fr0;
00219 OA(fblock,ofs1) = fr1;
00220 OA(fblock,ofs2) = fr2;
00221 OA(fblock,ofs3) = fr3;
00222 fblock++;
00223 } while(--i);
00224 fblock-=8;
00225
00226 load_matrix(odd_table);
00227
00228 i=8;
00229 do {
00230 float t0,t1,t2,t3;
00231 t0 = OA(fblock, 0);
00232 t1 = OA(fblock,ofs1);
00233 t2 = OA(fblock,ofs2);
00234 t3 = OA(fblock,ofs3);
00235 fblock+=8;
00236 fr0 = OA(fblock, 0);
00237 fr1 = OA(fblock,ofs1);
00238 fr2 = OA(fblock,ofs2);
00239 fr3 = OA(fblock,ofs3);
00240 fblock+=-8+1;
00241 ftrv();
00242 block[8*0] = DESCALE(t0 + fr0,3);
00243 block[8*7] = DESCALE(t0 - fr0,3);
00244 block[8*1] = DESCALE(t1 + fr1,3);
00245 block[8*6] = DESCALE(t1 - fr1,3);
00246 block[8*2] = DESCALE(t2 + fr2,3);
00247 block[8*5] = DESCALE(t2 - fr2,3);
00248 block[8*3] = DESCALE(t3 + fr3,3);
00249 block[8*4] = DESCALE(t3 - fr3,3);
00250 block++;
00251 } while(--i);
00252
00253 fp_single_leave(fpscr);
00254 }
00255 #else
00256 void idct_sh4(DCTELEM *block)
00257 {
00258 DEFREG;
00259
00260 int i;
00261 float tblock[8*8],*fblock;
00262
00263
00264
00265
00266 load_matrix(even_table);
00267
00268 fblock = tblock;
00269 i = 8;
00270 do {
00271 fr0 = block[0];
00272 fr1 = block[2];
00273 fr2 = block[4];
00274 fr3 = block[6];
00275 block+=8;
00276 ftrv();
00277 fblock[0] = fr0;
00278 fblock[2] = fr1;
00279 fblock[4] = fr2;
00280 fblock[6] = fr3;
00281 fblock+=8;
00282 } while(--i);
00283 block-=8*8;
00284 fblock-=8*8;
00285
00286 load_matrix(odd_table);
00287
00288 i = 8;
00289
00290 do {
00291 float t0,t1,t2,t3;
00292 fr0 = block[1];
00293 fr1 = block[3];
00294 fr2 = block[5];
00295 fr3 = block[7];
00296 block+=8;
00297 ftrv();
00298 t0 = fblock[0];
00299 t1 = fblock[2];
00300 t2 = fblock[4];
00301 t3 = fblock[6];
00302 fblock[0] = t0 + fr0;
00303 fblock[7] = t0 - fr0;
00304 fblock[1] = t1 + fr1;
00305 fblock[6] = t1 - fr1;
00306 fblock[2] = t2 + fr2;
00307 fblock[5] = t2 - fr2;
00308 fblock[3] = t3 + fr3;
00309 fblock[4] = t3 - fr3;
00310 fblock+=8;
00311 } while(--i);
00312 block-=8*8;
00313 fblock-=8*8;
00314
00315
00316
00317
00318 load_matrix(even_table);
00319
00320 i = 8;
00321
00322 do {
00323 fr0 = fblock[8*0];
00324 fr1 = fblock[8*2];
00325 fr2 = fblock[8*4];
00326 fr3 = fblock[8*6];
00327 ftrv();
00328 fblock[8*0] = fr0;
00329 fblock[8*2] = fr1;
00330 fblock[8*4] = fr2;
00331 fblock[8*6] = fr3;
00332 fblock++;
00333 } while(--i);
00334 fblock-=8;
00335
00336 load_matrix(odd_table);
00337
00338 i=8;
00339 do {
00340 float t0,t1,t2,t3;
00341 fr0 = fblock[8*1];
00342 fr1 = fblock[8*3];
00343 fr2 = fblock[8*5];
00344 fr3 = fblock[8*7];
00345 ftrv();
00346 t0 = fblock[8*0];
00347 t1 = fblock[8*2];
00348 t2 = fblock[8*4];
00349 t3 = fblock[8*6];
00350 fblock++;
00351 block[8*0] = DESCALE(t0 + fr0,3);
00352 block[8*7] = DESCALE(t0 - fr0,3);
00353 block[8*1] = DESCALE(t1 + fr1,3);
00354 block[8*6] = DESCALE(t1 - fr1,3);
00355 block[8*2] = DESCALE(t2 + fr2,3);
00356 block[8*5] = DESCALE(t2 - fr2,3);
00357 block[8*3] = DESCALE(t3 + fr3,3);
00358 block[8*4] = DESCALE(t3 - fr3,3);
00359 block++;
00360 } while(--i);
00361 }
00362 #endif