00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00071 #include <stdlib.h>
00072 #include <stdio.h>
00073 #include "libavutil/common.h"
00074 #include "dsputil.h"
00075
/* Number of coefficients along one edge of the block handled by this file. */
#define DCTSIZE 8

/* Wrapper around the return type of the exported entry points; it expands
 * to its argument unchanged. */
#define GLOBAL(x) x

/* Plain right shift.  For negative signed operands the C standard leaves
 * the result implementation-defined. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))

/* The butterflies in this file are written out for exactly eight samples
 * per row/column, so refuse to build for any other block size. */
#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
/*
 * Number of fraction bits carried by the FIX_* multipliers below.
 * MULTIPLY() shifts the product right by this amount again.
 */
#define CONST_BITS 8

/*
 * Fixed-point multipliers: each name spells the real value, the number is
 * that value scaled by 2^CONST_BITS and rounded to the nearest integer.
 */
#if CONST_BITS == 8
#define FIX_0_382683433 ((int32_t)  98)   /* 0.382683433 * 256 */
#define FIX_0_541196100 ((int32_t) 139)   /* 0.541196100 * 256 */
#define FIX_0_707106781 ((int32_t) 181)   /* 0.707106781 * 256 */
#define FIX_1_306562965 ((int32_t) 334)   /* 1.306562965 * 256 */
#else
/*
 * Generic path for other precisions.  FIX() is not defined in the visible
 * part of this file, so provide a fallback unless an included header
 * already supplies one.  The rounding used here reproduces the four
 * hand-coded values above when CONST_BITS is 8.
 */
#ifndef FIX
#define FIX(x) ((int32_t) ((x) * (1 << CONST_BITS) + 0.5))
#endif
#define FIX_0_382683433 FIX(0.382683433)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_707106781 FIX(0.707106781)
#define FIX_1_306562965 FIX(1.306562965)
#endif
00128
00129
00130
00131
00132
00133
00134
/*
 * Unless USE_ACCURATE_ROUNDING is requested, descaling is a bare right
 * shift: the low bits are dropped and no rounding term is added.  Any
 * DESCALE inherited from an included header is replaced.
 * NOTE(review): with USE_ACCURATE_ROUNDING defined, DESCALE has to come
 * from outside the visible part of this file -- confirm.
 */
#ifndef USE_ACCURATE_ROUNDING
#undef DESCALE
#define DESCALE(value, bits) RIGHT_SHIFT(value, bits)
#endif

/*
 * Multiply a value by one of the FIX_* constants and remove the
 * CONST_BITS fraction bits again; the result is narrowed to DCTELEM.
 */
#define MULTIPLY(value, factor) ((DCTELEM) DESCALE((value) * (factor), CONST_BITS))
00146
00147 static av_always_inline void row_fdct(DCTELEM * data){
00148 int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00149 int_fast16_t tmp10, tmp11, tmp12, tmp13;
00150 int_fast16_t z1, z2, z3, z4, z5, z11, z13;
00151 DCTELEM *dataptr;
00152 int ctr;
00153
00154
00155
00156 dataptr = data;
00157 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00158 tmp0 = dataptr[0] + dataptr[7];
00159 tmp7 = dataptr[0] - dataptr[7];
00160 tmp1 = dataptr[1] + dataptr[6];
00161 tmp6 = dataptr[1] - dataptr[6];
00162 tmp2 = dataptr[2] + dataptr[5];
00163 tmp5 = dataptr[2] - dataptr[5];
00164 tmp3 = dataptr[3] + dataptr[4];
00165 tmp4 = dataptr[3] - dataptr[4];
00166
00167
00168
00169 tmp10 = tmp0 + tmp3;
00170 tmp13 = tmp0 - tmp3;
00171 tmp11 = tmp1 + tmp2;
00172 tmp12 = tmp1 - tmp2;
00173
00174 dataptr[0] = tmp10 + tmp11;
00175 dataptr[4] = tmp10 - tmp11;
00176
00177 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00178 dataptr[2] = tmp13 + z1;
00179 dataptr[6] = tmp13 - z1;
00180
00181
00182
00183 tmp10 = tmp4 + tmp5;
00184 tmp11 = tmp5 + tmp6;
00185 tmp12 = tmp6 + tmp7;
00186
00187
00188 z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433);
00189 z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;
00190 z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;
00191 z3 = MULTIPLY(tmp11, FIX_0_707106781);
00192
00193 z11 = tmp7 + z3;
00194 z13 = tmp7 - z3;
00195
00196 dataptr[5] = z13 + z2;
00197 dataptr[3] = z13 - z2;
00198 dataptr[1] = z11 + z4;
00199 dataptr[7] = z11 - z4;
00200
00201 dataptr += DCTSIZE;
00202 }
00203 }
00204
00205
00206
00207
00208
00209 GLOBAL(void)
00210 fdct_ifast (DCTELEM * data)
00211 {
00212 int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00213 int_fast16_t tmp10, tmp11, tmp12, tmp13;
00214 int_fast16_t z1, z2, z3, z4, z5, z11, z13;
00215 DCTELEM *dataptr;
00216 int ctr;
00217
00218 row_fdct(data);
00219
00220
00221
00222 dataptr = data;
00223 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00224 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
00225 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
00226 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
00227 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
00228 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
00229 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
00230 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
00231 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
00232
00233
00234
00235 tmp10 = tmp0 + tmp3;
00236 tmp13 = tmp0 - tmp3;
00237 tmp11 = tmp1 + tmp2;
00238 tmp12 = tmp1 - tmp2;
00239
00240 dataptr[DCTSIZE*0] = tmp10 + tmp11;
00241 dataptr[DCTSIZE*4] = tmp10 - tmp11;
00242
00243 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00244 dataptr[DCTSIZE*2] = tmp13 + z1;
00245 dataptr[DCTSIZE*6] = tmp13 - z1;
00246
00247
00248
00249 tmp10 = tmp4 + tmp5;
00250 tmp11 = tmp5 + tmp6;
00251 tmp12 = tmp6 + tmp7;
00252
00253
00254 z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433);
00255 z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;
00256 z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;
00257 z3 = MULTIPLY(tmp11, FIX_0_707106781);
00258
00259 z11 = tmp7 + z3;
00260 z13 = tmp7 - z3;
00261
00262 dataptr[DCTSIZE*5] = z13 + z2;
00263 dataptr[DCTSIZE*3] = z13 - z2;
00264 dataptr[DCTSIZE*1] = z11 + z4;
00265 dataptr[DCTSIZE*7] = z11 - z4;
00266
00267 dataptr++;
00268 }
00269 }
00270
00271
00272
00273
00274
00275 GLOBAL(void)
00276 fdct_ifast248 (DCTELEM * data)
00277 {
00278 int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00279 int_fast16_t tmp10, tmp11, tmp12, tmp13;
00280 int_fast16_t z1;
00281 DCTELEM *dataptr;
00282 int ctr;
00283
00284 row_fdct(data);
00285
00286
00287
00288 dataptr = data;
00289 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00290 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
00291 tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
00292 tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
00293 tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
00294 tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
00295 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
00296 tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
00297 tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
00298
00299
00300
00301 tmp10 = tmp0 + tmp3;
00302 tmp11 = tmp1 + tmp2;
00303 tmp12 = tmp1 - tmp2;
00304 tmp13 = tmp0 - tmp3;
00305
00306 dataptr[DCTSIZE*0] = tmp10 + tmp11;
00307 dataptr[DCTSIZE*4] = tmp10 - tmp11;
00308
00309 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00310 dataptr[DCTSIZE*2] = tmp13 + z1;
00311 dataptr[DCTSIZE*6] = tmp13 - z1;
00312
00313 tmp10 = tmp4 + tmp7;
00314 tmp11 = tmp5 + tmp6;
00315 tmp12 = tmp5 - tmp6;
00316 tmp13 = tmp4 - tmp7;
00317
00318 dataptr[DCTSIZE*1] = tmp10 + tmp11;
00319 dataptr[DCTSIZE*5] = tmp10 - tmp11;
00320
00321 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
00322 dataptr[DCTSIZE*3] = tmp13 + z1;
00323 dataptr[DCTSIZE*7] = tmp13 - z1;
00324
00325 dataptr++;
00326 }
00327 }
00328
00329
/*
 * Remove the file-private helper macros again so they cannot collide with
 * same-named macros in code that follows.  MULTIPLY expands to DESCALE and
 * CONST_BITS, so it is dropped together with them, and all four FIX_*
 * constants are removed rather than only two of them.
 */
#undef GLOBAL
#undef CONST_BITS
#undef DESCALE
#undef MULTIPLY
#undef FIX_0_382683433
#undef FIX_0_541196100
#undef FIX_0_707106781
#undef FIX_1_306562965