/* Minimum of two values.
 * Function-like macro: the selected argument is evaluated twice, so do not
 * pass expressions with side effects (e.g. XMIN(i++, j)). */
44 #define XMIN(a,b) ((a) < (b) ? (a) : (b))
/* Maximum of two values; same double-evaluation caveat as XMIN. */
45 #define XMAX(a,b) ((a) > (b) ? (a) : (b))
51 { 0, 48, 12, 60, 3, 51, 15, 63, },
52 { 32, 16, 44, 28, 35, 19, 47, 31, },
53 { 8, 56, 4, 52, 11, 59, 7, 55, },
54 { 40, 24, 36, 20, 43, 27, 39, 23, },
55 { 2, 50, 14, 62, 1, 49, 13, 61, },
56 { 34, 18, 46, 30, 33, 17, 45, 29, },
57 { 10, 58, 6, 54, 9, 57, 5, 53, },
58 { 42, 26, 38, 22, 41, 25, 37, 21, },
/* Fixed-point transform coefficients.
 * Each real-valued factor is scaled by 1024 (2^10) and rounded to nearest:
 * the +0.5 precedes the truncating cast to int. Expressions that multiply by
 * these constants shift the result back down with >> 10.
 * Resulting integer values: C0=387, C1=91, C2=232, C3=186, C4=201, C5=15. */
73 #define C0 ((int)(1024*0.37796447300922719759+0.5)) //sqrt(1/7)
74 #define C1 ((int)(1024*0.53452248382484879308/6+0.5)) //sqrt(2/7)/6
/* C2 and C3 have an extra factor of 1/2 folded into the constant. */
76 #define C2 ((int)(1024*0.45221175985034745004/2+0.5))
77 #define C3 ((int)(1024*0.36264567479870879474/2+0.5))
/* NOTE(review): the closed-form origin of the C2..C5 literals is not stated
 * here -- confirm against the transform derivation before changing them. */
80 #define C4 ((int)(1024*0.1962505182412941918+0.5))
81 #define C5 ((int)(1024*0.0149276808419397944+0.5))
85 dst[0*step] = ((s +
s3)*C0 + 512) >> 10;
86 s= (s - 6*
s3)*
C1 + 512;
87 d= (s0-
s2)*
C4 + (s1-s2)*
C5;
88 dst[1*step] = (s + 2*d)>>10;
90 d= (s1-
s0)*
C2 + (s1-s2)*
C3;
91 dst[2*step] = (s + d)>>10;
92 dst[3*step] = (s - d)>>10;
101 dst[1*step]= 2*s3 +
s2;
102 dst[3*step]= s3 - 2*
s2;
108 dst2[i*step/2] += src[j*step] * cos(i*
M_PI/n*(j+0.5)) * sqrt((i?2.0:1.0)/n);
109 if(fabs(dst2[i*step/2] - dst[i*step/2]) > 20)
110 printf(
"%d %d %d (%d %d %d %d) -> (%d %d %d %d)\n", i,dst2[i*step/2], dst[i*step/2],src[0*step], src[1*step], src[2*step], src[3*step], dst[0*step], dst[1*step],dst[2*step],dst[3*step]);
142 int s0= src[0*4] + src[6*4];
143 int s1= src[1*4] + src[5*4];
144 int s2= src[2*4] + src[4*4];
163 "movq (%0), %%mm0 \n\t"
164 "movq 1*4*2(%0), %%mm1 \n\t"
165 "paddw 6*4*2(%0), %%mm0 \n\t"
166 "paddw 5*4*2(%0), %%mm1 \n\t"
167 "movq 2*4*2(%0), %%mm2 \n\t"
168 "movq 3*4*2(%0), %%mm3 \n\t"
169 "paddw 4*4*2(%0), %%mm2 \n\t"
170 "paddw %%mm3, %%mm3 \n\t"
171 "movq %%mm3, %%mm4 \n\t"
172 "psubw %%mm0, %%mm3 \n\t"
173 "paddw %%mm0, %%mm4 \n\t"
174 "movq %%mm2, %%mm0 \n\t"
175 "psubw %%mm1, %%mm2 \n\t"
176 "paddw %%mm1, %%mm0 \n\t"
177 "movq %%mm4, %%mm1 \n\t"
178 "psubw %%mm0, %%mm4 \n\t"
179 "paddw %%mm0, %%mm1 \n\t"
180 "movq %%mm3, %%mm0 \n\t"
181 "psubw %%mm2, %%mm3 \n\t"
182 "psubw %%mm2, %%mm3 \n\t"
183 "paddw %%mm0, %%mm2 \n\t"
184 "paddw %%mm0, %%mm2 \n\t"
185 "movq %%mm1, (%1) \n\t"
186 "movq %%mm4, 2*4*2(%1) \n\t"
187 "movq %%mm2, 1*4*2(%1) \n\t"
188 "movq %%mm3, 3*4*2(%1) \n\t"
189 ::
"r" (src),
"r"(dst)
/* Floating-point scale constants: numerically SN1 is sqrt(5) and SN2 is
 * sqrt(10). They appear as divisors in the factor table entries and as
 * multipliers when the per-qp thresholds are computed. */
200 #define SN1 2.2360679775
201 #define SN2 3.16227766017
206 N/(N1*
N0),
N/(N1*N1),
N/(N1*
N0),
N/(N1*N2),
208 N/(N2*
N0),
N/(N2*N1),
N/(N2*
N0),
N/(N2*N2),
213 N/(SN2*
SN0),
N/(SN2*SN2),
N/(SN2*
SN0),
N/(SN2*SN2),
215 N/(SN2*
SN0),
N/(SN2*SN2),
N/(SN2*
SN0),
N/(SN2*SN2),
224 for(qp=0; qp<99; qp++){
226 thres2[qp][i]= ((i&1)?
SN2:
SN0) * ((i&4)?
SN2:SN0) *
XMAX(1,qp) * (1<<2) - 1 - bias;
235 a= src[0] * factor[0];
237 unsigned int threshold1= thres2[qp][i];
238 unsigned int threshold2= (threshold1<<1);
240 if(((
unsigned)(level+threshold1))>threshold2){
241 a += level * factor[i];
244 return (a + (1<<11))>>12;
251 a= src[0] * factor[0];
253 unsigned int threshold1= thres2[qp][i];
254 unsigned int threshold2= (threshold1<<1);
256 if(((
unsigned)(level+threshold1))>threshold2){
257 if(((
unsigned)(level+2*threshold1))>2*threshold2){
258 a += level * factor[i];
260 if(level>0) a+= 2*(level - (int)threshold1)*factor[i];
261 else a+= 2*(level + (int)threshold1)*factor[i];
265 return (a + (1<<11))>>12;
272 a= src[0] * factor[0];
274 unsigned int threshold1= thres2[qp][i];
275 unsigned int threshold2= (threshold1<<1);
277 if(((
unsigned)(level+threshold1))>threshold2){
278 if(level>0) a+= (level - (int)threshold1)*factor[i];
279 else a+= (level + (int)threshold1)*factor[i];
282 return (a + (1<<11))>>12;
294 if (!src || !dst)
return;
297 fast_memcpy(p_src + index, src + y*src_stride, width);
299 p_src[index - x - 1]= p_src[index + x ];
300 p_src[index + width + x ]= p_src[index + width - x - 1];
304 fast_memcpy(p_src + ( 7-y)*stride, p_src + ( y+8)*stride, stride);
305 fast_memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
310 for(x=-8; x<0; x+=4){
311 const int index= x + y*stride + (8-3)*(1+stride) + 8;
315 dctA_c(tp+4*8, src, stride);
318 const int qps= 3 + is_luma;
325 qp= qp_store[ (
XMIN(x, width-1)>>qps) + (
XMIN(y, height-1)>>qps) * qp_stride];
329 const int index= x + y*stride + (8-3)*(1+stride) + 8;
335 dctA_c(tp+4*8, src, stride);
340 v= (v +
dither[y&7][x&7])>>6;
341 if((
unsigned)v > 255)
343 dst[x + y*dst_stride]= v;
349 static int config(
struct vf_instance *vf,
351 unsigned int flags,
unsigned int outfmt){
352 int h= (height+16+15)&(~15);
354 vf->priv->temp_stride= (width+16+15)&(~15);
365 mpi->
planes[0]=vf->dmpi->planes[0];
366 mpi->
stride[0]=vf->dmpi->stride[0];
367 mpi->
width=vf->dmpi->width;
369 mpi->
planes[1]=vf->dmpi->planes[1];
370 mpi->
planes[2]=vf->dmpi->planes[2];
371 mpi->
stride[1]=vf->dmpi->stride[1];
372 mpi->
stride[2]=vf->dmpi->stride[2];
392 if(mpi->
qscale || vf->priv->qp){
412 static void uninit(
struct vf_instance *vf){
413 if(!vf->priv)
return;
441 static int control(
struct vf_instance *vf,
int request,
void*
data){
489 "Michael Niedermayer",