00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082
00083
00084
00085
00086
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089 #include "libavutil/avstring.h"
00090
00091 unsigned postproc_version(void)
00092 {
00093 return LIBPOSTPROC_VERSION_INT;
00094 }
00095
00096 const char *postproc_configuration(void)
00097 {
00098 return FFMPEG_CONFIGURATION;
00099 }
00100
00101 const char *postproc_license(void)
00102 {
00103 #define LICENSE_PREFIX "libpostproc license: "
00104 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00105 }
00106
00107 #if HAVE_ALTIVEC_H
00108 #include <altivec.h>
00109 #endif
00110
/* Size in bytes of the scratch buffer the mode string is copied into and
 * expanded in by pp_get_mode_by_name_and_quality(). */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter options[] array in
 * pp_get_mode_by_name_and_quality(); one slot is kept for the NULL terminator. */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in samples, of the blocks the filters in this file work on. */
#define BLOCK_SIZE 8
/* Not referenced in this file's visible code; presumably used by the included
 * template sources -- TODO confirm. */
#define TEMP_STRIDE 8
00115
00116
/*
 * 64-bit constants declared through DECLARE_ASM_CONST with an alignment
 * argument of 8. None of them is referenced by the C code visible in this
 * file; presumably they are operands for the inline assembly in the included
 * template sources -- TODO confirm.
 *
 * wNN: the value 0xNN repeated in each of the four 16-bit words.
 * bNN: the value 0xNN repeated in each of the eight bytes.
 */
#if ARCH_X86
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared on every architecture (outside the ARCH_X86 guard); its use is not
 * visible in this file. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00129
00130
00131 static struct PPFilter filters[]=
00132 {
00133 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
00134 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
00135
00136
00137 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
00138 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
00139 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
00140 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
00141 {"dr", "dering", 1, 5, 6, DERING},
00142 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
00143 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00144 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00145 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00146 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
00147 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
00148 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
00149 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
00150 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
00151 {NULL, NULL,0,0,0,0}
00152 };
00153
/**
 * Alias expansion table for mode strings, stored as consecutive
 * (alias, replacement) pairs and terminated by a single NULL.
 *
 * pp_get_mode_by_name_and_quality() walks it with index 2*i for the alias and
 * 2*i + 1 for the filter list that replaces it. Both the strings and the
 * pointer array are read-only, so the array itself is const as well.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL /* terminator */
};
00163
00164
00165 #if ARCH_X86
/** Execute the x86 `prefetchnta` instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%[addr])\n\t"
        : /* no outputs */
        : [addr] "r" (p)
    );
}
00172
/** Execute the x86 `prefetcht0` instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%[addr])\n\t"
        : /* no outputs */
        : [addr] "r" (p)
    );
}
00179
/** Execute the x86 `prefetcht1` instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%[addr])\n\t"
        : /* no outputs */
        : [addr] "r" (p)
    );
}
00186
/** Execute the x86 `prefetcht2` instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%[addr])\n\t"
        : /* no outputs */
        : [addr] "r" (p)
    );
}
00193 #endif
00194
00195
00196
00197
00201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00202 {
00203 int numEq= 0;
00204 int y;
00205 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00206 const int dcThreshold= dcOffset*2 + 1;
00207
00208 for(y=0; y<BLOCK_SIZE; y++){
00209 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00210 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00211 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00212 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00213 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00214 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00215 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00216 src+= stride;
00217 }
00218 return numEq > c->ppMode.flatnessThreshold;
00219 }
00220
00224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00225 {
00226 int numEq= 0;
00227 int y;
00228 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00229 const int dcThreshold= dcOffset*2 + 1;
00230
00231 src+= stride*4;
00232 for(y=0; y<BLOCK_SIZE-1; y++){
00233 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00234 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00235 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00236 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00237 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00238 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00239 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00240 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00241 src+= stride;
00242 }
00243 return numEq > c->ppMode.flatnessThreshold;
00244 }
00245
/**
 * Sparse horizontal range check over 8 lines.
 *
 * On each line exactly one pair of samples is compared; the pair cycles with
 * period 4 through (0,5), (2,7), (4,1), (6,3). The check fails as soon as a
 * pair differs by more than 2*QP in either direction (single unsigned
 * compare against 4*QP after biasing by 2*QP).
 *
 * @param src    first sample of the block
 * @param stride distance in bytes between consecutive lines
 * @param QP     quantizer the tolerance is derived from
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int pairOfLine[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const uint8_t *line = src;
    int row;

    for(row = 0; row < 8; row++, line += stride){
        const int lhs = line[pairOfLine[row & 3][0]];
        const int rhs = line[pairOfLine[row & 3][1]];

        if((unsigned)(lhs - rhs + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
00268
00269 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00270 {
00271 #if 1
00272 #if 1
00273 int x;
00274 src+= stride*4;
00275 for(x=0; x<BLOCK_SIZE; x+=4){
00276 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
00277 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00278 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00279 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00280 }
00281 #else
00282 int x;
00283 src+= stride*3;
00284 for(x=0; x<BLOCK_SIZE; x++){
00285 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00286 }
00287 #endif
00288 return 1;
00289 #else
00290 int x;
00291 src+= stride*4;
00292 for(x=0; x<BLOCK_SIZE; x++){
00293 int min=255;
00294 int max=0;
00295 int y;
00296 for(y=0; y<8; y++){
00297 int v= src[x + y*stride];
00298 if(v>max) max=v;
00299 if(v<min) min=v;
00300 }
00301 if(max-min > 2*QP) return 0;
00302 }
00303 return 1;
00304 #endif
00305 }
00306
00307 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00308 {
00309 if( isHorizDC_C(src, stride, c) ){
00310 if( isHorizMinMaxOk_C(src, stride, c->QP) )
00311 return 1;
00312 else
00313 return 0;
00314 }else{
00315 return 2;
00316 }
00317 }
00318
00319 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00320 {
00321 if( isVertDC_C(src, stride, c) ){
00322 if( isVertMinMaxOk_C(src, stride, c->QP) )
00323 return 1;
00324 else
00325 return 0;
00326 }else{
00327 return 2;
00328 }
00329 }
00330
00331 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00332 {
00333 int y;
00334 for(y=0; y<BLOCK_SIZE; y++){
00335 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00336
00337 if(FFABS(middleEnergy) < 8*c->QP){
00338 const int q=(dst[3] - dst[4])/2;
00339 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00340 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00341
00342 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00343 d= FFMAX(d, 0);
00344
00345 d= (5*d + 32) >> 6;
00346 d*= FFSIGN(-middleEnergy);
00347
00348 if(q>0)
00349 {
00350 d= d<0 ? 0 : d;
00351 d= d>q ? q : d;
00352 }
00353 else
00354 {
00355 d= d>0 ? 0 : d;
00356 d= d<q ? q : d;
00357 }
00358
00359 dst[3]-= d;
00360 dst[4]+= d;
00361 }
00362 dst+= stride;
00363 }
00364 }
00365
00370 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00371 {
00372 int y;
00373 for(y=0; y<BLOCK_SIZE; y++){
00374 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00375 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00376
00377 int sums[10];
00378 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00379 sums[1] = sums[0] - first + dst[3];
00380 sums[2] = sums[1] - first + dst[4];
00381 sums[3] = sums[2] - first + dst[5];
00382 sums[4] = sums[3] - first + dst[6];
00383 sums[5] = sums[4] - dst[0] + dst[7];
00384 sums[6] = sums[5] - dst[1] + last;
00385 sums[7] = sums[6] - dst[2] + last;
00386 sums[8] = sums[7] - dst[3] + last;
00387 sums[9] = sums[8] - dst[4] + last;
00388
00389 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00390 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00391 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00392 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00393 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00394 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00395 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00396 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00397
00398 dst+= stride;
00399 }
00400 }
00401
00410 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00411 {
00412 int y;
00413 static uint64_t *lut= NULL;
00414 if(lut==NULL)
00415 {
00416 int i;
00417 lut = av_malloc(256*8);
00418 for(i=0; i<256; i++)
00419 {
00420 int v= i < 128 ? 2*i : 2*(i-256);
00421
00422
00423
00424
00425
00426
00427
00428
00429 uint64_t a= (v/16) & 0xFF;
00430 uint64_t b= (v*3/16) & 0xFF;
00431 uint64_t c= (v*5/16) & 0xFF;
00432 uint64_t d= (7*v/16) & 0xFF;
00433 uint64_t A= (0x100 - a)&0xFF;
00434 uint64_t B= (0x100 - b)&0xFF;
00435 uint64_t C= (0x100 - c)&0xFF;
00436 uint64_t D= (0x100 - c)&0xFF;
00437
00438 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00439 (D<<24) | (C<<16) | (B<<8) | (A);
00440
00441 }
00442 }
00443
00444 for(y=0; y<BLOCK_SIZE; y++){
00445 int a= src[1] - src[2];
00446 int b= src[3] - src[4];
00447 int c= src[5] - src[6];
00448
00449 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00450
00451 if(d < QP){
00452 int v = d * FFSIGN(-b);
00453
00454 src[1] +=v/8;
00455 src[2] +=v/4;
00456 src[3] +=3*v/8;
00457 src[4] -=3*v/8;
00458 src[5] -=v/4;
00459 src[6] -=v/8;
00460 }
00461 src+=stride;
00462 }
00463 }
00464
00468 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00469 int y;
00470 const int QP= c->QP;
00471 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00472 const int dcThreshold= dcOffset*2 + 1;
00473
00474 src+= step*4;
00475 for(y=0; y<8; y++){
00476 int numEq= 0;
00477
00478 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00479 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00480 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00481 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00482 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00483 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00484 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00485 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00486 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00487 if(numEq > c->ppMode.flatnessThreshold){
00488 int min, max, x;
00489
00490 if(src[0] > src[step]){
00491 max= src[0];
00492 min= src[step];
00493 }else{
00494 max= src[step];
00495 min= src[0];
00496 }
00497 for(x=2; x<8; x+=2){
00498 if(src[x*step] > src[(x+1)*step]){
00499 if(src[x *step] > max) max= src[ x *step];
00500 if(src[(x+1)*step] < min) min= src[(x+1)*step];
00501 }else{
00502 if(src[(x+1)*step] > max) max= src[(x+1)*step];
00503 if(src[ x *step] < min) min= src[ x *step];
00504 }
00505 }
00506 if(max-min < 2*QP){
00507 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00508 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00509
00510 int sums[10];
00511 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00512 sums[1] = sums[0] - first + src[3*step];
00513 sums[2] = sums[1] - first + src[4*step];
00514 sums[3] = sums[2] - first + src[5*step];
00515 sums[4] = sums[3] - first + src[6*step];
00516 sums[5] = sums[4] - src[0*step] + src[7*step];
00517 sums[6] = sums[5] - src[1*step] + last;
00518 sums[7] = sums[6] - src[2*step] + last;
00519 sums[8] = sums[7] - src[3*step] + last;
00520 sums[9] = sums[8] - src[4*step] + last;
00521
00522 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00523 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00524 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00525 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00526 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00527 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00528 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00529 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00530 }
00531 }else{
00532 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00533
00534 if(FFABS(middleEnergy) < 8*QP){
00535 const int q=(src[3*step] - src[4*step])/2;
00536 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00537 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00538
00539 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00540 d= FFMAX(d, 0);
00541
00542 d= (5*d + 32) >> 6;
00543 d*= FFSIGN(-middleEnergy);
00544
00545 if(q>0){
00546 d= d<0 ? 0 : d;
00547 d= d>q ? q : d;
00548 }else{
00549 d= d>0 ? 0 : d;
00550 d= d<q ? q : d;
00551 }
00552
00553 src[3*step]-= d;
00554 src[4*step]+= d;
00555 }
00556 }
00557
00558 src += stride;
00559 }
00560
00561
00562
00563
00564
00565 }
00566
00567
00568
/*
 * Template instantiation.
 *
 * postprocess_template.c is included once per enabled CPU variant. Before
 * each inclusion RENAME() is redefined so the template's identifiers get a
 * variant suffix (_C, _altivec, _MMX, _MMX2, _3DNow) and the HAVE_* macros
 * are set to describe that variant.
 */

/* Step 1: decide which variants to build. With CONFIG_RUNTIME_CPUDETECT the C
 * variant and all x86 variants are built; otherwise only the variant matching
 * the build-time HAVE_* settings. */
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_C
#endif

#if HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if ARCH_X86

#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX
#endif

#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2
#endif

#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW
#endif
#endif

/* Step 2: reset every feature macro to 0 so each variant below starts from a
 * clean state. From here on the HAVE_* macros no longer reflect config.h. */
#undef HAVE_MMX
#define HAVE_MMX 0
#undef HAVE_MMX2
#define HAVE_MMX2 0
#undef HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

/* Step 3: include the template once per selected variant. */

/* plain C variant */
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

/* AltiVec variant; additionally pulls in the AltiVec-specific template */
#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

/* MMX variant */
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#define HAVE_MMX 1
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

/* MMX2 variant (MMX is enabled as well) */
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

/* 3DNow! variant (MMX enabled, MMX2 explicitly disabled) */
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
00647
00648
00649
00650 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00651 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00652 {
00653 PPContext *c= (PPContext *)vc;
00654 PPMode *ppMode= (PPMode *)vm;
00655 c->ppMode= *ppMode;
00656
00657
00658
00659
00660 #if CONFIG_RUNTIME_CPUDETECT
00661 #if ARCH_X86
00662
00663 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00664 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00665 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00666 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00667 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00668 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00669 else
00670 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00671 #else
00672 #if HAVE_ALTIVEC
00673 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00674 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00675 else
00676 #endif
00677 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00678 #endif
00679 #else //CONFIG_RUNTIME_CPUDETECT
00680 #if HAVE_MMX2
00681 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00682 #elif HAVE_AMD3DNOW
00683 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00684 #elif HAVE_MMX
00685 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00686 #elif HAVE_ALTIVEC
00687 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00688 #else
00689 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00690 #endif
00691 #endif
00692 }
00693
00694
00695
00696
00697
00698
/**
 * Human-readable description of the mode string syntax and of all filters.
 *
 * The long names listed here must match the names the parser compares with
 * strcmp() (the filters[] and replaceTable[] tables): the accurate deblocking
 * filters are "ahdeblock"/"avdeblock" and the quantizer override is
 * "forcequant" (lower case).
 *
 * The declaration is a pointer for library versions below 52 and an array
 * from version 52 on.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00746
00747 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00748 {
00749 char temp[GET_MODE_BUFFER_SIZE];
00750 char *p= temp;
00751 static const char filterDelimiters[] = ",/";
00752 static const char optionDelimiters[] = ":";
00753 struct PPMode *ppMode;
00754 char *filterToken;
00755
00756 ppMode= av_malloc(sizeof(PPMode));
00757
00758 ppMode->lumMode= 0;
00759 ppMode->chromMode= 0;
00760 ppMode->maxTmpNoise[0]= 700;
00761 ppMode->maxTmpNoise[1]= 1500;
00762 ppMode->maxTmpNoise[2]= 3000;
00763 ppMode->maxAllowedY= 234;
00764 ppMode->minAllowedY= 16;
00765 ppMode->baseDcDiff= 256/8;
00766 ppMode->flatnessThreshold= 56-16-1;
00767 ppMode->maxClippedThreshold= 0.01;
00768 ppMode->error=0;
00769
00770 memset(temp, 0, GET_MODE_BUFFER_SIZE);
00771 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00772
00773 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00774
00775 for(;;){
00776 char *filterName;
00777 int q= 1000000;
00778 int chrom=-1;
00779 int luma=-1;
00780 char *option;
00781 char *options[OPTIONS_ARRAY_SIZE];
00782 int i;
00783 int filterNameOk=0;
00784 int numOfUnknownOptions=0;
00785 int enable=1;
00786
00787 filterToken= strtok(p, filterDelimiters);
00788 if(filterToken == NULL) break;
00789 p+= strlen(filterToken) + 1;
00790 filterName= strtok(filterToken, optionDelimiters);
00791 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00792
00793 if(*filterName == '-'){
00794 enable=0;
00795 filterName++;
00796 }
00797
00798 for(;;){
00799 option= strtok(NULL, optionDelimiters);
00800 if(option == NULL) break;
00801
00802 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00803 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00804 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00805 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00806 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00807 else{
00808 options[numOfUnknownOptions] = option;
00809 numOfUnknownOptions++;
00810 }
00811 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00812 }
00813 options[numOfUnknownOptions] = NULL;
00814
00815
00816 for(i=0; replaceTable[2*i]!=NULL; i++){
00817 if(!strcmp(replaceTable[2*i], filterName)){
00818 int newlen= strlen(replaceTable[2*i + 1]);
00819 int plen;
00820 int spaceLeft;
00821
00822 p--, *p=',';
00823
00824 plen= strlen(p);
00825 spaceLeft= p - temp + plen;
00826 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
00827 ppMode->error++;
00828 break;
00829 }
00830 memmove(p + newlen, p, plen+1);
00831 memcpy(p, replaceTable[2*i + 1], newlen);
00832 filterNameOk=1;
00833 }
00834 }
00835
00836 for(i=0; filters[i].shortName!=NULL; i++){
00837 if( !strcmp(filters[i].longName, filterName)
00838 || !strcmp(filters[i].shortName, filterName)){
00839 ppMode->lumMode &= ~filters[i].mask;
00840 ppMode->chromMode &= ~filters[i].mask;
00841
00842 filterNameOk=1;
00843 if(!enable) break;
00844
00845 if(q >= filters[i].minLumQuality && luma)
00846 ppMode->lumMode|= filters[i].mask;
00847 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00848 if(q >= filters[i].minChromQuality)
00849 ppMode->chromMode|= filters[i].mask;
00850
00851 if(filters[i].mask == LEVEL_FIX){
00852 int o;
00853 ppMode->minAllowedY= 16;
00854 ppMode->maxAllowedY= 234;
00855 for(o=0; options[o]!=NULL; o++){
00856 if( !strcmp(options[o],"fullyrange")
00857 ||!strcmp(options[o],"f")){
00858 ppMode->minAllowedY= 0;
00859 ppMode->maxAllowedY= 255;
00860 numOfUnknownOptions--;
00861 }
00862 }
00863 }
00864 else if(filters[i].mask == TEMP_NOISE_FILTER)
00865 {
00866 int o;
00867 int numOfNoises=0;
00868
00869 for(o=0; options[o]!=NULL; o++){
00870 char *tail;
00871 ppMode->maxTmpNoise[numOfNoises]=
00872 strtol(options[o], &tail, 0);
00873 if(tail!=options[o]){
00874 numOfNoises++;
00875 numOfUnknownOptions--;
00876 if(numOfNoises >= 3) break;
00877 }
00878 }
00879 }
00880 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
00881 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00882 int o;
00883
00884 for(o=0; options[o]!=NULL && o<2; o++){
00885 char *tail;
00886 int val= strtol(options[o], &tail, 0);
00887 if(tail==options[o]) break;
00888
00889 numOfUnknownOptions--;
00890 if(o==0) ppMode->baseDcDiff= val;
00891 else ppMode->flatnessThreshold= val;
00892 }
00893 }
00894 else if(filters[i].mask == FORCE_QUANT){
00895 int o;
00896 ppMode->forcedQuant= 15;
00897
00898 for(o=0; options[o]!=NULL && o<1; o++){
00899 char *tail;
00900 int val= strtol(options[o], &tail, 0);
00901 if(tail==options[o]) break;
00902
00903 numOfUnknownOptions--;
00904 ppMode->forcedQuant= val;
00905 }
00906 }
00907 }
00908 }
00909 if(!filterNameOk) ppMode->error++;
00910 ppMode->error += numOfUnknownOptions;
00911 }
00912
00913 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00914 if(ppMode->error){
00915 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00916 av_free(ppMode);
00917 return NULL;
00918 }
00919 return ppMode;
00920 }
00921
00922 void pp_free_mode(pp_mode *mode){
00923 av_free(mode);
00924 }
00925
/**
 * Replace the buffer *p with a fresh zero-filled one of `size` bytes.
 *
 * The old contents are not preserved. The result of av_mallocz() is stored
 * without being checked.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *const previous = *p;

    av_free(previous);
    *p = av_mallocz(size);
}
00930
00931 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00932 int mbWidth = (width+15)>>4;
00933 int mbHeight= (height+15)>>4;
00934 int i;
00935
00936 c->stride= stride;
00937 c->qpStride= qpStride;
00938
00939 reallocAlign((void **)&c->tempDst, 8, stride*24);
00940 reallocAlign((void **)&c->tempSrc, 8, stride*24);
00941 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00942 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00943 for(i=0; i<256; i++)
00944 c->yHistogram[i]= width*height/64*15/256;
00945
00946 for(i=0; i<3; i++){
00947
00948 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00949 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);
00950 }
00951
00952 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00953 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00954 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00955 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00956 }
00957
/**
 * Name callback stored in the AVClass of this file.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    const char *const label = "postproc";

    (void)ptr;
    return label;
}
00961
/* AVClass that pp_get_context() stores in every context's av_class member.
 * Positional initializers: the string "Postproc", the context_to_name
 * callback, and NULL for the third member. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00963
00964 pp_context *pp_get_context(int width, int height, int cpuCaps){
00965 PPContext *c= av_malloc(sizeof(PPContext));
00966 int stride= FFALIGN(width, 16);
00967 int qpStride= (width+15)/16 + 2;
00968
00969 memset(c, 0, sizeof(PPContext));
00970 c->av_class = &av_codec_context_class;
00971 c->cpuCaps= cpuCaps;
00972 if(cpuCaps&PP_FORMAT){
00973 c->hChromaSubSample= cpuCaps&0x3;
00974 c->vChromaSubSample= (cpuCaps>>4)&0x3;
00975 }else{
00976 c->hChromaSubSample= 1;
00977 c->vChromaSubSample= 1;
00978 }
00979
00980 reallocBuffers(c, width, height, stride, qpStride);
00981
00982 c->frameNum=-1;
00983
00984 return c;
00985 }
00986
00987 void pp_free_context(void *vc){
00988 PPContext *c = (PPContext*)vc;
00989 int i;
00990
00991 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00992 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00993
00994 av_free(c->tempBlocks);
00995 av_free(c->yHistogram);
00996 av_free(c->tempDst);
00997 av_free(c->tempSrc);
00998 av_free(c->deintTemp);
00999 av_free(c->stdQPTable);
01000 av_free(c->nonBQPTable);
01001 av_free(c->forcedQPTable);
01002
01003 memset(c, 0, sizeof(PPContext));
01004
01005 av_free(c);
01006 }
01007
01008 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
01009 uint8_t * dst[3], const int dstStride[3],
01010 int width, int height,
01011 const QP_STORE_T *QP_store, int QPStride,
01012 pp_mode *vm, void *vc, int pict_type)
01013 {
01014 int mbWidth = (width+15)>>4;
01015 int mbHeight= (height+15)>>4;
01016 PPMode *mode = (PPMode*)vm;
01017 PPContext *c = (PPContext*)vc;
01018 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01019 int absQPStride = FFABS(QPStride);
01020
01021
01022 if(c->stride < minStride || c->qpStride < absQPStride)
01023 reallocBuffers(c, width, height,
01024 FFMAX(minStride, c->stride),
01025 FFMAX(c->qpStride, absQPStride));
01026
01027 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
01028 int i;
01029 QP_store= c->forcedQPTable;
01030 absQPStride = QPStride = 0;
01031 if(mode->lumMode & FORCE_QUANT)
01032 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01033 else
01034 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01035 }
01036
01037 if(pict_type & PP_PICT_TYPE_QP2){
01038 int i;
01039 const int count= mbHeight * absQPStride;
01040 for(i=0; i<(count>>2); i++){
01041 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01042 }
01043 for(i<<=2; i<count; i++){
01044 c->stdQPTable[i] = QP_store[i]>>1;
01045 }
01046 QP_store= c->stdQPTable;
01047 QPStride= absQPStride;
01048 }
01049
01050 if(0){
01051 int x,y;
01052 for(y=0; y<mbHeight; y++){
01053 for(x=0; x<mbWidth; x++){
01054 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01055 }
01056 av_log(c, AV_LOG_INFO, "\n");
01057 }
01058 av_log(c, AV_LOG_INFO, "\n");
01059 }
01060
01061 if((pict_type&7)!=3){
01062 if (QPStride >= 0){
01063 int i;
01064 const int count= mbHeight * QPStride;
01065 for(i=0; i<(count>>2); i++){
01066 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01067 }
01068 for(i<<=2; i<count; i++){
01069 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01070 }
01071 } else {
01072 int i,j;
01073 for(i=0; i<mbHeight; i++) {
01074 for(j=0; j<absQPStride; j++) {
01075 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01076 }
01077 }
01078 }
01079 }
01080
01081 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01082 mode->lumMode, mode->chromMode);
01083
01084 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01085 width, height, QP_store, QPStride, 0, mode, c);
01086
01087 width = (width )>>c->hChromaSubSample;
01088 height = (height)>>c->vChromaSubSample;
01089
01090 if(mode->chromMode){
01091 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01092 width, height, QP_store, QPStride, 1, mode, c);
01093 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01094 width, height, QP_store, QPStride, 2, mode, c);
01095 }
01096 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01097 linecpy(dst[1], src[1], height, srcStride[1]);
01098 linecpy(dst[2], src[2], height, srcStride[2]);
01099 }else{
01100 int y;
01101 for(y=0; y<height; y++){
01102 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01103 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01104 }
01105 }
01106 }