00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082
00083
00084
00085
00086
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089 #include "libavutil/avstring.h"
00090
00091 unsigned postproc_version(void)
00092 {
00093 return LIBPOSTPROC_VERSION_INT;
00094 }
00095
00096 const char *postproc_configuration(void)
00097 {
00098 return FFMPEG_CONFIGURATION;
00099 }
00100
00101 const char *postproc_license(void)
00102 {
00103 #define LICENSE_PREFIX "libpostproc license: "
00104 return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00105 }
00106
00107 #if HAVE_ALTIVEC_H
00108 #include <altivec.h>
00109 #endif
00110
/* Size of the scratch copy of the mode string in pp_get_mode_by_name_and_quality(). */
#define GET_MODE_BUFFER_SIZE    500
/* Capacity (including the NULL terminator slot) of the per-filter option list. */
#define OPTIONS_ARRAY_SIZE      10
/* Edge length used as the row/column loop bound by the block filters below. */
#define BLOCK_SIZE              8
/* Not referenced in this part of the file; presumably used by the included templates. */
#define TEMP_STRIDE             8
00115
00116
00117 #if ARCH_X86
00118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00126 #endif
00127
00128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00129
00130
00131 static struct PPFilter filters[]=
00132 {
00133 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
00134 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
00135
00136
00137 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
00138 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
00139 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
00140 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
00141 {"dr", "dering", 1, 5, 6, DERING},
00142 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
00143 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00144 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00145 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00146 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
00147 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
00148 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
00149 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
00150 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
00151 {NULL, NULL,0,0,0,0}
00152 };
00153
/*
 * Alias expansion table, stored as consecutive (alias, replacement) pairs and
 * terminated by a single NULL. The mode parser substitutes the replacement
 * text for a filter name that equals the alias.
 */
static const char *replaceTable[]=
{
    /* alias       expansion */
    "default",     "hb:a,vb:a,dr:a",
    "de",          "hb:a,vb:a,dr:a",
    "fast",        "h1:a,v1:a,dr:a",
    "fa",          "h1:a,v1:a,dr:a",
    "ac",          "ha:a:128:7,va:a,dr:a",
    NULL
};
00163
00164
#if ARCH_X86
/*
 * One-instruction wrappers: each emits the x86 prefetch instruction it is
 * named after, with p as the address operand. They produce no value.
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}

static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}

static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}

static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
#endif
00194
00195
00196
00197
00201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00202 {
00203 int numEq= 0;
00204 int y;
00205 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00206 const int dcThreshold= dcOffset*2 + 1;
00207
00208 for(y=0; y<BLOCK_SIZE; y++){
00209 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00210 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00211 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00212 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00213 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00214 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00215 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00216 src+= stride;
00217 }
00218 return numEq > c->ppMode.flatnessThreshold;
00219 }
00220
00224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00225 {
00226 int numEq= 0;
00227 int y;
00228 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00229 const int dcThreshold= dcOffset*2 + 1;
00230
00231 src+= stride*4;
00232 for(y=0; y<BLOCK_SIZE-1; y++){
00233 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00234 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00235 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00236 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00237 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00238 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00239 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00240 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00241 src+= stride;
00242 }
00243 return numEq > c->ppMode.flatnessThreshold;
00244 }
00245
/**
 * Sample one pixel pair per row of an 8-row block and check that no pair
 * differs by more than 2*QP.
 *
 * The sampled columns cycle with the row: (0,5), (2,7), (4,1), (6,3), and the
 * same four pairs again for rows 4..7.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between consecutive rows
 * @param QP     quantizer that scales the allowed difference
 * @return 1 if every sampled pair is within range, 0 at the first one that is not
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int minuend[4]    = {0, 2, 4, 6};
    static const int subtrahend[4] = {5, 7, 1, 3};
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row*stride;
        const int which = row & 3;

        /* the unsigned compare tests -2*QP <= diff <= 2*QP in one go */
        if ((unsigned)(line[minuend[which]] - line[subtrahend[which]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
00261
00262 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00263 {
00264 int x;
00265 src+= stride*4;
00266 for(x=0; x<BLOCK_SIZE; x+=4){
00267 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
00268 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00269 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00270 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00271 }
00272 return 1;
00273 }
00274
00275 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00276 {
00277 if( isHorizDC_C(src, stride, c) ){
00278 if( isHorizMinMaxOk_C(src, stride, c->QP) )
00279 return 1;
00280 else
00281 return 0;
00282 }else{
00283 return 2;
00284 }
00285 }
00286
00287 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00288 {
00289 if( isVertDC_C(src, stride, c) ){
00290 if( isVertMinMaxOk_C(src, stride, c->QP) )
00291 return 1;
00292 else
00293 return 0;
00294 }else{
00295 return 2;
00296 }
00297 }
00298
00299 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00300 {
00301 int y;
00302 for(y=0; y<BLOCK_SIZE; y++){
00303 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00304
00305 if(FFABS(middleEnergy) < 8*c->QP){
00306 const int q=(dst[3] - dst[4])/2;
00307 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00308 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00309
00310 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00311 d= FFMAX(d, 0);
00312
00313 d= (5*d + 32) >> 6;
00314 d*= FFSIGN(-middleEnergy);
00315
00316 if(q>0)
00317 {
00318 d= d<0 ? 0 : d;
00319 d= d>q ? q : d;
00320 }
00321 else
00322 {
00323 d= d>0 ? 0 : d;
00324 d= d<q ? q : d;
00325 }
00326
00327 dst[3]-= d;
00328 dst[4]+= d;
00329 }
00330 dst+= stride;
00331 }
00332 }
00333
00338 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00339 {
00340 int y;
00341 for(y=0; y<BLOCK_SIZE; y++){
00342 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00343 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00344
00345 int sums[10];
00346 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00347 sums[1] = sums[0] - first + dst[3];
00348 sums[2] = sums[1] - first + dst[4];
00349 sums[3] = sums[2] - first + dst[5];
00350 sums[4] = sums[3] - first + dst[6];
00351 sums[5] = sums[4] - dst[0] + dst[7];
00352 sums[6] = sums[5] - dst[1] + last;
00353 sums[7] = sums[6] - dst[2] + last;
00354 sums[8] = sums[7] - dst[3] + last;
00355 sums[9] = sums[8] - dst[4] + last;
00356
00357 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00358 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00359 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00360 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00361 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00362 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00363 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00364 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00365
00366 dst+= stride;
00367 }
00368 }
00369
/**
 * Experimental horizontal deblocking filter, applied in place to 8 rows.
 *
 * Per row the step across the block edge (src[3] - src[4]) is compared with
 * the average of the steps at src[1]/src[2] and src[5]/src[6]. If the excess
 * d is below QP, the edge is smoothed by moving src[1..3] and src[4..6]
 * towards each other by d/8, d/4 and 3*d/8.
 *
 * The function keeps no state. An earlier lookup table that was allocated
 * here on first use was never read, so it has been dropped.
 *
 * @param src    first pixel of the first row; src[1..6] are read and written
 * @param stride distance in bytes between consecutive rows
 * @param QP     threshold below which the edge is treated as an artifact
 */
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
{
    /* the tap positions src[1]..src[6] are fixed for an 8-pixel block */
    enum { ROWS = 8 };
    int y;

    for (y = 0; y < ROWS; y++) {
        const int a = src[1] - src[2];
        const int b = src[3] - src[4];
        const int c = src[5] - src[6];
        const int excess = abs(b) - (abs(a) + abs(c))/2;
        const int d = excess > 0 ? excess : 0;

        if (d < QP) {
            /* the correction has the opposite sign of the edge step b */
            const int v = b < 0 ? d : -d;

            src[1] += v/8;
            src[2] += v/4;
            src[3] += 3*v/8;
            src[4] -= 3*v/8;
            src[5] -= v/4;
            src[6] -= v/8;
        }
        src += stride;
    }
}
00432
00436 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00437 int y;
00438 const int QP= c->QP;
00439 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00440 const int dcThreshold= dcOffset*2 + 1;
00441
00442 src+= step*4;
00443 for(y=0; y<8; y++){
00444 int numEq= 0;
00445
00446 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00447 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00448 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00449 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00450 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00451 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00452 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00453 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00454 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00455 if(numEq > c->ppMode.flatnessThreshold){
00456 int min, max, x;
00457
00458 if(src[0] > src[step]){
00459 max= src[0];
00460 min= src[step];
00461 }else{
00462 max= src[step];
00463 min= src[0];
00464 }
00465 for(x=2; x<8; x+=2){
00466 if(src[x*step] > src[(x+1)*step]){
00467 if(src[x *step] > max) max= src[ x *step];
00468 if(src[(x+1)*step] < min) min= src[(x+1)*step];
00469 }else{
00470 if(src[(x+1)*step] > max) max= src[(x+1)*step];
00471 if(src[ x *step] < min) min= src[ x *step];
00472 }
00473 }
00474 if(max-min < 2*QP){
00475 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00476 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00477
00478 int sums[10];
00479 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00480 sums[1] = sums[0] - first + src[3*step];
00481 sums[2] = sums[1] - first + src[4*step];
00482 sums[3] = sums[2] - first + src[5*step];
00483 sums[4] = sums[3] - first + src[6*step];
00484 sums[5] = sums[4] - src[0*step] + src[7*step];
00485 sums[6] = sums[5] - src[1*step] + last;
00486 sums[7] = sums[6] - src[2*step] + last;
00487 sums[8] = sums[7] - src[3*step] + last;
00488 sums[9] = sums[8] - src[4*step] + last;
00489
00490 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00491 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00492 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00493 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00494 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00495 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00496 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00497 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00498 }
00499 }else{
00500 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00501
00502 if(FFABS(middleEnergy) < 8*QP){
00503 const int q=(src[3*step] - src[4*step])/2;
00504 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00505 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00506
00507 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00508 d= FFMAX(d, 0);
00509
00510 d= (5*d + 32) >> 6;
00511 d*= FFSIGN(-middleEnergy);
00512
00513 if(q>0){
00514 d= d<0 ? 0 : d;
00515 d= d>q ? q : d;
00516 }else{
00517 d= d>0 ? 0 : d;
00518 d= d<q ? q : d;
00519 }
00520
00521 src[3*step]-= d;
00522 src[4*step]+= d;
00523 }
00524 }
00525
00526 src += stride;
00527 }
00528
00529
00530
00531
00532
00533 }
00534
00535
00536
00537 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00538 #define COMPILE_C
00539 #endif
00540
00541 #if HAVE_ALTIVEC
00542 #define COMPILE_ALTIVEC
00543 #endif //HAVE_ALTIVEC
00544
00545 #if ARCH_X86
00546
00547 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00548 #define COMPILE_MMX
00549 #endif
00550
00551 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00552 #define COMPILE_MMX2
00553 #endif
00554
00555 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00556 #define COMPILE_3DNOW
00557 #endif
00558 #endif
00559
00560 #undef HAVE_MMX
00561 #define HAVE_MMX 0
00562 #undef HAVE_MMX2
00563 #define HAVE_MMX2 0
00564 #undef HAVE_AMD3DNOW
00565 #define HAVE_AMD3DNOW 0
00566 #undef HAVE_ALTIVEC
00567 #define HAVE_ALTIVEC 0
00568
00569 #ifdef COMPILE_C
00570 #define RENAME(a) a ## _C
00571 #include "postprocess_template.c"
00572 #endif
00573
00574 #ifdef COMPILE_ALTIVEC
00575 #undef RENAME
00576 #undef HAVE_ALTIVEC
00577 #define HAVE_ALTIVEC 1
00578 #define RENAME(a) a ## _altivec
00579 #include "postprocess_altivec_template.c"
00580 #include "postprocess_template.c"
00581 #endif
00582
00583
00584 #ifdef COMPILE_MMX
00585 #undef RENAME
00586 #undef HAVE_MMX
00587 #define HAVE_MMX 1
00588 #define RENAME(a) a ## _MMX
00589 #include "postprocess_template.c"
00590 #endif
00591
00592
00593 #ifdef COMPILE_MMX2
00594 #undef RENAME
00595 #undef HAVE_MMX
00596 #undef HAVE_MMX2
00597 #define HAVE_MMX 1
00598 #define HAVE_MMX2 1
00599 #define RENAME(a) a ## _MMX2
00600 #include "postprocess_template.c"
00601 #endif
00602
00603
00604 #ifdef COMPILE_3DNOW
00605 #undef RENAME
00606 #undef HAVE_MMX
00607 #undef HAVE_MMX2
00608 #undef HAVE_AMD3DNOW
00609 #define HAVE_MMX 1
00610 #define HAVE_MMX2 0
00611 #define HAVE_AMD3DNOW 1
00612 #define RENAME(a) a ## _3DNow
00613 #include "postprocess_template.c"
00614 #endif
00615
00616
00617
00618 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00619 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00620 {
00621 PPContext *c= (PPContext *)vc;
00622 PPMode *ppMode= (PPMode *)vm;
00623 c->ppMode= *ppMode;
00624
00625
00626
00627
00628 #if CONFIG_RUNTIME_CPUDETECT
00629 #if ARCH_X86
00630
00631 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00632 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00633 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00634 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00635 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00636 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00637 else
00638 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00639 #else
00640 #if HAVE_ALTIVEC
00641 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00642 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00643 else
00644 #endif
00645 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00646 #endif
00647 #else
00648 #if HAVE_MMX2
00649 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00650 #elif HAVE_AMD3DNOW
00651 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00652 #elif HAVE_MMX
00653 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00654 #elif HAVE_ALTIVEC
00655 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00656 #else
00657 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00658 #endif
00659 #endif
00660 }
00661
00662
00663
00664
00665
00666
/*
 * Human-readable description of the mode string syntax.
 *
 * The filter names printed here must be spelled exactly as in the filters[]
 * table, because the parser matches them with a case-sensitive strcmp().
 * The declaration form depends on LIBPOSTPROC_VERSION_INT: a pointer before
 * major version 52, an array from then on.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
    "Available postprocessing filters:\n"
    "Filters Options\n"
    "short long name short long option Description\n"
    "* * a autoq CPU power dependent enabler\n"
    " c chrom chrominance filtering enabled\n"
    " y nochrom chrominance filtering disabled\n"
    " n noluma luma filtering disabled\n"
    "hb hdeblock (2 threshold) horizontal deblocking filter\n"
    " 1. difference factor: default=32, higher -> more deblocking\n"
    " 2. flatness threshold: default=39, lower -> more deblocking\n"
    " the h & v deblocking filters share these\n"
    " so you can't set different thresholds for h / v\n"
    "vb vdeblock (2 threshold) vertical deblocking filter\n"
    "ha ahdeblock (2 threshold) horizontal deblocking filter\n"
    "va avdeblock (2 threshold) vertical deblocking filter\n"
    "h1 x1hdeblock experimental h deblock filter 1\n"
    "v1 x1vdeblock experimental v deblock filter 1\n"
    "dr dering deringing filter\n"
    "al autolevels automatic brightness / contrast\n"
    " f fullyrange stretch luminance to (0..255)\n"
    "lb linblenddeint linear blend deinterlacer\n"
    "li linipoldeint linear interpolating deinterlace\n"
    "ci cubicipoldeint cubic interpolating deinterlacer\n"
    "md mediandeint median deinterlacer\n"
    "fd ffmpegdeint ffmpeg deinterlacer\n"
    "l5 lowpass5 FIR lowpass deinterlacer\n"
    "de default hb:a,vb:a,dr:a\n"
    "fa fast h1:a,v1:a,dr:a\n"
    "ac ha:a:128:7,va:a,dr:a\n"
    "tn tmpnoise (3 threshold) temporal noise reducer\n"
    " 1. <= 2. <= 3. larger -> stronger filtering\n"
    "fq forcequant <quantizer> force quantizer\n"
    "Usage:\n"
    "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
    "long form example:\n"
    "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
    "short form example:\n"
    "vb:a/hb:a/lb de,-vb\n"
    "more examples:\n"
    "tn:64:128:256\n"
    "\n"
;
00714
00715 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00716 {
00717 char temp[GET_MODE_BUFFER_SIZE];
00718 char *p= temp;
00719 static const char filterDelimiters[] = ",/";
00720 static const char optionDelimiters[] = ":";
00721 struct PPMode *ppMode;
00722 char *filterToken;
00723
00724 ppMode= av_malloc(sizeof(PPMode));
00725
00726 ppMode->lumMode= 0;
00727 ppMode->chromMode= 0;
00728 ppMode->maxTmpNoise[0]= 700;
00729 ppMode->maxTmpNoise[1]= 1500;
00730 ppMode->maxTmpNoise[2]= 3000;
00731 ppMode->maxAllowedY= 234;
00732 ppMode->minAllowedY= 16;
00733 ppMode->baseDcDiff= 256/8;
00734 ppMode->flatnessThreshold= 56-16-1;
00735 ppMode->maxClippedThreshold= 0.01;
00736 ppMode->error=0;
00737
00738 memset(temp, 0, GET_MODE_BUFFER_SIZE);
00739 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00740
00741 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00742
00743 for(;;){
00744 char *filterName;
00745 int q= 1000000;
00746 int chrom=-1;
00747 int luma=-1;
00748 char *option;
00749 char *options[OPTIONS_ARRAY_SIZE];
00750 int i;
00751 int filterNameOk=0;
00752 int numOfUnknownOptions=0;
00753 int enable=1;
00754
00755 filterToken= strtok(p, filterDelimiters);
00756 if(filterToken == NULL) break;
00757 p+= strlen(filterToken) + 1;
00758 filterName= strtok(filterToken, optionDelimiters);
00759 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00760
00761 if(*filterName == '-'){
00762 enable=0;
00763 filterName++;
00764 }
00765
00766 for(;;){
00767 option= strtok(NULL, optionDelimiters);
00768 if(option == NULL) break;
00769
00770 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00771 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00772 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00773 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00774 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00775 else{
00776 options[numOfUnknownOptions] = option;
00777 numOfUnknownOptions++;
00778 }
00779 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00780 }
00781 options[numOfUnknownOptions] = NULL;
00782
00783
00784 for(i=0; replaceTable[2*i]!=NULL; i++){
00785 if(!strcmp(replaceTable[2*i], filterName)){
00786 int newlen= strlen(replaceTable[2*i + 1]);
00787 int plen;
00788 int spaceLeft;
00789
00790 p--, *p=',';
00791
00792 plen= strlen(p);
00793 spaceLeft= p - temp + plen;
00794 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
00795 ppMode->error++;
00796 break;
00797 }
00798 memmove(p + newlen, p, plen+1);
00799 memcpy(p, replaceTable[2*i + 1], newlen);
00800 filterNameOk=1;
00801 }
00802 }
00803
00804 for(i=0; filters[i].shortName!=NULL; i++){
00805 if( !strcmp(filters[i].longName, filterName)
00806 || !strcmp(filters[i].shortName, filterName)){
00807 ppMode->lumMode &= ~filters[i].mask;
00808 ppMode->chromMode &= ~filters[i].mask;
00809
00810 filterNameOk=1;
00811 if(!enable) break;
00812
00813 if(q >= filters[i].minLumQuality && luma)
00814 ppMode->lumMode|= filters[i].mask;
00815 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00816 if(q >= filters[i].minChromQuality)
00817 ppMode->chromMode|= filters[i].mask;
00818
00819 if(filters[i].mask == LEVEL_FIX){
00820 int o;
00821 ppMode->minAllowedY= 16;
00822 ppMode->maxAllowedY= 234;
00823 for(o=0; options[o]!=NULL; o++){
00824 if( !strcmp(options[o],"fullyrange")
00825 ||!strcmp(options[o],"f")){
00826 ppMode->minAllowedY= 0;
00827 ppMode->maxAllowedY= 255;
00828 numOfUnknownOptions--;
00829 }
00830 }
00831 }
00832 else if(filters[i].mask == TEMP_NOISE_FILTER)
00833 {
00834 int o;
00835 int numOfNoises=0;
00836
00837 for(o=0; options[o]!=NULL; o++){
00838 char *tail;
00839 ppMode->maxTmpNoise[numOfNoises]=
00840 strtol(options[o], &tail, 0);
00841 if(tail!=options[o]){
00842 numOfNoises++;
00843 numOfUnknownOptions--;
00844 if(numOfNoises >= 3) break;
00845 }
00846 }
00847 }
00848 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
00849 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00850 int o;
00851
00852 for(o=0; options[o]!=NULL && o<2; o++){
00853 char *tail;
00854 int val= strtol(options[o], &tail, 0);
00855 if(tail==options[o]) break;
00856
00857 numOfUnknownOptions--;
00858 if(o==0) ppMode->baseDcDiff= val;
00859 else ppMode->flatnessThreshold= val;
00860 }
00861 }
00862 else if(filters[i].mask == FORCE_QUANT){
00863 int o;
00864 ppMode->forcedQuant= 15;
00865
00866 for(o=0; options[o]!=NULL && o<1; o++){
00867 char *tail;
00868 int val= strtol(options[o], &tail, 0);
00869 if(tail==options[o]) break;
00870
00871 numOfUnknownOptions--;
00872 ppMode->forcedQuant= val;
00873 }
00874 }
00875 }
00876 }
00877 if(!filterNameOk) ppMode->error++;
00878 ppMode->error += numOfUnknownOptions;
00879 }
00880
00881 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00882 if(ppMode->error){
00883 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00884 av_free(ppMode);
00885 return NULL;
00886 }
00887 return ppMode;
00888 }
00889
00890 void pp_free_mode(pp_mode *mode){
00891 av_free(mode);
00892 }
00893
/**
 * Replace *p with a fresh zero-filled buffer of the given size.
 *
 * The old buffer is freed first and its contents are not carried over.
 *
 * @param p         address of the pointer to replace
 * @param alignment currently unused
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
00898
00899 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00900 int mbWidth = (width+15)>>4;
00901 int mbHeight= (height+15)>>4;
00902 int i;
00903
00904 c->stride= stride;
00905 c->qpStride= qpStride;
00906
00907 reallocAlign((void **)&c->tempDst, 8, stride*24);
00908 reallocAlign((void **)&c->tempSrc, 8, stride*24);
00909 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00910 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00911 for(i=0; i<256; i++)
00912 c->yHistogram[i]= width*height/64*15/256;
00913
00914 for(i=0; i<3; i++){
00915
00916 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00917 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);
00918 }
00919
00920 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00921 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00922 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00923 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00924 }
00925
/**
 * Name callback: returns the same fixed name for any context.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    static const char name[] = "postproc";

    return name;
}
00929
00930 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00931
00932 pp_context *pp_get_context(int width, int height, int cpuCaps){
00933 PPContext *c= av_malloc(sizeof(PPContext));
00934 int stride= FFALIGN(width, 16);
00935 int qpStride= (width+15)/16 + 2;
00936
00937 memset(c, 0, sizeof(PPContext));
00938 c->av_class = &av_codec_context_class;
00939 c->cpuCaps= cpuCaps;
00940 if(cpuCaps&PP_FORMAT){
00941 c->hChromaSubSample= cpuCaps&0x3;
00942 c->vChromaSubSample= (cpuCaps>>4)&0x3;
00943 }else{
00944 c->hChromaSubSample= 1;
00945 c->vChromaSubSample= 1;
00946 }
00947
00948 reallocBuffers(c, width, height, stride, qpStride);
00949
00950 c->frameNum=-1;
00951
00952 return c;
00953 }
00954
00955 void pp_free_context(void *vc){
00956 PPContext *c = (PPContext*)vc;
00957 int i;
00958
00959 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00960 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00961
00962 av_free(c->tempBlocks);
00963 av_free(c->yHistogram);
00964 av_free(c->tempDst);
00965 av_free(c->tempSrc);
00966 av_free(c->deintTemp);
00967 av_free(c->stdQPTable);
00968 av_free(c->nonBQPTable);
00969 av_free(c->forcedQPTable);
00970
00971 memset(c, 0, sizeof(PPContext));
00972
00973 av_free(c);
00974 }
00975
00976 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00977 uint8_t * dst[3], const int dstStride[3],
00978 int width, int height,
00979 const QP_STORE_T *QP_store, int QPStride,
00980 pp_mode *vm, void *vc, int pict_type)
00981 {
00982 int mbWidth = (width+15)>>4;
00983 int mbHeight= (height+15)>>4;
00984 PPMode *mode = (PPMode*)vm;
00985 PPContext *c = (PPContext*)vc;
00986 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
00987 int absQPStride = FFABS(QPStride);
00988
00989
00990 if(c->stride < minStride || c->qpStride < absQPStride)
00991 reallocBuffers(c, width, height,
00992 FFMAX(minStride, c->stride),
00993 FFMAX(c->qpStride, absQPStride));
00994
00995 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
00996 int i;
00997 QP_store= c->forcedQPTable;
00998 absQPStride = QPStride = 0;
00999 if(mode->lumMode & FORCE_QUANT)
01000 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01001 else
01002 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01003 }
01004
01005 if(pict_type & PP_PICT_TYPE_QP2){
01006 int i;
01007 const int count= mbHeight * absQPStride;
01008 for(i=0; i<(count>>2); i++){
01009 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01010 }
01011 for(i<<=2; i<count; i++){
01012 c->stdQPTable[i] = QP_store[i]>>1;
01013 }
01014 QP_store= c->stdQPTable;
01015 QPStride= absQPStride;
01016 }
01017
01018 if(0){
01019 int x,y;
01020 for(y=0; y<mbHeight; y++){
01021 for(x=0; x<mbWidth; x++){
01022 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01023 }
01024 av_log(c, AV_LOG_INFO, "\n");
01025 }
01026 av_log(c, AV_LOG_INFO, "\n");
01027 }
01028
01029 if((pict_type&7)!=3){
01030 if (QPStride >= 0){
01031 int i;
01032 const int count= mbHeight * QPStride;
01033 for(i=0; i<(count>>2); i++){
01034 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01035 }
01036 for(i<<=2; i<count; i++){
01037 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01038 }
01039 } else {
01040 int i,j;
01041 for(i=0; i<mbHeight; i++) {
01042 for(j=0; j<absQPStride; j++) {
01043 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01044 }
01045 }
01046 }
01047 }
01048
01049 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01050 mode->lumMode, mode->chromMode);
01051
01052 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01053 width, height, QP_store, QPStride, 0, mode, c);
01054
01055 width = (width )>>c->hChromaSubSample;
01056 height = (height)>>c->vChromaSubSample;
01057
01058 if(mode->chromMode){
01059 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01060 width, height, QP_store, QPStride, 1, mode, c);
01061 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01062 width, height, QP_store, QPStride, 2, mode, c);
01063 }
01064 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01065 linecpy(dst[1], src[1], height, srcStride[1]);
01066 linecpy(dst[2], src[2], height, srcStride[2]);
01067 }else{
01068 int y;
01069 for(y=0; y<height; y++){
01070 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01071 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01072 }
01073 }
01074 }