FFmpeg: libpostproc/postprocess.c Source File

00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029                         C       MMX     MMX2    3DNow   AltiVec
00030 isVertDC                Ec      Ec                      Ec
00031 isVertMinMaxOk          Ec      Ec                      Ec
00032 doVertLowPass           E               e       e       Ec
00033 doVertDefFilter         Ec      Ec      e       e       Ec
00034 isHorizDC               Ec      Ec                      Ec
00035 isHorizMinMaxOk         a       E                       Ec
00036 doHorizLowPass          E               e       e       Ec
00037 doHorizDefFilter        Ec      Ec      e       e       Ec
00038 do_a_deblock            Ec      E       Ec      E
00039 deRing                  E               e       e*      Ecp
00040 Vertical RKAlgo1        E               a       a
00041 Horizontal RKAlgo1                      a       a
00042 Vertical X1#            a               E       E
00043 Horizontal X1#          a               E       E
00044 LinIpolDeinterlace      e               E       E*
00045 CubicIpolDeinterlace    a               e       e*
00046 LinBlendDeinterlace     e               E       E*
00047 MedianDeinterlace#      E       Ec      Ec
00048 TempDeNoiser#           E               e       e       Ec
00049 
00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
00051 # more or less self-invented filters, so the exactness is not too meaningful
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the main loop more flexible (variable number of blocks at once;
00066         the if/else stuff per block is slowing things down)
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use git log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082 //#undef HAVE_MMX2
00083 //#define HAVE_AMD3DNOW
00084 //#undef HAVE_MMX
00085 //#undef ARCH_X86
00086 //#define DEBUG_BRIGHTNESS
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089 #include "libavutil/avstring.h"
00090 
00091 unsigned postproc_version(void)
00092 {
00093     return LIBPOSTPROC_VERSION_INT;
00094 }
00095 
00096 const char *postproc_configuration(void)
00097 {
00098     return FFMPEG_CONFIGURATION;
00099 }
00100 
00101 const char *postproc_license(void)
00102 {
00103 #define LICENSE_PREFIX "libpostproc license: "
00104     return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00105 }
00106 
00107 #if HAVE_ALTIVEC_H
00108 #include <altivec.h>
00109 #endif
00110 
/* Sizes used in this file:
 *   GET_MODE_BUFFER_SIZE - size of the scratch buffer the mode string is
 *                          copied into by pp_get_mode_by_name_and_quality()
 *   OPTIONS_ARRAY_SIZE   - capacity of the per-filter options[] array there
 *   BLOCK_SIZE           - number of rows/columns the block filters iterate
 *   TEMP_STRIDE          - not referenced in this part of the file
 */
00111 #define GET_MODE_BUFFER_SIZE 500
00112 #define OPTIONS_ARRAY_SIZE 10
00113 #define BLOCK_SIZE 8
00114 #define TEMP_STRIDE 8
00115 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00116 
00117 #if ARCH_X86
/* 64-bit packed constants: wNN repeats 0x00NN in each 16-bit lane,
 * bNN repeats 0xNN in each byte.
 * NOTE(review): presumably consumed by the x86 inline-assembly templates
 * included further down -- confirm. */
00118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00126 #endif
00127 
/* Threshold constant for the dering filter; not referenced in this part of the file. */
00128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00129 
00130 
/* Table of the filters that can be selected by name.
 * The two strings of each row are the short and long names that
 * pp_get_mode_by_name_and_quality() compares against with strcmp();
 * the last entry is the bitmask set/cleared in lumMode and chromMode.
 * NOTE(review): the three integers in between presumably map to chromDefault,
 * minLumQuality and minChromQuality, in that order -- confirm against the
 * declaration of struct PPFilter.
 * The row with NULL names terminates the table. */
00131 static struct PPFilter filters[]=
00132 {
00133     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
00134     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
00135 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
00136     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
00137     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
00138     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
00139     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
00140     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
00141     {"dr", "dering",                1, 5, 6, DERING},
00142     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
00143     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00144     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00145     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00146     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
00147     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
00148     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
00149     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
00150     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
00151     {NULL, NULL,0,0,0,0} //End Marker
00152 };
00153 
/* Alias table, stored as consecutive (name, expansion) string pairs.
 * pp_get_mode_by_name_and_quality() compares a filter name against the even
 * entries and, on a match, splices the following odd entry into the mode
 * string being parsed.  A single NULL terminates the list. */
00154 static const char *replaceTable[]=
00155 {
00156     "default",      "hb:a,vb:a,dr:a",
00157     "de",           "hb:a,vb:a,dr:a",
00158     "fast",         "h1:a,v1:a,dr:a",
00159     "fa",           "h1:a,v1:a,dr:a",
00160     "ac",           "ha:a:128:7,va:a,dr:a",
00161     NULL //End Marker
00162 };
00163 
00164 
00165 #if ARCH_X86
/* Issue an x86 PREFETCHNTA instruction for the address held in p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%[addr])\n\t" : : [addr] "r" (p));
}
00172 
/* Issue an x86 PREFETCHT0 instruction for the address held in p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%[addr])\n\t" : : [addr] "r" (p));
}
00179 
/* Issue an x86 PREFETCHT1 instruction for the address held in p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%[addr])\n\t" : : [addr] "r" (p));
}
00186 
/* Issue an x86 PREFETCHT2 instruction for the address held in p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%[addr])\n\t" : : [addr] "r" (p));
}
00193 #endif
00194 
00195 /* The horizontal functions exist only in C because the MMX
00196  * code is faster with vertical filters and transposing. */
00197 
00201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00202 {
00203     int numEq= 0;
00204     int y;
00205     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00206     const int dcThreshold= dcOffset*2 + 1;
00207 
00208     for(y=0; y<BLOCK_SIZE; y++){
00209         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00210         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00211         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00212         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00213         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00214         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00215         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00216         src+= stride;
00217     }
00218     return numEq > c->ppMode.flatnessThreshold;
00219 }
00220 
00224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00225 {
00226     int numEq= 0;
00227     int y;
00228     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00229     const int dcThreshold= dcOffset*2 + 1;
00230 
00231     src+= stride*4; // src points to begin of the 8x8 Block
00232     for(y=0; y<BLOCK_SIZE-1; y++){
00233         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00234         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00235         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00236         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00237         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00238         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00239         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00240         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00241         src+= stride;
00242     }
00243     return numEq > c->ppMode.flatnessThreshold;
00244 }
00245 
/**
 * Cheap horizontal range check over 8 rows.
 *
 * On every row one pair of columns is sampled and the row is accepted when
 * the difference of the two pixels lies within [-2*QP, 2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride distance between rows, in bytes
 * @param QP     quantiser used to scale the allowed difference
 * @return 1 if all sampled pairs are within range, 0 as soon as one is not
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    int row;
#if 1
    /* column pair sampled on rows 0..3; rows 4..7 repeat the same pattern */
    static const int pairs[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };

    for (row = 0; row < 8; row++, src += stride) {
        const int *pair = pairs[row & 3];

        if ((unsigned)(src[pair[0]] - src[pair[1]] + 2*QP) > 4*QP)
            return 0;
    }
#else
    for (row = 0; row < 8; row++, src += stride) {
        if ((unsigned)(src[0] - src[7] + 2*QP) > 4*QP)
            return 0;
    }
#endif
    return 1;
}
00268 
00269 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00270 {
00271 #if 1
00272 #if 1
00273     int x;
00274     src+= stride*4;
00275     for(x=0; x<BLOCK_SIZE; x+=4){
00276         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
00277         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00278         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00279         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00280     }
00281 #else
00282     int x;
00283     src+= stride*3;
00284     for(x=0; x<BLOCK_SIZE; x++){
00285         if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00286     }
00287 #endif
00288     return 1;
00289 #else
00290     int x;
00291     src+= stride*4;
00292     for(x=0; x<BLOCK_SIZE; x++){
00293         int min=255;
00294         int max=0;
00295         int y;
00296         for(y=0; y<8; y++){
00297             int v= src[x + y*stride];
00298             if(v>max) max=v;
00299             if(v<min) min=v;
00300         }
00301         if(max-min > 2*QP) return 0;
00302     }
00303     return 1;
00304 #endif
00305 }
00306 
00307 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00308 {
00309     if( isHorizDC_C(src, stride, c) ){
00310         if( isHorizMinMaxOk_C(src, stride, c->QP) )
00311             return 1;
00312         else
00313             return 0;
00314     }else{
00315         return 2;
00316     }
00317 }
00318 
00319 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00320 {
00321     if( isVertDC_C(src, stride, c) ){
00322         if( isVertMinMaxOk_C(src, stride, c->QP) )
00323             return 1;
00324         else
00325             return 0;
00326     }else{
00327         return 2;
00328     }
00329 }
00330 
00331 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00332 {
00333     int y;
00334     for(y=0; y<BLOCK_SIZE; y++){
00335         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00336 
00337         if(FFABS(middleEnergy) < 8*c->QP){
00338             const int q=(dst[3] - dst[4])/2;
00339             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00340             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00341 
00342             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00343             d= FFMAX(d, 0);
00344 
00345             d= (5*d + 32) >> 6;
00346             d*= FFSIGN(-middleEnergy);
00347 
00348             if(q>0)
00349             {
00350                 d= d<0 ? 0 : d;
00351                 d= d>q ? q : d;
00352             }
00353             else
00354             {
00355                 d= d>0 ? 0 : d;
00356                 d= d<q ? q : d;
00357             }
00358 
00359             dst[3]-= d;
00360             dst[4]+= d;
00361         }
00362         dst+= stride;
00363     }
00364 }
00365 
00370 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00371 {
00372     int y;
00373     for(y=0; y<BLOCK_SIZE; y++){
00374         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00375         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00376 
00377         int sums[10];
00378         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00379         sums[1] = sums[0] - first  + dst[3];
00380         sums[2] = sums[1] - first  + dst[4];
00381         sums[3] = sums[2] - first  + dst[5];
00382         sums[4] = sums[3] - first  + dst[6];
00383         sums[5] = sums[4] - dst[0] + dst[7];
00384         sums[6] = sums[5] - dst[1] + last;
00385         sums[7] = sums[6] - dst[2] + last;
00386         sums[8] = sums[7] - dst[3] + last;
00387         sums[9] = sums[8] - dst[4] + last;
00388 
00389         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00390         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00391         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00392         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00393         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00394         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00395         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00396         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00397 
00398         dst+= stride;
00399     }
00400 }
00401 
00410 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00411 {
00412     int y;
00413     static uint64_t *lut= NULL;
00414     if(lut==NULL)
00415     {
00416         int i;
00417         lut = av_malloc(256*8);
00418         for(i=0; i<256; i++)
00419         {
00420             int v= i < 128 ? 2*i : 2*(i-256);
00421 /*
00422 //Simulate 112242211 9-Tap filter
00423             uint64_t a= (v/16)  & 0xFF;
00424             uint64_t b= (v/8)   & 0xFF;
00425             uint64_t c= (v/4)   & 0xFF;
00426             uint64_t d= (3*v/8) & 0xFF;
00427 */
00428 //Simulate piecewise linear interpolation
00429             uint64_t a= (v/16)   & 0xFF;
00430             uint64_t b= (v*3/16) & 0xFF;
00431             uint64_t c= (v*5/16) & 0xFF;
00432             uint64_t d= (7*v/16) & 0xFF;
00433             uint64_t A= (0x100 - a)&0xFF;
00434             uint64_t B= (0x100 - b)&0xFF;
00435             uint64_t C= (0x100 - c)&0xFF;
00436             uint64_t D= (0x100 - c)&0xFF;
00437 
00438             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00439                        (D<<24) | (C<<16) | (B<<8)  | (A);
00440             //lut[i] = (v<<32) | (v<<24);
00441         }
00442     }
00443 
00444     for(y=0; y<BLOCK_SIZE; y++){
00445         int a= src[1] - src[2];
00446         int b= src[3] - src[4];
00447         int c= src[5] - src[6];
00448 
00449         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00450 
00451         if(d < QP){
00452             int v = d * FFSIGN(-b);
00453 
00454             src[1] +=v/8;
00455             src[2] +=v/4;
00456             src[3] +=3*v/8;
00457             src[4] -=3*v/8;
00458             src[5] -=v/4;
00459             src[6] -=v/8;
00460         }
00461         src+=stride;
00462     }
00463 }
00464 
00468 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00469     int y;
00470     const int QP= c->QP;
00471     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00472     const int dcThreshold= dcOffset*2 + 1;
00473 //START_TIMER
00474     src+= step*4; // src points to begin of the 8x8 Block
00475     for(y=0; y<8; y++){
00476         int numEq= 0;
00477 
00478         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00479         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00480         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00481         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00482         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00483         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00484         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00485         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00486         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00487         if(numEq > c->ppMode.flatnessThreshold){
00488             int min, max, x;
00489 
00490             if(src[0] > src[step]){
00491                 max= src[0];
00492                 min= src[step];
00493             }else{
00494                 max= src[step];
00495                 min= src[0];
00496             }
00497             for(x=2; x<8; x+=2){
00498                 if(src[x*step] > src[(x+1)*step]){
00499                         if(src[x    *step] > max) max= src[ x   *step];
00500                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
00501                 }else{
00502                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
00503                         if(src[ x   *step] < min) min= src[ x   *step];
00504                 }
00505             }
00506             if(max-min < 2*QP){
00507                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00508                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00509 
00510                 int sums[10];
00511                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00512                 sums[1] = sums[0] - first       + src[3*step];
00513                 sums[2] = sums[1] - first       + src[4*step];
00514                 sums[3] = sums[2] - first       + src[5*step];
00515                 sums[4] = sums[3] - first       + src[6*step];
00516                 sums[5] = sums[4] - src[0*step] + src[7*step];
00517                 sums[6] = sums[5] - src[1*step] + last;
00518                 sums[7] = sums[6] - src[2*step] + last;
00519                 sums[8] = sums[7] - src[3*step] + last;
00520                 sums[9] = sums[8] - src[4*step] + last;
00521 
00522                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00523                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00524                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00525                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00526                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00527                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00528                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00529                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00530             }
00531         }else{
00532             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00533 
00534             if(FFABS(middleEnergy) < 8*QP){
00535                 const int q=(src[3*step] - src[4*step])/2;
00536                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00537                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00538 
00539                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00540                 d= FFMAX(d, 0);
00541 
00542                 d= (5*d + 32) >> 6;
00543                 d*= FFSIGN(-middleEnergy);
00544 
00545                 if(q>0){
00546                     d= d<0 ? 0 : d;
00547                     d= d>q ? q : d;
00548                 }else{
00549                     d= d>0 ? 0 : d;
00550                     d= d<q ? q : d;
00551                 }
00552 
00553                 src[3*step]-= d;
00554                 src[4*step]+= d;
00555             }
00556         }
00557 
00558         src += stride;
00559     }
00560 /*if(step==16){
00561     STOP_TIMER("step16")
00562 }else{
00563     STOP_TIMER("stepX")
00564 }*/
00565 }
00566 
/* Per-CPU instantiation of the filter templates.
 * Step 1: decide from the configuration macros which variants to build
 *         (COMPILE_C, COMPILE_ALTIVEC, COMPILE_MMX, COMPILE_MMX2, COMPILE_3DNOW).
 * Step 2: reset every HAVE_* feature macro to 0.
 * Step 3: for each selected variant, set the matching HAVE_* macros to 1,
 *         define RENAME() to append the variant suffix, and include
 *         postprocess_template.c once more.
 * NOTE(review): presumably the template wraps its function names in RENAME(),
 * which is what yields postProcess_C, postProcess_MMX2, ... -- confirm in
 * postprocess_template.c. */
00567 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
00568 //Plain C versions
00569 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00570 #define COMPILE_C
00571 #endif
00572 
00573 #if HAVE_ALTIVEC
00574 #define COMPILE_ALTIVEC
00575 #endif //HAVE_ALTIVEC
00576 
00577 #if ARCH_X86
00578 
00579 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00580 #define COMPILE_MMX
00581 #endif
00582 
00583 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00584 #define COMPILE_MMX2
00585 #endif
00586 
00587 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00588 #define COMPILE_3DNOW
00589 #endif
00590 #endif /* ARCH_X86 */
00591 
/* From here on the HAVE_* macros no longer describe the build configuration;
 * they describe the variant currently being instantiated. */
00592 #undef HAVE_MMX
00593 #define HAVE_MMX 0
00594 #undef HAVE_MMX2
00595 #define HAVE_MMX2 0
00596 #undef HAVE_AMD3DNOW
00597 #define HAVE_AMD3DNOW 0
00598 #undef HAVE_ALTIVEC
00599 #define HAVE_ALTIVEC 0
00600 
00601 #ifdef COMPILE_C
00602 #define RENAME(a) a ## _C
00603 #include "postprocess_template.c"
00604 #endif
00605 
00606 #ifdef COMPILE_ALTIVEC
00607 #undef RENAME
00608 #undef HAVE_ALTIVEC
00609 #define HAVE_ALTIVEC 1
00610 #define RENAME(a) a ## _altivec
00611 #include "postprocess_altivec_template.c"
00612 #include "postprocess_template.c"
00613 #endif
00614 
00615 //MMX versions
00616 #ifdef COMPILE_MMX
00617 #undef RENAME
00618 #undef HAVE_MMX
00619 #define HAVE_MMX 1
00620 #define RENAME(a) a ## _MMX
00621 #include "postprocess_template.c"
00622 #endif
00623 
00624 //MMX2 versions
00625 #ifdef COMPILE_MMX2
00626 #undef RENAME
00627 #undef HAVE_MMX
00628 #undef HAVE_MMX2
00629 #define HAVE_MMX 1
00630 #define HAVE_MMX2 1
00631 #define RENAME(a) a ## _MMX2
00632 #include "postprocess_template.c"
00633 #endif
00634 
00635 //3DNOW versions
00636 #ifdef COMPILE_3DNOW
00637 #undef RENAME
00638 #undef HAVE_MMX
00639 #undef HAVE_MMX2
00640 #undef HAVE_AMD3DNOW
00641 #define HAVE_MMX 1
00642 #define HAVE_MMX2 0
00643 #define HAVE_AMD3DNOW 1
00644 #define RENAME(a) a ## _3DNow
00645 #include "postprocess_template.c"
00646 #endif
00647 
00648 // minor note: the HAVE_xyz is messed up after that line so do not use it.
00649 
00650 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00651         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00652 {
00653     PPContext *c= (PPContext *)vc;
00654     PPMode *ppMode= (PPMode *)vm;
00655     c->ppMode= *ppMode; //FIXME
00656 
00657     // Using ifs here as they are faster than function pointers although the
00658     // difference would not be measurable here but it is much better because
00659     // someone might exchange the CPU whithout restarting MPlayer ;)
00660 #if CONFIG_RUNTIME_CPUDETECT
00661 #if ARCH_X86
00662     // ordered per speed fastest first
00663     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00664         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00665     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00666         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00667     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00668         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00669     else
00670         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00671 #else
00672 #if HAVE_ALTIVEC
00673     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00674             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00675     else
00676 #endif
00677             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00678 #endif
00679 #else //CONFIG_RUNTIME_CPUDETECT
00680 #if   HAVE_MMX2
00681             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00682 #elif HAVE_AMD3DNOW
00683             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00684 #elif HAVE_MMX
00685             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00686 #elif HAVE_ALTIVEC
00687             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00688 #else
00689             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00690 #endif
00691 #endif 
00692 }
00693 
00694 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00695 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
00696 
/* -pp Command line Help
 *
 * Human-readable list of the filters and options understood by the mode
 * string parser.  The filter names printed here must be spelled exactly as in
 * the filters[] table, because the parser matches them with a case-sensitive
 * strcmp(): "ahdeblock", "avdeblock" and "forcequant" are the accepted long
 * names (the text previously advertised "hadeblock", "vadeblock" and
 * "forceQuant", none of which the parser accepts).
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00746 
00747 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00748 {
00749     char temp[GET_MODE_BUFFER_SIZE];
00750     char *p= temp;
00751     static const char filterDelimiters[] = ",/";
00752     static const char optionDelimiters[] = ":";
00753     struct PPMode *ppMode;
00754     char *filterToken;
00755 
00756     ppMode= av_malloc(sizeof(PPMode));
00757 
00758     ppMode->lumMode= 0;
00759     ppMode->chromMode= 0;
00760     ppMode->maxTmpNoise[0]= 700;
00761     ppMode->maxTmpNoise[1]= 1500;
00762     ppMode->maxTmpNoise[2]= 3000;
00763     ppMode->maxAllowedY= 234;
00764     ppMode->minAllowedY= 16;
00765     ppMode->baseDcDiff= 256/8;
00766     ppMode->flatnessThreshold= 56-16-1;
00767     ppMode->maxClippedThreshold= 0.01;
00768     ppMode->error=0;
00769 
00770     memset(temp, 0, GET_MODE_BUFFER_SIZE);
00771     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00772 
00773     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00774 
00775     for(;;){
00776         char *filterName;
00777         int q= 1000000; //PP_QUALITY_MAX;
00778         int chrom=-1;
00779         int luma=-1;
00780         char *option;
00781         char *options[OPTIONS_ARRAY_SIZE];
00782         int i;
00783         int filterNameOk=0;
00784         int numOfUnknownOptions=0;
00785         int enable=1; //does the user want us to enabled or disabled the filter
00786 
00787         filterToken= strtok(p, filterDelimiters);
00788         if(filterToken == NULL) break;
00789         p+= strlen(filterToken) + 1; // p points to next filterToken
00790         filterName= strtok(filterToken, optionDelimiters);
00791         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00792 
00793         if(*filterName == '-'){
00794             enable=0;
00795             filterName++;
00796         }
00797 
00798         for(;;){ //for all options
00799             option= strtok(NULL, optionDelimiters);
00800             if(option == NULL) break;
00801 
00802             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00803             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00804             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00805             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00806             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00807             else{
00808                 options[numOfUnknownOptions] = option;
00809                 numOfUnknownOptions++;
00810             }
00811             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00812         }
00813         options[numOfUnknownOptions] = NULL;
00814 
00815         /* replace stuff from the replace Table */
00816         for(i=0; replaceTable[2*i]!=NULL; i++){
00817             if(!strcmp(replaceTable[2*i], filterName)){
00818                 int newlen= strlen(replaceTable[2*i + 1]);
00819                 int plen;
00820                 int spaceLeft;
00821 
00822                 p--, *p=',';
00823 
00824                 plen= strlen(p);
00825                 spaceLeft= p - temp + plen;
00826                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
00827                     ppMode->error++;
00828                     break;
00829                 }
00830                 memmove(p + newlen, p, plen+1);
00831                 memcpy(p, replaceTable[2*i + 1], newlen);
00832                 filterNameOk=1;
00833             }
00834         }
00835 
00836         for(i=0; filters[i].shortName!=NULL; i++){
00837             if(   !strcmp(filters[i].longName, filterName)
00838                || !strcmp(filters[i].shortName, filterName)){
00839                 ppMode->lumMode &= ~filters[i].mask;
00840                 ppMode->chromMode &= ~filters[i].mask;
00841 
00842                 filterNameOk=1;
00843                 if(!enable) break; // user wants to disable it
00844 
00845                 if(q >= filters[i].minLumQuality && luma)
00846                     ppMode->lumMode|= filters[i].mask;
00847                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00848                     if(q >= filters[i].minChromQuality)
00849                             ppMode->chromMode|= filters[i].mask;
00850 
00851                 if(filters[i].mask == LEVEL_FIX){
00852                     int o;
00853                     ppMode->minAllowedY= 16;
00854                     ppMode->maxAllowedY= 234;
00855                     for(o=0; options[o]!=NULL; o++){
00856                         if(  !strcmp(options[o],"fullyrange")
00857                            ||!strcmp(options[o],"f")){
00858                             ppMode->minAllowedY= 0;
00859                             ppMode->maxAllowedY= 255;
00860                             numOfUnknownOptions--;
00861                         }
00862                     }
00863                 }
00864                 else if(filters[i].mask == TEMP_NOISE_FILTER)
00865                 {
00866                     int o;
00867                     int numOfNoises=0;
00868 
00869                     for(o=0; options[o]!=NULL; o++){
00870                         char *tail;
00871                         ppMode->maxTmpNoise[numOfNoises]=
00872                             strtol(options[o], &tail, 0);
00873                         if(tail!=options[o]){
00874                             numOfNoises++;
00875                             numOfUnknownOptions--;
00876                             if(numOfNoises >= 3) break;
00877                         }
00878                     }
00879                 }
00880                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
00881                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00882                     int o;
00883 
00884                     for(o=0; options[o]!=NULL && o<2; o++){
00885                         char *tail;
00886                         int val= strtol(options[o], &tail, 0);
00887                         if(tail==options[o]) break;
00888 
00889                         numOfUnknownOptions--;
00890                         if(o==0) ppMode->baseDcDiff= val;
00891                         else ppMode->flatnessThreshold= val;
00892                     }
00893                 }
00894                 else if(filters[i].mask == FORCE_QUANT){
00895                     int o;
00896                     ppMode->forcedQuant= 15;
00897 
00898                     for(o=0; options[o]!=NULL && o<1; o++){
00899                         char *tail;
00900                         int val= strtol(options[o], &tail, 0);
00901                         if(tail==options[o]) break;
00902 
00903                         numOfUnknownOptions--;
00904                         ppMode->forcedQuant= val;
00905                     }
00906                 }
00907             }
00908         }
00909         if(!filterNameOk) ppMode->error++;
00910         ppMode->error += numOfUnknownOptions;
00911     }
00912 
00913     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00914     if(ppMode->error){
00915         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00916         av_free(ppMode);
00917         return NULL;
00918     }
00919     return ppMode;
00920 }
00921 
00922 void pp_free_mode(pp_mode *mode){
00923     av_free(mode);
00924 }
00925 
/**
 * Drop the buffer currently referenced by *p and install a new one obtained
 * from av_mallocz().
 *
 * The old contents are not preserved. The result of av_mallocz() is stored
 * without being checked.
 *
 * @param p         address of the pointer to replace
 * @param alignment not used by this implementation
 * @param size      number of bytes requested for the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    (void)alignment;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
00930 
00931 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00932     int mbWidth = (width+15)>>4;
00933     int mbHeight= (height+15)>>4;
00934     int i;
00935 
00936     c->stride= stride;
00937     c->qpStride= qpStride;
00938 
00939     reallocAlign((void **)&c->tempDst, 8, stride*24);
00940     reallocAlign((void **)&c->tempSrc, 8, stride*24);
00941     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00942     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00943     for(i=0; i<256; i++)
00944             c->yHistogram[i]= width*height/64*15/256;
00945 
00946     for(i=0; i<3; i++){
00947         //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
00948         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00949         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00950     }
00951 
00952     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00953     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00954     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00955     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00956 }
00957 
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return always the string literal "postproc"
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr;
    return "postproc";
}
00961 
00962 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00963 
00964 pp_context *pp_get_context(int width, int height, int cpuCaps){
00965     PPContext *c= av_malloc(sizeof(PPContext));
00966     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
00967     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00968 
00969     memset(c, 0, sizeof(PPContext));
00970     c->av_class = &av_codec_context_class;
00971     c->cpuCaps= cpuCaps;
00972     if(cpuCaps&PP_FORMAT){
00973         c->hChromaSubSample= cpuCaps&0x3;
00974         c->vChromaSubSample= (cpuCaps>>4)&0x3;
00975     }else{
00976         c->hChromaSubSample= 1;
00977         c->vChromaSubSample= 1;
00978     }
00979 
00980     reallocBuffers(c, width, height, stride, qpStride);
00981 
00982     c->frameNum=-1;
00983 
00984     return c;
00985 }
00986 
00987 void pp_free_context(void *vc){
00988     PPContext *c = (PPContext*)vc;
00989     int i;
00990 
00991     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00992     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00993 
00994     av_free(c->tempBlocks);
00995     av_free(c->yHistogram);
00996     av_free(c->tempDst);
00997     av_free(c->tempSrc);
00998     av_free(c->deintTemp);
00999     av_free(c->stdQPTable);
01000     av_free(c->nonBQPTable);
01001     av_free(c->forcedQPTable);
01002 
01003     memset(c, 0, sizeof(PPContext));
01004 
01005     av_free(c);
01006 }
01007 
01008 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
01009                      uint8_t * dst[3], const int dstStride[3],
01010                      int width, int height,
01011                      const QP_STORE_T *QP_store,  int QPStride,
01012                      pp_mode *vm,  void *vc, int pict_type)
01013 {
01014     int mbWidth = (width+15)>>4;
01015     int mbHeight= (height+15)>>4;
01016     PPMode *mode = (PPMode*)vm;
01017     PPContext *c = (PPContext*)vc;
01018     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
01019     int absQPStride = FFABS(QPStride);
01020 
01021     // c->stride and c->QPStride are always positive
01022     if(c->stride < minStride || c->qpStride < absQPStride)
01023         reallocBuffers(c, width, height,
01024                        FFMAX(minStride, c->stride),
01025                        FFMAX(c->qpStride, absQPStride));
01026 
01027     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
01028         int i;
01029         QP_store= c->forcedQPTable;
01030         absQPStride = QPStride = 0;
01031         if(mode->lumMode & FORCE_QUANT)
01032             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
01033         else
01034             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01035     }
01036 
01037     if(pict_type & PP_PICT_TYPE_QP2){
01038         int i;
01039         const int count= mbHeight * absQPStride;
01040         for(i=0; i<(count>>2); i++){
01041             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01042         }
01043         for(i<<=2; i<count; i++){
01044             c->stdQPTable[i] = QP_store[i]>>1;
01045         }
01046         QP_store= c->stdQPTable;
01047         QPStride= absQPStride;
01048     }
01049 
01050     if(0){
01051         int x,y;
01052         for(y=0; y<mbHeight; y++){
01053             for(x=0; x<mbWidth; x++){
01054                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01055             }
01056             av_log(c, AV_LOG_INFO, "\n");
01057         }
01058         av_log(c, AV_LOG_INFO, "\n");
01059     }
01060 
01061     if((pict_type&7)!=3){
01062         if (QPStride >= 0){
01063             int i;
01064             const int count= mbHeight * QPStride;
01065             for(i=0; i<(count>>2); i++){
01066                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01067             }
01068             for(i<<=2; i<count; i++){
01069                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01070             }
01071         } else {
01072             int i,j;
01073             for(i=0; i<mbHeight; i++) {
01074                 for(j=0; j<absQPStride; j++) {
01075                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01076                 }
01077             }
01078         }
01079     }
01080 
01081     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01082            mode->lumMode, mode->chromMode);
01083 
01084     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01085                 width, height, QP_store, QPStride, 0, mode, c);
01086 
01087     width  = (width )>>c->hChromaSubSample;
01088     height = (height)>>c->vChromaSubSample;
01089 
01090     if(mode->chromMode){
01091         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01092                     width, height, QP_store, QPStride, 1, mode, c);
01093         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01094                     width, height, QP_store, QPStride, 2, mode, c);
01095     }
01096     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01097         linecpy(dst[1], src[1], height, srcStride[1]);
01098         linecpy(dst[2], src[2], height, srcStride[2]);
01099     }else{
01100         int y;
01101         for(y=0; y<height; y++){
01102             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01103             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01104         }
01105     }
01106 }