[FFmpeg-devel] [PATCH] swscale alpha channel support

Michael Niedermayer michaelni
Mon Mar 2 00:35:04 CET 2009


On Fri, Feb 27, 2009 at 11:30:25PM +0100, Cédric Schieli wrote:
[...]

> @@ -608,9 +635,15 @@
>          Y >>=10;\
>          U >>=10;\
>          V >>=10;\
> +        if (alpha){\
> +            A = 0;\
> +            for (j=0; j<lumFilterSize; j++)\
> +                A += alpSrc[j][i     ] * lumFilter[j];\
> +            A >>=10;\
> +        }\
>  

This does not look like it is rounding to nearest: the accumulated sum is simply truncated by the ">>= 10".


[...]
> @@ -972,6 +986,14 @@
>                      : "%"REG_a
>                  );
>              }
> +            if (CONFIG_SWSCALE_ALPHA && aDest){
> +                __asm__ volatile(
> +                    YSCALEYUV2YV121_ACCURATE
> +                    :: "r" (alpSrc+dstW), "r" (aDest+dstW),
> +                    "g" (-dstW)
> +                    : "%"REG_a
> +                );
> +            }
>          }else{
>              while(p--){
>                  __asm__ volatile(
> @@ -981,6 +1003,14 @@
>                      : "%"REG_a
>                  );
>              }
> +            if (CONFIG_SWSCALE_ALPHA && aDest){
> +                __asm__ volatile(
> +                    YSCALEYUV2YV121
> +                    :: "r" (alpSrc+dstW), "r" (aDest+dstW),
> +                    "g" (-dstW)
> +                    : "%"REG_a
> +                );
> +            }
>          }
>          return;
>      }

I would prefer that these not be duplicated in the generated code.



[...]
> @@ -1095,11 +1147,28 @@
>              switch(c->dstFormat)
>              {
>              case PIX_FMT_RGB32:
> +                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
> +                    YSCALEYUV2PACKEDX
> +                    YSCALEYUV2RGBX
> +                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
> +                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
> +                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
> +                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET)
> +                    "psraw                        $3, %%mm1         \n\t"
> +                    "psraw                        $3, %%mm7         \n\t"
> +                    "packuswb                  %%mm7, %%mm1         \n\t"
> +                    "movq               "U_TEMP"(%0), %%mm2         \n\t"
> +                    "movq               "V_TEMP"(%0), %%mm4         \n\t"
> +                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"

It seems that YSCALEYUV2PACKEDX_YA could be changed to take the registers as
parameters, which would avoid the movq TEMP save/restore?
The same applies to all other cases where it is possible.




[...]
> @@ -1191,6 +1260,32 @@
>          {
>              //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
>              case PIX_FMT_RGB32:
> +                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
> +                    *(uint16_t **)(&c->u_temp)=abuf0;
> +                    *(uint16_t **)(&c->v_temp)=abuf1;
> +                    __asm__ volatile(
> +                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
> +                    "mov        %4, %%"REG_b"               \n\t"
> +                    "push %%"REG_BP"                        \n\t"
> +                    YSCALEYUV2RGB(%%REGBP, %5)
> +                    "push                   %0              \n\t"
> +                    "push                   %1              \n\t"
> +                    "mov          "U_TEMP"(%5), %0          \n\t"
> +                    "mov          "V_TEMP"(%5), %1          \n\t"
> +                    YSCALEYUV2RGB_YA(%%REGBP, %5)
> +                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
> +                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
> +                    "packuswb            %%mm7, %%mm1       \n\t"
> +                    "pop                    %1              \n\t"
> +                    "pop                    %0              \n\t"
> +                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
> +                    "pop %%"REG_BP"                         \n\t"
> +                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
> +
> +                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
> +                    "a" (&c->redDither)

The push/pop in the inner loop can be avoided, at least on x86_64.


[...]

> +static inline void RENAME(bgr32ToA)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused){
> +    int i;
> +    for (i=0; i<width; i++){
> +#ifdef WORDS_BIGENDIAN
> +        dst[i]= ((uint32_t *)src)[i]&0xFF;
> +#else
> +        dst[i]= ((uint32_t *)src)[i]>>24;
> +#endif
> +    }
> +}
> +
> +static inline void RENAME(bgr32_1ToA)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused){
> +    int i;
> +    for (i=0; i<width; i++){
> +#ifdef WORDS_BIGENDIAN
> +        dst[i]= ((uint32_t *)src)[i]>>24;
> +#else
> +        dst[i]= ((uint32_t *)src)[i]&0xFF;
> +#endif
> +    }
> +}
> +

for(i=0; i<w; i++)
    dst[i]= src[4*i];
and adjust src at the call site to select the alpha byte;
that way one function fewer is needed.


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

When the tyrant has disposed of foreign enemies by conquest or treaty, and
there is nothing more to fear from them, then he is always stirring up
some war or other, in order that the people may require a leader. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090302/e687d302/attachment.pgp>



More information about the ffmpeg-devel mailing list