[FFmpeg-devel] [RFC] Alpha support

Cédric Schieli cschieli
Sun Feb 1 16:52:02 CET 2009


[...]

> > -    case PIX_FMT_RGB32:\
> > -    case PIX_FMT_BGR32:\
> > -    case PIX_FMT_RGB32_1:\
> > -    case PIX_FMT_BGR32_1:\
> > -        func(uint32_t)\
> > -            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
> > -            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
> > -        }                \
> > +    case PIX_FMT_RGBA:\
> > +    case PIX_FMT_BGRA:\
> > +        if (!CONFIG_SMALL && c->alpPixBuf)\
> > +        {\
> > +            func(uint32_t,1)\
> > +                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] +
> (A1<<24);\
> > +                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] +
> (A2<<24);\
> > +            }\
> > +        }else{\
> > +            func(uint32_t,CONFIG_SMALL)\
> > +                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] +
> (CONFIG_SMALL ? (A1<<24) : 0);\
> > +                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] +
> (CONFIG_SMALL ? (A2<<24) : 0);\
> > +            }\
> > +        }\
> > +        break;\
> > +    case PIX_FMT_ARGB:\
> > +    case PIX_FMT_ABGR:\
>
> Is it faster the way you wrote it compared to a table that does <<24 vs.
> <<0?
> I am asking because the table would lead to simpler and less duplicated code.
>

I've tried the table approach with this:

SwsContext *sws_getContext(...){
...
    if (c->alpPixBuf){
        int j = (c->dstFormat == PIX_FMT_RGBA || c->dstFormat ==
PIX_FMT_BGRA) ? 24 : 0;
        for(i=0; i<256; i++)
            c->table_A[i] = (i<<j);
    }else{
        memset(c->table_A, 0, sizeof(c->table_A));
    }
...
}

#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGB32:\
    case PIX_FMT_BGR32:\
    case PIX_FMT_RGB32_1:\
    case PIX_FMT_BGR32_1:\
        if (!CONFIG_SMALL && c->alpPixBuf){\
            uint32_t *a = c->table_A;\
            func(uint32_t,1)\
START_TIMER\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + a[A1];\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + a[A2];\
STOP_TIMER(__func__)\
            }\
        }else{\
            int needAlpha = (int)c->alpPixBuf;\
            uint32_t *a = c->table_A;\
            func(uint32_t,CONFIG_SMALL ? needAlpha : 0)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] +
(CONFIG_SMALL ? a[A1] : 0);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] +
(CONFIG_SMALL ? a[A2] : 0);\
            }\
        }\
        break;\
...

and benchmarked it with the following command (rgb32.png is a 640x400 RGB32
image containing alpha information):

for i in $(seq 1 100); do ./ffmpeg -sws_flags +bitexact -i /tmp/rgb32.png -s 2048x8192 /tmp/out.png 2>&1 | grep dezi | tail -n 1; done

The result is an average of 420.82 dezicycles.


With my original approach:

#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGBA:\
    case PIX_FMT_BGRA:\
        if (!CONFIG_SMALL && c->alpPixBuf)\
        {\
            func(uint32_t,1)\
START_TIMER\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
STOP_TIMER(__func__)\
            }\
        }else{\
            int needAlpha = (int)c->alpPixBuf;\
            func(uint32_t,CONFIG_SMALL ? needAlpha : 0)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] +
(CONFIG_SMALL ? (A1<<24) : 0);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] +
(CONFIG_SMALL ? (A2<<24) : 0);\
            }\
        }\
        break;\
...

The same benchmark gives an average of 419.93 dezicycles.

So the table approach carries a tiny (but still measurable) penalty: roughly
0.2% slower than the explicit-shift version.


Regards,
Cédric Schieli
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel at mplayerhq.hu
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-devel
-------------- next part --------------
A non-text attachment was scrubbed...
Name: benchmark-table.log
Type: text/x-log
Size: 5999 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090201/d5493194/attachment.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: benchmark-case.log
Type: text/x-log
Size: 5999 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090201/d5493194/attachment-0001.bin>



More information about the ffmpeg-devel mailing list