[FFmpeg-devel] Extend/optimize RGB to RGB conversions funcs into rgb2rgb.c

yann.lepetitcorps at free.fr yann.lepetitcorps at free.fr
Sun Sep 9 01:30:11 CEST 2012


In ff_fill_line_with_color() I have factored out the pixel_step[0] and
w * pixel_step[0] computations for packed formats, and unrolled the
"for (plane = 0; plane < 4; plane++)" loop for planar formats:

/**
 * Allocate and fill one line of `w` pixels with a single colour.
 *
 * When ff_fill_rgba_map() recognises pix_fmt (packed RGBA), a single buffer
 * is allocated in line[0] and filled with the colour reordered through the
 * component map. For every other format four planar lines are allocated:
 * Y, U, V (converted with the RGB_TO_*_CCIR macros) and alpha; the U and V
 * lines are w >> log2_chroma_w bytes long, the Y and alpha lines w bytes.
 *
 * @param line            receives the allocated line buffer(s)
 * @param pixel_step      receives the per-pixel byte step of each filled plane
 * @param w               line width in pixels
 * @param dst_color       receives the colour in destination component order
 * @param pix_fmt         destination pixel format
 * @param rgba_color      colour to fill with, in R, G, B, A order
 * @param is_packed_rgba  set to non-zero when pix_fmt is a packed RGBA format
 * @param rgba_map_ptr    if not NULL, receives the component map (packed only)
 * @return 0 on success, a negative value if an allocation failed. On failure
 *         every line[] entry assigned by this call is either a valid buffer
 *         or NULL, and nothing is freed here.
 *         NOTE(review): presumably the caller releases line[] - confirm.
 */
int ff_fill_line_with_color(uint8_t *line[4], int pixel_step[4], int w,
                            uint8_t dst_color[4],
                            enum PixelFormat pix_fmt, uint8_t rgba_color[4],
                            int *is_packed_rgba, uint8_t rgba_map_ptr[4])
{
    uint8_t rgba_map[4] = {0};
    int i, step;
    const AVPixFmtDescriptor *pix_desc = &av_pix_fmt_descriptors[pix_fmt];
    int hsub = pix_desc->log2_chroma_w;

    *is_packed_rgba = ff_fill_rgba_map(rgba_map, pix_fmt) >= 0;

    if (*is_packed_rgba) {
        /* Bytes per pixel; from here on w is the line size in bytes. */
        step = (av_get_bits_per_pixel(pix_desc)) >> 3;
        w *= step;

        pixel_step[0] = step;
        line[0] = av_malloc(w);
        if (!line[0])
            return -1;

        for (i = 0; i < 4; i++)
            dst_color[rgba_map[i]] = rgba_color[i];

        for (i = 0; i < w; i += step)
            memcpy(line[0] + i, dst_color, step);

        if (rgba_map_ptr)
            memcpy(rgba_map_ptr, rgba_map, sizeof(uint8_t) * 4);
    } else {
        /* Both variants below fill the planes from dst_color[], so the
         * converted colour is also reported back to the caller. */
        dst_color[0] = RGB_TO_Y_CCIR(rgba_color[0], rgba_color[1], rgba_color[2]);
        dst_color[1] = RGB_TO_U_CCIR(rgba_color[0], rgba_color[1], rgba_color[2], 0);
        dst_color[2] = RGB_TO_V_CCIR(rgba_color[0], rgba_color[1], rgba_color[2], 0);
        dst_color[3] = rgba_color[3];

#if 0
        /* Reference implementation: generic per-plane loop (disabled). */
        {
            int plane;

            for (plane = 0; plane < 4; plane++) {
                int line_size;
                int hsub1 = (plane == 1 || plane == 2) ? hsub : 0;

                pixel_step[plane] = 1;
                line_size = (w >> hsub1) * pixel_step[plane];
                line[plane] = av_malloc(line_size);
                memset(line[plane], dst_color[plane], line_size);
            }
        }
#else
        /* Unrolled variant: the chroma width is shifted once and shared by
         * the U and V planes; luma and alpha use the full width. */
        step = w >> hsub;

        pixel_step[0] = 1;
        pixel_step[1] = 1;
        pixel_step[2] = 1;
        pixel_step[3] = 1;

        line[0] = av_malloc(w);
        line[1] = av_malloc(step);
        line[2] = av_malloc(step);
        line[3] = av_malloc(w);
        if (!line[0] || !line[1] || !line[2] || !line[3])
            return -1;

        memset(line[0], dst_color[0], w);
        memset(line[1], dst_color[1], step);
        memset(line[2], dst_color[2], step);
        memset(line[3], dst_color[3], w);
#endif
    }

    return 0;
}

=> I think similar refactorings can certainly be made in the
ff_draw_rectangle() and ff_copy_rectangle() functions.


@+
Yannoo

> I have checked the 0.11.1.git version and found that rgb32to24() and
> rgb24to32() are identical to the 0.8 version.
>
> I also think that the rgb[15,16]tobgr[24,32]() functions could perhaps be
> optimised with a conversion table of 64K entries: the input rgb colour, in a
> 12/15/16-bit format, is used as an index into the table, which directly
> gives the converted 12/15/16/24/32-bit output value
> (this saves a lot of ">>", "<<", "&" and "|" computations)
>
> Note that the same idea can be used for the rgb[12,15,16]tobgr[12,15,16] functions.
>
> I am also thinking of modifying the ff_fill_line_with_color() function in
> libavfilter/drawutils.c to unroll the "for (plane = 0; plane < 4; plane++)"
> loop and to save the hsub1 computation half of the time when the format is
> not a packed format
> (and the same for the ff_draw_rectangle() and ff_copy_rectangle() functions,
> to save a lot of >> computations and pixelstep[plane] accesses)
>
> => I will test and benchmark this to see whether it is really faster
> than the current implementation
>
>
> PS: I have seen some new blending functions such as blend_line/rectangle/pixel()
> in drawutils.c, so the overlay extension already seems to be there :)
>
>
> @+
> Yannoo
>
>
> > Hi,
> >
> > I plan to use FFmpeg on Android platforms and have begun to look at
> > possible optimisations of the rgb[8/16/24/32]torgb[8/16/24/32]() functions
> > that I found in the FFmpeg 0.8 version for Android.
> >
> > I think that the rgb24to32() and rgb32to24() functions in the
> > libswscale/rgb2rgb.c source file could be rewritten something like this
> > (in this form it is much simpler to understand how they work, and it is
> > certainly much faster):
> >
> > /**
> >  * Expand 3-byte pixels to 4-byte pixels with a fixed alpha of 255.
> >  *
> >  * Little-endian builds write source bytes 2,1,0 followed by 255;
> >  * big-endian builds write 255 followed by source bytes 0,1,2.
> >  *
> >  * @param src      source buffer of 3-byte pixels
> >  * @param dst      destination buffer, 4 bytes per source pixel
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  */
> > void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size)
> > {
> >     const uint8_t *psrc = src; /* keeps the const qualifier of src */
> >     int i;
> >
> >     /* i + 3 <= src_size: never read past the end on a truncated pixel */
> >     for (i = 0; i + 3 <= src_size; i += 3, psrc += 3, dst += 4) {
> > #if HAVE_BIGENDIAN
> >         /* RGB24 (= R,G,B) -> BGR32 (= 255,R,G,B) */
> >         dst[0] = 255;
> >         dst[1] = psrc[0];
> >         dst[2] = psrc[1];
> >         dst[3] = psrc[2];
> > #else
> >         dst[0] = psrc[2];
> >         dst[1] = psrc[1];
> >         dst[2] = psrc[0];
> >         dst[3] = 255;
> > #endif
> >     }
> > }
> >
> > /**
> >  * Reduce 4-byte pixels to 3-byte pixels, dropping one byte per pixel.
> >  *
> >  * Little-endian builds write source bytes 2,1,0 (byte 3 is dropped);
> >  * big-endian builds write source bytes 1,2,3 (byte 0 is dropped).
> >  *
> >  * @param src      source buffer of 4-byte pixels
> >  * @param dst      destination buffer, 3 bytes per source pixel
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  */
> > void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size)
> > {
> >     const uint8_t *psrc = src; /* keeps the const qualifier of src */
> >     int i;
> >
> >     /* i + 4 <= src_size: never read past the end on a truncated pixel */
> >     for (i = 0; i + 4 <= src_size; i += 4, psrc += 4, dst += 3) {
> > #if HAVE_BIGENDIAN
> >         /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
> >         dst[0] = psrc[1];
> >         dst[1] = psrc[2];
> >         dst[2] = psrc[3];
> > #else
> >         dst[0] = psrc[2];
> >         dst[1] = psrc[1];
> >         dst[2] = psrc[0];
> > #endif
> >     }
> > }
> >
> >
> > And also add some functions to extend the conversion possibilities:
> >
> > /**
> >  * Expand 3-byte pixels to 4-byte pixels using a caller-supplied alpha.
> >  *
> >  * Little-endian builds write source bytes 2,1,0 followed by alpha;
> >  * big-endian builds write alpha followed by source bytes 0,1,2.
> >  *
> >  * @param src      source buffer of 3-byte pixels
> >  * @param dst      destination buffer, 4 bytes per source pixel
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  * @param alpha    value stored in the alpha byte of every output pixel
> >  */
> > void rgb24to32_alpha(const uint8_t *src, uint8_t *dst, int src_size,
> >                      uint8_t alpha)
> > {
> >     const uint8_t *psrc = src;
> >     int i;
> >
> >     /* Advance psrc, the pointer actually read from; advancing src
> >      * instead would repeat the first pixel for the whole buffer. */
> >     for (i = 0; i + 3 <= src_size; i += 3, psrc += 3, dst += 4) {
> >         /* RGB24 (= R,G,B) -> BGR32 (= alpha,R,G,B) */
> > #if HAVE_BIGENDIAN
> >         dst[0] = alpha;
> >         dst[1] = psrc[0];
> >         dst[2] = psrc[1];
> >         dst[3] = psrc[2];
> > #else
> >         dst[0] = psrc[2];
> >         dst[1] = psrc[1];
> >         dst[2] = psrc[0];
> >         dst[3] = alpha;
> > #endif
> >     }
> > }
> >
> >
> > /**
> >  * Copy 3-byte pixels whose luminance is at least minlum; other
> >  * destination pixels are left untouched.
> >  *
> >  * Luminance is ((66 * c0 + 129 * c1 + 25 * c2 + 128) >> 8) + 16, where
> >  * c0,c1,c2 are source bytes 0,1,2 on big-endian builds and 2,1,0 on
> >  * little-endian builds. Copied pixels keep the source byte order.
> >  *
> >  * @param src      source buffer of 3-byte pixels
> >  * @param dst      destination buffer of 3-byte pixels
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  * @param minlum   minimum luminance for a pixel to be copied
> >  */
> > void rgb24to24_luminance(const uint8_t *src, uint8_t *dst, int src_size,
> >                          int minlum)
> > {
> >     const uint8_t *psrc = src;
> >     int i, lum;
> >
> >     /* Both buffers hold 3-byte pixels, so both advance by 3. */
> >     for (i = 0; i + 3 <= src_size; i += 3, psrc += 3, dst += 3) {
> >         /* The shift is parenthesised: ">> 8 + 16" would parse as ">> 24". */
> > #if HAVE_BIGENDIAN
> >         lum = ((66 * psrc[0] + 129 * psrc[1] + 25 * psrc[2] + 128) >> 8) + 16;
> > #else
> >         lum = ((66 * psrc[2] + 129 * psrc[1] + 25 * psrc[0] + 128) >> 8) + 16;
> > #endif
> >
> >         if (lum >= minlum) {
> >             dst[0] = psrc[0];
> >             dst[1] = psrc[1];
> >             dst[2] = psrc[2];
> >         }
> >     }
> > }
> >
> > /**
> >  * Write 3-byte source pixels over 4-byte destination pixels whose current
> >  * alpha byte is below overlay; written pixels get an alpha of 255.
> >  *
> >  * The destination alpha byte is byte 3 on little-endian builds and byte 0
> >  * on big-endian builds. Destination pixels whose alpha is already
> >  * >= overlay are left untouched.
> >  *
> >  * @param src      source buffer of 3-byte pixels
> >  * @param dst      destination buffer of 4-byte pixels (read and written)
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  * @param overlay  alpha threshold compared against the destination alpha
> >  */
> > void rgb24to32_overlay(const uint8_t *src, uint8_t *dst, int src_size,
> >                        int overlay)
> > {
> >     const uint8_t *psrc = src;
> >     int i;
> >
> >     /* Advance psrc, the pointer actually read from. */
> >     for (i = 0; i + 3 <= src_size; i += 3, psrc += 3, dst += 4) {
> >         /* RGB24 (= R,G,B) -> BGR32 (= 255,R,G,B) */
> > #if HAVE_BIGENDIAN
> >         if (dst[0] < overlay) {
> >             dst[0] = 255;
> >             dst[1] = psrc[0];
> >             dst[2] = psrc[1];
> >             dst[3] = psrc[2];
> >         }
> > #else
> >         if (dst[3] < overlay) {
> >             dst[0] = psrc[2];
> >             dst[1] = psrc[1];
> >             dst[2] = psrc[0];
> >             dst[3] = 255;
> >         }
> > #endif
> >     }
> > }
> >
> > /**
> >  * Expand 3-byte pixels to 4-byte pixels (alpha 255), but only for pixels
> >  * whose luminance is at least minlum; other destination pixels are left
> >  * untouched.
> >  *
> >  * Luminance is ((66 * c0 + 129 * c1 + 25 * c2 + 128) >> 8) + 16, where
> >  * c0,c1,c2 are source bytes 0,1,2 on big-endian builds and 2,1,0 on
> >  * little-endian builds.
> >  *
> >  * @param src      source buffer of 3-byte pixels
> >  * @param dst      destination buffer, 4 bytes per source pixel
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  * @param minlum   minimum luminance for a pixel to be written
> >  */
> > void rgb24to32_luminance(const uint8_t *src, uint8_t *dst, int src_size,
> >                          int minlum)
> > {
> >     const uint8_t *psrc = src;
> >     int i, lum;
> >
> >     /* Advance psrc, the pointer actually read from. */
> >     for (i = 0; i + 3 <= src_size; i += 3, psrc += 3, dst += 4) {
> >         /* RGB24 (= R,G,B) -> BGR32 (= 255,R,G,B) */
> >         /* The shift is parenthesised: ">> 8 + 16" would parse as ">> 24". */
> > #if HAVE_BIGENDIAN
> >         lum = ((66 * psrc[0] + 129 * psrc[1] + 25 * psrc[2] + 128) >> 8) + 16;
> >
> >         if (lum >= minlum) {
> >             dst[0] = 255;
> >             dst[1] = psrc[0];
> >             dst[2] = psrc[1];
> >             dst[3] = psrc[2];
> >         }
> > #else
> >         lum = ((66 * psrc[2] + 129 * psrc[1] + 25 * psrc[0] + 128) >> 8) + 16;
> >
> >         if (lum >= minlum) {
> >             dst[0] = psrc[2];
> >             dst[1] = psrc[1];
> >             dst[2] = psrc[0];
> >             dst[3] = 255;
> >         }
> > #endif
> >     }
> > }
> >
> >
> > /**
> >  * Reduce 4-byte pixels to 3-byte pixels, writing only those whose source
> >  * alpha byte is at least overlay; other destination pixels are left
> >  * untouched.
> >  *
> >  * The source alpha byte is byte 3 on little-endian builds (bytes 2,1,0 are
> >  * written) and byte 0 on big-endian builds (bytes 1,2,3 are written).
> >  *
> >  * @param src      source buffer of 4-byte pixels
> >  * @param dst      destination buffer, 3 bytes per source pixel
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  * @param overlay  minimum source alpha for a pixel to be written
> >  */
> > void rgb32to24_overlay(const uint8_t *src, uint8_t *dst, int src_size,
> >                        uint8_t overlay)
> > {
> >     const uint8_t *psrc = src; /* keeps the const qualifier of src */
> >     int i;
> >
> >     /* i + 4 <= src_size: never read past the end on a truncated pixel */
> >     for (i = 0; i + 4 <= src_size; i += 4, psrc += 4, dst += 3) {
> >         /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
> > #if HAVE_BIGENDIAN
> >         if (psrc[0] >= overlay) {
> >             dst[0] = psrc[1];
> >             dst[1] = psrc[2];
> >             dst[2] = psrc[3];
> >         }
> > #else
> >         if (psrc[3] >= overlay) {
> >             dst[0] = psrc[2];
> >             dst[1] = psrc[1];
> >             dst[2] = psrc[0];
> >         }
> > #endif
> >     }
> > }
> >
> > /**
> >  * Copy 4-byte pixels whose source alpha byte is below overlay, forcing the
> >  * written alpha to 255; other destination pixels are left untouched.
> >  *
> >  * The alpha byte is byte 3 on little-endian builds and byte 0 on
> >  * big-endian builds; the three colour bytes keep their positions.
> >  *
> >  * NOTE(review): the test is on the SOURCE alpha, as in the original code,
> >  * although its comment spoke about the destination alpha - confirm which
> >  * one is intended.
> >  *
> >  * @param src      source buffer of 4-byte pixels
> >  * @param dst      destination buffer of 4-byte pixels
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  * @param overlay  alpha threshold compared against the source alpha
> >  */
> > void rgb32to32_overlay(const uint8_t *src, uint8_t *dst, int src_size,
> >                        int overlay)
> > {
> >     const uint8_t *psrc = src;
> >     int i;
> >
> >     /* Both buffers hold 4-byte pixels, so both advance by 4, and every
> >      * read goes through the same advancing pointer. */
> >     for (i = 0; i + 4 <= src_size; i += 4, psrc += 4, dst += 4) {
> > #if HAVE_BIGENDIAN
> >         if (psrc[0] < overlay) {
> >             dst[0] = 255;
> >             dst[1] = psrc[1];
> >             dst[2] = psrc[2];
> >             dst[3] = psrc[3];
> >         }
> > #else
> >         if (psrc[3] < overlay) {
> >             dst[0] = psrc[0];
> >             dst[1] = psrc[1];
> >             dst[2] = psrc[2];
> >             dst[3] = 255;
> >         }
> > #endif
> >     }
> > }
> >
> > /**
> >  * Copy 4-byte pixels whose masked source alpha is at least the masked
> >  * destination alpha; other destination pixels are left untouched.
> >  *
> >  * The comparison is (src_alpha & overlay) >= (dst_alpha & overlay), where
> >  * the alpha byte is byte 3 on little-endian builds and byte 0 on
> >  * big-endian builds. A copied pixel takes all four source bytes unchanged.
> >  *
> >  * @param src      source buffer of 4-byte pixels
> >  * @param dst      destination buffer of 4-byte pixels (read and written)
> >  * @param src_size size of src in bytes; a trailing partial pixel is ignored
> >  * @param overlay  bit mask applied to both alpha values before comparing
> >  */
> > void rgb32to32_overlay2(const uint8_t *src, uint8_t *dst, int src_size,
> >                         int overlay)
> > {
> >     const uint8_t *psrc = src;
> >     int i;
> >
> >     /* Both buffers hold 4-byte pixels, so both advance by 4, and every
> >      * read goes through the same advancing pointer. */
> >     for (i = 0; i + 4 <= src_size; i += 4, psrc += 4, dst += 4) {
> > #if HAVE_BIGENDIAN
> >         if ((psrc[0] & overlay) >= (dst[0] & overlay)) {
> >             dst[0] = psrc[0];
> >             dst[1] = psrc[1];
> >             dst[2] = psrc[2];
> >             dst[3] = psrc[3];
> >         }
> > #else
> >         if ((psrc[3] & overlay) >= (dst[3] & overlay)) {
> >             dst[0] = psrc[0];
> >             dst[1] = psrc[1];
> >             dst[2] = psrc[2];
> >             dst[3] = psrc[3];
> >         }
> > #endif
> >     }
> > }
> >
> > Is the 0.8 version of FFmpeg the best suited for Android development, or
> > are newer versions of FFmpeg better?
> > (I found this version in a tutorial that explains how to use FFmpeg with
> > JNI calls on the Android platform)
> >
> >
> > @+
> > Yannoo
> >
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>




More information about the ffmpeg-devel mailing list