[FFmpeg-devel] swscale : add bitexact conv for grayf32 and gray16 to f32 conv

Michael Niedermayer michael at niedermayer.cc
Sat Aug 25 19:07:19 EEST 2018


On Thu, Aug 23, 2018 at 08:13:13PM +0200, Martin Vignali wrote:
[...]
>  swscale_internal.h |    2 -
>  utils.c            |   70 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 68 insertions(+), 4 deletions(-)
> 911c6d681b09ab719e2c8abe337887732c28b65e  0003-swscale-add-bit_exact-lut-creation-for-8bit-to-float.patch
> From e7b78d6416189a72695dac0680782a987c84b274 Mon Sep 17 00:00:00 2001
> From: Martin Vignali <martin.vignali at gmail.com>
> Date: Thu, 23 Aug 2018 18:40:54 +0200
> Subject: [PATCH 3/4] swscale : add bit_exact lut creation for 8bit to float
> 
> ---
>  libswscale/swscale_internal.h |  2 +-
>  libswscale/utils.c            | 70 +++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 68 insertions(+), 4 deletions(-)
> 
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 3b6b682d5f..a9cd7bc8d7 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -336,7 +336,7 @@ typedef struct SwsContext {
>      uint32_t pal_yuv[256];
>      uint32_t pal_rgb[256];
>  
> -    float uint2float_lut[256];
> +    float *uint2float_lut; /*! store uint8 to float or uint16 to float */
>  
>      /**
>       * @name Scaled horizontal lines ring buffer.
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index 5e56371180..ce65467c35 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -1098,6 +1098,69 @@ static uint16_t * alloc_gamma_tbl(double e)
>      return tbl;
>  }
>  
> +static void inline fill_uint_to_float_lut(SwsContext *c, int bitdepth) {
> +    static const float float_mult8 = 1.0f / 255.0f;
> +    int i;
> +
> +    if (bitdepth == 8) { /*! fill uint8 to float lut */
> +        for (i = 0; i < 256; ++i){
> +            c->uint2float_lut[i] = (float)i * float_mult8;
> +        }
> +    } else { /*! unsupported bitdepth */
> +        av_assert0(0);
> +    }
> +}
> +
> +#define SIGN_EXP_MANT_TO_UINT32(sign, exp, mant) sign << 31 | exp << 23 | mant
> +
> +static void inline fill_uint_to_float_lut_bitexact(SwsContext *c, int bitdepth) {
> +    int i, j, exp, mant, div;
> +    uint32_t off_coeff_mant;
> +    uint32_t coeff_mant;
> +    uint32_t *lut = (uint32_t *)c->uint2float_lut;
> +    int min_loop = 1;
> +    int max_loop = 2;
> +
> +    if (bitdepth == 8) { /*! fill uint8 to float lut */
> +        lut[0] = 0;
> +        lut[1] = SIGN_EXP_MANT_TO_UINT32(0, 119, 32897);
> +
> +        exp = 119; /*! initial exp */
> +        off_coeff_mant = coeff_mant = 4210752;
> +
> +        for (j = 0; j < 7; ++j) {
> +            exp++;
> +            min_loop *= 2;
> +            max_loop *= 2;
> +            div = (max_loop - 1 - min_loop);
> +
> +            for (i = min_loop; i < max_loop; ++i) {
> +                mant = (i - min_loop) * coeff_mant / div + 32897;
> +                lut[i] = SIGN_EXP_MANT_TO_UINT32(0, exp, mant);
> +            }
> +
> +            if (j < 6) {
> +                off_coeff_mant >>= 1;
> +                coeff_mant |= off_coeff_mant;
> +            }
> +        }
> +        lut[255] = SIGN_EXP_MANT_TO_UINT32(0, 127, 0);
> +    } else { /*! unsupported bitdepth */
> +            av_assert0(0);
> +    }
> +}
> +

> +static void alloc_uint_to_float_lut(SwsContext *c, int bitdepth) {
> +    c->uint2float_lut = (float*)av_malloc(sizeof(float) * 1 << bitdepth);
> +    if (!c->uint2float_lut)
> +        return;
> +    if (c->flags & SWS_BITEXACT) {
> +        fill_uint_to_float_lut_bitexact(c, bitdepth);
> +    } else {
> +        fill_uint_to_float_lut(c, bitdepth);
> +    }

Is this complexity really needed?

Does the LUT generation code produce different results on different platforms?
If so, I would suggest trying to use double and adding a small offset if needed.

An 8-bit table has 256 entries, a 16-bit table 65536.
A difference would occur if a source value computed as a 64-bit float gets
rounded differently to a 32-bit float. If this occurs, a small offset could be
added so that none of the 65536 cases ends up close to the midpoint between
two adjacent 32-bit floats.

This would avoid the rather complex code, if it works.

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Democracy is the form of government in which you can choose your dictator
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20180825/ba2ef422/attachment.sig>


More information about the ffmpeg-devel mailing list