[FFmpeg-devel] [PATCH v3 1/4] swscale/input: add rgbaf32 input support
Mark Reid
mindmark at gmail.com
Mon Nov 14 03:50:37 EET 2022
On Sun, Nov 13, 2022 at 1:25 PM Michael Niedermayer <michael at niedermayer.cc>
wrote:
> On Wed, Nov 02, 2022 at 09:00:07PM -0700, mindmark at gmail.com wrote:
> > From: Mark Reid <mindmark at gmail.com>
> >
> > ---
> > libswscale/input.c | 172 +++++++++++++++++++++++++++++++++++++++++++++
> > libswscale/utils.c | 4 ++
> > 2 files changed, 176 insertions(+)
> >
> > diff --git a/libswscale/input.c b/libswscale/input.c
> > index 7ff7bfaa01..4683284b0b 100644
> > --- a/libswscale/input.c
> > +++ b/libswscale/input.c
> > @@ -1284,6 +1284,136 @@ static void rgbaf16##endian_name##ToA_c(uint8_t
> *_dst, const uint8_t *_src, cons
> > rgbaf16_funcs_endian(le, 0)
> > rgbaf16_funcs_endian(be, 1)
> >
> > +#define rdpx(src) (is_be ? av_int2float(AV_RB32(&src)):
> av_int2float(AV_RL32(&src)))
> > +
> > +static av_always_inline void rgbaf32ToUV_half_endian(uint16_t *dstU,
> uint16_t *dstV, int is_be,
> > + const float *src,
> int width,
> > + int32_t *rgb2yuv,
> int comp)
> > +{
> > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu =
> rgb2yuv[BU_IDX];
> > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv =
> rgb2yuv[BV_IDX];
> > + int i;
> > + for (i = 0; i < width; i++) {
>
> > + int r = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+0]),
> 0.0f, 65535.0f)) +
> > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+4]),
> 0.0f, 65535.0f))) >> 1;
> > + int g = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+1]),
> 0.0f, 65535.0f)) +
> > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+5]),
> 0.0f, 65535.0f))) >> 1;
> > + int b = (lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+2]),
> 0.0f, 65535.0f)) +
> > + lrintf(av_clipf(65535.0f * rdpx(src[i*(comp*2)+6]),
> 0.0f, 65535.0f))) >> 1;
> > +
> > + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1)))
> >> RGB2YUV_SHIFT;
> > + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1)))
> >> RGB2YUV_SHIFT;
>
> I would expect this sort of code to use 2 lrintf() and 2 av_clipf() not 6
>
>
Yeah, it is a bit excessive. I'll just remove the _half conversions for now;
they aren't strictly necessary as far as I can tell.
>
> > + }
> > +}
> > +
> > +static av_always_inline void rgbaf32ToUV_endian(uint16_t *dstU,
> uint16_t *dstV, int is_be,
> > + const float *src, int
> width,
> > + int32_t *rgb2yuv, int
> comp)
> > +{
> > + int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu =
> rgb2yuv[BU_IDX];
> > + int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv =
> rgb2yuv[BV_IDX];
> > + int i;
> > + for (i = 0; i < width; i++) {
> > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f,
> 65535.0f));
> > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f,
> 65535.0f));
> > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f,
> 65535.0f));
> > +
> > + dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1)))
> >> RGB2YUV_SHIFT;
> > + dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1)))
> >> RGB2YUV_SHIFT;
> > + }
> > +}
> > +
>
> > +static av_always_inline void rgbaf32ToY_endian(uint16_t *dst, const
> float *src, int is_be,
> > + int width, int32_t
> *rgb2yuv, int comp)
> > +{
> > + int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by =
> rgb2yuv[BY_IDX];
> > + int i;
> > + for (i = 0; i < width; i++) {
> > + int r = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+0]), 0.0f,
> 65535.0f));
> > + int g = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+1]), 0.0f,
> 65535.0f));
> > + int b = lrintf(av_clipf(65535.0f * rdpx(src[i*comp+2]), 0.0f,
> 65535.0f));
> > +
>
> > + dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >>
> RGB2YUV_SHIFT;
>
> there is one output so there should be only need for one clip and one
> float->int
>
This matches the f32 planar version. I think I was paranoid about keeping
things bitexact for the tests, which is why it's currently done this way.
I'll see what happens if I introduce more float operations. Could I perhaps
do this in a later patch? Some asm might have to change too.
> thx
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Any man who breaks a law that conscience tells him is unjust and willingly
> accepts the penalty by staying in jail in order to arouse the conscience
> of
> the community on the injustice of the law is at that moment expressing the
> very highest respect for law. - Martin Luther King Jr
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel mailing list