[FFmpeg-devel] [PATCH 4/4] lavc/vvc_mc R-V V sad
flow gg
hlefthleft at gmail.com
Tue Nov 26 05:01:56 EET 2024
Updated them.
Rémi Denis-Courmont <remi at remlab.net> 于2024年11月18日周一 04:23写道:
> Le sunnuntaina 17. marraskuuta 2024, 15.16.23 EET uk7b at foxmail.com a écrit :
> > From: sunyuechi <sunyuechi at iscas.ac.cn>
> >
> > k230 banana_f3
> > sad_8x16_c: 385.9 ( 1.00x) 403.1 ( 1.00x)
> > sad_8x16_rvv_i32: 108.1 ( 3.57x) 100.8 ( 4.00x)
> > sad_16x8_c: 376.6 ( 1.00x) 392.6 ( 1.00x)
> > sad_16x8_rvv_i32: 89.3 ( 4.21x) 69.5 ( 5.64x)
> > sad_16x16_c: 746.6 ( 1.00x) 757.3 ( 1.00x)
> > sad_16x16_rvv_i32: 135.8 ( 5.50x) 121.5 ( 6.23x)
> > ---
> > libavcodec/riscv/vvc/Makefile | 3 +-
> > libavcodec/riscv/vvc/vvc_sad_rvv.S | 58 ++++++++++++++++++++++++++++++
> > libavcodec/riscv/vvc/vvcdsp_init.c | 7 ++++
> > 3 files changed, 67 insertions(+), 1 deletion(-)
> > create mode 100644 libavcodec/riscv/vvc/vvc_sad_rvv.S
> >
> > diff --git a/libavcodec/riscv/vvc/Makefile b/libavcodec/riscv/vvc/Makefile
> > index 582b051579..6b9c618b33 100644
> > --- a/libavcodec/riscv/vvc/Makefile
> > +++ b/libavcodec/riscv/vvc/Makefile
> > @@ -1,2 +1,3 @@
> > OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvcdsp_init.o
> > -RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o
> > +RVV-OBJS-$(CONFIG_VVC_DECODER) += riscv/vvc/vvc_mc_rvv.o \
> > + riscv/vvc/vvc_sad_rvv.o
> > diff --git a/libavcodec/riscv/vvc/vvc_sad_rvv.S b/libavcodec/riscv/vvc/vvc_sad_rvv.S
> > new file mode 100644
> > index 0000000000..acdc78d20d
> > --- /dev/null
> > +++ b/libavcodec/riscv/vvc/vvc_sad_rvv.S
> > @@ -0,0 +1,58 @@
> > +/*
> > + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS).
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +#include "libavcodec/riscv/h26x/asm.S"
> > +
> > +.macro func_sad vlen
> > +func ff_vvc_sad_rvv_\vlen, zve32x, zbb, zba
> > + lpad 0
> > + slli t2, a3, 7 // dy * 128
> > + li t1, 4*128+4
> > + add t0, t2, a2 // dy * 128 + dx
> > + sub t1, t1, t2
> > + sub t1, t1, a2
> > + sh1add a0, t0, a0
> > + sh1add a1, t1, a1
> > + vsetvlstatic32 1, \vlen
> > + li t0, 16
> > + vmv.s.x v0, zero
> > + beq a4, t0, SAD\vlen\()16
> > + .irp w,8,16
> > +SAD\vlen\w:
> > + vsetvlstatic16 \w, \vlen
> > + addi a5, a5, -2
> > + vle16.v v8, (a0)
> > + vle16.v v16, (a1)
> > + vwsub.vv v24, v8, v16
>
> It might be faster to stick to 16-bit and only widen when accumulating.
> The distance between two 16-bit values is an unsigned 16-bit value after all.
>
> > + vsetvlstatic32 \w, \vlen
> > + vneg.v v16, v24
> > + addi a0, a0, 2 * 128 * 2
> > + vmax.vv v24, v24, v16
> > + vredsum.vs v0, v24, v0
>
> Don't calculate a reduction in a loop.
>
> > + addi a1, a1, 2 * 128 * 2
> > + bnez a5, SAD\vlen\w
> > + vmv.x.s a0, v0
> > + ret
> > + .endr
> > +endfunc
> > +.endm
> > +
> > +func_sad 256
> > +func_sad 128
> > diff --git a/libavcodec/riscv/vvc/vvcdsp_init.c b/libavcodec/riscv/vvc/vvcdsp_init.c
> > index 2fe93029aa..1b228cc9f5 100644
> > --- a/libavcodec/riscv/vvc/vvcdsp_init.c
> > +++ b/libavcodec/riscv/vvc/vvcdsp_init.c
> > @@ -59,6 +59,9 @@ DMVR_PROTOTYPES(8, rvv_256)
> > c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_##opt; \
> > } while (0)
> >
> > +int ff_vvc_sad_rvv_128(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
> > +int ff_vvc_sad_rvv_256(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
> > +
> > #define PUT_PIXELS_PROTOTYPES2(bd, opt)
>
> > \ void bf(ff_vvc_put_pixels, bd, opt)(int16_t *dst,
>
> > \ const uint8_t *_src, const ptrdiff_t _src_stride,
>
> > \ @@ -97,6 +100,8 @@ void
> > ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd) FUNCS(LUMA,
> > rvv_256);
> > FUNCS(CHROMA, rvv_256);
> > break;
> > + case 10:
> > + c->inter.sad = ff_vvc_sad_rvv_256;
> > default:
> > break;
> > }
> > @@ -111,6 +116,8 @@ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd)
> > FUNCS(LUMA, rvv_128);
> > FUNCS(CHROMA, rvv_128);
> > break;
> > + case 10:
> > + c->inter.sad = ff_vvc_sad_rvv_128;
> > default:
> > break;
> > }
>
>
> --
> 雷米‧德尼-库尔蒙
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel mailing list