[FFmpeg-devel] [PATCH v3 6/9] lavc/vp9dsp: R-V V mc bilin h v
Rémi Denis-Courmont
remi at remlab.net
Sat May 18 18:56:34 EEST 2024
Le maanantaina 13. toukokuuta 2024, 19.59.23 EEST uk7b at foxmail.com a écrit :
> From: sunyuechi <sunyuechi at iscas.ac.cn>
>
> C908:
> vp9_avg_bilin_4h_8bpp_c: 5.2
> vp9_avg_bilin_4h_8bpp_rvv_i64: 2.2
> vp9_avg_bilin_4v_8bpp_c: 5.5
> vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
> vp9_avg_bilin_8h_8bpp_c: 20.0
> vp9_avg_bilin_8h_8bpp_rvv_i64: 4.5
> vp9_avg_bilin_8v_8bpp_c: 21.0
> vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
> vp9_avg_bilin_16h_8bpp_c: 78.2
> vp9_avg_bilin_16h_8bpp_rvv_i64: 9.0
> vp9_avg_bilin_16v_8bpp_c: 82.0
> vp9_avg_bilin_16v_8bpp_rvv_i64: 9.0
> vp9_avg_bilin_32h_8bpp_c: 325.5
> vp9_avg_bilin_32h_8bpp_rvv_i64: 26.2
> vp9_avg_bilin_32v_8bpp_c: 326.2
> vp9_avg_bilin_32v_8bpp_rvv_i64: 26.2
> vp9_avg_bilin_64h_8bpp_c: 1265.7
> vp9_avg_bilin_64h_8bpp_rvv_i64: 91.5
> vp9_avg_bilin_64v_8bpp_c: 1317.0
> vp9_avg_bilin_64v_8bpp_rvv_i64: 91.2
> vp9_put_bilin_4h_8bpp_c: 4.5
> vp9_put_bilin_4h_8bpp_rvv_i64: 1.7
> vp9_put_bilin_4v_8bpp_c: 4.7
> vp9_put_bilin_4v_8bpp_rvv_i64: 1.7
> vp9_put_bilin_8h_8bpp_c: 17.0
> vp9_put_bilin_8h_8bpp_rvv_i64: 3.5
> vp9_put_bilin_8v_8bpp_c: 18.0
> vp9_put_bilin_8v_8bpp_rvv_i64: 3.5
> vp9_put_bilin_16h_8bpp_c: 65.2
> vp9_put_bilin_16h_8bpp_rvv_i64: 7.5
> vp9_put_bilin_16v_8bpp_c: 85.7
> vp9_put_bilin_16v_8bpp_rvv_i64: 7.5
> vp9_put_bilin_32h_8bpp_c: 257.5
> vp9_put_bilin_32h_8bpp_rvv_i64: 23.5
> vp9_put_bilin_32v_8bpp_c: 274.5
> vp9_put_bilin_32v_8bpp_rvv_i64: 23.5
> vp9_put_bilin_64h_8bpp_c: 1040.5
> vp9_put_bilin_64h_8bpp_rvv_i64: 82.5
> vp9_put_bilin_64v_8bpp_c: 1108.7
> vp9_put_bilin_64v_8bpp_rvv_i64: 82.2
> ---
> libavcodec/riscv/vp9_mc_rvv.S | 43 ++++++++++++++++++++++++++++++++++
> libavcodec/riscv/vp9dsp_init.c | 21 +++++++++++++++++
> 2 files changed, 64 insertions(+)
>
> diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
> index 5d917e7b98..986cc3760d 100644
> --- a/libavcodec/riscv/vp9_mc_rvv.S
> +++ b/libavcodec/riscv/vp9_mc_rvv.S
> @@ -53,6 +53,49 @@ func ff_avg\len\()_rvv, zve32x
> endfunc
> .endm
>
> +.macro bilin_load dst len op type mn
> +.ifc \type,v
> + add t5, a2, a3
> +.elseif \type == h
> + addi t5, a2, 1
> +.endif
> + vle8.v v8, (a2)
> + vle8.v v0, (t5)
> + vwmulu.vx v16, v0, \mn
> + vwmaccsu.vx v16, t1, v8
> + vwadd.wx v16, v16, t4
> + vnsra.wi v16, v16, 4
> + vadd.vv \dst, v16, v8
> +.ifc \op,avg
> + vle8.v v16, (a0)
> + vaaddu.vv \dst, \dst, v16
> +.endif
> +.endm
> +
> +.macro bilin_h_v len op type mn
> +func ff_\op\()_bilin_\len\()\type\()_rvv, zve32x
> +.ifc \op,avg
> + csrwi vxrm, 0
> +.endif
> + vsetvlstatic8 \len t0 64
> + li t4, 8
> + neg t1, \mn
> +1:
> + addi a4, a4, -1
> + bilin_load v0, \len, \op, \type, \mn
> + vse8.v v0, (a0)
> + add a2, a2, a3
> + add a0, a0, a1
> + bnez a4, 1b
> +
> + ret
> +endfunc
> +.endm
> +
> .irp len 64, 32, 16, 8, 4
Missing comma after `len`; this should read `.irp len, 64, 32, 16, 8, 4`.
> copy_avg \len
> + .irp op put avg
> + bilin_h_v \len \op h a5
> + bilin_h_v \len \op v a6
> + .endr
> .endr
> diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
> index 1922484a1d..ec6db51774 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -63,6 +63,27 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
> init_fpel(3, 8);
> init_fpel(4, 4);
>
> + dsp->mc[0][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_64v_rvv;
> + dsp->mc[0][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_64h_rvv;
> + dsp->mc[0][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_64v_rvv;
> + dsp->mc[0][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_64h_rvv;
> + dsp->mc[1][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_32v_rvv;
> + dsp->mc[1][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_32h_rvv;
> + dsp->mc[1][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_32v_rvv;
> + dsp->mc[1][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_32h_rvv;
> + dsp->mc[2][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_16v_rvv;
> + dsp->mc[2][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_16h_rvv;
> + dsp->mc[2][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_16v_rvv;
> + dsp->mc[2][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_16h_rvv;
> + dsp->mc[3][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_8v_rvv;
> + dsp->mc[3][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_8h_rvv;
> + dsp->mc[3][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_8v_rvv;
> + dsp->mc[3][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_8h_rvv;
> + dsp->mc[4][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_4v_rvv;
> + dsp->mc[4][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_4h_rvv;
> + dsp->mc[4][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_4v_rvv;
> + dsp->mc[4][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_4h_rvv;
> +
> #undef init_fpel
> }
> #endif
--
レミ・デニ-クールモン
http://www.remlab.net/
More information about the ffmpeg-devel mailing list