[FFmpeg-devel] [PATCH v4 2/4] lavc/vp9dsp: R-V V mc bilin hv
flow gg
hlefthleft at gmail.com
Sat Jun 15 14:52:12 EEST 2024
> Copying vectors is rarely justified - mostly only before destructive
> instructions such as FMA.
This case is slightly different from VP8. In VP8, many of the scalar values
are positive, so the related calculations can easily be replaced. In VP9,
however, t2 is a negative number, so vwmaccsu is required.
Therefore, unlike in VP8, we cannot use vwmulu.vx before
bilin_load to avoid the vmv.
<uk7b at foxmail.com> 于2024年6月15日周六 19:51写道:
> From: sunyuechi <sunyuechi at iscas.ac.cn>
>
> C908 X60
> vp9_avg_bilin_4hv_8bpp_c : 10.7 9.5
> vp9_avg_bilin_4hv_8bpp_rvv_i32 : 4.0 3.5
> vp9_avg_bilin_8hv_8bpp_c : 38.5 34.2
> vp9_avg_bilin_8hv_8bpp_rvv_i32 : 7.2 6.5
> vp9_avg_bilin_16hv_8bpp_c : 147.2 130.5
> vp9_avg_bilin_16hv_8bpp_rvv_i32 : 14.5 12.7
> vp9_avg_bilin_32hv_8bpp_c : 574.2 509.7
> vp9_avg_bilin_32hv_8bpp_rvv_i32 : 42.5 38.0
> vp9_avg_bilin_64hv_8bpp_c : 2321.2 2017.7
> vp9_avg_bilin_64hv_8bpp_rvv_i32 : 163.5 131.0
> vp9_put_bilin_4hv_8bpp_c : 10.0 8.7
> vp9_put_bilin_4hv_8bpp_rvv_i32 : 3.5 3.0
> vp9_put_bilin_8hv_8bpp_c : 35.2 31.2
> vp9_put_bilin_8hv_8bpp_rvv_i32 : 6.5 5.7
> vp9_put_bilin_16hv_8bpp_c : 134.0 119.0
> vp9_put_bilin_16hv_8bpp_rvv_i32 : 12.7 11.5
> vp9_put_bilin_32hv_8bpp_c : 538.5 464.2
> vp9_put_bilin_32hv_8bpp_rvv_i32 : 39.7 35.2
> vp9_put_bilin_64hv_8bpp_c : 2111.7 1833.2
> vp9_put_bilin_64hv_8bpp_rvv_i32 : 138.5 122.5
> ---
> libavcodec/riscv/vp9_mc_rvv.S | 38 +++++++++++++++++++++++++++++++++-
> libavcodec/riscv/vp9dsp_init.c | 10 +++++++++
> 2 files changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
> index fb7377048a..5241562531 100644
> --- a/libavcodec/riscv/vp9_mc_rvv.S
> +++ b/libavcodec/riscv/vp9_mc_rvv.S
> @@ -147,6 +147,40 @@ func ff_\op\()_vp9_bilin_64\type\()_rvv, zve32x
> endfunc
> .endm
>
> +.macro bilin_hv op
> +func ff_\op\()_vp9_bilin_64hv_rvv, zve32x
> + vsetvlstatic8 64, t0, 64
> +.Lbilin_hv\op:
> +.ifc \op,avg
> + csrwi vxrm, 0
> +.endif
> + neg t1, a5
> + neg t2, a6
> + li t4, 8
> + bilin_load_h v24, put, a5
> + add a2, a2, a3
> +1:
> + addi a4, a4, -1
> + bilin_load_h v4, put, a5
> + vwmulu.vx v16, v4, a6
> + vwmaccsu.vx v16, t2, v24
> + vwadd.wx v16, v16, t4
> + vnsra.wi v16, v16, 4
> + vadd.vv v0, v16, v24
> +.ifc \op,avg
> + vle8.v v16, (a0)
> + vaaddu.vv v0, v0, v16
> +.endif
> + vse8.v v0, (a0)
> + vmv.v.v v24, v4
> + add a2, a2, a3
> + add a0, a0, a1
> + bnez a4, 1b
> +
> + ret
> +endfunc
> +.endm
> +
> .irp len, 64, 32, 16, 8, 4
> copy_avg \len
> .endr
> @@ -155,6 +189,8 @@ bilin_h_v put, h, a5
> bilin_h_v avg, h, a5
> bilin_h_v put, v, a6
> bilin_h_v avg, v, a6
> +bilin_hv put
> +bilin_hv avg
>
> .macro func_bilin_h_v len, op, type
> func ff_\op\()_vp9_bilin_\len\()\type\()_rvv, zve32x
> @@ -165,7 +201,7 @@ endfunc
>
> .irp len, 32, 16, 8, 4
> .irp op, put, avg
> - .irp type, h, v
> + .irp type, h, v, hv
> func_bilin_h_v \len, \op, \type
> .endr
> .endr
> diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
> index 9606d8545f..b3700dfb08 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -83,6 +83,16 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
> dsp->mc[4][FILTER_BILINEAR ][0][1][0] = ff_put_vp9_bilin_4h_rvv;
> dsp->mc[4][FILTER_BILINEAR ][1][0][1] = ff_avg_vp9_bilin_4v_rvv;
> dsp->mc[4][FILTER_BILINEAR ][1][1][0] = ff_avg_vp9_bilin_4h_rvv;
> + dsp->mc[0][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_64hv_rvv;
> + dsp->mc[0][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_64hv_rvv;
> + dsp->mc[1][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_32hv_rvv;
> + dsp->mc[1][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_32hv_rvv;
> + dsp->mc[2][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_16hv_rvv;
> + dsp->mc[2][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_16hv_rvv;
> + dsp->mc[3][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_8hv_rvv;
> + dsp->mc[3][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_8hv_rvv;
> + dsp->mc[4][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_4hv_rvv;
> + dsp->mc[4][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_4hv_rvv;
>
> #undef init_fpel
> }
> --
> 2.45.2
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel mailing list