[FFmpeg-devel] [PATCH v2] avcodec/vvc: Don't use large array on stack
Zhao Zhili
quinkblack at foxmail.com
Fri Sep 20 16:44:13 EEST 2024
> On Sep 20, 2024, at 11:43, Zhao Zhili <quinkblack at foxmail.com> wrote:
>
> From: Zhao Zhili <zhilizhao at tencent.com>
>
> tmp_array in dmvr_hv takes 33024 bytes on stack, which can be
> dangerous.
I prefer version 3 personally
https://ffmpeg.org/pipermail/ffmpeg-devel/2024-September/333709.html
> ---
> libavcodec/vvc/ctu.h | 1 +
> libavcodec/vvc/dsp.h | 2 +-
> libavcodec/vvc/inter.c | 2 +-
> libavcodec/vvc/inter_template.c | 12 +++++++-----
> libavcodec/x86/vvc/vvcdsp_init.c | 8 ++++----
> tests/checkasm/vvc_mc.c | 17 +++++++++++++----
> 6 files changed, 27 insertions(+), 15 deletions(-)
>
> diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
> index eab4612561..eb3e51c7e5 100644
> --- a/libavcodec/vvc/ctu.h
> +++ b/libavcodec/vvc/ctu.h
> @@ -385,6 +385,7 @@ typedef struct VVCLocalContext {
> DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
> DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
> DECLARE_ALIGNED(32, int32_t, alf_gradient_tmp)[ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR];
> + DECLARE_ALIGNED(32, int16_t, dmvr_tmp)[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
>
> struct {
> int sbt_num_fourths_tb0; ///< SbtNumFourthsTb0
> diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
> index 635ebcafed..3594dfc5f5 100644
> --- a/libavcodec/vvc/dsp.h
> +++ b/libavcodec/vvc/dsp.h
> @@ -99,7 +99,7 @@ typedef struct VVCInterDSPContext {
>
> int (*sad)(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
> void (*dmvr[2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
> - intptr_t mx, intptr_t my, int width);
> + intptr_t mx, intptr_t my, int width, int16_t *tmp);
> } VVCInterDSPContext;
>
> struct VVCLocalContext;
> diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
> index 64a9dd1e46..48b633d580 100644
> --- a/libavcodec/vvc/inter.c
> +++ b/libavcodec/vvc/inter.c
> @@ -806,7 +806,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
> const int wrap_enabled = fc->ps.pps->r->pps_ref_wraparound_enabled_flag;
>
> MC_EMULATED_EDGE_BILINEAR(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
> - fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w);
> + fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w, lc->dmvr_tmp);
> }
>
> min_sad = fc->vvcdsp.inter.sad(tmp[L0], tmp[L1], dx, dy, block_w, block_h);
> diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
> index c073a73e76..fad1ba801f 100644
> --- a/libavcodec/vvc/inter_template.c
> +++ b/libavcodec/vvc/inter_template.c
> @@ -474,7 +474,8 @@ static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const i
>
> //8.5.3.2.2 Luma sample bilinear interpolation process
> static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
> - const int height, const intptr_t mx, const intptr_t my, const int width)
> + const int height, const intptr_t mx, const intptr_t my, const int width,
> + int16_t *tmp)
> {
> #if BIT_DEPTH != 10
> const pixel *src = (const pixel *)_src;
> @@ -502,7 +503,8 @@ static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_s
>
> //8.5.3.2.2 Luma sample bilinear interpolation process
> static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
> - const int height, const intptr_t mx, const intptr_t my, const int width)
> + const int height, const intptr_t mx, const intptr_t my, const int width,
> + int16_t *tmp)
> {
> const pixel *src = (const pixel*)_src;
> const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
> @@ -520,7 +522,8 @@ static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
>
> //8.5.3.2.2 Luma sample bilinear interpolation process
> static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
> - const int height, const intptr_t mx, const intptr_t my, const int width)
> + const int height, const intptr_t mx, const intptr_t my, const int width,
> + int16_t *tmp)
> {
> const pixel *src = (pixel*)_src;
> const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
> @@ -539,9 +542,8 @@ static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src
>
> //8.5.3.2.2 Luma sample bilinear interpolation process
> static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride,
> - const int height, const intptr_t mx, const intptr_t my, const int width)
> + const int height, const intptr_t mx, const intptr_t my, const int width, int16_t *tmp_array)
> {
> - int16_t tmp_array[(MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE];
> int16_t *tmp = tmp_array;
> const pixel *src = (const pixel*)_src;
> const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
> diff --git a/libavcodec/x86/vvc/vvcdsp_init.c b/libavcodec/x86/vvc/vvcdsp_init.c
> index f3e2e3a27b..7ff3e2bdff 100644
> --- a/libavcodec/x86/vvc/vvcdsp_init.c
> +++ b/libavcodec/x86/vvc/vvcdsp_init.c
> @@ -90,13 +90,13 @@ AVG_PROTOTYPES(12, avx2)
>
> #define DMVR_PROTOTYPES(bd, opt) \
> void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
> - int height, intptr_t mx, intptr_t my, int width); \
> + int height, intptr_t mx, intptr_t my, int width, int16_t *unused); \
> void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
> - int height, intptr_t mx, intptr_t my, int width); \
> + int height, intptr_t mx, intptr_t my, int width, int16_t *unused); \
> void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
> - int height, intptr_t mx, intptr_t my, int width); \
> + int height, intptr_t mx, intptr_t my, int width, int16_t *unused); \
> void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, \
> - int height, intptr_t mx, intptr_t my, int width); \
> + int height, intptr_t mx, intptr_t my, int width, int16_t *unused); \
>
> DMVR_PROTOTYPES( 8, avx2)
> DMVR_PROTOTYPES(10, avx2)
> diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
> index 754cf19065..591557dfa6 100644
> --- a/tests/checkasm/vvc_mc.c
> +++ b/tests/checkasm/vvc_mc.c
> @@ -333,6 +333,7 @@ static void check_avg(void)
> }
>
> #define SR_RANGE 2
> +#define DMVR_TMP_BUF_SIZE ((MAX_PB_SIZE + BILINEAR_EXTRA) * MAX_PB_SIZE * sizeof(int16_t))
> static void check_dmvr(void)
> {
> LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]);
> @@ -340,14 +341,20 @@ static void check_dmvr(void)
> LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
> LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
> const int dst_stride = MAX_PB_SIZE * sizeof(int16_t);
> + int16_t *tmp0 = av_mallocz(DMVR_TMP_BUF_SIZE);
> + int16_t *tmp1 = av_mallocz(DMVR_TMP_BUF_SIZE);
>
> VVCDSPContext c;
> declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
> - intptr_t mx, intptr_t my, int width);
> + intptr_t mx, intptr_t my, int width, int16_t *tmp);
> +
> + if (!tmp0 || !tmp1)
> + fail();
>
> for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
> ff_vvc_dsp_init(&c, bit_depth);
> randomize_pixels(src0, src1, SRC_BUF_SIZE);
> + randomize_buffers(tmp0, tmp1, DMVR_TMP_BUF_SIZE / 2, UINT32_MAX);
> for (int i = 0; i < 2; i++) {
> for (int j = 0; j < 2; j++) {
> for (int h = 8; h <= 16; h *= 2) {
> @@ -371,8 +378,8 @@ static void check_dmvr(void)
> if (check_func(c.inter.dmvr[j][i], "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) {
> memset(dst0, 0, DST_BUF_SIZE);
> memset(dst1, 0, DST_BUF_SIZE);
> - call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
> - call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
> + call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w, tmp0);
> + call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w, tmp1);
> for (int k = 0; k < pred_h; k++) {
> if (memcmp(dst0 + k * dst_stride, dst1 + k * dst_stride, pred_w * sizeof(int16_t))) {
> fail();
> @@ -380,13 +387,15 @@ static void check_dmvr(void)
> }
> }
>
> - bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
> + bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w, tmp1);
> }
> }
> }
> }
> }
> }
> + av_free(tmp0);
> + av_free(tmp1);
> report("dmvr");
> }
>
> --
> 2.39.3
>
More information about the ffmpeg-devel
mailing list