[FFmpeg-devel] [PATCH 3/6] lavc/vvc: Store MIP information over entire CU area

Sat Nov 30 14:12:56 EET 2024

On 30/11/2024 06:46, Nuo Mi wrote:
> On Fri, Nov 29, 2024 at 6:19 AM Frank Plowman <post at frankplowman.com> wrote:
> 
>> Previously, the code only stored the MIP mode and transpose flag in the
>> relevant tables at the top-left corner of the CU.  This information ends
>> up being retrieved in ff_vvc_intra_pred_* not based on the CU position
>> but instead the transform unit position (specifically, using the x0 and
>> y0 from get_luma_predict_unit).  There might be multiple transform units
>> in a CU, hence the top-left corner of the transform unit might not
>> coincide with the top-left corner of the CU.  Consequently, we need to
>> store the MIP information at all positions in the CU, not only its
>> top-left corner, as we already do for the MIP flag.
>>
>> Signed-off-by: Frank Plowman <post at frankplowman.com>
>> ---
>>  libavcodec/vvc/ctu.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavcodec/vvc/ctu.c b/libavcodec/vvc/ctu.c
>> index 1e06119cfd..0030938cf5 100644
>> --- a/libavcodec/vvc/ctu.c
>> +++ b/libavcodec/vvc/ctu.c
>> @@ -975,8 +975,8 @@ static void intra_luma_pred_modes(VVCLocalContext *lc)
>>              for (int y = 0; y < (cb_height>>log2_min_cb_size); y++) {
>>                  int width = cb_width>>log2_min_cb_size;
>>                  memset(&fc->tab.imf[x],  cu->intra_mip_flag, width);
>> -                fc->tab.imtf[x] = intra_mip_transposed_flag;
>> -                fc->tab.imm[x]  = intra_mip_mode;
>> +                memset(&fc->tab.imtf[x], intra_mip_transposed_flag, width);
>> +                memset(&fc->tab.imm[x], intra_mip_mode, width);
> 
> intra_mip_mode is 4 bits and the 2 flags are 2 bits.  Maybe we can use a
> single uint8_t for all 3 fields?  Then we only need 1 memset and save 2/3
> of the memory.

I've implemented this (patch attached, to be applied atop the set), but
it's not as straightforward as it may seem.  In particular, because the
tables are read directly when determining which CABAC context to use
for these flags, we have to add quite a lot of extra code in cabac.c to
support this special case where the MIP information is a bit field.
In my implementation, this was done by adding a coerce_to_bool
parameter to get_inc and get_left_top.  This does actually save a
moderate amount of memory though, ~1MB for 4K and ~256kB for 1080p.

> 
>>
> 
>                  x += pps->min_cb_width;
>>              }
>>              cu->intra_pred_mode_y = intra_mip_mode;
>> --
>> 2.47.0
>>


-- 
Frank
-------------- next part --------------
From 75da84032150ce1a76cca990f17ecef3ab20aba9 Mon Sep 17 00:00:00 2001
From: Frank Plowman <post at frankplowman.com>
Date: Sat, 30 Nov 2024 12:05:20 +0000
Subject: [PATCH 7/7] lavc/vvc: Use a bitfield to store MIP information

Signed-off-by: Frank Plowman <post at frankplowman.com>
---
 libavcodec/vvc/cabac.c          | 30 ++++++++++++++++++------------
 libavcodec/vvc/ctu.c            | 12 +++++++++---
 libavcodec/vvc/dec.c            |  2 --
 libavcodec/vvc/dec.h            |  4 +---
 libavcodec/vvc/dsp.c            |  9 +++++++++
 libavcodec/vvc/intra_template.c |  5 +++--
 6 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/libavcodec/vvc/cabac.c b/libavcodec/vvc/cabac.c
index 0d45eec751..88e13a2394 100644
--- a/libavcodec/vvc/cabac.c
+++ b/libavcodec/vvc/cabac.c
@@ -949,7 +949,8 @@ static int limited_kth_order_egk_decode(CABACContext *c, const int k, const int
 
 static av_always_inline
 void get_left_top(const VVCLocalContext *lc, uint8_t *left, uint8_t *top,
-    const int x0, const int y0, const uint8_t *left_ctx, const uint8_t *top_ctx)
+    const int x0, const int y0, const uint8_t *left_ctx, const uint8_t *top_ctx,
+    const int coerce_to_bool)
 {
     const VVCFrameContext *fc = lc->fc;
     const VVCSPS *sps         = fc->ps.sps;
@@ -963,13 +964,18 @@ void get_left_top(const VVCLocalContext *lc, uint8_t *left, uint8_t *top,
         *left = SAMPLE_CTB(left_ctx, x_cb - 1, y_cb);
     if (lc->ctb_up_flag || y0b)
         *top = SAMPLE_CTB(top_ctx, x_cb, y_cb - 1);
+
+    if (coerce_to_bool) {
+        *left = !!*left;
+        *top = !!*top;
+    }
 }
 
 static av_always_inline
-uint8_t get_inc(VVCLocalContext *lc, const uint8_t *ctx)
+uint8_t get_inc(VVCLocalContext *lc, const uint8_t *ctx, const int coerce_to_bool)
 {
     uint8_t left = 0, top = 0;
-    get_left_top(lc, &left, &top, lc->cu->x0, lc->cu->y0, ctx, ctx);
+    get_left_top(lc, &left, &top, lc->cu->x0, lc->cu->y0, ctx, ctx, coerce_to_bool);
     return left + top;
 }
 
@@ -1092,7 +1098,7 @@ int ff_vvc_split_cu_flag(VVCLocalContext *lc, const int x0, const int y0,
     {
         uint8_t inc = 0, left_height = cb_height, top_width = cb_width;
 
-        get_left_top(lc, &left_height, &top_width, x0, y0, fc->tab.cb_height[is_chroma], fc->tab.cb_width[is_chroma]);
+        get_left_top(lc, &left_height, &top_width, x0, y0, fc->tab.cb_height[is_chroma], fc->tab.cb_width[is_chroma], 0);
         inc += left_height < cb_height;
         inc += top_width   < cb_width;
         inc += (a->btv + a->bth + a->ttv + a->tth + 2 * a->qt - 1) / 2 * 3;
@@ -1109,7 +1115,7 @@ static int split_qt_flag_decode(VVCLocalContext *lc, const int x0, const int y0,
     int inc = 0;
     uint8_t depth_left = 0, depth_top = 0;
 
-    get_left_top(lc,  &depth_left, &depth_top, x0, y0, fc->tab.cqt_depth[ch_type], fc->tab.cqt_depth[ch_type]);
+    get_left_top(lc,  &depth_left, &depth_top, x0, y0, fc->tab.cqt_depth[ch_type], fc->tab.cqt_depth[ch_type], 0);
     inc += depth_left > cqt_depth;
     inc += depth_top  > cqt_depth;
     inc += (cqt_depth >= 2) * 3;
@@ -1198,7 +1204,7 @@ int ff_vvc_non_inter_flag(VVCLocalContext *lc, const int x0, const int y0, const
     const VVCFrameContext *fc = lc->fc;
     uint8_t inc, left = MODE_INTER, top = MODE_INTER;
 
-    get_left_top(lc, &left, &top, x0, y0, fc->tab.cpm[ch_type], fc->tab.cpm[ch_type]);
+    get_left_top(lc, &left, &top, x0, y0, fc->tab.cpm[ch_type], fc->tab.cpm[ch_type], 0);
     inc = left == MODE_INTRA || top == MODE_INTRA;
     return GET_CABAC(NON_INTER_FLAG + inc);
 }
@@ -1209,7 +1215,7 @@ int ff_vvc_pred_mode_flag(VVCLocalContext *lc, const int is_chroma)
     const CodingUnit *cu      = lc->cu;
     uint8_t inc, left = MODE_INTER, top = MODE_INTER;
 
-    get_left_top(lc, &left, &top, cu->x0, cu->y0, fc->tab.cpm[is_chroma], fc->tab.cpm[is_chroma]);
+    get_left_top(lc, &left, &top, cu->x0, cu->y0, fc->tab.cpm[is_chroma], fc->tab.cpm[is_chroma], 0);
     inc = left == MODE_INTRA || top == MODE_INTRA;
     return GET_CABAC(PRED_MODE_FLAG + inc);
 }
@@ -1241,7 +1247,7 @@ int ff_vvc_intra_bdpcm_chroma_dir_flag(VVCLocalContext *lc)
 
 int ff_vvc_cu_skip_flag(VVCLocalContext *lc, const uint8_t *cu_skip_flag)
 {
-    const int inc = get_inc(lc, cu_skip_flag);
+    const int inc = get_inc(lc, cu_skip_flag, 0);
     return GET_CABAC(CU_SKIP_FLAG + inc);
 }
 
@@ -1252,7 +1258,7 @@ int ff_vvc_pred_mode_ibc_flag(VVCLocalContext *lc, const int is_chroma)
     uint8_t left_mode = MODE_INTER, top_mode = MODE_INTER;
     int inc;
 
-    get_left_top(lc, &left_mode, &top_mode, cu->x0, cu->y0, fc->tab.cpm[is_chroma], fc->tab.cpm[is_chroma]);
+    get_left_top(lc, &left_mode, &top_mode, cu->x0, cu->y0, fc->tab.cpm[is_chroma], fc->tab.cpm[is_chroma], 0);
     inc = (left_mode == MODE_IBC) + (top_mode == MODE_IBC);
     return GET_CABAC(PRED_MODE_IBC_FLAG + inc);
 }
@@ -1261,7 +1267,7 @@ int ff_vvc_intra_mip_flag(VVCLocalContext *lc, const uint8_t *intra_mip_flag)
 {
     const int w   = lc->cu->cb_width;
     const int h   = lc->cu->cb_height;
-    const int inc =  (w > h * 2 || h > w * 2) ? 3 : get_inc(lc, intra_mip_flag);
+    const int inc =  (w > h * 2 || h > w * 2) ? 3 : get_inc(lc, intra_mip_flag, 1);
     return GET_CABAC(INTRA_MIP_FLAG + inc);
 }
 
@@ -1354,8 +1360,8 @@ static int get_inter_flag_inc(VVCLocalContext *lc, const int x0, const int y0)
     uint8_t left_affine = 0, top_affine = 0;
     const VVCFrameContext *fc = lc->fc;
 
-    get_left_top(lc, &left_merge, &top_merge, x0, y0, fc->tab.msf, fc->tab.msf);
-    get_left_top(lc, &left_affine, &top_affine, x0, y0, fc->tab.iaf, fc->tab.iaf);
+    get_left_top(lc, &left_merge, &top_merge, x0, y0, fc->tab.msf, fc->tab.msf, 0);
+    get_left_top(lc, &left_affine, &top_affine, x0, y0, fc->tab.iaf, fc->tab.iaf, 0);
     return (left_merge || left_affine) + (top_merge + top_affine);
 }
 
diff --git a/libavcodec/vvc/ctu.c b/libavcodec/vvc/ctu.c
index 505099bc76..0ee957c05a 100644
--- a/libavcodec/vvc/ctu.c
+++ b/libavcodec/vvc/ctu.c
@@ -946,6 +946,12 @@ static void derive_chroma_intra_pred_mode(VVCLocalContext *lc,
     }
 }
 
+static av_always_inline uint8_t structure_mip_info(int intra_mip_flag,
+    int intra_mip_transposed_flag, int intra_mip_mode)
+{
+    return (intra_mip_mode << 2) | (intra_mip_transposed_flag << 1) | intra_mip_flag;
+}
+
 static void intra_luma_pred_modes(VVCLocalContext *lc)
 {
     VVCFrameContext *fc             = lc->fc;
@@ -974,9 +980,9 @@ static void intra_luma_pred_modes(VVCLocalContext *lc)
             int x = y_cb * pps->min_cb_width + x_cb;
             for (int y = 0; y < (cb_height>>log2_min_cb_size); y++) {
                 int width = cb_width>>log2_min_cb_size;
-                memset(&fc->tab.imf[x],  cu->intra_mip_flag, width);
-                memset(&fc->tab.imtf[x], intra_mip_transposed_flag, width);
-                memset(&fc->tab.imm[x], intra_mip_mode, width);
+                const uint8_t mip_info = structure_mip_info(cu->intra_mip_flag,
+                        intra_mip_transposed_flag, intra_mip_mode);
+                memset(&fc->tab.imf[x], mip_info, width);
                 x += pps->min_cb_width;
             }
             cu->intra_pred_mode_y = intra_mip_mode;
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index 50be9f9922..fef7339294 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -128,7 +128,6 @@ static void min_cb_tl_init(TabList *l, VVCFrameContext *fc)
     tl_init(l, 1, changed);
 
     TL_ADD(imf,  pic_size_in_min_cb);
-    TL_ADD(imm,  pic_size_in_min_cb);
 
     for (int i = LUMA; i <= CHROMA; i++)
         TL_ADD(cb_width[i],  pic_size_in_min_cb);   //is_a0_available requires this
@@ -143,7 +142,6 @@ static void min_cb_nz_tl_init(TabList *l, VVCFrameContext *fc)
     tl_init(l, 0, changed);
 
     TL_ADD(skip, pic_size_in_min_cb);
-    TL_ADD(imtf, pic_size_in_min_cb);
     TL_ADD(ipm,  pic_size_in_min_cb);
 
     for (int i = LUMA; i <= CHROMA; i++) {
diff --git a/libavcodec/vvc/dec.h b/libavcodec/vvc/dec.h
index f7cd5b678c..0f8f1f721d 100644
--- a/libavcodec/vvc/dec.h
+++ b/libavcodec/vvc/dec.h
@@ -161,9 +161,7 @@ typedef struct VVCFrameContext {
         uint8_t *skip;                                  ///< CuSkipFlag[][]
         uint8_t *ispmf;                                 ///< intra_sub_partitions_mode_flag
         uint8_t *msm[2];                                ///< MttSplitMode[][][] in 32 pixels
-        uint8_t *imf;                                   ///< IntraMipFlag[][]
-        uint8_t *imtf;                                  ///< intra_mip_transposed_flag[][]
-        uint8_t *imm;                                   ///< intra_mip_mode[][]
+        uint8_t *imf;                                   ///< IntraMipFlag[][], intra_mip_transposed_flag[][], intra_mip_mode[][]
         uint8_t *ipm;                                   ///< IntraPredModeY[][]
         uint8_t *cpm[2];                                ///< CuPredMode[][][]
         uint8_t *msf;                                   ///< MergeSubblockFlag[][]
diff --git a/libavcodec/vvc/dsp.c b/libavcodec/vvc/dsp.c
index 9bfa46b03d..fc22c89cd6 100644
--- a/libavcodec/vvc/dsp.c
+++ b/libavcodec/vvc/dsp.c
@@ -44,6 +44,15 @@ static int vvc_sad(const int16_t *src0, const int16_t *src1, int dx, int dy,
     return sad;
 }
 
+static av_always_inline void destructure_mip_info(int *intra_mip_transposed_flag,
+    int *intra_mip_mode, const uint8_t mip_info)
+{
+    if (intra_mip_transposed_flag)
+        *intra_mip_transposed_flag = (mip_info >> 1) & 0x1;
+    if (intra_mip_mode)
+        *intra_mip_mode = (mip_info >> 2) & 0xf;
+}
+
 typedef struct IntraEdgeParams {
     uint8_t* top;
     uint8_t* left;
diff --git a/libavcodec/vvc/intra_template.c b/libavcodec/vvc/intra_template.c
index 62342c8142..44ba102797 100644
--- a/libavcodec/vvc/intra_template.c
+++ b/libavcodec/vvc/intra_template.c
@@ -627,8 +627,9 @@ static void FUNC(intra_pred)(const VVCLocalContext *lc, int x0, int y0,
     FUNC(prepare_intra_edge_params)(lc, &edge, src, stride, x, y, w, h, c_idx, is_intra_mip, mode, ref_idx, need_pdpc);
 
     if (is_intra_mip) {
-        int intra_mip_transposed_flag = SAMPLE_CTB(fc->tab.imtf, x_cb, y_cb);
-        int intra_mip_mode = SAMPLE_CTB(fc->tab.imm, x_cb, y_cb);
+        int intra_mip_transposed_flag;
+        int intra_mip_mode;
+        destructure_mip_info(&intra_mip_transposed_flag, &intra_mip_mode, intra_mip_flag);
 
         fc->vvcdsp.intra.pred_mip((uint8_t *)src, edge.top, edge.left,
                         w, h, stride, intra_mip_mode, intra_mip_transposed_flag);
-- 
2.47.0