[FFmpeg-cvslog] h264: move {mv,ref}_cache into the per-slice context

Anton Khirnov git at videolan.org
Sat Mar 21 14:14:43 CET 2015


ffmpeg | branch: master | Anton Khirnov <anton at khirnov.net> | Sat Jan 17 22:28:46 2015 +0100| [e6287f077c3e8e4aca11e61dd4bade1351439e6b] | committer: Anton Khirnov

h264: move {mv,ref}_cache into the per-slice context

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e6287f077c3e8e4aca11e61dd4bade1351439e6b
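In short, the commit moves the motion-vector cache (mv_cache) and the reference-index cache (ref_cache) out of the shared H264Context and into the per-slice H264SliceContext, threading an explicit H264SliceContext *sl parameter through every function that touches them (CABAC/CAVLC macroblock decoding, direct-mode prediction, the loop filter, motion compensation and svq3.c, per the diffstat). It continues the series that has already moved other per-slice state, such as non_zero_count_cache, into that struct. An abridged excerpt of the new layout and call pattern, taken from the diff below (the "..." elisions are mine, not part of the patch):

    /* h264.h: the caches now live in the per-slice context */
    typedef struct H264SliceContext {
        ...
        DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
        DECLARE_ALIGNED(8,  int8_t,  ref_cache)[2][5 * 8];
    } H264SliceContext;

    /* h264_cabac.c: callers receive the slice context explicitly */
    static int decode_cabac_mb_ref(H264Context *h, H264SliceContext *sl,
                                   int list, int n)
    {
        int refa = sl->ref_cache[list][scan8[n] - 1];   /* was h->ref_cache */
        int refb = sl->ref_cache[list][scan8[n] - 8];   /* was h->ref_cache */
        ...
    }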
---

 libavcodec/h264.c             |   18 ++++---
 libavcodec/h264.h             |   27 +++++-----
 libavcodec/h264_cabac.c       |   67 ++++++++++++-------------
 libavcodec/h264_cavlc.c       |   44 ++++++++---------
 libavcodec/h264_direct.c      |  109 +++++++++++++++++++++--------------------
 libavcodec/h264_loopfilter.c  |   39 ++++++++-------
 libavcodec/h264_mb.c          |   81 ++++++++++++++++--------------
 libavcodec/h264_mb_template.c |   12 ++---
 libavcodec/h264_mc_template.c |   10 ++--
 libavcodec/h264_mvpred.h      |   64 ++++++++++++------------
 libavcodec/h264_slice.c       |    9 ++--
 libavcodec/svq3.c             |   35 ++++++-------
 12 files changed, 267 insertions(+), 248 deletions(-)

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 2b29389..ee6d863 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -71,8 +71,8 @@ static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
         ref = 0;
     fill_rectangle(&h->cur_pic.ref_index[0][4 * h->mb_xy],
                    2, 2, 2, ref, 1);
-    fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
-    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
+    fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
+    fill_rectangle(sl->mv_cache[0][scan8[0]], 4, 4, 8,
                    pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4);
     assert(!FRAME_MBAFF(h));
     ff_h264_hl_decode_mb(h, &h->slice_ctx[0]);
@@ -484,12 +484,14 @@ int ff_h264_context_init(H264Context *h)
     FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[1],
                       h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
 
-    h->ref_cache[0][scan8[5]  + 1] =
-    h->ref_cache[0][scan8[7]  + 1] =
-    h->ref_cache[0][scan8[13] + 1] =
-    h->ref_cache[1][scan8[5]  + 1] =
-    h->ref_cache[1][scan8[7]  + 1] =
-    h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE;
+    for (i = 0; i < h->nb_slice_ctx; i++) {
+        h->slice_ctx[i].ref_cache[0][scan8[5]  + 1] =
+        h->slice_ctx[i].ref_cache[0][scan8[7]  + 1] =
+        h->slice_ctx[i].ref_cache[0][scan8[13] + 1] =
+        h->slice_ctx[i].ref_cache[1][scan8[5]  + 1] =
+        h->slice_ctx[i].ref_cache[1][scan8[7]  + 1] =
+        h->slice_ctx[i].ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE;
+    }
 
     if (CONFIG_ERROR_RESILIENCE) {
         /* init ER */
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 521ceeb..189825a 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -347,6 +347,12 @@ typedef struct H264SliceContext {
      * is 64 if not available.
      */
     DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
+
+    /**
+     * Motion vector cache.
+     */
+    DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
+    DECLARE_ALIGNED(8,  int8_t, ref_cache)[2][5 * 8];
 } H264SliceContext;
 
 /**
@@ -389,11 +395,6 @@ typedef struct H264Context {
 
     uint8_t (*non_zero_count)[48];
 
-    /**
-     * Motion vector cache.
-     */
-    DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
-    DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
 #define LIST_NOT_USED -1 // FIXME rename?
 #define PART_NOT_AVAILABLE -2
 
@@ -821,7 +822,8 @@ void h264_init_dequant_tables(H264Context *h);
 
 void ff_h264_direct_dist_scale_factor(H264Context *const h);
 void ff_h264_direct_ref_list_init(H264Context *const h);
-void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type);
+void ff_h264_pred_direct_motion(H264Context *const h, H264SliceContext *sl,
+                                int *mb_type);
 
 void ff_h264_filter_mb_fast(H264Context *h, H264SliceContext *sl, int mb_x, int mb_y,
                             uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr,
@@ -969,12 +971,13 @@ static av_always_inline void write_back_non_zero_count(H264Context *h,
 }
 
 static av_always_inline void write_back_motion_list(H264Context *h,
+                                                    H264SliceContext *sl,
                                                     int b_stride,
                                                     int b_xy, int b8_xy,
                                                     int mb_type, int list)
 {
     int16_t(*mv_dst)[2] = &h->cur_pic.motion_val[list][b_xy];
-    int16_t(*mv_src)[2] = &h->mv_cache[list][scan8[0]];
+    int16_t(*mv_src)[2] = &sl->mv_cache[list][scan8[0]];
     AV_COPY128(mv_dst + 0 * b_stride, mv_src + 8 * 0);
     AV_COPY128(mv_dst + 1 * b_stride, mv_src + 8 * 1);
     AV_COPY128(mv_dst + 2 * b_stride, mv_src + 8 * 2);
@@ -995,7 +998,7 @@ static av_always_inline void write_back_motion_list(H264Context *h,
 
     {
         int8_t *ref_index = &h->cur_pic.ref_index[list][b8_xy];
-        int8_t *ref_cache = h->ref_cache[list];
+        int8_t *ref_cache = sl->ref_cache[list];
         ref_index[0 + 0 * 2] = ref_cache[scan8[0]];
         ref_index[1 + 0 * 2] = ref_cache[scan8[4]];
         ref_index[0 + 1 * 2] = ref_cache[scan8[8]];
@@ -1003,20 +1006,22 @@ static av_always_inline void write_back_motion_list(H264Context *h,
     }
 }
 
-static av_always_inline void write_back_motion(H264Context *h, int mb_type)
+static av_always_inline void write_back_motion(H264Context *h,
+                                               H264SliceContext *sl,
+                                               int mb_type)
 {
     const int b_stride      = h->b_stride;
     const int b_xy  = 4 * h->mb_x + 4 * h->mb_y * h->b_stride; // try mb2b(8)_xy
     const int b8_xy = 4 * h->mb_xy;
 
     if (USES_LIST(mb_type, 0)) {
-        write_back_motion_list(h, b_stride, b_xy, b8_xy, mb_type, 0);
+        write_back_motion_list(h, sl, b_stride, b_xy, b8_xy, mb_type, 0);
     } else {
         fill_rectangle(&h->cur_pic.ref_index[0][b8_xy],
                        2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
     }
     if (USES_LIST(mb_type, 1))
-        write_back_motion_list(h, b_stride, b_xy, b8_xy, mb_type, 1);
+        write_back_motion_list(h, sl, b_stride, b_xy, b8_xy, mb_type, 1);
 
     if (h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC(h)) {
         if (IS_8X8(mb_type)) {
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index bf80455..7fa1ae6 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1459,9 +1459,10 @@ static int decode_cabac_b_mb_sub_type( H264Context *h ) {
     return type;
 }
 
-static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
-    int refa = h->ref_cache[list][scan8[n] - 1];
-    int refb = h->ref_cache[list][scan8[n] - 8];
+static int decode_cabac_mb_ref(H264Context *h, H264SliceContext *sl, int list, int n)
+{
+    int refa = sl->ref_cache[list][scan8[n] - 1];
+    int refb = sl->ref_cache[list][scan8[n] - 8];
     int ref  = 0;
     int ctx  = 0;
 
@@ -2092,11 +2093,11 @@ decode_intra_mb:
             }
             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
-                ff_h264_pred_direct_motion(h, &mb_type);
-                h->ref_cache[0][scan8[4]] =
-                h->ref_cache[1][scan8[4]] =
-                h->ref_cache[0][scan8[12]] =
-                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
+                ff_h264_pred_direct_motion(h, sl, &mb_type);
+                sl->ref_cache[0][scan8[4]] =
+                sl->ref_cache[1][scan8[4]] =
+                sl->ref_cache[0][scan8[12]] =
+                sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
                     for( i = 0; i < 4; i++ )
                         fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, (h->sub_mb_type[i]>>1)&0xFF, 1 );
             }
@@ -2114,7 +2115,7 @@ decode_intra_mb:
                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
                         int rc = h->ref_count[list] << MB_MBAFF(h);
                         if (rc > 1) {
-                            ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
+                            ref[list][i] = decode_cabac_mb_ref(h, sl, list, 4 * i);
                             if (ref[list][i] >= (unsigned) rc) {
                                 av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], rc);
                                 return -1;
@@ -2124,8 +2125,8 @@ decode_intra_mb:
                     } else {
                         ref[list][i] = -1;
                     }
-                                                       h->ref_cache[list][ scan8[4*i]+1 ]=
-                    h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
+                    sl->ref_cache[list][scan8[4 * i] + 1] =
+                    sl->ref_cache[list][scan8[4 * i] + 8] = sl->ref_cache[list][scan8[4 * i] + 9] = ref[list][i];
                 }
         }
 
@@ -2134,7 +2135,7 @@ decode_intra_mb:
 
         for(list=0; list<h->list_count; list++){
             for(i=0; i<4; i++){
-                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
+                sl->ref_cache[list][scan8[4 * i]] = sl->ref_cache[list][scan8[4 * i] + 1];
                 if(IS_DIRECT(h->sub_mb_type[i])){
                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 2);
                     continue;
@@ -2147,9 +2148,9 @@ decode_intra_mb:
                         int mpx, mpy;
                         int mx, my;
                         const int index= 4*i + block_width*j;
-                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
+                        int16_t (* mv_cache)[2] = &sl->mv_cache[list][ scan8[index] ];
                         uint8_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
-                        pred_motion(h, sl, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
+                        pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
                         DECODE_CABAC_MB_MVD( h, list, index)
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
 
@@ -2183,13 +2184,13 @@ decode_intra_mb:
                         mvd_cache[ 0 ][1]= mpy;
                     }
                 }else{
-                    fill_rectangle(h->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4);
+                    fill_rectangle(sl->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4);
                     fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 2);
                 }
             }
         }
     } else if( IS_DIRECT(mb_type) ) {
-        ff_h264_pred_direct_motion(h, &mb_type);
+        ff_h264_pred_direct_motion(h, sl, &mb_type);
         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 2);
         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 2);
         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
@@ -2200,25 +2201,25 @@ decode_intra_mb:
                 if(IS_DIR(mb_type, 0, list)){
                     int ref, rc = h->ref_count[list] << MB_MBAFF(h);
                     if (rc > 1) {
-                        ref= decode_cabac_mb_ref(h, list, 0);
+                        ref= decode_cabac_mb_ref(h, sl, list, 0);
                         if (ref >= (unsigned) rc) {
                             av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc);
                             return -1;
                         }
                     }else
                         ref=0;
-                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
+                    fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
                 }
             }
             for(list=0; list<h->list_count; list++){
                 if(IS_DIR(mb_type, 0, list)){
                     int mx,my,mpx,mpy;
-                    pred_motion(h, sl, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
+                    pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
                     DECODE_CABAC_MB_MVD( h, list, 0)
                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
 
                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack8to16(mpx,mpy), 2);
-                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
+                    fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
                 }
             }
         }
@@ -2228,31 +2229,31 @@ decode_intra_mb:
                         if(IS_DIR(mb_type, i, list)){
                             int ref, rc = h->ref_count[list] << MB_MBAFF(h);
                             if (rc > 1) {
-                                ref= decode_cabac_mb_ref( h, list, 8*i );
+                                ref= decode_cabac_mb_ref(h, sl, list, 8 * i);
                                 if (ref >= (unsigned) rc) {
                                     av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc);
                                     return -1;
                                 }
                             }else
                                 ref=0;
-                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
+                            fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
                         }else
-                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
+                            fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
                     }
             }
             for(list=0; list<h->list_count; list++){
                 for(i=0; i<2; i++){
                     if(IS_DIR(mb_type, i, list)){
                         int mx,my,mpx,mpy;
-                        pred_16x8_motion(h, sl, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
+                        pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
                         DECODE_CABAC_MB_MVD( h, list, 8*i)
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
 
                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack8to16(mpx,mpy), 2);
-                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
+                        fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
                     }else{
                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 2);
-                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+                        fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
                     }
                 }
             }
@@ -2263,31 +2264,31 @@ decode_intra_mb:
                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
                             int ref, rc = h->ref_count[list] << MB_MBAFF(h);
                             if (rc > 1) {
-                                ref= decode_cabac_mb_ref( h, list, 4*i );
+                                ref= decode_cabac_mb_ref(h, sl, list, 4 * i);
                                 if (ref >= (unsigned) rc) {
                                     av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc);
                                     return -1;
                                 }
                             }else
                                 ref=0;
-                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
+                            fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
                         }else
-                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
+                            fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
                     }
             }
             for(list=0; list<h->list_count; list++){
                 for(i=0; i<2; i++){
                     if(IS_DIR(mb_type, i, list)){
                         int mx,my,mpx,mpy;
-                        pred_8x16_motion(h, sl, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
+                        pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
                         DECODE_CABAC_MB_MVD( h, list, 4*i)
 
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack8to16(mpx,mpy), 2);
-                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
+                        fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
                     }else{
                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 2);
-                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+                        fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
                     }
                 }
             }
@@ -2296,7 +2297,7 @@ decode_intra_mb:
 
    if( IS_INTER( mb_type ) ) {
         h->chroma_pred_mode_table[mb_xy] = 0;
-        write_back_motion( h, mb_type );
+        write_back_motion(h, sl, mb_type);
    }
 
     if( !IS_INTRA16x16( mb_type ) ) {
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 08580ed..37647b9 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -845,11 +845,11 @@ decode_intra_mb:
                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
             }
             if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
-                ff_h264_pred_direct_motion(h, &mb_type);
-                h->ref_cache[0][scan8[4]] =
-                h->ref_cache[1][scan8[4]] =
-                h->ref_cache[0][scan8[12]] =
-                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
+                ff_h264_pred_direct_motion(h, sl, &mb_type);
+                sl->ref_cache[0][scan8[4]] =
+                sl->ref_cache[1][scan8[4]] =
+                sl->ref_cache[0][scan8[12]] =
+                sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
             }
         }else{
             assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
@@ -895,11 +895,11 @@ decode_intra_mb:
         for(list=0; list<h->list_count; list++){
             for(i=0; i<4; i++){
                 if(IS_DIRECT(h->sub_mb_type[i])) {
-                    h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
+                    sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
                     continue;
                 }
-                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
-                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
+                sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
+                sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
 
                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
                     const int sub_mb_type= h->sub_mb_type[i];
@@ -907,8 +907,8 @@ decode_intra_mb:
                     for(j=0; j<sub_partition_count[i]; j++){
                         int mx, my;
                         const int index= 4*i + block_width*j;
-                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
-                        pred_motion(h, sl, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
+                        int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
+                        pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
                         mx += get_se_golomb(&h->gb);
                         my += get_se_golomb(&h->gb);
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
@@ -929,14 +929,14 @@ decode_intra_mb:
                         mv_cache[ 0 ][1]= my;
                     }
                 }else{
-                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
+                    uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
                     p[0] = p[1]=
                     p[8] = p[9]= 0;
                 }
             }
         }
     }else if(IS_DIRECT(mb_type)){
-        ff_h264_pred_direct_motion(h, &mb_type);
+        ff_h264_pred_direct_motion(h, sl, &mb_type);
         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     }else{
         int list, mx, my, i;
@@ -957,17 +957,17 @@ decode_intra_mb:
                                 return -1;
                             }
                         }
-                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
+                    fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
                     }
             }
             for(list=0; list<h->list_count; list++){
                 if(IS_DIR(mb_type, 0, list)){
-                    pred_motion(h, sl, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
+                    pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
                     mx += get_se_golomb(&h->gb);
                     my += get_se_golomb(&h->gb);
                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
 
-                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
+                    fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
                 }
             }
         }
@@ -990,14 +990,14 @@ decode_intra_mb:
                             }
                         }else
                             val= LIST_NOT_USED&0xFF;
-                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
+                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
                     }
             }
             for(list=0; list<h->list_count; list++){
                 for(i=0; i<2; i++){
                     unsigned int val;
                     if(IS_DIR(mb_type, i, list)){
-                        pred_16x8_motion(h, sl, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
+                        pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
                         mx += get_se_golomb(&h->gb);
                         my += get_se_golomb(&h->gb);
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
@@ -1005,7 +1005,7 @@ decode_intra_mb:
                         val= pack16to32(mx,my);
                     }else
                         val=0;
-                    fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
+                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
                 }
             }
         }else{
@@ -1028,14 +1028,14 @@ decode_intra_mb:
                             }
                         }else
                             val= LIST_NOT_USED&0xFF;
-                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
+                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
                     }
             }
             for(list=0; list<h->list_count; list++){
                 for(i=0; i<2; i++){
                     unsigned int val;
                     if(IS_DIR(mb_type, i, list)){
-                        pred_8x16_motion(h, sl, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
+                        pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
                         mx += get_se_golomb(&h->gb);
                         my += get_se_golomb(&h->gb);
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
@@ -1043,14 +1043,14 @@ decode_intra_mb:
                         val= pack16to32(mx,my);
                     }else
                         val=0;
-                    fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
+                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
                 }
             }
         }
     }
 
     if(IS_INTER(mb_type))
-        write_back_motion(h, mb_type);
+        write_back_motion(h, sl, mb_type);
 
     if(!IS_INTRA16x16(mb_type)){
         cbp= get_ue_golomb(&h->gb);
diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index 855526e..559b8ab 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -171,7 +171,8 @@ static void await_reference_mb_row(H264Context *const h, H264Picture *ref,
                              ref_field_picture && ref_field);
 }
 
-static void pred_spatial_direct_motion(H264Context *const h, int *mb_type)
+static void pred_spatial_direct_motion(H264Context *const h, H264SliceContext *sl,
+                                       int *mb_type)
 {
     int b8_stride = 2;
     int b4_stride = h->b_stride;
@@ -196,13 +197,13 @@ static void pred_spatial_direct_motion(H264Context *const h, int *mb_type)
 
     /* ref = min(neighbors) */
     for (list = 0; list < 2; list++) {
-        int left_ref     = h->ref_cache[list][scan8[0] - 1];
-        int top_ref      = h->ref_cache[list][scan8[0] - 8];
-        int refc         = h->ref_cache[list][scan8[0] - 8 + 4];
-        const int16_t *C = h->mv_cache[list][scan8[0]  - 8 + 4];
+        int left_ref     = sl->ref_cache[list][scan8[0] - 1];
+        int top_ref      = sl->ref_cache[list][scan8[0] - 8];
+        int refc         = sl->ref_cache[list][scan8[0] - 8 + 4];
+        const int16_t *C = sl->mv_cache[list][scan8[0]  - 8 + 4];
         if (refc == PART_NOT_AVAILABLE) {
-            refc = h->ref_cache[list][scan8[0] - 8 - 1];
-            C    = h->mv_cache[list][scan8[0]  - 8 - 1];
+            refc = sl->ref_cache[list][scan8[0] - 8 - 1];
+            C    = sl->mv_cache[list][scan8[0]  - 8 - 1];
         }
         ref[list] = FFMIN3((unsigned)left_ref,
                            (unsigned)top_ref,
@@ -210,8 +211,8 @@ static void pred_spatial_direct_motion(H264Context *const h, int *mb_type)
         if (ref[list] >= 0) {
             /* This is just pred_motion() but with the cases removed that
              * cannot happen for direct blocks. */
-            const int16_t *const A = h->mv_cache[list][scan8[0] - 1];
-            const int16_t *const B = h->mv_cache[list][scan8[0] - 8];
+            const int16_t *const A = sl->mv_cache[list][scan8[0] - 1];
+            const int16_t *const B = sl->mv_cache[list][scan8[0] - 8];
 
             int match_count = (left_ref == ref[list]) +
                               (top_ref  == ref[list]) +
@@ -246,10 +247,10 @@ static void pred_spatial_direct_motion(H264Context *const h, int *mb_type)
     }
 
     if (!(is_b8x8 | mv[0] | mv[1])) {
-        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
-        fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
-        fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
-        fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
+        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
+        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
+        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
         *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                  MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                    MB_TYPE_16x16 | MB_TYPE_DIRECT2;
@@ -340,9 +341,9 @@ single_col:
                 continue;
             h->sub_mb_type[i8] = sub_mb_type;
 
-            fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
+            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                            (uint8_t)ref[0], 1);
-            fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
+            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
                            (uint8_t)ref[1], 1);
             if (!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref &&
                 ((l1ref0[xy8] == 0 &&
@@ -363,8 +364,8 @@ single_col:
                 a = mv[0];
                 b = mv[1];
             }
-            fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
-            fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
+            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
+            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
         }
         if (!is_b8x8 && !(n & 3))
             *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
@@ -373,8 +374,8 @@ single_col:
     } else if (IS_16X16(*mb_type)) {
         int a, b;
 
-        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
-        fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
+        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
+        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
         if (!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref &&
             ((l1ref0[0] == 0 &&
               FFABS(l1mv0[0][0]) <= 1 &&
@@ -392,8 +393,8 @@ single_col:
             a = mv[0];
             b = mv[1];
         }
-        fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
-        fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
+        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
+        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
     } else {
         int n = 0;
         for (i8 = 0; i8 < 4; i8++) {
@@ -404,11 +405,11 @@ single_col:
                 continue;
             h->sub_mb_type[i8] = sub_mb_type;
 
-            fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
-            fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
-            fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
+            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
+            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
+            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                            (uint8_t)ref[0], 1);
-            fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
+            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
                            (uint8_t)ref[1], 1);
 
             assert(b8_stride == 2);
@@ -423,10 +424,10 @@ single_col:
                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
                     if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
                         if (ref[0] == 0)
-                            fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2,
+                            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2,
                                            8, 0, 4);
                         if (ref[1] == 0)
-                            fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2,
+                            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2,
                                            8, 0, 4);
                         n += 4;
                     }
@@ -437,9 +438,9 @@ single_col:
                                                      (y8 * 2 + (i4 >> 1)) * b4_stride];
                         if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
                             if (ref[0] == 0)
-                                AV_ZERO32(h->mv_cache[0][scan8[i8 * 4 + i4]]);
+                                AV_ZERO32(sl->mv_cache[0][scan8[i8 * 4 + i4]]);
                             if (ref[1] == 0)
-                                AV_ZERO32(h->mv_cache[1][scan8[i8 * 4 + i4]]);
+                                AV_ZERO32(sl->mv_cache[1][scan8[i8 * 4 + i4]]);
                             m++;
                         }
                     }
@@ -456,7 +457,8 @@ single_col:
     }
 }
 
-static void pred_temp_direct_motion(H264Context *const h, int *mb_type)
+static void pred_temp_direct_motion(H264Context *const h, H264SliceContext *sl,
+                                    int *mb_type)
 {
     int b8_stride = 2;
     int b4_stride = h->b_stride;
@@ -577,11 +579,11 @@ single_col:
                     continue;
                 h->sub_mb_type[i8] = sub_mb_type;
 
-                fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
+                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                 if (IS_INTRA(mb_type_col[y8])) {
-                    fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
-                    fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
-                    fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
+                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
+                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
+                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                     continue;
                 }
 
@@ -594,7 +596,7 @@ single_col:
                     l1mv = l1mv1;
                 }
                 scale = dist_scale_factor[ref0];
-                fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
+                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                ref0, 1);
 
                 {
@@ -602,9 +604,9 @@ single_col:
                     int my_col            = (mv_col[1] << y_shift) / 2;
                     int mx                = (scale * mv_col[0] + 128) >> 8;
                     int my                = (scale * my_col    + 128) >> 8;
-                    fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
+                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                    pack16to32(mx, my), 4);
-                    fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
+                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
                                    pack16to32(mx - mv_col[0], my - my_col), 4);
                 }
             }
@@ -616,7 +618,7 @@ single_col:
         if (IS_16X16(*mb_type)) {
             int ref, mv0, mv1;
 
-            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
+            fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
             if (IS_INTRA(mb_type_col[0])) {
                 ref = mv0 = mv1 = 0;
             } else {
@@ -631,9 +633,9 @@ single_col:
                 mv0      = pack16to32(mv_l0[0], mv_l0[1]);
                 mv1      = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]);
             }
-            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
-            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
-            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
+            fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
+            fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
+            fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
         } else {
             for (i8 = 0; i8 < 4; i8++) {
                 const int x8 = i8 & 1;
@@ -644,11 +646,11 @@ single_col:
                 if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                     continue;
                 h->sub_mb_type[i8] = sub_mb_type;
-                fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
+                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                 if (IS_INTRA(mb_type_col[0])) {
-                    fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
-                    fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
-                    fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
+                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
+                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
+                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                     continue;
                 }
 
@@ -662,24 +664,24 @@ single_col:
                 }
                 scale = dist_scale_factor[ref0];
 
-                fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
+                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                ref0, 1);
                 if (IS_SUB_8X8(sub_mb_type)) {
                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
                     int mx                = (scale * mv_col[0] + 128) >> 8;
                     int my                = (scale * mv_col[1] + 128) >> 8;
-                    fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
+                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                    pack16to32(mx, my), 4);
-                    fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
+                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
                                    pack16to32(mx - mv_col[0], my - mv_col[1]), 4);
                 } else {
                     for (i4 = 0; i4 < 4; i4++) {
                         const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
                                                      (y8 * 2 + (i4 >> 1)) * b4_stride];
-                        int16_t *mv_l0 = h->mv_cache[0][scan8[i8 * 4 + i4]];
+                        int16_t *mv_l0 = sl->mv_cache[0][scan8[i8 * 4 + i4]];
                         mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                         mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
-                        AV_WN32A(h->mv_cache[1][scan8[i8 * 4 + i4]],
+                        AV_WN32A(sl->mv_cache[1][scan8[i8 * 4 + i4]],
                                  pack16to32(mv_l0[0] - mv_col[0],
                                             mv_l0[1] - mv_col[1]));
                     }
@@ -689,10 +691,11 @@ single_col:
     }
 }
 
-void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type)
+void ff_h264_pred_direct_motion(H264Context *const h, H264SliceContext *sl,
+                                int *mb_type)
 {
     if (h->direct_spatial_mv_pred)
-        pred_spatial_direct_motion(h, mb_type);
+        pred_spatial_direct_motion(h, sl, mb_type);
     else
-        pred_temp_direct_motion(h, mb_type);
+        pred_temp_direct_motion(h, sl, mb_type);
 }
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index a33a66f..4d34a29 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -371,7 +371,7 @@ static av_always_inline void h264_filter_mb_fast_internal(H264Context *h,
             int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
             int step =  1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
             edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
-            h->h264dsp.h264_loop_filter_strength(bS, sl->non_zero_count_cache, h->ref_cache, h->mv_cache,
+            h->h264dsp.h264_loop_filter_strength(bS, sl->non_zero_count_cache, sl->ref_cache, sl->mv_cache,
                                               h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE(h));
         }
         if( IS_INTRA(left_type) )
@@ -438,29 +438,30 @@ void ff_h264_filter_mb_fast(H264Context *h, H264SliceContext *sl,
 #endif
 }
 
-static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
+static int check_mv(H264Context *h, H264SliceContext *sl, long b_idx, long bn_idx, int mvy_limit)
+{
     int v;
 
-    v= h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx];
-    if(!v && h->ref_cache[0][b_idx]!=-1)
-        v= h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
-           FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
+    v = sl->ref_cache[0][b_idx] != sl->ref_cache[0][bn_idx];
+    if (!v && sl->ref_cache[0][b_idx] != -1)
+        v = sl->mv_cache[0][b_idx][0] - sl->mv_cache[0][bn_idx][0] + 3 >= 7U |
+           FFABS(sl->mv_cache[0][b_idx][1] - sl->mv_cache[0][bn_idx][1]) >= mvy_limit;
 
     if(h->list_count==2){
         if(!v)
-            v = h->ref_cache[1][b_idx] != h->ref_cache[1][bn_idx] |
-                h->mv_cache[1][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U |
-                FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit;
+            v = sl->ref_cache[1][b_idx] != sl->ref_cache[1][bn_idx] |
+                sl->mv_cache[1][b_idx][0] - sl->mv_cache[1][bn_idx][0] + 3 >= 7U |
+                FFABS(sl->mv_cache[1][b_idx][1] - sl->mv_cache[1][bn_idx][1]) >= mvy_limit;
 
         if(v){
-            if(h->ref_cache[0][b_idx] != h->ref_cache[1][bn_idx] |
-               h->ref_cache[1][b_idx] != h->ref_cache[0][bn_idx])
+            if (sl->ref_cache[0][b_idx] != sl->ref_cache[1][bn_idx] |
+                sl->ref_cache[1][b_idx] != sl->ref_cache[0][bn_idx])
                 return 1;
             return
-                h->mv_cache[0][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U |
-                FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit |
-                h->mv_cache[1][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U |
-                FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit;
+                sl->mv_cache[0][b_idx][0] - sl->mv_cache[1][bn_idx][0] + 3 >= 7U |
+                FFABS(sl->mv_cache[0][b_idx][1] - sl->mv_cache[1][bn_idx][1]) >= mvy_limit |
+                sl->mv_cache[1][b_idx][0] - sl->mv_cache[0][bn_idx][0] + 3 >= 7U |
+                FFABS(sl->mv_cache[1][b_idx][1] - sl->mv_cache[0][bn_idx][1]) >= mvy_limit;
         }
     }
 
@@ -564,7 +565,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, H264SliceContext *sl,
                     int b_idx= 8 + 4;
                     int bn_idx= b_idx - (dir ? 8:1);
 
-                    bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, 8 + 4, bn_idx, mvy_limit);
+                    bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, sl, 8 + 4, bn_idx, mvy_limit);
                     mv_done = 1;
                 }
                 else
@@ -582,7 +583,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, H264SliceContext *sl,
                     }
                     else if(!mv_done)
                     {
-                        bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
+                        bS[i] = check_mv(h, sl, b_idx, bn_idx, mvy_limit);
                     }
                 }
             }
@@ -645,7 +646,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, H264SliceContext *sl,
                 int b_idx= 8 + 4 + edge * (dir ? 8:1);
                 int bn_idx= b_idx - (dir ? 8:1);
 
-                bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, b_idx, bn_idx, mvy_limit);
+                bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, sl, b_idx, bn_idx, mvy_limit);
                 mv_done = 1;
             }
             else
@@ -663,7 +664,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, H264SliceContext *sl,
                 }
                 else if(!mv_done)
                 {
-                    bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
+                    bS[i] = check_mv(h, sl, b_idx, bn_idx, mvy_limit);
                 }
             }
 
diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index 44dd598..0e48bef 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -36,10 +36,11 @@
 #include "svq3.h"
 #include "thread.h"
 
-static inline int get_lowest_part_list_y(H264Context *h, H264Picture *pic, int n,
+static inline int get_lowest_part_list_y(H264Context *h, H264SliceContext *sl,
+                                         H264Picture *pic, int n,
                                          int height, int y_offset, int list)
 {
-    int raw_my             = h->mv_cache[list][scan8[n]][1];
+    int raw_my             = sl->mv_cache[list][scan8[n]][1];
     int filter_height_up   = (raw_my & 3) ? 2 : 0;
     int filter_height_down = (raw_my & 3) ? 3 : 0;
     int full_my            = (raw_my >> 2) + y_offset;
@@ -49,7 +50,8 @@ static inline int get_lowest_part_list_y(H264Context *h, H264Picture *pic, int n
     return FFMAX(abs(top), bottom);
 }
 
-static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
+static inline void get_lowest_part_y(H264Context *h, H264SliceContext *sl,
+                                     int refs[2][48], int n,
                                      int height, int y_offset, int list0,
                                      int list1, int *nrefs)
 {
@@ -58,7 +60,7 @@ static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
     y_offset += 16 * (h->mb_y >> MB_FIELD(h));
 
     if (list0) {
-        int ref_n = h->ref_cache[0][scan8[n]];
+        int ref_n = sl->ref_cache[0][scan8[n]];
         H264Picture *ref = &h->ref_list[0][ref_n];
 
         // Error resilience puts the current picture in the ref list.
@@ -66,7 +68,7 @@ static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
         // Fields can wait on each other, though.
         if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
             (ref->reference & 3) != h->picture_structure) {
-            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
+            my = get_lowest_part_list_y(h, sl, ref, n, height, y_offset, 0);
             if (refs[0][ref_n] < 0)
                 nrefs[0] += 1;
             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
@@ -74,12 +76,12 @@ static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
     }
 
     if (list1) {
-        int ref_n    = h->ref_cache[1][scan8[n]];
+        int ref_n    = sl->ref_cache[1][scan8[n]];
         H264Picture *ref = &h->ref_list[1][ref_n];
 
         if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
             (ref->reference & 3) != h->picture_structure) {
-            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
+            my = get_lowest_part_list_y(h, sl, ref, n, height, y_offset, 1);
             if (refs[1][ref_n] < 0)
                 nrefs[1] += 1;
             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
@@ -92,7 +94,7 @@ static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
  *
  * @param h the H264 context
  */
-static void await_references(H264Context *h)
+static void await_references(H264Context *h, H264SliceContext *sl)
 {
     const int mb_xy   = h->mb_xy;
     const int mb_type = h->cur_pic.mb_type[mb_xy];
@@ -103,17 +105,17 @@ static void await_references(H264Context *h)
     memset(refs, -1, sizeof(refs));
 
     if (IS_16X16(mb_type)) {
-        get_lowest_part_y(h, refs, 0, 16, 0,
+        get_lowest_part_y(h, sl, refs, 0, 16, 0,
                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
     } else if (IS_16X8(mb_type)) {
-        get_lowest_part_y(h, refs, 0, 8, 0,
+        get_lowest_part_y(h, sl, refs, 0, 8, 0,
                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
-        get_lowest_part_y(h, refs, 8, 8, 8,
+        get_lowest_part_y(h, sl, refs, 8, 8, 8,
                           IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
     } else if (IS_8X16(mb_type)) {
-        get_lowest_part_y(h, refs, 0, 16, 0,
+        get_lowest_part_y(h, sl, refs, 0, 16, 0,
                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
-        get_lowest_part_y(h, refs, 4, 16, 0,
+        get_lowest_part_y(h, sl, refs, 4, 16, 0,
                           IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
     } else {
         int i;
@@ -126,25 +128,25 @@ static void await_references(H264Context *h)
             int y_offset          = (i & 2) << 2;
 
             if (IS_SUB_8X8(sub_mb_type)) {
-                get_lowest_part_y(h, refs, n, 8, y_offset,
+                get_lowest_part_y(h, sl, refs, n, 8, y_offset,
                                   IS_DIR(sub_mb_type, 0, 0),
                                   IS_DIR(sub_mb_type, 0, 1),
                                   nrefs);
             } else if (IS_SUB_8X4(sub_mb_type)) {
-                get_lowest_part_y(h, refs, n, 4, y_offset,
+                get_lowest_part_y(h, sl, refs, n, 4, y_offset,
                                   IS_DIR(sub_mb_type, 0, 0),
                                   IS_DIR(sub_mb_type, 0, 1),
                                   nrefs);
-                get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
+                get_lowest_part_y(h, sl, refs, n + 2, 4, y_offset + 4,
                                   IS_DIR(sub_mb_type, 0, 0),
                                   IS_DIR(sub_mb_type, 0, 1),
                                   nrefs);
             } else if (IS_SUB_4X8(sub_mb_type)) {
-                get_lowest_part_y(h, refs, n, 8, y_offset,
+                get_lowest_part_y(h, sl, refs, n, 8, y_offset,
                                   IS_DIR(sub_mb_type, 0, 0),
                                   IS_DIR(sub_mb_type, 0, 1),
                                   nrefs);
-                get_lowest_part_y(h, refs, n + 1, 8, y_offset,
+                get_lowest_part_y(h, sl, refs, n + 1, 8, y_offset,
                                   IS_DIR(sub_mb_type, 0, 0),
                                   IS_DIR(sub_mb_type, 0, 1),
                                   nrefs);
@@ -153,7 +155,7 @@ static void await_references(H264Context *h)
                 assert(IS_SUB_4X4(sub_mb_type));
                 for (j = 0; j < 4; j++) {
                     int sub_y_offset = y_offset + 2 * (j & 2);
-                    get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
+                    get_lowest_part_y(h, sl, refs, n + j, 4, sub_y_offset,
                                       IS_DIR(sub_mb_type, 0, 0),
                                       IS_DIR(sub_mb_type, 0, 1),
                                       nrefs);
@@ -200,7 +202,8 @@ static void await_references(H264Context *h)
         }
 }
 
-static av_always_inline void mc_dir_part(H264Context *h, H264Picture *pic,
+static av_always_inline void mc_dir_part(H264Context *h, H264SliceContext *sl,
+                                         H264Picture *pic,
                                          int n, int square, int height,
                                          int delta, int list,
                                          uint8_t *dest_y, uint8_t *dest_cb,
@@ -210,8 +213,8 @@ static av_always_inline void mc_dir_part(H264Context *h, H264Picture *pic,
                                          h264_chroma_mc_func chroma_op,
                                          int pixel_shift, int chroma_idc)
 {
-    const int mx      = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
-    int my            = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
+    const int mx      = sl->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
+    int my            = sl->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
     const int luma_xy = (mx & 3) + ((my & 3) << 2);
     ptrdiff_t offset  = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
     uint8_t *src_y    = pic->f.data[0] + offset;
@@ -315,7 +318,8 @@ static av_always_inline void mc_dir_part(H264Context *h, H264Picture *pic,
               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
 }
 
-static av_always_inline void mc_part_std(H264Context *h, int n, int square,
+static av_always_inline void mc_part_std(H264Context *h, H264SliceContext *sl,
+                                         int n, int square,
                                          int height, int delta,
                                          uint8_t *dest_y, uint8_t *dest_cb,
                                          uint8_t *dest_cr,
@@ -345,8 +349,8 @@ static av_always_inline void mc_part_std(H264Context *h, int n, int square,
     y_offset += 8 * (h->mb_y >> MB_FIELD(h));
 
     if (list0) {
-        H264Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]];
-        mc_dir_part(h, ref, n, square, height, delta, 0,
+        H264Picture *ref = &h->ref_list[0][sl->ref_cache[0][scan8[n]]];
+        mc_dir_part(h, sl, ref, n, square, height, delta, 0,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_op, chroma_op, pixel_shift, chroma_idc);
 
@@ -355,8 +359,8 @@ static av_always_inline void mc_part_std(H264Context *h, int n, int square,
     }
 
     if (list1) {
-        H264Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]];
-        mc_dir_part(h, ref, n, square, height, delta, 1,
+        H264Picture *ref = &h->ref_list[1][sl->ref_cache[1][scan8[n]]];
+        mc_dir_part(h, sl, ref, n, square, height, delta, 1,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_op, chroma_op, pixel_shift, chroma_idc);
     }
@@ -404,14 +408,14 @@ static av_always_inline void mc_part_weighted(H264Context *h, H264SliceContext *
         uint8_t *tmp_cb = h->bipred_scratchpad;
         uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift);
         uint8_t *tmp_y  = h->bipred_scratchpad + 16 * h->mb_uvlinesize;
-        int refn0       = h->ref_cache[0][scan8[n]];
-        int refn1       = h->ref_cache[1][scan8[n]];
+        int refn0       = sl->ref_cache[0][scan8[n]];
+        int refn1       = sl->ref_cache[1][scan8[n]];
 
-        mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
+        mc_dir_part(h, sl, &h->ref_list[0][refn0], n, square, height, delta, 0,
                     dest_y, dest_cb, dest_cr,
                     x_offset, y_offset, qpix_put, chroma_put,
                     pixel_shift, chroma_idc);
-        mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
+        mc_dir_part(h, sl, &h->ref_list[1][refn1], n, square, height, delta, 1,
                     tmp_y, tmp_cb, tmp_cr,
                     x_offset, y_offset, qpix_put, chroma_put,
                     pixel_shift, chroma_idc);
@@ -447,9 +451,9 @@ static av_always_inline void mc_part_weighted(H264Context *h, H264SliceContext *
         }
     } else {
         int list     = list1 ? 1 : 0;
-        int refn     = h->ref_cache[list][scan8[n]];
+        int refn     = sl->ref_cache[list][scan8[n]];
         H264Picture *ref = &h->ref_list[list][refn];
-        mc_dir_part(h, ref, n, square, height, delta, list,
+        mc_dir_part(h, sl, ref, n, square, height, delta, list,
                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
                     qpix_put, chroma_put, pixel_shift, chroma_idc);
 
@@ -470,15 +474,16 @@ static av_always_inline void mc_part_weighted(H264Context *h, H264SliceContext *
     }
 }
 
-static av_always_inline void prefetch_motion(H264Context *h, int list,
-                                             int pixel_shift, int chroma_idc)
+static av_always_inline void prefetch_motion(H264Context *h, H264SliceContext *sl,
+                                             int list, int pixel_shift,
+                                             int chroma_idc)
 {
     /* fetch pixels for estimated mv 4 macroblocks ahead
      * optimized for 64byte cache lines */
-    const int refn = h->ref_cache[list][scan8[0]];
+    const int refn = sl->ref_cache[list][scan8[0]];
     if (refn >= 0) {
-        const int mx  = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * h->mb_x + 8;
-        const int my  = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * h->mb_y;
+        const int mx  = (sl->mv_cache[list][scan8[0]][0] >> 2) + 16 * h->mb_x + 8;
+        const int my  = (sl->mv_cache[list][scan8[0]][1] >> 2) + 16 * h->mb_y;
         uint8_t **src = h->ref_list[list][refn].f.data;
         int off       = (mx << pixel_shift) +
                         (my + (h->mb_x & 3) * 4) * h->mb_linesize +
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index fcd17e8..6235c11 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -81,13 +81,13 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h, H264SliceContext *sl)
                 if (!USES_LIST(mb_type, list))
                     continue;
                 if (IS_16X16(mb_type)) {
-                    int8_t *ref = &h->ref_cache[list][scan8[0]];
+                    int8_t *ref = &sl->ref_cache[list][scan8[0]];
                     fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (h->mb_y & 1), 1);
                 } else {
                     for (i = 0; i < 16; i += 4) {
-                        int ref = h->ref_cache[list][scan8[i]];
+                        int ref = sl->ref_cache[list][scan8[i]];
                         if (ref >= 0)
-                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
+                            fill_rectangle(&sl->ref_cache[list][scan8[i]], 2, 2,
                                            8, (16 + ref) ^ (h->mb_y & 1), 1);
                     }
                 }
@@ -306,13 +306,13 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h, H264SliceContext
                 if (!USES_LIST(mb_type, list))
                     continue;
                 if (IS_16X16(mb_type)) {
-                    int8_t *ref = &h->ref_cache[list][scan8[0]];
+                    int8_t *ref = &sl->ref_cache[list][scan8[0]];
                     fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (h->mb_y & 1), 1);
                 } else {
                     for (i = 0; i < 16; i += 4) {
-                        int ref = h->ref_cache[list][scan8[i]];
+                        int ref = sl->ref_cache[list][scan8[i]];
                         if (ref >= 0)
-                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
+                            fill_rectangle(&sl->ref_cache[list][scan8[i]], 2, 2,
                                            8, (16 + ref) ^ (h->mb_y & 1), 1);
                     }
                 }
diff --git a/libavcodec/h264_mc_template.c b/libavcodec/h264_mc_template.c
index 0e58eb3..11d7d1f 100644
--- a/libavcodec/h264_mc_template.c
+++ b/libavcodec/h264_mc_template.c
@@ -49,14 +49,14 @@ static void mc_part(H264Context *h, H264SliceContext *sl,
                     int list0, int list1)
 {
     if ((sl->use_weight == 2 && list0 && list1 &&
-         (sl->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) ||
+         (sl->implicit_weight[sl->ref_cache[0][scan8[n]]][sl->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) ||
         sl->use_weight == 1)
         mc_part_weighted(h, sl, n, square, height, delta, dest_y, dest_cb, dest_cr,
                          x_offset, y_offset, qpix_put, chroma_put,
                          weight_op[0], weight_op[1], weight_avg[0],
                          weight_avg[1], list0, list1, PIXEL_SHIFT, CHROMA_IDC);
     else
-        mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
+        mc_part_std(h, sl, n, square, height, delta, dest_y, dest_cb, dest_cr,
                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
                     chroma_avg, list0, list1, PIXEL_SHIFT, CHROMA_IDC);
 }
@@ -77,8 +77,8 @@ static void MCFUNC(hl_motion)(H264Context *h, H264SliceContext *sl,
     assert(IS_INTER(mb_type));
 
     if (HAVE_THREADS && (h->avctx->active_thread_type & FF_THREAD_FRAME))
-        await_references(h);
-    prefetch_motion(h, 0, PIXEL_SHIFT, CHROMA_IDC);
+        await_references(h, sl);
+    prefetch_motion(h, sl, 0, PIXEL_SHIFT, CHROMA_IDC);
 
     if (IS_16X16(mb_type)) {
         mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
@@ -158,6 +158,6 @@ static void MCFUNC(hl_motion)(H264Context *h, H264SliceContext *sl,
         }
     }
 
-    prefetch_motion(h, 1, PIXEL_SHIFT, CHROMA_IDC);
+    prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC);
 }
 
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index 78810df..3d4ffa0 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -39,7 +39,7 @@ static av_always_inline int fetch_diagonal_mv(H264Context *h, H264SliceContext *
                                               const int16_t **C,
                                               int i, int list, int part_width)
 {
-    const int topright_ref = h->ref_cache[list][i - 8 + part_width];
+    const int topright_ref = sl->ref_cache[list][i - 8 + part_width];
 
     /* there is no consistent mapping of mvs to neighboring locations that will
      * make mbaff happy, so we can't move all this logic to fill_caches */
@@ -50,17 +50,17 @@ static av_always_inline int fetch_diagonal_mv(H264Context *h, H264SliceContext *
         if (!USES_LIST(mb_type, list))                                  \
             return LIST_NOT_USED;                                       \
         mv = h->cur_pic_ptr->motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \
-        h->mv_cache[list][scan8[0] - 2][0] = mv[0];                     \
-        h->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP;               \
+        sl->mv_cache[list][scan8[0] - 2][0] = mv[0];                     \
+        sl->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP;               \
         return h->cur_pic_ptr->ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP;
 
         if (topright_ref == PART_NOT_AVAILABLE
             && i >= scan8[0] + 8 && (i & 7) == 4
-            && h->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) {
+            && sl->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) {
             const uint32_t *mb_types = h->cur_pic_ptr->mb_type;
             const int16_t *mv;
-            AV_ZERO32(h->mv_cache[list][scan8[0] - 2]);
-            *C = h->mv_cache[list][scan8[0] - 2];
+            AV_ZERO32(sl->mv_cache[list][scan8[0] - 2]);
+            *C = sl->mv_cache[list][scan8[0] - 2];
 
             if (!MB_FIELD(h) && IS_INTERLACED(sl->left_type[0])) {
                 SET_DIAG_MV(* 2, >> 1, sl->left_mb_xy[0] + h->mb_stride,
@@ -75,13 +75,13 @@ static av_always_inline int fetch_diagonal_mv(H264Context *h, H264SliceContext *
     }
 
     if (topright_ref != PART_NOT_AVAILABLE) {
-        *C = h->mv_cache[list][i - 8 + part_width];
+        *C = sl->mv_cache[list][i - 8 + part_width];
         return topright_ref;
     } else {
         tprintf(h->avctx, "topright MV not available\n");
 
-        *C = h->mv_cache[list][i - 8 - 1];
-        return h->ref_cache[list][i - 8 - 1];
+        *C = sl->mv_cache[list][i - 8 - 1];
+        return sl->ref_cache[list][i - 8 - 1];
     }
 }
 
@@ -99,10 +99,10 @@ static av_always_inline void pred_motion(H264Context *const h,
                                          int *const mx, int *const my)
 {
     const int index8       = scan8[n];
-    const int top_ref      = h->ref_cache[list][index8 - 8];
-    const int left_ref     = h->ref_cache[list][index8 - 1];
-    const int16_t *const A = h->mv_cache[list][index8 - 1];
-    const int16_t *const B = h->mv_cache[list][index8 - 8];
+    const int top_ref      = sl->ref_cache[list][index8 - 8];
+    const int left_ref     = sl->ref_cache[list][index8 - 1];
+    const int16_t *const A = sl->mv_cache[list][index8 - 1];
+    const int16_t *const B = sl->mv_cache[list][index8 - 8];
     const int16_t *C;
     int diagonal_ref, match_count;
 
@@ -163,8 +163,8 @@ static av_always_inline void pred_16x8_motion(H264Context *const h,
                                               int *const mx, int *const my)
 {
     if (n == 0) {
-        const int top_ref      = h->ref_cache[list][scan8[0] - 8];
-        const int16_t *const B = h->mv_cache[list][scan8[0] - 8];
+        const int top_ref      = sl->ref_cache[list][scan8[0] - 8];
+        const int16_t *const B = sl->mv_cache[list][scan8[0] - 8];
 
         tprintf(h->avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                 top_ref, B[0], B[1], h->mb_x, h->mb_y, n, list);
@@ -175,8 +175,8 @@ static av_always_inline void pred_16x8_motion(H264Context *const h,
             return;
         }
     } else {
-        const int left_ref     = h->ref_cache[list][scan8[8] - 1];
-        const int16_t *const A = h->mv_cache[list][scan8[8] - 1];
+        const int left_ref     = sl->ref_cache[list][scan8[8] - 1];
+        const int16_t *const A = sl->mv_cache[list][scan8[8] - 1];
 
         tprintf(h->avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                 left_ref, A[0], A[1], h->mb_x, h->mb_y, n, list);
@@ -204,8 +204,8 @@ static av_always_inline void pred_8x16_motion(H264Context *const h,
                                               int *const mx, int *const my)
 {
     if (n == 0) {
-        const int left_ref     = h->ref_cache[list][scan8[0] - 1];
-        const int16_t *const A = h->mv_cache[list][scan8[0] - 1];
+        const int left_ref     = sl->ref_cache[list][scan8[0] - 1];
+        const int16_t *const A = sl->mv_cache[list][scan8[0] - 1];
 
         tprintf(h->avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                 left_ref, A[0], A[1], h->mb_x, h->mb_y, n, list);
@@ -265,7 +265,7 @@ static av_always_inline void pred_pskip_motion(H264Context *const h,
     const int16_t *A, *B, *C;
     int b_stride = h->b_stride;
 
-    fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
+    fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
 
     /* To avoid doing an entire fill_decode_caches, we inline the relevant
      * parts here.
@@ -345,11 +345,11 @@ static av_always_inline void pred_pskip_motion(H264Context *const h,
         my = mid_pred(A[1], B[1], C[1]);
     }
 
-    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4);
+    fill_rectangle(sl->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4);
     return;
 
 zeromv:
-    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
+    fill_rectangle(sl->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
     return;
 }
 
@@ -607,9 +607,9 @@ static void fill_decode_caches(H264Context *h, H264SliceContext *sl, int mb_type
         int list;
         int b_stride = h->b_stride;
         for (list = 0; list < h->list_count; list++) {
-            int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
+            int8_t *ref_cache = &sl->ref_cache[list][scan8[0]];
             int8_t *ref       = h->cur_pic.ref_index[list];
-            int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
+            int16_t(*mv_cache)[2] = &sl->mv_cache[list][scan8[0]];
             int16_t(*mv)[2]       = h->cur_pic.motion_val[list];
             if (!USES_LIST(mb_type, list))
                 continue;
@@ -770,9 +770,9 @@ static void fill_decode_caches(H264Context *h, H264SliceContext *sl, int mb_type
                 if (MB_FIELD(h)) {
 
 #define MAP_F2F(idx, mb_type)                                           \
-    if (!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) {      \
-        h->ref_cache[list][idx]    <<= 1;                               \
-        h->mv_cache[list][idx][1]   /= 2;                               \
+    if (!IS_INTERLACED(mb_type) && sl->ref_cache[list][idx] >= 0) {     \
+        sl->ref_cache[list][idx]    <<= 1;                              \
+        sl->mv_cache[list][idx][1]   /= 2;                              \
         h->mvd_cache[list][idx][1] >>= 1;                               \
     }
 
@@ -781,9 +781,9 @@ static void fill_decode_caches(H264Context *h, H264SliceContext *sl, int mb_type
 
 #undef MAP_F2F
 #define MAP_F2F(idx, mb_type)                                           \
-    if (IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) {       \
-        h->ref_cache[list][idx]    >>= 1;                               \
-        h->mv_cache[list][idx][1]  <<= 1;                               \
+    if (IS_INTERLACED(mb_type) && sl->ref_cache[list][idx] >= 0) {      \
+        sl->ref_cache[list][idx]    >>= 1;                              \
+        sl->mv_cache[list][idx][1]  <<= 1;                              \
         h->mvd_cache[list][idx][1] <<= 1;                               \
     }
 
@@ -817,7 +817,7 @@ static void av_unused decode_mb_skip(H264Context *h, H264SliceContext *sl)
             fill_decode_neighbors(h, sl, mb_type);
             fill_decode_caches(h, sl, mb_type); //FIXME check what is needed and what not ...
         }
-        ff_h264_pred_direct_motion(h, &mb_type);
+        ff_h264_pred_direct_motion(h, sl, &mb_type);
         mb_type |= MB_TYPE_SKIP;
     } else {
         mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP;
@@ -826,7 +826,7 @@ static void av_unused decode_mb_skip(H264Context *h, H264SliceContext *sl)
         pred_pskip_motion(h, sl);
     }
 
-    write_back_motion(h, mb_type);
+    write_back_motion(h, sl, mb_type);
     h->cur_pic.mb_type[mb_xy]      = mb_type;
     h->cur_pic.qscale_table[mb_xy] = sl->qscale;
     h->slice_table[mb_xy]            = h->slice_num;
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 11dc984..9255389 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -1822,6 +1822,7 @@ int ff_h264_get_slice_type(const H264Context *h)
 }
 
 static av_always_inline void fill_filter_caches_inter(H264Context *h,
+                                                      H264SliceContext *sl,
                                                       int mb_type, int top_xy,
                                                       int left_xy[LEFT_MBS],
                                                       int top_type,
@@ -1829,8 +1830,8 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h,
                                                       int mb_xy, int list)
 {
     int b_stride = h->b_stride;
-    int16_t(*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
-    int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
+    int16_t(*mv_dst)[2] = &sl->mv_cache[list][scan8[0]];
+    int8_t *ref_cache   = &sl->ref_cache[list][scan8[0]];
     if (IS_INTER(mb_type) || IS_DIRECT(mb_type)) {
         if (USES_LIST(top_type, list)) {
             const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
@@ -1979,10 +1980,10 @@ static int fill_filter_caches(H264Context *h, H264SliceContext *sl, int mb_type)
     if (IS_INTRA(mb_type))
         return 0;
 
-    fill_filter_caches_inter(h, mb_type, top_xy, left_xy,
+    fill_filter_caches_inter(h, sl, mb_type, top_xy, left_xy,
                              top_type, left_type, mb_xy, 0);
     if (h->list_count == 2)
-        fill_filter_caches_inter(h, mb_type, top_xy, left_xy,
+        fill_filter_caches_inter(h, sl, mb_type, top_xy, left_xy,
                                  top_type, left_type, mb_xy, 1);
 
     nnz       = h->non_zero_count[mb_xy];
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 499c35d..163a49e 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -459,15 +459,15 @@ static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
                 int32_t mv = pack16to32(mx, my);
 
                 if (part_height == 8 && i < 8) {
-                    AV_WN32A(h->mv_cache[dir][scan8[k] + 1 * 8], mv);
+                    AV_WN32A(sl->mv_cache[dir][scan8[k] + 1 * 8], mv);
 
                     if (part_width == 8 && j < 8)
-                        AV_WN32A(h->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
+                        AV_WN32A(sl->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
                 }
                 if (part_width == 8 && j < 8)
-                    AV_WN32A(h->mv_cache[dir][scan8[k] + 1], mv);
+                    AV_WN32A(sl->mv_cache[dir][scan8[k] + 1], mv);
                 if (part_width == 4 || part_height == 4)
-                    AV_WN32A(h->mv_cache[dir][scan8[k]], mv);
+                    AV_WN32A(sl->mv_cache[dir][scan8[k]], mv);
             }
 
             /* write back motion vectors */
@@ -535,36 +535,36 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
         for (m = 0; m < 2; m++) {
             if (h->mb_x > 0 && sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
                 for (i = 0; i < 4; i++)
-                    AV_COPY32(h->mv_cache[m][scan8[0] - 1 + i * 8],
+                    AV_COPY32(sl->mv_cache[m][scan8[0] - 1 + i * 8],
                               h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
             } else {
                 for (i = 0; i < 4; i++)
-                    AV_ZERO32(h->mv_cache[m][scan8[0] - 1 + i * 8]);
+                    AV_ZERO32(sl->mv_cache[m][scan8[0] - 1 + i * 8]);
             }
             if (h->mb_y > 0) {
-                memcpy(h->mv_cache[m][scan8[0] - 1 * 8],
+                memcpy(sl->mv_cache[m][scan8[0] - 1 * 8],
                        h->cur_pic.motion_val[m][b_xy - h->b_stride],
                        4 * 2 * sizeof(int16_t));
-                memset(&h->ref_cache[m][scan8[0] - 1 * 8],
+                memset(&sl->ref_cache[m][scan8[0] - 1 * 8],
                        (sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
 
                 if (h->mb_x < h->mb_width - 1) {
-                    AV_COPY32(h->mv_cache[m][scan8[0] + 4 - 1 * 8],
+                    AV_COPY32(sl->mv_cache[m][scan8[0] + 4 - 1 * 8],
                               h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
-                    h->ref_cache[m][scan8[0] + 4 - 1 * 8] =
+                    sl->ref_cache[m][scan8[0] + 4 - 1 * 8] =
                         (sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
                          sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
                 } else
-                    h->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
+                    sl->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
                 if (h->mb_x > 0) {
-                    AV_COPY32(h->mv_cache[m][scan8[0] - 1 - 1 * 8],
+                    AV_COPY32(sl->mv_cache[m][scan8[0] - 1 - 1 * 8],
                               h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
-                    h->ref_cache[m][scan8[0] - 1 - 1 * 8] =
+                    sl->ref_cache[m][scan8[0] - 1 - 1 * 8] =
                         (sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
                 } else
-                    h->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
+                    sl->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
             } else
-                memset(&h->ref_cache[m][scan8[0] - 1 * 8 - 1],
+                memset(&sl->ref_cache[m][scan8[0] - 1 * 8 - 1],
                        PART_NOT_AVAILABLE, 8);
 
             if (h->pict_type != AV_PICTURE_TYPE_B)
@@ -1111,6 +1111,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
     const uint8_t *buf = avpkt->data;
     SVQ3Context *s     = avctx->priv_data;
     H264Context *h     = &s->h;
+    H264SliceContext *sl = &h->slice_ctx[0];
     int buf_size       = avpkt->size;
     int ret, m, i;
 
@@ -1235,9 +1236,9 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
         for (i = 0; i < 4; i++) {
             int j;
             for (j = -1; j < 4; j++)
-                h->ref_cache[m][scan8[0] + 8 * i + j] = 1;
+                sl->ref_cache[m][scan8[0] + 8 * i + j] = 1;
             if (i < 3)
-                h->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
+                sl->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
         }
     }
 



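The change is mechanical throughout: every reader and writer of the motion-vector and reference caches gains an H264SliceContext *sl argument and indexes sl->mv_cache / sl->ref_cache instead of the decoder-wide h->mv_cache / h->ref_cache, so each slice context owns its own scratch caches. Below is a minimal, self-contained C sketch of that access pattern; the struct names, array sizes and the ref_at() helper (SliceCtx, DecCtx) are simplified stand-ins for illustration, not libavcodec's actual definitions.

/* Simplified sketch of the per-slice cache pattern in this commit.
 * Each slice context carries its own mv/ref caches, so code that used
 * to touch decoder-wide state now takes the slice context explicitly,
 * mirroring the new mc_dir_part()/prefetch_motion() signatures above. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PART_NOT_AVAILABLE (-2)

typedef struct SliceCtx {
    int16_t mv_cache[2][5 * 8][2];   /* per-slice motion vector cache */
    int8_t  ref_cache[2][5 * 8];     /* per-slice reference index cache */
} SliceCtx;

typedef struct DecCtx {
    SliceCtx slice_ctx[2];           /* one context per slice-decoding thread */
    int      nb_slice_ctx;
} DecCtx;

/* Callers pass the slice context alongside the decoder context. */
static int ref_at(const SliceCtx *sl, int list, int idx)
{
    return sl->ref_cache[list][idx];
}

int main(void)
{
    DecCtx h = { .nb_slice_ctx = 2 };

    /* Initialization loop analogous to ff_h264_context_init() in the
     * diff: mark the unavailable cache slots in every slice context. */
    for (int i = 0; i < h.nb_slice_ctx; i++)
        memset(h.slice_ctx[i].ref_cache, PART_NOT_AVAILABLE,
               sizeof(h.slice_ctx[i].ref_cache));

    printf("%d\n", ref_at(&h.slice_ctx[0], 0, 0)); /* prints -2 */
    return 0;
}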