[FFmpeg-devel] [PATCH] h264: don't touch H264Context->ref_count[] during MB decoding.

Clément Bœsch ubitux at gmail.com
Thu Oct 4 09:09:22 CEST 2012


On Thu, Oct 04, 2012 at 03:56:12AM +0200, Michael Niedermayer wrote:
> On Wed, Oct 03, 2012 at 04:25:14PM -0700, Ronald S. Bultje wrote:
> > From: "Ronald S. Bultje" <rsbultje at gmail.com>
> > 
> > The variable is copied to subsequent threads at the same time, so this
> > may cause wrong ref_count[] values to be copied to subsequent threads.
> > 
> > This bug was found using TSAN.
> > ---
> >  libavcodec/h264_cabac.c |   41 ++++++++++++++++-------------------------
> >  libavcodec/h264_cavlc.c |   33 +++++++++++++--------------------
> >  2 files changed, 29 insertions(+), 45 deletions(-)
> > 
> > diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
> > index 7e8947b..97e8128 100644
> > --- a/libavcodec/h264_cabac.c
> > +++ b/libavcodec/h264_cabac.c
> > @@ -2003,11 +2003,6 @@ decode_intra_mb:
> >          return 0;
> >      }
> >  
> > -    if(MB_MBAFF){
> > -        h->ref_count[0] <<= 1;
> > -        h->ref_count[1] <<= 1;
> > -    }
> > -
> >      fill_decode_caches(h, mb_type);
> >  
> >      if( IS_INTRA( mb_type ) ) {
> 
> i suggest to add a priv/local/tls/whatever_ref_count[2]
> and set/change it as ref_count is before your patch
> that way the shift doesnt need to be done multiple times in the
> more inner loops. This should also keep the code smaller and reduce
> pressure on the code cache
> 

Like attached?

-- 
Clément B.
-------------- next part --------------
From a96e345826b53bf93c6a7c99dc46fd44417679a0 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje at gmail.com>
Date: Thu, 4 Oct 2012 09:01:45 +0200
Subject: [PATCH] lavc/h264: don't touch H264Context->ref_count[] during MB
 decoding.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The variable is copied to subsequent threads at the same time, so this
may cause wrong ref_count[] values to be copied to subsequent threads.

This bug was found using TSAN and Helgrind.

Original patch by Ronald, adapted with a local_ref_count by Clément,
following the suggestion of Michael Niedermayer.

Signed-off-by: Clément Bœsch <clement.boesch at smartjog.com>
---
 libavcodec/h264_cabac.c |   36 +++++++++++++++++-------------------
 libavcodec/h264_cavlc.c |   32 +++++++++++++++-----------------
 2 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index a064292..def3624 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1864,6 +1864,9 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
     int dct8x8_allowed= h->pps.transform_8x8_mode;
     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
+    unsigned local_ref_count[2];
+
+    memcpy(local_ref_count, h->ref_count, sizeof(local_ref_count));
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
 
@@ -2005,8 +2008,8 @@ decode_intra_mb:
     }
 
     if(MB_MBAFF){
-        h->ref_count[0] <<= 1;
-        h->ref_count[1] <<= 1;
+        local_ref_count[0] <<= 1;
+        local_ref_count[1] <<= 1;
     }
 
     fill_decode_caches(h, mb_type);
@@ -2077,10 +2080,10 @@ decode_intra_mb:
                 for( i = 0; i < 4; i++ ) {
                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
-                        if( h->ref_count[list] > 1 ){
+                        if (local_ref_count[list] > 1) {
                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
-                            if(ref[list][i] >= (unsigned)h->ref_count[list]){
-                                av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
+                            if (ref[list][i] >= (unsigned)local_ref_count[list]) {
+                                av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], local_ref_count[list]);
                                 return -1;
                             }
                         }else
@@ -2163,10 +2166,10 @@ decode_intra_mb:
             for(list=0; list<h->list_count; list++){
                 if(IS_DIR(mb_type, 0, list)){
                     int ref;
-                    if(h->ref_count[list] > 1){
+                    if (local_ref_count[list] > 1) {
                         ref= decode_cabac_mb_ref(h, list, 0);
-                        if(ref >= (unsigned)h->ref_count[list]){
-                            av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+                        if (ref >= (unsigned)local_ref_count[list]) {
+                            av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
                             return -1;
                         }
                     }else
@@ -2191,10 +2194,10 @@ decode_intra_mb:
                     for(i=0; i<2; i++){
                         if(IS_DIR(mb_type, i, list)){
                             int ref;
-                            if(h->ref_count[list] > 1){
+                            if (local_ref_count[list] > 1) {
                                 ref= decode_cabac_mb_ref( h, list, 8*i );
-                                if(ref >= (unsigned)h->ref_count[list]){
-                                    av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+                                if (ref >= (unsigned)local_ref_count[list]) {
+                                    av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
                                     return -1;
                                 }
                             }else
@@ -2226,10 +2229,10 @@ decode_intra_mb:
                     for(i=0; i<2; i++){
                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
                             int ref;
-                            if(h->ref_count[list] > 1){
+                            if (local_ref_count[list] > 1) {
                                 ref= decode_cabac_mb_ref( h, list, 4*i );
-                                if(ref >= (unsigned)h->ref_count[list]){
-                                    av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+                                if (ref >= (unsigned)local_ref_count[list]) {
+                                    av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
                                     return -1;
                                 }
                             }else
@@ -2402,10 +2405,5 @@ decode_intra_mb:
     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
     write_back_non_zero_count(h);
 
-    if(MB_MBAFF){
-        h->ref_count[0] >>= 1;
-        h->ref_count[1] >>= 1;
-    }
-
     return 0;
 }
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index f5d403b..00d83be 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -700,6 +700,9 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
     int dct8x8_allowed= h->pps.transform_8x8_mode;
     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
+    unsigned local_ref_count[2];
+
+    memcpy(local_ref_count, h->ref_count, sizeof(local_ref_count));
 
     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
 
@@ -786,8 +789,8 @@ decode_intra_mb:
     }
 
     if(MB_MBAFF){
-        h->ref_count[0] <<= 1;
-        h->ref_count[1] <<= 1;
+        local_ref_count[0] <<= 1;
+        local_ref_count[1] <<= 1;
     }
 
     fill_decode_neighbors(h, mb_type);
@@ -869,7 +872,7 @@ decode_intra_mb:
         }
 
         for(list=0; list<h->list_count; list++){
-            int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
+            int ref_count= IS_REF0(mb_type) ? 1 : local_ref_count[list];
             for(i=0; i<4; i++){
                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
@@ -949,13 +952,13 @@ decode_intra_mb:
             for(list=0; list<h->list_count; list++){
                     unsigned int val;
                     if(IS_DIR(mb_type, 0, list)){
-                        if(h->ref_count[list]==1){
+                        if(local_ref_count[list]==1){
                             val= 0;
-                        }else if(h->ref_count[list]==2){
+                        }else if(local_ref_count[list]==2){
                             val= get_bits1(&s->gb)^1;
                         }else{
                             val= get_ue_golomb_31(&s->gb);
-                            if(val >= h->ref_count[list]){
+                            if(val >= local_ref_count[list]){
                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                 return -1;
                             }
@@ -979,13 +982,13 @@ decode_intra_mb:
                     for(i=0; i<2; i++){
                         unsigned int val;
                         if(IS_DIR(mb_type, i, list)){
-                            if(h->ref_count[list] == 1){
+                            if(local_ref_count[list] == 1){
                                 val= 0;
-                            }else if(h->ref_count[list] == 2){
+                            }else if(local_ref_count[list] == 2){
                                 val= get_bits1(&s->gb)^1;
                             }else{
                                 val= get_ue_golomb_31(&s->gb);
-                                if(val >= h->ref_count[list]){
+                                if(val >= local_ref_count[list]){
                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                     return -1;
                                 }
@@ -1016,13 +1019,13 @@ decode_intra_mb:
                     for(i=0; i<2; i++){
                         unsigned int val;
                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
-                            if(h->ref_count[list]==1){
+                            if(local_ref_count[list]==1){
                                 val= 0;
-                            }else if(h->ref_count[list]==2){
+                            }else if(local_ref_count[list]==2){
                                 val= get_bits1(&s->gb)^1;
                             }else{
                                 val= get_ue_golomb_31(&s->gb);
-                                if(val >= h->ref_count[list]){
+                                if(val >= local_ref_count[list]){
                                     av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                     return -1;
                                 }
@@ -1162,10 +1165,5 @@ decode_intra_mb:
     s->current_picture.f.qscale_table[mb_xy] = s->qscale;
     write_back_non_zero_count(h);
 
-    if(MB_MBAFF){
-        h->ref_count[0] >>= 1;
-        h->ref_count[1] >>= 1;
-    }
-
     return 0;
 }
-- 
1.7.10.4

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20121004/01d0f070/attachment.asc>


More information about the ffmpeg-devel mailing list