[FFmpeg-devel] [PATCH] Patch cleanup for MPEG 1 & 2 optimizations

Jeff Downs heydowns
Mon Apr 14 16:53:42 CEST 2008


On Sun, 13 Apr 2008, Jeff Downs wrote:

> MPV_motion_internal is no longer inlined (max-inline-insns-single limit 
> reached).  Trying to increase that limit to double the gcc man page stated 
> default did nothing to help. Limit was still reached.
> 
> Adding av_always_inline to it makes it be inlined. MPV_motion is still not 
> (though I don't know if it is in current svn either).
> This (and fixing the aforementioned typo) gets performance closer to what 
> I posted for the hardcoded version.
> 

OP's patches, with av_always_inline added to MPV_motion_internal and the typo 
in the MPV_decode_mb_internal calls inside MPV_decode_mb fixed, are attached.

Here are benchmarks for these three patches (applied together), 10 runs each 
on a Core 2 Duo:

MPEG1 Current SVN:
User: avg: 0.201  stddev: 0.003  med: 0.200
Real: avg: 0.203  stddev: 0.003  med: 0.202

MPEG1 w/ Patches:
User: avg: 0.198  stddev: 0.003  med: 0.197
Real: avg: 0.201  stddev: 0.003  med: 0.200



MPEG2 Current SVN:
User: avg: 3.059  stddev: 0.029  med: 3.050
Real: avg: 3.101  stddev: 0.025  med: 3.096


MPEG2 w/ Patches:
User: avg: 3.012  stddev: 0.023  med: 3.005
Real: avg: 3.056  stddev: 0.026  med: 3.049


	-Jeff
-------------- next part --------------
Index: libavcodec/mpegvideo_common.h
===================================================================
--- libavcodec/mpegvideo_common.h	(revision 12790)
+++ libavcodec/mpegvideo_common.h	(working copy)
@@ -617,12 +635,12 @@
  * @param pic_op qpel motion compensation function (average or put normally)
  * the motion vectors are taken from s->mv and the MV type from s->mv_type
  */
-static inline void MPV_motion(MpegEncContext *s,
+static av_always_inline void MPV_motion_internal(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb,
                               uint8_t *dest_cr, int dir,
                               uint8_t **ref_picture,
                               op_pixels_func (*pix_op)[4],
-                              qpel_mc_func (*qpix_op)[16])
+                              qpel_mc_func (*qpix_op)[16], int is_mpeg12)
 {
     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
     int mb_x, mb_y, i;
@@ -633,7 +651,7 @@
 
     prefetch_motion(s, ref_picture, dir);
 
-    if(s->obmc && s->pict_type != FF_B_TYPE){
+    if(!is_mpeg12 && s->obmc && s->pict_type != FF_B_TYPE){
         int16_t mv_cache[4][4][2];
         const int xy= s->mb_x + s->mb_y*s->mb_stride;
         const int mot_stride= s->b8_stride;
@@ -704,12 +722,12 @@
                 gmc_motion(s, dest_y, dest_cb, dest_cr,
                             ref_picture);
             }
-        }else if(s->quarter_sample){
+        }else if(!is_mpeg12 && s->quarter_sample){
             qpel_motion(s, dest_y, dest_cb, dest_cr,
                         0, 0, 0,
                         ref_picture, pix_op, qpix_op,
                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
-        }else if(ENABLE_WMV2 && s->mspel){
+        }else if(!is_mpeg12 && ENABLE_WMV2 && s->mspel){
             ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                         ref_picture, pix_op,
                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
@@ -722,6 +740,7 @@
         }
         break;
     case MV_TYPE_8X8:
+    if (!is_mpeg12) {
         mx = 0;
         my = 0;
         if(s->quarter_sample){
@@ -775,10 +794,11 @@
 
         if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
             chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
+    }
         break;
     case MV_TYPE_FIELD:
         if (s->picture_structure == PICT_FRAME) {
-            if(s->quarter_sample){
+            if(!is_mpeg12 && s->quarter_sample){
                 for(i=0; i<2; i++){
                     qpel_motion(s, dest_y, dest_cb, dest_cr,
                                 1, i, s->field_select[dir][i],
@@ -862,4 +882,20 @@
     }
 }
 
+static inline void MPV_motion(MpegEncContext *s,
+                              uint8_t *dest_y, uint8_t *dest_cb,
+                              uint8_t *dest_cr, int dir,
+                              uint8_t **ref_picture,
+                              op_pixels_func (*pix_op)[4],
+                              qpel_mc_func (*qpix_op)[16])
+{
+#ifndef CONFIG_SMALL
+    if(s->out_format == FMT_MPEG1)
+        MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
+                            ref_picture, pix_op, qpix_op, 1);
+    else
+#endif
+        MPV_motion_internal(s, dest_y, dest_cb, dest_cr, dir,
+                            ref_picture, pix_op, qpix_op, 0);
+}
 #endif /* FFMPEG_MPEGVIDEO_COMMON_H */
-------------- next part --------------
Index: libavcodec/mpegvideo.c
===================================================================
--- libavcodec/mpegvideo.c	(revision 12790)
+++ libavcodec/mpegvideo.c	(working copy)
@@ -1737,7 +1737,7 @@
  */
 static av_always_inline
 void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
-                            int lowres_flag)
+                            int lowres_flag, int is_mpeg12)
 {
     int mb_x, mb_y;
     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
@@ -1764,7 +1764,7 @@
 
     /* update DC predictors for P macroblocks */
     if (!s->mb_intra) {
-        if (s->h263_pred || s->h263_aic) {
+        if (!is_mpeg12 && (s->h263_pred || s->h263_aic)) {
             if(s->mbintra_table[mb_xy])
                 ff_clean_intra_table_entries(s);
         } else {
@@ -1773,7 +1773,7 @@
             s->last_dc[2] = 128 << s->intra_dc_precision;
         }
     }
-    else if (s->h263_pred || s->h263_aic)
+    else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
         s->mbintra_table[mb_xy]=1;
 
     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==FF_B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
@@ -1888,7 +1888,7 @@
                         add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                     }
                 }
-            } else if(s->codec_id != CODEC_ID_WMV2){
+            } else if(is_mpeg12 || (s->codec_id != CODEC_ID_WMV2)){
                 add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                 add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
@@ -1979,8 +1979,14 @@
 }
 
 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
-    if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
-    else                  MPV_decode_mb_internal(s, block, 0);
+#ifndef CONFIG_SMALL
+    if(s->out_format == FMT_MPEG1) {
+        if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
+        else                 MPV_decode_mb_internal(s, block, 0, 1);
+    } else
+#endif
+    if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 0);
+    else                  MPV_decode_mb_internal(s, block, 0, 0);
 }
 
 /**
-------------- next part --------------
Index: libavcodec/mpegvideo_common.h
===================================================================
--- libavcodec/mpegvideo_common.h	(revision 12790)
+++ libavcodec/mpegvideo_common.h	(working copy)
@@ -237,13 +237,12 @@
     return emu;
 }
 
-/* apply one mpeg motion vector to the three components */
 static av_always_inline
-void mpeg_motion(MpegEncContext *s,
+void mpeg_motion_internal(MpegEncContext *s,
                  uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                  int field_based, int bottom_field, int field_select,
                  uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
-                 int motion_x, int motion_y, int h)
+                 int motion_x, int motion_y, int h, int is_mpeg12)
 {
     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
     int dxy, uvdxy, mx, my, src_x, src_y,
@@ -265,7 +264,7 @@
     src_x = s->mb_x* 16               + (motion_x >> 1);
     src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);
 
-    if (s->out_format == FMT_H263) {
+    if (!is_mpeg12 && s->out_format == FMT_H263) {
         if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
             mx = (motion_x>>1)|(motion_x&1);
             my = motion_y >>1;
@@ -277,7 +276,7 @@
             uvsrc_x = src_x>>1;
             uvsrc_y = src_y>>1;
         }
-    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
+    }else if(!is_mpeg12 && s->out_format == FMT_H261){//even chroma mv's are full pel in H261
         mx = motion_x / 4;
         my = motion_y / 4;
         uvdxy = 0;
@@ -312,7 +311,7 @@
 
     if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
        || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
-            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
+            if(is_mpeg12 || s->codec_id == CODEC_ID_MPEG2VIDEO ||
                s->codec_id == CODEC_ID_MPEG1VIDEO){
                 av_log(s->avctx,AV_LOG_DEBUG,
                         "MPEG motion vector out of boundary\n");
@@ -360,11 +359,30 @@
         pix_op[s->chroma_x_shift][uvdxy]
                 (dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
     }
-    if((ENABLE_H261_ENCODER || ENABLE_H261_DECODER) &&
+    if(!is_mpeg12 && (ENABLE_H261_ENCODER || ENABLE_H261_DECODER) &&
          s->out_format == FMT_H261){
         ff_h261_loop_filter(s);
     }
 }
+/* apply one mpeg motion vector to the three components */
+static av_always_inline
+void mpeg_motion(MpegEncContext *s,
+                 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
+                 int field_based, int bottom_field, int field_select,
+                 uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
+                 int motion_x, int motion_y, int h)
+{
+#ifndef CONFIG_SMALL
+    if(s->out_format == FMT_MPEG1)
+        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, field_based,
+                    bottom_field, field_select, ref_picture, pix_op,
+                    motion_x, motion_y, h, 1);
+    else
+#endif
+        mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, field_based,
+                    bottom_field, field_select, ref_picture, pix_op,
+                    motion_x, motion_y, h, 0);
+}
 
 //FIXME move to dsputil, avg variant, 16x16 version
 static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){



More information about the ffmpeg-devel mailing list