[FFmpeg-soc] [soc]: r898 - dirac/libavcodec/dirac.c

marco subversion at mplayerhq.hu
Thu Aug 16 22:50:19 CEST 2007


Author: marco
Date: Thu Aug 16 22:50:19 2007
New Revision: 898

Log:
Merge the qpel/eighthpel interpolation code into the two-reference motion compensation code (motion_comp_block2refs) so that border conditions are handled more efficiently and further optimizations become possible.
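
For reference, the merged per-pixel path boils down to bilinear blending of the
four neighbouring half-pel samples: interior pixels index the reference line
directly, while pixels near the frame edge fall back to a bounds-checked
half-pel fetch.  Below is a minimal standalone sketch of that scheme; the
helper names (interp_bilinear, fetch_halfpel) and the rounding at the end are
illustrative only and not taken verbatim from the patch further down.

#include <stdint.h>

/* Clamped fetch from a half-pel upconverted reference plane, i.e. what a
 * bounds-checked helper in the style of get_halfpel() would do.  */
static inline int fetch_halfpel(const uint8_t *ref, int width, int height,
                                int x, int y)
{
    if (x < 0)          x = 0;
    if (y < 0)          y = 0;
    if (x > width - 1)  x = width - 1;
    if (y > height - 1) y = height - 1;
    return ref[y * width + x];
}

/* Bilinear interpolation at motion vector precision prec (2 = qpel,
 * 3 = eighthpel; half-pel and full-pel vectors take a separate path).
 * px/py are in 1/(1 << prec)-pel units on top of the half-pel grid of
 * the upconverted reference.  */
static int interp_bilinear(const uint8_t *ref, int width, int height,
                           int px, int py, int prec)
{
    const int fbits = prec - 1;             /* bits below the half-pel grid */
    const int hx = px >> fbits, hy = py >> fbits;
    const int rx = px - (hx << fbits), ry = py - (hy << fbits);
    const int s  = 1 << fbits;
    const int w00 = (s - ry) * (s - rx), w01 = (s - ry) * rx;
    const int w10 = (s - rx) * ry,       w11 = ry * rx;
    int val;

    if (hx > 0 && hy > 0 && hx < width - 1 && hy < height - 1) {
        /* Interior pixel: direct loads, no per-sample bounds checks.  */
        const uint8_t *line = ref + hy * width + hx;
        val = w00 * line[0]     + w01 * line[1]
            + w10 * line[width] + w11 * line[width + 1];
    } else {
        /* Border pixel: keep using the slower, clamped fetch.  */
        val = w00 * fetch_halfpel(ref, width, height, hx,     hy)
            + w01 * fetch_halfpel(ref, width, height, hx + 1, hy)
            + w10 * fetch_halfpel(ref, width, height, hx,     hy + 1)
            + w11 * fetch_halfpel(ref, width, height, hx + 1, hy + 1);
    }
    /* The four weights sum to (1 << fbits)^2, so normalize by that.  */
    return (val + (1 << (2 * fbits - 1))) >> (2 * fbits);
}

Once the interior case dominates, the per-sample bounds checks and the call
overhead of the clamped fetch drop out of the hot loop, which is what the
border-condition optimization in this revision is after.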

Modified:
   dirac/libavcodec/dirac.c

Modified: dirac/libavcodec/dirac.c
==============================================================================
--- dirac/libavcodec/dirac.c	(original)
+++ dirac/libavcodec/dirac.c	Thu Aug 16 22:50:19 2007
@@ -2299,10 +2299,16 @@ static void motion_comp_block2refs(Dirac
                                    int comp) {
     int x, y;
     int16_t *line;
+    uint8_t *refline1;
+    uint8_t *refline2;
     int px1, py1;
     int px2, py2;
     int vect1[2];
     int vect2[2];
+    int refxstart1, refystart1;
+    int refxstart2, refystart2;
+
+START_TIMER
 
     vect1[0] = currblock->vect[0][0];
     vect1[1] = currblock->vect[0][1];
@@ -2316,12 +2322,35 @@ static void motion_comp_block2refs(Dirac
         vect2[1] >>= s->chroma_vshift;
     }
 
+    if (s->frame_decoding.mv_precision > 0) {
+        refxstart1   = (xstart << s->frame_decoding.mv_precision) + vect1[0];
+        refxstart1 >>= s->frame_decoding.mv_precision - 1;
+        refystart1   = (ystart << s->frame_decoding.mv_precision) + vect1[1];
+        refystart1 >>= s->frame_decoding.mv_precision - 1;
+        refxstart2   = (xstart << s->frame_decoding.mv_precision) + vect2[0];
+        refxstart2 >>= s->frame_decoding.mv_precision - 1;
+        refystart2   = (ystart << s->frame_decoding.mv_precision) + vect2[1];
+        refystart2 >>= s->frame_decoding.mv_precision - 1;
+    } else {
+        refxstart1 = (xstart + vect1[0]) << 1;
+        refxstart1 >>= s->frame_decoding.mv_precision - 1;
+        refystart1 = (ystart + vect1[1]) << 1;
+        refystart1 >>= s->frame_decoding.mv_precision - 1;
+        refxstart2 = (xstart + vect2[0]) << 1;
+        refxstart2 >>= s->frame_decoding.mv_precision - 1;
+        refystart2 = (ystart + vect2[1]) << 1;
+        refystart2 >>= s->frame_decoding.mv_precision - 1;
+    }
+
     line = &coeffs[s->width * ystart];
+    refline1 = &ref1[refystart1 * s->refwidth];
+    refline2 = &ref2[refystart2 * s->refwidth];
     for (y = ystart; y < ystop; y++) {
         for (x = xstart; x < xstop; x++) {
             int val1 = 0;
             int val2 = 0;
             int val = 0;
+            int hx1, hy1, hx2, hy2;
 
             if (s->frame_decoding.mv_precision > 0) {
                 px1 = (x << s->frame_decoding.mv_precision) + vect1[0];
@@ -2335,14 +2364,73 @@ static void motion_comp_block2refs(Dirac
                 py2 = (y + vect2[1]) << 1;
             }
 
-            val1 = upconvert(s, ref1, s->refwidth, s->refheight,
-                             px1, py1, comp);
-            val1 *= s->frame_decoding.picture_weight_ref1;
+            hx1 = px1 >> (s->frame_decoding.mv_precision - 1);
+            hy1 = py1 >> (s->frame_decoding.mv_precision - 1);
+            hx2 = px2 >> (s->frame_decoding.mv_precision - 1);
+            hy2 = py2 >> (s->frame_decoding.mv_precision - 1);
 
-            val2 = upconvert(s, ref2, s->refwidth, s->refheight,
-                             px2, py2, comp);
-            val2 *= s->frame_decoding.picture_weight_ref2;
+            if (s->frame_decoding.mv_precision == 0
+                || s->frame_decoding.mv_precision == 1) {
+                /* XXX: check this.  */
+                val1 = get_halfpel(ref1, s->refwidth, s->refheight, x, y);
+                val2 = get_halfpel(ref2, s->refwidth, s->refheight, x, y);
+            } else {
+                int w00,  w01,  w10,  w11;
+                int w002, w012, w102, w112;
+                int rx1, ry1, rx2, ry2;
+
+                rx1 = px1 - (hx1 << (s->frame_decoding.mv_precision - 1));
+                ry1 = py1 - (hy1 << (s->frame_decoding.mv_precision - 1));
+                rx2 = px2 - (hx2 << (s->frame_decoding.mv_precision - 1));
+                ry2 = py2 - (hy2 << (s->frame_decoding.mv_precision - 1));
+
+                w00  = ((1 << (s->frame_decoding.mv_precision - 1)) - ry1)
+                     * ((1 << (s->frame_decoding.mv_precision - 1)) - rx1);
+                w01  = ((1 << (s->frame_decoding.mv_precision - 1)) - ry1) * rx1;
+                w10  = ((1 << (s->frame_decoding.mv_precision - 1)) - rx1) * ry1;
+                w11  = ry1 * rx1;
+
+                w002 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry2)
+                    * ((1 << (s->frame_decoding.mv_precision - 1)) - rx2);
+                w012 = ((1 << (s->frame_decoding.mv_precision - 1)) - ry2) * rx2;
+                w102 = ((1 << (s->frame_decoding.mv_precision - 1)) - rx2) * ry2;
+                w112 = ry2 * rx2;
+
+                /* For val1.  */
+                if (hx1 > 0 && hy1 > 0 && hx1 < (s->refwidth - 1) && hy1 < (s->refheight - 1)) {
+                    val1 += w00 * refline1[hx1                  ];
+                    val1 += w01 * refline1[hx1               + 1];
+                    val1 += w10 * refline1[hx1 + s->refwidth    ];
+                    val1 += w11 * refline1[hx1 + s->refwidth + 1];
+                } else {
+                    /* Border condition, keep using the slower code.  */
+                    val1 += w00 * get_halfpel(ref1, s->refwidth, s->refheight, hx1    , hy1    );
+                    val1 += w01 * get_halfpel(ref1, s->refwidth, s->refheight, hx1 + 1, hy1    );
+                    val1 += w10 * get_halfpel(ref1, s->refwidth, s->refheight, hx1    , hy1 + 1);
+                    val1 += w11 * get_halfpel(ref1, s->refwidth, s->refheight, hx1 + 1, hy1 + 1);
+                }
+                val1 += 1 << (s->frame_decoding.mv_precision - 1);
+                val1 >>= s->frame_decoding.mv_precision;
+
+                /* For val2.  */
+                if (hx2 > 0 && hy2 > 0 && hx2 < (s->refwidth - 1) && hy2 < (s->refheight - 1)) {
+                    val2 += w002 * refline2[hx2                  ];
+                    val2 += w012 * refline2[hx2               + 1];
+                    val2 += w102 * refline2[hx2 + s->refwidth    ];
+                    val2 += w112 * refline2[hx2 + s->refwidth + 1];
+                } else {
+                    /* Border condition, keep using the slower code.  */
+                    val2 += w002 * get_halfpel(ref2, s->refwidth, s->refheight, hx2    , hy2    );
+                    val2 += w012 * get_halfpel(ref2, s->refwidth, s->refheight, hx2 + 1, hy2    );
+                    val2 += w102 * get_halfpel(ref2, s->refwidth, s->refheight, hx2    , hy2 + 1);
+                    val2 += w112 * get_halfpel(ref2, s->refwidth, s->refheight, hx2 + 1, hy2 + 1);
+                }
+                val2 += 1 << (s->frame_decoding.mv_precision - 1);
+                val2 >>= s->frame_decoding.mv_precision;
+            }
 
+            val1 *= s->frame_decoding.picture_weight_ref1;
+            val2 *= s->frame_decoding.picture_weight_ref2;
             val = val1 + val2;
             val = (val
                    * spatial_wt(i, x, s->xbsep, s->xblen,
@@ -2352,8 +2440,12 @@ static void motion_comp_block2refs(Dirac
 
             line[x] += val;
         }
+        refline1 += s->refwidth << 1;
+        refline2 += s->refwidth << 1;
         line += s->width;
     }
+
+STOP_TIMER("two_refframes");
 }
 
 /**


