[FFmpeg-cvslog] r24668 - in trunk/libavcodec: vp8.c vp8dsp.c x86/vp8dsp-init.c x86/vp8dsp.asm

darkshikari subversion
Mon Aug 2 22:18:09 CEST 2010


Author: darkshikari
Date: Mon Aug  2 22:18:09 2010
New Revision: 24668

Log:
VP8: move zeroing of luma DC block into the WHT
Lets us do the zeroing in asm instead of C.
Also makes it consistent with the way the regular iDCT code does it.

Modified:
   trunk/libavcodec/vp8.c
   trunk/libavcodec/vp8dsp.c
   trunk/libavcodec/x86/vp8dsp-init.c
   trunk/libavcodec/x86/vp8dsp.asm

Modified: trunk/libavcodec/vp8.c
==============================================================================
--- trunk/libavcodec/vp8.c	Mon Aug  2 21:44:58 2010	(r24667)
+++ trunk/libavcodec/vp8.c	Mon Aug  2 22:18:09 2010	(r24668)
@@ -117,6 +117,7 @@ typedef struct {
      */
     DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
     DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
+    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
     uint8_t intra4x4_pred_mode_mb[16];
 
     int chroma_pred_mode;    ///< 8x8c pred mode of the current macroblock
@@ -864,22 +865,19 @@ static av_always_inline
 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                       uint8_t t_nnz[9], uint8_t l_nnz[9])
 {
-    LOCAL_ALIGNED_16(DCTELEM, dc,[16]);
     int i, x, y, luma_start = 0, luma_ctx = 3;
     int nnz_pred, nnz, nnz_total = 0;
     int segment = s->segment;
 
     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
-        AV_ZERO128(dc);
-        AV_ZERO128(dc+8);
         nnz_pred = t_nnz[8] + l_nnz[8];
 
         // decode DC values and do hadamard
-        nnz = decode_block_coeffs(c, dc, s->prob->token[1], 0, nnz_pred,
+        nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
                                   s->qmat[segment].luma_dc_qmul);
         l_nnz[8] = t_nnz[8] = !!nnz;
         nnz_total += nnz;
-        s->vp8dsp.vp8_luma_dc_wht(s->block, dc);
+        s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
         luma_start = 1;
         luma_ctx = 0;
     }

Modified: trunk/libavcodec/vp8dsp.c
==============================================================================
--- trunk/libavcodec/vp8dsp.c	Mon Aug  2 21:44:58 2010	(r24667)
+++ trunk/libavcodec/vp8dsp.c	Mon Aug  2 22:18:09 2010	(r24668)
@@ -46,6 +46,10 @@ static void vp8_luma_dc_wht_c(DCTELEM bl
         t1 = dc[i*4+1] + dc[i*4+2];
         t2 = dc[i*4+1] - dc[i*4+2];
         t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding
+        dc[i*4+0] = 0;
+        dc[i*4+1] = 0;
+        dc[i*4+2] = 0;
+        dc[i*4+3] = 0;
 
         *block[i][0] = (t0 + t1) >> 3;
         *block[i][1] = (t3 + t2) >> 3;

Modified: trunk/libavcodec/x86/vp8dsp-init.c
==============================================================================
--- trunk/libavcodec/x86/vp8dsp-init.c	Mon Aug  2 21:44:58 2010	(r24667)
+++ trunk/libavcodec/x86/vp8dsp-init.c	Mon Aug  2 22:18:09 2010	(r24668)
@@ -224,6 +224,7 @@ extern void ff_vp8_idct_dc_add4y_mmx(uin
 extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
 extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride);
 extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
+extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
 extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
 extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
 
@@ -335,6 +336,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPCo
 
     if (mm_flags & FF_MM_SSE) {
         c->vp8_idct_add                         = ff_vp8_idct_add_sse;
+        c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
         c->put_vp8_epel_pixels_tab[0][0][0]     =
         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
     }

Modified: trunk/libavcodec/x86/vp8dsp.asm
==============================================================================
--- trunk/libavcodec/x86/vp8dsp.asm	Mon Aug  2 21:44:58 2010	(r24667)
+++ trunk/libavcodec/x86/vp8dsp.asm	Mon Aug  2 22:18:09 2010	(r24668)
@@ -1186,12 +1186,23 @@ VP8_IDCT_ADD sse
     SWAP %1, %4, %3
 %endmacro
 
-INIT_MMX
-cglobal vp8_luma_dc_wht_mmx, 2,3
+%macro VP8_DC_WHT 1
+cglobal vp8_luma_dc_wht_%1, 2,3
     movq          m0, [r1]
     movq          m1, [r1+8]
     movq          m2, [r1+16]
     movq          m3, [r1+24]
+%ifidn %1, sse
+    xorps      xmm0, xmm0
+    movaps  [r1+ 0], xmm0
+    movaps  [r1+16], xmm0
+%else
+    pxor         m4, m4
+    movq    [r1+ 0], m4
+    movq    [r1+ 8], m4
+    movq    [r1+16], m4
+    movq    [r1+24], m4
+%endif
     HADAMARD4_1D  0, 1, 2, 3
     TRANSPOSE4x4W 0, 1, 2, 3, 4
     paddw         m0, [pw_3]
@@ -1203,6 +1214,11 @@ cglobal vp8_luma_dc_wht_mmx, 2,3
     SCATTER_WHT   0, 1, 0
     SCATTER_WHT   2, 3, 2
     RET
+%endmacro
+
+INIT_MMX
+VP8_DC_WHT mmx
+VP8_DC_WHT sse
 
 ;-----------------------------------------------------------------------------
 ; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim);



More information about the ffmpeg-cvslog mailing list