[FFmpeg-cvslog] r26138 - in trunk/libavcodec/x86: h264_intrapred.asm h264_intrapred_init.c

rbultje subversion
Wed Dec 29 19:33:10 CET 2010


Author: rbultje
Date: Wed Dec 29 19:33:10 2010
New Revision: 26138

Log:
Port pred8x8l_dc_mmxext/ssse3 (H.264 intra prediction) from x264 to FFmpeg.
Original authors: Holger Lubitz <holger lubitz org>, Jason Garrett-Glaser
<darkshikari gmail com> (approves LGPL relicensing for this code) and Loren
Merritt <lorenm at u dot washington dot edu> (approves LGPL relicensing for
this code). Patch by Daniel Kang <daniel dot d dot kang at gmail com>, as
part of Google's GCI 2010.

Modified:
   trunk/libavcodec/x86/h264_intrapred.asm
   trunk/libavcodec/x86/h264_intrapred_init.c

Modified: trunk/libavcodec/x86/h264_intrapred.asm
==============================================================================
--- trunk/libavcodec/x86/h264_intrapred.asm	Wed Dec 29 19:11:27 2010	(r26137)
+++ trunk/libavcodec/x86/h264_intrapred.asm	Wed Dec 29 19:33:10 2010	(r26138)
@@ -43,6 +43,7 @@ cextern pb_1
 cextern pb_3
 cextern pw_4
 cextern pw_5
+cextern pw_8
 cextern pw_16
 cextern pw_17
 cextern pw_32
@@ -1139,6 +1140,110 @@ PRED8x8L_TOP_DC ssse3
 %endif
 
 ;-----------------------------------------------------------------------------
+;void pred8x8l_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+%ifdef CONFIG_GPL
+%macro PRED8x8L_DC 1 ; %1 = suffix of the emitted pred8x8l_dc_%1 function
+cglobal pred8x8l_dc_%1, 4,5
+    sub          r0, r3           ; r0 = src - stride (row above the block)
+    lea          r4, [r0+r3*2]    ; r4 = src + stride (row 1)
+    movq        mm0, [r0+r3*1-8]  ; top byte = pixel left of row 0 (l0)
+    punpckhbw   mm0, [r0+r3*0-8]  ; pair it with the top-left pixel (lt)
+    movq        mm1, [r4+r3*1-8]  ; top byte = l2
+    punpckhbw   mm1, [r0+r3*2-8]  ; pair it with l1
+    mov          r4, r0           ; r4 = saved pointer to the row above
+    punpckhwd   mm1, mm0          ; high dword = l2 l1 l0 lt
+    lea          r0, [r0+r3*4]    ; r0 = src + 3*stride
+    movq        mm2, [r0+r3*1-8]  ; top byte = l4
+    punpckhbw   mm2, [r0+r3*0-8]  ; pair it with l3
+    lea          r0, [r0+r3*2]    ; r0 = src + 5*stride
+    movq        mm3, [r0+r3*1-8]  ; top byte = l6
+    punpckhbw   mm3, [r0+r3*0-8]  ; pair it with l5
+    punpckhwd   mm3, mm2          ; high dword = l6 l5 l4 l3
+    punpckhdq   mm3, mm1          ; mm3 = l6 l5 l4 l3 l2 l1 l0 lt (low to high byte)
+    lea          r0, [r0+r3*2]    ; r0 = src + 7*stride
+    movq        mm0, [r0+r3*0-8]  ; top byte = l7
+    movq        mm1, [r4]         ; mm1 = the 8 pixels above the block
+    mov          r0, r4           ; r0 = row above again; base pointer from here on
+    movq        mm4, mm3          ; mm4/mm2: copies of mm3 fed to the PALIGNR shifts
+    movq        mm2, mm3          ; (PALIGNR is defined elsewhere; presumably palignr semantics)
+    PALIGNR     mm4, mm0, 7, mm0
+    PALIGNR     mm1, mm2, 1, mm2
+    test        r1d, r1d          ; has_topleft is a C int: test only the low 32 bits
+    jnz .do_left
+.fix_lt_1:                        ; has_topleft == 0: patch the left-edge neighbours
+    movq        mm5, mm3
+    pxor        mm5, mm4          ; top byte = lt ^ (top byte of mm4)
+    psrlq       mm5, 56           ; keep only that top byte...
+    psllq       mm5, 48           ; ...and move it to byte 6
+    pxor        mm1, mm5          ; byte 6 of mm1 (presumably the lt slot) gets patched
+    jmp .do_left
+.fix_lt_2:                        ; has_topleft == 0: patch the top-edge neighbours
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56           ; mm5 = byte 0 of (mm3 ^ mm2), all else zero
+    pxor        mm2, mm5          ; byte 0 of mm2 becomes byte 0 of mm3 (first top pixel)
+    test        r2d, r2d          ; has_topright is a C int: test only the low 32 bits
+    jnz .body
+.fix_tr_1:                        ; has_topright == 0
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56           ; mm5 = byte 7 of (mm3 ^ mm1), all else zero
+    pxor        mm1, mm5          ; byte 7 of mm1 becomes byte 7 of mm3 (last top pixel)
+    jmp .body
+.do_left:                         ; filter the left edge; result ends up in mm7
+    movq        mm0, mm4          ; keep a copy of mm4 across the first filter call
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+    movq        mm4, mm0
+    movq        mm7, mm2          ; mm7 = first filter result
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    psllq       mm1, 56           ; keep only byte 0 of the second result, in byte 7
+    PALIGNR     mm7, mm1, 7, mm3
+    movq        mm0, [r0-8]       ; 8 bytes left of the top row (top byte = lt)
+    movq        mm3, [r0]         ; the 8 pixels above the block
+    movq        mm1, [r0+8]       ; the 8 pixels above and to the right
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0
+    PALIGNR     mm1, mm4, 1, mm4
+    test        r1d, r1d          ; no top-left pixel -> patch mm2 first
+    jz .fix_lt_2
+    test        r2d, r2d          ; no top-right pixels -> patch mm1 first
+    jz .fix_tr_1
+.body:                            ; r1/r2 are free now and get reused as row pointers
+    lea          r1, [r0+r3*2]    ; r1 = src + stride
+    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
+    pxor        mm0, mm0
+    pxor        mm1, mm1
+    lea          r2, [r1+r3*2]    ; r2 = src + 3*stride
+    psadbw      mm0, mm7          ; SAD against zero = sum of the 8 bytes of mm7
+    psadbw      mm1, mm6          ; likewise the sum of the 8 bytes of mm6
+    paddw       mm0, [pw_8]       ; rounding term for the >> 4 below
+    paddw       mm0, mm1
+    lea          r4, [r2+r3*2]    ; r4 = src + 5*stride
+    psrlw       mm0, 4            ; dc = (sum(mm7) + sum(mm6) + 8) >> 4
+    pshufw      mm0, mm0, 0       ; broadcast word 0 to all four words
+    packuswb    mm0, mm0          ; mm0 = dc replicated in all 8 bytes
+    movq [r0+r3*1], mm0           ; row 0
+    movq [r0+r3*2], mm0           ; row 1
+    movq [r1+r3*1], mm0           ; row 2
+    movq [r1+r3*2], mm0           ; row 3
+    movq [r2+r3*1], mm0           ; row 4
+    movq [r2+r3*2], mm0           ; row 5
+    movq [r4+r3*1], mm0           ; row 6
+    movq [r4+r3*2], mm0           ; row 7
+    RET
+%endmacro
+INIT_MMX
+%define PALIGNR PALIGNR_MMX ; PALIGNR inside the macro expands to PALIGNR_MMX (defined elsewhere)
+PRED8x8L_DC mmxext ; emits pred8x8l_dc_mmxext
+%define PALIGNR PALIGNR_SSSE3 ; switch PALIGNR to PALIGNR_SSSE3 for the second expansion
+PRED8x8L_DC ssse3 ; emits pred8x8l_dc_ssse3
+%endif
+
+;-----------------------------------------------------------------------------
 ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
 ;-----------------------------------------------------------------------------
 

Modified: trunk/libavcodec/x86/h264_intrapred_init.c
==============================================================================
--- trunk/libavcodec/x86/h264_intrapred_init.c	Wed Dec 29 19:11:27 2010	(r26137)
+++ trunk/libavcodec/x86/h264_intrapred_init.c	Wed Dec 29 19:33:10 2010	(r26138)
@@ -61,6 +61,8 @@ void ff_pred8x8_tm_vp8_sse2        (uint
 void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
 void ff_pred8x8l_top_dc_mmxext     (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred8x8l_top_dc_ssse3      (uint8_t *src, int has_topleft, int has_topright, int stride);
+void ff_pred8x8l_dc_mmxext         (uint8_t *src, int has_topleft, int has_topright, int stride);
+void ff_pred8x8l_dc_ssse3          (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);
 void ff_pred4x4_down_left_mmxext   (uint8_t *src, const uint8_t *topright, int stride);
 void ff_pred4x4_tm_vp8_mmx         (uint8_t *src, const uint8_t *topright, int stride);
@@ -100,6 +102,7 @@ void ff_h264_pred_init_x86(H264PredConte
         h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
 #if CONFIG_GPL
         h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
+        h->pred8x8l [DC_PRED     ] = ff_pred8x8l_dc_mmxext;
 #endif
         h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
 #if CONFIG_GPL
@@ -155,6 +158,7 @@ void ff_h264_pred_init_x86(H264PredConte
         h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
 #if CONFIG_GPL
         h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3;
+        h->pred8x8l [DC_PRED     ] = ff_pred8x8l_dc_ssse3;
 #endif
         if (codec_id == CODEC_ID_VP8) {
             h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;



More information about the ffmpeg-cvslog mailing list