[FFmpeg-devel] [PATCH 1/6] x86: huffyuvdsp: port mmx add_bytes to yasm

Christophe Gisquet christophe.gisquet at gmail.com
Thu May 29 11:10:35 CEST 2014


Speeds up add_bytes from 68 to 56 cycles.
---
 libavcodec/x86/huffyuvdsp.asm    | 32 ++++++++++++++++++++++++++++++++
 libavcodec/x86/huffyuvdsp_init.c |  2 +-
 libavcodec/x86/huffyuvdsp_mmx.c  | 32 +-------------------------------
 3 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index f183ebe..7acab87 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -163,3 +163,35 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
     ADD_HFYU_LEFT_LOOP 0, 1
 .src_unaligned:
     ADD_HFYU_LEFT_LOOP 0, 0
+
+INIT_MMX mmx
+cglobal add_bytes, 3,4,2, dst, src, w, size ; dst[i] += src[i] (bytewise, wrapping) for i in [0, w)
+    movsxdifnidn wq, wd         ; w is a C int: sign-extend it, its upper half is undefined on x86-64
+    mov    sizeq, wq
+    and    sizeq, -2*mmsize     ; size = w rounded down to whole 2*mmsize-byte chunks
+    jz .2                       ; no full chunk: only the byte tail remains
+    add    dstq, sizeq
+    add    srcq, sizeq          ; point both past the chunked part...
+    neg    sizeq                ; ...and index it with a negative offset rising to 0
+.1:
+    movu   m0, [srcq + sizeq]
+    movu   m1, [srcq + sizeq + mmsize]
+    paddb  m0, [dstq + sizeq]   ; dst load folded into paddb (MMX memory operands need no alignment)
+    paddb  m1, [dstq + sizeq + mmsize]
+    movu   [dstq + sizeq], m0
+    movu   [dstq + sizeq + mmsize], m1
+    add    sizeq, 2*mmsize
+    jl .1
+.2:
+    and    wq, 2*mmsize-1       ; w = number of leftover bytes (< 2*mmsize)
+    jz .end
+    add    dstq, wq
+    add    srcq, wq
+    neg    wq                   ; same negative-offset scheme as the vector loop
+.3:                             ; scalar tail; must not re-enter .2, which would redo the setup on a negative w
+    mov    sizeb, [srcq + wq]   ; size is dead by now, reuse it as the byte temporary
+    add    [dstq + wq], sizeb
+    inc    wq
+    jl .3
+.end:
+    REP_RET
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 1efb34d..184c2ce 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -46,7 +46,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
         c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
 #endif
 
-    if (INLINE_MMX(cpu_flags))
+    if (EXTERNAL_MMX(cpu_flags))
         c->add_bytes = ff_add_bytes_mmx;
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
diff --git a/libavcodec/x86/huffyuvdsp_mmx.c b/libavcodec/x86/huffyuvdsp_mmx.c
index 5942210..ee6ec91 100644
--- a/libavcodec/x86/huffyuvdsp_mmx.c
+++ b/libavcodec/x86/huffyuvdsp_mmx.c
@@ -22,9 +22,7 @@
 #include "libavutil/x86/asm.h"
 #include "huffyuvdsp.h"
 
-#if HAVE_INLINE_ASM
-
-#if HAVE_7REGS
+#if HAVE_INLINE_ASM && HAVE_7REGS
 void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
                                   const uint8_t *diff, int w,
                                   int *left, int *left_top)
@@ -61,31 +59,3 @@ void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
     *left_top = tl;
 }
 #endif
-
-void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w)
-{
-    x86_reg i = 0;
-
-    __asm__ volatile (
-        "jmp          2f                \n\t"
-        "1:                             \n\t"
-        "movq   (%1, %0), %%mm0         \n\t"
-        "movq   (%2, %0), %%mm1         \n\t"
-        "paddb     %%mm0, %%mm1         \n\t"
-        "movq      %%mm1, (%2, %0)      \n\t"
-        "movq  8(%1, %0), %%mm0         \n\t"
-        "movq  8(%2, %0), %%mm1         \n\t"
-        "paddb     %%mm0, %%mm1         \n\t"
-        "movq      %%mm1, 8(%2, %0)     \n\t"
-        "add         $16, %0            \n\t"
-        "2:                             \n\t"
-        "cmp          %3, %0            \n\t"
-        "js           1b                \n\t"
-        : "+r" (i)
-        : "r" (src), "r" (dst), "r" ((x86_reg) w - 15));
-
-    for (; i < w; i++)
-        dst[i + 0] += src[i + 0];
-}
-
-#endif /* HAVE_INLINE_ASM */
-- 
1.8.0.msysgit.0



More information about the ffmpeg-devel mailing list