[FFmpeg-devel] [PATCH 1/6] x86: huffyuvdsp: port mmx add_bytes to yasm
Christophe Gisquet
christophe.gisquet at gmail.com
Thu May 29 11:10:35 CEST 2014
68c to 56c.
---
libavcodec/x86/huffyuvdsp.asm | 32 ++++++++++++++++++++++++++++++++
libavcodec/x86/huffyuvdsp_init.c | 2 +-
libavcodec/x86/huffyuvdsp_mmx.c | 32 +-------------------------------
3 files changed, 34 insertions(+), 32 deletions(-)
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index f183ebe..7acab87 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -163,3 +163,35 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
ADD_HFYU_LEFT_LOOP 0, 1
.src_unaligned:
ADD_HFYU_LEFT_LOOP 0, 0
+
+INIT_MMX mmx
+cglobal add_bytes, 3,4,4, dst, src, w, size ; dst[i] += src[i] (wrapping bytes) for i in [0, w)
+ mov sizeq, wq ; NOTE(review): the removed C prototype passes w as int - confirm the upper half of wq is valid on x86-64
+ and sizeq, -2*mmsize ; size = number of bytes covered by whole 2*mmsize iterations
+ jz .2 ; fewer than 2*mmsize bytes: scalar tail only
+ add dstq, sizeq ; point just past the vector part ...
+ add srcq, sizeq
+ neg sizeq ; ... and index it with a negative offset that counts up to 0
+.1:
+ movu m0, [dstq + sizeq]
+ movu m1, [srcq + sizeq]
+ movu m2, [dstq + sizeq + mmsize]
+ movu m3, [srcq + sizeq + mmsize]
+ paddb m1, m0 ; per-byte add, no carry between bytes
+ paddb m3, m2
+ movu [dstq + sizeq], m1
+ movu [dstq + sizeq + mmsize], m3
+ add sizeq, 2*mmsize
+ jl .1 ; keep going until the offset reaches 0
+.2:
+ and wq, 2*mmsize-1 ; w = leftover byte count; dstq/srcq already point at the leftovers
+ jz .end
+.3: ; scalar tail, one byte per iteration, ascending addresses
+ mov sizeb, [srcq] ; size is no longer needed, reuse its low byte as scratch
+ add [dstq], sizeb
+ inc srcq
+ inc dstq
+ dec wq
+ jnz .3 ; loop on .3, not .2: going back to .2 would re-mask w and redo the setup
+.end:
+ REP_RET
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 1efb34d..184c2ce 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -46,7 +46,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov;
#endif
- if (INLINE_MMX(cpu_flags))
+ if (EXTERNAL_MMX(cpu_flags))
c->add_bytes = ff_add_bytes_mmx;
if (EXTERNAL_MMXEXT(cpu_flags)) {
diff --git a/libavcodec/x86/huffyuvdsp_mmx.c b/libavcodec/x86/huffyuvdsp_mmx.c
index 5942210..ee6ec91 100644
--- a/libavcodec/x86/huffyuvdsp_mmx.c
+++ b/libavcodec/x86/huffyuvdsp_mmx.c
@@ -22,9 +22,7 @@
#include "libavutil/x86/asm.h"
#include "huffyuvdsp.h"
-#if HAVE_INLINE_ASM
-
-#if HAVE_7REGS
+#if HAVE_INLINE_ASM && HAVE_7REGS
void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w,
int *left, int *left_top)
@@ -61,31 +59,3 @@ void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
*left_top = tl;
}
#endif
-
-void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w)
-{
- x86_reg i = 0;
-
- __asm__ volatile (
- "jmp 2f \n\t"
- "1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
- "movq (%2, %0), %%mm1 \n\t"
- "paddb %%mm0, %%mm1 \n\t"
- "movq %%mm1, (%2, %0) \n\t"
- "movq 8(%1, %0), %%mm0 \n\t"
- "movq 8(%2, %0), %%mm1 \n\t"
- "paddb %%mm0, %%mm1 \n\t"
- "movq %%mm1, 8(%2, %0) \n\t"
- "add $16, %0 \n\t"
- "2: \n\t"
- "cmp %3, %0 \n\t"
- "js 1b \n\t"
- : "+r" (i)
- : "r" (src), "r" (dst), "r" ((x86_reg) w - 15));
-
- for (; i < w; i++)
- dst[i + 0] += src[i + 0];
-}
-
-#endif /* HAVE_INLINE_ASM */
--
1.8.0.msysgit.0
More information about the ffmpeg-devel mailing list