[FFmpeg-cvslog] x86: hpeldsp: better factorization

Christophe Gisquet git at videolan.org
Thu May 29 21:53:36 CEST 2014


ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Mon May 26 21:59:14 2014 +0200| [226700398105075d27d07b652a0b67705aa06a1e] | committer: Michael Niedermayer

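x86: hpeldsp: better factorization

The hpeldsp-local PAVGB_MMX macro is folded into the shared PAVGB macro in
x86util.asm, which now takes two optional extra operands (a temporary
register and a mask register) that are used only by its plain-MMX fallback.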

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=226700398105075d27d07b652a0b67705aa06a1e
---

 libavcodec/x86/hpeldsp.asm |   46 +++++++++-----------------------------------
 libavutil/x86/x86util.asm  |   10 +++++++++-
 2 files changed, 18 insertions(+), 38 deletions(-)

diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 76e4632..a702b8b 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -372,16 +372,6 @@ AVG_PIXELS8
 
 
 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro PAVGB_MMX 4
-    movu   %3, %1
-    por    %3, %2
-    pxor   %2, %1
-    pand   %2, %4
-    psrlq  %2, 1
-    psubb  %3, %2
-    SWAP   %2, %3
-%endmacro
-
 %macro AVG_PIXELS8_X2 0
 %if cpuflag(sse2)
 cglobal avg_pixels16_x2, 4,5,4
@@ -396,53 +386,35 @@ cglobal avg_pixels8_x2, 4,5
 .loop:
     movu         m0, [r1]
     movu         m2, [r1+r2]
-%if notcpuflag(mmxext)
-    PAVGB_MMX    [r1+1], m0, m3, m5
-    PAVGB_MMX    [r1+r2+1], m2, m4, m5
-    PAVGB_MMX    [r0], m0, m3, m5
-    PAVGB_MMX    [r0+r2], m2, m4, m5
-%else
 %if cpuflag(sse2)
     movu         m1, [r1+1]
     movu         m3, [r1+r2+1]
     pavgb        m0, m1
     pavgb        m2, m3
 %else
-    PAVGB        m0, [r1+1]
-    PAVGB        m2, [r1+r2+1]
-%endif
-    PAVGB        m0, [r0]
-    PAVGB        m2, [r0+r2]
+    PAVGB        m0, [r1+1], m3, m5
+    PAVGB        m2, [r1+r2+1], m4, m5
 %endif
+    PAVGB        m0, [r0], m3, m5
+    PAVGB        m2, [r0+r2], m4, m5
     add          r1, r4
     mova       [r0], m0
     mova    [r0+r2], m2
     movu         m0, [r1]
     movu         m2, [r1+r2]
-%if notcpuflag(mmxext)
-    PAVGB_MMX    [r1+1], m0, m3, m5
-    PAVGB_MMX    [r1+r2+1], m2, m4, m5
-%elif cpuflag(sse2)
+%if cpuflag(sse2)
     movu         m1, [r1+1]
     movu         m3, [r1+r2+1]
     pavgb        m0, m1
     pavgb        m2, m3
 %else
-    PAVGB        m0, [r1+1]
-    PAVGB        m2, [r1+r2+1]
+    PAVGB        m0, [r1+1], m3, m5
+    PAVGB        m2, [r1+r2+1], m4, m5
 %endif
     add          r0, r4
     add          r1, r4
-%if notcpuflag(mmxext)
-    PAVGB_MMX    [r0], m0, m3, m5
-    PAVGB_MMX    [r0+r2], m2, m4, m5
-%elif cpuflag(sse2)
-    pavgb        m0, [r0]
-    pavgb        m2, [r0+r2]
-%else
-    PAVGB        m0, [r0]
-    PAVGB        m2, [r0+r2]
-%endif
+    PAVGB        m0, [r0], m3, m5
+    PAVGB        m2, [r0+r2], m4, m5
     mova       [r0], m0
     mova    [r0+r2], m2
     add          r0, r4
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 807e87e..1064e9a 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -340,11 +340,19 @@
 %endif
 %endmacro
 
-%macro PAVGB 2
+%macro PAVGB 2-4
 %if cpuflag(mmxext)
     pavgb   %1, %2
 %elif cpuflag(3dnow)
     pavgusb %1, %2
+%elif cpuflag(mmx)
+    movu   %3, %2
+    por    %3, %1
+    pxor   %1, %2
+    pand   %1, %4
+    psrlq  %1, 1
+    psubb  %3, %1
+    SWAP   %1, %3
 %endif
 %endmacro
 



More information about the ffmpeg-cvslog mailing list