[FFmpeg-devel] [PATCH] x86: Move XOP emulation to x86util

James Almer jamrial at gmail.com
Mon Feb 24 06:34:38 CET 2014


The emulation needs to support the case where the first argument
(the destination) is the same as the fourth. To achieve this, a fifth
argument that serves as a temporary register may be needed.
Emulation that doesn't obey the original instruction's semantics
can't live in x86inc, so it is moved to x86util.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavcodec/x86/flacdsp.asm |  8 ++++----
 libavutil/x86/x86inc.asm   | 19 -------------------
 libavutil/x86/x86util.asm  | 19 +++++++++++++++++++
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 1a83cd8..37ee87b 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -44,21 +44,21 @@ ALIGN 16
     test   jq, jq
     jz .end_order
 .loop_order:
-    pmacsdql m2, m0, m1, m2
+    PMACSDQL m2, m0, m1, m2, m0
     movd   m0, [decodedq+jq*4]
-    pmacsdql m3, m1, m0, m3
+    PMACSDQL m3, m1, m0, m3, m1
     movd   m1, [coeffsq+jq*4]
     inc    jq
     jl .loop_order
 .end_order:
-    pmacsdql m2, m0, m1, m2
+    PMACSDQL m2, m0, m1, m2, m0
     psrlq  m2, m4
     movd   m0, [decodedq]
     paddd  m0, m2
     movd   [decodedq], m0
     sub  lend, 2
     jl .ret
-    pmacsdql m3, m1, m0, m3
+    PMACSDQL m3, m1, m0, m3, m1
     psrlq  m3, m4
     movd   m1, [decodedq+4]
     paddd  m1, m3
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 88cae0c..a7f9f54 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1407,25 +1407,6 @@ AVX_INSTR pfmul, 1, 0, 1
 %undef i
 %undef j
 
-%macro FMA_INSTR 3
-    %macro %1 4-7 %1, %2, %3
-        %if cpuflag(xop)
-            v%5 %1, %2, %3, %4
-        %elifidn %1, %4
-            %6 %2, %3
-            %7 %1, %2
-        %else
-            %6 %1, %2, %3
-            %7 %1, %4
-        %endif
-    %endmacro
-%endmacro
-
-FMA_INSTR  pmacsdd,  pmulld, paddd
-FMA_INSTR  pmacsww,  pmullw, paddw
-FMA_INSTR pmacsdql,  pmuldq, paddq
-FMA_INSTR pmadcswd, pmaddwd, paddd
-
 ; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
 ; This lets us use tzcnt without bumping the yasm version requirement yet.
 %define tzcnt rep bsf
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 59e5df2..df58cad 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -666,6 +666,25 @@
 %endif
 %endmacro
 
+%macro PMA_EMU 4
+    %macro %1 5-8 %2, %3, %4
+        %if cpuflag(xop)
+            v%6 %1, %2, %3, %4
+        %elifidn %1, %4
+            %7 %5, %2, %3
+            %8 %1, %4, %5
+        %else
+            %7 %1, %2, %3
+            %8 %1, %4
+        %endif
+    %endmacro
+%endmacro
+
+PMA_EMU  PMACSWW,  pmacsww,  pmullw, paddw
+PMA_EMU  PMACSDD,  pmacsdd,  pmulld, paddd ; sse4 emulation
+PMA_EMU PMACSDQL, pmacsdql,  pmuldq, paddq ; sse4 emulation
+PMA_EMU PMADCSWD, pmadcswd, pmaddwd, paddd
+
 ; Wrapper for non-FMA version of fmaddps
 %macro FMULADD_PS 5
     %if cpuflag(fma3) || cpuflag(fma4)
-- 
1.8.3.2



More information about the ffmpeg-devel mailing list