[FFmpeg-cvslog] x86: add support for fmaddps fma4 instruction with abstraction to avx/sse

Justin Ruggles git at videolan.org
Sat Jul 28 00:10:47 CEST 2012


ffmpeg | branch: master | Justin Ruggles <justin.ruggles at gmail.com> | Mon Jun 18 23:39:14 2012 -0400 | [79687079a97a039c325ab79d7a95920d800b791f] | committer: Justin Ruggles

x86: add support for fmaddps fma4 instruction with abstraction to avx/sse

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=79687079a97a039c325ab79d7a95920d800b791f
---

 configure                |    5 +++++
 libavutil/x86/x86inc.asm |   16 +++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index fd90369..715e49b 100755
--- a/configure
+++ b/configure
@@ -242,6 +242,7 @@ Optimization options (experts only):
   --disable-sse            disable SSE optimizations
   --disable-ssse3          disable SSSE3 optimizations
   --disable-avx            disable AVX optimizations
+  --disable-fma4           disable FMA4 optimizations
   --disable-armv5te        disable armv5te optimizations
   --disable-armv6          disable armv6 optimizations
   --disable-armv6t2        disable armv6t2 optimizations
@@ -1047,6 +1048,7 @@ ARCH_EXT_LIST='
     armv6t2
     armvfp
     avx
+    fma4
     mmi
     mmx
     mmx2
@@ -1295,6 +1297,7 @@ mmx2_deps="mmx"
 sse_deps="mmx"
 ssse3_deps="sse"
 avx_deps="ssse3"
+fma4_deps="avx"
 
 aligned_stack_if_any="ppc x86"
 fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
@@ -2865,6 +2868,7 @@ EOF
         check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
             die "yasm not found, use --disable-yasm for a crippled build"
         check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
+        check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
     fi
 
     case "$cpu" in
@@ -3292,6 +3296,7 @@ if enabled x86; then
     echo "SSE enabled               ${sse-no}"
     echo "SSSE3 enabled             ${ssse3-no}"
     echo "AVX enabled               ${avx-no}"
+    echo "FMA4 enabled              ${fma4-no}"
     echo "CMOV enabled              ${cmov-no}"
     echo "CMOV is fast              ${fast_cmov-no}"
     echo "EBX available             ${ebx_available-no}"
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index b76a10c..4b4a19b 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1093,16 +1093,22 @@ AVX_INSTR pfmul, 1, 0, 1
 %undef j
 
 %macro FMA_INSTR 3
-    %macro %1 4-7 %1, %2, %3
-        %if cpuflag(xop)
-            v%5 %1, %2, %3, %4
+    %macro %1 5-8 %1, %2, %3
+        %if cpuflag(xop) || cpuflag(fma4)
+            v%6 %1, %2, %3, %4
         %else
-            %6 %1, %2, %3
-            %7 %1, %4
+            %ifidn %1, %4
+                %7 %5, %2, %3
+                %8 %1, %4, %5
+            %else
+                %7 %1, %2, %3
+                %8 %1, %4
+            %endif
         %endif
     %endmacro
 %endmacro
 
+FMA_INSTR  fmaddps,   mulps, addps
 FMA_INSTR  pmacsdd,  pmulld, paddd
 FMA_INSTR  pmacsww,  pmullw, paddw
 FMA_INSTR pmadcswd, pmaddwd, paddd



More information about the ffmpeg-cvslog mailing list