[FFmpeg-cvslog] x86/aacdec: use HADDPS macro

James Almer git at videolan.org
Wed Jun 8 19:24:08 CEST 2016


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Jun  8 14:18:18 2016 -0300 | [82dbfccaf00bc6cea79b3857c39295ff69c9f4b3] | committer: James Almer

x86/aacdec: use HADDPS macro

Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=82dbfccaf00bc6cea79b3857c39295ff69c9f4b3
---

 libavcodec/x86/aacpsdsp.asm |   16 +++-------------
 libavcodec/x86/sbrdsp.asm   |   24 ++++--------------------
 2 files changed, 7 insertions(+), 33 deletions(-)

diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index d1187df..d7d7a9a 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -38,17 +38,7 @@ cglobal ps_add_squares, 3, 3, %1, dst, src, n
     movaps m1, [srcq+mmsize]
     mulps  m0, m0
     mulps  m1, m1
-%if cpuflag(sse3)
-    haddps m0, m1
-%else
-    movaps m3, m0
-    movaps m4, m1
-    shufps m3, m3, q0301
-    shufps m4, m4, q0301
-    addps  m0, m3
-    addps  m1, m4
-    shufps m0, m1, q2020
-%endif
+    HADDPS m0, m1, m2
     addps  m0, [dstq]
     movaps [dstq], m0
     add  dstq, mmsize
@@ -59,9 +49,9 @@ cglobal ps_add_squares, 3, 3, %1, dst, src, n
 %endmacro
 
 INIT_XMM sse
-PS_ADD_SQUARES 3
+PS_ADD_SQUARES 2
 INIT_XMM sse3
-PS_ADD_SQUARES 5
+PS_ADD_SQUARES 3
 
 ;*******************************************************************
 ;void ff_ps_mul_pair_single_sse(float (*dst)[2], float (*src0)[2],
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index b6fa535..07a412b 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -526,31 +526,15 @@ align 16
     xorps   m2, [ps_mask3]
     xorps   m5, [ps_mask3]
     xorps   m6, [ps_mask3]
+    HADDPS  m2, m5, m3
+    HADDPS  m7, m6, m4
 %if cpuflag(sse3)
     movshdup m0, m1
-    haddps  m2, m5
-    haddps  m7, m6
-    addss   m1, m0
 %else
-    movaps  m3, m2
-    movaps  m0, m5
-    movaps  m4, m6
-    shufps  m3, m3, q0301
-    shufps  m0, m0, q0301
-    shufps  m4, m4, q0301
-    addps   m2, m3
-    addps   m5, m0
-    addps   m6, m4
-
-    movss   m0, m7
-    movss   m3, m1
-    shufps  m7, m7, q0001
+    movss   m0, m1
     shufps  m1, m1, q0001
-    addss   m7, m0
-    addss   m1, m3
-    shufps  m2, m5, q2020
-    shufps  m7, m6, q2020
 %endif
+    addss   m1, m0
     movaps  [phiq     ], m2
     movhps  [phiq+0x18], m7
     movss   [phiq+0x28], m7



More information about the ffmpeg-cvslog mailing list