[FFmpeg-cvslog] x86/aacdec: use HADDPS macro
James Almer
git at videolan.org
Wed Jun 8 19:24:08 CEST 2016
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Jun 8 14:18:18 2016 -0300| [82dbfccaf00bc6cea79b3857c39295ff69c9f4b3] | committer: James Almer
x86/aacdec: use HADDPS macro
Signed-off-by: James Almer <jamrial at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=82dbfccaf00bc6cea79b3857c39295ff69c9f4b3
---
libavcodec/x86/aacpsdsp.asm | 16 +++-------------
libavcodec/x86/sbrdsp.asm | 24 ++++--------------------
2 files changed, 7 insertions(+), 33 deletions(-)
diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index d1187df..d7d7a9a 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -38,17 +38,7 @@ cglobal ps_add_squares, 3, 3, %1, dst, src, n
movaps m1, [srcq+mmsize]
mulps m0, m0
mulps m1, m1
-%if cpuflag(sse3)
- haddps m0, m1
-%else
- movaps m3, m0
- movaps m4, m1
- shufps m3, m3, q0301
- shufps m4, m4, q0301
- addps m0, m3
- addps m1, m4
- shufps m0, m1, q2020
-%endif
+ HADDPS m0, m1, m2
addps m0, [dstq]
movaps [dstq], m0
add dstq, mmsize
@@ -59,9 +49,9 @@ cglobal ps_add_squares, 3, 3, %1, dst, src, n
%endmacro
INIT_XMM sse
-PS_ADD_SQUARES 3
+PS_ADD_SQUARES 2
INIT_XMM sse3
-PS_ADD_SQUARES 5
+PS_ADD_SQUARES 3
;*******************************************************************
;void ff_ps_mul_pair_single_sse(float (*dst)[2], float (*src0)[2],
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index b6fa535..07a412b 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -526,31 +526,15 @@ align 16
xorps m2, [ps_mask3]
xorps m5, [ps_mask3]
xorps m6, [ps_mask3]
+ HADDPS m2, m5, m3
+ HADDPS m7, m6, m4
%if cpuflag(sse3)
movshdup m0, m1
- haddps m2, m5
- haddps m7, m6
- addss m1, m0
%else
- movaps m3, m2
- movaps m0, m5
- movaps m4, m6
- shufps m3, m3, q0301
- shufps m0, m0, q0301
- shufps m4, m4, q0301
- addps m2, m3
- addps m5, m0
- addps m6, m4
-
- movss m0, m7
- movss m3, m1
- shufps m7, m7, q0001
+ movss m0, m1
shufps m1, m1, q0001
- addss m7, m0
- addss m1, m3
- shufps m2, m5, q2020
- shufps m7, m6, q2020
%endif
+ addss m1, m0
movaps [phiq ], m2
movhps [phiq+0x18], m7
movss [phiq+0x28], m7
More information about the ffmpeg-cvslog mailing list