[FFmpeg-cvslog] x86/aacpsdsp: optimize add_squares loop
James Almer
git at videolan.org
Tue Jun 14 17:43:40 CEST 2016
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Tue Jun 14 12:41:23 2016 -0300| [ede4ec1f8f5fd94dccd880199419a1f1b8137ab6] | committer: James Almer
x86/aacpsdsp: optimize add_squares loop
Signed-off-by: James Almer <jamrial at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ede4ec1f8f5fd94dccd880199419a1f1b8137ab6
---
libavcodec/x86/aacpsdsp.asm | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index d7d7a9a..e92cbbc 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -33,18 +33,22 @@ SECTION .text
;*************************************************************************
%macro PS_ADD_SQUARES 1
cglobal ps_add_squares, 3, 3, %1, dst, src, n
+ shl nd, 3 ; n *= 8: the old loop consumed mmsize*2 src bytes per mmsize/4 counts, i.e. 8 bytes per count
+ add srcq, nq ; src now points one past the last input byte
+ neg nq ; n = -(byte length), so [srcq+nq] addresses the first input element
+
+align 16 ; align the loop entry to a 16-byte boundary
.loop:
- movaps m0, [srcq]
- movaps m1, [srcq+mmsize]
+ movaps m0, [srcq+nq] ; load via negative offset instead of a moving src pointer
+ movaps m1, [srcq+nq+mmsize] ; second vector of this iteration
mulps m0, m0
mulps m1, m1
HADDPS m0, m1, m2
addps m0, [dstq]
movaps [dstq], m0
add dstq, mmsize
- add srcq, mmsize*2
- sub nd, mmsize/4
- jg .loop
+ add nq, mmsize*2 ; advance the offset; this add also sets the flags used by the branch
+ jl .loop ; keep looping while the offset is still negative (input remains)
REP_RET
%endmacro
More information about the ffmpeg-cvslog
mailing list