[FFmpeg-cvslog] x86: hevc_mc: load less data in epel filters
Christophe Gisquet
git at videolan.org
Sun Jul 27 19:15:48 CEST 2014
ffmpeg | branch: master | Christophe Gisquet <christophe.gisquet at gmail.com> | Fri Jul 25 15:08:49 2014 +0200| [81943a10b5007825892bac4582659fa7f74c4025] | committer: Michael Niedermayer
x86: hevc_mc: load less data in epel filters
Before:
5679 decicycles in epel_bi, 2059976 runs, 37176 skips
3468 decicycles in epel_uni, 1040886 runs, 7690 skips
After:
5323 decicycles in epel_bi, 2059493 runs, 37659 skips
3262 decicycles in epel_uni, 1040871 runs, 7705 skips
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=81943a10b5007825892bac4582659fa7f74c4025
---
libavcodec/x86/hevc_mc.asm | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 545f556..9cfebb8 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -176,15 +176,23 @@ QPEL_TABLE 12, 4, w, sse4
%else
%define rfilterq %2
%endif
- movdqu m0, [rfilterq ] ;load 128bit of x
+%if (%1 == 8 && %4 <= 4)
+%define %%load movd
+%elif (%1 == 8 && %4 <= 8) || (%1 > 8 && %4 <= 4)
+%define %%load movq
+%else
+%define %%load movdqu
+%endif
+
+ %%load m0, [rfilterq ]
%ifnum %3
- movdqu m1, [rfilterq+ %3] ;load 128bit of x+stride
- movdqu m2, [rfilterq+2*%3] ;load 128bit of x+2*stride
- movdqu m3, [rfilterq+3*%3] ;load 128bit of x+3*stride
+ %%load m1, [rfilterq+ %3]
+ %%load m2, [rfilterq+2*%3]
+ %%load m3, [rfilterq+3*%3]
%else
- movdqu m1, [rfilterq+ %3q] ;load 128bit of x+stride
- movdqu m2, [rfilterq+2*%3q] ;load 128bit of x+2*stride
- movdqu m3, [rfilterq+r3srcq] ;load 128bit of x+2*stride
+ %%load m1, [rfilterq+ %3q]
+ %%load m2, [rfilterq+2*%3q]
+ %%load m3, [rfilterq+r3srcq]
%endif
%if %1 == 8
More information about the ffmpeg-cvslog mailing list