[FFmpeg-cvslog] avcodec/x86/videodsp: Small speedups in ff_emulated_edge_mc x86 SIMD.

Ronald S. Bultje git at videolan.org
Sun Oct 27 15:08:52 CET 2013


ffmpeg | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Sat Oct 26 08:24:09 2013 -0400| [960490c0b20dd5f9a6c329bd14023b9598082fda] | committer: Michael Niedermayer

avcodec/x86/videodsp: Small speedups in ff_emulated_edge_mc x86 SIMD.

Don't use word-size multiplications if size == 2, and if we're using
SIMD instructions (size >= 8), complete leftover 4byte sets using movd,
not mov. Both of these changes lead to minor speedups.

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=960490c0b20dd5f9a6c329bd14023b9598082fda
---

 libavcodec/x86/videodsp.asm |   34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index 85a41b5..1ac0257 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -344,10 +344,6 @@ VERTICAL_EXTEND 16, 22
 ; obviously not the same on both sides.
 
 %macro READ_V_PIXEL 2
-%if %1 == 2
-    movzx          valw, byte %2
-    imul           valw, 0x0101
-%else
     movzx          vald, byte %2
     imul           vald, 0x01010101
 %if %1 >= 8
@@ -356,13 +352,15 @@ VERTICAL_EXTEND 16, 22
     pshufd           m0, m0, q0000
 %else
     punpckldq        m0, m0
-%endif
-%endif ; %1 >= 8
-%endif
+%endif ; mmsize == 16
+%endif ; %1 > 16
 %endmacro ; READ_V_PIXEL
 
 %macro WRITE_V_PIXEL 2
 %assign %%off 0
+
+%if %1 >= 8
+
 %rep %1/mmsize
     movu     [%2+%%off], m0
 %assign %%off %%off+mmsize
@@ -378,27 +376,29 @@ VERTICAL_EXTEND 16, 22
 %assign %%off %%off+8
 %endif
 %endif ; %1-%%off >= 8
-%endif
+%endif ; mmsize == 16
 
 %if %1-%%off >= 4
 %if %1 > 8 && %1-%%off > 4
     movq      [%2+%1-8], m0
 %assign %%off %1
-%elif %1 >= 8 && %1-%%off >= 4
-    movd     [%2+%%off], m0
-%assign %%off %%off+4
 %else
-    mov      [%2+%%off], vald
+    movd     [%2+%%off], m0
 %assign %%off %%off+4
 %endif
 %endif ; %1-%%off >= 4
 
-%if %1-%%off >= 2
-%if %1 >= 8
-    movd      [%2+%1-4], m0
-%else
+%else ; %1 < 8
+
+%rep %1/4
+    mov      [%2+%%off], vald
+%assign %%off %%off+4
+%endrep ; %1/4
+
+%endif ; %1 >=/< 8
+
+%if %1-%%off == 2
     mov      [%2+%%off], valw
-%endif
 %endif ; (%1-%%off)/2
 %endmacro ; WRITE_V_PIXEL
 



More information about the ffmpeg-cvslog mailing list