[FFmpeg-cvslog] x86: move horizontal add macros to x86util

James Almer git at videolan.org
Thu Apr 17 14:23:31 CEST 2014


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Apr 16 20:15:35 2014 -0300| [76ed71a72bffb45027923e4da5f6fc6a97bfb218] | committer: Michael Niedermayer

x86: move horizontal add macros to x86util

Also port relevant AVX2/XOP optimizations from x264 with permission
to relicense to LGPL from the corresponding authors

Signed-off-by: James Almer <jamrial at gmail.com>
Reviewed-by: "Ronald S. Bultje" <rsbultje at gmail.com>
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=76ed71a72bffb45027923e4da5f6fc6a97bfb218
---

 libavcodec/x86/h264_intrapred_10bit.asm |   16 ---------------
 libavutil/x86/x86util.asm               |   33 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 40f1c9f..9dee577 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -171,22 +171,6 @@ PRED4x4_HD
 ;-----------------------------------------------------------------------------
 ; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
 ;-----------------------------------------------------------------------------
-%macro HADDD 2 ; sum junk
-%if mmsize == 16
-    movhlps %2, %1
-    paddd   %1, %2
-    pshuflw %2, %1, 0xE
-    paddd   %1, %2
-%else
-    pshufw  %2, %1, 0xE
-    paddd   %1, %2
-%endif
-%endmacro
-
-%macro HADDW 2
-    pmaddwd %1, [pw_1]
-    HADDD   %1, %2
-%endmacro
 
 INIT_MMX mmxext
 cglobal pred4x4_dc_10, 3, 3
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index df58cad..67d7905 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -273,6 +273,39 @@
 %endif
 %endmacro
 
+%macro HADDD 2 ; sum junk: add up all dwords of sum, total ends up in its low dword; junk is scratch and gets overwritten
+%if sizeof%1 == 32 ; 32-byte sum: fold the upper 128 bits onto the lower 128 bits first
+%define %2 xmm%2 ; from here on junk expands to xmm<junk> (presumably its 128-bit alias, defined outside this hunk)
+    vextracti128 %2, %1, 1 ; junk = upper 128 bits of sum
+%define %1 xmm%1 ; likewise redirect sum to its xmm<sum> name for the remaining instructions
+    paddd   %1, %2 ; lower half += upper half
+%endif
+%if mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
+    vphadddq %1, %1 ; XOP: add adjacent dword pairs, one qword-sized sum per 64-bit half
+%endif
+    movhlps %2, %1 ; low qword of junk = high qword of sum
+    paddd   %1, %2 ; fold the high 64 bits onto the low 64 bits
+%endif
+%if notcpuflag(xop) || sizeof%1 != 16 ; skipped only when vphadddq above already paired the dwords
+    PSHUFLW %2, %1, q0032 ; dword 1 of sum -> dword 0 of junk (q0032 presumably equals the 0xE immediate of the removed version)
+    paddd   %1, %2 ; low dword of sum now holds the grand total
+%endif
+%undef %1 ; drop the temporary aliases (they are only defined on the 32-byte path)
+%undef %2
+%endmacro
+
+%macro HADDW 2 ; reg, tmp: add up all words of reg, total ends up in its low dword; tmp is scratch and gets overwritten
+%if cpuflag(xop) && sizeof%1 == 16
+    vphaddwq  %1, %1 ; XOP: sum each group of four words into one qword
+    movhlps   %2, %1 ; low qword of tmp = high qword of reg
+    paddd     %1, %2 ; low dword of reg = total of all eight words
+%else
+    pmaddwd %1, [pw_1] ; multiply by pw_1 (presumably words of 1 -- defined elsewhere) and add adjacent pairs into dwords
+    HADDD   %1, %2 ; then reduce those dwords horizontally
+%endif
+%endmacro
+
 %macro PALIGNR 4-5
 %if cpuflag(ssse3)
 %if %0==5



More information about the ffmpeg-cvslog mailing list