[FFmpeg-devel] [PATCH 2/2] swresample/resample: sse float linear interpolation

James Almer jamrial at gmail.com
Sun Mar 23 23:05:17 CET 2014


About two times faster

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libswresample/resample_template.c |  1 +
 libswresample/x86/resample_mmx.h  | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c
index c661276..396b4e3 100644
--- a/libswresample/resample_template.c
+++ b/libswresample/resample_template.c
@@ -48,6 +48,7 @@
 #        define RENAME(N) N ## _float
 #    elif defined(TEMPLATE_RESAMPLE_FLT_SSE)
 #        define COMMON_CORE COMMON_CORE_FLT_SSE
+#        define LINEAR_CORE LINEAR_CORE_FLT_SSE
 #        define RENAME(N) N ## _float_sse
 #    endif
 
diff --git a/libswresample/x86/resample_mmx.h b/libswresample/x86/resample_mmx.h
index 28a317c..a0df6e1 100644
--- a/libswresample/x86/resample_mmx.h
+++ b/libswresample/x86/resample_mmx.h
@@ -156,3 +156,38 @@ __asm__ volatile(\
       "r" (((uint8_t*)filter)-len),\
       "r" (dst+dst_index)\
 );
+
+#define LINEAR_CORE_FLT_SSE /* SSE core: two dot products of the floats at src+sample_index, one with filter (-> val), one with filter+c->filter_alloc (-> v2) */ \
+    x86_reg len= -4*c->filter_length; /* negative byte count (4 bytes per float); used as an index that counts up towards 0 */\
+__asm__ volatile(\
+    "xorps      %%xmm0, %%xmm0    \n\t" /* xmm0 = 0: four partial sums for val */\
+    "xorps      %%xmm2, %%xmm2    \n\t" /* xmm2 = 0: four partial sums for v2 */\
+    "1:                           \n\t" /* loop head: 4 floats per iteration; the body always runs at least once */\
+    "movups   (%3, %0), %%xmm1    \n\t" /* unaligned load of 4 source samples */\
+    "movaps     %%xmm1, %%xmm3    \n\t" /* keep a second copy for the second filter */\
+    "mulps    (%4, %0), %%xmm1    \n\t" /* samples * filter taps; SSE memory operand must be 16-byte aligned */\
+    "mulps    (%5, %0), %%xmm3    \n\t" /* samples * (filter+filter_alloc) taps; same alignment requirement */\
+    "addps      %%xmm1, %%xmm0    \n\t" /* accumulate into the val partial sums */\
+    "addps      %%xmm3, %%xmm2    \n\t" /* accumulate into the v2 partial sums */\
+    "add           $16, %0        \n\t" /* advance the index by 16 bytes (4 floats) */\
+    " js 1b                       \n\t" /* repeat while the index is still negative */\
+    "movhlps    %%xmm0, %%xmm1    \n\t" /* horizontal add, step 1: xmm1[0..1] = xmm0[2..3] */\
+    "movhlps    %%xmm2, %%xmm3    \n\t" /* same for the v2 sums */\
+    "addps      %%xmm1, %%xmm0    \n\t" /* xmm0[0] = s0+s2, xmm0[1] = s1+s3 */\
+    "addps      %%xmm3, %%xmm2    \n\t" /* same for the v2 sums */\
+    "movss      %%xmm0, %%xmm1    \n\t" /* step 2: xmm1[0] = s0+s2 */\
+    "movss      %%xmm2, %%xmm3    \n\t" /* same for the v2 sums */\
+    "shufps $1, %%xmm0, %%xmm0    \n\t" /* move lane 1 (s1+s3) down into lane 0 */\
+    "shufps $1, %%xmm2, %%xmm2    \n\t" /* same for the v2 sums */\
+    "addps      %%xmm1, %%xmm0    \n\t" /* lane 0 = s0+s1+s2+s3; upper lanes are not used afterwards */\
+    "addps      %%xmm3, %%xmm2    \n\t" /* same for the v2 sums */\
+    "movss      %%xmm0, %1        \n\t" /* store the first dot product to val */\
+    "movss      %%xmm2, %2        \n\t" /* store the second dot product to v2 */\
+    : "+r" (len), /* %0: byte index, read and modified by the loop */\
+      "=m" (val), /* %1: output, written once after the loop */\
+      "=m" (v2) /* %2: output, written once after the loop */\
+    : "r" (((uint8_t*)(src+sample_index))-len), /* %3: source base pre-biased by -len, so (%3,%0) starts at src+sample_index */\
+      "r" (((uint8_t*)filter)-len), /* %4: first filter, biased the same way */\
+      "r" (((uint8_t*)(filter+c->filter_alloc))-len) /* %5: second filter, filter_alloc elements after the first */\
+    XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3") /* presumably declares the four scratch registers; NOTE(review): no "memory" clobber or "m" inputs although the asm reads through %3-%5 - confirm this is intended */\
+);
-- 
1.8.3.2



More information about the ffmpeg-devel mailing list