[FFmpeg-cvslog] x86: move some inline asm macros to the only places they are used

Mans Rullgard git at videolan.org
Sun Jun 24 02:20:46 CEST 2012


ffmpeg | branch: master | Mans Rullgard <mans at mansr.com> | Fri Jun 22 22:10:31 2012 +0100| [685f5438bbbf99126856c165f64638e66a4ed355] | committer: Mans Rullgard

x86: move some inline asm macros to the only places they are used

Signed-off-by: Mans Rullgard <mans at mansr.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=685f5438bbbf99126856c165f64638e66a4ed355
---

 libavcodec/x86/cavsdsp_mmx.c |    6 ++++++
 libavcodec/x86/dsputil_mmx.c |   28 ++++++++++++++++++++++++++++
 libavcodec/x86/dsputil_mmx.h |   34 ----------------------------------
 3 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/libavcodec/x86/cavsdsp_mmx.c b/libavcodec/x86/cavsdsp_mmx.c
index 3bc62ea..f56f859 100644
--- a/libavcodec/x86/cavsdsp_mmx.c
+++ b/libavcodec/x86/cavsdsp_mmx.c
@@ -29,6 +29,12 @@
 #include "libavcodec/cavsdsp.h"
 #include "dsputil_mmx.h"
 
+/* in/out: mma=mma+mmb, mmb=mmb-mma */
+#define SUMSUB_BA( a, b ) \
+    "paddw "#b", "#a" \n\t"\
+    "paddw "#b", "#b" \n\t"\
+    "psubw "#a", "#b" \n\t"
+
 /*****************************************************************************
  *
  * inverse transform
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 434d185..71a65e7 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -630,6 +630,34 @@ static void add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
 }
 #endif
 
+static inline void transpose4x4(uint8_t *dst, uint8_t *src, x86_reg dst_stride, x86_reg src_stride){
+    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
+        "movd  (%1), %%mm0              \n\t"
+        "add   %3, %1                   \n\t"
+        "movd  (%1), %%mm1              \n\t"
+        "movd  (%1,%3,1), %%mm2         \n\t"
+        "movd  (%1,%3,2), %%mm3         \n\t"
+        "punpcklbw %%mm1, %%mm0         \n\t"
+        "punpcklbw %%mm3, %%mm2         \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "punpcklwd %%mm2, %%mm0         \n\t"
+        "punpckhwd %%mm2, %%mm1         \n\t"
+        "movd  %%mm0, (%0)              \n\t"
+        "add   %2, %0                   \n\t"
+        "punpckhdq %%mm0, %%mm0         \n\t"
+        "movd  %%mm0, (%0)              \n\t"
+        "movd  %%mm1, (%0,%2,1)         \n\t"
+        "punpckhdq %%mm1, %%mm1         \n\t"
+        "movd  %%mm1, (%0,%2,2)         \n\t"
+
+        :  "+&r" (dst),
+           "+&r" (src)
+        :  "r" (dst_stride),
+           "r" (src_stride)
+        :  "memory"
+    );
+}
+
 #define H263_LOOP_FILTER                        \
     "pxor      %%mm7, %%mm7             \n\t"   \
     "movq         %0, %%mm0             \n\t"   \
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
index 37f4581..fa42be6 100644
--- a/libavcodec/x86/dsputil_mmx.h
+++ b/libavcodec/x86/dsputil_mmx.h
@@ -78,12 +78,6 @@ extern const double ff_pd_2[2];
     "movq "#c", 2*"#stride"+"#out"\n\t"\
     "movq "#d", 3*"#stride"+"#out"\n\t"
 
-/* in/out: mma=mma+mmb, mmb=mmb-mma */
-#define SUMSUB_BA( a, b ) \
-    "paddw "#b", "#a" \n\t"\
-    "paddw "#b", "#b" \n\t"\
-    "psubw "#a", "#b" \n\t"
-
 #define SBUTTERFLY(a,b,t,n,m)\
     "mov" #m " " #a ", " #t "         \n\t" /* abcd */\
     "punpckl" #n " " #b ", " #a "     \n\t" /* aebf */\
@@ -95,34 +89,6 @@ extern const double ff_pd_2[2];
     SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
     SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
 
-static inline void transpose4x4(uint8_t *dst, uint8_t *src, x86_reg dst_stride, x86_reg src_stride){
-    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
-        "movd  (%1), %%mm0              \n\t"
-        "add   %3, %1                   \n\t"
-        "movd  (%1), %%mm1              \n\t"
-        "movd  (%1,%3,1), %%mm2         \n\t"
-        "movd  (%1,%3,2), %%mm3         \n\t"
-        "punpcklbw %%mm1, %%mm0         \n\t"
-        "punpcklbw %%mm3, %%mm2         \n\t"
-        "movq %%mm0, %%mm1              \n\t"
-        "punpcklwd %%mm2, %%mm0         \n\t"
-        "punpckhwd %%mm2, %%mm1         \n\t"
-        "movd  %%mm0, (%0)              \n\t"
-        "add   %2, %0                   \n\t"
-        "punpckhdq %%mm0, %%mm0         \n\t"
-        "movd  %%mm0, (%0)              \n\t"
-        "movd  %%mm1, (%0,%2,1)         \n\t"
-        "punpckhdq %%mm1, %%mm1         \n\t"
-        "movd  %%mm1, (%0,%2,2)         \n\t"
-
-        :  "+&r" (dst),
-           "+&r" (src)
-        :  "r" (dst_stride),
-           "r" (src_stride)
-        :  "memory"
-    );
-}
-
 // e,f,g,h can be memory
 // out: a,d,t,c
 #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\



More information about the ffmpeg-cvslog mailing list