[FFmpeg-cvslog] r18319 - trunk/libavcodec/x86/dsputil_mmx.c

alexc subversion
Thu Apr 2 23:02:42 CEST 2009


Author: alexc
Date: Thu Apr  2 23:02:42 2009
New Revision: 18319

Log:
Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.

Modified:
   trunk/libavcodec/x86/dsputil_mmx.c

Modified: trunk/libavcodec/x86/dsputil_mmx.c
==============================================================================
--- trunk/libavcodec/x86/dsputil_mmx.c	Thu Apr  2 22:24:15 2009	(r18318)
+++ trunk/libavcodec/x86/dsputil_mmx.c	Thu Apr  2 23:02:42 2009	(r18319)
@@ -28,7 +28,6 @@
 #include "libavcodec/mpegvideo.h"
 #include "libavcodec/simple_idct.h"
 #include "dsputil_mmx.h"
-#include "mmx.h"
 #include "vp3dsp_mmx.h"
 #include "vp3dsp_sse2.h"
 #include "vp6dsp_mmx.h"
@@ -273,22 +272,41 @@ void put_pixels_clamped_mmx(const DCTELE
             :"memory");
 }
 
-static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) =
+DECLARE_ALIGNED_8(const unsigned char, ff_vector128[8]) =
   { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
 
+#define put_signed_pixels_clamped_mmx_half(off) \
+            "movq    "#off"(%2), %%mm1          \n\t"\
+            "movq 16+"#off"(%2), %%mm2          \n\t"\
+            "movq 32+"#off"(%2), %%mm3          \n\t"\
+            "movq 48+"#off"(%2), %%mm4          \n\t"\
+            "packsswb  8+"#off"(%2), %%mm1      \n\t"\
+            "packsswb 24+"#off"(%2), %%mm2      \n\t"\
+            "packsswb 40+"#off"(%2), %%mm3      \n\t"\
+            "packsswb 56+"#off"(%2), %%mm4      \n\t"\
+            "paddb %%mm0, %%mm1                 \n\t"\
+            "paddb %%mm0, %%mm2                 \n\t"\
+            "paddb %%mm0, %%mm3                 \n\t"\
+            "paddb %%mm0, %%mm4                 \n\t"\
+            "movq %%mm1, (%0)                   \n\t"\
+            "movq %%mm2, (%0, %3)               \n\t"\
+            "movq %%mm3, (%0, %3, 2)            \n\t"\
+            "movq %%mm4, (%0, %1)               \n\t"
+
 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
 {
-    int i;
+    x86_reg line_skip = line_size;
+    x86_reg av_uninit(line_skip3);
 
-    movq_m2r(*vector128, mm1);
-    for (i = 0; i < 8; i++) {
-        movq_m2r(*(block), mm0);
-        packsswb_m2r(*(block + 4), mm0);
-        block += 8;
-        paddb_r2r(mm1, mm0);
-        movq_r2m(mm0, *pixels);
-        pixels += line_size;
-    }
+    __asm__ volatile (
+            "movq "MANGLE(ff_vector128)", %%mm0 \n\t"
+            "lea (%3, %3, 2), %1                \n\t"
+            put_signed_pixels_clamped_mmx_half(0)
+            "lea (%0, %3, 4), %0                \n\t"
+            put_signed_pixels_clamped_mmx_half(64)
+            :"+r" (pixels), "+r" (line_skip3)
+            :"r" (block), "r"(line_skip)
+            :"memory");
 }
 
 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)



More information about the ffmpeg-cvslog mailing list