[Ffmpeg-devel] Still struggling with -fPIC and MMX asm-code

Fri Jul 15 22:10:27 CEST 2005

Rich Felker schrieb:

>Giving up PIC which is useless on x86.
>  
>

But the Debian-Policy requires -fPIC :-(

Michael said the original patch from the Debian maintainer was wrong,
and he was right. But I shouldn't have tried to reinvent the wheel by
splitting the assembler blocks. The solution was much easier, because
the patch from the Debian package was on the right track and just
contained a small bug. The patch replaces one of the register operands
(which is no longer available because of -fPIC) with a memory operand
and modifies the code to use a memory address instead of a register.
The bug was simply that eax was overwritten even though its value was
still needed for qmat[i] addressing. At least my modification works now
(see below), and I have learned a little bit about assembler code in gcc
and MMX instructions :-)

Thanks everyone for all your help!!!

Tobias

--- ffmpeg-0.cvs20050626.orig/libavcodec/i386/mmx.h
+++ ffmpeg-0.cvs20050626/libavcodec/i386/mmx.h
@@ -9,6 +9,9 @@
 #  define REG_a "rax"
 #else
 #  define REG_a "eax"
+#  if defined(PIC)
+#     define REG_b "ebx"
+#  endif
 #endif
 
 /*
--- ffmpeg-0.cvs20050626.orig/libavcodec/i386/mpegvideo_mmx_template.c
+++ ffmpeg-0.cvs20050626/libavcodec/i386/mpegvideo_mmx_template.c
@@ -95,7 +95,14 @@
             SPREADW(%%mm3)
             "pxor %%mm7, %%mm7            \n\t" // 0
             "pxor %%mm4, %%mm4            \n\t" // 0
+#if defined(PIC) && !defined(ARCH_X86_64)
+            "push %%"REG_a"            \n\t"
+            "movl %2, %%"REG_a"            \n\t"
+            "movq (%%"REG_a"), %%mm5        \n\t" // qmat[0]
+            "pop %%"REG_a"            \n\t"
+#else
             "movq (%2), %%mm5            \n\t" // qmat[0]
+#endif
             "pxor %%mm6, %%mm6            \n\t"
             "psubw (%3), %%mm6            \n\t" // -bias[0]
             "mov $-128, %%"REG_a"        \n\t"
@@ -128,7 +135,11 @@
             "movd %%mm3, %%"REG_a"        \n\t"
             "movzb %%al, %%"REG_a"        \n\t" // last_non_zero_p1
         : "+a" (last_non_zero_p1)
+#if defined(PIC) && !defined(ARCH_X86_64)
+            : "r" (block+64), "m" (qmat), "r" (bias),
+#else
             : "r" (block+64), "r" (qmat), "r" (bias),
+#endif
               "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
         );
         // note the asm is split cuz gcc doesnt like that many operands ...
@@ -157,7 +168,14 @@
             "psubw %%mm1, %%mm0            \n\t" // ABS(block[i])
             "movq (%3, %%"REG_a"), %%mm6    \n\t" // bias[0]
             "paddusw %%mm6, %%mm0        \n\t" // ABS(block[i]) + bias[0]
+#if defined(PIC) && !defined(ARCH_X86_64)
+            "push %%"REG_b"            \n\t"
+            "movl %2, %%"REG_b"            \n\t"
+            "movq (%%"REG_b", %%"REG_a"), %%mm5    \n\t" // qmat[i]
+            "pop %%"REG_b"            \n\t"
+#else
             "movq (%2, %%"REG_a"), %%mm5        \n\t" // qmat[i]
+#endif
             "pmulhw %%mm5, %%mm0        \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
             "por %%mm0, %%mm4            \n\t"
             "pxor %%mm1, %%mm0            \n\t"
@@ -179,7 +197,11 @@
             "movd %%mm3, %%"REG_a"        \n\t"
             "movzb %%al, %%"REG_a"        \n\t" // last_non_zero_p1
         : "+a" (last_non_zero_p1)
+#if defined(PIC) && !defined(ARCH_X86_64)
+            : "r" (block+64), "m" (qmat+64), "r" (bias+64),
+#else
             : "r" (block+64), "r" (qmat+64), "r" (bias+64),
+#endif
               "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
         );
         // note the asm is split cuz gcc doesnt like that many operands ...