[Ffmpeg-devel] [PATCH] H264 cabac vlc reading code

Michael Niedermayer michaelni
Sat Oct 14 13:17:58 CEST 2006


Hi

On Fri, Oct 13, 2006 at 03:43:46PM +0200, Michael Niedermayer wrote:
> Hi
> 
> the attached patch contains some generic "non-binary"/"vlc" cabac
> reading code. Sadly it's slower, that's why I post it here instead of
> committing it ;)
> 
> Maybe it's useful for someone, or someone has an idea how to make it
> faster

Today's useless attached cabac patch changes the hardcoded registers into
more flexible constraints. It's supposed to be better, as gcc should be able
to reuse registers and avoid a few loads/stores, but it isn't faster; instead
it's significantly slower. I've not looked at the asm code gcc generates, but
I've tried a few different gcc versions ...

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In the past you could go to a library and read, borrow or copy any book
Today you'd get arrested for merely telling someone where the library is
-------------- next part --------------
Index: libavcodec/cabac.h
===================================================================
--- libavcodec/cabac.h	(revision 6683)
+++ libavcodec/cabac.h	(working copy)
@@ -451,73 +451,66 @@
     );
     bit&=1;
 #else /* BRANCHLESS_CABAC_DECODER */
+    int dummy;
     asm volatile(
-        "movzbl (%1), %%eax                     \n\t"
-        "movl "RANGE    "(%2), %%ebx            \n\t"
-        "movl "RANGE    "(%2), %%edx            \n\t"
-        "shrl $23, %%ebx                        \n\t"
-        "movzbl "MANGLE(ff_h264_lps_range)"(%%ebx, %%eax, 4), %%esi\n\t"
-        "shll $17, %%esi                        \n\t"
-        "movl "LOW      "(%2), %%ebx            \n\t"
+        "shrl $23, %4                           \n\t"
+        "movzbl "MANGLE(ff_h264_lps_range)"(%4, %0, 4), %4\n\t"
+        "shll $17, %4                           \n\t"
 //eax:state ebx:low, edx:range, esi:RangeLPS
-        "subl %%esi, %%edx                      \n\t"
+        "subl %4, %2                            \n\t"
 #ifdef CMOV_IS_FAST //FIXME actually define this somewhere
-        "cmpl %%ebx, %%edx                      \n\t"
-        "cmova %%edx, %%esi                     \n\t"
+        "cmpl %1, %2                            \n\t"
+        "cmova %2, %4                           \n\t"
         "sbbl %%ecx, %%ecx                      \n\t"
-        "andl %%ecx, %%edx                      \n\t"
-        "subl %%edx, %%ebx                      \n\t"
-        "xorl %%ecx, %%eax                      \n\t"
+        "andl %%ecx, %2                         \n\t"
+        "subl %2, %1                            \n\t"
+        "xorl %%ecx, %0                         \n\t"
 #else /* CMOV_IS_FAST */
-        "movl %%edx, %%ecx                      \n\t"
-        "subl %%ebx, %%edx                      \n\t"
-        "sarl $31, %%edx                        \n\t" //lps_mask
-        "subl %%ecx, %%esi                      \n\t" //RangeLPS - range
-        "andl %%edx, %%esi                      \n\t" //(RangeLPS - range)&lps_mask
-        "addl %%ecx, %%esi                      \n\t" //new range
-        "andl %%edx, %%ecx                      \n\t"
-        "subl %%ecx, %%ebx                      \n\t"
-        "xorl %%edx, %%eax                      \n\t"
+        "movl %2, %%ecx                         \n\t"
+        "subl %1, %2                            \n\t"
+        "sarl $31, %2                           \n\t" //lps_mask
+        "subl %%ecx, %4                         \n\t" //RangeLPS - range
+        "andl %2, %4                            \n\t" //(RangeLPS - range)&lps_mask
+        "addl %%ecx, %4                         \n\t" //new range
+        "andl %2, %%ecx                         \n\t"
+        "subl %%ecx, %1                         \n\t"
+        "xorl %2, %0                            \n\t"
 #endif /* CMOV_IS_FAST */
 
 //eax:state ebx:low edx:mask esi:range
-        "movzbl "MANGLE(ff_h264_mlps_state)"+128(%%eax), %%ecx   \n\t"
-        "movb %%cl, (%1)                        \n\t"
+        "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %k3   \n\t"
 
-        "movl %%esi, %%edx                      \n\t"
+        "movl %4, %2                            \n\t"
 //eax:bit ebx:low edx:range esi:range
 
-        "shr $19, %%esi                         \n\t"
-        "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx   \n\t"
-        "shll %%cl, %%edx                       \n\t"
-        "movl %%edx, "RANGE    "(%2)            \n\t"
-        "shll %%cl, %%ebx                       \n\t"
-        "movl %%ebx, "LOW      "(%2)            \n\t"
-        "test %%bx, %%bx                        \n\t"
+        "shr $19, %4                            \n\t"
+        "movzbl " MANGLE(ff_h264_norm_shift) "(%4), %%ecx   \n\t"
+        "shll %%cl, %2                          \n\t"
+        "shll %%cl, %1                          \n\t"
+        "test %w1, %w1                          \n\t"
         " jnz 1f                                \n\t"
 
-        "movl "BYTE     "(%2), %%ecx            \n\t"
-        "movzwl (%%ecx), %%esi                  \n\t"
-        "bswap %%esi                            \n\t"
-        "shrl $15, %%esi                        \n\t"
-        "subl $0xFFFF, %%esi                    \n\t"
+        "movl %5, %%ecx                         \n\t"
+        "movzwl (%%ecx), %4                     \n\t"
+        "bswap %4                               \n\t"
+        "shrl $15, %4                           \n\t"
+        "subl $0xFFFF, %4                       \n\t"
         "addl $2, %%ecx                         \n\t"
-        "movl %%ecx, "BYTE    "(%2)             \n\t"
+        "movl %%ecx, %5                         \n\t"
 
-        "leal -1(%%ebx), %%ecx                  \n\t"
-        "xorl %%ebx, %%ecx                      \n\t"
+        "leal -1(%1), %%ecx                     \n\t"
+        "xorl %1, %%ecx                         \n\t"
         "shrl $17, %%ecx                        \n\t"
         "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx   \n\t"
         "neg %%ecx                              \n\t"
         "add $7, %%ecx                          \n\t"
 
-        "shll %%cl , %%esi                      \n\t"
-        "addl %%esi, %%ebx                      \n\t"
-        "movl %%ebx, "LOW      "(%2)            \n\t"
+        "shll %%cl , %4                         \n\t"
+        "addl %4, %1                            \n\t"
         "1:                                     \n\t"
-        :"=&a"(bit)
-        :"r"(state), "r"(c)
-        : "%ecx", "%ebx", "%edx", "%esi", "memory"
+        :"=&r"(bit), "+r"(c->low), "+r"(c->range), "=&r"(*state), "=&r"(dummy), "+m"(c->bytestream)
+        :"0"((int)*state), "4"(c->range)
+        : "%ecx"
     );
     bit&=1;
 #endif /* BRANCHLESS_CABAC_DECODER */



More information about the ffmpeg-devel mailing list