[FFmpeg-devel] [PATCH 2/2] libavutil: add bmi2 optimized av_zhb

James Almer jamrial at gmail.com
Tue Mar 17 05:08:06 CET 2015


Signed-off-by: James Almer <jamrial at gmail.com>
---
GCC apparently can't generate a bzhi instruction on its own from the C version, so
here's a custom implementation.

Before:

gcc -O3
<av_zhb_c>:
   0:   89 f1                   mov    ecx,esi
   2:   ba 01 00 00 00          mov    edx,0x1
   7:   d3 e2                   shl    edx,cl
   9:   83 ea 01                sub    edx,0x1
   c:   89 d0                   mov    eax,edx
   e:   21 f8                   and    eax,edi
  10:   c3                      ret

gcc -mbmi2 -O3
<av_zhb_c>:
   0:   ba 01 00 00 00          mov    edx,0x1
   5:   c4 e2 49 f7 d2          shlx   edx,edx,esi
   a:   8d 42 ff                lea    eax,[rdx-0x1]
   d:   21 f8                   and    eax,edi
   f:   c3                      ret

After:

gcc -mbmi2 -O3
<av_zhb_bmi2>:
   0:   c4 e2 48 f5 c7          bzhi   eax,edi,esi
   5:   c3                      ret

The non-BMI2 example is a bit bloated with extra movs that place values in ecx (needed
for shl) and eax (the return value) because, unlike the actual function, it was not
inlined. Still, the best-case scenario is mov + shl + sub/dec/lea + and versus a single
bzhi when p is not a constant.

 libavutil/x86/intmath.h | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index 7aa6bc4..f19ef64 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h
@@ -24,15 +24,36 @@
 #include <stdint.h>
 #include "config.h"
 
+#if defined(__GNUC__)
+
 /* Our generic version of av_popcount is faster than GCC's built-in on
  * CPUs that don't support the popcnt instruction.
  */
-#if defined(__GNUC__) && defined(__POPCNT__)
+#if defined(__POPCNT__)
+
     #define av_popcount   __builtin_popcount
 #if ARCH_X86_64
     #define av_popcount64 __builtin_popcountll
 #endif
 
-#endif /* defined(__GNUC__) && defined(__POPCNT__) */
+#endif /* __POPCNT__ */
+
+#if defined(__BMI2__)
+
+#define av_zhb av_zhb_bmi2
+static av_always_inline av_const unsigned av_zhb_bmi2(unsigned a, unsigned p)
+{   /* Zero the high bits of a: returns a with only its low p bits kept. */
+    if (av_builtin_constant_p(p))   /* compile-time constant p: use the plain C mask */
+        return a & ((1U << p) - 1); /* 1U: a signed 1 << 31 would overflow int (UB) */
+    else {
+        unsigned x;  /* receives the bzhi result; operands below: index, source, dest */
+        __asm__ ("bzhi %2, %1, %0 \n\t" : "=r"(x) : "rm"(a), "r"(p));
+        return x;
+    }
+}
+
+#endif /* __BMI2__ */
+
+#endif /* __GNUC__ */
 
 #endif /* AVUTIL_X86_INTMATH_H */
-- 
2.3.2



More information about the ffmpeg-devel mailing list