[FFmpeg-devel] [PATCH 2/2] libavutil: add bmi2 optimized av_zhb
James Almer
jamrial at gmail.com
Tue Mar 17 05:08:06 CET 2015
Signed-off-by: James Almer <jamrial at gmail.com>
---
GCC apparently can't generate a bzhi instruction on its own from the C version,
so here's a custom implementation using inline assembly.
Before:
gcc -O3
<av_zhb_c>:
0: 89 f1 mov ecx,esi
2: ba 01 00 00 00 mov edx,0x1
7: d3 e2 shl edx,cl
9: 83 ea 01 sub edx,0x1
c: 89 d0 mov eax,edx
e: 21 f8 and eax,edi
10: c3 ret
gcc -mbmi2 -O3
<av_zhb_c>:
0: ba 01 00 00 00 mov edx,0x1
5: c4 e2 49 f7 d2 shlx edx,edx,esi
a: 8d 42 ff lea eax,[rdx-0x1]
d: 21 f8 and eax,edi
f: c3 ret
After:
gcc -mbmi2 -O3
<av_zhb_bmi2>:
0: c4 e2 48 f5 c7 bzhi eax,edi,esi
5: c3 ret
The non-BMI2 example is slightly bloated by the movs that place values in ecx
(needed for shl) and eax (the return value) because, unlike the actual function,
it was not inlined. Even so, when p is not a constant, the best case for the C
version is mov + shl + sub/dec/lea + and, versus a single bzhi.
libavutil/x86/intmath.h | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index 7aa6bc4..f19ef64 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h
@@ -24,15 +24,36 @@
#include <stdint.h>
#include "config.h"
+#if defined(__GNUC__)
+
/* Our generic version of av_popcount is faster than GCC's built-in on
* CPUs that don't support the popcnt instruction.
*/
-#if defined(__GNUC__) && defined(__POPCNT__)
+#if defined(__POPCNT__)
+
#define av_popcount __builtin_popcount
#if ARCH_X86_64
#define av_popcount64 __builtin_popcountll
#endif
-#endif /* defined(__GNUC__) && defined(__POPCNT__) */
+#endif /* __POPCNT__ */
+
+#if defined(__BMI2__)
+
+#define av_zhb av_zhb_bmi2
+/**
+ * Clear the high bits of a, keeping only its p lowest bits (BMI2 version).
+ *
+ * @param a value to mask
+ * @param p number of low bits to keep; must be smaller than the width of
+ *          unsigned, since the constant path shifts by p and shifting by
+ *          the type width or more is undefined in C
+ * @return a with every bit at position p and above cleared
+ */
+static av_always_inline av_const unsigned av_zhb_bmi2(unsigned a, unsigned p)
+{
+    unsigned x;
+
+    /* For a compile-time constant p the compiler can fold the mask itself,
+     * so use plain C. Shift 1U rather than 1: with a signed int, 1 << 31
+     * overflows, which is undefined behavior. */
+    if (av_builtin_constant_p(p))
+        return a & ((1U << p) - 1);
+
+    /* AT&T operand order: bzhi index, source, destination. */
+    __asm__ ("bzhi %2, %1, %0 \n\t" : "=r"(x) : "rm"(a), "r"(p));
+    return x;
+}
+
+#endif /* __BMI2__ */
+
+#endif /* __GNUC__ */
#endif /* AVUTIL_X86_INTMATH_H */
--
2.3.2
More information about the ffmpeg-devel
mailing list