[FFmpeg-devel] [PATCH] x86: use new gcc atomic built-ins if available

James Almer jamrial at gmail.com
Sun Oct 26 02:32:57 CET 2014


__sync built-ins are considered legacy and will be deprecated.
These new memory model aware built-ins have been available since GCC 4.7.0

Signed-off-by: James Almer <jamrial at gmail.com>
---
https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/_005f_005fatomic-Builtins.html
This is an RFC for a couple of reasons.

The first is the memory model parameter. The documentation mentions that the
__sync functions match the behavior of the new __atomic functions when the
latter use the full barrier model (__ATOMIC_SEQ_CST), so I went with it for
consistency's sake. It may however be a good idea to check whether any of the
more relaxed models available for these new functions can be used instead.
It's worth mentioning that when I tested, gcc-tsan liked the __atomic load and
store functions a lot more than __sync_synchronize(), regardless of memory
model.

The second reason is __atomic_compare_exchange_n(), and how it differs from
__sync_val_compare_and_swap().
While the latter returns *ptr as it was before the operation, the former
doesn't and instead copies *ptr to oldval if the result of the comparison is
false. This means that returning oldval will match the old behavior without
having to change the wrapper.
A disassembly excerpt from libavutil/buffer.o, however, hints that the __atomic
function may be slower because it has to write oldval back.

__sync_val_compare_and_swap:
 8e3:	48 89 d8             	mov    rax,rbx
 8e6:	f0 48 0f b1 16       	lock cmpxchg QWORD PTR [rsi],rdx
 8eb:	48 85 c0             	test   rax,rax

__atomic_compare_exchange_n:
 8f0:	48 8d 4c 24 20       	lea    rcx,[rsp+0x20]
 [...]
 90c:	48 89 d8             	mov    rax,rbx
 90f:	48 89 5c 24 20       	mov    QWORD PTR [rsp+0x20],rbx
 914:	f0 48 0f b1 16       	lock cmpxchg QWORD PTR [rsi],rdx
 919:	74 03                	je     91e <av_buffer_pool_get+0x3e>
 91b:	48 89 01             	mov    QWORD PTR [rcx],rax
 91e:	48 8b 44 24 20       	mov    rax,QWORD PTR [rsp+0x20]
 923:	48 85 c0             	test   rax,rax

So the question is, do we keep using __sync_val_compare_and_swap as long as
gcc offers it (which is probably a very long time), or immediately switch to
__atomic_compare_exchange_n if available?

 configure              |  4 +++-
 libavutil/atomic_gcc.h | 17 ++++++++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 3eb1aa0..7697ed8 100755
--- a/configure
+++ b/configure
@@ -1596,6 +1596,7 @@ ARCH_FEATURES="
 
 BUILTIN_LIST="
     atomic_cas_ptr
+    atomic_compare_exchange
     machine_rw_barrier
     MemoryBarrier
     mm_empty
@@ -2021,7 +2022,7 @@ simd_align_16_if_any="altivec neon sse"
 symver_if_any="symver_asm_label symver_gnu_asm"
 
 # threading support
-atomics_gcc_if="sync_val_compare_and_swap"
+atomics_gcc_if_any="sync_val_compare_and_swap atomic_compare_exchange"
 atomics_suncc_if="atomic_cas_ptr machine_rw_barrier"
 atomics_win32_if="MemoryBarrier"
 atomics_native_if_any="$ATOMICS_LIST"
@@ -4673,6 +4674,7 @@ if ! disabled network; then
 fi
 
 check_builtin atomic_cas_ptr atomic.h "void **ptr; void *oldval, *newval; atomic_cas_ptr(ptr, oldval, newval)"
+check_builtin atomic_compare_exchange "" "int *ptr, *oldval; int newval; __atomic_compare_exchange_n(ptr, oldval, newval, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)"
 check_builtin machine_rw_barrier mbarrier.h "__machine_rw_barrier()"
 check_builtin MemoryBarrier windows.h "MemoryBarrier()"
 check_builtin sarestart signal.h "SA_RESTART"
diff --git a/libavutil/atomic_gcc.h b/libavutil/atomic_gcc.h
index 2bb43c3..4b0e425 100644
--- a/libavutil/atomic_gcc.h
+++ b/libavutil/atomic_gcc.h
@@ -28,28 +28,43 @@
 #define avpriv_atomic_int_get atomic_int_get_gcc
 static inline int atomic_int_get_gcc(volatile int *ptr)
 {
+#if HAVE_ATOMIC_COMPARE_EXCHANGE
+    return __atomic_load_n(ptr, __ATOMIC_SEQ_CST);
+#else
     __sync_synchronize();
     return *ptr;
+#endif
 }
 
 #define avpriv_atomic_int_set atomic_int_set_gcc
 static inline void atomic_int_set_gcc(volatile int *ptr, int val)
 {
+#if HAVE_ATOMIC_COMPARE_EXCHANGE
+    __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST);
+#else
     *ptr = val;
     __sync_synchronize();
+#endif
 }
 
 #define avpriv_atomic_int_add_and_fetch atomic_int_add_and_fetch_gcc
 static inline int atomic_int_add_and_fetch_gcc(volatile int *ptr, int inc)
 {
+#if HAVE_ATOMIC_COMPARE_EXCHANGE
+    return __atomic_add_fetch(ptr, inc, __ATOMIC_SEQ_CST);
+#else
     return __sync_add_and_fetch(ptr, inc);
+#endif
 }
 
 #define avpriv_atomic_ptr_cas atomic_ptr_cas_gcc
 static inline void *atomic_ptr_cas_gcc(void * volatile *ptr,
                                        void *oldval, void *newval)
 {
-#ifdef __ARMCC_VERSION
+#if HAVE_ATOMIC_COMPARE_EXCHANGE
+    __atomic_compare_exchange_n(ptr, &oldval, newval, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+    return oldval;
+#elif defined(__ARMCC_VERSION)
     // armcc will throw an error if ptr is not an integer type
     volatile uintptr_t *tmp = (volatile uintptr_t*)ptr;
     return (void*)__sync_val_compare_and_swap(tmp, oldval, newval);
-- 
2.0.4



More information about the ffmpeg-devel mailing list