1
0
Fork 0
mirror of https://github.com/yuzu-emu/unicorn.git synced 2025-01-13 14:45:28 +00:00

atomic: base mb_read/mb_set on load-acquire and store-release

This introduces load-acquire and store-release operations in QEMU.
For now, just use them as an implementation detail of atomic_mb_read
and atomic_mb_set.

Since docs/atomics.txt documents that atomic_mb_read only synchronizes
with an atomic_mb_set of the same variable, we can use the new implementation
everywhere instead of seq-cst loads and stores.

Backports commit 803cf26a9e019b5d2256a8edeb22e3538c4f3261 from qemu
This commit is contained in:
Paolo Bonzini 2018-02-26 10:02:45 -05:00 committed by Lioncash
parent fd7ef4c184
commit 8b239bd48b
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7

View file

@ -100,45 +100,18 @@ void _ReadWriteBarrier(void);
__atomic_store_n(ptr, i, __ATOMIC_RELEASE); \
} while(0)
/* atomic_mb_read/set semantics map to Java volatile variables. They are
* less expensive on some platforms (notably POWER & ARMv7) than fully
* sequentially consistent operations.
*
* As long as they are used as paired operations they are safe to
* use. See docs/atomics.txt for more discussion.
*/
#if defined(_ARCH_PPC)
#define atomic_mb_read(ptr) \
#define atomic_load_acquire(ptr) \
({ \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
typeof(*ptr) _val; \
__atomic_load(ptr, &_val, __ATOMIC_RELAXED); \
smp_mb_acquire(); \
__atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \
_val; \
})
#define atomic_mb_set(ptr, i) do { \
#define atomic_store_release(ptr, i) do { \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
smp_mb_release(); \
__atomic_store_n(ptr, i, __ATOMIC_RELAXED); \
smp_mb(); \
__atomic_store_n(ptr, i, __ATOMIC_RELEASE); \
} while(0)
#else
#define atomic_mb_read(ptr) \
({ \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
typeof(*ptr) _val; \
__atomic_load(ptr, &_val, __ATOMIC_SEQ_CST); \
_val; \
})
#define atomic_mb_set(ptr, i) do { \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
__atomic_store_n(ptr, i, __ATOMIC_SEQ_CST); \
} while(0)
#endif
/* All the remaining operations are fully sequentially consistent */
@ -222,11 +195,6 @@ void _ReadWriteBarrier(void);
*/
#define atomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i))
/*
* Load/store with Java volatile semantics.
*/
#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i))
#elif defined(_ARCH_PPC)
/*
@ -317,38 +285,15 @@ void _ReadWriteBarrier(void);
atomic_set(ptr, i); \
} while (0)
/* These have the same semantics as Java volatile variables.
* See http://gee.cs.oswego.edu/dl/jmm/cookbook.html:
* "1. Issue a StoreStore barrier (wmb) before each volatile store.
* 2. Issue a StoreLoad barrier after each volatile store.
* Note that you could instead issue one before each volatile load, but
* this would be slower for typical programs using volatiles in which
* reads greatly outnumber writes. Alternatively, if available, you
* can implement volatile store as an atomic instruction (for example
* XCHG on x86) and omit the barrier. This may be more efficient if
* atomic instructions are cheaper than StoreLoad barriers.
* 3. Issue LoadLoad and LoadStore barriers after each volatile load."
*
* If you prefer to think in terms of "pairing" of memory barriers,
* an atomic_mb_read pairs with an atomic_mb_set.
*
* And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq,
* while an atomic_mb_set is a st.rel followed by a memory barrier.
*
* These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST
* (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough.
* Just always use the barriers manually by the rules above.
*/
#define atomic_mb_read(ptr) ({ \
#define atomic_load_acquire(ptr) ({ \
typeof(*ptr) _val = atomic_read(ptr); \
smp_mb_acquire(); \
_val; \
})
#define atomic_mb_set(ptr, i) do { \
#define atomic_store_release(ptr, i) do { \
smp_mb_release(); \
atomic_set(ptr, i); \
smp_mb(); \
} while (0)
#ifndef atomic_xchg
@ -402,4 +347,31 @@ void _ReadWriteBarrier(void);
#define smp_rmb() smp_mb_acquire()
#endif
/* On i386, x86_64 and s390x, implement atomic_mb_set with atomic_xchg:
 * this is more efficient than a store plus a fence.  The override is
 * not installed when __SANITIZE_THREAD__ is defined.
 */
#if !defined(__SANITIZE_THREAD__) && \
    (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i))
#endif
/* atomic_mb_read/set semantics map to Java volatile variables. They are
* less expensive on some platforms (notably POWER) than fully
* sequentially consistent operations.
*
* As long as they are used as paired operations they are safe to
* use. See docs/atomics.txt for more discussion.
*/
/* Default atomic_mb_read: expands to atomic_load_acquire(ptr) unless a
 * definition of atomic_mb_read already exists.
 */
#if !defined(atomic_mb_read)
#define atomic_mb_read(ptr) atomic_load_acquire(ptr)
#endif
/* Default atomic_mb_set: atomic_store_release(ptr, i) followed by smp_mb().
 * Only defined when no definition of atomic_mb_set already exists.
 */
#if !defined(atomic_mb_set)
#define atomic_mb_set(ptr, i)             \
    do {                                  \
        atomic_store_release(ptr, i);     \
        smp_mb();                         \
    } while (0)
#endif
#endif /* QEMU_ATOMIC_H */