svn commit: r284901 - in head/sys: amd64/amd64 amd64/include i386/i386 i386/include
Konstantin Belousov
kib at FreeBSD.org
Sun Jun 28 05:04:10 UTC 2015
Author: kib
Date: Sun Jun 28 05:04:08 2015
New Revision: 284901
URL: https://svnweb.freebsd.org/changeset/base/284901
Log:
Remove the unneeded data dependency that atomic_load_acq(9) currently
imposes on its source operand on x86.

Right now, atomic_load_acq() on x86 is sequentially consistent with
other atomics; the code ensures this by issuing a Store/Load barrier,
implemented as a locked nop on the source. Provide a separate
primitive, __storeload_barrier(), implemented as a locked nop on a
cpu-private variable, and issue __storeload_barrier() before the load.
This keeps the seq_cst semantic but avoids introducing a false
dependency on the non-modification of the source for its later use.

Note that the seq_cst property of x86 atomic_load_acq() is not
documented and is not provided by the atomics implementations on other
architectures, although some kernel code relies on the behaviour. This
commit does not intend to change this.
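
For illustration (not part of the committed diff), the effect on the
generated load-acquire sequence can be sketched as follows;
old_load_acq()/new_load_acq() are made-up names standing in for the
ATOMIC_LOAD expansion before and after this change:

	/* Before: the Store/Load barrier is a locked RMW on the source itself. */
	static __inline u_int
	old_load_acq(volatile u_int *p)
	{
		u_int res;

		__asm __volatile("lock; cmpxchgl %0,%1"
		    : "=a" (res), "+m" (*p) : : "memory", "cc");
		return (res);
	}

	/*
	 * After: the locked instruction targets a cpu-private word instead,
	 * and the source is read with an ordinary load.
	 */
	static __inline u_int
	new_load_acq(volatile u_int *p)
	{
		u_int res;

		__storeload_barrier();	/* lock addl $0 on a per-cpu cache line */
		res = *p;
		__compiler_membar();
		return (res);
	}

Since the locked instruction no longer reads and writes *p, the barrier
no longer creates a false dependency on the source for its later use.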
Reviewed by: alc
Discussed with: bde
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 2 weeks
Modified:
head/sys/amd64/amd64/atomic.c
head/sys/amd64/amd64/vm_machdep.c
head/sys/amd64/include/atomic.h
head/sys/i386/i386/vm_machdep.c
head/sys/i386/include/atomic.h
Modified: head/sys/amd64/amd64/atomic.c
==============================================================================
--- head/sys/amd64/amd64/atomic.c Sun Jun 28 03:22:26 2015 (r284900)
+++ head/sys/amd64/amd64/atomic.c Sun Jun 28 05:04:08 2015 (r284901)
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#undef ATOMIC_ASM
/* Make atomic.h generate public functions */
+static __inline void __storeload_barrier(void);
#define WANT_FUNCTIONS
#define static
#undef __inline
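
For context (not shown in the hunk): atomic.c re-includes
machine/atomic.h with WANT_FUNCTIONS defined and with "static" and
"__inline" preprocessed away, so the header's inline definitions are
emitted as public, out-of-line symbols. The forward declaration added
above presumably keeps __storeload_barrier() itself at internal
linkage, since a prior static declaration is still visible when the
definition is re-read without the keyword. A stripped-down sketch of
the trick, with made-up file and function names:

	/* ops.h -- normally consumed as inline definitions. */
	static __inline int
	helper(int x)
	{
		return (x + x);
	}

	static __inline int
	public_op(int x)
	{
		return (helper(x) + 1);
	}

	/* ops.c -- re-include the header to emit public_op() as a real symbol. */
	static __inline int helper(int x);	/* prior static declaration keeps
						   helper() internal below */
	#define	static				/* expands to nothing */
	#define	__inline			/* likewise */
	#include "ops.h"			/* now defines extern int public_op(int) */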
Modified: head/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- head/sys/amd64/amd64/vm_machdep.c Sun Jun 28 03:22:26 2015 (r284900)
+++ head/sys/amd64/amd64/vm_machdep.c Sun Jun 28 05:04:08 2015 (r284901)
@@ -93,6 +93,8 @@ _Static_assert(OFFSETOF_CURTHREAD == off
"OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread.");
_Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb),
"OFFSETOF_CURPCB does not correspond with offset of pc_curpcb.");
+_Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
+ "OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf.");
struct savefpu *
get_pcb_user_save_td(struct thread *td)
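
Not part of the diff: this assertion exists because the atomic.h change
below open-codes OFFSETOF_MONITORBUF as 0x180 rather than deriving it
from sys/pcpu.h. The general pattern, sketched with hypothetical names:

	/* layout.h -- the authoritative structure definition. */
	struct pcpu_like {
		char	pc_pad[0x180];
		char	pc_monitorbuf[128];
	};

	/* lowlevel.h -- must not include layout.h, so the offset is open-coded. */
	#define	OFFSETOF_BUF	0x180

	/* check.c -- may include both headers, so it verifies the constant. */
	#include <stddef.h>
	_Static_assert(OFFSETOF_BUF == offsetof(struct pcpu_like, pc_monitorbuf),
	    "OFFSETOF_BUF does not correspond with offset of pc_monitorbuf.");

If the structure layout ever changes, the build breaks here instead of
silently aiming the barrier at whatever lands at the stale offset.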
Modified: head/sys/amd64/include/atomic.h
==============================================================================
--- head/sys/amd64/include/atomic.h Sun Jun 28 03:22:26 2015 (r284900)
+++ head/sys/amd64/include/atomic.h Sun Jun 28 05:04:08 2015 (r284901)
@@ -85,7 +85,7 @@ u_long atomic_fetchadd_long(volatile u_l
int atomic_testandset_int(volatile u_int *p, u_int v);
int atomic_testandset_long(volatile u_long *p, u_int v);
-#define ATOMIC_LOAD(TYPE, LOP) \
+#define ATOMIC_LOAD(TYPE) \
u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define ATOMIC_STORE(TYPE) \
void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
@@ -245,53 +245,79 @@ atomic_testandset_long(volatile u_long *
* We assume that a = b will do atomic loads and stores. Due to the
* IA32 memory model, a simple store guarantees release semantics.
*
- * However, loads may pass stores, so for atomic_load_acq we have to
- * ensure a Store/Load barrier to do the load in SMP kernels. We use
- * "lock cmpxchg" as recommended by the AMD Software Optimization
- * Guide, and not mfence. For UP kernels, however, the cache of the
- * single processor is always consistent, so we only need to take care
- * of the compiler.
+ * However, a load may pass a store if they are performed on distinct
+ * addresses, so for atomic_load_acq we introduce a Store/Load barrier
+ * before the load in SMP kernels. We use "lock addl $0,mem", as
+ * recommended by the AMD Software Optimization Guide, and not mfence.
+ * In the kernel, we use a private per-cpu cache line as the target
+ * for the locked addition, to avoid introducing false data
+ * dependencies. In userspace, a word in the red zone on the stack
+ * (-8(%rsp)) is utilized.
+ *
+ * For UP kernels, however, the memory of the single processor is
+ * always consistent, so we only need to stop the compiler from
+ * reordering accesses in a way that violates the semantics of acquire
+ * and release.
*/
-#define ATOMIC_STORE(TYPE) \
-static __inline void \
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{ \
- __compiler_membar(); \
- *p = v; \
-} \
-struct __hack
-#if defined(_KERNEL) && !defined(SMP)
+#if defined(_KERNEL)
-#define ATOMIC_LOAD(TYPE, LOP) \
-static __inline u_##TYPE \
-atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
-{ \
- u_##TYPE tmp; \
- \
- tmp = *p; \
- __compiler_membar(); \
- return (tmp); \
-} \
-struct __hack
+/*
+ * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
+ *
+ * The open-coded number is used instead of the symbolic expression to
+ * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
+ * An assertion in amd64/vm_machdep.c ensures that the value is correct.
+ */
+#define OFFSETOF_MONITORBUF 0x180
-#else /* !(_KERNEL && !SMP) */
+#if defined(SMP)
+static __inline void
+__storeload_barrier(void)
+{
-#define ATOMIC_LOAD(TYPE, LOP) \
-static __inline u_##TYPE \
-atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
-{ \
- u_##TYPE res; \
- \
- __asm __volatile(MPLOCKED LOP \
- : "=a" (res), /* 0 */ \
- "+m" (*p) /* 1 */ \
- : : "memory", "cc"); \
- return (res); \
-} \
+ __asm __volatile("lock; addl $0,%%gs:%0"
+ : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
+}
+#else /* _KERNEL && UP */
+static __inline void
+__storeload_barrier(void)
+{
+
+ __compiler_membar();
+}
+#endif /* SMP */
+#else /* !_KERNEL */
+static __inline void
+__storeload_barrier(void)
+{
+
+ __asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc");
+}
+#endif /* _KERNEL*/
+
+#define ATOMIC_LOAD(TYPE) \
+static __inline u_##TYPE \
+atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
+{ \
+ u_##TYPE res; \
+ \
+ __storeload_barrier(); \
+ res = *p; \
+ __compiler_membar(); \
+ return (res); \
+} \
struct __hack
-#endif /* _KERNEL && !SMP */
+#define ATOMIC_STORE(TYPE) \
+static __inline void \
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \
+{ \
+ \
+ __compiler_membar(); \
+ *p = v; \
+} \
+struct __hack
#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
@@ -315,20 +341,19 @@ ATOMIC_ASM(clear, long, "andq %1,%0"
ATOMIC_ASM(add, long, "addq %1,%0", "ir", v);
ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v);
-ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
-ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
-ATOMIC_LOAD(int, "cmpxchgl %0,%1");
-ATOMIC_LOAD(long, "cmpxchgq %0,%1");
-
-ATOMIC_STORE(char);
-ATOMIC_STORE(short);
-ATOMIC_STORE(int);
-ATOMIC_STORE(long);
+#define ATOMIC_LOADSTORE(TYPE) \
+ ATOMIC_LOAD(TYPE); \
+ ATOMIC_STORE(TYPE)
+
+ATOMIC_LOADSTORE(char);
+ATOMIC_LOADSTORE(short);
+ATOMIC_LOADSTORE(int);
+ATOMIC_LOADSTORE(long);
#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE
-
+#undef ATOMIC_LOADSTORE
#ifndef WANT_FUNCTIONS
/* Read the current value and store a new value in the destination. */
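
For illustration only (not in the patch): with the rewritten
ATOMIC_LOAD/ATOMIC_STORE pair, the usual acquire/release idiom reads as
follows. compute() and consume() are hypothetical; cpu_spinwait() is
the standard pause wrapper:

	static u_int		shared_data;
	static volatile u_int	shared_flag;

	extern u_int	compute(void);		/* hypothetical */
	extern void	consume(u_int);		/* hypothetical */

	static void
	producer(void)
	{

		shared_data = compute();		/* ordinary store */
		atomic_store_rel_int(&shared_flag, 1);	/* compiler barrier, store */
	}

	static void
	consumer(void)
	{

		while (atomic_load_acq_int(&shared_flag) == 0)
			cpu_spinwait();
		consume(shared_data);			/* sees the producer's data */
	}

The consumer's load now executes __storeload_barrier() before reading
the flag and __compiler_membar() after it, instead of a locked cmpxchg
on the flag itself.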
Modified: head/sys/i386/i386/vm_machdep.c
==============================================================================
--- head/sys/i386/i386/vm_machdep.c Sun Jun 28 03:22:26 2015 (r284900)
+++ head/sys/i386/i386/vm_machdep.c Sun Jun 28 05:04:08 2015 (r284901)
@@ -111,6 +111,8 @@ _Static_assert(OFFSETOF_CURTHREAD == off
"OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread.");
_Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb),
"OFFSETOF_CURPCB does not correspond with offset of pc_curpcb.");
+_Static_assert(OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf),
+ "OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf.");
static void cpu_reset_real(void);
#ifdef SMP
Modified: head/sys/i386/include/atomic.h
==============================================================================
--- head/sys/i386/include/atomic.h Sun Jun 28 03:22:26 2015 (r284900)
+++ head/sys/i386/include/atomic.h Sun Jun 28 05:04:08 2015 (r284901)
@@ -87,7 +87,7 @@ int atomic_cmpset_int(volatile u_int *ds
u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
int atomic_testandset_int(volatile u_int *p, u_int v);
-#define ATOMIC_LOAD(TYPE, LOP) \
+#define ATOMIC_LOAD(TYPE) \
u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define ATOMIC_STORE(TYPE) \
void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
@@ -228,53 +228,78 @@ atomic_testandset_int(volatile u_int *p,
* We assume that a = b will do atomic loads and stores. Due to the
* IA32 memory model, a simple store guarantees release semantics.
*
- * However, loads may pass stores, so for atomic_load_acq we have to
- * ensure a Store/Load barrier to do the load in SMP kernels. We use
- * "lock cmpxchg" as recommended by the AMD Software Optimization
- * Guide, and not mfence. For UP kernels, however, the cache of the
- * single processor is always consistent, so we only need to take care
- * of the compiler.
+ * However, a load may pass a store if they are performed on distinct
+ * addresses, so for atomic_load_acq we introduce a Store/Load barrier
+ * before the load in SMP kernels. We use "lock addl $0,mem", as
+ * recommended by the AMD Software Optimization Guide, and not mfence.
+ * In the kernel, we use a private per-cpu cache line as the target
+ * for the locked addition, to avoid introducing false data
+ * dependencies. In userspace, a word at the top of the stack is
+ * utilized.
+ *
+ * For UP kernels, however, the memory of the single processor is
+ * always consistent, so we only need to stop the compiler from
+ * reordering accesses in a way that violates the semantics of acquire
+ * and release.
*/
-#define ATOMIC_STORE(TYPE) \
-static __inline void \
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{ \
- __compiler_membar(); \
- *p = v; \
-} \
-struct __hack
+#if defined(_KERNEL)
-#if defined(_KERNEL) && !defined(SMP)
+/*
+ * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
+ *
+ * The open-coded number is used instead of the symbolic expression to
+ * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
+ * An assertion in i386/vm_machdep.c ensures that the value is correct.
+ */
+#define OFFSETOF_MONITORBUF 0x180
-#define ATOMIC_LOAD(TYPE, LOP) \
-static __inline u_##TYPE \
-atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
-{ \
- u_##TYPE tmp; \
- \
- tmp = *p; \
- __compiler_membar(); \
- return (tmp); \
-} \
-struct __hack
+#if defined(SMP)
+static __inline void
+__storeload_barrier(void)
+{
-#else /* !(_KERNEL && !SMP) */
+ __asm __volatile("lock; addl $0,%%fs:%0"
+ : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
+}
+#else /* _KERNEL && UP */
+static __inline void
+__storeload_barrier(void)
+{
-#define ATOMIC_LOAD(TYPE, LOP) \
-static __inline u_##TYPE \
-atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
-{ \
- u_##TYPE res; \
- \
- __asm __volatile(MPLOCKED LOP \
- : "=a" (res), /* 0 */ \
- "+m" (*p) /* 1 */ \
- : : "memory", "cc"); \
- return (res); \
-} \
+ __compiler_membar();
+}
+#endif /* SMP */
+#else /* !_KERNEL */
+static __inline void
+__storeload_barrier(void)
+{
+
+ __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
+}
+#endif /* _KERNEL*/
+
+#define ATOMIC_LOAD(TYPE) \
+static __inline u_##TYPE \
+atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
+{ \
+ u_##TYPE res; \
+ \
+ __storeload_barrier(); \
+ res = *p; \
+ __compiler_membar(); \
+ return (res); \
+} \
struct __hack
-#endif /* _KERNEL && !SMP */
+#define ATOMIC_STORE(TYPE) \
+static __inline void \
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \
+{ \
+ \
+ __compiler_membar(); \
+ *p = v; \
+} \
+struct __hack
#ifdef _KERNEL
@@ -511,19 +536,19 @@ ATOMIC_ASM(clear, long, "andl %1,%0"
ATOMIC_ASM(add, long, "addl %1,%0", "ir", v);
ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v);
-ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
-ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
-ATOMIC_LOAD(int, "cmpxchgl %0,%1");
-ATOMIC_LOAD(long, "cmpxchgl %0,%1");
-
-ATOMIC_STORE(char);
-ATOMIC_STORE(short);
-ATOMIC_STORE(int);
-ATOMIC_STORE(long);
+#define ATOMIC_LOADSTORE(TYPE) \
+ ATOMIC_LOAD(TYPE); \
+ ATOMIC_STORE(TYPE)
+
+ATOMIC_LOADSTORE(char);
+ATOMIC_LOADSTORE(short);
+ATOMIC_LOADSTORE(int);
+ATOMIC_LOADSTORE(long);
#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE
+#undef ATOMIC_LOADSTORE
#ifndef WANT_FUNCTIONS
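
One more illustrative note, not part of the commit: the Store/Load
barrier that both headers now issue before the load is what preserves
sequential consistency in the classic store-buffering pattern. With
hypothetical volatile u_int variables x and y and local u_int r1, r2,
all initially zero:

	/* CPU 0 */				/* CPU 1 */
	atomic_store_rel_int(&x, 1);		atomic_store_rel_int(&y, 1);
	r1 = atomic_load_acq_int(&y);		r2 = atomic_load_acq_int(&x);

	/*
	 * The plain release store can sit in the local store buffer while
	 * the following load of the other variable completes, which would
	 * permit r1 == r2 == 0.  The locked "addl $0" issued by
	 * __storeload_barrier() makes the earlier store globally visible
	 * before the load, so at least one of r1 and r2 observes 1.
	 */

The userspace barrier target also differs between the two ports: amd64
can use -8(%rsp) because the System V amd64 ABI guarantees a 128-byte
red zone below the stack pointer, while the i386 ABI has no red zone,
so the i386 version uses the word at the top of the stack, (%esp).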