svn commit: r197803 - in head/sys: amd64/include i386/include

Attilio Rao attilio at FreeBSD.org
Tue Oct 6 13:45:50 UTC 2009


Author: attilio
Date: Tue Oct  6 13:45:49 2009
New Revision: 197803
URL: http://svn.freebsd.org/changeset/base/197803

Log:
  Per their definition, atomic instructions used in conjuction with
  memory barriers should also ensure that the compiler doesn't reorder paths
  where they are used.  GCC, however, does that aggressively, even in
  presence of volatile operands.  The most reliable way GCC offers for avoid
  instructions reordering is clobbering "memory" even if that is
  theoretically an heavy-weight operation, flushing the content of all
  the registers and forcing reload of them (We could rely, however, on
  gcc DTRT by just understanding the purpose as this is a well-known
  pattern for many modern operating-systems).
  
  Not all our memory barriers, right now, clobber memory for GCC-like
  compilers. The most notable cases are IA32 and amd64 where the memory
  barrier are treacted the same as normal atomic instructions.
  Fix this by offering the possibility to implement atomic instructions
  with memory barriers separately from the normal version and implement
  the GCC-like specific one using memory clobbering.
  Thanks to Chris Lattner (@apple) for his discussion on llvm specifics.
  
  Reported by:	jhb
  Reviewed by:	jhb
  Tested by:	rdivacky, Giovanni Trematerra
  		<giovanni dot trematerra at gmail dot com>

Modified:
  head/sys/amd64/include/atomic.h
  head/sys/i386/include/atomic.h

Modified: head/sys/amd64/include/atomic.h
==============================================================================
--- head/sys/amd64/include/atomic.h	Tue Oct  6 10:19:20 2009	(r197802)
+++ head/sys/amd64/include/atomic.h	Tue Oct  6 13:45:49 2009	(r197803)
@@ -73,10 +73,13 @@
  */
 #if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
-void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
+void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
+void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 int	atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src);
 int	atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src);
+int	atomic_cmpset_barr_int(volatile u_int *dst, u_int exp, u_int src);
+int	atomic_cmpset_barr_long(volatile u_long *dst, u_long exp, u_long src);
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 u_long	atomic_fetchadd_long(volatile u_long *p, u_long v);
 
@@ -97,8 +100,9 @@ void		atomic_store_rel_##TYPE(volatile u
 #endif
 
 /*
- * The assembly is volatilized to demark potential before-and-after side
- * effects if an interrupt or SMP collision were to occur.
+ * The assembly is volatilized to avoid code chunk removal by the compiler.
+ * GCC aggressively reorders operations and memory clobbering is necessary
+ * in order to avoid that for memory barriers.
  */
 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
 static __inline void					\
@@ -108,6 +112,15 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE
 	: "=m" (*p)					\
 	: CONS (V), "m" (*p));				\
 }							\
+							\
+static __inline void					\
+atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile(MPLOCKED OP			\
+	: "=m" (*p)					\
+	: CONS (V), "m" (*p)				\
+	: "memory");					\
+}							\
 struct __hack
 
 /*
@@ -160,6 +173,9 @@ atomic_cmpset_long(volatile u_long *dst,
 	return (res);
 }
 
+#define	atomic_cmpset_barr_int	atomic_cmpset_int
+#define	atomic_cmpset_barr_long	atomic_cmpset_long
+
 /*
  * Atomically add the value of v to the integer pointed to by p and return
  * the previous value of *p.
@@ -205,18 +221,23 @@ atomic_fetchadd_long(volatile u_long *p,
  * PentiumPro or higher, reads may pass writes, so for that case we have
  * to use a serializing instruction (i.e. with LOCK) to do the load in
  * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we don't need any memory barriers.
+ * is always consistent, so we only need to take care of compiler.
  */
 #define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
-	return (*p);					\
+	u_##TYPE tmp;					\
+							\
+	tmp = *p;					\
+	__asm __volatile ("" : : : "memory");		\
+	return (tmp);					\
 }							\
 							\
 static __inline void					\
 atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
+	__asm __volatile ("" : : : "memory");		\
 	*p = v;						\
 }							\
 struct __hack
@@ -247,7 +268,8 @@ atomic_store_rel_##TYPE(volatile u_##TYP
 	__asm __volatile(SOP				\
 	: "=m" (*p),			/* 0 */		\
 	  "+r" (v)			/* 1 */		\
-	: "m" (*p));			/* 2 */		\
+	: "m" (*p)			/* 2 */		\
+	: "memory");					\
 }							\
 struct __hack
 
@@ -327,46 +349,45 @@ u_long	atomic_readandclear_long(volatile
 
 #endif /* __GNUCLIKE_ASM */
 
-/* Acquire and release variants are identical to the normal ones. */
-#define	atomic_set_acq_char		atomic_set_char
-#define	atomic_set_rel_char		atomic_set_char
-#define	atomic_clear_acq_char		atomic_clear_char
-#define	atomic_clear_rel_char		atomic_clear_char
-#define	atomic_add_acq_char		atomic_add_char
-#define	atomic_add_rel_char		atomic_add_char
-#define	atomic_subtract_acq_char	atomic_subtract_char
-#define	atomic_subtract_rel_char	atomic_subtract_char
-
-#define	atomic_set_acq_short		atomic_set_short
-#define	atomic_set_rel_short		atomic_set_short
-#define	atomic_clear_acq_short		atomic_clear_short
-#define	atomic_clear_rel_short		atomic_clear_short
-#define	atomic_add_acq_short		atomic_add_short
-#define	atomic_add_rel_short		atomic_add_short
-#define	atomic_subtract_acq_short	atomic_subtract_short
-#define	atomic_subtract_rel_short	atomic_subtract_short
-
-#define	atomic_set_acq_int		atomic_set_int
-#define	atomic_set_rel_int		atomic_set_int
-#define	atomic_clear_acq_int		atomic_clear_int
-#define	atomic_clear_rel_int		atomic_clear_int
-#define	atomic_add_acq_int		atomic_add_int
-#define	atomic_add_rel_int		atomic_add_int
-#define	atomic_subtract_acq_int		atomic_subtract_int
-#define	atomic_subtract_rel_int		atomic_subtract_int
-#define	atomic_cmpset_acq_int		atomic_cmpset_int
-#define	atomic_cmpset_rel_int		atomic_cmpset_int
-
-#define	atomic_set_acq_long		atomic_set_long
-#define	atomic_set_rel_long		atomic_set_long
-#define	atomic_clear_acq_long		atomic_clear_long
-#define	atomic_clear_rel_long		atomic_clear_long
-#define	atomic_add_acq_long		atomic_add_long
-#define	atomic_add_rel_long		atomic_add_long
-#define	atomic_subtract_acq_long	atomic_subtract_long
-#define	atomic_subtract_rel_long	atomic_subtract_long
-#define	atomic_cmpset_acq_long		atomic_cmpset_long
-#define	atomic_cmpset_rel_long		atomic_cmpset_long
+#define	atomic_set_acq_char		atomic_set_barr_char
+#define	atomic_set_rel_char		atomic_set_barr_char
+#define	atomic_clear_acq_char		atomic_clear_barr_char
+#define	atomic_clear_rel_char		atomic_clear_barr_char
+#define	atomic_add_acq_char		atomic_add_barr_char
+#define	atomic_add_rel_char		atomic_add_barr_char
+#define	atomic_subtract_acq_char	atomic_subtract_barr_char
+#define	atomic_subtract_rel_char	atomic_subtract_barr_char
+
+#define	atomic_set_acq_short		atomic_set_barr_short
+#define	atomic_set_rel_short		atomic_set_barr_short
+#define	atomic_clear_acq_short		atomic_clear_barr_short
+#define	atomic_clear_rel_short		atomic_clear_barr_short
+#define	atomic_add_acq_short		atomic_add_barr_short
+#define	atomic_add_rel_short		atomic_add_barr_short
+#define	atomic_subtract_acq_short	atomic_subtract_barr_short
+#define	atomic_subtract_rel_short	atomic_subtract_barr_short
+
+#define	atomic_set_acq_int		atomic_set_barr_int
+#define	atomic_set_rel_int		atomic_set_barr_int
+#define	atomic_clear_acq_int		atomic_clear_barr_int
+#define	atomic_clear_rel_int		atomic_clear_barr_int
+#define	atomic_add_acq_int		atomic_add_barr_int
+#define	atomic_add_rel_int		atomic_add_barr_int
+#define	atomic_subtract_acq_int		atomic_subtract_barr_int
+#define	atomic_subtract_rel_int		atomic_subtract_barr_int
+#define	atomic_cmpset_acq_int		atomic_cmpset_barr_int
+#define	atomic_cmpset_rel_int		atomic_cmpset_barr_int
+
+#define	atomic_set_acq_long		atomic_set_barr_long
+#define	atomic_set_rel_long		atomic_set_barr_long
+#define	atomic_clear_acq_long		atomic_clear_barr_long
+#define	atomic_clear_rel_long		atomic_clear_barr_long
+#define	atomic_add_acq_long		atomic_add_barr_long
+#define	atomic_add_rel_long		atomic_add_barr_long
+#define	atomic_subtract_acq_long	atomic_subtract_barr_long
+#define	atomic_subtract_rel_long	atomic_subtract_barr_long
+#define	atomic_cmpset_acq_long		atomic_cmpset_barr_long
+#define	atomic_cmpset_rel_long		atomic_cmpset_barr_long
 
 /* Operations on 8-bit bytes. */
 #define	atomic_set_8		atomic_set_char

Modified: head/sys/i386/include/atomic.h
==============================================================================
--- head/sys/i386/include/atomic.h	Tue Oct  6 10:19:20 2009	(r197802)
+++ head/sys/i386/include/atomic.h	Tue Oct  6 13:45:49 2009	(r197803)
@@ -73,9 +73,11 @@
  */
 #if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
-void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
+void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
+void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 int	atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src);
+int	atomic_cmpset_barr_int(volatile u_int *dst, u_int exp, u_int src);
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 
 #define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)			\
@@ -95,8 +97,9 @@ void		atomic_store_rel_##TYPE(volatile u
 #endif
 
 /*
- * The assembly is volatilized to demark potential before-and-after side
- * effects if an interrupt or SMP collision were to occur.
+ * The assembly is volatilized to avoid code chunk removal by the compiler.
+ * GCC aggressively reorders operations and memory clobbering is necessary
+ * in order to avoid that for memory barriers.
  */
 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
 static __inline void					\
@@ -106,6 +109,15 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE
 	: "=m" (*p)					\
 	: CONS (V), "m" (*p));				\
 }							\
+							\
+static __inline void					\
+atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile(MPLOCKED OP			\
+	: "=m" (*p)					\
+	: CONS (V), "m" (*p)				\
+	: "memory");					\
+}							\
 struct __hack
 
 /*
@@ -168,6 +180,8 @@ atomic_cmpset_int(volatile u_int *dst, u
 
 #endif /* CPU_DISABLE_CMPXCHG */
 
+#define	atomic_cmpset_barr_int		atomic_cmpset_int
+
 /*
  * Atomically add the value of v to the integer pointed to by p and return
  * the previous value of *p.
@@ -194,18 +208,23 @@ atomic_fetchadd_int(volatile u_int *p, u
  * PentiumPro or higher, reads may pass writes, so for that case we have
  * to use a serializing instruction (i.e. with LOCK) to do the load in
  * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we don't need any memory barriers.
+ * is always consistent, so we only need to take care of compiler.
  */
 #define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
-	return (*p);					\
+	u_##TYPE tmp;					\
+							\
+	tmp = *p;					\
+	__asm __volatile("" : : : "memory");		\
+	return (tmp);					\
 }							\
 							\
 static __inline void					\
 atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
 {							\
+	__asm __volatile("" : : : "memory");		\
 	*p = v;						\
 }							\
 struct __hack
@@ -236,7 +255,8 @@ atomic_store_rel_##TYPE(volatile u_##TYP
 	__asm __volatile(SOP				\
 	: "=m" (*p),			/* 0 */		\
 	  "+r" (v)			/* 1 */		\
-	: "m" (*p));			/* 2 */		\
+	: "m" (*p)			/* 2 */		\
+	: "memory");					\
 }							\
 struct __hack
 
@@ -282,6 +302,14 @@ atomic_cmpset_long(volatile u_long *dst,
 	    (u_int)src));
 }
 
+static __inline int
+atomic_cmpset_barr_long(volatile u_long *dst, u_long exp, u_long src)
+{
+
+	return (atomic_cmpset_barr_int((volatile u_int *)dst, (u_int)exp,
+	    (u_int)src));
+}
+
 static __inline u_long
 atomic_fetchadd_long(volatile u_long *p, u_long v)
 {
@@ -331,46 +359,45 @@ u_long	atomic_readandclear_long(volatile
 
 #endif /* __GNUCLIKE_ASM */
 
-/* Acquire and release variants are identical to the normal ones. */
-#define	atomic_set_acq_char		atomic_set_char
-#define	atomic_set_rel_char		atomic_set_char
-#define	atomic_clear_acq_char		atomic_clear_char
-#define	atomic_clear_rel_char		atomic_clear_char
-#define	atomic_add_acq_char		atomic_add_char
-#define	atomic_add_rel_char		atomic_add_char
-#define	atomic_subtract_acq_char	atomic_subtract_char
-#define	atomic_subtract_rel_char	atomic_subtract_char
-
-#define	atomic_set_acq_short		atomic_set_short
-#define	atomic_set_rel_short		atomic_set_short
-#define	atomic_clear_acq_short		atomic_clear_short
-#define	atomic_clear_rel_short		atomic_clear_short
-#define	atomic_add_acq_short		atomic_add_short
-#define	atomic_add_rel_short		atomic_add_short
-#define	atomic_subtract_acq_short	atomic_subtract_short
-#define	atomic_subtract_rel_short	atomic_subtract_short
-
-#define	atomic_set_acq_int		atomic_set_int
-#define	atomic_set_rel_int		atomic_set_int
-#define	atomic_clear_acq_int		atomic_clear_int
-#define	atomic_clear_rel_int		atomic_clear_int
-#define	atomic_add_acq_int		atomic_add_int
-#define	atomic_add_rel_int		atomic_add_int
-#define	atomic_subtract_acq_int		atomic_subtract_int
-#define	atomic_subtract_rel_int		atomic_subtract_int
-#define	atomic_cmpset_acq_int		atomic_cmpset_int
-#define	atomic_cmpset_rel_int		atomic_cmpset_int
-
-#define	atomic_set_acq_long		atomic_set_long
-#define	atomic_set_rel_long		atomic_set_long
-#define	atomic_clear_acq_long		atomic_clear_long
-#define	atomic_clear_rel_long		atomic_clear_long
-#define	atomic_add_acq_long		atomic_add_long
-#define	atomic_add_rel_long		atomic_add_long
-#define	atomic_subtract_acq_long	atomic_subtract_long
-#define	atomic_subtract_rel_long	atomic_subtract_long
-#define	atomic_cmpset_acq_long		atomic_cmpset_long
-#define	atomic_cmpset_rel_long		atomic_cmpset_long
+#define	atomic_set_acq_char		atomic_set_barr_char
+#define	atomic_set_rel_char		atomic_set_barr_char
+#define	atomic_clear_acq_char		atomic_clear_barr_char
+#define	atomic_clear_rel_char		atomic_clear_barr_char
+#define	atomic_add_acq_char		atomic_add_barr_char
+#define	atomic_add_rel_char		atomic_add_barr_char
+#define	atomic_subtract_acq_char	atomic_subtract_barr_char
+#define	atomic_subtract_rel_char	atomic_subtract_barr_char
+
+#define	atomic_set_acq_short		atomic_set_barr_short
+#define	atomic_set_rel_short		atomic_set_barr_short
+#define	atomic_clear_acq_short		atomic_clear_barr_short
+#define	atomic_clear_rel_short		atomic_clear_barr_short
+#define	atomic_add_acq_short		atomic_add_barr_short
+#define	atomic_add_rel_short		atomic_add_barr_short
+#define	atomic_subtract_acq_short	atomic_subtract_barr_short
+#define	atomic_subtract_rel_short	atomic_subtract_barr_short
+
+#define	atomic_set_acq_int		atomic_set_barr_int
+#define	atomic_set_rel_int		atomic_set_barr_int
+#define	atomic_clear_acq_int		atomic_clear_barr_int
+#define	atomic_clear_rel_int		atomic_clear_barr_int
+#define	atomic_add_acq_int		atomic_add_barr_int
+#define	atomic_add_rel_int		atomic_add_barr_int
+#define	atomic_subtract_acq_int		atomic_subtract_barr_int
+#define	atomic_subtract_rel_int		atomic_subtract_barr_int
+#define	atomic_cmpset_acq_int		atomic_cmpset_barr_int
+#define	atomic_cmpset_rel_int		atomic_cmpset_barr_int
+
+#define	atomic_set_acq_long		atomic_set_barr_long
+#define	atomic_set_rel_long		atomic_set_barr_long
+#define	atomic_clear_acq_long		atomic_clear_barr_long
+#define	atomic_clear_rel_long		atomic_clear_barr_long
+#define	atomic_add_acq_long		atomic_add_barr_long
+#define	atomic_add_rel_long		atomic_add_barr_long
+#define	atomic_subtract_acq_long	atomic_subtract_barr_long
+#define	atomic_subtract_rel_long	atomic_subtract_barr_long
+#define	atomic_cmpset_acq_long		atomic_cmpset_barr_long
+#define	atomic_cmpset_rel_long		atomic_cmpset_barr_long
 
 /* Operations on 8-bit bytes. */
 #define	atomic_set_8		atomic_set_char


More information about the svn-src-all mailing list