Diffstat (limited to 'arch/mips/include/asm/barrier.h')
-rw-r--r--	arch/mips/include/asm/barrier.h	228
1 file changed, 44 insertions, 184 deletions
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index b865e317a14f..49ff172a72b9 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -9,131 +9,26 @@
 #define __ASM_BARRIER_H
 
 #include <asm/addrspace.h>
+#include <asm/sync.h>
 
-/*
- * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
- * These values are used with the sync instruction to perform memory barriers.
- * Types of ordering guarantees available through the SYNC instruction:
- * - Completion Barriers
- * - Ordering Barriers
- * As compared to the completion barrier, the ordering barrier is a
- * lighter-weight operation as it does not require the specified instructions
- * before the SYNC to be already completed. Instead it only requires that those
- * specified instructions which are subsequent to the SYNC in the instruction
- * stream are never re-ordered for processing ahead of the specified
- * instructions which are before the SYNC in the instruction stream.
- * This potentially reduces how many cycles the barrier instruction must stall
- * before it completes.
- * Implementations that do not use any of the non-zero values of stype to define
- * different barriers, such as ordering barriers, must make those stype values
- * act the same as stype zero.
- */
-
-/*
- * Completion barriers:
- * - Every synchronizable specified memory instruction (loads or stores or both)
- *   that occurs in the instruction stream before the SYNC instruction must be
- *   already globally performed before any synchronizable specified memory
- *   instructions that occur after the SYNC are allowed to be performed, with
- *   respect to any other processor or coherent I/O module.
- *
- * - The barrier does not guarantee the order in which instruction fetches are
- *   performed.
- *
- * - A stype value of zero will always be defined such that it performs the most
- *   complete set of synchronization operations that are defined.This means
- *   stype zero always does a completion barrier that affects both loads and
- *   stores preceding the SYNC instruction and both loads and stores that are
- *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
- *   by the architecture or specific implementations to perform synchronization
- *   behaviors that are less complete than that of stype zero. If an
- *   implementation does not use one of these non-zero values to define a
- *   different synchronization behavior, then that non-zero value of stype must
- *   act the same as stype zero completion barrier. This allows software written
- *   for an implementation with a lighter-weight barrier to work on another
- *   implementation which only implements the stype zero completion barrier.
- *
- * - A completion barrier is required, potentially in conjunction with SSNOP (in
- *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
- *   to guarantee that memory reference results are visible across operating
- *   mode changes. For example, a completion barrier is required on some
- *   implementations on entry to and exit from Debug Mode to guarantee that
- *   memory effects are handled correctly.
- */
-
-/*
- * stype 0 - A completion barrier that affects preceding loads and stores and
- * subsequent loads and stores.
- * Older instructions which must reach the load/store ordering point before the
- * SYNC instruction completes: Loads, Stores
- * Younger instructions which must reach the load/store ordering point only
- * after the SYNC instruction completes: Loads, Stores
- * Older instructions which must be globally performed when the SYNC instruction
- * completes: Loads, Stores
- */
-#define STYPE_SYNC 0x0
-
-/*
- * Ordering barriers:
- * - Every synchronizable specified memory instruction (loads or stores or both)
- *   that occurs in the instruction stream before the SYNC instruction must
- *   reach a stage in the load/store datapath after which no instruction
- *   re-ordering is possible before any synchronizable specified memory
- *   instruction which occurs after the SYNC instruction in the instruction
- *   stream reaches the same stage in the load/store datapath.
- *
- * - If any memory instruction before the SYNC instruction in program order,
- *   generates a memory request to the external memory and any memory
- *   instruction after the SYNC instruction in program order also generates a
- *   memory request to external memory, the memory request belonging to the
- *   older instruction must be globally performed before the time the memory
- *   request belonging to the younger instruction is globally performed.
- *
- * - The barrier does not guarantee the order in which instruction fetches are
- *   performed.
- */
+static inline void __sync(void)
+{
+	asm volatile(__SYNC(full, always) ::: "memory");
+}
 
-/*
- * stype 0x10 - An ordering barrier that affects preceding loads and stores and
- * subsequent loads and stores.
- * Older instructions which must reach the load/store ordering point before the
- * SYNC instruction completes: Loads, Stores
- * Younger instructions which must reach the load/store ordering point only
- * after the SYNC instruction completes: Loads, Stores
- * Older instructions which must be globally performed when the SYNC instruction
- * completes: N/A
- */
-#define STYPE_SYNC_MB 0x10
+static inline void rmb(void)
+{
+	asm volatile(__SYNC(rmb, always) ::: "memory");
+}
+#define rmb rmb
 
-/*
- * stype 0x14 - A completion barrier specific to global invalidations
- *
- * When a sync instruction of this type completes any preceding GINVI or GINVT
- * operation has been globalized & completed on all coherent CPUs. Anything
- * that the GINV* instruction should invalidate will have been invalidated on
- * all coherent CPUs when this instruction completes. It is implementation
- * specific whether the GINV* instructions themselves will ensure completion,
- * or this sync type will.
- *
- * In systems implementing global invalidates (ie. with Config5.GI == 2 or 3)
- * this sync type also requires that previous SYNCI operations have completed.
- */
-#define STYPE_GINV 0x14
+static inline void wmb(void)
+{
+	asm volatile(__SYNC(wmb, always) ::: "memory");
+}
+#define wmb wmb
 
-#ifdef CONFIG_CPU_HAS_SYNC
-#define __sync()				\
-	__asm__ __volatile__(			\
-		".set	push\n\t"		\
-		".set	noreorder\n\t"		\
-		".set	mips2\n\t"		\
-		"sync\n\t"			\
-		".set	pop"			\
-		: /* no output */		\
-		: /* no input */		\
-		: "memory")
-#else
-#define __sync()	do { } while(0)
-#endif
+#define fast_mb()	__sync()
 
 #define __fast_iob()				\
 	__asm__ __volatile__(			\
@@ -146,17 +41,8 @@
 		: "m" (*(int *)CKSEG1)		\
 		: "memory")
 
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
-# define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
-# define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")
-
-# define fast_wmb()	__syncw()
-# define fast_rmb()	barrier()
-# define fast_mb()	__sync()
 # define fast_iob()	do { } while (0)
 #else /* ! CONFIG_CPU_CAVIUM_OCTEON */
-# define fast_wmb()	__sync()
-# define fast_rmb()	__sync()
-# define fast_mb()	__sync()
 # ifdef CONFIG_SGI_IP28
 #  define fast_iob()				\
 	__asm__ __volatile__(			\
@@ -192,33 +78,33 @@
 #endif /* !CONFIG_CPU_HAS_WB */
 
-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
-
 #if defined(CONFIG_WEAK_ORDERING)
-# ifdef CONFIG_CPU_CAVIUM_OCTEON
-#  define __smp_mb()	__sync()
-#  define __smp_rmb()	barrier()
-#  define __smp_wmb()	__syncw()
-# else
-#  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
-#  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
-#  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
-# endif
+# define __smp_mb()	__sync()
+# define __smp_rmb()	rmb()
+# define __smp_wmb()	wmb()
 #else
-#define __smp_mb()	barrier()
-#define __smp_rmb()	barrier()
-#define __smp_wmb()	barrier()
+# define __smp_mb()	barrier()
+# define __smp_rmb()	barrier()
+# define __smp_wmb()	barrier()
 #endif
 
+/*
+ * When LL/SC does imply order, it must also be a compiler barrier to avoid the
+ * compiler from reordering where the CPU will not. When it does not imply
+ * order, the compiler is also free to reorder across the LL/SC loop and
+ * ordering will be done by smp_llsc_mb() and friends.
+ */
 #if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
-#define __WEAK_LLSC_MB		"	sync	\n"
+# define __WEAK_LLSC_MB		sync
+# define smp_llsc_mb() \
+	__asm__ __volatile__(__stringify(__WEAK_LLSC_MB) : : :"memory")
+# define __LLSC_CLOBBER
 #else
-#define __WEAK_LLSC_MB		"		\n"
+# define __WEAK_LLSC_MB
+# define smp_llsc_mb()		do { } while (0)
+# define __LLSC_CLOBBER		"memory"
 #endif
 
-#define smp_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
-
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 #define smp_mb__before_llsc() smp_wmb()
 #define __smp_mb__before_llsc() __smp_wmb()
@@ -233,48 +119,22 @@
 #define nudge_writes() mb()
 #endif
 
-#define __smp_mb__before_atomic()	__smp_mb__before_llsc()
-#define __smp_mb__after_atomic()	smp_llsc_mb()
-
 /*
- * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
- * store or pref) in between an ll & sc can cause the sc instruction to
- * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
- * containing such sequences, this bug bites harder than we might otherwise
- * expect due to reordering & speculation:
- *
- * 1) A memory access appearing prior to the ll in program order may actually
- *    be executed after the ll - this is the reordering case.
- *
- *    In order to avoid this we need to place a memory barrier (ie. a sync
- *    instruction) prior to every ll instruction, in between it & any earlier
- *    memory access instructions. Many of these cases are already covered by
- *    smp_mb__before_llsc() but for the remaining cases, typically ones in
- *    which multiple CPUs may operate on a memory location but ordering is not
- *    usually guaranteed, we use loongson_llsc_mb() below.
- *
- *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
- *
- * 2) If a conditional branch exists between an ll & sc with a target outside
- *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
- *    or similar, then misprediction of the branch may allow speculative
- *    execution of memory accesses from outside of the ll-sc loop.
- *
- *    In order to avoid this we need a memory barrier (ie. a sync instruction)
- *    at each affected branch target, for which we also use loongson_llsc_mb()
- *    defined below.
- *
- *    This case affects all current Loongson 3 CPUs.
+ * In the Loongson3 LL/SC workaround case, all of our LL/SC loops already have
+ * a completion barrier immediately preceding the LL instruction. Therefore we
+ * can skip emitting a barrier from __smp_mb__before_atomic().
  */
-#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
-#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
+# define __smp_mb__before_atomic()
#else
-#define loongson_llsc_mb()	do { } while (0)
+# define __smp_mb__before_atomic()	__smp_mb__before_llsc()
 #endif
 
+#define __smp_mb__after_atomic()	smp_llsc_mb()
+
 static inline void sync_ginv(void)
 {
-	asm volatile("sync\t%0" :: "i"(STYPE_GINV));
+	asm volatile(__SYNC(ginv, always));
 }
 
 #include <asm-generic/barrier.h>
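
The rewritten rmb()/wmb() pair off in the usual flag/payload handoff. A minimal sketch of that pairing follows; it is illustrative only and not part of the patch (data, flag, producer and consumer are hypothetical names). With the patch applied, wmb() and rmb() emit the lighter ordering-barrier SYNC variants via __SYNC(wmb, always) and __SYNC(rmb, always), falling back to a plain full sync on CPUs without the named stypes, where the old fast_wmb()/fast_rmb() used an unconditional full sync on non-Octeon systems.

/*
 * Illustrative sketch, not from the patch: classic producer/consumer
 * pairing of wmb()/rmb(). data and flag are hypothetical.
 */
static int data;
static int flag;

static void producer(void)
{
	WRITE_ONCE(data, 42);		/* payload store... */
	wmb();				/* ...made visible before the flag store */
	WRITE_ONCE(flag, 1);
}

static int consumer(void)
{
	while (!READ_ONCE(flag))	/* wait for the flag... */
		cpu_relax();
	rmb();				/* ...then order the payload load after it */
	return READ_ONCE(data);
}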
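The __WEAK_LLSC_MB / __LLSC_CLOBBER split is easiest to see from the shape of an LL/SC loop. When LL/SC implies order, __LLSC_CLOBBER supplies the "memory" clobber so the compiler cannot cache values across the loop, and smp_llsc_mb() compiles away; under CONFIG_WEAK_REORDERING_BEYOND_LLSC on SMP, the clobber is empty and smp_llsc_mb() provides both the SYNC and the compiler barrier after the loop. Below is a simplified, hypothetical sketch of that pattern (fetch_add_return is not a real kernel symbol; the kernel's actual atomics in asm/atomic.h follow this shape but additionally handle the Loongson3 and R10000 workarounds and use GCC_OFF_SMALL_ASM() rather than a plain "m" constraint):

static inline int fetch_add_return(int i, int *p)
{
	int result, temp;

	smp_mb__before_llsc();		/* order earlier accesses before the ll */

	__asm__ __volatile__(
	"	.set	push		\n"
	"	.set	mips3		\n"
	"1:	ll	%1, %2		\n"	/* load-linked */
	"	addu	%0, %1, %3	\n"
	"	sc	%0, %2		\n"	/* store-conditional, %0 = success */
	"	beqz	%0, 1b		\n"	/* retry if sc failed */
	"	addu	%0, %1, %3	\n"	/* recompute the return value */
	"	.set	pop		\n"
	: "=&r" (result), "=&r" (temp), "+m" (*p)
	: "Ir" (i)
	: __LLSC_CLOBBER);		/* "memory" only when LL/SC implies order */

	smp_llsc_mb();			/* SYNC + compiler barrier otherwise */

	return result;
}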
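The __smp_mb__before_atomic() change follows the same logic as the new comment in the patch: with the Loongson3 workaround enabled, every LL/SC loop already begins with a completion barrier immediately before the ll (emitted through the __SYNC(..., loongson3_war) infrastructure from asm/sync.h), so a second sync from smp_mb__before_atomic() would be redundant and the workaround case defines it empty. A hypothetical call site, for illustration only (struct request and publish_request are made-up names):

struct request { int ready; };

static void publish_request(struct request *rq, atomic_t *nr_pending)
{
	WRITE_ONCE(rq->ready, 1);	/* plain store, must not pass the RMW below */
	smp_mb__before_atomic();	/* no-op in the Loongson3 workaround case */
	atomic_inc(nr_pending);
	smp_mb__after_atomic();		/* still expands to smp_llsc_mb() */
}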