From 386afc91144b36b42117b0092893f15bc8798a80 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 9 Apr 2013 10:48:33 -0700
Subject: spinlocks and preemption points need to be at least compiler barriers

In UP and non-preempt respectively, the spinlocks and preemption
disable/enable points are stubbed out entirely, because there is no
regular code that can ever hit the kind of concurrency they are meant to
protect against.

However, while there is no regular code that can cause scheduling, we
_do_ end up having some exceptional (literally!) code that can do so,
and that we need to make sure does not ever get moved into the critical
region by the compiler.

In particular, get_user() and put_user() is generally implemented as
inline asm statements (even if the inline asm may then make a call
instruction to call out-of-line), and can obviously cause a page fault
and IO as a result.  If that inline asm has been scheduled into the
middle of a preemption-safe (or spinlock-protected) code region, we
obviously lose.

Now, admittedly this is *very* unlikely to actually ever happen, and
we've not seen examples of actual bugs related to this.  But partly
exactly because it's so hard to trigger and the resulting bug is so
subtle, we should be extra careful to get this right.

So make sure that even when preemption is disabled, and we don't have to
generate any actual *code* to explicitly tell the system that we are in
a preemption-disabled region, we need to at least tell the compiler not
to move things around the critical region.

This patch grew out of the same discussion that caused commits
79e5f05edcbf ("ARC: Add implicit compiler barrier to raw_local_irq*
functions") and 3e2e0d2c222b ("tile: comment assumption about
__insn_mtspr for <asm/irqflags.h>") to come about.

Note for stable: use discretion when/if applying this.  As mentioned,
this bug may never have actually bitten anybody, and gcc may never have
done the required code motion for it to possibly ever trigger in
practice.

Cc: stable@vger.kernel.org
Cc: Steven Rostedt <srostedt@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spinlock_up.h | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux/spinlock_up.h')

diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index a26e2fb604e6..e2369c167dbd 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -16,7 +16,10 @@
  * In the debug case, 1 means unlocked, 0 means locked. (the values
  * are inverted, to catch initialization bugs)
  *
- * No atomicity anywhere, we are on UP.
+ * No atomicity anywhere, we are on UP. However, we still need
+ * the compiler barriers, because we do not want the compiler to
+ * move potentially faulting instructions (notably user accesses)
+ * into the locked sequence, resulting in non-atomic execution.
  */
 
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -25,6 +28,7 @@
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	lock->slock = 0;
+	barrier();
 }
 
 static inline void
@@ -32,6 +36,7 @@ arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 {
 	local_irq_save(flags);
 	lock->slock = 0;
+	barrier();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -39,32 +44,34 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	char oldval = lock->slock;
 
 	lock->slock = 0;
+	barrier();
 
 	return oldval > 0;
 }
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
+	barrier();
 	lock->slock = 1;
 }
 
 /*
  * Read-write spinlocks. No debug version.
  */
-#define arch_read_lock(lock)		do { (void)(lock); } while (0)
-#define arch_write_lock(lock)		do { (void)(lock); } while (0)
-#define arch_read_trylock(lock)	({ (void)(lock); 1; })
-#define arch_write_trylock(lock)	({ (void)(lock); 1; })
-#define arch_read_unlock(lock)		do { (void)(lock); } while (0)
-#define arch_write_unlock(lock)	do { (void)(lock); } while (0)
+#define arch_read_lock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_write_lock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_read_trylock(lock)	({ barrier(); (void)(lock); 1; })
+#define arch_write_trylock(lock)	({ barrier(); (void)(lock); 1; })
+#define arch_read_unlock(lock)		do { barrier(); (void)(lock); } while (0)
+#define arch_write_unlock(lock)	do { barrier(); (void)(lock); } while (0)
 
 #else /* DEBUG_SPINLOCK */
 #define arch_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched.c and kernel_lock.c: */
-# define arch_spin_lock(lock)		do { (void)(lock); } while (0)
-# define arch_spin_lock_flags(lock, flags)	do { (void)(lock); } while (0)
-# define arch_spin_unlock(lock)	do { (void)(lock); } while (0)
-# define arch_spin_trylock(lock)	({ (void)(lock); 1; })
+# define arch_spin_lock(lock)		do { barrier(); (void)(lock); } while (0)
+# define arch_spin_lock_flags(lock, flags)	do { barrier(); (void)(lock); } while (0)
+# define arch_spin_unlock(lock)	do { barrier(); (void)(lock); } while (0)
+# define arch_spin_trylock(lock)	({ barrier(); (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
 
 #define arch_spin_is_contended(lock)	(((void)(lock), 0))
-- 
cgit v1.2.1