summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/futex.c131
-rw-r--r--kernel/locking/lockdep.c59
2 files changed, 107 insertions, 83 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 5d6ce6413ef1..bae542e4b2e9 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -124,16 +124,16 @@
* futex_wait(futex, val);
*
* waiters++; (a)
- * mb(); (A) <-- paired with -.
- * |
- * lock(hash_bucket(futex)); |
- * |
- * uval = *futex; |
- * | *futex = newval;
- * | sys_futex(WAKE, futex);
- * | futex_wake(futex);
- * |
- * `-------> mb(); (B)
+ * smp_mb(); (A) <-- paired with -.
+ * |
+ * lock(hash_bucket(futex)); |
+ * |
+ * uval = *futex; |
+ * | *futex = newval;
+ * | sys_futex(WAKE, futex);
+ * | futex_wake(futex);
+ * |
+ * `--------> smp_mb(); (B)
* if (uval == val)
* queue();
* unlock(hash_bucket(futex));
@@ -334,7 +334,7 @@ static inline void futex_get_mm(union futex_key *key)
/*
* Ensure futex_get_mm() implies a full barrier such that
* get_futex_key() implies a full barrier. This is relied upon
- * as full barrier (B), see the ordering comment above.
+ * as smp_mb(); (B), see the ordering comment above.
*/
smp_mb__after_atomic();
}
@@ -407,10 +407,10 @@ static void get_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- ihold(key->shared.inode); /* implies MB (B) */
+ ihold(key->shared.inode); /* implies smp_mb(); (B) */
break;
case FUT_OFF_MMSHARED:
- futex_get_mm(key); /* implies MB (B) */
+ futex_get_mm(key); /* implies smp_mb(); (B) */
break;
default:
/*
@@ -418,7 +418,7 @@ static void get_futex_key_refs(union futex_key *key)
* mm, therefore the only purpose of calling get_futex_key_refs
* is because we need the barrier for the lockless waiter check.
*/
- smp_mb(); /* explicit MB (B) */
+ smp_mb(); /* explicit smp_mb(); (B) */
}
}
@@ -497,7 +497,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
if (!fshared) {
key->private.mm = mm;
key->private.address = address;
- get_futex_key_refs(key); /* implies MB (B) */
+ get_futex_key_refs(key); /* implies smp_mb(); (B) */
return 0;
}
@@ -520,7 +520,20 @@ again:
else
err = 0;
- lock_page(page);
+ /*
+ * The treatment of mapping from this point on is critical. The page
+ * lock protects many things but in this context the page lock
+ * stabilizes mapping, prevents inode freeing in the shared
+ * file-backed region case and guards against movement to swap cache.
+ *
+ * Strictly speaking the page lock is not needed in all cases being
+ * considered here and page lock forces unnecessarily serialization
+ * From this point on, mapping will be re-verified if necessary and
+ * page lock will be acquired only if it is unavoidable
+ */
+ page = compound_head(page);
+ mapping = READ_ONCE(page->mapping);
+
/*
* If page->mapping is NULL, then it cannot be a PageAnon
* page; but it might be the ZERO_PAGE or in the gate area or
@@ -536,19 +549,31 @@ again:
* shmem_writepage move it from filecache to swapcache beneath us:
* an unlikely race, but we do need to retry for page->mapping.
*/
- mapping = compound_head(page)->mapping;
- if (!mapping) {
- int shmem_swizzled = PageSwapCache(page);
+ if (unlikely(!mapping)) {
+ int shmem_swizzled;
+
+ /*
+ * Page lock is required to identify which special case above
+ * applies. If this is really a shmem page then the page lock
+ * will prevent unexpected transitions.
+ */
+ lock_page(page);
+ shmem_swizzled = PageSwapCache(page) || page->mapping;
unlock_page(page);
put_page(page);
+
if (shmem_swizzled)
goto again;
+
return -EFAULT;
}
/*
* Private mappings are handled in a simple way.
*
+ * If the futex key is stored on an anonymous page, then the associated
+ * object is the mm which is implicitly pinned by the calling process.
+ *
* NOTE: When userspace waits on a MAP_SHARED mapping, even if
* it's a read-only handle, it's expected that futexes attach to
* the object not the particular process.
@@ -566,16 +591,74 @@ again:
key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
key->private.mm = mm;
key->private.address = address;
+
+ get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
} else {
+ struct inode *inode;
+
+ /*
+ * The associated futex object in this case is the inode and
+ * the page->mapping must be traversed. Ordinarily this should
+ * be stabilised under page lock but it's not strictly
+ * necessary in this case as we just want to pin the inode, not
+ * update the radix tree or anything like that.
+ *
+ * The RCU read lock is taken as the inode is finally freed
+ * under RCU. If the mapping still matches expectations then the
+ * mapping->host can be safely accessed as being a valid inode.
+ */
+ rcu_read_lock();
+
+ if (READ_ONCE(page->mapping) != mapping) {
+ rcu_read_unlock();
+ put_page(page);
+
+ goto again;
+ }
+
+ inode = READ_ONCE(mapping->host);
+ if (!inode) {
+ rcu_read_unlock();
+ put_page(page);
+
+ goto again;
+ }
+
+ /*
+ * Take a reference unless it is about to be freed. Previously
+ * this reference was taken by ihold under the page lock
+ * pinning the inode in place so i_lock was unnecessary. The
+ * only way for this check to fail is if the inode was
+ * truncated in parallel so warn for now if this happens.
+ *
+ * We are not calling into get_futex_key_refs() in file-backed
+ * cases, therefore a successful atomic_inc return below will
+ * guarantee that get_futex_key() will still imply smp_mb(); (B).
+ */
+ if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+ rcu_read_unlock();
+ put_page(page);
+
+ goto again;
+ }
+
+ /* Should be impossible but lets be paranoid for now */
+ if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+ err = -EFAULT;
+ rcu_read_unlock();
+ iput(inode);
+
+ goto out;
+ }
+
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
- key->shared.inode = mapping->host;
+ key->shared.inode = inode;
key->shared.pgoff = basepage_index(page);
+ rcu_read_unlock();
}
- get_futex_key_refs(key); /* implies MB (B) */
-
out:
- unlock_page(page);
put_page(page);
return err;
}
@@ -1864,7 +1947,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
q->lock_ptr = &hb->lock;
- spin_lock(&hb->lock); /* implies MB (A) */
+ spin_lock(&hb->lock); /* implies smp_mb(); (A) */
return hb;
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 716547fdb873..3261214323fa 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -123,8 +123,6 @@ static inline int debug_locks_off_graph_unlock(void)
return ret;
}
-static int lockdep_initialized;
-
unsigned long nr_list_entries;
static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
@@ -434,19 +432,6 @@ unsigned int max_lockdep_depth;
#ifdef CONFIG_DEBUG_LOCKDEP
/*
- * We cannot printk in early bootup code. Not even early_printk()
- * might work. So we mark any initialization errors and printk
- * about it later on, in lockdep_info().
- */
-static int lockdep_init_error;
-static const char *lock_init_error;
-static unsigned long lockdep_init_trace_data[20];
-static struct stack_trace lockdep_init_trace = {
- .max_entries = ARRAY_SIZE(lockdep_init_trace_data),
- .entries = lockdep_init_trace_data,
-};
-
-/*
* Various lockdep statistics:
*/
DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
@@ -669,20 +654,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
struct hlist_head *hash_head;
struct lock_class *class;
-#ifdef CONFIG_DEBUG_LOCKDEP
- /*
- * If the architecture calls into lockdep before initializing
- * the hashes then we'll warn about it later. (we cannot printk
- * right now)
- */
- if (unlikely(!lockdep_initialized)) {
- lockdep_init();
- lockdep_init_error = 1;
- lock_init_error = lock->name;
- save_stack_trace(&lockdep_init_trace);
- }
-#endif
-
if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
debug_locks_off();
printk(KERN_ERR
@@ -4013,28 +3984,6 @@ out_restore:
raw_local_irq_restore(flags);
}
-void lockdep_init(void)
-{
- int i;
-
- /*
- * Some architectures have their own start_kernel()
- * code which calls lockdep_init(), while we also
- * call lockdep_init() from the start_kernel() itself,
- * and we want to initialize the hashes only once:
- */
- if (lockdep_initialized)
- return;
-
- for (i = 0; i < CLASSHASH_SIZE; i++)
- INIT_HLIST_HEAD(classhash_table + i);
-
- for (i = 0; i < CHAINHASH_SIZE; i++)
- INIT_HLIST_HEAD(chainhash_table + i);
-
- lockdep_initialized = 1;
-}
-
void __init lockdep_info(void)
{
printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
@@ -4061,14 +4010,6 @@ void __init lockdep_info(void)
printk(" per task-struct memory footprint: %lu bytes\n",
sizeof(struct held_lock) * MAX_LOCK_DEPTH);
-
-#ifdef CONFIG_DEBUG_LOCKDEP
- if (lockdep_init_error) {
- printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
- printk("Call stack leading to lockdep invocation was:\n");
- print_stack_trace(&lockdep_init_trace, 0);
- }
-#endif
}
static void
OpenPOWER on IntegriCloud