From 27d7be1801a4824ecccbc735593101d72c038f13 Mon Sep 17 00:00:00 2001
From: Manfred Spraul
Date: Mon, 27 Feb 2017 14:28:15 -0800
Subject: ipc/sem.c: avoid using spin_unlock_wait()

a) The ACQUIRE in spin_lock() applies to the read, not to the store, at
least for powerpc. This forces us to add an smp_mb() into the fast path.

b) The memory barrier provided by spin_unlock_wait() is right now arch
dependent.

Therefore: Use spin_lock()/spin_unlock() instead of spin_unlock_wait().

Advantage: faster single op semop() calls, observed +8.9% on x86.
(The other solution would be arch dependencies in ipc/sem.)

Disadvantage: slower complex op semop() calls, if (and only if) there are
no sleeping operations.

The next patch adds hysteresis; this further reduces the probability that
the slow path is used.

Link: http://lkml.kernel.org/r/1476851896-3590-2-git-send-email-manfred@colorfullife.com
Signed-off-by: Manfred Spraul
Cc: Peter Zijlstra
Cc: Davidlohr Bueso
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: H. Peter Anvin
Cc: <1vier1@web.de>
Cc: kernel test robot
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 ipc/sem.c | 25 +++----------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/ipc/sem.c b/ipc/sem.c
index 3ec5742b5640..fe5db1ed081b 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -278,24 +278,13 @@ static void complexmode_enter(struct sem_array *sma)
 		return;
 	}
 
-	/* We need a full barrier after seting complex_mode:
-	 * The write to complex_mode must be visible
-	 * before we read the first sem->lock spinlock state.
-	 */
-	smp_store_mb(sma->complex_mode, true);
+	sma->complex_mode = true;
 
 	for (i = 0; i < sma->sem_nsems; i++) {
 		sem = sma->sem_base + i;
-		spin_unlock_wait(&sem->lock);
+		spin_lock(&sem->lock);
+		spin_unlock(&sem->lock);
 	}
-	/*
-	 * spin_unlock_wait() is not a memory barriers, it is only a
-	 * control barrier. The code must pair with spin_unlock(&sem->lock),
-	 * thus just the control barrier is insufficient.
-	 *
-	 * smp_rmb() is sufficient, as writes cannot pass the control barrier.
-	 */
-	smp_rmb();
 }
 
 /*
@@ -361,14 +350,6 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	 */
 	spin_lock(&sem->lock);
 
-	/*
-	 * See 51d7d5205d33
-	 * ("powerpc: Add smp_mb() to arch_spin_is_locked()"):
-	 * A full barrier is required: the write of sem->lock
-	 * must be visible before the read is executed
-	 */
-	smp_mb();
-
 	if (!smp_load_acquire(&sma->complex_mode)) {
 		/* fast path successful! */
 		return sops->sem_num;
--
cgit v1.2.3
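The pattern that replaces spin_unlock_wait() above, setting the flag and then
taking and immediately releasing every per-semaphore lock, both drains
in-flight simple operations and guarantees that later lockers observe the
flag. The following minimal user-space sketch illustrates the idea with C11
atomics and pthread mutexes standing in for the kernel's spinlocks and
smp_load_acquire(); the names (NSEMS, enter_complex_mode(), simple_op_lock(),
init_locks()) are invented for the example and do not appear in ipc/sem.c.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

#define NSEMS 16

static pthread_mutex_t sem_lock[NSEMS];	/* per-semaphore locks */
static atomic_bool complex_mode;	/* "no parallel simple ops" */

static void init_locks(void)
{
	int i;

	for (i = 0; i < NSEMS; i++)
		pthread_mutex_init(&sem_lock[i], NULL);
}

/* Called with the array-wide (global) lock held: switch to complex mode. */
static void enter_complex_mode(void)
{
	int i;

	atomic_store(&complex_mode, true);

	/*
	 * Lock+unlock of each per-semaphore lock waits for any simple op
	 * that already holds its lock; a simple op that takes the lock
	 * afterwards synchronizes with our unlock and therefore sees
	 * complex_mode == true in its re-check below.
	 */
	for (i = 0; i < NSEMS; i++) {
		pthread_mutex_lock(&sem_lock[i]);
		pthread_mutex_unlock(&sem_lock[i]);
	}
}

/* Fast path of a simple (single-semaphore) operation. */
static bool simple_op_lock(int semnum)
{
	/* Optimistic check, no ordering needed; re-checked under the lock. */
	if (atomic_load_explicit(&complex_mode, memory_order_relaxed))
		return false;

	pthread_mutex_lock(&sem_lock[semnum]);

	if (!atomic_load_explicit(&complex_mode, memory_order_acquire))
		return true;	/* fast path: caller holds sem_lock[semnum] */

	pthread_mutex_unlock(&sem_lock[semnum]);
	return false;		/* caller must fall back to the global lock */
}

int main(void)
{
	init_locks();

	if (simple_op_lock(0))			/* fast path succeeds ... */
		pthread_mutex_unlock(&sem_lock[0]);

	enter_complex_mode();			/* ... until complex mode is entered */
	return simple_op_lock(0) ? 1 : 0;	/* now the fast path is refused */
}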
From 9de5ab8a2eeea9ae4b63b6f6353b415b93e020c0 Mon Sep 17 00:00:00 2001
From: Manfred Spraul
Date: Mon, 27 Feb 2017 14:28:18 -0800
Subject: ipc/sem: add hysteresis

sysv sem has two lock modes: one with per-semaphore locks, one with a
single global lock for the whole array. When switching from the
per-semaphore locks to the global lock, all per-semaphore locks must be
scanned for ongoing operations.

The patch adds a hysteresis for switching from the global lock to the
per-semaphore locks. This reduces how often the per-semaphore locks must
be scanned.

Compared to the initial patch, this is a simplified solution: Setting
USE_GLOBAL_LOCK_HYSTERESIS to 1 restores the current behavior.

In theory, a workload with exactly 10 simple sops and then one complex op
now scales a bit worse, but this is pure theory: If there is concurrency,
then it won't be exactly 10:1:10:1:10:1:... If there is no concurrency,
then there is no need for scalability.

Link: http://lkml.kernel.org/r/1476851896-3590-3-git-send-email-manfred@colorfullife.com
Signed-off-by: Manfred Spraul
Cc: Peter Zijlstra
Cc: Davidlohr Bueso
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: H. Peter Anvin
Cc: <1vier1@web.de>
Cc: kernel test robot
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/sem.h |  2 +-
 ipc/sem.c           | 86 +++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 62 insertions(+), 26 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index d0efd6e6c20a..4fc222f8755d 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -21,7 +21,7 @@ struct sem_array {
 	struct list_head	list_id;	/* undo requests on this array */
 	int			sem_nsems;	/* no. of semaphores in array */
 	int			complex_count;	/* pending complex operations */
-	bool			complex_mode;	/* no parallel simple ops */
+	unsigned int		use_global_lock;/* >0: global lock required */
 };
 
 #ifdef CONFIG_SYSVIPC
diff --git a/ipc/sem.c b/ipc/sem.c
index fe5db1ed081b..e468cd1c12f0 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -158,23 +158,43 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #define SEMMSL_FAST	256 /* 512 bytes on stack */
 #define SEMOPM_FAST	64  /* ~ 372 bytes on stack */
 
+/*
+ * Switching from the mode suitable for simple ops
+ * to the mode for complex ops is costly. Therefore:
+ * use some hysteresis
+ */
+#define USE_GLOBAL_LOCK_HYSTERESIS	10
+
 /*
  * Locking:
  * a) global sem_lock() for read/write
  *	sem_undo.id_next,
  *	sem_array.complex_count,
- *	sem_array.complex_mode
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sem_base[i].pending_{const,alter}:
- *	sem_array.complex_mode (for read)
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
+ *	use_global_lock:
+ *	* global sem_lock() for write
+ *	* either local or global sem_lock() for read.
+ *
+ * Memory ordering:
+ * Most ordering is enforced by using spin_lock() and spin_unlock().
+ * The special case is use_global_lock:
+ * Setting it from non-zero to 0 is a RELEASE, this is ensured by
+ * using smp_store_release().
+ * Testing if it is non-zero is an ACQUIRE, this is ensured by using
+ * smp_load_acquire().
+ * Setting it from 0 to non-zero must be ordered with regards to
+ * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
+ * is inside a spin_lock() and after a write from 0 to non-zero a
+ * spin_lock()+spin_unlock() is done.
 */
 
 #define sc_semmsl	sem_ctls[0]
@@ -273,12 +293,16 @@ static void complexmode_enter(struct sem_array *sma)
 	int i;
 	struct sem *sem;
 
-	if (sma->complex_mode)  {
-		/* We are already in complex_mode. Nothing to do */
+	if (sma->use_global_lock > 0)  {
+		/*
+		 * We are already in global lock mode.
+		 * Nothing to do, just reset the
+		 * counter until we return to simple mode.
+		 */
+		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 		return;
 	}
-
-	sma->complex_mode = true;
+	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 
 	for (i = 0; i < sma->sem_nsems; i++) {
 		sem = sma->sem_base + i;
@@ -299,13 +323,17 @@ static void complexmode_tryleave(struct sem_array *sma)
 		 */
 		return;
 	}
-	/*
-	 * Immediately after setting complex_mode to false,
-	 * a simple op can start. Thus: all memory writes
-	 * performed by the current operation must be visible
-	 * before we set complex_mode to false.
-	 */
-	smp_store_release(&sma->complex_mode, false);
+	if (sma->use_global_lock == 1) {
+		/*
+		 * Immediately after setting use_global_lock to 0,
+		 * a simple op can start. Thus: all memory writes
+		 * performed by the current operation must be visible
+		 * before we set use_global_lock to 0.
+		 */
+		smp_store_release(&sma->use_global_lock, 0);
+	} else {
+		sma->use_global_lock--;
+	}
 }
 
 #define SEM_GLOBAL_LOCK	(-1)
@@ -335,22 +363,23 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	 * Optimized locking is possible if no complex operation
 	 * is either enqueued or processed right now.
 	 *
-	 * Both facts are tracked by complex_mode.
+	 * Both facts are tracked by use_global_mode.
 	 */
 	sem = sma->sem_base + sops->sem_num;
 
 	/*
-	 * Initial check for complex_mode. Just an optimization,
+	 * Initial check for use_global_lock. Just an optimization,
 	 * no locking, no memory barrier.
 	 */
-	if (!sma->complex_mode) {
+	if (!sma->use_global_lock) {
 		/*
 		 * It appears that no complex operation is around.
 		 * Acquire the per-semaphore lock.
 		 */
 		spin_lock(&sem->lock);
 
-		if (!smp_load_acquire(&sma->complex_mode)) {
+		/* pairs with smp_store_release() */
+		if (!smp_load_acquire(&sma->use_global_lock)) {
 			/* fast path successful! */
 			return sops->sem_num;
 		}
@@ -360,19 +389,26 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	/* slow path: acquire the full lock */
 	ipc_lock_object(&sma->sem_perm);
 
-	if (sma->complex_count == 0) {
-		/* False alarm:
-		 * There is no complex operation, thus we can switch
-		 * back to the fast path.
+	if (sma->use_global_lock == 0) {
+		/*
+		 * The use_global_lock mode ended while we waited for
+		 * sma->sem_perm.lock. Thus we must switch to locking
+		 * with sem->lock.
+		 * Unlike in the fast path, there is no need to recheck
+		 * sma->use_global_lock after we have acquired sem->lock:
+		 * We own sma->sem_perm.lock, thus use_global_lock cannot
+		 * change.
		 */
 		spin_lock(&sem->lock);
+
 		ipc_unlock_object(&sma->sem_perm);
 		return sops->sem_num;
 	} else {
-		/* Not a false alarm, thus complete the sequence for a
-		 * full lock.
+		/*
+		 * Not a false alarm, thus continue to use the global lock
+		 * mode. No need for complexmode_enter(), this was done by
+		 * the caller that has set use_global_mode to non-zero.
 		 */
-		complexmode_enter(sma);
 		return SEM_GLOBAL_LOCK;
 	}
 }
@@ -476,7 +512,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	}
 
 	sma->complex_count = 0;
-	sma->complex_mode = true; /* dropped by sem_unlock below */
+	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 	INIT_LIST_HEAD(&sma->pending_alter);
 	INIT_LIST_HEAD(&sma->pending_const);
 	INIT_LIST_HEAD(&sma->list_id);
--
cgit v1.2.3
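The new "Memory ordering" comment boils down to one release/acquire pair: the
1 -> 0 transition of use_global_lock is a release store, the re-check under
the per-semaphore lock is an acquire load, and the hysteresis counter itself
only changes under the global lock. Below is a compact user-space sketch of
that part, continuing the previous example; the struct layout and the helper
names complexmode_tryleave() and try_fast_path() are illustrative and are not
the kernel code (entering global mode would work as in the earlier sketch,
plus re-arming the counter).

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

#define USE_GLOBAL_LOCK_HYSTERESIS 10
#define NSEMS 16

struct sem_array {
	pthread_mutex_t sem_lock[NSEMS];	/* per-semaphore locks */
	atomic_uint use_global_lock;		/* >0: global lock required */
};

/* Called just before the global lock is dropped. */
void complexmode_tryleave(struct sem_array *sma)
{
	unsigned int v = atomic_load_explicit(&sma->use_global_lock,
					      memory_order_relaxed);

	if (v == 1) {
		/*
		 * RELEASE: everything written while the global lock was
		 * required must be visible before a simple op is allowed
		 * to take the fast path again.
		 */
		atomic_store_explicit(&sma->use_global_lock, 0,
				      memory_order_release);
	} else if (v > 1) {
		/* Hysteresis: stay in global mode for a few more rounds. */
		atomic_store_explicit(&sma->use_global_lock, v - 1,
				      memory_order_relaxed);
	}
}

/* Fast path of a simple op; returns true with sem_lock[semnum] held. */
bool try_fast_path(struct sem_array *sma, int semnum)
{
	/* Unlocked pre-check, just an optimization. */
	if (atomic_load_explicit(&sma->use_global_lock, memory_order_relaxed))
		return false;

	pthread_mutex_lock(&sma->sem_lock[semnum]);

	/* ACQUIRE: pairs with the release store in complexmode_tryleave(). */
	if (!atomic_load_explicit(&sma->use_global_lock, memory_order_acquire))
		return true;

	pthread_mutex_unlock(&sma->sem_lock[semnum]);
	return false;	/* still (or again) in global-lock mode */
}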
From 84f001e15737f8214b0f5f0f7dfec0fb1027938f Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 1 Feb 2017 16:36:40 +0100
Subject: sched/headers: Prepare for new header dependencies before moving code
 to <linux/sched/wake_q.h>

We are going to split <linux/sched/wake_q.h> out of <linux/sched.h>, which
will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/wake_q.h> file that just maps to
<linux/sched.h> to make this patch obviously correct and bisectable.

Include the new header in the files that are going to need it.
Acked-by: Linus Torvalds
Cc: Mike Galbraith
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Signed-off-by: Ingo Molnar
---
 include/linux/sched/wake_q.h    | 6 ++++++
 ipc/mqueue.c                    | 1 +
 ipc/msg.c                       | 1 +
 ipc/sem.c                       | 1 +
 kernel/futex.c                  | 1 +
 kernel/locking/mutex.c          | 1 +
 kernel/locking/rtmutex.c        | 1 +
 kernel/locking/rtmutex_common.h | 1 +
 kernel/locking/rwsem-xadd.c     | 3 ++-
 kernel/sched/sched.h            | 1 +
 10 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/sched/wake_q.h

diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
new file mode 100644
index 000000000000..6383b35e4eba
--- /dev/null
+++ b/include/linux/sched/wake_q.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SCHED_WAKE_Q_H
+#define _LINUX_SCHED_WAKE_Q_H
+
+#include <linux/sched.h>
+
+#endif /* _LINUX_SCHED_WAKE_Q_H */
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 4fdd97031431..40e448de8a7e 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <linux/sched/wake_q.h>
 
 #include
 #include "util.h"
diff --git a/ipc/msg.c b/ipc/msg.c
index e3e52ce01123..ecc387e573f6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <linux/sched/wake_q.h>
 #include
 #include
 #include
diff --git a/ipc/sem.c b/ipc/sem.c
index e468cd1c12f0..947dc2348271 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -82,6 +82,7 @@
 #include
 #include
 #include
+#include <linux/sched/wake_q.h>
 
 #include
 #include "util.h"
diff --git a/kernel/futex.c b/kernel/futex.c
index b687cb22301c..8ddcf9ea953c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -61,6 +61,7 @@
 #include
 #include
 #include
+#include <linux/sched/wake_q.h>
 #include
 #include
 #include
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index ad2d9e22697b..57f6311e2405 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <linux/sched/wake_q.h>
 #include
 #include
 #include
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index d340be3a488f..d4f798491361 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <linux/sched/wake_q.h>
 #include
 
 #include "rtmutex_common.h"
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 990134617b4c..856dfff5c33a 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -13,6 +13,7 @@
 #define __KERNEL_RTMUTEX_COMMON_H
 
 #include
+#include <linux/sched/wake_q.h>
 
 /*
  * This is the control structure for tasks blocked on a rt_mutex,
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 2ad8d8dc3bb1..4fe8d8ad4396 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -10,10 +10,11 @@
  * and Davidlohr Bueso . Based on mutexes.
  */
 #include
-#include
 #include
 #include
+#include
 #include
+#include
 #include
 
 #include "rwsem.h"
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9b9cb260d9cf..683570739f46 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <linux/sched/wake_q.h>
 #include
 #include
 #include
--
cgit v1.2.3
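The placeholder trick described in the changelog, introducing the new header
as a one-line forwarder to the old one and converting the include sites
first, is a general way to split a large header while keeping every
intermediate commit buildable and bisectable. A generic sketch of the
sequence follows; the file names big_header.h/widget.h and the declaration
are invented for illustration and are not kernel code.

/* widget.h, step 1: a pure placeholder that just maps to the old header. */
#ifndef MYLIB_WIDGET_H
#define MYLIB_WIDGET_H

#include "big_header.h"		/* everything still lives here */

#endif /* MYLIB_WIDGET_H */

/* user.c, step 2: users of the widget API switch their include first.
 * This compiles unchanged, because widget.h pulls in big_header.h.
 */
#include "widget.h"

/* Step 3, a later patch: move the widget declarations (e.g.
 * void widget_init(struct widget *w);) from big_header.h into widget.h
 * and drop the forwarding #include. Every user already includes the
 * right header, so each step builds and bisects cleanly.
 */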