summaryrefslogtreecommitdiffstats
path: root/include/linux/percpu-rwsem.h
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@redhat.com>2012-12-17 16:01:32 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 17:15:18 -0800
commita1fd3e24d8a484b3265a6d485202afe093c058f3 (patch)
tree472f6480a81abbc04b27eccdb798d80b1685bee0 /include/linux/percpu-rwsem.h
parent53809751ac230a3611b5cdd375f3389f3207d471 (diff)
downloadtalos-op-linux-a1fd3e24d8a484b3265a6d485202afe093c058f3.tar.gz
talos-op-linux-a1fd3e24d8a484b3265a6d485202afe093c058f3.zip
percpu_rw_semaphore: reimplement to not block the readers unnecessarily
Currently the writer does msleep() plus synchronize_sched() 3 times to acquire/release the semaphore, and during this time the readers are blocked completely. Even if the "write" section was not actually started or if it was already finished. With this patch down_write/up_write does synchronize_sched() twice and down_read/up_read are still possible during this time, just they use the slow path. percpu_down_write() first forces the readers to use rw_semaphore and increment the "slow" counter to take the lock for reading, then it takes that rw_semaphore for writing and blocks the readers. Also. With this patch the code relies on the documented behaviour of synchronize_sched(), it doesn't try to pair synchronize_sched() with barrier. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mikulas Patocka <mpatocka@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Anton Arapov <anton@redhat.com> Cc: Jens Axboe <axboe@kernel.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/percpu-rwsem.h')
-rw-r--r--include/linux/percpu-rwsem.h83
1 files changed, 13 insertions, 70 deletions
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index bd1e86071e57..592f0d610d8e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -2,82 +2,25 @@
#define _LINUX_PERCPU_RWSEM_H
#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/percpu.h>
-#include <linux/rcupdate.h>
-#include <linux/delay.h>
+#include <linux/wait.h>
struct percpu_rw_semaphore {
- unsigned __percpu *counters;
- bool locked;
- struct mutex mtx;
+ unsigned int __percpu *fast_read_ctr;
+ struct mutex writer_mutex;
+ struct rw_semaphore rw_sem;
+ atomic_t slow_read_ctr;
+ wait_queue_head_t write_waitq;
};
-#define light_mb() barrier()
-#define heavy_mb() synchronize_sched_expedited()
+extern void percpu_down_read(struct percpu_rw_semaphore *);
+extern void percpu_up_read(struct percpu_rw_semaphore *);
-static inline void percpu_down_read(struct percpu_rw_semaphore *p)
-{
- rcu_read_lock_sched();
- if (unlikely(p->locked)) {
- rcu_read_unlock_sched();
- mutex_lock(&p->mtx);
- this_cpu_inc(*p->counters);
- mutex_unlock(&p->mtx);
- return;
- }
- this_cpu_inc(*p->counters);
- rcu_read_unlock_sched();
- light_mb(); /* A, between read of p->locked and read of data, paired with D */
-}
+extern void percpu_down_write(struct percpu_rw_semaphore *);
+extern void percpu_up_write(struct percpu_rw_semaphore *);
-static inline void percpu_up_read(struct percpu_rw_semaphore *p)
-{
- light_mb(); /* B, between read of the data and write to p->counter, paired with C */
- this_cpu_dec(*p->counters);
-}
-
-static inline unsigned __percpu_count(unsigned __percpu *counters)
-{
- unsigned total = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
-
- return total;
-}
-
-static inline void percpu_down_write(struct percpu_rw_semaphore *p)
-{
- mutex_lock(&p->mtx);
- p->locked = true;
- synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */
- while (__percpu_count(p->counters))
- msleep(1);
- heavy_mb(); /* C, between read of p->counter and write to data, paired with B */
-}
-
-static inline void percpu_up_write(struct percpu_rw_semaphore *p)
-{
- heavy_mb(); /* D, between write to data and write to p->locked, paired with A */
- p->locked = false;
- mutex_unlock(&p->mtx);
-}
-
-static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
-{
- p->counters = alloc_percpu(unsigned);
- if (unlikely(!p->counters))
- return -ENOMEM;
- p->locked = false;
- mutex_init(&p->mtx);
- return 0;
-}
-
-static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
-{
- free_percpu(p->counters);
- p->counters = NULL; /* catch use after free bugs */
-}
+extern int percpu_init_rwsem(struct percpu_rw_semaphore *);
+extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
#endif
OpenPOWER on IntegriCloud