diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/rculist.h | 40 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 18 | ||||
-rw-r--r-- | include/linux/rcutree.h | 7 | ||||
-rw-r--r-- | include/linux/srcu.h | 48 | ||||
-rw-r--r-- | include/trace/events/rcu.h | 2 |
5 files changed, 94 insertions, 21 deletions
diff --git a/include/linux/rculist.h b/include/linux/rculist.h index d079290843a9..e0f0fab20415 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -30,6 +30,7 @@ * This is only for internal list manipulation where we know * the prev/next entries already! */ +#ifndef CONFIG_DEBUG_LIST static inline void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { @@ -38,6 +39,10 @@ static inline void __list_add_rcu(struct list_head *new, rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } +#else +extern void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next); +#endif /** * list_add_rcu - add a new entry to rcu-protected list @@ -108,7 +113,7 @@ static inline void list_add_tail_rcu(struct list_head *new, */ static inline void list_del_rcu(struct list_head *entry) { - __list_del(entry->prev, entry->next); + __list_del_entry(entry); entry->prev = LIST_POISON2; } @@ -228,18 +233,43 @@ static inline void list_splice_init_rcu(struct list_head *list, }) /** - * list_first_entry_rcu - get the first element from a list + * Where are list_empty_rcu() and list_first_entry_rcu()? + * + * Implementing those functions following their counterparts list_empty() and + * list_first_entry() is not advisable because they lead to subtle race + * conditions as the following snippet shows: + * + * if (!list_empty_rcu(mylist)) { + * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member); + * do_something(bar); + * } + * + * The list may not be empty when list_empty_rcu checks it, but it may be when + * list_first_entry_rcu rereads the ->next pointer. + * + * Rereading the ->next pointer is not a problem for list_empty() and + * list_first_entry() because they would be protected by a lock that blocks + * writers. + * + * See list_first_or_null_rcu for an alternative. 
+ */ + +/** + * list_first_or_null_rcu - get the first element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_struct within the struct. * - * Note, that list is expected to be not empty. + * Note that if the list is empty, it returns NULL. * * This primitive may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ -#define list_first_entry_rcu(ptr, type, member) \ - list_entry_rcu((ptr)->next, type, member) +#define list_first_or_null_rcu(ptr, type, member) \ + ({struct list_head *__ptr = (ptr); \ + struct list_head __rcu *__next = list_next_rcu(__ptr); \ + likely(__ptr != __next) ? container_of(__next, type, member) : NULL; \ + }) /** * list_for_each_entry_rcu - iterate over rcu list of given type diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 29665a3b3ac5..26d1a47591f1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -924,6 +924,21 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) kfree_call_rcu(head, (rcu_callback)offset); } +/* + * Does the specified offset indicate that the corresponding rcu_head + * structure can be handled by kfree_rcu()? + */ +#define __is_kfree_rcu_offset(offset) ((offset) < 4096) + +/* + * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. + */ +#define __kfree_rcu(head, offset) \ + do { \ + BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ + call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + } while (0) + /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree @@ -946,6 +961,9 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) * * Note that the allowable offset might decrease in the future, for example, * to allow something like kmem_cache_free_rcu(). 
+ * + * The BUILD_BUG_ON check must not involve any function calls, hence the + * checks are done in macros here. */ #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 782a8ab51bc1..3c6083cde4fc 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -86,13 +86,6 @@ extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -/* A context switch is a grace period for RCU-sched and RCU-bh. */ -static inline int rcu_blocking_is_gp(void) -{ - might_sleep(); /* Check for RCU read-side critical section. */ - return num_online_cpus() == 1; -} - extern void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index d3d5fa54f25e..55a5c52cbb25 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -29,26 +29,35 @@ #include <linux/mutex.h> #include <linux/rcupdate.h> +#include <linux/workqueue.h> struct srcu_struct_array { - int c[2]; + unsigned long c[2]; + unsigned long seq[2]; +}; + +struct rcu_batch { + struct rcu_head *head, **tail; }; struct srcu_struct { - int completed; + unsigned completed; struct srcu_struct_array __percpu *per_cpu_ref; - struct mutex mutex; + spinlock_t queue_lock; /* protect ->batch_queue, ->running */ + bool running; + /* callbacks just queued */ + struct rcu_batch batch_queue; + /* callbacks try to do the first check_zero */ + struct rcu_batch batch_check0; + /* callbacks done with the first check_zero and the flip */ + struct rcu_batch batch_check1; + struct rcu_batch batch_done; + struct delayed_work work; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; -#ifndef CONFIG_PREEMPT -#define srcu_barrier() barrier() -#else /* #ifndef CONFIG_PREEMPT */ -#define srcu_barrier() -#endif /* 
#else #ifndef CONFIG_PREEMPT */ - #ifdef CONFIG_DEBUG_LOCK_ALLOC int __init_srcu_struct(struct srcu_struct *sp, const char *name, @@ -67,12 +76,33 @@ int init_srcu_struct(struct srcu_struct *sp); #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/** + * call_srcu() - Queue a callback for invocation after an SRCU grace period + * @sp: srcu_struct in which to queue the callback + * @head: structure to be used for queueing the SRCU callback. + * @func: function to be invoked after the SRCU grace period + * + * The callback function will be invoked some time after a full SRCU + * grace period elapses, in other words after all pre-existing SRCU + * read-side critical sections have completed. However, the callback + * function might well execute concurrently with other SRCU read-side + * critical sections that started after call_srcu() was invoked. SRCU + * read-side critical sections are delimited by srcu_read_lock() and + * srcu_read_unlock(), and may be nested. + * + * The callback will be invoked from process context, but must nevertheless + * be fast and must not block. + */ +void call_srcu(struct srcu_struct *sp, struct rcu_head *head, + void (*func)(struct rcu_head *head)); + void cleanup_srcu_struct(struct srcu_struct *sp); int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); void synchronize_srcu_expedited(struct srcu_struct *sp); long srcu_batches_completed(struct srcu_struct *sp); +void srcu_barrier(struct srcu_struct *sp); #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 337099783f37..1480900c511c 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -292,6 +292,8 @@ TRACE_EVENT(rcu_dyntick, * "More callbacks": Still more callbacks, try again to clear them out. * "Callbacks drained": All callbacks processed, off to dyntick idle! 
* "Timer": Timer fired to cause CPU to continue processing callbacks. + * "Demigrate": Timer fired on wrong CPU, woke up correct CPU. + * "Cleanup after idle": Idle exited, timer canceled. */ TRACE_EVENT(rcu_prep_idle, |