From 27dc0b1b9fdaefeeb3d6c8fa0aed5d826ce8f3e9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 9 Apr 2017 18:55:21 -0700 Subject: rcutorture: Add lockdep to one of the SRCU scenarios Back when SRCU was simpler, there wasn't much need for lockdep. However, with Tree SRCU, it is needed. This commit therefore adds CONFIG_PROVE_LOCKING to the SRCU-P scenario. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/SRCU-P | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P index 4837430a71c0..8205b481d9ed 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P @@ -6,3 +6,5 @@ CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y #CHECK#CONFIG_RCU_EXPERT=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y -- cgit v1.2.1 From 8d6dd656a1c5683534ee174f06c9b1d736ebea3c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Apr 2017 10:27:19 -0700 Subject: rcutorture: Add three-level tree test for Tree SRCU This commit adds a test for a three-level srcu_node tree for Tree SRCU in the existing SRCU-P scenario. This requires enabling CONFIG_RCU_EXPERT, so the CONFIG_RCU_EXPERT=n scenario is now SRCU-N. The reason for using SRCU-P for the tall tree is that preemption raises the possibility of locating more bugs than does the non-preemptive SRCU-N. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/SRCU-N | 2 +- tools/testing/selftests/rcutorture/configs/rcu/SRCU-P | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N index 1a087c3c8bb8..2da8b49589a0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N @@ -5,4 +5,4 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n -CONFIG_RCU_EXPERT=y +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P index 8205b481d9ed..ab7ccd38232b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P @@ -2,9 +2,11 @@ CONFIG_RCU_TRACE=n CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_EXPERT=y +CONFIG_RCU_FANOUT=2 +CONFIG_RCU_FANOUT_LEAF=2 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y -#CHECK#CONFIG_RCU_EXPERT=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y -- cgit v1.2.1 From 3c52f2622779ee6c8712c731b18406998eb4d4c6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Apr 2017 10:48:42 -0700 Subject: rcutorture: Fix bug in reporting Kconfig mis-settings Kconfig "select" clauses can defeat Kconfig-fragment file attempts to clear a given Kconfig variable, and dependencies can defeat attempts to set a given Kconfig variable. Because "select" clauses and dependencies can be added at any time, there needs to be a way to verify that the Kconfig-fragment file's requests were honored. And there is, except that it is buggy. This commit therefore provides the needed fix. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/configcheck.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index eee31e261bf7..70fca318a82b 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -27,7 +27,7 @@ cat $1 > $T/.config cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' | awk ' -BEGIN { +{ print "if grep -q \"" $0 "\" < '"$T/.config"'"; print "then"; print "\t:"; -- cgit v1.2.1 From 23ca09670312a373d0b72bc7f87bd32323fb673f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Apr 2017 11:39:08 -0700 Subject: rcutorture: Add a scenario for Tiny SRCU This commit adds an SRCU-t rcutorture scenario for the new Tiny SRCU implementation, removing the need to pass the --bootargs parameter to kvm.sh to run Tiny SRCU tests. This commit also adds SRCU-t to the set of scenarios that are run by default. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/CFLIST | 2 ++ tools/testing/selftests/rcutorture/configs/rcu/SRCU-t | 10 ++++++++++ tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot | 1 + tools/testing/selftests/rcutorture/configs/rcu/SRCU-u | 9 +++++++++ tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot | 1 + 5 files changed, 23 insertions(+) create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-t create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-u create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index a3a1a05a2b5c..6a0b9f69faad 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -9,6 +9,8 @@ TREE08 TREE09 SRCU-N SRCU-P +SRCU-t +SRCU-u TINY01 TINY02 TASKS01 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t new file mode 100644 index 000000000000..6c78022c8cd8 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t @@ -0,0 +1,10 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_SRCU=y +CONFIG_RCU_TRACE=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_DEBUG_ATOMIC_SLEEP=y +#CHECK#CONFIG_PREEMPT_COUNT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot new file mode 100644 index 000000000000..238bfe3bd0cc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot @@ -0,0 +1 @@ +rcutorture.torture_type=srcu diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u new file mode 100644 index 000000000000..6bc24e99862f --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u @@ -0,0 +1,9 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_SRCU=y +CONFIG_RCU_TRACE=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_PREEMPT_COUNT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot 
new file mode 100644 index 000000000000..84a7d51b7481 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot @@ -0,0 +1 @@ +rcutorture.torture_type=srcud -- cgit v1.2.1 From c0ee4500ff67e455dcbc74ff3e9e9faa3bc93be1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Apr 2017 13:49:03 -0700 Subject: rcutorture: Add a scenario for Classic SRCU A robust combination of paranoia and cowardice has resulted in retaining Classic SRCU (CONFIG_CLASSIC_SRCU) as a backup for the shiny new Tiny and Tree SRCU implementations. If it is to be a viable backup, it of course needs to be tested. This commit therefore adds an rcutorture scenario named SRCU-C for Classic SRCU. This commit also adds this scenario to the set that are run by default. Once sufficient good experience has accumulated for Tiny and Tree SRCU, this test will be removed, along with the Classic SRCU implementation itself. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/CFLIST | 1 + tools/testing/selftests/rcutorture/configs/rcu/SRCU-C | 11 +++++++++++ tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot | 1 + 3 files changed, 13 insertions(+) create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-C create mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index 6a0b9f69faad..0c1da784b8cb 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -7,6 +7,7 @@ TREE06 TREE07 TREE08 TREE09 +SRCU-C SRCU-N SRCU-P SRCU-t diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C new file mode 100644 index 000000000000..e4f8b1b75584 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C @@ -0,0 +1,11 @@ +CONFIG_RCU_TRACE=n +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_EXPERT=y +CONFIG_CLASSIC_SRCU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot new file mode 100644 index 000000000000..84a7d51b7481 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot @@ -0,0 +1 @@ +rcutorture.torture_type=srcud -- cgit v1.2.1 From f92c734f02cbf10e40569facff82059ae9b61920 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Apr 2017 15:40:35 -0700 Subject: rcu: Prevent rcu_barrier() from starting needless grace periods Currently rcu_barrier() uses call_rcu() to enqueue new callbacks on each CPU with a non-empty callback list. This works, but means that rcu_barrier() forces grace periods that are not otherwise needed. The key point is that rcu_barrier() never needs to wait for a grace period, but instead only for all pre-existing callbacks to be invoked. This means that rcu_barrier()'s new callbacks should be placed in the callback-list segment containing the last pre-existing callback. This commit makes this change using the new rcu_segcblist_entrain() function. Signed-off-by: Paul E. 
McKenney --- include/trace/events/rcu.h | 1 + kernel/rcu/tree.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index e3facb356838..91dc089d65b7 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -742,6 +742,7 @@ TRACE_EVENT(rcu_torture_read, * "OnlineQ": _rcu_barrier() found online CPU with callbacks. * "OnlineNQ": _rcu_barrier() found online CPU, no callbacks. * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. + * "IRQNQ": An rcu_barrier_callback() callback found no callbacks. * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "LastCB": An rcu_barrier_callback() invoked the last callback. * "Inc2": _rcu_barrier() piggyback check counter incremented. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e354e475e645..657056c3e0cd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3578,8 +3578,14 @@ static void rcu_barrier_func(void *type) struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); _rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence); - atomic_inc(&rsp->barrier_cpu_count); - rsp->call(&rdp->barrier_head, rcu_barrier_callback); + rdp->barrier_head.func = rcu_barrier_callback; + debug_rcu_head_queue(&rdp->barrier_head); + if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) { + atomic_inc(&rsp->barrier_cpu_count); + } else { + debug_rcu_head_unqueue(&rdp->barrier_head); + _rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence); + } } /* -- cgit v1.2.1 From 39687d6c1dc0c842ede1d4b4677931170c51305d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Apr 2017 16:15:06 -0700 Subject: rcutorture: Correctly handle CONFIG_RCU_TORTURE_TEST_* options The rcutorture scripting handles the CONFIG_*_TORTURE_TEST Kconfig options specially, and therefore greps them out of the Kconfig-fragment files. Unfortunately, a poor choice of grep pattern means that the CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP, CONFIG_RCU_TORTURE_TEST_SLOW_INIT, and CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT Kconfig options are also grepped out, preventing rcutorture from using them. This commit therefore fixes the offending grep pattern to focus only on the CONFIG_*_TORTURE_TEST Kconfig options. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 00cb0db2643d..c29f2ec0bf9f 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -45,7 +45,7 @@ T=/tmp/test-linux.sh.$$ trap 'rm -rf $T' 0 mkdir $T -grep -v 'CONFIG_[A-Z]*_TORTURE_TEST' < ${config_template} > $T/config +grep -v 'CONFIG_[A-Z]*_TORTURE_TEST=' < ${config_template} > $T/config cat << ___EOF___ >> $T/config CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD" CONFIG_VIRTIO_PCI=y -- cgit v1.2.1 From 17ed2b6c3ad9f80174c32cc19d86a15396abc196 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Apr 2017 16:22:08 -0700 Subject: rcutorture: Update test scenarios based on new Kconfig dependencies A number of the rcutorture test scenarios were not using the desired Kconfig options because dependencies were preventing the selections in the Kconfig-fragment files from being honored. This commit therefore updates the Kconfig-fragment files to account for these changes in dependencies. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/configs/rcu/TINY02 | 3 ++- tools/testing/selftests/rcutorture/configs/rcu/TREE01 | 1 + tools/testing/selftests/rcutorture/configs/rcu/TREE02 | 2 +- tools/testing/selftests/rcutorture/configs/rcu/TREE04 | 1 - tools/testing/selftests/rcutorture/configs/rcu/TREE06 | 1 + tools/testing/selftests/rcutorture/configs/rcu/TREE07 | 2 -- 6 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index a59f7686e219..9007cd979df7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -11,5 +11,6 @@ CONFIG_PROVE_LOCKING=y CONFIG_PROVE_RCU_REPEATEDLY=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_PREEMPT_COUNT=y +CONFIG_DEBUG_ATOMIC_SLEEP=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 359cb258f639..cc6c5815236e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -10,6 +10,7 @@ CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_MAXSMP=y +CONFIG_CPUMASK_OFFSTACK=y CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ZERO=y CONFIG_DEBUG_LOCK_ALLOC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index c1ab5926568b..1cecab330ba0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -18,9 +18,9 @@ CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y +CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 5af758e783c7..851c01ae2cea 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -15,7 +15,6 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=4 CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index 4cb02bd28f08..9215827649bd 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -18,6 +18,7 @@ CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y +CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_RCU_EXPERT=y CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index b12a3ea1867e..99f04e4c5162 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -1,6 +1,5 @@ CONFIG_SMP=y CONFIG_NR_CPUS=16 -CONFIG_CPUMASK_OFFSTACK=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n @@ -15,7 +14,6 @@ CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=2 
CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -- cgit v1.2.1 From 881ec9d209d5371c21db89ca1bb19afd3fcadab3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Apr 2017 15:16:50 -0700 Subject: srcu: Eliminate possibility of destructive counter overflow Earlier versions of Tree SRCU were subject to a counter overflow bug that could theoretically result in too-short grace periods. This commit eliminates this problem by adding an update-side memory barrier. The short explanation is that if the updater sums the unlock counts too late to see a given __srcu_read_unlock() increment, that CPU's next __srcu_read_lock() must see the new value of ->srcu_idx, thus incrementing the other bank of counters. This eliminates the possibility of destructive counter overflow as long as the srcu_read_lock() nesting level does not exceed floor(ULONG_MAX/NR_CPUS/2), which should be an eminently reasonable nesting limit, especially on 64-bit systems. Reported-by: Lance Roy Suggested-by: Lance Roy Signed-off-by: Paul E. McKenney --- kernel/rcu/srcutree.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 157654fa436a..fceca84df6b0 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -275,15 +275,20 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) * not mean that there are no more readers, as one could have read * the current index but not have incremented the lock counter yet. * - * Possible bug: There is no guarantee that there haven't been - * ULONG_MAX increments of ->srcu_lock_count[] since the unlocks were - * counted, meaning that this could return true even if there are - * still active readers. Since there are no memory barriers around - * srcu_flip(), the CPU is not required to increment ->srcu_idx - * before running srcu_readers_unlock_idx(), which means that there - * could be an arbitrarily large number of critical sections that - * execute after srcu_readers_unlock_idx() but use the old value - * of ->srcu_idx. + * So suppose that the updater is preempted here for so long + * that more than ULONG_MAX non-nested readers come and go in + * the meantime. It turns out that this cannot result in overflow + * because if a reader modifies its unlock count after we read it + * above, then that reader's next load of ->srcu_idx is guaranteed + * to get the new value, which will cause it to operate on the + * other bank of counters, where it cannot contribute to the + * overflow of these counters. This means that there is a maximum + * of 2*NR_CPUS increments, which cannot overflow given current + * systems, especially not on 64-bit systems. + * + * OK, how about nesting? This does impose a limit on nesting + * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient, + * especially on 64-bit systems. */ return srcu_readers_lock_idx(sp, idx) == unlocks; } @@ -671,6 +676,16 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) */ static void srcu_flip(struct srcu_struct *sp) { + /* + * Ensure that if this updater saw a given reader's increment + * from __srcu_read_lock(), that reader was using an old value + * of ->srcu_idx. Also ensure that if a given reader sees the + * new value of ->srcu_idx, this updater's earlier scans cannot + * have seen that reader's increments (which is OK, because this + * grace period need not wait on that reader). 
+ */ + smp_mb(); /* E */ /* Pairs with B and C. */ + WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); /* -- cgit v1.2.1 From 5b72f9643b52a5148bb8ced126e20563adfa3466 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Apr 2017 15:29:14 -0700 Subject: rcu: Complain if blocking in preemptible RCU read-side critical section Although preemptible RCU allows its read-side critical sections to be preempted, general blocking is forbidden. The reason for this is that excessive preemption times can be handled by CONFIG_RCU_BOOST=y, but a voluntarily blocked task doesn't care how high you boost its priority. Because preemptible RCU is a global mechanism, one ill-behaved reader hurts everyone. Hence the prohibition against general blocking in RCU-preempt read-side critical sections. Preemption yes, blocking no. This commit enforces this prohibition. There is a special exception for the -rt patchset (which they kindly volunteered to implement): It is OK to block (as opposed to merely being preempted) within an RCU-preempt read-side critical section, but only if the blocking is subject to priority inheritance. This exception permits CONFIG_RCU_BOOST=y to get -rt RCU readers out of trouble. Why doesn't this exception also apply to mainline's rt_mutex? Because of the possibility that someone does general blocking while holding an rt_mutex. Yes, the priority boosting will affect the rt_mutex, but it won't help with the task doing general blocking while holding that rt_mutex. Reported-by: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 657056c3e0cd..9ce682242e99 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -478,7 +478,7 @@ void rcu_note_context_switch(bool preempt) barrier(); /* Avoid RCU read-side critical sections leaking down. */ trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(); - rcu_preempt_note_context_switch(); + rcu_preempt_note_context_switch(preempt); /* Load rcu_urgent_qs before other flags. */ if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) goto out; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index ba38262c3554..0fa7aee9ef55 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -477,7 +477,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); /* Forward declarations for rcutree_plugin.h */ static void rcu_bootup_announce(void); -static void rcu_preempt_note_context_switch(void); +static void rcu_preempt_note_context_switch(bool preempt); static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static bool rcu_preempt_has_tasks(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c9a48657512a..a421753e8e9c 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -286,12 +286,13 @@ static void rcu_preempt_qs(void) * * Caller must disable interrupts. */ -static void rcu_preempt_note_context_switch(void) +static void rcu_preempt_note_context_switch(bool preempt) { struct task_struct *t = current; struct rcu_data *rdp; struct rcu_node *rnp; + WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0); if (t->rcu_read_lock_nesting > 0 && !t->rcu_read_unlock_special.b.blocked) { @@ -738,7 +739,7 @@ static void __init rcu_bootup_announce(void) * Because preemptible RCU does not exist, we never have to check for * CPUs being in quiescent states. 
*/ -static void rcu_preempt_note_context_switch(void) +static void rcu_preempt_note_context_switch(bool preempt) { } -- cgit v1.2.1
From 9683937df9ebf4eb62cdedb09c5f20a0760c7d80 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Apr 2017 16:12:52 -0700 Subject: rcuperf: Defer expedited/normal check to end of test Currently, the rcuperf startup code checks to see if the user asked to measure only expedited grace periods, yet constrained all grace periods to be normal, or if the user asked to measure only normal grace periods, yet constrained all grace periods to be expedited. Useless tests of this sort are aborted. Unfortunately, making RCU work through the mid-boot dead zone [1] puts RCU into expedited-only mode during that zone, which happens to also be the exact time that rcuperf carries out the aforementioned check. So if the user asks rcuperf to measure only normal grace periods (the default), rcuperf will now always complain and terminate the test. This commit therefore moves the checks to rcu_perf_cleanup(). This has the disadvantage of failing to abort useless tests, but avoids the need to create yet another kthread and the need to do fiddly checks involving the holdoff time. (Yes, another approach is to do the checks in a late-stage init function, but that would require some way to communicate badness to rcuperf's kthreads, and seems not worth the bother.) [1] https://lwn.net/Articles/716148/ Signed-off-by: Paul E. McKenney --- kernel/rcu/rcuperf.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index a4a86fb47e4a..ef5b1faac495 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -452,6 +452,15 @@ rcu_perf_cleanup(void) u64 *wdp; u64 *wdpp; + /* + * Would like warning at start, but everything is expedited + * during the mid-boot phase, so have to wait till the end. + */ + if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) + VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); + if (rcu_gp_is_normal() && gp_exp) + VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); + if (torture_cleanup_begin()) return; @@ -624,16 +633,6 @@ rcu_perf_init(void) firsterr = -ENOMEM; goto unwind; } - if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) { - VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); - firsterr = -EINVAL; - goto unwind; - } - if (rcu_gp_is_normal() && gp_exp) { - VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); - firsterr = -EINVAL; - goto unwind; - } for (i = 0; i < nrealwriters; i++) { writer_durations[i] = kcalloc(MAX_MEAS, sizeof(*writer_durations[i]), -- cgit v1.2.1
From 1dcf2806ec19f01abed8027377f69e5c45878838 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Apr 2017 19:12:36 -0700 Subject: rcuperf: Remove conflicting Kconfig options The TREE and TREE54 rcuperf scenarios' Kconfig fragment files specified conflicting values for CONFIG_RCU_TRACE. This commit therefore removes the =n line in favor of the =y line. Signed-off-by: Paul E.
McKenney --- tools/testing/selftests/rcutorture/configs/rcuperf/TREE | 1 - tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 | 1 - 2 files changed, 2 deletions(-)
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE index a312f671a29a..721cfda76ab2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE @@ -7,7 +7,6 @@ CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 index 985fb170d13c..7629f5dd73b2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 @@ -8,7 +8,6 @@ CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n -- cgit v1.2.1
From e28371c891db29c892d85322ea27ad997cc50f72 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Apr 2017 09:59:53 -0700 Subject: rcu: Remove obsolete reference to synchronize_kernel() The synchronize_kernel() primitive was removed in favor of synchronize_sched() more than a decade ago, and it seems likely that rather few kernel hackers are familiar with it. Its continued presence is therefore providing more confusion than enlightenment. This commit therefore removes the reference from the synchronize_sched() header comment, and adds the corresponding information to the synchronize_rcu() header comment. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 ------ kernel/rcu/tree_plugin.h | 9 +++++++-- 2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9ce682242e99..3bee58fc23b1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3280,12 +3280,6 @@ static inline int rcu_blocking_is_gp(void) * to have executed a full memory barrier during the execution of * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but * again only if the system has more than one CPU). - * - * This primitive provides the guarantees made by the (now removed) - * synchronize_kernel() API. In contrast, synchronize_rcu() only - * guarantees that rcu_read_lock() sections will have completed. - * In "classic RCU", these two guarantees happen to be one and - * the same, but can differ in realtime RCU implementations. */ void synchronize_sched(void) {
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a421753e8e9c..3b432fa4c45b 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -664,8 +664,13 @@ EXPORT_SYMBOL_GPL(call_rcu); * synchronize_rcu() was waiting. RCU read-side critical sections are * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. * - * See the description of synchronize_sched() for more detailed information - * on memory ordering guarantees. + * See the description of synchronize_sched() for more detailed + * information on memory-ordering guarantees. However, please note + * that -only- the memory-ordering guarantees apply.
For example, + * synchronize_rcu() is -not- guaranteed to wait on things like code + * protected by preempt_disable(), instead, synchronize_rcu() is -only- + * guaranteed to wait on RCU read-side critical sections, that is, sections + * of code protected by rcu_read_lock(). */ void synchronize_rcu(void) { -- cgit v1.2.1 From 881ed593a323c832c2e9383effeb6a0c99859210 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Apr 2017 12:47:10 -0700 Subject: rcuperf: Add ability to performance-test call_rcu() and friends This commit upgrades rcuperf so that it can do performance testing on asynchronous grace-period primitives such as call_srcu(). There is a new rcuperf.gp_async module parameter that specifies this new behavior, with the pre-existing rcuperf.gp_exp testing expedited grace periods such as synchronize_rcu_expedited, and with the default being to test synchronous non-expedited grace periods such as synchronize_rcu(). There is also a new rcuperf.gp_async_max module parameter that specifies the maximum number of outstanding callbacks per writer kthread, defaulting to 1,000. When this limit is exceeded, the writer thread invokes the appropriate flavor of rcu_barrier() to wait for callbacks to drain. Signed-off-by: Paul E. McKenney [ paulmck: Removed the redundant initialization noted by Arnd Bergmann. ] --- Documentation/admin-guide/kernel-parameters.txt | 11 ++++ kernel/rcu/rcuperf.c | 69 +++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..3598464ca8ed 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3319,6 +3319,17 @@ This wake_up() will be accompanied by a WARN_ONCE() splat and an ftrace_dump(). + rcuperf.gp_async= [KNL] + Measure performance of asynchronous + grace-period primitives such as call_rcu(). + + rcuperf.gp_async_max= [KNL] + Specify the maximum number of outstanding + callbacks per writer thread. When a writer + thread exceeds this limit, it invokes the + corresponding flavor of rcu_barrier() to allow + previously posted callbacks to drain. + rcuperf.gp_exp= [KNL] Measure performance of expedited synchronous grace-period primitives. diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index ef5b1faac495..e1ce97bead94 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -59,6 +59,8 @@ MODULE_AUTHOR("Paul E. McKenney "); #define VERBOSE_PERFOUT_ERRSTRING(s) \ do { if (verbose) pr_alert("%s" PERF_FLAG "!!! 
%s\n", perf_type, s); } while (0) +torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); +torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); torture_param(int, nreaders, -1, "Number of RCU reader threads"); @@ -86,13 +88,16 @@ static u64 t_rcu_perf_writer_started; static u64 t_rcu_perf_writer_finished; static unsigned long b_rcu_perf_writer_started; static unsigned long b_rcu_perf_writer_finished; +static DEFINE_PER_CPU(atomic_t, n_async_inflight); static int rcu_perf_writer_state; #define RTWS_INIT 0 -#define RTWS_EXP_SYNC 1 -#define RTWS_SYNC 2 -#define RTWS_IDLE 2 -#define RTWS_STOPPING 3 +#define RTWS_ASYNC 1 +#define RTWS_BARRIER 2 +#define RTWS_EXP_SYNC 3 +#define RTWS_SYNC 4 +#define RTWS_IDLE 5 +#define RTWS_STOPPING 6 #define MAX_MEAS 10000 #define MIN_MEAS 100 @@ -114,6 +119,8 @@ struct rcu_perf_ops { unsigned long (*started)(void); unsigned long (*completed)(void); unsigned long (*exp_completed)(void); + void (*async)(struct rcu_head *head, rcu_callback_t func); + void (*gp_barrier)(void); void (*sync)(void); void (*exp_sync)(void); const char *name; @@ -153,6 +160,8 @@ static struct rcu_perf_ops rcu_ops = { .started = rcu_batches_started, .completed = rcu_batches_completed, .exp_completed = rcu_exp_batches_completed, + .async = call_rcu, + .gp_barrier = rcu_barrier, .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, .name = "rcu" @@ -181,6 +190,8 @@ static struct rcu_perf_ops rcu_bh_ops = { .started = rcu_batches_started_bh, .completed = rcu_batches_completed_bh, .exp_completed = rcu_exp_batches_completed_sched, + .async = call_rcu_bh, + .gp_barrier = rcu_barrier_bh, .sync = synchronize_rcu_bh, .exp_sync = synchronize_rcu_bh_expedited, .name = "rcu_bh" @@ -208,6 +219,16 @@ static unsigned long srcu_perf_completed(void) return srcu_batches_completed(srcu_ctlp); } +static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func) +{ + call_srcu(srcu_ctlp, head, func); +} + +static void srcu_rcu_barrier(void) +{ + srcu_barrier(srcu_ctlp); +} + static void srcu_perf_synchronize(void) { synchronize_srcu(srcu_ctlp); @@ -226,6 +247,8 @@ static struct rcu_perf_ops srcu_ops = { .started = NULL, .completed = srcu_perf_completed, .exp_completed = srcu_perf_completed, + .async = srcu_call_rcu, + .gp_barrier = srcu_rcu_barrier, .sync = srcu_perf_synchronize, .exp_sync = srcu_perf_synchronize_expedited, .name = "srcu" @@ -254,6 +277,8 @@ static struct rcu_perf_ops sched_ops = { .started = rcu_batches_started_sched, .completed = rcu_batches_completed_sched, .exp_completed = rcu_exp_batches_completed_sched, + .async = call_rcu_sched, + .gp_barrier = rcu_barrier_sched, .sync = synchronize_sched, .exp_sync = synchronize_sched_expedited, .name = "sched" @@ -281,6 +306,8 @@ static struct rcu_perf_ops tasks_ops = { .readunlock = tasks_perf_read_unlock, .started = rcu_no_completed, .completed = rcu_no_completed, + .async = call_rcu_tasks, + .gp_barrier = rcu_barrier_tasks, .sync = synchronize_rcu_tasks, .exp_sync = synchronize_rcu_tasks, .name = "tasks" @@ -343,6 +370,15 @@ rcu_perf_reader(void *arg) return 0; } +/* + * Callback function for asynchronous grace periods from rcu_perf_writer(). + */ +static void rcu_perf_async_cb(struct rcu_head *rhp) +{ + atomic_dec(this_cpu_ptr(&n_async_inflight)); + kfree(rhp); +} + /* * RCU perf writer kthread. Repeatedly does a grace period. 
*/ @@ -352,6 +388,7 @@ rcu_perf_writer(void *arg) int i = 0; int i_max; long me = (long)arg; + struct rcu_head *rhp = NULL; struct sched_param sp; bool started = false, done = false, alldone = false; u64 t; @@ -382,7 +419,23 @@ rcu_perf_writer(void *arg) do { wdp = &wdpp[i]; *wdp = ktime_get_mono_fast_ns(); - if (gp_exp) { + if (gp_async) { +retry: + if (!rhp) + rhp = kmalloc(sizeof(*rhp), GFP_KERNEL); + if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) { + rcu_perf_writer_state = RTWS_ASYNC; + atomic_inc(this_cpu_ptr(&n_async_inflight)); + cur_ops->async(rhp, rcu_perf_async_cb); + rhp = NULL; + } else if (!kthread_should_stop()) { + rcu_perf_writer_state = RTWS_BARRIER; + cur_ops->gp_barrier(); + goto retry; + } else { + kfree(rhp); /* Because we are stopping. */ + } + } else if (gp_exp) { rcu_perf_writer_state = RTWS_EXP_SYNC; cur_ops->exp_sync(); } else { @@ -429,6 +482,10 @@ rcu_perf_writer(void *arg) i++; rcu_perf_wait_shutdown(); } while (!torture_must_stop()); + if (gp_async) { + rcu_perf_writer_state = RTWS_BARRIER; + cur_ops->gp_barrier(); + } rcu_perf_writer_state = RTWS_STOPPING; writer_n_durations[me] = i_max; torture_kthread_stopping("rcu_perf_writer"); @@ -460,6 +517,8 @@ rcu_perf_cleanup(void) VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); if (rcu_gp_is_normal() && gp_exp) VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); + if (gp_exp && gp_async) + VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!"); if (torture_cleanup_begin()) return; -- cgit v1.2.1 From ced8d6fdf8b6b4b4a8354eede7464fbc0c2c96a8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Apr 2017 14:53:23 -0700 Subject: rcuperf: Add a Kconfig-fragment file for Classic SRCU This commit adds a Kconfig-fragment file for Classic SRCU to ease performance comparisons with Tree SRCU. Signed-off-by: Paul E. McKenney --- .../selftests/rcutorture/configs/rcuperf/SRCUCLASSIC | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC b/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC new file mode 100644 index 000000000000..a1395af60ef4 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC @@ -0,0 +1,16 @@ +CONFIG_SMP=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y +CONFIG_CLASSIC_SRCU=y -- cgit v1.2.1 From dcfc315b7b7a5e7668dd9cb2474708b51ab1cdb1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 Apr 2017 09:53:07 -0700 Subject: rcu: Make sync_rcu_preempt_exp_done() return bool The sync_rcu_preempt_exp_done() function returns a logical expression, but its return type is nevertheless int. This commit therefore changes the return type to bool. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index e513b4ab1197..dd21ca47e4b4 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -147,7 +147,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) * * Caller must hold the rcu_state's exp_mutex. */ -static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) +static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp) { return rnp->exp_tasks == NULL && READ_ONCE(rnp->expmask) == 0; -- cgit v1.2.1
From 9895313534d44e714785603a2af8db928904787e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 Apr 2017 07:37:45 -0700 Subject: checkpatch: Remove checks for expedited grace periods There was a time when the expedited grace-period primitives (synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(), and synchronize_sched_expedited()) used rather antisocial kernel facilities like try_stop_cpus(). However, they have since been housebroken to use only single-CPU IPIs, and typically cause less disturbance than a scheduling-clock interrupt. Furthermore, this disturbance can be eliminated entirely using NO_HZ_FULL on the one hand or the rcupdate.rcu_normal boot parameter on the other. This commit therefore removes checkpatch's complaints about use of the expedited RCU primitives. Signed-off-by: Paul E. McKenney --- scripts/checkpatch.pl | 17 ----------------- 1 file changed, 17 deletions(-)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4b9569fa931b..c7e4d73fe1ce 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5533,23 +5533,6 @@ sub process { } } -# Check for expedited grace periods that interrupt non-idle non-nohz -# online CPUs. These expedited can therefore degrade real-time response -# if used carelessly, and should be avoided where not absolutely -# needed. It is always OK to use synchronize_rcu_expedited() and -# synchronize_sched_expedited() at boot time (before real-time applications -# start) and in error situations where real-time response is compromised in -# any case. Note that synchronize_srcu_expedited() does -not- interrupt -# other CPUs, so don't warn on uses of synchronize_srcu_expedited(). -# Of course, nothing comes for free, and srcu_read_lock() and -# srcu_read_unlock() do contain full memory barriers in payment for -# synchronize_srcu_expedited() non-interruption properties. - if ($line =~ /\b(synchronize_rcu_expedited|synchronize_sched_expedited)\(/) { - WARN("EXPEDITED_RCU_GRACE_PERIOD", - "expedited RCU grace periods should be avoided where they can degrade real-time response\n" . $herecurr); - - } - # check of hardware specific defines if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) { CHK("ARCH_DEFINES", -- cgit v1.2.1
From f60cb4d4c8e568c2d63d01d72e589c4f75ee4140 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 Apr 2017 13:43:21 -0700 Subject: rcuperf: Add test for dynamically initialized srcu_struct This commit adds a perf_type of "srcud", which specifies that rcuperf is to test SRCU on a dynamically initialized srcu_struct. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcuperf.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index e1ce97bead94..5158ddba6716 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -254,6 +254,35 @@ static struct rcu_perf_ops srcu_ops = { .name = "srcu" }; +static struct srcu_struct srcud; + +static void srcu_sync_perf_init(void) +{ + srcu_ctlp = &srcud; + init_srcu_struct(srcu_ctlp); +} + +static void srcu_sync_perf_cleanup(void) +{ + cleanup_srcu_struct(srcu_ctlp); +} + +static struct rcu_perf_ops srcud_ops = { + .ptype = SRCU_FLAVOR, + .init = srcu_sync_perf_init, + .cleanup = srcu_sync_perf_cleanup, + .readlock = srcu_perf_read_lock, + .readunlock = srcu_perf_read_unlock, + .started = NULL, + .completed = srcu_perf_completed, + .exp_completed = srcu_perf_completed, + .async = srcu_call_rcu, + .gp_barrier = srcu_rcu_barrier, + .sync = srcu_perf_synchronize, + .exp_sync = srcu_perf_synchronize_expedited, + .name = "srcud" +}; + /* * Definitions for sched perf testing. */ @@ -622,7 +651,7 @@ rcu_perf_init(void) long i; int firsterr = 0; static struct rcu_perf_ops *perf_ops[] = { - &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, + &rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops, RCUPERF_TASKS_OPS }; -- cgit v1.2.1
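[Editor's note: as context for the new "srcud" perf_type, the following is a minimal sketch of the dynamic-initialization pattern it exercises, as it might appear in a kernel module. The my_* names are hypothetical; init_srcu_struct(), srcu_read_lock(), srcu_read_unlock(), synchronize_srcu(), and cleanup_srcu_struct() are the standard SRCU API.]

	#include <linux/module.h>
	#include <linux/srcu.h>

	static struct srcu_struct my_srcu;	/* Hypothetical example structure. */

	static int __init my_init(void)
	{
		/* Runtime initialization, as opposed to static DEFINE_SRCU(). */
		return init_srcu_struct(&my_srcu);
	}

	static void my_reader(void)
	{
		int idx;

		idx = srcu_read_lock(&my_srcu);	/* Begin read-side critical section. */
		/* ... access SRCU-protected data here ... */
		srcu_read_unlock(&my_srcu, idx);	/* End it, passing back the index. */
	}

	static void __exit my_exit(void)
	{
		synchronize_srcu(&my_srcu);	/* Wait for pre-existing readers. */
		cleanup_srcu_struct(&my_srcu);	/* Release the srcu_struct's state. */
	}

This runtime-initialized path is distinct from the statically allocated srcu_struct that the pre-existing "srcu" perf_type covers, which is why it gets its own perf_type.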
From 79269ee3fa1da52f392f8a4ebf3082152c1c8c58 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 Apr 2017 14:30:37 -0700 Subject: doc/atomic_ops: Clarify smp_mb__{before,after}_atomic() This commit explicitly states that surrounding a non-value-returning read-modify-write atomic operation with smp_mb__before_atomic() and smp_mb__after_atomic() provides full ordering, just as is provided by value-returning atomic read-modify-write operations. Signed-off-by: Paul E. McKenney --- Documentation/core-api/atomic_ops.rst | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst index 55e43f1c80de..fce929144ccd 100644 --- a/Documentation/core-api/atomic_ops.rst +++ b/Documentation/core-api/atomic_ops.rst @@ -303,6 +303,11 @@ defined which accomplish this:: void smp_mb__before_atomic(void); void smp_mb__after_atomic(void); +Preceding a non-value-returning read-modify-write atomic operation with +smp_mb__before_atomic() and following it with smp_mb__after_atomic() +provides the same full ordering that is provided by value-returning +read-modify-write atomic operations. + For example, smp_mb__before_atomic() can be used like so:: obj->dead = 1; -- cgit v1.2.1
From 6016ffc3874d3a1ddf41518481da54b4714717af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 Apr 2017 16:20:07 -0700 Subject: atomics: Add header comment to spin_unlock_wait() There is material describing the ordering guarantees provided by spin_unlock_wait(), but it is not necessarily easy to find. This commit therefore adds a docbook header comment to this function informally describing its semantics. Signed-off-by: Paul E.
McKenney Acked-by: Peter Zijlstra --- include/linux/spinlock.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 59248dcc6ef3..d9510e8522d4 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -369,6 +369,26 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock) raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ }) +/** + * spin_unlock_wait - Interpose between successive critical sections + * @lock: the spinlock whose critical sections are to be interposed. + * + * Semantically this is equivalent to a spin_lock() immediately + * followed by a spin_unlock(). However, most architectures have + * more efficient implementations in which the spin_unlock_wait() + * cannot block concurrent lock acquisition, and in some cases + * where spin_unlock_wait() does not write to the lock variable. + * Nevertheless, spin_unlock_wait() can have high overhead, so if + * you feel the need to use it, please check to see if there is + * a better way to get your job done. + * + * The ordering guarantees provided by spin_unlock_wait() are: + * + * 1. All accesses preceding the spin_unlock_wait() happen before + * any accesses in later critical sections for this same lock. + * 2. All accesses following the spin_unlock_wait() happen after + * any accesses in earlier critical sections for this same lock. + */ static __always_inline void spin_unlock_wait(spinlock_t *lock) { raw_spin_unlock_wait(&lock->rlock); -- cgit v1.2.1 From 35bdc72a339717c60c16457091f227a122456bd9 Mon Sep 17 00:00:00 2001 From: Stan Drozd Date: Thu, 20 Apr 2017 11:03:36 +0200 Subject: docs: Fix typo in Documentation/memory-barriers.txt This commit changes "architecure" to the correct spelling, "architecture". Signed-off-by: Stan Drozd Signed-off-by: Paul E. McKenney --- Documentation/memory-barriers.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 732f10ea382e..9d5e0f853f08 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -27,7 +27,7 @@ The purpose of this document is twofold: (2) to provide a guide as to how to use the barriers that are available. Note that an architecture can provide more than the minimum requirement -for any particular barrier, but if the architecure provides less than +for any particular barrier, but if the architecture provides less than that, that architecture is incorrect. Note also that it is possible that a barrier may be a no-op for an -- cgit v1.2.1 From 59ca3f9fef599cc6da7975d2261ab3bb86a6ac6b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 20 Apr 2017 17:17:27 -0700 Subject: rcuperf: Add the ability to test tiny RCU flavors This commit adds a TINY rcuperf test scenario, which allows performance testing of Tiny RCU and Tiny SRCU. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/configs/rcuperf/TINY | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/TINY diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY new file mode 100644 index 000000000000..fb05ef5279b4 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY @@ -0,0 +1,16 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_RCU_TRACE=y -- cgit v1.2.1 From 1f4f6da1c80905830337c3ff46a2d3260dabb864 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Apr 2017 11:16:32 -0700 Subject: srcu: Make Classic and Tree SRCU announce themselves at bootup Currently, the only way to tell whether a given kernel is running Classic, Tiny, or Tree SRCU is to look at the .config file, which can easily be lost or associated with the wrong kernel. This commit therefore has Classic and Tree SRCU identify themselves at boot time. Signed-off-by: Paul E. McKenney --- kernel/rcu/srcu.c | 7 +++++++ kernel/rcu/srcutree.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index dea03614263f..4e3f558409a0 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -659,3 +659,10 @@ void process_srcu(struct work_struct *work) srcu_reschedule(sp); } EXPORT_SYMBOL_GPL(process_srcu); + +static int __init srcu_bootup_announce(void) +{ + pr_info("Classic SRCU implementation.\n"); + return 0; +} +early_initcall(srcu_bootup_announce); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index fceca84df6b0..03d57fe9f094 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1167,3 +1167,10 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type, *gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed); } EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); + +static int __init srcu_bootup_announce(void) +{ + pr_info("Hierarchical SRCU implementation.\n"); + return 0; +} +early_initcall(srcu_bootup_announce); -- cgit v1.2.1 From 41f364819fbfc092462e3c85283fa887c8159387 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Apr 2017 12:01:32 -0700 Subject: rcutorture: Reduce CPUs dedicated to testing Classic SRCU Given that the plan is to retire Classic SRCU in the near future, this commit reduces the number of CPUs dedicated to testing Classic SRCU. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/SRCU-C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C index e4f8b1b75584..d4e19c087c21 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C @@ -1,6 +1,6 @@ CONFIG_RCU_TRACE=n CONFIG_SMP=y -CONFIG_NR_CPUS=8 +CONFIG_NR_CPUS=3 CONFIG_HOTPLUG_CPU=y CONFIG_RCU_EXPERT=y CONFIG_CLASSIC_SRCU=y -- cgit v1.2.1 From 3ddf20c953520203c42dbed1f091ed52080e1cd2 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 21 Apr 2017 13:33:20 -0700 Subject: srcu: Shrink Tiny SRCU a bit more This commit rearranges Tiny SRCU's srcu_struct structure, substitutes u8 for bool, and shrinks counters down to short. Signed-off-by: Paul E. McKenney --- include/linux/srcutiny.h | 8 ++++---- kernel/rcu/rcutorture.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 42311ee0334f..b8859179b001 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -27,15 +27,15 @@ #include struct srcu_struct { - int srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ + short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ + short srcu_idx; /* Current reader array element. */ + u8 srcu_gp_running; /* GP workqueue running? */ + u8 srcu_gp_waiting; /* GP waiting for readers? */ struct swait_queue_head srcu_wq; /* Last srcu_read_unlock() wakes GP. */ unsigned long srcu_gp_seq; /* GP seq # for callback tagging. */ struct rcu_segcblist srcu_cblist; /* Pending SRCU callbacks. */ - int srcu_idx; /* Current reader array element. */ - bool srcu_gp_running; /* GP workqueue running? */ - bool srcu_gp_waiting; /* GP waiting for readers? */ struct work_struct srcu_work; /* For driving grace periods. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ae6e574d4cf5..a58592b73f19 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -609,7 +609,7 @@ static void srcu_torture_stats(void) pr_cont("\n"); #elif defined(CONFIG_TINY_SRCU) idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1; - pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%d,%d)\n", + pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", torture_type, TORTURE_FLAG, idx, READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]), READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx])); -- cgit v1.2.1
From 492b95e59735998312f678d77a2d5fe20af6b0b9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Apr 2017 16:09:15 -0700 Subject: rcuperf: Set more user-friendly defaults Common-case use of rcuperf must set rcuperf.nreaders=0 and, if rcuperf is not built as a module, rcuperf.shutdown. This commit therefore sets the default for rcuperf.nreaders to zero and sets the default for rcuperf.shutdown to zero if rcuperf is built as a module and to one otherwise. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcuperf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 5158ddba6716..49c8ed6bd2fd 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -63,9 +63,10 @@ torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives"); torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); -torture_param(int, nreaders, -1, "Number of RCU reader threads"); +torture_param(int, nreaders, 0, "Number of RCU reader threads"); torture_param(int, nwriters, -1, "Number of RCU updater threads"); -torture_param(bool, shutdown, false, "Shutdown at end of performance tests."); +torture_param(bool, shutdown, !IS_ENABLED(MODULE), + "Shutdown at end of performance tests."); torture_param(bool, verbose, true, "Enable verbose debugging printk()s"); static char *perf_type = "rcu"; -- cgit v1.2.1
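[Editor's note: an aside on the !IS_ENABLED(MODULE) idiom in the hunk above. Kbuild compiles modular objects with -DMODULE, which defines MODULE to 1, so IS_ENABLED(MODULE) evaluates to 1 for a modular build and to 0 for a built-in one. A minimal illustrative sketch of the resulting default, not part of the patch:]

	/*
	 * Illustrative sketch: rcuperf built as a module keeps running
	 * until the module is unloaded, so shutdown defaults to false.
	 * Built-in rcuperf has no unload path, so shutdown defaults to
	 * true and the system powers off when the test completes.
	 */
	static bool shutdown_default = !IS_ENABLED(MODULE);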
From b562b85c3a8c395d49aca6a46b321299d6e49ff1 Mon Sep 17 00:00:00 2001 From: Priyalee Kushwaha Date: Sat, 22 Apr 2017 10:17:11 -0700 Subject: srcu-cbmc: Use /usr/bin/awk instead of /bin/awk Most OS distributions have awk in /usr/bin, not in /bin. Without this patch, kernel-devsrc fails to build because the srcu-cbmc script's runtime dependency, /bin/awk, is not found. Signed-off-by: Kushwaha, Priyalee Acked-by: Lance Roy Reviewed-by: Josh Triplett Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk index 8ff89043d0a9..c9e8bc5082a7 100755 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk @@ -1,4 +1,4 @@ -#!/bin/awk -f +#!/usr/bin/awk -f # Modify SRCU for formal verification. The first argument should be srcu.h and # the second should be srcu.c. Outputs modified srcu.h and srcu.c into the -- cgit v1.2.1
From 820687a7b98a5031207893ff265f97c0a0ad403e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Apr 2017 15:12:56 -0700 Subject: rcuperf: Add writer_holdoff boot parameter This commit adds a writer_holdoff boot parameter to rcuperf, which is intended to be used to test Tree SRCU's auto-expediting. This boot parameter is in microseconds, and defaults to zero (that is, disabled). Set it to a bit larger than srcutree.exp_holdoff, keeping the nanosecond/microsecond conversion in mind, to force Tree SRCU to auto-expedite more aggressively. This commit also adds documentation for this parameter, and fixes some alphabetization while in the neighborhood. Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 11 ++++++++--- kernel/rcu/rcuperf.c | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3598464ca8ed..01b5ab92d251 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3357,17 +3357,22 @@ rcuperf.perf_runnable= [BOOT] Start rcuperf running at boot time. + rcuperf.perf_type= [KNL] + Specify the RCU implementation to test. + rcuperf.shutdown= [KNL] Shut the system down after performance tests complete.
This is useful for hands-off automated testing. - rcuperf.perf_type= [KNL] - Specify the RCU implementation to test. - rcuperf.verbose= [KNL] Enable additional printk() statements. + rcuperf.writer_holdoff= [KNL] + Write-side holdoff between grace periods, + in microseconds. The default of zero says + no holdoff. + rcutorture.cbflood_inter_holdoff= [KNL] Set holdoff time (jiffies) between successive callback-flood tests. diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 49c8ed6bd2fd..d80f11d9f8bd 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -68,6 +68,7 @@ torture_param(int, nwriters, -1, "Number of RCU updater threads"); torture_param(bool, shutdown, !IS_ENABLED(MODULE), "Shutdown at end of performance tests."); torture_param(bool, verbose, true, "Enable verbose debugging printk()s"); +torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); static char *perf_type = "rcu"; module_param(perf_type, charp, 0444); @@ -447,6 +448,8 @@ rcu_perf_writer(void *arg) } do { + if (writer_holdoff) + udelay(writer_holdoff); wdp = &wdpp[i]; *wdp = ktime_get_mono_fast_ns(); if (gp_async) { -- cgit v1.2.1 From 5d9853f3cf26109ab7b6b7bf414ffe3c739177d4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Apr 2017 10:24:08 -0700 Subject: rcutorture: Add "git diff" output to testid.txt file Currently, when running from a git archive, the testid.txt file contains only the branch name, the output of "git status", and the SHA-1 of the current HEAD. This is useful, but does not uniquely identify the source code that was built. This commit therefore adds the output of "git diff HEAD", which means that if two testid.txt files compare equal, they correspond to exactly the same source code. Give or take the possibility of SHA-1 collisions, that is. ;-) Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 3b3c1b693ee1..50091de3a911 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -296,10 +296,7 @@ if test -d .git then git status >> $resdir/$ds/testid.txt git rev-parse HEAD >> $resdir/$ds/testid.txt - if ! git diff HEAD > $T/git-diff 2>&1 - then - cp $T/git-diff $resdir/$ds - fi + git diff HEAD >> $resdir/$ds/testid.txt fi ___EOF___ awk < $T/cfgcpu.pack \ -- cgit v1.2.1 From 09f501a0f0b5ad81f79049a31fb71f99f282d663 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Apr 2017 13:16:15 -0700 Subject: srcu: Document auto-expediting requirement This commit documents the auto-expediting requirement satisfied by commits 2da4b2a7fd8d ("srcu: Expedite first synchronize_srcu() when idle") and 22607d66bbc3 ("srcu: Specify auto-expedite holdoff time"). Signed-off-by: Paul E. McKenney --- Documentation/RCU/Design/Requirements/Requirements.html | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index f60adf112663..8bbf0bb18389 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -2935,6 +2935,20 @@ The reason that this is possible is that SRCU is insensitive to whether or not a CPU is online, which means that srcu_barrier() need not exclude CPU-hotplug operations. +
<p>
+SRCU also differs from other RCU flavors in that SRCU's expedited and +non-expedited grace periods are implemented by the same mechanism. +This means that in the current SRCU implementation, expediting a +future grace period has the side effect of expediting all prior +grace periods that have not yet completed. +(But please note that this is a property of the current implementation, +not necessarily of future implementations.) +In addition, if SRCU has been idle for longer than the interval +specified by the srcutree.exp_holdoff kernel boot parameter +(25 microseconds by default), +and if a synchronize_srcu() invocation ends this idle period, +that invocation will be automatically expedited. +
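[Editorial note, not part of the original patch: the auto-expediting behavior documented above is visible to SRCU users without any API change. A minimal sketch follows, in which "my_srcu" and do_update() are hypothetical names, while DEFINE_SRCU(), synchronize_srcu(), and synchronize_srcu_expedited() are the real interfaces:

	DEFINE_SRCU(my_srcu);

	static void do_update(void)
	{
		/* ... unlink the item being removed from reader-visible state ... */

		/*
		 * If my_srcu has been idle longer than srcutree.exp_holdoff
		 * nanoseconds (25 microseconds by default), this ordinary
		 * call behaves like synchronize_srcu_expedited().
		 */
		synchronize_srcu(&my_srcu);

		/* ... now safe to free the item ... */
	}

Nothing else is required of the caller; the holdoff check happens inside SRCU.]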
<p>
As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating a locking bottleneck present in prior kernel versions. -- cgit v1.2.1 From c75e9caaf85f1fc4ed32e510b259d67ec3c4c603 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Apr 2017 13:25:41 -0700 Subject: doc: Take tail recursion into account in RCU requirements This commit classifies tail recursion as an alternative way to write a loop, with similar limitations. Signed-off-by: Paul E. McKenney --- Documentation/RCU/Design/Requirements/Requirements.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index 8bbf0bb18389..cb614f2a69c2 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -1849,7 +1849,8 @@ mass storage, or user patience, whichever comes first. If the nesting is not visible to the compiler, as is the case with mutually recursive functions each in its own translation unit, stack overflow will result. -If the nesting takes the form of loops, either the control variable +If the nesting takes the form of loops, perhaps in the guise of tail +recursion, either the control variable will overflow or (in the Linux kernel) you will get an RCU CPU stall warning. Nevertheless, this class of RCU implementations is one of the most composable constructs in existence. -- cgit v1.2.1 From f4687d2637a4016b2eedfdb777105c95e8d6fe52 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Apr 2017 16:13:53 -0700 Subject: rcu: Add preemptibility checks in rcu_sched_qs() and rcu_bh_qs() This commit adds WARN_ON_ONCE() calls that trigger if either rcu_sched_qs() or rcu_bh_qs() are invoked with preemption enabled. In the immortal words of Peter Zijlstra: "these are much harder to ignore than comments". Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3bee58fc23b1..b01a02e7a0b7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -250,6 +250,7 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) */ void rcu_sched_qs(void) { + RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!"); if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) return; trace_rcu_grace_period(TPS("rcu_sched"), @@ -265,6 +266,7 @@ void rcu_sched_qs(void) void rcu_bh_qs(void) { + RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!"); if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_bh"), __this_cpu_read(rcu_bh_data.gpnum), -- cgit v1.2.1 From 59d80fd8351b7b9a5dc7bbfa8bc4ca19f6ff3dad Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 10:20:28 -0700 Subject: rcu: Print out rcupdate.c non-default boot-time settings This commit adds a rcupdate_announce_bootup_oddness() function to print out non-default values of significant kernel boot parameter settings to aid in debugging. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 + kernel/rcu/tree_plugin.h | 1 + kernel/rcu/update.c | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e1e5d002fdb9..393e461d3ea8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -76,6 +76,7 @@ bool rcu_gp_is_normal(void); /* Internal RCU use. 
*/ bool rcu_gp_is_expedited(void); /* Internal RCU use. */ void rcu_expedite_gp(void); void rcu_unexpedite_gp(void); +void rcupdate_announce_bootup_oddness(void); #endif /* #else #ifdef CONFIG_TINY_RCU */ enum rcutorture_type { diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3b432fa4c45b..eb5ebdce25ff 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -92,6 +92,7 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); if (IS_ENABLED(CONFIG_RCU_BOOST)) pr_info("\tRCU kthread priority: %d.\n", kthread_prio); + rcupdate_announce_bootup_oddness(); } #ifdef CONFIG_PREEMPT_RCU diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 273e869ca21d..82a5aa10dbc5 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -560,7 +560,8 @@ static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); DEFINE_SRCU(tasks_rcu_exit_srcu); /* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ -static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10; +#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) +static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT; module_param(rcu_task_stall_timeout, int, 0644); static void rcu_spawn_tasks_kthread(void); @@ -851,6 +852,23 @@ static void rcu_spawn_tasks_kthread(void) #endif /* #ifdef CONFIG_TASKS_RCU */ +#ifndef CONFIG_TINY_RCU + +/* + * Print any non-default Tasks RCU settings. + */ +static void __init rcu_tasks_bootup_oddness(void) +{ +#ifdef CONFIG_TASKS_RCU + if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT) + pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout); + else + pr_info("\tTasks RCU enabled.\n"); +#endif /* #ifdef CONFIG_TASKS_RCU */ +} + +#endif /* #ifndef CONFIG_TINY_RCU */ + #ifdef CONFIG_PROVE_RCU /* @@ -935,3 +953,25 @@ late_initcall(rcu_verify_early_boot_tests); #else void rcu_early_boot_tests(void) {} #endif /* CONFIG_PROVE_RCU */ + +#ifndef CONFIG_TINY_RCU + +/* + * Print any significant non-default boot-time settings. + */ +void __init rcupdate_announce_bootup_oddness(void) +{ + if (rcu_normal) + pr_info("\tNo expedited grace period (rcu_normal).\n"); + else if (rcu_normal_after_boot) + pr_info("\tNo expedited grace period (rcu_normal_after_boot).\n"); + else if (rcu_expedited) + pr_info("\tAll grace periods are expedited (rcu_expedited).\n"); + if (rcu_cpu_stall_suppress) + pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n"); + if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT) + pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout); + rcu_tasks_bootup_oddness(); +} + +#endif /* #ifndef CONFIG_TINY_RCU */ -- cgit v1.2.1 From 17c7798bea64b9d35c7b4cc14d564e6feff73ac3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 11:12:34 -0700 Subject: rcu: Update rcu_bootup_announce_oddness() This commit updates rcu_bootup_announce_oddness() to check additional Kconfig options and module/boot parameters. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 9 ++++++--- kernel/rcu/tree_plugin.h | 31 ++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b01a02e7a0b7..ac8dce15fd74 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -536,9 +536,12 @@ void rcu_all_qs(void) } EXPORT_SYMBOL_GPL(rcu_all_qs); -static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ -static long qhimark = 10000; /* If this many pending, ignore blimit. */ -static long qlowmark = 100; /* Once only this many pending, use blimit. */ +#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */ +static long blimit = DEFAULT_RCU_BLIMIT; +#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */ +static long qhimark = DEFAULT_RCU_QHIMARK; +#define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */ +static long qlowmark = DEFAULT_RCU_QLOMARK; module_param(blimit, long, 0444); module_param(qhimark, long, 0444); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index eb5ebdce25ff..9cb3dff78b6f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -79,7 +79,9 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); - if (IS_ENABLED(CONFIG_PROVE_RCU)) + if (IS_ENABLED(CONFIG_PROVE_RCU_REPEATEDLY)) + pr_info("\tRCU lockdep checking is permanently enabled.\n"); + else if (IS_ENABLED(CONFIG_PROVE_RCU)) pr_info("\tRCU lockdep checking is enabled.\n"); if (RCU_NUM_LVLS >= 4) pr_info("\tFour(or more)-level hierarchy is enabled.\n"); @@ -90,8 +92,31 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); - if (IS_ENABLED(CONFIG_RCU_BOOST)) - pr_info("\tRCU kthread priority: %d.\n", kthread_prio); +#ifdef CONFIG_RCU_BOOST + pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", kthread_prio, CONFIG_RCU_BOOST_DELAY); +#endif + if (blimit != DEFAULT_RCU_BLIMIT) + pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit); + if (qhimark != DEFAULT_RCU_QHIMARK) + pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark); + if (qlowmark != DEFAULT_RCU_QLOMARK) + pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark); + if (jiffies_till_first_fqs != ULONG_MAX) + pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs); + if (jiffies_till_next_fqs != ULONG_MAX) + pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs); + if (rcu_kick_kthreads) + pr_info("\tKick kthreads if too-long grace period.\n"); + if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) + pr_info("\tRCU callback double-/use-after-free debug enabled.\n"); + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT)) + pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay); + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT)) + pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); + if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP)) + pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay); + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) + pr_info("\tRCU 
debug extended QS entry/exit.\n"); rcupdate_announce_bootup_oddness(); } -- cgit v1.2.1 From b5815e6cd3b747cacf7628a32a275aa2c0f61e06 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 11:20:29 -0700 Subject: srcu: Make exp_holdoff module parameter be static Because exp_holdoff is not used outside of srcutree.c, it can be static. This commit therefore makes this change. Signed-off-by: Paul E. McKenney --- kernel/rcu/srcutree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 03d57fe9f094..08d43736f72a 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -40,7 +40,7 @@ #include "rcu.h" #include "rcu_segcblist.h" -ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */ +static ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */ module_param(exp_holdoff, ulong, 0444); static void srcu_invoke_callbacks(struct work_struct *work); -- cgit v1.2.1 From 0c8e0e3c37955d17cced37222a10c00ab47efd4b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 11:24:22 -0700 Subject: srcu: Print non-default exp_holdoff values at boot time This commit makes srcu_bootup_announce() check for non-default values of the auto-expedite holdoff time exp_holdoff and print a message if so. Signed-off-by: Paul E. McKenney --- kernel/rcu/srcutree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 08d43736f72a..0b6ea105d9f8 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -40,7 +40,9 @@ #include "rcu.h" #include "rcu_segcblist.h" -static ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */ +/* Holdoff in nanoseconds for auto-expediting. */ +#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000) +static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF; module_param(exp_holdoff, ulong, 0444); static void srcu_invoke_callbacks(struct work_struct *work); @@ -1171,6 +1173,8 @@ EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); static int __init srcu_bootup_announce(void) { pr_info("Hierarchical SRCU implementation.\n"); + if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF) + pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff); return 0; } early_initcall(srcu_bootup_announce); -- cgit v1.2.1 From c0b334c5bfa98ab104bde38da330a113a6c7dd56 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 12:32:15 -0700 Subject: rcu: Add lockdep_assert_held() teeth to tree.c Comments can be helpful, but assertions carry more force. This commit therefore adds lockdep_assert_held() and RCU_LOCKDEP_WARN() calls to enforce lock-held and interrupt-disabled preconditions. Reported-by: Peter Zijlstra Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ac8dce15fd74..121c1436a7f3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -762,6 +762,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp) int idx = (READ_ONCE(rnp->completed) + 1) & 0x1; int *fp = &rnp->need_future_gp[idx]; + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_future_needs_gp() invoked with irqs enabled!!!"); return READ_ONCE(*fp); } @@ -773,6 +774,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp) static bool cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) { + RCU_LOCKDEP_WARN(!irqs_disabled(), "cpu_needs_another_gp() invoked with irqs enabled!!!"); if (rcu_gp_in_progress(rsp)) return false; /* No, a grace period is already in progress. */ if (rcu_future_needs_gp(rsp)) @@ -799,6 +801,7 @@ static void rcu_eqs_enter_common(bool user) struct rcu_data *rdp; struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_enter_common() invoked with irqs enabled!!!"); trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { @@ -972,6 +975,7 @@ static void rcu_eqs_exit(bool user) struct rcu_dynticks *rdtp; long long oldval; + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_exit() invoked with irqs enabled!!!"); rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); @@ -1679,6 +1683,8 @@ void rcu_cpu_stall_reset(void) static unsigned long rcu_cbs_completed(struct rcu_state *rsp, struct rcu_node *rnp) { + lockdep_assert_held(&rnp->lock); + /* * If RCU is idle, we just wait for the next grace period. * But we can only be sure that RCU is idle if we are looking @@ -1724,6 +1730,8 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, bool ret = false; struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); + lockdep_assert_held(&rnp->lock); + /* * Pick up grace-period number for new callbacks. If this * grace period is already marked as needed, return to the caller. @@ -1850,6 +1858,8 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, { bool ret = false; + lockdep_assert_held(&rnp->lock); + /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ if (!rcu_segcblist_pend_cbs(&rdp->cblist)) return false; @@ -1888,6 +1898,8 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { + lockdep_assert_held(&rnp->lock); + /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ if (!rcu_segcblist_pend_cbs(&rdp->cblist)) return false; @@ -1914,6 +1926,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, bool ret; bool need_gp; + lockdep_assert_held(&rnp->lock); + /* Handle the ends of any preceding grace periods first. 
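[Editorial note, not part of the original patch: the lockdep_assert_held(&rnp->lock) calls added throughout this file turn preconditions that were previously recorded only in comments like this one into runtime checks. On a lockdep-enabled kernel (CONFIG_PROVE_LOCKING or CONFIG_DEBUG_LOCK_ALLOC), a caller that reaches one of these functions without holding the indicated lock now triggers a warning instead of silently proceeding.]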
*/ if (rdp->completed == rnp->completed && !unlikely(READ_ONCE(rdp->gpwrap))) { @@ -2346,6 +2360,7 @@ static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { + lockdep_assert_held(&rnp->lock); if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { /* * Either we have not yet spawned the grace-period @@ -2407,6 +2422,7 @@ static bool rcu_start_gp(struct rcu_state *rsp) static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { + lockdep_assert_held(&rcu_get_root(rsp)->lock); WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); @@ -2431,6 +2447,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, unsigned long oldmask = 0; struct rcu_node *rnp_c; + lockdep_assert_held(&rnp->lock); + /* Walk up the rcu_node hierarchy. */ for (;;) { if (!(rnp->qsmask & mask) || rnp->gpnum != gps) { @@ -2491,6 +2509,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp, unsigned long mask; struct rcu_node *rnp_p; + lockdep_assert_held(&rnp->lock); if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); @@ -2604,6 +2623,8 @@ static void rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { + lockdep_assert_held(&rsp->orphan_lock); + /* No-CBs CPUs do not have orphanable callbacks. */ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu)) return; @@ -2644,6 +2665,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) { struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); + lockdep_assert_held(&rsp->orphan_lock); + /* No-CBs CPUs are handled specially. */ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) @@ -2710,6 +2733,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) long mask; struct rcu_node *rnp = rnp_leaf; + lockdep_assert_held(&rnp->lock); if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) return; @@ -3703,6 +3727,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) long mask; struct rcu_node *rnp = rnp_leaf; + lockdep_assert_held(&rnp->lock); for (;;) { mask = rnp->grpmask; rnp = rnp->parent; -- cgit v1.2.1 From ea9b0c8a26a2cadfe49382d679eee88d3c4de79c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 13:19:28 -0700 Subject: rcu: Add lockdep_assert_held() teeth to tree_plugin.h Comments can be helpful, but assertions carry more force. This commit therefore adds lockdep_assert_held() and RCU_LOCKDEP_WARN() calls to enforce lock-held and interrupt-disabled preconditions. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 9cb3dff78b6f..ee7cea75273e 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -181,6 +181,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0); struct task_struct *t = current; + lockdep_assert_held(&rnp->lock); + /* * Decide where to queue the newly blocked task. In theory, * this could be an if-statement. 
In practice, when I tried @@ -289,6 +291,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) */ static void rcu_preempt_qs(void) { + RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n"); if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_preempt"), __this_cpu_read(rcu_data_p->gpnum), @@ -318,6 +321,7 @@ static void rcu_preempt_note_context_switch(bool preempt) struct rcu_data *rdp; struct rcu_node *rnp; + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_preempt_note_context_switch() invoked with interrupts enabled!!!\n"); WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0); if (t->rcu_read_lock_nesting > 0 && !t->rcu_read_unlock_special.b.blocked) { @@ -634,6 +638,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) */ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) { + RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); if (rcu_preempt_has_tasks(rnp)) rnp->gp_tasks = rnp->blkd_tasks.next; @@ -1024,6 +1029,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) { struct task_struct *t; + lockdep_assert_held(&rnp->lock); if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { rnp->n_balk_exp_gp_tasks++; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); @@ -1404,6 +1410,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); unsigned long dj; + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_needs_cpu() invoked with irqs enabled!!!"); if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { *nextevt = KTIME_MAX; return 0; @@ -1456,6 +1463,7 @@ static void rcu_prepare_for_idle(void) struct rcu_state *rsp; int tne; + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_prepare_for_idle() invoked with irqs enabled!!!"); if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || rcu_is_nocb_cpu(smp_processor_id())) return; @@ -1511,6 +1519,7 @@ static void rcu_prepare_for_idle(void) */ static void rcu_cleanup_after_idle(void) { + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_cleanup_after_idle() invoked with irqs enabled!!!"); if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || rcu_is_nocb_cpu(smp_processor_id())) return; @@ -2544,6 +2553,8 @@ static void rcu_sysidle_enter(int irq) unsigned long j; struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_enter() invoked with irqs enabled!!!"); + /* If there are no nohz_full= CPUs, no need to track this. */ if (!tick_nohz_full_enabled()) return; @@ -2615,6 +2626,8 @@ static void rcu_sysidle_exit(int irq) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_exit() invoked with irqs enabled!!!"); + /* If there are no nohz_full= CPUs, no need to track this. */ if (!tick_nohz_full_enabled()) return; @@ -2674,6 +2687,8 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, unsigned long j; struct rcu_dynticks *rdtp = rdp->dynticks; + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_check_cpu() invoked with irqs enabled!!!"); + /* If there are no nohz_full= CPUs, don't check system-wide idleness. 
*/ if (!tick_nohz_full_enabled()) return; @@ -2842,6 +2857,8 @@ bool rcu_sys_is_idle(void) static struct rcu_sysidle_head rsh; int rss = READ_ONCE(full_sysidle_state); + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sys_is_idle() invoked with irqs enabled!!!"); + if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu)) return false; -- cgit v1.2.1 From 07f6e64bf2ab98cad0d9c595659209858e7bff83 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 13:53:04 -0700 Subject: srcu: Make SRCU be once again optional Commit d160a727c40e ("srcu: Make SRCU be built by default") was added in response to build errors, which were caused by code that included srcu.h despite !SRCU. However, srcutiny.o is almost 2K of code, which is not insignificant for those attempting to run the Linux kernel on IoT devices. This commit therefore makes SRCU be once again optional, and adjusts srcu.h to allow error-free inclusion in !SRCU kernel builds. Signed-off-by: Paul E. McKenney Acked-by: Nicolas Pitre --- include/linux/srcu.h | 7 ++++++- init/Kconfig | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4c1d5f7e62c4..ea356d800675 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -62,8 +62,13 @@ int init_srcu_struct(struct srcu_struct *sp); #include #elif defined(CONFIG_CLASSIC_SRCU) #include -#else +#elif defined(CONFIG_SRCU) #error "Unknown SRCU implementation specified to kernel configuration" +#else + +/* Dummy definition for things like notifiers. Actual use gets link error. */ +struct srcu_struct { }; + #endif /** diff --git a/init/Kconfig b/init/Kconfig index 1d3475fc9496..d928a3724af9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -521,7 +521,6 @@ config RCU_EXPERT config SRCU bool - default y help This option selects the sleepable version of RCU. This version permits arbitrary sleeping or blocking within RCU read-side critical -- cgit v1.2.1 From d4efe6c5ad91f9a1f2f1d66b7fbfc87e320b2abc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 14:16:16 -0700 Subject: srcu: Shrink Tiny SRCU a bit In Tiny SRCU, __srcu_read_lock() is a trivial function, outweighed by its EXPORT_SYMBOL_GPL(), and on many architectures, its call sequence. This commit therefore moves it to srcutiny.h so that it can be inlined. Signed-off-by: Paul E. McKenney --- include/linux/srcutiny.h | 15 +++++++++++++++ kernel/rcu/srcutiny.c | 16 ---------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index b8859179b001..b6edd9c8fdce 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -63,6 +63,21 @@ void srcu_drive_gp(struct work_struct *wp); void synchronize_srcu(struct srcu_struct *sp); +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Can be invoked from irq/bh handlers, but the matching + * __srcu_read_unlock() must be in the same handler instance. Returns an + * index that must be passed to the matching srcu_read_unlock().
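[Editorial sketch, not part of the original patch: callers do not normally invoke __srcu_read_lock() directly, but instead go through the srcu_read_lock()/srcu_read_unlock() wrappers, which in the Tiny SRCU case reach the inline function shown here. Hypothetical reader code, with "my_srcu" and "gp" as illustrative names only:

	int idx;
	struct foo *p;

	idx = srcu_read_lock(&my_srcu);
	p = srcu_dereference(gp, &my_srcu);
	/* ... read-side critical section using p ... */
	srcu_read_unlock(&my_srcu, idx);

The returned index records which srcu_lock_nesting[] counter was incremented, which is why it must be handed back to the matching unlock.]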
+ */ +static inline int __srcu_read_lock(struct srcu_struct *sp) +{ + int idx; + + idx = READ_ONCE(sp->srcu_idx); + WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); + return idx; +} + static inline void synchronize_srcu_expedited(struct srcu_struct *sp) { synchronize_srcu(sp); diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 32798eb14853..988543721d5d 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -95,22 +95,6 @@ void cleanup_srcu_struct(struct srcu_struct *sp) } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); -/* - * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Can be invoked from irq/bh handlers, but the matching - * __srcu_read_unlock() must be in the same handler instance. Returns an - * index that must be passed to the matching srcu_read_unlock(). - */ -int __srcu_read_lock(struct srcu_struct *sp) -{ - int idx; - - idx = READ_ONCE(sp->srcu_idx); - WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); - return idx; -} -EXPORT_SYMBOL_GPL(__srcu_read_lock); - /* * Removes the count for the old reader from the appropriate element of * the srcu_struct. -- cgit v1.2.1 From a602538e46c9c62e75f1f0be9806495c79bcda9f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 15:39:34 -0700 Subject: srcu: Add DEBUG_OBJECTS_RCU_HEAD functionality This commit adds DEBUG_OBJECTS_RCU_HEAD checking to detect call_srcu() counterparts to double-free bugs. Signed-off-by: Paul E. McKenney --- kernel/rcu/srcutree.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 0b6ea105d9f8..c6e2a4a1628b 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -761,6 +761,13 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) return true; /* With reasonable probability, idle! */ } +/* + * SRCU callback function to leak a callback. + */ +static void srcu_leak_callback(struct rcu_head *rhp) +{ +} + /* * Enqueue an SRCU callback on the srcu_data structure associated with * the current CPU and the specified srcu_struct structure, initiating @@ -799,6 +806,12 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, struct srcu_data *sdp; check_init_srcu_struct(sp); + if (debug_rcu_head_queue(rhp)) { + /* Probable double call_srcu(), so leak the callback. */ + WRITE_ONCE(rhp->func, srcu_leak_callback); + WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n"); + return; + } rhp->func = func; local_irq_save(flags); sdp = this_cpu_ptr(sp->sda); @@ -973,9 +986,12 @@ void srcu_barrier(struct srcu_struct *sp) spin_lock_irq(&sdp->lock); atomic_inc(&sp->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; + debug_rcu_head_queue(&sdp->srcu_barrier_head); if (!rcu_segcblist_entrain(&sdp->srcu_cblist, - &sdp->srcu_barrier_head, 0)) + &sdp->srcu_barrier_head, 0)) { + debug_rcu_head_unqueue(&sdp->srcu_barrier_head); atomic_dec(&sp->srcu_barrier_cpu_cnt); + } spin_unlock_irq(&sdp->lock); } @@ -1100,6 +1116,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) spin_unlock_irq(&sdp->lock); rhp = rcu_cblist_dequeue(&ready_cbs); for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { + debug_rcu_head_unqueue(rhp); local_bh_disable(); rhp->func(rhp); local_bh_enable(); -- cgit v1.2.1 From 68ab0b4263224157f4d0c0e42854169a183d7534 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 28 Apr 2017 16:19:07 -0700 Subject: rcu: Make synchronize_rcu_mult() check for duplicates Currently, doing synchronize_rcu_mult(call_rcu, call_rcu) might (or might not) wait for two RCU grace periods. One approach is of course "don't do that!", but in CONFIG_PREEMPT=n kernels, synchronize_rcu_mult(call_rcu, call_rcu_sched) does exactly that. This results in an ugly #ifdef in sched_cpu_deactivate(). This commit therefore makes __wait_rcu_gp() check for duplicates, which in turn allows duplicates to be passed to synchronize_rcu_mult() without risk of waiting twice on the same type of grace period. Signed-off-by: Paul E. McKenney --- kernel/rcu/update.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 82a5aa10dbc5..123a9c4b5055 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -379,6 +379,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, struct rcu_synchronize *rs_array) { int i; + int j; /* Initialize and register callbacks for each flavor specified. */ for (i = 0; i < n; i++) { @@ -390,7 +391,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, } init_rcu_head_on_stack(&rs_array[i].head); init_completion(&rs_array[i].completion); - (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu); + for (j = 0; j < i; j++) + if (crcu_array[j] == crcu_array[i]) + break; + if (j == i) + (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu); } /* Wait for all callbacks to be invoked. */ @@ -399,7 +404,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, (crcu_array[i] == call_rcu || crcu_array[i] == call_rcu_bh)) continue; - wait_for_completion(&rs_array[i].completion); + for (j = 0; j < i; j++) + if (crcu_array[j] == crcu_array[i]) + break; + if (j == i) + wait_for_completion(&rs_array[i].completion); destroy_rcu_head_on_stack(&rs_array[i].head); } } -- cgit v1.2.1 From d7d34d5e46140a13f86379c87a196dc8a0b9b585 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 16:33:07 -0700 Subject: sched: Rely on synchronize_rcu_mult() de-duplication The synchronize_rcu_mult() function now detects duplicate requests for the same grace-period flavor and waits only once for each flavor. This commit therefore removes the ugly #ifdef from sched_cpu_deactivate() because synchronize_rcu_mult(call_rcu, call_rcu_sched) now does what the #ifdef used to be needed for. Signed-off-by: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner --- kernel/sched/core.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 803c3bc274c4..e91138fcde86 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5874,15 +5874,9 @@ int sched_cpu_deactivate(unsigned int cpu) * users of this state to go away such that all new such users will * observe it. * - * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might - * not imply sync_sched(), so wait for both. - * * Do sync before park smpboot threads to take care the rcu boost case. */ - if (IS_ENABLED(CONFIG_PREEMPT)) - synchronize_rcu_mult(call_rcu, call_rcu_sched); - else - synchronize_rcu(); + synchronize_rcu_mult(call_rcu, call_rcu_sched); if (!sched_smp_initialized) return 0; -- cgit v1.2.1 From 511324e462a12ea8be1a7e5fc63a992134db80d7 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 28 Apr 2017 17:04:09 -0700 Subject: rcu: Use RCU_NOCB_WAKE rather than RCU_NOGP_WAKE The RCU_NOGP_WAKE_NOT, RCU_NOGP_WAKE, and RCU_NOGP_WAKE_FORCE flags are used to mediate wakeups for the no-CBs CPU kthreads. The "NOGP" really doesn't make any sense, so this commit does s/NOGP/NOCB/. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 6 +++--- kernel/rcu/tree_plugin.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 0fa7aee9ef55..ddfa34d020ba 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -312,9 +312,9 @@ struct rcu_data { }; /* Values for nocb_defer_wakeup field in struct rcu_data. */ -#define RCU_NOGP_WAKE_NOT 0 -#define RCU_NOGP_WAKE 1 -#define RCU_NOGP_WAKE_FORCE 2 +#define RCU_NOCB_WAKE_NOT 0 +#define RCU_NOCB_WAKE 1 +#define RCU_NOCB_WAKE_FORCE 2 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) /* For jiffies_till_first_fqs and */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index ee7cea75273e..0b1042545116 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1901,7 +1901,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else { - WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE); + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE); /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, @@ -1915,7 +1915,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); } else { - WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_FORCE); + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_FORCE); /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, @@ -2242,8 +2242,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) if (!rcu_nocb_need_deferred_wakeup(rdp)) return; ndw = READ_ONCE(rdp->nocb_defer_wakeup); - WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT); - wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE); + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); + wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); } -- cgit v1.2.1 From 6b5fc3a1331810db407c9e0e673dc1837afdc9d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Apr 2017 20:11:09 -0700 Subject: rcu: Add memory barriers for NOCB leader wakeup Wait/wakeup operations do not guarantee ordering on their own. Instead, either locking or memory barriers are required. This commit therefore adds memory barriers to wake_nocb_leader() and nocb_leader_wait(). Signed-off-by: Paul E. McKenney Tested-by: Krister Johansen Cc: # 4.6.x --- kernel/rcu/tree_plugin.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0b1042545116..573fbe9640a0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1810,6 +1810,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) { /* Prior smp_mb__after_atomic() orders against prior enqueue. */ WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); + smp_mb(); /* ->nocb_leader_sleep before swake_up(). 
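[Editorial note, not part of the original patch: this smp_mb() pairs with the one added below in nocb_leader_wait() ("wakeup before ->nocb_head reads"). The first orders the ->nocb_leader_sleep store before the swake_up() call, and the second orders the leader's wakeup before its rescan of the followers' ->nocb_head pointers. As the commit message above notes, swait wait/wakeup primitives do not provide this ordering on their own.]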
*/ swake_up(&rdp_leader->nocb_wq); } } @@ -2064,6 +2065,7 @@ wait_again: * nocb_gp_head, where they await a grace period. */ gotcbs = false; + smp_mb(); /* wakeup before ->nocb_head reads. */ for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head); if (!rdp->nocb_gp_head) -- cgit v1.2.1 From aaaad0bfac019bb7701f92ebc1b31b4f85e47b55 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 2 May 2017 09:39:09 -0700 Subject: rcu: Flag need for rcu_node_tree.h and rcu_segcblist.h visibility The rcu_node_tree.h and rcu_segcblist.h header files in the include/linux directory might appear at first sight to be internal to the RCU implementation. However, the definitions in these files are needed to determine the size of TREE SRCU's srcu_struct structure, so they must be externally visible, which is why they live in include/linux. This commit adds comments to this effect to those files. Signed-off-by: Paul E. McKenney --- include/linux/rcu_node_tree.h | 4 ++++ include/linux/rcu_segcblist.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h index 4b766b61e1a0..426cee67f0e2 100644 --- a/include/linux/rcu_node_tree.h +++ b/include/linux/rcu_node_tree.h @@ -7,6 +7,10 @@ * unlimited scalability while maintaining a constant level of contention * on the root node. * + * This seemingly RCU-private file must be available to SRCU users + * because the size of the TREE SRCU srcu_struct structure depends + * on these definitions. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index ba4d2621d9ca..c3ad00e63556 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -1,6 +1,10 @@ /* * RCU segmented callback lists * + * This seemingly RCU-private file must be available to SRCU users + * because the size of the TREE SRCU srcu_struct structure depends + * on these definitions. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or -- cgit v1.2.1 From a68a2bb28bbf7a6dd4672a25bd87fd1b5db4fa7d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 08:34:57 -0700 Subject: rcu: Move docbook comments out of rcupdate.h The include/linux/rcupdate.h file is included by more than 200 files, so shrinking it should provide some build-time benefits. This commit therefore moves several docbook comments from rcupdate.h to kernel/rcu/update.c, kernel/rcu/tree.c, and kernel/rcu/tree_plugin.h, thus reducing the number of times that the compiler has to scan these comments. This likely provides only a small benefit, but every little bit helps. This commit also fixes a malformed bulleted list noted by the 0day Test Robot. Reported-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 117 ++--------------------------------------------- kernel/rcu/tree.c | 42 +++++++++++++++-- kernel/rcu/tree_plugin.h | 33 ++++++++++++- kernel/rcu/update.c | 20 ++++++-- 4 files changed, 89 insertions(+), 123 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 393e461d3ea8..7a206f039fc2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -140,115 +140,14 @@ void do_trace_rcu_torture_read(const char *rcutorturename, /* Exported common interfaces */ #ifdef CONFIG_PREEMPT_RCU - -/** - * call_rcu() - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all pre-existing RCU read-side - * critical sections have completed. However, the callback function - * might well execute concurrently with RCU read-side critical sections - * that started after call_rcu() was invoked. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - * - * Note that all CPUs must agree that the grace period extended beyond - * all pre-existing RCU read-side critical section. On systems with more - * than one CPU, this means that when "func()" is invoked, each CPU is - * guaranteed to have executed a full memory barrier since the end of its - * last RCU read-side critical section whose beginning preceded the call - * to call_rcu(). It also means that each CPU executing an RCU read-side - * critical section that continues beyond the start of "func()" must have - * executed a memory barrier after the call_rcu() but before the beginning - * of that RCU read-side critical section. Note that these guarantees - * include CPUs that are offline, idle, or executing in user mode, as - * well as CPUs that are executing in the kernel. - * - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the - * resulting RCU callback function "func()", then both CPU A and CPU B are - * guaranteed to execute a full memory barrier during the time interval - * between the call to call_rcu() and the invocation of "func()" -- even - * if CPU A and CPU B are the same CPU (but again only if the system has - * more than one CPU). - */ -void call_rcu(struct rcu_head *head, - rcu_callback_t func); - +void call_rcu(struct rcu_head *head, rcu_callback_t func); #else /* #ifdef CONFIG_PREEMPT_RCU */ - -/* In classic RCU, call_rcu() is just call_rcu_sched(). */ #define call_rcu call_rcu_sched - #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ -/** - * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_bh() assumes - * that the read-side critical sections end on completion of a softirq - * handler. This means that read-side critical sections in process - * context must not be interrupted by softirqs. This interface is to be - * used when most of the read-side critical sections are in softirq context. 
- * RCU read-side critical sections are delimited by : - * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. - * OR - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. - * These may be nested. - * - * See the description of call_rcu() for more detailed information on - * memory ordering guarantees. - */ -void call_rcu_bh(struct rcu_head *head, - rcu_callback_t func); - -/** - * call_rcu_sched() - Queue an RCU for invocation after sched grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_sched() assumes - * that the read-side critical sections end on enabling of preemption - * or on voluntary preemption. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock_sched() and rcu_read_unlock_sched(), - * OR - * anything that disables preemption. - * These may be nested. - * - * See the description of call_rcu() for more detailed information on - * memory ordering guarantees. - */ -void call_rcu_sched(struct rcu_head *head, - rcu_callback_t func); - +void call_rcu_bh(struct rcu_head *head, rcu_callback_t func); +void call_rcu_sched(struct rcu_head *head, rcu_callback_t func); void synchronize_sched(void); - -/** - * call_rcu_tasks() - Queue an RCU for invocation task-based grace period - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_tasks() assumes - * that the read-side critical sections end at a voluntary context - * switch (not a preemption!), entry into idle, or transition to usermode - * execution. As such, there are no read-side primitives analogous to - * rcu_read_lock() and rcu_read_unlock() because this primitive is intended - * to determine that all tasks have passed through a safe state, not so - * much for data-strcuture synchronization. - * - * See the description of call_rcu() for more detailed information on - * memory ordering guarantees. - */ void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void rcu_barrier_tasks(void); @@ -474,18 +373,8 @@ extern struct lockdep_map rcu_bh_lock_map; extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); - int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); - -/** - * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? - * - * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an - * RCU-sched read-side critical section. In absence of - * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side - * critical section unless it can prove otherwise. 
- */ int rcu_read_lock_sched_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 121c1436a7f3..5ebc830297c1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3223,8 +3223,24 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, local_irq_restore(flags); } -/* - * Queue an RCU-sched callback for invocation after a grace period. +/** + * call_rcu_sched() - Queue an RCU for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_sched() assumes + * that the read-side critical sections end on enabling of preemption + * or on voluntary preemption. + * RCU read-side critical sections are delimited by : + * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR + * - anything that disables preemption. + * + * These may be nested. + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. */ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) { @@ -3232,8 +3248,26 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) } EXPORT_SYMBOL_GPL(call_rcu_sched); -/* - * Queue an RCU callback for invocation after a quicker grace period. +/** + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_bh() assumes + * that the read-side critical sections end on completion of a softirq + * handler. This means that read-side critical sections in process + * context must not be interrupted by softirqs. This interface is to be + * used when most of the read-side critical sections are in softirq context. + * RCU read-side critical sections are delimited by : + * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. + * OR + * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. + * These may be nested. + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. */ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) { diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 573fbe9640a0..116cf8339826 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -675,8 +675,37 @@ static void rcu_preempt_do_callbacks(void) #endif /* #ifdef CONFIG_RCU_BOOST */ -/* - * Queue a preemptible-RCU callback for invocation after a grace period. +/** + * call_rcu() - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. 
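[Editorial sketch, not part of the original patch: a typical call_rcu() update-side pattern. Here struct foo, its embedded rcu_head "rh", the global pointer "gp", the lock "foo_lock", and foo_reclaim() are all hypothetical names:

	struct foo { int data; struct rcu_head rh; };
	static struct foo __rcu *gp;
	static DEFINE_SPINLOCK(foo_lock);

	static void foo_reclaim(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct foo, rh));
	}

	/* Updater, holding foo_lock; new_fp points to the replacement: */
	old = rcu_dereference_protected(gp, lockdep_is_held(&foo_lock));
	rcu_assign_pointer(gp, new_fp);
	call_rcu(&old->rh, foo_reclaim);

foo_reclaim() will not be invoked until all readers that might have obtained the old pointer under rcu_read_lock() have left their critical sections.]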
RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + * + * Note that all CPUs must agree that the grace period extended beyond + * all pre-existing RCU read-side critical sections. On systems with more + * than one CPU, this means that when "func()" is invoked, each CPU is + * guaranteed to have executed a full memory barrier since the end of its + * last RCU read-side critical section whose beginning preceded the call + * to call_rcu(). It also means that each CPU executing an RCU read-side + * critical section that continues beyond the start of "func()" must have + * executed a memory barrier after the call_rcu() but before the beginning + * of that RCU read-side critical section. Note that these guarantees + * include CPUs that are offline, idle, or executing in user mode, as + * well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the + * resulting RCU callback function "func()", then both CPU A and CPU B are + * guaranteed to execute a full memory barrier during the time interval + * between the call to call_rcu() and the invocation of "func()" -- even + * if CPU A and CPU B are the same CPU (but again only if the system has + * more than one CPU). */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 123a9c4b5055..84dec2c8ad1b 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -576,9 +576,23 @@ module_param(rcu_task_stall_timeout, int, 0644); static void rcu_spawn_tasks_kthread(void); static struct task_struct *rcu_tasks_kthread_ptr; -/* - * Post an RCU-tasks callback. First call must be from process context - * after the scheduler if fully operational. +/** + * call_rcu_tasks() - Queue an RCU for invocation task-based grace period + * @rhp: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_tasks() assumes + * that the read-side critical sections end at a voluntary context + * switch (not a preemption!), entry into idle, or transition to usermode + * execution. As such, there are no read-side primitives analogous to + * rcu_read_lock() and rcu_read_unlock() because this primitive is intended + * to determine that all tasks have passed through a safe state, not so + * much for data-structure synchronization. + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. */ void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) { -- cgit v1.2.1 From 3caec62fbb313946b9be53720bbf2280bb19ec28 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 09:27:15 -0700 Subject: rcu: Move rcu_expedited and rcu_normal externs from rcupdate.h The rcu_expedited and rcu_normal variables are used only by sysctl and kernel/rcu/update.c, so it does not make sense to keep their extern declarations in rcupdate.h. This commit therefore moves these extern declarations to update.c. Reported-by: Ingo Molnar Signed-off-by: Paul E.
From 3caec62fbb313946b9be53720bbf2280bb19ec28 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 09:27:15 -0700 Subject: rcu: Move rcu_expedited and rcu_normal externs from rcupdate.h The rcu_expedited and rcu_normal variables are used only by sysctl and kernel/rcu/update.c, so it does not make sense to have their extern declarations in rcupdate.h. This commit therefore moves these extern declarations to update.c. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 7 ------- kernel/rcu/update.c | 2 ++ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7a206f039fc2..6e7e930c1610 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,13 +46,6 @@ #include #include -#include - -#ifndef CONFIG_TINY_RCU -extern int rcu_expedited; /* for sysctl */ -extern int rcu_normal; /* also for sysctl */ -#endif /* #ifndef CONFIG_TINY_RCU */ - #ifdef CONFIG_TINY_RCU /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 84dec2c8ad1b..00e77c470017 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -62,7 +62,9 @@ #define MODULE_PARAM_PREFIX "rcupdate." #ifndef CONFIG_TINY_RCU +extern int rcu_expedited; /* from sysctl */ module_param(rcu_expedited, int, 0); +extern int rcu_normal; /* from sysctl */ module_param(rcu_normal, int, 0); static int rcu_normal_after_boot; module_param(rcu_normal_after_boot, int, 0); -- cgit v1.2.1 From 25c36329a30c8cac090effe1fbae9bb916fa95fe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 09:51:55 -0700 Subject: rcu: Move expediting-related access/control out of rcupdate.h The rcu_gp_is_normal(), rcu_gp_is_expedited(), rcu_expedite_gp(), and rcu_unexpedite_gp() functions are intended only for use within the RCU implementation itself -- the sysfs access is what should be used outside of RCU. This commit therefore moves the declarations for these functions to kernel/rcu/rcu.h, and also includes this file into kernel/rcu/rcutorture.c and kernel/rcu/rcuperf.c. This also has the beneficial effect of shrinking rcupdate.h a bit. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 26 -------------------- kernel/rcu/rcu.h | 26 ++++++++++++++++++++++++++ kernel/rcu/rcuperf.c | 2 ++ kernel/rcu/rcutorture.c | 2 ++ 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6e7e930c1610..049c62c59f1b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,32 +46,6 @@ #include #include -#ifdef CONFIG_TINY_RCU -/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ -static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ -{ - return true; -} -static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ -{ - return false; -} - -static inline void rcu_expedite_gp(void) -{ -} - -static inline void rcu_unexpedite_gp(void) -{ -} -#else /* #ifdef CONFIG_TINY_RCU */ -bool rcu_gp_is_normal(void); /* Internal RCU use. */ -bool rcu_gp_is_expedited(void); /* Internal RCU use. */ -void rcu_expedite_gp(void); -void rcu_unexpedite_gp(void); -void rcupdate_announce_bootup_oddness(void); -#endif /* #else #ifdef CONFIG_TINY_RCU */ - enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 73e16ec4054b..ceb78110db1b 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -293,4 +293,30 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt) #endif /* #if defined(SRCU) || !defined(TINY_RCU) */ +#ifdef CONFIG_TINY_RCU +/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ +static inline bool rcu_gp_is_normal(void) /* Internal RCU use.
*/ +{ + return true; +} +static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ +{ + return false; +} + +static inline void rcu_expedite_gp(void) +{ +} + +static inline void rcu_unexpedite_gp(void) +{ +} +#else /* #ifdef CONFIG_TINY_RCU */ +bool rcu_gp_is_normal(void); /* Internal RCU use. */ +bool rcu_gp_is_expedited(void); /* Internal RCU use. */ +void rcu_expedite_gp(void); +void rcu_unexpedite_gp(void); +void rcupdate_announce_bootup_oddness(void); +#endif /* #else #ifdef CONFIG_TINY_RCU */ + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index d80f11d9f8bd..3cc18110b612 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -48,6 +48,8 @@ #include #include +#include "rcu.h" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney "); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index a58592b73f19..03cdf79e73d4 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -52,6 +52,8 @@ #include #include +#include "rcu.h" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney and Josh Triplett "); -- cgit v1.2.1 From cad7b3897279c869de61dc88133037b941f84233 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 10:22:57 -0700 Subject: rcu: Move torture-related definitions from rcupdate.h to rcu.h The include/linux/rcupdate.h file contains a number of definitions that are used only to communicate between rcutorture, rcuperf, and the RCU code itself. There is no point in having these definitions exposed globally throughout the kernel, so this commit moves them to kernel/rcu/rcu.h. This change has the added benefit of shrinking rcupdate.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 52 --------------------------- include/linux/rcutiny.h | 5 +++ include/linux/rcutree.h | 1 + include/linux/srcuclassic.h | 14 -------- include/linux/srcutiny.h | 12 ------- include/linux/srcutree.h | 4 --- kernel/rcu/rcu.h | 85 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 91 insertions(+), 82 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 049c62c59f1b..7557499d8e70 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,58 +46,6 @@ #include #include -enum rcutorture_type { - RCU_FLAVOR, - RCU_BH_FLAVOR, - RCU_SCHED_FLAVOR, - RCU_TASKS_FLAVOR, - SRCU_FLAVOR, - INVALID_RCU_FLAVOR -}; - -#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) -void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, - unsigned long *gpnum, unsigned long *completed); -void rcutorture_record_test_transition(void); -void rcutorture_record_progress(unsigned long vernum); -void do_trace_rcu_torture_read(const char *rcutorturename, - struct rcu_head *rhp, - unsigned long secs, - unsigned long c_old, - unsigned long c); -bool rcu_irq_enter_disabled(void); -#else -static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, - int *flags, - unsigned long *gpnum, - unsigned long *completed) -{ - *flags = 0; - *gpnum = 0; - *completed = 0; -} -static inline void rcutorture_record_test_transition(void) -{ -} -static inline void rcutorture_record_progress(unsigned long vernum) -{ -} -static inline bool rcu_irq_enter_disabled(void) -{ - return false; -} -#ifdef CONFIG_RCU_TRACE -void do_trace_rcu_torture_read(const char *rcutorturename, - struct rcu_head *rhp, - unsigned long secs, - unsigned long c_old, - unsigned long c); -#else -#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, 
c_old, c) \ - do { } while (0) -#endif -#endif - #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 74d9c3a1feee..ade360e0d58c 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -202,6 +202,11 @@ static inline void rcu_irq_enter(void) { } +static inline bool rcu_irq_enter_disabled(void) +{ + return false; +} + static inline void rcu_irq_exit_irqson(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 0bacb6b2af69..28af91a19573 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -101,6 +101,7 @@ void rcu_irq_enter(void); void rcu_irq_exit(void); void rcu_irq_enter_irqson(void); void rcu_irq_exit_irqson(void); +bool rcu_irq_enter_disabled(void); void exit_rcu(void); diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h index 5753f7322262..41cf99930f34 100644 --- a/include/linux/srcuclassic.h +++ b/include/linux/srcuclassic.h @@ -98,18 +98,4 @@ void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); unsigned long srcu_batches_completed(struct srcu_struct *sp); -static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, - unsigned long *gpnum, - unsigned long *completed) -{ - if (test_type != SRCU_FLAVOR) - return; - *flags = 0; - *completed = sp->completed; - *gpnum = *completed; - if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head) - (*gpnum)++; -} - #endif diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index b6edd9c8fdce..85bddce6a7a6 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -93,16 +93,4 @@ static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) return 0; } -static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, - unsigned long *gpnum, - unsigned long *completed) -{ - if (test_type != SRCU_FLAVOR) - return; - *flags = 0; - *completed = sp->srcu_gp_seq; - *gpnum = *completed; -} - #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 32e86d85fd11..f4adfed17b51 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -143,8 +143,4 @@ void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); unsigned long srcu_batches_completed(struct srcu_struct *sp); -void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, - unsigned long *gpnum, unsigned long *completed); - #endif diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index ceb78110db1b..f190fc1c8215 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -319,4 +319,89 @@ void rcu_unexpedite_gp(void); void rcupdate_announce_bootup_oddness(void); #endif /* #else #ifdef CONFIG_TINY_RCU */ +enum rcutorture_type { + RCU_FLAVOR, + RCU_BH_FLAVOR, + RCU_SCHED_FLAVOR, + RCU_TASKS_FLAVOR, + SRCU_FLAVOR, + INVALID_RCU_FLAVOR +}; + +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) +void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, + unsigned long *gpnum, unsigned long *completed); +void rcutorture_record_test_transition(void); +void rcutorture_record_progress(unsigned long vernum); +void do_trace_rcu_torture_read(const char *rcutorturename, + struct rcu_head *rhp, + unsigned long secs, + unsigned long 
c_old, + unsigned long c); +#else +static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, + int *flags, + unsigned long *gpnum, + unsigned long *completed) +{ + *flags = 0; + *gpnum = 0; + *completed = 0; +} +static inline void rcutorture_record_test_transition(void) +{ +} +static inline void rcutorture_record_progress(unsigned long vernum) +{ +} +#ifdef CONFIG_RCU_TRACE +void do_trace_rcu_torture_read(const char *rcutorturename, + struct rcu_head *rhp, + unsigned long secs, + unsigned long c_old, + unsigned long c); +#else +#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ + do { } while (0) +#endif +#endif + +#ifdef CONFIG_TINY_SRCU + +static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, + unsigned long *completed) +{ + if (test_type != SRCU_FLAVOR) + return; + *flags = 0; + *completed = sp->srcu_gp_seq; + *gpnum = *completed; +} + +#elif defined(CONFIG_TREE_SRCU) + +void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, unsigned long *completed); + +#elif defined(CONFIG_CLASSIC_SRCU) + +static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, + unsigned long *completed) +{ + if (test_type != SRCU_FLAVOR) + return; + *flags = 0; + *completed = sp->completed; + *gpnum = *completed; + if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check1.head) + (*gpnum)++; +} + +#endif + #endif /* __LINUX_RCU_H */ -- cgit v1.2.1 From c4cbf9f736f5bd0a53a5ea401d86376c86bf905e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 10:36:36 -0700 Subject: rcu: Remove UINT_CMP_GE() and UINT_CMP_LT() The UINT_CMP_GE() and UINT_CMP_LT() macros are not used, so this commit removes them. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7557499d8e70..fa3f921e5874 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,8 +46,6 @@ #include #include -#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) -#define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) #define ulong2long(a) (*(long *)(&(a))) -- cgit v1.2.1
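The ULONG_CMP_GE()/ULONG_CMP_LT() macros that survive this cleanup are the wraparound-safe comparisons used throughout RCU: they subtract in unsigned arithmetic, so two sequence counters compare correctly even across a wrap, provided they are within half the counter space of each other. A stand-alone user-space sketch (not kernel code) of why that works:

	#include <assert.h>
	#include <limits.h>

	#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
	#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))

	int main(void)
	{
		unsigned long a = ULONG_MAX - 1;	/* Counter just before wrap. */
		unsigned long b = a + 10;		/* Ten steps later: wraps to 8. */

		assert(!(b >= a));		/* Plain comparison is fooled by the wrap... */
		assert(ULONG_CMP_GE(b, a));	/* ...modular comparison still sees b after a. */
		assert(ULONG_CMP_LT(a, b));
		return 0;
	}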
From d0df7a349133e10184d478ae1189e79e5c53615d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 10:52:10 -0700 Subject: rcu: Move rcupdate.h to new empty-function style This commit saves a few lines in include/linux/rcupdate.h by moving to single-line definitions for empty functions, instead of the old style where the two curly braces each get their own line. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index fa3f921e5874..415633076cb1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -124,12 +124,8 @@ static inline void rcu_end_inkernel_boot(void) { } void rcu_sysrq_start(void); void rcu_sysrq_end(void); #else /* #ifdef CONFIG_RCU_STALL_COMMON */ -static inline void rcu_sysrq_start(void) -{ -} -static inline void rcu_sysrq_end(void) -{ -} +static inline void rcu_sysrq_start(void) { } +static inline void rcu_sysrq_end(void) { } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ #ifdef CONFIG_NO_HZ_FULL @@ -143,9 +139,7 @@ static inline void rcu_user_exit(void) { } #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static inline void rcu_init_nohz(void) -{ -} +static inline void rcu_init_nohz(void) { } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /** @@ -243,21 +237,10 @@ void destroy_rcu_head(struct rcu_head *head); void init_rcu_head_on_stack(struct rcu_head *head); void destroy_rcu_head_on_stack(struct rcu_head *head); #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -static inline void init_rcu_head(struct rcu_head *head) -{ -} - -static inline void destroy_rcu_head(struct rcu_head *head) -{ -} - -static inline void init_rcu_head_on_stack(struct rcu_head *head) -{ -} - -static inline void destroy_rcu_head_on_stack(struct rcu_head *head) -{ -} +static inline void init_rcu_head(struct rcu_head *head) { } +static inline void destroy_rcu_head(struct rcu_head *head) { } +static inline void init_rcu_head_on_stack(struct rcu_head *head) { } +static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) @@ -334,9 +317,7 @@ static inline void rcu_preempt_sleep_check(void) "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ -static inline void rcu_preempt_sleep_check(void) -{ -} +static inline void rcu_preempt_sleep_check(void) { } #endif /* #else #ifdef CONFIG_PROVE_RCU */ #define rcu_sleep_check() \ @@ -915,9 +896,7 @@ static inline bool rcu_sys_is_idle(void) return false; } -static inline void rcu_sysidle_force_exit(void) -{ -} +static inline void rcu_sysidle_force_exit(void) { } #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -- cgit v1.2.1 From 791875d16e2f6e2e5b90328ccac643f512ac76c4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 11:06:05 -0700 Subject: rcu: Eliminate the unused __rcu_is_watching() function The __rcu_is_watching() function is currently not used, aside from implementing the rcu_is_watching() function. This commit therefore eliminates __rcu_is_watching(), which has the beneficial side-effect of shrinking include/linux/rcupdate.h a bit. Reported-by: Ingo Molnar Signed-off-by: Paul E.
McKenney --- include/linux/rcupdate.h | 4 ---- include/linux/rcutiny.h | 11 ----------- kernel/rcu/tiny.c | 13 ------------- kernel/rcu/tree.c | 19 ++++--------------- 4 files changed, 4 insertions(+), 43 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 415633076cb1..b4edfe0966c6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -204,10 +204,6 @@ do { \ rcu_note_voluntary_context_switch(current); \ } while (0) -#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) -bool __rcu_is_watching(void); -#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ - /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ade360e0d58c..5ed6934152a6 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -232,22 +232,11 @@ static inline void rcu_scheduler_starting(void) } #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ -#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) - -static inline bool rcu_is_watching(void) -{ - return __rcu_is_watching(); -} - -#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ - static inline bool rcu_is_watching(void) { return true; } -#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ - static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index e5385731e391..2306cab2195d 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -59,19 +59,6 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL(rcu_barrier_sched); -#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) - -/* - * Test whether RCU thinks that the current CPU is idle. - */ -bool notrace __rcu_is_watching(void) -{ - return true; -} -EXPORT_SYMBOL(__rcu_is_watching); - -#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ - /* * Helper function for rcu_sched_qs() and rcu_bh_qs(). * Also irqs are disabled to avoid confusion due to interrupt handlers diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5ebc830297c1..61a97164abcc 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1138,23 +1138,12 @@ void rcu_nmi_exit(void) rcu_dynticks_eqs_enter(); } -/** - * __rcu_is_watching - are RCU read-side critical sections safe? - * - * Return true if RCU is watching the running CPU, which means that - * this CPU can safely enter RCU read-side critical sections. Unlike - * rcu_is_watching(), the caller of __rcu_is_watching() must have at - * least disabled preemption. - */ -bool notrace __rcu_is_watching(void) -{ - return !rcu_dynticks_curr_cpu_in_eqs(); -} - /** * rcu_is_watching - see if RCU thinks that the current CPU is idle * - * If the current CPU is in its idle loop and is neither in an interrupt + * Return true if RCU is watching the running CPU, which means that this + * CPU can safely enter RCU read-side critical sections. In other words, + * if the current CPU is in its idle loop and is neither in an interrupt * or NMI handler, return true. 
*/ bool notrace rcu_is_watching(void) @@ -1162,7 +1151,7 @@ bool notrace rcu_is_watching(void) bool ret; preempt_disable_notrace(); - ret = __rcu_is_watching(); + ret = !rcu_dynticks_curr_cpu_in_eqs(); preempt_enable_notrace(); return ret; } -- cgit v1.2.1 From 82118249d0ca4078d56d5e43172ada1567fdf946 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 11:13:24 -0700 Subject: rcu: Move the RCU_SCHEDULER_ definitions from rcupdate.h The RCU_SCHEDULER_INACTIVE, RCU_SCHEDULER_INIT, and RCU_SCHEDULER_RUNNING definitions are used only within RCU, so this commit moves them from include/linux/rcupdate.h to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ---- kernel/rcu/rcu.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b4edfe0966c6..9206a28a2d44 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -217,10 +217,6 @@ do { \ #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_SCHEDULER_INACTIVE 0 -#define RCU_SCHEDULER_INIT 1 -#define RCU_SCHEDULER_RUNNING 2 - /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index f190fc1c8215..17fee2a667d9 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -319,6 +319,10 @@ void rcu_unexpedite_gp(void); void rcupdate_announce_bootup_oddness(void); #endif /* #else #ifdef CONFIG_TINY_RCU */ +#define RCU_SCHEDULER_INACTIVE 0 +#define RCU_SCHEDULER_INIT 1 +#define RCU_SCHEDULER_RUNNING 2 + enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, -- cgit v1.2.1 From 752de307b0ee47308bfc299de3a3ad623c16b4d8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 11:18:01 -0700 Subject: rcu: Remove linux/debugobjects.h from rcupdate.h The include/linux/rcupdate.h file does not actually need anything from linux/debugobjects.h, so this commit removes the inclusion. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9206a28a2d44..f105f0834bbe 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.1 From fa3c66476975abf00c97f27b6c2b3d223f7d57f5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 11:38:55 -0700 Subject: rcu: Improve __call_rcu() debug-objects error message The "__call_rcu(): Leaked duplicate callback" error message from __call_rcu() has proven to be unhelpful. This commit therefore changes it to "__call_rcu(): Double-freed CB" and adds the value of the pointer passed in. The value of the pointer improves debuggability by allowing correlation with tracing output, for example, the rcu:rcu_callback trace event. Reported-by: Vegard Nossum Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 61a97164abcc..cac24f5d3fd2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3161,9 +3161,14 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); if (debug_rcu_head_queue(head)) { - /* Probable double call_rcu(), so leak the callback. 
*/ + /* + * Probable double call_rcu(), so leak the callback. + * Use rcu:rcu_callback trace event to find the previous + * time callback was passed to __call_rcu(). + */ + WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pF()!!!\n", + head, head->func); WRITE_ONCE(head->func, rcu_leak_callback); - WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n"); return; } head->func = func; -- cgit v1.2.1 From 3d54f7983f3e6ac9f444fa20970b1abc8f089b79 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 12:25:50 -0700 Subject: rcu: Move rcu_is_nocb_cpu() from rcupdate.h to rcu.h The rcu_is_nocb_cpu() function is used only internally to RCU. This commit therefore moves its declaration from include/linux/rcupdate.h to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 9 --------- kernel/rcu/rcu.h | 8 ++++++++ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f105f0834bbe..003427425e27 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -867,15 +867,6 @@ static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) } #endif /* #ifdef CONFIG_TINY_RCU */ -#if defined(CONFIG_RCU_NOCB_CPU_ALL) -static inline bool rcu_is_nocb_cpu(int cpu) { return true; } -#elif defined(CONFIG_RCU_NOCB_CPU) -bool rcu_is_nocb_cpu(int cpu); -#else -static inline bool rcu_is_nocb_cpu(int cpu) { return false; } -#endif - - /* Only for use by adaptive-ticks code. */ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE bool rcu_sys_is_idle(void); diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 17fee2a667d9..2f344662c568 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -408,4 +408,12 @@ static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, #endif +#if defined(CONFIG_RCU_NOCB_CPU_ALL) +static inline bool rcu_is_nocb_cpu(int cpu) { return true; } +#elif defined(CONFIG_RCU_NOCB_CPU) +bool rcu_is_nocb_cpu(int cpu); +#else +static inline bool rcu_is_nocb_cpu(int cpu) { return false; } +#endif + #endif /* __LINUX_RCU_H */ -- cgit v1.2.1 From b8989b76052eedc99b09322efd6f68816f191a1a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 12:28:59 -0700 Subject: rcu: Move rcu_ftrace_dump() from rcupdate.h to rcu.h The rcu_ftrace_dump() function is used only internally to RCU. This commit therefore moves its declaration from include/linux/rcupdate.h to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 12 ------------ kernel/rcu/rcu.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 003427425e27..ad5e6934dcf3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -883,18 +883,6 @@ static inline void rcu_sysidle_force_exit(void) { } #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -/* - * Dump the ftrace buffer, but only one time per callsite per boot. - */ -#define rcu_ftrace_dump(oops_dump_mode) \ -do { \ - static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \ - \ - if (!atomic_read(&___rfd_beenhere) && \ - !atomic_xchg(&___rfd_beenhere, 1)) \ - ftrace_dump(oops_dump_mode); \ -} while (0) - /* * Place this after a lock-acquisition primitive to guarantee that * an UNLOCK+LOCK pair acts as a full barrier. 
This guarantee applies diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 2f344662c568..cdbaa441bdac 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -212,6 +212,18 @@ int rcu_jiffies_till_stall_check(void); */ #define TPS(x) tracepoint_string(x) +/* + * Dump the ftrace buffer, but only one time per callsite per boot. + */ +#define rcu_ftrace_dump(oops_dump_mode) \ +do { \ + static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \ + \ + if (!atomic_read(&___rfd_beenhere) && \ + !atomic_xchg(&___rfd_beenhere, 1)) \ + ftrace_dump(oops_dump_mode); \ +} while (0) + void rcu_early_boot_tests(void); void rcu_test_sync_prims(void); -- cgit v1.2.1 From 17a8c187310ccc5f5b65a7d8faf96fdc66c5fe3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 12:32:55 -0700 Subject: rcu: move rcupdate.h to the new true/false-function style This commit saves a few lines in include/linux/rcupdate.h by moving to single-line definitions for functions that just return either true or false, instead of the old style where the two curly braces each get their own line. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ad5e6934dcf3..564096e6e141 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -237,10 +237,7 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { } #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ -static inline bool rcu_lockdep_current_cpu_online(void) -{ - return true; -} +static inline bool rcu_lockdep_current_cpu_online(void) { return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -872,14 +869,8 @@ static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) bool rcu_sys_is_idle(void); void rcu_sysidle_force_exit(void); #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ - -static inline bool rcu_sys_is_idle(void) -{ - return false; -} - +static inline bool rcu_sys_is_idle(void) { return false; } static inline void rcu_sysidle_force_exit(void) { } - #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -- cgit v1.2.1 From e3c8d51e1a58c73a557eb38a9a6afb4f704a3379 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 13:37:16 -0700 Subject: rcu: Move torture-related functions out of rcutiny.h and rcutree.h The various functions similar to rcu_batches_started(), the function show_rcu_gp_kthreads(), the various functions similar to rcu_force_quiescent_state(), and the variables rcutorture_testseq and rcutorture_vernum are used only within RCU. There is therefore no point in exporting them to the kernel at large from include/linux/rcutiny.h and include/linux/rcutree.h. This commit therefore moves all of these to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney --- include/linux/rcutiny.h | 80 --------------------------------------- include/linux/rcutree.h | 16 -------- kernel/rcu/rcu.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 96 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5ed6934152a6..0d9270913686 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -106,86 +106,6 @@ static inline void rcu_virt_note_context_switch(int cpu) { } -/* - * Return the number of grace periods started. - */ -static inline unsigned long rcu_batches_started(void) -{ - return 0; -} - -/* - * Return the number of bottom-half grace periods started. - */ -static inline unsigned long rcu_batches_started_bh(void) -{ - return 0; -} - -/* - * Return the number of sched grace periods started. - */ -static inline unsigned long rcu_batches_started_sched(void) -{ - return 0; -} - -/* - * Return the number of grace periods completed. - */ -static inline unsigned long rcu_batches_completed(void) -{ - return 0; -} - -/* - * Return the number of bottom-half grace periods completed. - */ -static inline unsigned long rcu_batches_completed_bh(void) -{ - return 0; -} - -/* - * Return the number of sched grace periods completed. - */ -static inline unsigned long rcu_batches_completed_sched(void) -{ - return 0; -} - -/* - * Return the number of expedited grace periods completed. - */ -static inline unsigned long rcu_exp_batches_completed(void) -{ - return 0; -} - -/* - * Return the number of expedited sched grace periods completed. - */ -static inline unsigned long rcu_exp_batches_completed_sched(void) -{ - return 0; -} - -static inline void rcu_force_quiescent_state(void) -{ -} - -static inline void rcu_bh_force_quiescent_state(void) -{ -} - -static inline void rcu_sched_force_quiescent_state(void) -{ -} - -static inline void show_rcu_gp_kthreads(void) -{ -} - static inline void rcu_cpu_stall_reset(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 28af91a19573..43113323ca09 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -79,22 +79,6 @@ void cond_synchronize_rcu(unsigned long oldstate); unsigned long get_state_synchronize_sched(void); void cond_synchronize_sched(unsigned long oldstate); -extern unsigned long rcutorture_testseq; -extern unsigned long rcutorture_vernum; -unsigned long rcu_batches_started(void); -unsigned long rcu_batches_started_bh(void); -unsigned long rcu_batches_started_sched(void); -unsigned long rcu_batches_completed(void); -unsigned long rcu_batches_completed_bh(void); -unsigned long rcu_batches_completed_sched(void); -unsigned long rcu_exp_batches_completed(void); -unsigned long rcu_exp_batches_completed_sched(void); -void show_rcu_gp_kthreads(void); - -void rcu_force_quiescent_state(void); -void rcu_bh_force_quiescent_state(void); -void rcu_sched_force_quiescent_state(void); - void rcu_idle_enter(void); void rcu_idle_exit(void); void rcu_irq_enter(void); diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index cdbaa441bdac..d849b371b32b 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -420,6 +420,105 @@ static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, #endif +#ifdef CONFIG_TINY_RCU + +/* + * Return the number of grace periods started. + */ +static inline unsigned long rcu_batches_started(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods started. 
+ */ +static inline unsigned long rcu_batches_started_bh(void) +{ + return 0; +} + +/* + * Return the number of sched grace periods started. + */ +static inline unsigned long rcu_batches_started_sched(void) +{ + return 0; +} + +/* + * Return the number of grace periods completed. + */ +static inline unsigned long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods completed. + */ +static inline unsigned long rcu_batches_completed_bh(void) +{ + return 0; +} + +/* + * Return the number of sched grace periods completed. + */ +static inline unsigned long rcu_batches_completed_sched(void) +{ + return 0; +} + +/* + * Return the number of expedited grace periods completed. + */ +static inline unsigned long rcu_exp_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of expedited sched grace periods completed. + */ +static inline unsigned long rcu_exp_batches_completed_sched(void) +{ + return 0; +} + +static inline void rcu_force_quiescent_state(void) +{ +} + +static inline void rcu_bh_force_quiescent_state(void) +{ +} + +static inline void rcu_sched_force_quiescent_state(void) +{ +} + +static inline void show_rcu_gp_kthreads(void) +{ +} + +#else /* #ifdef CONFIG_TINY_RCU */ +extern unsigned long rcutorture_testseq; +extern unsigned long rcutorture_vernum; +unsigned long rcu_batches_started(void); +unsigned long rcu_batches_started_bh(void); +unsigned long rcu_batches_started_sched(void); +unsigned long rcu_batches_completed(void); +unsigned long rcu_batches_completed_bh(void); +unsigned long rcu_batches_completed_sched(void); +unsigned long rcu_exp_batches_completed(void); +unsigned long rcu_exp_batches_completed_sched(void); +void show_rcu_gp_kthreads(void); +void rcu_force_quiescent_state(void); +void rcu_bh_force_quiescent_state(void); +void rcu_sched_force_quiescent_state(void); +#endif /* #else #ifdef CONFIG_TINY_RCU */ + #if defined(CONFIG_RCU_NOCB_CPU_ALL) static inline bool rcu_is_nocb_cpu(int cpu) { return true; } #elif defined(CONFIG_RCU_NOCB_CPU) -- cgit v1.2.1
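The pattern this series applies over and over is worth distilling: Tiny RCU gets zero-cost static-inline stubs, Tree RCU gets real declarations, and callers compile unchanged against either. A condensed sketch of the idiom; rcu_batches_completed() is a real function from the patches above, while report_rcu_progress() is a hypothetical caller added for illustration:

	/* In a private header such as kernel/rcu/rcu.h: */
	#ifdef CONFIG_TINY_RCU
	/* One CPU, no grace-period bookkeeping: the stub folds away entirely. */
	static inline unsigned long rcu_batches_completed(void) { return 0; }
	#else /* #ifdef CONFIG_TINY_RCU */
	unsigned long rcu_batches_completed(void);	/* Real version in tree.c. */
	#endif /* #else #ifdef CONFIG_TINY_RCU */

	/* Callers need not know which variant they got: */
	static void report_rcu_progress(void)
	{
		pr_info("%lu RCU grace periods completed\n", rcu_batches_completed());
	}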
From fe21a27e8ca0937a5ac298de1f4b46382e9c5c88 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 13:45:51 -0700 Subject: rcu: Move rcu_request_urgent_qs_task() out of rcutiny.h and rcutree.h The rcu_request_urgent_qs_task() function is used only within RCU, so there is no point in exporting it to the rest of the kernel from include/linux/rcutiny.h and include/linux/rcutree.h. This commit therefore moves this function to kernel/rcu/rcu.h. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 4 ---- include/linux/rcutree.h | 3 --- kernel/rcu/rcu.h | 6 ++++++ 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 0d9270913686..f5067941bc27 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -157,10 +157,6 @@ static inline bool rcu_is_watching(void) return true; } -static inline void rcu_request_urgent_qs_task(struct task_struct *t) -{ -} - static inline void rcu_all_qs(void) { barrier(); /* Avoid RCU read-side critical sections leaking across. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 43113323ca09..d6aa89d15d47 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -91,10 +91,7 @@ void exit_rcu(void); void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; - bool rcu_is_watching(void); -void rcu_request_urgent_qs_task(struct task_struct *t); - void rcu_all_qs(void); /* RCUtree hotplug events */ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index d849b371b32b..5b76a5baff2e 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -335,6 +335,12 @@ void rcupdate_announce_bootup_oddness(void); #define RCU_SCHEDULER_INIT 1 #define RCU_SCHEDULER_RUNNING 2 +#ifdef CONFIG_TINY_RCU +static inline void rcu_request_urgent_qs_task(struct task_struct *t) { } +#else /* #ifdef CONFIG_TINY_RCU */ +void rcu_request_urgent_qs_task(struct task_struct *t); +#endif /* #else #ifdef CONFIG_TINY_RCU */ + enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, -- cgit v1.2.1 From 71c40fd0b5ceb300c6cb8753835d9d94a8bfc56f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 13:51:42 -0700 Subject: rcu: Move rcutiny.h to new empty/true/false-function style This commit saves a few lines in include/linux/rcutiny.h by moving to single-line definitions for empty functions, instead of the old style where the two curly braces each get their own line. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 71 +++++++++++-------------------------------- 1 file changed, 16 insertions(+), 55 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index f5067941bc27..2bfe48bc0e3b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -33,10 +33,8 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) return 0; } -static inline bool rcu_eqs_special_set(int cpu) -{ - return false; /* Never flag non-existent other CPUs! */ -} +/* Never flag non-existent other CPUs! */ +static inline bool rcu_eqs_special_set(int cpu) { return false; } static inline unsigned long get_state_synchronize_rcu(void) { @@ -102,65 +100,28 @@ static inline void kfree_call_rcu(struct rcu_head *head, * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches.
*/ -static inline void rcu_virt_note_context_switch(int cpu) -{ -} - -static inline void rcu_cpu_stall_reset(void) -{ -} - -static inline void rcu_idle_enter(void) -{ -} - -static inline void rcu_idle_exit(void) -{ -} - -static inline void rcu_irq_enter(void) -{ -} - -static inline bool rcu_irq_enter_disabled(void) -{ - return false; -} - -static inline void rcu_irq_exit_irqson(void) -{ -} - -static inline void rcu_irq_enter_irqson(void) -{ -} - -static inline void rcu_irq_exit(void) -{ -} - -static inline void exit_rcu(void) -{ -} +static inline void rcu_virt_note_context_switch(int cpu) { } +static inline void rcu_cpu_stall_reset(void) { } +static inline void rcu_idle_enter(void) { } +static inline void rcu_idle_exit(void) { } +static inline void rcu_irq_enter(void) { } +static inline bool rcu_irq_enter_disabled(void) { return false; } +static inline void rcu_irq_exit_irqson(void) { } +static inline void rcu_irq_enter_irqson(void) { } +static inline void rcu_irq_exit(void) { } +static inline void exit_rcu(void) { } #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) extern int rcu_scheduler_active __read_mostly; void rcu_scheduler_starting(void); #else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ -static inline void rcu_scheduler_starting(void) -{ -} +static inline void rcu_scheduler_starting(void) { } #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ -static inline bool rcu_is_watching(void) -{ - return true; -} +static inline bool rcu_is_watching(void) { return true; } -static inline void rcu_all_qs(void) -{ - barrier(); /* Avoid RCU read-side critical sections leaking across. */ -} +/* Avoid RCU read-side critical sections leaking across. */ +static inline void rcu_all_qs(void) { barrier(); } /* RCUtree hotplug events */ #define rcutree_prepare_cpu NULL -- cgit v1.2.1 From c350c008297643dad3c395c2fd92230142da5cf6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 15:35:32 -0700 Subject: srcu: Prevent sdp->srcu_gp_seq_needed counter wrap If a given CPU never happens to ever start an SRCU grace period, the grace-period sequence counter might wrap. If this CPU were to decide to finally start a grace period, the state of its sdp->srcu_gp_seq_needed might make it appear that it has already requested this grace period, which would prevent starting the grace period. If no other CPU ever started a grace period again, this would look like a grace-period hang. Even if some other CPU took pity and started the needed grace period, the leaf srcu_node structure's ->srcu_data_have_cbs field won't have any record of the fact that this CPU has a callback pending, which would look like a very localized grace-period hang. This might seem very unlikely, but SRCU grace periods can take less than a microsecond on small systems, which means that overflow can happen in much less than an hour on a 32-bit embedded system. And embedded systems are especially likely to have long-term idle CPUs. Therefore, it makes sense to prevent this scenario from happening. This commit therefore scans each srcu_data structure occasionally, with frequency controlled by the srcutree.counter_wrap_check kernel boot parameter. This parameter can be set to something like 255 in order to exercise the counter-wrap-prevention code. Signed-off-by: Paul E.
McKenney --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++ kernel/rcu/srcutree.c | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 01b5ab92d251..6671f9b60a86 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3810,6 +3810,15 @@ spia_pedr= spia_peddr= + srcutree.counter_wrap_check [KNL] + Specifies how frequently to check for + grace-period sequence counter wrap for the + srcu_data structure's ->srcu_gp_seq_needed field. + The greater the number of bits set in this kernel + parameter, the less frequently counter wrap will + be checked for. Note that the bottom two bits + are ignored. + srcutree.exp_holdoff [KNL] Specifies how many nanoseconds must elapse since the end of the last SRCU grace period for diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c6e2a4a1628b..cc06dbfc9692 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -45,6 +45,10 @@ static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF; module_param(exp_holdoff, ulong, 0444); +/* Overflow-check frequency. N bits roughly says every 2**N grace periods. */ +static ulong counter_wrap_check = (ULONG_MAX >> 2); +module_param(counter_wrap_check, ulong, 0444); + static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); @@ -496,10 +500,13 @@ static void srcu_gp_end(struct srcu_struct *sp) { unsigned long cbdelay; bool cbs; + int cpu; + unsigned long flags; unsigned long gpseq; int idx; int idxnext; unsigned long mask; + struct srcu_data *sdp; struct srcu_node *snp; /* Prevent more than one additional grace period. */ @@ -538,6 +545,17 @@ static void srcu_gp_end(struct srcu_struct *sp) smp_mb(); /* GP end before CB invocation. */ srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); } + + /* Occasionally prevent srcu_data counter wrap. */ + if (!(gpseq & counter_wrap_check)) + for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { + sdp = per_cpu_ptr(sp->sda, cpu); + spin_lock_irqsave(&sdp->lock, flags); + if (ULONG_CMP_GE(gpseq, + sdp->srcu_gp_seq_needed + 100)) + sdp->srcu_gp_seq_needed = gpseq; + spin_unlock_irqrestore(&sdp->lock, flags); + } } /* Callback initiation done, allow grace periods after next. */ -- cgit v1.2.1
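The scan added above runs only when !(gpseq & counter_wrap_check), that is, when every bit set in the mask is clear in gpseq, so a mask with N low-order bits set fires roughly once per 2**N grace-period steps. A user-space sketch of just that gating arithmetic; it abstracts away the fact that the real gpseq also carries state in its bottom two bits:

	#include <stdio.h>

	int main(void)
	{
		unsigned long counter_wrap_check = 255;	/* Debug setting suggested above. */
		unsigned long gpseq, scans = 0;

		for (gpseq = 0; gpseq < 4096; gpseq++)
			if (!(gpseq & counter_wrap_check))	/* Same gate as srcu_gp_end(). */
				scans++;
		printf("%lu scans in 4096 steps\n", scans);	/* Prints 16: one per 256. */
		return 0;
	}

With the default mask of ULONG_MAX >> 2, the check is effectively never taken in normal operation, so it costs nothing on the common path.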
From 5a0465e17a18c467b712a816985b7b8dd8d10c16 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 May 2017 11:31:04 -0700 Subject: srcu: Shrink srcu.h by moving docbook and private function The call_srcu() docbook entry is currently in include/linux/srcu.h, which causes needless processing for each include point. This commit therefore moves this entry to kernel/rcu/srcutree.c, which the compiler reads only once. In addition, the srcu_batches_completed() function is used only within RCU and its torture-test suites. This commit therefore also moves this function's declaration from include/linux/srcutiny.h, include/linux/srcutree.h, and include/linux/srcuclassic.h to kernel/rcu/rcu.h. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 20 -------------------- include/linux/srcuclassic.h | 1 - include/linux/srcutiny.h | 5 ----- include/linux/srcutree.h | 1 - kernel/rcu/rcu.h | 6 ++++++ kernel/rcu/srcutree.c | 17 +++++++++++++++++ 6 files changed, 23 insertions(+), 27 deletions(-) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index ea356d800675..5f509018e6b5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -65,32 +65,12 @@ int init_srcu_struct(struct srcu_struct *sp); #elif defined(CONFIG_SRCU) #error "Unknown SRCU implementation specified to kernel configuration" #else - /* Dummy definition for things like notifiers. Actual use gets link error. */ struct srcu_struct { }; - #endif -/** - * call_srcu() - Queue a callback for invocation after an SRCU grace period - * @sp: srcu_struct in queue the callback - * @head: structure to be used for queueing the SRCU callback. - * @func: function to be invoked after the SRCU grace period - * - * The callback function will be invoked some time after a full SRCU - * grace period elapses, in other words after all pre-existing SRCU - * read-side critical sections have completed. However, the callback - * function might well execute concurrently with other SRCU read-side - * critical sections that started after call_srcu() was invoked. SRCU - * read-side critical sections are delimited by srcu_read_lock() and - * srcu_read_unlock(), and may be nested. - * - * The callback will be invoked from process context, but must nevertheless - * be fast and must not block. - */ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, void (*func)(struct rcu_head *head)); - void cleanup_srcu_struct(struct srcu_struct *sp); int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h index 41cf99930f34..67db4a36ef0d 100644 --- a/include/linux/srcuclassic.h +++ b/include/linux/srcuclassic.h @@ -96,6 +96,5 @@ void process_srcu(struct work_struct *work); void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); -unsigned long srcu_batches_completed(struct srcu_struct *sp); #endif diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 85bddce6a7a6..4c53e698c6e4 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -88,9 +88,4 @@ static inline void srcu_barrier(struct srcu_struct *sp) synchronize_srcu(sp); } -static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) -{ - return 0; -} - #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index f4adfed17b51..24e949bda12a 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -141,6 +141,5 @@ void process_srcu(struct work_struct *work); void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); -unsigned long srcu_batches_completed(struct srcu_struct *sp); #endif diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 5b76a5baff2e..74d9fc205313 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -492,6 +492,11 @@ static inline unsigned long rcu_exp_batches_completed_sched(void) return 0; } +static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) +{ + return 0; +} + static inline void rcu_force_quiescent_state(void) { } @@ -519,6 +524,7 @@ unsigned long rcu_batches_completed_bh(void); unsigned long rcu_batches_completed_sched(void); unsigned long
rcu_exp_batches_completed(void); unsigned long rcu_exp_batches_completed_sched(void); +unsigned long srcu_batches_completed(struct srcu_struct *sp); void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); void rcu_bh_force_quiescent_state(void); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index cc06dbfc9692..66a998f9c5a7 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -854,6 +854,23 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, srcu_funnel_exp_start(sp, sdp->mynode, s); } +/** + * call_srcu() - Queue a callback for invocation after an SRCU grace period + * @sp: srcu_struct in which to queue the callback + * @head: structure to be used for queueing the SRCU callback. + * @func: function to be invoked after the SRCU grace period + * + * The callback function will be invoked some time after a full SRCU + * grace period elapses, in other words after all pre-existing SRCU + * read-side critical sections have completed. However, the callback + * function might well execute concurrently with other SRCU read-side + * critical sections that started after call_srcu() was invoked. SRCU + * read-side critical sections are delimited by srcu_read_lock() and + * srcu_read_unlock(), and may be nested. + * + * The callback will be invoked from process context, but must nevertheless + * be fast and must not block. + */ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rcu_callback_t func) { -- cgit v1.2.1
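To put the relocated kernel-doc in context, here is a minimal sketch of the call_srcu() usage it describes; the SRCU domain my_srcu, struct bar, and the reader/updater functions are invented for illustration:

	#include <linux/srcu.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct bar {
		int data;
		struct rcu_head rcu;
	};

	DEFINE_SRCU(my_srcu);			/* Hypothetical SRCU domain. */
	static struct bar __rcu *global_bar;
	static DEFINE_SPINLOCK(bar_lock);

	static int read_bar(void)
	{
		struct bar *p;
		int idx, val = -1;

		idx = srcu_read_lock(&my_srcu);		/* SRCU readers may sleep here. */
		p = srcu_dereference(global_bar, &my_srcu);
		if (p)
			val = p->data;
		srcu_read_unlock(&my_srcu, idx);
		return val;
	}

	static void free_bar(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct bar, rcu));
	}

	static void replace_bar(struct bar *newp)
	{
		struct bar *oldp;

		spin_lock(&bar_lock);
		oldp = rcu_dereference_protected(global_bar,
						 lockdep_is_held(&bar_lock));
		rcu_assign_pointer(global_bar, newp);
		spin_unlock(&bar_lock);
		if (oldp)
			call_srcu(&my_srcu, &oldp->rcu, free_bar);	/* Deferred free. */
	}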
From 2464dd940e23bad227c387a40eec99f7aa02ed96 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 May 2017 14:29:16 -0700 Subject: srcu: Apply trivial callback lists to shrink Tiny SRCU The rcu_segcblist structure provides quite a bit of functionality, and Tiny SRCU needs almost none of it. So this commit replaces Tiny SRCU's uses of rcu_segcblist with a simple singly linked list with tail pointer. This change significantly reduces Tiny SRCU's memory footprint, more than making up for the growth caused by the creation of rcu_segcblist.c. Signed-off-by: Paul E. McKenney --- include/linux/srcutiny.h | 7 +++-- init/Kconfig | 2 +- kernel/rcu/rcu.h | 2 +- kernel/rcu/srcutiny.c | 70 ++++++++++++++++++++++-------------------- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 4c53e698c6e4..cfbfc540cafc 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -33,9 +33,8 @@ struct srcu_struct { u8 srcu_gp_waiting; /* GP waiting for readers? */ struct swait_queue_head srcu_wq; /* Last srcu_read_unlock() wakes GP. */ - unsigned long srcu_gp_seq; /* GP seq # for callback tagging. */ - struct rcu_segcblist srcu_cblist; - /* Pending SRCU callbacks. */ + struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */ + struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */ struct work_struct srcu_work; /* For driving grace periods. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -47,7 +46,7 @@ void srcu_drive_gp(struct work_struct *wp); #define __SRCU_STRUCT_INIT(name) \ { \ .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ - .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist), \ + .srcu_cb_tail = &name.srcu_cb_head, \ .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ __SRCU_DEP_MAP_INIT(name) \ } diff --git a/init/Kconfig b/init/Kconfig index d928a3724af9..a2cfde19e8b8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -573,7 +573,7 @@ config RCU_STALL_COMMON making these warnings mandatory for the tree variants. config RCU_NEED_SEGCBLIST - def_bool ( TREE_RCU || PREEMPT_RCU || TINY_SRCU || TREE_SRCU ) + def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU ) config CONTEXT_TRACKING bool diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 74d9fc205313..6a1e85bd2eac 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -398,7 +398,7 @@ static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, if (test_type != SRCU_FLAVOR) return; *flags = 0; - *completed = sp->srcu_gp_seq; + *completed = sp->srcu_idx; *gpnum = *completed; } diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 988543721d5d..1a1c1047d2ed 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -38,8 +38,8 @@ static int init_srcu_struct_fields(struct srcu_struct *sp) sp->srcu_lock_nesting[0] = 0; sp->srcu_lock_nesting[1] = 0; init_swait_queue_head(&sp->srcu_wq); - sp->srcu_gp_seq = 0; - rcu_segcblist_init(&sp->srcu_cblist); + sp->srcu_cb_head = NULL; + sp->srcu_cb_tail = &sp->srcu_cb_head; sp->srcu_gp_running = false; sp->srcu_gp_waiting = false; sp->srcu_idx = 0; @@ -88,10 +88,10 @@ void cleanup_srcu_struct(struct srcu_struct *sp) { WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); flush_work(&sp->srcu_work); - WARN_ON(rcu_seq_state(sp->srcu_gp_seq)); WARN_ON(sp->srcu_gp_running); WARN_ON(sp->srcu_gp_waiting); - WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)); + WARN_ON(sp->srcu_cb_head); + WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail); } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); @@ -117,52 +117,44 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); void srcu_drive_gp(struct work_struct *wp) { int idx; - struct rcu_cblist ready_cbs; - struct srcu_struct *sp; + struct rcu_head *lh; struct rcu_head *rhp; + struct srcu_struct *sp; sp = container_of(wp, struct srcu_struct, srcu_work); - if (sp->srcu_gp_running || rcu_segcblist_empty(&sp->srcu_cblist)) + if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head)) return; /* Already running or nothing to do. */ - /* Tag recently arrived callbacks and wait for readers. */ + /* Remove recently arrived callbacks and wait for readers. */ WRITE_ONCE(sp->srcu_gp_running, true); - rcu_segcblist_accelerate(&sp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); - rcu_seq_start(&sp->srcu_gp_seq); + local_irq_disable(); + lh = sp->srcu_cb_head; + sp->srcu_cb_head = NULL; + sp->srcu_cb_tail = &sp->srcu_cb_head; + local_irq_enable(); idx = sp->srcu_idx; WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ - rcu_seq_end(&sp->srcu_gp_seq); - - /* Update callback list based on GP, and invoke ready callbacks.
*/ - rcu_segcblist_advance(&sp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); - if (rcu_segcblist_ready_cbs(&sp->srcu_cblist)) { - rcu_cblist_init(&ready_cbs); - local_irq_disable(); - rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs); - local_irq_enable(); - rhp = rcu_cblist_dequeue(&ready_cbs); - for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { - local_bh_disable(); - rhp->func(rhp); - local_bh_enable(); - } - local_irq_disable(); - rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs); - local_irq_enable(); + + /* Invoke the callbacks we removed above. */ + while (lh) { + rhp = lh; + lh = lh->next; + local_bh_disable(); + rhp->func(rhp); + local_bh_enable(); } - WRITE_ONCE(sp->srcu_gp_running, false); /* - * If more callbacks, reschedule ourselves. This can race with - * a call_srcu() at interrupt level, but the ->srcu_gp_running - * checks will straighten that out. + * Enable rescheduling, and if there are more callbacks, + * reschedule ourselves. This can race with a call_srcu() + * at interrupt level, but the ->srcu_gp_running checks will + * straighten that out. */ - if (!rcu_segcblist_empty(&sp->srcu_cblist)) + WRITE_ONCE(sp->srcu_gp_running, false); + if (READ_ONCE(sp->srcu_cb_head)) schedule_work(&sp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); @@ -171,14 +163,16 @@ EXPORT_SYMBOL_GPL(srcu_drive_gp); * Enqueue an SRCU callback on the specified srcu_struct structure, * initiating grace-period processing if it is not already running. */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *head, +void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; - head->func = func; + rhp->func = func; + rhp->next = NULL; local_irq_save(flags); - rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); + *sp->srcu_cb_tail = rhp; + sp->srcu_cb_tail = &rhp->next; local_irq_restore(flags); if (!READ_ONCE(sp->srcu_gp_running)) schedule_work(&sp->srcu_work); -- cgit v1.2.1 From 681fbec881dea1848e9246d7d1ecb3b97f11026d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 May 2017 15:44:38 -0700 Subject: lockdep: Use consistent printing primitives Commit a5dd63efda3d ("lockdep: Use "WARNING" tag on lockdep splats") substituted pr_warn() for printk() in places called out by Dmitry Vyukov. However, this resulted in an ugly mix of pr_warn() and printk(). This commit therefore changes printk() to pr_warn() or pr_cont(), depending on the absence or presence of KERN_CONT. This is done in all functions that had printk() changed to pr_warn() by the aforementioned commit. Reported-by: Peter Zijlstra Signed-off-by: Paul E. 
McKenney --- kernel/locking/lockdep.c | 172 +++++++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c0e31bfee25c..cceb9534338a 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1157,18 +1157,18 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, if (debug_locks_silent) return 0; - printk("\n"); + pr_warn("\n"); pr_warn("======================================================\n"); pr_warn("WARNING: possible circular locking dependency detected\n"); print_kernel_ident(); pr_warn("------------------------------------------------------\n"); - printk("%s/%d is trying to acquire lock:\n", + pr_warn("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(check_src); - printk("\nbut task is already holding lock:\n"); + pr_warn("\nbut task is already holding lock:\n"); print_lock(check_tgt); - printk("\nwhich lock already depends on the new lock.\n\n"); - printk("\nthe existing dependency chain (in reverse order) is:\n"); + pr_warn("\nwhich lock already depends on the new lock.\n\n"); + pr_warn("\nthe existing dependency chain (in reverse order) is:\n"); print_circular_bug_entry(entry, depth); @@ -1495,13 +1495,13 @@ print_bad_irq_dependency(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n"); + pr_warn("\n"); pr_warn("=====================================================\n"); pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n", irqclass, irqclass); print_kernel_ident(); pr_warn("-----------------------------------------------------\n"); - printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", + pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, @@ -1509,46 +1509,46 @@ print_bad_irq_dependency(struct task_struct *curr, curr->softirqs_enabled); print_lock(next); - printk("\nand this task is already holding:\n"); + pr_warn("\nand this task is already holding:\n"); print_lock(prev); - printk("which would create a new lock dependency:\n"); + pr_warn("which would create a new lock dependency:\n"); print_lock_name(hlock_class(prev)); - printk(KERN_CONT " ->"); + pr_cont(" ->"); print_lock_name(hlock_class(next)); - printk(KERN_CONT "\n"); + pr_cont("\n"); - printk("\nbut this new dependency connects a %s-irq-safe lock:\n", + pr_warn("\nbut this new dependency connects a %s-irq-safe lock:\n", irqclass); print_lock_name(backwards_entry->class); - printk("\n... which became %s-irq-safe at:\n", irqclass); + pr_warn("\n... which became %s-irq-safe at:\n", irqclass); print_stack_trace(backwards_entry->class->usage_traces + bit1, 1); - printk("\nto a %s-irq-unsafe lock:\n", irqclass); + pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass); print_lock_name(forwards_entry->class); - printk("\n... which became %s-irq-unsafe at:\n", irqclass); - printk("..."); + pr_warn("\n... 
which became %s-irq-unsafe at:\n", irqclass); + pr_warn("..."); print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); - printk("\nother info that might help us debug this:\n\n"); + pr_warn("\nother info that might help us debug this:\n\n"); print_irq_lock_scenario(backwards_entry, forwards_entry, hlock_class(prev), hlock_class(next)); lockdep_print_held_locks(curr); - printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass); + pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass); if (!save_trace(&prev_root->trace)) return 0; print_shortest_lock_dependencies(backwards_entry, prev_root); - printk("\nthe dependencies between the lock to be acquired"); - printk(" and %s-irq-unsafe lock:\n", irqclass); + pr_warn("\nthe dependencies between the lock to be acquired"); + pr_warn(" and %s-irq-unsafe lock:\n", irqclass); if (!save_trace(&next_root->trace)) return 0; print_shortest_lock_dependencies(forwards_entry, next_root); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); return 0; @@ -1724,22 +1724,22 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n"); + pr_warn("\n"); pr_warn("============================================\n"); pr_warn("WARNING: possible recursive locking detected\n"); print_kernel_ident(); pr_warn("--------------------------------------------\n"); - printk("%s/%d is trying to acquire lock:\n", + pr_warn("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(next); - printk("\nbut task is already holding lock:\n"); + pr_warn("\nbut task is already holding lock:\n"); print_lock(prev); - printk("\nother info that might help us debug this:\n"); + pr_warn("\nother info that might help us debug this:\n"); print_deadlock_scenario(next, prev); lockdep_print_held_locks(curr); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); return 0; @@ -2074,21 +2074,21 @@ static void print_collision(struct task_struct *curr, struct held_lock *hlock_next, struct lock_chain *chain) { - printk("\n"); + pr_warn("\n"); pr_warn("============================\n"); pr_warn("WARNING: chain_key collision\n"); print_kernel_ident(); pr_warn("----------------------------\n"); - printk("%s/%d: ", current->comm, task_pid_nr(current)); - printk("Hash chain already cached but the contents don't match!\n"); + pr_warn("%s/%d: ", current->comm, task_pid_nr(current)); + pr_warn("Hash chain already cached but the contents don't match!\n"); - printk("Held locks:"); + pr_warn("Held locks:"); print_chain_keys_held_locks(curr, hlock_next); - printk("Locks in cached chain:"); + pr_warn("Locks in cached chain:"); print_chain_keys_chain(chain); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); } #endif @@ -2373,16 +2373,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n"); + pr_warn("\n"); pr_warn("================================\n"); pr_warn("WARNING: inconsistent lock state\n"); print_kernel_ident(); pr_warn("--------------------------------\n"); - printk("inconsistent {%s} -> {%s} usage.\n", + pr_warn("inconsistent {%s} -> {%s} usage.\n", usage_str[prev_bit], usage_str[new_bit]); - printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", + pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", curr->comm, 
task_pid_nr(curr), trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, @@ -2390,16 +2390,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, trace_softirqs_enabled(curr)); print_lock(this); - printk("{%s} state was registered at:\n", usage_str[prev_bit]); + pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]); print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1); print_irqtrace_events(curr); - printk("\nother info that might help us debug this:\n"); + pr_warn("\nother info that might help us debug this:\n"); print_usage_bug_scenario(this); lockdep_print_held_locks(curr); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); return 0; @@ -2438,28 +2438,28 @@ print_irq_inversion_bug(struct task_struct *curr, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - printk("\n"); + pr_warn("\n"); pr_warn("========================================================\n"); pr_warn("WARNING: possible irq lock inversion dependency detected\n"); print_kernel_ident(); pr_warn("--------------------------------------------------------\n"); - printk("%s/%d just changed the state of lock:\n", + pr_warn("%s/%d just changed the state of lock:\n", curr->comm, task_pid_nr(curr)); print_lock(this); if (forwards) - printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass); + pr_warn("but this lock took another, %s-unsafe lock in the past:\n", irqclass); else - printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); + pr_warn("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); print_lock_name(other->class); - printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); + pr_warn("\n\nand interrupts could create inverse lock ordering between them.\n\n"); - printk("\nother info that might help us debug this:\n"); + pr_warn("\nother info that might help us debug this:\n"); /* Find a middle lock (if one exists) */ depth = get_lock_depth(other); do { if (depth == 0 && (entry != root)) { - printk("lockdep:%s bad path found in chain graph\n", __func__); + pr_warn("lockdep:%s bad path found in chain graph\n", __func__); break; } middle = entry; @@ -2475,12 +2475,12 @@ print_irq_inversion_bug(struct task_struct *curr, lockdep_print_held_locks(curr); - printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); + pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); if (!save_trace(&root->trace)) return 0; print_shortest_lock_dependencies(other, root); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); return 0; @@ -3189,25 +3189,25 @@ print_lock_nested_lock_not_held(struct task_struct *curr, if (debug_locks_silent) return 0; - printk("\n"); + pr_warn("\n"); pr_warn("==================================\n"); pr_warn("WARNING: Nested lock was not taken\n"); print_kernel_ident(); pr_warn("----------------------------------\n"); - printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); + pr_warn("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); print_lock(hlock); - printk("\nbut this task is not holding:\n"); - printk("%s\n", hlock->nest_lock->name); + pr_warn("\nbut this task is not holding:\n"); + pr_warn("%s\n", hlock->nest_lock->name); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); - printk("\nother info that might help us debug this:\n"); + 
pr_warn("\nother info that might help us debug this:\n"); lockdep_print_held_locks(curr); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); return 0; @@ -3402,21 +3402,21 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, if (debug_locks_silent) return 0; - printk("\n"); + pr_warn("\n"); pr_warn("=====================================\n"); pr_warn("WARNING: bad unlock balance detected!\n"); print_kernel_ident(); pr_warn("-------------------------------------\n"); - printk("%s/%d is trying to release lock (", + pr_warn("%s/%d is trying to release lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); - printk(KERN_CONT ") at:\n"); + pr_cont(") at:\n"); print_ip_sym(ip); - printk("but there are no more locks to release!\n"); - printk("\nother info that might help us debug this:\n"); + pr_warn("but there are no more locks to release!\n"); + pr_warn("\nother info that might help us debug this:\n"); lockdep_print_held_locks(curr); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); return 0; @@ -3974,21 +3974,21 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, if (debug_locks_silent) return 0; - printk("\n"); + pr_warn("\n"); pr_warn("=================================\n"); pr_warn("WARNING: bad contention detected!\n"); print_kernel_ident(); pr_warn("---------------------------------\n"); - printk("%s/%d is trying to contend lock (", + pr_warn("%s/%d is trying to contend lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); - printk(KERN_CONT ") at:\n"); + pr_cont(") at:\n"); print_ip_sym(ip); - printk("but there are no locks held!\n"); - printk("\nother info that might help us debug this:\n"); + pr_warn("but there are no locks held!\n"); + pr_warn("\nother info that might help us debug this:\n"); lockdep_print_held_locks(curr); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); return 0; @@ -4318,17 +4318,17 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, if (debug_locks_silent) return; - printk("\n"); + pr_warn("\n"); pr_warn("=========================\n"); pr_warn("WARNING: held lock freed!\n"); print_kernel_ident(); pr_warn("-------------------------\n"); - printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", + pr_warn("%s/%d is freeing memory %p-%p, with a lock still held there!\n", curr->comm, task_pid_nr(curr), mem_from, mem_to-1); print_lock(hlock); lockdep_print_held_locks(curr); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); } @@ -4376,14 +4376,14 @@ static void print_held_locks_bug(void) if (debug_locks_silent) return; - printk("\n"); + pr_warn("\n"); pr_warn("====================================\n"); pr_warn("WARNING: %s/%d still has locks held!\n", current->comm, task_pid_nr(current)); print_kernel_ident(); pr_warn("------------------------------------\n"); lockdep_print_held_locks(current); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); } @@ -4402,10 +4402,10 @@ void debug_show_all_locks(void) int unlock = 1; if (unlikely(!debug_locks)) { - printk("INFO: lockdep is turned off.\n"); + pr_warn("INFO: lockdep is turned off.\n"); return; } - printk("\nShowing all locks held in the system:\n"); + pr_warn("\nShowing all locks held in the system:\n"); /* * Here we try to get the tasklist_lock as hard as possible, @@ -4416,18 +4416,18 @@ void debug_show_all_locks(void) retry: if 
(!read_trylock(&tasklist_lock)) { if (count == 10) - printk("hm, tasklist_lock locked, retrying... "); + pr_warn("hm, tasklist_lock locked, retrying... "); if (count) { count--; - printk(" #%d", 10-count); + pr_cont(" #%d", 10-count); mdelay(200); goto retry; } - printk(" ignoring it.\n"); + pr_cont(" ignoring it.\n"); unlock = 0; } else { if (count != 10) - printk(KERN_CONT " locked it.\n"); + pr_cont(" locked it.\n"); } do_each_thread(g, p) { @@ -4445,7 +4445,7 @@ retry: unlock = 1; } while_each_thread(g, p); - printk("\n"); + pr_warn("\n"); pr_warn("=============================================\n\n"); if (unlock) @@ -4475,12 +4475,12 @@ asmlinkage __visible void lockdep_sys_exit(void) if (unlikely(curr->lockdep_depth)) { if (!debug_locks_off()) return; - printk("\n"); + pr_warn("\n"); pr_warn("================================================\n"); pr_warn("WARNING: lock held when returning to user space!\n"); print_kernel_ident(); pr_warn("------------------------------------------------\n"); - printk("%s/%d is leaving the kernel with locks still held!\n", + pr_warn("%s/%d is leaving the kernel with locks still held!\n", curr->comm, curr->pid); lockdep_print_held_locks(curr); } @@ -4495,14 +4495,14 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) return; #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ /* Note: the following can be executed concurrently, so be careful. */ - printk("\n"); + pr_warn("\n"); pr_warn("=============================\n"); pr_warn("WARNING: suspicious RCU usage\n"); print_kernel_ident(); pr_warn("-----------------------------\n"); - printk("%s:%d %s!\n", file, line, s); - printk("\nother info that might help us debug this:\n\n"); - printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", + pr_warn("%s:%d %s!\n", file, line, s); + pr_warn("\nother info that might help us debug this:\n\n"); + pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n", !rcu_lockdep_current_cpu_online() ? "RCU used illegally from offline CPU!\n" : !rcu_is_watching() @@ -4529,10 +4529,10 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) * rcu_read_lock_bh() and so on from extended quiescent states. */ if (!rcu_is_watching()) - printk("RCU used illegally from extended quiescent state!\n"); + pr_warn("RCU used illegally from extended quiescent state!\n"); lockdep_print_held_locks(curr); - printk("\nstack backtrace:\n"); + pr_warn("\nstack backtrace:\n"); dump_stack(); } EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); -- cgit v1.2.1 From 0cb5133ab573d1c471cfcfa632b0260a5aad5303 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 31 May 2017 09:26:07 -0700 Subject: bcm47xx: Fix build regression Commit 0bc2d534708b ("rcu: Refactor #includes from include/linux/rcupdate.h") caused a build regression in an MTD partition driver: In file included from drivers/mtd/bcm47xxpart.c:12:0: include/linux/bcm47xx_nvram.h: In function 'bcm47xx_nvram_init_from_mem': include/linux/bcm47xx_nvram.h:27:10: error: 'ENOTSUPP' undeclared (first use in this function) The rcupdate.h file has no particular need for linux/errno.h, so this commit includes linux/errno.h into bcm47xx_nvram.h. Fixes: 0bc2d534708b ("rcu: Refactor #includes from include/linux/rcupdate.h") Signed-off-by: Arnd Bergmann Signed-off-by: Paul E. 
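[The bcm47xx failure above is the classic transitive-include trap: bcm47xx_nvram.h compiled only because some header it pulled in happened to drag linux/errno.h along, so trimming rcupdate.h's includes broke an apparently unrelated file. A stand-alone userspace illustration of the same failure mode follows; the names are hypothetical, and ENOENT stands in for the kernel-only ENOTSUPP.

	#include <errno.h>	/* the "accidental" provider, like pre-refactor rcupdate.h */

	/*
	 * This function plays the role of bcm47xx_nvram_init_from_mem(): it
	 * uses an errno constant but never includes <errno.h> itself.  Remove
	 * the #include above and it no longer compiles -- the same breakage
	 * that the rcupdate.h refactoring exposed.
	 */
	static int nvram_init_sim(void)
	{
		return -ENOENT;
	}

	int main(void)
	{
		return nvram_init_sim() == -ENOENT ? 0 : 1;
	}

The fix is the one applied in the patch below: each header directly includes what it itself uses.]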
McKenney --- include/linux/bcm47xx_nvram.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index 2793652fbf66..a414a2b53e41 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -8,6 +8,7 @@ #ifndef __BCM47XX_NVRAM_H #define __BCM47XX_NVRAM_H +#include <linux/errno.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/vmalloc.h> -- cgit v1.2.1 From 5f192ab027a5d865be24c817005d42eb96314dc2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 May 2017 15:24:25 -0700 Subject: rcu: Refactor #includes from include/linux/rcupdate.h The list of #includes from include/linux/rcupdate.h has grown quite a bit, so it is time to trim it. This commit moves the #include of include/linux/ktime.h to include/linux/rcutiny.h, along with the Tiny-RCU-only function that was the only thing needing ktime.h. It then reconstructs the files included into include/linux/rcupdate.h based on what is actually needed, with significant help from the 0day Test Robot. This single change reduces the .i file footprint from rcupdate.h from 9018 lines to 7101 lines. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 22 ++++++---------------- include/linux/rcutiny.h | 8 +++++++- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 564096e6e141..ee40d7eba741 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -34,16 +34,14 @@ #define __LINUX_RCUPDATE_H #include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include +#include +#include +#include +#include +#include #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) @@ -856,14 +854,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -#ifdef CONFIG_TINY_RCU -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) -{ - *nextevt = KTIME_MAX; - return 0; -} -#endif /* #ifdef CONFIG_TINY_RCU */ - /* Only for use by adaptive-ticks code. */ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE bool rcu_sys_is_idle(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 2bfe48bc0e3b..c869785f16bd 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -25,7 +25,7 @@ #ifndef __LINUX_TINY_H #define __LINUX_TINY_H -#include <linux/cache.h> +#include <linux/ktime.h> struct rcu_dynticks; static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) @@ -96,6 +96,12 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_note_voluntary_context_switch_lite(current); \ } while (0) +static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +{ + *nextevt = KTIME_MAX; + return 0; +} + /* * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. -- cgit v1.2.1 From bf32c76540257f9f5f2cf661dbdd8bb4a4bd8c82 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 9 May 2017 12:05:46 -0700 Subject: rcu: Convert rnp->lock wrappers to macros for SRCU use Use of smp_mb__after_unlock_lock() would allow SRCU to omit a full memory barrier during callback execution, so this commit converts raw_spin_lock_rcu_node() from inline functions to type-generic macros to allow them to handle locks in srcu_node structures as well as rcu_node structures. Signed-off-by: Paul E.
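[Converting the wrappers from inline functions to macros buys type-genericity: a macro needs only that its argument have a ->lock field, whereas the old inline functions were hard-wired to struct rcu_node *, so one set of wrappers can now serve srcu_node and srcu_data as well. A userspace analogue follows, with pthread mutexes standing in for raw spinlocks and the smp_mb__after_unlock_lock() site reduced to a comment; all names are illustrative.

	#include <pthread.h>
	#include <stdio.h>

	struct rcu_node_sim  { pthread_mutex_t lock; int qsmask; };
	struct srcu_node_sim { pthread_mutex_t lock; unsigned long have_cbs; };

	/*
	 * Works for any structure with a ->lock field, unlike an inline
	 * function, which must name one specific structure type.
	 */
	#define sim_lock_node(p)					\
	do {								\
		pthread_mutex_lock(&(p)->lock);				\
		/* kernel: smp_mb__after_unlock_lock() goes here */	\
	} while (0)

	#define sim_unlock_node(p) pthread_mutex_unlock(&(p)->lock)

	int main(void)
	{
		struct rcu_node_sim  rnp = { .lock = PTHREAD_MUTEX_INITIALIZER };
		struct srcu_node_sim snp = { .lock = PTHREAD_MUTEX_INITIALIZER };

		sim_lock_node(&rnp);	/* same macro ... */
		sim_unlock_node(&rnp);
		sim_lock_node(&snp);	/* ... two different structure types */
		sim_unlock_node(&snp);
		printf("both node types locked and unlocked\n");
		return 0;
	}
]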
McKenney --- kernel/rcu/tree.h | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index ddfa34d020ba..a7f63f1074b4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -580,27 +580,22 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) * As ->lock of struct rcu_node is a __private field, therefore one should use * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock. */ -static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp) -{ - raw_spin_lock(&ACCESS_PRIVATE(rnp, lock)); - smp_mb__after_unlock_lock(); -} +#define raw_spin_lock_rcu_node(p) \ +do { \ + raw_spin_lock(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ +} while (0) -static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp) -{ - raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock)); -} +#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock)) -static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp) -{ - raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock)); - smp_mb__after_unlock_lock(); -} +#define raw_spin_lock_irq_rcu_node(p) \ +do { \ + raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ +} while (0) -static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp) -{ - raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock)); -} +#define raw_spin_unlock_irq_rcu_node(p) \ + raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) #define raw_spin_lock_irqsave_rcu_node(rnp, flags) \ do { \ @@ -615,11 +610,11 @@ do { \ raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags); \ } while (0) -static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp) -{ - bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock)); - - if (locked) - smp_mb__after_unlock_lock(); - return locked; -} +#define raw_spin_trylock_rcu_node(p) \ +({ \ + bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock)); \ + \ + if (___locked) \ + smp_mb__after_unlock_lock(); \ + ___locked; \ +}) -- cgit v1.2.1 From 83d40bd3bc3ab3d6b5a4a331f7667d627948a099 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 9 May 2017 13:28:51 -0700 Subject: rcu: Move rnp->lock wrappers for SRCU use This commit moves the now-generic rnp->lock wrapper macros from kernel/rcu/tree.h to kernel/rcu/rcu.h, thus allowing SRCU to use them. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcu/tree.h | 53 ----------------------------------------------------- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 6a1e85bd2eac..2a75beb883c8 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -303,6 +303,59 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt) cpu <= rnp->grphi; \ cpu = cpumask_next((cpu), cpu_possible_mask)) +/* + * Wrappers for the rcu_node::lock acquire and release. + * + * Because the rcu_nodes form a tree, the tree traversal locking will observe + * different lock values, this in turn means that an UNLOCK of one level + * followed by a LOCK of another level does not imply a full memory barrier; + * and most importantly transitivity is lost. + * + * In order to restore full ordering between tree levels, augment the regular + * lock acquire functions with smp_mb__after_unlock_lock(). 
+ * + * As ->lock of struct rcu_node is a __private field, therefore one should use + * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock. + */ +#define raw_spin_lock_rcu_node(p) \ +do { \ + raw_spin_lock(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock)) + +#define raw_spin_lock_irq_rcu_node(p) \ +do { \ + raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define raw_spin_unlock_irq_rcu_node(p) \ + raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) + +#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \ +do { \ + typecheck(unsigned long, flags); \ + raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags) \ +do { \ + typecheck(unsigned long, flags); \ + raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags); \ +} while (0) + +#define raw_spin_trylock_rcu_node(p) \ +({ \ + bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock)); \ + \ + if (___locked) \ + smp_mb__after_unlock_lock(); \ + ___locked; \ +}) + #endif /* #if defined(SRCU) || !defined(TINY_RCU) */ #ifdef CONFIG_TINY_RCU diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a7f63f1074b4..baa0bac8da2a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -565,56 +565,3 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ } #endif /* #ifdef CONFIG_RCU_TRACE */ - -/* - * Wrappers for the rcu_node::lock acquire and release. - * - * Because the rcu_nodes form a tree, the tree traversal locking will observe - * different lock values, this in turn means that an UNLOCK of one level - * followed by a LOCK of another level does not imply a full memory barrier; - * and most importantly transitivity is lost. - * - * In order to restore full ordering between tree levels, augment the regular - * lock acquire functions with smp_mb__after_unlock_lock(). - * - * As ->lock of struct rcu_node is a __private field, therefore one should use - * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock. - */ -#define raw_spin_lock_rcu_node(p) \ -do { \ - raw_spin_lock(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock)) - -#define raw_spin_lock_irq_rcu_node(p) \ -do { \ - raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define raw_spin_unlock_irq_rcu_node(p) \ - raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) - -#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \ -do { \ - typecheck(unsigned long, flags); \ - raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags); \ - smp_mb__after_unlock_lock(); \ -} while (0) - -#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags) \ -do { \ - typecheck(unsigned long, flags); \ - raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags); \ -} while (0) - -#define raw_spin_trylock_rcu_node(p) \ -({ \ - bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock)); \ - \ - if (___locked) \ - smp_mb__after_unlock_lock(); \ - ___locked; \ -}) -- cgit v1.2.1 From a3883df3935e10caa8297719d85fa8eaff7cabbd Mon Sep 17 00:00:00 2001 From: "Paul E. 
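[The raw_spin_trylock_rcu_node() definition above also shows why this conversion leans on the GCC/Clang statement-expression extension: a ({ ... }) block is an expression whose value is that of its last statement, which is how the macro can both attempt the lock and hand back whether it succeeded. A minimal, purely hypothetical illustration of the idiom, using the same triple-underscore naming to dodge shadowing warnings:

	#include <stdio.h>

	/* A statement expression: runs the block, yields ___v's final value. */
	#define clamp_nonneg(x)			\
	({					\
		int ___v = (x);			\
		if (___v < 0)			\
			___v = 0;		\
		___v;				\
	})

	int main(void)
	{
		printf("%d %d\n", clamp_nonneg(-5), clamp_nonneg(7)); /* prints "0 7" */
		return 0;
	}
]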
McKenney" Date: Tue, 9 May 2017 15:00:14 -0700 Subject: srcu: Use rnp->lock wrappers to replace explicit memory barriers This commit uses TREE RCU's rnp->lock wrappers to replace a few explicit memory barriers. This change also has the advantage of making SRCU's memory-ordering properties be implemented in roughly the same way as they are in Tree RCU. Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 8 ++--- kernel/rcu/srcutree.c | 91 +++++++++++++++++++++++------------------------- 2 files changed, 47 insertions(+), 52 deletions(-) diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 24e949bda12a..42973f787e7e 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -40,7 +40,7 @@ struct srcu_data { unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ /* Update-side state. */ - spinlock_t lock ____cacheline_internodealigned_in_smp; + raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ @@ -58,7 +58,7 @@ struct srcu_data { * Node in SRCU combining tree, similar in function to rcu_data. */ struct srcu_node { - spinlock_t lock; + raw_spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children */ /* having CBs, but only */ /* is > ->srcu_gq_seq. */ @@ -78,7 +78,7 @@ struct srcu_struct { struct srcu_node *level[RCU_NUM_LVLS + 1]; /* First node at each level. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ - spinlock_t gp_lock; /* protect ->srcu_cblist */ + raw_spinlock_t __private lock; /* Protect counters */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned int srcu_idx; /* Current rdr array element. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ @@ -109,7 +109,7 @@ void process_srcu(struct work_struct *work); #define __SRCU_STRUCT_INIT(name) \ { \ .sda = &name##_srcu_data, \ - .gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock), \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = 0 - 1, \ __SRCU_DEP_MAP_INIT(name) \ } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 66a998f9c5a7..d0ca524bf042 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -76,7 +76,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) /* Each pass through this loop initializes one srcu_node structure. */ rcu_for_each_node_breadth_first(sp, snp) { - spin_lock_init(&snp->lock); + raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock)); WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { @@ -110,7 +110,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) snp_first = sp->level[level]; for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(sp->sda, cpu); - spin_lock_init(&sdp->lock); + raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; @@ -169,7 +169,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name, /* Don't re-initialize a lock while it is held. 
*/ debug_check_no_locks_freed((void *)sp, sizeof(*sp)); lockdep_init_map(&sp->dep_map, name, key, 0); - spin_lock_init(&sp->gp_lock); + raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); return init_srcu_struct_fields(sp, false); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -186,7 +186,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); */ int init_srcu_struct(struct srcu_struct *sp) { - spin_lock_init(&sp->gp_lock); + raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); return init_srcu_struct_fields(sp, false); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -197,7 +197,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); * First-use initialization of statically allocated srcu_struct * structure. Wiring up the combining tree is more than can be * done with compile-time initialization, so this check is added - * to each update-side SRCU primitive. Use ->gp_lock, which -is- + * to each update-side SRCU primitive. Use sp->lock, which -is- * compile-time initialized, to resolve races involving multiple * CPUs trying to garner first-use privileges. */ @@ -209,13 +209,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp) /* The smp_load_acquire() pairs with the smp_store_release(). */ if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ - spin_lock_irqsave(&sp->gp_lock, flags); + raw_spin_lock_irqsave_rcu_node(sp, flags); if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { - spin_unlock_irqrestore(&sp->gp_lock, flags); + raw_spin_unlock_irqrestore_rcu_node(sp, flags); return; } init_srcu_struct_fields(sp, true); - spin_unlock_irqrestore(&sp->gp_lock, flags); + raw_spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -411,8 +411,7 @@ static void srcu_gp_start(struct srcu_struct *sp) struct srcu_data *sdp = this_cpu_ptr(sp->sda); int state; - RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock), - "Invoked srcu_gp_start() without ->gp_lock!"); + lockdep_assert_held(&sp->lock); WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&sp->srcu_gp_seq)); @@ -513,7 +512,7 @@ static void srcu_gp_end(struct srcu_struct *sp) mutex_lock(&sp->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq(&sp->gp_lock); + raw_spin_lock_irq_rcu_node(sp); idx = rcu_seq_state(sp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); cbdelay = srcu_get_delay(sp); @@ -522,7 +521,7 @@ static void srcu_gp_end(struct srcu_struct *sp) gpseq = rcu_seq_current(&sp->srcu_gp_seq); if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) sp->srcu_gp_seq_needed_exp = gpseq; - spin_unlock_irq(&sp->gp_lock); + raw_spin_unlock_irq_rcu_node(sp); mutex_unlock(&sp->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ @@ -530,7 +529,7 @@ static void srcu_gp_end(struct srcu_struct *sp) idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); rcu_for_each_node_breadth_first(sp, snp) { - spin_lock_irq(&snp->lock); + raw_spin_lock_irq_rcu_node(snp); cbs = false; if (snp >= sp->level[rcu_num_lvls - 1]) cbs = snp->srcu_have_cbs[idx] == gpseq; @@ -540,21 +539,19 @@ static void srcu_gp_end(struct srcu_struct *sp) snp->srcu_gp_seq_needed_exp = gpseq; mask = snp->srcu_data_have_cbs[idx]; snp->srcu_data_have_cbs[idx] = 0; - spin_unlock_irq(&snp->lock); - if (cbs) { - smp_mb(); /* GP end before CB invocation. */ + raw_spin_unlock_irq_rcu_node(snp); + if (cbs) srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); - } /* Occasionally prevent srcu_data counter wrap. 
*/ if (!(gpseq & counter_wrap_check)) for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { sdp = per_cpu_ptr(sp->sda, cpu); - spin_lock_irqsave(&sdp->lock, flags); + raw_spin_lock_irqsave_rcu_node(sdp, flags); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) sdp->srcu_gp_seq_needed = gpseq; - spin_unlock_irqrestore(&sdp->lock, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); } } @@ -562,17 +559,17 @@ static void srcu_gp_end(struct srcu_struct *sp) mutex_unlock(&sp->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq(&sp->gp_lock); + raw_spin_lock_irq_rcu_node(sp); gpseq = rcu_seq_current(&sp->srcu_gp_seq); if (!rcu_seq_state(gpseq) && ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { srcu_gp_start(sp); - spin_unlock_irq(&sp->gp_lock); + raw_spin_unlock_irq_rcu_node(sp); /* Throttle expedited grace periods: Should be rare! */ srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff ? 0 : SRCU_INTERVAL); } else { - spin_unlock_irq(&sp->gp_lock); + raw_spin_unlock_irq_rcu_node(sp); } } @@ -592,18 +589,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, if (rcu_seq_done(&sp->srcu_gp_seq, s) || ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) return; - spin_lock_irqsave(&snp->lock, flags); + raw_spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { - spin_unlock_irqrestore(&snp->lock, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); return; } WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore(&snp->lock, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave(&sp->gp_lock, flags); + raw_spin_lock_irqsave_rcu_node(sp, flags); if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) sp->srcu_gp_seq_needed_exp = s; - spin_unlock_irqrestore(&sp->gp_lock, flags); + raw_spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -625,14 +622,13 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, for (; snp != NULL; snp = snp->srcu_parent) { if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) return; /* GP already done and CBs recorded. */ - spin_lock_irqsave(&snp->lock, flags); + raw_spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { snp_seq = snp->srcu_have_cbs[idx]; if (snp == sdp->mynode && snp_seq == s) snp->srcu_data_have_cbs[idx] |= sdp->grpmask; - spin_unlock_irqrestore(&snp->lock, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); if (snp == sdp->mynode && snp_seq != s) { - smp_mb(); /* CBs after GP! */ srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0); @@ -647,11 +643,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, snp->srcu_data_have_cbs[idx] |= sdp->grpmask; if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) snp->srcu_gp_seq_needed_exp = s; - spin_unlock_irqrestore(&snp->lock, flags); + raw_spin_unlock_irqrestore_rcu_node(snp, flags); } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave(&sp->gp_lock, flags); + raw_spin_lock_irqsave_rcu_node(sp, flags); if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. 
Pair with load @@ -670,7 +666,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, queue_delayed_work(system_power_efficient_wq, &sp->work, srcu_get_delay(sp)); } - spin_unlock_irqrestore(&sp->gp_lock, flags); + raw_spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -833,7 +829,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rhp->func = func; local_irq_save(flags); sdp = this_cpu_ptr(sp->sda); - spin_lock(&sdp->lock); + raw_spin_lock_rcu_node(sdp); rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&sp->srcu_gp_seq)); @@ -847,7 +843,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, sdp->srcu_gp_seq_needed_exp = s; needexp = true; } - spin_unlock_irqrestore(&sdp->lock, flags); + raw_spin_unlock_irqrestore_rcu_node(sdp, flags); if (needgp) srcu_funnel_gp_start(sp, sdp, s, do_norm); else if (needexp) @@ -1018,7 +1014,7 @@ void srcu_barrier(struct srcu_struct *sp) */ for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(sp->sda, cpu); - spin_lock_irq(&sdp->lock); + raw_spin_lock_irq_rcu_node(sdp); atomic_inc(&sp->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); @@ -1027,7 +1023,7 @@ void srcu_barrier(struct srcu_struct *sp) debug_rcu_head_unqueue(&sdp->srcu_barrier_head); atomic_dec(&sp->srcu_barrier_cpu_cnt); } - spin_unlock_irq(&sdp->lock); + raw_spin_unlock_irq_rcu_node(sdp); } /* Remove the initial count, at which point reaching zero can happen. */ @@ -1076,17 +1072,17 @@ static void srcu_advance_state(struct srcu_struct *sp) */ idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq(&sp->gp_lock); + raw_spin_lock_irq_rcu_node(sp); if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); - spin_unlock_irq(&sp->gp_lock); + raw_spin_unlock_irq_rcu_node(sp); mutex_unlock(&sp->srcu_gp_mutex); return; } idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) srcu_gp_start(sp); - spin_unlock_irq(&sp->gp_lock); + raw_spin_unlock_irq_rcu_node(sp); if (idx != SRCU_STATE_IDLE) { mutex_unlock(&sp->srcu_gp_mutex); return; /* Someone else started the grace period. */ @@ -1135,20 +1131,19 @@ static void srcu_invoke_callbacks(struct work_struct *work) sdp = container_of(work, struct srcu_data, work.work); sp = sdp->sp; rcu_cblist_init(&ready_cbs); - spin_lock_irq(&sdp->lock); - smp_mb(); /* Old grace periods before callback invocation! */ + raw_spin_lock_irq_rcu_node(sdp); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&sp->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { - spin_unlock_irq(&sdp->lock); + raw_spin_unlock_irq_rcu_node(sdp); return; /* Someone else on the job or nothing to do. */ } /* We are on the job! Extract and invoke ready callbacks. */ sdp->srcu_cblist_invoking = true; rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); - spin_unlock_irq(&sdp->lock); + raw_spin_unlock_irq_rcu_node(sdp); rhp = rcu_cblist_dequeue(&ready_cbs); for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { debug_rcu_head_unqueue(rhp); @@ -1161,13 +1156,13 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Update counts, accelerate new callbacks, and if needed, * schedule another round of callback invocation. 
*/ - spin_lock_irq(&sdp->lock); + raw_spin_lock_irq_rcu_node(sdp); rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, rcu_seq_snap(&sp->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); - spin_unlock_irq(&sdp->lock); + raw_spin_unlock_irq_rcu_node(sdp); if (more) srcu_schedule_cbs_sdp(sdp, 0); } @@ -1180,7 +1175,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) { bool pushgp = true; - spin_lock_irq(&sp->gp_lock); + raw_spin_lock_irq_rcu_node(sp); if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ @@ -1190,7 +1185,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) { /* Outstanding request and no GP. Start one. */ srcu_gp_start(sp); } - spin_unlock_irq(&sp->gp_lock); + raw_spin_unlock_irq_rcu_node(sp); if (pushgp) queue_delayed_work(system_power_efficient_wq, &sp->work, delay); -- cgit v1.2.1 From 90040c9e3015054db7efa0101afdd446d1167fe8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 10 May 2017 14:36:55 -0700 Subject: rcu: Remove *_SLOW_* Kconfig options The RCU_TORTURE_TEST_SLOW_PREINIT, RCU_TORTURE_TEST_SLOW_PREINIT_DELAY, RCU_TORTURE_TEST_SLOW_INIT, RCU_TORTURE_TEST_SLOW_INIT_DELAY, RCU_TORTURE_TEST_SLOW_CLEANUP, and RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY Kconfig options are only useful for torture testing, and there are the rcutree.gp_cleanup_delay, rcutree.gp_init_delay, and rcutree.gp_preinit_delay kernel boot parameters that rcutorture can use instead. The effect of these parameters is to artificially slow down grace-period initialization and cleanup in order to make some types of race conditions happen more often. This commit therefore simplifies Tree RCU a bit by removing the Kconfig options and adding the corresponding kernel parameters to rcutorture's .boot files instead. However, this commit also leaves out the kernel parameters for TREE02, TREE04, and TREE07 in order to have about the same number of tests slowed as not slowed. TREE01, TREE03, TREE05, and TREE06 are slowed, and the rest are not slowed. Reported-by: Linus Torvalds Signed-off-by: Paul E.
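[The replacement pattern in this commit is a compile-time Kconfig constant becoming a run-time module parameter, so a delay such as rcutree.gp_init_delay=3 can be requested from the boot command line (or an rcutorture .boot file) without rebuilding the kernel. A minimal sketch of that idiom as a hypothetical loadable module; the module_param() usage mirrors the tree.c hunk that follows.

	#include <linux/init.h>
	#include <linux/module.h>

	/*
	 * Formerly a CONFIG_* constant; now settable at boot as
	 * "gpdelay_sim.gp_init_delay=3".  Mode 0444: readable via sysfs,
	 * not writable at run time, matching the tree.c conversion.
	 */
	static int gp_init_delay;
	module_param(gp_init_delay, int, 0444);

	static int __init gpdelay_sim_init(void)
	{
		if (gp_init_delay)
			pr_info("gpdelay_sim: GP init slowdown %d jiffies\n",
				gp_init_delay);
		return 0;
	}
	module_init(gpdelay_sim_init);

	static void __exit gpdelay_sim_exit(void) { }
	module_exit(gpdelay_sim_exit);

	MODULE_LICENSE("GPL");
	MODULE_DESCRIPTION("Sketch: Kconfig option converted to a module parameter");
]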
McKenney --- Documentation/admin-guide/kernel-parameters.txt | 10 +-- kernel/rcu/tree.c | 26 ++------ kernel/rcu/tree_plugin.h | 6 +- lib/Kconfig.debug | 75 ---------------------- .../selftests/rcutorture/configs/rcu/TREE01 | 3 - .../selftests/rcutorture/configs/rcu/TREE01.boot | 3 + .../selftests/rcutorture/configs/rcu/TREE02 | 3 - .../selftests/rcutorture/configs/rcu/TREE03 | 3 - .../selftests/rcutorture/configs/rcu/TREE03.boot | 3 + .../selftests/rcutorture/configs/rcu/TREE04 | 3 - .../selftests/rcutorture/configs/rcu/TREE05 | 3 - .../selftests/rcutorture/configs/rcu/TREE05.boot | 3 + .../selftests/rcutorture/configs/rcu/TREE06 | 3 - .../selftests/rcutorture/configs/rcu/TREE06.boot | 3 + .../selftests/rcutorture/configs/rcu/TREE07 | 3 - .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 9 --- 16 files changed, 24 insertions(+), 135 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6671f9b60a86..f85bfe02f052 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3229,21 +3229,17 @@ rcutree.gp_cleanup_delay= [KNL] Set the number of jiffies to delay each step of - RCU grace-period cleanup. This only has effect - when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set. + RCU grace-period cleanup. rcutree.gp_init_delay= [KNL] Set the number of jiffies to delay each step of - RCU grace-period initialization. This only has - effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT - is set. + RCU grace-period initialization. rcutree.gp_preinit_delay= [KNL] Set the number of jiffies to delay each step of RCU grace-period pre-initialization, that is, the propagation of recent CPU-hotplug changes up - the rcu_node combining tree. This only has effect - when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set. + the rcu_node combining tree. rcutree.rcu_fanout_exact= [KNL] Disable autobalancing of the rcu_node combining diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cac24f5d3fd2..bbbddd85906b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -177,26 +177,12 @@ module_param(kthread_prio, int, 0644); /* Delay in jiffies for grace-period initialization delays, debug only. 
*/ -#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY; -module_param(gp_preinit_delay, int, 0644); -#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ -static const int gp_preinit_delay; -#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ - -#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT -static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; -module_param(gp_init_delay, int, 0644); -#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ -static const int gp_init_delay; -#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ - -#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY; -module_param(gp_cleanup_delay, int, 0644); -#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ -static const int gp_cleanup_delay; -#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ +static int gp_preinit_delay; +module_param(gp_preinit_delay, int, 0444); +static int gp_init_delay; +module_param(gp_init_delay, int, 0444); +static int gp_cleanup_delay; +module_param(gp_cleanup_delay, int, 0444); /* * Number of grace periods between delays, normalized by the duration of diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 116cf8339826..0553d9fed7d7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -109,11 +109,11 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tKick kthreads if too-long grace period.\n"); if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) pr_info("\tRCU callback double-/use-after-free debug enabled.\n"); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT)) + if (gp_preinit_delay) pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT)) + if (gp_init_delay) pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP)) + if (gp_cleanup_delay) pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) pr_info("\tRCU debug extended QS entry/exit.\n"); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e4587ebe52c7..960c5d2d3c03 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1373,81 +1373,6 @@ config RCU_TORTURE_TEST Say M if you want the RCU torture tests to build as a module. Say N if you are unsure. -config RCU_TORTURE_TEST_SLOW_PREINIT - bool "Slow down RCU grace-period pre-initialization to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period pre-initialization (the - propagation of CPU-hotplug changes up the rcu_node combining - tree) for a few jiffies between initializing each pair of - consecutive rcu_node structures. This helps to expose races - involving grace-period pre-initialization, in other words, it - makes your kernel less stable. It can also greatly increase - grace-period latency, especially on systems with large numbers - of CPUs. This is useful when torture-testing RCU, but in - almost no other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY - int "How much to slow down RCU grace-period pre-initialization" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_PREINIT - help - This option specifies the number of jiffies to wait between - each rcu_node structure pre-initialization step. 
- -config RCU_TORTURE_TEST_SLOW_INIT - bool "Slow down RCU grace-period initialization to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period initialization for a few - jiffies between initializing each pair of consecutive - rcu_node structures. This helps to expose races involving - grace-period initialization, in other words, it makes your - kernel less stable. It can also greatly increase grace-period - latency, especially on systems with large numbers of CPUs. - This is useful when torture-testing RCU, but in almost no - other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_INIT_DELAY - int "How much to slow down RCU grace-period initialization" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_INIT - help - This option specifies the number of jiffies to wait between - each rcu_node structure initialization. - -config RCU_TORTURE_TEST_SLOW_CLEANUP - bool "Slow down RCU grace-period cleanup to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period cleanup for a few jiffies - between cleaning up each pair of consecutive rcu_node - structures. This helps to expose races involving grace-period - cleanup, in other words, it makes your kernel less stable. - It can also greatly increase grace-period latency, especially - on systems with large numbers of CPUs. This is useful when - torture-testing RCU, but in almost no other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY - int "How much to slow down RCU grace-period cleanup" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_CLEANUP - help - This option specifies the number of jiffies to wait between - each rcu_node structure cleanup operation. 
- config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" depends on RCU_STALL_COMMON diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index cc6c5815236e..92ca49f90ef9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -17,6 +17,3 @@ CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index adc3abc82fb8..89705ed79596 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -1 +1,4 @@ rcutorture.torture_type=rcu_bh maxcpus=8 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index 1cecab330ba0..35e639e39366 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -19,8 +19,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n CONFIG_RCU_BOOST=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 index 3b93ee544e70..7a17c503b382 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 @@ -17,6 +17,3 @@ CONFIG_RCU_BOOST=y CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 120c0c88d100..9ef3aed126e9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -1 +1,4 @@ rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 851c01ae2cea..27d22695d64c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -18,7 +18,4 @@ CONFIG_RCU_FANOUT_LEAF=3 CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y CONFIG_RCU_EQS_DEBUG=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index d4cdc0d74e16..1257d3227b1e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -19,6 +19,3 @@ CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y 
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot index 15b3e1a86f74..c7fd050dfcd9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot @@ -1,2 +1,5 @@ rcutorture.torture_type=sched rcupdate.rcu_self_test_sched=1 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index 9215827649bd..05a4eec3f27b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -21,6 +21,3 @@ CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot index dd90f28ed700..ad18b52a2cad 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot @@ -2,3 +2,6 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 rcupdate.rcu_self_test_sched=1 rcutree.rcu_fanout_exact=1 +rcutree.gp_preinit_delay=3 +rcutree.gp_init_delay=3 +rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index 99f04e4c5162..b9ddd3beeb9a 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -17,6 +17,3 @@ CONFIG_RCU_FANOUT_LEAF=2 CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y -CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 364801b1a230..1dfec4657d95 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -28,9 +28,6 @@ CONFIG_RCU_TRACE -- Do half. CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations. CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not. -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -- Do for all but a couple TREE scenarios. -CONFIG_RCU_TORTURE_TEST_SLOW_INIT -- Do for all but a couple TREE scenarios. -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -- Do for all but a couple TREE scenarios. RCU-bh: Do one with PREEMPT and one with !PREEMPT. RCU-sched: Do one with PREEMPT but not BOOST. @@ -78,12 +75,6 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE Always used in KVM testing. -CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY -CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY -CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY - - Inspection suffices, ignore. - CONFIG_PREEMPT_RCU CONFIG_TREE_RCU CONFIG_TINY_RCU -- cgit v1.2.1 From f7a10a975036ef9ca957bfe12ab2d4b1a46cccd1 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 10 May 2017 15:57:16 -0700 Subject: rcu: Remove the RCU_KTHREAD_PRIO Kconfig option Anything that can be done with the RCU_KTHREAD_PRIO Kconfig option can also be done with the rcutree.kthread_prio kernel boot parameter. This commit therefore removes this Kconfig option. Reported-by: Linus Torvalds Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Cc: Rik van Riel --- init/Kconfig | 31 ---------------------- kernel/rcu/tree.c | 4 --- .../selftests/rcutorture/configs/rcu/TREE03 | 1 - .../selftests/rcutorture/configs/rcu/TREE03.boot | 1 + .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 1 - 5 files changed, 1 insertion(+), 37 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index a2cfde19e8b8..6f257d51f582 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -697,37 +697,6 @@ config RCU_BOOST Say Y here if you are working with real-time apps or heavy loads Say N here if you are unsure. -config RCU_KTHREAD_PRIO - int "Real-time priority to use for RCU worker threads" - range 1 99 if RCU_BOOST - range 0 99 if !RCU_BOOST - default 1 if RCU_BOOST - default 0 if !RCU_BOOST - depends on RCU_EXPERT - help - This option specifies the SCHED_FIFO priority value that will be - assigned to the rcuc/n and rcub/n threads and is also the value - used for RCU_BOOST (if enabled). If you are working with a - real-time application that has one or more CPU-bound threads - running at a real-time priority level, you should set - RCU_KTHREAD_PRIO to a priority higher than the highest-priority - real-time CPU-bound application thread. The default RCU_KTHREAD_PRIO - value of 1 is appropriate in the common case, which is real-time - applications that do not have any CPU-bound threads. - - Some real-time applications might not have a single real-time - thread that saturates a given CPU, but instead might have - multiple real-time threads that, taken together, fully utilize - that CPU. In this case, you should set RCU_KTHREAD_PRIO to - a priority higher than the lowest-priority thread that is - conspiring to prevent the CPU from running any non-real-time - tasks. For example, if one thread at priority 10 and another - thread at priority 5 are between themselves fully consuming - the CPU time on a given CPU, then RCU_KTHREAD_PRIO should be - set to priority 6 or higher. - - Specify the real-time priority, or take the default if unsure. - config RCU_BOOST_DELAY int "Milliseconds to delay boosting after RCU grace-period start" range 0 3000 diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index bbbddd85906b..187ac3f41526 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -168,11 +168,7 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, static void sync_sched_exp_online_cleanup(int cpu); /* rcuc/rcub kthread realtime priority */ -#ifdef CONFIG_RCU_KTHREAD_PRIO -static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; -#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */ static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; -#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */ module_param(kthread_prio, int, 0644); /* Delay in jiffies for grace-period initialization delays, debug only. 
*/ diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 index 7a17c503b382..2dc31b16e506 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 @@ -14,6 +14,5 @@ CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=y -CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 9ef3aed126e9..5d2cc0bd50a0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -2,3 +2,4 @@ rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 +rcutree.kthread_prio=2 diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 1dfec4657d95..b5ea8489969a 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -16,7 +16,6 @@ CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_PROVE_RCU_REPEATEDLY -- Do one. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. -CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. -- cgit v1.2.1 From fe5ac724d81a3c7803e60c2232718f212f3f38d4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 May 2017 11:26:22 -0700 Subject: rcu: Remove nohz_full full-system-idle state machine The NO_HZ_FULL_SYSIDLE full-system-idle capability was added in 2013 by commit 0edd1b1784cb ("nohz_full: Add full-system-idle state machine"), but has not been used. This commit therefore removes it. If it turns out to be needed later, this commit can always be reverted. Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Cc: Rik van Riel Cc: Ingo Molnar Acked-by: Linus Torvalds --- .../RCU/Design/Requirements/Requirements.html | 6 +- include/linux/rcupdate.h | 9 - kernel/rcu/tree.c | 41 +- kernel/rcu/tree.h | 16 - kernel/rcu/tree_plugin.h | 429 --------------------- kernel/time/Kconfig | 50 --- .../selftests/rcutorture/configs/rcu/TREE07 | 1 - .../testing/selftests/rcutorture/doc/TINY_RCU.txt | 1 - .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 7 +- 9 files changed, 9 insertions(+), 551 deletions(-) diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index cb614f2a69c2..8c94fc1d1c84 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -2520,11 +2520,7 @@ It is similarly socially unacceptable to interrupt an nohz_full CPU running in userspace. RCU must therefore track nohz_full userspace execution. -And in -CONFIG_NO_HZ_FULL_SYSIDLE=y -kernels, RCU must separately track idle CPUs on the one hand and -CPUs that are either idle or executing in userspace on the other. 
-In both cases, RCU must be able to sample state at two points in +RCU must therefore be able to sample state at two points in time, and be able to determine whether or not some other CPU spent any time idle and/or executing in userspace. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ee40d7eba741..7f24a5e673f5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -854,15 +854,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -/* Only for use by adaptive-ticks code. */ -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE -bool rcu_sys_is_idle(void); -void rcu_sysidle_force_exit(void); -#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -static inline bool rcu_sys_is_idle(void) { return false; } -static inline void rcu_sysidle_force_exit(void) { } -#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ - /* * Place this after a lock-acquisition primitive to guarantee that diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 187ac3f41526..51d4c3acf32d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -270,10 +270,6 @@ void rcu_bh_qs(void) static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE - .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, - .dynticks_idle = ATOMIC_INIT(1), -#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ }; /* @@ -546,10 +542,7 @@ module_param(jiffies_till_sched_qs, ulong, 0644); static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); -static void force_qs_rnp(struct rcu_state *rsp, - int (*f)(struct rcu_data *rsp, bool *isidle, - unsigned long *maxj), - bool *isidle, unsigned long *maxj); +static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp)); static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(void); @@ -854,7 +847,6 @@ void rcu_idle_enter(void) local_irq_save(flags); rcu_eqs_enter(false); - rcu_sysidle_enter(0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -904,7 +896,6 @@ void rcu_irq_exit(void) trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1); rdtp->dynticks_nesting--; } - rcu_sysidle_enter(1); } /* @@ -986,7 +977,6 @@ void rcu_idle_exit(void) local_irq_save(flags); rcu_eqs_exit(false); - rcu_sysidle_exit(0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_exit); @@ -1038,7 +1028,6 @@ void rcu_irq_enter(void) trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); else rcu_eqs_exit_common(oldval, true); - rcu_sysidle_exit(1); } /* @@ -1217,11 +1206,9 @@ static int rcu_is_cpu_rrupt_from_idle(void) * credit them with an implicit quiescent state. Return 1 if this CPU * is in dynticks idle mode, which is an extended quiescent state. 
*/ -static int dyntick_save_progress_counter(struct rcu_data *rdp, - bool *isidle, unsigned long *maxj) +static int dyntick_save_progress_counter(struct rcu_data *rdp) { rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks); - rcu_sysidle_check_cpu(rdp, isidle, maxj); if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, @@ -1238,8 +1225,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, * idle state since the last call to dyntick_save_progress_counter() * for this same CPU, or by virtue of having been offline. */ -static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, - bool *isidle, unsigned long *maxj) +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { unsigned long jtsq; bool *rnhqp; @@ -2105,25 +2091,16 @@ static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp) */ static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time) { - bool isidle = false; - unsigned long maxj; struct rcu_node *rnp = rcu_get_root(rsp); WRITE_ONCE(rsp->gp_activity, jiffies); rsp->n_force_qs++; if (first_time) { /* Collect dyntick-idle snapshots. */ - if (is_sysidle_rcu_state(rsp)) { - isidle = true; - maxj = jiffies - ULONG_MAX / 4; - } - force_qs_rnp(rsp, dyntick_save_progress_counter, - &isidle, &maxj); - rcu_sysidle_report_gp(rsp, isidle, maxj); + force_qs_rnp(rsp, dyntick_save_progress_counter); } else { /* Handle dyntick-idle and offline CPUs. */ - isidle = true; - force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); + force_qs_rnp(rsp, rcu_implicit_dynticks_qs); } /* Clear flag to prevent immediate re-entry. */ if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { @@ -2895,10 +2872,7 @@ void rcu_check_callbacks(int user) * * The caller must have suppressed start of new grace periods. */ -static void force_qs_rnp(struct rcu_state *rsp, - int (*f)(struct rcu_data *rsp, bool *isidle, - unsigned long *maxj), - bool *isidle, unsigned long *maxj) +static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp)) { int cpu; unsigned long flags; @@ -2937,7 +2911,7 @@ static void force_qs_rnp(struct rcu_state *rsp, for_each_leaf_node_possible_cpu(rnp, cpu) { unsigned long bit = leaf_node_cpu_bit(rnp, cpu); if ((rnp->qsmask & bit) != 0) { - if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) + if (f(per_cpu_ptr(rsp->rda, cpu))) mask |= bit; } } @@ -3793,7 +3767,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) !init_nocb_callback_list(rdp)) rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - rcu_sysidle_init_percpu_data(rdp->dynticks); rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index baa0bac8da2a..2c112bb11aa8 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -45,14 +45,6 @@ struct rcu_dynticks { bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ bool rcu_urgent_qs; /* GP old need light quiescent state. */ -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE - long long dynticks_idle_nesting; - /* irq/process nesting level from idle. */ - atomic_t dynticks_idle; /* Even value for idle, else odd. */ - /* "Idle" excludes userspace execution. */ - unsigned long dynticks_idle_jiffies; - /* End of last non-NMI non-idle period. 
*/ -#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ #ifdef CONFIG_RCU_FAST_NO_HZ bool all_lazy; /* Are all CPU's CBs lazy? */ unsigned long nonlazy_posted; @@ -529,15 +521,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp); #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ static void __maybe_unused rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); -static void rcu_sysidle_enter(int irq); -static void rcu_sysidle_exit(int irq); -static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, - unsigned long *maxj); -static bool is_sysidle_rcu_state(struct rcu_state *rsp); -static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, - unsigned long maxj); static void rcu_bind_gp_kthread(void); -static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); static bool rcu_nohz_full_cpu(struct rcu_state *rsp); static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0553d9fed7d7..f524d967f7b6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2563,429 +2563,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu) #endif /* #ifdef CONFIG_NO_HZ_FULL */ } - -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE - -static int full_sysidle_state; /* Current system-idle state. */ -#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */ -#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */ -#define RCU_SYSIDLE_LONG 2 /* All CPUs idle for long enough. */ -#define RCU_SYSIDLE_FULL 3 /* All CPUs idle, ready for sysidle. */ -#define RCU_SYSIDLE_FULL_NOTED 4 /* Actually entered sysidle state. */ - -/* - * Invoked to note exit from irq or task transition to idle. Note that - * usermode execution does -not- count as idle here! After all, we want - * to detect full-system idle states, not RCU quiescent states and grace - * periods. The caller must have disabled interrupts. - */ -static void rcu_sysidle_enter(int irq) -{ - unsigned long j; - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - - RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_enter() invoked with irqs enabled!!!"); - - /* If there are no nohz_full= CPUs, no need to track this. */ - if (!tick_nohz_full_enabled()) - return; - - /* Adjust nesting, check for fully idle. */ - if (irq) { - rdtp->dynticks_idle_nesting--; - WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0); - if (rdtp->dynticks_idle_nesting != 0) - return; /* Still not fully idle. */ - } else { - if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) == - DYNTICK_TASK_NEST_VALUE) { - rdtp->dynticks_idle_nesting = 0; - } else { - rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE; - WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0); - return; /* Still not fully idle. */ - } - } - - /* Record start of fully idle period. */ - j = jiffies; - WRITE_ONCE(rdtp->dynticks_idle_jiffies, j); - smp_mb__before_atomic(); - atomic_inc(&rdtp->dynticks_idle); - smp_mb__after_atomic(); - WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1); -} - -/* - * Unconditionally force exit from full system-idle state. This is - * invoked when a normal CPU exits idle, but must be called separately - * for the timekeeping CPU (tick_do_timer_cpu). The reason for this - * is that the timekeeping CPU is permitted to take scheduling-clock - * interrupts while the system is in system-idle state, and of course - * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock - * interrupt from any other type of interrupt. 
- */ -void rcu_sysidle_force_exit(void) -{ - int oldstate = READ_ONCE(full_sysidle_state); - int newoldstate; - - /* - * Each pass through the following loop attempts to exit full - * system-idle state. If contention proves to be a problem, - * a trylock-based contention tree could be used here. - */ - while (oldstate > RCU_SYSIDLE_SHORT) { - newoldstate = cmpxchg(&full_sysidle_state, - oldstate, RCU_SYSIDLE_NOT); - if (oldstate == newoldstate && - oldstate == RCU_SYSIDLE_FULL_NOTED) { - rcu_kick_nohz_cpu(tick_do_timer_cpu); - return; /* We cleared it, done! */ - } - oldstate = newoldstate; - } - smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */ -} - -/* - * Invoked to note entry to irq or task transition from idle. Note that - * usermode execution does -not- count as idle here! The caller must - * have disabled interrupts. - */ -static void rcu_sysidle_exit(int irq) -{ - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - - RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_exit() invoked with irqs enabled!!!"); - - /* If there are no nohz_full= CPUs, no need to track this. */ - if (!tick_nohz_full_enabled()) - return; - - /* Adjust nesting, check for already non-idle. */ - if (irq) { - rdtp->dynticks_idle_nesting++; - WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0); - if (rdtp->dynticks_idle_nesting != 1) - return; /* Already non-idle. */ - } else { - /* - * Allow for irq misnesting. Yes, it really is possible - * to enter an irq handler then never leave it, and maybe - * also vice versa. Handle both possibilities. - */ - if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) { - rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE; - WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0); - return; /* Already non-idle. */ - } else { - rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE; - } - } - - /* Record end of idle period. */ - smp_mb__before_atomic(); - atomic_inc(&rdtp->dynticks_idle); - smp_mb__after_atomic(); - WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1)); - - /* - * If we are the timekeeping CPU, we are permitted to be non-idle - * during a system-idle state. This must be the case, because - * the timekeeping CPU has to take scheduling-clock interrupts - * during the time that the system is transitioning to full - * system-idle state. This means that the timekeeping CPU must - * invoke rcu_sysidle_force_exit() directly if it does anything - * more than take a scheduling-clock interrupt. - */ - if (smp_processor_id() == tick_do_timer_cpu) - return; - - /* Update system-idle state: We are clearly no longer fully idle! */ - rcu_sysidle_force_exit(); -} - -/* - * Check to see if the current CPU is idle. Note that usermode execution - * does not count as idle. The caller must have disabled interrupts, - * and must be running on tick_do_timer_cpu. - */ -static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, - unsigned long *maxj) -{ - int cur; - unsigned long j; - struct rcu_dynticks *rdtp = rdp->dynticks; - - RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sysidle_check_cpu() invoked with irqs enabled!!!"); - - /* If there are no nohz_full= CPUs, don't check system-wide idleness. */ - if (!tick_nohz_full_enabled()) - return; - - /* - * If some other CPU has already reported non-idle, if this is - * not the flavor of RCU that tracks sysidle state, or if this - * is an offline or the timekeeping CPU, nothing to do. 
- */ - if (!*isidle || rdp->rsp != rcu_state_p || - cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) - return; - /* Verify affinity of current kthread. */ - WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); - - /* Pick up current idle and NMI-nesting counter and check. */ - cur = atomic_read(&rdtp->dynticks_idle); - if (cur & 0x1) { - *isidle = false; /* We are not idle! */ - return; - } - smp_mb(); /* Read counters before timestamps. */ - - /* Pick up timestamps. */ - j = READ_ONCE(rdtp->dynticks_idle_jiffies); - /* If this CPU entered idle more recently, update maxj timestamp. */ - if (ULONG_CMP_LT(*maxj, j)) - *maxj = j; -} - -/* - * Is this the flavor of RCU that is handling full-system idle? - */ -static bool is_sysidle_rcu_state(struct rcu_state *rsp) -{ - return rsp == rcu_state_p; -} - -/* - * Return a delay in jiffies based on the number of CPUs, rcu_node - * leaf fanout, and jiffies tick rate. The idea is to allow larger - * systems more time to transition to full-idle state in order to - * avoid the cache thrashing that otherwise occur on the state variable. - * Really small systems (less than a couple of tens of CPUs) should - * instead use a single global atomically incremented counter, and later - * versions of this will automatically reconfigure themselves accordingly. - */ -static unsigned long rcu_sysidle_delay(void) -{ - if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) - return 0; - return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000); -} - -/* - * Advance the full-system-idle state. This is invoked when all of - * the non-timekeeping CPUs are idle. - */ -static void rcu_sysidle(unsigned long j) -{ - /* Check the current state. */ - switch (READ_ONCE(full_sysidle_state)) { - case RCU_SYSIDLE_NOT: - - /* First time all are idle, so note a short idle period. */ - WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT); - break; - - case RCU_SYSIDLE_SHORT: - - /* - * Idle for a bit, time to advance to next state? - * cmpxchg failure means race with non-idle, let them win. - */ - if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay())) - (void)cmpxchg(&full_sysidle_state, - RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG); - break; - - case RCU_SYSIDLE_LONG: - - /* - * Do an additional check pass before advancing to full. - * cmpxchg failure means race with non-idle, let them win. - */ - if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay())) - (void)cmpxchg(&full_sysidle_state, - RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL); - break; - - default: - break; - } -} - -/* - * Found a non-idle non-timekeeping CPU, so kick the system-idle state - * back to the beginning. - */ -static void rcu_sysidle_cancel(void) -{ - smp_mb(); - if (full_sysidle_state > RCU_SYSIDLE_SHORT) - WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT); -} - -/* - * Update the sysidle state based on the results of a force-quiescent-state - * scan of the CPUs' dyntick-idle state. - */ -static void rcu_sysidle_report(struct rcu_state *rsp, int isidle, - unsigned long maxj, bool gpkt) -{ - if (rsp != rcu_state_p) - return; /* Wrong flavor, ignore. */ - if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) - return; /* Running state machine from timekeeping CPU. */ - if (isidle) - rcu_sysidle(maxj); /* More idle! */ - else - rcu_sysidle_cancel(); /* Idle is over. */ -} - -/* - * Wrapper for rcu_sysidle_report() when called from the grace-period - * kthread's context. 
- */ -static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, - unsigned long maxj) -{ - /* If there are no nohz_full= CPUs, no need to track this. */ - if (!tick_nohz_full_enabled()) - return; - - rcu_sysidle_report(rsp, isidle, maxj, true); -} - -/* Callback and function for forcing an RCU grace period. */ -struct rcu_sysidle_head { - struct rcu_head rh; - int inuse; -}; - -static void rcu_sysidle_cb(struct rcu_head *rhp) -{ - struct rcu_sysidle_head *rshp; - - /* - * The following memory barrier is needed to replace the - * memory barriers that would normally be in the memory - * allocator. - */ - smp_mb(); /* grace period precedes setting inuse. */ - - rshp = container_of(rhp, struct rcu_sysidle_head, rh); - WRITE_ONCE(rshp->inuse, 0); -} - -/* - * Check to see if the system is fully idle, other than the timekeeping CPU. - * The caller must have disabled interrupts. This is not intended to be - * called unless tick_nohz_full_enabled(). - */ -bool rcu_sys_is_idle(void) -{ - static struct rcu_sysidle_head rsh; - int rss = READ_ONCE(full_sysidle_state); - - RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_sys_is_idle() invoked with irqs enabled!!!"); - - if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu)) - return false; - - /* Handle small-system case by doing a full scan of CPUs. */ - if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) { - int oldrss = rss - 1; - - /* - * One pass to advance to each state up to _FULL. - * Give up if any pass fails to advance the state. - */ - while (rss < RCU_SYSIDLE_FULL && oldrss < rss) { - int cpu; - bool isidle = true; - unsigned long maxj = jiffies - ULONG_MAX / 4; - struct rcu_data *rdp; - - /* Scan all the CPUs looking for nonidle CPUs. */ - for_each_possible_cpu(cpu) { - rdp = per_cpu_ptr(rcu_state_p->rda, cpu); - rcu_sysidle_check_cpu(rdp, &isidle, &maxj); - if (!isidle) - break; - } - rcu_sysidle_report(rcu_state_p, isidle, maxj, false); - oldrss = rss; - rss = READ_ONCE(full_sysidle_state); - } - } - - /* If this is the first observation of an idle period, record it. */ - if (rss == RCU_SYSIDLE_FULL) { - rss = cmpxchg(&full_sysidle_state, - RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED); - return rss == RCU_SYSIDLE_FULL; - } - - smp_mb(); /* ensure rss load happens before later caller actions. */ - - /* If already fully idle, tell the caller (in case of races). */ - if (rss == RCU_SYSIDLE_FULL_NOTED) - return true; - - /* - * If we aren't there yet, and a grace period is not in flight, - * initiate a grace period. Either way, tell the caller that - * we are not there yet. We use an xchg() rather than an assignment - * to make up for the memory barriers that would otherwise be - * provided by the memory allocator. - */ - if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL && - !rcu_gp_in_progress(rcu_state_p) && - !rsh.inuse && xchg(&rsh.inuse, 1) == 0) - call_rcu(&rsh.rh, rcu_sysidle_cb); - return false; -} - -/* - * Initialize dynticks sysidle state for CPUs coming online. 
- */ -static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp) -{ - rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE; -} - -#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ - -static void rcu_sysidle_enter(int irq) -{ -} - -static void rcu_sysidle_exit(int irq) -{ -} - -static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, - unsigned long *maxj) -{ -} - -static bool is_sysidle_rcu_state(struct rcu_state *rsp) -{ - return false; -} - -static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, - unsigned long maxj) -{ -} - -static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp) -{ -} - -#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ - /* * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the * grace-period kthread will do force_quiescent_state() processing? @@ -3016,13 +2593,7 @@ static void rcu_bind_gp_kthread(void) if (!tick_nohz_full_enabled()) return; -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE - cpu = tick_do_timer_cpu; - if (cpu >= 0 && cpu < nr_cpu_ids) - set_cpus_allowed_ptr(current, cpumask_of(cpu)); -#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ housekeeping_affine(current); -#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ } /* Record the current task on dyntick-idle entry. */ diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 4008d9f95dd7..ac09bc29eb08 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -126,56 +126,6 @@ config NO_HZ_FULL_ALL Note the boot CPU will still be kept outside the range to handle the timekeeping duty. -config NO_HZ_FULL_SYSIDLE - bool "Detect full-system idle state for full dynticks system" - depends on NO_HZ_FULL - default n - help - At least one CPU must keep the scheduling-clock tick running for - timekeeping purposes whenever there is a non-idle CPU, where - "non-idle" also includes dynticks CPUs as long as they are - running non-idle tasks. Because the underlying adaptive-tick - support cannot distinguish between all CPUs being idle and - all CPUs each running a single task in dynticks mode, the - underlying support simply ensures that there is always a CPU - handling the scheduling-clock tick, whether or not all CPUs - are idle. This Kconfig option enables scalable detection of - the all-CPUs-idle state, thus allowing the scheduling-clock - tick to be disabled when all CPUs are idle. Note that scalable - detection of the all-CPUs-idle state means that larger systems - will be slower to declare the all-CPUs-idle state. - - Say Y if you would like to help debug all-CPUs-idle detection. - - Say N if you are unsure. - -config NO_HZ_FULL_SYSIDLE_SMALL - int "Number of CPUs above which large-system approach is used" - depends on NO_HZ_FULL_SYSIDLE - range 1 NR_CPUS - default 8 - help - The full-system idle detection mechanism takes a lazy approach - on large systems, as is required to attain decent scalability. - However, on smaller systems, scalability is not anywhere near as - large a concern as is energy efficiency. The sysidle subsystem - therefore uses a fast but non-scalable algorithm for small - systems and a lazier but scalable algorithm for large systems. - This Kconfig parameter defines the number of CPUs in the largest - system that will be considered to be "small". - - The default value will be fine in most cases. 
Battery-powered - systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger - numbers of CPUs, and (3) are suffering from battery-lifetime - problems due to long sysidle latencies might wish to experiment - with larger values for this Kconfig parameter. On the other - hand, they might be even better served by disabling NO_HZ_FULL - entirely, given that NO_HZ_FULL is intended for HPC and - real-time workloads that at present do not tend to be run on - battery-powered systems. - - Take the default if you are unsure. - config NO_HZ bool "Old Idle dynticks config" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index b9ddd3beeb9a..0f4759f4232e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -8,7 +8,6 @@ CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y CONFIG_NO_HZ_FULL_ALL=n -CONFIG_NO_HZ_FULL_SYSIDLE=y CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt index 24396ae8355b..a75b16991a92 100644 --- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt +++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt @@ -18,7 +18,6 @@ CONFIG_PROVE_RCU In common code tested by TREE_RCU test cases. -CONFIG_NO_HZ_FULL_SYSIDLE CONFIG_RCU_NOCB_CPU Meaningless for TINY_RCU. diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index b5ea8489969a..519e06d34d0b 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -9,8 +9,7 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one. CONFIG_HOTPLUG_CPU -- Do half. (Every second.) CONFIG_HZ_PERIODIC -- Do one. CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) -CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. -CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. +CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement. CONFIG_PREEMPT -- Do half. (First three and #8.) CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. @@ -48,10 +47,6 @@ CONFIG_64BIT Used only to check CONFIG_RCU_FANOUT value, inspection suffices. -CONFIG_NO_HZ_FULL_SYSIDLE_SMALL - - Defer until Frederic uses this. - CONFIG_PREEMPT_COUNT CONFIG_PREEMPT_RCU -- cgit v1.2.1 From d2b1654f91f9e928011fbea7138854ee2044f470 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 May 2017 12:01:50 -0700 Subject: rcu: Remove #ifdef moving rcu_end_inkernel_boot from rcupdate.h This commit removes a #ifdef and saves a few lines of code by moving the rcu_end_inkernel_boot() function from include/linux/rcupdate.h to include/linux/rcutiny.h (for TINY_RCU) and to include/linux/rcutree.h (for TREE_RCU). Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 6 ------ include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 1 + 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7f24a5e673f5..f816fc72b51e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -111,12 +111,6 @@ void rcu_check_callbacks(int user); void rcu_report_dead(unsigned int cpu); void rcu_cpu_starting(unsigned int cpu); -#ifndef CONFIG_TINY_RCU -void rcu_end_inkernel_boot(void); -#else /* #ifndef CONFIG_TINY_RCU */ -static inline void rcu_end_inkernel_boot(void) { } -#endif /* #ifndef CONFIG_TINY_RCU */ - #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c869785f16bd..5becbbccb998 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -123,7 +123,7 @@ void rcu_scheduler_starting(void); #else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ static inline void rcu_scheduler_starting(void) { } #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ - +static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_is_watching(void) { return true; } /* Avoid RCU read-side critical sections leaking across. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d6aa89d15d47..37d6fd3b7ff8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -91,6 +91,7 @@ void exit_rcu(void); void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; +void rcu_end_inkernel_boot(void); bool rcu_is_watching(void); void rcu_all_qs(void); -- cgit v1.2.1 From 4e4bea7427062ec15df7084f97728e2a44d912e3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 May 2017 15:33:23 -0700 Subject: rcu: Remove typecheck() from RCU locking wrapper functions Because raw_spin_lock_irqsave() and raw_spin_unlock_irqrestore() both do typecheck() on their flags argument, there is no point in duplicating this check in raw_spin_lock_irqsave_rcu_node() and raw_spin_unlock_irqrestore_rcu_node(). This commit therefore saves a few lines by removing this duplicated check. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 2a75beb883c8..bc55b5716c37 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -334,18 +334,14 @@ do { \ #define raw_spin_unlock_irq_rcu_node(p) \ raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) -#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \ +#define raw_spin_lock_irqsave_rcu_node(p, flags) \ do { \ - typecheck(unsigned long, flags); \ - raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags); \ + raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ smp_mb__after_unlock_lock(); \ } while (0) -#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags) \ -do { \ - typecheck(unsigned long, flags); \ - raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags); \ -} while (0) +#define raw_spin_unlock_irqrestore_rcu_node(p, flags) \ + raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ #define raw_spin_trylock_rcu_node(p) \ ({ \ -- cgit v1.2.1 From c4a09ff752e164c020bced6513e2008f992a02e6 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 12 May 2017 14:37:19 -0700 Subject: rcu: Remove the now-obsolete PROVE_RCU_REPEATEDLY Kconfig option The PROVE_RCU_REPEATEDLY Kconfig option was initially added due to the volume of messages from PROVE_RCU: Doing just one per boot would have required excessive numbers of boots to locate them all. However, PROVE_RCU messages are now relatively rare, so there is no longer any reason to need more than one such message per boot. This commit therefore removes the PROVE_RCU_REPEATEDLY Kconfig option. Signed-off-by: Paul E. McKenney Cc: Ingo Molnar --- kernel/locking/lockdep.c | 4 ---- kernel/rcu/tree_plugin.h | 4 +--- lib/Kconfig.debug | 14 -------------- tools/testing/selftests/rcutorture/configs/rcu/TINY02 | 1 - .../testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 1 - 5 files changed, 1 insertion(+), 23 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index cceb9534338a..7d2499bec5fe 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4490,10 +4490,6 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) { struct task_struct *curr = current; -#ifndef CONFIG_PROVE_RCU_REPEATEDLY - if (!debug_locks_off()) - return; -#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ /* Note: the following can be executed concurrently, so be careful. */ pr_warn("\n"); pr_warn("=============================\n"); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f524d967f7b6..7f5919ab24c4 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -79,9 +79,7 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); - if (IS_ENABLED(CONFIG_PROVE_RCU_REPEATEDLY)) - pr_info("\tRCU lockdep checking is permanently enabled.\n"); - else if (IS_ENABLED(CONFIG_PROVE_RCU)) + if (IS_ENABLED(CONFIG_PROVE_RCU)) pr_info("\tRCU lockdep checking is enabled.\n"); if (RCU_NUM_LVLS >= 4) pr_info("\tFour(or more)-level hierarchy is enabled.\n"); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 960c5d2d3c03..762deab304fe 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1306,20 +1306,6 @@ menu "RCU Debugging" config PROVE_RCU def_bool PROVE_LOCKING -config PROVE_RCU_REPEATEDLY - bool "RCU debugging: don't disable PROVE_RCU on first splat" - depends on PROVE_RCU - default n - help - By itself, PROVE_RCU will disable checking upon issuing the - first warning (or "splat"). This feature prevents such - disabling, allowing multiple RCU-lockdep warnings to be printed - on a single reboot. - - Say Y to allow multiple RCU-lockdep warnings per boot. - - Say N if you are unsure. 
- config SPARSE_RCU_POINTER bool "RCU debugging: sparse-based checks for pointer usage" default n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index 9007cd979df7..1f6bebbf5da8 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -8,7 +8,6 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU_REPEATEDLY=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_OBJECTS=y diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 519e06d34d0b..b778a28f1386 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -13,7 +13,6 @@ CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement. CONFIG_PREEMPT -- Do half. (First three and #8.) CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. -CONFIG_PROVE_RCU_REPEATEDLY -- Do one. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. -- cgit v1.2.1 From 41a2901e7d220875752a8c870e0b53288a578c20 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 12 May 2017 15:56:35 -0700 Subject: rcu: Remove SPARSE_RCU_POINTER Kconfig option The sparse-based checking for non-RCU accesses to RCU-protected pointers has been around for a very long time, and it is now the only type of sparse-based checking that is optional. This commit therefore makes it unconditional. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney Cc: Fengguang Wu --- Documentation/RCU/Design/Requirements/Requirements.html | 9 +++------ Documentation/RCU/checklist.txt | 8 ++++---- Documentation/dev-tools/sparse.rst | 6 ------ include/linux/compiler.h | 4 ---- lib/Kconfig.debug | 15 --------------- lib/Makefile | 3 --- .../testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 4 ---- 7 files changed, 7 insertions(+), 42 deletions(-) diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index 8c94fc1d1c84..0e6550a8c926 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -559,9 +559,7 @@ The rcu_access_pointer() on line 6 is similar to For remove_gp_synchronous(), as long as all modifications to gp are carried out while holding gp_lock, the above optimizations are harmless. - However, - with CONFIG_SPARSE_RCU_POINTER=y, - sparse will complain if you + However, sparse will complain if you define gp with __rcu and then access it without using either rcu_access_pointer() or rcu_dereference(). @@ -1978,9 +1976,8 @@ guard against mishaps and misuse: and rcu_dereference(), perhaps (incorrectly) substituting a simple assignment. To catch this sort of error, a given RCU-protected pointer may be - tagged with __rcu, after which running sparse - with CONFIG_SPARSE_RCU_POINTER=y will complain - about simple-assignment accesses to that pointer. + tagged with __rcu, after which sparse + will complain about simple-assignment accesses to that pointer. Arnd Bergmann made me aware of this requirement, and also supplied the needed patch series. 
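For illustration (not part of the patch itself), a minimal sketch of what the now-unconditional __rcu checking catches. The structure, variable, and function names below are hypothetical; the primitives used (rcu_assign_pointer(), rcu_dereference(), rcu_dereference_protected(), kfree_rcu()) are the standard ones that sparse verifies against the __rcu address space:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical structure and globals, for illustration only. */
struct foo {
	int a;
	struct rcu_head rh;
};

static struct foo __rcu *gp;	/* __rcu: sparse flags plain accesses. */
static DEFINE_SPINLOCK(gp_lock);

static int update_gp(int a)
{
	struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);
	struct foo *old;

	if (!p)
		return -ENOMEM;
	p->a = a;
	spin_lock(&gp_lock);
	/* A plain "gp = p;" here would draw a sparse address-space warning. */
	old = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
	rcu_assign_pointer(gp, p);
	spin_unlock(&gp_lock);
	if (old)
		kfree_rcu(old, rh);	/* Free old copy after a grace period. */
	return 0;
}

static int read_gp_a(void)
{
	struct foo *p;
	int ret = -1;

	rcu_read_lock();
	p = rcu_dereference(gp);	/* Required for __rcu-tagged pointers. */
	if (p)
		ret = p->a;
	rcu_read_unlock();
	return ret;
}

With this patch applied, running sparse over such a file (for example via "make C=1") flags simple assignments to __rcu pointers unconditionally; passing -DCONFIG_SPARSE_RCU_POINTER in CF is no longer needed, as the sparse.rst hunk below reflects.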
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 877947130ebe..6beda556faf3 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -413,11 +413,11 @@ over a rather long period of time, but improvements are always welcome! read-side critical sections. It is the responsibility of the RCU update-side primitives to deal with this. -17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the - __rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to - validate your RCU code. These can help find problems as follows: +17. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the + __rcu sparse checks to validate your RCU code. These can help + find problems as follows: - CONFIG_PROVE_RCU: check that accesses to RCU-protected data + CONFIG_PROVE_LOCKING: check that accesses to RCU-protected data structures are carried out under the proper RCU read-side critical section, while holding the right combination of locks, or whatever other conditions diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst index ffdcc97f6f5a..78aa00a604a0 100644 --- a/Documentation/dev-tools/sparse.rst +++ b/Documentation/dev-tools/sparse.rst @@ -103,9 +103,3 @@ have already built it. The optional make variable CF can be used to pass arguments to sparse. The build system passes -Wbitwise to sparse automatically. - -Checking RCU annotations -~~~~~~~~~~~~~~~~~~~~~~~~ - -RCU annotations are not checked by default. To enable RCU annotation -checks, include -DCONFIG_SPARSE_RCU_POINTER in your CF flags. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f8110051188f..707242fdbb89 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,7 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) -#ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) -#else /* CONFIG_SPARSE_RCU_POINTER */ -# define __rcu -#endif /* CONFIG_SPARSE_RCU_POINTER */ # define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 762deab304fe..498d5dd63bf4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1306,21 +1306,6 @@ menu "RCU Debugging" config PROVE_RCU def_bool PROVE_LOCKING -config SPARSE_RCU_POINTER - bool "RCU debugging: sparse-based checks for pointer usage" - default n - help - This feature enables the __rcu sparse annotation for - RCU-protected pointers. This annotation will cause sparse - to flag any non-RCU used of annotated pointers. This can be - helpful when debugging RCU usage. Please note that this feature - is not intended to enforce code cleanliness; it is instead merely - a debugging aid. - - Say Y to make sparse flag questionable use of RCU-protected pointers - - Say N if you are unsure. 
- config TORTURE_TEST tristate default n diff --git a/lib/Makefile b/lib/Makefile index 0166fbc0fa81..07fbe6a75692 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -25,9 +25,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ earlycpio.o seq_buf.o siphash.o \ nmi_backtrace.o nodemask.o win_minmax.o -CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER -CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER - lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index b778a28f1386..c5c29fb7438c 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -74,10 +74,6 @@ CONFIG_TINY_RCU These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP. -CONFIG_SPARSE_RCU_POINTER - - Makes sense only for sparse runs, not for kernel builds. - CONFIG_SRCU CONFIG_TASKS_RCU -- cgit v1.2.1 From 7f0cd6333086ae09962791c31f0d4845a3329df9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 14 May 2017 17:06:30 -0700 Subject: srcu: Fix rcutorture-statistics typo The function srcutorture_get_gp_data() duplicated the check for sp->batch_check0.head instead of also checking sp->batch_check1.head. The only effect of this typo would be for rcutorture statistics to understate the fraction of time that an SRCU grace period was in flight, and only for Classic SRCU. This commit fixes this typo. Reported-by: David Binderman Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index bc55b5716c37..27f871c88e0a 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -469,7 +469,7 @@ static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, *flags = 0; *completed = sp->completed; *gpnum = *completed; - if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head) + if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check1.head) (*gpnum)++; } -- cgit v1.2.1 From bd8cc5a062f41e334596edbe823e2fa0adddd1b7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 15 May 2017 14:57:01 -0700 Subject: srcu: Remove Classic SRCU Classic SRCU was only ever intended to be a fallback in case of issues with Tree/Tiny SRCU, and the latter two are doing quite well in testing. This commit therefore removes Classic SRCU. Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 2 - include/linux/srcuclassic.h | 100 --- init/Kconfig | 21 +- kernel/rcu/Makefile | 1 - kernel/rcu/rcu.h | 16 - kernel/rcu/rcutorture.c | 17 - kernel/rcu/srcu.c | 668 --------------------- .../selftests/rcutorture/configs/rcu/CFLIST | 1 - .../selftests/rcutorture/configs/rcu/SRCU-C | 11 - .../rcutorture/configs/rcuperf/SRCUCLASSIC | 16 - 10 files changed, 2 insertions(+), 851 deletions(-) delete mode 100644 include/linux/srcuclassic.h delete mode 100644 kernel/rcu/srcu.c delete mode 100644 tools/testing/selftests/rcutorture/configs/rcu/SRCU-C delete mode 100644 tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 5f509018e6b5..39af9bc0f653 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -60,8 +60,6 @@ int init_srcu_struct(struct srcu_struct *sp); #include #elif defined(CONFIG_TREE_SRCU) #include -#elif defined(CONFIG_CLASSIC_SRCU) -#include #elif defined(CONFIG_SRCU) #error "Unknown SRCU implementation specified to kernel configuration" #else diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h deleted file mode 100644 index 67db4a36ef0d..000000000000 --- a/include/linux/srcuclassic.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Sleepable Read-Copy Update mechanism for mutual exclusion, - * classic v4.11 variant. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - * Copyright (C) IBM Corporation, 2017 - * - * Author: Paul McKenney - */ - -#ifndef _LINUX_SRCU_CLASSIC_H -#define _LINUX_SRCU_CLASSIC_H - -struct srcu_array { - unsigned long lock_count[2]; - unsigned long unlock_count[2]; -}; - -struct rcu_batch { - struct rcu_head *head, **tail; -}; - -#define RCU_BATCH_INIT(name) { NULL, &(name.head) } - -struct srcu_struct { - unsigned long completed; - struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->batch_queue, ->running */ - bool running; - /* callbacks just queued */ - struct rcu_batch batch_queue; - /* callbacks try to do the first check_zero */ - struct rcu_batch batch_check0; - /* callbacks done with the first check_zero and the flip */ - struct rcu_batch batch_check1; - struct rcu_batch batch_done; - struct delayed_work work; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -}; - -void process_srcu(struct work_struct *work); - -#define __SRCU_STRUCT_INIT(name) \ - { \ - .completed = -300, \ - .per_cpu_ref = &name##_srcu_array, \ - .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .running = false, \ - .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ - .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ - .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ - .batch_done = RCU_BATCH_INIT(name.batch_done), \ - .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ - __SRCU_DEP_MAP_INIT(name) \ - } - -/* - * Define and initialize a srcu struct at build time. - * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. - * - * Note that although DEFINE_STATIC_SRCU() hides the name from other - * files, the per-CPU variable rules nevertheless require that the - * chosen name be globally unique. These rules also prohibit use of - * DEFINE_STATIC_SRCU() within a function. If these rules are too - * restrictive, declare the srcu_struct manually. For example, in - * each file: - * - * static struct srcu_struct my_srcu; - * - * Then, before the first use of each my_srcu, manually initialize it: - * - * init_srcu_struct(&my_srcu); - * - * See include/linux/percpu-defs.h for the rules on per-CPU variables. - */ -#define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) -#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) -#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) - -void synchronize_srcu_expedited(struct srcu_struct *sp); -void srcu_barrier(struct srcu_struct *sp); - -#endif diff --git a/init/Kconfig b/init/Kconfig index 6f257d51f582..2aa14ff40e88 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -526,32 +526,15 @@ config SRCU permits arbitrary sleeping or blocking within RCU read-side critical sections. -config CLASSIC_SRCU - bool "Use v4.11 classic SRCU implementation" - default n - depends on RCU_EXPERT && SRCU - help - This option selects the traditional well-tested classic SRCU - implementation from v4.11, as might be desired for enterprise - Linux distributions. Without this option, the shiny new - Tiny SRCU and Tree SRCU implementations are used instead. - At some point, it is hoped that Tiny SRCU and Tree SRCU - will accumulate enough test time and confidence to allow - Classic SRCU to be dropped entirely. - - Say Y if you need a rock-solid SRCU. - - Say N if you would like help test Tree SRCU. 
- config TINY_SRCU bool - default y if SRCU && TINY_RCU && !CLASSIC_SRCU + default y if SRCU && TINY_RCU help This option selects the single-CPU non-preemptible version of SRCU. config TREE_SRCU bool - default y if SRCU && !TINY_RCU && !CLASSIC_SRCU + default y if SRCU && !TINY_RCU help This option selects the full-fledged version of SRCU. diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 23803c7d5180..3945337c8ce4 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -3,7 +3,6 @@ KCOV_INSTRUMENT := n obj-y += update.o sync.o -obj-$(CONFIG_CLASSIC_SRCU) += srcu.o obj-$(CONFIG_TREE_SRCU) += srcutree.o obj-$(CONFIG_TINY_SRCU) += srcutiny.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 27f871c88e0a..d06c42deee0b 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -457,22 +457,6 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type, struct srcu_struct *sp, int *flags, unsigned long *gpnum, unsigned long *completed); -#elif defined(CONFIG_CLASSIC_SRCU) - -static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, - unsigned long *gpnum, - unsigned long *completed) -{ - if (test_type != SRCU_FLAVOR) - return; - *flags = 0; - *completed = sp->completed; - *gpnum = *completed; - if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check1.head) - (*gpnum)++; -} - #endif #ifdef CONFIG_TINY_RCU diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 03cdf79e73d4..b8f7f8ce8575 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -564,31 +564,19 @@ static void srcu_torture_stats(void) int __maybe_unused cpu; int idx; -#if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU) #ifdef CONFIG_TREE_SRCU idx = srcu_ctlp->srcu_idx & 0x1; -#else /* #ifdef CONFIG_TREE_SRCU */ - idx = srcu_ctlp->completed & 0x1; -#endif /* #else #ifdef CONFIG_TREE_SRCU */ pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { unsigned long l0, l1; unsigned long u0, u1; long c0, c1; -#ifdef CONFIG_TREE_SRCU struct srcu_data *counts; counts = per_cpu_ptr(srcu_ctlp->sda, cpu); u0 = counts->srcu_unlock_count[!idx]; u1 = counts->srcu_unlock_count[idx]; -#else /* #ifdef CONFIG_TREE_SRCU */ - struct srcu_array *counts; - - counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu); - u0 = counts->unlock_count[!idx]; - u1 = counts->unlock_count[idx]; -#endif /* #else #ifdef CONFIG_TREE_SRCU */ /* * Make sure that a lock is always counted if the corresponding @@ -596,13 +584,8 @@ static void srcu_torture_stats(void) */ smp_rmb(); -#ifdef CONFIG_TREE_SRCU l0 = counts->srcu_lock_count[!idx]; l1 = counts->srcu_lock_count[idx]; -#else /* #ifdef CONFIG_TREE_SRCU */ - l0 = counts->lock_count[!idx]; - l1 = counts->lock_count[idx]; -#endif /* #else #ifdef CONFIG_TREE_SRCU */ c0 = l0 - u0; c1 = l1 - u1; diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c deleted file mode 100644 index 4e3f558409a0..000000000000 --- a/kernel/rcu/srcu.c +++ /dev/null @@ -1,668 +0,0 @@ -/* - * Sleepable Read-Copy Update mechanism for mutual exclusion. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * Copyright (C) IBM Corporation, 2006 - * Copyright (C) Fujitsu, 2012 - * - * Author: Paul McKenney - * Lai Jiangshan - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rcu.h" - -/* - * Initialize an rcu_batch structure to empty. - */ -static inline void rcu_batch_init(struct rcu_batch *b) -{ - b->head = NULL; - b->tail = &b->head; -} - -/* - * Enqueue a callback onto the tail of the specified rcu_batch structure. - */ -static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head) -{ - *b->tail = head; - b->tail = &head->next; -} - -/* - * Is the specified rcu_batch structure empty? - */ -static inline bool rcu_batch_empty(struct rcu_batch *b) -{ - return b->tail == &b->head; -} - -/* - * Remove the callback at the head of the specified rcu_batch structure - * and return a pointer to it, or return NULL if the structure is empty. - */ -static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b) -{ - struct rcu_head *head; - - if (rcu_batch_empty(b)) - return NULL; - - head = b->head; - b->head = head->next; - if (b->tail == &head->next) - rcu_batch_init(b); - - return head; -} - -/* - * Move all callbacks from the rcu_batch structure specified by "from" to - * the structure specified by "to". - */ -static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from) -{ - if (!rcu_batch_empty(from)) { - *to->tail = from->head; - to->tail = from->tail; - rcu_batch_init(from); - } -} - -static int init_srcu_struct_fields(struct srcu_struct *sp) -{ - sp->completed = 0; - spin_lock_init(&sp->queue_lock); - sp->running = false; - rcu_batch_init(&sp->batch_queue); - rcu_batch_init(&sp->batch_check0); - rcu_batch_init(&sp->batch_check1); - rcu_batch_init(&sp->batch_done); - INIT_DELAYED_WORK(&sp->work, process_srcu); - sp->per_cpu_ref = alloc_percpu(struct srcu_array); - return sp->per_cpu_ref ? 0 : -ENOMEM; -} - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - -int __init_srcu_struct(struct srcu_struct *sp, const char *name, - struct lock_class_key *key) -{ - /* Don't re-initialize a lock while it is held. */ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - return init_srcu_struct_fields(sp); -} -EXPORT_SYMBOL_GPL(__init_srcu_struct); - -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - -/** - * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. - * - * Must invoke this on a given srcu_struct before passing that srcu_struct - * to any other function. Each srcu_struct represents a separate domain - * of SRCU protection. - */ -int init_srcu_struct(struct srcu_struct *sp) -{ - return init_srcu_struct_fields(sp); -} -EXPORT_SYMBOL_GPL(init_srcu_struct); - -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ - -/* - * Returns approximate total of the readers' ->lock_count[] values for the - * rank of per-CPU counters specified by idx. 
- */ -static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) -{ - int cpu; - unsigned long sum = 0; - - for_each_possible_cpu(cpu) { - struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu); - - sum += READ_ONCE(cpuc->lock_count[idx]); - } - return sum; -} - -/* - * Returns approximate total of the readers' ->unlock_count[] values for the - * rank of per-CPU counters specified by idx. - */ -static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) -{ - int cpu; - unsigned long sum = 0; - - for_each_possible_cpu(cpu) { - struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu); - - sum += READ_ONCE(cpuc->unlock_count[idx]); - } - return sum; -} - -/* - * Return true if the number of pre-existing readers is determined to - * be zero. - */ -static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) -{ - unsigned long unlocks; - - unlocks = srcu_readers_unlock_idx(sp, idx); - - /* - * Make sure that a lock is always counted if the corresponding unlock - * is counted. Needs to be a smp_mb() as the read side may contain a - * read from a variable that is written to before the synchronize_srcu() - * in the write side. In this case smp_mb()s A and B act like the store - * buffering pattern. - * - * This smp_mb() also pairs with smp_mb() C to prevent accesses after the - * synchronize_srcu() from being executed before the grace period ends. - */ - smp_mb(); /* A */ - - /* - * If the locks are the same as the unlocks, then there must have - * been no readers on this index at some time in between. This does not - * mean that there are no more readers, as one could have read the - * current index but not have incremented the lock counter yet. - * - * Possible bug: There is no guarantee that there haven't been ULONG_MAX - * increments of ->lock_count[] since the unlocks were counted, meaning - * that this could return true even if there are still active readers. - * Since there are no memory barriers around srcu_flip(), the CPU is not - * required to increment ->completed before running - * srcu_readers_unlock_idx(), which means that there could be an - * arbitrarily large number of critical sections that execute after - * srcu_readers_unlock_idx() but use the old value of ->completed. - */ - return srcu_readers_lock_idx(sp, idx) == unlocks; -} - -/** - * srcu_readers_active - returns true if there are readers. and false - * otherwise - * @sp: which srcu_struct to count active readers (holding srcu_read_lock). - * - * Note that this is not an atomic primitive, and can therefore suffer - * severe errors when invoked on an active srcu_struct. That said, it - * can be useful as an error check at cleanup time. - */ -static bool srcu_readers_active(struct srcu_struct *sp) -{ - int cpu; - unsigned long sum = 0; - - for_each_possible_cpu(cpu) { - struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu); - - sum += READ_ONCE(cpuc->lock_count[0]); - sum += READ_ONCE(cpuc->lock_count[1]); - sum -= READ_ONCE(cpuc->unlock_count[0]); - sum -= READ_ONCE(cpuc->unlock_count[1]); - } - return sum; -} - -/** - * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. - * - * Must invoke this only after you are finished using a given srcu_struct - * that was initialized via init_srcu_struct(). This code does some - * probabalistic checking, spotting late uses of srcu_read_lock(), - * synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu(). 
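The reader-counting scheme above is easier to see in miniature. The sketch below models a single index, with C11 atomics standing in for the kernel's per-CPU counters and its smp_mb() "A" (a simplification for illustration, not a faithful translation). The key point is the sampling order: unlocks are read before locks, so a reader racing with the check can only make the totals look unequal, which errs toward reporting that readers are still present.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_ulong lock_count;         /* total lock-side increments */
static atomic_ulong unlock_count;       /* total unlock-side increments */

static void reader_enter(void) { atomic_fetch_add(&lock_count, 1); }
static void reader_exit(void)  { atomic_fetch_add(&unlock_count, 1); }

/*
 * Sample unlocks first, locks second.  A reader that enters between
 * the two samples inflates only the lock total, so the comparison
 * fails and the updater simply retries later.
 */
static bool readers_gone(void)
{
        unsigned long unlocks = atomic_load(&unlock_count);
        /* the kernel places smp_mb() "A" here */
        unsigned long locks = atomic_load(&lock_count);

        return locks == unlocks;
}

int main(void)
{
        reader_enter();
        printf("mid-critical-section: %d\n", readers_gone());   /* 0 */
        reader_exit();
        printf("after exit: %d\n", readers_gone());             /* 1 */
        return 0;
}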
- * If any such late uses are detected, the per-CPU memory associated with - * the srcu_struct is simply leaked and WARN_ON() is invoked. If the - * caller frees the srcu_struct itself, a use-after-free crash will likely - * ensue, but at least there will be a warning printed. - */ -void cleanup_srcu_struct(struct srcu_struct *sp) -{ - if (WARN_ON(srcu_readers_active(sp))) - return; /* Leakage unless caller handles error. */ - free_percpu(sp->per_cpu_ref); - sp->per_cpu_ref = NULL; -} -EXPORT_SYMBOL_GPL(cleanup_srcu_struct); - -/* - * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. - * Returns an index that must be passed to the matching srcu_read_unlock(). - */ -int __srcu_read_lock(struct srcu_struct *sp) -{ - int idx; - - idx = READ_ONCE(sp->completed) & 0x1; - this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); - smp_mb(); /* B */ /* Avoid leaking the critical section. */ - return idx; -} -EXPORT_SYMBOL_GPL(__srcu_read_lock); - -/* - * Removes the count for the old reader from the appropriate per-CPU - * element of the srcu_struct. Note that this may well be a different - * CPU than that which was incremented by the corresponding srcu_read_lock(). - */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) -{ - smp_mb(); /* C */ /* Avoid leaking the critical section. */ - this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]); -} -EXPORT_SYMBOL_GPL(__srcu_read_unlock); - -/* - * We use an adaptive strategy for synchronize_srcu() and especially for - * synchronize_srcu_expedited(). We spin for a fixed time period - * (defined below) to allow SRCU readers to exit their read-side critical - * sections. If there are still some readers after 10 microseconds, - * we repeatedly block for 1-millisecond time periods. This approach - * has done well in testing, so there is no need for a config parameter. - */ -#define SRCU_RETRY_CHECK_DELAY 5 -#define SYNCHRONIZE_SRCU_TRYCOUNT 2 -#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12 - -/* - * @@@ Wait until all pre-existing readers complete. Such readers - * will have used the index specified by "idx". - * the caller should ensures the ->completed is not changed while checking - * and idx = (->completed & 1) ^ 1 - */ -static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) -{ - for (;;) { - if (srcu_readers_active_idx_check(sp, idx)) - return true; - if (--trycount <= 0) - return false; - udelay(SRCU_RETRY_CHECK_DELAY); - } -} - -/* - * Increment the ->completed counter so that future SRCU readers will - * use the other rank of the ->(un)lock_count[] arrays. This allows - * us to wait for pre-existing readers in a starvation-free manner. - */ -static void srcu_flip(struct srcu_struct *sp) -{ - WRITE_ONCE(sp->completed, sp->completed + 1); - - /* - * Ensure that if the updater misses an __srcu_read_unlock() - * increment, that task's next __srcu_read_lock() will see the - * above counter update. Note that both this memory barrier - * and the one in srcu_readers_active_idx_check() provide the - * guarantee for __srcu_read_lock(). - */ - smp_mb(); /* D */ /* Pairs with C. */ -} - -/* - * Enqueue an SRCU callback on the specified srcu_struct structure, - * initiating grace-period processing if it is not already running. - * - * Note that all CPUs must agree that the grace period extended beyond - * all pre-existing SRCU read-side critical section. 
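One detail of __srcu_read_lock()/__srcu_read_unlock() above deserves emphasis: the index sampled at lock time must be handed back at unlock time, because ->completed may be flipped by an updater while the critical section runs. A minimal model, with invented names and C11 atomics standing in for this_cpu_inc():

#include <stdatomic.h>

static atomic_ulong completed;          /* grace-period counter */
static atomic_ulong lock_count[2];
static atomic_ulong unlock_count[2];

static int model_read_lock(void)
{
        int idx = atomic_load(&completed) & 0x1;

        atomic_fetch_add(&lock_count[idx], 1);
        return idx;                     /* caller must hand this back */
}

static void model_read_unlock(int idx)
{
        /* Recomputing "completed & 0x1" here would be a bug: a flip
         * in the meantime would bump the wrong counter and leave the
         * old index looking forever busy. */
        atomic_fetch_add(&unlock_count[idx], 1);
}

int main(void)
{
        int idx = model_read_lock();

        atomic_fetch_add(&completed, 1);        /* flip happens meanwhile */
        model_read_unlock(idx);                 /* still balances lock_count[idx] */
        return 0;
}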
On systems with - * more than one CPU, this means that when "func()" is invoked, each CPU - * is guaranteed to have executed a full memory barrier since the end of - * its last corresponding SRCU read-side critical section whose beginning - * preceded the call to call_rcu(). It also means that each CPU executing - * an SRCU read-side critical section that continues beyond the start of - * "func()" must have executed a memory barrier after the call_rcu() - * but before the beginning of that SRCU read-side critical section. - * Note that these guarantees include CPUs that are offline, idle, or - * executing in user mode, as well as CPUs that are executing in the kernel. - * - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the - * resulting SRCU callback function "func()", then both CPU A and CPU - * B are guaranteed to execute a full memory barrier during the time - * interval between the call to call_rcu() and the invocation of "func()". - * This guarantee applies even if CPU A and CPU B are the same CPU (but - * again only if the system has more than one CPU). - * - * Of course, these guarantees apply only for invocations of call_srcu(), - * srcu_read_lock(), and srcu_read_unlock() that are all passed the same - * srcu_struct structure. - */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *head, - rcu_callback_t func) -{ - unsigned long flags; - - head->next = NULL; - head->func = func; - spin_lock_irqsave(&sp->queue_lock, flags); - smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */ - rcu_batch_queue(&sp->batch_queue, head); - if (!sp->running) { - sp->running = true; - queue_delayed_work(system_power_efficient_wq, &sp->work, 0); - } - spin_unlock_irqrestore(&sp->queue_lock, flags); -} -EXPORT_SYMBOL_GPL(call_srcu); - -static void srcu_advance_batches(struct srcu_struct *sp, int trycount); -static void srcu_reschedule(struct srcu_struct *sp); - -/* - * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). - */ -static void __synchronize_srcu(struct srcu_struct *sp, int trycount) -{ - struct rcu_synchronize rcu; - struct rcu_head *head = &rcu.head; - bool done = false; - - RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || - lock_is_held(&rcu_bh_lock_map) || - lock_is_held(&rcu_lock_map) || - lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section"); - - might_sleep(); - init_completion(&rcu.completion); - - head->next = NULL; - head->func = wakeme_after_rcu; - spin_lock_irq(&sp->queue_lock); - smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */ - if (!sp->running) { - /* steal the processing owner */ - sp->running = true; - rcu_batch_queue(&sp->batch_check0, head); - spin_unlock_irq(&sp->queue_lock); - - srcu_advance_batches(sp, trycount); - if (!rcu_batch_empty(&sp->batch_done)) { - BUG_ON(sp->batch_done.head != head); - rcu_batch_dequeue(&sp->batch_done); - done = true; - } - /* give the processing owner to work_struct */ - srcu_reschedule(sp); - } else { - rcu_batch_queue(&sp->batch_queue, head); - spin_unlock_irq(&sp->queue_lock); - } - - if (!done) { - wait_for_completion(&rcu.completion); - smp_mb(); /* Caller's later accesses after GP. */ - } - -} - -/** - * synchronize_srcu - wait for prior SRCU read-side critical-section completion - * @sp: srcu_struct with which to synchronize. - * - * Wait for the count to drain to zero of both indexes. 
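The __synchronize_srcu() listing above uses a standard kernel pattern: bundle an rcu_head and a completion into one structure, queue a callback (wakeme_after_rcu()) that fires the completion, then block on it, turning the asynchronous callback machinery into a synchronous wait. A user-space sketch of that pattern, with a condition variable standing in for struct completion (all names invented for illustration):

#include <pthread.h>
#include <stdbool.h>

struct completion {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        bool done;
};

static void init_completion(struct completion *c)
{
        pthread_mutex_init(&c->lock, NULL);
        pthread_cond_init(&c->cond, NULL);
        c->done = false;
}

static void complete(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        c->done = true;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        while (!c->done)
                pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
}

static void *late_worker(void *arg)
{
        complete(arg);                  /* stands in for wakeme_after_rcu() */
        return NULL;
}

int main(void)
{
        struct completion done;
        pthread_t tid;

        init_completion(&done);
        pthread_create(&tid, NULL, late_worker, &done);
        wait_for_completion(&done);     /* blocks until the callback fires */
        pthread_join(tid, NULL);
        return 0;
}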
To avoid the - * possible starvation of synchronize_srcu(), it waits for the count of - * the index=((->completed & 1) ^ 1) to drain to zero at first, - * and then flip the completed and wait for the count of the other index. - * - * Can block; must be called from process context. - * - * Note that it is illegal to call synchronize_srcu() from the corresponding - * SRCU read-side critical section; doing so will result in deadlock. - * However, it is perfectly legal to call synchronize_srcu() on one - * srcu_struct from some other srcu_struct's read-side critical section, - * as long as the resulting graph of srcu_structs is acyclic. - * - * There are memory-ordering constraints implied by synchronize_srcu(). - * On systems with more than one CPU, when synchronize_srcu() returns, - * each CPU is guaranteed to have executed a full memory barrier since - * the end of its last corresponding SRCU-sched read-side critical section - * whose beginning preceded the call to synchronize_srcu(). In addition, - * each CPU having an SRCU read-side critical section that extends beyond - * the return from synchronize_srcu() is guaranteed to have executed a - * full memory barrier after the beginning of synchronize_srcu() and before - * the beginning of that SRCU read-side critical section. Note that these - * guarantees include CPUs that are offline, idle, or executing in user mode, - * as well as CPUs that are executing in the kernel. - * - * Furthermore, if CPU A invoked synchronize_srcu(), which returned - * to its caller on CPU B, then both CPU A and CPU B are guaranteed - * to have executed a full memory barrier during the execution of - * synchronize_srcu(). This guarantee applies even if CPU A and CPU B - * are the same CPU, but again only if the system has more than one CPU. - * - * Of course, these memory-ordering guarantees apply only when - * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are - * passed the same srcu_struct structure. - */ -void synchronize_srcu(struct srcu_struct *sp) -{ - __synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal()) - ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT - : SYNCHRONIZE_SRCU_TRYCOUNT); -} -EXPORT_SYMBOL_GPL(synchronize_srcu); - -/** - * synchronize_srcu_expedited - Brute-force SRCU grace period - * @sp: srcu_struct with which to synchronize. - * - * Wait for an SRCU grace period to elapse, but be more aggressive about - * spinning rather than blocking when waiting. - * - * Note that synchronize_srcu_expedited() has the same deadlock and - * memory-ordering properties as does synchronize_srcu(). - */ -void synchronize_srcu_expedited(struct srcu_struct *sp) -{ - __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT); -} -EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); - -/** - * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. - * @sp: srcu_struct on which to wait for in-flight callbacks. - */ -void srcu_barrier(struct srcu_struct *sp) -{ - synchronize_srcu(sp); -} -EXPORT_SYMBOL_GPL(srcu_barrier); - -/** - * srcu_batches_completed - return batches completed. - * @sp: srcu_struct on which to report batch completion. - * - * Report the number of batches, correlated with, but not necessarily - * precisely the same as, the number of grace periods that have elapsed. 
- */ -unsigned long srcu_batches_completed(struct srcu_struct *sp) -{ - return sp->completed; -} -EXPORT_SYMBOL_GPL(srcu_batches_completed); - -#define SRCU_CALLBACK_BATCH 10 -#define SRCU_INTERVAL 1 - -/* - * Move any new SRCU callbacks to the first stage of the SRCU grace - * period pipeline. - */ -static void srcu_collect_new(struct srcu_struct *sp) -{ - if (!rcu_batch_empty(&sp->batch_queue)) { - spin_lock_irq(&sp->queue_lock); - rcu_batch_move(&sp->batch_check0, &sp->batch_queue); - spin_unlock_irq(&sp->queue_lock); - } -} - -/* - * Core SRCU state machine. Advance callbacks from ->batch_check0 to - * ->batch_check1 and then to ->batch_done as readers drain. - */ -static void srcu_advance_batches(struct srcu_struct *sp, int trycount) -{ - int idx = 1 ^ (sp->completed & 1); - - /* - * Because readers might be delayed for an extended period after - * fetching ->completed for their index, at any point in time there - * might well be readers using both idx=0 and idx=1. We therefore - * need to wait for readers to clear from both index values before - * invoking a callback. - */ - - if (rcu_batch_empty(&sp->batch_check0) && - rcu_batch_empty(&sp->batch_check1)) - return; /* no callbacks need to be advanced */ - - if (!try_check_zero(sp, idx, trycount)) - return; /* failed to advance, will try after SRCU_INTERVAL */ - - /* - * The callbacks in ->batch_check1 have already done with their - * first zero check and flip back when they were enqueued on - * ->batch_check0 in a previous invocation of srcu_advance_batches(). - * (Presumably try_check_zero() returned false during that - * invocation, leaving the callbacks stranded on ->batch_check1.) - * They are therefore ready to invoke, so move them to ->batch_done. - */ - rcu_batch_move(&sp->batch_done, &sp->batch_check1); - - if (rcu_batch_empty(&sp->batch_check0)) - return; /* no callbacks need to be advanced */ - srcu_flip(sp); - - /* - * The callbacks in ->batch_check0 just finished their - * first check zero and flip, so move them to ->batch_check1 - * for future checking on the other idx. - */ - rcu_batch_move(&sp->batch_check1, &sp->batch_check0); - - /* - * SRCU read-side critical sections are normally short, so check - * at least twice in quick succession after a flip. - */ - trycount = trycount < 2 ? 2 : trycount; - if (!try_check_zero(sp, idx^1, trycount)) - return; /* failed to advance, will try after SRCU_INTERVAL */ - - /* - * The callbacks in ->batch_check1 have now waited for all - * pre-existing readers using both idx values. They are therefore - * ready to invoke, so move them to ->batch_done. - */ - rcu_batch_move(&sp->batch_done, &sp->batch_check1); -} - -/* - * Invoke a limited number of SRCU callbacks that have passed through - * their grace period. If there are more to do, SRCU will reschedule - * the workqueue. Note that needed memory barriers have been executed - * in this task's context by srcu_readers_active_idx_check(). - */ -static void srcu_invoke_callbacks(struct srcu_struct *sp) -{ - int i; - struct rcu_head *head; - - for (i = 0; i < SRCU_CALLBACK_BATCH; i++) { - head = rcu_batch_dequeue(&sp->batch_done); - if (!head) - break; - local_bh_disable(); - head->func(head); - local_bh_enable(); - } -} - -/* - * Finished one round of SRCU grace period. Start another if there are - * more SRCU callbacks queued, otherwise put SRCU into not-running state. 
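The batch movement implemented by srcu_collect_new() and srcu_advance_batches() above amounts to a four-stage pipeline: because readers may hold either index at any instant, a callback must survive one successful try_check_zero() scan on each index, with srcu_flip() in between, before it may be invoked. The schematic below walks one callback through those stages; it is a didactic model, not the kernel logic, which moves whole batches at once and can stall at either scan:

#include <stdio.h>

/* QUEUE -> CHECK0 -> CHECK1 -> DONE, per the batch moves above. */
enum stage { QUEUE, CHECK0, CHECK1, DONE };

static void advance(enum stage *s, unsigned int *completed)
{
        switch (*s) {
        case QUEUE:                     /* srcu_collect_new() */
                *s = CHECK0;
                break;
        case CHECK0:                    /* scan of the idle index succeeded */
                *completed += 1;        /* srcu_flip() */
                *s = CHECK1;            /* now wait out the other index */
                break;
        case CHECK1:                    /* second scan succeeded */
                *s = DONE;              /* srcu_invoke_callbacks() may run it */
                break;
        case DONE:
                break;
        }
}

int main(void)
{
        enum stage s = QUEUE;
        unsigned int completed = 0;

        while (s != DONE) {
                advance(&s, &completed);
                printf("stage=%d completed=%u\n", s, completed);
        }
        return 0;
}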
- */ -static void srcu_reschedule(struct srcu_struct *sp) -{ - bool pending = true; - - if (rcu_batch_empty(&sp->batch_done) && - rcu_batch_empty(&sp->batch_check1) && - rcu_batch_empty(&sp->batch_check0) && - rcu_batch_empty(&sp->batch_queue)) { - spin_lock_irq(&sp->queue_lock); - if (rcu_batch_empty(&sp->batch_done) && - rcu_batch_empty(&sp->batch_check1) && - rcu_batch_empty(&sp->batch_check0) && - rcu_batch_empty(&sp->batch_queue)) { - sp->running = false; - pending = false; - } - spin_unlock_irq(&sp->queue_lock); - } - - if (pending) - queue_delayed_work(system_power_efficient_wq, - &sp->work, SRCU_INTERVAL); -} - -/* - * This is the work-queue function that handles SRCU grace periods. - */ -void process_srcu(struct work_struct *work) -{ - struct srcu_struct *sp; - - sp = container_of(work, struct srcu_struct, work.work); - - srcu_collect_new(sp); - srcu_advance_batches(sp, 1); - srcu_invoke_callbacks(sp); - srcu_reschedule(sp); -} -EXPORT_SYMBOL_GPL(process_srcu); - -static int __init srcu_bootup_announce(void) -{ - pr_info("Classic SRCU implementation.\n"); - return 0; -} -early_initcall(srcu_bootup_announce); diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index 0c1da784b8cb..6a0b9f69faad 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -7,7 +7,6 @@ TREE06 TREE07 TREE08 TREE09 -SRCU-C SRCU-N SRCU-P SRCU-t diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C deleted file mode 100644 index d4e19c087c21..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C +++ /dev/null @@ -1,11 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_SMP=y -CONFIG_NR_CPUS=3 -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_EXPERT=y -CONFIG_CLASSIC_SRCU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_LOCKING=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC b/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC deleted file mode 100644 index a1395af60ef4..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/SRCUCLASSIC +++ /dev/null @@ -1,16 +0,0 @@ -CONFIG_SMP=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_LOCKING=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_RCU_EXPERT=y -CONFIG_RCU_TRACE=y -CONFIG_CLASSIC_SRCU=y -- cgit v1.2.1 From ae91aa0adb14dc33114d566feca2f7cb7a96b8b7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 15 May 2017 15:30:32 -0700 Subject: rcu: Remove debugfs tracing RCU's debugfs tracing used to be the only reasonable low-level debug information available, but ftrace and event tracing has since surpassed the RCU debugfs level of usefulness. This commit therefore removes RCU's debugfs tracing. Signed-off-by: Paul E. 
McKenney --- Documentation/RCU/00-INDEX | 2 - .../RCU/Design/Requirements/Requirements.html | 2 +- Documentation/RCU/trace.txt | 535 --------------------- init/Kconfig | 8 - kernel/rcu/Makefile | 1 - kernel/rcu/tiny_plugin.h | 45 -- kernel/rcu/tree.h | 27 -- kernel/rcu/tree_plugin.h | 31 +- kernel/rcu/tree_trace.c | 494 ------------------- lib/Kconfig.debug | 5 +- .../selftests/rcutorture/configs/rcu/TREE02-T | 21 - .../selftests/rcutorture/configs/rcu/TREE08-T | 21 - .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 4 - 13 files changed, 4 insertions(+), 1192 deletions(-) delete mode 100644 Documentation/RCU/trace.txt delete mode 100644 kernel/rcu/tree_trace.c delete mode 100644 tools/testing/selftests/rcutorture/configs/rcu/TREE02-T delete mode 100644 tools/testing/selftests/rcutorture/configs/rcu/TREE08-T diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index 1672573b037a..f46980c060aa 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX @@ -28,8 +28,6 @@ stallwarn.txt - RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress) torture.txt - RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST) -trace.txt - - CONFIG_RCU_TRACE debugfs files and formats UP.txt - RCU on Uniprocessor Systems whatisRCU.txt diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index 0e6550a8c926..95b30fa25d56 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -2034,7 +2034,7 @@ guard against mishaps and misuse: some other synchronization mechanism, for example, reference counting.

  • In kernels built with CONFIG_RCU_TRACE=y, RCU-related - information is provided via both debugfs and event tracing. + information is provided via event tracing.
  • Open-coded use of rcu_assign_pointer() and rcu_dereference() to create typical linked data structures can be surprisingly error-prone. diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt deleted file mode 100644 index 6549012033f9..000000000000 --- a/Documentation/RCU/trace.txt +++ /dev/null @@ -1,535 +0,0 @@ -CONFIG_RCU_TRACE debugfs Files and Formats - - -The rcutree and rcutiny implementations of RCU provide debugfs trace -output that summarizes counters and state. This information is useful for -debugging RCU itself, and can sometimes also help to debug abuses of RCU. -The following sections describe the debugfs files and formats, first -for rcutree and next for rcutiny. - - -CONFIG_TREE_RCU and CONFIG_PREEMPT_RCU debugfs Files and Formats - -These implementations of RCU provide several debugfs directories under the -top-level directory "rcu": - -rcu/rcu_bh -rcu/rcu_preempt -rcu/rcu_sched - -Each directory contains files for the corresponding flavor of RCU. -Note that rcu/rcu_preempt is only present for CONFIG_PREEMPT_RCU. -For CONFIG_TREE_RCU, the RCU flavor maps onto the RCU-sched flavor, -so that activity for both appears in rcu/rcu_sched. - -In addition, the following file appears in the top-level directory: -rcu/rcutorture. This file displays rcutorture test progress. The output -of "cat rcu/rcutorture" looks as follows: - -rcutorture test sequence: 0 (test in progress) -rcutorture update version number: 615 - -The first line shows the number of rcutorture tests that have completed -since boot. If a test is currently running, the "(test in progress)" -string will appear as shown above. The second line shows the number of -update cycles that the current test has started, or zero if there is -no test in progress. - - -Within each flavor directory (rcu/rcu_bh, rcu/rcu_sched, and possibly -also rcu/rcu_preempt) the following files will be present: - -rcudata: - Displays fields in struct rcu_data. -rcuexp: - Displays statistics for expedited grace periods. -rcugp: - Displays grace-period counters. -rcuhier: - Displays the struct rcu_node hierarchy. -rcu_pending: - Displays counts of the reasons rcu_pending() decided that RCU had - work to do. -rcuboost: - Displays RCU boosting statistics. Only present if - CONFIG_RCU_BOOST=y. - -The output of "cat rcu/rcu_preempt/rcudata" looks as follows: - - 0!c=30455 g=30456 cnq=1/0:1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 - 1!c=30719 g=30720 cnq=1/0:0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 - 2!c=30150 g=30151 cnq=1/1:1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 - 3 c=31249 g=31250 cnq=1/1:0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 - 4!c=29502 g=29503 cnq=1/0:1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 - 5 c=31201 g=31202 cnq=1/0:1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 - 6!c=30253 g=30254 cnq=1/0:1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 - 7 c=31178 g=31178 cnq=1/0:0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 - -This file has one line per CPU, or eight for this 8-CPU system. -The fields are as follows: - -o The number at the beginning of each line is the CPU number. 
- CPUs numbers followed by an exclamation mark are offline, - but have been online at least once since boot. There will be - no output for CPUs that have never been online, which can be - a good thing in the surprisingly common case where NR_CPUS is - substantially larger than the number of actual CPUs. - -o "c" is the count of grace periods that this CPU believes have - completed. Offlined CPUs and CPUs in dynticks idle mode may lag - quite a ways behind, for example, CPU 4 under "rcu_sched" above, - which has been offline through 16 RCU grace periods. It is not - unusual to see offline CPUs lagging by thousands of grace periods. - Note that although the grace-period number is an unsigned long, - it is printed out as a signed long to allow more human-friendly - representation near boot time. - -o "g" is the count of grace periods that this CPU believes have - started. Again, offlined CPUs and CPUs in dynticks idle mode - may lag behind. If the "c" and "g" values are equal, this CPU - has already reported a quiescent state for the last RCU grace - period that it is aware of, otherwise, the CPU believes that it - owes RCU a quiescent state. - -o "pq" indicates that this CPU has passed through a quiescent state - for the current grace period. It is possible for "pq" to be - "1" and "c" different than "g", which indicates that although - the CPU has passed through a quiescent state, either (1) this - CPU has not yet reported that fact, (2) some other CPU has not - yet reported for this grace period, or (3) both. - -o "qp" indicates that RCU still expects a quiescent state from - this CPU. Offlined CPUs and CPUs in dyntick idle mode might - well have qp=1, which is OK: RCU is still ignoring them. - -o "dt" is the current value of the dyntick counter that is incremented - when entering or leaving idle, either due to a context switch or - due to an interrupt. This number is even if the CPU is in idle - from RCU's viewpoint and odd otherwise. The number after the - first "/" is the interrupt nesting depth when in idle state, - or a large number added to the interrupt-nesting depth when - running a non-idle task. Some architectures do not accurately - count interrupt nesting when running in non-idle kernel context, - which can result in interesting anomalies such as negative - interrupt-nesting levels. The number after the second "/" - is the NMI nesting depth. - -o "df" is the number of times that some other CPU has forced a - quiescent state on behalf of this CPU due to this CPU being in - idle state. - -o "of" is the number of times that some other CPU has forced a - quiescent state on behalf of this CPU due to this CPU being - offline. In a perfect world, this might never happen, but it - turns out that offlining and onlining a CPU can take several grace - periods, and so there is likely to be an extended period of time - when RCU believes that the CPU is online when it really is not. - Please note that erring in the other direction (RCU believing a - CPU is offline when it is really alive and kicking) is a fatal - error, so it makes sense to err conservatively. - -o "ql" is the number of RCU callbacks currently residing on - this CPU. The first number is the number of "lazy" callbacks - that are known to RCU to only be freeing memory, and the number - after the "/" is the total number of callbacks, lazy or not. 
- These counters count callbacks regardless of what phase of - grace-period processing that they are in (new, waiting for - grace period to start, waiting for grace period to end, ready - to invoke). - -o "qs" gives an indication of the state of the callback queue - with four characters: - - "N" Indicates that there are callbacks queued that are not - ready to be handled by the next grace period, and thus - will be handled by the grace period following the next - one. - - "R" Indicates that there are callbacks queued that are - ready to be handled by the next grace period. - - "W" Indicates that there are callbacks queued that are - waiting on the current grace period. - - "D" Indicates that there are callbacks queued that have - already been handled by a prior grace period, and are - thus waiting to be invoked. Note that callbacks in - the process of being invoked are not counted here. - Callbacks in the process of being invoked are those - that have been removed from the rcu_data structures - queues by rcu_do_batch(), but which have not yet been - invoked. - - If there are no callbacks in a given one of the above states, - the corresponding character is replaced by ".". - -o "b" is the batch limit for this CPU. If more than this number - of RCU callbacks is ready to invoke, then the remainder will - be deferred. - -o "ci" is the number of RCU callbacks that have been invoked for - this CPU. Note that ci+nci+ql is the number of callbacks that have - been registered in absence of CPU-hotplug activity. - -o "nci" is the number of RCU callbacks that have been offloaded from - this CPU. This will always be zero unless the kernel was built - with CONFIG_RCU_NOCB_CPU=y and the "rcu_nocbs=" kernel boot - parameter was specified. - -o "co" is the number of RCU callbacks that have been orphaned due to - this CPU going offline. These orphaned callbacks have been moved - to an arbitrarily chosen online CPU. - -o "ca" is the number of RCU callbacks that have been adopted by this - CPU due to other CPUs going offline. Note that ci+co-ca+ql is - the number of RCU callbacks registered on this CPU. - - -Kernels compiled with CONFIG_RCU_BOOST=y display the following from -/debug/rcu/rcu_preempt/rcudata: - - 0!c=12865 g=12866 cnq=1/0:1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 - 1 c=14407 g=14408 cnq=1/0:0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 - 2 c=14407 g=14408 cnq=1/0:0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 - 3 c=14407 g=14408 cnq=1/0:0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 - 4 c=14405 g=14406 cnq=1/0:1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 - 5!c=14168 g=14169 cnq=1/0:0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 - 6 c=14404 g=14405 cnq=1/0:0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 - 7 c=14407 g=14408 cnq=1/0:1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 - -This is similar to the output discussed above, but contains the following -additional fields: - -o "kt" is the per-CPU kernel-thread state. The digit preceding - the first slash is zero if there is no work pending and 1 - otherwise. 
The character between the first pair of slashes is - as follows: - - "S" The kernel thread is stopped, in other words, all - CPUs corresponding to this rcu_node structure are - offline. - - "R" The kernel thread is running. - - "W" The kernel thread is waiting because there is no work - for it to do. - - "O" The kernel thread is waiting because it has been - forced off of its designated CPU or because its - ->cpus_allowed mask permits it to run on other than - its designated CPU. - - "Y" The kernel thread is yielding to avoid hogging CPU. - - "?" Unknown value, indicates a bug. - - The number after the final slash is the CPU that the kthread - is actually running on. - - This field is displayed only for CONFIG_RCU_BOOST kernels. - -o "ktl" is the low-order 16 bits (in hexadecimal) of the count of - the number of times that this CPU's per-CPU kthread has gone - through its loop servicing invoke_rcu_cpu_kthread() requests. - - This field is displayed only for CONFIG_RCU_BOOST kernels. - - -The output of "cat rcu/rcu_preempt/rcuexp" looks as follows: - -s=21872 wd1=0 wd2=0 wd3=5 enq=0 sc=21872 - -These fields are as follows: - -o "s" is the sequence number, with an odd number indicating that - an expedited grace period is in progress. - -o "wd1", "wd2", and "wd3" are the number of times that an attempt - to start an expedited grace period found that someone else had - completed an expedited grace period that satisfies the attempted - request. "Our work is done." - -o "enq" is the number of quiescent states still outstanding. - -o "sc" is the number of times that the attempt to start a - new expedited grace period succeeded. - - -The output of "cat rcu/rcu_preempt/rcugp" looks as follows: - -completed=31249 gpnum=31250 age=1 max=18 - -These fields are taken from the rcu_state structure, and are as follows: - -o "completed" is the number of grace periods that have completed. - It is comparable to the "c" field from rcu/rcudata in that a - CPU whose "c" field matches the value of "completed" is aware - that the corresponding RCU grace period has completed. - -o "gpnum" is the number of grace periods that have started. It is - similarly comparable to the "g" field from rcu/rcudata in that - a CPU whose "g" field matches the value of "gpnum" is aware that - the corresponding RCU grace period has started. - - If these two fields are equal, then there is no grace period - in progress, in other words, RCU is idle. On the other hand, - if the two fields differ (as they are above), then an RCU grace - period is in progress. - -o "age" is the number of jiffies that the current grace period - has extended for, or zero if there is no grace period currently - in effect. - -o "max" is the age in jiffies of the longest-duration grace period - thus far. - -The output of "cat rcu/rcu_preempt/rcuhier" looks as follows: - -c=14407 g=14408 s=0 jfq=2 j=c863 nfqs=12040/nfqsng=0(12040) fqlh=1051 oqlen=0/0 -3/3 ..>. 0:7 ^0 -e/e ..>. 0:3 ^0 d/d ..>. 4:7 ^1 - -The fields are as follows: - -o "c" is exactly the same as "completed" under rcu/rcu_preempt/rcugp. - -o "g" is exactly the same as "gpnum" under rcu/rcu_preempt/rcugp. - -o "s" is the current state of the force_quiescent_state() - state machine. - -o "jfq" is the number of jiffies remaining for this grace period - before force_quiescent_state() is invoked to help push things - along. Note that CPUs in idle mode throughout the grace period - will not report on their own, but rather must be check by some - other CPU via force_quiescent_state(). 
- -o "j" is the low-order four hex digits of the jiffies counter. - Yes, Paul did run into a number of problems that turned out to - be due to the jiffies counter no longer counting. Why do you ask? - -o "nfqs" is the number of calls to force_quiescent_state() since - boot. - -o "nfqsng" is the number of useless calls to force_quiescent_state(), - where there wasn't actually a grace period active. This can - no longer happen due to grace-period processing being pushed - into a kthread. The number in parentheses is the difference - between "nfqs" and "nfqsng", or the number of times that - force_quiescent_state() actually did some real work. - -o "fqlh" is the number of calls to force_quiescent_state() that - exited immediately (without even being counted in nfqs above) - due to contention on ->fqslock. - -o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node - structure. Each line represents one level of the hierarchy, - from root to leaves. It is best to think of the rcu_data - structures as forming yet another level after the leaves. - Note that there might be either one, two, three, or even four - levels of rcu_node structures, depending on the relationship - between CONFIG_RCU_FANOUT, CONFIG_RCU_FANOUT_LEAF (possibly - adjusted using the rcu_fanout_leaf kernel boot parameter), and - CONFIG_NR_CPUS (possibly adjusted using the nr_cpu_ids count of - possible CPUs for the booting hardware). - - o The numbers separated by the "/" are the qsmask followed - by the qsmaskinit. The qsmask will have one bit - set for each entity in the next lower level that has - not yet checked in for the current grace period ("e" - indicating CPUs 5, 6, and 7 in the example above). - The qsmaskinit will have one bit for each entity that is - currently expected to check in during each grace period. - The value of qsmaskinit is assigned to that of qsmask - at the beginning of each grace period. - - o The characters separated by the ">" indicate the state - of the blocked-tasks lists. A "G" preceding the ">" - indicates that at least one task blocked in an RCU - read-side critical section blocks the current grace - period, while a "E" preceding the ">" indicates that - at least one task blocked in an RCU read-side critical - section blocks the current expedited grace period. - A "T" character following the ">" indicates that at - least one task is blocked within an RCU read-side - critical section, regardless of whether any current - grace period (expedited or normal) is inconvenienced. - A "." character appears if the corresponding condition - does not hold, so that "..>." indicates that no tasks - are blocked. In contrast, "GE>T" indicates maximal - inconvenience from blocked tasks. CONFIG_TREE_RCU - builds of the kernel will always show "..>.". - - o The numbers separated by the ":" are the range of CPUs - served by this struct rcu_node. This can be helpful - in working out how the hierarchy is wired together. - - For example, the example rcu_node structure shown above - has "0:7", indicating that it covers CPUs 0 through 7. - - o The number after the "^" indicates the bit in the - next higher level rcu_node structure that this rcu_node - structure corresponds to. For example, the "d/d ..>. 4:7 - ^1" has a "1" in this position, indicating that it - corresponds to the "1" bit in the "3" shown in the - "3/3 ..>. 0:7 ^0" entry on the next level up. 
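The rcu_node CPU ranges in the example above fall straight out of the fanout parameters. A small sketch, assuming a uniform two-level tree (the real code supports up to four levels and boot-time adjustment via rcu_fanout_leaf), reproduces the "0:7 ^0", "0:3 ^0", "4:7 ^1" layout for an 8-CPU system with a leaf fanout of 4:

#include <stdio.h>

int main(void)
{
        int nr_cpus = 8, fanout_leaf = 4;
        int nleaves = (nr_cpus + fanout_leaf - 1) / fanout_leaf;
        int i;

        printf("root:  0:%d ^0\n", nr_cpus - 1);
        for (i = 0; i < nleaves; i++) {
                int lo = i * fanout_leaf;
                int hi = lo + fanout_leaf - 1;

                if (hi >= nr_cpus)
                        hi = nr_cpus - 1;
                /* ^i: the bit this leaf owns in the root's qsmask */
                printf("leaf:  %d:%d ^%d\n", lo, hi, i);
        }
        return 0;
}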
- - -The output of "cat rcu/rcu_sched/rcu_pending" looks as follows: - - 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0 - 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0 - 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0 - 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0 - 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0 - 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0 - 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0 - 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0 - -The fields are as follows: - -o The leading number is the CPU number, with "!" indicating - an offline CPU. - -o "np" is the number of times that __rcu_pending() has been invoked - for the corresponding flavor of RCU. - -o "qsp" is the number of times that the RCU was waiting for a - quiescent state from this CPU. - -o "rpq" is the number of times that the CPU had passed through - a quiescent state, but not yet reported it to RCU. - -o "cbr" is the number of times that this CPU had RCU callbacks - that had passed through a grace period, and were thus ready - to be invoked. - -o "cng" is the number of times that this CPU needed another - grace period while RCU was idle. - -o "gpc" is the number of times that an old grace period had - completed, but this CPU was not yet aware of it. - -o "gps" is the number of times that a new grace period had started, - but this CPU was not yet aware of it. - -o "ndw" is the number of times that a wakeup of an rcuo - callback-offload kthread had to be deferred in order to avoid - deadlock. - -o "nn" is the number of times that this CPU needed nothing. - - -The output of "cat rcu/rcuboost" looks as follows: - -0:3 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894 - balk: nt=0 egt=4695 bt=0 nb=0 ny=56 nos=0 -4:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894 - balk: nt=0 egt=6541 bt=0 nb=0 ny=126 nos=0 - -This information is output only for rcu_preempt. Each two-line entry -corresponds to a leaf rcu_node structure. The fields are as follows: - -o "n:m" is the CPU-number range for the corresponding two-line - entry. In the sample output above, the first entry covers - CPUs zero through three and the second entry covers CPUs four - through seven. - -o "tasks=TNEB" gives the state of the various segments of the - rnp->blocked_tasks list: - - "T" This indicates that there are some tasks that blocked - while running on one of the corresponding CPUs while - in an RCU read-side critical section. - - "N" This indicates that some of the blocked tasks are preventing - the current normal (non-expedited) grace period from - completing. - - "E" This indicates that some of the blocked tasks are preventing - the current expedited grace period from completing. - - "B" This indicates that some of the blocked tasks are in - need of RCU priority boosting. - - Each character is replaced with "." if the corresponding - condition does not hold. - -o "kt" is the state of the RCU priority-boosting kernel - thread associated with the corresponding rcu_node structure. - The state can be one of the following: - - "S" The kernel thread is stopped, in other words, all - CPUs corresponding to this rcu_node structure are - offline. - - "R" The kernel thread is running. - - "W" The kernel thread is waiting because there is no work - for it to do. - - "Y" The kernel thread is yielding to avoid hogging CPU. - - "?" 
Unknown value, indicates a bug. - -o "ntb" is the number of tasks boosted. - -o "neb" is the number of tasks boosted in order to complete an - expedited grace period. - -o "nnb" is the number of tasks boosted in order to complete a - normal (non-expedited) grace period. When boosting a task - that was blocking both an expedited and a normal grace period, - it is counted against the expedited total above. - -o "j" is the low-order 16 bits of the jiffies counter in - hexadecimal. - -o "bt" is the low-order 16 bits of the value that the jiffies - counter will have when we next start boosting, assuming that - the current grace period does not end beforehand. This is - also in hexadecimal. - -o "balk: nt" counts the number of times we didn't boost (in - other words, we balked) even though it was time to boost because - there were no blocked tasks to boost. This situation occurs - when there is one blocked task on one rcu_node structure and - none on some other rcu_node structure. - -o "egt" counts the number of times we balked because although - there were blocked tasks, none of them were blocking the - current grace period, whether expedited or otherwise. - -o "bt" counts the number of times we balked because boosting - had already been initiated for the current grace period. - -o "nb" counts the number of times we balked because there - was at least one task blocking the current non-expedited grace - period that never had blocked. If it is already running, it - just won't help to boost its priority! - -o "ny" counts the number of times we balked because it was - not yet time to start boosting. - -o "nos" counts the number of times we balked for other - reasons, e.g., the grace period ended first. - - -CONFIG_TINY_RCU debugfs Files and Formats - -These implementations of RCU provides a single debugfs file under the -top-level directory RCU, namely rcu/rcudata, which displays fields in -rcu_bh_ctrlblk and rcu_sched_ctrlblk. - -The output of "cat rcu/rcudata" is as follows: - -rcu_sched: qlen: 0 -rcu_bh: qlen: 0 - -This is split into rcu_sched and rcu_bh sections. The field is as -follows: - -o "qlen" is the number of RCU callbacks currently waiting either - for an RCU grace period or waiting to be invoked. This is the - only field present for rcu_sched and rcu_bh, due to the - short-circuiting of grace period in those two cases. diff --git a/init/Kconfig b/init/Kconfig index 2aa14ff40e88..3025383ab443 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -659,14 +659,6 @@ config RCU_FAST_NO_HZ Say N if you are unsure. -config TREE_RCU_TRACE - def_bool RCU_TRACE && ( TREE_RCU || PREEMPT_RCU ) - select DEBUG_FS - help - This option provides tracing for the TREE_RCU and - PREEMPT_RCU implementations, permitting Makefile to - trivially select kernel/rcutree_trace.c. 
- config RCU_BOOST bool "Enable RCU priority boosting" depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 3945337c8ce4..13c0fc852767 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -9,6 +9,5 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o obj-$(CONFIG_TREE_RCU) += tree.o obj-$(CONFIG_PREEMPT_RCU) += tree.o -obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o obj-$(CONFIG_TINY_RCU) += tiny.o obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 371034e77f87..c642f23f1582 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -24,8 +24,6 @@ #include #include -#include -#include /* Global control variables for rcupdate callback mechanism. */ struct rcu_ctrlblk { @@ -87,49 +85,6 @@ static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) local_irq_restore(flags); } -/* - * Dump statistics for TINY_RCU, such as they are. - */ -static int show_tiny_stats(struct seq_file *m, void *unused) -{ - seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen); - seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen); - return 0; -} - -static int show_tiny_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_tiny_stats, NULL); -} - -static const struct file_operations show_tiny_stats_fops = { - .owner = THIS_MODULE, - .open = show_tiny_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static struct dentry *rcudir; - -static int __init rcutiny_trace_init(void) -{ - struct dentry *retval; - - rcudir = debugfs_create_dir("rcu", NULL); - if (!rcudir) - goto free_out; - retval = debugfs_create_file("rcudata", 0444, rcudir, - NULL, &show_tiny_stats_fops); - if (!retval) - goto free_out; - return 0; -free_out: - debugfs_remove_recursive(rcudir); - return 1; -} -device_initcall(rcutiny_trace_init); - static void check_cpu_stall(struct rcu_ctrlblk *rcp) { unsigned long j; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 2c112bb11aa8..9af0f31d6847 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -152,19 +152,6 @@ struct rcu_node { /* Number of tasks boosted for expedited GP. */ unsigned long n_normal_boosts; /* Number of tasks boosted for normal GP. */ - unsigned long n_balk_blkd_tasks; - /* Refused to boost: no blocked tasks. */ - unsigned long n_balk_exp_gp_tasks; - /* Refused to boost: nothing blocking GP. */ - unsigned long n_balk_boost_tasks; - /* Refused to boost: already boosting. */ - unsigned long n_balk_notblocked; - /* Refused to boost: RCU RS CS still running. */ - unsigned long n_balk_notyet; - /* Refused to boost: not yet time. */ - unsigned long n_balk_nos; - /* Refused to boost: not sure why, though. */ - /* This can happen due to race conditions. */ #ifdef CONFIG_RCU_NOCB_CPU struct swait_queue_head nocb_gp_wq[2]; /* Place for rcu_nocb_kthread() to wait GP. */ @@ -535,17 +522,3 @@ void srcu_offline_cpu(unsigned int cpu) { } #endif /* #else #ifdef CONFIG_SRCU */ #endif /* #ifndef RCU_TREE_NONCORE */ - -#ifdef CONFIG_RCU_TRACE -/* Read out queue lengths for tracing. 
*/ -static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) -{ -#ifdef CONFIG_RCU_NOCB_CPU - *ql = atomic_long_read(&rdp->nocb_q_count); - *qll = atomic_long_read(&rdp->nocb_q_count_lazy); -#else /* #ifdef CONFIG_RCU_NOCB_CPU */ - *ql = 0; - *qll = 0; -#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ -} -#endif /* #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7f5919ab24c4..43f2f8026b4a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -70,7 +70,7 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ static void __init rcu_bootup_announce_oddness(void) { if (IS_ENABLED(CONFIG_RCU_TRACE)) - pr_info("\tRCU debugfs-based tracing is enabled.\n"); + pr_info("\tRCU event tracing is enabled.\n"); if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", @@ -899,33 +899,6 @@ void exit_rcu(void) #include "../locking/rtmutex_common.h" -#ifdef CONFIG_RCU_TRACE - -static void rcu_initiate_boost_trace(struct rcu_node *rnp) -{ - if (!rcu_preempt_has_tasks(rnp)) - rnp->n_balk_blkd_tasks++; - else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) - rnp->n_balk_exp_gp_tasks++; - else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) - rnp->n_balk_boost_tasks++; - else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) - rnp->n_balk_notblocked++; - else if (rnp->gp_tasks != NULL && - ULONG_CMP_LT(jiffies, rnp->boost_time)) - rnp->n_balk_notyet++; - else - rnp->n_balk_nos++; -} - -#else /* #ifdef CONFIG_RCU_TRACE */ - -static void rcu_initiate_boost_trace(struct rcu_node *rnp) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_TRACE */ - static void rcu_wake_cond(struct task_struct *t, int status) { /* @@ -1058,7 +1031,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) lockdep_assert_held(&rnp->lock); if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { - rnp->n_balk_exp_gp_tasks++; raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } @@ -1074,7 +1046,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) if (t) rcu_wake_cond(t, rnp->boost_kthread_status); } else { - rcu_initiate_boost_trace(rnp); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c deleted file mode 100644 index 6cea17a1ea30..000000000000 --- a/kernel/rcu/tree_trace.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Read-Copy Update tracing for hierarchical implementation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * Copyright IBM Corporation, 2008 - * Author: Paul E. 
McKenney - * - * Papers: http://www.rdrop.com/users/paulmck/RCU - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define RCU_TREE_NONCORE -#include "tree.h" -#include "rcu.h" - -static int r_open(struct inode *inode, struct file *file, - const struct seq_operations *op) -{ - int ret = seq_open(file, op); - if (!ret) { - struct seq_file *m = (struct seq_file *)file->private_data; - m->private = inode->i_private; - } - return ret; -} - -static void *r_start(struct seq_file *m, loff_t *pos) -{ - struct rcu_state *rsp = (struct rcu_state *)m->private; - *pos = cpumask_next(*pos - 1, cpu_possible_mask); - if ((*pos) < nr_cpu_ids) - return per_cpu_ptr(rsp->rda, *pos); - return NULL; -} - -static void *r_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return r_start(m, pos); -} - -static void r_stop(struct seq_file *m, void *v) -{ -} - -static int show_rcubarrier(struct seq_file *m, void *v) -{ - struct rcu_state *rsp = (struct rcu_state *)m->private; - seq_printf(m, "bcc: %d bseq: %lu\n", - atomic_read(&rsp->barrier_cpu_count), - rsp->barrier_sequence); - return 0; -} - -static int rcubarrier_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_rcubarrier, inode->i_private); -} - -static const struct file_operations rcubarrier_fops = { - .owner = THIS_MODULE, - .open = rcubarrier_open, - .read = seq_read, - .llseek = no_llseek, - .release = single_release, -}; - -#ifdef CONFIG_RCU_BOOST - -static char convert_kthread_status(unsigned int kthread_status) -{ - if (kthread_status > RCU_KTHREAD_MAX) - return '?'; - return "SRWOY"[kthread_status]; -} - -#endif /* #ifdef CONFIG_RCU_BOOST */ - -static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) -{ - long ql, qll; - - if (!rdp->beenonline) - return; - seq_printf(m, "%3d%cc=%ld g=%ld cnq=%d/%d:%d", - rdp->cpu, - cpu_is_offline(rdp->cpu) ? '!' 
: ' ', - ulong2long(rdp->completed), ulong2long(rdp->gpnum), - rdp->cpu_no_qs.b.norm, - rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu), - rdp->core_needs_qs); - seq_printf(m, " dt=%d/%llx/%d df=%lu", - rcu_dynticks_snap(rdp->dynticks), - rdp->dynticks->dynticks_nesting, - rdp->dynticks->dynticks_nmi_nesting, - rdp->dynticks_fqs); - seq_printf(m, " of=%lu", rdp->offline_fqs); - rcu_nocb_q_lengths(rdp, &ql, &qll); - qll += rcu_segcblist_n_lazy_cbs(&rdp->cblist); - ql += rcu_segcblist_n_cbs(&rdp->cblist); - seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c", - qll, ql, - ".N"[!rcu_segcblist_segempty(&rdp->cblist, RCU_NEXT_TAIL)], - ".R"[!rcu_segcblist_segempty(&rdp->cblist, - RCU_NEXT_READY_TAIL)], - ".W"[!rcu_segcblist_segempty(&rdp->cblist, RCU_WAIT_TAIL)], - ".D"[!rcu_segcblist_segempty(&rdp->cblist, RCU_DONE_TAIL)]); -#ifdef CONFIG_RCU_BOOST - seq_printf(m, " kt=%d/%c ktl=%x", - per_cpu(rcu_cpu_has_work, rdp->cpu), - convert_kthread_status(per_cpu(rcu_cpu_kthread_status, - rdp->cpu)), - per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); -#endif /* #ifdef CONFIG_RCU_BOOST */ - seq_printf(m, " b=%ld", rdp->blimit); - seq_printf(m, " ci=%lu nci=%lu co=%lu ca=%lu\n", - rdp->n_cbs_invoked, rdp->n_nocbs_invoked, - rdp->n_cbs_orphaned, rdp->n_cbs_adopted); -} - -static int show_rcudata(struct seq_file *m, void *v) -{ - print_one_rcu_data(m, (struct rcu_data *)v); - return 0; -} - -static const struct seq_operations rcudate_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = show_rcudata, -}; - -static int rcudata_open(struct inode *inode, struct file *file) -{ - return r_open(inode, file, &rcudate_op); -} - -static const struct file_operations rcudata_fops = { - .owner = THIS_MODULE, - .open = rcudata_open, - .read = seq_read, - .llseek = no_llseek, - .release = seq_release, -}; - -static int show_rcuexp(struct seq_file *m, void *v) -{ - int cpu; - struct rcu_state *rsp = (struct rcu_state *)m->private; - struct rcu_data *rdp; - unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0; - - for_each_possible_cpu(cpu) { - rdp = per_cpu_ptr(rsp->rda, cpu); - s0 += atomic_long_read(&rdp->exp_workdone0); - s1 += atomic_long_read(&rdp->exp_workdone1); - s2 += atomic_long_read(&rdp->exp_workdone2); - s3 += atomic_long_read(&rdp->exp_workdone3); - } - seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu enq=%d sc=%lu\n", - rsp->expedited_sequence, s0, s1, s2, s3, - atomic_read(&rsp->expedited_need_qs), - rsp->expedited_sequence / 2); - return 0; -} - -static int rcuexp_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_rcuexp, inode->i_private); -} - -static const struct file_operations rcuexp_fops = { - .owner = THIS_MODULE, - .open = rcuexp_open, - .read = seq_read, - .llseek = no_llseek, - .release = single_release, -}; - -#ifdef CONFIG_RCU_BOOST - -static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp) -{ - seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ", - rnp->grplo, rnp->grphi, - "T."[list_empty(&rnp->blkd_tasks)], - "N."[!rnp->gp_tasks], - "E."[!rnp->exp_tasks], - "B."[!rnp->boost_tasks], - convert_kthread_status(rnp->boost_kthread_status), - rnp->n_tasks_boosted, rnp->n_exp_boosts, - rnp->n_normal_boosts); - seq_printf(m, "j=%04x bt=%04x\n", - (int)(jiffies & 0xffff), - (int)(rnp->boost_time & 0xffff)); - seq_printf(m, " balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n", - rnp->n_balk_blkd_tasks, - rnp->n_balk_exp_gp_tasks, - rnp->n_balk_boost_tasks, - rnp->n_balk_notblocked, - rnp->n_balk_notyet, - 
rnp->n_balk_nos); -} - -static int show_rcu_node_boost(struct seq_file *m, void *unused) -{ - struct rcu_node *rnp; - - rcu_for_each_leaf_node(&rcu_preempt_state, rnp) - print_one_rcu_node_boost(m, rnp); - return 0; -} - -static int rcu_node_boost_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_rcu_node_boost, NULL); -} - -static const struct file_operations rcu_node_boost_fops = { - .owner = THIS_MODULE, - .open = rcu_node_boost_open, - .read = seq_read, - .llseek = no_llseek, - .release = single_release, -}; - -#endif /* #ifdef CONFIG_RCU_BOOST */ - -static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) -{ - unsigned long gpnum; - int level = 0; - struct rcu_node *rnp; - - gpnum = rsp->gpnum; - seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ", - ulong2long(rsp->completed), ulong2long(gpnum), - rsp->gp_state, - (long)(rsp->jiffies_force_qs - jiffies), - (int)(jiffies & 0xffff)); - seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", - rsp->n_force_qs, rsp->n_force_qs_ngp, - rsp->n_force_qs - rsp->n_force_qs_ngp, - READ_ONCE(rsp->n_force_qs_lh), - rsp->orphan_done.len_lazy, - rsp->orphan_done.len); - for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { - if (rnp->level != level) { - seq_puts(m, "\n"); - level = rnp->level; - } - seq_printf(m, "%lx/%lx->%lx %c%c>%c %d:%d ^%d ", - rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext, - ".G"[rnp->gp_tasks != NULL], - ".E"[rnp->exp_tasks != NULL], - ".T"[!list_empty(&rnp->blkd_tasks)], - rnp->grplo, rnp->grphi, rnp->grpnum); - } - seq_puts(m, "\n"); -} - -static int show_rcuhier(struct seq_file *m, void *v) -{ - struct rcu_state *rsp = (struct rcu_state *)m->private; - print_one_rcu_state(m, rsp); - return 0; -} - -static int rcuhier_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_rcuhier, inode->i_private); -} - -static const struct file_operations rcuhier_fops = { - .owner = THIS_MODULE, - .open = rcuhier_open, - .read = seq_read, - .llseek = no_llseek, - .release = single_release, -}; - -static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp) -{ - unsigned long flags; - unsigned long completed; - unsigned long gpnum; - unsigned long gpage; - unsigned long gpmax; - struct rcu_node *rnp = &rsp->node[0]; - - raw_spin_lock_irqsave_rcu_node(rnp, flags); - completed = READ_ONCE(rsp->completed); - gpnum = READ_ONCE(rsp->gpnum); - if (completed == gpnum) - gpage = 0; - else - gpage = jiffies - rsp->gp_start; - gpmax = rsp->gp_max; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - seq_printf(m, "completed=%ld gpnum=%ld age=%ld max=%ld\n", - ulong2long(completed), ulong2long(gpnum), gpage, gpmax); -} - -static int show_rcugp(struct seq_file *m, void *v) -{ - struct rcu_state *rsp = (struct rcu_state *)m->private; - show_one_rcugp(m, rsp); - return 0; -} - -static int rcugp_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_rcugp, inode->i_private); -} - -static const struct file_operations rcugp_fops = { - .owner = THIS_MODULE, - .open = rcugp_open, - .read = seq_read, - .llseek = no_llseek, - .release = single_release, -}; - -static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) -{ - if (!rdp->beenonline) - return; - seq_printf(m, "%3d%cnp=%ld ", - rdp->cpu, - cpu_is_offline(rdp->cpu) ? '!' 
: ' ', - rdp->n_rcu_pending); - seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ", - rdp->n_rp_core_needs_qs, - rdp->n_rp_report_qs, - rdp->n_rp_cb_ready, - rdp->n_rp_cpu_needs_gp); - seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw%ld\n", - rdp->n_rp_gp_completed, - rdp->n_rp_gp_started, - rdp->n_rp_nocb_defer_wakeup, - rdp->n_rp_need_nothing); -} - -static int show_rcu_pending(struct seq_file *m, void *v) -{ - print_one_rcu_pending(m, (struct rcu_data *)v); - return 0; -} - -static const struct seq_operations rcu_pending_op = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = show_rcu_pending, -}; - -static int rcu_pending_open(struct inode *inode, struct file *file) -{ - return r_open(inode, file, &rcu_pending_op); -} - -static const struct file_operations rcu_pending_fops = { - .owner = THIS_MODULE, - .open = rcu_pending_open, - .read = seq_read, - .llseek = no_llseek, - .release = seq_release, -}; - -static int show_rcutorture(struct seq_file *m, void *unused) -{ - seq_printf(m, "rcutorture test sequence: %lu %s\n", - rcutorture_testseq >> 1, - (rcutorture_testseq & 0x1) ? "(test in progress)" : ""); - seq_printf(m, "rcutorture update version number: %lu\n", - rcutorture_vernum); - return 0; -} - -static int rcutorture_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_rcutorture, NULL); -} - -static const struct file_operations rcutorture_fops = { - .owner = THIS_MODULE, - .open = rcutorture_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static struct dentry *rcudir; - -static int __init rcutree_trace_init(void) -{ - struct rcu_state *rsp; - struct dentry *retval; - struct dentry *rspdir; - - rcudir = debugfs_create_dir("rcu", NULL); - if (!rcudir) - goto free_out; - - for_each_rcu_flavor(rsp) { - rspdir = debugfs_create_dir(rsp->name, rcudir); - if (!rspdir) - goto free_out; - - retval = debugfs_create_file("rcudata", 0444, - rspdir, rsp, &rcudata_fops); - if (!retval) - goto free_out; - - retval = debugfs_create_file("rcuexp", 0444, - rspdir, rsp, &rcuexp_fops); - if (!retval) - goto free_out; - - retval = debugfs_create_file("rcu_pending", 0444, - rspdir, rsp, &rcu_pending_fops); - if (!retval) - goto free_out; - - retval = debugfs_create_file("rcubarrier", 0444, - rspdir, rsp, &rcubarrier_fops); - if (!retval) - goto free_out; - -#ifdef CONFIG_RCU_BOOST - if (rsp == &rcu_preempt_state) { - retval = debugfs_create_file("rcuboost", 0444, - rspdir, NULL, &rcu_node_boost_fops); - if (!retval) - goto free_out; - } -#endif - - retval = debugfs_create_file("rcugp", 0444, - rspdir, rsp, &rcugp_fops); - if (!retval) - goto free_out; - - retval = debugfs_create_file("rcuhier", 0444, - rspdir, rsp, &rcuhier_fops); - if (!retval) - goto free_out; - } - - retval = debugfs_create_file("rcutorture", 0444, rcudir, - NULL, &rcutorture_fops); - if (!retval) - goto free_out; - return 0; -free_out: - debugfs_remove_recursive(rcudir); - return 1; -} -device_initcall(rcutree_trace_init); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 498d5dd63bf4..8c10b5a97b9e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1361,9 +1361,8 @@ config RCU_TRACE default y if TREE_RCU select TRACE_CLOCK help - This option provides tracing in RCU which presents stats - in debugfs for debugging RCU implementation. It also enables - additional tracepoints for ftrace-style event tracing. + This option enables additional tracepoints for ftrace-style + event tracing. 
Say Y here if you want to enable RCU tracing Say N if you are unsure. diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T deleted file mode 100644 index 917d2517b5b5..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T +++ /dev/null @@ -1,21 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_LOCKING=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T deleted file mode 100644 index 2ad13f0d29cc..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T +++ /dev/null @@ -1,21 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=16 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ALL=y -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index c5c29fb7438c..928fadaecc25 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -79,9 +79,5 @@ CONFIG_TASKS_RCU Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable. -CONFIG_RCU_TRACE - - Implied by CONFIG_RCU_TRACE for Tree RCU. - boot parameters ignored: TBD -- cgit v1.2.1 From 44c65ff2e3b0b48250a970183ab53b0602c25764 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 15 May 2017 16:26:34 -0700 Subject: rcu: Eliminate NOCBs CPU-state Kconfig options The CONFIG_RCU_NOCB_CPU_ALL, CONFIG_RCU_NOCB_CPU_NONE, and CONFIG_RCU_NOCB_CPU_ZERO Kconfig options are used only in testing and are redundant with the rcu_nocbs= boot parameter. This commit therefore removes these three Kconfig options and adjusts the rcutorture scripts to use the boot parameter instead. Signed-off-by: Paul E. McKenney --- Documentation/kernel-per-CPU-kthreads.txt | 31 ++++++------- Documentation/timers/NO_HZ.txt | 29 ++---------- init/Kconfig | 53 ---------------------- kernel/rcu/rcu.h | 4 +- kernel/rcu/tree_plugin.h | 27 ++--------- .../selftests/rcutorture/configs/rcu/TREE01 | 1 - .../selftests/rcutorture/configs/rcu/TREE01.boot | 1 + .../selftests/rcutorture/configs/rcu/TREE05 | 1 - .../selftests/rcutorture/configs/rcu/TREE08 | 1 - .../selftests/rcutorture/configs/rcu/TREE08.boot | 1 + .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 8 ++-- 11 files changed, 26 insertions(+), 131 deletions(-) diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index df31e30b6a02..2cb7dc5c0e0d 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -109,13 +109,12 @@ SCHED_SOFTIRQ: Do all of the following: on that CPU. 
If a thread that expects to run on the de-jittered CPU awakens, the scheduler will send an IPI that can result in a subsequent SCHED_SOFTIRQ. -2. Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y, - CONFIG_NO_HZ_FULL=y, and, in addition, ensure that the CPU - to be de-jittered is marked as an adaptive-ticks CPU using the - "nohz_full=" boot parameter. This reduces the number of - scheduler-clock interrupts that the de-jittered CPU receives, - minimizing its chances of being selected to do the load balancing - work that runs in SCHED_SOFTIRQ context. +2. Build with CONFIG_NO_HZ_FULL=y and ensure that the CPU to be de-jittered + is marked as an adaptive-ticks CPU using the "nohz_full=" + boot parameter. This reduces the number of scheduler-clock + interrupts that the de-jittered CPU receives, minimizing its + chances of being selected to do the load balancing work that + runs in SCHED_SOFTIRQ context. 3. To the extent possible, keep the CPU out of the kernel when it is non-idle, for example, by avoiding system calls and by forcing both kernel threads and interrupts to execute elsewhere. @@ -135,11 +134,10 @@ HRTIMER_SOFTIRQ: Do all of the following: RCU_SOFTIRQ: Do at least one of the following: 1. Offload callbacks and keep the CPU in either dyntick-idle or adaptive-ticks state by doing all of the following: - a. Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y, - CONFIG_NO_HZ_FULL=y, and, in addition ensure that the CPU - to be de-jittered is marked as an adaptive-ticks CPU using - the "nohz_full=" boot parameter. Bind the rcuo kthreads - to housekeeping CPUs, which can tolerate OS jitter. + a. Build with CONFIG_NO_HZ_FULL=y and ensure that the CPU to be + de-jittered is marked as an adaptive-ticks CPU using the + "nohz_full=" boot parameter. Bind the rcuo kthreads to + housekeeping CPUs, which can tolerate OS jitter. b. To the extent possible, keep the CPU out of the kernel when it is non-idle, for example, by avoiding system calls and by forcing both kernel threads and interrupts @@ -236,11 +234,10 @@ To reduce its OS jitter, do at least one of the following: is feasible only if your workload never requires RCU priority boosting, for example, if you ensure frequent idle time on all CPUs that might execute within the kernel. -3. Build with CONFIG_RCU_NOCB_CPU=y and CONFIG_RCU_NOCB_CPU_ALL=y, - which offloads all RCU callbacks to kthreads that can be moved - off of CPUs susceptible to OS jitter. This approach prevents the - rcuc/%u kthreads from having any work to do, so that they are - never awakened. +3. Build with CONFIG_RCU_NOCB_CPU=y and boot with the rcu_nocbs= + boot parameter offloading RCU callbacks from all CPUs susceptible + to OS jitter. This approach prevents the rcuc/%u kthreads from + having any work to do, so that they are never awakened. 4. Ensure that the CPU never enters the kernel, and, in particular, avoid initiating any CPU hotplug operations on this CPU. This is another way of preventing any callbacks from being queued on the diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt index 6eaf576294f3..2dcaf9adb7a7 100644 --- a/Documentation/timers/NO_HZ.txt +++ b/Documentation/timers/NO_HZ.txt @@ -194,32 +194,9 @@ that the RCU callbacks are processed in a timely fashion. Another approach is to offload RCU callback processing to "rcuo" kthreads using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to -offload may be selected via several methods: - -1.
One of three mutually exclusive Kconfig options specify a - build-time default for the CPUs to offload: - - a. The CONFIG_RCU_NOCB_CPU_NONE=y Kconfig option results in - no CPUs being offloaded. - - b. The CONFIG_RCU_NOCB_CPU_ZERO=y Kconfig option causes - CPU 0 to be offloaded. - - c. The CONFIG_RCU_NOCB_CPU_ALL=y Kconfig option causes all - CPUs to be offloaded. Note that the callbacks will be - offloaded to "rcuo" kthreads, and that those kthreads - will in fact run on some CPU. However, this approach - gives fine-grained control on exactly which CPUs the - callbacks run on, along with their scheduling priority - (including the default of SCHED_OTHER), and it further - allows this control to be varied dynamically at runtime. - -2. The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated - list of CPUs and CPU ranges, for example, "1,3-5" selects CPUs 1, - 3, 4, and 5. The specified CPUs will be offloaded in addition to - any CPUs specified as offloaded by CONFIG_RCU_NOCB_CPU_ZERO=y or - CONFIG_RCU_NOCB_CPU_ALL=y. This means that the "rcu_nocbs=" boot - parameter has no effect for kernels built with RCU_NOCB_CPU_ALL=y. +offload may be selected using the "rcu_nocbs=" kernel boot parameter, +which takes a comma-separated list of CPUs and CPU ranges, for example, +"1,3-5" selects CPUs 1, 3, 4, and 5. The offloaded CPUs will never queue RCU callbacks, and therefore RCU never prevents offloaded CPUs from entering either dyntick-idle mode diff --git a/init/Kconfig b/init/Kconfig index 3025383ab443..dc431c6109f2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -709,59 +709,6 @@ config RCU_NOCB_CPU Say Y here if you want to help to debug reduced OS jitter. Say N here if you are unsure. -choice - prompt "Build-forced no-CBs CPUs" - default RCU_NOCB_CPU_NONE - depends on RCU_NOCB_CPU - help - This option allows no-CBs CPUs (whose RCU callbacks are invoked - from kthreads rather than from softirq context) to be specified - at build time. Additional no-CBs CPUs may be specified by - the rcu_nocbs= boot parameter. - -config RCU_NOCB_CPU_NONE - bool "No build_forced no-CBs CPUs" - help - This option does not force any of the CPUs to be no-CBs CPUs. - Only CPUs designated by the rcu_nocbs= boot parameter will be - no-CBs CPUs, whose RCU callbacks will be invoked by per-CPU - kthreads whose names begin with "rcuo". All other CPUs will - invoke their own RCU callbacks in softirq context. - - Select this option if you want to choose no-CBs CPUs at - boot time, for example, to allow testing of different no-CBs - configurations without having to rebuild the kernel each time. - -config RCU_NOCB_CPU_ZERO - bool "CPU 0 is a build_forced no-CBs CPU" - help - This option forces CPU 0 to be a no-CBs CPU, so that its RCU - callbacks are invoked by a per-CPU kthread whose name begins - with "rcuo". Additional CPUs may be designated as no-CBs - CPUs using the rcu_nocbs= boot parameter will be no-CBs CPUs. - All other CPUs will invoke their own RCU callbacks in softirq - context. - - Select this if CPU 0 needs to be a no-CBs CPU for real-time - or energy-efficiency reasons, but the real reason it exists - is to ensure that randconfig testing covers mixed systems. - -config RCU_NOCB_CPU_ALL - bool "All CPUs are build_forced no-CBs CPUs" - help - This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs= - boot parameter will be ignored. All CPUs' RCU callbacks will - be executed in the context of per-CPU rcuo kthreads created for - this purpose.
Assuming that the kthreads whose names start with - "rcuo" are bound to "housekeeping" CPUs, this reduces OS jitter - on the remaining CPUs, but might decrease memory locality during - RCU-callback invocation, thus potentially degrading throughput. - - Select this if all CPUs need to be no-CBs CPUs for real-time - or energy-efficiency reasons. - -endchoice - endmenu # "RCU Subsystem" config BUILD_BIN2C diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index d06c42deee0b..808b8c85f626 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -564,9 +564,7 @@ void rcu_bh_force_quiescent_state(void); void rcu_sched_force_quiescent_state(void); #endif /* #else #ifdef CONFIG_TINY_RCU */ -#if defined(CONFIG_RCU_NOCB_CPU_ALL) -static inline bool rcu_is_nocb_cpu(int cpu) { return true; } -#elif defined(CONFIG_RCU_NOCB_CPU) +#ifdef CONFIG_RCU_NOCB_CPU bool rcu_is_nocb_cpu(int cpu); #else static inline bool rcu_is_nocb_cpu(int cpu) { return false; } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 43f2f8026b4a..908b309d60d7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1296,8 +1296,7 @@ static void rcu_prepare_kthreads(int cpu) int rcu_needs_cpu(u64 basemono, u64 *nextevt) { *nextevt = KTIME_MAX; - return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) - ? 0 : rcu_cpu_has_callbacks(NULL); + return rcu_cpu_has_callbacks(NULL); } /* @@ -1409,10 +1408,6 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) unsigned long dj; RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_needs_cpu() invoked with irqs enabled!!!"); - if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { - *nextevt = KTIME_MAX; - return 0; - } /* Snapshot to detect later posting of non-lazy callback. */ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; @@ -1462,8 +1457,7 @@ static void rcu_prepare_for_idle(void) int tne; RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_prepare_for_idle() invoked with irqs enabled!!!"); - if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || - rcu_is_nocb_cpu(smp_processor_id())) + if (rcu_is_nocb_cpu(smp_processor_id())) return; /* Handle nohz enablement switches conservatively. */ @@ -1518,8 +1512,7 @@ static void rcu_prepare_for_idle(void) static void rcu_cleanup_after_idle(void) { RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_cleanup_after_idle() invoked with irqs enabled!!!"); - if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || - rcu_is_nocb_cpu(smp_processor_id())) + if (rcu_is_nocb_cpu(smp_processor_id())) return; if (rcu_try_advance_all_cbs()) invoke_rcu_core(); @@ -1786,7 +1779,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) init_swait_queue_head(&rnp->nocb_gp_wq[1]); } -#ifndef CONFIG_RCU_NOCB_CPU_ALL /* Is the specified CPU a no-CBs CPU? */ bool rcu_is_nocb_cpu(int cpu) { @@ -1794,7 +1786,6 @@ bool rcu_is_nocb_cpu(int cpu) return cpumask_test_cpu(cpu, rcu_nocb_mask); return false; } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Kick the leader kthread for this NOCB group. 
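With the three build-time options gone, the set of no-CBs CPUs comes entirely from the rcu_nocbs= cpulist, which is parsed at boot into rcu_nocb_mask and then consulted by predicates such as rcu_is_nocb_cpu() above. The following is a minimal sketch of that parse-then-test pattern, with hypothetical "sketch_" names; the actual rcutree boot code additionally handles bootmem cpumask allocation and interaction with nohz_full:

    #include <linux/cpumask.h>
    #include <linux/init.h>

    static struct cpumask sketch_nocb_mask;     /* stand-in for rcu_nocb_mask */
    static bool sketch_have_nocb_mask;

    static int __init sketch_nocbs_setup(char *str)
    {
            if (cpulist_parse(str, &sketch_nocb_mask))  /* "1,3-5" sets bits 1, 3, 4, 5 */
                    return 0;                           /* reject malformed cpulists */
            sketch_have_nocb_mask = true;
            return 1;
    }
    __setup("sketch_nocbs=", sketch_nocbs_setup);

    static bool sketch_is_nocb_cpu(int cpu)
    {
            return sketch_have_nocb_mask &&
                   cpumask_test_cpu(cpu, &sketch_nocb_mask);
    }
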
@@ -2253,10 +2244,6 @@ void __init rcu_init_nohz(void) bool need_rcu_nocb_mask = true; struct rcu_state *rsp; -#ifdef CONFIG_RCU_NOCB_CPU_NONE - need_rcu_nocb_mask = false; -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ - #if defined(CONFIG_NO_HZ_FULL) if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) need_rcu_nocb_mask = true; @@ -2272,14 +2259,6 @@ void __init rcu_init_nohz(void) if (!have_rcu_nocb_mask) return; -#ifdef CONFIG_RCU_NOCB_CPU_ZERO - pr_info("\tOffload RCU callbacks from CPU 0\n"); - cpumask_set_cpu(0, rcu_nocb_mask); -#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ -#ifdef CONFIG_RCU_NOCB_CPU_ALL - pr_info("\tOffload RCU callbacks from all CPUs\n"); - cpumask_copy(rcu_nocb_mask, cpu_possible_mask); -#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ #if defined(CONFIG_NO_HZ_FULL) if (tick_nohz_full_running) cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 92ca49f90ef9..b5b53973c01e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -12,7 +12,6 @@ CONFIG_HOTPLUG_CPU=y CONFIG_MAXSMP=y CONFIG_CPUMASK_OFFSTACK=y CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ZERO=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index 89705ed79596..1d14e1383016 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -2,3 +2,4 @@ rcutorture.torture_type=rcu_bh maxcpus=8 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 +rcu_nocbs=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 1257d3227b1e..2dde0d9964e3 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -13,7 +13,6 @@ CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_NONE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index 099cc63c6a3b..fb1c763c10c5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -15,7 +15,6 @@ CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n CONFIG_RCU_BOOST=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot index fb066dc82769..1bd8efc4141e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot @@ -2,3 +2,4 @@ rcutorture.torture_type=sched rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_sched=1 rcutree.rcu_fanout_exact=1 +rcu_nocbs=0-7 diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 928fadaecc25..9ad3f89c8dc7 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ 
b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -16,11 +16,9 @@ CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. -CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. -CONFIG_RCU_NOCB_CPU -- Do three, see below. -CONFIG_RCU_NOCB_CPU_ALL -- Do one. -CONFIG_RCU_NOCB_CPU_NONE -- Do one. -CONFIG_RCU_NOCB_CPU_ZERO -- Do one. +CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs. +CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with + rcu_nocbs=0, and one with all rcu_nocbs CPUs. CONFIG_RCU_TRACE -- Do half. CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations. -- cgit v1.2.1 From 0af92d46098a092aa5817dfeb6d24a8d85b66205 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 17 May 2017 08:43:40 -0700 Subject: rcu: Move RCU non-debug Kconfig options to kernel/rcu RCU's Kconfig options are scattered, and there are enough of them that it would be good for them to be more centralized. This commit therefore extracts RCU's Kconfig options from init/Kconfig into a new kernel/rcu/Kconfig file. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- init/Kconfig | 239 +--------------------------------------------------- kernel/rcu/Kconfig | 242 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 243 insertions(+), 238 deletions(-) create mode 100644 kernel/rcu/Kconfig diff --git a/init/Kconfig b/init/Kconfig index dc431c6109f2..bc4c180c66a5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -472,244 +472,7 @@ config TASK_IO_ACCOUNTING endmenu # "CPU/Task time and stats accounting" -menu "RCU Subsystem" - -config TREE_RCU - bool - default y if !PREEMPT && SMP - help - This option selects the RCU implementation that is - designed for very large SMP system with hundreds or - thousands of CPUs. It also scales down nicely to - smaller systems. - -config PREEMPT_RCU - bool - default y if PREEMPT - help - This option selects the RCU implementation that is - designed for very large SMP systems with hundreds or - thousands of CPUs, but for which real-time response - is also required. It also scales down nicely to - smaller systems. - - Select this option if you are unsure. - -config TINY_RCU - bool - default y if !PREEMPT && !SMP - help - This option selects the RCU implementation that is - designed for UP systems from which real-time response - is not required. This option greatly reduces the - memory footprint of RCU. - -config RCU_EXPERT - bool "Make expert-level adjustments to RCU configuration" - default n - help - This option needs to be enabled if you wish to make - expert-level adjustments to RCU configuration. By default, - no such adjustments can be made, which has the often-beneficial - side-effect of preventing "make oldconfig" from asking you all - sorts of detailed questions about how you would like numerous - obscure RCU options to be set up. - - Say Y if you need to make expert-level adjustments to RCU. - - Say N if you are unsure. - -config SRCU - bool - help - This option selects the sleepable version of RCU. This version - permits arbitrary sleeping or blocking within RCU read-side critical - sections. - -config TINY_SRCU - bool - default y if SRCU && TINY_RCU - help - This option selects the single-CPU non-preemptible version of SRCU. 
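Whichever SRCU variant these Kconfig entries select, the API seen by callers is identical; only the implementation behind it changes. A minimal usage sketch with hypothetical names follows; the ability to sleep inside the read-side critical section is what distinguishes SRCU from ordinary RCU:

    #include <linux/srcu.h>

    DEFINE_STATIC_SRCU(sketch_srcu);    /* hypothetical srcu_struct */

    static void sketch_reader(void)
    {
            int idx = srcu_read_lock(&sketch_srcu);

            /* May block here, unlike in an rcu_read_lock() section. */
            srcu_read_unlock(&sketch_srcu, idx);
    }

    static void sketch_updater(void)
    {
            /* Waits only for readers of sketch_srcu to finish. */
            synchronize_srcu(&sketch_srcu);
    }
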
- -config TREE_SRCU - bool - default y if SRCU && !TINY_RCU - help - This option selects the full-fledged version of SRCU. - -config TASKS_RCU - bool - default n - select SRCU - help - This option enables a task-based RCU implementation that uses - only voluntary context switch (not preemption!), idle, and - user-mode execution as quiescent states. - -config RCU_STALL_COMMON - def_bool ( TREE_RCU || PREEMPT_RCU || RCU_TRACE ) - help - This option enables RCU CPU stall code that is common between - the TINY and TREE variants of RCU. The purpose is to allow - the tiny variants to disable RCU CPU stall warnings, while - making these warnings mandatory for the tree variants. - -config RCU_NEED_SEGCBLIST - def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU ) - -config CONTEXT_TRACKING - bool - -config CONTEXT_TRACKING_FORCE - bool "Force context tracking" - depends on CONTEXT_TRACKING - default y if !NO_HZ_FULL - help - The major pre-requirement for full dynticks to work is to - support the context tracking subsystem. But there are also - other dependencies to provide in order to make the full - dynticks working. - - This option stands for testing when an arch implements the - context tracking backend but doesn't yet fullfill all the - requirements to make the full dynticks feature working. - Without the full dynticks, there is no way to test the support - for context tracking and the subsystems that rely on it: RCU - userspace extended quiescent state and tickless cputime - accounting. This option copes with the absence of the full - dynticks subsystem by forcing the context tracking on all - CPUs in the system. - - Say Y only if you're working on the development of an - architecture backend for the context tracking. - - Say N otherwise, this option brings an overhead that you - don't want in production. - - -config RCU_FANOUT - int "Tree-based hierarchical RCU fanout value" - range 2 64 if 64BIT - range 2 32 if !64BIT - depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT - default 64 if 64BIT - default 32 if !64BIT - help - This option controls the fanout of hierarchical implementations - of RCU, allowing RCU to work efficiently on machines with - large numbers of CPUs. This value must be at least the fourth - root of NR_CPUS, which allows NR_CPUS to be insanely large. - The default value of RCU_FANOUT should be used for production - systems, but if you are stress-testing the RCU implementation - itself, small RCU_FANOUT values allow you to test large-system - code paths on small(er) systems. - - Select a specific number if testing RCU itself. - Take the default if unsure. - -config RCU_FANOUT_LEAF - int "Tree-based hierarchical RCU leaf-level fanout value" - range 2 64 if 64BIT - range 2 32 if !64BIT - depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT - default 16 - help - This option controls the leaf-level fanout of hierarchical - implementations of RCU, and allows trading off cache misses - against lock contention. Systems that synchronize their - scheduling-clock interrupts for energy-efficiency reasons will - want the default because the smaller leaf-level fanout keeps - lock contention levels acceptably low. Very large systems - (hundreds or thousands of CPUs) will instead want to set this - value to the maximum value possible in order to reduce the - number of cache misses incurred during RCU's grace-period - initialization. 
These systems tend to run CPU-bound, and thus - are not helped by synchronized interrupts, and thus tend to - skew them, which reduces lock contention enough that large - leaf-level fanouts work well. That said, setting leaf-level - fanout to a large number will likely cause problematic - lock contention on the leaf-level rcu_node structures unless - you boot with the skew_tick kernel parameter. - - Select a specific number if testing RCU itself. - - Select the maximum permissible value for large systems, but - please understand that you may also need to set the skew_tick - kernel boot parameter to avoid contention on the rcu_node - structure's locks. - - Take the default if unsure. - -config RCU_FAST_NO_HZ - bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ_COMMON && SMP && RCU_EXPERT - default n - help - This option permits CPUs to enter dynticks-idle state even if - they have RCU callbacks queued, and prevents RCU from waking - these CPUs up more than roughly once every four jiffies (by - default, you can adjust this using the rcutree.rcu_idle_gp_delay - parameter), thus improving energy efficiency. On the other - hand, this option increases the duration of RCU grace periods, - for example, slowing down synchronize_rcu(). - - Say Y if energy efficiency is critically important, and you - don't care about increased grace-period durations. - - Say N if you are unsure. - -config RCU_BOOST - bool "Enable RCU priority boosting" - depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT - default n - help - This option boosts the priority of preempted RCU readers that - block the current preemptible RCU grace period for too long. - This option also prevents heavy loads from blocking RCU - callback invocation for all flavors of RCU. - - Say Y here if you are working with real-time apps or heavy loads - Say N here if you are unsure. - -config RCU_BOOST_DELAY - int "Milliseconds to delay boosting after RCU grace-period start" - range 0 3000 - depends on RCU_BOOST - default 500 - help - This option specifies the time to wait after the beginning of - a given grace period before priority-boosting preempted RCU - readers blocking that grace period. Note that any RCU reader - blocking an expedited RCU grace period is boosted immediately. - - Accept the default if unsure. - -config RCU_NOCB_CPU - bool "Offload RCU callback processing from boot-selected CPUs" - depends on TREE_RCU || PREEMPT_RCU - depends on RCU_EXPERT || NO_HZ_FULL - default n - help - Use this option to reduce OS jitter for aggressive HPC or - real-time workloads. It can also be used to offload RCU - callback invocation to energy-efficient CPUs in battery-powered - asymmetric multiprocessors. - - This option offloads callback invocation from the set of - CPUs specified at boot time by the rcu_nocbs parameter. - For each such CPU, a kthread ("rcuox/N") will be created to - invoke callbacks, where the "N" is the CPU being offloaded, - and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and - "s" for RCU-sched. Nothing prevents this kthread from running - on the specified CPUs, but (1) the kthreads may be preempted - between each callback, and (2) affinity or cgroups can be used - to force the kthreads to run on whatever set of CPUs is desired. - - Say Y here if you want to help to debug reduced OS jitter. - Say N here if you are unsure. 
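The callbacks being offloaded by RCU_NOCB_CPU are those posted with call_rcu(): on a no-CBs CPU they are invoked from an rcuo kthread rather than from softirq context, while the posting side is unchanged. A short sketch with made-up structure names:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct sketch_obj {
            int data;
            struct rcu_head rh;     /* reserved for the grace-period callback */
    };

    static void sketch_obj_free(struct rcu_head *rh)
    {
            /* Runs after a grace period, possibly in an rcuo kthread. */
            kfree(container_of(rh, struct sketch_obj, rh));
    }

    static void sketch_obj_release(struct sketch_obj *p)
    {
            call_rcu(&p->rh, sketch_obj_free);      /* queue and return; no waiting */
    }
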
- -endmenu # "RCU Subsystem" +source "kernel/rcu/Kconfig" config BUILD_BIN2C bool diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig new file mode 100644 index 000000000000..8edff43e8e94 --- /dev/null +++ b/kernel/rcu/Kconfig @@ -0,0 +1,242 @@ +# +# RCU-related configuration options +# + +menu "RCU Subsystem" + +config TREE_RCU + bool + default y if !PREEMPT && SMP + help + This option selects the RCU implementation that is + designed for very large SMP systems with hundreds or + thousands of CPUs. It also scales down nicely to + smaller systems. + +config PREEMPT_RCU + bool + default y if PREEMPT + help + This option selects the RCU implementation that is + designed for very large SMP systems with hundreds or + thousands of CPUs, but for which real-time response + is also required. It also scales down nicely to + smaller systems. + + Select this option if you are unsure. + +config TINY_RCU + bool + default y if !PREEMPT && !SMP + help + This option selects the RCU implementation that is + designed for UP systems from which real-time response + is not required. This option greatly reduces the + memory footprint of RCU. + +config RCU_EXPERT + bool "Make expert-level adjustments to RCU configuration" + default n + help + This option needs to be enabled if you wish to make + expert-level adjustments to RCU configuration. By default, + no such adjustments can be made, which has the often-beneficial + side-effect of preventing "make oldconfig" from asking you all + sorts of detailed questions about how you would like numerous + obscure RCU options to be set up. + + Say Y if you need to make expert-level adjustments to RCU. + + Say N if you are unsure. + +config SRCU + bool + help + This option selects the sleepable version of RCU. This version + permits arbitrary sleeping or blocking within RCU read-side critical + sections. + +config TINY_SRCU + bool + default y if SRCU && TINY_RCU + help + This option selects the single-CPU non-preemptible version of SRCU. + +config TREE_SRCU + bool + default y if SRCU && !TINY_RCU + help + This option selects the full-fledged version of SRCU. + +config TASKS_RCU + bool + default n + select SRCU + help + This option enables a task-based RCU implementation that uses + only voluntary context switch (not preemption!), idle, and + user-mode execution as quiescent states. + +config RCU_STALL_COMMON + def_bool ( TREE_RCU || PREEMPT_RCU || RCU_TRACE ) + help + This option enables RCU CPU stall code that is common between + the TINY and TREE variants of RCU. The purpose is to allow + the tiny variants to disable RCU CPU stall warnings, while + making these warnings mandatory for the tree variants. + +config RCU_NEED_SEGCBLIST + def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU ) + +config CONTEXT_TRACKING + bool + +config CONTEXT_TRACKING_FORCE + bool "Force context tracking" + depends on CONTEXT_TRACKING + default y if !NO_HZ_FULL + help + The major pre-requirement for full dynticks to work is to + support the context tracking subsystem. But there are also + other dependencies to provide in order to make the full + dynticks working. + + This option stands for testing when an arch implements the + context tracking backend but doesn't yet fulfill all the + requirements to make the full dynticks feature working. + Without the full dynticks, there is no way to test the support + for context tracking and the subsystems that rely on it: RCU + userspace extended quiescent state and tickless cputime + accounting.
This option copes with the absence of the full + dynticks subsystem by forcing the context tracking on all + CPUs in the system. + + Say Y only if you're working on the development of an + architecture backend for the context tracking. + + Say N otherwise, this option brings an overhead that you + don't want in production. + + +config RCU_FANOUT + int "Tree-based hierarchical RCU fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT + default 64 if 64BIT + default 32 if !64BIT + help + This option controls the fanout of hierarchical implementations + of RCU, allowing RCU to work efficiently on machines with + large numbers of CPUs. This value must be at least the fourth + root of NR_CPUS, which allows NR_CPUS to be insanely large. + The default value of RCU_FANOUT should be used for production + systems, but if you are stress-testing the RCU implementation + itself, small RCU_FANOUT values allow you to test large-system + code paths on small(er) systems. + + Select a specific number if testing RCU itself. + Take the default if unsure. + +config RCU_FANOUT_LEAF + int "Tree-based hierarchical RCU leaf-level fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT + default 16 + help + This option controls the leaf-level fanout of hierarchical + implementations of RCU, and allows trading off cache misses + against lock contention. Systems that synchronize their + scheduling-clock interrupts for energy-efficiency reasons will + want the default because the smaller leaf-level fanout keeps + lock contention levels acceptably low. Very large systems + (hundreds or thousands of CPUs) will instead want to set this + value to the maximum value possible in order to reduce the + number of cache misses incurred during RCU's grace-period + initialization. These systems tend to run CPU-bound, and thus + are not helped by synchronized interrupts, and thus tend to + skew them, which reduces lock contention enough that large + leaf-level fanouts work well. That said, setting leaf-level + fanout to a large number will likely cause problematic + lock contention on the leaf-level rcu_node structures unless + you boot with the skew_tick kernel parameter. + + Select a specific number if testing RCU itself. + + Select the maximum permissible value for large systems, but + please understand that you may also need to set the skew_tick + kernel boot parameter to avoid contention on the rcu_node + structure's locks. + + Take the default if unsure. + +config RCU_FAST_NO_HZ + bool "Accelerate last non-dyntick-idle CPU's grace periods" + depends on NO_HZ_COMMON && SMP && RCU_EXPERT + default n + help + This option permits CPUs to enter dynticks-idle state even if + they have RCU callbacks queued, and prevents RCU from waking + these CPUs up more than roughly once every four jiffies (by + default, you can adjust this using the rcutree.rcu_idle_gp_delay + parameter), thus improving energy efficiency. On the other + hand, this option increases the duration of RCU grace periods, + for example, slowing down synchronize_rcu(). + + Say Y if energy efficiency is critically important, and you + don't care about increased grace-period durations. + + Say N if you are unsure. 
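The "fourth root of NR_CPUS" rule in the RCU_FANOUT help above follows from the rcu_node tree being limited to four levels, so a fanout of f can reach at most f^4 CPUs. A quick arithmetic check (plain userspace C, not kernel code, offered only to make the claim concrete):

    #include <stdio.h>

    int main(void)
    {
            long covered = 1;
            int fanout = 64;        /* the 64BIT default above */
            int level;

            for (level = 1; level <= 4; level++) {
                    covered *= fanout;
                    printf("%d level(s): up to %ld CPUs\n", level, covered);
            }
            /* Four levels of fanout 64 cover 16,777,216 CPUs. */
            return 0;
    }
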
+ +config RCU_BOOST + bool "Enable RCU priority boosting" + depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT + default n + help + This option boosts the priority of preempted RCU readers that + block the current preemptible RCU grace period for too long. + This option also prevents heavy loads from blocking RCU + callback invocation for all flavors of RCU. + + Say Y here if you are working with real-time apps or heavy loads + Say N here if you are unsure. + +config RCU_BOOST_DELAY + int "Milliseconds to delay boosting after RCU grace-period start" + range 0 3000 + depends on RCU_BOOST + default 500 + help + This option specifies the time to wait after the beginning of + a given grace period before priority-boosting preempted RCU + readers blocking that grace period. Note that any RCU reader + blocking an expedited RCU grace period is boosted immediately. + + Accept the default if unsure. + +config RCU_NOCB_CPU + bool "Offload RCU callback processing from boot-selected CPUs" + depends on TREE_RCU || PREEMPT_RCU + depends on RCU_EXPERT || NO_HZ_FULL + default n + help + Use this option to reduce OS jitter for aggressive HPC or + real-time workloads. It can also be used to offload RCU + callback invocation to energy-efficient CPUs in battery-powered + asymmetric multiprocessors. + + This option offloads callback invocation from the set of + CPUs specified at boot time by the rcu_nocbs parameter. + For each such CPU, a kthread ("rcuox/N") will be created to + invoke callbacks, where the "N" is the CPU being offloaded, + and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and + "s" for RCU-sched. Nothing prevents this kthread from running + on the specified CPUs, but (1) the kthreads may be preempted + between each callback, and (2) affinity or cgroups can be used + to force the kthreads to run on whatever set of CPUs is desired. + + Say Y here if you want to help to debug reduced OS jitter. + Say N here if you are unsure. + +endmenu # "RCU Subsystem" -- cgit v1.2.1 From 43a0a2a7d725f2ed2547cd656749eb66c093f2c2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 17 May 2017 09:19:44 -0700 Subject: rcu: Move RCU debug Kconfig options to kernel/rcu RCU's debugging Kconfig options are in the unintuitive location lib/Kconfig.debug, and there are enough of them that it would be good for them to be more centralized. This commit therefore extracts RCU's Kconfig options from init/Kconfig into a new kernel/rcu/Kconfig.debug file. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney --- kernel/rcu/Kconfig.debug | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 79 +--------------------------------------------- 2 files changed, 83 insertions(+), 78 deletions(-) create mode 100644 kernel/rcu/Kconfig.debug diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug new file mode 100644 index 000000000000..0ec7d1d33a14 --- /dev/null +++ b/kernel/rcu/Kconfig.debug @@ -0,0 +1,82 @@ +# +# RCU-related debugging configuration options +# + +menu "RCU Debugging" + +config PROVE_RCU + def_bool PROVE_LOCKING + +config TORTURE_TEST + tristate + default n + +config RCU_PERF_TEST + tristate "performance tests for RCU" + depends on DEBUG_KERNEL + select TORTURE_TEST + select SRCU + select TASKS_RCU + default n + help + This option provides a kernel module that runs performance + tests on the RCU infrastructure. The kernel module may be built + after the fact on the running kernel to be tested, if desired. 
+ + Say Y here if you want RCU performance tests to be built into + the kernel. + Say M if you want the RCU performance tests to build as a module. + Say N if you are unsure. + +config RCU_TORTURE_TEST + tristate "torture tests for RCU" + depends on DEBUG_KERNEL + select TORTURE_TEST + select SRCU + select TASKS_RCU + default n + help + This option provides a kernel module that runs torture tests + on the RCU infrastructure. The kernel module may be built + after the fact on the running kernel to be tested, if desired. + + Say Y here if you want RCU torture tests to be built into + the kernel. + Say M if you want the RCU torture tests to build as a module. + Say N if you are unsure. + +config RCU_CPU_STALL_TIMEOUT + int "RCU CPU stall timeout in seconds" + depends on RCU_STALL_COMMON + range 3 300 + default 21 + help + If a given RCU grace period extends more than the specified + number of seconds, a CPU stall warning is printed. If the + RCU grace period persists, additional CPU stall warnings are + printed at more widely spaced intervals. + +config RCU_TRACE + bool "Enable tracing for RCU" + depends on DEBUG_KERNEL + default y if TREE_RCU + select TRACE_CLOCK + help + This option enables additional tracepoints for ftrace-style + event tracing. + + Say Y here if you want to enable RCU tracing + Say N if you are unsure. + +config RCU_EQS_DEBUG + bool "Provide debugging asserts for adding NO_HZ support to an arch" + depends on DEBUG_KERNEL + help + This option provides consistency checks in RCU's handling of + NO_HZ. These checks have proven quite helpful in detecting + bugs in arch-specific NO_HZ code. + + Say N here if you need ultimate kernel/user switch latencies + Say Y if you are unsure + +endmenu # "RCU Debugging" diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8c10b5a97b9e..a7a751a75cfd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1301,84 +1301,7 @@ config DEBUG_CREDENTIALS If unsure, say N. -menu "RCU Debugging" - -config PROVE_RCU - def_bool PROVE_LOCKING - -config TORTURE_TEST - tristate - default n - -config RCU_PERF_TEST - tristate "performance tests for RCU" - depends on DEBUG_KERNEL - select TORTURE_TEST - select SRCU - select TASKS_RCU - default n - help - This option provides a kernel module that runs performance - tests on the RCU infrastructure. The kernel module may be built - after the fact on the running kernel to be tested, if desired. - - Say Y here if you want RCU performance tests to be built into - the kernel. - Say M if you want the RCU performance tests to build as a module. - Say N if you are unsure. - -config RCU_TORTURE_TEST - tristate "torture tests for RCU" - depends on DEBUG_KERNEL - select TORTURE_TEST - select SRCU - select TASKS_RCU - default n - help - This option provides a kernel module that runs torture tests - on the RCU infrastructure. The kernel module may be built - after the fact on the running kernel to be tested, if desired. - - Say Y here if you want RCU torture tests to be built into - the kernel. - Say M if you want the RCU torture tests to build as a module. - Say N if you are unsure. - -config RCU_CPU_STALL_TIMEOUT - int "RCU CPU stall timeout in seconds" - depends on RCU_STALL_COMMON - range 3 300 - default 21 - help - If a given RCU grace period extends more than the specified - number of seconds, a CPU stall warning is printed. If the - RCU grace period persists, additional CPU stall warnings are - printed at more widely spaced intervals. 
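Because jiffies wraps, the stall-warning machinery behind RCU_CPU_STALL_TIMEOUT cannot compare timestamps with a plain ">="; it uses wraparound-safe modular comparisons such as ULONG_CMP_GE() from kernel/rcu/rcu.h, the same macro visible in the Tiny RCU stall code removed later in this series. A sketch of the idiom:

    #include <limits.h>

    #define ULONG_CMP_GE(a, b)      (ULONG_MAX / 2 >= (a) - (b))

    /* True once "now" has reached "deadline", wraparound included. */
    static int deadline_passed(unsigned long now, unsigned long deadline)
    {
            return ULONG_CMP_GE(now, deadline);
    }
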
- -config RCU_TRACE - bool "Enable tracing for RCU" - depends on DEBUG_KERNEL - default y if TREE_RCU - select TRACE_CLOCK - help - This option enables additional tracepoints for ftrace-style - event tracing. - - Say Y here if you want to enable RCU tracing - Say N if you are unsure. - -config RCU_EQS_DEBUG - bool "Provide debugging asserts for adding NO_HZ support to an arch" - depends on DEBUG_KERNEL - help - This option provides consistency checks in RCU's handling of - NO_HZ. These checks have proven quite helpful in detecting - bugs in arch-specific NO_HZ code. - - Say N here if you need ultimate kernel/user switch latencies - Say Y if you are unsure - -endmenu # "RCU Debugging" +source "kernel/rcu/Kconfig.debug" config DEBUG_WQ_FORCE_RR_CPU bool "Force round-robin CPU selection for unbound work items" -- cgit v1.2.1 From c23484f0e7bc89e1facb04103ce24efeebee76b9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 17 May 2017 10:17:03 -0700 Subject: rcu: Remove event tracing from Tiny RCU This commit saves a few lines by getting rid of Tiny RCU's event tracing. Signed-off-by: Paul E. McKenney --- kernel/rcu/tiny.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 2306cab2195d..595cb1bf944f 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -35,7 +35,6 @@ #include #include #include -#include #include "rcu.h" @@ -139,7 +138,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) local_irq_restore(flags); return; } - RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1);) list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; *rcp->donetail = NULL; @@ -161,10 +159,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++;) } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count);) - RCU_TRACE(trace_rcu_batch_end(rcp->name, - cb_count, 0, need_resched(), - is_idle_task(current), - false)); } static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) -- cgit v1.2.1 From 6d48152eafde1f0d0a4a9e0584fa7d9ff4fbfdac Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 17 May 2017 10:54:29 -0700 Subject: rcu: Remove RCU CPU stall warnings from Tiny RCU Tiny RCU's job is to be tiny, so this commit removes its RCU CPU stall warning code. After this, there is no longer any need for rcu_sched_ctrlblk and rcu_bh_ctrlblk to be in tiny_plugin.h, so this commit also moves them to tiny.c. Signed-off-by: Paul E. McKenney --- kernel/rcu/Kconfig | 2 +- kernel/rcu/tiny.c | 35 +++++----- kernel/rcu/tiny_plugin.h | 78 ---------------------- .../selftests/rcutorture/configs/rcu/TINY02 | 1 - 4 files changed, 19 insertions(+), 97 deletions(-) diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 8edff43e8e94..be90c945063f 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -78,7 +78,7 @@ config TASKS_RCU user-mode execution as quiescent states. config RCU_STALL_COMMON - def_bool ( TREE_RCU || PREEMPT_RCU || RCU_TRACE ) + def_bool ( TREE_RCU || PREEMPT_RCU ) help This option enables RCU CPU stall code that is common between the TINY and TREE variants of RCU. The purpose is to allow diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 595cb1bf944f..f8488965250f 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -38,11 +38,23 @@ #include "rcu.h" -/* Forward declarations for tiny_plugin.h. 
*/ -struct rcu_ctrlblk; -static void __call_rcu(struct rcu_head *head, - rcu_callback_t func, - struct rcu_ctrlblk *rcp); +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, +}; + +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; #include "tiny_plugin.h" @@ -65,7 +77,6 @@ EXPORT_SYMBOL(rcu_barrier_sched); */ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) { - RCU_TRACE(reset_cpu_stall_ticks(rcp);) if (rcp->donetail != rcp->curtail) { rcp->donetail = rcp->curtail; return 1; @@ -111,7 +122,6 @@ void rcu_bh_qs(void) */ void rcu_check_callbacks(int user) { - RCU_TRACE(check_cpu_stalls();) if (user) rcu_sched_qs(); else if (!in_softirq()) @@ -126,10 +136,8 @@ void rcu_check_callbacks(int user) */ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) { - const char *rn = NULL; struct rcu_head *next, *list; unsigned long flags; - RCU_TRACE(int cb_count = 0;) /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); @@ -147,18 +155,15 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) local_irq_restore(flags); /* Invoke the callbacks on the local list. */ - RCU_TRACE(rn = rcp->name;) while (list) { next = list->next; prefetch(next); debug_rcu_head_unqueue(list); local_bh_disable(); - __rcu_reclaim(rn, list); + __rcu_reclaim("", list); local_bh_enable(); list = next; - RCU_TRACE(cb_count++;) } - RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count);) } static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) @@ -202,7 +207,6 @@ static void __call_rcu(struct rcu_head *head, local_irq_save(flags); *rcp->curtail = head; rcp->curtail = &head->next; - RCU_TRACE(rcp->qlen++;) local_irq_restore(flags); if (unlikely(is_idle_task(current))) { @@ -235,8 +239,5 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); - RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk);) - RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk);) - rcu_early_boot_tests(); } diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index c642f23f1582..f0a01b2a3062 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -22,34 +22,6 @@ * Author: Paul E. McKenney */ -#include -#include - -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ - struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ - struct rcu_head **curtail; /* ->next pointer of last CB. */ - RCU_TRACE(long qlen); /* Number of pending CBs. */ - RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */ - RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */ - RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */ - RCU_TRACE(const char *name); /* Name of RCU type. */ -}; - -/* Definition for rcupdate control block. 
*/ -static struct rcu_ctrlblk rcu_sched_ctrlblk = { - .donetail = &rcu_sched_ctrlblk.rcucblist, - .curtail = &rcu_sched_ctrlblk.rcucblist, - RCU_TRACE(.name = "rcu_sched") -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, - RCU_TRACE(.name = "rcu_bh") -}; - #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) #include @@ -73,53 +45,3 @@ void __init rcu_scheduler_starting(void) } #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ - -#ifdef CONFIG_RCU_TRACE - -static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) -{ - unsigned long flags; - - local_irq_save(flags); - rcp->qlen -= n; - local_irq_restore(flags); -} - -static void check_cpu_stall(struct rcu_ctrlblk *rcp) -{ - unsigned long j; - unsigned long js; - - if (rcu_cpu_stall_suppress) - return; - rcp->ticks_this_gp++; - j = jiffies; - js = READ_ONCE(rcp->jiffies_stall); - if (rcp->rcucblist && ULONG_CMP_GE(j, js)) { - pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", - rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE, - jiffies - rcp->gp_start, rcp->qlen); - dump_stack(); - WRITE_ONCE(rcp->jiffies_stall, - jiffies + 3 * rcu_jiffies_till_stall_check() + 3); - } else if (ULONG_CMP_GE(j, js)) { - WRITE_ONCE(rcp->jiffies_stall, - jiffies + rcu_jiffies_till_stall_check()); - } -} - -static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) -{ - rcp->ticks_this_gp = 0; - rcp->gp_start = jiffies; - WRITE_ONCE(rcp->jiffies_stall, - jiffies + rcu_jiffies_till_stall_check()); -} - -static void check_cpu_stalls(void) -{ - RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk);) - RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk);) -} - -#endif /* #ifdef CONFIG_RCU_TRACE */ diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index 1f6bebbf5da8..d8674264318d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -6,7 +6,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=y CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n -CONFIG_RCU_TRACE=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_LOCK_ALLOC=y -- cgit v1.2.1
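For readers following the rcu_ctrlblk relocation above: Tiny RCU keeps all callbacks on one singly linked list and partitions it with two tail pointers, donetail (->next of the last ready-to-invoke callback) and curtail (->next of the last callback of all). The userspace model below sketches how enqueueing, quiescent states, and the done-sublist splice in __rcu_process_callbacks() interact; it is a simplified sketch, not the kernel code itself, which also masks interrupts around each step:

    #include <stddef.h>

    struct cb {
            struct cb *next;
    };

    struct ctrlblk {
            struct cb *list;        /* all pending callbacks */
            struct cb **donetail;   /* ->next of last ready-to-invoke callback */
            struct cb **curtail;    /* ->next of last callback */
    };

    static struct ctrlblk blk = {   /* mirrors the rcu_sched_ctrlblk initializers */
            .donetail = &blk.list,
            .curtail = &blk.list,
    };

    static void enqueue(struct ctrlblk *b, struct cb *c)
    {
            c->next = NULL;
            *b->curtail = c;        /* append at the tail */
            b->curtail = &c->next;
    }

    /* A quiescent state promotes everything queued so far to "done". */
    static void quiescent_state(struct ctrlblk *b)
    {
            b->donetail = b->curtail;
    }

    /* Detach the done sublist for invocation, as __rcu_process_callbacks() does. */
    static struct cb *extract_done(struct ctrlblk *b)
    {
            struct cb *done;

            if (b->donetail == &b->list)
                    return NULL;            /* nothing ready to invoke */
            done = b->list;
            b->list = *b->donetail;         /* not-yet-done callbacks stay queued */
            *b->donetail = NULL;            /* terminate the done sublist */
            if (b->curtail == b->donetail)
                    b->curtail = &b->list;
            b->donetail = &b->list;
            return done;
    }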