rcu: More aggressively enlist scheduler aid for nohz_full CPUs
author Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Wed, 25 Jul 2018 18:49:47 +0000 (11:49 -0700)
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Thu, 30 Aug 2018 23:03:44 +0000 (16:03 -0700)
Because nohz_full CPUs can leave the scheduler-clock interrupt disabled
even when in kernel mode, RCU cannot rely on rcu_check_callbacks() to
enlist the scheduler's aid in extracting a quiescent state from such CPUs.
This commit therefore more aggressively uses resched_cpu() on nohz_full
CPUs that fail to pass through a quiescent state in a timely manner.
By default, the resched_cpu() beating starts 300 milliseconds into the
grace period.

While in the neighborhood, add a ->last_fqs_resched field to the rcu_data
structure in order to rate-limit resched_cpu() calls from the RCU
grace-period kthread.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
kernel/rcu/tree.c
kernel/rcu/tree.h
kernel/rcu/tree_plugin.h

index 6bd0951a5f3abf4b4c5b2de6be3723eeb555fbd2..96731f62594ab6e6f25354ee0d493388a4a5fdf6 100644 (file)
@@ -61,6 +61,7 @@
 #include <linux/trace_events.h>
 #include <linux/suspend.h>
 #include <linux/ftrace.h>
+#include <linux/tick.h>
 
 #include "tree.h"
 #include "rcu.h"
@@ -1088,19 +1089,38 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
                WRITE_ONCE(*rnhqp, true);
                /* Store rcu_need_heavy_qs before rcu_urgent_qs. */
                smp_store_release(ruqp, true);
-               rcu_state.jiffies_resched += jtsq; /* Re-enable beating. */
        } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {
                WRITE_ONCE(*ruqp, true);
        }
 
        /*
-        * If more than halfway to RCU CPU stall-warning time, do a
-        * resched_cpu() to try to loosen things up a bit.  Also check to
-        * see if the CPU is getting hammered with interrupts, but only
-        * once per grace period, just to keep the IPIs down to a dull roar.
+        * NO_HZ_FULL CPUs can run in-kernel without rcu_check_callbacks!
+        * The above code handles this, but only for straight cond_resched().
+        * And some in-kernel loops check need_resched() before calling
+        * cond_resched(), which defeats the above code for CPUs that are
+        * running in-kernel with scheduling-clock interrupts disabled.
+        * So hit them over the head with the resched_cpu() hammer!
         */
-       if (time_after(jiffies, rcu_state.jiffies_resched)) {
+       if (tick_nohz_full_cpu(rdp->cpu) &&
+                  time_after(jiffies,
+                             READ_ONCE(rdp->last_fqs_resched) + jtsq * 3)) {
                resched_cpu(rdp->cpu);
+               WRITE_ONCE(rdp->last_fqs_resched, jiffies);
+       }
+
+       /*
+        * If more than halfway to RCU CPU stall-warning time, invoke
+        * resched_cpu() more frequently to try to loosen things up a bit.
+        * Also check to see if the CPU is getting hammered with interrupts,
+        * but only once per grace period, just to keep the IPIs down to
+        * a dull roar.
+        */
+       if (time_after(jiffies, rcu_state.jiffies_resched)) {
+               if (time_after(jiffies,
+                              READ_ONCE(rdp->last_fqs_resched) + jtsq)) {
+                       resched_cpu(rdp->cpu);
+                       WRITE_ONCE(rdp->last_fqs_resched, jiffies);
+               }
                if (IS_ENABLED(CONFIG_IRQ_WORK) &&
                    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
                    (rnp->ffmask & rdp->grpmask)) {
index 4866fa44ab0bfa6d3556c75e81beff0475c9a048..8f053bb1eec8c21a6cfe0250a12239e6088a6875 100644 (file)
@@ -260,6 +260,7 @@ struct rcu_data {
        short rcu_ofl_gp_flags;         /* ->gp_flags at last offline. */
        unsigned long rcu_onl_gp_seq;   /* ->gp_seq at last online. */
        short rcu_onl_gp_flags;         /* ->gp_flags at last online. */
+       unsigned long last_fqs_resched; /* Time of last resched_cpu(). */
 
        int cpu;
 };
index 7ec366268e2eb8f0504098a3ef1ea9e4a218ed41..1e80a0da792448fa94939cced510d8e157fe3590 100644 (file)
@@ -1850,6 +1850,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp)
 {
        rdp->ticks_this_gp = 0;
        rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
+       WRITE_ONCE(rdp->last_fqs_resched, jiffies);
 }
 
 #ifdef CONFIG_RCU_NOCB_CPU