rcu: Add *_ONCE() and data_race() to rcu_node ->exp_tasks plus locking
author Paul E. McKenney <paulmck@kernel.org>
Fri, 3 Jan 2020 22:18:12 +0000 (14:18 -0800)
committer Paul E. McKenney <paulmck@kernel.org>
Mon, 27 Apr 2020 18:01:15 +0000 (11:01 -0700)
There are lockless loads from the rcu_node structure's ->exp_tasks field,
so this commit causes all stores to use WRITE_ONCE() and all lockless
loads to use READ_ONCE() or data_race(), with the latter for debug
prints.  This code also did an unprotected traversal of the linked list
pointed into by ->exp_tasks, so this commit also acquires the rcu_node
structure's ->lock to properly protect this traversal.  This list was
traversed unprotected only when printing an RCU CPU stall warning for
an expedited grace period, so the odds of seeing this in production are
not all that high.

This data race was reported by KCSAN.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
kernel/rcu/tree_exp.h
kernel/rcu/tree_plugin.h

index 1a617b9dffb0534776ff32ce879bb685d7278ea9..c2b04daf11906fb811bb28e254dce82367b4ccb5 100644 (file)
@@ -150,7 +150,7 @@ static void __maybe_unused sync_exp_reset_tree(void)
 static bool sync_rcu_exp_done(struct rcu_node *rnp)
 {
        raw_lockdep_assert_held_rcu_node(rnp);
-       return rnp->exp_tasks == NULL &&
+       return READ_ONCE(rnp->exp_tasks) == NULL &&
               READ_ONCE(rnp->expmask) == 0;
 }
 
@@ -373,7 +373,7 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
         * until such time as the ->expmask bits are cleared.
         */
        if (rcu_preempt_has_tasks(rnp))
-               rnp->exp_tasks = rnp->blkd_tasks.next;
+               WRITE_ONCE(rnp->exp_tasks, rnp->blkd_tasks.next);
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
        /* IPI the remaining CPUs for expedited quiescent state. */
@@ -542,8 +542,8 @@ static void synchronize_rcu_expedited_wait(void)
                }
                pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
                        jiffies - jiffies_start, rcu_state.expedited_sequence,
-                       READ_ONCE(rnp_root->expmask),
-                       ".T"[!!rnp_root->exp_tasks]);
+                       data_race(rnp_root->expmask),
+                       ".T"[!!data_race(rnp_root->exp_tasks)]);
                if (ndetected) {
                        pr_err("blocking rcu_node structures:");
                        rcu_for_each_node_breadth_first(rnp) {
@@ -553,8 +553,8 @@ static void synchronize_rcu_expedited_wait(void)
                                        continue;
                                pr_cont(" l=%u:%d-%d:%#lx/%c",
                                        rnp->level, rnp->grplo, rnp->grphi,
-                                       READ_ONCE(rnp->expmask),
-                                       ".T"[!!rnp->exp_tasks]);
+                                       data_race(rnp->expmask),
+                                       ".T"[!!data_race(rnp->exp_tasks)]);
                        }
                        pr_cont("\n");
                }
@@ -721,17 +721,20 @@ static void sync_sched_exp_online_cleanup(int cpu)
  */
 static int rcu_print_task_exp_stall(struct rcu_node *rnp)
 {
-       struct task_struct *t;
+       unsigned long flags;
        int ndetected = 0;
+       struct task_struct *t;
 
-       if (!rnp->exp_tasks)
+       if (!READ_ONCE(rnp->exp_tasks))
                return 0;
+       raw_spin_lock_irqsave_rcu_node(rnp, flags);
        t = list_entry(rnp->exp_tasks->prev,
                       struct task_struct, rcu_node_entry);
        list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
                pr_cont(" P%d", t->pid);
                ndetected++;
        }
+       raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        return ndetected;
 }
 
index 097635c41135da1954de4b1fc5fc0efc676753f4..35d77db035bd564fdb45bc10b266ccb39ab6b89a 100644 (file)
@@ -226,7 +226,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
                WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
        }
        if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
-               rnp->exp_tasks = &t->rcu_node_entry;
+               WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
        WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
                     !(rnp->qsmask & rdp->grpmask));
        WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
@@ -500,7 +500,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
                if (&t->rcu_node_entry == rnp->gp_tasks)
                        WRITE_ONCE(rnp->gp_tasks, np);
                if (&t->rcu_node_entry == rnp->exp_tasks)
-                       rnp->exp_tasks = np;
+                       WRITE_ONCE(rnp->exp_tasks, np);
                if (IS_ENABLED(CONFIG_RCU_BOOST)) {
                        /* Snapshot ->boost_mtx ownership w/rnp->lock held. */
                        drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
@@ -761,7 +761,7 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
                        __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
        pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
                __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,
-               rnp->exp_tasks);
+               READ_ONCE(rnp->exp_tasks));
        pr_info("%s: ->blkd_tasks", __func__);
        i = 0;
        list_for_each(lhp, &rnp->blkd_tasks) {
@@ -1036,7 +1036,7 @@ static int rcu_boost_kthread(void *arg)
        for (;;) {
                WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
                trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
-               rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
+               rcu_wait(rnp->boost_tasks || READ_ONCE(rnp->exp_tasks));
                trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
                WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
                more2boost = rcu_boost(rnp);