rcu-tasks: Report stalls during synchronize_srcu() in rcu_tasks_postscan()
author Neeraj Upadhyay <quic_neeraju@quicinc.com>
Wed, 11 Jan 2023 07:45:00 +0000 (13:15 +0530)
committer Joel Fernandes (Google) <joel@joelfernandes.org>
Wed, 5 Apr 2023 13:47:44 +0000 (13:47 +0000)
The call to synchronize_srcu() from rcu_tasks_postscan() can be stalled
by a task getting stuck in do_exit() between that function's calls to
exit_tasks_rcu_start() and exit_tasks_rcu_finish().  To ease diagnosis
of this situation, print a stall warning message every rcu_task_stall_info
period when rcu_tasks_postscan() is stalled.

[ paulmck: Adjust to handle CONFIG_SMP=n. ]

Acked-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Reported-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/rcu/20230111212736.GA1062057@paulmck-ThinkPad-P17-Gen-1/
Signed-off-by: Neeraj Upadhyay <quic_neeraju@quicinc.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
kernel/rcu/tasks.h

index bfb5e1549f2b22d510758840df48467c59b5dc93..baf7ec178155a87c2188f16fe22bf80eeba6bba4 100644 (file)
@@ -139,6 +139,12 @@ static struct rcu_tasks rt_name =                                                  \
 /* Track exiting tasks in order to allow them to be waited for. */
 DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
 
+#ifdef CONFIG_TASKS_RCU
+/* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
+static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
+static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
+#endif
+
 /* Avoid IPIing CPUs early in the grace period. */
 #define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0)
 static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
@@ -830,6 +836,13 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
 /* Processing between scanning taskslist and draining the holdout list. */
 static void rcu_tasks_postscan(struct list_head *hop)
 {
+       int rtsi = READ_ONCE(rcu_task_stall_info);
+
+       if (!IS_ENABLED(CONFIG_TINY_RCU)) {
+               tasks_rcu_exit_srcu_stall_timer.expires = jiffies + rtsi;
+               add_timer(&tasks_rcu_exit_srcu_stall_timer);
+       }
+
        /*
         * Exiting tasks may escape the tasklist scan. Those are vulnerable
         * until their final schedule() with TASK_DEAD state. To cope with
@@ -848,6 +861,9 @@ static void rcu_tasks_postscan(struct list_head *hop)
         * call to synchronize_rcu().
         */
        synchronize_srcu(&tasks_rcu_exit_srcu);
+
+       if (!IS_ENABLED(CONFIG_TINY_RCU))
+               del_timer_sync(&tasks_rcu_exit_srcu_stall_timer);
 }
 
 /* See if tasks are still holding out, complain if so. */
@@ -923,6 +939,21 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp)
 void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
 DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
 
+static void tasks_rcu_exit_srcu_stall(struct timer_list *unused)
+{
+#ifndef CONFIG_TINY_RCU
+       int rtsi;
+
+       rtsi = READ_ONCE(rcu_task_stall_info);
+       pr_info("%s: %s grace period number %lu (since boot) gp_state: %s is %lu jiffies old.\n",
+               __func__, rcu_tasks.kname, rcu_tasks.tasks_gp_seq,
+               tasks_gp_state_getname(&rcu_tasks), jiffies - rcu_tasks.gp_jiffies);
+       pr_info("Please check any exiting tasks stuck between calls to exit_tasks_rcu_start() and exit_tasks_rcu_finish()\n");
+       tasks_rcu_exit_srcu_stall_timer.expires = jiffies + rtsi;
+       add_timer(&tasks_rcu_exit_srcu_stall_timer);
+#endif // #ifndef CONFIG_TINY_RCU
+}
+
 /**
  * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
  * @rhp: structure to be used for queueing the RCU updates.