sched/debug: Try trigger_single_cpu_backtrace(cpu) in dump_cpu_task()
author     Zhen Lei <thunder.leizhen@huawei.com>
Thu, 4 Aug 2022 02:34:19 +0000 (10:34 +0800)
committer  Paul E. McKenney <paulmck@kernel.org>
Wed, 31 Aug 2022 12:03:14 +0000 (05:03 -0700)
The trigger_single_cpu_backtrace() function attempts to send an NMI to
the target CPU, which usually provides much better stack traces than the
dump_cpu_task() function's approach of dumping that stack from some
other CPU.  So much so that most calls to dump_cpu_task() only happen
after a call to trigger_single_cpu_backtrace() has failed.  And the
exception to this rule really should attempt to use
trigger_single_cpu_backtrace() first.

Therefore, move the trigger_single_cpu_backtrace() invocation into
dump_cpu_task().
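
In short, call sites no longer need to open-code the NMI-backtrace
attempt before falling back to a remote stack dump.  A simplified
sketch of the before/after pattern, condensed from the hunks below
(not the literal kernel source):

	/* Before: each call site tried the NMI backtrace itself. */
	if (!trigger_single_cpu_backtrace(cpu))
		dump_cpu_task(cpu);

	/* After: call sites simply request a dump ... */
	dump_cpu_task(cpu);

	/*
	 * ... and dump_cpu_task() first tries the NMI-driven backtrace
	 * on the target CPU, dumping its stack from the current CPU
	 * only if that attempt fails.
	 */
	void dump_cpu_task(int cpu)
	{
		if (trigger_single_cpu_backtrace(cpu))
			return;

		pr_info("Task dump for CPU %d:\n", cpu);
		sched_show_task(cpu_curr(cpu));
	}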

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Valentin Schneider <vschneid@redhat.com>
kernel/rcu/tree_stall.h
kernel/sched/core.c
kernel/smp.c

diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index c3fbbcc09327ff348055cebd21003b266fa983bf..5653560573e22d650788b7695fedc85f50568756 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -368,7 +368,7 @@ static void rcu_dump_cpu_stacks(void)
                        if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
                                if (cpu_is_offline(cpu))
                                        pr_err("Offline CPU %d blocking current GP.\n", cpu);
-                               else if (!trigger_single_cpu_backtrace(cpu))
+                               else
                                        dump_cpu_task(cpu);
                        }
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -511,8 +511,7 @@ static void rcu_check_gp_kthread_starvation(void)
                                        pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu);
                                } else  {
                                        pr_err("Stack dump where RCU GP kthread last ran:\n");
-                                       if (!trigger_single_cpu_backtrace(cpu))
-                                               dump_cpu_task(cpu);
+                                       dump_cpu_task(cpu);
                                }
                        }
                        wake_up_process(gpk);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ee28253c9ac0c2ed4e602a9584b5e1a77c4f2825..e15b6a7f34f47fa2aa4e5996a8b2a217093fb497 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11183,6 +11183,9 @@ struct cgroup_subsys cpu_cgrp_subsys = {
 
 void dump_cpu_task(int cpu)
 {
+       if (trigger_single_cpu_backtrace(cpu))
+               return;
+
        pr_info("Task dump for CPU %d:\n", cpu);
        sched_show_task(cpu_curr(cpu));
 }
diff --git a/kernel/smp.c b/kernel/smp.c
index 650810a6f29b3a1ef2a2554b6f9272067715b4c8..e8cdc025a046fe00009072071315ddea8e8c3e9d 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -370,8 +370,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
        if (cpu >= 0) {
                if (static_branch_unlikely(&csdlock_debug_extended))
                        csd_lock_print_extended(csd, cpu);
-               if (!trigger_single_cpu_backtrace(cpu))
-                       dump_cpu_task(cpu);
+               dump_cpu_task(cpu);
                if (!cpu_cur_csd) {
                        pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
                        arch_send_call_function_single_ipi(cpu);