perf/core: Fix reentry problem in perf_output_read_group()
authorYang Jihong <yangjihong1@huawei.com>
Fri, 2 Sep 2022 08:29:18 +0000 (16:29 +0800)
committerPeter Zijlstra <peterz@infradead.org>
Tue, 6 Sep 2022 09:33:00 +0000 (11:33 +0200)
perf_output_read_group may respond to IPI request of other cores and invoke
__perf_install_in_context function. As a result, hwc configuration is modified.
causing inconsistency and unexpected consequences.

Interrupts are not disabled when perf_output_read_group reads PMU counter.
In this case, IPI request may be received from other cores.
As a result, PMU configuration is modified and an error occurs when
reading PMU counter:

     CPU0                                         CPU1
      __se_sys_perf_event_open
perf_install_in_context
  perf_output_read_group                                  smp_call_function_single
    for_each_sibling_event(sub, leader) {                   generic_exec_single
      if ((sub != event) &&                                   remote_function
  (sub->state == PERF_EVENT_STATE_ACTIVE))                    |
  <enter IPI handler: __perf_install_in_context>   <----RAISE IPI-----+
  __perf_install_in_context
    ctx_resched
      event_sched_out
armpmu_del
  ...
  hwc->idx = -1; // event->hwc.idx is set to -1
  ...
  <exit IPI>
      sub->pmu->read(sub);
armpmu_read
  armv8pmu_read_counter
    armv8pmu_read_hw_counter
      int idx = event->hw.idx; // idx = -1
      u64 val = armv8pmu_read_evcntr(idx);
u32 counter = ARMV8_IDX_TO_COUNTER(idx); // invalid counter = 30
read_pmevcntrn(counter) // undefined instruction

Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220902082918.179248-1-yangjihong1@huawei.com
kernel/events/core.c

index 2621fd24ad260b891d5ef2ed1571eacb30e5e5ef..ff4bffc502c677fc201353424d6ed9922c05380b 100644 (file)
@@ -6893,9 +6893,16 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 {
        struct perf_event *leader = event->group_leader, *sub;
        u64 read_format = event->attr.read_format;
+       unsigned long flags;
        u64 values[6];
        int n = 0;
 
+       /*
+        * Disabling interrupts avoids all counter scheduling
+        * (context switches, timer based rotation and IPIs).
+        */
+       local_irq_save(flags);
+
        values[n++] = 1 + leader->nr_siblings;
 
        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -6931,6 +6938,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 
                __output_copy(handle, values, n * sizeof(u64));
        }
+
+       local_irq_restore(flags);
 }
 
 #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\