genirq: Provide a snapshot mechanism for interrupt statistics
author Bitao Hu <yaoma@linux.alibaba.com>
Thu, 11 Apr 2024 07:41:31 +0000 (15:41 +0800)
committer Thomas Gleixner <tglx@linutronix.de>
Fri, 12 Apr 2024 15:08:05 +0000 (17:08 +0200)
The soft lockup detector lacks a mechanism to identify interrupt storms as
root cause of a lockup. To enable this the detector needs a mechanism to
snapshot the interrupt count statistics on a CPU when the detector observes
a potential lockup scenario and compare that against the interrupt count
when it warns about the lockup later on. The number of interrupts in that
period gives a hint whether the lockup might have been caused by an interrupt
storm.

Instead of having extra storage in the lockup detector and accessing the
internals of the interrupt descriptor directly, add a snapshot member to
the per CPU irq_desc::kstat_irqs structure and provide interfaces to take a
snapshot of all interrupts on the current CPU and to retrieve the delta of
a specific interrupt later on.

Originally-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Bitao Hu <yaoma@linux.alibaba.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240411074134.30922-3-yaoma@linux.alibaba.com
include/linux/irqdesc.h
include/linux/kernel_stat.h
kernel/irq/Kconfig
kernel/irq/irqdesc.c

index c28612674acbc32e607756588ed250a51646064f..fd091c35d5721eee37a2fd3d5526559671d5048d 100644 (file)
@@ -20,9 +20,13 @@ struct pt_regs;
 /**
  * struct irqstat - interrupt statistics
  * @cnt:       real-time interrupt count
+ * @ref:       snapshot of @cnt as recorded by kstat_snapshot_irqs(); only
+ *             present when CONFIG_GENERIC_IRQ_STAT_SNAPSHOT is enabled
  */
 struct irqstat {
        unsigned int    cnt;
+#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
+       unsigned int    ref;
+#endif
 };
 
 /**
index 9935f7ecbfb9e31a68a6c6d0748aea49e0171344..9c042c6384bb3dec72670815fbaaf4483ae4d221 100644 (file)
@@ -79,6 +79,14 @@ static inline unsigned int kstat_cpu_softirqs_sum(int cpu)
        return sum;
 }
 
+#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
+extern void kstat_snapshot_irqs(void);
+extern unsigned int kstat_get_irq_since_snapshot(unsigned int irq);
+#else /* !CONFIG_GENERIC_IRQ_STAT_SNAPSHOT: no-op stubs, the delta always reads as 0 */
+static inline void kstat_snapshot_irqs(void) { }
+static inline unsigned int kstat_get_irq_since_snapshot(unsigned int irq) { return 0; }
+#endif /* CONFIG_GENERIC_IRQ_STAT_SNAPSHOT */
+
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
index 2531f3496ab6d73a7570c91ad47198bc9622e1fc..529adb1f58593c6b26fe28ca0d360bffeaef154c 100644 (file)
@@ -108,6 +108,10 @@ config GENERIC_IRQ_MATRIX_ALLOCATOR
 config GENERIC_IRQ_RESERVATION_MODE
        bool
 
+# Snapshot for interrupt statistics
+config GENERIC_IRQ_STAT_SNAPSHOT
+       bool
+
 # Support forced irq threading
 config IRQ_FORCED_THREADING
        bool
index b59b79200ad7451014a2be1f672c7b02f9c60daf..f348faffa7b4241fd354ded9c4b0dedcb8407a0f 100644 (file)
@@ -994,6 +994,31 @@ static unsigned int kstat_irqs(unsigned int irq)
        return sum;
 }
 
+#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
+/**
+ * kstat_snapshot_irqs - Snapshot the interrupt counts of the current CPU
+ *
+ * Walks all interrupt descriptors and, for each one that has per CPU
+ * statistics, copies this CPU's live count (irqstat::cnt) into this CPU's
+ * snapshot slot (irqstat::ref). Descriptors without statistics are skipped.
+ *
+ * The stored values are the reference point which
+ * kstat_get_irq_since_snapshot() subtracts from the live count later on.
+ */
+void kstat_snapshot_irqs(void)
+{
+       struct irq_desc *desc;
+       unsigned int irq;
+
+       for_each_irq_desc(irq, desc) {
+               if (!desc->kstat_irqs)
+                       continue;
+               this_cpu_write(desc->kstat_irqs->ref, this_cpu_read(desc->kstat_irqs->cnt));
+       }
+}
+/**
+ * kstat_get_irq_since_snapshot - Get the interrupt count since the last snapshot
+ * @irq:       The interrupt number
+ *
+ * Returns: The difference between the current CPU's live count and its
+ * snapshot value for @irq, or 0 when @irq has no descriptor or the
+ * descriptor has no per CPU statistics. The subtraction is done in
+ * unsigned int arithmetic, so the result is modulo 2^N if the live counter
+ * wrapped after the snapshot was taken.
+ *
+ * NOTE(review): the two per CPU reads are separate operations; presumably
+ * callers must not migrate between CPUs while calling this - confirm
+ * against the call sites.
+ */
+unsigned int kstat_get_irq_since_snapshot(unsigned int irq)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (!desc || !desc->kstat_irqs)
+               return 0;
+       return this_cpu_read(desc->kstat_irqs->cnt) - this_cpu_read(desc->kstat_irqs->ref);
+}
+
+#endif
+
 /**
  * kstat_irqs_usr - Get the statistics for an interrupt from thread context
  * @irq:       The interrupt number