x86/resctrl: Add tracepoint for llc_occupancy tracking
author Haifeng Xu <haifeng.xu@shopee.com>
Mon, 8 Apr 2024 09:23:03 +0000 (17:23 +0800)
committer Borislav Petkov (AMD) <bp@alien8.de>
Wed, 24 Apr 2024 12:24:48 +0000 (14:24 +0200)
In our production environment, after removing monitor groups, those
unused RMIDs get stuck in the limbo list forever because their
llc_occupancy is always larger than the threshold. But the unused RMIDs
can be successfully freed by turning up the threshold.

In order to know how much the threshold should be, perf can be used to
acquire the llc_occupancy of RMIDs in each rdt domain.

Instead of using the perf tool to track llc_occupancy and filter the log
manually, it is more convenient for users to use a tracepoint to do this
work. So add a new tracepoint that shows the llc_occupancy of busy RMIDs
when scanning the limbo list.

Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
Suggested-by: James Morse <james.morse@arm.com>
Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: James Morse <james.morse@arm.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lore.kernel.org/r/20240408092303.26413-3-haifeng.xu@shopee.com
Documentation/arch/x86/resctrl.rst
arch/x86/kernel/cpu/resctrl/monitor.c
arch/x86/kernel/cpu/resctrl/trace.h

index 6c245582d8fb16c6691924c7fea866b6c1bb15e8..627e23869bca83bbfb4ac6a893652ba6df471f01 100644 (file)
@@ -446,6 +446,12 @@ during mkdir.
 max_threshold_occupancy is a user configurable value to determine the
 occupancy at which an RMID can be freed.
 
+The mon_llc_occupancy_limbo tracepoint gives the precise occupancy in bytes
+for a subset of RMID that are not immediately available for allocation.
+This can't be relied on to produce output every second, it may be necessary
+to attempt to create an empty monitor group to force an update. Output may
+only be produced if creation of a control or monitor group fails.
+
 Schemata files - general concepts
 ---------------------------------
 Each line in the file describes one resource. The line starts with
index c34a35ec0f031a188fc29424bdef31cd54fa597d..2345e6836593fcda0d6f53ff51e1cfcb64d4fd71 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/resctrl.h>
 
 #include "internal.h"
+#include "trace.h"
 
 /**
  * struct rmid_entry - dirty tracking for all RMID.
@@ -354,6 +355,16 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
                        rmid_dirty = true;
                } else {
                        rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
+
+                       /*
+                        * x86's CLOSID and RMID are independent numbers, so the entry's
+                        * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
+                        * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
+                        * used to select the configuration. It is thus necessary to track both
+                        * CLOSID and RMID because there may be dependencies between them
+                        * on some architectures.
+                        */
+                       trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->id, val);
                }
 
                if (force_free || !rmid_dirty) {
index 495fb90c857229331ed23dd12003f9ef862c71f5..2a506316b303425c5588bcf85c777b92e5e13797 100644 (file)
@@ -35,6 +35,22 @@ TRACE_EVENT(pseudo_lock_l3,
            TP_printk("hits=%llu miss=%llu",
                      __entry->l3_hits, __entry->l3_miss));
 
+TRACE_EVENT(mon_llc_occupancy_limbo,
+           TP_PROTO(u32 ctrl_hw_id, u32 mon_hw_id, int domain_id, u64 llc_occupancy_bytes),
+           TP_ARGS(ctrl_hw_id, mon_hw_id, domain_id, llc_occupancy_bytes),
+           TP_STRUCT__entry(__field(u32, ctrl_hw_id)
+                            __field(u32, mon_hw_id)
+                            __field(int, domain_id)
+                            __field(u64, llc_occupancy_bytes)),
+           TP_fast_assign(__entry->ctrl_hw_id = ctrl_hw_id;
+                          __entry->mon_hw_id = mon_hw_id;
+                          __entry->domain_id = domain_id;
+                          __entry->llc_occupancy_bytes = llc_occupancy_bytes;),
+           TP_printk("ctrl_hw_id=%u mon_hw_id=%u domain_id=%d llc_occupancy_bytes=%llu",
+                     __entry->ctrl_hw_id, __entry->mon_hw_id, __entry->domain_id,
+                     __entry->llc_occupancy_bytes)
+          );
+
 #endif /* _TRACE_RESCTRL_H */
 
 #undef TRACE_INCLUDE_PATH