Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86/resctrl: Handle number of RMIDs supported by RDT_RESOURCE_PERF_PKG

There are now three meanings for "number of RMIDs":

1) The number for legacy features enumerated by CPUID leaf 0xF. This is the
maximum number of distinct values that can be loaded into MSR_IA32_PQR_ASSOC.
Note that systems with Sub-NUMA Cluster mode enabled will force scaling down
the CPUID enumerated value by the number of SNC nodes per L3-cache.

2) The number of registers in MMIO space for each event. This is enumerated in
the XML files and is the value initialized into event_group::num_rmid.

3) The number of "hardware counters" (this isn't a strictly accurate
description of how things work, but serves as a useful analogy that does
describe the limitations) feeding to those MMIO registers. This is enumerated
in telemetry_region::num_rmids returned by intel_pmt_get_regions_by_feature().

Event groups with insufficient "hardware counters" to track all RMIDs are
difficult for users to use, since the system may reassign "hardware counters"
at any time. This means that users cannot reliably collect two consecutive
event counts to compute the rate at which events are occurring.

Disable such event groups by default. The user may override this with
an "rdt=" kernel command line option. In this case, limit an under-resourced
event group's number of possible monitor resource groups to the lowest number
of "hardware counters" reported by any of its telemetry regions.

Scan all enabled event groups and set the RDT_RESOURCE_PERF_PKG resource's
"num_rmid" value to the smallest of their num_rmid values. This value will be
used later, in comparison with the number of RMIDs supported by other
resources, to determine how many monitoring resource groups are supported.

N.B. Change type of resctrl_mon::num_rmid to u32 to match its usage and the
type of event_group::num_rmid so that min(r->num_rmid, e->num_rmid) won't
complain about mixing signed and unsigned types.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lore.kernel.org/20251217172121.12030-1-tony.luck@intel.com

authored by

Tony Luck and committed by
Borislav Petkov (AMD)
67640e33 842e7f97

+54 -3
+52 -1
arch/x86/kernel/cpu/resctrl/intel_aet.c
··· 22 22 #include <linux/intel_pmt_features.h> 23 23 #include <linux/intel_vsec.h> 24 24 #include <linux/io.h> 25 + #include <linux/minmax.h> 25 26 #include <linux/printk.h> 26 27 #include <linux/rculist.h> 27 28 #include <linux/rcupdate.h> ··· 61 60 * Valid if the system supports the event group, 62 61 * NULL otherwise. 63 62 * @force_off: True when "rdt" command line or architecture code disables 64 - * this event group. 63 + * this event group due to insufficient RMIDs. 65 64 * @force_on: True when "rdt" command line overrides disable of this 66 65 * event group. 67 66 * @guid: Unique number per XML description file. 67 + * @num_rmid: Number of RMIDs supported by this group. May be 68 + * adjusted downwards if enumeration from 69 + * intel_pmt_get_regions_by_feature() indicates fewer 70 + * RMIDs can be tracked simultaneously. 68 71 * @mmio_size: Number of bytes of MMIO registers for this group. 69 72 * @num_events: Number of events in this group. 70 73 * @evts: Array of event descriptors. ··· 81 76 82 77 /* Remaining fields initialized from XML file. 
*/ 83 78 u32 guid; 79 + u32 num_rmid; 84 80 size_t mmio_size; 85 81 unsigned int num_events; 86 82 struct pmt_event evts[] __counted_by(num_events); ··· 96 90 static struct event_group energy_0x26696143 = { 97 91 .pfname = "energy", 98 92 .guid = 0x26696143, 93 + .num_rmid = 576, 99 94 .mmio_size = XML_MMIO_SIZE(576, 2, 3), 100 95 .num_events = 2, 101 96 .evts = { ··· 111 104 static struct event_group perf_0x26557651 = { 112 105 .pfname = "perf", 113 106 .guid = 0x26557651, 107 + .num_rmid = 576, 114 108 .mmio_size = XML_MMIO_SIZE(576, 7, 3), 115 109 .num_events = 7, 116 110 .evts = { ··· 206 198 return usable_regions; 207 199 } 208 200 201 + static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p) 202 + { 203 + struct telemetry_region *tr; 204 + 205 + for (int i = 0; i < p->count; i++) { 206 + if (!p->regions[i].addr) 207 + continue; 208 + tr = &p->regions[i]; 209 + if (tr->num_rmids < e->num_rmid) { 210 + e->force_off = true; 211 + return false; 212 + } 213 + } 214 + 215 + return true; 216 + } 217 + 209 218 static bool enable_events(struct event_group *e, struct pmt_feature_group *p) 210 219 { 211 220 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl; ··· 234 209 if (!group_has_usable_regions(e, p)) 235 210 return false; 236 211 212 + /* 213 + * Only enable event group with insufficient RMIDs if the user requested 214 + * it from the kernel command line. 215 + */ 216 + if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) { 217 + pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n", 218 + r->name, e->pfname, e->guid); 219 + return false; 220 + } 221 + 222 + for (int i = 0; i < p->count; i++) { 223 + if (!p->regions[i].addr) 224 + continue; 225 + /* 226 + * e->num_rmid only adjusted lower if user (via rdt= kernel 227 + * parameter) forces an event group with insufficient RMID 228 + * to be enabled. 
229 + */ 230 + e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids); 231 + } 232 + 237 233 for (int j = 0; j < e->num_events; j++) { 238 234 if (!resctrl_enable_mon_event(e->evts[j].id, true, 239 235 e->evts[j].bin_bits, &e->evts[j])) ··· 264 218 pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid); 265 219 return false; 266 220 } 221 + 222 + if (r->mon.num_rmid) 223 + r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid); 224 + else 225 + r->mon.num_rmid = e->num_rmid; 267 226 268 227 return true; 269 228 }
+1 -1
fs/resctrl/rdtgroup.c
··· 1158 1158 { 1159 1159 struct rdt_resource *r = rdt_kn_parent_priv(of->kn); 1160 1160 1161 - seq_printf(seq, "%d\n", r->mon.num_rmid); 1161 + seq_printf(seq, "%u\n", r->mon.num_rmid); 1162 1162 1163 1163 return 0; 1164 1164 }
+1 -1
include/linux/resctrl.h
··· 295 295 * events of monitor groups created via mkdir. 296 296 */ 297 297 struct resctrl_mon { 298 - int num_rmid; 298 + u32 num_rmid; 299 299 unsigned int mbm_cfg_mask; 300 300 int num_mbm_cntrs; 301 301 bool mbm_cntr_assignable;