Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tools/power turbostat: Add L2 cache statistics

version 2026.02.04

Add support for L2 cache statistics: L2MRPS and L2%hit
L2 statistics join the LLC in the "cache" counter group.

While the underlying LLC perf kernel support was architectural,
L2 perf counters are model-specific:

Support Intel Xeon -- Sapphire Rapids and newer.
Support Intel Atom -- Gracemont and newer.
Support Intel Hybrid -- Alder Lake and newer.

Example:

alder-lake-n$ sudo turbostat --quiet --show CPU,Busy%,cache my_workload
CPU Busy% LLCMRPS LLC%hit L2MRPS L2%hit
- 49.82 1210 85.02 2909 31.63
0 99.14 322 88.89 767 32.38
1 0.91 1 32.47 1 18.86
2 0.20 0 40.78 0 23.34
3 99.17 295 81.79 706 31.89
4 0.68 1 58.71 1 15.61
5 99.16 299 85.65 726 31.32
6 0.08 0 45.35 0 31.71
7 99.21 293 83.63 707 30.92

where "my_workload" is a wrapper for a yogini workload
that has 4 fully-busy threads with 2MB working set each.

Note that, analogous to the system summary for multi-LLC systems,
the system summary row for the L2 is the aggregate of all CPUs in the
system -- there is no per-cache roll-up.

Signed-off-by: Len Brown <len.brown@intel.com>

Len Brown dd23bfe4 a9c7a1a2

+405 -51
+5 -1
tools/power/x86/turbostat/turbostat.8
··· 163 163 .PP 164 164 \fBLLCMRPS\fP Last Level Cache Millions of References Per Second. For CPUs with an L3 LLC, this is the number of references that CPU made to the L3 (and the number of misses that CPU made to its L2). For CPUs with an L2 LLC, this is the number of references to the L2 (and the number of misses to the CPU's L1). The system summary row shows the sum for all CPUs. In both cases, the value displayed is the actual value divided by 1,000,000. If this value is large, then the LLC%hit column is significant. If this value is small, then the LLC%hit column is not significant. 165 165 .PP 166 - \fBLLC%hit\fP Last Level Cache Hit Rate %. Hit Rate Percent = 100.0 * (References - Misses)/References. The system summary row shows the weighted average for all CPUs (100.0 * (Sum_References - Sum_Misses)/Sum_References). 166 + \fBLLC%hit\fP Last Level Cache Hit Rate %. Hit Rate Percent = 100.0 * Hits/References. The system summary row shows the weighted average for all CPUs (100.0 * Sum_Hits/Sum_References). 167 + .PP 168 + \fBL2MRPS\fP Level-2 Cache Millions of References Per Second. For CPUs with an L2 LLC, this is the same as LLC references. The system summary row shows the sum for all CPUs. In both cases, the value displayed is the actual value divided by 1,000,000. If this value is large, then the L2%hit column is significant. If this value is small, then the L2%hit column is not significant. 169 + .PP 170 + \fBL2%hit\fP Level-2 Cache Hit Rate %. Hit Rate Percent = 100.0 * Hits/References. The system summary row shows the weighted average for all CPUs (100.0 * (Sum_Hits)/Sum_References). 167 171 .PP 168 172 \fBC1, C2, C3...\fP The number of times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. 
The system description section of the output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. 169 173 .PP
+400 -50
tools/power/x86/turbostat/turbostat.c
··· 212 212 { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, 213 213 { 0x0, "LLCMRPS", NULL, 0, 0, 0, NULL, 0 }, 214 214 { 0x0, "LLC%hit", NULL, 0, 0, 0, NULL, 0 }, 215 + { 0x0, "L2MRPS", NULL, 0, 0, 0, NULL, 0 }, 216 + { 0x0, "L2%hit", NULL, 0, 0, 0, NULL, 0 }, 215 217 }; 216 218 217 219 /* n.b. bic_names must match the order in bic[], above */ ··· 285 283 BIC_pct_idle, 286 284 BIC_LLC_MRPS, 287 285 BIC_LLC_HIT, 286 + BIC_L2_MRPS, 287 + BIC_L2_HIT, 288 288 MAX_BIC 289 289 }; 290 290 ··· 298 294 299 295 printf("%s:", s); 300 296 301 - for (i = 0; i <= MAX_BIC; ++i) { 297 + for (i = 0; i < MAX_BIC; ++i) { 302 298 303 - if (CPU_ISSET(i, set)) { 304 - assert(i < MAX_BIC); 299 + if (CPU_ISSET(i, set)) 305 300 printf(" %s", bic[i].name); 306 - } 307 301 } 308 302 putchar('\n'); 309 303 } ··· 428 426 BIC_INIT(&bic_group_cache); 429 427 SET_BIC(BIC_LLC_MRPS, &bic_group_cache); 430 428 SET_BIC(BIC_LLC_HIT, &bic_group_cache); 429 + SET_BIC(BIC_L2_MRPS, &bic_group_cache); 430 + SET_BIC(BIC_L2_HIT, &bic_group_cache); 431 431 432 432 BIC_INIT(&bic_group_other); 433 433 SET_BIC(BIC_IRQ, &bic_group_other); ··· 486 482 int *fd_percpu; 487 483 int *fd_instr_count_percpu; 488 484 int *fd_llc_percpu; 485 + int *fd_l2_percpu; 489 486 struct timeval interval_tv = { 5, 0 }; 490 487 struct timespec interval_ts = { 5, 0 }; 491 488 ··· 1254 1249 { 0, NULL }, 1255 1250 }; 1256 1251 1252 + struct { 1253 + unsigned int uniform; 1254 + unsigned int pcore; 1255 + unsigned int ecore; 1256 + unsigned int lcore; 1257 + } perf_pmu_types; 1258 + 1259 + /* 1260 + * Events are enumerated in https://github.com/intel/perfmon 1261 + * and tools/perf/pmu-events/arch/x86/.../cache.json 1262 + */ 1263 + struct perf_l2_events { 1264 + unsigned long long refs; /* L2_REQUEST.ALL */ 1265 + unsigned long long hits; /* L2_REQUEST.HIT */ 1266 + }; 1267 + 1268 + struct perf_model_support { 1269 + unsigned int vfm; 1270 + struct perf_l2_events first; 1271 + struct perf_l2_events second; 1272 + struct perf_l2_events 
third; 1273 + } *perf_model_support; 1274 + 1275 + /* Perf Cache Events */ 1276 + #define PCE(ext_umask, umask) (((unsigned long long) ext_umask) << 40 | umask << 8 | 0x24) 1277 + 1278 + /* 1279 + * Enumerate up to three perf CPU PMU's in a system. 1280 + * The first, second, and third columns are populated without skipping, describing 1281 + * pcore, ecore, lcore PMUs, in order, if present. (The associated PMU "type" field is 1282 + * read from sysfs in all cases.) Eg. 1283 + * 1284 + * non-hybrid: 1285 + * GNR: pcore, {}, {} 1286 + * ADL-N: ecore, {}, {} 1287 + * hybrid: 1288 + * MTL: pcore, ecore, {}% 1289 + * ARL-H: pcore, ecore, lcore 1290 + * LNL: ecore, ecore%%, {} 1291 + * 1292 + * % MTL physical lcore share architecture and PMU with ecore, and are thus not enumerated separately. 1293 + * %% LNL physical lcore is enumerated by perf as ecore 1294 + */ 1295 + static struct perf_model_support turbostat_perf_model_support[] = { 1296 + { INTEL_SAPPHIRERAPIDS_X, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, {}, {} }, 1297 + { INTEL_EMERALDRAPIDS_X, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, {}, {} }, 1298 + { INTEL_GRANITERAPIDS_X, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, {}, {} }, 1299 + { INTEL_GRANITERAPIDS_D, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, {}, {} }, 1300 + { INTEL_DIAMONDRAPIDS_X, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, {}, {} }, 1301 + 1302 + { INTEL_ATOM_GRACEMONT, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {}, {} }, /* ADL-N */ 1303 + { INTEL_ATOM_CRESTMONT_X, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {}, {} }, /* SRF */ 1304 + { INTEL_ATOM_CRESTMONT, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {}, {} }, /* GRR */ 1305 + { INTEL_ATOM_DARKMONT_X, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {}, {} }, /* CWF */ 1306 + 1307 + { INTEL_ALDERLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1308 + { INTEL_ALDERLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1309 + { INTEL_ALDERLAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { 
PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1310 + { INTEL_RAPTORLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1311 + { INTEL_RAPTORLAKE_P, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1312 + { INTEL_RAPTORLAKE_S, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1313 + { INTEL_METEORLAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1314 + { INTEL_METEORLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1315 + { INTEL_ARROWLAKE_U, { PCE(0x00, 0xFF), PCE(0x00, 0xDF)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)}, {} }, 1316 + 1317 + { INTEL_LUNARLAKE_M, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x00, 0x07), PCE(0x00, 0x02)}, {} }, 1318 + { INTEL_ARROWLAKE_H, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x00, 0x07), PCE(0x00, 0x02)}, { PCE(0x00, 0x00), PCE(0x00, 0x02)} }, 1319 + { INTEL_ARROWLAKE, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x00, 0x07), PCE(0x00, 0x02)}, {} }, 1320 + 1321 + { INTEL_PANTHERLAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {} }, 1322 + { INTEL_WILDCATLAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {} }, 1323 + 1324 + { INTEL_NOVALAKE, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {} }, 1325 + { INTEL_NOVALAKE_L, { PCE(0x00, 0xFF), PCE(0x00, 0x5F)}, { PCE(0x01, 0xFF), PCE(0x01, 0xBF)}, {} }, 1326 + 1327 + { 0, {}, {}, {} } 1328 + }; 1329 + 1257 1330 static const struct platform_features *platform; 1258 1331 1259 1332 void probe_platform_features(unsigned int family, unsigned int model) ··· 1375 1292 exit(1); 1376 1293 } 1377 1294 1295 + void init_perf_model_support(unsigned int family, unsigned int model) 1296 + { 1297 + int i; 1298 + 1299 + if (!genuine_intel) 1300 + return; 1301 + 1302 + for (i = 0; turbostat_perf_model_support[i].vfm; i++) { 1303 + if 
(VFM_FAMILY(turbostat_perf_model_support[i].vfm) == family && VFM_MODEL(turbostat_perf_model_support[i].vfm) == model) { 1304 + perf_model_support = &turbostat_perf_model_support[i]; 1305 + return; 1306 + } 1307 + } 1308 + } 1309 + 1378 1310 /* Model specific support End */ 1379 1311 1380 1312 #define TJMAX_DEFAULT 100 ··· 1406 1308 1407 1309 #define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */ 1408 1310 cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; 1311 + cpu_set_t *perf_pcore_set, *perf_ecore_set, *perf_lcore_set; 1409 1312 size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; 1410 1313 #define MAX_ADDED_THREAD_COUNTERS 24 1411 1314 #define MAX_ADDED_CORE_COUNTERS 8 ··· 2107 2008 unsigned long long references; 2108 2009 unsigned long long misses; 2109 2010 }; 2011 + struct l2_stats { 2012 + unsigned long long references; 2013 + unsigned long long hits; 2014 + }; 2110 2015 struct thread_data { 2111 2016 struct timeval tv_begin; 2112 2017 struct timeval tv_end; ··· 2124 2021 unsigned long long nmi_count; 2125 2022 unsigned int smi_count; 2126 2023 struct llc_stats llc; 2024 + struct l2_stats l2; 2127 2025 unsigned int cpu_id; 2128 2026 unsigned int apic_id; 2129 2027 unsigned int x2apic_id; ··· 2546 2442 CLR_BIC(BIC_IPC, &bic_enabled); 2547 2443 CLR_BIC(BIC_LLC_MRPS, &bic_enabled); 2548 2444 CLR_BIC(BIC_LLC_HIT, &bic_enabled); 2445 + CLR_BIC(BIC_L2_MRPS, &bic_enabled); 2446 + CLR_BIC(BIC_L2_HIT, &bic_enabled); 2549 2447 } 2550 2448 2551 2449 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) ··· 2926 2820 if (DO_BIC(BIC_LLC_HIT)) 2927 2821 outp += sprintf(outp, "%sLLC%%hit", (printed++ ? delim : "")); 2928 2822 2823 + if (DO_BIC(BIC_L2_MRPS)) 2824 + outp += sprintf(outp, "%sL2MRPS", (printed++ ? 
delim : "")); 2825 + 2826 + if (DO_BIC(BIC_L2_HIT)) 2827 + outp += sprintf(outp, "%sL2%%hit", (printed++ ? delim : "")); 2828 + 2929 2829 for (mp = sys.tp; mp; mp = mp->next) 2930 2830 outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); 2931 2831 ··· 3169 3057 outp += sprintf(outp, "LLC miss: %lld", t->llc.misses); 3170 3058 outp += sprintf(outp, "LLC Hit%%: %.2f", pct((t->llc.references - t->llc.misses), t->llc.references)); 3171 3059 3060 + outp += sprintf(outp, "L2 refs: %lld", t->l2.references); 3061 + outp += sprintf(outp, "L2 hits: %lld", t->l2.hits); 3062 + outp += sprintf(outp, "L2 Hit%%: %.2f", pct(t->l2.hits, t->l2.references)); 3063 + 3172 3064 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 3173 3065 outp += sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, t->counter[i], mp->sp->path); 3174 3066 } ··· 3272 3156 llc->misses = r.llc.misses; 3273 3157 if (actual_read_size != expected_read_size) 3274 3158 warn("%s: failed to read perf_data (req %zu act %zu)", __func__, expected_read_size, actual_read_size); 3159 + } 3160 + 3161 + void get_perf_l2_stats(int cpu, struct l2_stats *l2) 3162 + { 3163 + struct read_format { 3164 + unsigned long long num_read; 3165 + struct l2_stats l2; 3166 + } r; 3167 + const ssize_t expected_read_size = sizeof(r); 3168 + ssize_t actual_read_size; 3169 + 3170 + actual_read_size = read(fd_l2_percpu[cpu], &r, expected_read_size); 3171 + 3172 + if (actual_read_size == -1) 3173 + err(-1, "%s(cpu%d,) %d,,%ld", __func__, cpu, fd_l2_percpu[cpu], expected_read_size); 3174 + 3175 + l2->references = r.l2.references; 3176 + l2->hits = r.l2.hits; 3177 + if (actual_read_size != expected_read_size) 3178 + warn("%s: cpu%d: failed to read(%d) perf_data (req %zu act %zu)", __func__, cpu, fd_l2_percpu[cpu], expected_read_size, actual_read_size); 3275 3179 } 3276 3180 3277 3181 /* ··· 3442 3306 outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), t->smi_count); 3443 3307 3444 3308 /* LLC Stats */ 3445 - if (DO_BIC(BIC_LLC_MRPS) || DO_BIC(BIC_LLC_HIT)) { 3446 - if (DO_BIC(BIC_LLC_MRPS)) 3447 - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->llc.references / interval_float / 1000000); 3309 + if (DO_BIC(BIC_LLC_MRPS)) 3310 + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->llc.references / interval_float / 1000000); 3448 3311 3449 - if (DO_BIC(BIC_LLC_HIT)) 3450 - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), pct((t->llc.references - t->llc.misses), t->llc.references)); 3451 - } 3312 + if (DO_BIC(BIC_LLC_HIT)) 3313 + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), pct((t->llc.references - t->llc.misses), t->llc.references)); 3314 + 3315 + /* L2 Stats */ 3316 + if (DO_BIC(BIC_L2_MRPS)) 3317 + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->l2.references / interval_float / 1000000); 3318 + 3319 + if (DO_BIC(BIC_L2_HIT)) 3320 + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), pct(t->l2.hits, t->l2.references)); 3452 3321 3453 3322 /* Added Thread Counters */ 3454 3323 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { ··· 3996 3855 if (DO_BIC(BIC_SMI)) 3997 3856 old->smi_count = new->smi_count - old->smi_count; 3998 3857 3999 - if (DO_BIC(BIC_LLC_MRPS)) 3858 + if (DO_BIC(BIC_LLC_MRPS) || DO_BIC(BIC_LLC_HIT)) 4000 3859 old->llc.references = new->llc.references - old->llc.references; 4001 3860 4002 3861 if (DO_BIC(BIC_LLC_HIT)) 4003 3862 old->llc.misses = new->llc.misses - old->llc.misses; 3863 + 3864 + if (DO_BIC(BIC_L2_MRPS) || DO_BIC(BIC_L2_HIT)) 3865 + old->l2.references = new->l2.references - old->l2.references; 3866 + 3867 + if (DO_BIC(BIC_L2_HIT)) 3868 + old->l2.hits = new->l2.hits - old->l2.hits; 4004 3869 4005 3870 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 4006 3871 if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) ··· 4088 3941 t->llc.references = 0; 4089 3942 t->llc.misses = 0; 4090 3943 3944 + t->l2.references = 
0; 3945 + t->l2.hits = 0; 3946 + 4091 3947 c->c3 = 0; 4092 3948 c->c6 = 0; 4093 3949 c->c7 = 0; ··· 4098 3948 c->core_temp_c = 0; 4099 3949 rapl_counter_clear(&c->core_energy); 4100 3950 c->core_throt_cnt = 0; 4101 - 4102 - t->llc.references = 0; 4103 - t->llc.misses = 0; 4104 3951 4105 3952 p->pkg_wtd_core_c0 = 0; 4106 3953 p->pkg_any_core_c0 = 0; ··· 4198 4051 4199 4052 average.threads.llc.references += t->llc.references; 4200 4053 average.threads.llc.misses += t->llc.misses; 4054 + 4055 + average.threads.l2.references += t->l2.references; 4056 + average.threads.l2.hits += t->l2.hits; 4201 4057 4202 4058 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { 4203 4059 if (mp->format == FORMAT_RAW) ··· 5220 5070 if (DO_BIC(BIC_LLC_MRPS) || DO_BIC(BIC_LLC_HIT)) 5221 5071 get_perf_llc_stats(cpu, &t->llc); 5222 5072 5073 + if (DO_BIC(BIC_L2_MRPS) || DO_BIC(BIC_L2_HIT)) 5074 + get_perf_l2_stats(cpu, &t->l2); 5075 + 5223 5076 if (DO_BIC(BIC_IPC)) 5224 5077 if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) 5225 5078 return -4; ··· 5838 5685 5839 5686 free(fd_llc_percpu); 5840 5687 fd_llc_percpu = NULL; 5688 + 5689 + BIC_NOT_PRESENT(BIC_LLC_MRPS); 5690 + BIC_NOT_PRESENT(BIC_LLC_HIT); 5691 + } 5692 + 5693 + void free_fd_l2_percpu(void) 5694 + { 5695 + if (!fd_l2_percpu) 5696 + return; 5697 + 5698 + for (int i = 0; i < topo.max_cpu_num + 1; ++i) { 5699 + if (fd_l2_percpu[i] != 0) 5700 + close(fd_l2_percpu[i]); 5701 + } 5702 + 5703 + free(fd_l2_percpu); 5704 + fd_l2_percpu = NULL; 5705 + 5706 + BIC_NOT_PRESENT(BIC_L2_MRPS); 5707 + BIC_NOT_PRESENT(BIC_L2_HIT); 5841 5708 } 5842 5709 5843 5710 void free_fd_cstate(void) ··· 5962 5789 cpu_affinity_set = NULL; 5963 5790 cpu_affinity_setsize = 0; 5964 5791 5792 + if (perf_pcore_set) { 5793 + CPU_FREE(perf_pcore_set); 5794 + perf_pcore_set = NULL; 5795 + } 5796 + 5797 + if (perf_ecore_set) { 5798 + CPU_FREE(perf_ecore_set); 5799 + perf_ecore_set = NULL; 5800 + } 5801 + 5802 + if 
(perf_lcore_set) { 5803 + CPU_FREE(perf_lcore_set); 5804 + perf_lcore_set = NULL; 5805 + } 5806 + 5965 5807 free(thread_even); 5966 5808 free(core_even); 5967 5809 free(package_even); ··· 6000 5812 free_fd_percpu(); 6001 5813 free_fd_instr_count_percpu(); 6002 5814 free_fd_llc_percpu(); 5815 + free_fd_l2_percpu(); 6003 5816 free_fd_msr(); 6004 5817 free_fd_rapl_percpu(); 6005 5818 free_fd_cstate(); ··· 6348 6159 void rapl_perf_init(void); 6349 6160 void cstate_perf_init(void); 6350 6161 void perf_llc_init(void); 6162 + void perf_l2_init(void); 6351 6163 void added_perf_counters_init(void); 6352 6164 void pmt_init(void); 6353 6165 ··· 6361 6171 rapl_perf_init(); 6362 6172 cstate_perf_init(); 6363 6173 perf_llc_init(); 6174 + perf_l2_init(); 6364 6175 added_perf_counters_init(); 6365 6176 pmt_init(); 6366 6177 fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); ··· 8524 8333 return ret; 8525 8334 } 8526 8335 8336 + char cpuset_buf[1024]; 8337 + int initialize_cpu_set_from_sysfs(cpu_set_t *cpu_set, char *sysfs_path, char *sysfs_file) 8338 + { 8339 + FILE *fp; 8340 + char path[128]; 8341 + 8342 + if (snprintf(path, 128, "%s/%s", sysfs_path, sysfs_file) > 128) 8343 + err(-1, "%s %s", sysfs_path, sysfs_file); 8344 + 8345 + fp = fopen(path, "r"); 8346 + if (!fp) { 8347 + warn("open %s", path); 8348 + return -1; 8349 + } 8350 + if (fread(cpuset_buf, sizeof(char), 1024, fp) == 0) { 8351 + warn("read %s", sysfs_path); 8352 + goto err; 8353 + } 8354 + if (parse_cpu_str(cpuset_buf, cpu_set, cpu_possible_setsize)) { 8355 + warnx("%s: cpu str malformat %s\n", sysfs_path, cpu_effective_str); 8356 + goto err; 8357 + } 8358 + return 0; 8359 + 8360 + err: 8361 + fclose(fp); 8362 + return -1; 8363 + } 8364 + 8365 + void print_cpu_set(char *s, cpu_set_t *set) 8366 + { 8367 + int i; 8368 + 8369 + assert(MAX_BIC < CPU_SETSIZE); 8370 + 8371 + printf("%s:", s); 8372 + 8373 + for (i = 0; i <= topo.max_cpu_num; ++i) 8374 + if 
(CPU_ISSET(i, set)) 8375 + printf(" %d", i); 8376 + putchar('\n'); 8377 + } 8378 + 8379 + void linux_perf_init_hybrid_cpus(void) 8380 + { 8381 + char *perf_cpu_pcore_path = "/sys/devices/cpu_core"; 8382 + char *perf_cpu_ecore_path = "/sys/devices/cpu_atom"; 8383 + char *perf_cpu_lcore_path = "/sys/devices/cpu_lowpower"; 8384 + char path[128]; 8385 + 8386 + if (!access(perf_cpu_pcore_path, F_OK)) { 8387 + perf_pcore_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8388 + if (perf_pcore_set == NULL) 8389 + err(3, "CPU_ALLOC"); 8390 + CPU_ZERO_S(cpu_possible_setsize, perf_pcore_set); 8391 + initialize_cpu_set_from_sysfs(perf_pcore_set, perf_cpu_pcore_path, "cpus"); 8392 + if (debug) 8393 + print_cpu_set("perf pcores", perf_pcore_set); 8394 + sprintf(path, "%s/%s", perf_cpu_pcore_path, "type"); 8395 + perf_pmu_types.pcore = snapshot_sysfs_counter(path); 8396 + } 8397 + 8398 + if (!access(perf_cpu_ecore_path, F_OK)) { 8399 + perf_ecore_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8400 + if (perf_ecore_set == NULL) 8401 + err(3, "CPU_ALLOC"); 8402 + CPU_ZERO_S(cpu_possible_setsize, perf_ecore_set); 8403 + initialize_cpu_set_from_sysfs(perf_ecore_set, perf_cpu_ecore_path, "cpus"); 8404 + if (debug) 8405 + print_cpu_set("perf ecores", perf_ecore_set); 8406 + sprintf(path, "%s/%s", perf_cpu_ecore_path, "type"); 8407 + perf_pmu_types.ecore = snapshot_sysfs_counter(path); 8408 + } 8409 + 8410 + if (!access(perf_cpu_lcore_path, F_OK)) { 8411 + perf_lcore_set = CPU_ALLOC((topo.max_cpu_num + 1)); 8412 + if (perf_lcore_set == NULL) 8413 + err(3, "CPU_ALLOC"); 8414 + CPU_ZERO_S(cpu_possible_setsize, perf_lcore_set); 8415 + initialize_cpu_set_from_sysfs(perf_lcore_set, perf_cpu_lcore_path, "cpus"); 8416 + if (debug) 8417 + print_cpu_set("perf lcores", perf_lcore_set); 8418 + sprintf(path, "%s/%s", perf_cpu_lcore_path, "type"); 8419 + perf_pmu_types.lcore = snapshot_sysfs_counter(path); 8420 + } 8421 + } 8422 + 8527 8423 /* 8528 - * Linux-perf manages the HW instructions-retired counter 8529 - 
* by enabling when requested, and hiding rollover 8424 + * Linux-perf related initialization 8530 8425 */ 8531 8426 void linux_perf_init(void) 8532 8427 { 8428 + char path[128]; 8429 + char *perf_cpu_path = "/sys/devices/cpu"; 8430 + 8533 8431 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) 8534 8432 return; 8433 + 8434 + if (!access(perf_cpu_path, F_OK)) { 8435 + sprintf(path, "%s/%s", perf_cpu_path, "type"); 8436 + perf_pmu_types.uniform = snapshot_sysfs_counter(path); 8437 + } else { 8438 + linux_perf_init_hybrid_cpus(); 8439 + } 8535 8440 8536 8441 if (BIC_IS_ENABLED(BIC_IPC) && cpuid_has_aperf_mperf) { 8537 8442 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8538 8443 if (fd_instr_count_percpu == NULL) 8539 8444 err(-1, "calloc fd_instr_count_percpu"); 8540 8445 } 8541 - if (BIC_IS_ENABLED(BIC_LLC_MRPS)) { 8446 + if (BIC_IS_ENABLED(BIC_LLC_MRPS) || BIC_IS_ENABLED(BIC_LLC_HIT)) { 8542 8447 fd_llc_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8543 8448 if (fd_llc_percpu == NULL) 8544 8449 err(-1, "calloc fd_llc_percpu"); 8450 + } 8451 + if (BIC_IS_ENABLED(BIC_L2_MRPS) || BIC_IS_ENABLED(BIC_L2_HIT)) { 8452 + fd_l2_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); 8453 + if (fd_l2_percpu == NULL) 8454 + err(-1, "calloc fd_l2_percpu"); 8545 8455 } 8546 8456 } 8547 8457 ··· 9075 8783 for_all_cpus(print_epb, ODD_COUNTERS); 9076 8784 for_all_cpus(print_perf_limit, ODD_COUNTERS); 9077 8785 } 8786 + 9078 8787 void dump_word_chars(unsigned int word) 9079 8788 { 9080 8789 int i; ··· 9083 8790 for (i = 0; i < 4; ++i) 9084 8791 fprintf(outf, "%c", (word >> (i * 8)) & 0xFF); 9085 8792 } 8793 + 9086 8794 void dump_cpuid_hypervisor(void) 9087 8795 { 9088 8796 unsigned int ebx = 0; ··· 9169 8875 dump_cpuid_hypervisor(); 9170 8876 9171 8877 probe_platform_features(family, model); 8878 + init_perf_model_support(family, model); 9172 8879 9173 8880 if (!(edx_flags & (1 << 5))) 9174 8881 errx(1, "CPUID: no MSR"); ··· 9336 9041 
decode_misc_feature_control(); 9337 9042 } 9338 9043 9339 - /* perf_llc_probe 9044 + /* 9045 + * has_perf_llc_access() 9340 9046 * 9341 9047 * return 1 on success, else 0 9342 9048 */ ··· 9366 9070 9367 9071 if (no_perf) 9368 9072 return; 9369 - if (!(BIC_IS_ENABLED(BIC_LLC_MRPS) && BIC_IS_ENABLED(BIC_LLC_HIT))) 9073 + if (!(BIC_IS_ENABLED(BIC_LLC_MRPS) || BIC_IS_ENABLED(BIC_LLC_HIT))) 9370 9074 return; 9075 + 9076 + assert(fd_llc_percpu != 0); 9371 9077 9372 9078 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 9373 9079 9374 9080 if (cpu_is_not_allowed(cpu)) 9375 9081 continue; 9376 9082 9377 - assert(fd_llc_percpu != 0); 9378 9083 fd_llc_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP); 9379 9084 if (fd_llc_percpu[cpu] == -1) { 9380 9085 warnx("%s: perf REFS: failed to open counter on cpu%d", __func__, cpu); 9381 9086 free_fd_llc_percpu(); 9382 9087 return; 9383 9088 } 9384 - assert(fd_llc_percpu != 0); 9385 9089 retval = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, fd_llc_percpu[cpu], PERF_FORMAT_GROUP); 9386 9090 if (retval == -1) { 9387 9091 warnx("%s: perf MISS: failed to open counter on cpu%d", __func__, cpu); ··· 9391 9095 } 9392 9096 BIC_PRESENT(BIC_LLC_MRPS); 9393 9097 BIC_PRESENT(BIC_LLC_HIT); 9098 + } 9099 + 9100 + void perf_l2_init(void) 9101 + { 9102 + int cpu; 9103 + int retval; 9104 + 9105 + if (no_perf) 9106 + return; 9107 + if (!(BIC_IS_ENABLED(BIC_L2_MRPS) || BIC_IS_ENABLED(BIC_L2_HIT))) 9108 + return; 9109 + if (perf_model_support == NULL) 9110 + return; 9111 + 9112 + assert(fd_l2_percpu != 0); 9113 + 9114 + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 9115 + 9116 + if (cpu_is_not_allowed(cpu)) 9117 + continue; 9118 + 9119 + if (!is_hybrid) { 9120 + fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.uniform, perf_model_support->first.refs, -1, PERF_FORMAT_GROUP); 9121 + if (fd_l2_percpu[cpu] == -1) { 9122 + err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", 
__func__, cpu, perf_pmu_types.uniform, perf_model_support->first.refs); 9123 + free_fd_l2_percpu(); 9124 + return; 9125 + } 9126 + retval = open_perf_counter(cpu, perf_pmu_types.uniform, perf_model_support->first.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP); 9127 + if (retval == -1) { 9128 + err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.uniform, perf_model_support->first.hits); 9129 + free_fd_l2_percpu(); 9130 + return; 9131 + } 9132 + continue; 9133 + } 9134 + if (perf_pcore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_pcore_set)) { 9135 + fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.pcore, perf_model_support->first.refs, -1, PERF_FORMAT_GROUP); 9136 + if (fd_l2_percpu[cpu] == -1) { 9137 + err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->first.refs); 9138 + free_fd_l2_percpu(); 9139 + return; 9140 + } 9141 + retval = open_perf_counter(cpu, perf_pmu_types.pcore, perf_model_support->first.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP); 9142 + if (retval == -1) { 9143 + err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->first.hits); 9144 + free_fd_l2_percpu(); 9145 + return; 9146 + } 9147 + } else if (perf_ecore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_ecore_set)) { 9148 + fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.ecore, perf_model_support->second.refs, -1, PERF_FORMAT_GROUP); 9149 + if (fd_l2_percpu[cpu] == -1) { 9150 + err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->second.refs); 9151 + free_fd_l2_percpu(); 9152 + return; 9153 + } 9154 + retval = open_perf_counter(cpu, perf_pmu_types.ecore, perf_model_support->second.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP); 9155 + if (retval == -1) { 9156 + err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->second.hits); 9157 + free_fd_l2_percpu(); 9158 + return; 9159 + } 9160 + 
} else if (perf_lcore_set && CPU_ISSET_S(cpu, cpu_possible_setsize, perf_lcore_set)) { 9161 + fd_l2_percpu[cpu] = open_perf_counter(cpu, perf_pmu_types.lcore, perf_model_support->third.refs, -1, PERF_FORMAT_GROUP); 9162 + if (fd_l2_percpu[cpu] == -1) { 9163 + err(-1, "%s(cpu%d, 0x%x, 0x%llx) REFS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->third.refs); 9164 + free_fd_l2_percpu(); 9165 + return; 9166 + } 9167 + retval = open_perf_counter(cpu, perf_pmu_types.lcore, perf_model_support->third.hits, fd_l2_percpu[cpu], PERF_FORMAT_GROUP); 9168 + if (retval == -1) { 9169 + err(-1, "%s(cpu%d, 0x%x, 0x%llx) HITS", __func__, cpu, perf_pmu_types.pcore, perf_model_support->third.hits); 9170 + free_fd_l2_percpu(); 9171 + return; 9172 + } 9173 + } else 9174 + err(-1, "%s: cpu%d: type %d", __func__, cpu, cpus[cpu].type); 9175 + } 9176 + BIC_PRESENT(BIC_L2_MRPS); 9177 + BIC_PRESENT(BIC_L2_HIT); 9394 9178 } 9395 9179 9396 9180 /* ··· 9483 9107 return 1; 9484 9108 else 9485 9109 return 0; 9486 - } 9487 - 9488 - char *possible_file = "/sys/devices/system/cpu/possible"; 9489 - char possible_buf[1024]; 9490 - 9491 - int initialize_cpu_possible_set(void) 9492 - { 9493 - FILE *fp; 9494 - 9495 - fp = fopen(possible_file, "r"); 9496 - if (!fp) { 9497 - warn("open %s", possible_file); 9498 - return -1; 9499 - } 9500 - if (fread(possible_buf, sizeof(char), 1024, fp) == 0) { 9501 - warn("read %s", possible_file); 9502 - goto err; 9503 - } 9504 - if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) { 9505 - warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str); 9506 - goto err; 9507 - } 9508 - return 0; 9509 - 9510 - err: 9511 - fclose(fp); 9512 - return -1; 9513 9110 } 9514 9111 9515 9112 void topology_probe(bool startup) ··· 9524 9175 err(3, "CPU_ALLOC"); 9525 9176 cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9526 9177 CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set); 9527 - initialize_cpu_possible_set(); 9178 + 
initialize_cpu_set_from_sysfs(cpu_possible_set, "/sys/devices/system/cpu", "possible"); 9528 9179 9529 9180 /* 9530 9181 * Allocate and initialize cpu_effective_set ··· 10427 10078 rapl_perf_init(); 10428 10079 cstate_perf_init(); 10429 10080 perf_llc_init(); 10081 + perf_l2_init(); 10430 10082 added_perf_counters_init(); 10431 10083 pmt_init(); 10432 10084 ··· 10533 10183 10534 10184 void print_version() 10535 10185 { 10536 - fprintf(outf, "turbostat version 2025.12.05 - Len Brown <lenb@kernel.org>\n"); 10186 + fprintf(outf, "turbostat version 2026.02.04 - Len Brown <lenb@kernel.org>\n"); 10537 10187 } 10538 10188 10539 10189 #define COMMAND_LINE_SIZE 2048