Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tools/power turbostat: Favor cpu# over core#

Turbostat collects statistics and outputs results in "topology order",
which means it prioritizes the core# over the cpu#.
The strategy is to minimize wakeups to a core -- which is
important when measuring an idle system.

But core order is problematic, because Linux core#'s are physical
(within each package), and thus subject to APIC-id scrambling
that may be done by the hardware or the BIOS.

As a result users may be faced with rows in a confusing order:

sudo turbostat -q --show topology,Busy%,CPU%c6,UncMHz sleep 1
Core CPU Busy% CPU%c6 UncMHz
- - 1.25 72.18 3400
0 4 7.74 0.00
1 5 1.77 88.59
2 6 0.48 96.73
3 7 0.21 98.34
4 8 0.14 96.85
5 9 0.26 97.55
6 10 0.44 97.24
7 11 0.12 96.18
8 0 5.41 0.31 3400
8 1 0.19
12 2 0.41 0.22
12 3 0.08
32 12 0.04 99.21
33 13 0.25 94.92

Abandon the legacy "core# topology order" in favor of simply
ordering by cpu#, with a special case to handle HT siblings
that may not have adjacent cpu#'s.

sudo ./turbostat -q --show topology,Busy%,CPU%c6,UncMHz sleep 1
1.003001 sec
Core CPU Busy% CPU%c6 UncMHz
- - 1.38 80.55 1600
8 0 10.94 0.00 1600
8 1 0.53
12 2 2.90 0.45
12 3 0.11
0 4 1.96 91.20
1 5 0.97 96.40
2 6 0.24 94.72
3 7 0.31 98.01
4 8 0.20 98.20
5 9 0.62 96.00
6 10 0.06 98.15
7 11 0.12 99.31
32 12 0.04 99.07
33 13 0.27 95.09

The result is that cpu#'s now take precedence over core#'s.

Signed-off-by: Len Brown <len.brown@intel.com>

Len Brown a2b4d0f8 6be5c151

+69 -54
+69 -54
tools/power/x86/turbostat/turbostat.c
··· 2187 2187 #define ODD_COUNTERS odd.threads, odd.cores, odd.packages 2188 2188 #define EVEN_COUNTERS even.threads, even.cores, even.packages 2189 2189 2190 - #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ 2191 - ((thread_base) + \ 2192 - ((pkg_no) * \ 2193 - topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ 2194 - ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ 2195 - ((core_no) * topo.threads_per_core) + \ 2196 - (thread_no)) 2197 - 2198 - #define GET_CORE(core_base, core_no, node_no, pkg_no) \ 2199 - ((core_base) + \ 2200 - ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ 2201 - ((node_no) * topo.cores_per_node) + \ 2202 - (core_no)) 2203 - 2204 2190 /* 2205 2191 * The accumulated sum of MSR is defined as a monotonic 2206 2192 * increasing MSR, it will be accumulated periodically, ··· 2378 2392 struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */ 2379 2393 } platform_counters_odd, platform_counters_even; 2380 2394 2395 + #define MAX_HT_ID 3 /* support SMT-4 */ 2396 + 2381 2397 struct cpu_topology { 2382 2398 int cpu_id; 2383 2399 int core_id; /* unique within a package */ ··· 2389 2401 int physical_node_id; 2390 2402 int logical_node_id; /* 0-based count within the package */ 2391 2403 int ht_id; /* unique within a core */ 2392 - int ht_sibling_cpu_id; 2404 + int ht_sibling_cpu_id[MAX_HT_ID + 1]; 2393 2405 int type; 2394 2406 cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ 2395 2407 } *cpus; ··· 2446 2458 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 2447 2459 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 2448 2460 { 2449 - int retval, pkg_no, core_no, thread_no, node_no; 2461 + int cpu, retval; 2450 2462 2451 2463 retval = 0; 2452 2464 2453 - for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 2454 - for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { 2455 - for (core_no = 0; 
core_no < topo.cores_per_node; ++core_no) { 2456 - for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 2457 - struct thread_data *t; 2458 - struct core_data *c; 2465 + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 2466 + struct thread_data *t; 2467 + struct core_data *c; 2468 + struct pkg_data *p; 2459 2469 2460 - t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 2470 + int pkg_id = cpus[cpu].package_id; 2461 2471 2462 - if (cpu_is_not_allowed(t->cpu_id)) 2463 - continue; 2472 + if (cpu_is_not_allowed(cpu)) 2473 + continue; 2464 2474 2465 - c = GET_CORE(core_base, core_no, node_no, pkg_no); 2475 + if (cpus[cpu].ht_id > 0) /* skip HT sibling */ 2476 + continue; 2466 2477 2467 - retval |= func(t, c, &pkg_base[pkg_no]); 2468 - } 2469 - } 2478 + t = &thread_base[cpu]; 2479 + c = &core_base[GLOBAL_CORE_ID(cpus[cpu].core_id, pkg_id)]; 2480 + p = &pkg_base[pkg_id]; 2481 + 2482 + retval |= func(t, c, p); 2483 + 2484 + /* Handle HT sibling now */ 2485 + int i; 2486 + 2487 + for (i = MAX_HT_ID; i > 0; --i) { /* ht_id 0 is self */ 2488 + if (cpus[cpu].ht_sibling_cpu_id[i] <= 0) 2489 + continue; 2490 + t = &thread_base[cpus[cpu].ht_sibling_cpu_id[i]]; 2491 + 2492 + retval |= func(t, c, p); 2470 2493 } 2471 2494 } 2472 2495 return retval; ··· 6167 6168 return 0; 6168 6169 } 6169 6170 6170 - int get_thread_siblings(struct cpu_topology *thiscpu) 6171 + int set_thread_siblings(struct cpu_topology *thiscpu) 6171 6172 { 6172 6173 char path[80], character; 6173 6174 FILE *filep; ··· 6205 6206 if (sib_core == thiscpu->core_id) { 6206 6207 CPU_SET_S(so, size, thiscpu->put_ids); 6207 6208 if ((so != cpu) && (cpus[so].ht_id < 0)) { 6208 - cpus[so].ht_id = thread_id++; 6209 - cpus[cpu].ht_sibling_cpu_id = so; 6209 + cpus[so].ht_id = thread_id; 6210 + cpus[cpu].ht_sibling_cpu_id[thread_id] = so; 6211 + if (debug) 6212 + fprintf(stderr, "%s: cpu%d.ht_sibling_cpu_id[%d] = %d\n", __func__, cpu, thread_id, so); 6213 + thread_id += 1; 6210 6214 } 6211 6215 } 
6212 6216 } ··· 6231 6229 struct core_data *core_base, struct pkg_data *pkg_base, 6232 6230 struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) 6233 6231 { 6234 - int retval, pkg_no, node_no, core_no, thread_no; 6232 + int cpu, retval; 6235 6233 6236 6234 retval = 0; 6237 6235 6238 - for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 6239 - for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { 6240 - for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { 6241 - for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { 6242 - struct thread_data *t, *t2; 6243 - struct core_data *c, *c2; 6236 + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 6237 + struct thread_data *t, *t2; 6238 + struct core_data *c, *c2; 6239 + struct pkg_data *p, *p2; 6244 6240 6245 - t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); 6241 + if (cpu_is_not_allowed(cpu)) 6242 + continue; 6246 6243 6247 - if (cpu_is_not_allowed(t->cpu_id)) 6248 - continue; 6244 + if (cpus[cpu].ht_id > 0) /* skip HT sibling */ 6245 + continue; 6249 6246 6250 - t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); 6247 + t = &thread_base[cpu]; 6248 + t2 = &thread_base2[cpu]; 6249 + c = &core_base[GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id)]; 6250 + c2 = &core_base2[GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id)]; 6251 + p = &pkg_base[cpus[cpu].package_id]; 6252 + p2 = &pkg_base2[cpus[cpu].package_id]; 6251 6253 6252 - c = GET_CORE(core_base, core_no, node_no, pkg_no); 6253 - c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); 6254 + retval |= func(t, c, p, t2, c2, p2); 6254 6255 6255 - retval |= func(t, c, &pkg_base[pkg_no], t2, c2, &pkg_base2[pkg_no]); 6256 - } 6257 - } 6256 + /* Handle HT sibling now */ 6257 + int i; 6258 + 6259 + for (i = MAX_HT_ID; i > 0; --i) { /* ht_id 0 is self */ 6260 + if (cpus[cpu].ht_sibling_cpu_id[i] <= 0) 6261 + continue; 6262 + t = 
&thread_base[cpus[cpu].ht_sibling_cpu_id[i]]; 6263 + t2 = &thread_base2[cpus[cpu].ht_sibling_cpu_id[i]]; 6264 + 6265 + retval |= func(t, c, p, t2, c2, p2); 6258 6266 } 6259 6267 } 6260 6268 return retval; ··· 6403 6391 return 0; 6404 6392 } 6405 6393 6406 - int init_ht_id(int cpu) 6394 + int clear_ht_id(int cpu) 6407 6395 { 6396 + int i; 6397 + 6408 6398 cpus[cpu].ht_id = -1; 6409 - cpus[cpu].ht_sibling_cpu_id = -1; 6399 + for (i = 0; i <= MAX_HT_ID; ++i) 6400 + cpus[cpu].ht_sibling_cpu_id[i] = -1; 6410 6401 return 0; 6411 6402 } 6412 6403 ··· 9594 9579 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9595 9580 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 9596 9581 9597 - for_all_proc_cpus(init_ht_id); 9582 + for_all_proc_cpus(clear_ht_id); 9598 9583 9599 9584 for_all_proc_cpus(set_cpu_hybrid_type); 9600 9585 ··· 9639 9624 max_core_id = cpus[i].core_id; 9640 9625 9641 9626 /* get thread information */ 9642 - siblings = get_thread_siblings(&cpus[i]); 9627 + siblings = set_thread_siblings(&cpus[i]); 9643 9628 if (siblings > max_siblings) 9644 9629 max_siblings = siblings; 9645 9630 if (cpus[i].ht_id == 0) ··· 9763 9748 if (node_id < 0) 9764 9749 node_id = 0; 9765 9750 9766 - t = GET_THREAD(thread_base, cpus[cpu_id].ht_id, core_id, node_id, pkg_id); 9767 - c = GET_CORE(core_base, core_id, node_id, pkg_id); 9751 + t = &thread_base[cpu_id]; 9752 + c = &core_base[GLOBAL_CORE_ID(core_id, pkg_id)]; 9768 9753 9769 9754 t->cpu_id = cpu_id; 9770 9755 if (!cpu_is_not_allowed(cpu_id)) {