Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

tools/power turbostat: Fix --cpu-set 1 regression on HT systems

When the "--cpu-set" option limits turbostat to run on
a higher numbered HT sibling, it exits upon dividing by zero.

This is because the HT support handles higher numbered siblings
at the same time as lower numbered siblings. But when that lower
numbered sibling is disallowed, the higher numbered sibling is
never processed. The result is a time delta of 0, which results
in a divide by 0 for any of the "per-second" metrics.

Enhance the HT enumeration code to record all siblings (up to SMT4).
Consult this complete HT sibling list to determine when
to process an HT sibling, and when to skip it.

Fixes: a2b4d0f8bf07 ("tools/power turbostat: Favor cpu# over core#")
Signed-off-by: Len Brown <len.brown@intel.com>

+55 -15
+55 -15
tools/power/x86/turbostat/turbostat.c
··· 2449 2449 2450 2450 #define PER_THREAD_PARAMS struct thread_data *t, struct core_data *c, struct pkg_data *p 2451 2451 2452 + int has_allowed_lower_ht_sibling(int cpu) 2453 + { 2454 + int i; 2455 + 2456 + for (i = 0; i <= cpus[cpu].ht_id; ++i) { 2457 + int sibling_cpu_id = cpus[cpu].ht_sibling_cpu_id[i]; 2458 + 2459 + if (sibling_cpu_id == cpu) 2460 + return 0; 2461 + 2462 + if (!cpu_is_not_allowed(sibling_cpu_id)) 2463 + return 1; 2464 + } 2465 + return 0; 2466 + } 2467 + 2452 2468 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), 2453 2469 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 2454 2470 { ··· 2482 2466 if (cpu_is_not_allowed(cpu)) 2483 2467 continue; 2484 2468 2485 - if (cpus[cpu].ht_id > 0) /* skip HT sibling */ 2469 + if (has_allowed_lower_ht_sibling(cpu)) /* skip HT sibling */ 2486 2470 continue; 2487 2471 2488 2472 t = &thread_base[cpu]; ··· 2491 2475 2492 2476 retval |= func(t, c, p); 2493 2477 2494 - /* Handle HT sibling now */ 2478 + /* Handle other HT siblings now */ 2495 2479 int i; 2496 2480 2497 - for (i = MAX_HT_ID; i > 0; --i) { /* ht_id 0 is self */ 2481 + for (i = 0; i <= MAX_HT_ID; ++i) { 2498 2482 int sibling_cpu_id = cpus[cpu].ht_sibling_cpu_id[i]; 2483 + 2484 + if (sibling_cpu_id < 0) 2485 + break; 2486 + 2487 + if (sibling_cpu_id == cpu) 2488 + continue; 2499 2489 2500 2490 if (cpu_is_not_allowed(sibling_cpu_id)) 2501 2491 continue; ··· 6200 6178 int cpu = thiscpu->cpu_id; 6201 6179 int offset = topo.max_cpu_num + 1; 6202 6180 size_t size; 6203 - int thread_id = 0; 6181 + int ht_id = 0; 6204 6182 6205 6183 thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); 6206 6184 if (thiscpu->ht_id < 0) 6207 - thiscpu->ht_id = thread_id++; 6185 + thiscpu->ht_id = 0; /* first CPU in core */ 6208 6186 if (!thiscpu->put_ids) 6209 6187 return -1; 6210 6188 ··· 6228 6206 sib_core = get_core_id(so); 6229 6207 if (sib_core == thiscpu->core_id) { 6230 6208 
CPU_SET_S(so, size, thiscpu->put_ids); 6231 - if ((so != cpu) && (cpus[so].ht_id < 0)) { 6232 - cpus[so].ht_id = thread_id; 6233 - cpus[cpu].ht_sibling_cpu_id[thread_id] = so; 6234 - if (debug) 6235 - fprintf(stderr, "%s: cpu%d.ht_sibling_cpu_id[%d] = %d\n", __func__, cpu, thread_id, so); 6236 - thread_id += 1; 6237 - } 6209 + cpus[so].ht_id = ht_id; 6210 + cpus[cpu].ht_sibling_cpu_id[ht_id] = so; 6211 + ht_id += 1; 6238 6212 } 6239 6213 } 6240 6214 } ··· 6263 6245 if (cpu_is_not_allowed(cpu)) 6264 6246 continue; 6265 6247 6266 - if (cpus[cpu].ht_id > 0) /* skip HT sibling */ 6248 + if (has_allowed_lower_ht_sibling(cpu)) /* skip HT sibling */ 6267 6249 continue; 6268 6250 6269 6251 t = &thread_base[cpu]; ··· 6278 6260 /* Handle HT sibling now */ 6279 6261 int i; 6280 6262 6281 - for (i = MAX_HT_ID; i > 0; --i) { /* ht_id 0 is self */ 6263 + for (i = 0; i <= MAX_HT_ID; ++i) { 6282 6264 int sibling_cpu_id = cpus[cpu].ht_sibling_cpu_id[i]; 6265 + 6266 + if (sibling_cpu_id < 0) 6267 + break; 6268 + 6269 + if (sibling_cpu_id == cpu) 6270 + continue; 6283 6271 6284 6272 if (cpu_is_not_allowed(sibling_cpu_id)) 6285 6273 continue; ··· 9541 9517 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9542 9518 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 9543 9519 for_all_proc_cpus(mark_cpu_present); 9520 + if (debug) 9521 + print_cpu_set("present set", cpu_present_set); 9544 9522 9545 9523 /* 9546 9524 * Allocate and initialize cpu_possible_set ··· 9553 9527 cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9554 9528 CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set); 9555 9529 initialize_cpu_set_from_sysfs(cpu_possible_set, "/sys/devices/system/cpu", "possible"); 9530 + if (debug) 9531 + print_cpu_set("possible set", cpu_possible_set); 9556 9532 9557 9533 /* 9558 9534 * Allocate and initialize cpu_effective_set ··· 9565 9537 cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 9566 9538 CPU_ZERO_S(cpu_effective_setsize, 
cpu_effective_set); 9567 9539 update_effective_set(startup); 9540 + if (debug) 9541 + print_cpu_set("effective set", cpu_effective_set); 9568 9542 9569 9543 /* 9570 9544 * Allocate and initialize cpu_allowed_set ··· 9610 9580 9611 9581 CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set); 9612 9582 } 9583 + if (debug) 9584 + print_cpu_set("allowed set", cpu_allowed_set); 9613 9585 9614 9586 if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set)) 9615 9587 err(-ENODEV, "No valid cpus found"); ··· 9715 9683 return; 9716 9684 9717 9685 for (i = 0; i <= topo.max_cpu_num; ++i) { 9686 + int ht_id; 9687 + 9718 9688 if (cpu_is_not_present(i)) 9719 9689 continue; 9720 9690 fprintf(outf, 9721 - "cpu %d pkg %d die %d l3 %d node %d lnode %d core %d thread %d\n", 9691 + "cpu %d pkg %d die %d l3 %d node %d lnode %d core %d ht_id %d", 9722 9692 i, cpus[i].package_id, cpus[i].die_id, cpus[i].l3_id, 9723 9693 cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].core_id, cpus[i].ht_id); 9694 + fprintf(outf, " siblings"); 9695 + for (ht_id = 0; ht_id <= MAX_HT_ID; ++ht_id) 9696 + fprintf(outf, " %d", cpus[i].ht_sibling_cpu_id[ht_id]); 9697 + fprintf(outf, "\n"); 9724 9698 } 9725 9699 9726 9700 } ··· 9867 9829 topo.allowed_cores = 0; 9868 9830 topo.allowed_packages = 0; 9869 9831 for_all_cpus(update_topo, ODD_COUNTERS); 9832 + if (debug) 9833 + fprintf(stderr, "allowed_cpus %d allowed_cores %d allowed_packages %d\n", topo.allowed_cpus, topo.allowed_cores, topo.allowed_packages); 9870 9834 } 9871 9835 9872 9836 void setup_all_buffers(bool startup)