Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf report: Add machine parallelism

Add calculation of the current parallelism level (number of threads actively
running on CPUs). The parallelism level can be shown in reports on its own,
and can also be used to calculate latency overheads.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/0f8c1b8eb12619029e31b3d5c0346f4616a5aeda.1739437531.git.dvyukov@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

authored by

Dmitry Vyukov and committed by
Namhyung Kim
f13bc61b 20600b8a

+20
+1
tools/perf/builtin-report.c
··· 1568 1568 report.tool.cgroup = perf_event__process_cgroup; 1569 1569 report.tool.exit = perf_event__process_exit; 1570 1570 report.tool.fork = perf_event__process_fork; 1571 + report.tool.context_switch = perf_event__process_switch; 1571 1572 report.tool.lost = perf_event__process_lost; 1572 1573 report.tool.read = process_read_event; 1573 1574 report.tool.attr = process_attr;
+1
tools/perf/util/addr_location.c
··· 17 17 al->cpumode = 0; 18 18 al->cpu = 0; 19 19 al->socket = 0; 20 + al->parallelism = 1; 20 21 } 21 22 22 23 /*
+2
tools/perf/util/addr_location.h
··· 21 21 u8 cpumode; 22 22 s32 cpu; 23 23 s32 socket; 24 + /* Same as machine.parallelism but within [1, nr_cpus]. */ 25 + int parallelism; 24 26 }; 25 27 26 28 void addr_location__init(struct addr_location *al);
+3
tools/perf/util/event.c
··· 767 767 al->socket = env->cpu[al->cpu].socket_id; 768 768 } 769 769 770 + /* Account for possible out-of-order switch events. */ 771 + al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine))); 772 + 770 773 if (al->map) { 771 774 if (symbol_conf.dso_list && 772 775 (!dso || !(strlist__has_entry(symbol_conf.dso_list,
+7
tools/perf/util/machine.c
··· 94 94 machine->comm_exec = false; 95 95 machine->kernel_start = 0; 96 96 machine->vmlinux_map = NULL; 97 + /* There is no initial context switch in, so we start at 1. */ 98 + machine->parallelism = 1; 97 99 98 100 machine->root_dir = strdup(root_dir); 99 101 if (machine->root_dir == NULL) ··· 679 677 int machine__process_switch_event(struct machine *machine __maybe_unused, 680 678 union perf_event *event) 681 679 { 680 + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 681 + 682 682 if (dump_trace) 683 683 perf_event__fprintf_switch(event, stdout); 684 + machine->parallelism += out ? -1 : 1; 684 685 return 0; 685 686 } 686 687 ··· 1885 1880 if (dump_trace) 1886 1881 perf_event__fprintf_task(event, stdout); 1887 1882 1883 + /* There is no context switch out before exit, so we decrement here. */ 1884 + machine->parallelism--; 1888 1885 if (thread != NULL) { 1889 1886 if (symbol_conf.keep_exited_threads) 1890 1887 thread__set_exited(thread, /*exited=*/true);
+6
tools/perf/util/machine.h
··· 50 50 u64 text_start; 51 51 u64 text_end; 52 52 } sched, lock, traceiter, trace; 53 + /* 54 + * The current parallelism level (number of threads that run on CPUs). 55 + * This value can be less than 1, or larger than the total number 56 + * of CPUs, if events are poorly ordered. 57 + */ 58 + int parallelism; 53 59 pid_t *current_tid; 54 60 size_t current_tid_sz; 55 61 union { /* Tool specific area */