Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf lock contention: Add -l/--lock-addr option

The -l/--lock-addr option implements per-lock-instance contention
stats using LOCK_AGGR_ADDR. It displays the lock address and, if one
exists, the corresponding symbol name.

$ sudo ./perf lock con -abl sleep 1
contended total wait max wait avg wait address symbol

1 36.28 us 36.28 us 36.28 us ffff92615d6448b8
9 10.91 us 1.84 us 1.21 us ffffffffbaed50c0 rcu_state
1 10.49 us 10.49 us 10.49 us ffff9262ac4f0c80
8 4.68 us 1.67 us 585 ns ffffffffbae07a40 jiffies_lock
3 3.03 us 1.45 us 1.01 us ffff9262277861e0
1 924 ns 924 ns 924 ns ffff926095ba9d20
1 436 ns 436 ns 436 ns ffff9260bfda4f60

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221209190727.759804-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Namhyung Kim and committed by
Arnaldo Carvalho de Melo
688d2e8d eca949b2

+101 -27
+4
tools/perf/Documentation/perf-lock.txt
··· 168 168 --entries=<value>:: 169 169 Display this many entries. 170 170 171 + -l:: 172 + --lock-addr:: 173 + Show lock contention stat by address 174 + 171 175 172 176 SEE ALSO 173 177 --------
+66 -16
tools/perf/builtin-lock.c
··· 56 56 57 57 static bool combine_locks; 58 58 static bool show_thread_stats; 59 + static bool show_lock_addrs; 59 60 static bool use_bpf; 60 61 static unsigned long bpf_map_entries = 10240; 61 62 static int max_stack_depth = CONTENTION_STACK_DEPTH; ··· 1000 999 ls = lock_stat_find(key); 1001 1000 if (!ls) { 1002 1001 char buf[128]; 1003 - const char *caller = buf; 1002 + const char *name = ""; 1004 1003 unsigned int flags = evsel__intval(evsel, sample, "flags"); 1004 + struct machine *machine = &session->machines.host; 1005 + struct map *kmap; 1006 + struct symbol *sym; 1005 1007 1006 - if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0) 1007 - caller = "Unknown"; 1008 + switch (aggr_mode) { 1009 + case LOCK_AGGR_ADDR: 1010 + /* make sure it loads the kernel map to find lock symbols */ 1011 + map__load(machine__kernel_map(machine)); 1008 1012 1009 - ls = lock_stat_findnew(key, caller, flags); 1013 + sym = machine__find_kernel_symbol(machine, key, &kmap); 1014 + if (sym) 1015 + name = sym->name; 1016 + break; 1017 + case LOCK_AGGR_CALLER: 1018 + name = buf; 1019 + if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0) 1020 + name = "Unknown"; 1021 + break; 1022 + case LOCK_AGGR_TASK: 1023 + default: 1024 + break; 1025 + } 1026 + 1027 + ls = lock_stat_findnew(key, name, flags); 1010 1028 if (!ls) 1011 1029 return -ENOMEM; 1012 1030 ··· 1480 1460 list_for_each_entry(key, &lock_keys, list) 1481 1461 pr_info("%*s ", key->len, key->header); 1482 1462 1483 - if (show_thread_stats) 1463 + switch (aggr_mode) { 1464 + case LOCK_AGGR_TASK: 1484 1465 pr_info(" %10s %s\n\n", "pid", "comm"); 1485 - else 1466 + break; 1467 + case LOCK_AGGR_CALLER: 1486 1468 pr_info(" %10s %s\n\n", "type", "caller"); 1469 + break; 1470 + case LOCK_AGGR_ADDR: 1471 + pr_info(" %16s %s\n\n", "address", "symbol"); 1472 + break; 1473 + default: 1474 + break; 1475 + } 1487 1476 } 1488 1477 1489 1478 bad = total = printed = 0; ··· 1500 1471 bad = bad_hist[BROKEN_CONTENDED]; 1501 
1472 1502 1473 while ((st = pop_from_result())) { 1474 + struct thread *t; 1475 + int pid; 1476 + 1503 1477 total += use_bpf ? st->nr_contended : 1; 1504 1478 if (st->broken) 1505 1479 bad++; ··· 1512 1480 pr_info(" "); 1513 1481 } 1514 1482 1515 - if (show_thread_stats) { 1516 - struct thread *t; 1517 - int pid = st->addr; 1518 - 1519 - /* st->addr contains tid of thread */ 1483 + switch (aggr_mode) { 1484 + case LOCK_AGGR_CALLER: 1485 + pr_info(" %10s %s\n", get_type_str(st), st->name); 1486 + break; 1487 + case LOCK_AGGR_TASK: 1488 + pid = st->addr; 1520 1489 t = perf_session__findnew(session, pid); 1521 1490 pr_info(" %10d %s\n", pid, thread__comm_str(t)); 1522 - goto next; 1491 + break; 1492 + case LOCK_AGGR_ADDR: 1493 + pr_info(" %016llx %s\n", (unsigned long long)st->addr, 1494 + st->name ? : ""); 1495 + break; 1496 + default: 1497 + break; 1523 1498 } 1524 1499 1525 - pr_info(" %10s %s\n", get_type_str(st), st->name); 1526 - if (verbose) { 1500 + if (aggr_mode == LOCK_AGGR_CALLER && verbose) { 1527 1501 struct map *kmap; 1528 1502 struct symbol *sym; 1529 1503 char buf[128]; ··· 1546 1508 } 1547 1509 } 1548 1510 1549 - next: 1550 1511 if (++printed >= print_nr_entries) 1551 1512 break; 1552 1513 } ··· 1653 1616 .map_nr_entries = bpf_map_entries, 1654 1617 .max_stack = max_stack_depth, 1655 1618 .stack_skip = stack_skip, 1656 - .aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : LOCK_AGGR_CALLER, 1657 1619 }; 1658 1620 1659 1621 session = perf_session__new(use_bpf ? NULL : &data, &eops); ··· 1662 1626 } 1663 1627 1664 1628 con.machine = &session->machines.host; 1629 + 1630 + con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : 1631 + show_lock_addrs ? 
LOCK_AGGR_ADDR : LOCK_AGGR_CALLER; 1665 1632 1666 1633 /* for lock function check */ 1667 1634 symbol_conf.sort_by_name = true; ··· 1946 1907 "Set the number of stack depth to skip when finding a lock caller, " 1947 1908 "Default: " __stringify(CONTENTION_STACK_SKIP)), 1948 1909 OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"), 1910 + OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"), 1949 1911 OPT_PARENT(lock_options) 1950 1912 }; 1951 1913 ··· 2016 1976 argc = parse_options(argc, argv, contention_options, 2017 1977 contention_usage, 0); 2018 1978 } 1979 + 1980 + if (show_thread_stats && show_lock_addrs) { 1981 + pr_err("Cannot use thread and addr mode together\n"); 1982 + parse_options_usage(contention_usage, contention_options, 1983 + "threads", 0); 1984 + parse_options_usage(NULL, contention_options, 1985 + "lock-addr", 0); 1986 + return -1; 1987 + } 1988 + 2019 1989 rc = __cmd_contention(argc, argv); 2020 1990 } else { 2021 1991 usage_with_options(lock_usage, lock_options);
+17 -6
tools/perf/util/bpf_lock_contention.c
··· 137 137 thread__set_comm(idle, "swapper", /*timestamp=*/0); 138 138 } 139 139 140 + /* make sure it loads the kernel map */ 141 + map__load(maps__first(machine->kmaps)); 142 + 140 143 prev_key = NULL; 141 144 while (!bpf_map_get_next_key(fd, prev_key, &key)) { 142 145 struct map *kmap; 143 146 struct symbol *sym; 144 147 int idx = 0; 148 + s32 stack_id; 145 149 146 150 /* to handle errors in the loop body */ 147 151 err = -1; ··· 164 160 st->avg_wait_time = data.total_time / data.count; 165 161 166 162 st->flags = data.flags; 163 + st->addr = key.aggr_key; 167 164 168 165 if (con->aggr_mode == LOCK_AGGR_TASK) { 169 166 struct contention_task_data task; 170 167 struct thread *t; 171 - 172 - st->addr = key.stack_or_task_id; 168 + int pid = key.aggr_key; 173 169 174 170 /* do not update idle comm which contains CPU number */ 175 171 if (st->addr) { 176 - bpf_map_lookup_elem(task_fd, &key, &task); 177 - t = __machine__findnew_thread(machine, /*pid=*/-1, 178 - key.stack_or_task_id); 172 + bpf_map_lookup_elem(task_fd, &pid, &task); 173 + t = __machine__findnew_thread(machine, /*pid=*/-1, pid); 179 174 thread__set_comm(t, task.comm, /*timestamp=*/0); 180 175 } 181 176 goto next; 182 177 } 183 178 184 - bpf_map_lookup_elem(stack, &key, stack_trace); 179 + if (con->aggr_mode == LOCK_AGGR_ADDR) { 180 + sym = machine__find_kernel_symbol(machine, st->addr, &kmap); 181 + if (sym) 182 + st->name = strdup(sym->name); 183 + goto next; 184 + } 185 + 186 + stack_id = key.aggr_key; 187 + bpf_map_lookup_elem(stack, &stack_id, stack_trace); 185 188 186 189 /* skip lock internal functions */ 187 190 while (machine__is_lock_function(machine, stack_trace[idx]) &&
+13 -4
tools/perf/util/bpf_skel/lock_contention.bpf.c
··· 168 168 169 169 duration = bpf_ktime_get_ns() - pelem->timestamp; 170 170 171 - if (aggr_mode == LOCK_AGGR_CALLER) { 172 - key.stack_or_task_id = pelem->stack_id; 173 - } else { 174 - key.stack_or_task_id = pid; 171 + switch (aggr_mode) { 172 + case LOCK_AGGR_CALLER: 173 + key.aggr_key = pelem->stack_id; 174 + break; 175 + case LOCK_AGGR_TASK: 176 + key.aggr_key = pid; 175 177 update_task_data(pid); 178 + break; 179 + case LOCK_AGGR_ADDR: 180 + key.aggr_key = pelem->lock; 181 + break; 182 + default: 183 + /* should not happen */ 184 + return 0; 176 185 } 177 186 178 187 data = bpf_map_lookup_elem(&lock_stat, &key);
+1 -1
tools/perf/util/bpf_skel/lock_data.h
··· 4 4 #define UTIL_BPF_SKEL_LOCK_DATA_H 5 5 6 6 struct contention_key { 7 - s32 stack_or_task_id; 7 + u64 aggr_key; /* can be stack_id, pid or lock addr */ 8 8 }; 9 9 10 10 #define TASK_COMM_LEN 16