Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Refactor stack map trace depth calculation into helper function

Extract the duplicated maximum allowed depth computation for stack
traces stored in BPF stacks from bpf_get_stackid() and __bpf_get_stack()
into a dedicated stack_map_calculate_max_depth() helper function.

This unifies the logic for:
- The max depth computation
- Enforcing the sysctl_perf_event_max_stack limit

No functional changes for existing code paths.

Signed-off-by: Arnaud Lecomte <contact@arnaud-lcm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/bpf/20251025192858.31424-1-contact@arnaud-lcm.com

Authored by Arnaud Lecomte and committed by Andrii Nakryiko
e17d62fe 88427328

+32 -15
+32 -15
kernel/bpf/stackmap.c
···
42 42 sizeof(struct bpf_stack_build_id) : sizeof(u64);
43 43 }
44 44
45 + /**
46 + * stack_map_calculate_max_depth - Calculate maximum allowed stack trace depth
47 + * @size: Size of the buffer/map value in bytes
48 + * @elem_size: Size of each stack trace element
49 + * @flags: BPF stack trace flags (BPF_F_USER_STACK, BPF_F_USER_BUILD_ID, ...)
50 + *
51 + * Return: Maximum number of stack trace entries that can be safely stored
52 + */
53 + static u32 stack_map_calculate_max_depth(u32 size, u32 elem_size, u64 flags)
54 + {
55 + u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
56 + u32 max_depth;
57 + u32 curr_sysctl_max_stack = READ_ONCE(sysctl_perf_event_max_stack);
58 +
59 + max_depth = size / elem_size;
60 + max_depth += skip;
61 + if (max_depth > curr_sysctl_max_stack)
62 + return curr_sysctl_max_stack;
63 +
64 + return max_depth;
65 + }
66 +
45 67 static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
46 68 {
47 69 u64 elem_size = sizeof(struct stack_map_bucket) +
···
322 300 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
323 301 u64, flags)
324 302 {
325 - u32 max_depth = map->value_size / stack_map_data_size(map);
326 - u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
303 + u32 elem_size = stack_map_data_size(map);
327 304 bool user = flags & BPF_F_USER_STACK;
328 305 struct perf_callchain_entry *trace;
329 306 bool kernel = !user;
307 + u32 max_depth;
330 308
331 309 if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
332 310 BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
333 311 return -EINVAL;
334 312
335 - max_depth += skip;
336 - if (max_depth > sysctl_perf_event_max_stack)
337 - max_depth = sysctl_perf_event_max_stack;
338 -
313 + max_depth = stack_map_calculate_max_depth(map->value_size, elem_size, flags);
339 314 trace = get_perf_callchain(regs, kernel, user, max_depth,
340 315 false, false);
341 316
···
425 406 struct perf_callchain_entry *trace_in,
426 407 void *buf, u32 size, u64 flags, bool may_fault)
427 408 {
428 - u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
409 + u32 trace_nr, copy_len, elem_size, max_depth;
429 410 bool user_build_id = flags & BPF_F_USER_BUILD_ID;
430 411 bool crosstask = task && task != current;
431 412 u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
···
457 438 goto clear;
458 439 }
459 440
460 - num_elem = size / elem_size;
461 - max_depth = num_elem + skip;
462 - if (sysctl_perf_event_max_stack < max_depth)
463 - max_depth = sysctl_perf_event_max_stack;
441 + max_depth = stack_map_calculate_max_depth(size, elem_size, flags);
464 442
465 443 if (may_fault)
466 444 rcu_read_lock(); /* need RCU for perf's callchain below */
467 445
468 - if (trace_in)
446 + if (trace_in) {
469 447 trace = trace_in;
470 - else if (kernel && task)
448 + trace->nr = min_t(u32, trace->nr, max_depth);
449 + } else if (kernel && task) {
471 450 trace = get_callchain_entry_for_task(task, max_depth);
472 - else
451 + } else {
473 452 trace = get_perf_callchain(regs, kernel, user, max_depth,
474 453 crosstask, false);
454 + }
475 455
476 456 if (unlikely(!trace) || trace->nr < skip) {
477 457 if (may_fault)
···
479 461 }
480 462
481 463 trace_nr = trace->nr - skip;
482 - trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
483 464 copy_len = trace_nr * elem_size;
484 465
485 466 ips = trace->ip + skip;