Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'percpu_hash-maps'

Leon Hwang says:

====================
In the discussion thread
"[PATCH bpf-next v9 0/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu maps"[1],
it was pointed out that missing calls to bpf_obj_free_fields() could
lead to memory leaks.

A selftest was added to confirm that this is indeed a real issue - the
refcount of BPF_KPTR_REF field is not decremented when
bpf_obj_free_fields() is missing after copy_map_value[,_long]().

Further inspection of copy_map_value[,_long]() call sites revealed two
locations affected by this issue:

1. pcpu_copy_value()
2. htab_map_update_elem() when used with BPF_F_LOCK

Similar case happens when update local storage maps with BPF_F_LOCK.

This series fixes the cases where BPF_F_LOCK is not involved by
properly calling bpf_obj_free_fields() after copy_map_value[,_long](),
and adds a selftest to verify the fix.

The remaining cases involving BPF_F_LOCK will be addressed in a
separate patch set after the series
"bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu maps"
is applied.

Changes:
v5 -> v6:
* Update the test name to include "refcounted_kptr".
* Update some local variables' name in the test (per Alexei).
* v5: https://lore.kernel.org/bpf/20251104142714.99878-1-leon.hwang@linux.dev/

v4 -> v5:
* Use a local variable to store the this_cpu_ptr()/per_cpu_ptr() result,
and reuse it between copy_map_value[,_long]() and
bpf_obj_free_fields() in patch #1 (per Andrii).
* Drop patch #2 and #3, because the combination of BPF_F_LOCK with other
special fields (except for BPF_SPIN_LOCK) will be disallowed on the
UAPI side in the future (per Alexei).
* v4: https://lore.kernel.org/bpf/20251030152451.62778-1-leon.hwang@linux.dev/

v3 -> v4:
* Target bpf-next tree.
* Address comments from Amery:
* Drop 'bpf_obj_free_fields()' in the path of updating local storage
maps without BPF_F_LOCK.
* Drop the corresponding self test.
* Respin the other test of local storage maps using syscall BPF
programs.
* v3: https://lore.kernel.org/bpf/20251026154000.34151-1-leon.hwang@linux.dev/

v2 -> v3:
* Free special fields when update local storage maps without BPF_F_LOCK.
* Add test to verify decrementing refcount when update cgroup local
storage maps without BPF_F_LOCK.
* Address review from AI bot:
* Slow path with BPF_F_LOCK (around line 642-646) in
'bpf_local_storage.c'.
* v2: https://lore.kernel.org/bpf/20251020164608.20536-1-leon.hwang@linux.dev/

v1 -> v2:
* Add test to verify decrementing refcount when update cgroup local
storage maps with BPF_F_LOCK.
* Address review from AI bot:
* Fast path without bucket lock (around line 610) in
'bpf_local_storage.c'.
* v1: https://lore.kernel.org/bpf/20251016145801.47552-1-leon.hwang@linux.dev/
====================

Link: https://patch.msgid.link/20251105151407.12723-1-leon.hwang@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+124 -2
+8 -2
kernel/bpf/hashtab.c
··· 934 934 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, 935 935 void *value, bool onallcpus) 936 936 { 937 + void *ptr; 938 + 937 939 if (!onallcpus) { 938 940 /* copy true value_size bytes */ 939 - copy_map_value(&htab->map, this_cpu_ptr(pptr), value); 941 + ptr = this_cpu_ptr(pptr); 942 + copy_map_value(&htab->map, ptr, value); 943 + bpf_obj_free_fields(htab->map.record, ptr); 940 944 } else { 941 945 u32 size = round_up(htab->map.value_size, 8); 942 946 int off = 0, cpu; 943 947 944 948 for_each_possible_cpu(cpu) { 945 - copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off); 949 + ptr = per_cpu_ptr(pptr, cpu); 950 + copy_map_value_long(&htab->map, ptr, value + off); 951 + bpf_obj_free_fields(htab->map.record, ptr); 946 952 off += size; 947 953 } 948 954 }
+56
tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
··· 44 44 ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval"); 45 45 refcounted_kptr__destroy(skel); 46 46 } 47 + 48 + void test_percpu_hash_refcounted_kptr_refcount_leak(void) 49 + { 50 + struct refcounted_kptr *skel; 51 + int cpu_nr, fd, err, key = 0; 52 + struct bpf_map *map; 53 + size_t values_sz; 54 + u64 *values; 55 + LIBBPF_OPTS(bpf_test_run_opts, opts, 56 + .data_in = &pkt_v4, 57 + .data_size_in = sizeof(pkt_v4), 58 + .repeat = 1, 59 + ); 60 + 61 + cpu_nr = libbpf_num_possible_cpus(); 62 + if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus")) 63 + return; 64 + 65 + values = calloc(cpu_nr, sizeof(u64)); 66 + if (!ASSERT_OK_PTR(values, "calloc values")) 67 + return; 68 + 69 + skel = refcounted_kptr__open_and_load(); 70 + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) { 71 + free(values); 72 + return; 73 + } 74 + 75 + values_sz = cpu_nr * sizeof(u64); 76 + memset(values, 0, values_sz); 77 + 78 + map = skel->maps.percpu_hash; 79 + err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0); 80 + if (!ASSERT_OK(err, "bpf_map__update_elem")) 81 + goto out; 82 + 83 + fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak); 84 + err = bpf_prog_test_run_opts(fd, &opts); 85 + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) 86 + goto out; 87 + if (!ASSERT_EQ(opts.retval, 2, "opts.retval")) 88 + goto out; 89 + 90 + err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0); 91 + if (!ASSERT_OK(err, "bpf_map__update_elem")) 92 + goto out; 93 + 94 + fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount); 95 + err = bpf_prog_test_run_opts(fd, &opts); 96 + ASSERT_OK(err, "bpf_prog_test_run_opts"); 97 + ASSERT_EQ(opts.retval, 1, "opts.retval"); 98 + 99 + out: 100 + refcounted_kptr__destroy(skel); 101 + free(values); 102 + }
+60
tools/testing/selftests/bpf/progs/refcounted_kptr.c
··· 568 568 return 0; 569 569 } 570 570 571 + private(kptr_ref) u64 ref; 572 + 573 + static int probe_read_refcount(void) 574 + { 575 + u32 refcount; 576 + 577 + bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref); 578 + return refcount; 579 + } 580 + 581 + static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock, 582 + struct node_data __kptr **node) 583 + { 584 + struct node_data *node_new, *node_ref, *node_old; 585 + 586 + node_new = bpf_obj_new(typeof(*node_new)); 587 + if (!node_new) 588 + return -1; 589 + 590 + node_ref = bpf_refcount_acquire(node_new); 591 + node_old = bpf_kptr_xchg(node, node_new); 592 + if (node_old) { 593 + bpf_obj_drop(node_old); 594 + bpf_obj_drop(node_ref); 595 + return -2; 596 + } 597 + 598 + bpf_spin_lock(lock); 599 + bpf_list_push_front(head, &node_ref->l); 600 + ref = (u64)(void *) &node_ref->ref; 601 + bpf_spin_unlock(lock); 602 + return probe_read_refcount(); 603 + } 604 + 605 + struct { 606 + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); 607 + __type(key, int); 608 + __type(value, struct map_value); 609 + __uint(max_entries, 1); 610 + } percpu_hash SEC(".maps"); 611 + 612 + SEC("tc") 613 + int percpu_hash_refcount_leak(void *ctx) 614 + { 615 + struct map_value *v; 616 + int key = 0; 617 + 618 + v = bpf_map_lookup_elem(&percpu_hash, &key); 619 + if (!v) 620 + return 0; 621 + 622 + return __insert_in_list(&head, &lock, &v->node); 623 + } 624 + 625 + SEC("tc") 626 + int check_percpu_hash_refcount(void *ctx) 627 + { 628 + return probe_read_refcount(); 629 + } 630 + 571 631 char _license[] SEC("license") = "GPL";