Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'Add support for kptrs in more BPF maps'

Kumar Kartikeya Dwivedi says:

====================

This set adds support for kptrs in percpu hashmaps, percpu LRU hashmaps,
and local storage maps (covering sk, cgrp, task, inode).

Tests are expanded to test more existing maps at runtime and also test
the code path for the local storage maps (which is shared by all
implementations).

A question for reviewers is what the position of the BPF runtime should
be on dealing with reference cycles that can be created by BPF programs
at runtime using this additional support. For instance, one can store
the kptr of the task in its own task local storage, creating a cycle
which prevents destruction of task local storage. Cycles can be formed
using arbitrarily long kptr ownership chains. Therefore, just preventing
storage of such kptrs in some maps is not a sufficient solution, and is
more likely to hurt usability.

There is precedent in existing runtimes which promise memory safety,
like Rust, where reference cycles and memory leaks are permitted.
However, traditionally the safety guarantees of BPF have been stronger.
Thus, more discussion and thought is invited on this topic to ensure we
cover all usage aspects.

Changelog:
----------
v2 -> v3
v2: https://lore.kernel.org/bpf/20230221200646.2500777-1-memxor@gmail.com/

* Fix a use-after-free bug in local storage patch
* Fix selftest for aarch64 (don't use fentry/fmod_ret)
* Wait for RCU Tasks Trace GP along with RCU GP in selftest

v1 -> v2
v1: https://lore.kernel.org/bpf/20230219155249.1755998-1-memxor@gmail.com

* Simplify selftests, fix a couple of bugs
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+554 -97
+6
include/linux/bpf_local_storage.h
··· 74 74 struct hlist_node snode; /* Linked to bpf_local_storage */ 75 75 struct bpf_local_storage __rcu *local_storage; 76 76 struct rcu_head rcu; 77 + bool can_use_smap; /* Is it safe to access smap in bpf_selem_free_* RCU 78 + * callbacks? bpf_local_storage_map_free only 79 + * executes rcu_barrier when there are special 80 + * fields, this field remembers that to ensure we 81 + * don't access already freed smap in sdata. 82 + */ 77 83 /* 8 bytes hole */ 78 84 /* The data is stored in another cacheline to minimize 79 85 * the number of cachelines access during a cache hit.
+44 -4
kernel/bpf/bpf_local_storage.c
··· 85 85 if (selem) { 86 86 if (value) 87 87 copy_map_value(&smap->map, SDATA(selem)->data, value); 88 + /* No need to call check_and_init_map_value as memory is zero init */ 88 89 return selem; 89 90 } 90 91 ··· 114 113 struct bpf_local_storage_elem *selem; 115 114 116 115 selem = container_of(rcu, struct bpf_local_storage_elem, rcu); 116 + /* The can_use_smap bool is set whenever we need to free additional 117 + * fields in selem data before freeing selem. bpf_local_storage_map_free 118 + * only executes rcu_barrier to wait for RCU callbacks when it has 119 + * special fields, hence we can only conditionally dereference smap, as 120 + * by this time the map might have already been freed without waiting 121 + * for our call_rcu callback if it did not have any special fields. 122 + */ 123 + if (selem->can_use_smap) 124 + bpf_obj_free_fields(SDATA(selem)->smap->map.record, SDATA(selem)->data); 125 + kfree(selem); 126 + } 127 + 128 + static void bpf_selem_free_tasks_trace_rcu(struct rcu_head *rcu) 129 + { 130 + /* Free directly if Tasks Trace RCU GP also implies RCU GP */ 117 131 if (rcu_trace_implies_rcu_gp()) 118 - kfree(selem); 132 + bpf_selem_free_rcu(rcu); 119 133 else 120 - kfree_rcu(selem, rcu); 134 + call_rcu(rcu, bpf_selem_free_rcu); 121 135 } 122 136 123 137 /* local_storage->lock must be held and selem->local_storage == local_storage. 
··· 186 170 RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); 187 171 188 172 if (use_trace_rcu) 189 - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); 173 + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_tasks_trace_rcu); 190 174 else 191 - kfree_rcu(selem, rcu); 175 + call_rcu(&selem->rcu, bpf_selem_free_rcu); 192 176 193 177 return free_local_storage; 194 178 } ··· 256 240 RCU_INIT_POINTER(SDATA(selem)->smap, smap); 257 241 hlist_add_head_rcu(&selem->map_node, &b->list); 258 242 raw_spin_unlock_irqrestore(&b->lock, flags); 243 + 244 + /* If our data will have special fields, smap will wait for us to use 245 + * its record in bpf_selem_free_* RCU callbacks before freeing itself. 246 + */ 247 + selem->can_use_smap = !IS_ERR_OR_NULL(smap->map.record); 259 248 } 260 249 261 250 void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) ··· 744 723 */ 745 724 synchronize_rcu(); 746 725 726 + /* Only delay freeing of smap, buckets are not needed anymore */ 747 727 kvfree(smap->buckets); 728 + 729 + /* When local storage has special fields, callbacks for 730 + * bpf_selem_free_rcu and bpf_selem_free_tasks_trace_rcu will keep using 731 + * the map BTF record, we need to execute an RCU barrier to wait for 732 + * them as the record will be freed right after our map_free callback. 733 + */ 734 + if (!IS_ERR_OR_NULL(smap->map.record)) { 735 + rcu_barrier_tasks_trace(); 736 + /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp() 737 + * is true, because while call_rcu invocation is skipped in that 738 + * case in bpf_selem_free_tasks_trace_rcu (and all local storage 739 + * maps pass use_trace_rcu = true), there can be call_rcu 740 + * callbacks based on use_trace_rcu = false in the earlier while 741 + * ((selem = ...)) loop or from bpf_local_storage_unlink_nolock 742 + * called from owner's free path. 743 + */ 744 + rcu_barrier(); 745 + } 748 746 bpf_map_area_free(smap); 749 747 }
+37 -22
kernel/bpf/hashtab.c
··· 249 249 struct htab_elem *elem; 250 250 251 251 elem = get_htab_elem(htab, i); 252 - bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8)); 252 + if (htab_is_percpu(htab)) { 253 + void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 254 + int cpu; 255 + 256 + for_each_possible_cpu(cpu) { 257 + bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 258 + cond_resched(); 259 + } 260 + } else { 261 + bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8)); 262 + cond_resched(); 263 + } 253 264 cond_resched(); 254 265 } 255 266 } ··· 770 759 static void check_and_free_fields(struct bpf_htab *htab, 771 760 struct htab_elem *elem) 772 761 { 773 - void *map_value = elem->key + round_up(htab->map.key_size, 8); 762 + if (htab_is_percpu(htab)) { 763 + void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 764 + int cpu; 774 765 775 - bpf_obj_free_fields(htab->map.record, map_value); 766 + for_each_possible_cpu(cpu) 767 + bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 768 + } else { 769 + void *map_value = elem->key + round_up(htab->map.key_size, 8); 770 + 771 + bpf_obj_free_fields(htab->map.record, map_value); 772 + } 776 773 } 777 774 778 775 /* It is called from the bpf_lru_list when the LRU needs to delete ··· 877 858 878 859 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) 879 860 { 861 + check_and_free_fields(htab, l); 880 862 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) 881 863 bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); 882 - check_and_free_fields(htab, l); 883 864 bpf_mem_cache_free(&htab->ma, l); 884 865 } 885 866 ··· 937 918 { 938 919 if (!onallcpus) { 939 920 /* copy true value_size bytes */ 940 - memcpy(this_cpu_ptr(pptr), value, htab->map.value_size); 921 + copy_map_value(&htab->map, this_cpu_ptr(pptr), value); 941 922 } else { 942 923 u32 size = round_up(htab->map.value_size, 8); 943 924 int off = 0, cpu; 944 925 945 
926 for_each_possible_cpu(cpu) { 946 - bpf_long_memcpy(per_cpu_ptr(pptr, cpu), 947 - value + off, size); 927 + copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off); 948 928 off += size; 949 929 } 950 930 } ··· 958 940 * (onallcpus=false always when coming from bpf prog). 959 941 */ 960 942 if (!onallcpus) { 961 - u32 size = round_up(htab->map.value_size, 8); 962 943 int current_cpu = raw_smp_processor_id(); 963 944 int cpu; 964 945 965 946 for_each_possible_cpu(cpu) { 966 947 if (cpu == current_cpu) 967 - bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, 968 - size); 969 - else 970 - memset(per_cpu_ptr(pptr, cpu), 0, size); 948 + copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); 949 + else /* Since elem is preallocated, we cannot touch special fields */ 950 + zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); 971 951 } 972 952 } else { 973 953 pcpu_copy_value(htab, pptr, value, onallcpus); ··· 1591 1575 1592 1576 pptr = htab_elem_get_ptr(l, key_size); 1593 1577 for_each_possible_cpu(cpu) { 1594 - bpf_long_memcpy(value + off, 1595 - per_cpu_ptr(pptr, cpu), 1596 - roundup_value_size); 1578 + copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu)); 1579 + check_and_init_map_value(&htab->map, value + off); 1597 1580 off += roundup_value_size; 1598 1581 } 1599 1582 } else { ··· 1787 1772 1788 1773 pptr = htab_elem_get_ptr(l, map->key_size); 1789 1774 for_each_possible_cpu(cpu) { 1790 - bpf_long_memcpy(dst_val + off, 1791 - per_cpu_ptr(pptr, cpu), size); 1775 + copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu)); 1776 + check_and_init_map_value(&htab->map, dst_val + off); 1792 1777 off += size; 1793 1778 } 1794 1779 } else { ··· 2061 2046 roundup_value_size = round_up(map->value_size, 8); 2062 2047 pptr = htab_elem_get_ptr(elem, map->key_size); 2063 2048 for_each_possible_cpu(cpu) { 2064 - bpf_long_memcpy(info->percpu_value_buf + off, 2065 - per_cpu_ptr(pptr, cpu), 2066 - roundup_value_size); 2049 + 
copy_map_value_long(map, info->percpu_value_buf + off, 2050 + per_cpu_ptr(pptr, cpu)); 2051 + check_and_init_map_value(map, info->percpu_value_buf + off); 2067 2052 off += roundup_value_size; 2068 2053 } 2069 2054 ctx.value = info->percpu_value_buf; ··· 2307 2292 */ 2308 2293 pptr = htab_elem_get_ptr(l, map->key_size); 2309 2294 for_each_possible_cpu(cpu) { 2310 - bpf_long_memcpy(value + off, 2311 - per_cpu_ptr(pptr, cpu), size); 2295 + copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); 2296 + check_and_init_map_value(map, value + off); 2312 2297 off += size; 2313 2298 } 2314 2299 ret = 0;
+7 -1
kernel/bpf/syscall.c
··· 1059 1059 case BPF_KPTR_UNREF: 1060 1060 case BPF_KPTR_REF: 1061 1061 if (map->map_type != BPF_MAP_TYPE_HASH && 1062 + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 1062 1063 map->map_type != BPF_MAP_TYPE_LRU_HASH && 1064 + map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH && 1063 1065 map->map_type != BPF_MAP_TYPE_ARRAY && 1064 - map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) { 1066 + map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 1067 + map->map_type != BPF_MAP_TYPE_SK_STORAGE && 1068 + map->map_type != BPF_MAP_TYPE_INODE_STORAGE && 1069 + map->map_type != BPF_MAP_TYPE_TASK_STORAGE && 1070 + map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) { 1065 1071 ret = -EOPNOTSUPP; 1066 1072 goto free_map_tab; 1067 1073 }
+8 -4
kernel/bpf/verifier.c
··· 7222 7222 break; 7223 7223 case BPF_MAP_TYPE_SK_STORAGE: 7224 7224 if (func_id != BPF_FUNC_sk_storage_get && 7225 - func_id != BPF_FUNC_sk_storage_delete) 7225 + func_id != BPF_FUNC_sk_storage_delete && 7226 + func_id != BPF_FUNC_kptr_xchg) 7226 7227 goto error; 7227 7228 break; 7228 7229 case BPF_MAP_TYPE_INODE_STORAGE: 7229 7230 if (func_id != BPF_FUNC_inode_storage_get && 7230 - func_id != BPF_FUNC_inode_storage_delete) 7231 + func_id != BPF_FUNC_inode_storage_delete && 7232 + func_id != BPF_FUNC_kptr_xchg) 7231 7233 goto error; 7232 7234 break; 7233 7235 case BPF_MAP_TYPE_TASK_STORAGE: 7234 7236 if (func_id != BPF_FUNC_task_storage_get && 7235 - func_id != BPF_FUNC_task_storage_delete) 7237 + func_id != BPF_FUNC_task_storage_delete && 7238 + func_id != BPF_FUNC_kptr_xchg) 7236 7239 goto error; 7237 7240 break; 7238 7241 case BPF_MAP_TYPE_CGRP_STORAGE: 7239 7242 if (func_id != BPF_FUNC_cgrp_storage_get && 7240 - func_id != BPF_FUNC_cgrp_storage_delete) 7243 + func_id != BPF_FUNC_cgrp_storage_delete && 7244 + func_id != BPF_FUNC_kptr_xchg) 7241 7245 goto error; 7242 7246 break; 7243 7247 case BPF_MAP_TYPE_BLOOM_FILTER:
+113 -23
tools/testing/selftests/bpf/prog_tests/map_kptr.c
··· 4 4 5 5 #include "map_kptr.skel.h" 6 6 #include "map_kptr_fail.skel.h" 7 + #include "rcu_tasks_trace_gp.skel.h" 7 8 8 9 static void test_map_kptr_success(bool test_run) 9 10 { 11 + LIBBPF_OPTS(bpf_test_run_opts, lopts); 10 12 LIBBPF_OPTS(bpf_test_run_opts, opts, 11 13 .data_in = &pkt_v4, 12 14 .data_size_in = sizeof(pkt_v4), 13 15 .repeat = 1, 14 16 ); 17 + int key = 0, ret, cpu; 15 18 struct map_kptr *skel; 16 - int key = 0, ret; 17 - char buf[16]; 19 + char buf[16], *pbuf; 18 20 19 21 skel = map_kptr__open_and_load(); 20 22 if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) 21 23 return; 22 24 23 - ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts); 24 - ASSERT_OK(ret, "test_map_kptr_ref refcount"); 25 - ASSERT_OK(opts.retval, "test_map_kptr_ref retval"); 25 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts); 26 + ASSERT_OK(ret, "test_map_kptr_ref1 refcount"); 27 + ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval"); 26 28 ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts); 27 29 ASSERT_OK(ret, "test_map_kptr_ref2 refcount"); 28 30 ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); 29 31 32 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts); 33 + ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount"); 34 + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval"); 35 + 36 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts); 37 + ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount"); 38 + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval"); 39 + 30 40 if (test_run) 41 + goto exit; 42 + 43 + cpu = libbpf_num_possible_cpus(); 44 + if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus")) 45 + goto exit; 46 + 47 + pbuf = calloc(cpu, sizeof(buf)); 48 + if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)")) 31 49 goto exit; 32 50 33 51 ret = bpf_map__update_elem(skel->maps.array_map, 34 52 &key, sizeof(key), 
buf, sizeof(buf), 0); 35 53 ASSERT_OK(ret, "array_map update"); 36 - ret = bpf_map__update_elem(skel->maps.array_map, 37 - &key, sizeof(key), buf, sizeof(buf), 0); 38 - ASSERT_OK(ret, "array_map update2"); 54 + skel->data->ref--; 55 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); 56 + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); 57 + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); 39 58 40 - ret = bpf_map__update_elem(skel->maps.hash_map, 41 - &key, sizeof(key), buf, sizeof(buf), 0); 42 - ASSERT_OK(ret, "hash_map update"); 59 + ret = bpf_map__update_elem(skel->maps.pcpu_array_map, 60 + &key, sizeof(key), pbuf, cpu * sizeof(buf), 0); 61 + ASSERT_OK(ret, "pcpu_array_map update"); 62 + skel->data->ref--; 63 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); 64 + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); 65 + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); 66 + 43 67 ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); 44 68 ASSERT_OK(ret, "hash_map delete"); 69 + skel->data->ref--; 70 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); 71 + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); 72 + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); 45 73 46 - ret = bpf_map__update_elem(skel->maps.hash_malloc_map, 47 - &key, sizeof(key), buf, sizeof(buf), 0); 48 - ASSERT_OK(ret, "hash_malloc_map update"); 74 + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0); 75 + ASSERT_OK(ret, "pcpu_hash_map delete"); 76 + skel->data->ref--; 77 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); 78 + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); 79 + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); 80 + 49 81 ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); 50 82 ASSERT_OK(ret, "hash_malloc_map delete"); 83 + skel->data->ref--; 84 + ret = 
bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); 85 + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); 86 + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); 51 87 52 - ret = bpf_map__update_elem(skel->maps.lru_hash_map, 53 - &key, sizeof(key), buf, sizeof(buf), 0); 54 - ASSERT_OK(ret, "lru_hash_map update"); 88 + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0); 89 + ASSERT_OK(ret, "pcpu_hash_malloc_map delete"); 90 + skel->data->ref--; 91 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); 92 + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); 93 + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); 94 + 55 95 ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); 56 96 ASSERT_OK(ret, "lru_hash_map delete"); 97 + skel->data->ref--; 98 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); 99 + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); 100 + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); 57 101 102 + ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0); 103 + ASSERT_OK(ret, "lru_pcpu_hash_map delete"); 104 + skel->data->ref--; 105 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); 106 + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); 107 + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); 108 + 109 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts); 110 + ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete"); 111 + skel->data->ref--; 112 + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval"); 113 + 114 + free(pbuf); 58 115 exit: 59 116 map_kptr__destroy(skel); 60 117 } 61 118 62 - void test_map_kptr(void) 119 + static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu) 63 120 { 64 - if (test__start_subtest("success")) { 121 + long gp_seq = READ_ONCE(rcu->bss->gp_seq); 122 + 
LIBBPF_OPTS(bpf_test_run_opts, opts); 123 + 124 + if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace), 125 + &opts), "do_call_rcu_tasks_trace")) 126 + return -EFAULT; 127 + if (!ASSERT_OK(opts.retval, "opts.retval == 0")) 128 + return -EFAULT; 129 + while (gp_seq == READ_ONCE(rcu->bss->gp_seq)) 130 + sched_yield(); 131 + return 0; 132 + } 133 + 134 + void serial_test_map_kptr(void) 135 + { 136 + struct rcu_tasks_trace_gp *skel; 137 + 138 + RUN_TESTS(map_kptr_fail); 139 + 140 + skel = rcu_tasks_trace_gp__open_and_load(); 141 + if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load")) 142 + return; 143 + if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach")) 144 + goto end; 145 + 146 + if (test__start_subtest("success-map")) { 147 + test_map_kptr_success(true); 148 + 149 + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); 150 + ASSERT_OK(kern_sync_rcu(), "sync rcu"); 151 + /* Observe refcount dropping to 1 on bpf_map_free_deferred */ 65 152 test_map_kptr_success(false); 66 - /* Do test_run twice, so that we see refcount going back to 1 67 - * after we leave it in map from first iteration. 68 - */ 153 + 154 + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); 155 + ASSERT_OK(kern_sync_rcu(), "sync rcu"); 156 + /* Observe refcount dropping to 1 on synchronous delete elem */ 69 157 test_map_kptr_success(true); 70 158 } 71 159 72 - RUN_TESTS(map_kptr_fail); 160 + end: 161 + rcu_tasks_trace_gp__destroy(skel); 162 + return; 73 163 }
+303 -43
tools/testing/selftests/bpf/progs/map_kptr.c
··· 15 15 __uint(max_entries, 1); 16 16 } array_map SEC(".maps"); 17 17 18 + struct pcpu_array_map { 19 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 20 + __type(key, int); 21 + __type(value, struct map_value); 22 + __uint(max_entries, 1); 23 + } pcpu_array_map SEC(".maps"); 24 + 18 25 struct hash_map { 19 26 __uint(type, BPF_MAP_TYPE_HASH); 20 27 __type(key, int); 21 28 __type(value, struct map_value); 22 29 __uint(max_entries, 1); 23 30 } hash_map SEC(".maps"); 31 + 32 + struct pcpu_hash_map { 33 + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); 34 + __type(key, int); 35 + __type(value, struct map_value); 36 + __uint(max_entries, 1); 37 + } pcpu_hash_map SEC(".maps"); 24 38 25 39 struct hash_malloc_map { 26 40 __uint(type, BPF_MAP_TYPE_HASH); ··· 44 30 __uint(map_flags, BPF_F_NO_PREALLOC); 45 31 } hash_malloc_map SEC(".maps"); 46 32 33 + struct pcpu_hash_malloc_map { 34 + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); 35 + __type(key, int); 36 + __type(value, struct map_value); 37 + __uint(max_entries, 1); 38 + __uint(map_flags, BPF_F_NO_PREALLOC); 39 + } pcpu_hash_malloc_map SEC(".maps"); 40 + 47 41 struct lru_hash_map { 48 42 __uint(type, BPF_MAP_TYPE_LRU_HASH); 49 43 __type(key, int); 50 44 __type(value, struct map_value); 51 45 __uint(max_entries, 1); 52 46 } lru_hash_map SEC(".maps"); 47 + 48 + struct lru_pcpu_hash_map { 49 + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); 50 + __type(key, int); 51 + __type(value, struct map_value); 52 + __uint(max_entries, 1); 53 + } lru_pcpu_hash_map SEC(".maps"); 54 + 55 + struct cgrp_ls_map { 56 + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); 57 + __uint(map_flags, BPF_F_NO_PREALLOC); 58 + __type(key, int); 59 + __type(value, struct map_value); 60 + } cgrp_ls_map SEC(".maps"); 61 + 62 + struct task_ls_map { 63 + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); 64 + __uint(map_flags, BPF_F_NO_PREALLOC); 65 + __type(key, int); 66 + __type(value, struct map_value); 67 + } task_ls_map SEC(".maps"); 68 + 69 + struct inode_ls_map { 70 + __uint(type, 
BPF_MAP_TYPE_INODE_STORAGE); 71 + __uint(map_flags, BPF_F_NO_PREALLOC); 72 + __type(key, int); 73 + __type(value, struct map_value); 74 + } inode_ls_map SEC(".maps"); 75 + 76 + struct sk_ls_map { 77 + __uint(type, BPF_MAP_TYPE_SK_STORAGE); 78 + __uint(map_flags, BPF_F_NO_PREALLOC); 79 + __type(key, int); 80 + __type(value, struct map_value); 81 + } sk_ls_map SEC(".maps"); 53 82 54 83 #define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \ 55 84 struct { \ ··· 217 160 return 0; 218 161 } 219 162 163 + SEC("tp_btf/cgroup_mkdir") 164 + int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path) 165 + { 166 + struct map_value *v; 167 + 168 + v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); 169 + if (v) 170 + test_kptr(v); 171 + return 0; 172 + } 173 + 174 + SEC("lsm/inode_unlink") 175 + int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim) 176 + { 177 + struct task_struct *task; 178 + struct map_value *v; 179 + 180 + task = bpf_get_current_task_btf(); 181 + if (!task) 182 + return 0; 183 + v = bpf_task_storage_get(&task_ls_map, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); 184 + if (v) 185 + test_kptr(v); 186 + return 0; 187 + } 188 + 189 + SEC("lsm/inode_unlink") 190 + int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim) 191 + { 192 + struct map_value *v; 193 + 194 + v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); 195 + if (v) 196 + test_kptr(v); 197 + return 0; 198 + } 199 + 200 + SEC("tc") 201 + int test_sk_map_kptr(struct __sk_buff *ctx) 202 + { 203 + struct map_value *v; 204 + struct bpf_sock *sk; 205 + 206 + sk = ctx->sk; 207 + if (!sk) 208 + return 0; 209 + v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); 210 + if (v) 211 + test_kptr(v); 212 + return 0; 213 + } 214 + 220 215 SEC("tc") 221 216 int test_map_in_map_kptr(struct __sk_buff *ctx) 222 217 { ··· 298 189 return 0; 299 190 } 300 191 301 - 
SEC("tc") 302 - int test_map_kptr_ref(struct __sk_buff *ctx) 192 + int ref = 1; 193 + 194 + static __always_inline 195 + int test_map_kptr_ref_pre(struct map_value *v) 303 196 { 304 197 struct prog_test_ref_kfunc *p, *p_st; 305 198 unsigned long arg = 0; 306 - struct map_value *v; 307 - int key = 0, ret; 199 + int ret; 308 200 309 201 p = bpf_kfunc_call_test_acquire(&arg); 310 202 if (!p) 311 203 return 1; 204 + ref++; 312 205 313 206 p_st = p->next; 314 - if (p_st->cnt.refs.counter != 2) { 207 + if (p_st->cnt.refs.counter != ref) { 315 208 ret = 2; 316 209 goto end; 317 210 } 318 211 319 - v = bpf_map_lookup_elem(&array_map, &key); 320 - if (!v) { 212 + p = bpf_kptr_xchg(&v->ref_ptr, p); 213 + if (p) { 321 214 ret = 3; 322 215 goto end; 323 216 } 324 - 325 - p = bpf_kptr_xchg(&v->ref_ptr, p); 326 - if (p) { 327 - ret = 4; 328 - goto end; 329 - } 330 - if (p_st->cnt.refs.counter != 2) 331 - return 5; 217 + if (p_st->cnt.refs.counter != ref) 218 + return 4; 332 219 333 220 p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); 334 221 if (!p) 335 - return 6; 336 - if (p_st->cnt.refs.counter != 3) { 337 - ret = 7; 222 + return 5; 223 + ref++; 224 + if (p_st->cnt.refs.counter != ref) { 225 + ret = 6; 338 226 goto end; 339 227 } 340 228 bpf_kfunc_call_test_release(p); 341 - if (p_st->cnt.refs.counter != 2) 342 - return 8; 229 + ref--; 230 + if (p_st->cnt.refs.counter != ref) 231 + return 7; 343 232 344 233 p = bpf_kptr_xchg(&v->ref_ptr, NULL); 345 234 if (!p) 346 - return 9; 235 + return 8; 347 236 bpf_kfunc_call_test_release(p); 348 - if (p_st->cnt.refs.counter != 1) 349 - return 10; 237 + ref--; 238 + if (p_st->cnt.refs.counter != ref) 239 + return 9; 350 240 351 241 p = bpf_kfunc_call_test_acquire(&arg); 352 242 if (!p) 353 - return 11; 243 + return 10; 244 + ref++; 354 245 p = bpf_kptr_xchg(&v->ref_ptr, p); 355 246 if (p) { 356 - ret = 12; 247 + ret = 11; 357 248 goto end; 358 249 } 359 - if (p_st->cnt.refs.counter != 2) 360 - return 13; 250 + if 
(p_st->cnt.refs.counter != ref) 251 + return 12; 361 252 /* Leave in map */ 362 253 363 254 return 0; 364 255 end: 256 + ref--; 365 257 bpf_kfunc_call_test_release(p); 366 258 return ret; 367 259 } 368 260 369 - SEC("tc") 370 - int test_map_kptr_ref2(struct __sk_buff *ctx) 261 + static __always_inline 262 + int test_map_kptr_ref_post(struct map_value *v) 371 263 { 372 264 struct prog_test_ref_kfunc *p, *p_st; 373 - struct map_value *v; 374 - int key = 0; 375 - 376 - v = bpf_map_lookup_elem(&array_map, &key); 377 - if (!v) 378 - return 1; 379 265 380 266 p_st = v->ref_ptr; 381 - if (!p_st || p_st->cnt.refs.counter != 2) 382 - return 2; 267 + if (!p_st || p_st->cnt.refs.counter != ref) 268 + return 1; 383 269 384 270 p = bpf_kptr_xchg(&v->ref_ptr, NULL); 385 271 if (!p) 386 - return 3; 387 - if (p_st->cnt.refs.counter != 2) { 272 + return 2; 273 + if (p_st->cnt.refs.counter != ref) { 388 274 bpf_kfunc_call_test_release(p); 389 - return 4; 275 + return 3; 390 276 } 391 277 392 278 p = bpf_kptr_xchg(&v->ref_ptr, p); 393 279 if (p) { 394 280 bpf_kfunc_call_test_release(p); 395 - return 5; 281 + return 4; 396 282 } 397 - if (p_st->cnt.refs.counter != 2) 398 - return 6; 283 + if (p_st->cnt.refs.counter != ref) 284 + return 5; 399 285 400 286 return 0; 287 + } 288 + 289 + #define TEST(map) \ 290 + v = bpf_map_lookup_elem(&map, &key); \ 291 + if (!v) \ 292 + return -1; \ 293 + ret = test_map_kptr_ref_pre(v); \ 294 + if (ret) \ 295 + return ret; 296 + 297 + #define TEST_PCPU(map) \ 298 + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ 299 + if (!v) \ 300 + return -1; \ 301 + ret = test_map_kptr_ref_pre(v); \ 302 + if (ret) \ 303 + return ret; 304 + 305 + SEC("tc") 306 + int test_map_kptr_ref1(struct __sk_buff *ctx) 307 + { 308 + struct map_value *v, val = {}; 309 + int key = 0, ret; 310 + 311 + bpf_map_update_elem(&hash_map, &key, &val, 0); 312 + bpf_map_update_elem(&hash_malloc_map, &key, &val, 0); 313 + bpf_map_update_elem(&lru_hash_map, &key, &val, 0); 314 + 315 + 
bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0); 316 + bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0); 317 + bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0); 318 + 319 + TEST(array_map); 320 + TEST(hash_map); 321 + TEST(hash_malloc_map); 322 + TEST(lru_hash_map); 323 + 324 + TEST_PCPU(pcpu_array_map); 325 + TEST_PCPU(pcpu_hash_map); 326 + TEST_PCPU(pcpu_hash_malloc_map); 327 + TEST_PCPU(lru_pcpu_hash_map); 328 + 329 + return 0; 330 + } 331 + 332 + #undef TEST 333 + #undef TEST_PCPU 334 + 335 + #define TEST(map) \ 336 + v = bpf_map_lookup_elem(&map, &key); \ 337 + if (!v) \ 338 + return -1; \ 339 + ret = test_map_kptr_ref_post(v); \ 340 + if (ret) \ 341 + return ret; 342 + 343 + #define TEST_PCPU(map) \ 344 + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ 345 + if (!v) \ 346 + return -1; \ 347 + ret = test_map_kptr_ref_post(v); \ 348 + if (ret) \ 349 + return ret; 350 + 351 + SEC("tc") 352 + int test_map_kptr_ref2(struct __sk_buff *ctx) 353 + { 354 + struct map_value *v; 355 + int key = 0, ret; 356 + 357 + TEST(array_map); 358 + TEST(hash_map); 359 + TEST(hash_malloc_map); 360 + TEST(lru_hash_map); 361 + 362 + TEST_PCPU(pcpu_array_map); 363 + TEST_PCPU(pcpu_hash_map); 364 + TEST_PCPU(pcpu_hash_malloc_map); 365 + TEST_PCPU(lru_pcpu_hash_map); 366 + 367 + return 0; 368 + } 369 + 370 + #undef TEST 371 + #undef TEST_PCPU 372 + 373 + SEC("tc") 374 + int test_map_kptr_ref3(struct __sk_buff *ctx) 375 + { 376 + struct prog_test_ref_kfunc *p; 377 + unsigned long sp = 0; 378 + 379 + p = bpf_kfunc_call_test_acquire(&sp); 380 + if (!p) 381 + return 1; 382 + ref++; 383 + if (p->cnt.refs.counter != ref) { 384 + bpf_kfunc_call_test_release(p); 385 + return 2; 386 + } 387 + bpf_kfunc_call_test_release(p); 388 + ref--; 389 + return 0; 390 + } 391 + 392 + SEC("syscall") 393 + int test_ls_map_kptr_ref1(void *ctx) 394 + { 395 + struct task_struct *current; 396 + struct map_value *v; 397 + int ret; 398 + 399 + current = bpf_get_current_task_btf(); 400 + if 
(!current) 401 + return 100; 402 + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); 403 + if (v) 404 + return 150; 405 + v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); 406 + if (!v) 407 + return 200; 408 + return test_map_kptr_ref_pre(v); 409 + } 410 + 411 + SEC("syscall") 412 + int test_ls_map_kptr_ref2(void *ctx) 413 + { 414 + struct task_struct *current; 415 + struct map_value *v; 416 + int ret; 417 + 418 + current = bpf_get_current_task_btf(); 419 + if (!current) 420 + return 100; 421 + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); 422 + if (!v) 423 + return 200; 424 + return test_map_kptr_ref_post(v); 425 + } 426 + 427 + SEC("syscall") 428 + int test_ls_map_kptr_ref_del(void *ctx) 429 + { 430 + struct task_struct *current; 431 + struct map_value *v; 432 + int ret; 433 + 434 + current = bpf_get_current_task_btf(); 435 + if (!current) 436 + return 100; 437 + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); 438 + if (!v) 439 + return 200; 440 + if (!v->ref_ptr) 441 + return 300; 442 + return bpf_task_storage_delete(&task_ls_map, current); 401 443 } 402 444 403 445 char _license[] SEC("license") = "GPL";
+36
tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <vmlinux.h> 3 + #include <bpf/bpf_tracing.h> 4 + #include <bpf/bpf_helpers.h> 5 + 6 + struct task_ls_map { 7 + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); 8 + __uint(map_flags, BPF_F_NO_PREALLOC); 9 + __type(key, int); 10 + __type(value, int); 11 + } task_ls_map SEC(".maps"); 12 + 13 + long gp_seq; 14 + 15 + SEC("syscall") 16 + int do_call_rcu_tasks_trace(void *ctx) 17 + { 18 + struct task_struct *current; 19 + int *v; 20 + 21 + current = bpf_get_current_task_btf(); 22 + v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); 23 + if (!v) 24 + return 1; 25 + /* Invoke call_rcu_tasks_trace */ 26 + return bpf_task_storage_delete(&task_ls_map, current); 27 + } 28 + 29 + SEC("kprobe/rcu_tasks_trace_postgp") 30 + int rcu_tasks_trace_postgp(void *ctx) 31 + { 32 + __sync_add_and_fetch(&gp_seq, 1); 33 + return 0; 34 + } 35 + 36 + char _license[] SEC("license") = "GPL";