Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'bpf-expand-the-usage-scenarios-of-bpf_kptr_xchg'

Kaitao Cheng says:

====================
When using bpf_kptr_xchg, we triggered the following error:
31: (85) call bpf_kptr_xchg#194
function calls are not allowed while holding a lock
bpf_kptr_xchg can now be used in lock-held contexts, so we extended
its usage scope in [patch 1/5].

When writing test cases using bpf_kptr_xchg and bpf_rbtree_*, the
following approach must be followed:

bpf_spin_lock(&lock);
rb_n = bpf_rbtree_root(&root);
while (rb_n && can_loop) {
rb_n = bpf_rbtree_remove(&root, rb_n);
if (!rb_n)
goto fail;

tnode = container_of(rb_n, struct tree_node, node);
node_data = bpf_kptr_xchg(&tnode->node_data, NULL);
if (!node_data)
goto fail;

data = node_data->data;
/* use data to do something */

node_data = bpf_kptr_xchg(&tnode->node_data, node_data);
if (node_data)
goto fail;

bpf_rbtree_add(&root, rb_n, less);

if (lookup_key < tnode->key)
rb_n = bpf_rbtree_left(&root, rb_n);
else
rb_n = bpf_rbtree_right(&root, rb_n);
}
bpf_spin_unlock(&lock);

The above illustrates a lock-remove-read-add-unlock workflow, which
exhibits lower performance. To address this, we introduced support
for a streamlined lock-read-unlock operation in [patch 2/5] and
[patch 4/5].

Changes in v7:
- Add a comma to the variable declaration in enum bpf_reg_type
- Modify the prefixes
Changes in v6:
- allow using bpf_kptr_xchg even if the MEM_RCU flag is set
- Add test case
Changes in v5:
- add lastname
Changes in v4:
- Fix the dead logic issue in the test case
Changes in v3:
- Fix compilation errors
Changes in v2:
- Allow using bpf_kptr_xchg even if the NON_OWN_REF flag is set
- Add test case

Link to v6:
https://lore.kernel.org/all/20260208024846.18653-1-pilgrimtao@gmail.com/
Link to v5:
https://lore.kernel.org/all/20260203022712.99347-1-pilgrimtao@gmail.com/
Link to v4:
https://lore.kernel.org/all/20260202090051.87802-1-pilgrimtao@gmail.com/
Link to V3:
https://lore.kernel.org/all/20260202055818.78231-1-pilgrimtao@gmail.com/
Link to V2:
https://lore.kernel.org/all/20260201031607.32940-1-pilgrimtao@gmail.com/
Link to V1:
https://lore.kernel.org/all/20260122081426.78472-1-pilgrimtao@gmail.com/
====================

Link: https://patch.msgid.link/20260214124042.62229-1-pilgrimtao@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+307 -2
+7 -2
kernel/bpf/verifier.c
··· 9309 9309 static const struct bpf_reg_types kptr_xchg_dest_types = { 9310 9310 .types = { 9311 9311 PTR_TO_MAP_VALUE, 9312 - PTR_TO_BTF_ID | MEM_ALLOC 9312 + PTR_TO_BTF_ID | MEM_ALLOC, 9313 + PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF, 9314 + PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU, 9313 9315 } 9314 9316 }; 9315 9317 static const struct bpf_reg_types dynptr_types = { ··· 9475 9473 } 9476 9474 case PTR_TO_BTF_ID | MEM_ALLOC: 9477 9475 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC: 9476 + case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF: 9477 + case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU: 9478 9478 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock && 9479 9479 meta->func_id != BPF_FUNC_kptr_xchg) { 9480 9480 verifier_bug(env, "unimplemented handling of MEM_ALLOC"); ··· 21002 20998 21003 20999 if (env->cur_state->active_locks) { 21004 21000 if ((insn->src_reg == BPF_REG_0 && 21005 - insn->imm != BPF_FUNC_spin_unlock) || 21001 + insn->imm != BPF_FUNC_spin_unlock && 21002 + insn->imm != BPF_FUNC_kptr_xchg) || 21006 21003 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && 21007 21004 (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) { 21008 21005 verbose(env,
+6
tools/testing/selftests/bpf/prog_tests/rbtree.c
··· 9 9 #include "rbtree_btf_fail__wrong_node_type.skel.h" 10 10 #include "rbtree_btf_fail__add_wrong_type.skel.h" 11 11 #include "rbtree_search.skel.h" 12 + #include "rbtree_search_kptr.skel.h" 12 13 13 14 static void test_rbtree_add_nodes(void) 14 15 { ··· 193 192 void test_rbtree_search(void) 194 193 { 195 194 RUN_TESTS(rbtree_search); 195 + } 196 + 197 + void test_rbtree_search_kptr(void) 198 + { 199 + RUN_TESTS(rbtree_search_kptr); 196 200 }
+4
tools/testing/selftests/bpf/progs/bpf_misc.h
··· 188 188 #define POINTER_VALUE 0xbadcafe 189 189 #define TEST_DATA_LEN 64 190 190 191 + #ifndef __aligned 192 + #define __aligned(x) __attribute__((aligned(x))) 193 + #endif 194 + 191 195 #ifndef __used 192 196 #define __used __attribute__((used)) 193 197 #endif
+290
tools/testing/selftests/bpf/progs/rbtree_search_kptr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2026 KylinSoft Corporation. */ 3 + 4 + #include <vmlinux.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include "bpf_misc.h" 7 + #include "bpf_experimental.h" 8 + 9 + #define NR_NODES 16 10 + 11 + struct node_data { 12 + int data; 13 + }; 14 + 15 + struct tree_node { 16 + struct bpf_rb_node node; 17 + u64 key; 18 + struct node_data __kptr * node_data; 19 + }; 20 + 21 + struct tree_node_ref { 22 + struct bpf_refcount ref; 23 + struct bpf_rb_node node; 24 + u64 key; 25 + struct node_data __kptr * node_data; 26 + }; 27 + 28 + #define private(name) SEC(".data." #name) __hidden __aligned(8) 29 + 30 + private(A) struct bpf_rb_root root __contains(tree_node, node); 31 + private(A) struct bpf_spin_lock lock; 32 + 33 + private(B) struct bpf_rb_root root_r __contains(tree_node_ref, node); 34 + private(B) struct bpf_spin_lock lock_r; 35 + 36 + static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) 37 + { 38 + struct tree_node *node_a, *node_b; 39 + 40 + node_a = container_of(a, struct tree_node, node); 41 + node_b = container_of(b, struct tree_node, node); 42 + 43 + return node_a->key < node_b->key; 44 + } 45 + 46 + SEC("syscall") 47 + __retval(0) 48 + long rbtree_search_kptr(void *ctx) 49 + { 50 + struct tree_node *tnode; 51 + struct bpf_rb_node *rb_n; 52 + struct node_data __kptr * node_data; 53 + int lookup_key = NR_NODES / 2; 54 + int lookup_data = NR_NODES / 2; 55 + int i, data, ret = 0; 56 + 57 + for (i = 0; i < NR_NODES && can_loop; i++) { 58 + tnode = bpf_obj_new(typeof(*tnode)); 59 + if (!tnode) 60 + return __LINE__; 61 + 62 + node_data = bpf_obj_new(typeof(*node_data)); 63 + if (!node_data) { 64 + bpf_obj_drop(tnode); 65 + return __LINE__; 66 + } 67 + 68 + tnode->key = i; 69 + node_data->data = i; 70 + 71 + node_data = bpf_kptr_xchg(&tnode->node_data, node_data); 72 + if (node_data) 73 + bpf_obj_drop(node_data); 74 + 75 + bpf_spin_lock(&lock); 76 + bpf_rbtree_add(&root, &tnode->node, less); 77 + bpf_spin_unlock(&lock); 78 + } 79 + 80 + bpf_spin_lock(&lock); 81 + rb_n = bpf_rbtree_root(&root); 82 + while (rb_n && can_loop) { 83 + tnode = container_of(rb_n, struct tree_node, node); 84 + node_data = bpf_kptr_xchg(&tnode->node_data, NULL); 85 + if (!node_data) { 86 + ret = __LINE__; 87 + goto fail; 88 + } 89 + 90 + data = node_data->data; 91 + node_data = bpf_kptr_xchg(&tnode->node_data, node_data); 92 + if (node_data) { 93 + bpf_spin_unlock(&lock); 94 + bpf_obj_drop(node_data); 95 + return __LINE__; 96 + } 97 + 98 + if (lookup_key == tnode->key) { 99 + if (data == lookup_data) 100 + break; 101 + 102 + ret = __LINE__; 103 + goto fail; 104 + } 105 + 106 + if (lookup_key < tnode->key) 107 + rb_n = bpf_rbtree_left(&root, rb_n); 108 + else 109 + rb_n = bpf_rbtree_right(&root, rb_n); 110 + } 111 + bpf_spin_unlock(&lock); 112 + 113 + while (can_loop) { 114 + bpf_spin_lock(&lock); 115 + rb_n = bpf_rbtree_first(&root); 116 + if (!rb_n) { 117 + bpf_spin_unlock(&lock); 118 + return 0; 119 + } 120 + 121 + rb_n = bpf_rbtree_remove(&root, rb_n); 122 + if (!rb_n) { 123 + ret = __LINE__; 124 + goto fail; 125 + } 126 + bpf_spin_unlock(&lock); 127 + 128 + tnode = container_of(rb_n, struct tree_node, node); 129 + 130 + node_data = bpf_kptr_xchg(&tnode->node_data, NULL); 131 + if (node_data) 132 + bpf_obj_drop(node_data); 133 + 134 + bpf_obj_drop(tnode); 135 + } 136 + 137 + return 0; 138 + fail: 139 + bpf_spin_unlock(&lock); 140 + return ret; 141 + } 142 + 143 + static bool less_r(struct bpf_rb_node *a, const struct bpf_rb_node *b) 144 + { 145 + struct tree_node_ref *node_a, *node_b; 146 + 147 + node_a = container_of(a, struct tree_node_ref, node); 148 + node_b = container_of(b, struct tree_node_ref, node); 149 + 150 + return node_a->key < node_b->key; 151 + } 152 + 153 + SEC("syscall") 154 + __retval(0) 155 + long rbtree_search_kptr_ref(void *ctx) 156 + { 157 + struct tree_node_ref *tnode_r, *tnode_m; 158 + struct bpf_rb_node *rb_n; 159 + struct node_data __kptr * node_data; 160 + int lookup_key = NR_NODES / 2; 161 + int lookup_data = NR_NODES / 2; 162 + int i, data, ret = 0; 163 + 164 + for (i = 0; i < NR_NODES && can_loop; i++) { 165 + tnode_r = bpf_obj_new(typeof(*tnode_r)); 166 + if (!tnode_r) 167 + return __LINE__; 168 + 169 + node_data = bpf_obj_new(typeof(*node_data)); 170 + if (!node_data) { 171 + bpf_obj_drop(tnode_r); 172 + return __LINE__; 173 + } 174 + 175 + tnode_r->key = i; 176 + node_data->data = i; 177 + 178 + node_data = bpf_kptr_xchg(&tnode_r->node_data, node_data); 179 + if (node_data) 180 + bpf_obj_drop(node_data); 181 + 182 + /* Unused reference */ 183 + tnode_m = bpf_refcount_acquire(tnode_r); 184 + if (!tnode_m) 185 + return __LINE__; 186 + 187 + bpf_spin_lock(&lock_r); 188 + bpf_rbtree_add(&root_r, &tnode_r->node, less_r); 189 + bpf_spin_unlock(&lock_r); 190 + 191 + bpf_obj_drop(tnode_m); 192 + } 193 + 194 + bpf_spin_lock(&lock_r); 195 + rb_n = bpf_rbtree_root(&root_r); 196 + while (rb_n && can_loop) { 197 + tnode_r = container_of(rb_n, struct tree_node_ref, node); 198 + node_data = bpf_kptr_xchg(&tnode_r->node_data, NULL); 199 + if (!node_data) { 200 + ret = __LINE__; 201 + goto fail; 202 + } 203 + 204 + data = node_data->data; 205 + node_data = bpf_kptr_xchg(&tnode_r->node_data, node_data); 206 + if (node_data) { 207 + bpf_spin_unlock(&lock_r); 208 + bpf_obj_drop(node_data); 209 + return __LINE__; 210 + } 211 + 212 + if (lookup_key == tnode_r->key) { 213 + if (data == lookup_data) 214 + break; 215 + 216 + ret = __LINE__; 217 + goto fail; 218 + } 219 + 220 + if (lookup_key < tnode_r->key) 221 + rb_n = bpf_rbtree_left(&root_r, rb_n); 222 + else 223 + rb_n = bpf_rbtree_right(&root_r, rb_n); 224 + } 225 + bpf_spin_unlock(&lock_r); 226 + 227 + while (can_loop) { 228 + bpf_spin_lock(&lock_r); 229 + rb_n = bpf_rbtree_first(&root_r); 230 + if (!rb_n) { 231 + bpf_spin_unlock(&lock_r); 232 + return 0; 233 + } 234 + 235 + rb_n = bpf_rbtree_remove(&root_r, rb_n); 236 + if (!rb_n) { 237 + ret = __LINE__; 238 + goto fail; 239 + } 240 + bpf_spin_unlock(&lock_r); 241 + 242 + tnode_r = container_of(rb_n, struct tree_node_ref, node); 243 + 244 + node_data = bpf_kptr_xchg(&tnode_r->node_data, NULL); 245 + if (node_data) 246 + bpf_obj_drop(node_data); 247 + 248 + bpf_obj_drop(tnode_r); 249 + } 250 + 251 + return 0; 252 + fail: 253 + bpf_spin_unlock(&lock_r); 254 + return ret; 255 + } 256 + 257 + SEC("syscall") 258 + __failure __msg("R1 type=scalar expected=map_value, ptr_, ptr_") 259 + long non_own_ref_kptr_xchg_no_lock(void *ctx) 260 + { 261 + struct tree_node *tnode; 262 + struct bpf_rb_node *rb_n; 263 + struct node_data __kptr * node_data; 264 + int data; 265 + 266 + bpf_spin_lock(&lock); 267 + rb_n = bpf_rbtree_first(&root); 268 + if (!rb_n) { 269 + bpf_spin_unlock(&lock); 270 + return __LINE__; 271 + } 272 + bpf_spin_unlock(&lock); 273 + 274 + tnode = container_of(rb_n, struct tree_node, node); 275 + node_data = bpf_kptr_xchg(&tnode->node_data, NULL); 276 + if (!node_data) 277 + return __LINE__; 278 + 279 + data = node_data->data; 280 + if (data < 0) 281 + return __LINE__; 282 + 283 + node_data = bpf_kptr_xchg(&tnode->node_data, node_data); 284 + if (node_data) 285 + return __LINE__; 286 + 287 + return 0; 288 + } 289 + 290 + char _license[] SEC("license") = "GPL";