Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Refactor active lock management

When bpf_spin_lock was introduced originally, there was deliberation on
whether to use an array of lock IDs, but since bpf_spin_lock is limited
to holding a single lock at any given time, we've been using a single ID
to identify the held lock.

In preparation for introducing spin locks that can be taken multiple
times, introduce support for acquiring multiple lock IDs. For this
purpose, reuse the acquired_refs array and store both lock and pointer
references. We tag the entry with REF_TYPE_PTR or REF_TYPE_LOCK to
disambiguate and find the relevant entry. The ptr field is used to track
the map_ptr or btf (for bpf_obj_new allocations) to ensure locks can be
matched with protected fields within the same "allocation", i.e.
bpf_obj_new object or map value.

The struct active_lock is changed to an int as the state is part of the
acquired_refs array, and we only need active_lock as a cheap way of
detecting lock presence.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20241109231430.2475236-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

authored by

Kumar Kartikeya Dwivedi and committed by
Andrii Nakryiko
f6b9a69a 937a1c29

+132 -67
+25 -28
include/linux/bpf_verifier.h
··· 48 48 REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ 49 49 }; 50 50 51 - /* For every reg representing a map value or allocated object pointer, 52 - * we consider the tuple of (ptr, id) for them to be unique in verifier 53 - * context and conside them to not alias each other for the purposes of 54 - * tracking lock state. 55 - */ 56 - struct bpf_active_lock { 57 - /* This can either be reg->map_ptr or reg->btf. If ptr is NULL, 58 - * there's no active lock held, and other fields have no 59 - * meaning. If non-NULL, it indicates that a lock is held and 60 - * id member has the reg->id of the register which can be >= 0. 61 - */ 62 - void *ptr; 63 - /* This will be reg->id */ 64 - u32 id; 65 - }; 66 - 67 51 #define ITER_PREFIX "bpf_iter_" 68 52 69 53 enum bpf_iter_state { ··· 250 266 }; 251 267 252 268 struct bpf_reference_state { 269 + /* Each reference object has a type. Ensure REF_TYPE_PTR is zero to 270 + * default to pointer reference on zero initialization of a state. 271 + */ 272 + enum ref_state_type { 273 + REF_TYPE_PTR = 0, 274 + REF_TYPE_LOCK, 275 + } type; 253 276 /* Track each reference created with a unique id, even if the same 254 277 * instruction creates the reference multiple times (eg, via CALL). 255 278 */ ··· 265 274 * is used purely to inform the user of a reference leak. 266 275 */ 267 276 int insn_idx; 268 - /* There can be a case like: 269 - * main (frame 0) 270 - * cb (frame 1) 271 - * func (frame 3) 272 - * cb (frame 4) 273 - * Hence for frame 4, if callback_ref just stored boolean, it would be 274 - * impossible to distinguish nested callback refs. Hence store the 275 - * frameno and compare that to callback_ref in check_reference_leak when 276 - * exiting a callback function. 277 - */ 278 - int callback_ref; 277 + union { 278 + /* There can be a case like: 279 + * main (frame 0) 280 + * cb (frame 1) 281 + * func (frame 3) 282 + * cb (frame 4) 283 + * Hence for frame 4, if callback_ref just stored boolean, it would be 284 + * impossible to distinguish nested callback refs. Hence store the 285 + * frameno and compare that to callback_ref in check_reference_leak when 286 + * exiting a callback function. 287 + */ 288 + int callback_ref; 289 + /* Use to keep track of the source object of a lock, to ensure 290 + * it matches on unlock. 291 + */ 292 + void *ptr; 293 + }; 279 294 }; 280 295 281 296 struct bpf_retval_range { ··· 329 332 330 333 /* The following fields should be last. See copy_func_state() */ 331 334 int acquired_refs; 335 + int active_locks; 332 336 struct bpf_reference_state *refs; 333 337 /* The state of the stack. Each element of the array describes BPF_REG_SIZE 334 338 * (i.e. 8) bytes worth of stack memory. ··· 432 434 u32 insn_idx; 433 435 u32 curframe; 434 436 435 - struct bpf_active_lock active_lock; 436 437 bool speculative; 437 438 bool active_rcu_lock; 438 439 u32 active_preempt_lock;
+107 -39
kernel/bpf/verifier.c
··· 1284 1284 if (!dst->refs) 1285 1285 return -ENOMEM; 1286 1286 1287 + dst->active_locks = src->active_locks; 1287 1288 dst->acquired_refs = src->acquired_refs; 1288 1289 return 0; 1289 1290 } ··· 1355 1354 if (err) 1356 1355 return err; 1357 1356 id = ++env->id_gen; 1357 + state->refs[new_ofs].type = REF_TYPE_PTR; 1358 1358 state->refs[new_ofs].id = id; 1359 1359 state->refs[new_ofs].insn_idx = insn_idx; 1360 1360 state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0; 1361 1361 1362 1362 return id; 1363 + } 1364 + 1365 + static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type, 1366 + int id, void *ptr) 1367 + { 1368 + struct bpf_func_state *state = cur_func(env); 1369 + int new_ofs = state->acquired_refs; 1370 + int err; 1371 + 1372 + err = resize_reference_state(state, state->acquired_refs + 1); 1373 + if (err) 1374 + return err; 1375 + state->refs[new_ofs].type = type; 1376 + state->refs[new_ofs].id = id; 1377 + state->refs[new_ofs].insn_idx = insn_idx; 1378 + state->refs[new_ofs].ptr = ptr; 1379 + 1380 + state->active_locks++; 1381 + return 0; 1363 1382 } 1364 1383 1365 1384 /* release function corresponding to acquire_reference_state(). Idempotent. */ ··· 1389 1368 1390 1369 last_idx = state->acquired_refs - 1; 1391 1370 for (i = 0; i < state->acquired_refs; i++) { 1371 + if (state->refs[i].type != REF_TYPE_PTR) 1372 + continue; 1392 1373 if (state->refs[i].id == ptr_id) { 1393 1374 /* Cannot release caller references in callbacks */ 1394 1375 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) ··· 1404 1381 } 1405 1382 } 1406 1383 return -EINVAL; 1384 + } 1385 + 1386 + static int release_lock_state(struct bpf_func_state *state, int type, int id, void *ptr) 1387 + { 1388 + int i, last_idx; 1389 + 1390 + last_idx = state->acquired_refs - 1; 1391 + for (i = 0; i < state->acquired_refs; i++) { 1392 + if (state->refs[i].type != type) 1393 + continue; 1394 + if (state->refs[i].id == id && state->refs[i].ptr == ptr) { 1395 + if (last_idx && i != last_idx) 1396 + memcpy(&state->refs[i], &state->refs[last_idx], 1397 + sizeof(*state->refs)); 1398 + memset(&state->refs[last_idx], 0, sizeof(*state->refs)); 1399 + state->acquired_refs--; 1400 + state->active_locks--; 1401 + return 0; 1402 + } 1403 + } 1404 + return -EINVAL; 1405 + } 1406 + 1407 + static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env, enum ref_state_type type, 1408 + int id, void *ptr) 1409 + { 1410 + struct bpf_func_state *state = cur_func(env); 1411 + int i; 1412 + 1413 + for (i = 0; i < state->acquired_refs; i++) { 1414 + struct bpf_reference_state *s = &state->refs[i]; 1415 + 1416 + if (s->type == REF_TYPE_PTR || s->type != type) 1417 + continue; 1418 + 1419 + if (s->id == id && s->ptr == ptr) 1420 + return s; 1421 + } 1422 + return NULL; 1407 1423 } 1408 1424 1409 1425 static void free_func_state(struct bpf_func_state *state) ··· 1515 1453 dst_state->active_preempt_lock = src->active_preempt_lock; 1516 1454 dst_state->in_sleepable = src->in_sleepable; 1517 1455 dst_state->curframe = src->curframe; 1518 - dst_state->active_lock.ptr = src->active_lock.ptr; 1519 - dst_state->active_lock.id = src->active_lock.id; 1520 1456 dst_state->branches = src->branches; 1521 1457 dst_state->parent = src->parent; 1522 1458 dst_state->first_insn_idx = src->first_insn_idx; ··· 5502 5442 static bool in_rcu_cs(struct bpf_verifier_env *env) 5503 5443 { 5504 5444 return env->cur_state->active_rcu_lock || 5505 - env->cur_state->active_lock.ptr || 5445 + cur_func(env)->active_locks || 5506 5446 !in_sleepable(env); 5507 5447 } 5508 5448 ··· 7784 7724 * Since only one bpf_spin_lock is allowed the checks are simpler than 7785 7725 * reg_is_refcounted() logic. The verifier needs to remember only 7786 7726 * one spin_lock instead of array of acquired_refs. 7787 - * cur_state->active_lock remembers which map value element or allocated 7727 + * cur_func(env)->active_locks remembers which map value element or allocated 7788 7728 * object got locked and clears it after bpf_spin_unlock. 7789 7729 */ 7790 7730 static int process_spin_lock(struct bpf_verifier_env *env, int regno, 7791 7731 bool is_lock) 7792 7732 { 7793 7733 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; 7794 - struct bpf_verifier_state *cur = env->cur_state; 7795 7734 bool is_const = tnum_is_const(reg->var_off); 7735 + struct bpf_func_state *cur = cur_func(env); 7796 7736 u64 val = reg->var_off.value; 7797 7737 struct bpf_map *map = NULL; 7798 7738 struct btf *btf = NULL; 7799 7739 struct btf_record *rec; 7740 + int err; 7800 7741 7801 7742 if (!is_const) { 7802 7743 verbose(env, ··· 7829 7768 return -EINVAL; 7830 7769 } 7831 7770 if (is_lock) { 7832 - if (cur->active_lock.ptr) { 7771 + void *ptr; 7772 + 7773 + if (map) 7774 + ptr = map; 7775 + else 7776 + ptr = btf; 7777 + 7778 + if (cur->active_locks) { 7833 7779 verbose(env, 7834 7780 "Locking two bpf_spin_locks are not allowed\n"); 7835 7781 return -EINVAL; 7836 7782 } 7837 - if (map) 7838 - cur->active_lock.ptr = map; 7839 - else 7840 - cur->active_lock.ptr = btf; 7841 - cur->active_lock.id = reg->id; 7783 + err = acquire_lock_state(env, env->insn_idx, REF_TYPE_LOCK, reg->id, ptr); 7784 + if (err < 0) { 7785 + verbose(env, "Failed to acquire lock state\n"); 7786 + return err; 7787 + } 7842 7788 } else { 7843 7789 void *ptr; 7844 7790 ··· 7854 7786 else 7855 7787 ptr = btf; 7856 7788 7857 - if (!cur->active_lock.ptr) { 7789 + if (!cur->active_locks) { 7858 7790 verbose(env, "bpf_spin_unlock without taking a lock\n"); 7859 7791 return -EINVAL; 7860 7792 } 7861 - if (cur->active_lock.ptr != ptr || 7862 - cur->active_lock.id != reg->id) { 7793 + 7794 + if (release_lock_state(cur_func(env), REF_TYPE_LOCK, reg->id, ptr)) { 7863 7795 verbose(env, "bpf_spin_unlock of different lock\n"); 7864 7796 return -EINVAL; 7865 7797 } 7866 7798 7867 7799 invalidate_non_owning_refs(env); 7868 - 7869 - cur->active_lock.ptr = NULL; 7870 - cur->active_lock.id = 0; 7871 7800 } 7872 7801 return 0; 7873 7802 } ··· 9926 9861 const char *sub_name = subprog_name(env, subprog); 9927 9862 9928 9863 /* Only global subprogs cannot be called with a lock held. */ 9929 - if (env->cur_state->active_lock.ptr) { 9864 + if (cur_func(env)->active_locks) { 9930 9865 verbose(env, "global function calls are not allowed while holding a lock,\n" 9931 9866 "use static function instead\n"); 9932 9867 return -EINVAL; ··· 10451 10386 return 0; 10452 10387 10453 10388 for (i = 0; i < state->acquired_refs; i++) { 10389 + if (state->refs[i].type != REF_TYPE_PTR) 10390 + continue; 10454 10391 if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno) 10455 10392 continue; 10456 10393 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", ··· 10466 10399 { 10467 10400 int err; 10468 10401 10469 - if (check_lock && env->cur_state->active_lock.ptr) { 10402 + if (check_lock && cur_func(env)->active_locks) { 10470 10403 verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix); 10471 10404 return -EINVAL; 10472 10405 } ··· 11687 11620 11688 11621 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 11689 11622 { 11690 - struct bpf_verifier_state *state = env->cur_state; 11691 11623 struct btf_record *rec = reg_btf_record(reg); 11692 11624 11693 - if (!state->active_lock.ptr) { 11625 + if (!cur_func(env)->active_locks) { 11694 11626 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n"); 11695 11627 return -EFAULT; 11696 11628 } ··· 11786 11720 */ 11787 11721 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 11788 11722 { 11723 + struct bpf_reference_state *s; 11789 11724 void *ptr; 11790 11725 u32 id; 11791 11726 ··· 11803 11736 } 11804 11737 id = reg->id; 11805 11738 11806 - if (!env->cur_state->active_lock.ptr) 11739 + if (!cur_func(env)->active_locks) 11807 11740 return -EINVAL; 11808 - if (env->cur_state->active_lock.ptr != ptr || 11809 - env->cur_state->active_lock.id != id) { 11741 + s = find_lock_state(env, REF_TYPE_LOCK, id, ptr); 11742 + if (!s) { 11810 11743 verbose(env, "held lock and object are not in the same allocation\n"); 11811 11744 return -EINVAL; 11812 11745 } ··· 17702 17635 return false; 17703 17636 17704 17637 for (i = 0; i < old->acquired_refs; i++) { 17705 - if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap)) 17638 + if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || 17639 + old->refs[i].type != cur->refs[i].type) 17706 17640 return false; 17641 + switch (old->refs[i].type) { 17642 + case REF_TYPE_PTR: 17643 + if (old->refs[i].callback_ref != cur->refs[i].callback_ref) 17644 + return false; 17645 + break; 17646 + case REF_TYPE_LOCK: 17647 + if (old->refs[i].ptr != cur->refs[i].ptr) 17648 + return false; 17649 + break; 17650 + default: 17651 + WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type); 17652 + return false; 17653 + } 17707 17654 } 17708 17655 17709 17656 return true; ··· 17793 17712 * must never prune a non-speculative execution one. 17794 17713 */ 17795 17714 if (old->speculative && !cur->speculative) 17796 - return false; 17797 - 17798 - if (old->active_lock.ptr != cur->active_lock.ptr) 17799 - return false; 17800 - 17801 - /* Old and cur active_lock's have to be either both present 17802 - * or both absent. 17803 - */ 17804 - if (!!old->active_lock.id != !!cur->active_lock.id) 17805 - return false; 17806 - 17807 - if (old->active_lock.id && 17808 - !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch)) 17809 17715 return false; 17810 17716 17811 17717 if (old->active_rcu_lock != cur->active_rcu_lock) ··· 18693 18625 return -EINVAL; 18694 18626 } 18695 18627 18696 - if (env->cur_state->active_lock.ptr) { 18628 + if (cur_func(env)->active_locks) { 18697 18629 if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || 18698 18630 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && 18699 18631 (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {