Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'refactor-lock-management'

Kumar Kartikeya Dwivedi says:

====================
Refactor lock management

This set refactors lock management in the verifier in preparation for
spin locks that can be acquired multiple times. In addition to this,
the special-case reference leak logic for callbacks is also dropped, as
it is no longer necessary. See patches for details.

Changelog:
----------
v5 -> v6
v5: https://lore.kernel.org/bpf/20241109225243.2306756-1-memxor@gmail.com

* Move active_locks mutation to {acquire,release}_lock_state (Alexei)

v4 -> v5
v4: https://lore.kernel.org/bpf/20241109074347.1434011-1-memxor@gmail.com

* Make active_locks part of bpf_func_state (Alexei)
* Remove unneeded in_callback_fn logic for references

v3 -> v4
v3: https://lore.kernel.org/bpf/20241104151716.2079893-1-memxor@gmail.com

* Address comments from Alexei
* Drop struct bpf_active_lock definition
* Name enum type, expand definition to multiple lines
* s/REF_TYPE_BPF_LOCK/REF_TYPE_LOCK/g
* Change active_lock type to int
* Fix type of 'type' in acquire_lock_state
* Filter by taking type explicitly in find_lock_state
* WARN for default case in refsafe switch statement

v2 -> v3
v2: https://lore.kernel.org/bpf/20241103212252.547071-1-memxor@gmail.com

* Rebase on bpf-next to resolve merge conflict

v1 -> v2
v1: https://lore.kernel.org/bpf/20241103205856.345580-1-memxor@gmail.com

* Fix refsafe state comparison to check callback_ref and ptr separately.
====================

Link: https://lore.kernel.org/r/20241109231430.2475236-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

authored by

Alexei Starovoitov and committed by
Andrii Nakryiko
7b6e5bfa 937a1c29

+122 -85
+11 -27
include/linux/bpf_verifier.h
··· 48 48 REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ 49 49 }; 50 50 51 - /* For every reg representing a map value or allocated object pointer, 52 - * we consider the tuple of (ptr, id) for them to be unique in verifier 53 - * context and conside them to not alias each other for the purposes of 54 - * tracking lock state. 55 - */ 56 - struct bpf_active_lock { 57 - /* This can either be reg->map_ptr or reg->btf. If ptr is NULL, 58 - * there's no active lock held, and other fields have no 59 - * meaning. If non-NULL, it indicates that a lock is held and 60 - * id member has the reg->id of the register which can be >= 0. 61 - */ 62 - void *ptr; 63 - /* This will be reg->id */ 64 - u32 id; 65 - }; 66 - 67 51 #define ITER_PREFIX "bpf_iter_" 68 52 69 53 enum bpf_iter_state { ··· 250 266 }; 251 267 252 268 struct bpf_reference_state { 269 + /* Each reference object has a type. Ensure REF_TYPE_PTR is zero to 270 + * default to pointer reference on zero initialization of a state. 271 + */ 272 + enum ref_state_type { 273 + REF_TYPE_PTR = 0, 274 + REF_TYPE_LOCK, 275 + } type; 253 276 /* Track each reference created with a unique id, even if the same 254 277 * instruction creates the reference multiple times (eg, via CALL). 255 278 */ ··· 265 274 * is used purely to inform the user of a reference leak. 266 275 */ 267 276 int insn_idx; 268 - /* There can be a case like: 269 - * main (frame 0) 270 - * cb (frame 1) 271 - * func (frame 3) 272 - * cb (frame 4) 273 - * Hence for frame 4, if callback_ref just stored boolean, it would be 274 - * impossible to distinguish nested callback refs. Hence store the 275 - * frameno and compare that to callback_ref in check_reference_leak when 276 - * exiting a callback function. 277 + /* Use to keep track of the source object of a lock, to ensure 278 + * it matches on unlock. 
277 279 */ 278 - int callback_ref; 280 + void *ptr; 279 281 }; 280 282 281 283 struct bpf_retval_range { ··· 316 332 317 333 /* The following fields should be last. See copy_func_state() */ 318 334 int acquired_refs; 335 + int active_locks; 319 336 struct bpf_reference_state *refs; 320 337 /* The state of the stack. Each element of the array describes BPF_REG_SIZE 321 338 * (i.e. 8) bytes worth of stack memory. ··· 419 434 u32 insn_idx; 420 435 u32 curframe; 421 436 422 - struct bpf_active_lock active_lock; 423 437 bool speculative; 424 438 bool active_rcu_lock; 425 439 u32 active_preempt_lock;
+109 -56
kernel/bpf/verifier.c
··· 1284 1284 if (!dst->refs) 1285 1285 return -ENOMEM; 1286 1286 1287 + dst->active_locks = src->active_locks; 1287 1288 dst->acquired_refs = src->acquired_refs; 1288 1289 return 0; 1289 1290 } ··· 1355 1354 if (err) 1356 1355 return err; 1357 1356 id = ++env->id_gen; 1357 + state->refs[new_ofs].type = REF_TYPE_PTR; 1358 1358 state->refs[new_ofs].id = id; 1359 1359 state->refs[new_ofs].insn_idx = insn_idx; 1360 - state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0; 1361 1360 1362 1361 return id; 1362 + } 1363 + 1364 + static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type, 1365 + int id, void *ptr) 1366 + { 1367 + struct bpf_func_state *state = cur_func(env); 1368 + int new_ofs = state->acquired_refs; 1369 + int err; 1370 + 1371 + err = resize_reference_state(state, state->acquired_refs + 1); 1372 + if (err) 1373 + return err; 1374 + state->refs[new_ofs].type = type; 1375 + state->refs[new_ofs].id = id; 1376 + state->refs[new_ofs].insn_idx = insn_idx; 1377 + state->refs[new_ofs].ptr = ptr; 1378 + 1379 + state->active_locks++; 1380 + return 0; 1363 1381 } 1364 1382 1365 1383 /* release function corresponding to acquire_reference_state(). Idempotent. 
*/ ··· 1388 1368 1389 1369 last_idx = state->acquired_refs - 1; 1390 1370 for (i = 0; i < state->acquired_refs; i++) { 1371 + if (state->refs[i].type != REF_TYPE_PTR) 1372 + continue; 1391 1373 if (state->refs[i].id == ptr_id) { 1392 - /* Cannot release caller references in callbacks */ 1393 - if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) 1394 - return -EINVAL; 1395 1374 if (last_idx && i != last_idx) 1396 1375 memcpy(&state->refs[i], &state->refs[last_idx], 1397 1376 sizeof(*state->refs)); ··· 1400 1381 } 1401 1382 } 1402 1383 return -EINVAL; 1384 + } 1385 + 1386 + static int release_lock_state(struct bpf_func_state *state, int type, int id, void *ptr) 1387 + { 1388 + int i, last_idx; 1389 + 1390 + last_idx = state->acquired_refs - 1; 1391 + for (i = 0; i < state->acquired_refs; i++) { 1392 + if (state->refs[i].type != type) 1393 + continue; 1394 + if (state->refs[i].id == id && state->refs[i].ptr == ptr) { 1395 + if (last_idx && i != last_idx) 1396 + memcpy(&state->refs[i], &state->refs[last_idx], 1397 + sizeof(*state->refs)); 1398 + memset(&state->refs[last_idx], 0, sizeof(*state->refs)); 1399 + state->acquired_refs--; 1400 + state->active_locks--; 1401 + return 0; 1402 + } 1403 + } 1404 + return -EINVAL; 1405 + } 1406 + 1407 + static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env, enum ref_state_type type, 1408 + int id, void *ptr) 1409 + { 1410 + struct bpf_func_state *state = cur_func(env); 1411 + int i; 1412 + 1413 + for (i = 0; i < state->acquired_refs; i++) { 1414 + struct bpf_reference_state *s = &state->refs[i]; 1415 + 1416 + if (s->type == REF_TYPE_PTR || s->type != type) 1417 + continue; 1418 + 1419 + if (s->id == id && s->ptr == ptr) 1420 + return s; 1421 + } 1422 + return NULL; 1403 1423 } 1404 1424 1405 1425 static void free_func_state(struct bpf_func_state *state) ··· 1511 1453 dst_state->active_preempt_lock = src->active_preempt_lock; 1512 1454 dst_state->in_sleepable = src->in_sleepable; 
1513 1455 dst_state->curframe = src->curframe; 1514 - dst_state->active_lock.ptr = src->active_lock.ptr; 1515 - dst_state->active_lock.id = src->active_lock.id; 1516 1456 dst_state->branches = src->branches; 1517 1457 dst_state->parent = src->parent; 1518 1458 dst_state->first_insn_idx = src->first_insn_idx; ··· 5498 5442 static bool in_rcu_cs(struct bpf_verifier_env *env) 5499 5443 { 5500 5444 return env->cur_state->active_rcu_lock || 5501 - env->cur_state->active_lock.ptr || 5445 + cur_func(env)->active_locks || 5502 5446 !in_sleepable(env); 5503 5447 } 5504 5448 ··· 7780 7724 * Since only one bpf_spin_lock is allowed the checks are simpler than 7781 7725 * reg_is_refcounted() logic. The verifier needs to remember only 7782 7726 * one spin_lock instead of array of acquired_refs. 7783 - * cur_state->active_lock remembers which map value element or allocated 7727 + * cur_func(env)->active_locks remembers which map value element or allocated 7784 7728 * object got locked and clears it after bpf_spin_unlock. 
7785 7729 */ 7786 7730 static int process_spin_lock(struct bpf_verifier_env *env, int regno, 7787 7731 bool is_lock) 7788 7732 { 7789 7733 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; 7790 - struct bpf_verifier_state *cur = env->cur_state; 7791 7734 bool is_const = tnum_is_const(reg->var_off); 7735 + struct bpf_func_state *cur = cur_func(env); 7792 7736 u64 val = reg->var_off.value; 7793 7737 struct bpf_map *map = NULL; 7794 7738 struct btf *btf = NULL; 7795 7739 struct btf_record *rec; 7740 + int err; 7796 7741 7797 7742 if (!is_const) { 7798 7743 verbose(env, ··· 7825 7768 return -EINVAL; 7826 7769 } 7827 7770 if (is_lock) { 7828 - if (cur->active_lock.ptr) { 7771 + void *ptr; 7772 + 7773 + if (map) 7774 + ptr = map; 7775 + else 7776 + ptr = btf; 7777 + 7778 + if (cur->active_locks) { 7829 7779 verbose(env, 7830 7780 "Locking two bpf_spin_locks are not allowed\n"); 7831 7781 return -EINVAL; 7832 7782 } 7833 - if (map) 7834 - cur->active_lock.ptr = map; 7835 - else 7836 - cur->active_lock.ptr = btf; 7837 - cur->active_lock.id = reg->id; 7783 + err = acquire_lock_state(env, env->insn_idx, REF_TYPE_LOCK, reg->id, ptr); 7784 + if (err < 0) { 7785 + verbose(env, "Failed to acquire lock state\n"); 7786 + return err; 7787 + } 7838 7788 } else { 7839 7789 void *ptr; 7840 7790 ··· 7850 7786 else 7851 7787 ptr = btf; 7852 7788 7853 - if (!cur->active_lock.ptr) { 7789 + if (!cur->active_locks) { 7854 7790 verbose(env, "bpf_spin_unlock without taking a lock\n"); 7855 7791 return -EINVAL; 7856 7792 } 7857 - if (cur->active_lock.ptr != ptr || 7858 - cur->active_lock.id != reg->id) { 7793 + 7794 + if (release_lock_state(cur_func(env), REF_TYPE_LOCK, reg->id, ptr)) { 7859 7795 verbose(env, "bpf_spin_unlock of different lock\n"); 7860 7796 return -EINVAL; 7861 7797 } 7862 7798 7863 7799 invalidate_non_owning_refs(env); 7864 - 7865 - cur->active_lock.ptr = NULL; 7866 - cur->active_lock.id = 0; 7867 7800 } 7868 7801 return 0; 7869 7802 } ··· 9922 9861 const char 
*sub_name = subprog_name(env, subprog); 9923 9862 9924 9863 /* Only global subprogs cannot be called with a lock held. */ 9925 - if (env->cur_state->active_lock.ptr) { 9864 + if (cur_func(env)->active_locks) { 9926 9865 verbose(env, "global function calls are not allowed while holding a lock,\n" 9927 9866 "use static function instead\n"); 9928 9867 return -EINVAL; ··· 10263 10202 caller->regs[BPF_REG_0] = *r0; 10264 10203 } 10265 10204 10266 - /* callback_fn frame should have released its own additions to parent's 10267 - * reference state at this point, or check_reference_leak would 10268 - * complain, hence it must be the same as the caller. There is no need 10269 - * to copy it back. 10270 - */ 10271 - if (!callee->in_callback_fn) { 10272 - /* Transfer references to the caller */ 10273 - err = copy_reference_state(caller, callee); 10274 - if (err) 10275 - return err; 10276 - } 10205 + /* Transfer references to the caller */ 10206 + err = copy_reference_state(caller, callee); 10207 + if (err) 10208 + return err; 10277 10209 10278 10210 /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, 10279 10211 * there function call logic would reschedule callback visit. 
If iteration ··· 10436 10382 bool refs_lingering = false; 10437 10383 int i; 10438 10384 10439 - if (!exception_exit && state->frameno && !state->in_callback_fn) 10385 + if (!exception_exit && state->frameno) 10440 10386 return 0; 10441 10387 10442 10388 for (i = 0; i < state->acquired_refs; i++) { 10443 - if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno) 10389 + if (state->refs[i].type != REF_TYPE_PTR) 10444 10390 continue; 10445 10391 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", 10446 10392 state->refs[i].id, state->refs[i].insn_idx); ··· 10453 10399 { 10454 10400 int err; 10455 10401 10456 - if (check_lock && env->cur_state->active_lock.ptr) { 10402 + if (check_lock && cur_func(env)->active_locks) { 10457 10403 verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix); 10458 10404 return -EINVAL; 10459 10405 } ··· 11674 11620 11675 11621 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 11676 11622 { 11677 - struct bpf_verifier_state *state = env->cur_state; 11678 11623 struct btf_record *rec = reg_btf_record(reg); 11679 11624 11680 - if (!state->active_lock.ptr) { 11625 + if (!cur_func(env)->active_locks) { 11681 11626 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n"); 11682 11627 return -EFAULT; 11683 11628 } ··· 11773 11720 */ 11774 11721 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 11775 11722 { 11723 + struct bpf_reference_state *s; 11776 11724 void *ptr; 11777 11725 u32 id; 11778 11726 ··· 11790 11736 } 11791 11737 id = reg->id; 11792 11738 11793 - if (!env->cur_state->active_lock.ptr) 11739 + if (!cur_func(env)->active_locks) 11794 11740 return -EINVAL; 11795 - if (env->cur_state->active_lock.ptr != ptr || 11796 - env->cur_state->active_lock.id != id) { 11741 + s = find_lock_state(env, REF_TYPE_LOCK, id, ptr); 11742 + if (!s) { 11797 11743 verbose(env, "held lock and 
object are not in the same allocation\n"); 11798 11744 return -EINVAL; 11799 11745 } ··· 17689 17635 return false; 17690 17636 17691 17637 for (i = 0; i < old->acquired_refs; i++) { 17692 - if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap)) 17638 + if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || 17639 + old->refs[i].type != cur->refs[i].type) 17693 17640 return false; 17641 + switch (old->refs[i].type) { 17642 + case REF_TYPE_PTR: 17643 + break; 17644 + case REF_TYPE_LOCK: 17645 + if (old->refs[i].ptr != cur->refs[i].ptr) 17646 + return false; 17647 + break; 17648 + default: 17649 + WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type); 17650 + return false; 17651 + } 17694 17652 } 17695 17653 17696 17654 return true; ··· 17778 17712 * must never prune a non-speculative execution one. 17779 17713 */ 17780 17714 if (old->speculative && !cur->speculative) 17781 - return false; 17782 - 17783 - if (old->active_lock.ptr != cur->active_lock.ptr) 17784 - return false; 17785 - 17786 - /* Old and cur active_lock's have to be either both present 17787 - * or both absent. 17788 - */ 17789 - if (!!old->active_lock.id != !!cur->active_lock.id) 17790 - return false; 17791 - 17792 - if (old->active_lock.id && 17793 - !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch)) 17794 17715 return false; 17795 17716 17796 17717 if (old->active_rcu_lock != cur->active_rcu_lock) ··· 18678 18625 return -EINVAL; 18679 18626 } 18680 18627 18681 - if (env->cur_state->active_lock.ptr) { 18628 + if (cur_func(env)->active_locks) { 18682 18629 if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || 18683 18630 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && 18684 18631 (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
+2 -2
tools/testing/selftests/bpf/prog_tests/cb_refs.c
··· 11 11 const char *prog_name; 12 12 const char *err_msg; 13 13 } cb_refs_tests[] = { 14 - { "underflow_prog", "reference has not been acquired before" }, 15 - { "leak_prog", "Unreleased reference" }, 14 + { "underflow_prog", "must point to scalar, or struct with scalar" }, 15 + { "leak_prog", "Possibly NULL pointer passed to helper arg2" }, 16 16 { "nested_cb", "Unreleased reference id=4 alloc_insn=2" }, /* alloc_insn=2{4,5} */ 17 17 { "non_cb_transfer_ref", "Unreleased reference id=4 alloc_insn=1" }, /* alloc_insn=1{1,2} */ 18 18 };