Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'bpf-fix-conditions-when-timer-wq-can-be-called'

Alexei Starovoitov says:

====================
bpf: Fix conditions when timer/wq can be called

From: Alexei Starovoitov <ast@kernel.org>

v2->v3:
- Add missing refcount_put
- Detect recursion of individual async_cb
v2: https://lore.kernel.org/bpf/20260204040834.22263-4-alexei.starovoitov@gmail.com/

v1->v2:
- Add a recursion check
v1: https://lore.kernel.org/bpf/20260204030927.171-1-alexei.starovoitov@gmail.com/
====================

Link: https://patch.msgid.link/20260204055147.54960-1-alexei.starovoitov@gmail.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

+128 -6
+25 -6
kernel/bpf/helpers.c
··· 1427 1427 return 0; 1428 1428 } 1429 1429 1430 + static DEFINE_PER_CPU(struct bpf_async_cb *, async_cb_running); 1431 + 1430 1432 static int bpf_async_schedule_op(struct bpf_async_cb *cb, enum bpf_async_op op, 1431 1433 u64 nsec, u32 timer_mode) 1432 1434 { 1433 - WARN_ON_ONCE(!in_hardirq()); 1435 + /* 1436 + * Do not schedule another operation on this cpu if it's in irq_work 1437 + * callback that is processing async_cmds queue. Otherwise the following 1438 + * loop is possible: 1439 + * bpf_timer_start() -> bpf_async_schedule_op() -> irq_work_queue(). 1440 + * irqrestore -> bpf_async_irq_worker() -> tracepoint -> bpf_timer_start(). 1441 + */ 1442 + if (this_cpu_read(async_cb_running) == cb) { 1443 + bpf_async_refcount_put(cb); 1444 + return -EDEADLK; 1445 + } 1434 1446 1435 1447 struct bpf_async_cmd *cmd = kmalloc_nolock(sizeof(*cmd), 0, NUMA_NO_NODE); 1436 1448 ··· 1485 1473 .arg2_type = ARG_PTR_TO_FUNC, 1486 1474 }; 1487 1475 1476 + static bool defer_timer_wq_op(void) 1477 + { 1478 + return in_hardirq() || irqs_disabled(); 1479 + } 1480 + 1488 1481 BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, async, u64, nsecs, u64, flags) 1489 1482 { 1490 1483 struct bpf_hrtimer *t; ··· 1517 1500 if (!refcount_inc_not_zero(&t->cb.refcnt)) 1518 1501 return -ENOENT; 1519 1502 1520 - if (!in_hardirq()) { 1503 + if (!defer_timer_wq_op()) { 1521 1504 hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode); 1522 1505 bpf_async_refcount_put(&t->cb); 1523 1506 return 0; ··· 1541 1524 bool inc = false; 1542 1525 int ret = 0; 1543 1526 1544 - if (in_hardirq()) 1527 + if (defer_timer_wq_op()) 1545 1528 return -EOPNOTSUPP; 1546 1529 1547 1530 t = READ_ONCE(async->timer); ··· 1642 1625 return; 1643 1626 1644 1627 list = llist_reverse_order(list); 1628 + this_cpu_write(async_cb_running, cb); 1645 1629 llist_for_each_safe(pos, n, list) { 1646 1630 struct bpf_async_cmd *cmd; 1647 1631 ··· 1650 1632 bpf_async_process_op(cb, cmd->op, cmd->nsec, cmd->mode); 1651 1633 kfree_nolock(cmd); 
1652 1634 } 1635 + this_cpu_write(async_cb_running, NULL); 1653 1636 } 1654 1637 1655 1638 static void bpf_async_cancel_and_free(struct bpf_async_kern *async) ··· 1669 1650 * refcnt. Either synchronously or asynchronously in irq_work. 1670 1651 */ 1671 1652 1672 - if (!in_hardirq()) { 1653 + if (!defer_timer_wq_op()) { 1673 1654 bpf_async_process_op(cb, BPF_ASYNC_CANCEL, 0, 0); 1674 1655 } else { 1675 1656 (void)bpf_async_schedule_op(cb, BPF_ASYNC_CANCEL, 0, 0); ··· 3180 3161 if (!refcount_inc_not_zero(&w->cb.refcnt)) 3181 3162 return -ENOENT; 3182 3163 3183 - if (!in_hardirq()) { 3164 + if (!defer_timer_wq_op()) { 3184 3165 schedule_work(&w->work); 3185 3166 bpf_async_refcount_put(&w->cb); 3186 3167 return 0; ··· 4480 4461 if (!refcount_inc_not_zero(&cb->refcnt)) 4481 4462 return -ENOENT; 4482 4463 4483 - if (!in_hardirq()) { 4464 + if (!defer_timer_wq_op()) { 4484 4465 struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb); 4485 4466 4486 4467 ret = hrtimer_try_to_cancel(&t->timer);
+33
tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ 3 + #include <test_progs.h> 4 + #include "timer_start_deadlock.skel.h" 5 + 6 + void test_timer_start_deadlock(void) 7 + { 8 + struct timer_start_deadlock *skel; 9 + int err, prog_fd; 10 + LIBBPF_OPTS(bpf_test_run_opts, opts); 11 + 12 + skel = timer_start_deadlock__open_and_load(); 13 + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) 14 + return; 15 + 16 + err = timer_start_deadlock__attach(skel); 17 + if (!ASSERT_OK(err, "skel_attach")) 18 + goto cleanup; 19 + 20 + prog_fd = bpf_program__fd(skel->progs.start_timer); 21 + 22 + /* 23 + * Run the syscall program that attempts to deadlock. 24 + * If the kernel deadlocks, this call will never return. 25 + */ 26 + err = bpf_prog_test_run_opts(prog_fd, &opts); 27 + ASSERT_OK(err, "prog_test_run"); 28 + ASSERT_EQ(opts.retval, 0, "prog_retval"); 29 + 30 + ASSERT_EQ(skel->bss->tp_called, 1, "tp_called"); 31 + cleanup: 32 + timer_start_deadlock__destroy(skel); 33 + }
+70
tools/testing/selftests/bpf/progs/timer_start_deadlock.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ 3 + #include <vmlinux.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/bpf_tracing.h> 6 + 7 + #define CLOCK_MONOTONIC 1 8 + 9 + char _license[] SEC("license") = "GPL"; 10 + 11 + struct elem { 12 + struct bpf_timer timer; 13 + }; 14 + 15 + struct { 16 + __uint(type, BPF_MAP_TYPE_ARRAY); 17 + __uint(max_entries, 1); 18 + __type(key, int); 19 + __type(value, struct elem); 20 + } timer_map SEC(".maps"); 21 + 22 + volatile int in_timer_start; 23 + volatile int tp_called; 24 + 25 + static int timer_cb(void *map, int *key, struct elem *value) 26 + { 27 + return 0; 28 + } 29 + 30 + SEC("tp_btf/hrtimer_cancel") 31 + int BPF_PROG(tp_hrtimer_cancel, struct hrtimer *hrtimer) 32 + { 33 + struct bpf_timer *timer; 34 + int key = 0; 35 + 36 + if (!in_timer_start) 37 + return 0; 38 + 39 + tp_called = 1; 40 + timer = bpf_map_lookup_elem(&timer_map, &key); 41 + 42 + /* 43 + * Call bpf_timer_start() from the tracepoint within hrtimer logic 44 + * on the same timer to make sure it doesn't deadlock. 45 + */ 46 + bpf_timer_start(timer, 1000000000, 0); 47 + return 0; 48 + } 49 + 50 + SEC("syscall") 51 + int start_timer(void *ctx) 52 + { 53 + struct bpf_timer *timer; 54 + int key = 0; 55 + 56 + timer = bpf_map_lookup_elem(&timer_map, &key); 57 + /* claude may complain here that there is no NULL check. Ignoring it. */ 58 + bpf_timer_init(timer, &timer_map, CLOCK_MONOTONIC); 59 + bpf_timer_set_callback(timer, timer_cb); 60 + 61 + /* 62 + * call hrtimer_start() twice, so that 2nd call does 63 + * remove_hrtimer() and trace_hrtimer_cancel() tracepoint. 64 + */ 65 + in_timer_start = 1; 66 + bpf_timer_start(timer, 1000000000, 0); 67 + bpf_timer_start(timer, 1000000000, 0); 68 + in_timer_start = 0; 69 + return 0; 70 + }