Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'bpf-fix-tailcall-infinite-loop-caused-by-freplace'

Leon Hwang says:

====================
bpf: Fix tailcall infinite loop caused by freplace

Previously, I addressed a tailcall infinite loop issue related to
trampolines[0].

In this patchset, I resolve a similar issue where a tailcall infinite loop
can occur due to the combination of tailcalls and freplace programs. The
fix prevents adding extended programs to the prog_array map and blocks the
extension of a tail callee program with freplace.

Key changes:

1. If a program or its subprogram has been extended by an freplace program,
it can no longer be added to a prog_array map.
2. If a program has been added to a prog_array map, neither it nor its
subprograms can be extended by an freplace program.

Additionally, an extension program should not be tailcalled. As a result,
return -EINVAL if the program has a type of BPF_PROG_TYPE_EXT when adding
it to a prog_array map.

Changes:
v7 -> v8:
* Address comment from Alexei:
* The guard(mutex) scope should not extend all the way through
bpf_arch_text_poke().
* Address suggestion from Xu Kuohai:
* Extension prog should not be tailcalled independently.

v6 -> v7:
* Address comments from Alexei:
* Rewrite commit message to be more imperative and concise with AI.
* Extend bpf_trampoline_link_prog() and bpf_trampoline_unlink_prog()
to link and unlink target prog for freplace prog.
* Use guard(mutex)(&tgt_prog->aux->ext_mutex) instead of
mutex_lock()&mutex_unlock() pair.
* Address comment from Eduard:
* Remove misplaced "Reported-by" and "Closes" tags.

v5 -> v6:
* Fix a build warning reported by kernel test robot.

v4 -> v5:
* Move code of linking/unlinking target prog of freplace to trampoline.c.
* Address comments from Alexei:
* Change type of prog_array_member_cnt to u64.
* Combine two patches to one.

v3 -> v4:
* Address comments from Eduard:
* Rename 'tail_callee_cnt' to 'prog_array_member_cnt'.
* Add comment to 'prog_array_member_cnt'.
* Use a mutex to protect 'is_extended' and 'prog_array_member_cnt'.

v2 -> v3:
* Address comments from Alexei:
* Stop hacking JIT.
* Prevent the specific use case at attach/update time.

v1 -> v2:
* Address comment from Eduard:
* Explain why nop5 and xor/nop3 are swapped at prologue.
* Address comment from Alexei:
* Disallow attaching tail_call_reachable freplace prog to
not-tail_call_reachable target in verifier.
* Update "bpf, arm64: Fix tailcall infinite loop caused by freplace" with
latest arm64 JIT code.

Links:
[0] https://lore.kernel.org/bpf/20230912150442.2009-1-hffilwlqm@gmail.com/
====================

Link: https://lore.kernel.org/r/20241015150207.70264-1-leon.hwang@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+192 -35
+13 -4
include/linux/bpf.h
··· 1292 1292 bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); 1293 1293 1294 1294 #ifdef CONFIG_BPF_JIT 1295 - int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); 1296 - int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); 1295 + int bpf_trampoline_link_prog(struct bpf_tramp_link *link, 1296 + struct bpf_trampoline *tr, 1297 + struct bpf_prog *tgt_prog); 1298 + int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, 1299 + struct bpf_trampoline *tr, 1300 + struct bpf_prog *tgt_prog); 1297 1301 struct bpf_trampoline *bpf_trampoline_get(u64 key, 1298 1302 struct bpf_attach_target_info *tgt_info); 1299 1303 void bpf_trampoline_put(struct bpf_trampoline *tr); ··· 1378 1374 bool bpf_prog_has_trampoline(const struct bpf_prog *prog); 1379 1375 #else 1380 1376 static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, 1381 - struct bpf_trampoline *tr) 1377 + struct bpf_trampoline *tr, 1378 + struct bpf_prog *tgt_prog) 1382 1379 { 1383 1380 return -ENOTSUPP; 1384 1381 } 1385 1382 static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, 1386 - struct bpf_trampoline *tr) 1383 + struct bpf_trampoline *tr, 1384 + struct bpf_prog *tgt_prog) 1387 1385 { 1388 1386 return -ENOTSUPP; 1389 1387 } ··· 1489 1483 bool xdp_has_frags; 1490 1484 bool exception_cb; 1491 1485 bool exception_boundary; 1486 + bool is_extended; /* true if extended by freplace program */ 1487 + u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ 1488 + struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ 1492 1489 struct bpf_arena *arena; 1493 1490 /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ 1494 1491 const struct btf_type *attach_func_proto;
+24 -2
kernel/bpf/arraymap.c
··· 947 947 struct file *map_file, int fd) 948 948 { 949 949 struct bpf_prog *prog = bpf_prog_get(fd); 950 + bool is_extended; 950 951 951 952 if (IS_ERR(prog)) 952 953 return prog; 953 954 954 - if (!bpf_prog_map_compatible(map, prog)) { 955 + if (prog->type == BPF_PROG_TYPE_EXT || 956 + !bpf_prog_map_compatible(map, prog)) { 955 957 bpf_prog_put(prog); 956 958 return ERR_PTR(-EINVAL); 959 + } 960 + 961 + mutex_lock(&prog->aux->ext_mutex); 962 + is_extended = prog->aux->is_extended; 963 + if (!is_extended) 964 + prog->aux->prog_array_member_cnt++; 965 + mutex_unlock(&prog->aux->ext_mutex); 966 + if (is_extended) { 967 + /* Extended prog can not be tail callee. It's to prevent a 968 + * potential infinite loop like: 969 + * tail callee prog entry -> tail callee prog subprog -> 970 + * freplace prog entry --tailcall-> tail callee prog entry. 971 + */ 972 + bpf_prog_put(prog); 973 + return ERR_PTR(-EBUSY); 957 974 } 958 975 959 976 return prog; ··· 978 961 979 962 static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) 980 963 { 964 + struct bpf_prog *prog = ptr; 965 + 966 + mutex_lock(&prog->aux->ext_mutex); 967 + prog->aux->prog_array_member_cnt--; 968 + mutex_unlock(&prog->aux->ext_mutex); 981 969 /* bpf_prog is freed after one RCU or tasks trace grace period */ 982 - bpf_prog_put(ptr); 970 + bpf_prog_put(prog); 983 971 } 984 972 985 973 static u32 prog_fd_array_sys_lookup_elem(void *ptr)
+1
kernel/bpf/core.c
··· 131 131 INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode); 132 132 #endif 133 133 mutex_init(&fp->aux->used_maps_mutex); 134 + mutex_init(&fp->aux->ext_mutex); 134 135 mutex_init(&fp->aux->dst_mutex); 135 136 136 137 return fp;
+4 -3
kernel/bpf/syscall.c
··· 3214 3214 container_of(link, struct bpf_tracing_link, link.link); 3215 3215 3216 3216 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, 3217 - tr_link->trampoline)); 3217 + tr_link->trampoline, 3218 + tr_link->tgt_prog)); 3218 3219 3219 3220 bpf_trampoline_put(tr_link->trampoline); 3220 3221 ··· 3355 3354 * in prog->aux 3356 3355 * 3357 3356 * - if prog->aux->dst_trampoline is NULL, the program has already been 3358 - * attached to a target and its initial target was cleared (below) 3357 + * attached to a target and its initial target was cleared (below) 3359 3358 * 3360 3359 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 3361 3360 * target_btf_id using the link_create API. ··· 3430 3429 if (err) 3431 3430 goto out_unlock; 3432 3431 3433 - err = bpf_trampoline_link_prog(&link->link, tr); 3432 + err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog); 3434 3433 if (err) { 3435 3434 bpf_link_cleanup(&link_primer); 3436 3435 link = NULL;
+39 -8
kernel/bpf/trampoline.c
··· 523 523 } 524 524 } 525 525 526 - static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) 526 + static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog) 527 + { 528 + struct bpf_prog_aux *aux = tgt_prog->aux; 529 + 530 + guard(mutex)(&aux->ext_mutex); 531 + if (aux->prog_array_member_cnt) 532 + /* Program extensions can not extend target prog when the target 533 + * prog has been updated to any prog_array map as tail callee. 534 + * It's to prevent a potential infinite loop like: 535 + * tgt prog entry -> tgt prog subprog -> freplace prog entry 536 + * --tailcall-> tgt prog entry. 537 + */ 538 + return -EBUSY; 539 + 540 + aux->is_extended = true; 541 + return 0; 542 + } 543 + 544 + static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, 545 + struct bpf_trampoline *tr, 546 + struct bpf_prog *tgt_prog) 527 547 { 528 548 enum bpf_tramp_prog_type kind; 529 549 struct bpf_tramp_link *link_exiting; ··· 564 544 /* Cannot attach extension if fentry/fexit are in use. 
*/ 565 545 if (cnt) 566 546 return -EBUSY; 547 + err = bpf_freplace_check_tgt_prog(tgt_prog); 548 + if (err) 549 + return err; 567 550 tr->extension_prog = link->link.prog; 568 551 return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, 569 552 link->link.prog->bpf_func); ··· 593 570 return err; 594 571 } 595 572 596 - int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) 573 + int bpf_trampoline_link_prog(struct bpf_tramp_link *link, 574 + struct bpf_trampoline *tr, 575 + struct bpf_prog *tgt_prog) 597 576 { 598 577 int err; 599 578 600 579 mutex_lock(&tr->mutex); 601 - err = __bpf_trampoline_link_prog(link, tr); 580 + err = __bpf_trampoline_link_prog(link, tr, tgt_prog); 602 581 mutex_unlock(&tr->mutex); 603 582 return err; 604 583 } 605 584 606 - static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) 585 + static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, 586 + struct bpf_trampoline *tr, 587 + struct bpf_prog *tgt_prog) 607 588 { 608 589 enum bpf_tramp_prog_type kind; 609 590 int err; ··· 618 591 err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, 619 592 tr->extension_prog->bpf_func, NULL); 620 593 tr->extension_prog = NULL; 594 + guard(mutex)(&tgt_prog->aux->ext_mutex); 595 + tgt_prog->aux->is_extended = false; 621 596 return err; 622 597 } 623 598 hlist_del_init(&link->tramp_hlist); ··· 628 599 } 629 600 630 601 /* bpf_trampoline_unlink_prog() should never fail. 
*/ 631 - int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) 602 + int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, 603 + struct bpf_trampoline *tr, 604 + struct bpf_prog *tgt_prog) 632 605 { 633 606 int err; 634 607 635 608 mutex_lock(&tr->mutex); 636 - err = __bpf_trampoline_unlink_prog(link, tr); 609 + err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog); 637 610 mutex_unlock(&tr->mutex); 638 611 return err; 639 612 } ··· 650 619 if (!shim_link->trampoline) 651 620 return; 652 621 653 - WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline)); 622 + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL)); 654 623 bpf_trampoline_put(shim_link->trampoline); 655 624 } 656 625 ··· 764 733 goto err; 765 734 } 766 735 767 - err = __bpf_trampoline_link_prog(&shim_link->link, tr); 736 + err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL); 768 737 if (err) 769 738 goto err; 770 739
+108 -16
tools/testing/selftests/bpf/prog_tests/tailcalls.c
··· 1496 1496 RUN_TESTS(tailcall_bpf2bpf_hierarchy3); 1497 1497 } 1498 1498 1499 - /* test_tailcall_freplace checks that the attached freplace prog is OK to 1500 - * update the prog_array map. 1499 + /* test_tailcall_freplace checks that the freplace prog fails to update the 1500 + * prog_array map, no matter whether the freplace prog attaches to its target. 1501 1501 */ 1502 1502 static void test_tailcall_freplace(void) 1503 1503 { ··· 1505 1505 struct bpf_link *freplace_link = NULL; 1506 1506 struct bpf_program *freplace_prog; 1507 1507 struct tc_bpf2bpf *tc_skel = NULL; 1508 - int prog_fd, map_fd; 1508 + int prog_fd, tc_prog_fd, map_fd; 1509 1509 char buff[128] = {}; 1510 1510 int err, key; 1511 1511 ··· 1523 1523 if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) 1524 1524 goto out; 1525 1525 1526 - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); 1526 + tc_prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); 1527 1527 freplace_prog = freplace_skel->progs.entry_freplace; 1528 - err = bpf_program__set_attach_target(freplace_prog, prog_fd, "subprog"); 1528 + err = bpf_program__set_attach_target(freplace_prog, tc_prog_fd, 1529 + "subprog_tc"); 1529 1530 if (!ASSERT_OK(err, "set_attach_target")) 1530 1531 goto out; 1531 1532 ··· 1534 1533 if (!ASSERT_OK(err, "tailcall_freplace__load")) 1535 1534 goto out; 1536 1535 1537 - freplace_link = bpf_program__attach_freplace(freplace_prog, prog_fd, 1538 - "subprog"); 1539 - if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) 1540 - goto out; 1541 - 1542 1536 map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); 1543 1537 prog_fd = bpf_program__fd(freplace_prog); 1544 1538 key = 0; 1545 1539 err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); 1546 - if (!ASSERT_OK(err, "update jmp_table")) 1540 + ASSERT_ERR(err, "update jmp_table failure"); 1541 + 1542 + freplace_link = bpf_program__attach_freplace(freplace_prog, tc_prog_fd, 1543 + "subprog_tc"); 1544 + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) 
1547 1545 goto out; 1548 1546 1549 - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); 1550 - err = bpf_prog_test_run_opts(prog_fd, &topts); 1551 - ASSERT_OK(err, "test_run"); 1552 - ASSERT_EQ(topts.retval, 34, "test_run retval"); 1547 + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); 1548 + ASSERT_ERR(err, "update jmp_table failure"); 1553 1549 1554 1550 out: 1555 1551 bpf_link__destroy(freplace_link); 1556 - tc_bpf2bpf__destroy(tc_skel); 1557 1552 tailcall_freplace__destroy(freplace_skel); 1553 + tc_bpf2bpf__destroy(tc_skel); 1554 + } 1555 + 1556 + /* test_tailcall_bpf2bpf_freplace checks the failure that fails to attach a tail 1557 + * callee prog with freplace prog or fails to update an extended prog to 1558 + * prog_array map. 1559 + */ 1560 + static void test_tailcall_bpf2bpf_freplace(void) 1561 + { 1562 + struct tailcall_freplace *freplace_skel = NULL; 1563 + struct bpf_link *freplace_link = NULL; 1564 + struct tc_bpf2bpf *tc_skel = NULL; 1565 + char buff[128] = {}; 1566 + int prog_fd, map_fd; 1567 + int err, key; 1568 + 1569 + LIBBPF_OPTS(bpf_test_run_opts, topts, 1570 + .data_in = buff, 1571 + .data_size_in = sizeof(buff), 1572 + .repeat = 1, 1573 + ); 1574 + 1575 + tc_skel = tc_bpf2bpf__open_and_load(); 1576 + if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) 1577 + goto out; 1578 + 1579 + prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); 1580 + freplace_skel = tailcall_freplace__open(); 1581 + if (!ASSERT_OK_PTR(freplace_skel, "tailcall_freplace__open")) 1582 + goto out; 1583 + 1584 + err = bpf_program__set_attach_target(freplace_skel->progs.entry_freplace, 1585 + prog_fd, "subprog_tc"); 1586 + if (!ASSERT_OK(err, "set_attach_target")) 1587 + goto out; 1588 + 1589 + err = tailcall_freplace__load(freplace_skel); 1590 + if (!ASSERT_OK(err, "tailcall_freplace__load")) 1591 + goto out; 1592 + 1593 + /* OK to attach then detach freplace prog. 
*/ 1594 + 1595 + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, 1596 + prog_fd, "subprog_tc"); 1597 + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) 1598 + goto out; 1599 + 1600 + err = bpf_link__destroy(freplace_link); 1601 + if (!ASSERT_OK(err, "destroy link")) 1602 + goto out; 1603 + 1604 + /* OK to update prog_array map then delete element from the map. */ 1605 + 1606 + key = 0; 1607 + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); 1608 + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); 1609 + if (!ASSERT_OK(err, "update jmp_table")) 1610 + goto out; 1611 + 1612 + err = bpf_map_delete_elem(map_fd, &key); 1613 + if (!ASSERT_OK(err, "delete_elem from jmp_table")) 1614 + goto out; 1615 + 1616 + /* Fail to attach a tail callee prog with freplace prog. */ 1617 + 1618 + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); 1619 + if (!ASSERT_OK(err, "update jmp_table")) 1620 + goto out; 1621 + 1622 + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, 1623 + prog_fd, "subprog_tc"); 1624 + if (!ASSERT_ERR_PTR(freplace_link, "attach_freplace failure")) 1625 + goto out; 1626 + 1627 + err = bpf_map_delete_elem(map_fd, &key); 1628 + if (!ASSERT_OK(err, "delete_elem from jmp_table")) 1629 + goto out; 1630 + 1631 + /* Fail to update an extended prog to prog_array map. 
*/ 1632 + 1633 + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, 1634 + prog_fd, "subprog_tc"); 1635 + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) 1636 + goto out; 1637 + 1638 + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); 1639 + if (!ASSERT_ERR(err, "update jmp_table failure")) 1640 + goto out; 1641 + 1642 + out: 1643 + bpf_link__destroy(freplace_link); 1644 + tailcall_freplace__destroy(freplace_skel); 1645 + tc_bpf2bpf__destroy(tc_skel); 1558 1646 } 1559 1647 1560 1648 void test_tailcalls(void) ··· 1696 1606 test_tailcall_bpf2bpf_hierarchy_3(); 1697 1607 if (test__start_subtest("tailcall_freplace")) 1698 1608 test_tailcall_freplace(); 1609 + if (test__start_subtest("tailcall_bpf2bpf_freplace")) 1610 + test_tailcall_bpf2bpf_freplace(); 1699 1611 }
+3 -2
tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
··· 5 5 #include "bpf_misc.h" 6 6 7 7 __noinline 8 - int subprog(struct __sk_buff *skb) 8 + int subprog_tc(struct __sk_buff *skb) 9 9 { 10 10 int ret = 1; 11 11 12 + __sink(skb); 12 13 __sink(ret); 13 14 return ret; 14 15 } ··· 17 16 SEC("tc") 18 17 int entry_tc(struct __sk_buff *skb) 19 18 { 20 - return subprog(skb); 19 + return subprog_tc(skb); 21 20 } 22 21 23 22 char __license[] SEC("license") = "GPL";