Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'bpf-fix-tailcall-hierarchy'

Leon Hwang says:

====================
bpf: Fix tailcall hierarchy

This patchset fixes a tailcall hierarchy issue.

The issue is confirmed in the discussions of
"bpf, x64: Fix tailcall infinite loop" [0].

The issue has been resolved on both x86_64 and arm64 [1].

I provide a long commit message in the "bpf, x64: Fix tailcall hierarchy"
patch to describe in detail how the issue happens and how this patchset
resolves it.

How does this patchset resolve the issue?

In short, it stores tail_call_cnt on the stack of main prog, and propagates
tail_call_cnt_ptr to its subprogs.

First, at the prologue of main prog, it initializes tail_call_cnt and
prepares tail_call_cnt_ptr. And at the prologue of subprog, it reuses
the tail_call_cnt_ptr from caller.

Then, when a tailcall happens, it increments tail_call_cnt through its pointer.

v5 -> v6:
* Address comments from Eduard:
* Add JITed dumping along with annotating comments
* Rewrite two selftests with RUN_TESTS macro.

v4 -> v5:
* Solution changes from tailcall run ctx to tail_call_cnt and its pointer.
This is because the v4 solution is unable to handle the case where there is
no tailcall in the subprog but there is a tailcall in an EXT prog that
attaches to the subprog.

v3 -> v4:
* Solution changes from per-task tail_call_cnt to tailcall run ctx.
The per-cpu/per-task solution has a case that it is unable to handle [2].

v2 -> v3:
* Solution changes from percpu tail_call_cnt to tail_call_cnt at task_struct.

v1 -> v2:
* Solution changes from extra run-time call insn to percpu tail_call_cnt.
* Address comments from Alexei:
* Use percpu tail_call_cnt.
* Use asm to make sure no callee saved registers are touched.

RFC v2 -> v1:
* Solution changes from propagating tail_call_cnt with its pointer to extra
run-time call insn.
* Address comments from Maciej:
* Replace all memcpy(prog, x86_nops[5], X86_PATCH_SIZE) with
emit_nops(&prog, X86_PATCH_SIZE)

RFC v1 -> RFC v2:
* Address comments from Stanislav:
* Separate moving emit_nops() as first patch.

Links:
[0] https://lore.kernel.org/bpf/6203dd01-789d-f02c-5293-def4c1b18aef@gmail.com/
[1] https://github.com/kernel-patches/bpf/pull/7350/checks
[2] https://lore.kernel.org/bpf/CAADnVQK1qF+uBjwom2s2W-yEmgd_3rGi5Nr+KiV3cW0T+UPPfA@mail.gmail.com/
====================

Link: https://lore.kernel.org/r/20240714123902.32305-1-hffilwlqm@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

authored by

Alexei Starovoitov and committed by
Andrii Nakryiko
81a0b954 bde0c5a7

+653 -44
+41 -16
arch/arm64/net/bpf_jit_comp.c
··· 26 26 27 27 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) 28 28 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) 29 - #define TCALL_CNT (MAX_BPF_JIT_REG + 2) 29 + #define TCCNT_PTR (MAX_BPF_JIT_REG + 2) 30 30 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) 31 31 #define FP_BOTTOM (MAX_BPF_JIT_REG + 4) 32 32 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5) ··· 63 63 [TMP_REG_1] = A64_R(10), 64 64 [TMP_REG_2] = A64_R(11), 65 65 [TMP_REG_3] = A64_R(12), 66 - /* tail_call_cnt */ 67 - [TCALL_CNT] = A64_R(26), 66 + /* tail_call_cnt_ptr */ 67 + [TCCNT_PTR] = A64_R(26), 68 68 /* temporary register for blinding constants */ 69 69 [BPF_REG_AX] = A64_R(9), 70 70 [FP_BOTTOM] = A64_R(27), ··· 282 282 * mov x29, sp 283 283 * stp x19, x20, [sp, #-16]! 284 284 * stp x21, x22, [sp, #-16]! 285 - * stp x25, x26, [sp, #-16]! 285 + * stp x26, x25, [sp, #-16]! 286 + * stp x26, x25, [sp, #-16]! 286 287 * stp x27, x28, [sp, #-16]! 287 288 * mov x25, sp 288 289 * mov tcc, #0 289 290 * // PROLOGUE_OFFSET 290 291 */ 292 + 293 + static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx) 294 + { 295 + const struct bpf_prog *prog = ctx->prog; 296 + const bool is_main_prog = !bpf_is_subprog(prog); 297 + const u8 ptr = bpf2a64[TCCNT_PTR]; 298 + const u8 fp = bpf2a64[BPF_REG_FP]; 299 + const u8 tcc = ptr; 300 + 301 + emit(A64_PUSH(ptr, fp, A64_SP), ctx); 302 + if (is_main_prog) { 303 + /* Initialize tail_call_cnt. */ 304 + emit(A64_MOVZ(1, tcc, 0, 0), ctx); 305 + emit(A64_PUSH(tcc, fp, A64_SP), ctx); 306 + emit(A64_MOV(1, ptr, A64_SP), ctx); 307 + } else { 308 + emit(A64_PUSH(ptr, fp, A64_SP), ctx); 309 + emit(A64_NOP, ctx); 310 + emit(A64_NOP, ctx); 311 + } 312 + } 291 313 292 314 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) 293 315 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 
1 : 0) ··· 318 296 #define POKE_OFFSET (BTI_INSNS + 1) 319 297 320 298 /* Tail call offset to jump into */ 321 - #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) 299 + #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 10) 322 300 323 301 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, 324 302 bool is_exception_cb, u64 arena_vm_start) ··· 330 308 const u8 r8 = bpf2a64[BPF_REG_8]; 331 309 const u8 r9 = bpf2a64[BPF_REG_9]; 332 310 const u8 fp = bpf2a64[BPF_REG_FP]; 333 - const u8 tcc = bpf2a64[TCALL_CNT]; 334 311 const u8 fpb = bpf2a64[FP_BOTTOM]; 335 312 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 336 313 const int idx0 = ctx->idx; ··· 380 359 /* Save callee-saved registers */ 381 360 emit(A64_PUSH(r6, r7, A64_SP), ctx); 382 361 emit(A64_PUSH(r8, r9, A64_SP), ctx); 383 - emit(A64_PUSH(fp, tcc, A64_SP), ctx); 362 + prepare_bpf_tail_call_cnt(ctx); 384 363 emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); 385 364 } else { 386 365 /* ··· 393 372 * callee-saved registers. The exception callback will not push 394 373 * anything and re-use the main program's stack. 
395 374 * 396 - * 10 registers are on the stack 375 + * 12 registers are on the stack 397 376 */ 398 - emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx); 377 + emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx); 399 378 } 400 379 401 380 /* Set up BPF prog stack base register */ 402 381 emit(A64_MOV(1, fp, A64_SP), ctx); 403 382 404 383 if (!ebpf_from_cbpf && is_main_prog) { 405 - /* Initialize tail_call_cnt */ 406 - emit(A64_MOVZ(1, tcc, 0, 0), ctx); 407 - 408 384 cur_offset = ctx->idx - idx0; 409 385 if (cur_offset != PROLOGUE_OFFSET) { 410 386 pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", ··· 450 432 451 433 const u8 tmp = bpf2a64[TMP_REG_1]; 452 434 const u8 prg = bpf2a64[TMP_REG_2]; 453 - const u8 tcc = bpf2a64[TCALL_CNT]; 435 + const u8 tcc = bpf2a64[TMP_REG_3]; 436 + const u8 ptr = bpf2a64[TCCNT_PTR]; 454 437 const int idx0 = ctx->idx; 455 438 #define cur_offset (ctx->idx - idx0) 456 439 #define jmp_offset (out_offset - (cur_offset)) ··· 468 449 emit(A64_B_(A64_COND_CS, jmp_offset), ctx); 469 450 470 451 /* 471 - * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) 452 + * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT) 472 453 * goto out; 473 - * tail_call_cnt++; 454 + * (*tail_call_cnt_ptr)++; 474 455 */ 475 456 emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); 457 + emit(A64_LDR64I(tcc, ptr, 0), ctx); 476 458 emit(A64_CMP(1, tcc, tmp), ctx); 477 459 emit(A64_B_(A64_COND_CS, jmp_offset), ctx); 478 460 emit(A64_ADD_I(1, tcc, tcc, 1), ctx); ··· 488 468 emit(A64_LSL(1, prg, r3, 3), ctx); 489 469 emit(A64_LDR64(prg, tmp, prg), ctx); 490 470 emit(A64_CBZ(1, prg, jmp_offset), ctx); 471 + 472 + /* Update tail_call_cnt if the slot is populated. 
*/ 473 + emit(A64_STR64I(tcc, ptr, 0), ctx); 491 474 492 475 /* goto *(prog->bpf_func + prologue_offset); */ 493 476 off = offsetof(struct bpf_prog, bpf_func); ··· 744 721 const u8 r8 = bpf2a64[BPF_REG_8]; 745 722 const u8 r9 = bpf2a64[BPF_REG_9]; 746 723 const u8 fp = bpf2a64[BPF_REG_FP]; 724 + const u8 ptr = bpf2a64[TCCNT_PTR]; 747 725 const u8 fpb = bpf2a64[FP_BOTTOM]; 748 726 749 727 /* We're done with BPF stack */ ··· 762 738 /* Restore x27 and x28 */ 763 739 emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); 764 740 /* Restore fs (x25) and x26 */ 765 - emit(A64_POP(fp, A64_R(26), A64_SP), ctx); 741 + emit(A64_POP(ptr, fp, A64_SP), ctx); 742 + emit(A64_POP(ptr, fp, A64_SP), ctx); 766 743 767 744 /* Restore callee-saved register */ 768 745 emit(A64_POP(r8, r9, A64_SP), ctx);
+79 -28
arch/x86/net/bpf_jit_comp.c
··· 273 273 /* Number of bytes emit_patch() needs to generate instructions */ 274 274 #define X86_PATCH_SIZE 5 275 275 /* Number of bytes that will be skipped on tailcall */ 276 - #define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE) 276 + #define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE) 277 277 278 278 static void push_r12(u8 **pprog) 279 279 { ··· 403 403 *pprog = prog; 404 404 } 405 405 406 + static void emit_prologue_tail_call(u8 **pprog, bool is_subprog) 407 + { 408 + u8 *prog = *pprog; 409 + 410 + if (!is_subprog) { 411 + /* cmp rax, MAX_TAIL_CALL_CNT */ 412 + EMIT4(0x48, 0x83, 0xF8, MAX_TAIL_CALL_CNT); 413 + EMIT2(X86_JA, 6); /* ja 6 */ 414 + /* rax is tail_call_cnt if <= MAX_TAIL_CALL_CNT. 415 + * case1: entry of main prog. 416 + * case2: tail callee of main prog. 417 + */ 418 + EMIT1(0x50); /* push rax */ 419 + /* Make rax as tail_call_cnt_ptr. */ 420 + EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */ 421 + EMIT2(0xEB, 1); /* jmp 1 */ 422 + /* rax is tail_call_cnt_ptr if > MAX_TAIL_CALL_CNT. 423 + * case: tail callee of subprog. 424 + */ 425 + EMIT1(0x50); /* push rax */ 426 + /* push tail_call_cnt_ptr */ 427 + EMIT1(0x50); /* push rax */ 428 + } else { /* is_subprog */ 429 + /* rax is tail_call_cnt_ptr. */ 430 + EMIT1(0x50); /* push rax */ 431 + EMIT1(0x50); /* push rax */ 432 + } 433 + 434 + *pprog = prog; 435 + } 436 + 406 437 /* 407 438 * Emit x86-64 prologue code for BPF program. 408 439 * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes ··· 455 424 /* When it's the entry of the whole tailcall context, 456 425 * zeroing rax means initialising tail_call_cnt. 457 426 */ 458 - EMIT2(0x31, 0xC0); /* xor eax, eax */ 427 + EMIT3(0x48, 0x31, 0xC0); /* xor rax, rax */ 459 428 else 460 429 /* Keep the same instruction layout. 
*/ 461 - EMIT2(0x66, 0x90); /* nop2 */ 430 + emit_nops(&prog, 3); /* nop3 */ 462 431 } 463 432 /* Exception callback receives FP as third parameter */ 464 433 if (is_exception_cb) { ··· 484 453 if (stack_depth) 485 454 EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); 486 455 if (tail_call_reachable) 487 - EMIT1(0x50); /* push rax */ 456 + emit_prologue_tail_call(&prog, is_subprog); 488 457 *pprog = prog; 489 458 } 490 459 ··· 620 589 *pprog = prog; 621 590 } 622 591 592 + #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (-16 - round_up(stack, 8)) 593 + 623 594 /* 624 595 * Generate the following code: 625 596 * 626 597 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... 627 598 * if (index >= array->map.max_entries) 628 599 * goto out; 629 - * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) 600 + * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) 630 601 * goto out; 631 602 * prog = array->ptrs[index]; 632 603 * if (prog == NULL) ··· 641 608 u32 stack_depth, u8 *ip, 642 609 struct jit_context *ctx) 643 610 { 644 - int tcc_off = -4 - round_up(stack_depth, 8); 611 + int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth); 645 612 u8 *prog = *pprog, *start = *pprog; 646 613 int offset; 647 614 ··· 663 630 EMIT2(X86_JBE, offset); /* jbe out */ 664 631 665 632 /* 666 - * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) 633 + * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) 667 634 * goto out; 668 635 */ 669 - EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ 670 - EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ 636 + EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */ 637 + EMIT4(0x48, 0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp qword ptr [rax], MAX_TAIL_CALL_CNT */ 671 638 672 639 offset = ctx->tail_call_indirect_label - (prog + 2 - start); 673 640 EMIT2(X86_JAE, offset); /* jae out */ 674 - EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ 675 - EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr 
[rbp - tcc_off], eax */ 676 641 677 642 /* prog = array->ptrs[index]; */ 678 643 EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */ ··· 685 654 offset = ctx->tail_call_indirect_label - (prog + 2 - start); 686 655 EMIT2(X86_JE, offset); /* je out */ 687 656 657 + /* Inc tail_call_cnt if the slot is populated. */ 658 + EMIT4(0x48, 0x83, 0x00, 0x01); /* add qword ptr [rax], 1 */ 659 + 688 660 if (bpf_prog->aux->exception_boundary) { 689 661 pop_callee_regs(&prog, all_callee_regs_used); 690 662 pop_r12(&prog); ··· 697 663 pop_r12(&prog); 698 664 } 699 665 666 + /* Pop tail_call_cnt_ptr. */ 667 + EMIT1(0x58); /* pop rax */ 668 + /* Pop tail_call_cnt, if it's main prog. 669 + * Pop tail_call_cnt_ptr, if it's subprog. 670 + */ 700 671 EMIT1(0x58); /* pop rax */ 701 672 if (stack_depth) 702 673 EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */ ··· 730 691 bool *callee_regs_used, u32 stack_depth, 731 692 struct jit_context *ctx) 732 693 { 733 - int tcc_off = -4 - round_up(stack_depth, 8); 694 + int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth); 734 695 u8 *prog = *pprog, *start = *pprog; 735 696 int offset; 736 697 737 698 /* 738 - * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) 699 + * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) 739 700 * goto out; 740 701 */ 741 - EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ 742 - EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ 702 + EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */ 703 + EMIT4(0x48, 0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp qword ptr [rax], MAX_TAIL_CALL_CNT */ 743 704 744 705 offset = ctx->tail_call_direct_label - (prog + 2 - start); 745 706 EMIT2(X86_JAE, offset); /* jae out */ 746 - EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ 747 - EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ 748 707 749 708 poke->tailcall_bypass = ip + (prog - start); 750 709 poke->adj_off = 
X86_TAIL_CALL_OFFSET; ··· 751 714 752 715 emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, 753 716 poke->tailcall_bypass); 717 + 718 + /* Inc tail_call_cnt if the slot is populated. */ 719 + EMIT4(0x48, 0x83, 0x00, 0x01); /* add qword ptr [rax], 1 */ 754 720 755 721 if (bpf_prog->aux->exception_boundary) { 756 722 pop_callee_regs(&prog, all_callee_regs_used); ··· 764 724 pop_r12(&prog); 765 725 } 766 726 727 + /* Pop tail_call_cnt_ptr. */ 728 + EMIT1(0x58); /* pop rax */ 729 + /* Pop tail_call_cnt, if it's main prog. 730 + * Pop tail_call_cnt_ptr, if it's subprog. 731 + */ 767 732 EMIT1(0x58); /* pop rax */ 768 733 if (stack_depth) 769 734 EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); ··· 1356 1311 1357 1312 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) 1358 1313 1359 - /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ 1360 - #define RESTORE_TAIL_CALL_CNT(stack) \ 1361 - EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8) 1314 + #define __LOAD_TCC_PTR(off) \ 1315 + EMIT3_off32(0x48, 0x8B, 0x85, off) 1316 + /* mov rax, qword ptr [rbp - rounded_stack_depth - 16] */ 1317 + #define LOAD_TAIL_CALL_CNT_PTR(stack) \ 1318 + __LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack)) 1362 1319 1363 1320 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, 1364 1321 int oldproglen, struct jit_context *ctx, bool jmp_padding) ··· 2078 2031 2079 2032 func = (u8 *) __bpf_call_base + imm32; 2080 2033 if (tail_call_reachable) { 2081 - RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth); 2034 + LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth); 2082 2035 ip += 7; 2083 2036 } 2084 2037 if (!imm32) ··· 2753 2706 return 0; 2754 2707 } 2755 2708 2709 + /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ 2710 + #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack) \ 2711 + __LOAD_TCC_PTR(-round_up(stack, 8) - 8) 2712 + 2756 2713 /* Example: 2757 2714 * __be16 eth_type_trans(struct sk_buff *skb, struct net_device 
*dev); 2758 2715 * its 'struct btf_func_model' will be nr_args=2 ··· 2877 2826 * [ ... ] 2878 2827 * [ stack_arg2 ] 2879 2828 * RBP - arg_stack_off [ stack_arg1 ] 2880 - * RSP [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX 2829 + * RSP [ tail_call_cnt_ptr ] BPF_TRAMP_F_TAIL_CALL_CTX 2881 2830 */ 2882 2831 2883 2832 /* room for return value of orig_call or fentry prog */ ··· 3006 2955 save_args(m, &prog, arg_stack_off, true); 3007 2956 3008 2957 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) { 3009 - /* Before calling the original function, restore the 3010 - * tail_call_cnt from stack to rax. 2958 + /* Before calling the original function, load the 2959 + * tail_call_cnt_ptr from stack to rax. 3011 2960 */ 3012 - RESTORE_TAIL_CALL_CNT(stack_size); 2961 + LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size); 3013 2962 } 3014 2963 3015 2964 if (flags & BPF_TRAMP_F_ORIG_STACK) { ··· 3068 3017 goto cleanup; 3069 3018 } 3070 3019 } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) { 3071 - /* Before running the original function, restore the 3072 - * tail_call_cnt from stack to rax. 3020 + /* Before running the original function, load the 3021 + * tail_call_cnt_ptr from stack to rax. 3073 3022 */ 3074 - RESTORE_TAIL_CALL_CNT(stack_size); 3023 + LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size); 3075 3024 } 3076 3025 3077 3026 /* restore return value of orig_call or fentry prog back into RAX */
+320
tools/testing/selftests/bpf/prog_tests/tailcalls.c
··· 3 3 #include <test_progs.h> 4 4 #include <network_helpers.h> 5 5 #include "tailcall_poke.skel.h" 6 + #include "tailcall_bpf2bpf_hierarchy2.skel.h" 7 + #include "tailcall_bpf2bpf_hierarchy3.skel.h" 6 8 7 9 8 10 /* test_tailcall_1 checks basic functionality by patching multiple locations ··· 1189 1187 tailcall_poke__destroy(call); 1190 1188 } 1191 1189 1190 + static void test_tailcall_hierarchy_count(const char *which, bool test_fentry, 1191 + bool test_fexit, 1192 + bool test_fentry_entry) 1193 + { 1194 + int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val; 1195 + struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL; 1196 + struct bpf_link *fentry_link = NULL, *fexit_link = NULL; 1197 + struct bpf_program *prog, *fentry_prog; 1198 + struct bpf_map *prog_array, *data_map; 1199 + int fentry_prog_fd; 1200 + char buff[128] = {}; 1201 + 1202 + LIBBPF_OPTS(bpf_test_run_opts, topts, 1203 + .data_in = buff, 1204 + .data_size_in = sizeof(buff), 1205 + .repeat = 1, 1206 + ); 1207 + 1208 + err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, 1209 + &prog_fd); 1210 + if (!ASSERT_OK(err, "load obj")) 1211 + return; 1212 + 1213 + prog = bpf_object__find_program_by_name(obj, "entry"); 1214 + if (!ASSERT_OK_PTR(prog, "find entry prog")) 1215 + goto out; 1216 + 1217 + prog_fd = bpf_program__fd(prog); 1218 + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) 1219 + goto out; 1220 + 1221 + if (test_fentry_entry) { 1222 + fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_hierarchy_fentry.bpf.o", 1223 + NULL); 1224 + if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file")) 1225 + goto out; 1226 + 1227 + fentry_prog = bpf_object__find_program_by_name(fentry_obj, 1228 + "fentry"); 1229 + if (!ASSERT_OK_PTR(prog, "find fentry prog")) 1230 + goto out; 1231 + 1232 + err = bpf_program__set_attach_target(fentry_prog, prog_fd, 1233 + "entry"); 1234 + if (!ASSERT_OK(err, "set_attach_target entry")) 1235 + goto out; 1236 + 1237 + err = 
bpf_object__load(fentry_obj); 1238 + if (!ASSERT_OK(err, "load fentry_obj")) 1239 + goto out; 1240 + 1241 + fentry_link = bpf_program__attach_trace(fentry_prog); 1242 + if (!ASSERT_OK_PTR(fentry_link, "attach_trace")) 1243 + goto out; 1244 + 1245 + fentry_prog_fd = bpf_program__fd(fentry_prog); 1246 + if (!ASSERT_GE(fentry_prog_fd, 0, "fentry_prog_fd")) 1247 + goto out; 1248 + 1249 + prog_array = bpf_object__find_map_by_name(fentry_obj, "jmp_table"); 1250 + if (!ASSERT_OK_PTR(prog_array, "find jmp_table")) 1251 + goto out; 1252 + 1253 + map_fd = bpf_map__fd(prog_array); 1254 + if (!ASSERT_GE(map_fd, 0, "map_fd")) 1255 + goto out; 1256 + 1257 + i = 0; 1258 + err = bpf_map_update_elem(map_fd, &i, &fentry_prog_fd, BPF_ANY); 1259 + if (!ASSERT_OK(err, "update jmp_table")) 1260 + goto out; 1261 + 1262 + data_map = bpf_object__find_map_by_name(fentry_obj, ".bss"); 1263 + if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map), 1264 + "find data_map")) 1265 + goto out; 1266 + 1267 + } else { 1268 + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); 1269 + if (!ASSERT_OK_PTR(prog_array, "find jmp_table")) 1270 + goto out; 1271 + 1272 + map_fd = bpf_map__fd(prog_array); 1273 + if (!ASSERT_GE(map_fd, 0, "map_fd")) 1274 + goto out; 1275 + 1276 + i = 0; 1277 + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); 1278 + if (!ASSERT_OK(err, "update jmp_table")) 1279 + goto out; 1280 + 1281 + data_map = bpf_object__find_map_by_name(obj, ".bss"); 1282 + if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map), 1283 + "find data_map")) 1284 + goto out; 1285 + } 1286 + 1287 + if (test_fentry) { 1288 + fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o", 1289 + NULL); 1290 + if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file")) 1291 + goto out; 1292 + 1293 + prog = bpf_object__find_program_by_name(fentry_obj, "fentry"); 1294 + if (!ASSERT_OK_PTR(prog, "find fentry prog")) 1295 + goto out; 1296 + 1297 + err = 
bpf_program__set_attach_target(prog, prog_fd, 1298 + "subprog_tail"); 1299 + if (!ASSERT_OK(err, "set_attach_target subprog_tail")) 1300 + goto out; 1301 + 1302 + err = bpf_object__load(fentry_obj); 1303 + if (!ASSERT_OK(err, "load fentry_obj")) 1304 + goto out; 1305 + 1306 + fentry_link = bpf_program__attach_trace(prog); 1307 + if (!ASSERT_OK_PTR(fentry_link, "attach_trace")) 1308 + goto out; 1309 + } 1310 + 1311 + if (test_fexit) { 1312 + fexit_obj = bpf_object__open_file("tailcall_bpf2bpf_fexit.bpf.o", 1313 + NULL); 1314 + if (!ASSERT_OK_PTR(fexit_obj, "open fexit_obj file")) 1315 + goto out; 1316 + 1317 + prog = bpf_object__find_program_by_name(fexit_obj, "fexit"); 1318 + if (!ASSERT_OK_PTR(prog, "find fexit prog")) 1319 + goto out; 1320 + 1321 + err = bpf_program__set_attach_target(prog, prog_fd, 1322 + "subprog_tail"); 1323 + if (!ASSERT_OK(err, "set_attach_target subprog_tail")) 1324 + goto out; 1325 + 1326 + err = bpf_object__load(fexit_obj); 1327 + if (!ASSERT_OK(err, "load fexit_obj")) 1328 + goto out; 1329 + 1330 + fexit_link = bpf_program__attach_trace(prog); 1331 + if (!ASSERT_OK_PTR(fexit_link, "attach_trace")) 1332 + goto out; 1333 + } 1334 + 1335 + err = bpf_prog_test_run_opts(prog_fd, &topts); 1336 + ASSERT_OK(err, "tailcall"); 1337 + ASSERT_EQ(topts.retval, 1, "tailcall retval"); 1338 + 1339 + main_data_fd = bpf_map__fd(data_map); 1340 + if (!ASSERT_GE(main_data_fd, 0, "main_data_fd")) 1341 + goto out; 1342 + 1343 + i = 0; 1344 + err = bpf_map_lookup_elem(main_data_fd, &i, &val); 1345 + ASSERT_OK(err, "tailcall count"); 1346 + ASSERT_EQ(val, 34, "tailcall count"); 1347 + 1348 + if (test_fentry) { 1349 + data_map = bpf_object__find_map_by_name(fentry_obj, ".bss"); 1350 + if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map), 1351 + "find tailcall_bpf2bpf_fentry.bss map")) 1352 + goto out; 1353 + 1354 + fentry_data_fd = bpf_map__fd(data_map); 1355 + if (!ASSERT_GE(fentry_data_fd, 0, 1356 + "find tailcall_bpf2bpf_fentry.bss map fd")) 1357 + 
goto out; 1358 + 1359 + i = 0; 1360 + err = bpf_map_lookup_elem(fentry_data_fd, &i, &val); 1361 + ASSERT_OK(err, "fentry count"); 1362 + ASSERT_EQ(val, 68, "fentry count"); 1363 + } 1364 + 1365 + if (test_fexit) { 1366 + data_map = bpf_object__find_map_by_name(fexit_obj, ".bss"); 1367 + if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map), 1368 + "find tailcall_bpf2bpf_fexit.bss map")) 1369 + goto out; 1370 + 1371 + fexit_data_fd = bpf_map__fd(data_map); 1372 + if (!ASSERT_GE(fexit_data_fd, 0, 1373 + "find tailcall_bpf2bpf_fexit.bss map fd")) 1374 + goto out; 1375 + 1376 + i = 0; 1377 + err = bpf_map_lookup_elem(fexit_data_fd, &i, &val); 1378 + ASSERT_OK(err, "fexit count"); 1379 + ASSERT_EQ(val, 68, "fexit count"); 1380 + } 1381 + 1382 + i = 0; 1383 + err = bpf_map_delete_elem(map_fd, &i); 1384 + if (!ASSERT_OK(err, "delete_elem from jmp_table")) 1385 + goto out; 1386 + 1387 + err = bpf_prog_test_run_opts(prog_fd, &topts); 1388 + ASSERT_OK(err, "tailcall"); 1389 + ASSERT_EQ(topts.retval, 1, "tailcall retval"); 1390 + 1391 + i = 0; 1392 + err = bpf_map_lookup_elem(main_data_fd, &i, &val); 1393 + ASSERT_OK(err, "tailcall count"); 1394 + ASSERT_EQ(val, 35, "tailcall count"); 1395 + 1396 + if (test_fentry) { 1397 + i = 0; 1398 + err = bpf_map_lookup_elem(fentry_data_fd, &i, &val); 1399 + ASSERT_OK(err, "fentry count"); 1400 + ASSERT_EQ(val, 70, "fentry count"); 1401 + } 1402 + 1403 + if (test_fexit) { 1404 + i = 0; 1405 + err = bpf_map_lookup_elem(fexit_data_fd, &i, &val); 1406 + ASSERT_OK(err, "fexit count"); 1407 + ASSERT_EQ(val, 70, "fexit count"); 1408 + } 1409 + 1410 + out: 1411 + bpf_link__destroy(fentry_link); 1412 + bpf_link__destroy(fexit_link); 1413 + bpf_object__close(fentry_obj); 1414 + bpf_object__close(fexit_obj); 1415 + bpf_object__close(obj); 1416 + } 1417 + 1418 + /* test_tailcall_bpf2bpf_hierarchy_1 checks that the count value of the tail 1419 + * call limit enforcement matches with expectations when tailcalls are preceded 1420 + * with two 
bpf2bpf calls. 1421 + * 1422 + * subprog --tailcall-> entry 1423 + * entry < 1424 + * subprog --tailcall-> entry 1425 + */ 1426 + static void test_tailcall_bpf2bpf_hierarchy_1(void) 1427 + { 1428 + test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o", 1429 + false, false, false); 1430 + } 1431 + 1432 + /* test_tailcall_bpf2bpf_hierarchy_fentry checks that the count value of the 1433 + * tail call limit enforcement matches with expectations when tailcalls are 1434 + * preceded with two bpf2bpf calls, and the two subprogs are traced by fentry. 1435 + */ 1436 + static void test_tailcall_bpf2bpf_hierarchy_fentry(void) 1437 + { 1438 + test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o", 1439 + true, false, false); 1440 + } 1441 + 1442 + /* test_tailcall_bpf2bpf_hierarchy_fexit checks that the count value of the tail 1443 + * call limit enforcement matches with expectations when tailcalls are preceded 1444 + * with two bpf2bpf calls, and the two subprogs are traced by fexit. 1445 + */ 1446 + static void test_tailcall_bpf2bpf_hierarchy_fexit(void) 1447 + { 1448 + test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o", 1449 + false, true, false); 1450 + } 1451 + 1452 + /* test_tailcall_bpf2bpf_hierarchy_fentry_fexit checks that the count value of 1453 + * the tail call limit enforcement matches with expectations when tailcalls are 1454 + * preceded with two bpf2bpf calls, and the two subprogs are traced by both 1455 + * fentry and fexit. 1456 + */ 1457 + static void test_tailcall_bpf2bpf_hierarchy_fentry_fexit(void) 1458 + { 1459 + test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o", 1460 + true, true, false); 1461 + } 1462 + 1463 + /* test_tailcall_bpf2bpf_hierarchy_fentry_entry checks that the count value of 1464 + * the tail call limit enforcement matches with expectations when tailcalls are 1465 + * preceded with two bpf2bpf calls in fentry. 
1466 + */ 1467 + static void test_tailcall_bpf2bpf_hierarchy_fentry_entry(void) 1468 + { 1469 + test_tailcall_hierarchy_count("tc_dummy.bpf.o", false, false, true); 1470 + } 1471 + 1472 + /* test_tailcall_bpf2bpf_hierarchy_2 checks that the count value of the tail 1473 + * call limit enforcement matches with expectations: 1474 + * 1475 + * subprog_tail0 --tailcall-> classifier_0 -> subprog_tail0 1476 + * entry < 1477 + * subprog_tail1 --tailcall-> classifier_1 -> subprog_tail1 1478 + */ 1479 + static void test_tailcall_bpf2bpf_hierarchy_2(void) 1480 + { 1481 + RUN_TESTS(tailcall_bpf2bpf_hierarchy2); 1482 + } 1483 + 1484 + /* test_tailcall_bpf2bpf_hierarchy_3 checks that the count value of the tail 1485 + * call limit enforcement matches with expectations: 1486 + * 1487 + * subprog with jmp_table0 to classifier_0 1488 + * entry --tailcall-> classifier_0 < 1489 + * subprog with jmp_table1 to classifier_0 1490 + */ 1491 + static void test_tailcall_bpf2bpf_hierarchy_3(void) 1492 + { 1493 + RUN_TESTS(tailcall_bpf2bpf_hierarchy3); 1494 + } 1495 + 1192 1496 void test_tailcalls(void) 1193 1497 { 1194 1498 if (test__start_subtest("tailcall_1")) ··· 1531 1223 test_tailcall_bpf2bpf_fentry_entry(); 1532 1224 if (test__start_subtest("tailcall_poke")) 1533 1225 test_tailcall_poke(); 1226 + if (test__start_subtest("tailcall_bpf2bpf_hierarchy_1")) 1227 + test_tailcall_bpf2bpf_hierarchy_1(); 1228 + if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry")) 1229 + test_tailcall_bpf2bpf_hierarchy_fentry(); 1230 + if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fexit")) 1231 + test_tailcall_bpf2bpf_hierarchy_fexit(); 1232 + if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry_fexit")) 1233 + test_tailcall_bpf2bpf_hierarchy_fentry_fexit(); 1234 + if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry_entry")) 1235 + test_tailcall_bpf2bpf_hierarchy_fentry_entry(); 1236 + test_tailcall_bpf2bpf_hierarchy_2(); 1237 + test_tailcall_bpf2bpf_hierarchy_3(); 1534 1238 }
+34
tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include "bpf_legacy.h" 5 + 6 + struct { 7 + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 8 + __uint(max_entries, 1); 9 + __uint(key_size, sizeof(__u32)); 10 + __uint(value_size, sizeof(__u32)); 11 + } jmp_table SEC(".maps"); 12 + 13 + int count = 0; 14 + 15 + static __noinline 16 + int subprog_tail(struct __sk_buff *skb) 17 + { 18 + bpf_tail_call_static(skb, &jmp_table, 0); 19 + return 0; 20 + } 21 + 22 + SEC("tc") 23 + int entry(struct __sk_buff *skb) 24 + { 25 + int ret = 1; 26 + 27 + count++; 28 + subprog_tail(skb); 29 + subprog_tail(skb); 30 + 31 + return ret; 32 + } 33 + 34 + char __license[] SEC("license") = "GPL";
+70
tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include "bpf_misc.h" 5 + 6 + int classifier_0(struct __sk_buff *skb); 7 + int classifier_1(struct __sk_buff *skb); 8 + 9 + struct { 10 + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 11 + __uint(max_entries, 2); 12 + __uint(key_size, sizeof(__u32)); 13 + __array(values, void (void)); 14 + } jmp_table SEC(".maps") = { 15 + .values = { 16 + [0] = (void *) &classifier_0, 17 + [1] = (void *) &classifier_1, 18 + }, 19 + }; 20 + 21 + int count0 = 0; 22 + int count1 = 0; 23 + 24 + static __noinline 25 + int subprog_tail0(struct __sk_buff *skb) 26 + { 27 + bpf_tail_call_static(skb, &jmp_table, 0); 28 + return 0; 29 + } 30 + 31 + __auxiliary 32 + SEC("tc") 33 + int classifier_0(struct __sk_buff *skb) 34 + { 35 + count0++; 36 + subprog_tail0(skb); 37 + return 0; 38 + } 39 + 40 + static __noinline 41 + int subprog_tail1(struct __sk_buff *skb) 42 + { 43 + bpf_tail_call_static(skb, &jmp_table, 1); 44 + return 0; 45 + } 46 + 47 + __auxiliary 48 + SEC("tc") 49 + int classifier_1(struct __sk_buff *skb) 50 + { 51 + count1++; 52 + subprog_tail1(skb); 53 + return 0; 54 + } 55 + 56 + __success 57 + __retval(33) 58 + SEC("tc") 59 + int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb) 60 + { 61 + volatile int ret = 0; 62 + 63 + subprog_tail0(skb); 64 + subprog_tail1(skb); 65 + 66 + asm volatile (""::"r+"(ret)); 67 + return (count1 << 16) | count0; 68 + } 69 + 70 + char __license[] SEC("license") = "GPL";
+62
tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include "bpf_misc.h" 5 + 6 + int classifier_0(struct __sk_buff *skb); 7 + 8 + struct { 9 + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 10 + __uint(max_entries, 1); 11 + __uint(key_size, sizeof(__u32)); 12 + __array(values, void (void)); 13 + } jmp_table0 SEC(".maps") = { 14 + .values = { 15 + [0] = (void *) &classifier_0, 16 + }, 17 + }; 18 + 19 + struct { 20 + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 21 + __uint(max_entries, 1); 22 + __uint(key_size, sizeof(__u32)); 23 + __array(values, void (void)); 24 + } jmp_table1 SEC(".maps") = { 25 + .values = { 26 + [0] = (void *) &classifier_0, 27 + }, 28 + }; 29 + 30 + int count = 0; 31 + 32 + static __noinline 33 + int subprog_tail(struct __sk_buff *skb, void *jmp_table) 34 + { 35 + bpf_tail_call_static(skb, jmp_table, 0); 36 + return 0; 37 + } 38 + 39 + __auxiliary 40 + SEC("tc") 41 + int classifier_0(struct __sk_buff *skb) 42 + { 43 + count++; 44 + subprog_tail(skb, &jmp_table0); 45 + subprog_tail(skb, &jmp_table1); 46 + return count; 47 + } 48 + 49 + __success 50 + __retval(33) 51 + SEC("tc") 52 + int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb) 53 + { 54 + volatile int ret = 0; 55 + 56 + bpf_tail_call_static(skb, &jmp_table0, 0); 57 + 58 + asm volatile (""::"r+"(ret)); 59 + return ret; 60 + } 61 + 62 + char __license[] SEC("license") = "GPL";
+35
tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright Leon Hwang */ 3 + 4 + #include "vmlinux.h" 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_tracing.h> 7 + 8 + struct { 9 + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 10 + __uint(max_entries, 1); 11 + __uint(key_size, sizeof(__u32)); 12 + __uint(value_size, sizeof(__u32)); 13 + } jmp_table SEC(".maps"); 14 + 15 + int count = 0; 16 + 17 + static __noinline 18 + int subprog_tail(void *ctx) 19 + { 20 + bpf_tail_call_static(ctx, &jmp_table, 0); 21 + return 0; 22 + } 23 + 24 + SEC("fentry/dummy") 25 + int BPF_PROG(fentry, struct sk_buff *skb) 26 + { 27 + count++; 28 + subprog_tail(ctx); 29 + subprog_tail(ctx); 30 + 31 + return 0; 32 + } 33 + 34 + 35 + char _license[] SEC("license") = "GPL";
+12
tools/testing/selftests/bpf/progs/tc_dummy.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include "bpf_legacy.h" 5 + 6 + SEC("tc") 7 + int entry(struct __sk_buff *skb) 8 + { 9 + return 1; 10 + } 11 + 12 + char __license[] SEC("license") = "GPL";