Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

powerpc64/bpf: remove BPF redzone protection in trampoline stack

Since bpf2bpf tailcall support is enabled for 64-bit powerpc with
kernel commit 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with
subprogs"), 'tailcalls/tailcall_bpf2bpf_hierarchy_fexit' BPF selftest
is triggering "corrupted stack end detected inside scheduler" with the
config option CONFIG_SCHED_STACK_END_CHECK enabled. While reviewing
the stack layout for BPF trampoline, observed that the dummy frame is
trying to protect the redzone of BPF program. This is because tail
call info and NVRs save area are in redzone at the time of tailcall
as the current BPF program stack frame is torn down before the
tailcall. But saving this redzone in the dummy frame of trampoline
is unnecessary for the following reasons:

1) Firstly, trampoline can be attached to BPF entry/main program
or subprog. But prologue part of the BPF entry/main program,
where the trampoline attach point is, is skipped during tailcall.
So, the question of protecting the redzone does not arise when the
trampoline is not even triggered in this scenario.
2) In the case of a subprog, the caller's stack frame is already set
up and the subprog's stack frame is yet to be set up. So, there is
nothing in the redzone to be protected.

Also, using a dummy frame in the BPF trampoline wastes critically scarce
kernel stack space, especially in tailcall sequence, for marginal
benefit in stack unwinding. So, drop setting up the dummy frame.
Instead, save return address in bpf trampoline frame and use it as
appropriate. Pruning this unnecessary stack usage mitigates the
likelihood of stack overflow in scenarios where bpf2bpf tailcalls
and fexit programs are mixed.

Reported-by: Saket Kumar Bhaskar <skb99@linux.ibm.com>
Fixes: 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with subprogs")
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20260303181031.390073-5-hbathini@linux.ibm.com

authored by

Hari Bathini and committed by
Madhavan Srinivasan
2d347d10 3727d6ec

+33 -56
+33 -56
arch/powerpc/net/bpf_jit_comp.c
··· 638 638 * for the traced function (BPF subprog/callee) to fetch it. 639 639 */ 640 640 static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx, 641 - int func_frame_offset, 642 - int bpf_dummy_frame_size, int r4_off) 641 + int bpf_frame_size, int r4_off) 643 642 { 644 643 if (IS_ENABLED(CONFIG_PPC64)) { 645 - /* 646 - * func_frame_offset = ...(1) 647 - * bpf_dummy_frame_size + trampoline_frame_size 648 - */ 649 - EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset)); 644 + EMIT(PPC_RAW_LD(_R4, _R1, bpf_frame_size)); 650 645 /* Refer to trampoline's Generated stack layout */ 651 646 EMIT(PPC_RAW_LD(_R3, _R4, -BPF_PPC_TAILCALL)); 652 647 ··· 652 657 EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT)); 653 658 PPC_BCC_CONST_SHORT(COND_GT, 8); 654 659 EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL)); 660 + 655 661 /* 656 - * From ...(1) above: 657 - * trampoline_frame_bottom = ...(2) 658 - * func_frame_offset - bpf_dummy_frame_size 659 - * 660 - * Using ...(2) derived above: 661 - * trampoline_tail_call_info_offset = ...(3) 662 - * trampoline_frame_bottom - BPF_PPC_TAILCALL 663 - * 664 - * From ...(3): 665 - * Use trampoline_tail_call_info_offset to write reference of main's 666 - * tail_call_info in trampoline frame. 662 + * Trampoline's tail_call_info is at the same offset, as that of 663 + * any bpf program, with reference to previous frame. Update the 664 + * address of main's tail_call_info in trampoline frame. 
667 665 */ 668 - EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size) 669 - - BPF_PPC_TAILCALL)); 666 + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size - BPF_PPC_TAILCALL)); 670 667 } else { 671 668 /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ 672 669 EMIT(PPC_RAW_LL(_R4, _R1, r4_off)); ··· 666 679 } 667 680 668 681 static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx, 669 - int func_frame_offset, int r4_off) 682 + int bpf_frame_size, int r4_off) 670 683 { 671 684 if (IS_ENABLED(CONFIG_PPC32)) { 672 685 /* ··· 677 690 } 678 691 } 679 692 680 - static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset, 681 - int nr_regs, int regs_off) 693 + static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, 694 + int bpf_frame_size, int nr_regs, int regs_off) 682 695 { 683 696 int param_save_area_offset; 684 697 685 - param_save_area_offset = func_frame_offset; /* the two frames we alloted */ 698 + param_save_area_offset = bpf_frame_size; 686 699 param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ 687 700 688 701 for (int i = 0; i < nr_regs; i++) { ··· 705 718 706 719 /* Used when we call into the traced function. 
Replicate parameter save area */ 707 720 static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx, 708 - int func_frame_offset, int nr_regs, int regs_off) 721 + int bpf_frame_size, int nr_regs, int regs_off) 709 722 { 710 723 int param_save_area_offset; 711 724 712 - param_save_area_offset = func_frame_offset; /* the two frames we alloted */ 725 + param_save_area_offset = bpf_frame_size; 713 726 param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ 714 727 715 728 for (int i = 8; i < nr_regs; i++) { ··· 726 739 void *func_addr) 727 740 { 728 741 int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0; 729 - int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset; 730 742 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; 731 743 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; 732 744 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; 745 + int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0; 733 746 struct codegen_context codegen_ctx, *ctx; 734 747 u32 *image = (u32 *)rw_image; 735 748 ppc_inst_t branch_insn; ··· 755 768 * Generated stack layout: 756 769 * 757 770 * func prev back chain [ back chain ] 758 - * [ ] 759 - * bpf prog redzone/tailcallcnt [ ... 
] 64 bytes (64-bit powerpc) 760 - * [ ] -- 761 - * LR save area [ r0 save (64-bit) ] | header 762 - * [ r0 save (32-bit) ] | 763 - * dummy frame for unwind [ back chain 1 ] -- 764 771 * [ tail_call_info ] optional - 64-bit powerpc 765 772 * [ padding ] align stack frame 766 773 * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc 767 774 * alt_lr_off [ real lr (ool stub)] optional - actual lr 775 + * retaddr_off [ return address ] 768 776 * [ r26 ] 769 777 * nvr_off [ r25 ] nvr save area 770 778 * retval_off [ return value ] ··· 823 841 nvr_off = bpf_frame_size; 824 842 bpf_frame_size += 2 * SZL; 825 843 844 + /* Save area for return address */ 845 + retaddr_off = bpf_frame_size; 846 + bpf_frame_size += SZL; 847 + 826 848 /* Optional save area for actual LR in case of ool ftrace */ 827 849 if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { 828 850 alt_lr_off = bpf_frame_size; ··· 853 867 /* Padding to align stack frame, if any */ 854 868 bpf_frame_size = round_up(bpf_frame_size, SZL * 2); 855 869 856 - /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */ 857 - bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64; 858 - 859 - /* Offset to the traced function's stack frame */ 860 - func_frame_offset = bpf_dummy_frame_size + bpf_frame_size; 861 - 862 - /* Create dummy frame for unwind, store original return value */ 870 + /* Store original return value */ 863 871 EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF)); 864 - /* Protect red zone where tail call count goes */ 865 - EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size)); 866 872 867 873 /* Create our stack frame */ 868 874 EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size)); ··· 869 891 if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2) 870 892 EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); 871 893 872 - bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off); 894 + bpf_trampoline_save_args(image, ctx, bpf_frame_size, nr_regs, regs_off); 873 895 874 896 /* Save our LR/return address */ 875 
897 EMIT(PPC_RAW_MFLR(_R3)); 876 898 if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) 877 899 EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off)); 878 900 else 879 - EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); 901 + EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off)); 880 902 881 903 /* 882 904 * Derive IP address of the traced function. ··· 903 925 EMIT(PPC_RAW_STL(_R3, _R1, ip_off)); 904 926 905 927 if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { 906 - /* Fake our LR for unwind */ 928 + /* Fake our LR for BPF_TRAMP_F_CALL_ORIG case */ 907 929 EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); 908 - EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); 930 + EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off)); 909 931 } 910 932 911 933 /* Save function arg count -- see bpf_get_func_arg_cnt() */ ··· 944 966 /* Call the traced function */ 945 967 if (flags & BPF_TRAMP_F_CALL_ORIG) { 946 968 /* 947 - * The address in LR save area points to the correct point in the original function 969 + * retaddr on trampoline stack points to the correct point in the original function 948 970 * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction 949 971 * sequence 950 972 */ 951 - EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); 973 + EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off)); 952 974 EMIT(PPC_RAW_MTCTR(_R3)); 953 975 954 976 /* Replicate tail_call_cnt before calling the original BPF prog */ 955 977 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 956 - bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset, 957 - bpf_dummy_frame_size, r4_off); 978 + bpf_trampoline_setup_tail_call_info(image, ctx, bpf_frame_size, r4_off); 958 979 959 980 /* Restore args */ 960 - bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off); 981 + bpf_trampoline_restore_args_stack(image, ctx, bpf_frame_size, nr_regs, regs_off); 961 982 962 983 /* Restore TOC for 64-bit */ 963 984 if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) ··· 970 
993 971 994 /* Restore updated tail_call_cnt */ 972 995 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 973 - bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off); 996 + bpf_trampoline_restore_tail_call_cnt(image, ctx, bpf_frame_size, r4_off); 974 997 975 998 /* Reserve space to patch branch instruction to skip fexit progs */ 976 999 if (ro_image) /* image is NULL for dummy pass */ ··· 1022 1045 EMIT(PPC_RAW_LD(_R2, _R1, 24)); 1023 1046 if (flags & BPF_TRAMP_F_SKIP_FRAME) { 1024 1047 /* Skip the traced function and return to parent */ 1025 - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); 1048 + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); 1026 1049 EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); 1027 1050 EMIT(PPC_RAW_MTLR(_R0)); 1028 1051 EMIT(PPC_RAW_BLR()); ··· 1030 1053 if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { 1031 1054 EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off)); 1032 1055 EMIT(PPC_RAW_MTLR(_R0)); 1033 - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); 1056 + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); 1034 1057 EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); 1035 1058 EMIT(PPC_RAW_BLR()); 1036 1059 } else { 1037 - EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF)); 1060 + EMIT(PPC_RAW_LL(_R0, _R1, retaddr_off)); 1038 1061 EMIT(PPC_RAW_MTCTR(_R0)); 1039 - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); 1062 + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size)); 1040 1063 EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); 1041 1064 EMIT(PPC_RAW_MTLR(_R0)); 1042 1065 EMIT(PPC_RAW_BCTR());