Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'bpf-arg-tracking-for-imprecise-multi-offset-bpf_st-stx'

Eduard Zingerman says:

====================
bpf: arg tracking for imprecise/multi-offset BPF_ST/STX

When the static arg tracking analysis encounters a store through a
pointer with imprecise or multi-offset destination, it must use weak
updates (join) instead of strong updates (overwrite) for the affected
at_stack slots. At runtime only one slot is actually written; the
others retain their old values.

Two cases are addressed:
- BPF_STX, handled by spill_to_stack(). The call to spill_to_stack()
  was gated on `dst_is_local_fp = (frame == depth)`, which missed
  ARG_IMPRECISE pointers entirely.
- BPF_ST, handled by clear_stack_for_all_offs(). It delegates to
  clear_overlapping_stack_slots(), which unconditionally set
  `at_stack[i] = none`. Change to `at_stack[i] = join(old, none)`
  when the written slot is not uniquely known (cnt != 1, i.e. the
  offset is imprecise or there are multiple candidate offsets), so
  that untouched slots preserve their tracked values.

No veristat diff compared to current master when tested on selftests,
sched_ext, cilium and a set of Meta internal programs.

This addresses issues reported by sashiko for patch #7 in [1].

[1] https://sashiko.dev/#/patchset/20260410-patch-set-v4-0-5d4eecb343db%40gmail.com

Changelog:
v2 -> v3:
- Use check_add_overflow() in arg_add() (Alexei).
- Add missing fixes tag (CI bot).
- Remove unused __imm in the selftest (sashiko).
v1 -> v2:
- Delete the OFF_IMPRECISE constant, always rely on
  arg_track->off_cnt == 0 as the marker that the offset is imprecise
  (Alexei).
- Squash all patches together to simplify backporting to
'bpf' branch (Alexei).

v1: https://lore.kernel.org/bpf/20260413-stacklive-fixes-v1-0-9f48a9999d6e@gmail.com/T/
v2: https://lore.kernel.org/bpf/20260413-stacklive-fixes-v2-0-ff91c4f8d273@gmail.com/T/
---
====================

Link: https://patch.msgid.link/20260413-stacklive-fixes-v2-0-398e126e5cf3@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+254 -51
+61 -51
kernel/bpf/liveness.c
··· 574 574 * 575 575 * precise {frame=N, off=V} -- known absolute frame index and byte offset 576 576 * | 577 - * offset-imprecise {frame=N, off=OFF_IMPRECISE} 577 + * offset-imprecise {frame=N, cnt=0} 578 578 * | -- known frame identity, unknown offset 579 579 * fully-imprecise {frame=ARG_IMPRECISE, mask=bitmask} 580 580 * -- unknown frame identity; .mask is a ··· 607 607 ARG_IMPRECISE = -3, /* lost identity; .mask is arg bitmask */ 608 608 }; 609 609 610 - #define OFF_IMPRECISE S16_MIN /* arg identity known but offset unknown */ 611 - 612 610 /* Track callee stack slots fp-8 through fp-512 (64 slots of 8 bytes each) */ 613 611 #define MAX_ARG_SPILL_SLOTS 64 614 612 ··· 618 620 static bool arg_is_fp(const struct arg_track *at) 619 621 { 620 622 return at->frame >= 0 || at->frame == ARG_IMPRECISE; 621 - } 622 - 623 - /* 624 - * Clear all tracked callee stack slots overlapping the byte range 625 - * [off, off+sz-1] where off is a negative FP-relative offset. 626 - */ 627 - static void clear_overlapping_stack_slots(struct arg_track *at_stack, s16 off, u32 sz) 628 - { 629 - struct arg_track none = { .frame = ARG_NONE }; 630 - 631 - if (off == OFF_IMPRECISE) { 632 - for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) 633 - at_stack[i] = none; 634 - return; 635 - } 636 - for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) { 637 - int slot_start = -((i + 1) * 8); 638 - int slot_end = slot_start + 8; 639 - 640 - if (slot_start < off + (int)sz && slot_end > off) 641 - at_stack[i] = none; 642 - } 643 623 } 644 624 645 625 static void verbose_arg_track(struct bpf_verifier_env *env, struct arg_track *at) ··· 839 863 *dst = arg_join_imprecise(*dst, *src); 840 864 } 841 865 842 - static s16 arg_add(s16 off, s64 delta) 866 + static bool arg_add(s16 off, s64 delta, s16 *out) 843 867 { 844 - s64 res; 868 + s16 d = delta; 845 869 846 - if (off == OFF_IMPRECISE) 847 - return OFF_IMPRECISE; 848 - res = (s64)off + delta; 849 - if (res < S16_MIN + 1 || res > S16_MAX) 850 - return OFF_IMPRECISE; 
851 - return res; 870 + if (d != delta) 871 + return true; 872 + return check_add_overflow(off, d, out); 852 873 } 853 874 854 875 static void arg_padd(struct arg_track *at, s64 delta) ··· 855 882 if (at->off_cnt == 0) 856 883 return; 857 884 for (i = 0; i < at->off_cnt; i++) { 858 - s16 new_off = arg_add(at->off[i], delta); 885 + s16 new_off; 859 886 860 - if (new_off == OFF_IMPRECISE) { 887 + if (arg_add(at->off[i], delta, &new_off)) { 861 888 at->off_cnt = 0; 862 889 return; 863 890 } ··· 872 899 */ 873 900 static int fp_off_to_slot(s16 off) 874 901 { 875 - if (off == OFF_IMPRECISE) 876 - return -1; 877 902 if (off >= 0 || off < -(int)(MAX_ARG_SPILL_SLOTS * 8)) 878 903 return -1; 879 904 if (off % 8) ··· 901 930 return imp; 902 931 903 932 for (i = 0; i < cnt; i++) { 904 - s16 fp_off = arg_add(at_out[reg].off[i], insn->off); 905 - int slot = fp_off_to_slot(fp_off); 933 + s16 fp_off, slot; 906 934 935 + if (arg_add(at_out[reg].off[i], insn->off, &fp_off)) 936 + return imp; 937 + slot = fp_off_to_slot(fp_off); 907 938 if (slot < 0) 908 939 return imp; 909 940 result = __arg_track_join(result, at_stack_out[slot]); ··· 941 968 return; 942 969 } 943 970 for (i = 0; i < cnt; i++) { 944 - s16 fp_off = arg_add(at_out[reg].off[i], insn->off); 945 - int slot = fp_off_to_slot(fp_off); 971 + s16 fp_off; 972 + int slot; 946 973 974 + if (arg_add(at_out[reg].off[i], insn->off, &fp_off)) 975 + continue; 976 + slot = fp_off_to_slot(fp_off); 947 977 if (slot < 0) 948 978 continue; 949 979 if (cnt == 1) 950 980 at_stack_out[slot] = new_val; 951 981 else 952 982 at_stack_out[slot] = __arg_track_join(at_stack_out[slot], new_val); 983 + } 984 + } 985 + 986 + /* 987 + * Clear all tracked callee stack slots overlapping the byte range 988 + * [off, off+sz-1] where off is a negative FP-relative offset. 
989 + */ 990 + static void clear_overlapping_stack_slots(struct arg_track *at_stack, s16 off, u32 sz, int cnt) 991 + { 992 + struct arg_track none = { .frame = ARG_NONE }; 993 + 994 + if (cnt == 0) { 995 + for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) 996 + at_stack[i] = __arg_track_join(at_stack[i], none); 997 + return; 998 + } 999 + for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) { 1000 + int slot_start = -((i + 1) * 8); 1001 + int slot_end = slot_start + 8; 1002 + 1003 + if (slot_start < off + (int)sz && slot_end > off) { 1004 + if (cnt == 1) 1005 + at_stack[i] = none; 1006 + else 1007 + at_stack[i] = __arg_track_join(at_stack[i], none); 1008 + } 953 1009 } 954 1010 } 955 1011 ··· 992 990 int cnt, i; 993 991 994 992 if (reg == BPF_REG_FP) { 995 - clear_overlapping_stack_slots(at_stack_out, insn->off, sz); 993 + clear_overlapping_stack_slots(at_stack_out, insn->off, sz, 1); 996 994 return; 997 995 } 998 996 cnt = at_out[reg].off_cnt; 999 997 if (cnt == 0) { 1000 - clear_overlapping_stack_slots(at_stack_out, OFF_IMPRECISE, sz); 998 + clear_overlapping_stack_slots(at_stack_out, 0, sz, cnt); 1001 999 return; 1002 1000 } 1003 1001 for (i = 0; i < cnt; i++) { 1004 - s16 fp_off = arg_add(at_out[reg].off[i], insn->off); 1002 + s16 fp_off; 1005 1003 1006 - clear_overlapping_stack_slots(at_stack_out, fp_off, sz); 1004 + if (arg_add(at_out[reg].off[i], insn->off, &fp_off)) { 1005 + clear_overlapping_stack_slots(at_stack_out, 0, sz, 0); 1006 + break; 1007 + } 1008 + clear_overlapping_stack_slots(at_stack_out, fp_off, sz, cnt); 1007 1009 } 1008 1010 } 1009 1011 ··· 1046 1040 } 1047 1041 if (printed) 1048 1042 verbose(env, "\n"); 1043 + } 1044 + 1045 + static bool can_be_local_fp(int depth, int regno, struct arg_track *at) 1046 + { 1047 + return regno == BPF_REG_FP || at->frame == depth || 1048 + (at->frame == ARG_IMPRECISE && (at->mask & BIT(depth))); 1049 1049 } 1050 1050 1051 1051 /* ··· 1123 1111 at_out[r] = none; 1124 1112 } else if (class == BPF_LDX) { 1125 1113 u32 sz = 
bpf_size_to_bytes(BPF_SIZE(insn->code)); 1126 - bool src_is_local_fp = insn->src_reg == BPF_REG_FP || src->frame == depth || 1127 - (src->frame == ARG_IMPRECISE && (src->mask & BIT(depth))); 1114 + bool src_is_local_fp = can_be_local_fp(depth, insn->src_reg, src); 1128 1115 1129 1116 /* 1130 1117 * Reload from callee stack: if src is current-frame FP-derived ··· 1158 1147 bool dst_is_local_fp; 1159 1148 1160 1149 /* Track spills to current-frame FP-derived callee stack */ 1161 - dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth; 1150 + dst_is_local_fp = can_be_local_fp(depth, insn->dst_reg, dst); 1162 1151 if (dst_is_local_fp && BPF_MODE(insn->code) == BPF_MEM) 1163 1152 spill_to_stack(insn, at_out, insn->dst_reg, 1164 1153 at_stack_out, src, sz); ··· 1177 1166 } 1178 1167 } else if (class == BPF_ST && BPF_MODE(insn->code) == BPF_MEM) { 1179 1168 u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code)); 1180 - bool dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth; 1169 + bool dst_is_local_fp = can_be_local_fp(depth, insn->dst_reg, dst); 1181 1170 1182 1171 /* BPF_ST to FP-derived dst: clear overlapping stack slots */ 1183 1172 if (dst_is_local_fp) ··· 1327 1316 resolved.off_cnt = ptr->off_cnt; 1328 1317 resolved.frame = ptr->frame; 1329 1318 for (oi = 0; oi < ptr->off_cnt; oi++) { 1330 - resolved.off[oi] = arg_add(ptr->off[oi], insn->off); 1331 - if (resolved.off[oi] == OFF_IMPRECISE) { 1319 + if (arg_add(ptr->off[oi], insn->off, &resolved.off[oi])) { 1332 1320 resolved.off_cnt = 0; 1333 1321 break; 1334 1322 }
+193
tools/testing/selftests/bpf/progs/verifier_live_stack.c
··· 2647 2647 "exit;" 2648 2648 ::: __clobber_all); 2649 2649 } 2650 + 2651 + /* 2652 + * Same as spill_join_with_multi_off but the write is BPF_ST (store 2653 + * immediate) instead of BPF_STX. BPF_ST goes through 2654 + * clear_stack_for_all_offs() rather than spill_to_stack(), and that 2655 + * path also needs to join instead of overwriting. 2656 + * 2657 + * fp-8 = &fp-24 2658 + * fp-16 = &fp-32 2659 + * r1 = fp-8 or fp-16 (two offsets from branch) 2660 + * *(u64 *)(r1 + 0) = 0 -- BPF_ST with immediate 2661 + * r0 = *(u64 *)(r10 - 16) -- fill from fp-16 2662 + * r0 = *(u64 *)(r0 + 0) -- deref: should produce use 2663 + */ 2664 + SEC("socket") 2665 + __log_level(2) 2666 + __failure 2667 + __msg("15: (7a) *(u64 *)(r1 +0) = 0 fp-8: fp0-24 -> fp0-24|fp0+0 fp-16: fp0-32 -> fp0-32|fp0+0") 2668 + __msg("17: (79) r0 = *(u64 *)(r0 +0) ; use: fp0-32") 2669 + __naked void st_imm_join_with_multi_off(void) 2670 + { 2671 + asm volatile ( 2672 + "*(u64 *)(r10 - 24) = 0;" 2673 + "*(u64 *)(r10 - 32) = 0;" 2674 + "r1 = r10;" 2675 + "r1 += -24;" 2676 + "*(u64 *)(r10 - 8) = r1;" 2677 + "r1 = r10;" 2678 + "r1 += -32;" 2679 + "*(u64 *)(r10 - 16) = r1;" 2680 + /* create r1 with two candidate offsets: fp-8 or fp-16 */ 2681 + "call %[bpf_get_prandom_u32];" 2682 + "if r0 == 0 goto 1f;" 2683 + "r1 = r10;" 2684 + "r1 += -8;" 2685 + "goto 2f;" 2686 + "1:" 2687 + "r1 = r10;" 2688 + "r1 += -16;" 2689 + "2:" 2690 + /* BPF_ST: store immediate through multi-offset r1 */ 2691 + "*(u64 *)(r1 + 0) = 0;" 2692 + /* read back fp-16 and deref */ 2693 + "r0 = *(u64 *)(r10 - 16);" 2694 + "r0 = *(u64 *)(r0 + 0);" 2695 + "r0 = 0;" 2696 + "exit;" 2697 + :: __imm(bpf_get_prandom_u32) 2698 + : __clobber_all); 2699 + } 2700 + 2701 + /* 2702 + * Check that BPF_ST with a known offset fully overwrites stack slot 2703 + * from the arg tracking point of view. 
2704 + */ 2705 + SEC("socket") 2706 + __log_level(2) 2707 + __success 2708 + __msg("5: (7a) *(u64 *)(r1 +0) = 0 fp-8: fp0-16 -> _{{$}}") 2709 + __naked void st_imm_join_with_single_off(void) 2710 + { 2711 + asm volatile ( 2712 + "r2 = r10;" 2713 + "r2 += -16;" 2714 + "*(u64 *)(r10 - 8) = r2;" 2715 + "r1 = r10;" 2716 + "r1 += -8;" 2717 + "*(u64 *)(r1 + 0) = 0;" 2718 + "r0 = 0;" 2719 + "exit;" 2720 + ::: __clobber_all); 2721 + } 2722 + 2723 + /* 2724 + * Same as spill_join_with_imprecise_off but the write is BPF_ST. 2725 + * Use "r2 = -8; r1 += r2" to make arg tracking lose offset 2726 + * precision while the main verifier keeps r1 as fixed-offset. 2727 + * 2728 + * fp-8 = &fp-24 2729 + * fp-16 = &fp-32 2730 + * r1 = fp-8 (imprecise to arg tracking) 2731 + * *(u64 *)(r1 + 0) = 0 -- BPF_ST with immediate 2732 + * r0 = *(u64 *)(r10 - 16) -- fill from fp-16 2733 + * r0 = *(u64 *)(r0 + 0) -- deref: should produce use 2734 + */ 2735 + SEC("socket") 2736 + __log_level(2) 2737 + __success 2738 + __msg("13: (79) r0 = *(u64 *)(r0 +0) ; use: fp0-32") 2739 + __naked void st_imm_join_with_imprecise_off(void) 2740 + { 2741 + asm volatile ( 2742 + "*(u64 *)(r10 - 24) = 0;" 2743 + "*(u64 *)(r10 - 32) = 0;" 2744 + "r1 = r10;" 2745 + "r1 += -24;" 2746 + "*(u64 *)(r10 - 8) = r1;" 2747 + "r1 = r10;" 2748 + "r1 += -32;" 2749 + "*(u64 *)(r10 - 16) = r1;" 2750 + /* r1 = fp-8 but arg tracking sees off_cnt == 0 */ 2751 + "r1 = r10;" 2752 + "r2 = -8;" 2753 + "r1 += r2;" 2754 + /* store immediate through imprecise r1 */ 2755 + "*(u64 *)(r1 + 0) = 0;" 2756 + /* read back fp-16 */ 2757 + "r0 = *(u64 *)(r10 - 16);" 2758 + /* deref: should produce use */ 2759 + "r0 = *(u64 *)(r0 + 0);" 2760 + "r0 = 0;" 2761 + "exit;" 2762 + ::: __clobber_all); 2763 + } 2764 + 2765 + /* 2766 + * Test that spilling through an ARG_IMPRECISE pointer joins with 2767 + * existing at_stack values. 
Subprog receives r1 = fp0-24 and 2768 + * r2 = map_value, creates an ARG_IMPRECISE pointer by joining caller 2769 + * and callee FP on two branches. 2770 + * 2771 + * Setup: callee spills &fp1-16 to fp1-8 (precise, tracked). 2772 + * Then writes map_value through ARG_IMPRECISE r1 — on path A 2773 + * this hits fp1-8, on path B it hits caller stack. 2774 + * Since spill_to_stack is skipped for ARG_IMPRECISE dst, 2775 + * fp1-8 tracking isn't joined with none. 2776 + * 2777 + * Expected after the imprecise write: 2778 + * - arg tracking should show fp1-8 = fp1-16|fp1+0 (joined with none) 2779 + * - read from fp1-8 and deref should produce use for fp1-16 2780 + * - write through it should NOT produce def for fp1-16 2781 + */ 2782 + SEC("socket") 2783 + __log_level(2) 2784 + __success 2785 + __msg("26: (79) r0 = *(u64 *)(r10 -8) // r1=IMP3 r6=fp0-24 r7=fp1-16 fp-8=fp1-16|fp1+0") 2786 + __naked void imprecise_dst_spill_join(void) 2787 + { 2788 + asm volatile ( 2789 + "*(u64 *)(r10 - 24) = 0;" 2790 + /* map lookup for a valid non-FP pointer */ 2791 + "*(u32 *)(r10 - 32) = 0;" 2792 + "r1 = %[map] ll;" 2793 + "r2 = r10;" 2794 + "r2 += -32;" 2795 + "call %[bpf_map_lookup_elem];" 2796 + "if r0 == 0 goto 1f;" 2797 + /* r1 = &caller_fp-24, r2 = map_value */ 2798 + "r1 = r10;" 2799 + "r1 += -24;" 2800 + "r2 = r0;" 2801 + "call imprecise_dst_spill_join_sub;" 2802 + "1:" 2803 + "r0 = 0;" 2804 + "exit;" 2805 + :: __imm_addr(map), 2806 + __imm(bpf_map_lookup_elem) 2807 + : __clobber_all); 2808 + } 2809 + 2810 + static __used __naked void imprecise_dst_spill_join_sub(void) 2811 + { 2812 + asm volatile ( 2813 + /* r6 = &caller_fp-24 (frame=0), r8 = map_value */ 2814 + "r6 = r1;" 2815 + "r8 = r2;" 2816 + /* spill &fp1-16 to fp1-8: at_stack[0] = fp1-16 */ 2817 + "*(u64 *)(r10 - 16) = 0;" 2818 + "r7 = r10;" 2819 + "r7 += -16;" 2820 + "*(u64 *)(r10 - 8) = r7;" 2821 + /* branch to create ARG_IMPRECISE pointer */ 2822 + "call %[bpf_get_prandom_u32];" 2823 + /* path B: r1 = caller fp-24 
(frame=0) */ 2824 + "r1 = r6;" 2825 + "if r0 == 0 goto 1f;" 2826 + /* path A: r1 = callee fp-8 (frame=1) */ 2827 + "r1 = r10;" 2828 + "r1 += -8;" 2829 + "1:" 2830 + /* r1 = ARG_IMPRECISE{mask=BIT(0)|BIT(1)}. 2831 + * Write map_value (non-FP) through r1. On path A this overwrites fp1-8. 2832 + * Should join at_stack[0] with none: fp1-16|fp1+0. 2833 + */ 2834 + "*(u64 *)(r1 + 0) = r8;" 2835 + /* read fp1-8: should be fp1-16|fp1+0 (joined) */ 2836 + "r0 = *(u64 *)(r10 - 8);" 2837 + "*(u64 *)(r0 + 0) = 42;" 2838 + "r0 = 0;" 2839 + "exit;" 2840 + :: __imm(bpf_get_prandom_u32) 2841 + : __clobber_all); 2842 + }