Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bpf: Add bitwise tracking for BPF_END

This patch implements bitwise tracking (tnum analysis) for BPF_END
(byte swap) operation.

Currently, the BPF verifier does not track values across BPF_END
operations, treating the result as completely unknown. This limits the
verifier's ability to prove the safety of programs that perform endianness
conversions, which are common in networking code.

For example, the following code pattern for port number validation:

int test(struct pt_regs *ctx) {
__u64 x = bpf_get_prandom_u32();
x &= 0x3f00; // Range: [0, 0x3f00], var_off: (0x0; 0x3f00)
x = bswap16(x); // Should swap to range [0, 0x3f], var_off: (0x0; 0x3f)
if (x > 0x3f) goto trap;
return 0;
trap:
return *(u64 *)NULL; // Should be unreachable
}

Currently generates verifier output:

1: (54) w0 &= 16128 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=16128,var_off=(0x0; 0x3f00))
2: (d7) r0 = bswap16 r0 ; R0=scalar()
3: (25) if r0 > 0x3f goto pc+2 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f))

Without this patch, even though the verifier knows which bits of `x` may
be set, after bswap16 it loses all tracking information and treats `x`
as having a completely unknown value [0, 65535].

According to the BPF instruction set[1], there are 3 kinds of BPF_END:

1. `bswap(16|32|64)`: opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
- do unconditional swap
2. `le(16|32|64)`: opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
- on big-endian: do swap
- on little-endian: truncation (16/32-bit) or no-op (64-bit)
3. `be(16|32|64)`: opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
- on little-endian: do swap
- on big-endian: truncation (16/32-bit) or no-op (64-bit)

Since BPF_END operations are inherently bit-wise permutations, tnum
(bitwise tracking) offers the most efficient and precise mechanism
for value analysis. By implementing `tnum_bswap16`, `tnum_bswap32`,
and `tnum_bswap64`, we can derive exact `var_off` values concisely,
directly reflecting the bit-level changes.

Here is the overview of changes:

1. In `tnum_bswap(16|32|64)` (kernel/bpf/tnum.c):

Call the `swab(16|32|64)` function on the value and mask of `var_off`,
truncating both to the operand width in the 16/32-bit cases.

2. In `adjust_scalar_min_max_vals` (kernel/bpf/verifier.c):

Call the helper function `scalar_byte_swap`.
- Only do a byte swap when
* the instruction is alu64 (unconditional swap) OR
* the target endianness (to_le/to_be) differs from the host endianness.
- If a byte swap is needed:
* First call `tnum_bswap(16|32|64)` to update `var_off`.
* Then reset the bounds, since a byte swap scrambles the range.
- For 16/32-bit cases, truncate the dst register to match the swapped size.

This enables better verification of networking code that frequently uses
byte swaps for protocol processing, reducing false positive rejections.

[1] https://www.kernel.org/doc/Documentation/bpf/standardization/instruction-set.rst

Co-developed-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Co-developed-by: Yazhou Tang <tangyazhou518@outlook.com>
Signed-off-by: Yazhou Tang <tangyazhou518@outlook.com>
Signed-off-by: Tianci Cao <ziye@zju.edu.cn>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260204111503.77871-2-ziye@zju.edu.cn
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Tianci Cao and committed by
Alexei Starovoitov
9d211998 4af52669

+78 -3
+5
include/linux/tnum.h
··· 63 63 /* Return @a with all but the lowest @size bytes cleared */ 64 64 struct tnum tnum_cast(struct tnum a, u8 size); 65 65 66 + /* Swap the bytes of a tnum */ 67 + struct tnum tnum_bswap16(struct tnum a); 68 + struct tnum tnum_bswap32(struct tnum a); 69 + struct tnum tnum_bswap64(struct tnum a); 70 + 66 71 /* Returns true if @a is a known constant */ 67 72 static inline bool tnum_is_const(struct tnum a) 68 73 {
+16
kernel/bpf/tnum.c
··· 8 8 */ 9 9 #include <linux/kernel.h> 10 10 #include <linux/tnum.h> 11 + #include <linux/swab.h> 11 12 12 13 #define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m} 13 14 /* A completely unknown value */ ··· 253 252 struct tnum tnum_const_subreg(struct tnum a, u32 value) 254 253 { 255 254 return tnum_with_subreg(a, tnum_const(value)); 255 + } 256 + 257 + struct tnum tnum_bswap16(struct tnum a) 258 + { 259 + return TNUM(swab16(a.value & 0xFFFF), swab16(a.mask & 0xFFFF)); 260 + } 261 + 262 + struct tnum tnum_bswap32(struct tnum a) 263 + { 264 + return TNUM(swab32(a.value & 0xFFFFFFFF), swab32(a.mask & 0xFFFFFFFF)); 265 + } 266 + 267 + struct tnum tnum_bswap64(struct tnum a) 268 + { 269 + return TNUM(swab64(a.value), swab64(a.mask)); 256 270 }
+57 -3
kernel/bpf/verifier.c
··· 15832 15832 __update_reg_bounds(dst_reg); 15833 15833 } 15834 15834 15835 + static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn) 15836 + { 15837 + /* 15838 + * Byte swap operation - update var_off using tnum_bswap. 15839 + * Three cases: 15840 + * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE) 15841 + * unconditional swap 15842 + * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE) 15843 + * swap on big-endian, truncation or no-op on little-endian 15844 + * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE) 15845 + * swap on little-endian, truncation or no-op on big-endian 15846 + */ 15847 + 15848 + bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64; 15849 + bool to_le = BPF_SRC(insn->code) == BPF_TO_LE; 15850 + bool is_big_endian; 15851 + #ifdef CONFIG_CPU_BIG_ENDIAN 15852 + is_big_endian = true; 15853 + #else 15854 + is_big_endian = false; 15855 + #endif 15856 + /* Apply bswap if alu64 or switch between big-endian and little-endian machines */ 15857 + bool need_bswap = alu64 || (to_le == is_big_endian); 15858 + 15859 + if (need_bswap) { 15860 + if (insn->imm == 16) 15861 + dst_reg->var_off = tnum_bswap16(dst_reg->var_off); 15862 + else if (insn->imm == 32) 15863 + dst_reg->var_off = tnum_bswap32(dst_reg->var_off); 15864 + else if (insn->imm == 64) 15865 + dst_reg->var_off = tnum_bswap64(dst_reg->var_off); 15866 + /* 15867 + * Byteswap scrambles the range, so we must reset bounds. 15868 + * Bounds will be re-derived from the new tnum later. 
15869 + */ 15870 + __mark_reg_unbounded(dst_reg); 15871 + } 15872 + /* For bswap16/32, truncate dst register to match the swapped size */ 15873 + if (insn->imm == 16 || insn->imm == 32) 15874 + coerce_reg_to_size(dst_reg, insn->imm / 8); 15875 + } 15876 + 15835 15877 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn, 15836 15878 const struct bpf_reg_state *src_reg) 15837 15879 { ··· 15900 15858 case BPF_XOR: 15901 15859 case BPF_OR: 15902 15860 case BPF_MUL: 15861 + case BPF_END: 15903 15862 return true; 15904 15863 15905 15864 /* ··· 16090 16047 else 16091 16048 scalar_min_max_arsh(dst_reg, &src_reg); 16092 16049 break; 16050 + case BPF_END: 16051 + scalar_byte_swap(dst_reg, insn); 16052 + break; 16093 16053 default: 16094 16054 break; 16095 16055 } 16096 16056 16097 - /* ALU32 ops are zero extended into 64bit register */ 16098 - if (alu32) 16057 + /* 16058 + * ALU32 ops are zero extended into 64bit register. 16059 + * 16060 + * BPF_END is already handled inside the helper (truncation), 16061 + * so skip zext here to avoid unexpected zero extension. 16062 + * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40 16063 + * This is a 64bit byte swap operation with alu32==true, 16064 + * but we should not zero extend the result. 16065 + */ 16066 + if (alu32 && opcode != BPF_END) 16099 16067 zext_32_to_64(dst_reg); 16100 16068 reg_bounds_sync(dst_reg); 16101 16069 return 0; ··· 16286 16232 } 16287 16233 16288 16234 /* check dest operand */ 16289 - if (opcode == BPF_NEG && 16235 + if ((opcode == BPF_NEG || opcode == BPF_END) && 16290 16236 regs[insn->dst_reg].type == SCALAR_VALUE) { 16291 16237 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 16292 16238 err = err ?: adjust_scalar_min_max_vals(env, insn,