Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'signed-loads-from-arena'

Puranjay Mohan says:

====================
Signed loads from Arena

Changelog:

v3 -> v4:
v3: https://lore.kernel.org/all/20250915162848.54282-1-puranjay@kernel.org/
- Update bpf_jit_supports_insn() in riscv jit to reject signed arena loads (Eduard)
- Fix coding style related to braces usage in an if statement in x86 jit (Eduard)

v2 -> v3:
v2: https://lore.kernel.org/bpf/20250514175415.2045783-1-memxor@gmail.com/
- Fix encoding for the generated instructions in x86 JIT (Eduard)
The patch in v2 was generating instructions like:
42 63 44 20 f8 movslq -0x8(%rax,%r12), %eax
This doesn't make sense because movslq outputs a 64-bit result, but
the destination register here is set to eax (32-bit). The fix is to
set the REX.W bit in the opcode, that means changing
EMIT2(add_3mod(0x40, ...)) to EMIT2(add_3mod(0x48, ...))
- Add arm64 support
- Add selftests for signed loads from arena.

v1 -> v2:
v1: https://lore.kernel.org/bpf/20250509194956.1635207-1-memxor@gmail.com
- Use bpf_jit_supports_insn. (Alexei)

Currently, signed load instructions from arena memory are unsupported.
The compiler is free to generate these, and on GCC-14 we see a
corresponding error when it happens. The hurdle in supporting them is
deciding which unused opcode to use to mark them for the JIT's own
consumption. After much thinking, it appears 0xc0 / BPF_NOSPEC can be
combined with load instructions to identify signed arena loads. Use
this to recognize and JIT them appropriately, and remove the verifier
side limitation on the program if the JIT supports them.
====================

Link: https://patch.msgid.link/20250923110157.18326-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+251 -14
+17 -8
arch/arm64/net/bpf_jit_comp.c
··· 1133 1133 return 0; 1134 1134 1135 1135 if (BPF_MODE(insn->code) != BPF_PROBE_MEM && 1136 - BPF_MODE(insn->code) != BPF_PROBE_MEMSX && 1137 - BPF_MODE(insn->code) != BPF_PROBE_MEM32 && 1138 - BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) 1136 + BPF_MODE(insn->code) != BPF_PROBE_MEMSX && 1137 + BPF_MODE(insn->code) != BPF_PROBE_MEM32 && 1138 + BPF_MODE(insn->code) != BPF_PROBE_MEM32SX && 1139 + BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) 1139 1140 return 0; 1140 1141 1141 1142 is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) || 1143 + (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) || 1142 1144 (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC); 1143 1145 1144 1146 if (!ctx->prog->aux->extable || ··· 1661 1659 case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: 1662 1660 case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: 1663 1661 case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: 1664 - if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { 1662 + case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B: 1663 + case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H: 1664 + case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W: 1665 + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32 || 1666 + BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) { 1665 1667 emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx); 1666 1668 src = tmp2; 1667 1669 } ··· 1677 1671 off_adj = off; 1678 1672 } 1679 1673 sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || 1680 - BPF_MODE(insn->code) == BPF_PROBE_MEMSX); 1674 + BPF_MODE(insn->code) == BPF_PROBE_MEMSX || 1675 + BPF_MODE(insn->code) == BPF_PROBE_MEM32SX); 1681 1676 switch (BPF_SIZE(code)) { 1682 1677 case BPF_W: 1683 1678 if (is_lsi_offset(off_adj, 2)) { ··· 1886 1879 if (ret) 1887 1880 return ret; 1888 1881 1889 - ret = add_exception_handler(insn, ctx, dst); 1890 - if (ret) 1891 - return ret; 1882 + if (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) { 1883 + ret = add_exception_handler(insn, ctx, dst); 1884 + if (ret) 1885 + return ret; 1886 + } 1892 1887 break; 1893 1888 1894 1889 default:
+5
arch/riscv/net/bpf_jit_comp64.c
··· 2066 2066 case BPF_STX | BPF_ATOMIC | BPF_DW: 2067 2067 if (insn->imm == BPF_CMPXCHG) 2068 2068 return rv_ext_enabled(ZACAS); 2069 + break; 2070 + case BPF_LDX | BPF_MEMSX | BPF_B: 2071 + case BPF_LDX | BPF_MEMSX | BPF_H: 2072 + case BPF_LDX | BPF_MEMSX | BPF_W: 2073 + return false; 2069 2074 } 2070 2075 } 2071 2076
+5
arch/s390/net/bpf_jit_comp.c
··· 2967 2967 case BPF_STX | BPF_ATOMIC | BPF_DW: 2968 2968 if (bpf_atomic_is_load_store(insn)) 2969 2969 return false; 2970 + break; 2971 + case BPF_LDX | BPF_MEMSX | BPF_B: 2972 + case BPF_LDX | BPF_MEMSX | BPF_H: 2973 + case BPF_LDX | BPF_MEMSX | BPF_W: 2974 + return false; 2970 2975 } 2971 2976 return true; 2972 2977 }
+37 -3
arch/x86/net/bpf_jit_comp.c
··· 1152 1152 *pprog = prog; 1153 1153 } 1154 1154 1155 + static void emit_ldsx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off) 1156 + { 1157 + u8 *prog = *pprog; 1158 + 1159 + switch (size) { 1160 + case BPF_B: 1161 + /* movsx rax, byte ptr [rax + r12 + off] */ 1162 + EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBE); 1163 + break; 1164 + case BPF_H: 1165 + /* movsx rax, word ptr [rax + r12 + off] */ 1166 + EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBF); 1167 + break; 1168 + case BPF_W: 1169 + /* movsx rax, dword ptr [rax + r12 + off] */ 1170 + EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x63); 1171 + break; 1172 + } 1173 + emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off); 1174 + *pprog = prog; 1175 + } 1176 + 1155 1177 static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) 1156 1178 { 1157 1179 emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off); 1180 + } 1181 + 1182 + static void emit_ldsx_r12(u8 **prog, u32 size, u32 dst_reg, u32 src_reg, int off) 1183 + { 1184 + emit_ldsx_index(prog, size, dst_reg, src_reg, X86_REG_R12, off); 1158 1185 } 1159 1186 1160 1187 /* STX: *(u8*)(dst_reg + off) = src_reg */ ··· 2136 2109 case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: 2137 2110 case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: 2138 2111 case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: 2112 + case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B: 2113 + case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H: 2114 + case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W: 2139 2115 case BPF_STX | BPF_PROBE_MEM32 | BPF_B: 2140 2116 case BPF_STX | BPF_PROBE_MEM32 | BPF_H: 2141 2117 case BPF_STX | BPF_PROBE_MEM32 | BPF_W: 2142 2118 case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: 2143 2119 start_of_ldx = prog; 2144 - if (BPF_CLASS(insn->code) == BPF_LDX) 2145 - emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); 2146 - else 2120 + if (BPF_CLASS(insn->code) == BPF_LDX) { 2121 + if (BPF_MODE(insn->code) == 
BPF_PROBE_MEM32SX) 2122 + emit_ldsx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); 2123 + else 2124 + emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); 2125 + } else { 2147 2126 emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); 2127 + } 2148 2128 populate_extable: 2149 2129 { 2150 2130 struct exception_table_entry *ex;
+3
include/linux/filter.h
··· 78 78 /* unused opcode to mark special atomic instruction */ 79 79 #define BPF_PROBE_ATOMIC 0xe0 80 80 81 + /* unused opcode to mark special ldsx instruction. Same as BPF_NOSPEC */ 82 + #define BPF_PROBE_MEM32SX 0xc0 83 + 81 84 /* unused opcode to mark call to interpreter with arguments */ 82 85 #define BPF_CALL_ARGS 0xe0 83 86
+8 -3
kernel/bpf/verifier.c
··· 21379 21379 continue; 21380 21380 case PTR_TO_ARENA: 21381 21381 if (BPF_MODE(insn->code) == BPF_MEMSX) { 21382 - verbose(env, "sign extending loads from arena are not supported yet\n"); 21383 - return -EOPNOTSUPP; 21382 + if (!bpf_jit_supports_insn(insn, true)) { 21383 + verbose(env, "sign extending loads from arena are not supported yet\n"); 21384 + return -EOPNOTSUPP; 21385 + } 21386 + insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code); 21387 + } else { 21388 + insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code); 21384 21389 } 21385 - insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code); 21386 21390 env->prog->aux->num_exentries++; 21387 21391 continue; 21388 21392 default: ··· 21592 21588 if (BPF_CLASS(insn->code) == BPF_LDX && 21593 21589 (BPF_MODE(insn->code) == BPF_PROBE_MEM || 21594 21590 BPF_MODE(insn->code) == BPF_PROBE_MEM32 || 21591 + BPF_MODE(insn->code) == BPF_PROBE_MEM32SX || 21595 21592 BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) 21596 21593 num_exentries++; 21597 21594 if ((BPF_CLASS(insn->code) == BPF_STX ||
+176
tools/testing/selftests/bpf/progs/verifier_ldsx.c
··· 3 3 #include <linux/bpf.h> 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 + #include "bpf_arena_common.h" 6 7 7 8 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 9 (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ 9 10 defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ 10 11 defined(__TARGET_ARCH_loongarch)) && \ 11 12 __clang_major__ >= 18 13 + 14 + struct { 15 + __uint(type, BPF_MAP_TYPE_ARENA); 16 + __uint(map_flags, BPF_F_MMAPABLE); 17 + __uint(max_entries, 1); 18 + } arena SEC(".maps"); 12 19 13 20 SEC("socket") 14 21 __description("LDSX, S8") ··· 261 254 : 262 255 : __imm_const(sk_buff_data_end, offsetof(struct __sk_buff, data_end)) 263 256 : __clobber_all); 257 + } 258 + 259 + SEC("syscall") 260 + __description("Arena LDSX Disasm") 261 + __success 262 + __arch_x86_64 263 + __jited("movslq 0x10(%rax,%r12), %r14") 264 + __jited("movswq 0x18(%rax,%r12), %r14") 265 + __jited("movsbq 0x20(%rax,%r12), %r14") 266 + __jited("movslq 0x10(%rdi,%r12), %r15") 267 + __jited("movswq 0x18(%rdi,%r12), %r15") 268 + __jited("movsbq 0x20(%rdi,%r12), %r15") 269 + __arch_arm64 270 + __jited("add x11, x7, x28") 271 + __jited("ldrsw x21, [x11, #0x10]") 272 + __jited("add x11, x7, x28") 273 + __jited("ldrsh x21, [x11, #0x18]") 274 + __jited("add x11, x7, x28") 275 + __jited("ldrsb x21, [x11, #0x20]") 276 + __jited("add x11, x0, x28") 277 + __jited("ldrsw x22, [x11, #0x10]") 278 + __jited("add x11, x0, x28") 279 + __jited("ldrsh x22, [x11, #0x18]") 280 + __jited("add x11, x0, x28") 281 + __jited("ldrsb x22, [x11, #0x20]") 282 + __naked void arena_ldsx_disasm(void *ctx) 283 + { 284 + asm volatile ( 285 + "r1 = %[arena] ll;" 286 + "r2 = 0;" 287 + "r3 = 1;" 288 + "r4 = %[numa_no_node];" 289 + "r5 = 0;" 290 + "call %[bpf_arena_alloc_pages];" 291 + "r0 = addr_space_cast(r0, 0x0, 0x1);" 292 + "r1 = r0;" 293 + "r8 = *(s32 *)(r0 + 16);" 294 + "r8 = *(s16 *)(r0 + 24);" 295 + "r8 = *(s8 *)(r0 + 32);" 296 + "r9 = *(s32 *)(r1 + 16);" 
297 + "r9 = *(s16 *)(r1 + 24);" 298 + "r9 = *(s8 *)(r1 + 32);" 299 + "r0 = 0;" 300 + "exit;" 301 + :: __imm(bpf_arena_alloc_pages), 302 + __imm_addr(arena), 303 + __imm_const(numa_no_node, NUMA_NO_NODE) 304 + : __clobber_all 305 + ); 306 + } 307 + 308 + SEC("syscall") 309 + __description("Arena LDSX Exception") 310 + __success __retval(0) 311 + __arch_x86_64 312 + __arch_arm64 313 + __naked void arena_ldsx_exception(void *ctx) 314 + { 315 + asm volatile ( 316 + "r1 = %[arena] ll;" 317 + "r0 = 0xdeadbeef;" 318 + "r0 = addr_space_cast(r0, 0x0, 0x1);" 319 + "r1 = 0x3fe;" 320 + "*(u64 *)(r0 + 0) = r1;" 321 + "r0 = *(s8 *)(r0 + 0);" 322 + "exit;" 323 + : 324 + : __imm_addr(arena) 325 + : __clobber_all 326 + ); 327 + } 328 + 329 + SEC("syscall") 330 + __description("Arena LDSX, S8") 331 + __success __retval(-1) 332 + __arch_x86_64 333 + __arch_arm64 334 + __naked void arena_ldsx_s8(void *ctx) 335 + { 336 + asm volatile ( 337 + "r1 = %[arena] ll;" 338 + "r2 = 0;" 339 + "r3 = 1;" 340 + "r4 = %[numa_no_node];" 341 + "r5 = 0;" 342 + "call %[bpf_arena_alloc_pages];" 343 + "r0 = addr_space_cast(r0, 0x0, 0x1);" 344 + "r1 = 0x3fe;" 345 + "*(u64 *)(r0 + 0) = r1;" 346 + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 347 + "r0 = *(s8 *)(r0 + 0);" 348 + #else 349 + "r0 = *(s8 *)(r0 + 7);" 350 + #endif 351 + "r0 >>= 1;" 352 + "exit;" 353 + :: __imm(bpf_arena_alloc_pages), 354 + __imm_addr(arena), 355 + __imm_const(numa_no_node, NUMA_NO_NODE) 356 + : __clobber_all 357 + ); 358 + } 359 + 360 + SEC("syscall") 361 + __description("Arena LDSX, S16") 362 + __success __retval(-1) 363 + __arch_x86_64 364 + __arch_arm64 365 + __naked void arena_ldsx_s16(void *ctx) 366 + { 367 + asm volatile ( 368 + "r1 = %[arena] ll;" 369 + "r2 = 0;" 370 + "r3 = 1;" 371 + "r4 = %[numa_no_node];" 372 + "r5 = 0;" 373 + "call %[bpf_arena_alloc_pages];" 374 + "r0 = addr_space_cast(r0, 0x0, 0x1);" 375 + "r1 = 0x3fffe;" 376 + "*(u64 *)(r0 + 0) = r1;" 377 + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 378 + "r0 = 
*(s16 *)(r0 + 0);" 379 + #else 380 + "r0 = *(s16 *)(r0 + 6);" 381 + #endif 382 + "r0 >>= 1;" 383 + "exit;" 384 + :: __imm(bpf_arena_alloc_pages), 385 + __imm_addr(arena), 386 + __imm_const(numa_no_node, NUMA_NO_NODE) 387 + : __clobber_all 388 + ); 389 + } 390 + 391 + SEC("syscall") 392 + __description("Arena LDSX, S32") 393 + __success __retval(-1) 394 + __arch_x86_64 395 + __arch_arm64 396 + __naked void arena_ldsx_s32(void *ctx) 397 + { 398 + asm volatile ( 399 + "r1 = %[arena] ll;" 400 + "r2 = 0;" 401 + "r3 = 1;" 402 + "r4 = %[numa_no_node];" 403 + "r5 = 0;" 404 + "call %[bpf_arena_alloc_pages];" 405 + "r0 = addr_space_cast(r0, 0x0, 0x1);" 406 + "r1 = 0xfffffffe;" 407 + "*(u64 *)(r0 + 0) = r1;" 408 + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 409 + "r0 = *(s32 *)(r0 + 0);" 410 + #else 411 + "r0 = *(s32 *)(r0 + 4);" 412 + #endif 413 + "r0 >>= 1;" 414 + "exit;" 415 + :: __imm(bpf_arena_alloc_pages), 416 + __imm_addr(arena), 417 + __imm_const(numa_no_node, NUMA_NO_NODE) 418 + : __clobber_all 419 + ); 420 + } 421 + 422 + /* to retain debug info for BTF generation */ 423 + void kfunc_root(void) 424 + { 425 + bpf_arena_alloc_pages(0, 0, 0, 0, 0); 264 426 } 265 427 266 428 #else