Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

powerpc64/bpf: Add support for bpf trampolines

Add support for bpf_arch_text_poke() and arch_prepare_bpf_trampoline()
for 64-bit powerpc. While the code is generic, BPF trampolines are only
enabled on 64-bit powerpc. 32-bit powerpc will need testing and some
updates.

BPF trampolines adhere to the existing ftrace ABI, which uses a
two-instruction profiling sequence, as well as the newer ABI, which uses a
three-instruction profiling sequence enabling return with a 'blr'. The
trampoline code itself closely follows the x86 implementation.

The BPF prog JIT is extended to mimic the 64-bit powerpc approach for
ftrace: a single nop at function entry, followed by the function
profiling sequence out-of-line and a separate long branch stub for calls
to trampolines that are out of range. A dummy_tramp is provided to
simplify synchronization, similar to arm64.

When attaching a bpf trampoline to a bpf prog, we can patch up to three
things:
- the nop at bpf prog entry to go to the out-of-line stub
- the instruction in the out-of-line stub to either call the bpf trampoline
directly, or to branch to the long_branch stub.
- the trampoline address before the long_branch stub.

We do not need any synchronization here since we always have a valid
branch target regardless of the order in which the above stores are
seen. dummy_tramp ensures that the long_branch stub goes to a valid
destination on other cpus, even when the branch to the long_branch stub
is seen before the updated trampoline address.

However, when detaching a bpf trampoline from a bpf prog, or if changing
the bpf trampoline address, we need synchronization to ensure that other
cpus can no longer branch into the older trampoline so that it can be
safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus
make forward progress, but we still need to ensure that other cpus
execute isync (or some other context synchronizing instruction, CSI) so that they don't go back into the
trampoline again. While here, update the stale comment that describes
the redzone usage in ppc64 BPF JIT.

Signed-off-by: Naveen N Rao <naveen@kernel.org>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://patch.msgid.link/20241030070850.1361304-18-hbathini@linux.ibm.com

authored by

Naveen N Rao and committed by
Michael Ellerman
d243b62b 71db948b

+891 -5
+14
arch/powerpc/include/asm/ppc-opcode.h
··· 587 587 #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) 588 588 #define PPC_RAW_EIEIO() (0x7c0006ac) 589 589 590 + /* bcl 20,31,$+4 */ 591 + #define PPC_RAW_BCL4() (0x429f0005) 590 592 #define PPC_RAW_BRANCH(offset) (0x48000000 | PPC_LI(offset)) 591 593 #define PPC_RAW_BL(offset) (0x48000001 | PPC_LI(offset)) 592 594 #define PPC_RAW_TW(t0, a, b) (0x7c000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b)) 593 595 #define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0) 594 596 #define PPC_RAW_SETB(t, bfa) (0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2)) 597 + 598 + #ifdef CONFIG_PPC32 599 + #define PPC_RAW_STL PPC_RAW_STW 600 + #define PPC_RAW_STLU PPC_RAW_STWU 601 + #define PPC_RAW_LL PPC_RAW_LWZ 602 + #define PPC_RAW_CMPLI PPC_RAW_CMPWI 603 + #else 604 + #define PPC_RAW_STL PPC_RAW_STD 605 + #define PPC_RAW_STLU PPC_RAW_STDU 606 + #define PPC_RAW_LL PPC_RAW_LD 607 + #define PPC_RAW_CMPLI PPC_RAW_CMPDI 608 + #endif 595 609 596 610 /* Deal with instructions that older assemblers aren't aware of */ 597 611 #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
+17
arch/powerpc/net/bpf_jit.h
··· 12 12 13 13 #include <asm/types.h> 14 14 #include <asm/ppc-opcode.h> 15 + #include <linux/build_bug.h> 15 16 16 17 #ifdef CONFIG_PPC64_ELF_ABI_V1 17 18 #define FUNCTION_DESCR_SIZE 24 ··· 21 20 #endif 22 21 23 22 #define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4) 23 + 24 + #define SZL sizeof(unsigned long) 25 + #define BPF_INSN_SAFETY 64 24 26 25 27 #define PLANT_INSTR(d, idx, instr) \ 26 28 do { if (d) { (d)[idx] = instr; } idx++; } while (0) ··· 85 81 EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 86 82 0xffff)); \ 87 83 } } while (0) 84 + #define PPC_LI_ADDR PPC_LI64 85 + 86 + #ifndef CONFIG_PPC_KERNEL_PCREL 87 + #define PPC64_LOAD_PACA() \ 88 + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))) 89 + #else 90 + #define PPC64_LOAD_PACA() do {} while (0) 91 + #endif 92 + #else 93 + #define PPC_LI64(d, i) BUILD_BUG() 94 + #define PPC_LI_ADDR PPC_LI32 95 + #define PPC64_LOAD_PACA() BUILD_BUG() 88 96 #endif 89 97 90 98 /* ··· 181 165 u32 *addrs, int pass, bool extra_pass); 182 166 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); 183 167 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); 168 + void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx); 184 169 void bpf_jit_realloc_regs(struct codegen_context *ctx); 185 170 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); 186 171
+846 -1
arch/powerpc/net/bpf_jit_comp.c
··· 22 22 23 23 #include "bpf_jit.h" 24 24 25 + /* These offsets are from bpf prog end and stay the same across progs */ 26 + static int bpf_jit_ool_stub, bpf_jit_long_branch_stub; 27 + 25 28 static void bpf_jit_fill_ill_insns(void *area, unsigned int size) 26 29 { 27 30 memset32(area, BREAKPOINT_INSTRUCTION, size / 4); 31 + } 32 + 33 + void dummy_tramp(void); 34 + 35 + asm ( 36 + " .pushsection .text, \"ax\", @progbits ;" 37 + " .global dummy_tramp ;" 38 + " .type dummy_tramp, @function ;" 39 + "dummy_tramp: ;" 40 + #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE 41 + " blr ;" 42 + #else 43 + /* LR is always in r11, so we don't need a 'mflr r11' here */ 44 + " mtctr 11 ;" 45 + " mtlr 0 ;" 46 + " bctr ;" 47 + #endif 48 + " .size dummy_tramp, .-dummy_tramp ;" 49 + " .popsection ;" 50 + ); 51 + 52 + void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx) 53 + { 54 + int ool_stub_idx, long_branch_stub_idx; 55 + 56 + /* 57 + * Out-of-line stub: 58 + * mflr r0 59 + * [b|bl] tramp 60 + * mtlr r0 // only with CONFIG_PPC_FTRACE_OUT_OF_LINE 61 + * b bpf_func + 4 62 + */ 63 + ool_stub_idx = ctx->idx; 64 + EMIT(PPC_RAW_MFLR(_R0)); 65 + EMIT(PPC_RAW_NOP()); 66 + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) 67 + EMIT(PPC_RAW_MTLR(_R0)); 68 + WARN_ON_ONCE(!is_offset_in_branch_range(4 - (long)ctx->idx * 4)); 69 + EMIT(PPC_RAW_BRANCH(4 - (long)ctx->idx * 4)); 70 + 71 + /* 72 + * Long branch stub: 73 + * .long <dummy_tramp_addr> 74 + * mflr r11 75 + * bcl 20,31,$+4 76 + * mflr r12 77 + * ld r12, -8-SZL(r12) 78 + * mtctr r12 79 + * mtlr r11 // needed to retain ftrace ABI 80 + * bctr 81 + */ 82 + if (image) 83 + *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp; 84 + ctx->idx += SZL / 4; 85 + long_branch_stub_idx = ctx->idx; 86 + EMIT(PPC_RAW_MFLR(_R11)); 87 + EMIT(PPC_RAW_BCL4()); 88 + EMIT(PPC_RAW_MFLR(_R12)); 89 + EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL)); 90 + EMIT(PPC_RAW_MTCTR(_R12)); 91 + EMIT(PPC_RAW_MTLR(_R11)); 92 + EMIT(PPC_RAW_BCTR()); 93 + 94 + if 
(!bpf_jit_ool_stub) { 95 + bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4; 96 + bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4; 97 + } 28 98 } 29 99 30 100 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) ··· 292 222 293 223 fp->bpf_func = (void *)fimage; 294 224 fp->jited = 1; 295 - fp->jited_len = proglen + FUNCTION_DESCR_SIZE; 225 + fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE; 296 226 297 227 if (!fp->is_func || extra_pass) { 298 228 if (bpf_jit_binary_pack_finalize(fhdr, hdr)) { ··· 438 368 bool bpf_jit_supports_far_kfunc_call(void) 439 369 { 440 370 return IS_ENABLED(CONFIG_PPC64); 371 + } 372 + 373 + void *arch_alloc_bpf_trampoline(unsigned int size) 374 + { 375 + return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns); 376 + } 377 + 378 + void arch_free_bpf_trampoline(void *image, unsigned int size) 379 + { 380 + bpf_prog_pack_free(image, size); 381 + } 382 + 383 + int arch_protect_bpf_trampoline(void *image, unsigned int size) 384 + { 385 + return 0; 386 + } 387 + 388 + static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx, 389 + struct bpf_tramp_link *l, int regs_off, int retval_off, 390 + int run_ctx_off, bool save_ret) 391 + { 392 + struct bpf_prog *p = l->link.prog; 393 + ppc_inst_t branch_insn; 394 + u32 jmp_idx; 395 + int ret = 0; 396 + 397 + /* Save cookie */ 398 + if (IS_ENABLED(CONFIG_PPC64)) { 399 + PPC_LI64(_R3, l->cookie); 400 + EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx, 401 + bpf_cookie))); 402 + } else { 403 + PPC_LI32(_R3, l->cookie >> 32); 404 + PPC_LI32(_R4, l->cookie); 405 + EMIT(PPC_RAW_STW(_R3, _R1, 406 + run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie))); 407 + EMIT(PPC_RAW_STW(_R4, _R1, 408 + run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie) + 4)); 409 + } 410 + 411 + /* __bpf_prog_enter(p, &bpf_tramp_run_ctx) */ 412 + PPC_LI_ADDR(_R3, p); 413 + EMIT(PPC_RAW_MR(_R25, _R3)); 414 
+ EMIT(PPC_RAW_ADDI(_R4, _R1, run_ctx_off)); 415 + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, 416 + (unsigned long)bpf_trampoline_enter(p)); 417 + if (ret) 418 + return ret; 419 + 420 + /* Remember prog start time returned by __bpf_prog_enter */ 421 + EMIT(PPC_RAW_MR(_R26, _R3)); 422 + 423 + /* 424 + * if (__bpf_prog_enter(p) == 0) 425 + * goto skip_exec_of_prog; 426 + * 427 + * Emit a nop to be later patched with conditional branch, once offset is known 428 + */ 429 + EMIT(PPC_RAW_CMPLI(_R3, 0)); 430 + jmp_idx = ctx->idx; 431 + EMIT(PPC_RAW_NOP()); 432 + 433 + /* p->bpf_func(ctx) */ 434 + EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off)); 435 + if (!p->jited) 436 + PPC_LI_ADDR(_R4, (unsigned long)p->insnsi); 437 + if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func, 438 + BRANCH_SET_LINK)) { 439 + if (image) 440 + image[ctx->idx] = ppc_inst_val(branch_insn); 441 + ctx->idx++; 442 + } else { 443 + EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func))); 444 + EMIT(PPC_RAW_MTCTR(_R12)); 445 + EMIT(PPC_RAW_BCTRL()); 446 + } 447 + 448 + if (save_ret) 449 + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); 450 + 451 + /* Fix up branch */ 452 + if (image) { 453 + if (create_cond_branch(&branch_insn, &image[jmp_idx], 454 + (unsigned long)&image[ctx->idx], COND_EQ << 16)) 455 + return -EINVAL; 456 + image[jmp_idx] = ppc_inst_val(branch_insn); 457 + } 458 + 459 + /* __bpf_prog_exit(p, start_time, &bpf_tramp_run_ctx) */ 460 + EMIT(PPC_RAW_MR(_R3, _R25)); 461 + EMIT(PPC_RAW_MR(_R4, _R26)); 462 + EMIT(PPC_RAW_ADDI(_R5, _R1, run_ctx_off)); 463 + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, 464 + (unsigned long)bpf_trampoline_exit(p)); 465 + 466 + return ret; 467 + } 468 + 469 + static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx, 470 + struct bpf_tramp_links *tl, int regs_off, int retval_off, 471 + int run_ctx_off, u32 *branches) 472 + { 473 + int i; 474 + 475 + /* 476 + * The first fmod_ret 
program will receive a garbage return value. 477 + * Set this to 0 to avoid confusing the program. 478 + */ 479 + EMIT(PPC_RAW_LI(_R3, 0)); 480 + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); 481 + for (i = 0; i < tl->nr_links; i++) { 482 + if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off, 483 + run_ctx_off, true)) 484 + return -EINVAL; 485 + 486 + /* 487 + * mod_ret prog stored return value after prog ctx. Emit: 488 + * if (*(u64 *)(ret_val) != 0) 489 + * goto do_fexit; 490 + */ 491 + EMIT(PPC_RAW_LL(_R3, _R1, retval_off)); 492 + EMIT(PPC_RAW_CMPLI(_R3, 0)); 493 + 494 + /* 495 + * Save the location of the branch and generate a nop, which is 496 + * replaced with a conditional jump once do_fexit (i.e. the 497 + * start of the fexit invocation) is finalized. 498 + */ 499 + branches[i] = ctx->idx; 500 + EMIT(PPC_RAW_NOP()); 501 + } 502 + 503 + return 0; 504 + } 505 + 506 + static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx, 507 + int func_frame_offset, int r4_off) 508 + { 509 + if (IS_ENABLED(CONFIG_PPC64)) { 510 + /* See bpf_jit_stack_tailcallcnt() */ 511 + int tailcallcnt_offset = 6 * 8; 512 + 513 + EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); 514 + EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset)); 515 + } else { 516 + /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ 517 + EMIT(PPC_RAW_LL(_R4, _R1, r4_off)); 518 + } 519 + } 520 + 521 + static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx, 522 + int func_frame_offset, int r4_off) 523 + { 524 + if (IS_ENABLED(CONFIG_PPC64)) { 525 + /* See bpf_jit_stack_tailcallcnt() */ 526 + int tailcallcnt_offset = 6 * 8; 527 + 528 + EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset)); 529 + EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); 530 + } else { 531 + /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ 532 + EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); 533 + } 534 + } 535 + 536 + static void 
bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset, 537 + int nr_regs, int regs_off) 538 + { 539 + int param_save_area_offset; 540 + 541 + param_save_area_offset = func_frame_offset; /* the two frames we alloted */ 542 + param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ 543 + 544 + for (int i = 0; i < nr_regs; i++) { 545 + if (i < 8) { 546 + EMIT(PPC_RAW_STL(_R3 + i, _R1, regs_off + i * SZL)); 547 + } else { 548 + EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL)); 549 + EMIT(PPC_RAW_STL(_R3, _R1, regs_off + i * SZL)); 550 + } 551 + } 552 + } 553 + 554 + /* Used when restoring just the register parameters when returning back */ 555 + static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context *ctx, 556 + int nr_regs, int regs_off) 557 + { 558 + for (int i = 0; i < nr_regs && i < 8; i++) 559 + EMIT(PPC_RAW_LL(_R3 + i, _R1, regs_off + i * SZL)); 560 + } 561 + 562 + /* Used when we call into the traced function. 
Replicate parameter save area */ 563 + static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx, 564 + int func_frame_offset, int nr_regs, int regs_off) 565 + { 566 + int param_save_area_offset; 567 + 568 + param_save_area_offset = func_frame_offset; /* the two frames we alloted */ 569 + param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ 570 + 571 + for (int i = 8; i < nr_regs; i++) { 572 + EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL)); 573 + EMIT(PPC_RAW_STL(_R3, _R1, STACK_FRAME_MIN_SIZE + i * SZL)); 574 + } 575 + bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off); 576 + } 577 + 578 + static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image, 579 + void *rw_image_end, void *ro_image, 580 + const struct btf_func_model *m, u32 flags, 581 + struct bpf_tramp_links *tlinks, 582 + void *func_addr) 583 + { 584 + int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0; 585 + int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset; 586 + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; 587 + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; 588 + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; 589 + struct codegen_context codegen_ctx, *ctx; 590 + u32 *image = (u32 *)rw_image; 591 + ppc_inst_t branch_insn; 592 + u32 *branches = NULL; 593 + bool save_ret; 594 + 595 + if (IS_ENABLED(CONFIG_PPC32)) 596 + return -EOPNOTSUPP; 597 + 598 + nr_regs = m->nr_args; 599 + /* Extra registers for struct arguments */ 600 + for (i = 0; i < m->nr_args; i++) 601 + if (m->arg_size[i] > SZL) 602 + nr_regs += round_up(m->arg_size[i], SZL) / SZL - 1; 603 + 604 + if (nr_regs > MAX_BPF_FUNC_ARGS) 605 + return -EOPNOTSUPP; 606 + 607 + ctx = &codegen_ctx; 608 + memset(ctx, 0, sizeof(*ctx)); 609 + 610 + /* 611 + * Generated stack layout: 612 + * 613 + * func prev back chain [ 
back chain ] 614 + * [ ] 615 + * bpf prog redzone/tailcallcnt [ ... ] 64 bytes (64-bit powerpc) 616 + * [ ] -- 617 + * LR save area [ r0 save (64-bit) ] | header 618 + * [ r0 save (32-bit) ] | 619 + * dummy frame for unwind [ back chain 1 ] -- 620 + * [ padding ] align stack frame 621 + * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc 622 + * alt_lr_off [ real lr (ool stub)] optional - actual lr 623 + * [ r26 ] 624 + * nvr_off [ r25 ] nvr save area 625 + * retval_off [ return value ] 626 + * [ reg argN ] 627 + * [ ... ] 628 + * regs_off [ reg_arg1 ] prog ctx context 629 + * nregs_off [ args count ] 630 + * ip_off [ traced function ] 631 + * [ ... ] 632 + * run_ctx_off [ bpf_tramp_run_ctx ] 633 + * [ reg argN ] 634 + * [ ... ] 635 + * param_save_area [ reg_arg1 ] min 8 doublewords, per ABI 636 + * [ TOC save (64-bit) ] -- 637 + * [ LR save (64-bit) ] | header 638 + * [ LR save (32-bit) ] | 639 + * bpf trampoline frame [ back chain 2 ] -- 640 + * 641 + */ 642 + 643 + /* Minimum stack frame header */ 644 + bpf_frame_size = STACK_FRAME_MIN_SIZE; 645 + 646 + /* 647 + * Room for parameter save area. 648 + * 649 + * As per the ABI, this is required if we call into the traced 650 + * function (BPF_TRAMP_F_CALL_ORIG): 651 + * - if the function takes more than 8 arguments for the rest to spill onto the stack 652 + * - or, if the function has variadic arguments 653 + * - or, if this functions's prototype was not available to the caller 654 + * 655 + * Reserve space for at least 8 registers for now. This can be optimized later. 656 + */ 657 + bpf_frame_size += (nr_regs > 8 ? 
nr_regs : 8) * SZL; 658 + 659 + /* Room for struct bpf_tramp_run_ctx */ 660 + run_ctx_off = bpf_frame_size; 661 + bpf_frame_size += round_up(sizeof(struct bpf_tramp_run_ctx), SZL); 662 + 663 + /* Room for IP address argument */ 664 + ip_off = bpf_frame_size; 665 + if (flags & BPF_TRAMP_F_IP_ARG) 666 + bpf_frame_size += SZL; 667 + 668 + /* Room for args count */ 669 + nregs_off = bpf_frame_size; 670 + bpf_frame_size += SZL; 671 + 672 + /* Room for args */ 673 + regs_off = bpf_frame_size; 674 + bpf_frame_size += nr_regs * SZL; 675 + 676 + /* Room for return value of func_addr or fentry prog */ 677 + retval_off = bpf_frame_size; 678 + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); 679 + if (save_ret) 680 + bpf_frame_size += SZL; 681 + 682 + /* Room for nvr save area */ 683 + nvr_off = bpf_frame_size; 684 + bpf_frame_size += 2 * SZL; 685 + 686 + /* Optional save area for actual LR in case of ool ftrace */ 687 + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { 688 + alt_lr_off = bpf_frame_size; 689 + bpf_frame_size += SZL; 690 + } 691 + 692 + if (IS_ENABLED(CONFIG_PPC32)) { 693 + if (nr_regs < 2) { 694 + r4_off = bpf_frame_size; 695 + bpf_frame_size += SZL; 696 + } else { 697 + r4_off = regs_off + SZL; 698 + } 699 + } 700 + 701 + /* Padding to align stack frame, if any */ 702 + bpf_frame_size = round_up(bpf_frame_size, SZL * 2); 703 + 704 + /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */ 705 + bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64; 706 + 707 + /* Offset to the traced function's stack frame */ 708 + func_frame_offset = bpf_dummy_frame_size + bpf_frame_size; 709 + 710 + /* Create dummy frame for unwind, store original return value */ 711 + EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF)); 712 + /* Protect red zone where tail call count goes */ 713 + EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size)); 714 + 715 + /* Create our stack frame */ 716 + EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size)); 717 + 
718 + /* 64-bit: Save TOC and load kernel TOC */ 719 + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { 720 + EMIT(PPC_RAW_STD(_R2, _R1, 24)); 721 + PPC64_LOAD_PACA(); 722 + } 723 + 724 + /* 32-bit: save tail call count in r4 */ 725 + if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2) 726 + EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); 727 + 728 + bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off); 729 + 730 + /* Save our return address */ 731 + EMIT(PPC_RAW_MFLR(_R3)); 732 + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) 733 + EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off)); 734 + else 735 + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); 736 + 737 + /* 738 + * Save ip address of the traced function. 739 + * We could recover this from LR, but we will need to address for OOL trampoline, 740 + * and optional GEP area. 741 + */ 742 + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) { 743 + EMIT(PPC_RAW_LWZ(_R4, _R3, 4)); 744 + EMIT(PPC_RAW_SLWI(_R4, _R4, 6)); 745 + EMIT(PPC_RAW_SRAWI(_R4, _R4, 6)); 746 + EMIT(PPC_RAW_ADD(_R3, _R3, _R4)); 747 + EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); 748 + } 749 + 750 + if (flags & BPF_TRAMP_F_IP_ARG) 751 + EMIT(PPC_RAW_STL(_R3, _R1, ip_off)); 752 + 753 + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) 754 + /* Fake our LR for unwind */ 755 + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); 756 + 757 + /* Save function arg count -- see bpf_get_func_arg_cnt() */ 758 + EMIT(PPC_RAW_LI(_R3, nr_regs)); 759 + EMIT(PPC_RAW_STL(_R3, _R1, nregs_off)); 760 + 761 + /* Save nv regs */ 762 + EMIT(PPC_RAW_STL(_R25, _R1, nvr_off)); 763 + EMIT(PPC_RAW_STL(_R26, _R1, nvr_off + SZL)); 764 + 765 + if (flags & BPF_TRAMP_F_CALL_ORIG) { 766 + PPC_LI_ADDR(_R3, (unsigned long)im); 767 + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, 768 + (unsigned long)__bpf_tramp_enter); 769 + if (ret) 770 + return ret; 771 + } 772 + 773 + for (i = 0; i < fentry->nr_links; i++) 774 + 
if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off, 775 + run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET)) 776 + return -EINVAL; 777 + 778 + if (fmod_ret->nr_links) { 779 + branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL); 780 + if (!branches) 781 + return -ENOMEM; 782 + 783 + if (invoke_bpf_mod_ret(image, ro_image, ctx, fmod_ret, regs_off, retval_off, 784 + run_ctx_off, branches)) { 785 + ret = -EINVAL; 786 + goto cleanup; 787 + } 788 + } 789 + 790 + /* Call the traced function */ 791 + if (flags & BPF_TRAMP_F_CALL_ORIG) { 792 + /* 793 + * The address in LR save area points to the correct point in the original function 794 + * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction 795 + * sequence 796 + */ 797 + EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); 798 + EMIT(PPC_RAW_MTCTR(_R3)); 799 + 800 + /* Replicate tail_call_cnt before calling the original BPF prog */ 801 + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 802 + bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off); 803 + 804 + /* Restore args */ 805 + bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off); 806 + 807 + /* Restore TOC for 64-bit */ 808 + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) 809 + EMIT(PPC_RAW_LD(_R2, _R1, 24)); 810 + EMIT(PPC_RAW_BCTRL()); 811 + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) 812 + PPC64_LOAD_PACA(); 813 + 814 + /* Store return value for bpf prog to access */ 815 + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); 816 + 817 + /* Restore updated tail_call_cnt */ 818 + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 819 + bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off); 820 + 821 + /* Reserve space to patch branch instruction to skip fexit progs */ 822 + im->ip_after_call = &((u32 *)ro_image)[ctx->idx]; 823 + EMIT(PPC_RAW_NOP()); 824 + } 825 + 826 + /* Update 
branches saved in invoke_bpf_mod_ret with address of do_fexit */ 827 + for (i = 0; i < fmod_ret->nr_links && image; i++) { 828 + if (create_cond_branch(&branch_insn, &image[branches[i]], 829 + (unsigned long)&image[ctx->idx], COND_NE << 16)) { 830 + ret = -EINVAL; 831 + goto cleanup; 832 + } 833 + 834 + image[branches[i]] = ppc_inst_val(branch_insn); 835 + } 836 + 837 + for (i = 0; i < fexit->nr_links; i++) 838 + if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off, 839 + run_ctx_off, false)) { 840 + ret = -EINVAL; 841 + goto cleanup; 842 + } 843 + 844 + if (flags & BPF_TRAMP_F_CALL_ORIG) { 845 + im->ip_epilogue = &((u32 *)ro_image)[ctx->idx]; 846 + PPC_LI_ADDR(_R3, im); 847 + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, 848 + (unsigned long)__bpf_tramp_exit); 849 + if (ret) 850 + goto cleanup; 851 + } 852 + 853 + if (flags & BPF_TRAMP_F_RESTORE_REGS) 854 + bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off); 855 + 856 + /* Restore return value of func_addr or fentry prog */ 857 + if (save_ret) 858 + EMIT(PPC_RAW_LL(_R3, _R1, retval_off)); 859 + 860 + /* Restore nv regs */ 861 + EMIT(PPC_RAW_LL(_R26, _R1, nvr_off + SZL)); 862 + EMIT(PPC_RAW_LL(_R25, _R1, nvr_off)); 863 + 864 + /* Epilogue */ 865 + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) 866 + EMIT(PPC_RAW_LD(_R2, _R1, 24)); 867 + if (flags & BPF_TRAMP_F_SKIP_FRAME) { 868 + /* Skip the traced function and return to parent */ 869 + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); 870 + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); 871 + EMIT(PPC_RAW_MTLR(_R0)); 872 + EMIT(PPC_RAW_BLR()); 873 + } else { 874 + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { 875 + EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off)); 876 + EMIT(PPC_RAW_MTLR(_R0)); 877 + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); 878 + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); 879 + EMIT(PPC_RAW_BLR()); 880 + } else { 881 + EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + 
PPC_LR_STKOFF)); 882 + EMIT(PPC_RAW_MTCTR(_R0)); 883 + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); 884 + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); 885 + EMIT(PPC_RAW_MTLR(_R0)); 886 + EMIT(PPC_RAW_BCTR()); 887 + } 888 + } 889 + 890 + /* Make sure the trampoline generation logic doesn't overflow */ 891 + if (image && WARN_ON_ONCE(&image[ctx->idx] > (u32 *)rw_image_end - BPF_INSN_SAFETY)) { 892 + ret = -EFAULT; 893 + goto cleanup; 894 + } 895 + ret = ctx->idx * 4 + BPF_INSN_SAFETY * 4; 896 + 897 + cleanup: 898 + kfree(branches); 899 + return ret; 900 + } 901 + 902 + int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, 903 + struct bpf_tramp_links *tlinks, void *func_addr) 904 + { 905 + struct bpf_tramp_image im; 906 + void *image; 907 + int ret; 908 + 909 + /* 910 + * Allocate a temporary buffer for __arch_prepare_bpf_trampoline(). 911 + * This will NOT cause fragmentation in direct map, as we do not 912 + * call set_memory_*() on this buffer. 913 + * 914 + * We cannot use kvmalloc here, because we need image to be in 915 + * module memory range. 916 + */ 917 + image = bpf_jit_alloc_exec(PAGE_SIZE); 918 + if (!image) 919 + return -ENOMEM; 920 + 921 + ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image, 922 + m, flags, tlinks, func_addr); 923 + bpf_jit_free_exec(image); 924 + 925 + return ret; 926 + } 927 + 928 + int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, 929 + const struct btf_func_model *m, u32 flags, 930 + struct bpf_tramp_links *tlinks, 931 + void *func_addr) 932 + { 933 + u32 size = image_end - image; 934 + void *rw_image, *tmp; 935 + int ret; 936 + 937 + /* 938 + * rw_image doesn't need to be in module memory range, so we can 939 + * use kvmalloc. 
940 + */ 941 + rw_image = kvmalloc(size, GFP_KERNEL); 942 + if (!rw_image) 943 + return -ENOMEM; 944 + 945 + ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m, 946 + flags, tlinks, func_addr); 947 + if (ret < 0) 948 + goto out; 949 + 950 + if (bpf_jit_enable > 1) 951 + bpf_jit_dump(1, ret - BPF_INSN_SAFETY * 4, 1, rw_image); 952 + 953 + tmp = bpf_arch_text_copy(image, rw_image, size); 954 + if (IS_ERR(tmp)) 955 + ret = PTR_ERR(tmp); 956 + 957 + out: 958 + kvfree(rw_image); 959 + return ret; 960 + } 961 + 962 + static int bpf_modify_inst(void *ip, ppc_inst_t old_inst, ppc_inst_t new_inst) 963 + { 964 + ppc_inst_t org_inst; 965 + 966 + if (copy_inst_from_kernel_nofault(&org_inst, ip)) { 967 + pr_err("0x%lx: fetching instruction failed\n", (unsigned long)ip); 968 + return -EFAULT; 969 + } 970 + 971 + if (!ppc_inst_equal(org_inst, old_inst)) { 972 + pr_err("0x%lx: expected (%08lx) != found (%08lx)\n", 973 + (unsigned long)ip, ppc_inst_as_ulong(old_inst), ppc_inst_as_ulong(org_inst)); 974 + return -EINVAL; 975 + } 976 + 977 + if (ppc_inst_equal(old_inst, new_inst)) 978 + return 0; 979 + 980 + return patch_instruction(ip, new_inst); 981 + } 982 + 983 + static void do_isync(void *info __maybe_unused) 984 + { 985 + isync(); 986 + } 987 + 988 + /* 989 + * A 3-step process for bpf prog entry: 990 + * 1. At bpf prog entry, a single nop/b: 991 + * bpf_func: 992 + * [nop|b] ool_stub 993 + * 2. Out-of-line stub: 994 + * ool_stub: 995 + * mflr r0 996 + * [b|bl] <bpf_prog>/<long_branch_stub> 997 + * mtlr r0 // CONFIG_PPC_FTRACE_OUT_OF_LINE only 998 + * b bpf_func + 4 999 + * 3. Long branch stub: 1000 + * long_branch_stub: 1001 + * .long <branch_addr>/<dummy_tramp> 1002 + * mflr r11 1003 + * bcl 20,31,$+4 1004 + * mflr r12 1005 + * ld r12, -16(r12) 1006 + * mtctr r12 1007 + * mtlr r11 // needed to retain ftrace ABI 1008 + * bctr 1009 + * 1010 + * dummy_tramp is used to reduce synchronization requirements. 
1011 + * 1012 + * When attaching a bpf trampoline to a bpf prog, we do not need any 1013 + * synchronization here since we always have a valid branch target regardless 1014 + * of the order in which the above stores are seen. dummy_tramp ensures that 1015 + * the long_branch stub goes to a valid destination on other cpus, even when 1016 + * the branch to the long_branch stub is seen before the updated trampoline 1017 + * address. 1018 + * 1019 + * However, when detaching a bpf trampoline from a bpf prog, or if changing 1020 + * the bpf trampoline address, we need synchronization to ensure that other 1021 + * cpus can no longer branch into the older trampoline so that it can be 1022 + * safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus 1023 + * make forward progress, but we still need to ensure that other cpus 1024 + * execute isync (or some CSI) so that they don't go back into the 1025 + * trampoline again. 1026 + */ 1027 + int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, 1028 + void *old_addr, void *new_addr) 1029 + { 1030 + unsigned long bpf_func, bpf_func_end, size, offset; 1031 + ppc_inst_t old_inst, new_inst; 1032 + int ret = 0, branch_flags; 1033 + char name[KSYM_NAME_LEN]; 1034 + 1035 + if (IS_ENABLED(CONFIG_PPC32)) 1036 + return -EOPNOTSUPP; 1037 + 1038 + bpf_func = (unsigned long)ip; 1039 + branch_flags = poke_type == BPF_MOD_CALL ? 
BRANCH_SET_LINK : 0; 1040 + 1041 + /* We currently only support poking bpf programs */ 1042 + if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) { 1043 + pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func); 1044 + return -EOPNOTSUPP; 1045 + } 1046 + 1047 + /* 1048 + * If we are not poking at bpf prog entry, then we are simply patching in/out 1049 + * an unconditional branch instruction at im->ip_after_call 1050 + */ 1051 + if (offset) { 1052 + if (poke_type != BPF_MOD_JUMP) { 1053 + pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__, 1054 + bpf_func); 1055 + return -EOPNOTSUPP; 1056 + } 1057 + old_inst = ppc_inst(PPC_RAW_NOP()); 1058 + if (old_addr) 1059 + if (create_branch(&old_inst, ip, (unsigned long)old_addr, 0)) 1060 + return -ERANGE; 1061 + new_inst = ppc_inst(PPC_RAW_NOP()); 1062 + if (new_addr) 1063 + if (create_branch(&new_inst, ip, (unsigned long)new_addr, 0)) 1064 + return -ERANGE; 1065 + mutex_lock(&text_mutex); 1066 + ret = bpf_modify_inst(ip, old_inst, new_inst); 1067 + mutex_unlock(&text_mutex); 1068 + 1069 + /* Make sure all cpus see the new instruction */ 1070 + smp_call_function(do_isync, NULL, 1); 1071 + return ret; 1072 + } 1073 + 1074 + bpf_func_end = bpf_func + size; 1075 + 1076 + /* Address of the jmp/call instruction in the out-of-line stub */ 1077 + ip = (void *)(bpf_func_end - bpf_jit_ool_stub + 4); 1078 + 1079 + if (!is_offset_in_branch_range((long)ip - 4 - bpf_func)) { 1080 + pr_err("%s (0x%lx): bpf prog too large, ool stub out of branch range\n", __func__, 1081 + bpf_func); 1082 + return -ERANGE; 1083 + } 1084 + 1085 + old_inst = ppc_inst(PPC_RAW_NOP()); 1086 + if (old_addr) { 1087 + if (is_offset_in_branch_range(ip - old_addr)) 1088 + create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags); 1089 + else 1090 + create_branch(&old_inst, ip, bpf_func_end - bpf_jit_long_branch_stub, 1091 + branch_flags); 1092 + } 1093 + new_inst = ppc_inst(PPC_RAW_NOP()); 1094 + if 
(new_addr) { 1095 + if (is_offset_in_branch_range(ip - new_addr)) 1096 + create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags); 1097 + else 1098 + create_branch(&new_inst, ip, bpf_func_end - bpf_jit_long_branch_stub, 1099 + branch_flags); 1100 + } 1101 + 1102 + mutex_lock(&text_mutex); 1103 + 1104 + /* 1105 + * 1. Update the address in the long branch stub: 1106 + * If new_addr is out of range, we will have to use the long branch stub, so patch new_addr 1107 + * here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here. 1108 + */ 1109 + if ((new_addr && !is_offset_in_branch_range(new_addr - ip)) || 1110 + (old_addr && !is_offset_in_branch_range(old_addr - ip))) 1111 + ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL), 1112 + (new_addr && !is_offset_in_branch_range(new_addr - ip)) ? 1113 + (unsigned long)new_addr : (unsigned long)dummy_tramp); 1114 + if (ret) 1115 + goto out; 1116 + 1117 + /* 2. Update the branch/call in the out-of-line stub */ 1118 + ret = bpf_modify_inst(ip, old_inst, new_inst); 1119 + if (ret) 1120 + goto out; 1121 + 1122 + /* 3. Update instruction at bpf prog entry */ 1123 + ip = (void *)bpf_func; 1124 + if (!old_addr || !new_addr) { 1125 + if (!old_addr) { 1126 + old_inst = ppc_inst(PPC_RAW_NOP()); 1127 + create_branch(&new_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0); 1128 + } else { 1129 + new_inst = ppc_inst(PPC_RAW_NOP()); 1130 + create_branch(&old_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0); 1131 + } 1132 + ret = bpf_modify_inst(ip, old_inst, new_inst); 1133 + } 1134 + 1135 + out: 1136 + mutex_unlock(&text_mutex); 1137 + 1138 + /* 1139 + * Sync only if we are not attaching a trampoline to a bpf prog so the older 1140 + * trampoline can be freed safely. 1141 + */ 1142 + if (old_addr) 1143 + smp_call_function(do_isync, NULL, 1); 1144 + 1145 + return ret; 441 1146 }
+6 -1
arch/powerpc/net/bpf_jit_comp32.c
··· 127 127 { 128 128 int i; 129 129 130 + /* Instruction for trampoline attach */ 131 + EMIT(PPC_RAW_NOP()); 132 + 130 133 /* Initialize tail_call_cnt, to be skipped if we do tail calls. */ 131 134 if (ctx->seen & SEEN_TAILCALL) 132 135 EMIT(PPC_RAW_LI(_R4, 0)); 133 136 else 134 137 EMIT(PPC_RAW_NOP()); 135 138 136 - #define BPF_TAILCALL_PROLOGUE_SIZE 4 139 + #define BPF_TAILCALL_PROLOGUE_SIZE 8 137 140 138 141 if (bpf_has_stack_frame(ctx)) 139 142 EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); ··· 201 198 bpf_jit_emit_common_epilogue(image, ctx); 202 199 203 200 EMIT(PPC_RAW_BLR()); 201 + 202 + bpf_jit_build_fentry_stubs(image, ctx); 204 203 } 205 204 206 205 /* Relative offset needs to be calculated based on final image location */
+8 -3
arch/powerpc/net/bpf_jit_comp64.c
··· 84 84 } 85 85 86 86 /* 87 - * When not setting up our own stackframe, the redzone usage is: 87 + * When not setting up our own stackframe, the redzone (288 bytes) usage is: 88 88 * 89 89 * [ prev sp ] <------------- 90 90 * [ ... ] | ··· 92 92 * [ nv gpr save area ] 5*8 93 93 * [ tail_call_cnt ] 8 94 94 * [ local_tmp_var ] 16 95 - * [ unused red zone ] 208 bytes protected 95 + * [ unused red zone ] 224 96 96 */ 97 97 static int bpf_jit_stack_local(struct codegen_context *ctx) 98 98 { ··· 125 125 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) 126 126 { 127 127 int i; 128 + 129 + /* Instruction for trampoline attach */ 130 + EMIT(PPC_RAW_NOP()); 128 131 129 132 #ifndef CONFIG_PPC_KERNEL_PCREL 130 133 if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) ··· 203 200 EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); 204 201 205 202 EMIT(PPC_RAW_BLR()); 203 + 204 + bpf_jit_build_fentry_stubs(image, ctx); 206 205 } 207 206 208 207 int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) ··· 308 303 */ 309 304 int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); 310 305 int b2p_index = bpf_to_ppc(BPF_REG_3); 311 - int bpf_tailcall_prologue_size = 8; 306 + int bpf_tailcall_prologue_size = 12; 312 307 313 308 if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) 314 309 bpf_tailcall_prologue_size += 4; /* skip past the toc load */