Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'bpf, x64: optimize JIT's pro/epilogue'

Maciej Fijalkowski says:

====================
Hi!

This small set can be considered a follow-up to the recent addition of
support for tail calls in BPF subprograms and is focused on optimizing
the x64 JIT prologue and epilogue sections.

It turns out that popping the tail call counter is no longer needed, and
the %rsp handling can be skipped when the stack depth is 0.

For longer explanations, please see the commit messages.

Thank you,
Maciej
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+23 -12
+23 -12
arch/x86/net/bpf_jit_comp.c
··· 281 281 EMIT1(0x55); /* push rbp */ 282 282 EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ 283 283 /* sub rsp, rounded_stack_depth */ 284 - EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); 284 + if (stack_depth) 285 + EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); 285 286 if (tail_call_reachable) 286 287 EMIT1(0x50); /* push rax */ 287 288 *pprog = prog; ··· 408 407 int tcc_off = -4 - round_up(stack_depth, 8); 409 408 u8 *prog = *pprog; 410 409 int pop_bytes = 0; 411 - int off1 = 49; 412 - int off2 = 38; 413 - int off3 = 16; 410 + int off1 = 42; 411 + int off2 = 31; 412 + int off3 = 9; 414 413 int cnt = 0; 415 414 416 415 /* count the additional bytes used for popping callee regs from stack ··· 421 420 off1 += pop_bytes; 422 421 off2 += pop_bytes; 423 422 off3 += pop_bytes; 423 + 424 + if (stack_depth) { 425 + off1 += 7; 426 + off2 += 7; 427 + off3 += 7; 428 + } 424 429 425 430 /* 426 431 * rdi - pointer to ctx ··· 472 465 prog = *pprog; 473 466 474 467 EMIT1(0x58); /* pop rax */ 475 - EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */ 476 - round_up(stack_depth, 8)); 468 + if (stack_depth) 469 + EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */ 470 + round_up(stack_depth, 8)); 477 471 478 472 /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */ 479 473 EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */ ··· 499 491 int tcc_off = -4 - round_up(stack_depth, 8); 500 492 u8 *prog = *pprog; 501 493 int pop_bytes = 0; 502 - int off1 = 27; 494 + int off1 = 20; 503 495 int poke_off; 504 496 int cnt = 0; 505 497 ··· 514 506 * total bytes for: 515 507 * - nop5/ jmpq $off 516 508 * - pop callee regs 517 - * - sub rsp, $val 509 + * - sub rsp, $val if depth > 0 518 510 * - pop rax 519 511 */ 520 - poke_off = X86_PATCH_SIZE + pop_bytes + 7 + 1; 512 + poke_off = X86_PATCH_SIZE + pop_bytes + 1; 513 + if (stack_depth) { 514 + poke_off += 7; 515 + off1 += 7; 516 + } 521 517 522 518 /* 523 519 * if (tail_call_cnt > MAX_TAIL_CALL_CNT) ··· 545 533 
pop_callee_regs(pprog, callee_regs_used); 546 534 prog = *pprog; 547 535 EMIT1(0x58); /* pop rax */ 548 - EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); 536 + if (stack_depth) 537 + EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); 549 538 550 539 memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); 551 540 prog += X86_PATCH_SIZE; ··· 1454 1441 /* Update cleanup_addr */ 1455 1442 ctx->cleanup_addr = proglen; 1456 1443 pop_callee_regs(&prog, callee_regs_used); 1457 - if (tail_call_reachable) 1458 - EMIT1(0x59); /* pop rcx, get rid of tail_call_cnt */ 1459 1444 EMIT1(0xC9); /* leave */ 1460 1445 EMIT1(0xC3); /* ret */ 1461 1446 break;