Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
"Some more powerpc fixes for 4.12. Most of these actually came in last
week but got held up for some more testing.

- three fixes for kprobes/ftrace/livepatch interactions.

- properly handle data breakpoints when using the Radix MMU.

- fix for perf sampling of registers during call_usermodehelper().

- properly initialise the thread_info on our emergency stacks.

- add an explicit flush when doing TLB invalidations for a process
using NPU2.

Thanks to: Alistair Popple, Naveen N. Rao, Nicholas Piggin, Ravi
Bangoria, Masami Hiramatsu"

* tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
powerpc/64: Initialise thread_info for emergency stacks
powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD
powerpc/perf: Fix oops when kthread execs user process
powerpc/64s: Handle data breakpoints in Radix mode
powerpc/kprobes: Skip livepatch_handler() for jprobes
powerpc/ftrace: Pass the correct stack pointer for DYNAMIC_FTRACE_WITH_REGS
powerpc/kprobes: Pause function_graph tracing during jprobes handling

+166 -50
+1
arch/powerpc/include/asm/kprobes.h
··· 103 103 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); 104 104 extern int kprobe_handler(struct pt_regs *regs); 105 105 extern int kprobe_post_handler(struct pt_regs *regs); 106 + extern int is_current_kprobe_addr(unsigned long addr); 106 107 #ifdef CONFIG_KPROBES_ON_FTRACE 107 108 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, 108 109 struct kprobe_ctlblk *kcb);
+7 -4
arch/powerpc/kernel/exceptions-64s.S
··· 1411 1411 .balign IFETCH_ALIGN_BYTES 1412 1412 do_hash_page: 1413 1413 #ifdef CONFIG_PPC_STD_MMU_64 1414 - andis. r0,r4,0xa410 /* weird error? */ 1414 + andis. r0,r4,0xa450 /* weird error? */ 1415 1415 bne- handle_page_fault /* if not, try to insert a HPTE */ 1416 - andis. r0,r4,DSISR_DABRMATCH@h 1417 - bne- handle_dabr_fault 1418 1416 CURRENT_THREAD_INFO(r11, r1) 1419 1417 lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ 1420 1418 andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ ··· 1436 1438 1437 1439 /* Error */ 1438 1440 blt- 13f 1441 + 1442 + /* Reload DSISR into r4 for the DABR check below */ 1443 + ld r4,_DSISR(r1) 1439 1444 #endif /* CONFIG_PPC_STD_MMU_64 */ 1440 1445 1441 1446 /* Here we have a page fault that hash_page can't handle. */ 1442 1447 handle_page_fault: 1443 - 11: ld r4,_DAR(r1) 1448 + 11: andis. r0,r4,DSISR_DABRMATCH@h 1449 + bne- handle_dabr_fault 1450 + ld r4,_DAR(r1) 1444 1451 ld r5,_DSISR(r1) 1445 1452 addi r3,r1,STACK_FRAME_OVERHEAD 1446 1453 bl do_page_fault
+17
arch/powerpc/kernel/kprobes.c
··· 43 43 44 44 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; 45 45 46 + int is_current_kprobe_addr(unsigned long addr) 47 + { 48 + struct kprobe *p = kprobe_running(); 49 + return (p && (unsigned long)p->addr == addr) ? 1 : 0; 50 + } 51 + 46 52 bool arch_within_kprobe_blacklist(unsigned long addr) 47 53 { 48 54 return (addr >= (unsigned long)__kprobes_text_start && ··· 623 617 regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); 624 618 #endif 625 619 620 + /* 621 + * jprobes use jprobe_return() which skips the normal return 622 + * path of the function, and this messes up the accounting of the 623 + * function graph tracer. 624 + * 625 + * Pause function graph tracing while performing the jprobe function. 626 + */ 627 + pause_graph_tracing(); 628 + 626 629 return 1; 627 630 } 628 631 NOKPROBE_SYMBOL(setjmp_pre_handler); ··· 657 642 * saved regs... 658 643 */ 659 644 memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); 645 + /* It's OK to start function graph tracing again */ 646 + unpause_graph_tracing(); 660 647 preempt_enable_no_resched(); 661 648 return 1; 662 649 }
+28 -3
arch/powerpc/kernel/setup_64.c
··· 616 616 #endif 617 617 618 618 /* 619 + * Emergency stacks are used for a range of things, from asynchronous 620 + * NMIs (system reset, machine check) to synchronous, process context. 621 + * We set preempt_count to zero, even though that isn't necessarily correct. To 622 + * get the right value we'd need to copy it from the previous thread_info, but 623 + * doing that might fault causing more problems. 624 + * TODO: what to do with accounting? 625 + */ 626 + static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu) 627 + { 628 + ti->task = NULL; 629 + ti->cpu = cpu; 630 + ti->preempt_count = 0; 631 + ti->local_flags = 0; 632 + ti->flags = 0; 633 + klp_init_thread_info(ti); 634 + } 635 + 636 + /* 619 637 * Stack space used when we detect a bad kernel stack pointer, and 620 638 * early in SMP boots before relocation is enabled. Exclusive emergency 621 639 * stack for machine checks. ··· 651 633 * Since we use these as temporary stacks during secondary CPU 652 634 * bringup, we need to get at them in real mode. This means they 653 635 * must also be within the RMO region. 636 + * 637 + * The IRQ stacks allocated elsewhere in this file are zeroed and 638 + * initialized in kernel/irq.c. These are initialized here in order 639 + * to have emergency stacks available as early as possible. 654 640 */ 655 641 limit = min(safe_stack_limit(), ppc64_rma_size); 656 642 657 643 for_each_possible_cpu(i) { 658 644 struct thread_info *ti; 659 645 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); 660 - klp_init_thread_info(ti); 646 + memset(ti, 0, THREAD_SIZE); 647 + emerg_stack_init_thread_info(ti, i); 661 648 paca[i].emergency_sp = (void *)ti + THREAD_SIZE; 662 649 663 650 #ifdef CONFIG_PPC_BOOK3S_64 664 651 /* emergency stack for NMI exception handling. 
*/ 665 652 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); 666 - klp_init_thread_info(ti); 653 + memset(ti, 0, THREAD_SIZE); 654 + emerg_stack_init_thread_info(ti, i); 667 655 paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; 668 656 669 657 /* emergency stack for machine check exception handling. */ 670 658 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); 671 - klp_init_thread_info(ti); 659 + memset(ti, 0, THREAD_SIZE); 660 + emerg_stack_init_thread_info(ti, i); 672 661 paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; 673 662 #endif 674 663 }
+46 -13
arch/powerpc/kernel/trace/ftrace_64_mprofile.S
··· 45 45 stdu r1,-SWITCH_FRAME_SIZE(r1) 46 46 47 47 /* Save all gprs to pt_regs */ 48 - SAVE_8GPRS(0,r1) 49 - SAVE_8GPRS(8,r1) 50 - SAVE_8GPRS(16,r1) 51 - SAVE_8GPRS(24,r1) 48 + SAVE_GPR(0, r1) 49 + SAVE_10GPRS(2, r1) 50 + SAVE_10GPRS(12, r1) 51 + SAVE_10GPRS(22, r1) 52 + 53 + /* Save previous stack pointer (r1) */ 54 + addi r8, r1, SWITCH_FRAME_SIZE 55 + std r8, GPR1(r1) 52 56 53 57 /* Load special regs for save below */ 54 58 mfmsr r8 ··· 99 95 bl ftrace_stub 100 96 nop 101 97 102 - /* Load ctr with the possibly modified NIP */ 103 - ld r3, _NIP(r1) 104 - mtctr r3 98 + /* Load the possibly modified NIP */ 99 + ld r15, _NIP(r1) 100 + 105 101 #ifdef CONFIG_LIVEPATCH 106 - cmpd r14,r3 /* has NIP been altered? */ 102 + cmpd r14, r15 /* has NIP been altered? */ 107 103 #endif 108 104 105 + #if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE) 106 + /* NIP has not been altered, skip over further checks */ 107 + beq 1f 108 + 109 + /* Check if there is an active kprobe on us */ 110 + subi r3, r14, 4 111 + bl is_current_kprobe_addr 112 + nop 113 + 114 + /* 115 + * If r3 == 1, then this is a kprobe/jprobe. 116 + * else, this is livepatched function. 117 + * 118 + * The conditional branch for livepatch_handler below will use the 119 + * result of this comparison. For kprobe/jprobe, we just need to branch to 120 + * the new NIP, not call livepatch_handler. The branch below is bne, so we 121 + * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want 122 + * CR0[EQ] = (r3 == 1). 
123 + */ 124 + cmpdi r3, 1 125 + 1: 126 + #endif 127 + 128 + /* Load CTR with the possibly modified NIP */ 129 + mtctr r15 130 + 109 131 /* Restore gprs */ 110 - REST_8GPRS(0,r1) 111 - REST_8GPRS(8,r1) 112 - REST_8GPRS(16,r1) 113 - REST_8GPRS(24,r1) 132 + REST_GPR(0,r1) 133 + REST_10GPRS(2,r1) 134 + REST_10GPRS(12,r1) 135 + REST_10GPRS(22,r1) 114 136 115 137 /* Restore possibly modified LR */ 116 138 ld r0, _LINK(r1) ··· 149 119 addi r1, r1, SWITCH_FRAME_SIZE 150 120 151 121 #ifdef CONFIG_LIVEPATCH 152 - /* Based on the cmpd above, if the NIP was altered handle livepatch */ 122 + /* 123 + * Based on the cmpd or cmpdi above, if the NIP was altered and we're 124 + * not on a kprobe/jprobe, then handle livepatch. 125 + */ 153 126 bne- livepatch_handler 154 127 #endif 155 128
+2 -1
arch/powerpc/perf/perf_regs.c
··· 101 101 struct pt_regs *regs_user_copy) 102 102 { 103 103 regs_user->regs = task_pt_regs(current); 104 - regs_user->abi = perf_reg_abi(current); 104 + regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : 105 + PERF_SAMPLE_REGS_ABI_NONE; 105 106 }
+65 -29
arch/powerpc/platforms/powernv/npu-dma.c
··· 449 449 return mmio_atsd_reg; 450 450 } 451 451 452 - static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) 452 + static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) 453 453 { 454 454 unsigned long launch; 455 455 ··· 465 465 /* PID */ 466 466 launch |= pid << PPC_BITLSHIFT(38); 467 467 468 + /* No flush */ 469 + launch |= !flush << PPC_BITLSHIFT(39); 470 + 468 471 /* Invalidating the entire process doesn't use a va */ 469 472 return mmio_launch_invalidate(npu, launch, 0); 470 473 } 471 474 472 475 static int mmio_invalidate_va(struct npu *npu, unsigned long va, 473 - unsigned long pid) 476 + unsigned long pid, bool flush) 474 477 { 475 478 unsigned long launch; 476 479 ··· 489 486 /* PID */ 490 487 launch |= pid << PPC_BITLSHIFT(38); 491 488 489 + /* No flush */ 490 + launch |= !flush << PPC_BITLSHIFT(39); 491 + 492 492 return mmio_launch_invalidate(npu, launch, va); 493 493 } 494 494 495 495 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) 496 + 497 + struct mmio_atsd_reg { 498 + struct npu *npu; 499 + int reg; 500 + }; 501 + 502 + static void mmio_invalidate_wait( 503 + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) 504 + { 505 + struct npu *npu; 506 + int i, reg; 507 + 508 + /* Wait for all invalidations to complete */ 509 + for (i = 0; i <= max_npu2_index; i++) { 510 + if (mmio_atsd_reg[i].reg < 0) 511 + continue; 512 + 513 + /* Wait for completion */ 514 + npu = mmio_atsd_reg[i].npu; 515 + reg = mmio_atsd_reg[i].reg; 516 + while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) 517 + cpu_relax(); 518 + 519 + put_mmio_atsd_reg(npu, reg); 520 + 521 + /* 522 + * The GPU requires two flush ATSDs to ensure all entries have 523 + * been flushed. We use PID 0 as it will never be used for a 524 + * process on the GPU. 
525 + */ 526 + if (flush) 527 + mmio_invalidate_pid(npu, 0, true); 528 + } 529 + } 496 530 497 531 /* 498 532 * Invalidate either a single address or an entire PID depending on 499 533 * the value of va. 500 534 */ 501 535 static void mmio_invalidate(struct npu_context *npu_context, int va, 502 - unsigned long address) 536 + unsigned long address, bool flush) 503 537 { 504 - int i, j, reg; 538 + int i, j; 505 539 struct npu *npu; 506 540 struct pnv_phb *nphb; 507 541 struct pci_dev *npdev; 508 - struct { 509 - struct npu *npu; 510 - int reg; 511 - } mmio_atsd_reg[NV_MAX_NPUS]; 542 + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; 512 543 unsigned long pid = npu_context->mm->context.id; 513 544 514 545 /* ··· 562 525 563 526 if (va) 564 527 mmio_atsd_reg[i].reg = 565 - mmio_invalidate_va(npu, address, pid); 528 + mmio_invalidate_va(npu, address, pid, 529 + flush); 566 530 else 567 531 mmio_atsd_reg[i].reg = 568 - mmio_invalidate_pid(npu, pid); 532 + mmio_invalidate_pid(npu, pid, flush); 569 533 570 534 /* 571 535 * The NPU hardware forwards the shootdown to all GPUs ··· 582 544 */ 583 545 flush_tlb_mm(npu_context->mm); 584 546 585 - /* Wait for all invalidations to complete */ 586 - for (i = 0; i <= max_npu2_index; i++) { 587 - if (mmio_atsd_reg[i].reg < 0) 588 - continue; 589 - 590 - /* Wait for completion */ 591 - npu = mmio_atsd_reg[i].npu; 592 - reg = mmio_atsd_reg[i].reg; 593 - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) 594 - cpu_relax(); 595 - put_mmio_atsd_reg(npu, reg); 596 - } 547 + mmio_invalidate_wait(mmio_atsd_reg, flush); 548 + if (flush) 549 + /* Wait for the flush to complete */ 550 + mmio_invalidate_wait(mmio_atsd_reg, false); 597 551 } 598 552 599 553 static void pnv_npu2_mn_release(struct mmu_notifier *mn, ··· 601 571 * There should be no more translation requests for this PID, but we 602 572 * need to ensure any entries for it are removed from the TLB. 
603 573 */ 604 - mmio_invalidate(npu_context, 0, 0); 574 + mmio_invalidate(npu_context, 0, 0, true); 605 575 } 606 576 607 577 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, ··· 611 581 { 612 582 struct npu_context *npu_context = mn_to_npu_context(mn); 613 583 614 - mmio_invalidate(npu_context, 1, address); 584 + mmio_invalidate(npu_context, 1, address, true); 615 585 } 616 586 617 587 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, ··· 620 590 { 621 591 struct npu_context *npu_context = mn_to_npu_context(mn); 622 592 623 - mmio_invalidate(npu_context, 1, address); 593 + mmio_invalidate(npu_context, 1, address, true); 624 594 } 625 595 626 596 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, ··· 630 600 struct npu_context *npu_context = mn_to_npu_context(mn); 631 601 unsigned long address; 632 602 633 - for (address = start; address <= end; address += PAGE_SIZE) 634 - mmio_invalidate(npu_context, 1, address); 603 + for (address = start; address < end; address += PAGE_SIZE) 604 + mmio_invalidate(npu_context, 1, address, false); 605 + 606 + /* Do the flush only on the final addess == end */ 607 + mmio_invalidate(npu_context, 1, address, true); 635 608 } 636 609 637 610 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { ··· 684 651 /* No nvlink associated with this GPU device */ 685 652 return ERR_PTR(-ENODEV); 686 653 687 - if (!mm) { 688 - /* kernel thread contexts are not supported */ 654 + if (!mm || mm->context.id == 0) { 655 + /* 656 + * Kernel thread contexts are not supported and context id 0 is 657 + * reserved on the GPU. 658 + */ 689 659 return ERR_PTR(-EINVAL); 690 660 } 691 661