Merge tag 'powerpc-4.17-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

+1 -1

arch/powerpc/include/asm/powernv.h

··· 15 15 extern void powernv_set_nmmu_ptcr(unsigned long ptcr); 16 16 extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, 17 17 unsigned long flags, 18 - struct npu_context *(*cb)(struct npu_context *, void *), 18 + void (*cb)(struct npu_context *, void *), 19 19 void *priv); 20 20 extern void pnv_npu2_destroy_context(struct npu_context *context, 21 21 struct pci_dev *gpdev);

+2 -5

arch/powerpc/kernel/mce_power.c

··· 441 441 if (pfn != ULONG_MAX) { 442 442 *phys_addr = 443 443 (pfn << PAGE_SHIFT); 444 - handled = 1; 445 444 } 446 445 } 447 446 } ··· 531 532 * kernel/exception-64s.h 532 533 */ 533 534 if (get_paca()->in_mce < MAX_MCE_DEPTH) 534 - if (!mce_find_instr_ea_and_pfn(regs, addr, 535 - phys_addr)) 536 - handled = 1; 535 + mce_find_instr_ea_and_pfn(regs, addr, phys_addr); 537 536 } 538 537 found = 1; 539 538 } ··· 569 572 const struct mce_ierror_table itable[]) 570 573 { 571 574 struct mce_error_info mce_err = { 0 }; 572 - uint64_t addr, phys_addr; 575 + uint64_t addr, phys_addr = ULONG_MAX; 573 576 uint64_t srr1 = regs->msr; 574 577 long handled; 575 578

+42 -7

arch/powerpc/kernel/smp.c

··· 566 566 #endif 567 567 568 568 #ifdef CONFIG_NMI_IPI 569 - static void stop_this_cpu(struct pt_regs *regs) 570 - #else 569 + static void nmi_stop_this_cpu(struct pt_regs *regs) 570 + { 571 + /* 572 + * This is a special case because it never returns, so the NMI IPI 573 + * handling would never mark it as done, which makes any later 574 + * smp_send_nmi_ipi() call spin forever. Mark it done now. 575 + * 576 + * IRQs are already hard disabled by the smp_handle_nmi_ipi. 577 + */ 578 + nmi_ipi_lock(); 579 + nmi_ipi_busy_count--; 580 + nmi_ipi_unlock(); 581 + 582 + /* Remove this CPU */ 583 + set_cpu_online(smp_processor_id(), false); 584 + 585 + spin_begin(); 586 + while (1) 587 + spin_cpu_relax(); 588 + } 589 + 590 + void smp_send_stop(void) 591 + { 592 + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000); 593 + } 594 + 595 + #else /* CONFIG_NMI_IPI */ 596 + 571 597 static void stop_this_cpu(void *dummy) 572 - #endif 573 598 { 574 599 /* Remove this CPU */ 575 600 set_cpu_online(smp_processor_id(), false); ··· 607 582 608 583 void smp_send_stop(void) 609 584 { 610 - #ifdef CONFIG_NMI_IPI 611 - smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000); 612 - #else 585 + static bool stopped = false; 586 + 587 + /* 588 + * Prevent waiting on csd lock from a previous smp_send_stop. 589 + * This is racy, but in general callers try to do the right 590 + * thing and only fire off one smp_send_stop (e.g., see 591 + * kernel/panic.c) 592 + */ 593 + if (stopped) 594 + return; 595 + 596 + stopped = true; 597 + 613 598 smp_call_function(stop_this_cpu, NULL, 0); 614 - #endif 615 599 } 600 + #endif /* CONFIG_NMI_IPI */ 616 601 617 602 struct thread_info *current_set[NR_CPUS]; 618 603

+7

arch/powerpc/kvm/booke.c

··· 305 305 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); 306 306 } 307 307 308 + #ifdef CONFIG_ALTIVEC 309 + void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu) 310 + { 311 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL); 312 + } 313 + #endif 314 + 308 315 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 309 316 { 310 317 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);

+2

arch/powerpc/mm/mem.c

··· 133 133 start, start + size, rc); 134 134 return -EFAULT; 135 135 } 136 + flush_inval_dcache_range(start, start + size); 136 137 137 138 return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); 138 139 } ··· 160 159 161 160 /* Remove htab bolted mappings for this section of memory */ 162 161 start = (unsigned long)__va(start); 162 + flush_inval_dcache_range(start, start + size); 163 163 ret = remove_section_mapping(start, start + size); 164 164 165 165 /* Ensure all vmalloc mappings are flushed in case they also

-17

arch/powerpc/platforms/powernv/memtrace.c

··· 82 82 .open = simple_open, 83 83 }; 84 84 85 - static void flush_memory_region(u64 base, u64 size) 86 - { 87 - unsigned long line_size = ppc64_caches.l1d.size; 88 - u64 end = base + size; 89 - u64 addr; 90 - 91 - base = round_down(base, line_size); 92 - end = round_up(end, line_size); 93 - 94 - for (addr = base; addr < end; addr += line_size) 95 - asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory"); 96 - } 97 - 98 85 static int check_memblock_online(struct memory_block *mem, void *arg) 99 86 { 100 87 if (mem->state != MEM_ONLINE) ··· 118 131 119 132 walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, 120 133 change_memblock_state); 121 - 122 - /* RCU grace period? */ 123 - flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT), 124 - nr_pages << PAGE_SHIFT); 125 134 126 135 lock_device_hotplug(); 127 136 remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);

+73 -15

arch/powerpc/platforms/powernv/npu-dma.c

··· 34 34 #define npu_to_phb(x) container_of(x, struct pnv_phb, npu) 35 35 36 36 /* 37 + * spinlock to protect initialisation of an npu_context for a particular 38 + * mm_struct. 39 + */ 40 + static DEFINE_SPINLOCK(npu_context_lock); 41 + 42 + /* 43 + * When an address shootdown range exceeds this threshold we invalidate the 44 + * entire TLB on the GPU for the given PID rather than each specific address in 45 + * the range. 46 + */ 47 + #define ATSD_THRESHOLD (2*1024*1024) 48 + 49 + /* 37 50 * Other types of TCE cache invalidation are not functional in the 38 51 * hardware. 39 52 */ ··· 414 401 bool nmmu_flush; 415 402 416 403 /* Callback to stop translation requests on a given GPU */ 417 - struct npu_context *(*release_cb)(struct npu_context *, void *); 404 + void (*release_cb)(struct npu_context *context, void *priv); 418 405 419 406 /* 420 407 * Private pointer passed to the above callback for usage by ··· 684 671 struct npu_context *npu_context = mn_to_npu_context(mn); 685 672 unsigned long address; 686 673 687 - for (address = start; address < end; address += PAGE_SIZE) 688 - mmio_invalidate(npu_context, 1, address, false); 674 + if (end - start > ATSD_THRESHOLD) { 675 + /* 676 + * Just invalidate the entire PID if the address range is too 677 + * large. 678 + */ 679 + mmio_invalidate(npu_context, 0, 0, true); 680 + } else { 681 + for (address = start; address < end; address += PAGE_SIZE) 682 + mmio_invalidate(npu_context, 1, address, false); 689 683 690 - /* Do the flush only on the final addess == end */ 691 - mmio_invalidate(npu_context, 1, address, true); 684 + /* Do the flush only on the final addess == end */ 685 + mmio_invalidate(npu_context, 1, address, true); 686 + } 692 687 } 693 688 694 689 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { ··· 717 696 * Returns an error if there no contexts are currently available or a 718 697 * npu_context which should be passed to pnv_npu2_handle_fault(). 719 698 * 720 - * mmap_sem must be held in write mode. 699 + * mmap_sem must be held in write mode and must not be called from interrupt 700 + * context. 721 701 */ 722 702 struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, 723 703 unsigned long flags, 724 - struct npu_context *(*cb)(struct npu_context *, void *), 704 + void (*cb)(struct npu_context *, void *), 725 705 void *priv) 726 706 { 727 707 int rc; ··· 765 743 /* 766 744 * Setup the NPU context table for a particular GPU. These need to be 767 745 * per-GPU as we need the tables to filter ATSDs when there are no 768 - * active contexts on a particular GPU. 746 + * active contexts on a particular GPU. It is safe for these to be 747 + * called concurrently with destroy as the OPAL call takes appropriate 748 + * locks and refcounts on init/destroy. 769 749 */ 770 750 rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags, 771 751 PCI_DEVID(gpdev->bus->number, gpdev->devfn)); ··· 778 754 * We store the npu pci device so we can more easily get at the 779 755 * associated npus. 780 756 */ 757 + spin_lock(&npu_context_lock); 781 758 npu_context = mm->context.npu_context; 759 + if (npu_context) { 760 + if (npu_context->release_cb != cb || 761 + npu_context->priv != priv) { 762 + spin_unlock(&npu_context_lock); 763 + opal_npu_destroy_context(nphb->opal_id, mm->context.id, 764 + PCI_DEVID(gpdev->bus->number, 765 + gpdev->devfn)); 766 + return ERR_PTR(-EINVAL); 767 + } 768 + 769 + WARN_ON(!kref_get_unless_zero(&npu_context->kref)); 770 + } 771 + spin_unlock(&npu_context_lock); 772 + 782 773 if (!npu_context) { 774 + /* 775 + * We can set up these fields without holding the 776 + * npu_context_lock as the npu_context hasn't been returned to 777 + * the caller meaning it can't be destroyed. Parallel allocation 778 + * is protected against by mmap_sem. 779 + */ 783 780 rc = -ENOMEM; 784 781 npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); 785 782 if (npu_context) { ··· 819 774 } 820 775 821 776 mm->context.npu_context = npu_context; 822 - } else { 823 - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); 824 777 } 825 778 826 779 npu_context->release_cb = cb; ··· 857 814 mm_context_remove_copro(npu_context->mm); 858 815 859 816 npu_context->mm->context.npu_context = NULL; 860 - mmu_notifier_unregister(&npu_context->mn, 861 - npu_context->mm); 862 - 863 - kfree(npu_context); 864 817 } 865 818 819 + /* 820 + * Destroy a context on the given GPU. May free the npu_context if it is no 821 + * longer active on any GPUs. Must not be called from interrupt context. 822 + */ 866 823 void pnv_npu2_destroy_context(struct npu_context *npu_context, 867 824 struct pci_dev *gpdev) 868 825 { 826 + int removed; 869 827 struct pnv_phb *nphb; 870 828 struct npu *npu; 871 829 struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); ··· 888 844 WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); 889 845 opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, 890 846 PCI_DEVID(gpdev->bus->number, gpdev->devfn)); 891 - kref_put(&npu_context->kref, pnv_npu2_release_context); 847 + spin_lock(&npu_context_lock); 848 + removed = kref_put(&npu_context->kref, pnv_npu2_release_context); 849 + spin_unlock(&npu_context_lock); 850 + 851 + /* 852 + * We need to do this outside of pnv_npu2_release_context so that it is 853 + * outside the spinlock as mmu_notifier_destroy uses SRCU. 854 + */ 855 + if (removed) { 856 + mmu_notifier_unregister(&npu_context->mn, 857 + npu_context->mm); 858 + 859 + kfree(npu_context); 860 + } 861 + 892 862 } 893 863 EXPORT_SYMBOL(pnv_npu2_destroy_context); 894 864

+5 -3

arch/powerpc/platforms/powernv/opal-rtc.c

··· 48 48 49 49 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 50 50 rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); 51 - if (rc == OPAL_BUSY_EVENT) 51 + if (rc == OPAL_BUSY_EVENT) { 52 + mdelay(OPAL_BUSY_DELAY_MS); 52 53 opal_poll_events(NULL); 53 - else if (rc == OPAL_BUSY) 54 - mdelay(10); 54 + } else if (rc == OPAL_BUSY) { 55 + mdelay(OPAL_BUSY_DELAY_MS); 56 + } 55 57 } 56 58 if (rc != OPAL_SUCCESS) 57 59 return 0;

+11 -3

drivers/cpufreq/powernv-cpufreq.c

··· 679 679 680 680 if (!spin_trylock(&gpstates->gpstate_lock)) 681 681 return; 682 + /* 683 + * If the timer has migrated to the different cpu then bring 684 + * it back to one of the policy->cpus 685 + */ 686 + if (!cpumask_test_cpu(raw_smp_processor_id(), policy->cpus)) { 687 + gpstates->timer.expires = jiffies + msecs_to_jiffies(1); 688 + add_timer_on(&gpstates->timer, cpumask_first(policy->cpus)); 689 + spin_unlock(&gpstates->gpstate_lock); 690 + return; 691 + } 682 692 683 693 /* 684 694 * If PMCR was last updated was using fast_swtich then ··· 728 718 if (gpstate_idx != gpstates->last_lpstate_idx) 729 719 queue_gpstate_timer(gpstates); 730 720 721 + set_pstate(&freq_data); 731 722 spin_unlock(&gpstates->gpstate_lock); 732 - 733 - /* Timer may get migrated to a different cpu on cpu hot unplug */ 734 - smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1); 735 723 } 736 724 737 725 /*

+23 -14

drivers/rtc/rtc-opal.c

··· 57 57 58 58 static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) 59 59 { 60 - long rc = OPAL_BUSY; 60 + s64 rc = OPAL_BUSY; 61 61 int retries = 10; 62 62 u32 y_m_d; 63 63 u64 h_m_s_ms; ··· 66 66 67 67 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 68 68 rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); 69 - if (rc == OPAL_BUSY_EVENT) 69 + if (rc == OPAL_BUSY_EVENT) { 70 + msleep(OPAL_BUSY_DELAY_MS); 70 71 opal_poll_events(NULL); 71 - else if (retries-- && (rc == OPAL_HARDWARE 72 - || rc == OPAL_INTERNAL_ERROR)) 73 - msleep(10); 74 - else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) 75 - break; 72 + } else if (rc == OPAL_BUSY) { 73 + msleep(OPAL_BUSY_DELAY_MS); 74 + } else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) { 75 + if (retries--) { 76 + msleep(10); /* Wait 10ms before retry */ 77 + rc = OPAL_BUSY; /* go around again */ 78 + } 79 + } 76 80 } 77 81 78 82 if (rc != OPAL_SUCCESS) ··· 91 87 92 88 static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) 93 89 { 94 - long rc = OPAL_BUSY; 90 + s64 rc = OPAL_BUSY; 95 91 int retries = 10; 96 92 u32 y_m_d = 0; 97 93 u64 h_m_s_ms = 0; 98 94 99 95 tm_to_opal(tm, &y_m_d, &h_m_s_ms); 96 + 100 97 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 101 98 rc = opal_rtc_write(y_m_d, h_m_s_ms); 102 - if (rc == OPAL_BUSY_EVENT) 99 + if (rc == OPAL_BUSY_EVENT) { 100 + msleep(OPAL_BUSY_DELAY_MS); 103 101 opal_poll_events(NULL); 104 - else if (retries-- && (rc == OPAL_HARDWARE 105 - || rc == OPAL_INTERNAL_ERROR)) 106 - msleep(10); 107 - else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT) 108 - break; 102 + } else if (rc == OPAL_BUSY) { 103 + msleep(OPAL_BUSY_DELAY_MS); 104 + } else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) { 105 + if (retries--) { 106 + msleep(10); /* Wait 10ms before retry */ 107 + rc = OPAL_BUSY; /* go around again */ 108 + } 109 + } 109 110 } 110 111 111 112 return rc == OPAL_SUCCESS ? 0 : -EIO;

Configure Feed

Configure Feed