Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"Mixed bugfixes. Perhaps the most interesting one is a latent bug that
was finally triggered by PCID support"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
kvm/x86: Handle async PF in RCU read-side critical sections
KVM: nVMX: Fix nested #PF intends to break L1's vmlaunch/vmresume
KVM: VMX: use cmpxchg64
KVM: VMX: simplify and fix vmx_vcpu_pi_load
KVM: VMX: avoid double list add with VT-d posted interrupts
KVM: VMX: extract __pi_post_block
KVM: PPC: Book3S HV: Check for updated HDSISR on P9 HDSI exception
KVM: nVMX: fix HOST_CR3/HOST_CR4 cache

+116 -107
+13 -1
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 1121 1121 BEGIN_FTR_SECTION 1122 1122 mtspr SPRN_PPR, r0 1123 1123 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 1124 + 1125 + /* Move canary into DSISR to check for later */ 1126 + BEGIN_FTR_SECTION 1127 + li r0, 0x7fff 1128 + mtspr SPRN_HDSISR, r0 1129 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1130 + 1124 1131 ld r0, VCPU_GPR(R0)(r4) 1125 1132 ld r4, VCPU_GPR(R4)(r4) 1126 1133 ··· 1963 1956 kvmppc_hdsi: 1964 1957 ld r3, VCPU_KVM(r9) 1965 1958 lbz r0, KVM_RADIX(r3) 1966 - cmpwi r0, 0 1967 1959 mfspr r4, SPRN_HDAR 1968 1960 mfspr r6, SPRN_HDSISR 1961 + BEGIN_FTR_SECTION 1962 + /* Look for DSISR canary. If we find it, retry instruction */ 1963 + cmpdi r6, 0x7fff 1964 + beq 6f 1965 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1966 + cmpwi r0, 0 1969 1967 bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ 1970 1968 /* HPTE not found fault or protection fault? */ 1971 1969 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
+2 -1
arch/x86/kernel/kvm.c
··· 140 140 141 141 n.token = token; 142 142 n.cpu = smp_processor_id(); 143 - n.halted = is_idle_task(current) || preempt_count() > 1; 143 + n.halted = is_idle_task(current) || preempt_count() > 1 || 144 + rcu_preempt_depth(); 144 145 init_swait_queue_head(&n.wq); 145 146 hlist_add_head(&n.link, &b->list); 146 147 raw_spin_unlock(&b->lock);
+101 -105
arch/x86/kvm/vmx.c
··· 200 200 int cpu; 201 201 bool launched; 202 202 bool nmi_known_unmasked; 203 + unsigned long vmcs_host_cr3; /* May not match real cr3 */ 204 + unsigned long vmcs_host_cr4; /* May not match real cr4 */ 203 205 struct list_head loaded_vmcss_on_cpu_link; 204 206 }; 205 207 ··· 602 600 int gs_ldt_reload_needed; 603 601 int fs_reload_needed; 604 602 u64 msr_host_bndcfgs; 605 - unsigned long vmcs_host_cr3; /* May not match real cr3 */ 606 - unsigned long vmcs_host_cr4; /* May not match real cr4 */ 607 603 } host_state; 608 604 struct { 609 605 int vm86_active; ··· 2202 2202 struct pi_desc old, new; 2203 2203 unsigned int dest; 2204 2204 2205 - if (!kvm_arch_has_assigned_device(vcpu->kvm) || 2206 - !irq_remapping_cap(IRQ_POSTING_CAP) || 2207 - !kvm_vcpu_apicv_active(vcpu)) 2205 + /* 2206 + * In case of hot-plug or hot-unplug, we may have to undo 2207 + * vmx_vcpu_pi_put even if there is no assigned device. And we 2208 + * always keep PI.NDST up to date for simplicity: it makes the 2209 + * code easier, and CPU migration is not a fast path. 2210 + */ 2211 + if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) 2208 2212 return; 2209 2213 2214 + /* 2215 + * First handle the simple case where no cmpxchg is necessary; just 2216 + * allow posting non-urgent interrupts. 2217 + * 2218 + * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change 2219 + * PI.NDST: pi_post_block will do it for us and the wakeup_handler 2220 + * expects the VCPU to be on the blocked_vcpu_list that matches 2221 + * PI.NDST. 2222 + */ 2223 + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || 2224 + vcpu->cpu == cpu) { 2225 + pi_clear_sn(pi_desc); 2226 + return; 2227 + } 2228 + 2229 + /* The full case. */ 2210 2230 do { 2211 2231 old.control = new.control = pi_desc->control; 2212 2232 2213 - /* 2214 - * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there 2215 - * are two possible cases: 2216 - * 1. After running 'pre_block', context switch 2217 - * happened. 
For this case, 'sn' was set in 2218 - * vmx_vcpu_put(), so we need to clear it here. 2219 - * 2. After running 'pre_block', we were blocked, 2220 - * and woken up by some other guy. For this case, 2221 - * we don't need to do anything, 'pi_post_block' 2222 - * will do everything for us. However, we cannot 2223 - * check whether it is case #1 or case #2 here 2224 - * (maybe, not needed), so we also clear sn here, 2225 - * I think it is not a big deal. 2226 - */ 2227 - if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { 2228 - if (vcpu->cpu != cpu) { 2229 - dest = cpu_physical_id(cpu); 2233 + dest = cpu_physical_id(cpu); 2230 2234 2231 - if (x2apic_enabled()) 2232 - new.ndst = dest; 2233 - else 2234 - new.ndst = (dest << 8) & 0xFF00; 2235 - } 2235 + if (x2apic_enabled()) 2236 + new.ndst = dest; 2237 + else 2238 + new.ndst = (dest << 8) & 0xFF00; 2236 2239 2237 - /* set 'NV' to 'notification vector' */ 2238 - new.nv = POSTED_INTR_VECTOR; 2239 - } 2240 - 2241 - /* Allow posting non-urgent interrupts */ 2242 2240 new.sn = 0; 2243 - } while (cmpxchg(&pi_desc->control, old.control, 2244 - new.control) != old.control); 2241 + } while (cmpxchg64(&pi_desc->control, old.control, 2242 + new.control) != old.control); 2245 2243 } 2246 2244 2247 2245 static void decache_tsc_multiplier(struct vcpu_vmx *vmx) ··· 5176 5178 */ 5177 5179 cr3 = __read_cr3(); 5178 5180 vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ 5179 - vmx->host_state.vmcs_host_cr3 = cr3; 5181 + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; 5180 5182 5181 5183 /* Save the most likely value for this task's CR4 in the VMCS. 
*/ 5182 5184 cr4 = cr4_read_shadow(); 5183 5185 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ 5184 - vmx->host_state.vmcs_host_cr4 = cr4; 5186 + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; 5185 5187 5186 5188 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 5187 5189 #ifdef CONFIG_X86_64 ··· 9271 9273 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); 9272 9274 9273 9275 cr3 = __get_current_cr3_fast(); 9274 - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { 9276 + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { 9275 9277 vmcs_writel(HOST_CR3, cr3); 9276 - vmx->host_state.vmcs_host_cr3 = cr3; 9278 + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; 9277 9279 } 9278 9280 9279 9281 cr4 = cr4_read_shadow(); 9280 - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { 9282 + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { 9281 9283 vmcs_writel(HOST_CR4, cr4); 9282 - vmx->host_state.vmcs_host_cr4 = cr4; 9284 + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; 9283 9285 } 9284 9286 9285 9287 /* When single-stepping over STI and MOV SS, we must clear the ··· 9589 9591 9590 9592 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; 9591 9593 9594 + /* 9595 + * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR 9596 + * or POSTED_INTR_WAKEUP_VECTOR. 
9597 + */ 9598 + vmx->pi_desc.nv = POSTED_INTR_VECTOR; 9599 + vmx->pi_desc.sn = 1; 9600 + 9592 9601 return &vmx->vcpu; 9593 9602 9594 9603 free_vmcs: ··· 9844 9839 9845 9840 WARN_ON(!is_guest_mode(vcpu)); 9846 9841 9847 - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { 9842 + if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && 9843 + !to_vmx(vcpu)->nested.nested_run_pending) { 9848 9844 vmcs12->vm_exit_intr_error_code = fault->error_code; 9849 9845 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 9850 9846 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | ··· 11710 11704 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); 11711 11705 } 11712 11706 11707 + static void __pi_post_block(struct kvm_vcpu *vcpu) 11708 + { 11709 + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); 11710 + struct pi_desc old, new; 11711 + unsigned int dest; 11712 + 11713 + do { 11714 + old.control = new.control = pi_desc->control; 11715 + WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, 11716 + "Wakeup handler not enabled while the VCPU is blocked\n"); 11717 + 11718 + dest = cpu_physical_id(vcpu->cpu); 11719 + 11720 + if (x2apic_enabled()) 11721 + new.ndst = dest; 11722 + else 11723 + new.ndst = (dest << 8) & 0xFF00; 11724 + 11725 + /* set 'NV' to 'notification vector' */ 11726 + new.nv = POSTED_INTR_VECTOR; 11727 + } while (cmpxchg64(&pi_desc->control, old.control, 11728 + new.control) != old.control); 11729 + 11730 + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { 11731 + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); 11732 + list_del(&vcpu->blocked_vcpu_list); 11733 + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); 11734 + vcpu->pre_pcpu = -1; 11735 + } 11736 + } 11737 + 11713 11738 /* 11714 11739 * This routine does the following things for vCPU which is going 11715 11740 * to be blocked if VT-d PI is enabled. 
··· 11756 11719 */ 11757 11720 static int pi_pre_block(struct kvm_vcpu *vcpu) 11758 11721 { 11759 - unsigned long flags; 11760 11722 unsigned int dest; 11761 11723 struct pi_desc old, new; 11762 11724 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ··· 11765 11729 !kvm_vcpu_apicv_active(vcpu)) 11766 11730 return 0; 11767 11731 11768 - vcpu->pre_pcpu = vcpu->cpu; 11769 - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, 11770 - vcpu->pre_pcpu), flags); 11771 - list_add_tail(&vcpu->blocked_vcpu_list, 11772 - &per_cpu(blocked_vcpu_on_cpu, 11773 - vcpu->pre_pcpu)); 11774 - spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, 11775 - vcpu->pre_pcpu), flags); 11732 + WARN_ON(irqs_disabled()); 11733 + local_irq_disable(); 11734 + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { 11735 + vcpu->pre_pcpu = vcpu->cpu; 11736 + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); 11737 + list_add_tail(&vcpu->blocked_vcpu_list, 11738 + &per_cpu(blocked_vcpu_on_cpu, 11739 + vcpu->pre_pcpu)); 11740 + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); 11741 + } 11776 11742 11777 11743 do { 11778 11744 old.control = new.control = pi_desc->control; 11779 - 11780 - /* 11781 - * We should not block the vCPU if 11782 - * an interrupt is posted for it. 
11783 - */ 11784 - if (pi_test_on(pi_desc) == 1) { 11785 - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, 11786 - vcpu->pre_pcpu), flags); 11787 - list_del(&vcpu->blocked_vcpu_list); 11788 - spin_unlock_irqrestore( 11789 - &per_cpu(blocked_vcpu_on_cpu_lock, 11790 - vcpu->pre_pcpu), flags); 11791 - vcpu->pre_pcpu = -1; 11792 - 11793 - return 1; 11794 - } 11795 11745 11796 11746 WARN((pi_desc->sn == 1), 11797 11747 "Warning: SN field of posted-interrupts " ··· 11800 11778 11801 11779 /* set 'NV' to 'wakeup vector' */ 11802 11780 new.nv = POSTED_INTR_WAKEUP_VECTOR; 11803 - } while (cmpxchg(&pi_desc->control, old.control, 11804 - new.control) != old.control); 11781 + } while (cmpxchg64(&pi_desc->control, old.control, 11782 + new.control) != old.control); 11805 11783 11806 - return 0; 11784 + /* We should not block the vCPU if an interrupt is posted for it. */ 11785 + if (pi_test_on(pi_desc) == 1) 11786 + __pi_post_block(vcpu); 11787 + 11788 + local_irq_enable(); 11789 + return (vcpu->pre_pcpu == -1); 11807 11790 } 11808 11791 11809 11792 static int vmx_pre_block(struct kvm_vcpu *vcpu) ··· 11824 11797 11825 11798 static void pi_post_block(struct kvm_vcpu *vcpu) 11826 11799 { 11827 - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); 11828 - struct pi_desc old, new; 11829 - unsigned int dest; 11830 - unsigned long flags; 11831 - 11832 - if (!kvm_arch_has_assigned_device(vcpu->kvm) || 11833 - !irq_remapping_cap(IRQ_POSTING_CAP) || 11834 - !kvm_vcpu_apicv_active(vcpu)) 11800 + if (vcpu->pre_pcpu == -1) 11835 11801 return; 11836 11802 11837 - do { 11838 - old.control = new.control = pi_desc->control; 11839 - 11840 - dest = cpu_physical_id(vcpu->cpu); 11841 - 11842 - if (x2apic_enabled()) 11843 - new.ndst = dest; 11844 - else 11845 - new.ndst = (dest << 8) & 0xFF00; 11846 - 11847 - /* Allow posting non-urgent interrupts */ 11848 - new.sn = 0; 11849 - 11850 - /* set 'NV' to 'notification vector' */ 11851 - new.nv = POSTED_INTR_VECTOR; 11852 - } while 
(cmpxchg(&pi_desc->control, old.control, 11853 - new.control) != old.control); 11854 - 11855 - if(vcpu->pre_pcpu != -1) { 11856 - spin_lock_irqsave( 11857 - &per_cpu(blocked_vcpu_on_cpu_lock, 11858 - vcpu->pre_pcpu), flags); 11859 - list_del(&vcpu->blocked_vcpu_list); 11860 - spin_unlock_irqrestore( 11861 - &per_cpu(blocked_vcpu_on_cpu_lock, 11862 - vcpu->pre_pcpu), flags); 11863 - vcpu->pre_pcpu = -1; 11864 - } 11803 + WARN_ON(irqs_disabled()); 11804 + local_irq_disable(); 11805 + __pi_post_block(vcpu); 11806 + local_irq_enable(); 11865 11807 } 11866 11808 11867 11809 static void vmx_post_block(struct kvm_vcpu *vcpu)