Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
"ARM:

- Yet another race with VM destruction plugged

- A set of small vgic fixes

x86:

- Preserve pending INIT

- RCU fixes in paravirtual async pf, VM teardown, and VMXOFF
emulation

- nVMX interrupt injection and dirty tracking fixes

- initialize to make UBSAN happy"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: arm/arm64: vgic: Use READ_ONCE for cmpxchg
KVM: nVMX: Fix interrupt window request with "Acknowledge interrupt on exit"
KVM: nVMX: mark vmcs12 pages dirty on L2 exit
kvm: nVMX: don't flush VMCS12 during VMXOFF or VCPU teardown
KVM: nVMX: do not pin the VMCS12
KVM: avoid using rcu_dereference_protected
KVM: X86: init irq->level in kvm_pv_kick_cpu_op
KVM: X86: Fix loss of pending INIT due to race
KVM: async_pf: make rcu irq exit if not triggered from idle task
KVM: nVMX: fixes to nested virt interrupt injection
KVM: nVMX: do not fill vm_exit_intr_error_code in prepare_vmcs12
KVM: arm/arm64: Handle hva aging while destroying the vm
KVM: arm/arm64: PMU: Fix overflow interrupt injection
KVM: arm/arm64: Fix bug in advertising KVM_CAP_MSI_DEVID capability

+220 -129
+1 -1
arch/arm64/kvm/sys_regs.c
··· 764 764 if (p->is_write) { 765 765 if (r->CRm & 0x2) 766 766 /* accessing PMOVSSET_EL0 */ 767 - kvm_pmu_overflow_set(vcpu, p->regval & mask); 767 + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); 768 768 else 769 769 /* accessing PMOVSCLR_EL0 */ 770 770 vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+4 -2
arch/x86/kernel/kvm.c
··· 151 151 if (hlist_unhashed(&n.link)) 152 152 break; 153 153 154 + rcu_irq_exit(); 155 + 154 156 if (!n.halted) { 155 157 local_irq_enable(); 156 158 schedule(); ··· 161 159 /* 162 160 * We cannot reschedule. So halt. 163 161 */ 164 - rcu_irq_exit(); 165 162 native_safe_halt(); 166 163 local_irq_disable(); 167 - rcu_irq_enter(); 168 164 } 165 + 166 + rcu_irq_enter(); 169 167 } 170 168 if (!n.halted) 171 169 finish_swait(&n.wq, &wait);
+10
arch/x86/kvm/svm.c
··· 2430 2430 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; 2431 2431 svm->vmcb->control.exit_code_hi = 0; 2432 2432 svm->vmcb->control.exit_info_1 = error_code; 2433 + 2434 + /* 2435 + * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. 2436 + * The fix is to add the ancillary datum (CR2 or DR6) to structs 2437 + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be 2438 + * written only when inject_pending_event runs (DR6 would written here 2439 + * too). This should be conditional on a new capability---if the 2440 + * capability is disabled, kvm_multiple_exception would write the 2441 + * ancillary information to CR2 or DR6, for backwards ABI-compatibility. 2442 + */ 2433 2443 if (svm->vcpu.arch.exception.nested_apf) 2434 2444 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; 2435 2445 else
+163 -74
arch/x86/kvm/vmx.c
··· 416 416 417 417 /* The guest-physical address of the current VMCS L1 keeps for L2 */ 418 418 gpa_t current_vmptr; 419 - /* The host-usable pointer to the above */ 420 - struct page *current_vmcs12_page; 421 - struct vmcs12 *current_vmcs12; 422 419 /* 423 420 * Cache of the guest's VMCS, existing outside of guest memory. 424 421 * Loaded from guest memory during VMPTRLD. Flushed to guest 425 - * memory during VMXOFF, VMCLEAR, VMPTRLD. 422 + * memory during VMCLEAR and VMPTRLD. 426 423 */ 427 424 struct vmcs12 *cached_vmcs12; 428 425 /* ··· 924 927 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); 925 928 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); 926 929 static int alloc_identity_pagetable(struct kvm *kvm); 930 + static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); 931 + static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); 932 + static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, 933 + u16 error_code); 927 934 928 935 static DEFINE_PER_CPU(struct vmcs *, vmxarea); 929 936 static DEFINE_PER_CPU(struct vmcs *, current_vmcs); ··· 2429 2428 vmx_set_interrupt_shadow(vcpu, 0); 2430 2429 } 2431 2430 2431 + static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, 2432 + unsigned long exit_qual) 2433 + { 2434 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 2435 + unsigned int nr = vcpu->arch.exception.nr; 2436 + u32 intr_info = nr | INTR_INFO_VALID_MASK; 2437 + 2438 + if (vcpu->arch.exception.has_error_code) { 2439 + vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code; 2440 + intr_info |= INTR_INFO_DELIVER_CODE_MASK; 2441 + } 2442 + 2443 + if (kvm_exception_is_soft(nr)) 2444 + intr_info |= INTR_TYPE_SOFT_EXCEPTION; 2445 + else 2446 + intr_info |= INTR_TYPE_HARD_EXCEPTION; 2447 + 2448 + if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) && 2449 + vmx_get_nmi_mask(vcpu)) 2450 + intr_info |= INTR_INFO_UNBLOCK_NMI; 2451 + 2452 + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 
intr_info, exit_qual); 2453 + } 2454 + 2432 2455 /* 2433 2456 * KVM wants to inject page-faults which it got to the guest. This function 2434 2457 * checks whether in a nested guest, we need to inject them to L1 or L2. ··· 2462 2437 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 2463 2438 unsigned int nr = vcpu->arch.exception.nr; 2464 2439 2465 - if (!((vmcs12->exception_bitmap & (1u << nr)) || 2466 - (nr == PF_VECTOR && vcpu->arch.exception.nested_apf))) 2467 - return 0; 2440 + if (nr == PF_VECTOR) { 2441 + if (vcpu->arch.exception.nested_apf) { 2442 + nested_vmx_inject_exception_vmexit(vcpu, 2443 + vcpu->arch.apf.nested_apf_token); 2444 + return 1; 2445 + } 2446 + /* 2447 + * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception. 2448 + * The fix is to add the ancillary datum (CR2 or DR6) to structs 2449 + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 2450 + * can be written only when inject_pending_event runs. This should be 2451 + * conditional on a new capability---if the capability is disabled, 2452 + * kvm_multiple_exception would write the ancillary information to 2453 + * CR2 or DR6, for backwards ABI-compatibility. 
2454 + */ 2455 + if (nested_vmx_is_page_fault_vmexit(vmcs12, 2456 + vcpu->arch.exception.error_code)) { 2457 + nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2); 2458 + return 1; 2459 + } 2460 + } else { 2461 + unsigned long exit_qual = 0; 2462 + if (nr == DB_VECTOR) 2463 + exit_qual = vcpu->arch.dr6; 2468 2464 2469 - if (vcpu->arch.exception.nested_apf) { 2470 - vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code); 2471 - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 2472 - PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | 2473 - INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, 2474 - vcpu->arch.apf.nested_apf_token); 2475 - return 1; 2465 + if (vmcs12->exception_bitmap & (1u << nr)) { 2466 + nested_vmx_inject_exception_vmexit(vcpu, exit_qual); 2467 + return 1; 2468 + } 2476 2469 } 2477 2470 2478 - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 2479 - vmcs_read32(VM_EXIT_INTR_INFO), 2480 - vmcs_readl(EXIT_QUALIFICATION)); 2481 - return 1; 2471 + return 0; 2482 2472 } 2483 2473 2484 2474 static void vmx_queue_exception(struct kvm_vcpu *vcpu) ··· 2707 2667 * reason is that if one of these bits is necessary, it will appear 2708 2668 * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control 2709 2669 * fields of vmcs01 and vmcs02, will turn these bits off - and 2710 - * nested_vmx_exit_handled() will not pass related exits to L1. 2670 + * nested_vmx_exit_reflected() will not pass related exits to L1. 2711 2671 * These rules have exceptions below. 2712 2672 */ 2713 2673 ··· 4995 4955 return enable_apicv; 4996 4956 } 4997 4957 4958 + static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) 4959 + { 4960 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 4961 + gfn_t gfn; 4962 + 4963 + /* 4964 + * Don't need to mark the APIC access page dirty; it is never 4965 + * written to by the CPU during APIC virtualization. 
4966 + */ 4967 + 4968 + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { 4969 + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; 4970 + kvm_vcpu_mark_page_dirty(vcpu, gfn); 4971 + } 4972 + 4973 + if (nested_cpu_has_posted_intr(vmcs12)) { 4974 + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; 4975 + kvm_vcpu_mark_page_dirty(vcpu, gfn); 4976 + } 4977 + } 4978 + 4979 + 4998 4980 static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) 4999 4981 { 5000 4982 struct vcpu_vmx *vmx = to_vmx(vcpu); ··· 5024 4962 void *vapic_page; 5025 4963 u16 status; 5026 4964 5027 - if (vmx->nested.pi_desc && 5028 - vmx->nested.pi_pending) { 5029 - vmx->nested.pi_pending = false; 5030 - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) 5031 - return; 4965 + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) 4966 + return; 5032 4967 5033 - max_irr = find_last_bit( 5034 - (unsigned long *)vmx->nested.pi_desc->pir, 256); 4968 + vmx->nested.pi_pending = false; 4969 + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) 4970 + return; 5035 4971 5036 - if (max_irr == 256) 5037 - return; 5038 - 4972 + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); 4973 + if (max_irr != 256) { 5039 4974 vapic_page = kmap(vmx->nested.virtual_apic_page); 5040 4975 __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); 5041 4976 kunmap(vmx->nested.virtual_apic_page); ··· 5044 4985 vmcs_write16(GUEST_INTR_STATUS, status); 5045 4986 } 5046 4987 } 4988 + 4989 + nested_mark_vmcs12_pages_dirty(vcpu); 5047 4990 } 5048 4991 5049 4992 static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, ··· 7195 7134 return 1; 7196 7135 } 7197 7136 7137 + static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) 7138 + { 7139 + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); 7140 + vmcs_write64(VMCS_LINK_POINTER, -1ull); 7141 + } 7142 + 7198 7143 static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) 7199 7144 { 7200 7145 if 
(vmx->nested.current_vmptr == -1ull) 7201 - return; 7202 - 7203 - /* current_vmptr and current_vmcs12 are always set/reset together */ 7204 - if (WARN_ON(vmx->nested.current_vmcs12 == NULL)) 7205 7146 return; 7206 7147 7207 7148 if (enable_shadow_vmcs) { ··· 7211 7148 they were modified */ 7212 7149 copy_shadow_to_vmcs12(vmx); 7213 7150 vmx->nested.sync_shadow_vmcs = false; 7214 - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, 7215 - SECONDARY_EXEC_SHADOW_VMCS); 7216 - vmcs_write64(VMCS_LINK_POINTER, -1ull); 7151 + vmx_disable_shadow_vmcs(vmx); 7217 7152 } 7218 7153 vmx->nested.posted_intr_nv = -1; 7219 7154 7220 7155 /* Flush VMCS12 to guest memory */ 7221 - memcpy(vmx->nested.current_vmcs12, vmx->nested.cached_vmcs12, 7222 - VMCS12_SIZE); 7156 + kvm_vcpu_write_guest_page(&vmx->vcpu, 7157 + vmx->nested.current_vmptr >> PAGE_SHIFT, 7158 + vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); 7223 7159 7224 - kunmap(vmx->nested.current_vmcs12_page); 7225 - nested_release_page(vmx->nested.current_vmcs12_page); 7226 7160 vmx->nested.current_vmptr = -1ull; 7227 - vmx->nested.current_vmcs12 = NULL; 7228 7161 } 7229 7162 7230 7163 /* ··· 7234 7175 7235 7176 vmx->nested.vmxon = false; 7236 7177 free_vpid(vmx->nested.vpid02); 7237 - nested_release_vmcs12(vmx); 7178 + vmx->nested.posted_intr_nv = -1; 7179 + vmx->nested.current_vmptr = -1ull; 7238 7180 if (vmx->nested.msr_bitmap) { 7239 7181 free_page((unsigned long)vmx->nested.msr_bitmap); 7240 7182 vmx->nested.msr_bitmap = NULL; 7241 7183 } 7242 7184 if (enable_shadow_vmcs) { 7185 + vmx_disable_shadow_vmcs(vmx); 7243 7186 vmcs_clear(vmx->vmcs01.shadow_vmcs); 7244 7187 free_vmcs(vmx->vmcs01.shadow_vmcs); 7245 7188 vmx->vmcs01.shadow_vmcs = NULL; ··· 7640 7579 } 7641 7580 7642 7581 nested_release_vmcs12(vmx); 7643 - vmx->nested.current_vmcs12 = new_vmcs12; 7644 - vmx->nested.current_vmcs12_page = page; 7645 7582 /* 7646 7583 * Load VMCS12 from guest memory since it is not already 7647 7584 * cached. 
7648 7585 */ 7649 - memcpy(vmx->nested.cached_vmcs12, 7650 - vmx->nested.current_vmcs12, VMCS12_SIZE); 7586 + memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); 7587 + kunmap(page); 7588 + nested_release_page_clean(page); 7589 + 7651 7590 set_current_vmptr(vmx, vmptr); 7652 7591 } 7653 7592 ··· 8080 8019 * should handle it ourselves in L0 (and then continue L2). Only call this 8081 8020 * when in is_guest_mode (L2). 8082 8021 */ 8083 - static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) 8022 + static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) 8084 8023 { 8085 8024 u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 8086 8025 struct vcpu_vmx *vmx = to_vmx(vcpu); 8087 8026 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 8088 - u32 exit_reason = vmx->exit_reason; 8089 8027 8090 8028 trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, 8091 8029 vmcs_readl(EXIT_QUALIFICATION), ··· 8092 8032 intr_info, 8093 8033 vmcs_read32(VM_EXIT_INTR_ERROR_CODE), 8094 8034 KVM_ISA_VMX); 8035 + 8036 + /* 8037 + * The host physical addresses of some pages of guest memory 8038 + * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU 8039 + * may write to these pages via their host physical address while 8040 + * L2 is running, bypassing any address-translation-based dirty 8041 + * tracking (e.g. EPT write protection). 8042 + * 8043 + * Mark them dirty on every exit from L2 to prevent them from 8044 + * getting out of sync with dirty tracking. 8045 + */ 8046 + nested_mark_vmcs12_pages_dirty(vcpu); 8095 8047 8096 8048 if (vmx->nested.nested_run_pending) 8097 8049 return false; ··· 8239 8167 default: 8240 8168 return true; 8241 8169 } 8170 + } 8171 + 8172 + static int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason) 8173 + { 8174 + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 8175 + 8176 + /* 8177 + * At this point, the exit interruption info in exit_intr_info 8178 + * is only valid for EXCEPTION_NMI exits. 
For EXTERNAL_INTERRUPT 8179 + * we need to query the in-kernel LAPIC. 8180 + */ 8181 + WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); 8182 + if ((exit_intr_info & 8183 + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == 8184 + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { 8185 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 8186 + vmcs12->vm_exit_intr_error_code = 8187 + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 8188 + } 8189 + 8190 + nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, 8191 + vmcs_readl(EXIT_QUALIFICATION)); 8192 + return 1; 8242 8193 } 8243 8194 8244 8195 static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) ··· 8510 8415 if (vmx->emulation_required) 8511 8416 return handle_invalid_guest_state(vcpu); 8512 8417 8513 - if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 8514 - nested_vmx_vmexit(vcpu, exit_reason, 8515 - vmcs_read32(VM_EXIT_INTR_INFO), 8516 - vmcs_readl(EXIT_QUALIFICATION)); 8517 - return 1; 8518 - } 8418 + if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason)) 8419 + return nested_vmx_reflect_vmexit(vcpu, exit_reason); 8519 8420 8520 8421 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { 8521 8422 dump_vmcs(); ··· 9314 9223 9315 9224 vmx->nested.posted_intr_nv = -1; 9316 9225 vmx->nested.current_vmptr = -1ull; 9317 - vmx->nested.current_vmcs12 = NULL; 9318 9226 9319 9227 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; 9320 9228 ··· 9599 9509 9600 9510 WARN_ON(!is_guest_mode(vcpu)); 9601 9511 9602 - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) 9603 - nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, 9604 - vmcs_read32(VM_EXIT_INTR_INFO), 9605 - vmcs_readl(EXIT_QUALIFICATION)); 9606 - else 9512 + if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { 9513 + vmcs12->vm_exit_intr_error_code = fault->error_code; 9514 + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 9515 + PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | 9516 
+ INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK, 9517 + fault->address); 9518 + } else { 9607 9519 kvm_inject_page_fault(vcpu, fault); 9520 + } 9608 9521 } 9609 9522 9610 9523 static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, ··· 10187 10094 * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, 10188 10095 * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when 10189 10096 * !enable_ept, EB.PF is 1, so the "or" will always be 1. 10190 - * 10191 - * A problem with this approach (when !enable_ept) is that L1 may be 10192 - * injected with more page faults than it asked for. This could have 10193 - * caused problems, but in practice existing hypervisors don't care. 10194 - * To fix this, we will need to emulate the PFEC checking (on the L1 10195 - * page tables), using walk_addr(), when injecting PFs to L1. 10196 10097 */ 10197 10098 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 10198 10099 enable_ept ? vmcs12->page_fault_error_code_mask : 0); ··· 10934 10847 10935 10848 vmcs12->vm_exit_reason = exit_reason; 10936 10849 vmcs12->exit_qualification = exit_qualification; 10937 - 10938 10850 vmcs12->vm_exit_intr_info = exit_intr_info; 10939 - if ((vmcs12->vm_exit_intr_info & 10940 - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == 10941 - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) 10942 - vmcs12->vm_exit_intr_error_code = 10943 - vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 10851 + 10944 10852 vmcs12->idt_vectoring_info_field = 0; 10945 10853 vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 10946 10854 vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); ··· 11131 11049 11132 11050 vmx_switch_vmcs(vcpu, &vmx->vmcs01); 11133 11051 11134 - if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) 11135 - && nested_exit_intr_ack_set(vcpu)) { 11052 + /* 11053 + * TODO: SDM says that with acknowledge interrupt on exit, bit 31 of 11054 + * the VM-exit interrupt information (valid interrupt) 
is always set to 11055 + * 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need 11056 + * kvm_cpu_has_interrupt(). See the commit message for details. 11057 + */ 11058 + if (nested_exit_intr_ack_set(vcpu) && 11059 + exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && 11060 + kvm_cpu_has_interrupt(vcpu)) { 11136 11061 int irq = kvm_cpu_get_interrupt(vcpu); 11137 11062 WARN_ON(irq < 0); 11138 11063 vmcs12->vm_exit_intr_info = irq |
+12 -8
arch/x86/kvm/x86.c
··· 3159 3159 kvm_set_hflags(vcpu, hflags); 3160 3160 3161 3161 vcpu->arch.smi_pending = events->smi.pending; 3162 - if (events->smi.smm_inside_nmi) 3163 - vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; 3164 - else 3165 - vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; 3166 - if (lapic_in_kernel(vcpu)) { 3167 - if (events->smi.latched_init) 3168 - set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); 3162 + 3163 + if (events->smi.smm) { 3164 + if (events->smi.smm_inside_nmi) 3165 + vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; 3169 3166 else 3170 - clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); 3167 + vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; 3168 + if (lapic_in_kernel(vcpu)) { 3169 + if (events->smi.latched_init) 3170 + set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); 3171 + else 3172 + clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); 3173 + } 3171 3174 } 3172 3175 } 3173 3176 ··· 6218 6215 6219 6216 lapic_irq.shorthand = 0; 6220 6217 lapic_irq.dest_mode = 0; 6218 + lapic_irq.level = 0; 6221 6219 lapic_irq.dest_id = apicid; 6222 6220 lapic_irq.msi_redir_hint = false; 6223 6221
-2
include/kvm/arm_pmu.h
··· 48 48 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); 49 49 void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val); 50 50 void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val); 51 - void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val); 52 51 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu); 53 52 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu); 54 53 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu); ··· 85 86 static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} 86 87 static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {} 87 88 static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {} 88 - static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {} 89 89 static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {} 90 90 static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {} 91 91 static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
+4 -2
include/linux/kvm_host.h
··· 477 477 static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) 478 478 { 479 479 return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, 480 - lockdep_is_held(&kvm->slots_lock)); 480 + lockdep_is_held(&kvm->slots_lock) || 481 + !refcount_read(&kvm->users_count)); 481 482 } 482 483 483 484 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) ··· 571 570 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) 572 571 { 573 572 return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, 574 - lockdep_is_held(&kvm->slots_lock)); 573 + lockdep_is_held(&kvm->slots_lock) || 574 + !refcount_read(&kvm->users_count)); 575 575 } 576 576 577 577 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
+4
virt/kvm/arm/mmu.c
··· 1718 1718 1719 1719 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) 1720 1720 { 1721 + if (!kvm->arch.pgd) 1722 + return 0; 1721 1723 trace_kvm_age_hva(start, end); 1722 1724 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); 1723 1725 } 1724 1726 1725 1727 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 1726 1728 { 1729 + if (!kvm->arch.pgd) 1730 + return 0; 1727 1731 trace_kvm_test_age_hva(hva); 1728 1732 return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); 1729 1733 }
+15 -28
virt/kvm/arm/pmu.c
··· 203 203 return reg; 204 204 } 205 205 206 - static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) 206 + static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) 207 207 { 208 208 struct kvm_pmu *pmu = &vcpu->arch.pmu; 209 - bool overflow = !!kvm_pmu_overflow_status(vcpu); 209 + bool overflow; 210 210 211 + if (!kvm_arm_pmu_v3_ready(vcpu)) 212 + return; 213 + 214 + overflow = !!kvm_pmu_overflow_status(vcpu); 211 215 if (pmu->irq_level == overflow) 212 216 return; 213 217 ··· 219 215 220 216 if (likely(irqchip_in_kernel(vcpu->kvm))) { 221 217 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 222 - pmu->irq_num, overflow, 223 - &vcpu->arch.pmu); 218 + pmu->irq_num, overflow, pmu); 224 219 WARN_ON(ret); 225 220 } 226 - } 227 - 228 - /** 229 - * kvm_pmu_overflow_set - set PMU overflow interrupt 230 - * @vcpu: The vcpu pointer 231 - * @val: the value guest writes to PMOVSSET register 232 - */ 233 - void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) 234 - { 235 - if (val == 0) 236 - return; 237 - 238 - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; 239 - kvm_pmu_check_overflow(vcpu); 240 - } 241 - 242 - static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) 243 - { 244 - if (!kvm_arm_pmu_v3_ready(vcpu)) 245 - return; 246 - kvm_pmu_check_overflow(vcpu); 247 221 } 248 222 249 223 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) ··· 285 303 } 286 304 287 305 /** 288 - * When perf event overflows, call kvm_pmu_overflow_set to set overflow status. 306 + * When the perf event overflows, set the overflow status and inform the vcpu. 
289 307 */ 290 308 static void kvm_pmu_perf_overflow(struct perf_event *perf_event, 291 309 struct perf_sample_data *data, ··· 295 313 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 296 314 int idx = pmc->idx; 297 315 298 - kvm_pmu_overflow_set(vcpu, BIT(idx)); 316 + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); 317 + 318 + if (kvm_pmu_overflow_status(vcpu)) { 319 + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 320 + kvm_vcpu_kick(vcpu); 321 + } 299 322 } 300 323 301 324 /** ··· 328 341 reg = lower_32_bits(reg); 329 342 vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; 330 343 if (!reg) 331 - kvm_pmu_overflow_set(vcpu, BIT(i)); 344 + vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); 332 345 } 333 346 } 334 347 }
-3
virt/kvm/arm/vgic/vgic-init.c
··· 285 285 if (ret) 286 286 goto out; 287 287 288 - if (vgic_has_its(kvm)) 289 - dist->msis_require_devid = true; 290 - 291 288 kvm_for_each_vcpu(i, vcpu, kvm) 292 289 kvm_vgic_vcpu_enable(vcpu); 293 290
+1
virt/kvm/arm/vgic/vgic-its.c
··· 1598 1598 INIT_LIST_HEAD(&its->device_list); 1599 1599 INIT_LIST_HEAD(&its->collection_list); 1600 1600 1601 + dev->kvm->arch.vgic.msis_require_devid = true; 1601 1602 dev->kvm->arch.vgic.has_its = true; 1602 1603 its->enabled = false; 1603 1604 its->dev = dev;
+2 -2
virt/kvm/arm/vgic/vgic-mmio-v3.c
··· 369 369 return; 370 370 371 371 do { 372 - old_propbaser = dist->propbaser; 372 + old_propbaser = READ_ONCE(dist->propbaser); 373 373 propbaser = old_propbaser; 374 374 propbaser = update_64bit_reg(propbaser, addr & 4, len, val); 375 375 propbaser = vgic_sanitise_propbaser(propbaser); ··· 397 397 return; 398 398 399 399 do { 400 - old_pendbaser = vgic_cpu->pendbaser; 400 + old_pendbaser = READ_ONCE(vgic_cpu->pendbaser); 401 401 pendbaser = old_pendbaser; 402 402 pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); 403 403 pendbaser = vgic_sanitise_pendbaser(pendbaser);
+4 -7
virt/kvm/kvm_main.c
··· 717 717 hardware_disable_all(); 718 718 out_err_no_disable: 719 719 for (i = 0; i < KVM_NR_BUSES; i++) 720 - kfree(rcu_access_pointer(kvm->buses[i])); 720 + kfree(kvm_get_bus(kvm, i)); 721 721 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) 722 - kvm_free_memslots(kvm, 723 - rcu_dereference_protected(kvm->memslots[i], 1)); 722 + kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); 724 723 kvm_arch_free_vm(kvm); 725 724 mmdrop(current->mm); 726 725 return ERR_PTR(r); ··· 753 754 spin_unlock(&kvm_lock); 754 755 kvm_free_irq_routing(kvm); 755 756 for (i = 0; i < KVM_NR_BUSES; i++) { 756 - struct kvm_io_bus *bus; 757 + struct kvm_io_bus *bus = kvm_get_bus(kvm, i); 757 758 758 - bus = rcu_dereference_protected(kvm->buses[i], 1); 759 759 if (bus) 760 760 kvm_io_bus_destroy(bus); 761 761 kvm->buses[i] = NULL; ··· 768 770 kvm_arch_destroy_vm(kvm); 769 771 kvm_destroy_devices(kvm); 770 772 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) 771 - kvm_free_memslots(kvm, 772 - rcu_dereference_protected(kvm->memslots[i], 1)); 773 + kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); 773 774 cleanup_srcu_struct(&kvm->irq_srcu); 774 775 cleanup_srcu_struct(&kvm->srcu); 775 776 kvm_arch_free_vm(kvm);