Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
"ARM:
- Fix a VFP corruption in 32-bit guest
- Add missing cache invalidation for CoW pages
- Two small cleanups

s390:
- Fallout from the hugetlbfs support: pfmf interpretation and locking
- VSIE: fix keywrapping for nested guests

PPC:
- Fix a bug where pages might not get marked dirty, causing guest
memory corruption on migration
- Fix a bug causing reads from guest memory to use the wrong guest
real address for very large HPT guests (>256G of memory), leading
to failures in instruction emulation.

x86:
- Fix out-of-bounds access from malicious pv ipi hypercalls
(introduced in rc1)
- Fix delivery of pending interrupts when entering a nested guest,
preventing arbitrarily late injection
- Sanitize kvm_stat output after destroying a guest
- Fix infinite loop when emulating a nested guest page fault and
improve the surrounding emulation code
- Two minor cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
KVM: LAPIC: Fix pv ipis out-of-bounds access
KVM: nVMX: Fix loss of pending IRQ/NMI before entering L2
arm64: KVM: Remove pgd_lock
KVM: Remove obsolete kvm_unmap_hva notifier backend
arm64: KVM: Only force FPEXC32_EL2.EN if trapping FPSIMD
KVM: arm/arm64: Clean dcache to PoC when changing PTE due to CoW
KVM: s390: Properly lock mm context allow_gmap_hpage_1m setting
KVM: s390: vsie: copy wrapping keys to right place
KVM: s390: Fix pfmf and conditional skey emulation
tools/kvm_stat: re-animate display of dead guests
tools/kvm_stat: indicate dead guests as such
tools/kvm_stat: handle guest removals more gracefully
tools/kvm_stat: don't reset stats when setting PID filter for debugfs
tools/kvm_stat: fix updates for dead guests
tools/kvm_stat: fix handling of invalid paths in debugfs provider
tools/kvm_stat: fix python3 issues
KVM: x86: Unexport x86_emulate_instruction()
KVM: x86: Rename emulate_instruction() to kvm_emulate_instruction()
KVM: x86: Do not re-{try,execute} after failed emulation in L2
KVM: x86: Default to not allowing emulation retry in kvm_mmu_page_fault
...

+204 -134
-1
arch/arm/include/asm/kvm_host.h
··· 223 223 struct kvm_vcpu_events *events); 224 224 225 225 #define KVM_ARCH_WANT_MMU_NOTIFIER 226 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 227 226 int kvm_unmap_hva_range(struct kvm *kvm, 228 227 unsigned long start, unsigned long end); 229 228 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+1 -3
arch/arm64/include/asm/kvm_host.h
··· 61 61 u64 vmid_gen; 62 62 u32 vmid; 63 63 64 - /* 1-level 2nd stage table and lock */ 65 - spinlock_t pgd_lock; 64 + /* 1-level 2nd stage table, protected by kvm->mmu_lock */ 66 65 pgd_t *pgd; 67 66 68 67 /* VTTBR value associated with above pgd and vmid */ ··· 356 357 struct kvm_vcpu_events *events); 357 358 358 359 #define KVM_ARCH_WANT_MMU_NOTIFIER 359 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 360 360 int kvm_unmap_hva_range(struct kvm *kvm, 361 361 unsigned long start, unsigned long end); 362 362 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+6 -3
arch/arm64/kvm/hyp/switch.c
··· 98 98 val = read_sysreg(cpacr_el1); 99 99 val |= CPACR_EL1_TTA; 100 100 val &= ~CPACR_EL1_ZEN; 101 - if (!update_fp_enabled(vcpu)) 101 + if (!update_fp_enabled(vcpu)) { 102 102 val &= ~CPACR_EL1_FPEN; 103 + __activate_traps_fpsimd32(vcpu); 104 + } 103 105 104 106 write_sysreg(val, cpacr_el1); 105 107 ··· 116 114 117 115 val = CPTR_EL2_DEFAULT; 118 116 val |= CPTR_EL2_TTA | CPTR_EL2_TZ; 119 - if (!update_fp_enabled(vcpu)) 117 + if (!update_fp_enabled(vcpu)) { 120 118 val |= CPTR_EL2_TFP; 119 + __activate_traps_fpsimd32(vcpu); 120 + } 121 121 122 122 write_sysreg(val, cptr_el2); 123 123 } ··· 133 129 if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) 134 130 write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); 135 131 136 - __activate_traps_fpsimd32(vcpu); 137 132 if (has_vhe()) 138 133 activate_traps_vhe(vcpu); 139 134 else
-1
arch/mips/include/asm/kvm_host.h
··· 931 931 bool write); 932 932 933 933 #define KVM_ARCH_WANT_MMU_NOTIFIER 934 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 935 934 int kvm_unmap_hva_range(struct kvm *kvm, 936 935 unsigned long start, unsigned long end); 937 936 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-10
arch/mips/kvm/mmu.c
··· 512 512 return 1; 513 513 } 514 514 515 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 516 - { 517 - unsigned long end = hva + PAGE_SIZE; 518 - 519 - handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL); 520 - 521 - kvm_mips_callbacks->flush_shadow_all(kvm); 522 - return 0; 523 - } 524 - 525 515 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 526 516 { 527 517 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+1 -1
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 358 358 unsigned long pp, key; 359 359 unsigned long v, orig_v, gr; 360 360 __be64 *hptep; 361 - int index; 361 + long int index; 362 362 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); 363 363 364 364 if (kvm_is_radix(vcpu->kvm))
+3 -3
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 725 725 gpa, shift); 726 726 kvmppc_radix_tlbie_page(kvm, gpa, shift); 727 727 if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) { 728 - unsigned long npages = 1; 728 + unsigned long psize = PAGE_SIZE; 729 729 if (shift) 730 - npages = 1ul << (shift - PAGE_SHIFT); 731 - kvmppc_update_dirty_map(memslot, gfn, npages); 730 + psize = 1ul << shift; 731 + kvmppc_update_dirty_map(memslot, gfn, psize); 732 732 } 733 733 } 734 734 return 0;
+7 -1
arch/s390/include/asm/mmu.h
··· 16 16 unsigned long asce; 17 17 unsigned long asce_limit; 18 18 unsigned long vdso_base; 19 - /* The mmu context allocates 4K page tables. */ 19 + /* 20 + * The following bitfields need a down_write on the mm 21 + * semaphore when they are written to. As they are only 22 + * written once, they can be read without a lock. 23 + * 24 + * The mmu context allocates 4K page tables. 25 + */ 20 26 unsigned int alloc_pgste:1; 21 27 /* The mmu context uses extended page tables. */ 22 28 unsigned int has_pgste:1;
+2
arch/s390/kvm/kvm-s390.c
··· 695 695 r = -EINVAL; 696 696 else { 697 697 r = 0; 698 + down_write(&kvm->mm->mmap_sem); 698 699 kvm->mm->context.allow_gmap_hpage_1m = 1; 700 + up_write(&kvm->mm->mmap_sem); 699 701 /* 700 702 * We might have to create fake 4k page 701 703 * tables. To avoid that the hardware works on
+18 -12
arch/s390/kvm/priv.c
··· 280 280 goto retry; 281 281 } 282 282 } 283 - if (rc) 284 - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 285 283 up_read(&current->mm->mmap_sem); 284 + if (rc == -EFAULT) 285 + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 286 + if (rc < 0) 287 + return rc; 286 288 vcpu->run->s.regs.gprs[reg1] &= ~0xff; 287 289 vcpu->run->s.regs.gprs[reg1] |= key; 288 290 return 0; ··· 326 324 goto retry; 327 325 } 328 326 } 329 - if (rc < 0) 330 - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 331 327 up_read(&current->mm->mmap_sem); 328 + if (rc == -EFAULT) 329 + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 330 + if (rc < 0) 331 + return rc; 332 332 kvm_s390_set_psw_cc(vcpu, rc); 333 333 return 0; 334 334 } ··· 394 390 FAULT_FLAG_WRITE, &unlocked); 395 391 rc = !rc ? -EAGAIN : rc; 396 392 } 393 + up_read(&current->mm->mmap_sem); 397 394 if (rc == -EFAULT) 398 395 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 399 - 400 - up_read(&current->mm->mmap_sem); 401 - if (rc >= 0) 402 - start += PAGE_SIZE; 396 + if (rc < 0) 397 + return rc; 398 + start += PAGE_SIZE; 403 399 } 404 400 405 401 if (m3 & (SSKE_MC | SSKE_MR)) { ··· 1006 1002 FAULT_FLAG_WRITE, &unlocked); 1007 1003 rc = !rc ? -EAGAIN : rc; 1008 1004 } 1005 + up_read(&current->mm->mmap_sem); 1009 1006 if (rc == -EFAULT) 1010 1007 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 1011 - 1012 - up_read(&current->mm->mmap_sem); 1013 - if (rc >= 0) 1014 - start += PAGE_SIZE; 1008 + if (rc == -EAGAIN) 1009 + continue; 1010 + if (rc < 0) 1011 + return rc; 1015 1012 } 1013 + start += PAGE_SIZE; 1016 1014 } 1017 1015 if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { 1018 1016 if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
+2 -1
arch/s390/kvm/vsie.c
··· 173 173 return set_validity_icpt(scb_s, 0x0039U); 174 174 175 175 /* copy only the wrapping keys */ 176 - if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56)) 176 + if (read_guest_real(vcpu, crycb_addr + 72, 177 + vsie_page->crycb.dea_wrapping_key_mask, 56)) 177 178 return set_validity_icpt(scb_s, 0x0035U); 178 179 179 180 scb_s->ecb3 |= ecb3_flags;
+7 -15
arch/x86/include/asm/kvm_host.h
··· 1237 1237 #define EMULTYPE_NO_DECODE (1 << 0) 1238 1238 #define EMULTYPE_TRAP_UD (1 << 1) 1239 1239 #define EMULTYPE_SKIP (1 << 2) 1240 - #define EMULTYPE_RETRY (1 << 3) 1241 - #define EMULTYPE_NO_REEXECUTE (1 << 4) 1242 - #define EMULTYPE_NO_UD_ON_FAIL (1 << 5) 1243 - #define EMULTYPE_VMWARE (1 << 6) 1244 - int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, 1245 - int emulation_type, void *insn, int insn_len); 1246 - 1247 - static inline int emulate_instruction(struct kvm_vcpu *vcpu, 1248 - int emulation_type) 1249 - { 1250 - return x86_emulate_instruction(vcpu, 0, 1251 - emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); 1252 - } 1240 + #define EMULTYPE_ALLOW_RETRY (1 << 3) 1241 + #define EMULTYPE_NO_UD_ON_FAIL (1 << 4) 1242 + #define EMULTYPE_VMWARE (1 << 5) 1243 + int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); 1244 + int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, 1245 + void *insn, int insn_len); 1253 1246 1254 1247 void kvm_enable_efer_bits(u64); 1255 1248 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); ··· 1443 1450 ____kvm_handle_fault_on_reboot(insn, "") 1444 1451 1445 1452 #define KVM_ARCH_WANT_MMU_NOTIFIER 1446 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); 1447 1453 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); 1448 1454 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); 1449 1455 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); ··· 1455 1463 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); 1456 1464 1457 1465 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, 1458 - unsigned long ipi_bitmap_high, int min, 1466 + unsigned long ipi_bitmap_high, u32 min, 1459 1467 unsigned long icr, int op_64_bit); 1460 1468 1461 1469 u64 kvm_get_arch_capabilities(void);
+20 -7
arch/x86/kvm/lapic.c
··· 548 548 } 549 549 550 550 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, 551 - unsigned long ipi_bitmap_high, int min, 551 + unsigned long ipi_bitmap_high, u32 min, 552 552 unsigned long icr, int op_64_bit) 553 553 { 554 554 int i; ··· 571 571 rcu_read_lock(); 572 572 map = rcu_dereference(kvm->arch.apic_map); 573 573 574 + if (min > map->max_apic_id) 575 + goto out; 574 576 /* Bits above cluster_size are masked in the caller. */ 575 - for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) { 576 - vcpu = map->phys_map[min + i]->vcpu; 577 - count += kvm_apic_set_irq(vcpu, &irq, NULL); 577 + for_each_set_bit(i, &ipi_bitmap_low, 578 + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { 579 + if (map->phys_map[min + i]) { 580 + vcpu = map->phys_map[min + i]->vcpu; 581 + count += kvm_apic_set_irq(vcpu, &irq, NULL); 582 + } 578 583 } 579 584 580 585 min += cluster_size; 581 - for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) { 582 - vcpu = map->phys_map[min + i]->vcpu; 583 - count += kvm_apic_set_irq(vcpu, &irq, NULL); 586 + 587 + if (min > map->max_apic_id) 588 + goto out; 589 + 590 + for_each_set_bit(i, &ipi_bitmap_high, 591 + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { 592 + if (map->phys_map[min + i]) { 593 + vcpu = map->phys_map[min + i]->vcpu; 594 + count += kvm_apic_set_irq(vcpu, &irq, NULL); 595 + } 584 596 } 585 597 598 + out: 586 599 rcu_read_unlock(); 587 600 return count; 588 601 }
+15 -11
arch/x86/kvm/mmu.c
··· 1853 1853 return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); 1854 1854 } 1855 1855 1856 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 1857 - { 1858 - return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); 1859 - } 1860 - 1861 1856 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 1862 1857 { 1863 1858 return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); ··· 5212 5217 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, 5213 5218 void *insn, int insn_len) 5214 5219 { 5215 - int r, emulation_type = EMULTYPE_RETRY; 5220 + int r, emulation_type = 0; 5216 5221 enum emulation_result er; 5217 5222 bool direct = vcpu->arch.mmu.direct_map; 5218 5223 ··· 5225 5230 r = RET_PF_INVALID; 5226 5231 if (unlikely(error_code & PFERR_RSVD_MASK)) { 5227 5232 r = handle_mmio_page_fault(vcpu, cr2, direct); 5228 - if (r == RET_PF_EMULATE) { 5229 - emulation_type = 0; 5233 + if (r == RET_PF_EMULATE) 5230 5234 goto emulate; 5231 - } 5232 5235 } 5233 5236 5234 5237 if (r == RET_PF_INVALID) { ··· 5253 5260 return 1; 5254 5261 } 5255 5262 5256 - if (mmio_info_in_cache(vcpu, cr2, direct)) 5257 - emulation_type = 0; 5263 + /* 5264 + * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still 5265 + * optimistically try to just unprotect the page and let the processor 5266 + * re-execute the instruction that caused the page fault. Do not allow 5267 + * retrying MMIO emulation, as it's not only pointless but could also 5268 + * cause us to enter an infinite loop because the processor will keep 5269 + * faulting on the non-existent MMIO address. Retrying an instruction 5270 + * from a nested guest is also pointless and dangerous as we are only 5271 + * explicitly shadowing L1's page tables, i.e. unprotecting something 5272 + * for L1 isn't going to magically fix whatever issue cause L2 to fail. 
5273 + */ 5274 + if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) 5275 + emulation_type = EMULTYPE_ALLOW_RETRY; 5258 5276 emulate: 5259 5277 /* 5260 5278 * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
+9 -10
arch/x86/kvm/svm.c
··· 776 776 } 777 777 778 778 if (!svm->next_rip) { 779 - if (emulate_instruction(vcpu, EMULTYPE_SKIP) != 779 + if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) != 780 780 EMULATE_DONE) 781 781 printk(KERN_DEBUG "%s: NOP\n", __func__); 782 782 return; ··· 2715 2715 2716 2716 WARN_ON_ONCE(!enable_vmware_backdoor); 2717 2717 2718 - er = emulate_instruction(vcpu, 2718 + er = kvm_emulate_instruction(vcpu, 2719 2719 EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); 2720 2720 if (er == EMULATE_USER_EXIT) 2721 2721 return 0; ··· 2819 2819 string = (io_info & SVM_IOIO_STR_MASK) != 0; 2820 2820 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 2821 2821 if (string) 2822 - return emulate_instruction(vcpu, 0) == EMULATE_DONE; 2822 + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; 2823 2823 2824 2824 port = io_info >> 16; 2825 2825 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; ··· 3861 3861 static int invlpg_interception(struct vcpu_svm *svm) 3862 3862 { 3863 3863 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) 3864 - return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 3864 + return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 3865 3865 3866 3866 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); 3867 3867 return kvm_skip_emulated_instruction(&svm->vcpu); ··· 3869 3869 3870 3870 static int emulate_on_interception(struct vcpu_svm *svm) 3871 3871 { 3872 - return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 3872 + return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 3873 3873 } 3874 3874 3875 3875 static int rsm_interception(struct vcpu_svm *svm) 3876 3876 { 3877 - return x86_emulate_instruction(&svm->vcpu, 0, 0, 3878 - rsm_ins_bytes, 2) == EMULATE_DONE; 3877 + return kvm_emulate_instruction_from_buffer(&svm->vcpu, 3878 + rsm_ins_bytes, 2) == EMULATE_DONE; 3879 3879 } 3880 3880 3881 3881 static int rdpmc_interception(struct vcpu_svm *svm) ··· 4700 4700 ret = avic_unaccel_trap_write(svm); 4701 4701 } else { 4702 4702 /* 
Handling Fault */ 4703 - ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); 4703 + ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); 4704 4704 } 4705 4705 4706 4706 return ret; ··· 6747 6747 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) 6748 6748 { 6749 6749 unsigned long vaddr, vaddr_end, next_vaddr; 6750 - unsigned long dst_vaddr, dst_vaddr_end; 6750 + unsigned long dst_vaddr; 6751 6751 struct page **src_p, **dst_p; 6752 6752 struct kvm_sev_dbg debug; 6753 6753 unsigned long n; ··· 6763 6763 size = debug.len; 6764 6764 vaddr_end = vaddr + size; 6765 6765 dst_vaddr = debug.dst_uaddr; 6766 - dst_vaddr_end = dst_vaddr + size; 6767 6766 6768 6767 for (; vaddr < vaddr_end; vaddr = next_vaddr) { 6769 6768 int len, s_off, d_off;
+31 -12
arch/x86/kvm/vmx.c
··· 6983 6983 * Cause the #SS fault with 0 error code in VM86 mode. 6984 6984 */ 6985 6985 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { 6986 - if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { 6986 + if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { 6987 6987 if (vcpu->arch.halt_request) { 6988 6988 vcpu->arch.halt_request = 0; 6989 6989 return kvm_vcpu_halt(vcpu); ··· 7054 7054 7055 7055 if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { 7056 7056 WARN_ON_ONCE(!enable_vmware_backdoor); 7057 - er = emulate_instruction(vcpu, 7057 + er = kvm_emulate_instruction(vcpu, 7058 7058 EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); 7059 7059 if (er == EMULATE_USER_EXIT) 7060 7060 return 0; ··· 7157 7157 ++vcpu->stat.io_exits; 7158 7158 7159 7159 if (string) 7160 - return emulate_instruction(vcpu, 0) == EMULATE_DONE; 7160 + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; 7161 7161 7162 7162 port = exit_qualification >> 16; 7163 7163 size = (exit_qualification & 7) + 1; ··· 7231 7231 static int handle_desc(struct kvm_vcpu *vcpu) 7232 7232 { 7233 7233 WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); 7234 - return emulate_instruction(vcpu, 0) == EMULATE_DONE; 7234 + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; 7235 7235 } 7236 7236 7237 7237 static int handle_cr(struct kvm_vcpu *vcpu) ··· 7480 7480 7481 7481 static int handle_invd(struct kvm_vcpu *vcpu) 7482 7482 { 7483 - return emulate_instruction(vcpu, 0) == EMULATE_DONE; 7483 + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; 7484 7484 } 7485 7485 7486 7486 static int handle_invlpg(struct kvm_vcpu *vcpu) ··· 7547 7547 return kvm_skip_emulated_instruction(vcpu); 7548 7548 } 7549 7549 } 7550 - return emulate_instruction(vcpu, 0) == EMULATE_DONE; 7550 + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; 7551 7551 } 7552 7552 7553 7553 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) ··· 7704 7704 if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) 7705 7705 return 
kvm_skip_emulated_instruction(vcpu); 7706 7706 else 7707 - return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP, 7708 - NULL, 0) == EMULATE_DONE; 7707 + return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == 7708 + EMULATE_DONE; 7709 7709 } 7710 7710 7711 7711 return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); ··· 7748 7748 if (kvm_test_request(KVM_REQ_EVENT, vcpu)) 7749 7749 return 1; 7750 7750 7751 - err = emulate_instruction(vcpu, 0); 7751 + err = kvm_emulate_instruction(vcpu, 0); 7752 7752 7753 7753 if (err == EMULATE_USER_EXIT) { 7754 7754 ++vcpu->stat.mmio_exits; ··· 12537 12537 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 12538 12538 bool from_vmentry = !!exit_qual; 12539 12539 u32 dummy_exit_qual; 12540 + u32 vmcs01_cpu_exec_ctrl; 12540 12541 int r = 0; 12542 + 12543 + vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 12541 12544 12542 12545 enter_guest_mode(vcpu); 12543 12546 ··· 12575 12572 * have already been set at vmentry time and should not be reset. 12576 12573 */ 12577 12574 kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); 12575 + } 12576 + 12577 + /* 12578 + * If L1 had a pending IRQ/NMI until it executed 12579 + * VMLAUNCH/VMRESUME which wasn't delivered because it was 12580 + * disallowed (e.g. interrupts disabled), L0 needs to 12581 + * evaluate if this pending event should cause an exit from L2 12582 + * to L1 or delivered directly to L2 (e.g. In case L1 don't 12583 + * intercept EXTERNAL_INTERRUPT). 12584 + * 12585 + * Usually this would be handled by L0 requesting a 12586 + * IRQ/NMI window by setting VMCS accordingly. However, 12587 + * this setting was done on VMCS01 and now VMCS02 is active 12588 + * instead. Thus, we force L0 to perform pending event 12589 + * evaluation by requesting a KVM_REQ_EVENT. 
12590 + */ 12591 + if (vmcs01_cpu_exec_ctrl & 12592 + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) { 12593 + kvm_make_request(KVM_REQ_EVENT, vcpu); 12578 12594 } 12579 12595 12580 12596 /* ··· 14009 13987 if (check_vmentry_prereqs(vcpu, vmcs12) || 14010 13988 check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) 14011 13989 return -EINVAL; 14012 - 14013 - if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING) 14014 - vmx->nested.nested_run_pending = 1; 14015 13990 14016 13991 vmx->nested.dirty_vmcs12 = true; 14017 13992 ret = enter_vmx_non_root_mode(vcpu, NULL);
+23 -5
arch/x86/kvm/x86.c
··· 4987 4987 emul_type = 0; 4988 4988 } 4989 4989 4990 - er = emulate_instruction(vcpu, emul_type); 4990 + er = kvm_emulate_instruction(vcpu, emul_type); 4991 4991 if (er == EMULATE_USER_EXIT) 4992 4992 return 0; 4993 4993 if (er != EMULATE_DONE) ··· 5870 5870 gpa_t gpa = cr2; 5871 5871 kvm_pfn_t pfn; 5872 5872 5873 - if (emulation_type & EMULTYPE_NO_REEXECUTE) 5873 + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) 5874 + return false; 5875 + 5876 + if (WARN_ON_ONCE(is_guest_mode(vcpu))) 5874 5877 return false; 5875 5878 5876 5879 if (!vcpu->arch.mmu.direct_map) { ··· 5961 5958 */ 5962 5959 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; 5963 5960 5964 - if (!(emulation_type & EMULTYPE_RETRY)) 5961 + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) 5962 + return false; 5963 + 5964 + if (WARN_ON_ONCE(is_guest_mode(vcpu))) 5965 5965 return false; 5966 5966 5967 5967 if (x86_page_table_writing_insn(ctxt)) ··· 6282 6276 6283 6277 return r; 6284 6278 } 6285 - EXPORT_SYMBOL_GPL(x86_emulate_instruction); 6279 + 6280 + int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) 6281 + { 6282 + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); 6283 + } 6284 + EXPORT_SYMBOL_GPL(kvm_emulate_instruction); 6285 + 6286 + int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, 6287 + void *insn, int insn_len) 6288 + { 6289 + return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); 6290 + } 6291 + EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); 6286 6292 6287 6293 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, 6288 6294 unsigned short port) ··· 7752 7734 { 7753 7735 int r; 7754 7736 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 7755 - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); 7737 + r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); 7756 7738 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 7757 7739 if (r != EMULATE_DONE) 7758 7740 return 0;
+2
arch/x86/kvm/x86.h
··· 274 274 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, 275 275 int page_num); 276 276 bool kvm_vector_hashing_enabled(void); 277 + int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, 278 + int emulation_type, void *insn, int insn_len); 277 279 278 280 #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ 279 281 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
+49 -10
tools/kvm/kvm_stat/kvm_stat
··· 759 759 if len(vms) == 0: 760 760 self.do_read = False 761 761 762 - self.paths = filter(lambda x: "{}-".format(pid) in x, vms) 762 + self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms)) 763 763 764 764 else: 765 765 self.paths = [] 766 766 self.do_read = True 767 - self.reset() 767 + 768 + def _verify_paths(self): 769 + """Remove invalid paths""" 770 + for path in self.paths: 771 + if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)): 772 + self.paths.remove(path) 773 + continue 768 774 769 775 def read(self, reset=0, by_guest=0): 770 776 """Returns a dict with format:'file name / field -> current value'. ··· 786 780 # If no debugfs filtering support is available, then don't read. 787 781 if not self.do_read: 788 782 return results 783 + self._verify_paths() 789 784 790 785 paths = self.paths 791 786 if self._pid == 0: ··· 1103 1096 pid = self.stats.pid_filter 1104 1097 self.screen.erase() 1105 1098 gname = self.get_gname_from_pid(pid) 1099 + self._gname = gname 1106 1100 if gname: 1107 1101 gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...' 
1108 1102 if len(gname) > MAX_GUEST_NAME_LEN 1109 1103 else gname)) 1110 1104 if pid > 0: 1111 - self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}' 1112 - .format(pid, gname), curses.A_BOLD) 1105 + self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname) 1113 1106 else: 1114 - self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) 1107 + self._headline = 'kvm statistics - summary' 1108 + self.screen.addstr(0, 0, self._headline, curses.A_BOLD) 1115 1109 if self.stats.fields_filter: 1116 1110 regex = self.stats.fields_filter 1117 1111 if len(regex) > MAX_REGEX_LEN: ··· 1170 1162 1171 1163 return sorted_items 1172 1164 1165 + if not self._is_running_guest(self.stats.pid_filter): 1166 + if self._gname: 1167 + try: # ...to identify the guest by name in case it's back 1168 + pids = self.get_pid_from_gname(self._gname) 1169 + if len(pids) == 1: 1170 + self._refresh_header(pids[0]) 1171 + self._update_pid(pids[0]) 1172 + return 1173 + except: 1174 + pass 1175 + self._display_guest_dead() 1176 + # leave final data on screen 1177 + return 1173 1178 row = 3 1174 1179 self.screen.move(row, 0) 1175 1180 self.screen.clrtobot() ··· 1205 1184 # print events 1206 1185 tavg = 0 1207 1186 tcur = 0 1187 + guest_removed = False 1208 1188 for key, values in get_sorted_events(self, stats): 1209 1189 if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0): 1210 1190 break ··· 1213 1191 key = self.get_gname_from_pid(key) 1214 1192 if not key: 1215 1193 continue 1216 - cur = int(round(values.delta / sleeptime)) if values.delta else '' 1194 + cur = int(round(values.delta / sleeptime)) if values.delta else 0 1195 + if cur < 0: 1196 + guest_removed = True 1197 + continue 1217 1198 if key[0] != ' ': 1218 1199 if values.delta: 1219 1200 tcur += values.delta ··· 1229 1204 values.value * 100 / float(ltotal), cur)) 1230 1205 row += 1 1231 1206 if row == 3: 1232 - self.screen.addstr(4, 1, 'No matching events reported yet') 1207 + if guest_removed: 1208 + 
self.screen.addstr(4, 1, 'Guest removed, updating...') 1209 + else: 1210 + self.screen.addstr(4, 1, 'No matching events reported yet') 1233 1211 if row > 4: 1234 1212 tavg = int(round(tcur / sleeptime)) if tcur > 0 else '' 1235 1213 self.screen.addstr(row, 1, '%-40s %10d %8s' % 1236 1214 ('Total', total, tavg), curses.A_BOLD) 1237 1215 self.screen.refresh() 1216 + 1217 + def _display_guest_dead(self): 1218 + marker = ' Guest is DEAD ' 1219 + y = min(len(self._headline), 80 - len(marker)) 1220 + self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT) 1238 1221 1239 1222 def _show_msg(self, text): 1240 1223 """Display message centered text and exit on key press""" ··· 1252 1219 (x, term_width) = self.screen.getmaxyx() 1253 1220 row = 2 1254 1221 for line in text: 1255 - start = (term_width - len(line)) / 2 1222 + start = (term_width - len(line)) // 2 1256 1223 self.screen.addstr(row, start, line) 1257 1224 row += 1 1258 - self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint, 1225 + self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint, 1259 1226 curses.A_STANDOUT) 1260 1227 self.screen.getkey() 1261 1228 ··· 1352 1319 msg = '"' + str(val) + '": Invalid value' 1353 1320 self._refresh_header() 1354 1321 1322 + def _is_running_guest(self, pid): 1323 + """Check if pid is still a running process.""" 1324 + if not pid: 1325 + return True 1326 + return os.path.isdir(os.path.join('/proc/', str(pid))) 1327 + 1355 1328 def _show_vm_selection_by_guest(self): 1356 1329 """Draws guest selection mask. 1357 1330 ··· 1385 1346 if not guest or guest == '0': 1386 1347 break 1387 1348 if guest.isdigit(): 1388 - if not os.path.isdir(os.path.join('/proc/', guest)): 1349 + if not self._is_running_guest(guest): 1389 1350 msg = '"' + guest + '": Not a running process' 1390 1351 continue 1391 1352 pid = int(guest)
+8 -13
virt/kvm/arm/mmu.c
··· 1817 1817 return 0; 1818 1818 } 1819 1819 1820 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 1821 - { 1822 - unsigned long end = hva + PAGE_SIZE; 1823 - 1824 - if (!kvm->arch.pgd) 1825 - return 0; 1826 - 1827 - trace_kvm_unmap_hva(hva); 1828 - handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL); 1829 - return 0; 1830 - } 1831 - 1832 1820 int kvm_unmap_hva_range(struct kvm *kvm, 1833 1821 unsigned long start, unsigned long end) 1834 1822 { ··· 1848 1860 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 1849 1861 { 1850 1862 unsigned long end = hva + PAGE_SIZE; 1863 + kvm_pfn_t pfn = pte_pfn(pte); 1851 1864 pte_t stage2_pte; 1852 1865 1853 1866 if (!kvm->arch.pgd) 1854 1867 return; 1855 1868 1856 1869 trace_kvm_set_spte_hva(hva); 1857 - stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2); 1870 + 1871 + /* 1872 + * We've moved a page around, probably through CoW, so let's treat it 1873 + * just like a translation fault and clean the cache to the PoC. 1874 + */ 1875 + clean_dcache_guest_page(pfn, PAGE_SIZE); 1876 + stage2_pte = pfn_pte(pfn, PAGE_S2); 1858 1877 handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); 1859 1878 } 1860 1879
-15
virt/kvm/arm/trace.h
··· 134 134 __entry->vcpu_pc, __entry->instr, __entry->cpsr) 135 135 ); 136 136 137 - TRACE_EVENT(kvm_unmap_hva, 138 - TP_PROTO(unsigned long hva), 139 - TP_ARGS(hva), 140 - 141 - TP_STRUCT__entry( 142 - __field( unsigned long, hva ) 143 - ), 144 - 145 - TP_fast_assign( 146 - __entry->hva = hva; 147 - ), 148 - 149 - TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva) 150 - ); 151 - 152 137 TRACE_EVENT(kvm_unmap_hva_range, 153 138 TP_PROTO(unsigned long start, unsigned long end), 154 139 TP_ARGS(start, end),