Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"A bit on the bigger side, mostly due to me being on vacation, then
busy, then on parental leave, but there's nothing worrisome.

ARM:
- Multiple stolen time fixes, with a new capability to match x86
- Fix for hugetlbfs mappings when PUD and PMD are the same level
- Fix for hugetlbfs mappings when PTE mappings are enforced (dirty
logging, for example)
- Fix tracing output of 64bit values

x86:
- nSVM state restore fixes
- Async page fault fixes
- Lots of small fixes everywhere"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
KVM: emulator: more strict rsm checks.
KVM: nSVM: more strict SMM checks when returning to nested guest
SVM: nSVM: setup nested msr permission bitmap on nested state load
SVM: nSVM: correctly restore GIF on vmexit from nesting after migration
x86/kvm: don't forget to ACK async PF IRQ
x86/kvm: properly use DEFINE_IDTENTRY_SYSVEC() macro
KVM: VMX: Don't freeze guest when event delivery causes an APIC-access exit
KVM: SVM: avoid emulation with stale next_rip
KVM: x86: always allow writing '0' to MSR_KVM_ASYNC_PF_EN
KVM: SVM: Periodically schedule when unregistering regions on destroy
KVM: MIPS: Change the definition of kvm type
kvm x86/mmu: use KVM_REQ_MMU_SYNC to sync when needed
KVM: nVMX: Fix the update value of nested load IA32_PERF_GLOBAL_CTRL control
KVM: fix memory leak in kvm_io_bus_unregister_dev()
KVM: Check the allocation of pv cpu mask
KVM: nVMX: Update VMCS02 when L2 PAE PDPTE updates detected
KVM: arm64: Update page shift if stage 2 block mapping not supported
KVM: arm64: Fix address truncation in traces
KVM: arm64: Do not try to map PUDs when they are folded into PMD
arm64/x86: KVM: Introduce steal-time cap
...

+180 -81
+18 -4
Documentation/virt/kvm/api.rst
··· 6130 6130 8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH 6131 6131 ----------------------------------- 6132 6132 6133 - :Architecture: x86 6133 + :Architectures: x86 6134 6134 6135 6135 This capability indicates that KVM running on top of Hyper-V hypervisor 6136 6136 enables Direct TLB flush for its guests meaning that TLB flush ··· 6143 6143 thinks it's running on Hyper-V and only use Hyper-V hypercalls. 6144 6144 6145 6145 8.22 KVM_CAP_S390_VCPU_RESETS 6146 + ----------------------------- 6146 6147 6147 - Architectures: s390 6148 + :Architectures: s390 6148 6149 6149 6150 This capability indicates that the KVM_S390_NORMAL_RESET and 6150 6151 KVM_S390_CLEAR_RESET ioctls are available. 6151 6152 6152 6153 8.23 KVM_CAP_S390_PROTECTED 6154 + --------------------------- 6153 6155 6154 - Architecture: s390 6155 - 6156 + :Architectures: s390 6156 6157 6157 6158 This capability indicates that the Ultravisor has been initialized and 6158 6159 KVM can therefore start protected VMs. 6159 6160 This capability governs the KVM_S390_PV_COMMAND ioctl and the 6160 6161 KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected 6161 6162 guests when the state change is invalid. 6163 + 6164 + 8.24 KVM_CAP_STEAL_TIME 6165 + ----------------------- 6166 + 6167 + :Architectures: arm64, x86 6168 + 6169 + This capability indicates that KVM supports steal time accounting. 6170 + When steal time accounting is supported it may be enabled with 6171 + architecture-specific interfaces. This capability and the architecture- 6172 + specific interfaces must be consistent, i.e. if one says the feature 6173 + is supported, then the other should as well and vice versa. For arm64 6174 + see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL". 6175 + For x86 see Documentation/virt/kvm/msr.rst "MSR_KVM_STEAL_TIME".
+1 -1
arch/arm64/include/asm/kvm_host.h
··· 368 368 369 369 /* Guest PV state */ 370 370 struct { 371 - u64 steal; 372 371 u64 last_steal; 373 372 gpa_t base; 374 373 } steal; ··· 543 544 gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); 544 545 void kvm_update_stolen_time(struct kvm_vcpu *vcpu); 545 546 547 + bool kvm_arm_pvtime_supported(void); 546 548 int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, 547 549 struct kvm_device_attr *attr); 548 550 int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+3
arch/arm64/kvm/arm.c
··· 206 206 */ 207 207 r = 1; 208 208 break; 209 + case KVM_CAP_STEAL_TIME: 210 + r = kvm_arm_pvtime_supported(); 211 + break; 209 212 default: 210 213 r = kvm_arch_vm_ioctl_check_extension(kvm, ext); 211 214 break;
+7 -1
arch/arm64/kvm/mmu.c
··· 1877 1877 !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { 1878 1878 force_pte = true; 1879 1879 vma_pagesize = PAGE_SIZE; 1880 + vma_shift = PAGE_SHIFT; 1880 1881 } 1881 1882 1882 1883 /* ··· 1971 1970 (fault_status == FSC_PERM && 1972 1971 stage2_is_exec(mmu, fault_ipa, vma_pagesize)); 1973 1972 1974 - if (vma_pagesize == PUD_SIZE) { 1973 + /* 1974 + * If PUD_SIZE == PMD_SIZE, there is no real PUD level, and 1975 + * all we have is a 2-level page table. Trying to map a PUD in 1976 + * this case would be fatally wrong. 1977 + */ 1978 + if (PUD_SIZE != PMD_SIZE && vma_pagesize == PUD_SIZE) { 1975 1979 pud_t new_pud = kvm_pfn_pud(pfn, mem_type); 1976 1980 1977 1981 new_pud = kvm_pud_mkhuge(new_pud);
+13 -16
arch/arm64/kvm/pvtime.c
··· 13 13 void kvm_update_stolen_time(struct kvm_vcpu *vcpu) 14 14 { 15 15 struct kvm *kvm = vcpu->kvm; 16 - u64 steal; 17 - __le64 steal_le; 18 - u64 offset; 19 - int idx; 20 16 u64 base = vcpu->arch.steal.base; 17 + u64 last_steal = vcpu->arch.steal.last_steal; 18 + u64 offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); 19 + u64 steal = 0; 20 + int idx; 21 21 22 22 if (base == GPA_INVALID) 23 23 return; 24 24 25 - /* Let's do the local bookkeeping */ 26 - steal = vcpu->arch.steal.steal; 27 - steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal; 28 - vcpu->arch.steal.last_steal = current->sched_info.run_delay; 29 - vcpu->arch.steal.steal = steal; 30 - 31 - steal_le = cpu_to_le64(steal); 32 25 idx = srcu_read_lock(&kvm->srcu); 33 - offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); 34 - kvm_put_guest(kvm, base + offset, steal_le, u64); 26 + if (!kvm_get_guest(kvm, base + offset, steal)) { 27 + steal = le64_to_cpu(steal); 28 + vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); 29 + steal += vcpu->arch.steal.last_steal - last_steal; 30 + kvm_put_guest(kvm, base + offset, cpu_to_le64(steal)); 31 + } 35 32 srcu_read_unlock(&kvm->srcu, idx); 36 33 } 37 34 ··· 40 43 switch (feature) { 41 44 case ARM_SMCCC_HV_PV_TIME_FEATURES: 42 45 case ARM_SMCCC_HV_PV_TIME_ST: 43 - val = SMCCC_RET_SUCCESS; 46 + if (vcpu->arch.steal.base != GPA_INVALID) 47 + val = SMCCC_RET_SUCCESS; 44 48 break; 45 49 } 46 50 ··· 62 64 * Start counting stolen time from the time the guest requests 63 65 * the feature enabled. 64 66 */ 65 - vcpu->arch.steal.steal = 0; 66 67 vcpu->arch.steal.last_steal = current->sched_info.run_delay; 67 68 68 69 idx = srcu_read_lock(&kvm->srcu); ··· 71 74 return base; 72 75 } 73 76 74 - static bool kvm_arm_pvtime_supported(void) 77 + bool kvm_arm_pvtime_supported(void) 75 78 { 76 79 return !!sched_info_on(); 77 80 }
+8 -8
arch/arm64/kvm/trace_arm.h
··· 23 23 __entry->vcpu_pc = vcpu_pc; 24 24 ), 25 25 26 - TP_printk("PC: 0x%08lx", __entry->vcpu_pc) 26 + TP_printk("PC: 0x%016lx", __entry->vcpu_pc) 27 27 ); 28 28 29 29 TRACE_EVENT(kvm_exit, ··· 42 42 __entry->vcpu_pc = vcpu_pc; 43 43 ), 44 44 45 - TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx", 45 + TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%016lx", 46 46 __print_symbolic(__entry->ret, kvm_arm_exception_type), 47 47 __entry->esr_ec, 48 48 __print_symbolic(__entry->esr_ec, kvm_arm_exception_class), ··· 69 69 __entry->ipa = ipa; 70 70 ), 71 71 72 - TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", 72 + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#016lx", 73 73 __entry->ipa, __entry->hsr, 74 74 __entry->hxfar, __entry->vcpu_pc) 75 75 ); ··· 131 131 __entry->cpsr = cpsr; 132 132 ), 133 133 134 - TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)", 134 + TP_printk("Emulate MMIO at: 0x%016lx (instr: %08lx, cpsr: %08lx)", 135 135 __entry->vcpu_pc, __entry->instr, __entry->cpsr) 136 136 ); 137 137 ··· 149 149 __entry->end = end; 150 150 ), 151 151 152 - TP_printk("mmu notifier unmap range: %#08lx -- %#08lx", 152 + TP_printk("mmu notifier unmap range: %#016lx -- %#016lx", 153 153 __entry->start, __entry->end) 154 154 ); 155 155 ··· 165 165 __entry->hva = hva; 166 166 ), 167 167 168 - TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) 168 + TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva) 169 169 ); 170 170 171 171 TRACE_EVENT(kvm_age_hva, ··· 182 182 __entry->end = end; 183 183 ), 184 184 185 - TP_printk("mmu notifier age hva: %#08lx -- %#08lx", 185 + TP_printk("mmu notifier age hva: %#016lx -- %#016lx", 186 186 __entry->start, __entry->end) 187 187 ); 188 188 ··· 198 198 __entry->hva = hva; 199 199 ), 200 200 201 - TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) 201 + TP_printk("mmu notifier test age hva: %#016lx", __entry->hva) 202 202 ); 203 203 204 204 TRACE_EVENT(kvm_set_way_flush,
+3 -3
arch/arm64/kvm/trace_handle_exit.h
··· 22 22 __entry->is_wfe = is_wfe; 23 23 ), 24 24 25 - TP_printk("guest executed wf%c at: 0x%08lx", 25 + TP_printk("guest executed wf%c at: 0x%016lx", 26 26 __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) 27 27 ); 28 28 ··· 42 42 __entry->imm = imm; 43 43 ), 44 44 45 - TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", 45 + TP_printk("HVC at 0x%016lx (r0: 0x%016lx, imm: 0x%lx)", 46 46 __entry->vcpu_pc, __entry->r0, __entry->imm) 47 47 ); 48 48 ··· 135 135 __entry->write_value = write_value; 136 136 ), 137 137 138 - TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) 138 + TP_printk("%s %s reg %d (0x%016llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) 139 139 ); 140 140 141 141 TRACE_EVENT(kvm_handle_sys_reg,
+2
arch/mips/kvm/mips.c
··· 137 137 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 138 138 { 139 139 switch (type) { 140 + case KVM_VM_MIPS_AUTO: 141 + break; 140 142 #ifdef CONFIG_KVM_MIPS_VZ 141 143 case KVM_VM_MIPS_VZ: 142 144 #else
+20 -6
arch/x86/kernel/kvm.c
··· 270 270 { 271 271 struct pt_regs *old_regs = set_irq_regs(regs); 272 272 u32 token; 273 - irqentry_state_t state; 274 273 275 - state = irqentry_enter(regs); 274 + ack_APIC_irq(); 276 275 277 276 inc_irq_stat(irq_hv_callback_count); 278 277 ··· 282 283 wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1); 283 284 } 284 285 285 - irqentry_exit(regs, state); 286 286 set_irq_regs(old_regs); 287 287 } 288 288 ··· 652 654 } 653 655 654 656 if (pv_tlb_flush_supported()) { 655 - pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; 656 657 pv_ops.mmu.tlb_remove_table = tlb_remove_table; 657 658 pr_info("KVM setup pv remote TLB flush\n"); 658 659 } ··· 764 767 } 765 768 arch_initcall(activate_jump_labels); 766 769 770 + static void kvm_free_pv_cpu_mask(void) 771 + { 772 + unsigned int cpu; 773 + 774 + for_each_possible_cpu(cpu) 775 + free_cpumask_var(per_cpu(__pv_cpu_mask, cpu)); 776 + } 777 + 767 778 static __init int kvm_alloc_cpumask(void) 768 779 { 769 780 int cpu; ··· 790 785 791 786 if (alloc) 792 787 for_each_possible_cpu(cpu) { 793 - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu), 794 - GFP_KERNEL, cpu_to_node(cpu)); 788 + if (!zalloc_cpumask_var_node( 789 + per_cpu_ptr(&__pv_cpu_mask, cpu), 790 + GFP_KERNEL, cpu_to_node(cpu))) { 791 + goto zalloc_cpumask_fail; 792 + } 795 793 } 796 794 795 + apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself; 796 + pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; 797 797 return 0; 798 + 799 + zalloc_cpumask_fail: 800 + kvm_free_pv_cpu_mask(); 801 + return -ENOMEM; 798 802 } 799 803 arch_initcall(kvm_alloc_cpumask); 800 804
+17 -5
arch/x86/kvm/emulate.c
··· 2505 2505 *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); 2506 2506 2507 2507 val = GET_SMSTATE(u32, smstate, 0x7fcc); 2508 - ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); 2508 + 2509 + if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1)) 2510 + return X86EMUL_UNHANDLEABLE; 2511 + 2509 2512 val = GET_SMSTATE(u32, smstate, 0x7fc8); 2510 - ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); 2513 + 2514 + if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1)) 2515 + return X86EMUL_UNHANDLEABLE; 2511 2516 2512 2517 selector = GET_SMSTATE(u32, smstate, 0x7fc4); 2513 2518 set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); ··· 2565 2560 ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; 2566 2561 2567 2562 val = GET_SMSTATE(u32, smstate, 0x7f68); 2568 - ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); 2563 + 2564 + if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1)) 2565 + return X86EMUL_UNHANDLEABLE; 2566 + 2569 2567 val = GET_SMSTATE(u32, smstate, 0x7f60); 2570 - ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); 2568 + 2569 + if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1)) 2570 + return X86EMUL_UNHANDLEABLE; 2571 2571 2572 2572 cr0 = GET_SMSTATE(u64, smstate, 0x7f58); 2573 2573 cr3 = GET_SMSTATE(u64, smstate, 0x7f50); 2574 2574 cr4 = GET_SMSTATE(u64, smstate, 0x7f48); 2575 2575 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); 2576 2576 val = GET_SMSTATE(u64, smstate, 0x7ed0); 2577 - ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA); 2577 + 2578 + if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA)) 2579 + return X86EMUL_UNHANDLEABLE; 2578 2580 2579 2581 selector = GET_SMSTATE(u32, smstate, 0x7e90); 2580 2582 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+1 -1
arch/x86/kvm/mmu/mmu.c
··· 2469 2469 } 2470 2470 2471 2471 if (sp->unsync_children) 2472 - kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); 2472 + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); 2473 2473 2474 2474 __clear_sp_write_flooding_count(sp); 2475 2475
+6 -1
arch/x86/kvm/svm/nested.c
··· 586 586 svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; 587 587 588 588 /* Give the current vmcb to the guest */ 589 - svm_set_gif(svm, false); 590 589 591 590 nested_vmcb->save.es = vmcb->save.es; 592 591 nested_vmcb->save.cs = vmcb->save.cs; ··· 630 631 631 632 /* Restore the original control entries */ 632 633 copy_vmcb_control_area(&vmcb->control, &hsave->control); 634 + 635 + /* On vmexit the GIF is set to false */ 636 + svm_set_gif(svm, false); 633 637 634 638 svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = 635 639 svm->vcpu.arch.l1_tsc_offset; ··· 1133 1131 svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa; 1134 1132 load_nested_vmcb_control(svm, &ctl); 1135 1133 nested_prepare_vmcb_control(svm); 1134 + 1135 + if (!nested_svm_vmrun_msrpm(svm)) 1136 + return -EINVAL; 1136 1137 1137 1138 out_set_gif: 1138 1139 svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+1
arch/x86/kvm/svm/sev.c
··· 1106 1106 list_for_each_safe(pos, q, head) { 1107 1107 __unregister_enc_region_locked(kvm, 1108 1108 list_entry(pos, struct enc_region, list)); 1109 + cond_resched(); 1109 1110 } 1110 1111 } 1111 1112
+21 -15
arch/x86/kvm/svm/svm.c
··· 2938 2938 if (npt_enabled) 2939 2939 vcpu->arch.cr3 = svm->vmcb->save.cr3; 2940 2940 2941 - svm_complete_interrupts(svm); 2942 - 2943 2941 if (is_guest_mode(vcpu)) { 2944 2942 int vmexit; 2945 2943 ··· 3502 3504 stgi(); 3503 3505 3504 3506 /* Any pending NMI will happen here */ 3505 - exit_fastpath = svm_exit_handlers_fastpath(vcpu); 3506 3507 3507 3508 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) 3508 3509 kvm_after_interrupt(&svm->vcpu); ··· 3515 3518 } 3516 3519 3517 3520 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 3521 + vmcb_mark_all_clean(svm->vmcb); 3518 3522 3519 3523 /* if exit due to PF check for async PF */ 3520 3524 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) ··· 3535 3537 SVM_EXIT_EXCP_BASE + MC_VECTOR)) 3536 3538 svm_handle_mce(svm); 3537 3539 3538 - vmcb_mark_all_clean(svm->vmcb); 3540 + svm_complete_interrupts(svm); 3541 + exit_fastpath = svm_exit_handlers_fastpath(vcpu); 3539 3542 return exit_fastpath; 3540 3543 } 3541 3544 ··· 3899 3900 static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) 3900 3901 { 3901 3902 struct vcpu_svm *svm = to_svm(vcpu); 3902 - struct vmcb *nested_vmcb; 3903 3903 struct kvm_host_map map; 3904 - u64 guest; 3905 - u64 vmcb; 3906 3904 int ret = 0; 3907 3905 3908 - guest = GET_SMSTATE(u64, smstate, 0x7ed8); 3909 - vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); 3906 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { 3907 + u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); 3908 + u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8); 3909 + u64 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); 3910 3910 3911 - if (guest) { 3912 - if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL) 3913 - return 1; 3914 - nested_vmcb = map.hva; 3915 - ret = enter_svm_guest_mode(svm, vmcb, nested_vmcb); 3916 - kvm_vcpu_unmap(&svm->vcpu, &map, true); 3911 + if (guest) { 3912 + if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) 3913 + return 1; 3914 + 3915 + if (!(saved_efer & EFER_SVME)) 3916 
+ return 1; 3917 + 3918 + if (kvm_vcpu_map(&svm->vcpu, 3919 + gpa_to_gfn(vmcb), &map) == -EINVAL) 3920 + return 1; 3921 + 3922 + ret = enter_svm_guest_mode(svm, vmcb, map.hva); 3923 + kvm_vcpu_unmap(&svm->vcpu, &map, true); 3924 + } 3917 3925 } 3918 3926 3919 3927 return ret;
+9 -1
arch/x86/kvm/vmx/nested.c
··· 4404 4404 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) 4405 4405 kvm_vcpu_flush_tlb_current(vcpu); 4406 4406 4407 + /* 4408 + * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between 4409 + * now and the new vmentry. Ensure that the VMCS02 PDPTR fields are 4410 + * up-to-date before switching to L1. 4411 + */ 4412 + if (enable_ept && is_pae_paging(vcpu)) 4413 + vmx_ept_load_pdptrs(vcpu); 4414 + 4407 4415 leave_guest_mode(vcpu); 4408 4416 4409 4417 if (nested_cpu_has_preemption_timer(vmcs12)) ··· 4676 4668 vmx->nested.msrs.entry_ctls_high &= 4677 4669 ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; 4678 4670 vmx->nested.msrs.exit_ctls_high &= 4679 - ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; 4671 + ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; 4680 4672 } 4681 4673 } 4682 4674
+3 -2
arch/x86/kvm/vmx/vmx.c
··· 2971 2971 vpid_sync_context(to_vmx(vcpu)->vpid); 2972 2972 } 2973 2973 2974 - static void ept_load_pdptrs(struct kvm_vcpu *vcpu) 2974 + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) 2975 2975 { 2976 2976 struct kvm_mmu *mmu = vcpu->arch.walk_mmu; 2977 2977 ··· 3114 3114 guest_cr3 = vcpu->arch.cr3; 3115 3115 else /* vmcs01.GUEST_CR3 is already up-to-date. */ 3116 3116 update_guest_cr3 = false; 3117 - ept_load_pdptrs(vcpu); 3117 + vmx_ept_load_pdptrs(vcpu); 3118 3118 } else { 3119 3119 guest_cr3 = pgd; 3120 3120 } ··· 6054 6054 (exit_reason != EXIT_REASON_EXCEPTION_NMI && 6055 6055 exit_reason != EXIT_REASON_EPT_VIOLATION && 6056 6056 exit_reason != EXIT_REASON_PML_FULL && 6057 + exit_reason != EXIT_REASON_APIC_ACCESS && 6057 6058 exit_reason != EXIT_REASON_TASK_SWITCH)) { 6058 6059 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 6059 6060 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+1
arch/x86/kvm/vmx/vmx.h
··· 356 356 int vmx_find_msr_index(struct vmx_msrs *m, u32 msr); 357 357 int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r, 358 358 struct x86_exception *e); 359 + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); 359 360 360 361 #define POSTED_INTR_ON 0 361 362 #define POSTED_INTR_SN 1
+4 -1
arch/x86/kvm/x86.c
··· 2731 2731 return 1; 2732 2732 2733 2733 if (!lapic_in_kernel(vcpu)) 2734 - return 1; 2734 + return data ? 1 : 0; 2735 2735 2736 2736 vcpu->arch.apf.msr_en_val = data; 2737 2737 ··· 3577 3577 break; 3578 3578 case KVM_CAP_SMALLER_MAXPHYADDR: 3579 3579 r = (int) allow_smaller_maxphyaddr; 3580 + break; 3581 + case KVM_CAP_STEAL_TIME: 3582 + r = sched_info_on(); 3580 3583 break; 3581 3584 default: 3582 3585 break;
+26 -5
include/linux/kvm_host.h
··· 749 749 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 750 750 gpa_t gpa, unsigned long len); 751 751 752 - #define __kvm_put_guest(kvm, gfn, offset, value, type) \ 752 + #define __kvm_get_guest(kvm, gfn, offset, v) \ 753 753 ({ \ 754 754 unsigned long __addr = gfn_to_hva(kvm, gfn); \ 755 - type __user *__uaddr = (type __user *)(__addr + offset); \ 755 + typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ 756 756 int __ret = -EFAULT; \ 757 757 \ 758 758 if (!kvm_is_error_hva(__addr)) \ 759 - __ret = put_user(value, __uaddr); \ 759 + __ret = get_user(v, __uaddr); \ 760 + __ret; \ 761 + }) 762 + 763 + #define kvm_get_guest(kvm, gpa, v) \ 764 + ({ \ 765 + gpa_t __gpa = gpa; \ 766 + struct kvm *__kvm = kvm; \ 767 + \ 768 + __kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT, \ 769 + offset_in_page(__gpa), v); \ 770 + }) 771 + 772 + #define __kvm_put_guest(kvm, gfn, offset, v) \ 773 + ({ \ 774 + unsigned long __addr = gfn_to_hva(kvm, gfn); \ 775 + typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ 776 + int __ret = -EFAULT; \ 777 + \ 778 + if (!kvm_is_error_hva(__addr)) \ 779 + __ret = put_user(v, __uaddr); \ 760 780 if (!__ret) \ 761 781 mark_page_dirty(kvm, gfn); \ 762 782 __ret; \ 763 783 }) 764 784 765 - #define kvm_put_guest(kvm, gpa, value, type) \ 785 + #define kvm_put_guest(kvm, gpa, v) \ 766 786 ({ \ 767 787 gpa_t __gpa = gpa; \ 768 788 struct kvm *__kvm = kvm; \ 789 + \ 769 790 __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ 770 - offset_in_page(__gpa), (value), type); \ 791 + offset_in_page(__gpa), v); \ 771 792 }) 772 793 773 794 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
+4 -2
include/uapi/linux/kvm.h
··· 790 790 #define KVM_VM_PPC_HV 1 791 791 #define KVM_VM_PPC_PR 2 792 792 793 - /* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ 794 - #define KVM_VM_MIPS_TE 0 793 + /* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */ 794 + #define KVM_VM_MIPS_AUTO 0 795 795 #define KVM_VM_MIPS_VZ 1 796 + #define KVM_VM_MIPS_TE 2 796 797 797 798 #define KVM_S390_SIE_PAGE_OFFSET 1 798 799 ··· 1036 1035 #define KVM_CAP_LAST_CPU 184 1037 1036 #define KVM_CAP_SMALLER_MAXPHYADDR 185 1038 1037 #define KVM_CAP_S390_DIAG318 186 1038 + #define KVM_CAP_STEAL_TIME 187 1039 1039 1040 1040 #ifdef KVM_CAP_IRQ_ROUTING 1041 1041
+12 -9
virt/kvm/kvm_main.c
··· 4332 4332 void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 4333 4333 struct kvm_io_device *dev) 4334 4334 { 4335 - int i; 4335 + int i, j; 4336 4336 struct kvm_io_bus *new_bus, *bus; 4337 4337 4338 4338 bus = kvm_get_bus(kvm, bus_idx); ··· 4349 4349 4350 4350 new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), 4351 4351 GFP_KERNEL_ACCOUNT); 4352 - if (!new_bus) { 4352 + if (new_bus) { 4353 + memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); 4354 + new_bus->dev_count--; 4355 + memcpy(new_bus->range + i, bus->range + i + 1, 4356 + (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); 4357 + } else { 4353 4358 pr_err("kvm: failed to shrink bus, removing it completely\n"); 4354 - goto broken; 4359 + for (j = 0; j < bus->dev_count; j++) { 4360 + if (j == i) 4361 + continue; 4362 + kvm_iodevice_destructor(bus->range[j].dev); 4363 + } 4355 4364 } 4356 4365 4357 - memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); 4358 - new_bus->dev_count--; 4359 - memcpy(new_bus->range + i, bus->range + i + 1, 4360 - (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); 4361 - 4362 - broken: 4363 4366 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 4364 4367 synchronize_srcu_expedited(&kvm->srcu); 4365 4368 kfree(bus);