Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:

- x86 bugfixes: APIC, nested virtualization, IOAPIC

- PPC bugfix: HPT guests on a POWER9 radix host

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (26 commits)
KVM: Let KVM_SET_SIGNAL_MASK work as advertised
KVM: VMX: Fix vmx->nested freeing when no SMI handler
KVM: VMX: Fix rflags cache during vCPU reset
KVM: X86: Fix softlockup when get the current kvmclock
KVM: lapic: Fixup LDR on load in x2apic
KVM: lapic: Split out x2apic ldr calculation
KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts
KVM: vmx: use X86_CR4_UMIP and X86_FEATURE_UMIP
KVM: x86: Fix CPUID function for word 6 (80000001_ECX)
KVM: nVMX: Fix vmx_check_nested_events() return value in case an event was reinjected to L2
KVM: x86: ioapic: Preserve read-only values in the redirection table
KVM: x86: ioapic: Clear Remote IRR when entry is switched to edge-triggered
KVM: x86: ioapic: Remove redundant check for Remote IRR in ioapic_set_irq
KVM: x86: ioapic: Don't fire level irq when Remote IRR set
KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race
KVM: x86: inject exceptions produced by x86_decode_insn
KVM: x86: Allow suppressing prints on RDMSR/WRMSR of unhandled MSRs
KVM: x86: fix em_fxstor() sleeping while in atomic
KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure
KVM: nVMX: Validate the IA32_BNDCFGS on nested VM-entry
...

+214 -98
+2 -5
arch/mips/kvm/mips.c
··· 445 445 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 446 446 { 447 447 int r = -EINTR; 448 - sigset_t sigsaved; 449 448 450 - if (vcpu->sigset_active) 451 - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 449 + kvm_sigset_activate(vcpu); 452 450 453 451 if (vcpu->mmio_needed) { 454 452 if (!vcpu->mmio_is_write) ··· 478 480 local_irq_enable(); 479 481 480 482 out: 481 - if (vcpu->sigset_active) 482 - sigprocmask(SIG_SETMASK, &sigsaved, NULL); 483 + kvm_sigset_deactivate(vcpu); 483 484 484 485 return r; 485 486 }
+1
arch/powerpc/include/asm/kvm_ppc.h
··· 180 180 struct iommu_group *grp); 181 181 extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm); 182 182 extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm); 183 + extern void kvmppc_setup_partition_table(struct kvm *kvm); 183 184 184 185 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, 185 186 struct kvm_create_spapr_tce_64 *args);
+23 -14
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 1238 1238 unsigned long vpte, rpte, guest_rpte; 1239 1239 int ret; 1240 1240 struct revmap_entry *rev; 1241 - unsigned long apsize, psize, avpn, pteg, hash; 1241 + unsigned long apsize, avpn, pteg, hash; 1242 1242 unsigned long new_idx, new_pteg, replace_vpte; 1243 + int pshift; 1243 1244 1244 1245 hptep = (__be64 *)(old->virt + (idx << 4)); 1245 1246 ··· 1299 1298 goto out; 1300 1299 1301 1300 rpte = be64_to_cpu(hptep[1]); 1302 - psize = hpte_base_page_size(vpte, rpte); 1303 - avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); 1301 + pshift = kvmppc_hpte_base_page_shift(vpte, rpte); 1302 + avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23); 1304 1303 pteg = idx / HPTES_PER_GROUP; 1305 1304 if (vpte & HPTE_V_SECONDARY) 1306 1305 pteg = ~pteg; ··· 1312 1311 offset = (avpn & 0x1f) << 23; 1313 1312 vsid = avpn >> 5; 1314 1313 /* We can find more bits from the pteg value */ 1315 - if (psize < (1ULL << 23)) 1316 - offset |= ((vsid ^ pteg) & old_hash_mask) * psize; 1314 + if (pshift < 23) 1315 + offset |= ((vsid ^ pteg) & old_hash_mask) << pshift; 1317 1316 1318 - hash = vsid ^ (offset / psize); 1317 + hash = vsid ^ (offset >> pshift); 1319 1318 } else { 1320 1319 unsigned long offset, vsid; 1321 1320 1322 1321 /* We only have 40 - 23 bits of seg_off in avpn */ 1323 1322 offset = (avpn & 0x1ffff) << 23; 1324 1323 vsid = avpn >> 17; 1325 - if (psize < (1ULL << 23)) 1326 - offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; 1324 + if (pshift < 23) 1325 + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift; 1327 1326 1328 - hash = vsid ^ (vsid << 25) ^ (offset / psize); 1327 + hash = vsid ^ (vsid << 25) ^ (offset >> pshift); 1329 1328 } 1330 1329 1331 1330 new_pteg = hash & new_hash_mask; ··· 1802 1801 ssize_t nb; 1803 1802 long int err, ret; 1804 1803 int mmu_ready; 1804 + int pshift; 1805 1805 1806 1806 if (!access_ok(VERIFY_READ, buf, count)) 1807 1807 return -EFAULT; ··· 1857 1855 err = -EINVAL; 1858 1856 if (!(v & 
HPTE_V_VALID)) 1859 1857 goto out; 1858 + pshift = kvmppc_hpte_base_page_shift(v, r); 1859 + if (pshift <= 0) 1860 + goto out; 1860 1861 lbuf += 2; 1861 1862 nb += HPTE_SIZE; 1862 1863 ··· 1874 1869 goto out; 1875 1870 } 1876 1871 if (!mmu_ready && is_vrma_hpte(v)) { 1877 - unsigned long psize = hpte_base_page_size(v, r); 1878 - unsigned long senc = slb_pgsize_encoding(psize); 1879 - unsigned long lpcr; 1872 + unsigned long senc, lpcr; 1880 1873 1874 + senc = slb_pgsize_encoding(1ul << pshift); 1881 1875 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1882 1876 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1883 - lpcr = senc << (LPCR_VRMASD_SH - 4); 1884 - kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); 1877 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1878 + lpcr = senc << (LPCR_VRMASD_SH - 4); 1879 + kvmppc_update_lpcr(kvm, lpcr, 1880 + LPCR_VRMASD); 1881 + } else { 1882 + kvmppc_setup_partition_table(kvm); 1883 + } 1885 1884 mmu_ready = 1; 1886 1885 } 1887 1886 ++i;
+1 -2
arch/powerpc/kvm/book3s_hv.c
··· 120 120 121 121 static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 122 122 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 123 - static void kvmppc_setup_partition_table(struct kvm *kvm); 124 123 125 124 static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, 126 125 int *ip) ··· 3573 3574 return; 3574 3575 } 3575 3576 3576 - static void kvmppc_setup_partition_table(struct kvm *kvm) 3577 + void kvmppc_setup_partition_table(struct kvm *kvm) 3577 3578 { 3578 3579 unsigned long dw0, dw1; 3579 3580
+2 -5
arch/powerpc/kvm/powerpc.c
··· 1407 1407 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 1408 1408 { 1409 1409 int r; 1410 - sigset_t sigsaved; 1411 1410 1412 1411 if (vcpu->mmio_needed) { 1413 1412 vcpu->mmio_needed = 0; ··· 1447 1448 #endif 1448 1449 } 1449 1450 1450 - if (vcpu->sigset_active) 1451 - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 1451 + kvm_sigset_activate(vcpu); 1452 1452 1453 1453 if (run->immediate_exit) 1454 1454 r = -EINTR; 1455 1455 else 1456 1456 r = kvmppc_vcpu_run(run, vcpu); 1457 1457 1458 - if (vcpu->sigset_active) 1459 - sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1458 + kvm_sigset_deactivate(vcpu); 1460 1459 1461 1460 return r; 1462 1461 }
+2 -5
arch/s390/kvm/kvm-s390.c
··· 3372 3372 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3373 3373 { 3374 3374 int rc; 3375 - sigset_t sigsaved; 3376 3375 3377 3376 if (kvm_run->immediate_exit) 3378 3377 return -EINTR; ··· 3381 3382 return 0; 3382 3383 } 3383 3384 3384 - if (vcpu->sigset_active) 3385 - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 3385 + kvm_sigset_activate(vcpu); 3386 3386 3387 3387 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { 3388 3388 kvm_s390_vcpu_start(vcpu); ··· 3415 3417 disable_cpu_timer_accounting(vcpu); 3416 3418 store_regs(vcpu, kvm_run); 3417 3419 3418 - if (vcpu->sigset_active) 3419 - sigprocmask(SIG_SETMASK, &sigsaved, NULL); 3420 + kvm_sigset_deactivate(vcpu); 3420 3421 3421 3422 vcpu->stat.exit_userspace++; 3422 3423 return rc;
+2 -1
arch/x86/include/asm/kvm_host.h
··· 1161 1161 static inline int emulate_instruction(struct kvm_vcpu *vcpu, 1162 1162 int emulation_type) 1163 1163 { 1164 - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); 1164 + return x86_emulate_instruction(vcpu, 0, 1165 + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); 1165 1166 } 1166 1167 1167 1168 void kvm_enable_efer_bits(u64);
+1 -1
arch/x86/kvm/cpuid.h
··· 44 44 [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, 45 45 [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, 46 46 [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, 47 - [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, 47 + [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, 48 48 [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, 49 49 [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, 50 50 [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
+33 -6
arch/x86/kvm/emulate.c
··· 4014 4014 fxstate_size(ctxt)); 4015 4015 } 4016 4016 4017 + /* 4018 + * FXRSTOR might restore XMM registers not provided by the guest. Fill 4019 + * in the host registers (via FXSAVE) instead, so they won't be modified. 4020 + * (preemption has to stay disabled until FXRSTOR). 4021 + * 4022 + * Use noinline to keep the stack for other functions called by callers small. 4023 + */ 4024 + static noinline int fxregs_fixup(struct fxregs_state *fx_state, 4025 + const size_t used_size) 4026 + { 4027 + struct fxregs_state fx_tmp; 4028 + int rc; 4029 + 4030 + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp)); 4031 + memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size, 4032 + __fxstate_size(16) - used_size); 4033 + 4034 + return rc; 4035 + } 4036 + 4017 4037 static int em_fxrstor(struct x86_emulate_ctxt *ctxt) 4018 4038 { 4019 4039 struct fxregs_state fx_state; ··· 4044 4024 if (rc != X86EMUL_CONTINUE) 4045 4025 return rc; 4046 4026 4027 + size = fxstate_size(ctxt); 4028 + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); 4029 + if (rc != X86EMUL_CONTINUE) 4030 + return rc; 4031 + 4047 4032 ctxt->ops->get_fpu(ctxt); 4048 4033 4049 - size = fxstate_size(ctxt); 4050 4034 if (size < __fxstate_size(16)) { 4051 - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); 4035 + rc = fxregs_fixup(&fx_state, size); 4052 4036 if (rc != X86EMUL_CONTINUE) 4053 4037 goto out; 4054 4038 } 4055 - 4056 - rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); 4057 - if (rc != X86EMUL_CONTINUE) 4058 - goto out; 4059 4039 4060 4040 if (fx_state.mxcsr >> 16) { 4061 4041 rc = emulate_gp(ctxt, 0); ··· 5020 5000 bool op_prefix = false; 5021 5001 bool has_seg_override = false; 5022 5002 struct opcode opcode; 5003 + u16 dummy; 5004 + struct desc_struct desc; 5023 5005 5024 5006 ctxt->memop.type = OP_NONE; 5025 5007 ctxt->memopp = NULL; ··· 5040 5018 switch (mode) { 5041 5019 case X86EMUL_MODE_REAL: 5042 5020 case X86EMUL_MODE_VM86: 5021 + 
def_op_bytes = def_ad_bytes = 2; 5022 + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); 5023 + if (desc.d) 5024 + def_op_bytes = def_ad_bytes = 4; 5025 + break; 5043 5026 case X86EMUL_MODE_PROT16: 5044 5027 def_op_bytes = def_ad_bytes = 2; 5045 5028 break;
+25 -9
arch/x86/kvm/ioapic.c
··· 209 209 210 210 old_irr = ioapic->irr; 211 211 ioapic->irr |= mask; 212 - if (edge) 212 + if (edge) { 213 213 ioapic->irr_delivered &= ~mask; 214 - if ((edge && old_irr == ioapic->irr) || 215 - (!edge && entry.fields.remote_irr)) { 216 - ret = 0; 217 - goto out; 214 + if (old_irr == ioapic->irr) { 215 + ret = 0; 216 + goto out; 217 + } 218 218 } 219 219 220 220 ret = ioapic_service(ioapic, irq, line_status); ··· 257 257 index == RTC_GSI) { 258 258 if (kvm_apic_match_dest(vcpu, NULL, 0, 259 259 e->fields.dest_id, e->fields.dest_mode) || 260 - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && 261 - kvm_apic_pending_eoi(vcpu, e->fields.vector))) 260 + kvm_apic_pending_eoi(vcpu, e->fields.vector)) 262 261 __set_bit(e->fields.vector, 263 262 ioapic_handled_vectors); 264 263 } ··· 276 277 { 277 278 unsigned index; 278 279 bool mask_before, mask_after; 280 + int old_remote_irr, old_delivery_status; 279 281 union kvm_ioapic_redirect_entry *e; 280 282 281 283 switch (ioapic->ioregsel) { ··· 299 299 return; 300 300 e = &ioapic->redirtbl[index]; 301 301 mask_before = e->fields.mask; 302 + /* Preserve read-only fields */ 303 + old_remote_irr = e->fields.remote_irr; 304 + old_delivery_status = e->fields.delivery_status; 302 305 if (ioapic->ioregsel & 1) { 303 306 e->bits &= 0xffffffff; 304 307 e->bits |= (u64) val << 32; 305 308 } else { 306 309 e->bits &= ~0xffffffffULL; 307 310 e->bits |= (u32) val; 308 - e->fields.remote_irr = 0; 309 311 } 312 + e->fields.remote_irr = old_remote_irr; 313 + e->fields.delivery_status = old_delivery_status; 314 + 315 + /* 316 + * Some OSes (Linux, Xen) assume that Remote IRR bit will 317 + * be cleared by IOAPIC hardware when the entry is configured 318 + * as edge-triggered. This behavior is used to simulate an 319 + * explicit EOI on IOAPICs that don't have the EOI register. 
320 + */ 321 + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) 322 + e->fields.remote_irr = 0; 323 + 310 324 mask_after = e->fields.mask; 311 325 if (mask_before != mask_after) 312 326 kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); ··· 338 324 struct kvm_lapic_irq irqe; 339 325 int ret; 340 326 341 - if (entry->fields.mask) 327 + if (entry->fields.mask || 328 + (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG && 329 + entry->fields.remote_irr)) 342 330 return -1; 343 331 344 332 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+11 -1
arch/x86/kvm/lapic.c
··· 266 266 recalculate_apic_map(apic->vcpu->kvm); 267 267 } 268 268 269 + static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) 270 + { 271 + return ((id >> 4) << 16) | (1 << (id & 0xf)); 272 + } 273 + 269 274 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) 270 275 { 271 - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); 276 + u32 ldr = kvm_apic_calc_x2apic_ldr(id); 272 277 273 278 WARN_ON_ONCE(id != apic->vcpu->vcpu_id); 274 279 ··· 2250 2245 { 2251 2246 if (apic_x2apic_mode(vcpu->arch.apic)) { 2252 2247 u32 *id = (u32 *)(s->regs + APIC_ID); 2248 + u32 *ldr = (u32 *)(s->regs + APIC_LDR); 2253 2249 2254 2250 if (vcpu->kvm->arch.x2apic_format) { 2255 2251 if (*id != vcpu->vcpu_id) ··· 2261 2255 else 2262 2256 *id <<= 24; 2263 2257 } 2258 + 2259 + /* In x2APIC mode, the LDR is fixed and based on the id */ 2260 + if (set) 2261 + *ldr = kvm_apic_calc_x2apic_ldr(*id); 2264 2262 } 2265 2263 2266 2264 return 0;
+10 -1
arch/x86/kvm/svm.c
··· 361 361 { 362 362 struct vmcb_control_area *c, *h; 363 363 struct nested_state *g; 364 + u32 h_intercept_exceptions; 364 365 365 366 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 366 367 ··· 372 371 h = &svm->nested.hsave->control; 373 372 g = &svm->nested; 374 373 374 + /* No need to intercept #UD if L1 doesn't intercept it */ 375 + h_intercept_exceptions = 376 + h->intercept_exceptions & ~(1U << UD_VECTOR); 377 + 375 378 c->intercept_cr = h->intercept_cr | g->intercept_cr; 376 379 c->intercept_dr = h->intercept_dr | g->intercept_dr; 377 - c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; 380 + c->intercept_exceptions = 381 + h_intercept_exceptions | g->intercept_exceptions; 378 382 c->intercept = h->intercept | g->intercept; 379 383 } 380 384 ··· 2202 2196 { 2203 2197 int er; 2204 2198 2199 + WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); 2205 2200 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); 2201 + if (er == EMULATE_USER_EXIT) 2202 + return 0; 2206 2203 if (er != EMULATE_DONE) 2207 2204 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2208 2205 return 1;
+44 -29
arch/x86/kvm/vmx.c
··· 1887 1887 { 1888 1888 u32 eb; 1889 1889 1890 - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | 1890 + eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | 1891 1891 (1u << DB_VECTOR) | (1u << AC_VECTOR); 1892 1892 if ((vcpu->guest_debug & 1893 1893 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == ··· 1905 1905 */ 1906 1906 if (is_guest_mode(vcpu)) 1907 1907 eb |= get_vmcs12(vcpu)->exception_bitmap; 1908 + else 1909 + eb |= 1u << UD_VECTOR; 1908 1910 1909 1911 vmcs_write32(EXCEPTION_BITMAP, eb); 1910 1912 } ··· 5602 5600 vmcs_write64(GUEST_IA32_DEBUGCTL, 0); 5603 5601 } 5604 5602 5605 - vmcs_writel(GUEST_RFLAGS, 0x02); 5603 + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); 5606 5604 kvm_rip_write(vcpu, 0xfff0); 5607 5605 5608 5606 vmcs_writel(GUEST_GDTR_BASE, 0); ··· 5917 5915 return 1; /* already handled by vmx_vcpu_run() */ 5918 5916 5919 5917 if (is_invalid_opcode(intr_info)) { 5920 - if (is_guest_mode(vcpu)) { 5921 - kvm_queue_exception(vcpu, UD_VECTOR); 5922 - return 1; 5923 - } 5918 + WARN_ON_ONCE(is_guest_mode(vcpu)); 5924 5919 er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); 5920 + if (er == EMULATE_USER_EXIT) 5921 + return 0; 5925 5922 if (er != EMULATE_DONE) 5926 5923 kvm_queue_exception(vcpu, UD_VECTOR); 5927 5924 return 1; ··· 6603 6602 if (kvm_test_request(KVM_REQ_EVENT, vcpu)) 6604 6603 return 1; 6605 6604 6606 - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); 6605 + err = emulate_instruction(vcpu, 0); 6607 6606 6608 6607 if (err == EMULATE_USER_EXIT) { 6609 6608 ++vcpu->stat.mmio_exits; ··· 7415 7414 */ 7416 7415 static void free_nested(struct vcpu_vmx *vmx) 7417 7416 { 7418 - if (!vmx->nested.vmxon) 7417 + if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) 7419 7418 return; 7420 7419 7421 7420 vmx->nested.vmxon = false; 7421 + vmx->nested.smm.vmxon = false; 7422 7422 free_vpid(vmx->nested.vpid02); 7423 7423 vmx->nested.posted_intr_nv = -1; 7424 7424 vmx->nested.current_vmptr = -1ull; ··· 9802 9800 cr4_fixed1_update(X86_CR4_SMEP, ebx, 
bit(X86_FEATURE_SMEP)); 9803 9801 cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); 9804 9802 cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); 9805 - /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */ 9806 - cr4_fixed1_update(bit(11), ecx, bit(2)); 9803 + cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); 9807 9804 9808 9805 #undef cr4_fixed1_update 9809 9806 } ··· 10876 10875 return 1; 10877 10876 } 10878 10877 10878 + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && 10879 + (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || 10880 + (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) 10881 + return 1; 10882 + 10879 10883 return 0; 10880 10884 } 10881 10885 ··· 11105 11099 { 11106 11100 struct vcpu_vmx *vmx = to_vmx(vcpu); 11107 11101 unsigned long exit_qual; 11108 - 11109 - if (kvm_event_needs_reinjection(vcpu)) 11110 - return -EBUSY; 11102 + bool block_nested_events = 11103 + vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); 11111 11104 11112 11105 if (vcpu->arch.exception.pending && 11113 11106 nested_vmx_check_exception(vcpu, &exit_qual)) { 11114 - if (vmx->nested.nested_run_pending) 11107 + if (block_nested_events) 11115 11108 return -EBUSY; 11116 11109 nested_vmx_inject_exception_vmexit(vcpu, exit_qual); 11117 11110 vcpu->arch.exception.pending = false; ··· 11119 11114 11120 11115 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && 11121 11116 vmx->nested.preemption_timer_expired) { 11122 - if (vmx->nested.nested_run_pending) 11117 + if (block_nested_events) 11123 11118 return -EBUSY; 11124 11119 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); 11125 11120 return 0; 11126 11121 } 11127 11122 11128 11123 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { 11129 - if (vmx->nested.nested_run_pending) 11124 + if (block_nested_events) 11130 11125 return -EBUSY; 11131 11126 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 11132 11127 NMI_VECTOR | 
INTR_TYPE_NMI_INTR | ··· 11142 11137 11143 11138 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && 11144 11139 nested_exit_on_intr(vcpu)) { 11145 - if (vmx->nested.nested_run_pending) 11140 + if (block_nested_events) 11146 11141 return -EBUSY; 11147 11142 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); 11148 11143 return 0; ··· 11329 11324 kvm_clear_interrupt_queue(vcpu); 11330 11325 } 11331 11326 11327 + static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, 11328 + struct vmcs12 *vmcs12) 11329 + { 11330 + u32 entry_failure_code; 11331 + 11332 + nested_ept_uninit_mmu_context(vcpu); 11333 + 11334 + /* 11335 + * Only PDPTE load can fail as the value of cr3 was checked on entry and 11336 + * couldn't have changed. 11337 + */ 11338 + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) 11339 + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); 11340 + 11341 + if (!enable_ept) 11342 + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; 11343 + } 11344 + 11332 11345 /* 11333 11346 * A part of what we need to when the nested L2 guest exits and we want to 11334 11347 * run its L1 parent, is to reset L1's guest state to the host state specified ··· 11360 11337 struct vmcs12 *vmcs12) 11361 11338 { 11362 11339 struct kvm_segment seg; 11363 - u32 entry_failure_code; 11364 11340 11365 11341 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) 11366 11342 vcpu->arch.efer = vmcs12->host_ia32_efer; ··· 11386 11364 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); 11387 11365 vmx_set_cr4(vcpu, vmcs12->host_cr4); 11388 11366 11389 - nested_ept_uninit_mmu_context(vcpu); 11390 - 11391 - /* 11392 - * Only PDPTE load can fail as the value of cr3 was checked on entry and 11393 - * couldn't have changed. 
11394 - */ 11395 - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) 11396 - nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); 11397 - 11398 - if (!enable_ept) 11399 - vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; 11367 + load_vmcs12_mmu_host_state(vcpu, vmcs12); 11400 11368 11401 11369 if (enable_vpid) { 11402 11370 /* ··· 11616 11604 * accordingly. 11617 11605 */ 11618 11606 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 11607 + 11608 + load_vmcs12_mmu_host_state(vcpu, vmcs12); 11609 + 11619 11610 /* 11620 11611 * The emulated instruction was already skipped in 11621 11612 * nested_vmx_run, but the updated RIP was never
+28 -14
arch/x86/kvm/x86.c
··· 107 107 static bool __read_mostly ignore_msrs = 0; 108 108 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); 109 109 110 + static bool __read_mostly report_ignored_msrs = true; 111 + module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); 112 + 110 113 unsigned int min_timer_period_us = 500; 111 114 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); 112 115 ··· 1798 1795 /* both __this_cpu_read() and rdtsc() should be on the same cpu */ 1799 1796 get_cpu(); 1800 1797 1801 - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, 1802 - &hv_clock.tsc_shift, 1803 - &hv_clock.tsc_to_system_mul); 1804 - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); 1798 + if (__this_cpu_read(cpu_tsc_khz)) { 1799 + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, 1800 + &hv_clock.tsc_shift, 1801 + &hv_clock.tsc_to_system_mul); 1802 + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); 1803 + } else 1804 + ret = ktime_get_boot_ns() + ka->kvmclock_offset; 1805 1805 1806 1806 put_cpu(); 1807 1807 ··· 1835 1829 * version field is the first in the struct. 1836 1830 */ 1837 1831 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); 1832 + 1833 + if (guest_hv_clock.version & 1) 1834 + ++guest_hv_clock.version; /* first time write, random junk */ 1838 1835 1839 1836 vcpu->hv_clock.version = guest_hv_clock.version + 1; 1840 1837 kvm_write_guest_cached(v->kvm, &vcpu->pv_time, ··· 2331 2322 /* Drop writes to this legacy MSR -- see rdmsr 2332 2323 * counterpart for further detail. 
2333 2324 */ 2334 - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); 2325 + if (report_ignored_msrs) 2326 + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", 2327 + msr, data); 2335 2328 break; 2336 2329 case MSR_AMD64_OSVW_ID_LENGTH: 2337 2330 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) ··· 2370 2359 msr, data); 2371 2360 return 1; 2372 2361 } else { 2373 - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", 2374 - msr, data); 2362 + if (report_ignored_msrs) 2363 + vcpu_unimpl(vcpu, 2364 + "ignored wrmsr: 0x%x data 0x%llx\n", 2365 + msr, data); 2375 2366 break; 2376 2367 } 2377 2368 } ··· 2591 2578 msr_info->index); 2592 2579 return 1; 2593 2580 } else { 2594 - vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index); 2581 + if (report_ignored_msrs) 2582 + vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", 2583 + msr_info->index); 2595 2584 msr_info->data = 0; 2596 2585 } 2597 2586 break; ··· 5445 5430 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 5446 5431 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 5447 5432 vcpu->run->internal.ndata = 0; 5448 - r = EMULATE_FAIL; 5433 + r = EMULATE_USER_EXIT; 5449 5434 } 5450 5435 kvm_queue_exception(vcpu, UD_VECTOR); 5451 5436 ··· 5736 5721 return EMULATE_FAIL; 5737 5722 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, 5738 5723 emulation_type)) 5724 + return EMULATE_DONE; 5725 + if (ctxt->have_exception && inject_emulated_exception(vcpu)) 5739 5726 return EMULATE_DONE; 5740 5727 if (emulation_type & EMULTYPE_SKIP) 5741 5728 return EMULATE_FAIL; ··· 7267 7250 { 7268 7251 struct fpu *fpu = &current->thread.fpu; 7269 7252 int r; 7270 - sigset_t sigsaved; 7271 7253 7272 7254 fpu__initialize(fpu); 7273 7255 7274 - if (vcpu->sigset_active) 7275 - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 7256 + kvm_sigset_activate(vcpu); 7276 7257 7277 7258 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 7278 7259 if (kvm_run->immediate_exit) { ··· 7313 7298 7314 7299 
out: 7315 7300 post_kvm_run_save(vcpu); 7316 - if (vcpu->sigset_active) 7317 - sigprocmask(SIG_SETMASK, &sigsaved, NULL); 7301 + kvm_sigset_deactivate(vcpu); 7318 7302 7319 7303 return r; 7320 7304 }
+3
include/linux/kvm_host.h
··· 715 715 unsigned long len); 716 716 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); 717 717 718 + void kvm_sigset_activate(struct kvm_vcpu *vcpu); 719 + void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); 720 + 718 721 void kvm_vcpu_block(struct kvm_vcpu *vcpu); 719 722 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); 720 723 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
+3 -5
virt/kvm/arm/arm.c
··· 615 615 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 616 616 { 617 617 int ret; 618 - sigset_t sigsaved; 619 618 620 619 if (unlikely(!kvm_vcpu_initialized(vcpu))) 621 620 return -ENOEXEC; ··· 632 633 if (run->immediate_exit) 633 634 return -EINTR; 634 635 635 - if (vcpu->sigset_active) 636 - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 636 + kvm_sigset_activate(vcpu); 637 637 638 638 ret = 1; 639 639 run->exit_reason = KVM_EXIT_UNKNOWN; ··· 767 769 kvm_pmu_update_run(vcpu); 768 770 } 769 771 770 - if (vcpu->sigset_active) 771 - sigprocmask(SIG_SETMASK, &sigsaved, NULL); 772 + kvm_sigset_deactivate(vcpu); 773 + 772 774 return ret; 773 775 } 774 776
+23
virt/kvm/kvm_main.c
··· 2065 2065 } 2066 2066 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); 2067 2067 2068 + void kvm_sigset_activate(struct kvm_vcpu *vcpu) 2069 + { 2070 + if (!vcpu->sigset_active) 2071 + return; 2072 + 2073 + /* 2074 + * This does a lockless modification of ->real_blocked, which is fine 2075 + * because only current can change ->real_blocked and all readers of 2076 + * ->real_blocked don't care as long as ->real_blocked is always a subset 2077 + * of ->blocked. 2078 + */ 2079 + sigprocmask(SIG_SETMASK, &vcpu->sigset, &current->real_blocked); 2080 + } 2081 + 2082 + void kvm_sigset_deactivate(struct kvm_vcpu *vcpu) 2083 + { 2084 + if (!vcpu->sigset_active) 2085 + return; 2086 + 2087 + sigprocmask(SIG_SETMASK, &current->real_blocked, NULL); 2088 + sigemptyset(&current->real_blocked); 2089 + } 2090 + 2068 2091 static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) 2069 2092 { 2070 2093 unsigned int old, val, grow;