Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"x86:

- Fixes for Xen emulation. While nobody should be enabling it in the
kernel (the only public users of the feature are the selftests),
the bugs being fixed effectively allow userspace to read arbitrary memory.

- Correctness fixes for nested hypervisors that do not intercept INIT
or SHUTDOWN on AMD; the subsequent CPU reset can cause a
use-after-free when it disables virtualization extensions. While
downgrading the panic to a WARN is quite easy, the full fix is a
bit more laborious; there are also tests. This is the bulk of the
pull request.

- Fix race condition due to incorrect mmu_lock use around
make_mmu_pages_available().

Generic:

- Obey changes to the kvm.halt_poll_ns module parameter in VMs not
using KVM_CAP_HALT_POLL, restoring behavior from before the
introduction of the capability"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: Update gfn_to_pfn_cache khva when it moves within the same page
KVM: x86/xen: Only do in-kernel acceleration of hypercalls for guest CPL0
KVM: x86/xen: Validate port number in SCHEDOP_poll
KVM: x86/mmu: Fix race condition in direct_page_fault
KVM: x86: remove exit_int_info warning in svm_handle_exit
KVM: selftests: add svm part to triple_fault_test
KVM: x86: allow L1 to not intercept triple fault
kvm: selftests: add svm nested shutdown test
KVM: selftests: move idt_entry to header
KVM: x86: forcibly leave nested mode on vCPU reset
KVM: x86: add kvm_leave_nested
KVM: x86: nSVM: harden svm_free_nested against freeing vmcb02 while still in use
KVM: x86: nSVM: leave nested mode on vCPU free
KVM: Obey kvm.halt_poll_ns in VMs not using KVM_CAP_HALT_POLL
KVM: Avoid re-reading kvm->max_halt_poll_ns during halt-polling
KVM: Cap vcpu->halt_poll_ns before halting rather than after

+251 -83
+7 -6
arch/x86/kvm/mmu/mmu.c
··· 2443 2443 { 2444 2444 bool list_unstable, zapped_root = false; 2445 2445 2446 + lockdep_assert_held_write(&kvm->mmu_lock); 2446 2447 trace_kvm_mmu_prepare_zap_page(sp); 2447 2448 ++kvm->stat.mmu_shadow_zapped; 2448 2449 *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); ··· 4263 4262 if (is_page_fault_stale(vcpu, fault, mmu_seq)) 4264 4263 goto out_unlock; 4265 4264 4266 - r = make_mmu_pages_available(vcpu); 4267 - if (r) 4268 - goto out_unlock; 4269 - 4270 - if (is_tdp_mmu_fault) 4265 + if (is_tdp_mmu_fault) { 4271 4266 r = kvm_tdp_mmu_map(vcpu, fault); 4272 - else 4267 + } else { 4268 + r = make_mmu_pages_available(vcpu); 4269 + if (r) 4270 + goto out_unlock; 4273 4271 r = __direct_map(vcpu, fault); 4272 + } 4274 4273 4275 4274 out_unlock: 4276 4275 if (is_tdp_mmu_fault)
+9 -3
arch/x86/kvm/svm/nested.c
··· 1091 1091 1092 1092 static void nested_svm_triple_fault(struct kvm_vcpu *vcpu) 1093 1093 { 1094 + struct vcpu_svm *svm = to_svm(vcpu); 1095 + 1096 + if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN)) 1097 + return; 1098 + 1099 + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); 1094 1100 nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN); 1095 1101 } 1096 1102 ··· 1131 1125 if (!svm->nested.initialized) 1132 1126 return; 1133 1127 1128 + if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr)) 1129 + svm_switch_vmcb(svm, &svm->vmcb01); 1130 + 1134 1131 svm_vcpu_free_msrpm(svm->nested.msrpm); 1135 1132 svm->nested.msrpm = NULL; 1136 1133 ··· 1152 1143 svm->nested.initialized = false; 1153 1144 } 1154 1145 1155 - /* 1156 - * Forcibly leave nested mode in order to be able to reset the VCPU later on. 1157 - */ 1158 1146 void svm_leave_nested(struct kvm_vcpu *vcpu) 1159 1147 { 1160 1148 struct vcpu_svm *svm = to_svm(vcpu);
+1 -15
arch/x86/kvm/svm/svm.c
··· 346 346 return 0; 347 347 } 348 348 349 - static int is_external_interrupt(u32 info) 350 - { 351 - info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; 352 - return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); 353 - } 354 - 355 349 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) 356 350 { 357 351 struct vcpu_svm *svm = to_svm(vcpu); ··· 1432 1438 */ 1433 1439 svm_clear_current_vmcb(svm->vmcb); 1434 1440 1441 + svm_leave_nested(vcpu); 1435 1442 svm_free_nested(svm); 1436 1443 1437 1444 sev_free_vcpu(vcpu); ··· 3419 3424 dump_vmcb(vcpu); 3420 3425 return 0; 3421 3426 } 3422 - 3423 - if (is_external_interrupt(svm->vmcb->control.exit_int_info) && 3424 - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && 3425 - exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && 3426 - exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) 3427 - printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " 3428 - "exit_code 0x%x\n", 3429 - __func__, svm->vmcb->control.exit_int_info, 3430 - exit_code); 3431 3427 3432 3428 if (exit_fastpath != EXIT_FASTPATH_NONE) 3433 3429 return 1;
+1 -3
arch/x86/kvm/vmx/nested.c
··· 4854 4854 4855 4855 static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu) 4856 4856 { 4857 + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); 4857 4858 nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); 4858 4859 } 4859 4860 ··· 6441 6440 return kvm_state.size; 6442 6441 } 6443 6442 6444 - /* 6445 - * Forcibly leave nested mode in order to be able to reset the VCPU later on. 6446 - */ 6447 6443 void vmx_leave_nested(struct kvm_vcpu *vcpu) 6448 6444 { 6449 6445 if (is_guest_mode(vcpu)) {
+23 -6
arch/x86/kvm/x86.c
··· 628 628 ex->payload = payload; 629 629 } 630 630 631 + /* Forcibly leave the nested mode in cases like a vCPU reset */ 632 + static void kvm_leave_nested(struct kvm_vcpu *vcpu) 633 + { 634 + kvm_x86_ops.nested_ops->leave_nested(vcpu); 635 + } 636 + 631 637 static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 632 638 unsigned nr, bool has_error, u32 error_code, 633 639 bool has_payload, unsigned long payload, bool reinject) ··· 5201 5195 5202 5196 if (events->flags & KVM_VCPUEVENT_VALID_SMM) { 5203 5197 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { 5204 - kvm_x86_ops.nested_ops->leave_nested(vcpu); 5198 + kvm_leave_nested(vcpu); 5205 5199 kvm_smm_changed(vcpu, events->smi.smm); 5206 5200 } 5207 5201 ··· 9811 9805 9812 9806 int kvm_check_nested_events(struct kvm_vcpu *vcpu) 9813 9807 { 9814 - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { 9808 + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { 9815 9809 kvm_x86_ops.nested_ops->triple_fault(vcpu); 9816 9810 return 1; 9817 9811 } ··· 10566 10560 r = 0; 10567 10561 goto out; 10568 10562 } 10569 - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { 10570 - if (is_guest_mode(vcpu)) { 10563 + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { 10564 + if (is_guest_mode(vcpu)) 10571 10565 kvm_x86_ops.nested_ops->triple_fault(vcpu); 10572 - } else { 10566 + 10567 + if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { 10573 10568 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; 10574 10569 vcpu->mmio_needed = 0; 10575 10570 r = 0; 10576 - goto out; 10577 10571 } 10572 + goto out; 10578 10573 } 10579 10574 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { 10580 10575 /* Page is swapped out. Do synthetic halt */ ··· 12004 11997 WARN_ON_ONCE(!init_event && 12005 11998 (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu))); 12006 11999 12000 + /* 12001 + * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's 12002 + * possible to INIT the vCPU while L2 is active. 
Force the vCPU back 12003 + * into L1 as EFER.SVME is cleared on INIT (along with all other EFER 12004 + * bits), i.e. virtualization is disabled. 12005 + */ 12006 + if (is_guest_mode(vcpu)) 12007 + kvm_leave_nested(vcpu); 12008 + 12007 12009 kvm_lapic_reset(vcpu, init_event); 12008 12010 12011 + WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu)); 12009 12012 vcpu->arch.hflags = 0; 12010 12013 12011 12014 vcpu->arch.smi_pending = 0;
+23 -9
arch/x86/kvm/xen.c
··· 954 954 return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); 955 955 } 956 956 957 + static inline int max_evtchn_port(struct kvm *kvm) 958 + { 959 + if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) 960 + return EVTCHN_2L_NR_CHANNELS; 961 + else 962 + return COMPAT_EVTCHN_2L_NR_CHANNELS; 963 + } 964 + 957 965 static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, 958 966 evtchn_port_t *ports) 959 967 { ··· 1048 1040 if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, 1049 1041 &ports[i], sizeof(port))) { 1050 1042 *r = -EFAULT; 1043 + goto out; 1044 + } 1045 + if (ports[i] >= max_evtchn_port(vcpu->kvm)) { 1046 + *r = -EINVAL; 1051 1047 goto out; 1052 1048 } 1053 1049 } ··· 1227 1215 bool longmode; 1228 1216 u64 input, params[6], r = -ENOSYS; 1229 1217 bool handled = false; 1218 + u8 cpl; 1230 1219 1231 1220 input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX); 1232 1221 ··· 1255 1242 params[5] = (u64)kvm_r9_read(vcpu); 1256 1243 } 1257 1244 #endif 1245 + cpl = static_call(kvm_x86_get_cpl)(vcpu); 1258 1246 trace_kvm_xen_hypercall(input, params[0], params[1], params[2], 1259 1247 params[3], params[4], params[5]); 1248 + 1249 + /* 1250 + * Only allow hypercall acceleration for CPL0. The rare hypercalls that 1251 + * are permitted in guest userspace can be handled by the VMM. 
1252 + */ 1253 + if (unlikely(cpl > 0)) 1254 + goto handle_in_userspace; 1260 1255 1261 1256 switch (input) { 1262 1257 case __HYPERVISOR_xen_version: ··· 1300 1279 if (handled) 1301 1280 return kvm_xen_hypercall_set_result(vcpu, r); 1302 1281 1282 + handle_in_userspace: 1303 1283 vcpu->run->exit_reason = KVM_EXIT_XEN; 1304 1284 vcpu->run->xen.type = KVM_EXIT_XEN_HCALL; 1305 1285 vcpu->run->xen.u.hcall.longmode = longmode; 1306 - vcpu->run->xen.u.hcall.cpl = static_call(kvm_x86_get_cpl)(vcpu); 1286 + vcpu->run->xen.u.hcall.cpl = cpl; 1307 1287 vcpu->run->xen.u.hcall.input = input; 1308 1288 vcpu->run->xen.u.hcall.params[0] = params[0]; 1309 1289 vcpu->run->xen.u.hcall.params[1] = params[1]; ··· 1317 1295 kvm_xen_hypercall_complete_userspace; 1318 1296 1319 1297 return 0; 1320 - } 1321 - 1322 - static inline int max_evtchn_port(struct kvm *kvm) 1323 - { 1324 - if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) 1325 - return EVTCHN_2L_NR_CHANNELS; 1326 - else 1327 - return COMPAT_EVTCHN_2L_NR_CHANNELS; 1328 1298 } 1329 1299 1330 1300 static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
+1
include/linux/kvm_host.h
··· 776 776 struct srcu_struct srcu; 777 777 struct srcu_struct irq_srcu; 778 778 pid_t userspace_pid; 779 + bool override_halt_poll_ns; 779 780 unsigned int max_halt_poll_ns; 780 781 u32 dirty_ring_size; 781 782 bool vm_bugged;
+1
tools/testing/selftests/kvm/.gitignore
··· 41 41 /x86_64/svm_vmcall_test 42 42 /x86_64/svm_int_ctl_test 43 43 /x86_64/svm_nested_soft_inject_test 44 + /x86_64/svm_nested_shutdown_test 44 45 /x86_64/sync_regs_test 45 46 /x86_64/tsc_msrs_test 46 47 /x86_64/tsc_scaling_sync
+1
tools/testing/selftests/kvm/Makefile
··· 101 101 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test 102 102 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test 103 103 TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test 104 + TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test 104 105 TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test 105 106 TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync 106 107 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+13
tools/testing/selftests/kvm/include/x86_64/processor.h
··· 748 748 uint64_t rflags; 749 749 }; 750 750 751 + struct idt_entry { 752 + uint16_t offset0; 753 + uint16_t selector; 754 + uint16_t ist : 3; 755 + uint16_t : 5; 756 + uint16_t type : 4; 757 + uint16_t : 1; 758 + uint16_t dpl : 2; 759 + uint16_t p : 1; 760 + uint16_t offset1; 761 + uint32_t offset2; uint32_t reserved; 762 + }; 763 + 751 764 void vm_init_descriptor_tables(struct kvm_vm *vm); 752 765 void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); 753 766 void vm_install_exception_handler(struct kvm_vm *vm, int vector,
-13
tools/testing/selftests/kvm/lib/x86_64/processor.c
··· 1074 1074 } 1075 1075 } 1076 1076 1077 - struct idt_entry { 1078 - uint16_t offset0; 1079 - uint16_t selector; 1080 - uint16_t ist : 3; 1081 - uint16_t : 5; 1082 - uint16_t type : 4; 1083 - uint16_t : 1; 1084 - uint16_t dpl : 2; 1085 - uint16_t p : 1; 1086 - uint16_t offset1; 1087 - uint32_t offset2; uint32_t reserved; 1088 - }; 1089 - 1090 1077 static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, 1091 1078 int dpl, unsigned short selector) 1092 1079 {
+67
tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * svm_nested_shutdown_test 4 + * 5 + * Copyright (C) 2022, Red Hat, Inc. 6 + * 7 + * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host 8 + */ 9 + 10 + #include "test_util.h" 11 + #include "kvm_util.h" 12 + #include "processor.h" 13 + #include "svm_util.h" 14 + 15 + static void l2_guest_code(struct svm_test_data *svm) 16 + { 17 + __asm__ __volatile__("ud2"); 18 + } 19 + 20 + static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) 21 + { 22 + #define L2_GUEST_STACK_SIZE 64 23 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 24 + struct vmcb *vmcb = svm->vmcb; 25 + 26 + generic_svm_setup(svm, l2_guest_code, 27 + &l2_guest_stack[L2_GUEST_STACK_SIZE]); 28 + 29 + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); 30 + 31 + idt[6].p = 0; // #UD is intercepted but its injection will cause #NP 32 + idt[11].p = 0; // #NP is not intercepted and will cause another 33 + // #NP that will be converted to #DF 34 + idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN 35 + 36 + run_guest(vmcb, svm->vmcb_gpa); 37 + 38 + /* should not reach here */ 39 + GUEST_ASSERT(0); 40 + } 41 + 42 + int main(int argc, char *argv[]) 43 + { 44 + struct kvm_vcpu *vcpu; 45 + struct kvm_run *run; 46 + vm_vaddr_t svm_gva; 47 + struct kvm_vm *vm; 48 + 49 + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); 50 + 51 + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); 52 + vm_init_descriptor_tables(vm); 53 + vcpu_init_descriptor_tables(vcpu); 54 + 55 + vcpu_alloc_svm(vm, &svm_gva); 56 + 57 + vcpu_args_set(vcpu, 2, svm_gva, vm->idt); 58 + run = vcpu->run; 59 + 60 + vcpu_run(vcpu); 61 + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, 62 + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", 63 + run->exit_reason, 64 + exit_reason_str(run->exit_reason)); 65 + 66 + kvm_vm_free(vm); 67 + }
+56 -17
tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
··· 3 3 #include "kvm_util.h" 4 4 #include "processor.h" 5 5 #include "vmx.h" 6 + #include "svm_util.h" 6 7 7 8 #include <string.h> 8 9 #include <sys/ioctl.h> ··· 21 20 : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); 22 21 } 23 22 24 - void l1_guest_code(struct vmx_pages *vmx) 25 - { 26 23 #define L2_GUEST_STACK_SIZE 64 27 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 24 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 25 + 26 + void l1_guest_code_vmx(struct vmx_pages *vmx) 27 + { 28 28 29 29 GUEST_ASSERT(vmx->vmcs_gpa); 30 30 GUEST_ASSERT(prepare_for_vmx_operation(vmx)); ··· 40 38 GUEST_DONE(); 41 39 } 42 40 41 + void l1_guest_code_svm(struct svm_test_data *svm) 42 + { 43 + struct vmcb *vmcb = svm->vmcb; 44 + 45 + generic_svm_setup(svm, l2_guest_code, 46 + &l2_guest_stack[L2_GUEST_STACK_SIZE]); 47 + 48 + /* don't intercept shutdown to test the case of SVM allowing to do so */ 49 + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); 50 + 51 + run_guest(vmcb, svm->vmcb_gpa); 52 + 53 + /* should not reach here, L1 should crash */ 54 + GUEST_ASSERT(0); 55 + } 56 + 43 57 int main(void) 44 58 { 45 59 struct kvm_vcpu *vcpu; 46 60 struct kvm_run *run; 47 61 struct kvm_vcpu_events events; 48 - vm_vaddr_t vmx_pages_gva; 49 62 struct ucall uc; 50 63 51 - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); 64 + bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX); 65 + bool has_svm = kvm_cpu_has(X86_FEATURE_SVM); 66 + 67 + TEST_REQUIRE(has_vmx || has_svm); 52 68 53 69 TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); 54 70 55 - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); 56 - vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); 57 71 72 + if (has_vmx) { 73 + vm_vaddr_t vmx_pages_gva; 74 + 75 + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); 76 + vcpu_alloc_vmx(vm, &vmx_pages_gva); 77 + vcpu_args_set(vcpu, 1, vmx_pages_gva); 78 + } else { 79 + vm_vaddr_t svm_gva; 80 + 81 + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); 82 + vcpu_alloc_svm(vm, 
&svm_gva); 83 + vcpu_args_set(vcpu, 1, svm_gva); 84 + } 85 + 86 + vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); 58 87 run = vcpu->run; 59 - vcpu_alloc_vmx(vm, &vmx_pages_gva); 60 - vcpu_args_set(vcpu, 1, vmx_pages_gva); 61 88 vcpu_run(vcpu); 62 89 63 90 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, ··· 109 78 "No triple fault pending"); 110 79 vcpu_run(vcpu); 111 80 112 - switch (get_ucall(vcpu, &uc)) { 113 - case UCALL_DONE: 114 - break; 115 - case UCALL_ABORT: 116 - REPORT_GUEST_ASSERT(uc); 117 - default: 118 - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); 119 - } 120 81 82 + if (has_svm) { 83 + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, 84 + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", 85 + run->exit_reason, 86 + exit_reason_str(run->exit_reason)); 87 + } else { 88 + switch (get_ucall(vcpu, &uc)) { 89 + case UCALL_DONE: 90 + break; 91 + case UCALL_ABORT: 92 + REPORT_GUEST_ASSERT(uc); 93 + default: 94 + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); 95 + } 96 + } 97 + return 0; 121 98 }
+42 -10
virt/kvm/kvm_main.c
··· 1198 1198 goto out_err_no_arch_destroy_vm; 1199 1199 } 1200 1200 1201 - kvm->max_halt_poll_ns = halt_poll_ns; 1202 - 1203 1201 r = kvm_arch_init_vm(kvm, type); 1204 1202 if (r) 1205 1203 goto out_err_no_arch_destroy_vm; ··· 3375 3377 if (val < grow_start) 3376 3378 val = grow_start; 3377 3379 3378 - if (val > vcpu->kvm->max_halt_poll_ns) 3379 - val = vcpu->kvm->max_halt_poll_ns; 3380 - 3381 3380 vcpu->halt_poll_ns = val; 3382 3381 out: 3383 3382 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); ··· 3478 3483 } 3479 3484 } 3480 3485 3486 + static unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu) 3487 + { 3488 + struct kvm *kvm = vcpu->kvm; 3489 + 3490 + if (kvm->override_halt_poll_ns) { 3491 + /* 3492 + * Ensure kvm->max_halt_poll_ns is not read before 3493 + * kvm->override_halt_poll_ns. 3494 + * 3495 + * Pairs with the smp_wmb() when enabling KVM_CAP_HALT_POLL. 3496 + */ 3497 + smp_rmb(); 3498 + return READ_ONCE(kvm->max_halt_poll_ns); 3499 + } 3500 + 3501 + return READ_ONCE(halt_poll_ns); 3502 + } 3503 + 3481 3504 /* 3482 3505 * Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... 
If halt 3483 3506 * polling is enabled, busy wait for a short time before blocking to avoid the ··· 3504 3491 */ 3505 3492 void kvm_vcpu_halt(struct kvm_vcpu *vcpu) 3506 3493 { 3494 + unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); 3507 3495 bool halt_poll_allowed = !kvm_arch_no_poll(vcpu); 3508 - bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; 3509 3496 ktime_t start, cur, poll_end; 3510 3497 bool waited = false; 3498 + bool do_halt_poll; 3511 3499 u64 halt_ns; 3500 + 3501 + if (vcpu->halt_poll_ns > max_halt_poll_ns) 3502 + vcpu->halt_poll_ns = max_halt_poll_ns; 3503 + 3504 + do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; 3512 3505 3513 3506 start = cur = poll_end = ktime_get(); 3514 3507 if (do_halt_poll) { ··· 3554 3535 update_halt_poll_stats(vcpu, start, poll_end, !waited); 3555 3536 3556 3537 if (halt_poll_allowed) { 3538 + /* Recompute the max halt poll time in case it changed. */ 3539 + max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); 3540 + 3557 3541 if (!vcpu_valid_wakeup(vcpu)) { 3558 3542 shrink_halt_poll_ns(vcpu); 3559 - } else if (vcpu->kvm->max_halt_poll_ns) { 3543 + } else if (max_halt_poll_ns) { 3560 3544 if (halt_ns <= vcpu->halt_poll_ns) 3561 3545 ; 3562 3546 /* we had a long block, shrink polling */ 3563 3547 else if (vcpu->halt_poll_ns && 3564 - halt_ns > vcpu->kvm->max_halt_poll_ns) 3548 + halt_ns > max_halt_poll_ns) 3565 3549 shrink_halt_poll_ns(vcpu); 3566 3550 /* we had a short halt and our poll time is too small */ 3567 - else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns && 3568 - halt_ns < vcpu->kvm->max_halt_poll_ns) 3551 + else if (vcpu->halt_poll_ns < max_halt_poll_ns && 3552 + halt_ns < max_halt_poll_ns) 3569 3553 grow_halt_poll_ns(vcpu); 3570 3554 } else { 3571 3555 vcpu->halt_poll_ns = 0; ··· 4603 4581 return -EINVAL; 4604 4582 4605 4583 kvm->max_halt_poll_ns = cap->args[0]; 4584 + 4585 + /* 4586 + * Ensure kvm->override_halt_poll_ns does not become visible 4587 + * before 
kvm->max_halt_poll_ns. 4588 + * 4589 + * Pairs with the smp_rmb() in kvm_vcpu_max_halt_poll_ns(). 4590 + */ 4591 + smp_wmb(); 4592 + kvm->override_halt_poll_ns = true; 4593 + 4606 4594 return 0; 4607 4595 } 4608 4596 case KVM_CAP_DIRTY_LOG_RING:
+6 -1
virt/kvm/pfncache.c
··· 297 297 if (!gpc->valid || old_uhva != gpc->uhva) { 298 298 ret = hva_to_pfn_retry(kvm, gpc); 299 299 } else { 300 - /* If the HVA→PFN mapping was already valid, don't unmap it. */ 300 + /* 301 + * If the HVA→PFN mapping was already valid, don't unmap it. 302 + * But do update gpc->khva because the offset within the page 303 + * may have changed. 304 + */ 305 + gpc->khva = old_khva + page_offset; 301 306 old_pfn = KVM_PFN_ERR_FAULT; 302 307 old_khva = NULL; 303 308 ret = 0;