Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"Fixes for kvm on x86:

- new selftests

- fixes for migration with HyperV re-enlightenment enabled

- fix RCU/SRCU usage

- fixes for local_irq_restore misuse false positive"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
documentation/kvm: additional explanations on KVM_SET_BOOT_CPU_ID
x86/kvm: Fix broken irq restoration in kvm_wait
KVM: X86: Fix missing local pCPU when executing wbinvd on all dirty pCPUs
KVM: x86: Protect userspace MSR filter with SRCU, and set atomically-ish
selftests: kvm: add set_boot_cpu_id test
selftests: kvm: add _vm_ioctl
selftests: kvm: add get_msr_index_features
selftests: kvm: Add basic Hyper-V clocksources tests
KVM: x86: hyper-v: Don't touch TSC page values when guest opted for re-enlightenment
KVM: x86: hyper-v: Track Hyper-V TSC page status
KVM: x86: hyper-v: Prevent using not-yet-updated TSC page by secondary CPUs
KVM: x86: hyper-v: Limit guest to writing zero to HV_X64_MSR_TSC_EMULATION_STATUS
KVM: x86/mmu: Store the address space ID in the TDP iterator
KVM: x86/mmu: Factor out tdp_iter_return_to_root
KVM: x86/mmu: Fix RCU usage when atomically zapping SPTEs
KVM: x86/mmu: Fix RCU usage in handle_removed_tdp_mmu_page

+808 -121
+6 -3
Documentation/virt/kvm/api.rst
··· 1495 1495 1496 1496 Define which vcpu is the Bootstrap Processor (BSP). Values are the same 1497 1497 as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default 1498 - is vcpu 0. 1498 + is vcpu 0. This ioctl has to be called before vcpu creation, 1499 + otherwise it will return EBUSY error. 1499 1500 1500 1501 1501 1502 4.42 KVM_GET_XSAVE ··· 4807 4806 allows user space to deflect and potentially handle various MSR accesses 4808 4807 into user space. 4809 4808 4810 - If a vCPU is in running state while this ioctl is invoked, the vCPU may 4811 - experience inconsistent filtering behavior on MSR accesses. 4809 + Note, invoking this ioctl while a vCPU is running is inherently racy. However, 4810 + KVM does guarantee that vCPUs will see either the previous filter or the new 4811 + filter, e.g. MSRs with identical settings in both the old and new filter will 4812 + have deterministic behavior. 4812 4813 4813 4814 4.127 KVM_XEN_HVM_SET_ATTR 4814 4815 --------------------------
+26 -8
arch/x86/include/asm/kvm_host.h
··· 884 884 u64 options; 885 885 }; 886 886 887 + /* Current state of Hyper-V TSC page clocksource */ 888 + enum hv_tsc_page_status { 889 + /* TSC page was not set up or disabled */ 890 + HV_TSC_PAGE_UNSET = 0, 891 + /* TSC page MSR was written by the guest, update pending */ 892 + HV_TSC_PAGE_GUEST_CHANGED, 893 + /* TSC page MSR was written by KVM userspace, update pending */ 894 + HV_TSC_PAGE_HOST_CHANGED, 895 + /* TSC page was properly set up and is currently active */ 896 + HV_TSC_PAGE_SET, 897 + /* TSC page is currently being updated and therefore is inactive */ 898 + HV_TSC_PAGE_UPDATING, 899 + /* TSC page was set up with an inaccessible GPA */ 900 + HV_TSC_PAGE_BROKEN, 901 + }; 902 + 887 903 /* Hyper-V emulation context */ 888 904 struct kvm_hv { 889 905 struct mutex hv_lock; 890 906 u64 hv_guest_os_id; 891 907 u64 hv_hypercall; 892 908 u64 hv_tsc_page; 909 + enum hv_tsc_page_status hv_tsc_page_status; 893 910 894 911 /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ 895 912 u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; ··· 946 929 KVM_IRQCHIP_NONE, 947 930 KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ 948 931 KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ 932 + }; 933 + 934 + struct kvm_x86_msr_filter { 935 + u8 count; 936 + bool default_allow:1; 937 + struct msr_bitmap_range ranges[16]; 949 938 }; 950 939 951 940 #define APICV_INHIBIT_REASON_DISABLE 0 ··· 1048 1025 bool guest_can_read_msr_platform_info; 1049 1026 bool exception_payload_enabled; 1050 1027 1028 + bool bus_lock_detection_enabled; 1029 + 1051 1030 /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ 1052 1031 u32 user_space_msr_mask; 1053 - 1054 - struct { 1055 - u8 count; 1056 - bool default_allow:1; 1057 - struct msr_bitmap_range ranges[16]; 1058 - } msr_filter; 1059 - 1060 - bool bus_lock_detection_enabled; 1032 + struct kvm_x86_msr_filter __rcu *msr_filter; 1061 1033 1062 1034 struct kvm_pmu_event_filter __rcu *pmu_event_filter; 1063 
1035 struct task_struct *nx_lpage_recovery_thread;
+10 -13
arch/x86/kernel/kvm.c
··· 836 836 837 837 static void kvm_wait(u8 *ptr, u8 val) 838 838 { 839 - unsigned long flags; 840 - 841 839 if (in_nmi()) 842 840 return; 843 - 844 - local_irq_save(flags); 845 - 846 - if (READ_ONCE(*ptr) != val) 847 - goto out; 848 841 849 842 /* 850 843 * halt until it's our turn and kicked. Note that we do safe halt 851 844 * for irq enabled case to avoid hang when lock info is overwritten 852 845 * in irq spinlock slowpath and no spurious interrupt occur to save us. 853 846 */ 854 - if (arch_irqs_disabled_flags(flags)) 855 - halt(); 856 - else 857 - safe_halt(); 847 + if (irqs_disabled()) { 848 + if (READ_ONCE(*ptr) == val) 849 + halt(); 850 + } else { 851 + local_irq_disable(); 858 852 859 - out: 860 - local_irq_restore(flags); 853 + if (READ_ONCE(*ptr) == val) 854 + safe_halt(); 855 + 856 + local_irq_enable(); 857 + } 861 858 } 862 859 863 860 #ifdef CONFIG_X86_32
+81 -10
arch/x86/kvm/hyperv.c
··· 520 520 u64 tsc; 521 521 522 522 /* 523 - * The guest has not set up the TSC page or the clock isn't 524 - * stable, fall back to get_kvmclock_ns. 523 + * Fall back to get_kvmclock_ns() when TSC page hasn't been set up, 524 + * is broken, disabled or being updated. 525 525 */ 526 - if (!hv->tsc_ref.tsc_sequence) 526 + if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET) 527 527 return div_u64(get_kvmclock_ns(kvm), 100); 528 528 529 529 vcpu = kvm_get_vcpu(kvm, 0); ··· 1077 1077 return true; 1078 1078 } 1079 1079 1080 + /* 1081 + * Don't touch TSC page values if the guest has opted for TSC emulation after 1082 + * migration. KVM doesn't fully support reenlightenment notifications and TSC 1083 + * access emulation and Hyper-V is known to expect the values in TSC page to 1084 + * stay constant before TSC access emulation is disabled from guest side 1085 + * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC 1086 + * frequency and guest visible TSC value across migration (and prevent it when 1087 + * TSC scaling is unsupported). 
1088 + */ 1089 + static inline bool tsc_page_update_unsafe(struct kvm_hv *hv) 1090 + { 1091 + return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) && 1092 + hv->hv_tsc_emulation_control; 1093 + } 1094 + 1080 1095 void kvm_hv_setup_tsc_page(struct kvm *kvm, 1081 1096 struct pvclock_vcpu_time_info *hv_clock) 1082 1097 { ··· 1102 1087 BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); 1103 1088 BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); 1104 1089 1105 - if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 1090 + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || 1091 + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) 1106 1092 return; 1107 1093 1108 1094 mutex_lock(&hv->hv_lock); ··· 1117 1101 */ 1118 1102 if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), 1119 1103 &tsc_seq, sizeof(tsc_seq)))) 1104 + goto out_err; 1105 + 1106 + if (tsc_seq && tsc_page_update_unsafe(hv)) { 1107 + if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) 1108 + goto out_err; 1109 + 1110 + hv->hv_tsc_page_status = HV_TSC_PAGE_SET; 1120 1111 goto out_unlock; 1112 + } 1121 1113 1122 1114 /* 1123 1115 * While we're computing and writing the parameters, force the ··· 1134 1110 hv->tsc_ref.tsc_sequence = 0; 1135 1111 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), 1136 1112 &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) 1137 - goto out_unlock; 1113 + goto out_err; 1138 1114 1139 1115 if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref)) 1140 - goto out_unlock; 1116 + goto out_err; 1141 1117 1142 1118 /* Ensure sequence is zero before writing the rest of the struct. */ 1143 1119 smp_wmb(); 1144 1120 if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) 1145 - goto out_unlock; 1121 + goto out_err; 1146 1122 1147 1123 /* 1148 1124 * Now switch to the TSC page mechanism by writing the sequence. 
··· 1155 1131 smp_wmb(); 1156 1132 1157 1133 hv->tsc_ref.tsc_sequence = tsc_seq; 1158 - kvm_write_guest(kvm, gfn_to_gpa(gfn), 1159 - &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); 1134 + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), 1135 + &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) 1136 + goto out_err; 1137 + 1138 + hv->hv_tsc_page_status = HV_TSC_PAGE_SET; 1139 + goto out_unlock; 1140 + 1141 + out_err: 1142 + hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; 1143 + out_unlock: 1144 + mutex_unlock(&hv->hv_lock); 1145 + } 1146 + 1147 + void kvm_hv_invalidate_tsc_page(struct kvm *kvm) 1148 + { 1149 + struct kvm_hv *hv = to_kvm_hv(kvm); 1150 + u64 gfn; 1151 + 1152 + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || 1153 + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || 1154 + tsc_page_update_unsafe(hv)) 1155 + return; 1156 + 1157 + mutex_lock(&hv->hv_lock); 1158 + 1159 + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 1160 + goto out_unlock; 1161 + 1162 + /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */ 1163 + if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET) 1164 + hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING; 1165 + 1166 + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; 1167 + 1168 + hv->tsc_ref.tsc_sequence = 0; 1169 + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), 1170 + &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) 1171 + hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; 1172 + 1160 1173 out_unlock: 1161 1174 mutex_unlock(&hv->hv_lock); 1162 1175 } ··· 1254 1193 } 1255 1194 case HV_X64_MSR_REFERENCE_TSC: 1256 1195 hv->hv_tsc_page = data; 1257 - if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) 1196 + if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) { 1197 + if (!host) 1198 + hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED; 1199 + else 1200 + hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED; 1258 1201 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); 1202 + } else { 1203 + hv->hv_tsc_page_status = 
HV_TSC_PAGE_UNSET; 1204 + } 1259 1205 break; 1260 1206 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 1261 1207 return kvm_hv_msr_set_crash_data(kvm, ··· 1297 1229 hv->hv_tsc_emulation_control = data; 1298 1230 break; 1299 1231 case HV_X64_MSR_TSC_EMULATION_STATUS: 1232 + if (data && !host) 1233 + return 1; 1234 + 1300 1235 hv->hv_tsc_emulation_status = data; 1301 1236 break; 1302 1237 case HV_X64_MSR_TIME_REF_COUNT:
+1
arch/x86/kvm/hyperv.h
··· 133 133 134 134 void kvm_hv_setup_tsc_page(struct kvm *kvm, 135 135 struct pvclock_vcpu_time_info *hv_clock); 136 + void kvm_hv_invalidate_tsc_page(struct kvm *kvm); 136 137 137 138 void kvm_hv_init_vm(struct kvm *kvm); 138 139 void kvm_hv_destroy_vm(struct kvm *kvm);
+5
arch/x86/kvm/mmu/mmu_internal.h
··· 78 78 return to_shadow_page(__pa(sptep)); 79 79 } 80 80 81 + static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) 82 + { 83 + return sp->role.smm ? 1 : 0; 84 + } 85 + 81 86 static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) 82 87 { 83 88 /*
+18 -12
arch/x86/kvm/mmu/tdp_iter.c
··· 21 21 } 22 22 23 23 /* 24 + * Return the TDP iterator to the root PT and allow it to continue its 25 + * traversal over the paging structure from there. 26 + */ 27 + void tdp_iter_restart(struct tdp_iter *iter) 28 + { 29 + iter->yielded_gfn = iter->next_last_level_gfn; 30 + iter->level = iter->root_level; 31 + 32 + iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); 33 + tdp_iter_refresh_sptep(iter); 34 + 35 + iter->valid = true; 36 + } 37 + 38 + /* 24 39 * Sets a TDP iterator to walk a pre-order traversal of the paging structure 25 40 * rooted at root_pt, starting with the walk to translate next_last_level_gfn. 26 41 */ ··· 46 31 WARN_ON(root_level > PT64_ROOT_MAX_LEVEL); 47 32 48 33 iter->next_last_level_gfn = next_last_level_gfn; 49 - iter->yielded_gfn = iter->next_last_level_gfn; 50 34 iter->root_level = root_level; 51 35 iter->min_level = min_level; 52 - iter->level = root_level; 53 - iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt; 36 + iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt; 37 + iter->as_id = kvm_mmu_page_as_id(sptep_to_sp(root_pt)); 54 38 55 - iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); 56 - tdp_iter_refresh_sptep(iter); 57 - 58 - iter->valid = true; 39 + tdp_iter_restart(iter); 59 40 } 60 41 61 42 /* ··· 168 157 return; 169 158 } while (try_step_up(iter)); 170 159 iter->valid = false; 171 - } 172 - 173 - tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter) 174 - { 175 - return iter->pt_path[iter->root_level - 1]; 176 160 } 177 161
+3 -1
arch/x86/kvm/mmu/tdp_iter.h
··· 36 36 int min_level; 37 37 /* The iterator's current level within the paging structure */ 38 38 int level; 39 + /* The address space ID, i.e. SMM vs. regular. */ 40 + int as_id; 39 41 /* A snapshot of the value at sptep */ 40 42 u64 old_spte; 41 43 /* ··· 64 62 void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, 65 63 int min_level, gfn_t next_last_level_gfn); 66 64 void tdp_iter_next(struct tdp_iter *iter); 67 - tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter); 65 + void tdp_iter_restart(struct tdp_iter *iter); 68 66 69 67 #endif /* __KVM_X86_MMU_TDP_ITER_H */
+15 -25
arch/x86/kvm/mmu/tdp_mmu.c
··· 203 203 u64 old_spte, u64 new_spte, int level, 204 204 bool shared); 205 205 206 - static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) 207 - { 208 - return sp->role.smm ? 1 : 0; 209 - } 210 - 211 206 static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level) 212 207 { 213 208 bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); ··· 296 301 * 297 302 * Given a page table that has been removed from the TDP paging structure, 298 303 * iterates through the page table to clear SPTEs and free child page tables. 304 + * 305 + * Note that pt is passed in as a tdp_ptep_t, but it does not need RCU 306 + * protection. Since this thread removed it from the paging structure, 307 + * this thread will be responsible for ensuring the page is freed. Hence the 308 + * early rcu_dereferences in the function. 299 309 */ 300 - static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, 310 + static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, 301 311 bool shared) 302 312 { 303 - struct kvm_mmu_page *sp = sptep_to_sp(pt); 313 + struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt)); 304 314 int level = sp->role.level; 305 315 gfn_t base_gfn = sp->gfn; 306 316 u64 old_child_spte; ··· 318 318 tdp_mmu_unlink_page(kvm, sp, shared); 319 319 320 320 for (i = 0; i < PT64_ENT_PER_PAGE; i++) { 321 - sptep = pt + i; 321 + sptep = rcu_dereference(pt) + i; 322 322 gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)); 323 323 324 324 if (shared) { ··· 492 492 struct tdp_iter *iter, 493 493 u64 new_spte) 494 494 { 495 - u64 *root_pt = tdp_iter_root_pt(iter); 496 - struct kvm_mmu_page *root = sptep_to_sp(root_pt); 497 - int as_id = kvm_mmu_page_as_id(root); 498 - 499 495 lockdep_assert_held_read(&kvm->mmu_lock); 500 496 501 497 /* ··· 505 509 new_spte) != iter->old_spte) 506 510 return false; 507 511 508 - handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, 509 - iter->level, true); 512 + 
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, 513 + new_spte, iter->level, true); 510 514 511 515 return true; 512 516 } ··· 534 538 * here since the SPTE is going from non-present 535 539 * to non-present. 536 540 */ 537 - WRITE_ONCE(*iter->sptep, 0); 541 + WRITE_ONCE(*rcu_dereference(iter->sptep), 0); 538 542 539 543 return true; 540 544 } ··· 560 564 u64 new_spte, bool record_acc_track, 561 565 bool record_dirty_log) 562 566 { 563 - tdp_ptep_t root_pt = tdp_iter_root_pt(iter); 564 - struct kvm_mmu_page *root = sptep_to_sp(root_pt); 565 - int as_id = kvm_mmu_page_as_id(root); 566 - 567 567 lockdep_assert_held_write(&kvm->mmu_lock); 568 568 569 569 /* ··· 573 581 574 582 WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte); 575 583 576 - __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, 577 - iter->level, false); 584 + __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, 585 + new_spte, iter->level, false); 578 586 if (record_acc_track) 579 587 handle_changed_spte_acc_track(iter->old_spte, new_spte, 580 588 iter->level); 581 589 if (record_dirty_log) 582 - handle_changed_spte_dirty_log(kvm, as_id, iter->gfn, 590 + handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn, 583 591 iter->old_spte, new_spte, 584 592 iter->level); 585 593 } ··· 651 659 652 660 WARN_ON(iter->gfn > iter->next_last_level_gfn); 653 661 654 - tdp_iter_start(iter, iter->pt_path[iter->root_level - 1], 655 - iter->root_level, iter->min_level, 656 - iter->next_last_level_gfn); 662 + tdp_iter_restart(iter); 657 663 658 664 return true; 659 665 }
+69 -46
arch/x86/kvm/x86.c
··· 1526 1526 1527 1527 bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) 1528 1528 { 1529 + struct kvm_x86_msr_filter *msr_filter; 1530 + struct msr_bitmap_range *ranges; 1529 1531 struct kvm *kvm = vcpu->kvm; 1530 - struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges; 1531 - u32 count = kvm->arch.msr_filter.count; 1532 - u32 i; 1533 - bool r = kvm->arch.msr_filter.default_allow; 1532 + bool allowed; 1534 1533 int idx; 1534 + u32 i; 1535 1535 1536 - /* MSR filtering not set up or x2APIC enabled, allow everything */ 1537 - if (!count || (index >= 0x800 && index <= 0x8ff)) 1536 + /* x2APIC MSRs do not support filtering. */ 1537 + if (index >= 0x800 && index <= 0x8ff) 1538 1538 return true; 1539 1539 1540 - /* Prevent collision with set_msr_filter */ 1541 1540 idx = srcu_read_lock(&kvm->srcu); 1542 1541 1543 - for (i = 0; i < count; i++) { 1542 + msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu); 1543 + if (!msr_filter) { 1544 + allowed = true; 1545 + goto out; 1546 + } 1547 + 1548 + allowed = msr_filter->default_allow; 1549 + ranges = msr_filter->ranges; 1550 + 1551 + for (i = 0; i < msr_filter->count; i++) { 1544 1552 u32 start = ranges[i].base; 1545 1553 u32 end = start + ranges[i].nmsrs; 1546 1554 u32 flags = ranges[i].flags; 1547 1555 unsigned long *bitmap = ranges[i].bitmap; 1548 1556 1549 1557 if ((index >= start) && (index < end) && (flags & type)) { 1550 - r = !!test_bit(index - start, bitmap); 1558 + allowed = !!test_bit(index - start, bitmap); 1551 1559 break; 1552 1560 } 1553 1561 } 1554 1562 1563 + out: 1555 1564 srcu_read_unlock(&kvm->srcu, idx); 1556 1565 1557 - return r; 1566 + return allowed; 1558 1567 } 1559 1568 EXPORT_SYMBOL_GPL(kvm_msr_allowed); 1560 1569 ··· 2559 2550 int i; 2560 2551 struct kvm_vcpu *vcpu; 2561 2552 struct kvm_arch *ka = &kvm->arch; 2553 + 2554 + kvm_hv_invalidate_tsc_page(kvm); 2562 2555 2563 2556 spin_lock(&ka->pvclock_gtod_sync_lock); 2564 2557 kvm_make_mclock_inprogress_request(kvm); 
··· 5363 5352 return r; 5364 5353 } 5365 5354 5366 - static void kvm_clear_msr_filter(struct kvm *kvm) 5355 + static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow) 5367 5356 { 5368 - u32 i; 5369 - u32 count = kvm->arch.msr_filter.count; 5370 - struct msr_bitmap_range ranges[16]; 5357 + struct kvm_x86_msr_filter *msr_filter; 5371 5358 5372 - mutex_lock(&kvm->lock); 5373 - kvm->arch.msr_filter.count = 0; 5374 - memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0])); 5375 - mutex_unlock(&kvm->lock); 5376 - synchronize_srcu(&kvm->srcu); 5359 + msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT); 5360 + if (!msr_filter) 5361 + return NULL; 5377 5362 5378 - for (i = 0; i < count; i++) 5379 - kfree(ranges[i].bitmap); 5363 + msr_filter->default_allow = default_allow; 5364 + return msr_filter; 5380 5365 } 5381 5366 5382 - static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range) 5367 + static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter) 5383 5368 { 5384 - struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges; 5369 + u32 i; 5370 + 5371 + if (!msr_filter) 5372 + return; 5373 + 5374 + for (i = 0; i < msr_filter->count; i++) 5375 + kfree(msr_filter->ranges[i].bitmap); 5376 + 5377 + kfree(msr_filter); 5378 + } 5379 + 5380 + static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, 5381 + struct kvm_msr_filter_range *user_range) 5382 + { 5385 5383 struct msr_bitmap_range range; 5386 5384 unsigned long *bitmap = NULL; 5387 5385 size_t bitmap_size; ··· 5424 5404 goto err; 5425 5405 } 5426 5406 5427 - /* Everything ok, add this range identifier to our global pool */ 5428 - ranges[kvm->arch.msr_filter.count] = range; 5429 - /* Make sure we filled the array before we tell anyone to walk it */ 5430 - smp_wmb(); 5431 - kvm->arch.msr_filter.count++; 5407 + /* Everything ok, add this range identifier. 
*/ 5408 + msr_filter->ranges[msr_filter->count] = range; 5409 + msr_filter->count++; 5432 5410 5433 5411 return 0; 5434 5412 err: ··· 5437 5419 static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) 5438 5420 { 5439 5421 struct kvm_msr_filter __user *user_msr_filter = argp; 5422 + struct kvm_x86_msr_filter *new_filter, *old_filter; 5440 5423 struct kvm_msr_filter filter; 5441 5424 bool default_allow; 5442 - int r = 0; 5443 5425 bool empty = true; 5426 + int r = 0; 5444 5427 u32 i; 5445 5428 5446 5429 if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) ··· 5454 5435 if (empty && !default_allow) 5455 5436 return -EINVAL; 5456 5437 5457 - kvm_clear_msr_filter(kvm); 5438 + new_filter = kvm_alloc_msr_filter(default_allow); 5439 + if (!new_filter) 5440 + return -ENOMEM; 5458 5441 5459 - kvm->arch.msr_filter.default_allow = default_allow; 5460 - 5461 - /* 5462 - * Protect from concurrent calls to this function that could trigger 5463 - * a TOCTOU violation on kvm->arch.msr_filter.count. 5464 - */ 5465 - mutex_lock(&kvm->lock); 5466 5442 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { 5467 - r = kvm_add_msr_filter(kvm, &filter.ranges[i]); 5468 - if (r) 5469 - break; 5443 + r = kvm_add_msr_filter(new_filter, &filter.ranges[i]); 5444 + if (r) { 5445 + kvm_free_msr_filter(new_filter); 5446 + return r; 5447 + } 5470 5448 } 5449 + 5450 + mutex_lock(&kvm->lock); 5451 + 5452 + /* The per-VM filter is protected by kvm->lock... 
*/ 5453 + old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1); 5454 + 5455 + rcu_assign_pointer(kvm->arch.msr_filter, new_filter); 5456 + synchronize_srcu(&kvm->srcu); 5457 + 5458 + kvm_free_msr_filter(old_filter); 5471 5459 5472 5460 kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); 5473 5461 mutex_unlock(&kvm->lock); 5474 5462 5475 - return r; 5463 + return 0; 5476 5464 } 5477 5465 5478 5466 long kvm_arch_vm_ioctl(struct file *filp, ··· 6629 6603 int cpu = get_cpu(); 6630 6604 6631 6605 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); 6632 - smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, 6606 + on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask, 6633 6607 wbinvd_ipi, NULL, 1); 6634 6608 put_cpu(); 6635 6609 cpumask_clear(vcpu->arch.wbinvd_dirty_mask); ··· 10660 10634 10661 10635 void kvm_arch_destroy_vm(struct kvm *kvm) 10662 10636 { 10663 - u32 i; 10664 - 10665 10637 if (current->mm == kvm->mm) { 10666 10638 /* 10667 10639 * Free memory regions allocated on behalf of userspace, ··· 10675 10651 mutex_unlock(&kvm->slots_lock); 10676 10652 } 10677 10653 static_call_cond(kvm_x86_vm_destroy)(kvm); 10678 - for (i = 0; i < kvm->arch.msr_filter.count; i++) 10679 - kfree(kvm->arch.msr_filter.ranges[i].bitmap); 10654 + kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); 10680 10655 kvm_pic_destroy(kvm); 10681 10656 kvm_ioapic_destroy(kvm); 10682 10657 kvm_free_vcpus(kvm);
+3
tools/testing/selftests/kvm/.gitignore
··· 8 8 /x86_64/debug_regs 9 9 /x86_64/evmcs_test 10 10 /x86_64/get_cpuid_test 11 + /x86_64/get_msr_index_features 11 12 /x86_64/kvm_pv_test 13 + /x86_64/hyperv_clock 12 14 /x86_64/hyperv_cpuid 13 15 /x86_64/mmio_warning_test 14 16 /x86_64/platform_info_test 17 + /x86_64/set_boot_cpu_id 15 18 /x86_64/set_sregs_test 16 19 /x86_64/smm_test 17 20 /x86_64/state_test
+3
tools/testing/selftests/kvm/Makefile
··· 39 39 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c 40 40 41 41 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test 42 + TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features 42 43 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test 43 44 TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test 45 + TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock 44 46 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid 45 47 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test 46 48 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test 47 49 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test 50 + TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id 48 51 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test 49 52 TEST_GEN_PROGS_x86_64 += x86_64/smm_test 50 53 TEST_GEN_PROGS_x86_64 += x86_64/state_test
+2
tools/testing/selftests/kvm/include/kvm_util.h
··· 16 16 17 17 #include "sparsebit.h" 18 18 19 + #define KVM_DEV_PATH "/dev/kvm" 19 20 #define KVM_MAX_VCPUS 512 20 21 21 22 /* ··· 134 133 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, 135 134 void *arg); 136 135 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); 136 + int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); 137 137 void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); 138 138 int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); 139 139 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+6 -1
tools/testing/selftests/kvm/lib/kvm_util.c
··· 1697 1697 { 1698 1698 int ret; 1699 1699 1700 - ret = ioctl(vm->fd, cmd, arg); 1700 + ret = _vm_ioctl(vm, cmd, arg); 1701 1701 TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", 1702 1702 cmd, ret, errno, strerror(errno)); 1703 + } 1704 + 1705 + int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) 1706 + { 1707 + return ioctl(vm->fd, cmd, arg); 1703 1708 } 1704 1709 1705 1710 /*
-2
tools/testing/selftests/kvm/lib/kvm_util_internal.h
··· 10 10 11 11 #include "sparsebit.h" 12 12 13 - #define KVM_DEV_PATH "/dev/kvm" 14 - 15 13 struct userspace_mem_region { 16 14 struct kvm_userspace_memory_region region; 17 15 struct sparsebit *unused_phy_pages;
+134
tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Test that KVM_GET_MSR_INDEX_LIST and 4 + * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended 5 + * 6 + * Copyright (C) 2020, Red Hat, Inc. 7 + */ 8 + #include <fcntl.h> 9 + #include <stdio.h> 10 + #include <stdlib.h> 11 + #include <string.h> 12 + #include <sys/ioctl.h> 13 + 14 + #include "test_util.h" 15 + #include "kvm_util.h" 16 + #include "processor.h" 17 + 18 + static int kvm_num_index_msrs(int kvm_fd, int nmsrs) 19 + { 20 + struct kvm_msr_list *list; 21 + int r; 22 + 23 + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); 24 + list->nmsrs = nmsrs; 25 + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); 26 + TEST_ASSERT(r == -1 && errno == E2BIG, 27 + "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", 28 + r); 29 + 30 + r = list->nmsrs; 31 + free(list); 32 + return r; 33 + } 34 + 35 + static void test_get_msr_index(void) 36 + { 37 + int old_res, res, kvm_fd, r; 38 + struct kvm_msr_list *list; 39 + 40 + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); 41 + if (kvm_fd < 0) 42 + exit(KSFT_SKIP); 43 + 44 + old_res = kvm_num_index_msrs(kvm_fd, 0); 45 + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); 46 + 47 + if (old_res != 1) { 48 + res = kvm_num_index_msrs(kvm_fd, 1); 49 + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); 50 + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); 51 + } 52 + 53 + list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0])); 54 + list->nmsrs = old_res; 55 + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); 56 + 57 + TEST_ASSERT(r == 0, 58 + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", 59 + r); 60 + TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical"); 61 + free(list); 62 + 63 + close(kvm_fd); 64 + } 65 + 66 + static int kvm_num_feature_msrs(int kvm_fd, int nmsrs) 67 + { 68 + struct kvm_msr_list *list; 69 + int r; 70 + 71 + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); 72 + list->nmsrs = 
nmsrs; 73 + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); 74 + TEST_ASSERT(r == -1 && errno == E2BIG, 75 + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i", 76 + r); 77 + 78 + r = list->nmsrs; 79 + free(list); 80 + return r; 81 + } 82 + 83 + struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs) 84 + { 85 + struct kvm_msr_list *list; 86 + int r; 87 + 88 + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); 89 + list->nmsrs = nmsrs; 90 + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); 91 + 92 + TEST_ASSERT(r == 0, 93 + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", 94 + r); 95 + 96 + return list; 97 + } 98 + 99 + static void test_get_msr_feature(void) 100 + { 101 + int res, old_res, i, kvm_fd; 102 + struct kvm_msr_list *feature_list; 103 + 104 + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); 105 + if (kvm_fd < 0) 106 + exit(KSFT_SKIP); 107 + 108 + old_res = kvm_num_feature_msrs(kvm_fd, 0); 109 + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); 110 + 111 + if (old_res != 1) { 112 + res = kvm_num_feature_msrs(kvm_fd, 1); 113 + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); 114 + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); 115 + } 116 + 117 + feature_list = kvm_get_msr_feature_list(kvm_fd, old_res); 118 + TEST_ASSERT(old_res == feature_list->nmsrs, 119 + "Unmatching number of msr indexes"); 120 + 121 + for (i = 0; i < feature_list->nmsrs; i++) 122 + kvm_get_feature_msr(feature_list->indices[i]); 123 + 124 + free(feature_list); 125 + close(kvm_fd); 126 + } 127 + 128 + int main(int argc, char *argv[]) 129 + { 130 + if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES)) 131 + test_get_msr_feature(); 132 + 133 + test_get_msr_index(); 134 + }
+260
tools/testing/selftests/kvm/x86_64/hyperv_clock.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2021, Red Hat, Inc. 4 + * 5 + * Tests for Hyper-V clocksources 6 + */ 7 + #include "test_util.h" 8 + #include "kvm_util.h" 9 + #include "processor.h" 10 + 11 + struct ms_hyperv_tsc_page { 12 + volatile u32 tsc_sequence; 13 + u32 reserved1; 14 + volatile u64 tsc_scale; 15 + volatile s64 tsc_offset; 16 + } __packed; 17 + 18 + #define HV_X64_MSR_GUEST_OS_ID 0x40000000 19 + #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 20 + #define HV_X64_MSR_REFERENCE_TSC 0x40000021 21 + #define HV_X64_MSR_TSC_FREQUENCY 0x40000022 22 + #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 23 + #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 24 + 25 + /* Simplified mul_u64_u64_shr() */ 26 + static inline u64 mul_u64_u64_shr64(u64 a, u64 b) 27 + { 28 + union { 29 + u64 ll; 30 + struct { 31 + u32 low, high; 32 + } l; 33 + } rm, rn, rh, a0, b0; 34 + u64 c; 35 + 36 + a0.ll = a; 37 + b0.ll = b; 38 + 39 + rm.ll = (u64)a0.l.low * b0.l.high; 40 + rn.ll = (u64)a0.l.high * b0.l.low; 41 + rh.ll = (u64)a0.l.high * b0.l.high; 42 + 43 + rh.l.low = c = rm.l.high + rn.l.high + rh.l.low; 44 + rh.l.high = (c >> 32) + rh.l.high; 45 + 46 + return rh.ll; 47 + } 48 + 49 + static inline void nop_loop(void) 50 + { 51 + int i; 52 + 53 + for (i = 0; i < 1000000; i++) 54 + asm volatile("nop"); 55 + } 56 + 57 + static inline void check_tsc_msr_rdtsc(void) 58 + { 59 + u64 tsc_freq, r1, r2, t1, t2; 60 + s64 delta_ns; 61 + 62 + tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY); 63 + GUEST_ASSERT(tsc_freq > 0); 64 + 65 + /* First, check MSR-based clocksource */ 66 + r1 = rdtsc(); 67 + t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); 68 + nop_loop(); 69 + r2 = rdtsc(); 70 + t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); 71 + 72 + GUEST_ASSERT(r2 > r1 && t2 > t1); 73 + 74 + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ 75 + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); 76 + if (delta_ns < 0) 77 + delta_ns = -delta_ns; 78 + 79 + /* 1% tolerance */ 
80 + GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100); 81 + } 82 + 83 + static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page) 84 + { 85 + u64 r1, r2, t1, t2; 86 + 87 + /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */ 88 + t1 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset; 89 + r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); 90 + 91 + /* 10 ms tolerance */ 92 + GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000); 93 + nop_loop(); 94 + 95 + t2 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset; 96 + r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); 97 + GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000); 98 + } 99 + 100 + static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa) 101 + { 102 + u64 tsc_scale, tsc_offset; 103 + 104 + /* Set Guest OS id to enable Hyper-V emulation */ 105 + GUEST_SYNC(1); 106 + wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48); 107 + GUEST_SYNC(2); 108 + 109 + check_tsc_msr_rdtsc(); 110 + 111 + GUEST_SYNC(3); 112 + 113 + /* Set up TSC page is disabled state, check that it's clean */ 114 + wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa); 115 + GUEST_ASSERT(tsc_page->tsc_sequence == 0); 116 + GUEST_ASSERT(tsc_page->tsc_scale == 0); 117 + GUEST_ASSERT(tsc_page->tsc_offset == 0); 118 + 119 + GUEST_SYNC(4); 120 + 121 + /* Set up TSC page is enabled state */ 122 + wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1); 123 + GUEST_ASSERT(tsc_page->tsc_sequence != 0); 124 + 125 + GUEST_SYNC(5); 126 + 127 + check_tsc_msr_tsc_page(tsc_page); 128 + 129 + GUEST_SYNC(6); 130 + 131 + tsc_offset = tsc_page->tsc_offset; 132 + /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */ 133 + GUEST_SYNC(7); 134 + GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset); 135 + 136 + nop_loop(); 137 + 138 + /* 139 + * Enable Re-enlightenment and check that TSC page stays constant across 140 + * KVM_SET_CLOCK. 
141 + */ 142 + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff); 143 + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1); 144 + tsc_offset = tsc_page->tsc_offset; 145 + tsc_scale = tsc_page->tsc_scale; 146 + GUEST_SYNC(8); 147 + GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset); 148 + GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale); 149 + 150 + GUEST_SYNC(9); 151 + 152 + check_tsc_msr_tsc_page(tsc_page); 153 + 154 + /* 155 + * Disable re-enlightenment and TSC page, check that KVM doesn't update 156 + * it anymore. 157 + */ 158 + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); 159 + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0); 160 + wrmsr(HV_X64_MSR_REFERENCE_TSC, 0); 161 + memset(tsc_page, 0, sizeof(*tsc_page)); 162 + 163 + GUEST_SYNC(10); 164 + GUEST_ASSERT(tsc_page->tsc_sequence == 0); 165 + GUEST_ASSERT(tsc_page->tsc_offset == 0); 166 + GUEST_ASSERT(tsc_page->tsc_scale == 0); 167 + 168 + GUEST_DONE(); 169 + } 170 + 171 + #define VCPU_ID 0 172 + 173 + static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm) 174 + { 175 + u64 tsc_freq, r1, r2, t1, t2; 176 + s64 delta_ns; 177 + 178 + tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY); 179 + TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); 180 + 181 + /* First, check MSR-based clocksource */ 182 + r1 = rdtsc(); 183 + t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); 184 + nop_loop(); 185 + r2 = rdtsc(); 186 + t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); 187 + 188 + TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); 189 + 190 + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ 191 + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); 192 + if (delta_ns < 0) 193 + delta_ns = -delta_ns; 194 + 195 + /* 1% tolerance */ 196 + TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100, 197 + "Elapsed time does not match (MSR=%ld, TSC=%ld)", 198 + (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq); 199 + } 200 + 201 + int main(void) 202 + { 203 + 
struct kvm_vm *vm; 204 + struct kvm_run *run; 205 + struct ucall uc; 206 + vm_vaddr_t tsc_page_gva; 207 + int stage; 208 + 209 + vm = vm_create_default(VCPU_ID, 0, guest_main); 210 + run = vcpu_state(vm, VCPU_ID); 211 + 212 + vcpu_set_hv_cpuid(vm, VCPU_ID); 213 + 214 + tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); 215 + memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); 216 + TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, 217 + "TSC page has to be page aligned\n"); 218 + vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); 219 + 220 + host_check_tsc_msr_rdtsc(vm); 221 + 222 + for (stage = 1;; stage++) { 223 + _vcpu_run(vm, VCPU_ID); 224 + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 225 + "Stage %d: unexpected exit reason: %u (%s),\n", 226 + stage, run->exit_reason, 227 + exit_reason_str(run->exit_reason)); 228 + 229 + switch (get_ucall(vm, VCPU_ID, &uc)) { 230 + case UCALL_ABORT: 231 + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], 232 + __FILE__, uc.args[1]); 233 + /* NOT REACHED */ 234 + case UCALL_SYNC: 235 + break; 236 + case UCALL_DONE: 237 + /* Keep in sync with guest_main() */ 238 + TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n", 239 + stage); 240 + goto out; 241 + default: 242 + TEST_FAIL("Unknown ucall %lu", uc.cmd); 243 + } 244 + 245 + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && 246 + uc.args[1] == stage, 247 + "Stage %d: Unexpected register values vmexit, got %lx", 248 + stage, (ulong)uc.args[1]); 249 + 250 + /* Reset kvmclock triggering TSC page update */ 251 + if (stage == 7 || stage == 8 || stage == 10) { 252 + struct kvm_clock_data clock = {0}; 253 + 254 + vm_ioctl(vm, KVM_SET_CLOCK, &clock); 255 + } 256 + } 257 + 258 + out: 259 + kvm_vm_free(vm); 260 + }
+166
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Test that KVM_SET_BOOT_CPU_ID works as intended 4 + * 5 + * Copyright (C) 2020, Red Hat, Inc. 6 + */ 7 + #define _GNU_SOURCE /* for program_invocation_name */ 8 + #include <fcntl.h> 9 + #include <stdio.h> 10 + #include <stdlib.h> 11 + #include <string.h> 12 + #include <sys/ioctl.h> 13 + 14 + #include "test_util.h" 15 + #include "kvm_util.h" 16 + #include "processor.h" 17 + 18 + #define N_VCPU 2 19 + #define VCPU_ID0 0 20 + #define VCPU_ID1 1 21 + 22 + static uint32_t get_bsp_flag(void) 23 + { 24 + return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; 25 + } 26 + 27 + static void guest_bsp_vcpu(void *arg) 28 + { 29 + GUEST_SYNC(1); 30 + 31 + GUEST_ASSERT(get_bsp_flag() != 0); 32 + 33 + GUEST_DONE(); 34 + } 35 + 36 + static void guest_not_bsp_vcpu(void *arg) 37 + { 38 + GUEST_SYNC(1); 39 + 40 + GUEST_ASSERT(get_bsp_flag() == 0); 41 + 42 + GUEST_DONE(); 43 + } 44 + 45 + static void test_set_boot_busy(struct kvm_vm *vm) 46 + { 47 + int res; 48 + 49 + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0); 50 + TEST_ASSERT(res == -1 && errno == EBUSY, 51 + "KVM_SET_BOOT_CPU_ID set while running vm"); 52 + } 53 + 54 + static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) 55 + { 56 + struct ucall uc; 57 + int stage; 58 + 59 + for (stage = 0; stage < 2; stage++) { 60 + 61 + vcpu_run(vm, vcpuid); 62 + 63 + switch (get_ucall(vm, vcpuid, &uc)) { 64 + case UCALL_SYNC: 65 + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && 66 + uc.args[1] == stage + 1, 67 + "Stage %d: Unexpected register values vmexit, got %lx", 68 + stage + 1, (ulong)uc.args[1]); 69 + test_set_boot_busy(vm); 70 + break; 71 + case UCALL_DONE: 72 + TEST_ASSERT(stage == 1, 73 + "Expected GUEST_DONE in stage 2, got stage %d", 74 + stage); 75 + break; 76 + case UCALL_ABORT: 77 + TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", 78 + (const char *)uc.args[0], __FILE__, 79 + uc.args[1], uc.args[2], uc.args[3]); 80 + default: 81 + 
TEST_ASSERT(false, "Unexpected exit: %s", 82 + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); 83 + } 84 + } 85 + } 86 + 87 + static struct kvm_vm *create_vm(void) 88 + { 89 + struct kvm_vm *vm; 90 + uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2; 91 + uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU; 92 + uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; 93 + 94 + pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages); 95 + vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR); 96 + 97 + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); 98 + vm_create_irqchip(vm); 99 + 100 + return vm; 101 + } 102 + 103 + static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code) 104 + { 105 + if (bsp_code) 106 + vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu); 107 + else 108 + vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu); 109 + 110 + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); 111 + } 112 + 113 + static void run_vm_bsp(uint32_t bsp_vcpu) 114 + { 115 + struct kvm_vm *vm; 116 + bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1; 117 + 118 + vm = create_vm(); 119 + 120 + if (is_bsp_vcpu1) 121 + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); 122 + 123 + add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1); 124 + add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1); 125 + 126 + run_vcpu(vm, VCPU_ID0); 127 + run_vcpu(vm, VCPU_ID1); 128 + 129 + kvm_vm_free(vm); 130 + } 131 + 132 + static void check_set_bsp_busy(void) 133 + { 134 + struct kvm_vm *vm; 135 + int res; 136 + 137 + vm = create_vm(); 138 + 139 + add_x86_vcpu(vm, VCPU_ID0, true); 140 + add_x86_vcpu(vm, VCPU_ID1, false); 141 + 142 + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); 143 + TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu"); 144 + 145 + run_vcpu(vm, VCPU_ID0); 146 + run_vcpu(vm, VCPU_ID1); 147 + 148 + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); 149 + TEST_ASSERT(res == -1 && errno == EBUSY, 
"KVM_SET_BOOT_CPU_ID set to a terminated vcpu"); 150 + 151 + kvm_vm_free(vm); 152 + } 153 + 154 + int main(int argc, char *argv[]) 155 + { 156 + if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) { 157 + print_skip("set_boot_cpu_id not available"); 158 + return 0; 159 + } 160 + 161 + run_vm_bsp(VCPU_ID0); 162 + run_vm_bsp(VCPU_ID1); 163 + run_vm_bsp(VCPU_ID0); 164 + 165 + check_set_bsp_busy(); 166 + }