Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
"x86:

- fix NULL dereference when using userspace lapic

- optimize spectre v1 mitigations by allowing guests to use LFENCE

- make microcode revision configurable to prevent guests from
unnecessarily blacklisting spectre v2 mitigation feature"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: fix vcpu initialization with userspace lapic
KVM: X86: Allow userspace to define the microcode version
KVM: X86: Introduce kvm_get_msr_feature()
KVM: SVM: Add MSR-based feature support for serializing LFENCE
KVM: x86: Add a framework for supporting MSR-based features

+179 -29
+28 -12
Documentation/virtual/kvm/api.txt
··· 123 123 flag KVM_VM_MIPS_VZ. 124 124 125 125 126 - 4.3 KVM_GET_MSR_INDEX_LIST 126 + 4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST 127 127 128 - Capability: basic 128 + Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST 129 129 Architectures: x86 130 - Type: system 130 + Type: system ioctl 131 131 Parameters: struct kvm_msr_list (in/out) 132 132 Returns: 0 on success; -1 on error 133 133 Errors: 134 + EFAULT: the msr index list cannot be read from or written to 134 135 E2BIG: the msr index list is too big to fit in the array specified by 135 136 the user. 136 137 ··· 140 139 __u32 indices[0]; 141 140 }; 142 141 143 - This ioctl returns the guest msrs that are supported. The list varies 144 - by kvm version and host processor, but does not change otherwise. The 145 - user fills in the size of the indices array in nmsrs, and in return 146 - kvm adjusts nmsrs to reflect the actual number of msrs and fills in 147 - the indices array with their numbers. 142 + The user fills in the size of the indices array in nmsrs, and in return 143 + kvm adjusts nmsrs to reflect the actual number of msrs and fills in the 144 + indices array with their numbers. 145 + 146 + KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported. The list 147 + varies by kvm version and host processor, but does not change otherwise. 148 148 149 149 Note: if kvm indicates support for MCE (KVM_CAP_MCE), then the MCE bank MSRs are 150 150 not returned in the MSR list, as different vcpus can have a different number 151 151 of banks, as set via the KVM_X86_SETUP_MCE ioctl. 152 + 153 + KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed 154 + to the KVM_GET_MSRS system ioctl. This lets userspace probe host capabilities 155 + and processor features that are exposed via MSRs (e.g., VMX capabilities). 156 + This list also varies by kvm version and host processor, but does not change 157 + otherwise. 
152 158 153 159 154 160 4.4 KVM_CHECK_EXTENSION ··· 483 475 484 476 4.18 KVM_GET_MSRS 485 477 486 - Capability: basic 478 + Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system) 487 479 Architectures: x86 488 - Type: vcpu ioctl 480 + Type: system ioctl, vcpu ioctl 489 481 Parameters: struct kvm_msrs (in/out) 490 - Returns: 0 on success, -1 on error 482 + Returns: number of msrs successfully returned; 483 + -1 on error 491 484 485 + When used as a system ioctl: 486 + Reads the values of MSR-based features that are available for the VM. This 487 + is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values. 488 + The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST 489 + in a system ioctl. 490 + 491 + When used as a vcpu ioctl: 492 492 Reads model-specific registers from the vcpu. Supported msr indices can 493 - be obtained using KVM_GET_MSR_INDEX_LIST. 493 + be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl. 494 494 495 495 struct kvm_msrs { 496 496 __u32 nmsrs; /* number of msrs in entries */
+3
arch/x86/include/asm/kvm_host.h
··· 507 507 u64 smi_count; 508 508 bool tpr_access_reporting; 509 509 u64 ia32_xss; 510 + u64 microcode_version; 510 511 511 512 /* 512 513 * Paging state of the vcpu ··· 1096 1095 int (*mem_enc_op)(struct kvm *kvm, void __user *argp); 1097 1096 int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); 1098 1097 int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); 1098 + 1099 + int (*get_msr_feature)(struct kvm_msr_entry *entry); 1099 1100 }; 1100 1101 1101 1102 struct kvm_arch_async_pf {
+4 -6
arch/x86/kvm/lapic.c
··· 2002 2002 2003 2003 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) 2004 2004 { 2005 - struct kvm_lapic *apic; 2005 + struct kvm_lapic *apic = vcpu->arch.apic; 2006 2006 int i; 2007 2007 2008 - apic_debug("%s\n", __func__); 2008 + if (!apic) 2009 + return; 2009 2010 2010 - ASSERT(vcpu); 2011 - apic = vcpu->arch.apic; 2012 - ASSERT(apic != NULL); 2011 + apic_debug("%s\n", __func__); 2013 2012 2014 2013 /* Stop the timer in case it's a reset to an active apic */ 2015 2014 hrtimer_cancel(&apic->lapic_timer.timer); ··· 2567 2568 2568 2569 pe = xchg(&apic->pending_events, 0); 2569 2570 if (test_bit(KVM_APIC_INIT, &pe)) { 2570 - kvm_lapic_reset(vcpu, true); 2571 2571 kvm_vcpu_reset(vcpu, true); 2572 2572 if (kvm_vcpu_is_bsp(apic->vcpu)) 2573 2573 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+41 -3
arch/x86/kvm/svm.c
··· 179 179 uint64_t sysenter_eip; 180 180 uint64_t tsc_aux; 181 181 182 + u64 msr_decfg; 183 + 182 184 u64 next_rip; 183 185 184 186 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; ··· 1908 1906 u32 dummy; 1909 1907 u32 eax = 1; 1910 1908 1909 + vcpu->arch.microcode_version = 0x01000065; 1911 1910 svm->spec_ctrl = 0; 1912 1911 1913 1912 if (!init_event) { ··· 3873 3870 return 0; 3874 3871 } 3875 3872 3873 + static int svm_get_msr_feature(struct kvm_msr_entry *msr) 3874 + { 3875 + msr->data = 0; 3876 + 3877 + switch (msr->index) { 3878 + case MSR_F10H_DECFG: 3879 + if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) 3880 + msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; 3881 + break; 3882 + default: 3883 + return 1; 3884 + } 3885 + 3886 + return 0; 3887 + } 3888 + 3876 3889 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 3877 3890 { 3878 3891 struct vcpu_svm *svm = to_svm(vcpu); ··· 3964 3945 3965 3946 msr_info->data = svm->spec_ctrl; 3966 3947 break; 3967 - case MSR_IA32_UCODE_REV: 3968 - msr_info->data = 0x01000065; 3969 - break; 3970 3948 case MSR_F15H_IC_CFG: { 3971 3949 3972 3950 int family, model; ··· 3980 3964 (model >= 0x2 && model < 0x20)) 3981 3965 msr_info->data = 0x1E; 3982 3966 } 3967 + break; 3968 + case MSR_F10H_DECFG: 3969 + msr_info->data = svm->msr_decfg; 3983 3970 break; 3984 3971 default: 3985 3972 return kvm_get_msr_common(vcpu, msr_info); ··· 4162 4143 case MSR_VM_IGNNE: 4163 4144 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 4164 4145 break; 4146 + case MSR_F10H_DECFG: { 4147 + struct kvm_msr_entry msr_entry; 4148 + 4149 + msr_entry.index = msr->index; 4150 + if (svm_get_msr_feature(&msr_entry)) 4151 + return 1; 4152 + 4153 + /* Check the supported bits */ 4154 + if (data & ~msr_entry.data) 4155 + return 1; 4156 + 4157 + /* Don't allow the guest to change a bit, #GP */ 4158 + if (!msr->host_initiated && (data ^ msr_entry.data)) 4159 + return 1; 4160 + 4161 + svm->msr_decfg = data; 4162 + break; 4163 + } 
4165 4164 case MSR_IA32_APICBASE: 4166 4165 if (kvm_vcpu_apicv_active(vcpu)) 4167 4166 avic_update_vapic_bar(to_svm(vcpu), data); ··· 6870 6833 .vcpu_unblocking = svm_vcpu_unblocking, 6871 6834 6872 6835 .update_bp_intercept = update_bp_intercept, 6836 + .get_msr_feature = svm_get_msr_feature, 6873 6837 .get_msr = svm_get_msr, 6874 6838 .set_msr = svm_set_msr, 6875 6839 .get_segment_base = svm_get_segment_base,
+7
arch/x86/kvm/vmx.c
··· 3227 3227 return !(val & ~valid_bits); 3228 3228 } 3229 3229 3230 + static int vmx_get_msr_feature(struct kvm_msr_entry *msr) 3231 + { 3232 + return 1; 3233 + } 3234 + 3230 3235 /* 3231 3236 * Reads an msr value (of 'msr_index') into 'pdata'. 3232 3237 * Returns 0 on success, non-0 otherwise. ··· 5772 5767 vmx->rmode.vm86_active = 0; 5773 5768 vmx->spec_ctrl = 0; 5774 5769 5770 + vcpu->arch.microcode_version = 0x100000000ULL; 5775 5771 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 5776 5772 kvm_set_cr8(vcpu, 0); 5777 5773 ··· 12303 12297 .vcpu_put = vmx_vcpu_put, 12304 12298 12305 12299 .update_bp_intercept = update_exception_bitmap, 12300 + .get_msr_feature = vmx_get_msr_feature, 12306 12301 .get_msr = vmx_get_msr, 12307 12302 .set_msr = vmx_set_msr, 12308 12303 .get_segment_base = vmx_get_segment_base,
+94 -8
arch/x86/kvm/x86.c
··· 1049 1049 1050 1050 static unsigned num_emulated_msrs; 1051 1051 1052 + /* 1053 + * List of msr numbers which are used to expose MSR-based features that 1054 + * can be used by a hypervisor to validate requested CPU features. 1055 + */ 1056 + static u32 msr_based_features[] = { 1057 + MSR_F10H_DECFG, 1058 + MSR_IA32_UCODE_REV, 1059 + }; 1060 + 1061 + static unsigned int num_msr_based_features; 1062 + 1063 + static int kvm_get_msr_feature(struct kvm_msr_entry *msr) 1064 + { 1065 + switch (msr->index) { 1066 + case MSR_IA32_UCODE_REV: 1067 + rdmsrl(msr->index, msr->data); 1068 + break; 1069 + default: 1070 + if (kvm_x86_ops->get_msr_feature(msr)) 1071 + return 1; 1072 + } 1073 + return 0; 1074 + } 1075 + 1076 + static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) 1077 + { 1078 + struct kvm_msr_entry msr; 1079 + int r; 1080 + 1081 + msr.index = index; 1082 + r = kvm_get_msr_feature(&msr); 1083 + if (r) 1084 + return r; 1085 + 1086 + *data = msr.data; 1087 + 1088 + return 0; 1089 + } 1090 + 1052 1091 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) 1053 1092 { 1054 1093 if (efer & efer_reserved_bits) ··· 2261 2222 2262 2223 switch (msr) { 2263 2224 case MSR_AMD64_NB_CFG: 2264 - case MSR_IA32_UCODE_REV: 2265 2225 case MSR_IA32_UCODE_WRITE: 2266 2226 case MSR_VM_HSAVE_PA: 2267 2227 case MSR_AMD64_PATCH_LOADER: ··· 2268 2230 case MSR_AMD64_DC_CFG: 2269 2231 break; 2270 2232 2233 + case MSR_IA32_UCODE_REV: 2234 + if (msr_info->host_initiated) 2235 + vcpu->arch.microcode_version = data; 2236 + break; 2271 2237 case MSR_EFER: 2272 2238 return set_efer(vcpu, data); 2273 2239 case MSR_K7_HWCR: ··· 2567 2525 msr_info->data = 0; 2568 2526 break; 2569 2527 case MSR_IA32_UCODE_REV: 2570 - msr_info->data = 0x100000000ULL; 2528 + msr_info->data = vcpu->arch.microcode_version; 2571 2529 break; 2572 2530 case MSR_MTRRcap: 2573 2531 case 0x200 ... 
0x2ff: ··· 2722 2680 int (*do_msr)(struct kvm_vcpu *vcpu, 2723 2681 unsigned index, u64 *data)) 2724 2682 { 2725 - int i, idx; 2683 + int i; 2726 2684 2727 - idx = srcu_read_lock(&vcpu->kvm->srcu); 2728 2685 for (i = 0; i < msrs->nmsrs; ++i) 2729 2686 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 2730 2687 break; 2731 - srcu_read_unlock(&vcpu->kvm->srcu, idx); 2732 2688 2733 2689 return i; 2734 2690 } ··· 2825 2785 case KVM_CAP_SET_BOOT_CPU_ID: 2826 2786 case KVM_CAP_SPLIT_IRQCHIP: 2827 2787 case KVM_CAP_IMMEDIATE_EXIT: 2788 + case KVM_CAP_GET_MSR_FEATURES: 2828 2789 r = 1; 2829 2790 break; 2830 2791 case KVM_CAP_ADJUST_CLOCK: ··· 2939 2898 sizeof(kvm_mce_cap_supported))) 2940 2899 goto out; 2941 2900 r = 0; 2901 + break; 2902 + case KVM_GET_MSR_FEATURE_INDEX_LIST: { 2903 + struct kvm_msr_list __user *user_msr_list = argp; 2904 + struct kvm_msr_list msr_list; 2905 + unsigned int n; 2906 + 2907 + r = -EFAULT; 2908 + if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) 2909 + goto out; 2910 + n = msr_list.nmsrs; 2911 + msr_list.nmsrs = num_msr_based_features; 2912 + if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) 2913 + goto out; 2914 + r = -E2BIG; 2915 + if (n < msr_list.nmsrs) 2916 + goto out; 2917 + r = -EFAULT; 2918 + if (copy_to_user(user_msr_list->indices, &msr_based_features, 2919 + num_msr_based_features * sizeof(u32))) 2920 + goto out; 2921 + r = 0; 2922 + break; 2923 + } 2924 + case KVM_GET_MSRS: 2925 + r = msr_io(NULL, argp, do_get_msr_feature, 1); 2942 2926 break; 2943 2927 } 2944 2928 default: ··· 3702 3636 r = 0; 3703 3637 break; 3704 3638 } 3705 - case KVM_GET_MSRS: 3639 + case KVM_GET_MSRS: { 3640 + int idx = srcu_read_lock(&vcpu->kvm->srcu); 3706 3641 r = msr_io(vcpu, argp, do_get_msr, 1); 3642 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 3707 3643 break; 3708 - case KVM_SET_MSRS: 3644 + } 3645 + case KVM_SET_MSRS: { 3646 + int idx = srcu_read_lock(&vcpu->kvm->srcu); 3709 3647 r = msr_io(vcpu, argp, do_set_msr, 0); 3648 
+ srcu_read_unlock(&vcpu->kvm->srcu, idx); 3710 3649 break; 3650 + } 3711 3651 case KVM_TPR_ACCESS_REPORTING: { 3712 3652 struct kvm_tpr_access_ctl tac; 3713 3653 ··· 4536 4464 j++; 4537 4465 } 4538 4466 num_emulated_msrs = j; 4467 + 4468 + for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { 4469 + struct kvm_msr_entry msr; 4470 + 4471 + msr.index = msr_based_features[i]; 4472 + if (kvm_get_msr_feature(&msr)) 4473 + continue; 4474 + 4475 + if (j < i) 4476 + msr_based_features[j] = msr_based_features[i]; 4477 + j++; 4478 + } 4479 + num_msr_based_features = j; 4539 4480 } 4540 4481 4541 4482 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, ··· 8060 7975 kvm_vcpu_mtrr_init(vcpu); 8061 7976 vcpu_load(vcpu); 8062 7977 kvm_vcpu_reset(vcpu, false); 8063 - kvm_lapic_reset(vcpu, false); 8064 7978 kvm_mmu_setup(vcpu); 8065 7979 vcpu_put(vcpu); 8066 7980 return 0; ··· 8102 8018 8103 8019 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) 8104 8020 { 8021 + kvm_lapic_reset(vcpu, init_event); 8022 + 8105 8023 vcpu->arch.hflags = 0; 8106 8024 8107 8025 vcpu->arch.smi_pending = 0;
+2
include/uapi/linux/kvm.h
··· 761 761 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 762 762 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 763 763 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) 764 + #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) 764 765 765 766 /* 766 767 * Extension capability list. ··· 935 934 #define KVM_CAP_S390_AIS_MIGRATION 150 936 935 #define KVM_CAP_PPC_GET_CPU_CHAR 151 937 936 #define KVM_CAP_S390_BPB 152 937 + #define KVM_CAP_GET_MSR_FEATURES 153 938 938 939 939 #ifdef KVM_CAP_IRQ_ROUTING 940 940