Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'kvm-x86-misc-7.1' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 7.1

- Advertise support for AVX512 Bit Matrix Multiply (BMM) when it's present in
hardware (no additional emulation/virtualization required).

- Immediately fail the build if a required #define is missing in one of KVM's
headers that is included multiple times.

- Reject SET_GUEST_DEBUG with -EBUSY if an exception has already been injected,
mostly to prevent syzkaller from abusing the uAPI to trigger WARNs, but also
because it can help prevent userspace from unintentionally crashing the VM.

- Exempt SMM from CPUID faulting on Intel, as per the spec.

- Misc hardening and cleanup changes.

+81 -69
+1
arch/x86/include/asm/cpufeatures.h
··· 473 473 #define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ 474 474 475 475 #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ 476 + #define X86_FEATURE_AVX512_BMM (20*32+23) /* AVX512 Bit Matrix Multiply instructions */ 476 477 #define X86_FEATURE_ERAPS (20*32+24) /* Enhanced Return Address Predictor Security */ 477 478 #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ 478 479 #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+6 -4
arch/x86/include/asm/kvm-x86-ops.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 - #if !defined(KVM_X86_OP) || !defined(KVM_X86_OP_OPTIONAL) 3 - BUILD_BUG_ON(1) 4 - #endif 5 - 2 + #if !defined(KVM_X86_OP) || \ 3 + !defined(KVM_X86_OP_OPTIONAL) || \ 4 + !defined(KVM_X86_OP_OPTIONAL_RET0) 5 + #error Missing one or more KVM_X86_OP #defines 6 + #else 6 7 /* 7 8 * KVM_X86_OP() and KVM_X86_OP_OPTIONAL() are used to help generate 8 9 * both DECLARE/DEFINE_STATIC_CALL() invocations and ··· 149 148 KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) 150 149 KVM_X86_OP_OPTIONAL_RET0(gmem_max_mapping_level) 151 150 KVM_X86_OP_OPTIONAL(gmem_invalidate) 151 + #endif 152 152 153 153 #undef KVM_X86_OP 154 154 #undef KVM_X86_OP_OPTIONAL
+5 -3
arch/x86/include/asm/kvm-x86-pmu-ops.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 - #if !defined(KVM_X86_PMU_OP) || !defined(KVM_X86_PMU_OP_OPTIONAL) 3 - BUILD_BUG_ON(1) 4 - #endif 2 + #if !defined(KVM_X86_PMU_OP) || \ 3 + !defined(KVM_X86_PMU_OP_OPTIONAL) 4 + #error Missing one or more KVM_X86_PMU_OP #defines 5 + #else 5 6 6 7 /* 7 8 * KVM_X86_PMU_OP() and KVM_X86_PMU_OP_OPTIONAL() are used to help generate ··· 27 26 KVM_X86_PMU_OP_OPTIONAL(write_global_ctrl) 28 27 KVM_X86_PMU_OP(mediated_load) 29 28 KVM_X86_PMU_OP(mediated_put) 29 + #endif 30 30 31 31 #undef KVM_X86_PMU_OP 32 32 #undef KVM_X86_PMU_OP_OPTIONAL
+1 -1
arch/x86/include/asm/kvm_host.h
··· 1261 1261 __u32 nr_excludes; 1262 1262 __u64 *includes; 1263 1263 __u64 *excludes; 1264 - __u64 events[]; 1264 + __u64 events[] __counted_by(nevents); 1265 1265 }; 1266 1266 1267 1267 enum kvm_apicv_inhibit {
+4 -2
arch/x86/kvm/cpuid.c
··· 1246 1246 F(NULL_SEL_CLR_BASE), 1247 1247 /* UpperAddressIgnore */ 1248 1248 F(AUTOIBRS), 1249 - F(PREFETCHI), 1250 1249 EMULATED_F(NO_SMM_CTL_MSR), 1251 1250 /* PrefetchCtlMsr */ 1252 1251 /* GpOnUserCpuid */ 1253 1252 /* EPSF */ 1253 + F(PREFETCHI), 1254 + F(AVX512_BMM), 1254 1255 F(ERAPS), 1255 1256 SYNTHESIZED_F(SBPB), 1256 1257 SYNTHESIZED_F(IBPB_BRTYPE), ··· 2161 2160 { 2162 2161 u32 eax, ebx, ecx, edx; 2163 2162 2164 - if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0)) 2163 + if (!is_smm(vcpu) && cpuid_fault_enabled(vcpu) && 2164 + !kvm_require_cpl(vcpu, 0)) 2165 2165 return 1; 2166 2166 2167 2167 eax = kvm_rax_read(vcpu);
+5 -5
arch/x86/kvm/emulate.c
··· 3583 3583 u64 msr = 0; 3584 3584 3585 3585 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr); 3586 - if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT && 3587 - ctxt->ops->cpl(ctxt)) { 3586 + if (!ctxt->ops->is_smm(ctxt) && 3587 + (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT) && 3588 + ctxt->ops->cpl(ctxt)) 3588 3589 return emulate_gp(ctxt, 0); 3589 - } 3590 3590 3591 3591 eax = reg_read(ctxt, VCPU_REGS_RAX); 3592 3592 ecx = reg_read(ctxt, VCPU_REGS_RCX); ··· 3708 3708 */ 3709 3709 static int em_fxsave(struct x86_emulate_ctxt *ctxt) 3710 3710 { 3711 - struct fxregs_state fx_state; 3711 + struct fxregs_state fx_state = {}; 3712 3712 int rc; 3713 3713 3714 3714 rc = check_fxsr(ctxt); ··· 3738 3738 static noinline int fxregs_fixup(struct fxregs_state *fx_state, 3739 3739 const size_t used_size) 3740 3740 { 3741 - struct fxregs_state fx_tmp; 3741 + struct fxregs_state fx_tmp = {}; 3742 3742 int rc; 3743 3743 3744 3744 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
+8 -5
arch/x86/kvm/lapic.c
··· 840 840 { 841 841 int i, count = 0; 842 842 struct kvm_vcpu *vcpu; 843 + size_t map_index; 843 844 844 845 if (min > map->max_apic_id) 845 846 return 0; 846 847 847 - min = array_index_nospec(min, map->max_apic_id + 1); 848 - 849 848 for_each_set_bit(i, ipi_bitmap, 850 - min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { 851 - if (map->phys_map[min + i]) { 852 - vcpu = map->phys_map[min + i]->vcpu; 849 + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { 850 + map_index = array_index_nospec(min + i, map->max_apic_id + 1); 851 + if (map->phys_map[map_index]) { 852 + vcpu = map->phys_map[map_index]->vcpu; 853 853 count += kvm_apic_set_irq(vcpu, irq, NULL); 854 854 } 855 855 } ··· 2656 2656 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) 2657 2657 { 2658 2658 struct kvm_lapic *apic = vcpu->arch.apic; 2659 + 2660 + if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm)) 2661 + return; 2659 2662 2660 2663 /* 2661 2664 * ICR is a single 64-bit register when x2APIC is enabled, all others
+1 -1
arch/x86/kvm/pmu.c
··· 1256 1256 1257 1257 r = -EFAULT; 1258 1258 if (copy_from_user(filter->events, user_filter->events, 1259 - sizeof(filter->events[0]) * filter->nevents)) 1259 + flex_array_size(filter, events, filter->nevents))) 1260 1260 goto cleanup; 1261 1261 1262 1262 r = prepare_filter_lists(filter);
+2 -2
arch/x86/kvm/svm/avic.c
··· 86 86 * Enable / disable AVIC. In "auto" mode (default behavior), AVIC is enabled 87 87 * for Zen4+ CPUs with x2AVIC (and all other criteria for enablement are met). 88 88 */ 89 - static int avic = AVIC_AUTO_MODE; 89 + static int __ro_after_init avic = AVIC_AUTO_MODE; 90 90 module_param_cb(avic, &avic_ops, &avic, 0444); 91 91 __MODULE_PARM_TYPE(avic, "bool"); 92 92 93 93 module_param(enable_ipiv, bool, 0444); 94 94 95 - static bool force_avic; 95 + static bool __ro_after_init force_avic; 96 96 module_param_unsafe(force_avic, bool, 0444); 97 97 98 98 /* Note:
+4 -4
arch/x86/kvm/svm/sev.c
··· 52 52 #define SNP_GUEST_VMM_ERR_GENERIC (~0U) 53 53 54 54 /* enable/disable SEV support */ 55 - static bool sev_enabled = true; 55 + static bool __ro_after_init sev_enabled = true; 56 56 module_param_named(sev, sev_enabled, bool, 0444); 57 57 58 58 /* enable/disable SEV-ES support */ 59 - static bool sev_es_enabled = true; 59 + static bool __ro_after_init sev_es_enabled = true; 60 60 module_param_named(sev_es, sev_es_enabled, bool, 0444); 61 61 62 62 /* enable/disable SEV-SNP support */ 63 - static bool sev_snp_enabled = true; 63 + static bool __ro_after_init sev_snp_enabled = true; 64 64 module_param_named(sev_snp, sev_snp_enabled, bool, 0444); 65 65 66 - static unsigned int nr_ciphertext_hiding_asids; 66 + static unsigned int __ro_after_init nr_ciphertext_hiding_asids; 67 67 module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 0444); 68 68 69 69 #define AP_RESET_HOLD_NONE 0
+16 -23
arch/x86/kvm/svm/svm.c
··· 110 110 * count only mode. 111 111 */ 112 112 113 - static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP; 113 + static unsigned short __ro_after_init pause_filter_thresh = KVM_DEFAULT_PLE_GAP; 114 114 module_param(pause_filter_thresh, ushort, 0444); 115 115 116 - static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; 116 + static unsigned short __ro_after_init pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; 117 117 module_param(pause_filter_count, ushort, 0444); 118 118 119 119 /* Default doubles per-vcpu window every exit. */ 120 - static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; 120 + static unsigned short __ro_after_init pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; 121 121 module_param(pause_filter_count_grow, ushort, 0444); 122 122 123 123 /* Default resets per-vcpu window every exit to pause_filter_count. */ 124 - static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; 124 + static unsigned short __ro_after_init pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; 125 125 module_param(pause_filter_count_shrink, ushort, 0444); 126 126 127 127 /* Default is to compute the maximum so we can never overflow. */ 128 - static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; 128 + static unsigned short __ro_after_init pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; 129 129 module_param(pause_filter_count_max, ushort, 0444); 130 130 131 131 /* 132 132 * Use nested page tables by default. Note, NPT may get forced off by 133 133 * svm_hardware_setup() if it's unsupported by hardware or the host kernel. 
134 134 */ 135 - bool npt_enabled = true; 135 + bool __ro_after_init npt_enabled = true; 136 136 module_param_named(npt, npt_enabled, bool, 0444); 137 137 138 138 /* allow nested virtualization in KVM/SVM */ 139 - static int nested = true; 139 + static int __ro_after_init nested = true; 140 140 module_param(nested, int, 0444); 141 141 142 142 /* enable/disable Next RIP Save */ 143 - int nrips = true; 143 + int __ro_after_init nrips = true; 144 144 module_param(nrips, int, 0444); 145 145 146 146 /* enable/disable Virtual VMLOAD VMSAVE */ 147 - static int vls = true; 147 + static int __ro_after_init vls = true; 148 148 module_param(vls, int, 0444); 149 149 150 150 /* enable/disable Virtual GIF */ 151 - int vgif = true; 151 + int __ro_after_init vgif = true; 152 152 module_param(vgif, int, 0444); 153 153 154 154 /* enable/disable LBR virtualization */ 155 - int lbrv = true; 155 + int __ro_after_init lbrv = true; 156 156 module_param(lbrv, int, 0444); 157 157 158 - static int tsc_scaling = true; 158 + static int __ro_after_init tsc_scaling = true; 159 159 module_param(tsc_scaling, int, 0444); 160 160 161 161 module_param(enable_device_posted_irqs, bool, 0444); ··· 164 164 module_param(dump_invalid_vmcb, bool, 0644); 165 165 166 166 167 - bool intercept_smi = true; 167 + bool __ro_after_init intercept_smi = true; 168 168 module_param(intercept_smi, bool, 0444); 169 169 170 - bool vnmi = true; 170 + bool __ro_after_init vnmi = true; 171 171 module_param(vnmi, bool, 0444); 172 172 173 173 module_param(enable_mediated_pmu, bool, 0444); 174 174 175 - static bool svm_gp_erratum_intercept = true; 175 + static bool __ro_after_init svm_gp_erratum_intercept = true; 176 176 177 177 static u8 rsm_ins_bytes[] = "\x0f\xaa"; 178 178 179 - static unsigned long iopm_base; 179 + static unsigned long __read_mostly iopm_base; 180 180 181 181 DEFINE_PER_CPU(struct svm_cpu_data, svm_data); 182 182 ··· 5410 5410 pr_err_ratelimited("NX (Execute Disable) not supported\n"); 5411 5411 return 
-EOPNOTSUPP; 5412 5412 } 5413 - kvm_enable_efer_bits(EFER_NX); 5414 5413 5415 5414 kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | 5416 5415 XFEATURE_MASK_BNDCSR); 5417 - 5418 - if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) 5419 - kvm_enable_efer_bits(EFER_FFXSR); 5420 5416 5421 5417 if (tsc_scaling) { 5422 5418 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { ··· 5426 5430 kvm_caps.tsc_scaling_ratio_frac_bits = 32; 5427 5431 5428 5432 tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); 5429 - 5430 - if (boot_cpu_has(X86_FEATURE_AUTOIBRS)) 5431 - kvm_enable_efer_bits(EFER_AUTOIBRS); 5432 5433 5433 5434 /* Check for pause filtering support */ 5434 5435 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+3 -2
arch/x86/kvm/vmx/vmcs_shadow_fields.h
··· 1 1 #if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW) 2 - BUILD_BUG_ON(1) 3 - #endif 2 + #error Must #define at least one of SHADOW_FIELD_RO or SHADOW_FIELD_RW 3 + #else 4 4 5 5 #ifndef SHADOW_FIELD_RO 6 6 #define SHADOW_FIELD_RO(x, y) ··· 74 74 /* 64-bit */ 75 75 SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address) 76 76 SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address) 77 + #endif 77 78 78 79 #undef SHADOW_FIELD_RO 79 80 #undef SHADOW_FIELD_RW
-4
arch/x86/kvm/vmx/vmx.c
··· 8698 8698 8699 8699 vmx_setup_user_return_msrs(); 8700 8700 8701 - 8702 - if (boot_cpu_has(X86_FEATURE_NX)) 8703 - kvm_enable_efer_bits(EFER_NX); 8704 - 8705 8701 if (boot_cpu_has(X86_FEATURE_MPX)) { 8706 8702 rdmsrq(MSR_IA32_BNDCFGS, host_bndcfgs); 8707 8703 WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost");
+19 -7
arch/x86/kvm/x86.c
··· 9998 9998 } 9999 9999 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps); 10000 10000 10001 + static void kvm_setup_efer_caps(void) 10002 + { 10003 + if (kvm_cpu_cap_has(X86_FEATURE_NX)) 10004 + kvm_enable_efer_bits(EFER_NX); 10005 + 10006 + if (kvm_cpu_cap_has(X86_FEATURE_FXSR_OPT)) 10007 + kvm_enable_efer_bits(EFER_FFXSR); 10008 + 10009 + if (kvm_cpu_cap_has(X86_FEATURE_AUTOIBRS)) 10010 + kvm_enable_efer_bits(EFER_AUTOIBRS); 10011 + } 10012 + 10001 10013 static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) 10002 10014 { 10003 10015 memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); ··· 10145 10133 r = ops->hardware_setup(); 10146 10134 if (r != 0) 10147 10135 goto out_mmu_exit; 10136 + 10137 + kvm_setup_efer_caps(); 10148 10138 10149 10139 enable_device_posted_irqs &= enable_apicv && 10150 10140 irq_remapping_cap(IRQ_POSTING_CAP); ··· 10750 10736 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | 10751 10737 X86_EFLAGS_RF); 10752 10738 10753 - if (vcpu->arch.exception.vector == DB_VECTOR) { 10754 - kvm_deliver_exception_payload(vcpu, &vcpu->arch.exception); 10755 - if (vcpu->arch.dr7 & DR7_GD) { 10756 - vcpu->arch.dr7 &= ~DR7_GD; 10757 - kvm_update_dr7(vcpu); 10758 - } 10739 + if (vcpu->arch.exception.vector == DB_VECTOR && 10740 + vcpu->arch.dr7 & DR7_GD) { 10741 + vcpu->arch.dr7 &= ~DR7_GD; 10742 + kvm_update_dr7(vcpu); 10759 10743 } 10760 10744 10761 10745 kvm_inject_exception(vcpu); ··· 12541 12529 12542 12530 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { 12543 12531 r = -EBUSY; 12544 - if (kvm_is_exception_pending(vcpu)) 12532 + if (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected) 12545 12533 goto out; 12546 12534 if (dbg->control & KVM_GUESTDBG_INJECT_DB) 12547 12535 kvm_queue_exception(vcpu, DB_VECTOR);
+6 -6
virt/kvm/kvm_main.c
··· 76 76 MODULE_LICENSE("GPL"); 77 77 78 78 /* Architectures should define their poll value according to the halt latency */ 79 - unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; 79 + unsigned int __read_mostly halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; 80 80 module_param(halt_poll_ns, uint, 0644); 81 81 EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns); 82 82 83 83 /* Default doubles per-vcpu halt_poll_ns. */ 84 - unsigned int halt_poll_ns_grow = 2; 84 + unsigned int __read_mostly halt_poll_ns_grow = 2; 85 85 module_param(halt_poll_ns_grow, uint, 0644); 86 86 EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_grow); 87 87 88 88 /* The start value to grow halt_poll_ns from */ 89 - unsigned int halt_poll_ns_grow_start = 10000; /* 10us */ 89 + unsigned int __read_mostly halt_poll_ns_grow_start = 10000; /* 10us */ 90 90 module_param(halt_poll_ns_grow_start, uint, 0644); 91 91 EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_grow_start); 92 92 93 93 /* Default halves per-vcpu halt_poll_ns. */ 94 - unsigned int halt_poll_ns_shrink = 2; 94 + unsigned int __read_mostly halt_poll_ns_shrink = 2; 95 95 module_param(halt_poll_ns_shrink, uint, 0644); 96 96 EXPORT_SYMBOL_FOR_KVM_INTERNAL(halt_poll_ns_shrink); 97 97 ··· 99 99 * Allow direct access (from KVM or the CPU) without MMU notifier protection 100 100 * to unpinned pages. 101 101 */ 102 - static bool allow_unsafe_mappings; 102 + static bool __ro_after_init allow_unsafe_mappings; 103 103 module_param(allow_unsafe_mappings, bool, 0444); 104 104 105 105 /* ··· 5574 5574 }; 5575 5575 5576 5576 #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING 5577 - bool enable_virt_at_load = true; 5577 + bool __ro_after_init enable_virt_at_load = true; 5578 5578 module_param(enable_virt_at_load, bool, 0444); 5579 5579 EXPORT_SYMBOL_FOR_KVM_INTERNAL(enable_virt_at_load); 5580 5580