Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"ARM:

- Address some fallout of the locking rework, this time affecting the
way the vgic is configured

- Fix an issue where the page table walker frees a subtree and then
proceeds with walking what it has just freed...

- Check that a given PA donated to the guest is actually memory (only
affecting pKVM)

- Correctly handle MTE CMOs by Set/Way

- Fix the reported address of a watchpoint forwarded to userspace

- Fix the freeing of the root of stage-2 page tables

- Stop creating spurious PMU events to perform detection of the
default PMU and use the existing PMU list instead

x86:

- Fix a memslot lookup bug in the NX recovery thread that could
theoretically let userspace bypass the NX hugepage mitigation

- Fix a s/BLOCKING/PENDING bug in SVM's vNMI support (the vNMI-pending
check tested V_NMI_BLOCKING_MASK instead of V_NMI_PENDING_MASK)

- Account exit stats for fastpath VM-Exits that never leave the super
tight run-loop

- Fix an out-of-bounds bug in the optimized APIC map code, and add a
regression test for the race"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: selftests: Add test for race in kvm_recalculate_apic_map()
KVM: x86: Bail from kvm_recalculate_phys_map() if x2APIC ID is out-of-bounds
KVM: x86: Account fastpath-only VM-Exits in vCPU stats
KVM: SVM: vNMI pending bit is V_NMI_PENDING_MASK not V_NMI_BLOCKING_MASK
KVM: x86/mmu: Grab memslot for correct address space in NX recovery worker
KVM: arm64: Document default vPMU behavior on heterogeneous systems
KVM: arm64: Iterate arm_pmus list to probe for default PMU
KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed()
KVM: arm64: Populate fault info for watchpoint
KVM: arm64: Reload PTE after invoking walker callback on preorder traversal
KVM: arm64: Handle trap of tagged Set/Way CMOs
arm64: Add missing Set/Way CMO encodings
KVM: arm64: Prevent unconditional donation of unmapped regions from the host
KVM: arm64: vgic: Fix a comment
KVM: arm64: vgic: Fix locking comment
KVM: arm64: vgic: Wrap vgic_its_create() with config_lock
KVM: arm64: vgic: Fix a circular locking issue

+248 -95
+3 -3
arch/arm64/include/asm/kvm_pgtable.h
··· 632 632 * 633 633 * The walker will walk the page-table entries corresponding to the input 634 634 * address range specified, visiting entries according to the walker flags. 635 - * Invalid entries are treated as leaf entries. Leaf entries are reloaded 636 - * after invoking the walker callback, allowing the walker to descend into 637 - * a newly installed table. 635 + * Invalid entries are treated as leaf entries. The visited page table entry is 636 + * reloaded after invoking the walker callback, allowing the walker to descend 637 + * into a newly installed table. 638 638 * 639 639 * Returning a negative error code from the walker callback function will 640 640 * terminate the walk immediately with the same error code.
+6
arch/arm64/include/asm/sysreg.h
··· 115 115 #define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) 116 116 117 117 #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) 118 + #define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4) 119 + #define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6) 118 120 #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) 121 + #define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4) 122 + #define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6) 119 123 #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) 124 + #define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4) 125 + #define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6) 120 126 121 127 /* 122 128 * Automatically generated definitions for system registers, the
+6 -2
arch/arm64/kvm/hyp/include/hyp/switch.h
··· 412 412 return false; 413 413 } 414 414 415 - static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) 415 + static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code) 416 416 { 417 417 if (!__populate_fault_info(vcpu)) 418 418 return true; 419 419 420 420 return false; 421 421 } 422 + static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) 423 + __alias(kvm_hyp_handle_memory_fault); 424 + static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) 425 + __alias(kvm_hyp_handle_memory_fault); 422 426 423 427 static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) 424 428 { 425 - if (!__populate_fault_info(vcpu)) 429 + if (kvm_hyp_handle_memory_fault(vcpu, exit_code)) 426 430 return true; 427 431 428 432 if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+7 -7
arch/arm64/kvm/hyp/nvhe/mem_protect.c
··· 575 575 576 576 struct check_walk_data { 577 577 enum pkvm_page_state desired; 578 - enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); 578 + enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr); 579 579 }; 580 580 581 581 static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, ··· 583 583 { 584 584 struct check_walk_data *d = ctx->arg; 585 585 586 - if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old))) 587 - return -EINVAL; 588 - 589 - return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM; 586 + return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM; 590 587 } 591 588 592 589 static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, ··· 598 601 return kvm_pgtable_walk(pgt, addr, size, &walker); 599 602 } 600 603 601 - static enum pkvm_page_state host_get_page_state(kvm_pte_t pte) 604 + static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr) 602 605 { 606 + if (!addr_is_allowed_memory(addr)) 607 + return PKVM_NOPAGE; 608 + 603 609 if (!kvm_pte_valid(pte) && pte) 604 610 return PKVM_NOPAGE; 605 611 ··· 709 709 return host_stage2_set_owner_locked(addr, size, host_id); 710 710 } 711 711 712 - static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) 712 + static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr) 713 713 { 714 714 if (!kvm_pte_valid(pte)) 715 715 return PKVM_NOPAGE;
+2
arch/arm64/kvm/hyp/nvhe/switch.c
··· 186 186 [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, 187 187 [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, 188 188 [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, 189 + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, 189 190 [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, 190 191 }; 191 192 ··· 197 196 [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, 198 197 [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, 199 198 [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, 199 + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, 200 200 [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, 201 201 }; 202 202
+16 -1
arch/arm64/kvm/hyp/pgtable.c
··· 209 209 .flags = flags, 210 210 }; 211 211 int ret = 0; 212 + bool reload = false; 212 213 kvm_pteref_t childp; 213 214 bool table = kvm_pte_table(ctx.old, level); 214 215 215 - if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) 216 + if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) { 216 217 ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); 218 + reload = true; 219 + } 217 220 218 221 if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { 219 222 ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); 223 + reload = true; 224 + } 225 + 226 + /* 227 + * Reload the page table after invoking the walker callback for leaf 228 + * entries or after pre-order traversal, to allow the walker to descend 229 + * into a newly installed or replaced table. 230 + */ 231 + if (reload) { 220 232 ctx.old = READ_ONCE(*ptep); 221 233 table = kvm_pte_table(ctx.old, level); 222 234 } ··· 1332 1320 }; 1333 1321 1334 1322 WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1)); 1323 + 1324 + WARN_ON(mm_ops->page_count(pgtable) != 1); 1325 + mm_ops->put_page(pgtable); 1335 1326 }
+1
arch/arm64/kvm/hyp/vhe/switch.c
··· 110 110 [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, 111 111 [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, 112 112 [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, 113 + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, 113 114 [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, 114 115 }; 115 116
+23 -35
arch/arm64/kvm/pmu-emul.c
··· 694 694 695 695 static struct arm_pmu *kvm_pmu_probe_armpmu(void) 696 696 { 697 - struct perf_event_attr attr = { }; 698 - struct perf_event *event; 699 - struct arm_pmu *pmu = NULL; 697 + struct arm_pmu *tmp, *pmu = NULL; 698 + struct arm_pmu_entry *entry; 699 + int cpu; 700 700 701 - /* 702 - * Create a dummy event that only counts user cycles. As we'll never 703 - * leave this function with the event being live, it will never 704 - * count anything. But it allows us to probe some of the PMU 705 - * details. Yes, this is terrible. 706 - */ 707 - attr.type = PERF_TYPE_RAW; 708 - attr.size = sizeof(attr); 709 - attr.pinned = 1; 710 - attr.disabled = 0; 711 - attr.exclude_user = 0; 712 - attr.exclude_kernel = 1; 713 - attr.exclude_hv = 1; 714 - attr.exclude_host = 1; 715 - attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; 716 - attr.sample_period = GENMASK(63, 0); 701 + mutex_lock(&arm_pmus_lock); 717 702 718 - event = perf_event_create_kernel_counter(&attr, -1, current, 719 - kvm_pmu_perf_overflow, &attr); 703 + cpu = smp_processor_id(); 704 + list_for_each_entry(entry, &arm_pmus, entry) { 705 + tmp = entry->arm_pmu; 720 706 721 - if (IS_ERR(event)) { 722 - pr_err_once("kvm: pmu event creation failed %ld\n", 723 - PTR_ERR(event)); 724 - return NULL; 707 + if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) { 708 + pmu = tmp; 709 + break; 710 + } 725 711 } 726 712 727 - if (event->pmu) { 728 - pmu = to_arm_pmu(event->pmu); 729 - if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || 730 - pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) 731 - pmu = NULL; 732 - } 733 - 734 - perf_event_disable(event); 735 - perf_event_release_kernel(event); 713 + mutex_unlock(&arm_pmus_lock); 736 714 737 715 return pmu; 738 716 } ··· 890 912 return -EBUSY; 891 913 892 914 if (!kvm->arch.arm_pmu) { 893 - /* No PMU set, get the default one */ 915 + /* 916 + * No PMU set, get the default one. 
917 + * 918 + * The observant among you will notice that the supported_cpus 919 + * mask does not get updated for the default PMU even though it 920 + * is quite possible the selected instance supports only a 921 + * subset of cores in the system. This is intentional, and 922 + * upholds the preexisting behavior on heterogeneous systems 923 + * where vCPUs can be scheduled on any core but the guest 924 + * counters could stop working. 925 + */ 894 926 kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); 895 927 if (!kvm->arch.arm_pmu) 896 928 return -ENODEV;
+19
arch/arm64/kvm/sys_regs.c
··· 211 211 return true; 212 212 } 213 213 214 + static bool access_dcgsw(struct kvm_vcpu *vcpu, 215 + struct sys_reg_params *p, 216 + const struct sys_reg_desc *r) 217 + { 218 + if (!kvm_has_mte(vcpu->kvm)) { 219 + kvm_inject_undefined(vcpu); 220 + return false; 221 + } 222 + 223 + /* Treat MTE S/W ops as we treat the classic ones: with contempt */ 224 + return access_dcsw(vcpu, p, r); 225 + } 226 + 214 227 static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift) 215 228 { 216 229 switch (r->aarch32_map) { ··· 1769 1756 */ 1770 1757 static const struct sys_reg_desc sys_reg_descs[] = { 1771 1758 { SYS_DESC(SYS_DC_ISW), access_dcsw }, 1759 + { SYS_DESC(SYS_DC_IGSW), access_dcgsw }, 1760 + { SYS_DESC(SYS_DC_IGDSW), access_dcgsw }, 1772 1761 { SYS_DESC(SYS_DC_CSW), access_dcsw }, 1762 + { SYS_DESC(SYS_DC_CGSW), access_dcgsw }, 1763 + { SYS_DESC(SYS_DC_CGDSW), access_dcgsw }, 1773 1764 { SYS_DESC(SYS_DC_CISW), access_dcsw }, 1765 + { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, 1766 + { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, 1774 1767 1775 1768 DBG_BCR_BVR_WCR_WVR_EL1(0), 1776 1769 DBG_BCR_BVR_WCR_WVR_EL1(1),
+21 -6
arch/arm64/kvm/vgic/vgic-init.c
··· 235 235 * KVM io device for the redistributor that belongs to this VCPU. 236 236 */ 237 237 if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { 238 - mutex_lock(&vcpu->kvm->arch.config_lock); 238 + mutex_lock(&vcpu->kvm->slots_lock); 239 239 ret = vgic_register_redist_iodev(vcpu); 240 - mutex_unlock(&vcpu->kvm->arch.config_lock); 240 + mutex_unlock(&vcpu->kvm->slots_lock); 241 241 } 242 242 return ret; 243 243 } ··· 406 406 407 407 /** 408 408 * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest 409 - * is a GICv2. A GICv3 must be explicitly initialized by the guest using the 409 + * is a GICv2. A GICv3 must be explicitly initialized by userspace using the 410 410 * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. 411 411 * @kvm: kvm struct pointer 412 412 */ ··· 446 446 int kvm_vgic_map_resources(struct kvm *kvm) 447 447 { 448 448 struct vgic_dist *dist = &kvm->arch.vgic; 449 + gpa_t dist_base; 449 450 int ret = 0; 450 451 451 452 if (likely(vgic_ready(kvm))) 452 453 return 0; 453 454 455 + mutex_lock(&kvm->slots_lock); 454 456 mutex_lock(&kvm->arch.config_lock); 455 457 if (vgic_ready(kvm)) 456 458 goto out; ··· 465 463 else 466 464 ret = vgic_v3_map_resources(kvm); 467 465 468 - if (ret) 466 + if (ret) { 469 467 __kvm_vgic_destroy(kvm); 470 - else 471 - dist->ready = true; 468 + goto out; 469 + } 470 + dist->ready = true; 471 + dist_base = dist->vgic_dist_base; 472 + mutex_unlock(&kvm->arch.config_lock); 473 + 474 + ret = vgic_register_dist_iodev(kvm, dist_base, 475 + kvm_vgic_global_state.type); 476 + if (ret) { 477 + kvm_err("Unable to register VGIC dist MMIO regions\n"); 478 + kvm_vgic_destroy(kvm); 479 + } 480 + mutex_unlock(&kvm->slots_lock); 481 + return ret; 472 482 473 483 out: 474 484 mutex_unlock(&kvm->arch.config_lock); 485 + mutex_unlock(&kvm->slots_lock); 475 486 return ret; 476 487 } 477 488
+10 -4
arch/arm64/kvm/vgic/vgic-its.c
··· 1936 1936 1937 1937 static int vgic_its_create(struct kvm_device *dev, u32 type) 1938 1938 { 1939 + int ret; 1939 1940 struct vgic_its *its; 1940 1941 1941 1942 if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) ··· 1946 1945 if (!its) 1947 1946 return -ENOMEM; 1948 1947 1948 + mutex_lock(&dev->kvm->arch.config_lock); 1949 + 1949 1950 if (vgic_initialized(dev->kvm)) { 1950 - int ret = vgic_v4_init(dev->kvm); 1951 + ret = vgic_v4_init(dev->kvm); 1951 1952 if (ret < 0) { 1953 + mutex_unlock(&dev->kvm->arch.config_lock); 1952 1954 kfree(its); 1953 1955 return ret; 1954 1956 } ··· 1964 1960 1965 1961 /* Yep, even more trickery for lock ordering... */ 1966 1962 #ifdef CONFIG_LOCKDEP 1967 - mutex_lock(&dev->kvm->arch.config_lock); 1968 1963 mutex_lock(&its->cmd_lock); 1969 1964 mutex_lock(&its->its_lock); 1970 1965 mutex_unlock(&its->its_lock); 1971 1966 mutex_unlock(&its->cmd_lock); 1972 - mutex_unlock(&dev->kvm->arch.config_lock); 1973 1967 #endif 1974 1968 1975 1969 its->vgic_its_base = VGIC_ADDR_UNDEF; ··· 1988 1986 1989 1987 dev->private = its; 1990 1988 1991 - return vgic_its_set_abi(its, NR_ITS_ABIS - 1); 1989 + ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1); 1990 + 1991 + mutex_unlock(&dev->kvm->arch.config_lock); 1992 + 1993 + return ret; 1992 1994 } 1993 1995 1994 1996 static void vgic_its_destroy(struct kvm_device *kvm_dev)
+8 -2
arch/arm64/kvm/vgic/vgic-kvm-device.c
··· 102 102 if (get_user(addr, uaddr)) 103 103 return -EFAULT; 104 104 105 - mutex_lock(&kvm->arch.config_lock); 105 + /* 106 + * Since we can't hold config_lock while registering the redistributor 107 + * iodevs, take the slots_lock immediately. 108 + */ 109 + mutex_lock(&kvm->slots_lock); 106 110 switch (attr->attr) { 107 111 case KVM_VGIC_V2_ADDR_TYPE_DIST: 108 112 r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); ··· 186 182 if (r) 187 183 goto out; 188 184 185 + mutex_lock(&kvm->arch.config_lock); 189 186 if (write) { 190 187 r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size); 191 188 if (!r) ··· 194 189 } else { 195 190 addr = *addr_ptr; 196 191 } 192 + mutex_unlock(&kvm->arch.config_lock); 197 193 198 194 out: 199 - mutex_unlock(&kvm->arch.config_lock); 195 + mutex_unlock(&kvm->slots_lock); 200 196 201 197 if (!r && !write) 202 198 r = put_user(addr, uaddr);
+21 -10
arch/arm64/kvm/vgic/vgic-mmio-v3.c
··· 769 769 struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; 770 770 struct vgic_redist_region *rdreg; 771 771 gpa_t rd_base; 772 - int ret; 772 + int ret = 0; 773 + 774 + lockdep_assert_held(&kvm->slots_lock); 775 + mutex_lock(&kvm->arch.config_lock); 773 776 774 777 if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) 775 - return 0; 778 + goto out_unlock; 776 779 777 780 /* 778 781 * We may be creating VCPUs before having set the base address for the ··· 785 782 */ 786 783 rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); 787 784 if (!rdreg) 788 - return 0; 785 + goto out_unlock; 789 786 790 - if (!vgic_v3_check_base(kvm)) 791 - return -EINVAL; 787 + if (!vgic_v3_check_base(kvm)) { 788 + ret = -EINVAL; 789 + goto out_unlock; 790 + } 792 791 793 792 vgic_cpu->rdreg = rdreg; 794 793 vgic_cpu->rdreg_index = rdreg->free_index; ··· 804 799 rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); 805 800 rd_dev->redist_vcpu = vcpu; 806 801 807 - mutex_lock(&kvm->slots_lock); 802 + mutex_unlock(&kvm->arch.config_lock); 803 + 808 804 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, 809 805 2 * SZ_64K, &rd_dev->dev); 810 - mutex_unlock(&kvm->slots_lock); 811 - 812 806 if (ret) 813 807 return ret; 814 808 809 + /* Protected by slots_lock */ 815 810 rdreg->free_index++; 816 811 return 0; 812 + 813 + out_unlock: 814 + mutex_unlock(&kvm->arch.config_lock); 815 + return ret; 817 816 } 818 817 819 818 static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) ··· 843 834 /* The current c failed, so iterate over the previous ones. 
*/ 844 835 int i; 845 836 846 - mutex_lock(&kvm->slots_lock); 847 837 for (i = 0; i < c; i++) { 848 838 vcpu = kvm_get_vcpu(kvm, i); 849 839 vgic_unregister_redist_iodev(vcpu); 850 840 } 851 - mutex_unlock(&kvm->slots_lock); 852 841 } 853 842 854 843 return ret; ··· 945 938 { 946 939 int ret; 947 940 941 + mutex_lock(&kvm->arch.config_lock); 948 942 ret = vgic_v3_alloc_redist_region(kvm, index, addr, count); 943 + mutex_unlock(&kvm->arch.config_lock); 949 944 if (ret) 950 945 return ret; 951 946 ··· 959 950 if (ret) { 960 951 struct vgic_redist_region *rdreg; 961 952 953 + mutex_lock(&kvm->arch.config_lock); 962 954 rdreg = vgic_v3_rdist_region_from_index(kvm, index); 963 955 vgic_v3_free_redist_region(rdreg); 956 + mutex_unlock(&kvm->arch.config_lock); 964 957 return ret; 965 958 } 966 959
+2 -7
arch/arm64/kvm/vgic/vgic-mmio.c
··· 1096 1096 enum vgic_type type) 1097 1097 { 1098 1098 struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; 1099 - int ret = 0; 1100 1099 unsigned int len; 1101 1100 1102 1101 switch (type) { ··· 1113 1114 io_device->iodev_type = IODEV_DIST; 1114 1115 io_device->redist_vcpu = NULL; 1115 1116 1116 - mutex_lock(&kvm->slots_lock); 1117 - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, 1118 - len, &io_device->dev); 1119 - mutex_unlock(&kvm->slots_lock); 1120 - 1121 - return ret; 1117 + return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, 1118 + len, &io_device->dev); 1122 1119 }
-6
arch/arm64/kvm/vgic/vgic-v2.c
··· 312 312 return ret; 313 313 } 314 314 315 - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); 316 - if (ret) { 317 - kvm_err("Unable to register VGIC MMIO regions\n"); 318 - return ret; 319 - } 320 - 321 315 if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { 322 316 ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, 323 317 kvm_vgic_global_state.vcpu_base,
-7
arch/arm64/kvm/vgic/vgic-v3.c
··· 539 539 { 540 540 struct vgic_dist *dist = &kvm->arch.vgic; 541 541 struct kvm_vcpu *vcpu; 542 - int ret = 0; 543 542 unsigned long c; 544 543 545 544 kvm_for_each_vcpu(c, vcpu, kvm) { ··· 566 567 */ 567 568 if (!vgic_initialized(kvm)) { 568 569 return -EBUSY; 569 - } 570 - 571 - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); 572 - if (ret) { 573 - kvm_err("Unable to register VGICv3 dist MMIO regions\n"); 574 - return ret; 575 570 } 576 571 577 572 if (kvm_vgic_global_state.has_gicv4_1)
+2 -1
arch/arm64/kvm/vgic/vgic-v4.c
··· 184 184 } 185 185 } 186 186 187 - /* Must be called with the kvm lock held */ 188 187 void vgic_v4_configure_vsgis(struct kvm *kvm) 189 188 { 190 189 struct vgic_dist *dist = &kvm->arch.vgic; 191 190 struct kvm_vcpu *vcpu; 192 191 unsigned long i; 192 + 193 + lockdep_assert_held(&kvm->arch.config_lock); 193 194 194 195 kvm_arm_halt_guest(kvm); 195 196
+18 -2
arch/x86/kvm/lapic.c
··· 229 229 u32 physical_id; 230 230 231 231 /* 232 + * For simplicity, KVM always allocates enough space for all possible 233 + * xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on 234 + * without the optimized map. 235 + */ 236 + if (WARN_ON_ONCE(xapic_id > new->max_apic_id)) 237 + return -EINVAL; 238 + 239 + /* 240 + * Bail if a vCPU was added and/or enabled its APIC between allocating 241 + * the map and doing the actual calculations for the map. Note, KVM 242 + * hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if 243 + * the compiler decides to reload x2apic_id after this check. 244 + */ 245 + if (x2apic_id > new->max_apic_id) 246 + return -E2BIG; 247 + 248 + /* 232 249 * Deliberately truncate the vCPU ID when detecting a mismatched APIC 233 250 * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a 234 251 * 32-bit value. Any unwanted aliasing due to truncation results will ··· 270 253 */ 271 254 if (vcpu->kvm->arch.x2apic_format) { 272 255 /* See also kvm_apic_match_physical_addr(). */ 273 - if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && 274 - x2apic_id <= new->max_apic_id) 256 + if (apic_x2apic_mode(apic) || x2apic_id > 0xff) 275 257 new->phys_map[x2apic_id] = apic; 276 258 277 259 if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
+4 -1
arch/x86/kvm/mmu/mmu.c
··· 7091 7091 */ 7092 7092 slot = NULL; 7093 7093 if (atomic_read(&kvm->nr_memslots_dirty_logging)) { 7094 - slot = gfn_to_memslot(kvm, sp->gfn); 7094 + struct kvm_memslots *slots; 7095 + 7096 + slots = kvm_memslots_for_spte_role(kvm, sp->role); 7097 + slot = __gfn_to_memslot(slots, sp->gfn); 7095 7098 WARN_ON_ONCE(!slot); 7096 7099 } 7097 7100
+1 -1
arch/x86/kvm/svm/svm.c
··· 3510 3510 if (!is_vnmi_enabled(svm)) 3511 3511 return false; 3512 3512 3513 - return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK); 3513 + return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK); 3514 3514 } 3515 3515 3516 3516 static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
+3
arch/x86/kvm/x86.c
··· 10758 10758 exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; 10759 10759 break; 10760 10760 } 10761 + 10762 + /* Note, VM-Exits that go down the "slow" path are accounted below. */ 10763 + ++vcpu->stat.exits; 10761 10764 } 10762 10765 10763 10766 /*
+1
tools/testing/selftests/kvm/Makefile
··· 116 116 TEST_GEN_PROGS_x86_64 += x86_64/amx_test 117 117 TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test 118 118 TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test 119 + TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test 119 120 TEST_GEN_PROGS_x86_64 += access_tracking_perf_test 120 121 TEST_GEN_PROGS_x86_64 += demand_paging_test 121 122 TEST_GEN_PROGS_x86_64 += dirty_log_test
+74
tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Test edge cases and race conditions in kvm_recalculate_apic_map(). 4 + */ 5 + 6 + #include <sys/ioctl.h> 7 + #include <pthread.h> 8 + #include <time.h> 9 + 10 + #include "processor.h" 11 + #include "test_util.h" 12 + #include "kvm_util.h" 13 + #include "apic.h" 14 + 15 + #define TIMEOUT 5 /* seconds */ 16 + 17 + #define LAPIC_DISABLED 0 18 + #define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) 19 + #define MAX_XAPIC_ID 0xff 20 + 21 + static void *race(void *arg) 22 + { 23 + struct kvm_lapic_state lapic = {}; 24 + struct kvm_vcpu *vcpu = arg; 25 + 26 + while (1) { 27 + /* Trigger kvm_recalculate_apic_map(). */ 28 + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); 29 + pthread_testcancel(); 30 + } 31 + 32 + return NULL; 33 + } 34 + 35 + int main(void) 36 + { 37 + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; 38 + struct kvm_vcpu *vcpuN; 39 + struct kvm_vm *vm; 40 + pthread_t thread; 41 + time_t t; 42 + int i; 43 + 44 + kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID); 45 + 46 + /* 47 + * Create the max number of vCPUs supported by selftests so that KVM 48 + * has decent amount of work to do when recalculating the map, i.e. to 49 + * make the problematic window large enough to hit. 50 + */ 51 + vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus); 52 + 53 + /* 54 + * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc 55 + * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits). 
56 + */ 57 + for (i = 0; i < KVM_MAX_VCPUS; i++) 58 + vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC); 59 + 60 + ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); 61 + 62 + vcpuN = vcpus[KVM_MAX_VCPUS - 1]; 63 + for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { 64 + vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC); 65 + vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED); 66 + } 67 + 68 + ASSERT_EQ(pthread_cancel(thread), 0); 69 + ASSERT_EQ(pthread_join(thread, NULL), 0); 70 + 71 + kvm_vm_free(vm); 72 + 73 + return 0; 74 + }