Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
"ARM:
- Fix a problem with GICv3 userspace save/restore
- Clarify GICv2 userspace save/restore ABI
- Be more careful in clearing GIC LRs
- Add missing synchronization primitive to our MMU handling code

PPC:
- Check for a NULL return from kzalloc

s390:
- Prevent translation exception errors on valid page tables for the
instruction-execution-protection support

x86:
- Fix Page-Modification Logging when running a nested guest"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: PPC: Book3S HV: Check for kmalloc errors in ioctl
KVM: nVMX: initialize PML fields in vmcs02
KVM: nVMX: do not leak PML full vmexit to L1
KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3 and clarify ABI
KVM: arm64: Ensure LRs are clear when they should be
kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd
KVM: s390: remove change-recording override support
arm/arm64: KVM: Take mmap_sem in kvm_arch_prepare_memory_region
arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm

+120 -15
+6
Documentation/virtual/kvm/devices/arm-vgic.txt
··· 83 83 84 84 Bits for undefined preemption levels are RAZ/WI. 85 85 86 + For historical reasons and to provide ABI compatibility with userspace we 87 + export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask 88 + field in the lower 5 bits of a word, meaning that userspace must always 89 + use the lower 5 bits to communicate with the KVM device and must shift the 90 + value left by 3 places to obtain the actual priority mask level. 91 + 86 92 Limitations: 87 93 - Priorities are not implemented, and registers are RAZ/WI 88 94 - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
+3
arch/arm/kvm/arm.c
··· 1124 1124 if (__hyp_get_vectors() == hyp_default_vectors) 1125 1125 cpu_init_hyp_mode(NULL); 1126 1126 } 1127 + 1128 + if (vgic_present) 1129 + kvm_vgic_init_cpu_hardware(); 1127 1130 } 1128 1131 1129 1132 static void cpu_hyp_reset(void)
+20 -3
arch/arm/kvm/mmu.c
··· 292 292 phys_addr_t addr = start, end = start + size; 293 293 phys_addr_t next; 294 294 295 + assert_spin_locked(&kvm->mmu_lock); 295 296 pgd = kvm->arch.pgd + stage2_pgd_index(addr); 296 297 do { 297 298 next = stage2_pgd_addr_end(addr, end); 298 299 if (!stage2_pgd_none(*pgd)) 299 300 unmap_stage2_puds(kvm, pgd, addr, next); 301 + /* 302 + * If the range is too large, release the kvm->mmu_lock 303 + * to prevent starvation and lockup detector warnings. 304 + */ 305 + if (next != end) 306 + cond_resched_lock(&kvm->mmu_lock); 300 307 } while (pgd++, addr = next, addr != end); 301 308 } 302 309 ··· 810 803 int idx; 811 804 812 805 idx = srcu_read_lock(&kvm->srcu); 806 + down_read(&current->mm->mmap_sem); 813 807 spin_lock(&kvm->mmu_lock); 814 808 815 809 slots = kvm_memslots(kvm); ··· 818 810 stage2_unmap_memslot(kvm, memslot); 819 811 820 812 spin_unlock(&kvm->mmu_lock); 813 + up_read(&current->mm->mmap_sem); 821 814 srcu_read_unlock(&kvm->srcu, idx); 822 815 } 823 816 ··· 838 829 if (kvm->arch.pgd == NULL) 839 830 return; 840 831 832 + spin_lock(&kvm->mmu_lock); 841 833 unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); 834 + spin_unlock(&kvm->mmu_lock); 835 + 842 836 /* Free the HW pgd, one page at a time */ 843 837 free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); 844 838 kvm->arch.pgd = NULL; ··· 1813 1801 (KVM_PHYS_SIZE >> PAGE_SHIFT)) 1814 1802 return -EFAULT; 1815 1803 1804 + down_read(&current->mm->mmap_sem); 1816 1805 /* 1817 1806 * A memory region could potentially cover multiple VMAs, and any holes 1818 1807 * between them, so iterate over all of them to find out if we can map ··· 1857 1844 pa += vm_start - vma->vm_start; 1858 1845 1859 1846 /* IO region dirty page logging not allowed */ 1860 - if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) 1861 - return -EINVAL; 1847 + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { 1848 + ret = -EINVAL; 1849 + goto out; 1850 + } 1862 1851 1863 1852 ret = kvm_phys_addr_ioremap(kvm, gpa, pa, 1864 1853 vm_end - vm_start, ··· 1872 
1857 } while (hva < reg_end); 1873 1858 1874 1859 if (change == KVM_MR_FLAGS_ONLY) 1875 - return ret; 1860 + goto out; 1876 1861 1877 1862 spin_lock(&kvm->mmu_lock); 1878 1863 if (ret) ··· 1880 1865 else 1881 1866 stage2_flush_memslot(kvm, memslot); 1882 1867 spin_unlock(&kvm->mmu_lock); 1868 + out: 1869 + up_read(&current->mm->mmap_sem); 1883 1870 return ret; 1884 1871 } 1885 1872
+4
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 1487 1487 /* start new resize */ 1488 1488 1489 1489 resize = kzalloc(sizeof(*resize), GFP_KERNEL); 1490 + if (!resize) { 1491 + ret = -ENOMEM; 1492 + goto out; 1493 + } 1490 1494 resize->order = shift; 1491 1495 resize->kvm = kvm; 1492 1496 INIT_WORK(&resize->work, resize_hpt_prepare_work);
+2 -5
arch/s390/kvm/gaccess.c
··· 168 168 unsigned long z : 1; /* Zero Bit */ 169 169 unsigned long i : 1; /* Page-Invalid Bit */ 170 170 unsigned long p : 1; /* DAT-Protection Bit */ 171 - unsigned long co : 1; /* Change-Recording Override */ 172 - unsigned long : 8; 171 + unsigned long : 9; 173 172 }; 174 173 }; 175 174 ··· 744 745 return PGM_PAGE_TRANSLATION; 745 746 if (pte.z) 746 747 return PGM_TRANSLATION_SPEC; 747 - if (pte.co && !edat1) 748 - return PGM_TRANSLATION_SPEC; 749 748 dat_protection |= pte.p; 750 749 raddr.pfra = pte.pfra; 751 750 real_address: ··· 1179 1182 rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); 1180 1183 if (!rc && pte.i) 1181 1184 rc = PGM_PAGE_TRANSLATION; 1182 - if (!rc && (pte.z || (pte.co && sg->edat_level < 1))) 1185 + if (!rc && pte.z) 1183 1186 rc = PGM_TRANSLATION_SPEC; 1184 1187 shadow_page: 1185 1188 pte.p |= dat_protection;
+15
arch/x86/kvm/vmx.c
··· 8198 8198 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); 8199 8199 case EXIT_REASON_PREEMPTION_TIMER: 8200 8200 return false; 8201 + case EXIT_REASON_PML_FULL: 8202 + /* We don't expose PML support to L1. */ 8203 + return false; 8201 8204 default: 8202 8205 return true; 8203 8206 } ··· 10268 10265 vmx_flush_tlb(vcpu); 10269 10266 } 10270 10267 10268 + } 10269 + 10270 + if (enable_pml) { 10271 + /* 10272 + * Conceptually we want to copy the PML address and index from 10273 + * vmcs01 here, and then back to vmcs01 on nested vmexit. But, 10274 + * since we always flush the log on each vmexit, this happens 10275 + * to be equivalent to simply resetting the fields in vmcs02. 10276 + */ 10277 + ASSERT(vmx->pml_pg); 10278 + vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); 10279 + vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); 10271 10280 } 10272 10281 10273 10282 if (nested_cpu_has_ept(vmcs12)) {
+1
include/kvm/arm_vgic.h
··· 295 295 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); 296 296 int kvm_vgic_map_resources(struct kvm *kvm); 297 297 int kvm_vgic_hyp_init(void); 298 + void kvm_vgic_init_cpu_hardware(void); 298 299 299 300 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, 300 301 bool level);
+3
include/linux/irqchip/arm-gic.h
··· 96 96 #define GICH_MISR_EOI (1 << 0) 97 97 #define GICH_MISR_U (1 << 1) 98 98 99 + #define GICV_PMR_PRIORITY_SHIFT 3 100 + #define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT) 101 + 99 102 #ifndef __ASSEMBLY__ 100 103 101 104 #include <linux/irqdomain.h>
+19
virt/kvm/arm/vgic/vgic-init.c
··· 392 392 } 393 393 394 394 /** 395 + * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware 396 + * 397 + * For a specific CPU, initialize the GIC VE hardware. 398 + */ 399 + void kvm_vgic_init_cpu_hardware(void) 400 + { 401 + BUG_ON(preemptible()); 402 + 403 + /* 404 + * We want to make sure the list registers start out clear so that we 405 + * only have to program the used registers. 406 + */ 407 + if (kvm_vgic_global_state.type == VGIC_V2) 408 + vgic_v2_init_lrs(); 409 + else 410 + kvm_call_hyp(__vgic_v3_init_lrs); 411 + } 412 + 413 + /** 395 414 * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable 396 415 * according to the host GIC model. Accordingly calls either 397 416 * vgic_v2/v3_probe which registers the KVM_DEVICE that can be
+18 -2
virt/kvm/arm/vgic/vgic-mmio-v2.c
··· 229 229 val = vmcr.ctlr; 230 230 break; 231 231 case GIC_CPU_PRIMASK: 232 - val = vmcr.pmr; 232 + /* 233 + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the 234 + * PMR field as GICH_VMCR.VMPriMask rather than 235 + * GICC_PMR.Priority, so we expose the upper five bits of 236 + * priority mask to userspace using the lower bits in the 237 + * unsigned long. 238 + */ 239 + val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >> 240 + GICV_PMR_PRIORITY_SHIFT; 233 241 break; 234 242 case GIC_CPU_BINPOINT: 235 243 val = vmcr.bpr; ··· 270 262 vmcr.ctlr = val; 271 263 break; 272 264 case GIC_CPU_PRIMASK: 273 - vmcr.pmr = val; 265 + /* 266 + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the 267 + * PMR field as GICH_VMCR.VMPriMask rather than 268 + * GICC_PMR.Priority, so we expose the upper five bits of 269 + * priority mask to userspace using the lower bits in the 270 + * unsigned long. 271 + */ 272 + vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) & 273 + GICV_PMR_PRIORITY_MASK; 274 274 break; 275 275 case GIC_CPU_BINPOINT: 276 276 vmcr.bpr = val;
+19 -4
virt/kvm/arm/vgic/vgic-v2.c
··· 36 36 return (unsigned long *)val; 37 37 } 38 38 39 + static inline void vgic_v2_write_lr(int lr, u32 val) 40 + { 41 + void __iomem *base = kvm_vgic_global_state.vctrl_base; 42 + 43 + writel_relaxed(val, base + GICH_LR0 + (lr * 4)); 44 + } 45 + 46 + void vgic_v2_init_lrs(void) 47 + { 48 + int i; 49 + 50 + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) 51 + vgic_v2_write_lr(i, 0); 52 + } 53 + 39 54 void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) 40 55 { 41 56 struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; ··· 206 191 GICH_VMCR_ALIAS_BINPOINT_MASK; 207 192 vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & 208 193 GICH_VMCR_BINPOINT_MASK; 209 - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & 210 - GICH_VMCR_PRIMASK_MASK; 194 + vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) << 195 + GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; 211 196 212 197 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; 213 198 } ··· 222 207 GICH_VMCR_ALIAS_BINPOINT_SHIFT; 223 208 vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> 224 209 GICH_VMCR_BINPOINT_SHIFT; 225 - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> 226 - GICH_VMCR_PRIMASK_SHIFT; 210 + vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >> 211 + GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; 227 212 } 228 213 229 214 void vgic_v2_enable(struct kvm_vcpu *vcpu)
+10 -1
virt/kvm/arm/vgic/vgic.h
··· 81 81 return irq->pending_latch || irq->line_level; 82 82 } 83 83 84 + /* 85 + * This struct provides an intermediate representation of the fields contained 86 + * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC 87 + * state to userspace can generate either GICv2 or GICv3 CPU interface 88 + * registers regardless of the hardware backed GIC used. 89 + */ 84 90 struct vgic_vmcr { 85 91 u32 ctlr; 86 92 u32 abpr; 87 93 u32 bpr; 88 - u32 pmr; 94 + u32 pmr; /* Priority mask field in the GICC_PMR and 95 + * ICC_PMR_EL1 priority field format */ 89 96 /* Below member variable are valid only for GICv3 */ 90 97 u32 grpen0; 91 98 u32 grpen1; ··· 136 129 int vgic_v2_map_resources(struct kvm *kvm); 137 130 int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, 138 131 enum vgic_type); 132 + 133 + void vgic_v2_init_lrs(void); 139 134 140 135 static inline void vgic_get_irq_kref(struct vgic_irq *irq) 141 136 {