Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"ARM:
- fixes for ITS init issues, error handling, IRQ leakage, race
conditions
- an erratum workaround for timers
- removal of a misleading error check and a misleading comment
- a fix for GICv3 on 32-bit guests

MIPS:
- fix for a case where the guest could wrongly map the first page of
physical memory

x86:
- nested virtualization fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
MIPS: KVM: Check for pfn noslot case
kvm: nVMX: fix nested tsc scaling
KVM: nVMX: postpone VMCS changes on MSR_IA32_APICBASE write
KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC
arm64: KVM: report configured SRE value to 32-bit world
arm64: KVM: remove misleading comment on pmu status
KVM: arm/arm64: timer: Workaround misconfigured timer interrupt
arm64: Document workaround for Cortex-A72 erratum #853709
KVM: arm/arm64: Change misleading use of is_error_pfn
KVM: arm64: ITS: avoid re-mapping LPIs
KVM: arm64: check for ITS device on MSI injection
KVM: arm64: ITS: move ITS registration into first VCPU run
KVM: arm64: vgic-its: Make updates to propbaser/pendbaser atomic
KVM: arm64: vgic-its: Plug race in vgic_put_irq
KVM: arm64: vgic-its: Handle errors from vgic_add_lpi
KVM: arm64: ITS: return 1 on successful MSI injection

+234 -139
+1
Documentation/arm64/silicon-errata.txt
··· 53 53 | ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | 54 54 | ARM | Cortex-A57 | #852523 | N/A | 55 55 | ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | 56 + | ARM | Cortex-A72 | #853709 | N/A | 56 57 | ARM | MMU-500 | #841119,#826419 | N/A | 57 58 | | | | | 58 59 | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 |
+1 -1
arch/arm/kvm/mmu.c
··· 1309 1309 smp_rmb(); 1310 1310 1311 1311 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); 1312 - if (is_error_pfn(pfn)) 1312 + if (is_error_noslot_pfn(pfn)) 1313 1313 return -EFAULT; 1314 1314 1315 1315 if (kvm_is_device_pfn(pfn)) {
+1 -1
arch/arm64/kvm/hyp/switch.c
··· 256 256 257 257 /* 258 258 * We must restore the 32-bit state before the sysregs, thanks 259 - * to Cortex-A57 erratum #852523. 259 + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). 260 260 */ 261 261 __sysreg32_restore_state(vcpu); 262 262 __sysreg_restore_guest_state(guest_ctxt);
+1 -9
arch/arm64/kvm/sys_regs.c
··· 823 823 * Architected system registers. 824 824 * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 825 825 * 826 - * We could trap ID_DFR0 and tell the guest we don't support performance 827 - * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was 828 - * NAKed, so it will read the PMCR anyway. 829 - * 830 - * Therefore we tell the guest we have 0 counters. Unfortunately, we 831 - * must always support PMCCNTR (the cycle counter): we just RAZ/WI for 832 - * all PM registers, which doesn't crash the guest kernel at least. 833 - * 834 826 * Debug handling: We do trap most, if not all debug related system 835 827 * registers. The implementation is good enough to ensure that a guest 836 828 * can use these with minimal performance degradation. The drawback is ··· 1352 1360 { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, 1353 1361 1354 1362 /* ICC_SRE */ 1355 - { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi }, 1363 + { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre }, 1356 1364 1357 1365 { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, 1358 1366
+1 -1
arch/mips/kvm/mmu.c
··· 40 40 srcu_idx = srcu_read_lock(&kvm->srcu); 41 41 pfn = gfn_to_pfn(kvm, gfn); 42 42 43 - if (is_error_pfn(pfn)) { 43 + if (is_error_noslot_pfn(pfn)) { 44 44 kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn); 45 45 err = -EFAULT; 46 46 goto out;
+69 -67
arch/x86/kvm/vmx.c
··· 422 422 struct list_head vmcs02_pool; 423 423 int vmcs02_num; 424 424 u64 vmcs01_tsc_offset; 425 + bool change_vmcs01_virtual_x2apic_mode; 425 426 /* L2 must run next, and mustn't decide to exit to L1. */ 426 427 bool nested_run_pending; 427 428 /* ··· 435 434 struct pi_desc *pi_desc; 436 435 bool pi_pending; 437 436 u16 posted_intr_nv; 437 + 438 + unsigned long *msr_bitmap; 438 439 439 440 struct hrtimer preemption_timer; 440 441 bool preemption_timer_expired; ··· 927 924 static unsigned long *vmx_msr_bitmap_longmode; 928 925 static unsigned long *vmx_msr_bitmap_legacy_x2apic; 929 926 static unsigned long *vmx_msr_bitmap_longmode_x2apic; 930 - static unsigned long *vmx_msr_bitmap_nested; 931 927 static unsigned long *vmx_vmread_bitmap; 932 928 static unsigned long *vmx_vmwrite_bitmap; 933 929 ··· 2200 2198 new.control) != old.control); 2201 2199 } 2202 2200 2201 + static void decache_tsc_multiplier(struct vcpu_vmx *vmx) 2202 + { 2203 + vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio; 2204 + vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); 2205 + } 2206 + 2203 2207 /* 2204 2208 * Switches to specified vcpu, until a matching vcpu_put(), but assumes 2205 2209 * vcpu mutex is already taken. 
··· 2264 2256 2265 2257 /* Setup TSC multiplier */ 2266 2258 if (kvm_has_tsc_control && 2267 - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) { 2268 - vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio; 2269 - vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); 2270 - } 2259 + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) 2260 + decache_tsc_multiplier(vmx); 2271 2261 2272 2262 vmx_vcpu_pi_load(vcpu, cpu); 2273 2263 vmx->host_pkru = read_pkru(); ··· 2514 2508 unsigned long *msr_bitmap; 2515 2509 2516 2510 if (is_guest_mode(vcpu)) 2517 - msr_bitmap = vmx_msr_bitmap_nested; 2511 + msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; 2518 2512 else if (cpu_has_secondary_exec_ctrls() && 2519 2513 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & 2520 2514 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { ··· 6369 6363 if (!vmx_msr_bitmap_longmode_x2apic) 6370 6364 goto out4; 6371 6365 6372 - if (nested) { 6373 - vmx_msr_bitmap_nested = 6374 - (unsigned long *)__get_free_page(GFP_KERNEL); 6375 - if (!vmx_msr_bitmap_nested) 6376 - goto out5; 6377 - } 6378 - 6379 6366 vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); 6380 6367 if (!vmx_vmread_bitmap) 6381 6368 goto out6; ··· 6391 6392 6392 6393 memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); 6393 6394 memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); 6394 - if (nested) 6395 - memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); 6396 6395 6397 6396 if (setup_vmcs_config(&vmcs_config) < 0) { 6398 6397 r = -EIO; ··· 6526 6529 out7: 6527 6530 free_page((unsigned long)vmx_vmread_bitmap); 6528 6531 out6: 6529 - if (nested) 6530 - free_page((unsigned long)vmx_msr_bitmap_nested); 6531 - out5: 6532 6532 free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); 6533 6533 out4: 6534 6534 free_page((unsigned long)vmx_msr_bitmap_longmode); ··· 6551 6557 free_page((unsigned long)vmx_io_bitmap_a); 6552 6558 free_page((unsigned long)vmx_vmwrite_bitmap); 6553 6559 free_page((unsigned long)vmx_vmread_bitmap); 6554 - if (nested) 
6555 - free_page((unsigned long)vmx_msr_bitmap_nested); 6556 6560 6557 6561 free_kvm_area(); 6558 6562 } ··· 6987 6995 return 1; 6988 6996 } 6989 6997 6998 + if (cpu_has_vmx_msr_bitmap()) { 6999 + vmx->nested.msr_bitmap = 7000 + (unsigned long *)__get_free_page(GFP_KERNEL); 7001 + if (!vmx->nested.msr_bitmap) 7002 + goto out_msr_bitmap; 7003 + } 7004 + 6990 7005 vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); 6991 7006 if (!vmx->nested.cached_vmcs12) 6992 - return -ENOMEM; 7007 + goto out_cached_vmcs12; 6993 7008 6994 7009 if (enable_shadow_vmcs) { 6995 7010 shadow_vmcs = alloc_vmcs(); 6996 - if (!shadow_vmcs) { 6997 - kfree(vmx->nested.cached_vmcs12); 6998 - return -ENOMEM; 6999 - } 7011 + if (!shadow_vmcs) 7012 + goto out_shadow_vmcs; 7000 7013 /* mark vmcs as shadow */ 7001 7014 shadow_vmcs->revision_id |= (1u << 31); 7002 7015 /* init shadow vmcs */ ··· 7021 7024 skip_emulated_instruction(vcpu); 7022 7025 nested_vmx_succeed(vcpu); 7023 7026 return 1; 7027 + 7028 + out_shadow_vmcs: 7029 + kfree(vmx->nested.cached_vmcs12); 7030 + 7031 + out_cached_vmcs12: 7032 + free_page((unsigned long)vmx->nested.msr_bitmap); 7033 + 7034 + out_msr_bitmap: 7035 + return -ENOMEM; 7024 7036 } 7025 7037 7026 7038 /* ··· 7104 7098 vmx->nested.vmxon = false; 7105 7099 free_vpid(vmx->nested.vpid02); 7106 7100 nested_release_vmcs12(vmx); 7101 + if (vmx->nested.msr_bitmap) { 7102 + free_page((unsigned long)vmx->nested.msr_bitmap); 7103 + vmx->nested.msr_bitmap = NULL; 7104 + } 7107 7105 if (enable_shadow_vmcs) 7108 7106 free_vmcs(vmx->nested.current_shadow_vmcs); 7109 7107 kfree(vmx->nested.cached_vmcs12); ··· 8429 8419 { 8430 8420 u32 sec_exec_control; 8431 8421 8422 + /* Postpone execution until vmcs01 is the current VMCS. 
*/ 8423 + if (is_guest_mode(vcpu)) { 8424 + to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true; 8425 + return; 8426 + } 8427 + 8432 8428 /* 8433 8429 * There is not point to enable virtualize x2apic without enable 8434 8430 * apicv ··· 9488 9472 { 9489 9473 int msr; 9490 9474 struct page *page; 9491 - unsigned long *msr_bitmap; 9475 + unsigned long *msr_bitmap_l1; 9476 + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; 9492 9477 9478 + /* This shortcut is ok because we support only x2APIC MSRs so far. */ 9493 9479 if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) 9494 9480 return false; 9495 9481 ··· 9500 9482 WARN_ON(1); 9501 9483 return false; 9502 9484 } 9503 - msr_bitmap = (unsigned long *)kmap(page); 9504 - if (!msr_bitmap) { 9485 + msr_bitmap_l1 = (unsigned long *)kmap(page); 9486 + if (!msr_bitmap_l1) { 9505 9487 nested_release_page_clean(page); 9506 9488 WARN_ON(1); 9507 9489 return false; 9508 9490 } 9509 9491 9492 + memset(msr_bitmap_l0, 0xff, PAGE_SIZE); 9493 + 9510 9494 if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { 9511 9495 if (nested_cpu_has_apic_reg_virt(vmcs12)) 9512 9496 for (msr = 0x800; msr <= 0x8ff; msr++) 9513 9497 nested_vmx_disable_intercept_for_msr( 9514 - msr_bitmap, 9515 - vmx_msr_bitmap_nested, 9498 + msr_bitmap_l1, msr_bitmap_l0, 9516 9499 msr, MSR_TYPE_R); 9517 - /* TPR is allowed */ 9518 - nested_vmx_disable_intercept_for_msr(msr_bitmap, 9519 - vmx_msr_bitmap_nested, 9500 + 9501 + nested_vmx_disable_intercept_for_msr( 9502 + msr_bitmap_l1, msr_bitmap_l0, 9520 9503 APIC_BASE_MSR + (APIC_TASKPRI >> 4), 9521 9504 MSR_TYPE_R | MSR_TYPE_W); 9505 + 9522 9506 if (nested_cpu_has_vid(vmcs12)) { 9523 - /* EOI and self-IPI are allowed */ 9524 9507 nested_vmx_disable_intercept_for_msr( 9525 - msr_bitmap, 9526 - vmx_msr_bitmap_nested, 9508 + msr_bitmap_l1, msr_bitmap_l0, 9527 9509 APIC_BASE_MSR + (APIC_EOI >> 4), 9528 9510 MSR_TYPE_W); 9529 9511 nested_vmx_disable_intercept_for_msr( 9530 - msr_bitmap, 9531 - 
vmx_msr_bitmap_nested, 9512 + msr_bitmap_l1, msr_bitmap_l0, 9532 9513 APIC_BASE_MSR + (APIC_SELF_IPI >> 4), 9533 9514 MSR_TYPE_W); 9534 9515 } 9535 - } else { 9536 - /* 9537 - * Enable reading intercept of all the x2apic 9538 - * MSRs. We should not rely on vmcs12 to do any 9539 - * optimizations here, it may have been modified 9540 - * by L1. 9541 - */ 9542 - for (msr = 0x800; msr <= 0x8ff; msr++) 9543 - __vmx_enable_intercept_for_msr( 9544 - vmx_msr_bitmap_nested, 9545 - msr, 9546 - MSR_TYPE_R); 9547 - 9548 - __vmx_enable_intercept_for_msr( 9549 - vmx_msr_bitmap_nested, 9550 - APIC_BASE_MSR + (APIC_TASKPRI >> 4), 9551 - MSR_TYPE_W); 9552 - __vmx_enable_intercept_for_msr( 9553 - vmx_msr_bitmap_nested, 9554 - APIC_BASE_MSR + (APIC_EOI >> 4), 9555 - MSR_TYPE_W); 9556 - __vmx_enable_intercept_for_msr( 9557 - vmx_msr_bitmap_nested, 9558 - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), 9559 - MSR_TYPE_W); 9560 9516 } 9561 9517 kunmap(page); 9562 9518 nested_release_page_clean(page); ··· 9949 9957 } 9950 9958 9951 9959 if (cpu_has_vmx_msr_bitmap() && 9952 - exec_control & CPU_BASED_USE_MSR_BITMAPS) { 9953 - nested_vmx_merge_msr_bitmap(vcpu, vmcs12); 9954 - /* MSR_BITMAP will be set by following vmx_set_efer. */ 9955 - } else 9960 + exec_control & CPU_BASED_USE_MSR_BITMAPS && 9961 + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) 9962 + ; /* MSR_BITMAP will be set by following vmx_set_efer. 
*/ 9963 + else 9956 9964 exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; 9957 9965 9958 9966 /* ··· 10003 10011 vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); 10004 10012 else 10005 10013 vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); 10014 + if (kvm_has_tsc_control) 10015 + decache_tsc_multiplier(vmx); 10006 10016 10007 10017 if (enable_vpid) { 10008 10018 /* ··· 10761 10767 else 10762 10768 vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, 10763 10769 PIN_BASED_VMX_PREEMPTION_TIMER); 10770 + if (kvm_has_tsc_control) 10771 + decache_tsc_multiplier(vmx); 10772 + 10773 + if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { 10774 + vmx->nested.change_vmcs01_virtual_x2apic_mode = false; 10775 + vmx_set_virtual_x2apic_mode(vcpu, 10776 + vcpu->arch.apic_base & X2APIC_ENABLE); 10777 + } 10764 10778 10765 10779 /* This is needed for same reason as it was needed in prepare_vmcs02 */ 10766 10780 vmx->host_rsp = 0;
+1
include/linux/irqchip/arm-gic-v3.h
··· 337 337 */ 338 338 #define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 339 339 #define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 340 + #define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 340 341 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 341 342 #define E_ITS_MAPD_DEVICE_OOR 0x010801 342 343 #define E_ITS_MAPC_PROCNUM_OOR 0x010902
+10 -1
virt/kvm/arm/arch_timer.c
··· 33 33 static struct timecounter *timecounter; 34 34 static struct workqueue_struct *wqueue; 35 35 static unsigned int host_vtimer_irq; 36 + static u32 host_vtimer_irq_flags; 36 37 37 38 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) 38 39 { ··· 366 365 367 366 static void kvm_timer_init_interrupt(void *info) 368 367 { 369 - enable_percpu_irq(host_vtimer_irq, 0); 368 + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); 370 369 } 371 370 372 371 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) ··· 432 431 return -ENODEV; 433 432 } 434 433 host_vtimer_irq = info->virtual_irq; 434 + 435 + host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); 436 + if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && 437 + host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { 438 + kvm_err("Invalid trigger for IRQ%d, assuming level low\n", 439 + host_vtimer_irq); 440 + host_vtimer_irq_flags = IRQF_TRIGGER_LOW; 441 + } 435 442 436 443 err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, 437 444 "kvm guest timer", kvm_get_running_vcpus());
+113 -43
virt/kvm/arm/vgic/vgic-its.c
··· 51 51 52 52 irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); 53 53 if (!irq) 54 - return NULL; 54 + return ERR_PTR(-ENOMEM); 55 55 56 56 INIT_LIST_HEAD(&irq->lpi_list); 57 57 INIT_LIST_HEAD(&irq->ap_list); ··· 441 441 * Find the target VCPU and the LPI number for a given devid/eventid pair 442 442 * and make this IRQ pending, possibly injecting it. 443 443 * Must be called with the its_lock mutex held. 444 + * Returns 0 on success, a positive error value for any ITS mapping 445 + * related errors and negative error values for generic errors. 444 446 */ 445 - static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, 446 - u32 devid, u32 eventid) 447 + static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, 448 + u32 devid, u32 eventid) 447 449 { 450 + struct kvm_vcpu *vcpu; 448 451 struct its_itte *itte; 449 452 450 453 if (!its->enabled) 451 - return; 454 + return -EBUSY; 452 455 453 456 itte = find_itte(its, devid, eventid); 454 - /* Triggering an unmapped IRQ gets silently dropped. 
*/ 455 - if (itte && its_is_collection_mapped(itte->collection)) { 456 - struct kvm_vcpu *vcpu; 457 + if (!itte || !its_is_collection_mapped(itte->collection)) 458 + return E_ITS_INT_UNMAPPED_INTERRUPT; 457 459 458 - vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); 459 - if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) { 460 - spin_lock(&itte->irq->irq_lock); 461 - itte->irq->pending = true; 462 - vgic_queue_irq_unlock(kvm, itte->irq); 463 - } 464 - } 460 + vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); 461 + if (!vcpu) 462 + return E_ITS_INT_UNMAPPED_INTERRUPT; 463 + 464 + if (!vcpu->arch.vgic_cpu.lpis_enabled) 465 + return -EBUSY; 466 + 467 + spin_lock(&itte->irq->irq_lock); 468 + itte->irq->pending = true; 469 + vgic_queue_irq_unlock(kvm, itte->irq); 470 + 471 + return 0; 472 + } 473 + 474 + static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) 475 + { 476 + struct vgic_io_device *iodev; 477 + 478 + if (dev->ops != &kvm_io_gic_ops) 479 + return NULL; 480 + 481 + iodev = container_of(dev, struct vgic_io_device, dev); 482 + 483 + if (iodev->iodev_type != IODEV_ITS) 484 + return NULL; 485 + 486 + return iodev; 465 487 } 466 488 467 489 /* 468 490 * Queries the KVM IO bus framework to get the ITS pointer from the given 469 491 * doorbell address. 470 492 * We then call vgic_its_trigger_msi() with the decoded data. 493 + * According to the KVM_SIGNAL_MSI API description returns 1 on success. 
471 494 */ 472 495 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) 473 496 { 474 497 u64 address; 475 498 struct kvm_io_device *kvm_io_dev; 476 499 struct vgic_io_device *iodev; 500 + int ret; 477 501 478 502 if (!vgic_has_its(kvm)) 479 503 return -ENODEV; ··· 509 485 510 486 kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); 511 487 if (!kvm_io_dev) 512 - return -ENODEV; 488 + return -EINVAL; 513 489 514 - iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); 490 + iodev = vgic_get_its_iodev(kvm_io_dev); 491 + if (!iodev) 492 + return -EINVAL; 515 493 516 494 mutex_lock(&iodev->its->its_lock); 517 - vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); 495 + ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); 518 496 mutex_unlock(&iodev->its->its_lock); 519 497 520 - return 0; 498 + if (ret < 0) 499 + return ret; 500 + 501 + /* 502 + * KVM_SIGNAL_MSI demands a return value > 0 for success and 0 503 + * if the guest has blocked the MSI. So we map any LPI mapping 504 + * related error to that. 505 + */ 506 + if (ret) 507 + return 0; 508 + else 509 + return 1; 521 510 } 522 511 523 512 /* Requires the its_lock to be held. */ ··· 539 502 list_del(&itte->itte_list); 540 503 541 504 /* This put matches the get in vgic_add_lpi. */ 542 - vgic_put_irq(kvm, itte->irq); 505 + if (itte->irq) 506 + vgic_put_irq(kvm, itte->irq); 543 507 544 508 kfree(itte); 545 509 } ··· 735 697 struct its_device *device; 736 698 struct its_collection *collection, *new_coll = NULL; 737 699 int lpi_nr; 700 + struct vgic_irq *irq; 738 701 739 702 device = find_its_device(its, device_id); 740 703 if (!device) ··· 749 710 lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) 750 711 return E_ITS_MAPTI_PHYSICALID_OOR; 751 712 713 + /* If there is an existing mapping, behavior is UNPREDICTABLE. 
*/ 714 + if (find_itte(its, device_id, event_id)) 715 + return 0; 716 + 752 717 collection = find_collection(its, coll_id); 753 718 if (!collection) { 754 719 int ret = vgic_its_alloc_collection(its, &collection, coll_id); ··· 761 718 new_coll = collection; 762 719 } 763 720 764 - itte = find_itte(its, device_id, event_id); 721 + itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); 765 722 if (!itte) { 766 - itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); 767 - if (!itte) { 768 - if (new_coll) 769 - vgic_its_free_collection(its, coll_id); 770 - return -ENOMEM; 771 - } 772 - 773 - itte->event_id = event_id; 774 - list_add_tail(&itte->itte_list, &device->itt_head); 723 + if (new_coll) 724 + vgic_its_free_collection(its, coll_id); 725 + return -ENOMEM; 775 726 } 727 + 728 + itte->event_id = event_id; 729 + list_add_tail(&itte->itte_list, &device->itt_head); 776 730 777 731 itte->collection = collection; 778 732 itte->lpi = lpi_nr; 779 - itte->irq = vgic_add_lpi(kvm, lpi_nr); 733 + 734 + irq = vgic_add_lpi(kvm, lpi_nr); 735 + if (IS_ERR(irq)) { 736 + if (new_coll) 737 + vgic_its_free_collection(its, coll_id); 738 + its_free_itte(kvm, itte); 739 + return PTR_ERR(irq); 740 + } 741 + itte->irq = irq; 742 + 780 743 update_affinity_itte(kvm, itte); 781 744 782 745 /* ··· 1030 981 u32 msi_data = its_cmd_get_id(its_cmd); 1031 982 u64 msi_devid = its_cmd_get_deviceid(its_cmd); 1032 983 1033 - vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); 1034 - 1035 - return 0; 984 + return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); 1036 985 } 1037 986 1038 987 /* ··· 1335 1288 its_sync_lpi_pending_table(vcpu); 1336 1289 } 1337 1290 1338 - static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its) 1291 + static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its) 1339 1292 { 1340 1293 struct vgic_io_device *iodev = &its->iodev; 1341 1294 int ret; 1342 1295 1343 - if (its->initialized) 1344 - return 0; 1296 + if (!its->initialized) 1297 + return 
-EBUSY; 1345 1298 1346 1299 if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) 1347 1300 return -ENXIO; ··· 1357 1310 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, 1358 1311 KVM_VGIC_V3_ITS_SIZE, &iodev->dev); 1359 1312 mutex_unlock(&kvm->slots_lock); 1360 - 1361 - if (!ret) 1362 - its->initialized = true; 1363 1313 1364 1314 return ret; 1365 1315 } ··· 1479 1435 if (type != KVM_VGIC_ITS_ADDR_TYPE) 1480 1436 return -ENODEV; 1481 1437 1482 - if (its->initialized) 1483 - return -EBUSY; 1484 - 1485 1438 if (copy_from_user(&addr, uaddr, sizeof(addr))) 1486 1439 return -EFAULT; 1487 1440 ··· 1494 1453 case KVM_DEV_ARM_VGIC_GRP_CTRL: 1495 1454 switch (attr->attr) { 1496 1455 case KVM_DEV_ARM_VGIC_CTRL_INIT: 1497 - return vgic_its_init_its(dev->kvm, its); 1456 + its->initialized = true; 1457 + 1458 + return 0; 1498 1459 } 1499 1460 break; 1500 1461 } ··· 1540 1497 { 1541 1498 return kvm_register_device_ops(&kvm_arm_vgic_its_ops, 1542 1499 KVM_DEV_TYPE_ARM_VGIC_ITS); 1500 + } 1501 + 1502 + /* 1503 + * Registers all ITSes with the kvm_io_bus framework. 1504 + * To follow the existing VGIC initialization sequence, this has to be 1505 + * done as late as possible, just before the first VCPU runs. 1506 + */ 1507 + int vgic_register_its_iodevs(struct kvm *kvm) 1508 + { 1509 + struct kvm_device *dev; 1510 + int ret = 0; 1511 + 1512 + list_for_each_entry(dev, &kvm->devices, vm_node) { 1513 + if (dev->ops != &kvm_arm_vgic_its_ops) 1514 + continue; 1515 + 1516 + ret = vgic_register_its_iodev(kvm, dev->private); 1517 + if (ret) 1518 + return ret; 1519 + /* 1520 + * We don't need to care about tearing down previously 1521 + * registered ITSes, as the kvm_io_bus framework removes 1522 + * them for us if the VM gets destroyed. 1523 + */ 1524 + } 1525 + 1526 + return ret; 1543 1527 }
+16 -10
virt/kvm/arm/vgic/vgic-mmio-v3.c
··· 306 306 { 307 307 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 308 308 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 309 - u64 propbaser = dist->propbaser; 309 + u64 old_propbaser, propbaser; 310 310 311 311 /* Storing a value with LPIs already enabled is undefined */ 312 312 if (vgic_cpu->lpis_enabled) 313 313 return; 314 314 315 - propbaser = update_64bit_reg(propbaser, addr & 4, len, val); 316 - propbaser = vgic_sanitise_propbaser(propbaser); 317 - 318 - dist->propbaser = propbaser; 315 + do { 316 + old_propbaser = dist->propbaser; 317 + propbaser = old_propbaser; 318 + propbaser = update_64bit_reg(propbaser, addr & 4, len, val); 319 + propbaser = vgic_sanitise_propbaser(propbaser); 320 + } while (cmpxchg64(&dist->propbaser, old_propbaser, 321 + propbaser) != old_propbaser); 319 322 } 320 323 321 324 static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, ··· 334 331 unsigned long val) 335 332 { 336 333 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 337 - u64 pendbaser = vgic_cpu->pendbaser; 334 + u64 old_pendbaser, pendbaser; 338 335 339 336 /* Storing a value with LPIs already enabled is undefined */ 340 337 if (vgic_cpu->lpis_enabled) 341 338 return; 342 339 343 - pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); 344 - pendbaser = vgic_sanitise_pendbaser(pendbaser); 345 - 346 - vgic_cpu->pendbaser = pendbaser; 340 + do { 341 + old_pendbaser = vgic_cpu->pendbaser; 342 + pendbaser = old_pendbaser; 343 + pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); 344 + pendbaser = vgic_sanitise_pendbaser(pendbaser); 345 + } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser, 346 + pendbaser) != old_pendbaser); 347 347 } 348 348 349 349 /*
+8
virt/kvm/arm/vgic/vgic-v3.c
··· 289 289 goto out; 290 290 } 291 291 292 + if (vgic_has_its(kvm)) { 293 + ret = vgic_register_its_iodevs(kvm); 294 + if (ret) { 295 + kvm_err("Unable to register VGIC ITS MMIO regions\n"); 296 + goto out; 297 + } 298 + } 299 + 292 300 dist->ready = true; 293 301 294 302 out:
+6 -6
virt/kvm/arm/vgic/vgic.c
··· 117 117 118 118 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) 119 119 { 120 - struct vgic_dist *dist; 120 + struct vgic_dist *dist = &kvm->arch.vgic; 121 121 122 122 if (irq->intid < VGIC_MIN_LPI) 123 123 return; 124 124 125 - if (!kref_put(&irq->refcount, vgic_irq_release)) 126 - return; 127 - 128 - dist = &kvm->arch.vgic; 129 - 130 125 spin_lock(&dist->lpi_list_lock); 126 + if (!kref_put(&irq->refcount, vgic_irq_release)) { 127 + spin_unlock(&dist->lpi_list_lock); 128 + return; 129 + }; 130 + 131 131 list_del(&irq->lpi_list); 132 132 dist->lpi_list_count--; 133 133 spin_unlock(&dist->lpi_list_lock);
+6
virt/kvm/arm/vgic/vgic.h
··· 84 84 int vgic_v3_probe(const struct gic_kvm_info *info); 85 85 int vgic_v3_map_resources(struct kvm *kvm); 86 86 int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); 87 + int vgic_register_its_iodevs(struct kvm *kvm); 87 88 bool vgic_has_its(struct kvm *kvm); 88 89 int kvm_vgic_register_its_device(void); 89 90 void vgic_enable_lpis(struct kvm_vcpu *vcpu); ··· 137 136 138 137 static inline int vgic_register_redist_iodevs(struct kvm *kvm, 139 138 gpa_t dist_base_address) 139 + { 140 + return -ENODEV; 141 + } 142 + 143 + static inline int vgic_register_its_iodevs(struct kvm *kvm) 140 144 { 141 145 return -ENODEV; 142 146 }