Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"Fixes for PPC and s390"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: PPC: Book3S HV: Restore SPRG3 in kvmhv_p9_guest_entry()
KVM: PPC: Book3S HV: Fix lockdep warning when entering guest on POWER9
KVM: PPC: Book3S HV: XIVE: Fix page offset when clearing ESB pages
KVM: PPC: Book3S HV: XIVE: Take the srcu read lock when accessing memslots
KVM: PPC: Book3S HV: XIVE: Do not clear IRQ data of passthrough interrupts
KVM: PPC: Book3S HV: XIVE: Introduce a new mutex for the XIVE device
KVM: PPC: Book3S HV: XIVE: Fix the enforced limit on the vCPU identifier
KVM: PPC: Book3S HV: XIVE: Do not test the EQ flag validity when resetting
KVM: PPC: Book3S HV: XIVE: Clear file mapping when device is released
KVM: PPC: Book3S HV: Don't take kvm->lock around kvm_for_each_vcpu
KVM: PPC: Book3S: Use new mutex to synchronize access to rtas token list
KVM: PPC: Book3S HV: Use new mutex to synchronize MMU setup
KVM: PPC: Book3S HV: Avoid touching arch.mmu_ready in XIVE release functions
KVM: s390: Do not report unusable IDs via KVM_CAP_MAX_VCPU_ID
kvm: fix compile on s390 part 2

+157 -117
+3
arch/mips/kvm/mips.c
··· 1122 1122 case KVM_CAP_MAX_VCPUS: 1123 1123 r = KVM_MAX_VCPUS; 1124 1124 break; 1125 + case KVM_CAP_MAX_VCPU_ID: 1126 + r = KVM_MAX_VCPU_ID; 1127 + break; 1125 1128 case KVM_CAP_MIPS_FPU: 1126 1129 /* We don't handle systems with inconsistent cpu_has_fpu */ 1127 1130 r = !!raw_cpu_has_fpu;
+2
arch/powerpc/include/asm/kvm_host.h
··· 309 309 #ifdef CONFIG_PPC_BOOK3S_64 310 310 struct list_head spapr_tce_tables; 311 311 struct list_head rtas_tokens; 312 + struct mutex rtas_token_lock; 312 313 DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); 313 314 #endif 314 315 #ifdef CONFIG_KVM_MPIC ··· 326 325 #endif 327 326 struct kvmppc_ops *kvm_ops; 328 327 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 328 + struct mutex mmu_setup_lock; /* nests inside vcpu mutexes */ 329 329 u64 l1_ptcr; 330 330 int max_nested_lpid; 331 331 struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
+1
arch/powerpc/kvm/book3s.c
··· 902 902 #ifdef CONFIG_PPC64 903 903 INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables); 904 904 INIT_LIST_HEAD(&kvm->arch.rtas_tokens); 905 + mutex_init(&kvm->arch.rtas_token_lock); 905 906 #endif 906 907 907 908 return kvm->arch.kvm_ops->init_vm(kvm);
+18 -18
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 63 63 struct work_struct work; 64 64 u32 order; 65 65 66 - /* These fields protected by kvm->lock */ 66 + /* These fields protected by kvm->arch.mmu_setup_lock */ 67 67 68 68 /* Possible values and their usage: 69 69 * <0 an error occurred during allocation, ··· 73 73 int error; 74 74 75 75 /* Private to the work thread, until error != -EBUSY, 76 - * then protected by kvm->lock. 76 + * then protected by kvm->arch.mmu_setup_lock. 77 77 */ 78 78 struct kvm_hpt_info hpt; 79 79 }; ··· 139 139 long err = -EBUSY; 140 140 struct kvm_hpt_info info; 141 141 142 - mutex_lock(&kvm->lock); 142 + mutex_lock(&kvm->arch.mmu_setup_lock); 143 143 if (kvm->arch.mmu_ready) { 144 144 kvm->arch.mmu_ready = 0; 145 145 /* order mmu_ready vs. vcpus_running */ ··· 183 183 /* Ensure that each vcpu will flush its TLB on next entry. */ 184 184 cpumask_setall(&kvm->arch.need_tlb_flush); 185 185 186 - mutex_unlock(&kvm->lock); 186 + mutex_unlock(&kvm->arch.mmu_setup_lock); 187 187 return err; 188 188 } 189 189 ··· 1447 1447 1448 1448 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) 1449 1449 { 1450 - if (WARN_ON(!mutex_is_locked(&kvm->lock))) 1450 + if (WARN_ON(!mutex_is_locked(&kvm->arch.mmu_setup_lock))) 1451 1451 return; 1452 1452 1453 1453 if (!resize) ··· 1474 1474 if (WARN_ON(resize->error != -EBUSY)) 1475 1475 return; 1476 1476 1477 - mutex_lock(&kvm->lock); 1477 + mutex_lock(&kvm->arch.mmu_setup_lock); 1478 1478 1479 1479 /* Request is still current? */ 1480 1480 if (kvm->arch.resize_hpt == resize) { 1481 1481 /* We may request large allocations here: 1482 - * do not sleep with kvm->lock held for a while. 1482 + * do not sleep with kvm->arch.mmu_setup_lock held for a while. 
1483 1483 */ 1484 - mutex_unlock(&kvm->lock); 1484 + mutex_unlock(&kvm->arch.mmu_setup_lock); 1485 1485 1486 1486 resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", 1487 1487 resize->order); ··· 1494 1494 if (WARN_ON(err == -EBUSY)) 1495 1495 err = -EINPROGRESS; 1496 1496 1497 - mutex_lock(&kvm->lock); 1497 + mutex_lock(&kvm->arch.mmu_setup_lock); 1498 1498 /* It is possible that kvm->arch.resize_hpt != resize 1499 - * after we grab kvm->lock again. 1499 + * after we grab kvm->arch.mmu_setup_lock again. 1500 1500 */ 1501 1501 } 1502 1502 ··· 1505 1505 if (kvm->arch.resize_hpt != resize) 1506 1506 resize_hpt_release(kvm, resize); 1507 1507 1508 - mutex_unlock(&kvm->lock); 1508 + mutex_unlock(&kvm->arch.mmu_setup_lock); 1509 1509 } 1510 1510 1511 1511 long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, ··· 1522 1522 if (shift && ((shift < 18) || (shift > 46))) 1523 1523 return -EINVAL; 1524 1524 1525 - mutex_lock(&kvm->lock); 1525 + mutex_lock(&kvm->arch.mmu_setup_lock); 1526 1526 1527 1527 resize = kvm->arch.resize_hpt; 1528 1528 ··· 1565 1565 ret = 100; /* estimated time in ms */ 1566 1566 1567 1567 out: 1568 - mutex_unlock(&kvm->lock); 1568 + mutex_unlock(&kvm->arch.mmu_setup_lock); 1569 1569 return ret; 1570 1570 } 1571 1571 ··· 1588 1588 if (shift && ((shift < 18) || (shift > 46))) 1589 1589 return -EINVAL; 1590 1590 1591 - mutex_lock(&kvm->lock); 1591 + mutex_lock(&kvm->arch.mmu_setup_lock); 1592 1592 1593 1593 resize = kvm->arch.resize_hpt; 1594 1594 ··· 1625 1625 smp_mb(); 1626 1626 out_no_hpt: 1627 1627 resize_hpt_release(kvm, resize); 1628 - mutex_unlock(&kvm->lock); 1628 + mutex_unlock(&kvm->arch.mmu_setup_lock); 1629 1629 return ret; 1630 1630 } 1631 1631 ··· 1868 1868 return -EINVAL; 1869 1869 1870 1870 /* lock out vcpus from running while we're doing this */ 1871 - mutex_lock(&kvm->lock); 1871 + mutex_lock(&kvm->arch.mmu_setup_lock); 1872 1872 mmu_ready = kvm->arch.mmu_ready; 1873 1873 if (mmu_ready) { 1874 1874 kvm->arch.mmu_ready = 
0; /* temporarily */ ··· 1876 1876 smp_mb(); 1877 1877 if (atomic_read(&kvm->arch.vcpus_running)) { 1878 1878 kvm->arch.mmu_ready = 1; 1879 - mutex_unlock(&kvm->lock); 1879 + mutex_unlock(&kvm->arch.mmu_setup_lock); 1880 1880 return -EBUSY; 1881 1881 } 1882 1882 } ··· 1963 1963 /* Order HPTE updates vs. mmu_ready */ 1964 1964 smp_wmb(); 1965 1965 kvm->arch.mmu_ready = mmu_ready; 1966 - mutex_unlock(&kvm->lock); 1966 + mutex_unlock(&kvm->arch.mmu_setup_lock); 1967 1967 1968 1968 if (err) 1969 1969 return err;
+30 -18
arch/powerpc/kvm/book3s_hv.c
··· 446 446 447 447 static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) 448 448 { 449 - struct kvm_vcpu *ret; 450 - 451 - mutex_lock(&kvm->lock); 452 - ret = kvm_get_vcpu_by_id(kvm, id); 453 - mutex_unlock(&kvm->lock); 454 - return ret; 449 + return kvm_get_vcpu_by_id(kvm, id); 455 450 } 456 451 457 452 static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) ··· 1578 1583 struct kvmppc_vcore *vc = vcpu->arch.vcore; 1579 1584 u64 mask; 1580 1585 1581 - mutex_lock(&kvm->lock); 1582 1586 spin_lock(&vc->lock); 1583 1587 /* 1584 1588 * If ILE (interrupt little-endian) has changed, update the ··· 1617 1623 mask &= 0xFFFFFFFF; 1618 1624 vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); 1619 1625 spin_unlock(&vc->lock); 1620 - mutex_unlock(&kvm->lock); 1621 1626 } 1622 1627 1623 1628 static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, ··· 2331 2338 pr_devel("KVM: collision on id %u", id); 2332 2339 vcore = NULL; 2333 2340 } else if (!vcore) { 2341 + /* 2342 + * Take mmu_setup_lock for mutual exclusion 2343 + * with kvmppc_update_lpcr(). 
2344 + */ 2334 2345 err = -ENOMEM; 2335 2346 vcore = kvmppc_vcore_create(kvm, 2336 2347 id & ~(kvm->arch.smt_mode - 1)); 2348 + mutex_lock(&kvm->arch.mmu_setup_lock); 2337 2349 kvm->arch.vcores[core] = vcore; 2338 2350 kvm->arch.online_vcores++; 2351 + mutex_unlock(&kvm->arch.mmu_setup_lock); 2339 2352 } 2340 2353 } 2341 2354 mutex_unlock(&kvm->lock); ··· 3662 3663 vc->in_guest = 0; 3663 3664 3664 3665 mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); 3666 + mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); 3665 3667 3666 3668 kvmhv_load_host_pmu(); 3667 3669 ··· 3859 3859 int r = 0; 3860 3860 struct kvm *kvm = vcpu->kvm; 3861 3861 3862 - mutex_lock(&kvm->lock); 3862 + mutex_lock(&kvm->arch.mmu_setup_lock); 3863 3863 if (!kvm->arch.mmu_ready) { 3864 3864 if (!kvm_is_radix(kvm)) 3865 3865 r = kvmppc_hv_setup_htab_rma(vcpu); ··· 3869 3869 kvm->arch.mmu_ready = 1; 3870 3870 } 3871 3871 } 3872 - mutex_unlock(&kvm->lock); 3872 + mutex_unlock(&kvm->arch.mmu_setup_lock); 3873 3873 return r; 3874 3874 } 3875 3875 ··· 4091 4091 kvmppc_check_need_tlb_flush(kvm, pcpu, nested); 4092 4092 } 4093 4093 4094 - trace_hardirqs_on(); 4095 4094 guest_enter_irqoff(); 4096 4095 4097 4096 srcu_idx = srcu_read_lock(&kvm->srcu); 4098 4097 4099 4098 this_cpu_disable_ftrace(); 4100 4099 4100 + /* Tell lockdep that we're about to enable interrupts */ 4101 + trace_hardirqs_on(); 4102 + 4101 4103 trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr); 4102 4104 vcpu->arch.trap = trap; 4105 + 4106 + trace_hardirqs_off(); 4103 4107 4104 4108 this_cpu_enable_ftrace(); 4105 4109 ··· 4114 4110 isync(); 4115 4111 } 4116 4112 4117 - trace_hardirqs_off(); 4118 4113 set_irq_happened(trap); 4119 4114 4120 4115 kvmppc_set_host_core(pcpu); ··· 4481 4478 4482 4479 /* 4483 4480 * Update LPCR values in kvm->arch and in vcores. 4484 - * Caller must hold kvm->lock. 4481 + * Caller must hold kvm->arch.mmu_setup_lock (for mutual exclusion 4482 + * of kvm->arch.lpcr update). 
4485 4483 */ 4486 4484 void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) 4487 4485 { ··· 4534 4530 4535 4531 /* 4536 4532 * Set up HPT (hashed page table) and RMA (real-mode area). 4537 - * Must be called with kvm->lock held. 4533 + * Must be called with kvm->arch.mmu_setup_lock held. 4538 4534 */ 4539 4535 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) 4540 4536 { ··· 4622 4618 goto out_srcu; 4623 4619 } 4624 4620 4625 - /* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */ 4621 + /* 4622 + * Must be called with kvm->arch.mmu_setup_lock held and 4623 + * mmu_ready = 0 and no vcpus running. 4624 + */ 4626 4625 int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) 4627 4626 { 4628 4627 if (nesting_enabled(kvm)) ··· 4642 4635 return 0; 4643 4636 } 4644 4637 4645 - /* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */ 4638 + /* 4639 + * Must be called with kvm->arch.mmu_setup_lock held and 4640 + * mmu_ready = 0 and no vcpus running. 4641 + */ 4646 4642 int kvmppc_switch_mmu_to_radix(struct kvm *kvm) 4647 4643 { 4648 4644 int err; ··· 4749 4739 unsigned long lpcr, lpid; 4750 4740 char buf[32]; 4751 4741 int ret; 4742 + 4743 + mutex_init(&kvm->arch.mmu_setup_lock); 4752 4744 4753 4745 /* Allocate the guest's logical partition ID */ 4754 4746 ··· 5277 5265 if (kvmhv_on_pseries() && !radix) 5278 5266 return -EINVAL; 5279 5267 5280 - mutex_lock(&kvm->lock); 5268 + mutex_lock(&kvm->arch.mmu_setup_lock); 5281 5269 if (radix != kvm_is_radix(kvm)) { 5282 5270 if (kvm->arch.mmu_ready) { 5283 5271 kvm->arch.mmu_ready = 0; ··· 5305 5293 err = 0; 5306 5294 5307 5295 out_unlock: 5308 - mutex_unlock(&kvm->lock); 5296 + mutex_unlock(&kvm->arch.mmu_setup_lock); 5309 5297 return err; 5310 5298 } 5311 5299
+6 -8
arch/powerpc/kvm/book3s_rtas.c
··· 146 146 { 147 147 struct rtas_token_definition *d, *tmp; 148 148 149 - lockdep_assert_held(&kvm->lock); 149 + lockdep_assert_held(&kvm->arch.rtas_token_lock); 150 150 151 151 list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { 152 152 if (rtas_name_matches(d->handler->name, name)) { ··· 167 167 bool found; 168 168 int i; 169 169 170 - lockdep_assert_held(&kvm->lock); 170 + lockdep_assert_held(&kvm->arch.rtas_token_lock); 171 171 172 172 list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { 173 173 if (d->token == token) ··· 206 206 if (copy_from_user(&args, argp, sizeof(args))) 207 207 return -EFAULT; 208 208 209 - mutex_lock(&kvm->lock); 209 + mutex_lock(&kvm->arch.rtas_token_lock); 210 210 211 211 if (args.token) 212 212 rc = rtas_token_define(kvm, args.name, args.token); 213 213 else 214 214 rc = rtas_token_undefine(kvm, args.name); 215 215 216 - mutex_unlock(&kvm->lock); 216 + mutex_unlock(&kvm->arch.rtas_token_lock); 217 217 218 218 return rc; 219 219 } ··· 245 245 orig_rets = args.rets; 246 246 args.rets = &args.args[be32_to_cpu(args.nargs)]; 247 247 248 - mutex_lock(&vcpu->kvm->lock); 248 + mutex_lock(&vcpu->kvm->arch.rtas_token_lock); 249 249 250 250 rc = -ENOENT; 251 251 list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { ··· 256 256 } 257 257 } 258 258 259 - mutex_unlock(&vcpu->kvm->lock); 259 + mutex_unlock(&vcpu->kvm->arch.rtas_token_lock); 260 260 261 261 if (rc == 0) { 262 262 args.rets = orig_rets; ··· 281 281 void kvmppc_rtas_tokens_free(struct kvm *kvm) 282 282 { 283 283 struct rtas_token_definition *d, *tmp; 284 - 285 - lockdep_assert_held(&kvm->lock); 286 284 287 285 list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { 288 286 list_del(&d->list);
+27 -28
arch/powerpc/kvm/book3s_xive.c
··· 271 271 return rc; 272 272 } 273 273 274 - /* Called with kvm_lock held */ 274 + /* Called with xive->lock held */ 275 275 static int xive_check_provisioning(struct kvm *kvm, u8 prio) 276 276 { 277 277 struct kvmppc_xive *xive = kvm->arch.xive; 278 278 struct kvm_vcpu *vcpu; 279 279 int i, rc; 280 280 281 - lockdep_assert_held(&kvm->lock); 281 + lockdep_assert_held(&xive->lock); 282 282 283 283 /* Already provisioned ? */ 284 284 if (xive->qmap & (1 << prio)) ··· 621 621 irq, server, priority); 622 622 623 623 /* First, check provisioning of queues */ 624 - if (priority != MASKED) 624 + if (priority != MASKED) { 625 + mutex_lock(&xive->lock); 625 626 rc = xive_check_provisioning(xive->kvm, 626 627 xive_prio_from_guest(priority)); 628 + mutex_unlock(&xive->lock); 629 + } 627 630 if (rc) { 628 631 pr_devel(" provisioning failure %d !\n", rc); 629 632 return rc; ··· 1202 1199 return -ENOMEM; 1203 1200 1204 1201 /* We need to synchronize with queue provisioning */ 1205 - mutex_lock(&vcpu->kvm->lock); 1202 + mutex_lock(&xive->lock); 1206 1203 vcpu->arch.xive_vcpu = xc; 1207 1204 xc->xive = xive; 1208 1205 xc->vcpu = vcpu; ··· 1286 1283 xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00); 1287 1284 1288 1285 bail: 1289 - mutex_unlock(&vcpu->kvm->lock); 1286 + mutex_unlock(&xive->lock); 1290 1287 if (r) { 1291 1288 kvmppc_xive_cleanup_vcpu(vcpu); 1292 1289 return r; ··· 1530 1527 struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( 1531 1528 struct kvmppc_xive *xive, int irq) 1532 1529 { 1533 - struct kvm *kvm = xive->kvm; 1534 1530 struct kvmppc_xive_src_block *sb; 1535 1531 int i, bid; 1536 1532 1537 1533 bid = irq >> KVMPPC_XICS_ICS_SHIFT; 1538 1534 1539 - mutex_lock(&kvm->lock); 1535 + mutex_lock(&xive->lock); 1540 1536 1541 1537 /* block already exists - somebody else got here first */ 1542 1538 if (xive->src_blocks[bid]) ··· 1562 1560 xive->max_sbid = bid; 1563 1561 1564 1562 out: 1565 - mutex_unlock(&kvm->lock); 1563 + mutex_unlock(&xive->lock); 1566 
1564 return xive->src_blocks[bid]; 1567 1565 } 1568 1566 ··· 1672 1670 /* If we have a priority target the interrupt */ 1673 1671 if (act_prio != MASKED) { 1674 1672 /* First, check provisioning of queues */ 1675 - mutex_lock(&xive->kvm->lock); 1673 + mutex_lock(&xive->lock); 1676 1674 rc = xive_check_provisioning(xive->kvm, act_prio); 1677 - mutex_unlock(&xive->kvm->lock); 1675 + mutex_unlock(&xive->lock); 1678 1676 1679 1677 /* Target interrupt */ 1680 1678 if (rc == 0) ··· 1828 1826 { 1829 1827 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); 1830 1828 xive_native_configure_irq(hw_num, 0, MASKED, 0); 1831 - xive_cleanup_irq_data(xd); 1832 1829 } 1833 1830 1834 1831 void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) ··· 1841 1840 continue; 1842 1841 1843 1842 kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data); 1843 + xive_cleanup_irq_data(&state->ipi_data); 1844 1844 xive_native_free_irq(state->ipi_number); 1845 1845 1846 - /* Pass-through, cleanup too */ 1846 + /* Pass-through, cleanup too but keep IRQ hw data */ 1847 1847 if (state->pt_number) 1848 1848 kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data); 1849 1849 ··· 1861 1859 struct kvm *kvm = xive->kvm; 1862 1860 struct kvm_vcpu *vcpu; 1863 1861 int i; 1864 - int was_ready; 1865 1862 1866 1863 pr_devel("Releasing xive device\n"); 1867 1864 1868 - debugfs_remove(xive->dentry); 1869 - 1870 1865 /* 1871 - * Clearing mmu_ready temporarily while holding kvm->lock 1872 - * is a way of ensuring that no vcpus can enter the guest 1873 - * until we drop kvm->lock. Doing kick_all_cpus_sync() 1874 - * ensures that any vcpu executing inside the guest has 1875 - * exited the guest. Once kick_all_cpus_sync() has finished, 1876 - * we know that no vcpu can be executing the XIVE push or 1877 - * pull code, or executing a XICS hcall. 1878 - * 1879 1866 * Since this is the device release function, we know that 1880 1867 * userspace does not have any open fd referring to the 1881 1868 * device. 
Therefore there can not be any of the device ··· 1872 1881 * and similarly, the connect_vcpu and set/clr_mapped 1873 1882 * functions also cannot be being executed. 1874 1883 */ 1875 - was_ready = kvm->arch.mmu_ready; 1876 - kvm->arch.mmu_ready = 0; 1877 - kick_all_cpus_sync(); 1884 + 1885 + debugfs_remove(xive->dentry); 1878 1886 1879 1887 /* 1880 1888 * We should clean up the vCPU interrupt presenters first. ··· 1882 1892 /* 1883 1893 * Take vcpu->mutex to ensure that no one_reg get/set ioctl 1884 1894 * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently. 1895 + * Holding the vcpu->mutex also means that the vcpu cannot 1896 + * be executing the KVM_RUN ioctl, and therefore it cannot 1897 + * be executing the XIVE push or pull code or accessing 1898 + * the XIVE MMIO regions. 1885 1899 */ 1886 1900 mutex_lock(&vcpu->mutex); 1887 1901 kvmppc_xive_cleanup_vcpu(vcpu); 1888 1902 mutex_unlock(&vcpu->mutex); 1889 1903 } 1890 1904 1905 + /* 1906 + * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type 1907 + * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe 1908 + * against xive code getting called during vcpu execution or 1909 + * set/get one_reg operations. 1910 + */ 1891 1911 kvm->arch.xive = NULL; 1892 1912 1893 1913 /* Mask and free interrupts */ ··· 1910 1910 1911 1911 if (xive->vp_base != XIVE_INVALID_VP) 1912 1912 xive_native_free_vp_block(xive->vp_base); 1913 - 1914 - kvm->arch.mmu_ready = was_ready; 1915 1913 1916 1914 /* 1917 1915 * A reference of the kvmppc_xive pointer is now kept under ··· 1965 1967 dev->private = xive; 1966 1968 xive->dev = dev; 1967 1969 xive->kvm = kvm; 1970 + mutex_init(&xive->lock); 1968 1971 1969 1972 /* Already there ? */ 1970 1973 if (kvm->arch.xive)
+1
arch/powerpc/kvm/book3s_xive.h
··· 141 141 struct kvmppc_xive_ops *ops; 142 142 struct address_space *mapping; 143 143 struct mutex mapping_lock; 144 + struct mutex lock; 144 145 }; 145 146 146 147 #define KVMPPC_XIVE_Q_COUNT 8
+57 -43
arch/powerpc/kvm/book3s_xive_native.c
··· 109 109 return -EPERM; 110 110 if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) 111 111 return -EBUSY; 112 - if (server_num >= KVM_MAX_VCPUS) { 112 + if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { 113 113 pr_devel("Out of bounds !\n"); 114 114 return -EINVAL; 115 115 } 116 116 117 - mutex_lock(&vcpu->kvm->lock); 117 + mutex_lock(&xive->lock); 118 118 119 119 if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { 120 120 pr_devel("Duplicate !\n"); ··· 159 159 160 160 /* TODO: reset all queues to a clean state ? */ 161 161 bail: 162 - mutex_unlock(&vcpu->kvm->lock); 162 + mutex_unlock(&xive->lock); 163 163 if (rc) 164 164 kvmppc_xive_native_cleanup_vcpu(vcpu); 165 165 ··· 172 172 static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq) 173 173 { 174 174 struct kvmppc_xive *xive = kvm->arch.xive; 175 + pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2; 175 176 176 177 if (irq >= KVMPPC_XIVE_NR_IRQS) 177 178 return -EINVAL; ··· 186 185 mutex_lock(&xive->mapping_lock); 187 186 if (xive->mapping) 188 187 unmap_mapping_range(xive->mapping, 189 - irq * (2ull << PAGE_SHIFT), 188 + esb_pgoff << PAGE_SHIFT, 190 189 2ull << PAGE_SHIFT, 1); 191 190 mutex_unlock(&xive->mapping_lock); 192 191 return 0; ··· 536 535 struct xive_q *q; 537 536 gfn_t gfn; 538 537 unsigned long page_size; 538 + int srcu_idx; 539 539 540 540 /* 541 541 * Demangle priority/server tuple from the EQ identifier ··· 567 565 __func__, server, priority, kvm_eq.flags, 568 566 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); 569 567 570 - /* 571 - * sPAPR specifies a "Unconditional Notify (n) flag" for the 572 - * H_INT_SET_QUEUE_CONFIG hcall which forces notification 573 - * without using the coalescing mechanisms provided by the 574 - * XIVE END ESBs. This is required on KVM as notification 575 - * using the END ESBs is not supported. 
576 - */ 577 - if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) { 578 - pr_err("invalid flags %d\n", kvm_eq.flags); 579 - return -EINVAL; 580 - } 581 - 582 - rc = xive_native_validate_queue_size(kvm_eq.qshift); 583 - if (rc) { 584 - pr_err("invalid queue size %d\n", kvm_eq.qshift); 585 - return rc; 586 - } 587 - 588 568 /* reset queue and disable queueing */ 589 569 if (!kvm_eq.qshift) { 590 570 q->guest_qaddr = 0; ··· 588 604 return 0; 589 605 } 590 606 607 + /* 608 + * sPAPR specifies a "Unconditional Notify (n) flag" for the 609 + * H_INT_SET_QUEUE_CONFIG hcall which forces notification 610 + * without using the coalescing mechanisms provided by the 611 + * XIVE END ESBs. This is required on KVM as notification 612 + * using the END ESBs is not supported. 613 + */ 614 + if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) { 615 + pr_err("invalid flags %d\n", kvm_eq.flags); 616 + return -EINVAL; 617 + } 618 + 619 + rc = xive_native_validate_queue_size(kvm_eq.qshift); 620 + if (rc) { 621 + pr_err("invalid queue size %d\n", kvm_eq.qshift); 622 + return rc; 623 + } 624 + 591 625 if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) { 592 626 pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr, 593 627 1ull << kvm_eq.qshift); 594 628 return -EINVAL; 595 629 } 596 630 631 + srcu_idx = srcu_read_lock(&kvm->srcu); 597 632 gfn = gpa_to_gfn(kvm_eq.qaddr); 598 633 page = gfn_to_page(kvm, gfn); 599 634 if (is_error_page(page)) { 635 + srcu_read_unlock(&kvm->srcu, srcu_idx); 600 636 pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); 601 637 return -EINVAL; 602 638 } 603 639 604 640 page_size = kvm_host_page_size(kvm, gfn); 605 641 if (1ull << kvm_eq.qshift > page_size) { 642 + srcu_read_unlock(&kvm->srcu, srcu_idx); 606 643 pr_warn("Incompatible host page size %lx!\n", page_size); 607 644 return -EINVAL; 608 645 } 609 646 610 647 qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); 648 + srcu_read_unlock(&kvm->srcu, srcu_idx); 611 649 612 650 /* 613 651 * Backup the 
queue page guest address to the mark EQ page ··· 778 772 779 773 pr_devel("%s\n", __func__); 780 774 781 - mutex_lock(&kvm->lock); 775 + mutex_lock(&xive->lock); 782 776 783 777 kvm_for_each_vcpu(i, vcpu, kvm) { 784 778 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; ··· 816 810 } 817 811 } 818 812 819 - mutex_unlock(&kvm->lock); 813 + mutex_unlock(&xive->lock); 820 814 821 815 return 0; 822 816 } ··· 860 854 { 861 855 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 862 856 unsigned int prio; 857 + int srcu_idx; 863 858 864 859 if (!xc) 865 860 return -ENOENT; ··· 872 865 continue; 873 866 874 867 /* Mark EQ page dirty for migration */ 868 + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 875 869 mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr)); 870 + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 876 871 } 877 872 return 0; 878 873 } ··· 887 878 888 879 pr_devel("%s\n", __func__); 889 880 890 - mutex_lock(&kvm->lock); 881 + mutex_lock(&xive->lock); 891 882 for (i = 0; i <= xive->max_sbid; i++) { 892 883 struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; 893 884 ··· 901 892 kvm_for_each_vcpu(i, vcpu, kvm) { 902 893 kvmppc_xive_native_vcpu_eq_sync(vcpu); 903 894 } 904 - mutex_unlock(&kvm->lock); 895 + mutex_unlock(&xive->lock); 905 896 906 897 return 0; 907 898 } ··· 974 965 } 975 966 976 967 /* 977 - * Called when device fd is closed 968 + * Called when device fd is closed. kvm->lock is held. 978 969 */ 979 970 static void kvmppc_xive_native_release(struct kvm_device *dev) 980 971 { ··· 982 973 struct kvm *kvm = xive->kvm; 983 974 struct kvm_vcpu *vcpu; 984 975 int i; 985 - int was_ready; 986 - 987 - debugfs_remove(xive->dentry); 988 976 989 977 pr_devel("Releasing xive native device\n"); 990 978 991 979 /* 992 - * Clearing mmu_ready temporarily while holding kvm->lock 993 - * is a way of ensuring that no vcpus can enter the guest 994 - * until we drop kvm->lock. 
Doing kick_all_cpus_sync() 995 - * ensures that any vcpu executing inside the guest has 996 - * exited the guest. Once kick_all_cpus_sync() has finished, 997 - * we know that no vcpu can be executing the XIVE push or 998 - * pull code or accessing the XIVE MMIO regions. 999 - * 980 + * Clear the KVM device file address_space which is used to 981 + * unmap the ESB pages when a device is passed-through. 982 + */ 983 + mutex_lock(&xive->mapping_lock); 984 + xive->mapping = NULL; 985 + mutex_unlock(&xive->mapping_lock); 986 + 987 + /* 1000 988 * Since this is the device release function, we know that 1001 989 * userspace does not have any open fd or mmap referring to 1002 990 * the device. Therefore there can not be any of the ··· 1002 996 * connect_vcpu and set/clr_mapped functions also cannot 1003 997 * be being executed. 1004 998 */ 1005 - was_ready = kvm->arch.mmu_ready; 1006 - kvm->arch.mmu_ready = 0; 1007 - kick_all_cpus_sync(); 999 + 1000 + debugfs_remove(xive->dentry); 1008 1001 1009 1002 /* 1010 1003 * We should clean up the vCPU interrupt presenters first. ··· 1012 1007 /* 1013 1008 * Take vcpu->mutex to ensure that no one_reg get/set ioctl 1014 1009 * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done. 1010 + * Holding the vcpu->mutex also means that the vcpu cannot 1011 + * be executing the KVM_RUN ioctl, and therefore it cannot 1012 + * be executing the XIVE push or pull code or accessing 1013 + * the XIVE MMIO regions. 1015 1014 */ 1016 1015 mutex_lock(&vcpu->mutex); 1017 1016 kvmppc_xive_native_cleanup_vcpu(vcpu); 1018 1017 mutex_unlock(&vcpu->mutex); 1019 1018 } 1020 1019 1020 + /* 1021 + * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type 1022 + * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe 1023 + * against xive code getting called during vcpu execution or 1024 + * set/get one_reg operations. 
1025 + */ 1021 1026 kvm->arch.xive = NULL; 1022 1027 1023 1028 for (i = 0; i <= xive->max_sbid; i++) { ··· 1039 1024 1040 1025 if (xive->vp_base != XIVE_INVALID_VP) 1041 1026 xive_native_free_vp_block(xive->vp_base); 1042 - 1043 - kvm->arch.mmu_ready = was_ready; 1044 1027 1045 1028 /* 1046 1029 * A reference of the kvmppc_xive pointer is now kept under ··· 1073 1060 xive->kvm = kvm; 1074 1061 kvm->arch.xive = xive; 1075 1062 mutex_init(&xive->mapping_lock); 1063 + mutex_init(&xive->lock); 1076 1064 1077 1065 /* 1078 1066 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
+3
arch/powerpc/kvm/powerpc.c
··· 657 657 case KVM_CAP_MAX_VCPUS: 658 658 r = KVM_MAX_VCPUS; 659 659 break; 660 + case KVM_CAP_MAX_VCPU_ID: 661 + r = KVM_MAX_VCPU_ID; 662 + break; 660 663 #ifdef CONFIG_PPC_BOOK3S_64 661 664 case KVM_CAP_PPC_GET_SMMU_INFO: 662 665 r = 1;
+1
arch/s390/kvm/kvm-s390.c
··· 539 539 break; 540 540 case KVM_CAP_NR_VCPUS: 541 541 case KVM_CAP_MAX_VCPUS: 542 + case KVM_CAP_MAX_VCPU_ID: 542 543 r = KVM_S390_BSCA_CPU_SLOTS; 543 544 if (!kvm_s390_use_sca_entries()) 544 545 r = KVM_MAX_VCPUS;
+3
arch/x86/kvm/x86.c
··· 3122 3122 case KVM_CAP_MAX_VCPUS: 3123 3123 r = KVM_MAX_VCPUS; 3124 3124 break; 3125 + case KVM_CAP_MAX_VCPU_ID: 3126 + r = KVM_MAX_VCPU_ID; 3127 + break; 3125 3128 case KVM_CAP_PV_MMU: /* obsolete */ 3126 3129 r = 0; 3127 3130 break;
+3
virt/kvm/arm/arm.c
··· 224 224 case KVM_CAP_MAX_VCPUS: 225 225 r = KVM_MAX_VCPUS; 226 226 break; 227 + case KVM_CAP_MAX_VCPU_ID: 228 + r = KVM_MAX_VCPU_ID; 229 + break; 227 230 case KVM_CAP_MSI_DEVID: 228 231 if (!kvm) 229 232 r = -EINVAL;
+2 -2
virt/kvm/kvm_main.c
··· 1795 1795 1796 1796 if (map->page) 1797 1797 kunmap(map->page); 1798 + #ifdef CONFIG_HAS_IOMEM 1798 1799 else 1799 1800 memunmap(map->hva); 1801 + #endif 1800 1802 1801 1803 if (dirty) { 1802 1804 kvm_vcpu_mark_page_dirty(vcpu, map->gfn); ··· 3151 3149 case KVM_CAP_MULTI_ADDRESS_SPACE: 3152 3150 return KVM_ADDRESS_SPACE_NUM; 3153 3151 #endif 3154 - case KVM_CAP_MAX_VCPU_ID: 3155 - return KVM_MAX_VCPU_ID; 3156 3152 case KVM_CAP_NR_MEMSLOTS: 3157 3153 return KVM_USER_MEM_SLOTS; 3158 3154 default: