Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:
"Mostly bugfixes, but also:

- s390 support for KVM selftests

- LAPIC timer offloading to housekeeping CPUs

- Extend an s390 optimization for overcommitted hosts to all
architectures

- Debugging cleanups and improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
KVM: x86: Add fixed counters to PMU filter
KVM: nVMX: do not use dangling shadow VMCS after guest reset
KVM: VMX: dump VMCS on failed entry
KVM: x86/vPMU: refine kvm_pmu err msg when event creation failed
KVM: s390: Use kvm_vcpu_wake_up in kvm_s390_vcpu_wakeup
KVM: Boost vCPUs that are delivering interrupts
KVM: selftests: Remove superfluous define from vmx.c
KVM: SVM: Fix detection of AMD Errata 1096
KVM: LAPIC: Inject timer interrupt via posted interrupt
KVM: LAPIC: Make lapic timer unpinned
KVM: x86/vPMU: reset pmc->counter to 0 for pmu fixed_counters
KVM: nVMX: Ignore segment base for VMX memory operand when segment not FS or GS
kvm: x86: ioapic and apic debug macros cleanup
kvm: x86: some tsc debug cleanup
kvm: vmx: fix coccinelle warnings
x86: kvm: avoid constant-conversion warning
x86: kvm: avoid -Wsometimes-uninitialized warning
KVM: x86: expose AVX512_BF16 feature to guest
KVM: selftests: enable pgste option for the linker on s390
KVM: selftests: Move kvm_create_max_vcpus test to generic code
...

+723 -232
+10 -5
Documentation/virtual/kvm/api.txt
··· 4090 4090 Returns: 0 on success, -1 on error 4091 4091 4092 4092 struct kvm_pmu_event_filter { 4093 - __u32 action; 4094 - __u32 nevents; 4095 - __u64 events[0]; 4093 + __u32 action; 4094 + __u32 nevents; 4095 + __u32 fixed_counter_bitmap; 4096 + __u32 flags; 4097 + __u32 pad[4]; 4098 + __u64 events[0]; 4096 4099 }; 4097 4100 4098 4101 This ioctl restricts the set of PMU events that the guest can program. 4099 4102 The argument holds a list of events which will be allowed or denied. 4100 4103 The eventsel+umask of each event the guest attempts to program is compared 4101 4104 against the events field to determine whether the guest should have access. 4102 - This only affects general purpose counters; fixed purpose counters can 4103 - be disabled by changing the perfmon CPUID leaf. 4105 + The events field only controls general purpose counters; fixed purpose 4106 + counters are controlled by the fixed_counter_bitmap. 4107 + 4108 + No flags are defined yet, the field must be zero. 4104 4109 4105 4110 Valid values for 'action': 4106 4111 #define KVM_PMU_EVENT_ALLOW 0
+2
MAINTAINERS
··· 8878 8878 F: arch/s390/include/asm/kvm* 8879 8879 F: arch/s390/kvm/ 8880 8880 F: arch/s390/mm/gmap.c 8881 + F: tools/testing/selftests/kvm/s390x/ 8882 + F: tools/testing/selftests/kvm/*/s390x/ 8881 8883 8882 8884 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) 8883 8885 M: Paolo Bonzini <pbonzini@redhat.com>
+3 -20
arch/s390/kvm/interrupt.c
··· 1224 1224 1225 1225 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) 1226 1226 { 1227 - /* 1228 - * We cannot move this into the if, as the CPU might be already 1229 - * in kvm_vcpu_block without having the waitqueue set (polling) 1230 - */ 1231 1227 vcpu->valid_wakeup = true; 1228 + kvm_vcpu_wake_up(vcpu); 1229 + 1232 1230 /* 1233 - * This is mostly to document, that the read in swait_active could 1234 - * be moved before other stores, leading to subtle races. 1235 - * All current users do not store or use an atomic like update 1236 - */ 1237 - smp_mb__after_atomic(); 1238 - if (swait_active(&vcpu->wq)) { 1239 - /* 1240 - * The vcpu gave up the cpu voluntarily, mark it as a good 1241 - * yield-candidate. 1242 - */ 1243 - vcpu->preempted = true; 1244 - swake_up_one(&vcpu->wq); 1245 - vcpu->stat.halt_wakeup++; 1246 - } 1247 - /* 1248 - * The VCPU might not be sleeping but is executing the VSIE. Let's 1231 + * The VCPU might not be sleeping but rather executing VSIE. Let's 1249 1232 * kick it, so it leaves the SIE to process the request. 1250 1233 */ 1251 1234 kvm_s390_vsie_kick(vcpu);
+6 -3
arch/x86/include/uapi/asm/kvm.h
··· 435 435 436 436 /* for KVM_CAP_PMU_EVENT_FILTER */ 437 437 struct kvm_pmu_event_filter { 438 - __u32 action; 439 - __u32 nevents; 440 - __u64 events[0]; 438 + __u32 action; 439 + __u32 nevents; 440 + __u32 fixed_counter_bitmap; 441 + __u32 flags; 442 + __u32 pad[4]; 443 + __u64 events[0]; 441 444 }; 442 445 443 446 #define KVM_PMU_EVENT_ALLOW 0
+11 -1
arch/x86/kvm/cpuid.c
··· 368 368 F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | 369 369 F(MD_CLEAR); 370 370 371 + /* cpuid 7.1.eax */ 372 + const u32 kvm_cpuid_7_1_eax_x86_features = 373 + F(AVX512_BF16); 374 + 371 375 switch (index) { 372 376 case 0: 373 - entry->eax = 0; 377 + entry->eax = min(entry->eax, 1u); 374 378 entry->ebx &= kvm_cpuid_7_0_ebx_x86_features; 375 379 cpuid_mask(&entry->ebx, CPUID_7_0_EBX); 376 380 /* TSC_ADJUST is emulated */ ··· 397 393 * if the host doesn't support it. 398 394 */ 399 395 entry->edx |= F(ARCH_CAPABILITIES); 396 + break; 397 + case 1: 398 + entry->eax &= kvm_cpuid_7_1_eax_x86_features; 399 + entry->ebx = 0; 400 + entry->ecx = 0; 401 + entry->edx = 0; 400 402 break; 401 403 default: 402 404 WARN_ON_ONCE(1);
+9 -11
arch/x86/kvm/hyperv.c
··· 1594 1594 { 1595 1595 u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; 1596 1596 uint16_t code, rep_idx, rep_cnt; 1597 - bool fast, longmode, rep; 1597 + bool fast, rep; 1598 1598 1599 1599 /* 1600 1600 * hypercall generates UD from non zero cpl and real mode ··· 1605 1605 return 1; 1606 1606 } 1607 1607 1608 - longmode = is_64_bit_mode(vcpu); 1609 - 1610 - if (!longmode) { 1608 + #ifdef CONFIG_X86_64 1609 + if (is_64_bit_mode(vcpu)) { 1610 + param = kvm_rcx_read(vcpu); 1611 + ingpa = kvm_rdx_read(vcpu); 1612 + outgpa = kvm_r8_read(vcpu); 1613 + } else 1614 + #endif 1615 + { 1611 1616 param = ((u64)kvm_rdx_read(vcpu) << 32) | 1612 1617 (kvm_rax_read(vcpu) & 0xffffffff); 1613 1618 ingpa = ((u64)kvm_rbx_read(vcpu) << 32) | ··· 1620 1615 outgpa = ((u64)kvm_rdi_read(vcpu) << 32) | 1621 1616 (kvm_rsi_read(vcpu) & 0xffffffff); 1622 1617 } 1623 - #ifdef CONFIG_X86_64 1624 - else { 1625 - param = kvm_rcx_read(vcpu); 1626 - ingpa = kvm_rdx_read(vcpu); 1627 - outgpa = kvm_r8_read(vcpu); 1628 - } 1629 - #endif 1630 1618 1631 1619 code = param & 0xffff; 1632 1620 fast = !!(param & HV_HYPERCALL_FAST_BIT);
-15
arch/x86/kvm/ioapic.c
··· 45 45 #include "lapic.h" 46 46 #include "irq.h" 47 47 48 - #if 0 49 - #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) 50 - #else 51 - #define ioapic_debug(fmt, arg...) 52 - #endif 53 48 static int ioapic_service(struct kvm_ioapic *vioapic, int irq, 54 49 bool line_status); 55 50 ··· 289 294 default: 290 295 index = (ioapic->ioregsel - 0x10) >> 1; 291 296 292 - ioapic_debug("change redir index %x val %x\n", index, val); 293 297 if (index >= IOAPIC_NUM_PINS) 294 298 return; 295 299 e = &ioapic->redirtbl[index]; ··· 336 342 (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG && 337 343 entry->fields.remote_irr)) 338 344 return -1; 339 - 340 - ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " 341 - "vector=%x trig_mode=%x\n", 342 - entry->fields.dest_id, entry->fields.dest_mode, 343 - entry->fields.delivery_mode, entry->fields.vector, 344 - entry->fields.trig_mode); 345 345 346 346 irqe.dest_id = entry->fields.dest_id; 347 347 irqe.vector = entry->fields.vector; ··· 503 515 if (!ioapic_in_range(ioapic, addr)) 504 516 return -EOPNOTSUPP; 505 517 506 - ioapic_debug("addr %lx\n", (unsigned long)addr); 507 518 ASSERT(!(addr & 0xf)); /* check alignment */ 508 519 509 520 addr &= 0xff; ··· 545 558 if (!ioapic_in_range(ioapic, addr)) 546 559 return -EOPNOTSUPP; 547 560 548 - ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", 549 - (void*)addr, len, val); 550 561 ASSERT(!(addr & 0xf)); /* check alignment */ 551 562 552 563 switch (len) {
+77 -125
arch/x86/kvm/lapic.c
··· 52 52 #define PRIu64 "u" 53 53 #define PRIo64 "o" 54 54 55 - /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ 56 - #define apic_debug(fmt, arg...) do {} while (0) 57 - 58 55 /* 14 is the version for Xeon and Pentium 8.4.8*/ 59 56 #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) 60 57 #define LAPIC_MMIO_LENGTH (1 << 12) ··· 116 119 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) 117 120 { 118 121 return apic->vcpu->vcpu_id; 122 + } 123 + 124 + bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) 125 + { 126 + return pi_inject_timer && kvm_vcpu_apicv_active(vcpu); 127 + } 128 + EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt); 129 + 130 + static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) 131 + { 132 + return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE; 119 133 } 120 134 121 135 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, ··· 635 627 { 636 628 u8 val; 637 629 if (pv_eoi_get_user(vcpu, &val) < 0) 638 - apic_debug("Can't read EOI MSR value: 0x%llx\n", 630 + printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n", 639 631 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 640 632 return val & 0x1; 641 633 } ··· 643 635 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) 644 636 { 645 637 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { 646 - apic_debug("Can't set EOI MSR value: 0x%llx\n", 638 + printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n", 647 639 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 648 640 return; 649 641 } ··· 653 645 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) 654 646 { 655 647 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { 656 - apic_debug("Can't clear EOI MSR value: 0x%llx\n", 648 + printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n", 657 649 (unsigned long long)vcpu->arch.pv_eoi.msr_val); 658 650 return; 659 651 } ··· 686 678 ppr = tpr & 0xff; 687 679 else 688 680 ppr = isrv & 0xf0; 689 - 690 - 
apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", 691 - apic, ppr, isr, isrv); 692 681 693 682 *new_ppr = ppr; 694 683 if (old_ppr != ppr) ··· 763 758 return ((logical_id >> 4) == (mda >> 4)) 764 759 && (logical_id & mda & 0xf) != 0; 765 760 default: 766 - apic_debug("Bad DFR vcpu %d: %08x\n", 767 - apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR)); 768 761 return false; 769 762 } 770 763 } ··· 801 798 struct kvm_lapic *target = vcpu->arch.apic; 802 799 u32 mda = kvm_apic_mda(vcpu, dest, source, target); 803 800 804 - apic_debug("target %p, source %p, dest 0x%x, " 805 - "dest_mode 0x%x, short_hand 0x%x\n", 806 - target, source, dest, dest_mode, short_hand); 807 - 808 801 ASSERT(target); 809 802 switch (short_hand) { 810 803 case APIC_DEST_NOSHORT: ··· 815 816 case APIC_DEST_ALLBUT: 816 817 return target != source; 817 818 default: 818 - apic_debug("kvm: apic: Bad dest shorthand value %x\n", 819 - short_hand); 820 819 return false; 821 820 } 822 821 } ··· 1092 1095 smp_wmb(); 1093 1096 kvm_make_request(KVM_REQ_EVENT, vcpu); 1094 1097 kvm_vcpu_kick(vcpu); 1095 - } else { 1096 - apic_debug("Ignoring de-assert INIT to vcpu %d\n", 1097 - vcpu->vcpu_id); 1098 1098 } 1099 1099 break; 1100 1100 1101 1101 case APIC_DM_STARTUP: 1102 - apic_debug("SIPI to vcpu %d vector 0x%02x\n", 1103 - vcpu->vcpu_id, vector); 1104 1102 result = 1; 1105 1103 apic->sipi_vector = vector; 1106 1104 /* make sure sipi_vector is visible for the receiver */ ··· 1213 1221 1214 1222 trace_kvm_apic_ipi(icr_low, irq.dest_id); 1215 1223 1216 - apic_debug("icr_high 0x%x, icr_low 0x%x, " 1217 - "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " 1218 - "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, " 1219 - "msi_redir_hint 0x%x\n", 1220 - icr_high, icr_low, irq.shorthand, irq.dest_id, 1221 - irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, 1222 - irq.vector, irq.msi_redir_hint); 1223 - 1224 1224 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); 1225 1225 } 1226 
1226 ··· 1266 1282 1267 1283 switch (offset) { 1268 1284 case APIC_ARBPRI: 1269 - apic_debug("Access APIC ARBPRI register which is for P6\n"); 1270 1285 break; 1271 1286 1272 1287 case APIC_TMCCT: /* Timer CCR */ ··· 1332 1349 if (!apic_x2apic_mode(apic)) 1333 1350 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); 1334 1351 1335 - if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) { 1336 - apic_debug("KVM_APIC_READ: read reserved register %x\n", 1337 - offset); 1352 + if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) 1338 1353 return 1; 1339 - } 1340 1354 1341 1355 result = __apic_read(apic, offset & ~0xf); 1342 1356 ··· 1391 1411 tmp1 = tdcr & 0xf; 1392 1412 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 1393 1413 apic->divide_count = 0x1 << (tmp2 & 0x7); 1394 - 1395 - apic_debug("timer divide count is 0x%x\n", 1396 - apic->divide_count); 1397 1414 } 1398 1415 1399 1416 static void limit_periodic_timer_frequency(struct kvm_lapic *apic) ··· 1430 1453 apic->lapic_timer.timer_mode = timer_mode; 1431 1454 limit_periodic_timer_frequency(apic); 1432 1455 } 1433 - } 1434 - 1435 - static void apic_timer_expired(struct kvm_lapic *apic) 1436 - { 1437 - struct kvm_vcpu *vcpu = apic->vcpu; 1438 - struct swait_queue_head *q = &vcpu->wq; 1439 - struct kvm_timer *ktimer = &apic->lapic_timer; 1440 - 1441 - if (atomic_read(&apic->lapic_timer.pending)) 1442 - return; 1443 - 1444 - atomic_inc(&apic->lapic_timer.pending); 1445 - kvm_set_pending_timer(vcpu); 1446 - 1447 - /* 1448 - * For x86, the atomic_inc() is serialized, thus 1449 - * using swait_active() is safe. 
1450 - */ 1451 - if (swait_active(q)) 1452 - swake_up_one(q); 1453 - 1454 - if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) 1455 - ktimer->expired_tscdeadline = ktimer->tscdeadline; 1456 1456 } 1457 1457 1458 1458 /* ··· 1505 1551 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 1506 1552 } 1507 1553 1508 - void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1554 + static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1509 1555 { 1510 1556 struct kvm_lapic *apic = vcpu->arch.apic; 1511 1557 u64 guest_tsc, tsc_deadline; 1512 1558 1513 1559 if (apic->lapic_timer.expired_tscdeadline == 0) 1514 - return; 1515 - 1516 - if (!lapic_timer_int_injected(vcpu)) 1517 1560 return; 1518 1561 1519 1562 tsc_deadline = apic->lapic_timer.expired_tscdeadline; ··· 1524 1573 if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) 1525 1574 adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); 1526 1575 } 1576 + 1577 + void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1578 + { 1579 + if (lapic_timer_int_injected(vcpu)) 1580 + __kvm_wait_lapic_expire(vcpu); 1581 + } 1527 1582 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); 1583 + 1584 + static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) 1585 + { 1586 + struct kvm_timer *ktimer = &apic->lapic_timer; 1587 + 1588 + kvm_apic_local_deliver(apic, APIC_LVTT); 1589 + if (apic_lvtt_tscdeadline(apic)) 1590 + ktimer->tscdeadline = 0; 1591 + if (apic_lvtt_oneshot(apic)) { 1592 + ktimer->tscdeadline = 0; 1593 + ktimer->target_expiration = 0; 1594 + } 1595 + } 1596 + 1597 + static void apic_timer_expired(struct kvm_lapic *apic) 1598 + { 1599 + struct kvm_vcpu *vcpu = apic->vcpu; 1600 + struct swait_queue_head *q = &vcpu->wq; 1601 + struct kvm_timer *ktimer = &apic->lapic_timer; 1602 + 1603 + if (atomic_read(&apic->lapic_timer.pending)) 1604 + return; 1605 + 1606 + if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) 1607 + ktimer->expired_tscdeadline = ktimer->tscdeadline; 1608 + 1609 + 
if (kvm_use_posted_timer_interrupt(apic->vcpu)) { 1610 + if (apic->lapic_timer.timer_advance_ns) 1611 + __kvm_wait_lapic_expire(vcpu); 1612 + kvm_apic_inject_pending_timer_irqs(apic); 1613 + return; 1614 + } 1615 + 1616 + atomic_inc(&apic->lapic_timer.pending); 1617 + kvm_set_pending_timer(vcpu); 1618 + 1619 + /* 1620 + * For x86, the atomic_inc() is serialized, thus 1621 + * using swait_active() is safe. 1622 + */ 1623 + if (swait_active(q)) 1624 + swake_up_one(q); 1625 + } 1528 1626 1529 1627 static void start_sw_tscdeadline(struct kvm_lapic *apic) 1530 1628 { ··· 1601 1601 likely(ns > apic->lapic_timer.timer_advance_ns)) { 1602 1602 expire = ktime_add_ns(now, ns); 1603 1603 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); 1604 - hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED); 1604 + hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS); 1605 1605 } else 1606 1606 apic_timer_expired(apic); 1607 1607 ··· 1648 1648 1649 1649 limit_periodic_timer_frequency(apic); 1650 1650 1651 - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" 1652 - PRIx64 ", " 1653 - "timer initial count 0x%x, period %lldns, " 1654 - "expire @ 0x%016" PRIx64 ".\n", __func__, 1655 - APIC_BUS_CYCLE_NS, ktime_to_ns(now), 1656 - kvm_lapic_get_reg(apic, APIC_TMICT), 1657 - apic->lapic_timer.period, 1658 - ktime_to_ns(ktime_add_ns(now, 1659 - apic->lapic_timer.period))); 1660 - 1661 1651 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 1662 1652 nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); 1663 1653 apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period); ··· 1693 1703 1694 1704 hrtimer_start(&apic->lapic_timer.timer, 1695 1705 apic->lapic_timer.target_expiration, 1696 - HRTIMER_MODE_ABS_PINNED); 1706 + HRTIMER_MODE_ABS); 1697 1707 } 1698 1708 1699 1709 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) ··· 1850 1860 if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { 1851 1861 apic->lvt0_in_nmi_mode = 
lvt0_in_nmi_mode; 1852 1862 if (lvt0_in_nmi_mode) { 1853 - apic_debug("Receive NMI setting on APIC_LVT0 " 1854 - "for cpu %d\n", apic->vcpu->vcpu_id); 1855 1863 atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 1856 1864 } else 1857 1865 atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); ··· 1963 1975 case APIC_TDCR: { 1964 1976 uint32_t old_divisor = apic->divide_count; 1965 1977 1966 - if (val & 4) 1967 - apic_debug("KVM_WRITE:TDCR %x\n", val); 1968 1978 kvm_lapic_set_reg(apic, APIC_TDCR, val); 1969 1979 update_divide_count(apic); 1970 1980 if (apic->divide_count != old_divisor && ··· 1974 1988 break; 1975 1989 } 1976 1990 case APIC_ESR: 1977 - if (apic_x2apic_mode(apic) && val != 0) { 1978 - apic_debug("KVM_WRITE:ESR not zero %x\n", val); 1991 + if (apic_x2apic_mode(apic) && val != 0) 1979 1992 ret = 1; 1980 - } 1981 1993 break; 1982 1994 1983 1995 case APIC_SELF_IPI: ··· 1988 2004 ret = 1; 1989 2005 break; 1990 2006 } 1991 - if (ret) 1992 - apic_debug("Local APIC Write to read-only register %x\n", reg); 2007 + 1993 2008 return ret; 1994 2009 } 1995 2010 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write); ··· 2016 2033 * 32/64/128 bits registers must be accessed thru 32 bits. 2017 2034 * Refer SDM 8.4.1 2018 2035 */ 2019 - if (len != 4 || (offset & 0xf)) { 2020 - /* Don't shout loud, $infamous_os would cause only noise. 
*/ 2021 - apic_debug("apic write: bad size=%d %lx\n", len, (long)address); 2036 + if (len != 4 || (offset & 0xf)) 2022 2037 return 0; 2023 - } 2024 2038 2025 2039 val = *(u32*)data; 2026 2040 2027 - /* too common printing */ 2028 - if (offset != APIC_EOI) 2029 - apic_debug("%s: offset 0x%x with length 0x%x, and value is " 2030 - "0x%x\n", __func__, offset, len, val); 2031 - 2032 - kvm_lapic_reg_write(apic, offset, val); 2041 + kvm_lapic_reg_write(apic, offset & 0xff0, val); 2033 2042 2034 2043 return 0; 2035 2044 } ··· 2153 2178 if ((value & MSR_IA32_APICBASE_ENABLE) && 2154 2179 apic->base_address != APIC_DEFAULT_PHYS_BASE) 2155 2180 pr_warn_once("APIC base relocation is unsupported by KVM"); 2156 - 2157 - /* with FSB delivery interrupt, we can restart APIC functionality */ 2158 - apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " 2159 - "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); 2160 - 2161 2181 } 2162 2182 2163 2183 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) ··· 2162 2192 2163 2193 if (!apic) 2164 2194 return; 2165 - 2166 - apic_debug("%s\n", __func__); 2167 2195 2168 2196 /* Stop the timer in case it's a reset to an active apic */ 2169 2197 hrtimer_cancel(&apic->lapic_timer.timer); ··· 2215 2247 2216 2248 vcpu->arch.apic_arb_prio = 0; 2217 2249 vcpu->arch.apic_attention = 0; 2218 - 2219 - apic_debug("%s: vcpu=%p, id=0x%x, base_msr=" 2220 - "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, 2221 - vcpu, kvm_lapic_get_reg(apic, APIC_ID), 2222 - vcpu->arch.apic_base, apic->base_address); 2223 2250 } 2224 2251 2225 2252 /* ··· 2286 2323 struct kvm_lapic *apic; 2287 2324 2288 2325 ASSERT(vcpu != NULL); 2289 - apic_debug("apic_init %d\n", vcpu->vcpu_id); 2290 2326 2291 2327 apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); 2292 2328 if (!apic) ··· 2302 2340 apic->vcpu = vcpu; 2303 2341 2304 2342 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 2305 - HRTIMER_MODE_ABS_PINNED); 2343 + HRTIMER_MODE_ABS); 
2306 2344 apic->lapic_timer.timer.function = apic_timer_fn; 2307 2345 if (timer_advance_ns == -1) { 2308 2346 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; ··· 2359 2397 struct kvm_lapic *apic = vcpu->arch.apic; 2360 2398 2361 2399 if (atomic_read(&apic->lapic_timer.pending) > 0) { 2362 - kvm_apic_local_deliver(apic, APIC_LVTT); 2363 - if (apic_lvtt_tscdeadline(apic)) 2364 - apic->lapic_timer.tscdeadline = 0; 2365 - if (apic_lvtt_oneshot(apic)) { 2366 - apic->lapic_timer.tscdeadline = 0; 2367 - apic->lapic_timer.target_expiration = 0; 2368 - } 2400 + kvm_apic_inject_pending_timer_irqs(apic); 2369 2401 atomic_set(&apic->lapic_timer.pending, 0); 2370 2402 } 2371 2403 } ··· 2481 2525 { 2482 2526 struct hrtimer *timer; 2483 2527 2484 - if (!lapic_in_kernel(vcpu)) 2528 + if (!lapic_in_kernel(vcpu) || 2529 + kvm_can_post_timer_interrupt(vcpu)) 2485 2530 return; 2486 2531 2487 2532 timer = &vcpu->arch.apic->lapic_timer.timer; 2488 2533 if (hrtimer_cancel(timer)) 2489 - hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); 2534 + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 2490 2535 } 2491 2536 2492 2537 /* ··· 2635 2678 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 2636 2679 return 1; 2637 2680 2638 - if (reg == APIC_DFR || reg == APIC_ICR2) { 2639 - apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n", 2640 - reg); 2681 + if (reg == APIC_DFR || reg == APIC_ICR2) 2641 2682 return 1; 2642 - } 2643 2683 2644 2684 if (kvm_lapic_reg_read(apic, reg, 4, &low)) 2645 2685 return 1; ··· 2734 2780 /* evaluate pending_events before reading the vector */ 2735 2781 smp_rmb(); 2736 2782 sipi_vector = apic->sipi_vector; 2737 - apic_debug("vcpu %d received sipi with vector # %x\n", 2738 - vcpu->vcpu_id, sipi_vector); 2739 2783 kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); 2740 2784 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 2741 2785 }
+1
arch/x86/kvm/lapic.h
··· 236 236 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); 237 237 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); 238 238 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); 239 + bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu); 239 240 240 241 static inline enum lapic_mode kvm_apic_mode(u64 apic_base) 241 242 {
+3 -3
arch/x86/kvm/mmu.c
··· 4597 4597 */ 4598 4598 4599 4599 /* Faults from writes to non-writable pages */ 4600 - u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0; 4600 + u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0; 4601 4601 /* Faults from user mode accesses to supervisor pages */ 4602 - u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0; 4602 + u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0; 4603 4603 /* Faults from fetches of non-executable pages*/ 4604 - u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0; 4604 + u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0; 4605 4605 /* Faults from kernel mode fetches of user pages */ 4606 4606 u8 smepf = 0; 4607 4607 /* Faults from kernel mode accesses of user pages */
+21 -6
arch/x86/kvm/pmu.c
··· 19 19 #include "lapic.h" 20 20 #include "pmu.h" 21 21 22 - /* This keeps the total size of the filter under 4k. */ 23 - #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63 22 + /* This is enough to filter the vast majority of currently defined events. */ 23 + #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 24 24 25 25 /* NOTE: 26 26 * - Each perf counter is defined as "struct kvm_pmc"; ··· 131 131 intr ? kvm_perf_overflow_intr : 132 132 kvm_perf_overflow, pmc); 133 133 if (IS_ERR(event)) { 134 - printk_once("kvm_pmu: event creation failed %ld\n", 135 - PTR_ERR(event)); 134 + pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n", 135 + PTR_ERR(event), pmc->idx); 136 136 return; 137 137 } 138 138 ··· 206 206 { 207 207 unsigned en_field = ctrl & 0x3; 208 208 bool pmi = ctrl & 0x8; 209 + struct kvm_pmu_event_filter *filter; 210 + struct kvm *kvm = pmc->vcpu->kvm; 209 211 210 212 pmc_stop_counter(pmc); 211 213 212 214 if (!en_field || !pmc_is_enabled(pmc)) 213 215 return; 216 + 217 + filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu); 218 + if (filter) { 219 + if (filter->action == KVM_PMU_EVENT_DENY && 220 + test_bit(idx, (ulong *)&filter->fixed_counter_bitmap)) 221 + return; 222 + if (filter->action == KVM_PMU_EVENT_ALLOW && 223 + !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap)) 224 + return; 225 + } 214 226 215 227 pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE, 216 228 kvm_x86_ops->pmu_ops->find_fixed_event(idx), ··· 397 385 tmp.action != KVM_PMU_EVENT_DENY) 398 386 return -EINVAL; 399 387 388 + if (tmp.flags != 0) 389 + return -EINVAL; 390 + 400 391 if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS) 401 392 return -E2BIG; 402 393 ··· 421 406 mutex_unlock(&kvm->lock); 422 407 423 408 synchronize_srcu_expedited(&kvm->srcu); 424 - r = 0; 409 + r = 0; 425 410 cleanup: 426 411 kfree(filter); 427 - return r; 412 + return r; 428 413 }
+35 -7
arch/x86/kvm/svm.c
··· 7128 7128 7129 7129 static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) 7130 7130 { 7131 - bool is_user, smap; 7132 - 7133 - is_user = svm_get_cpl(vcpu) == 3; 7134 - smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); 7131 + unsigned long cr4 = kvm_read_cr4(vcpu); 7132 + bool smep = cr4 & X86_CR4_SMEP; 7133 + bool smap = cr4 & X86_CR4_SMAP; 7134 + bool is_user = svm_get_cpl(vcpu) == 3; 7135 7135 7136 7136 /* 7137 - * Detect and workaround Errata 1096 Fam_17h_00_0Fh 7137 + * Detect and workaround Errata 1096 Fam_17h_00_0Fh. 7138 + * 7139 + * Errata: 7140 + * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is 7141 + * possible that CPU microcode implementing DecodeAssist will fail 7142 + * to read bytes of instruction which caused #NPF. In this case, 7143 + * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly 7144 + * return 0 instead of the correct guest instruction bytes. 7145 + * 7146 + * This happens because CPU microcode reading instruction bytes 7147 + * uses a special opcode which attempts to read data using CPL=0 7148 + * priviledges. The microcode reads CS:RIP and if it hits a SMAP 7149 + * fault, it gives up and returns no instruction bytes. 7150 + * 7151 + * Detection: 7152 + * We reach here in case CPU supports DecodeAssist, raised #NPF and 7153 + * returned 0 in GuestIntrBytes field of the VMCB. 7154 + * First, errata can only be triggered in case vCPU CR4.SMAP=1. 7155 + * Second, if vCPU CR4.SMEP=1, errata could only be triggered 7156 + * in case vCPU CPL==3 (Because otherwise guest would have triggered 7157 + * a SMEP fault instead of #NPF). 7158 + * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL. 7159 + * As most guests enable SMAP if they have also enabled SMEP, use above 7160 + * logic in order to attempt minimize false-positive of detecting errata 7161 + * while still preserving all cases semantic correctness. 
7162 + * 7163 + * Workaround: 7164 + * To determine what instruction the guest was executing, the hypervisor 7165 + * will have to decode the instruction at the instruction pointer. 7138 7166 * 7139 7167 * In non SEV guest, hypervisor will be able to read the guest 7140 7168 * memory to decode the instruction pointer when insn_len is zero ··· 7173 7145 * instruction pointer so we will not able to workaround it. Lets 7174 7146 * print the error and request to kill the guest. 7175 7147 */ 7176 - if (is_user && smap) { 7148 + if (smap && (!smep || is_user)) { 7177 7149 if (!sev_guest(vcpu->kvm)) 7178 7150 return true; 7179 7151 7180 - pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n"); 7152 + pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); 7181 7153 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 7182 7154 } 7183 7155
+11 -2
arch/x86/kvm/vmx/nested.c
··· 194 194 { 195 195 secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); 196 196 vmcs_write64(VMCS_LINK_POINTER, -1ull); 197 + vmx->nested.need_vmcs12_to_shadow_sync = false; 197 198 } 198 199 199 200 static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) ··· 1342 1341 unsigned long val; 1343 1342 int i; 1344 1343 1344 + if (WARN_ON(!shadow_vmcs)) 1345 + return; 1346 + 1345 1347 preempt_disable(); 1346 1348 1347 1349 vmcs_load(shadow_vmcs); ··· 1376 1372 struct shadow_vmcs_field field; 1377 1373 unsigned long val; 1378 1374 int i, q; 1375 + 1376 + if (WARN_ON(!shadow_vmcs)) 1377 + return; 1379 1378 1380 1379 vmcs_load(shadow_vmcs); 1381 1380 ··· 4201 4194 * mode, e.g. a 32-bit address size can yield a 64-bit virtual 4202 4195 * address when using FS/GS with a non-zero base. 4203 4196 */ 4204 - *ret = s.base + off; 4197 + if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS) 4198 + *ret = s.base + off; 4199 + else 4200 + *ret = off; 4205 4201 4206 4202 /* Long mode: #GP(0)/#SS(0) if the memory address is in a 4207 4203 * non-canonical form. This is the only check on the memory ··· 4443 4433 /* copy to memory all shadowed fields in case 4444 4434 they were modified */ 4445 4435 copy_shadow_to_vmcs12(vmx); 4446 - vmx->nested.need_vmcs12_to_shadow_sync = false; 4447 4436 vmx_disable_shadow_vmcs(vmx); 4448 4437 } 4449 4438 vmx->nested.posted_intr_nv = -1;
+8 -3
arch/x86/kvm/vmx/pmu_intel.c
··· 337 337 static void intel_pmu_reset(struct kvm_vcpu *vcpu) 338 338 { 339 339 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); 340 + struct kvm_pmc *pmc = NULL; 340 341 int i; 341 342 342 343 for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { 343 - struct kvm_pmc *pmc = &pmu->gp_counters[i]; 344 + pmc = &pmu->gp_counters[i]; 344 345 345 346 pmc_stop_counter(pmc); 346 347 pmc->counter = pmc->eventsel = 0; 347 348 } 348 349 349 - for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) 350 - pmc_stop_counter(&pmu->fixed_counters[i]); 350 + for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { 351 + pmc = &pmu->fixed_counters[i]; 352 + 353 + pmc_stop_counter(pmc); 354 + pmc->counter = 0; 355 + } 351 356 352 357 pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 353 358 pmu->global_ovf_ctrl = 0;
+4 -2
arch/x86/kvm/vmx/vmx.c
··· 5829 5829 } 5830 5830 5831 5831 if (unlikely(vmx->fail)) { 5832 + dump_vmcs(); 5832 5833 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; 5833 5834 vcpu->run->fail_entry.hardware_entry_failure_reason 5834 5835 = vmcs_read32(VM_INSTRUCTION_ERROR); ··· 7065 7064 u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; 7066 7065 struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; 7067 7066 7068 - if (kvm_mwait_in_guest(vcpu->kvm)) 7067 + if (kvm_mwait_in_guest(vcpu->kvm) || 7068 + kvm_can_post_timer_interrupt(vcpu)) 7069 7069 return -EOPNOTSUPP; 7070 7070 7071 7071 vmx = to_vmx(vcpu); ··· 7455 7453 7456 7454 static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) 7457 7455 { 7458 - return 0; 7456 + return false; 7459 7457 } 7460 7458 7461 7459 static __init int hardware_setup(void)
+7 -13
arch/x86/kvm/x86.c
··· 51 51 #include <linux/kvm_irqfd.h> 52 52 #include <linux/irqbypass.h> 53 53 #include <linux/sched/stat.h> 54 + #include <linux/sched/isolation.h> 54 55 #include <linux/mem_encrypt.h> 55 56 56 57 #include <trace/events/kvm.h> ··· 153 152 154 153 static bool __read_mostly force_emulation_prefix = false; 155 154 module_param(force_emulation_prefix, bool, S_IRUGO); 155 + 156 + int __read_mostly pi_inject_timer = -1; 157 + module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); 156 158 157 159 #define KVM_NR_SHARED_MSRS 16 158 160 ··· 1460 1456 1461 1457 void kvm_set_pending_timer(struct kvm_vcpu *vcpu) 1462 1458 { 1463 - /* 1464 - * Note: KVM_REQ_PENDING_TIMER is implicitly checked in 1465 - * vcpu_enter_guest. This function is only called from 1466 - * the physical CPU that is running vcpu. 1467 - */ 1468 1459 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); 1460 + kvm_vcpu_kick(vcpu); 1469 1461 } 1470 1462 1471 1463 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) ··· 1540 1540 1541 1541 *pshift = shift; 1542 1542 *pmultiplier = div_frac(scaled64, tps32); 1543 - 1544 - pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n", 1545 - __func__, base_hz, scaled_hz, shift, *pmultiplier); 1546 1543 } 1547 1544 1548 1545 #ifdef CONFIG_X86_64 ··· 1782 1785 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { 1783 1786 if (!kvm_check_tsc_unstable()) { 1784 1787 offset = kvm->arch.cur_tsc_offset; 1785 - pr_debug("kvm: matched tsc offset for %llu\n", data); 1786 1788 } else { 1787 1789 u64 delta = nsec_to_cycles(vcpu, elapsed); 1788 1790 data += delta; 1789 1791 offset = kvm_compute_tsc_offset(vcpu, data); 1790 - pr_debug("kvm: adjusted tsc offset by %llu\n", delta); 1791 1792 } 1792 1793 matched = true; 1793 1794 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation); ··· 1804 1809 kvm->arch.cur_tsc_write = data; 1805 1810 kvm->arch.cur_tsc_offset = offset; 1806 1811 matched = false; 1807 - pr_debug("kvm: new tsc generation 
%llu, clock %llu\n", 1808 - kvm->arch.cur_tsc_generation, data); 1809 1812 } 1810 1813 1811 1814 /* ··· 6904 6911 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, 6905 6912 CPUFREQ_TRANSITION_NOTIFIER); 6906 6913 } 6907 - pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); 6908 6914 6909 6915 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online", 6910 6916 kvmclock_cpu_online, kvmclock_cpu_down_prep); ··· 7062 7070 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 7063 7071 7064 7072 kvm_lapic_init(); 7073 + if (pi_inject_timer == -1) 7074 + pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER); 7065 7075 #ifdef CONFIG_X86_64 7066 7076 pvclock_gtod_register_notifier(&pvclock_gtod_notifier); 7067 7077
+2
arch/x86/kvm/x86.h
··· 301 301 302 302 extern bool enable_vmware_backdoor; 303 303 304 + extern int pi_inject_timer; 305 + 304 306 extern struct static_key kvm_no_apic_vcpu; 305 307 306 308 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+1
include/linux/kvm_host.h
··· 318 318 } spin_loop; 319 319 #endif 320 320 bool preempted; 321 + bool ready; 321 322 struct kvm_vcpu_arch arch; 322 323 struct dentry *debugfs_dentry; 323 324 };
+6
include/linux/sched/isolation.h
··· 19 19 DECLARE_STATIC_KEY_FALSE(housekeeping_overridden); 20 20 extern int housekeeping_any_cpu(enum hk_flags flags); 21 21 extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); 22 + extern bool housekeeping_enabled(enum hk_flags flags); 22 23 extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); 23 24 extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); 24 25 extern void __init housekeeping_init(void); ··· 34 33 static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) 35 34 { 36 35 return cpu_possible_mask; 36 + } 37 + 38 + static inline bool housekeeping_enabled(enum hk_flags flags) 39 + { 40 + return false; 37 41 } 38 42 39 43 static inline void housekeeping_affine(struct task_struct *t,
+6
kernel/sched/isolation.c
··· 14 14 static cpumask_var_t housekeeping_mask; 15 15 static unsigned int housekeeping_flags; 16 16 17 + bool housekeeping_enabled(enum hk_flags flags) 18 + { 19 + return !!(housekeeping_flags & flags); 20 + } 21 + EXPORT_SYMBOL_GPL(housekeeping_enabled); 22 + 17 23 int housekeeping_any_cpu(enum hk_flags flags) 18 24 { 19 25 if (static_branch_unlikely(&housekeeping_overridden))
+12 -2
tools/testing/selftests/kvm/Makefile
··· 10 10 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c 11 11 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c 12 12 LIBKVM_aarch64 = lib/aarch64/processor.c 13 + LIBKVM_s390x = lib/s390x/processor.c 13 14 14 15 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test 15 16 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test 16 17 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid 17 - TEST_GEN_PROGS_x86_64 += x86_64/kvm_create_max_vcpus 18 18 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test 19 19 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test 20 20 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test ··· 26 26 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test 27 27 TEST_GEN_PROGS_x86_64 += clear_dirty_log_test 28 28 TEST_GEN_PROGS_x86_64 += dirty_log_test 29 + TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus 29 30 30 31 TEST_GEN_PROGS_aarch64 += clear_dirty_log_test 31 32 TEST_GEN_PROGS_aarch64 += dirty_log_test 33 + TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus 34 + 35 + TEST_GEN_PROGS_s390x += s390x/sync_regs_test 36 + TEST_GEN_PROGS_s390x += kvm_create_max_vcpus 32 37 33 38 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) 34 39 LIBKVM += $(LIBKVM_$(UNAME_M)) ··· 48 43 no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ 49 44 $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) 50 45 51 - LDFLAGS += -pthread $(no-pie-option) 46 + # On s390, build the testcases KVM-enabled 47 + pgste-option = $(call try-run, echo 'int main() { return 0; }' | \ 48 + $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) 49 + 50 + 51 + LDFLAGS += -pthread $(no-pie-option) $(pgste-option) 52 52 53 53 # After inclusion, $(OUTPUT) is defined and 54 54 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/
+8
tools/testing/selftests/kvm/include/kvm_util.h
··· 41 41 NUM_VM_MODES, 42 42 }; 43 43 44 + #ifdef __aarch64__ 45 + #define VM_MODE_DEFAULT VM_MODE_P40V48_4K 46 + #else 47 + #define VM_MODE_DEFAULT VM_MODE_P52V48_4K 48 + #endif 49 + 44 50 #define vm_guest_mode_string(m) vm_guest_mode_string[m] 45 51 extern const char * const vm_guest_mode_string[]; 46 52 ··· 117 111 struct kvm_sregs *sregs); 118 112 int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, 119 113 struct kvm_sregs *sregs); 114 + #ifdef __KVM_HAVE_VCPU_EVENTS 120 115 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, 121 116 struct kvm_vcpu_events *events); 122 117 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, 123 118 struct kvm_vcpu_events *events); 119 + #endif 124 120 #ifdef __x86_64__ 125 121 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, 126 122 struct kvm_nested_state *state);
+22
tools/testing/selftests/kvm/include/s390x/processor.h
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * s390x processor specific defines
 *
 * Bit masks for the DAT (dynamic address translation) table entries that the
 * s390x selftest library builds by hand: region/segment table entries and
 * page table entries.
 */
#ifndef SELFTEST_KVM_PROCESSOR_H
#define SELFTEST_KVM_PROCESSOR_H

/* Bits in the region/segment table entry */
/* Parenthesized so the mask stays one operand wherever it is expanded. */
#define REGION_ENTRY_ORIGIN	(~0xfffUL) /* region/segment table origin */
#define REGION_ENTRY_PROTECT	0x200	 /* region protection bit */
#define REGION_ENTRY_NOEXEC	0x100	 /* region no-execute bit */
#define REGION_ENTRY_OFFSET	0xc0	 /* region table offset */
#define REGION_ENTRY_INVALID	0x20	 /* invalid region table entry */
#define REGION_ENTRY_TYPE	0x0c	 /* region/segment table type mask */
#define REGION_ENTRY_LENGTH	0x03	 /* region third length */

/* Bits in the page table entry */
#define PAGE_INVALID	0x400		/* HW invalid bit */
#define PAGE_PROTECT	0x200		/* HW read-only bit */
#define PAGE_NOEXEC	0x100		/* HW no-execute bit */

#endif
+1 -1
tools/testing/selftests/kvm/lib/aarch64/processor.c
··· 227 227 uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; 228 228 struct kvm_vm *vm; 229 229 230 - vm = vm_create(VM_MODE_P40V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); 230 + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); 231 231 232 232 kvm_vm_elf_load(vm, program_invocation_name, 0, 0); 233 233 vm_vcpu_add_default(vm, vcpuid, guest_code);
+18 -5
tools/testing/selftests/kvm/lib/kvm_util.c
··· 556 556 int ret; 557 557 struct userspace_mem_region *region; 558 558 size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; 559 + size_t alignment; 559 560 560 561 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " 561 562 "address not on a page boundary.\n" ··· 606 605 TEST_ASSERT(region != NULL, "Insufficient Memory"); 607 606 region->mmap_size = npages * vm->page_size; 608 607 609 - /* Enough memory to align up to a huge page. */ 608 + #ifdef __s390x__ 609 + /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ 610 + alignment = 0x100000; 611 + #else 612 + alignment = 1; 613 + #endif 614 + 610 615 if (src_type == VM_MEM_SRC_ANONYMOUS_THP) 611 - region->mmap_size += huge_page_size; 616 + alignment = max(huge_page_size, alignment); 617 + 618 + /* Add enough memory to align up if necessary */ 619 + if (alignment > 1) 620 + region->mmap_size += alignment; 621 + 612 622 region->mmap_start = mmap(NULL, region->mmap_size, 613 623 PROT_READ | PROT_WRITE, 614 624 MAP_PRIVATE | MAP_ANONYMOUS ··· 629 617 "test_malloc failed, mmap_start: %p errno: %i", 630 618 region->mmap_start, errno); 631 619 632 - /* Align THP allocation up to start of a huge page. */ 633 - region->host_mem = align(region->mmap_start, 634 - src_type == VM_MEM_SRC_ANONYMOUS_THP ? 
huge_page_size : 1); 620 + /* Align host address */ 621 + region->host_mem = align(region->mmap_start, alignment); 635 622 636 623 /* As needed perform madvise */ 637 624 if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { ··· 1229 1218 ret, errno); 1230 1219 } 1231 1220 1221 + #ifdef __KVM_HAVE_VCPU_EVENTS 1232 1222 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, 1233 1223 struct kvm_vcpu_events *events) 1234 1224 { ··· 1255 1243 TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", 1256 1244 ret, errno); 1257 1245 } 1246 + #endif 1258 1247 1259 1248 #ifdef __x86_64__ 1260 1249 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
+278
tools/testing/selftests/kvm/lib/s390x/processor.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * KVM selftest s390x library code - CPU-related functions (page tables...) 4 + * 5 + * Copyright (C) 2019, Red Hat, Inc. 6 + */ 7 + 8 + #define _GNU_SOURCE /* for program_invocation_name */ 9 + 10 + #include "processor.h" 11 + #include "kvm_util.h" 12 + #include "../kvm_util_internal.h" 13 + 14 + #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 15 + 16 + #define PAGES_PER_REGION 4 17 + 18 + void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot) 19 + { 20 + vm_paddr_t paddr; 21 + 22 + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", 23 + vm->page_size); 24 + 25 + if (vm->pgd_created) 26 + return; 27 + 28 + paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, 29 + KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot); 30 + memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); 31 + 32 + vm->pgd = paddr; 33 + vm->pgd_created = true; 34 + } 35 + 36 + /* 37 + * Allocate 4 pages for a region/segment table (ri < 4), or one page for 38 + * a page table (ri == 4). Returns a suitable region/segment table entry 39 + * which points to the freshly allocated pages. 40 + */ 41 + static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot) 42 + { 43 + uint64_t taddr; 44 + 45 + taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, 46 + KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot); 47 + memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size); 48 + 49 + return (taddr & REGION_ENTRY_ORIGIN) 50 + | (((4 - ri) << 2) & REGION_ENTRY_TYPE) 51 + | ((ri < 4 ? 
(PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); 52 + } 53 + 54 + /* 55 + * VM Virtual Page Map 56 + * 57 + * Input Args: 58 + * vm - Virtual Machine 59 + * gva - VM Virtual Address 60 + * gpa - VM Physical Address 61 + * memslot - Memory region slot for new virtual translation tables 62 + * 63 + * Output Args: None 64 + * 65 + * Return: None 66 + * 67 + * Within the VM given by vm, creates a virtual translation for the page 68 + * starting at vaddr to the page starting at paddr. 69 + */ 70 + void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa, 71 + uint32_t memslot) 72 + { 73 + int ri, idx; 74 + uint64_t *entry; 75 + 76 + TEST_ASSERT((gva % vm->page_size) == 0, 77 + "Virtual address not on page boundary,\n" 78 + " vaddr: 0x%lx vm->page_size: 0x%x", 79 + gva, vm->page_size); 80 + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, 81 + (gva >> vm->page_shift)), 82 + "Invalid virtual address, vaddr: 0x%lx", 83 + gva); 84 + TEST_ASSERT((gpa % vm->page_size) == 0, 85 + "Physical address not on page boundary,\n" 86 + " paddr: 0x%lx vm->page_size: 0x%x", 87 + gva, vm->page_size); 88 + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, 89 + "Physical address beyond beyond maximum supported,\n" 90 + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 91 + gva, vm->max_gfn, vm->page_size); 92 + 93 + /* Walk through region and segment tables */ 94 + entry = addr_gpa2hva(vm, vm->pgd); 95 + for (ri = 1; ri <= 4; ri++) { 96 + idx = (gva >> (64 - 11 * ri)) & 0x7ffu; 97 + if (entry[idx] & REGION_ENTRY_INVALID) 98 + entry[idx] = virt_alloc_region(vm, ri, memslot); 99 + entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); 100 + } 101 + 102 + /* Fill in page table entry */ 103 + idx = (gva >> 12) & 0x0ffu; /* page index */ 104 + if (!(entry[idx] & PAGE_INVALID)) 105 + fprintf(stderr, 106 + "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); 107 + entry[idx] = gpa; 108 + } 109 + 110 + /* 111 + * Address Guest Virtual to Guest Physical 112 + * 113 + * Input 
Args: 114 + * vm - Virtual Machine 115 + * gpa - VM virtual address 116 + * 117 + * Output Args: None 118 + * 119 + * Return: 120 + * Equivalent VM physical address 121 + * 122 + * Translates the VM virtual address given by gva to a VM physical 123 + * address and then locates the memory region containing the VM 124 + * physical address, within the VM given by vm. When found, the host 125 + * virtual address providing the memory to the vm physical address is 126 + * returned. 127 + * A TEST_ASSERT failure occurs if no region containing translated 128 + * VM virtual address exists. 129 + */ 130 + vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) 131 + { 132 + int ri, idx; 133 + uint64_t *entry; 134 + 135 + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", 136 + vm->page_size); 137 + 138 + entry = addr_gpa2hva(vm, vm->pgd); 139 + for (ri = 1; ri <= 4; ri++) { 140 + idx = (gva >> (64 - 11 * ri)) & 0x7ffu; 141 + TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), 142 + "No region mapping for vm virtual address 0x%lx", 143 + gva); 144 + entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); 145 + } 146 + 147 + idx = (gva >> 12) & 0x0ffu; /* page index */ 148 + 149 + TEST_ASSERT(!(entry[idx] & PAGE_INVALID), 150 + "No page mapping for vm virtual address 0x%lx", gva); 151 + 152 + return (entry[idx] & ~0xffful) + (gva & 0xffful); 153 + } 154 + 155 + static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, 156 + uint64_t ptea_start) 157 + { 158 + uint64_t *pte, ptea; 159 + 160 + for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) { 161 + pte = addr_gpa2hva(vm, ptea); 162 + if (*pte & PAGE_INVALID) 163 + continue; 164 + fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n", 165 + indent, "", ptea, *pte); 166 + } 167 + } 168 + 169 + static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, 170 + uint64_t reg_tab_addr) 171 + { 172 + uint64_t addr, *entry; 173 + 174 + for (addr = reg_tab_addr; addr < 
reg_tab_addr + 0x400 * 8; addr += 8) { 175 + entry = addr_gpa2hva(vm, addr); 176 + if (*entry & REGION_ENTRY_INVALID) 177 + continue; 178 + fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n", 179 + indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2), 180 + addr, *entry); 181 + if (*entry & REGION_ENTRY_TYPE) { 182 + virt_dump_region(stream, vm, indent + 2, 183 + *entry & REGION_ENTRY_ORIGIN); 184 + } else { 185 + virt_dump_ptes(stream, vm, indent + 2, 186 + *entry & REGION_ENTRY_ORIGIN); 187 + } 188 + } 189 + } 190 + 191 + void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 192 + { 193 + if (!vm->pgd_created) 194 + return; 195 + 196 + virt_dump_region(stream, vm, indent, vm->pgd); 197 + } 198 + 199 + /* 200 + * Create a VM with reasonable defaults 201 + * 202 + * Input Args: 203 + * vcpuid - The id of the single VCPU to add to the VM. 204 + * extra_mem_pages - The size of extra memories to add (this will 205 + * decide how much extra space we will need to 206 + * setup the page tables using mem slot 0) 207 + * guest_code - The vCPU's entry point 208 + * 209 + * Output Args: None 210 + * 211 + * Return: 212 + * Pointer to opaque structure that describes the created VM. 213 + */ 214 + struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, 215 + void *guest_code) 216 + { 217 + /* 218 + * The additional amount of pages required for the page tables is: 219 + * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ... 220 + * which is definitely smaller than (n / 256) * 2. 221 + */ 222 + uint64_t extra_pg_pages = extra_mem_pages / 256 * 2; 223 + struct kvm_vm *vm; 224 + 225 + vm = vm_create(VM_MODE_DEFAULT, 226 + DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); 227 + 228 + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); 229 + vm_vcpu_add_default(vm, vcpuid, guest_code); 230 + 231 + return vm; 232 + } 233 + 234 + /* 235 + * Adds a vCPU with reasonable defaults (i.e. 
a stack and initial PSW) 236 + * 237 + * Input Args: 238 + * vcpuid - The id of the VCPU to add to the VM. 239 + * guest_code - The vCPU's entry point 240 + */ 241 + void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) 242 + { 243 + size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); 244 + uint64_t stack_vaddr; 245 + struct kvm_regs regs; 246 + struct kvm_sregs sregs; 247 + struct kvm_run *run; 248 + 249 + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", 250 + vm->page_size); 251 + 252 + stack_vaddr = vm_vaddr_alloc(vm, stack_size, 253 + DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); 254 + 255 + vm_vcpu_add(vm, vcpuid); 256 + 257 + /* Setup guest registers */ 258 + vcpu_regs_get(vm, vcpuid, &regs); 259 + regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; 260 + vcpu_regs_set(vm, vcpuid, &regs); 261 + 262 + vcpu_sregs_get(vm, vcpuid, &sregs); 263 + sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ 264 + sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ 265 + vcpu_sregs_set(vm, vcpuid, &sregs); 266 + 267 + run = vcpu_state(vm, vcpuid); 268 + run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ 269 + run->psw_addr = (uintptr_t)guest_code; 270 + } 271 + 272 + void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) 273 + { 274 + struct vcpu *vcpu = vm->vcpu_head; 275 + 276 + fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", 277 + indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); 278 + }
+1 -1
tools/testing/selftests/kvm/lib/x86_64/processor.c
··· 821 821 uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; 822 822 823 823 /* Create VM */ 824 - vm = vm_create(VM_MODE_P52V48_4K, 824 + vm = vm_create(VM_MODE_DEFAULT, 825 825 DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, 826 826 O_RDWR); 827 827
-2
tools/testing/selftests/kvm/lib/x86_64/vmx.c
··· 5 5 * Copyright (C) 2018, Google LLC. 6 6 */ 7 7 8 - #define _GNU_SOURCE /* for program_invocation_name */ 9 - 10 8 #include "test_util.h" 11 9 #include "kvm_util.h" 12 10 #include "processor.h"
+151
tools/testing/selftests/kvm/s390x/sync_regs_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Test for s390x KVM_CAP_SYNC_REGS 4 + * 5 + * Based on the same test for x86: 6 + * Copyright (C) 2018, Google LLC. 7 + * 8 + * Adaptions for s390x: 9 + * Copyright (C) 2019, Red Hat, Inc. 10 + * 11 + * Test expected behavior of the KVM_CAP_SYNC_REGS functionality. 12 + */ 13 + 14 + #define _GNU_SOURCE /* for program_invocation_short_name */ 15 + #include <fcntl.h> 16 + #include <stdio.h> 17 + #include <stdlib.h> 18 + #include <string.h> 19 + #include <sys/ioctl.h> 20 + 21 + #include "test_util.h" 22 + #include "kvm_util.h" 23 + 24 + #define VCPU_ID 5 25 + 26 + static void guest_code(void) 27 + { 28 + for (;;) { 29 + asm volatile ("diag 0,0,0x501"); 30 + asm volatile ("ahi 11,1"); 31 + } 32 + } 33 + 34 + #define REG_COMPARE(reg) \ 35 + TEST_ASSERT(left->reg == right->reg, \ 36 + "Register " #reg \ 37 + " values did not match: 0x%llx, 0x%llx\n", \ 38 + left->reg, right->reg) 39 + 40 + static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right) 41 + { 42 + int i; 43 + 44 + for (i = 0; i < 16; i++) 45 + REG_COMPARE(gprs[i]); 46 + } 47 + 48 + static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) 49 + { 50 + int i; 51 + 52 + for (i = 0; i < 16; i++) 53 + REG_COMPARE(acrs[i]); 54 + 55 + for (i = 0; i < 16; i++) 56 + REG_COMPARE(crs[i]); 57 + } 58 + 59 + #undef REG_COMPARE 60 + 61 + #define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS) 62 + #define INVALID_SYNC_FIELD 0x80000000 63 + 64 + int main(int argc, char *argv[]) 65 + { 66 + struct kvm_vm *vm; 67 + struct kvm_run *run; 68 + struct kvm_regs regs; 69 + struct kvm_sregs sregs; 70 + int rv, cap; 71 + 72 + /* Tell stdout not to buffer its content */ 73 + setbuf(stdout, NULL); 74 + 75 + cap = kvm_check_cap(KVM_CAP_SYNC_REGS); 76 + if (!cap) { 77 + fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n"); 78 + exit(KSFT_SKIP); 79 + } 80 + 81 + /* Create VM */ 82 + vm = vm_create_default(VCPU_ID, 0, 
guest_code); 83 + 84 + run = vcpu_state(vm, VCPU_ID); 85 + 86 + /* Request and verify all valid register sets. */ 87 + run->kvm_valid_regs = TEST_SYNC_FIELDS; 88 + rv = _vcpu_run(vm, VCPU_ID); 89 + TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); 90 + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, 91 + "Unexpected exit reason: %u (%s)\n", 92 + run->exit_reason, 93 + exit_reason_str(run->exit_reason)); 94 + TEST_ASSERT(run->s390_sieic.icptcode == 4 && 95 + (run->s390_sieic.ipa >> 8) == 0x83 && 96 + (run->s390_sieic.ipb >> 16) == 0x501, 97 + "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n", 98 + run->s390_sieic.icptcode, run->s390_sieic.ipa, 99 + run->s390_sieic.ipb); 100 + 101 + vcpu_regs_get(vm, VCPU_ID, &regs); 102 + compare_regs(&regs, &run->s.regs); 103 + 104 + vcpu_sregs_get(vm, VCPU_ID, &sregs); 105 + compare_sregs(&sregs, &run->s.regs); 106 + 107 + /* Set and verify various register values */ 108 + run->s.regs.gprs[11] = 0xBAD1DEA; 109 + run->s.regs.acrs[0] = 1 << 11; 110 + 111 + run->kvm_valid_regs = TEST_SYNC_FIELDS; 112 + run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS; 113 + rv = _vcpu_run(vm, VCPU_ID); 114 + TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); 115 + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, 116 + "Unexpected exit reason: %u (%s)\n", 117 + run->exit_reason, 118 + exit_reason_str(run->exit_reason)); 119 + TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, 120 + "r11 sync regs value incorrect 0x%llx.", 121 + run->s.regs.gprs[11]); 122 + TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11, 123 + "acr0 sync regs value incorrect 0x%llx.", 124 + run->s.regs.acrs[0]); 125 + 126 + vcpu_regs_get(vm, VCPU_ID, &regs); 127 + compare_regs(&regs, &run->s.regs); 128 + 129 + vcpu_sregs_get(vm, VCPU_ID, &sregs); 130 + compare_sregs(&sregs, &run->s.regs); 131 + 132 + /* Clear kvm_dirty_regs bits, verify new s.regs values are 133 + * overwritten with existing guest values. 
134 + */ 135 + run->kvm_valid_regs = TEST_SYNC_FIELDS; 136 + run->kvm_dirty_regs = 0; 137 + run->s.regs.gprs[11] = 0xDEADBEEF; 138 + rv = _vcpu_run(vm, VCPU_ID); 139 + TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); 140 + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, 141 + "Unexpected exit reason: %u (%s)\n", 142 + run->exit_reason, 143 + exit_reason_str(run->exit_reason)); 144 + TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, 145 + "r11 sync regs value incorrect 0x%llx.", 146 + run->s.regs.gprs[11]); 147 + 148 + kvm_vm_free(vm); 149 + 150 + return 0; 151 + }
+1 -1
tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c tools/testing/selftests/kvm/kvm_create_max_vcpus.c
··· 27 27 printf("Testing creating %d vCPUs, with IDs %d...%d.\n", 28 28 num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1); 29 29 30 - vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES, O_RDWR); 30 + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); 31 31 32 32 for (i = 0; i < num_vcpus; i++) { 33 33 int vcpu_id = first_vcpu_id + i;
+8 -4
virt/kvm/kvm_main.c
··· 314 314 kvm_vcpu_set_in_spin_loop(vcpu, false); 315 315 kvm_vcpu_set_dy_eligible(vcpu, false); 316 316 vcpu->preempted = false; 317 + vcpu->ready = false; 317 318 318 319 r = kvm_arch_vcpu_init(vcpu); 319 320 if (r < 0) ··· 2388 2387 wqp = kvm_arch_vcpu_wq(vcpu); 2389 2388 if (swq_has_sleeper(wqp)) { 2390 2389 swake_up_one(wqp); 2390 + WRITE_ONCE(vcpu->ready, true); 2391 2391 ++vcpu->stat.halt_wakeup; 2392 2392 return true; 2393 2393 } ··· 2502 2500 continue; 2503 2501 } else if (pass && i > last_boosted_vcpu) 2504 2502 break; 2505 - if (!READ_ONCE(vcpu->preempted)) 2503 + if (!READ_ONCE(vcpu->ready)) 2506 2504 continue; 2507 2505 if (vcpu == me) 2508 2506 continue; ··· 4205 4203 { 4206 4204 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 4207 4205 4208 - if (vcpu->preempted) 4209 - vcpu->preempted = false; 4206 + vcpu->preempted = false; 4207 + WRITE_ONCE(vcpu->ready, false); 4210 4208 4211 4209 kvm_arch_sched_in(vcpu, cpu); 4212 4210 ··· 4218 4216 { 4219 4217 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 4220 4218 4221 - if (current->state == TASK_RUNNING) 4219 + if (current->state == TASK_RUNNING) { 4222 4220 vcpu->preempted = true; 4221 + WRITE_ONCE(vcpu->ready, true); 4222 + } 4223 4223 kvm_arch_vcpu_put(vcpu); 4224 4224 } 4225 4225