Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'kvm-x86-apic-6.20' of https://github.com/kvm-x86/linux into HEAD

KVM x86 APIC-ish changes for 6.20

- Fix a benign bug where KVM could use the wrong memslots (ignoring SMM) when
creating a vCPU-specific mapping of guest memory.

- Clean up KVM's handling of marking mapped vCPU pages dirty.

- Drop a pile of *ancient* sanity checks hidden in KVM's unused ASSERT()
macro, most of which could be trivially triggered by the guest and/or
userspace, and all of which were useless.

- Fold "struct dest_map" into its sole user, "struct rtc_status", to make it
more obvious what the weird parameter is used for, and to allow burying the
RTC shenanigans behind CONFIG_KVM_IOAPIC=y.

- Bury all of ioapic.h and KVM_IRQCHIP_KERNEL behind CONFIG_KVM_IOAPIC=y.

- Add a regression test for recent APICv update fixes.

- Rework KVM's handling of VMCS updates while L2 is active to temporarily
switch to vmcs01 instead of deferring the update until the next nested
VM-Exit. The deferred-updates approach directly contributed to several
bugs, was proving to be a maintenance burden due to the difficulty of
auditing the correctness of deferred updates, and was polluting
"struct nested_vmx" with a growing pile of booleans (a sketch of the
scoped-guard idiom the rework relies on follows this list).

- Handle "hardware APIC ISR", a.k.a. SVI, updates in kvm_apic_update_apicv()
to consolidate the updates, and to co-locate SVI updates with the updates
for KVM's own cache of ISR information.

- Drop a dead function declaration.
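
The rework packages the vmcs01 switching with DEFINE_GUARD() from
<linux/cleanup.h> (see the arch/x86/kvm/vmx/vmx.c diff below), so callers
write guard(vmx_vmcs01)(vcpu) and get vmcs02 restored automatically on every
exit path. As a rough illustration of the idiom, here is a self-contained
userspace analog built on the GCC/Clang cleanup attribute; struct vcpu,
guard_vmcs01(), and update_tpr_threshold() are invented for this sketch and
are not KVM's names.

    /* Userspace sketch of the scoped-guard idiom: the "ctor" switches to
     * vmcs01 if L2 is active, and the cleanup "dtor" switches back to vmcs02
     * when the guarded scope ends, no matter how the function returns. */
    #include <stdio.h>

    struct vcpu {
        int in_guest_mode;  /* 1 if L2 is active */
        int loaded_vmcs;    /* 1 = vmcs01, 2 = vmcs02 */
    };

    struct vmcs01_guard { struct vcpu *v; };

    static struct vmcs01_guard vmcs01_guard_ctor(struct vcpu *v)
    {
        if (v->in_guest_mode)
            v->loaded_vmcs = 1;                 /* temporarily load vmcs01 */
        return (struct vmcs01_guard){ .v = v };
    }

    static void vmcs01_guard_dtor(struct vmcs01_guard *g)
    {
        if (g->v->in_guest_mode)
            g->v->loaded_vmcs = 2;              /* reload vmcs02 on scope exit */
    }

    #define guard_vmcs01(v)                                                \
        struct vmcs01_guard __guard                                        \
            __attribute__((cleanup(vmcs01_guard_dtor))) = vmcs01_guard_ctor(v)

    static void update_tpr_threshold(struct vcpu *v, int threshold)
    {
        guard_vmcs01(v);    /* vmcs01 is loaded for the rest of this scope */
        printf("writing TPR_THRESHOLD=%d to vmcs%02d\n",
               threshold, v->loaded_vmcs);
    }                       /* dtor runs here, reloading vmcs02 */

    int main(void)
    {
        struct vcpu v = { .in_guest_mode = 1, .loaded_vmcs = 2 };

        update_tpr_threshold(&v, 4);
        printf("after update: vmcs%02d is loaded\n", v.loaded_vmcs);
        return 0;
    }

The payoff is that early returns inside a guarded scope can no longer leave
the wrong VMCS loaded, which is exactly the class of bug the old
deferred-update booleans kept breeding.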

+334 -227
+2
arch/x86/include/asm/kvm_host.h
···
 
 enum kvm_irqchip_mode {
     KVM_IRQCHIP_NONE,
+#ifdef CONFIG_KVM_IOAPIC
     KVM_IRQCHIP_KERNEL,    /* created with KVM_CREATE_IRQCHIP */
+#endif
     KVM_IRQCHIP_SPLIT,     /* created with KVM_CAP_SPLIT_IRQCHIP */
 };
 
+1 -1
arch/x86/kvm/hyperv.c
···
     irq.vector = vector;
     irq.level = 1;
 
-    ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
+    ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq);
     trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
     return ret;
 }
+16 -27
arch/x86/kvm/ioapic.c
···
 static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
                           bool line_status);
 
-static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu,
-                                      struct kvm_ioapic *ioapic,
-                                      int trigger_mode,
-                                      int pin);
-
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
 {
     unsigned long result = 0;
···
 static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 {
     ioapic->rtc_status.pending_eoi = 0;
-    bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_IDS);
+    bitmap_zero(ioapic->rtc_status.map, KVM_MAX_VCPU_IDS);
 }
 
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
···
 {
     bool new_val, old_val;
     struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
-    struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
+    struct rtc_status *status = &ioapic->rtc_status;
     union kvm_ioapic_redirect_entry *e;
 
     e = &ioapic->redirtbl[RTC_GSI];
···
         return;
 
     new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
-    old_val = test_bit(vcpu->vcpu_id, dest_map->map);
+    old_val = test_bit(vcpu->vcpu_id, status->map);
 
     if (new_val == old_val)
         return;
 
     if (new_val) {
-        __set_bit(vcpu->vcpu_id, dest_map->map);
-        dest_map->vectors[vcpu->vcpu_id] = e->fields.vector;
+        __set_bit(vcpu->vcpu_id, status->map);
+        status->vectors[vcpu->vcpu_id] = e->fields.vector;
         ioapic->rtc_status.pending_eoi++;
     } else {
-        __clear_bit(vcpu->vcpu_id, dest_map->map);
+        __clear_bit(vcpu->vcpu_id, status->map);
         ioapic->rtc_status.pending_eoi--;
         rtc_status_pending_eoi_check_valid(ioapic);
     }
···
 static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu,
                         int vector)
 {
-    struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
+    struct rtc_status *status = &ioapic->rtc_status;
 
     /* RTC special handling */
-    if (test_bit(vcpu->vcpu_id, dest_map->map) &&
-        (vector == dest_map->vectors[vcpu->vcpu_id]) &&
-        (test_and_clear_bit(vcpu->vcpu_id,
-                            ioapic->rtc_status.dest_map.map))) {
+    if (test_bit(vcpu->vcpu_id, status->map) &&
+        (vector == status->vectors[vcpu->vcpu_id]) &&
+        (test_and_clear_bit(vcpu->vcpu_id, status->map))) {
         --ioapic->rtc_status.pending_eoi;
         rtc_status_pending_eoi_check_valid(ioapic);
     }
···
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 {
     struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
-    struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
+    struct rtc_status *status = &ioapic->rtc_status;
     union kvm_ioapic_redirect_entry *e;
     int index;
 
     spin_lock(&ioapic->lock);
 
     /* Make sure we see any missing RTC EOI */
-    if (test_bit(vcpu->vcpu_id, dest_map->map))
-        __set_bit(dest_map->vectors[vcpu->vcpu_id],
+    if (test_bit(vcpu->vcpu_id, status->map))
+        __set_bit(status->vectors[vcpu->vcpu_id],
                   ioapic_handled_vectors);
 
     for (index = 0; index < IOAPIC_NUM_PINS; index++) {
···
          * if rtc_irq_check_coalesced returns false).
          */
         BUG_ON(ioapic->rtc_status.pending_eoi != 0);
-        ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
-                                       &ioapic->rtc_status.dest_map);
+        ret = __kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
+                                         &ioapic->rtc_status);
         ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
     } else
-        ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+        ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
 
     if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
         entry->fields.remote_irr = 1;
···
         kvm_lapic_suppress_eoi_broadcast(apic))
         return;
 
-    ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
     ent->fields.remote_irr = 0;
     if (!ent->fields.mask && (ioapic->irr & (1 << pin))) {
         ++ioapic->irq_eoi[pin];
···
     if (!ioapic_in_range(ioapic, addr))
         return -EOPNOTSUPP;
 
-    ASSERT(!(addr & 0xf));    /* check alignment */
-
     addr &= 0xff;
     spin_lock(&ioapic->lock);
     switch (addr) {
···
     u32 data;
     if (!ioapic_in_range(ioapic, addr))
         return -EOPNOTSUPP;
-
-    ASSERT(!(addr & 0xf));    /* check alignment */
 
     switch (len) {
     case 8:
+12 -26
arch/x86/kvm/ioapic.h
···
 #include <kvm/iodev.h>
 #include "irq.h"
 
+#ifdef CONFIG_KVM_IOAPIC
+
 struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS   KVM_IOAPIC_NUM_PINS
-#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
 #define IOAPIC_VERSION_ID 0x11    /* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
···
 
 #define RTC_GSI 8
 
-struct dest_map {
+struct rtc_status {
+    int pending_eoi;
+
     /* vcpu bitmap where IRQ has been sent */
     DECLARE_BITMAP(map, KVM_MAX_VCPU_IDS);
 
···
      * the vcpu's bit in map is set
      */
     u8 vectors[KVM_MAX_VCPU_IDS];
-};
-
-
-struct rtc_status {
-    int pending_eoi;
-    struct dest_map dest_map;
 };
 
 union kvm_ioapic_redirect_entry {
···
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
                              bool mask);
 
-#ifdef DEBUG
-#define ASSERT(x)                                                       \
-do {                                                                    \
-    if (!(x)) {                                                         \
-        printk(KERN_EMERG "assertion failed %s: %d: %s\n",              \
-               __FILE__, __LINE__, #x);                                 \
-        BUG();                                                          \
-    }                                                                   \
-} while (0)
-#else
-#define ASSERT(x) do { } while (0)
-#endif
-
-static inline int ioapic_in_kernel(struct kvm *kvm)
-{
-    return irqchip_full(kvm);
-}
-
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
                            int trigger_mode);
···
 void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
                            ulong *ioapic_handled_vectors);
+#endif /* CONFIG_KVM_IOAPIC */
+
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+    return irqchip_full(kvm);
+}
+
 void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
                             ulong *ioapic_handled_vectors);
 void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
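
For intuition about the fold, here is a minimal userspace sketch of the
bookkeeping the merged struct carries; the demo_* names and the 64-vCPU cap
are illustrative, not KVM's. Delivery sets the destination vCPU's bit and
caches the vector; a matching EOI clears the bit and decrements pending_eoi,
mirroring the dance in the ioapic.c hunks above.

    #include <assert.h>

    #define MAX_VCPU_IDS 64

    /* Illustrative stand-in for the merged rtc_status: the EOI counter now
     * sits next to the per-vCPU destination bitmap and vector cache that it
     * governs (formerly the separate struct dest_map). */
    struct demo_rtc_status {
        int pending_eoi;
        unsigned long long map;             /* one bit per vCPU id */
        unsigned char vectors[MAX_VCPU_IDS];
    };

    static void demo_deliver(struct demo_rtc_status *s, int vcpu_id, int vector)
    {
        s->map |= 1ULL << vcpu_id;
        s->vectors[vcpu_id] = vector;
        s->pending_eoi++;
    }

    static void demo_eoi(struct demo_rtc_status *s, int vcpu_id, int vector)
    {
        /* Only an EOI for the recorded vector from a tracked vCPU counts. */
        if (((s->map >> vcpu_id) & 1) && s->vectors[vcpu_id] == vector) {
            s->map &= ~(1ULL << vcpu_id);
            s->pending_eoi--;
        }
    }

    int main(void)
    {
        struct demo_rtc_status s = { 0 };

        demo_deliver(&s, 3, 0x28);          /* RTC vector sent to vCPU 3 */
        assert(s.pending_eoi == 1);
        demo_eoi(&s, 3, 0x28);
        assert(s.pending_eoi == 0);
        return 0;
    }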
+2 -2
arch/x86/kvm/irq.c
···
 
     kvm_msi_to_lapic_irq(kvm, e, &irq);
 
-    return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
+    return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
 int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
···
 
         kvm_msi_to_lapic_irq(kvm, e, &irq);
 
-        if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
+        if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
             return r;
         break;
 
+43 -54
arch/x86/kvm/lapic.c
···
 
 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 {
-    int result;
-
     /*
      * Note that irr_pending is just a hint. It will be always
      * true with virtual interrupt delivery enabled.
···
     if (!apic->irr_pending)
         return -1;
 
-    result = apic_search_irr(apic);
-    ASSERT(result == -1 || result >= 16);
-
-    return result;
+    return apic_search_irr(apic);
 }
 
 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
···
 
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 {
-    int result;
-
     /*
      * Note that isr_count is always 1, and highest_isr_cache
      * is always -1, with APIC virtualization enabled.
···
     if (likely(apic->highest_isr_cache != -1))
         return apic->highest_isr_cache;
 
-    result = apic_find_highest_vector(apic->regs + APIC_ISR);
-    ASSERT(result == -1 || result >= 16);
-
-    return result;
+    return apic_find_highest_vector(apic->regs + APIC_ISR);
 }
 
 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
···
     }
 }
 
-void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu)
-{
-    struct kvm_lapic *apic = vcpu->arch.apic;
-
-    if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active)
-        return;
-
-    kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
-}
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_hwapic_isr);
-
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
     /* This may race with setting of irr in __apic_accept_irq() and
···
 
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                              int vector, int level, int trig_mode,
-                             struct dest_map *dest_map);
+                             struct rtc_status *rtc_status);
 
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
-                     struct dest_map *dest_map)
+                     struct rtc_status *rtc_status)
 {
     struct kvm_lapic *apic = vcpu->arch.apic;
 
     return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
-                             irq->level, irq->trig_mode, dest_map);
+                             irq->level, irq->trig_mode, rtc_status);
 }
 
 static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
···
     struct kvm_lapic *target = vcpu->arch.apic;
     u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 
-    ASSERT(target);
     switch (shorthand) {
     case APIC_DEST_NOSHORT:
         if (dest_mode == APIC_DEST_PHYSICAL)
···
     return true;
 }
 
-bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-        struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
+static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+                                            struct kvm_lapic_irq *irq, int *r,
+                                            struct rtc_status *rtc_status)
 {
     struct kvm_apic_map *map;
     unsigned long bitmap;
···
         *r = 0;
         return true;
     }
-    *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
+    *r = kvm_apic_set_irq(src->vcpu, irq, rtc_status);
     return true;
 }
 
···
     for_each_set_bit(i, &bitmap, 16) {
         if (!dst[i])
             continue;
-        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
+        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, rtc_status);
     }
 }
 
     rcu_read_unlock();
     return ret;
+}
+
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+                                   struct kvm_lapic_irq *irq, int *r)
+{
+    return __kvm_irq_delivery_to_apic_fast(kvm, src, irq, r, NULL);
 }
 
 /*
···
 }
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_intr_is_single_vcpu);
 
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-        struct kvm_lapic_irq *irq, struct dest_map *dest_map)
+int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+                               struct kvm_lapic_irq *irq,
+                               struct rtc_status *rtc_status)
 {
     int r = -1;
     struct kvm_vcpu *vcpu, *lowest = NULL;
     unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
     unsigned int dest_vcpus = 0;
 
-    if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+    if (__kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, rtc_status))
         return r;
 
     if (irq->dest_mode == APIC_DEST_PHYSICAL &&
···
         if (!kvm_lowest_prio_delivery(irq)) {
             if (r < 0)
                 r = 0;
-            r += kvm_apic_set_irq(vcpu, irq, dest_map);
+            r += kvm_apic_set_irq(vcpu, irq, rtc_status);
         } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
             if (!vector_hashing_enabled) {
                 if (!lowest)
···
     }
 
     if (lowest)
-        r = kvm_apic_set_irq(lowest, irq, dest_map);
+        r = kvm_apic_set_irq(lowest, irq, rtc_status);
 
     return r;
 }
···
  */
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                              int vector, int level, int trig_mode,
-                             struct dest_map *dest_map)
+                             struct rtc_status *rtc_status)
 {
     int result = 0;
     struct kvm_vcpu *vcpu = apic->vcpu;
···
 
         result = 1;
 
-        if (dest_map) {
-            __set_bit(vcpu->vcpu_id, dest_map->map);
-            dest_map->vectors[vcpu->vcpu_id] = vector;
+#ifdef CONFIG_KVM_IOAPIC
+        if (rtc_status) {
+            __set_bit(vcpu->vcpu_id, rtc_status->map);
+            rtc_status->vectors[vcpu->vcpu_id] = vector;
         }
+#endif
 
         if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
             if (trig_mode)
···
 
     trace_kvm_apic_ipi(icr_low, irq.dest_id);
 
-    kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
+    kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
 }
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_send_ipi);
···
 {
     ktime_t remaining, now;
     s64 ns;
-
-    ASSERT(apic != NULL);
 
     /* if initial count is 0, current count should also be 0 */
     if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
···
     kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq);
 
     if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq,
-                                       &ignored, NULL))
+                                       &ignored))
         return -EWOULDBLOCK;
 
     trace_kvm_apic_ipi((u32)data, irq.dest_id);
···
      */
     apic->irr_pending = true;
 
-    if (apic->apicv_active)
+    /*
+     * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
+     * highest bit in vISR and the next accelerated EOI in the guest won't
+     * be virtualized correctly (the CPU uses SVI to determine which vISR
+     * vector to clear).
+     */
+    if (apic->apicv_active) {
         apic->isr_count = 1;
-    else
+        kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
+    } else {
         apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+    }
 
     apic->highest_isr_cache = -1;
 }
···
 
     vcpu->arch.pv_eoi.msr_val = 0;
     apic_update_ppr(apic);
-    if (apic->apicv_active) {
+    if (apic->apicv_active)
         kvm_x86_call(apicv_post_state_restore)(vcpu);
-        kvm_x86_call(hwapic_isr_update)(vcpu, -1);
-    }
 
     vcpu->arch.apic_arb_prio = 0;
     vcpu->arch.apic_attention = 0;
···
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
     struct kvm_lapic *apic;
-
-    ASSERT(vcpu != NULL);
 
     if (!irqchip_in_kernel(vcpu->kvm)) {
         static_branch_inc(&kvm_has_noapic_vcpu);
···
     __start_apic_timer(apic, APIC_TMCCT);
     kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
     kvm_apic_update_apicv(vcpu);
-    if (apic->apicv_active) {
+    if (apic->apicv_active)
         kvm_x86_call(apicv_post_state_restore)(vcpu);
-        kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
-    }
     kvm_make_request(KVM_REQ_EVENT, vcpu);
 
 #ifdef CONFIG_KVM_IOAPIC
+14 -7
arch/x86/kvm/lapic.h
···
     int nr_lvt_entries;
 };
 
-struct dest_map;
+struct rtc_status;
 
 int kvm_create_lapic(struct kvm_vcpu *vcpu);
 void kvm_free_lapic(struct kvm_vcpu *vcpu);
···
 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr);
 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
-                     struct dest_map *dest_map);
+                     struct rtc_status *rtc_status);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu);
 int kvm_alloc_apic_access_page(struct kvm *kvm);
 void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu);
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-        struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-                             struct kvm_lapic_irq *irq,
-                             struct dest_map *dest_map);
+                                   struct kvm_lapic_irq *irq, int *r);
+int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+                               struct kvm_lapic_irq *irq,
+                               struct rtc_status *rtc_status);
+
+static inline int kvm_irq_delivery_to_apic(struct kvm *kvm,
+                                           struct kvm_lapic *src,
+                                           struct kvm_lapic_irq *irq)
+{
+    return __kvm_irq_delivery_to_apic(kvm, src, irq, NULL);
+}
+
 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
 
 int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
-void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
+2 -52
arch/x86/kvm/vmx/nested.c
···
     exec_control &= ~CPU_BASED_TPR_SHADOW;
     exec_control |= vmcs12->cpu_based_vm_exec_control;
 
-    vmx->nested.l1_tpr_threshold = -1;
     if (exec_control & CPU_BASED_TPR_SHADOW)
         vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
 #ifdef CONFIG_X86_64
···
     }
 }
 
-
-void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
-{
-    struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-    gfn_t gfn;
-
-    /*
-     * Don't need to mark the APIC access page dirty; it is never
-     * written to by the CPU during APIC virtualization.
-     */
-
-    if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-        gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
-        kvm_vcpu_mark_page_dirty(vcpu, gfn);
-    }
-
-    if (nested_cpu_has_posted_intr(vmcs12)) {
-        gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
-        kvm_vcpu_mark_page_dirty(vcpu, gfn);
-    }
-}
-
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 {
     struct vcpu_vmx *vmx = to_vmx(vcpu);
···
         }
     }
 
-    nested_mark_vmcs12_pages_dirty(vcpu);
+    kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.virtual_apic_map);
+    kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.pi_desc_map);
     return 0;
 
 mmio_needed:
···
     if (kvm_caps.has_tsc_control)
         vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
 
-    if (vmx->nested.l1_tpr_threshold != -1)
-        vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
-
-    if (vmx->nested.change_vmcs01_virtual_apic_mode) {
-        vmx->nested.change_vmcs01_virtual_apic_mode = false;
-        vmx_set_virtual_apic_mode(vcpu);
-    }
-
-    if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
-        vmx->nested.update_vmcs01_cpu_dirty_logging = false;
-        vmx_update_cpu_dirty_logging(vcpu);
-    }
-
     nested_put_vmcs12_pages(vcpu);
-
-    if (vmx->nested.reload_vmcs01_apic_access_page) {
-        vmx->nested.reload_vmcs01_apic_access_page = false;
-        kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
-    }
-
-    if (vmx->nested.update_vmcs01_apicv_status) {
-        vmx->nested.update_vmcs01_apicv_status = false;
-        vmx_refresh_apicv_exec_ctrl(vcpu);
-    }
-
-    if (vmx->nested.update_vmcs01_hwapic_isr) {
-        vmx->nested.update_vmcs01_hwapic_isr = false;
-        kvm_apic_update_hwapic_isr(vcpu);
-    }
 
     if ((vm_exit_reason != -1) &&
         (enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx)))
-1
arch/x86/kvm/vmx/nested.h
···
 int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
 int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
                         u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
-void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu);
 bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
                                  int size);
 
+70 -36
arch/x86/kvm/vmx/vmx.c
···
     vmx_prepare_switch_to_host(to_vmx(vcpu));
 }
 
+static void vmx_switch_loaded_vmcs(struct kvm_vcpu *vcpu,
+                                   struct loaded_vmcs *vmcs)
+{
+    struct vcpu_vmx *vmx = to_vmx(vcpu);
+    int cpu;
+
+    cpu = get_cpu();
+    vmx->loaded_vmcs = vmcs;
+    vmx_vcpu_load_vmcs(vcpu, cpu);
+    put_cpu();
+}
+
+static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+{
+    struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+    if (!is_guest_mode(vcpu)) {
+        WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
+        return;
+    }
+
+    WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->nested.vmcs02);
+    vmx_switch_loaded_vmcs(vcpu, &vmx->vmcs01);
+}
+
+static void vmx_put_vmcs01(struct kvm_vcpu *vcpu)
+{
+    if (!is_guest_mode(vcpu))
+        return;
+
+    vmx_switch_loaded_vmcs(vcpu, &to_vmx(vcpu)->nested.vmcs02);
+}
+DEFINE_GUARD(vmx_vmcs01, struct kvm_vcpu *,
+             vmx_load_vmcs01(_T), vmx_put_vmcs01(_T))
+
 bool vmx_emulation_required(struct kvm_vcpu *vcpu)
 {
     return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
···
 {
     struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-    if (is_guest_mode(vcpu)) {
-        vmx->nested.update_vmcs01_apicv_status = true;
-        return;
-    }
+    guard(vmx_vmcs01)(vcpu);
 
     pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
 
···
     vmcs_write16(GUEST_PML_INDEX, PML_HEAD_INDEX);
 }
 
+static void nested_vmx_mark_all_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
+{
+    struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+    kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.apic_access_page_map);
+    kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.virtual_apic_map);
+    kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.pi_desc_map);
+}
+
 static void vmx_dump_sel(char *name, uint32_t sel)
 {
     pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
···
          * Mark them dirty on every exit from L2 to prevent them from
          * getting out of sync with dirty tracking.
          */
-        nested_mark_vmcs12_pages_dirty(vcpu);
+        nested_vmx_mark_all_vmcs12_pages_dirty(vcpu);
 
     /*
      * Synthesize a triple fault if L2 state is invalid. In normal
···
         nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
         return;
 
+    guard(vmx_vmcs01)(vcpu);
+
     tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
-    if (is_guest_mode(vcpu))
-        to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
-    else
-        vmcs_write32(TPR_THRESHOLD, tpr_threshold);
+    vmcs_write32(TPR_THRESHOLD, tpr_threshold);
 }
 
 void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
···
         !cpu_has_vmx_virtualize_x2apic_mode())
         return;
 
-    /* Postpone execution until vmcs01 is the current VMCS. */
-    if (is_guest_mode(vcpu)) {
-        vmx->nested.change_vmcs01_virtual_apic_mode = true;
-        return;
-    }
+    guard(vmx_vmcs01)(vcpu);
 
     sec_exec_control = secondary_exec_controls_get(vmx);
     sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
···
              * only do so if its physical address has changed, but
              * the guest may have inserted a non-APIC mapping into
              * the TLB while the APIC access page was disabled.
+             *
+             * If L2 is active, immediately flush L1's TLB instead
+             * of requesting a flush of the current TLB, because
+             * the current TLB context is L2's.
              */
-            kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+            if (!is_guest_mode(vcpu))
+                kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+            else if (!enable_ept)
+                vpid_sync_context(vmx->vpid);
+            else if (VALID_PAGE(vcpu->arch.root_mmu.root.hpa))
+                vmx_flush_tlb_ept_root(vcpu->arch.root_mmu.root.hpa);
         }
         break;
     case LAPIC_MODE_X2APIC:
···
     kvm_pfn_t pfn;
     bool writable;
 
-    /* Defer reload until vmcs01 is the current VMCS. */
-    if (is_guest_mode(vcpu)) {
-        to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true;
-        return;
-    }
+    /* Note, the VIRTUALIZE_APIC_ACCESSES check needs to query vmcs01. */
+    guard(vmx_vmcs01)(vcpu);
 
     if (!(secondary_exec_controls_get(to_vmx(vcpu)) &
           SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
···
     u16 status;
     u8 old;
 
-    /*
-     * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI
-     * is only relevant for if and only if Virtual Interrupt Delivery is
-     * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's
-     * vAPIC, not L1's vAPIC.  KVM must update vmcs01 on the next nested
-     * VM-Exit, otherwise L1 with run with a stale SVI.
-     */
-    if (is_guest_mode(vcpu)) {
-        to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
-        return;
-    }
-
     if (max_isr == -1)
         max_isr = 0;
+
+    /*
+     * Always update SVI in vmcs01, as SVI is only relevant for L2 if and
+     * only if Virtual Interrupt Delivery is enabled in vmcs12, and if VID
+     * is enabled then L2 EOIs affect L2's vAPIC, not L1's vAPIC.
+     */
+    guard(vmx_vmcs01)(vcpu);
 
     status = vmcs_read16(GUEST_INTR_STATUS);
     old = status >> 8;
···
     if (WARN_ON_ONCE(!enable_pml))
         return;
 
-    if (is_guest_mode(vcpu)) {
-        vmx->nested.update_vmcs01_cpu_dirty_logging = true;
-        return;
-    }
+    guard(vmx_vmcs01)(vcpu);
 
     /*
      * Note, nr_memslots_dirty_logging can be changed concurrent with this
-9
arch/x86/kvm/vmx/vmx.h
···
      */
     bool vmcs02_initialized;
 
-    bool change_vmcs01_virtual_apic_mode;
-    bool reload_vmcs01_apic_access_page;
-    bool update_vmcs01_cpu_dirty_logging;
-    bool update_vmcs01_apicv_status;
-    bool update_vmcs01_hwapic_isr;
-
     /*
      * Enlightened VMCS has been enabled. It does not mean that L1 has to
      * use it. However, VMX features available to L1 will be limited based
···
     u64 pre_vmenter_s_cet;
     u64 pre_vmenter_ssp;
     u64 pre_vmenter_ssp_tbl;
-
-    /* to migrate it to L1 if L2 writes to L1's CR8 directly */
-    int l1_tpr_threshold;
 
     u16 vpid02;
     u16 last_vpid;
+2 -9
arch/x86/kvm/x86.c
···
     case KVM_CAP_SPLIT_IRQCHIP: {
         mutex_lock(&kvm->lock);
         r = -EINVAL;
-        if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
+        if (cap->args[0] > KVM_MAX_IRQ_ROUTES)
             goto split_irqchip_unlock;
         r = -EEXIST;
         if (irqchip_in_kernel(kvm))
···
         .dest_id = apicid,
     };
 
-    kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
+    kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq);
 }
 
 bool kvm_apicv_activated(struct kvm *kvm)
···
      * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
      * still active when the interrupt got accepted. Make sure
      * kvm_check_and_inject_events() is called to check for that.
-     *
-     * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
-     * highest bit in vISR and the next accelerated EOI in the guest won't
-     * be virtualized correctly (the CPU uses SVI to determine which vISR
-     * vector to clear).
      */
     if (!apic->apicv_active)
         kvm_make_request(KVM_REQ_EVENT, vcpu);
-    else
-        kvm_apic_update_hwapic_isr(vcpu);
 
 out:
     preempt_enable();
+1 -1
arch/x86/kvm/xen.c
···
     irq.delivery_mode = APIC_DM_FIXED;
     irq.level = 1;
 
-    kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL);
+    kvm_irq_delivery_to_apic(v->kvm, NULL, &irq);
 }
 
 /*
+8 -1
include/linux/kvm_host.h
···
 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
 void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
+void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
                    bool writable);
···
     return __kvm_vcpu_map(vcpu, gpa, map, false);
 }
 
+static inline void kvm_vcpu_map_mark_dirty(struct kvm_vcpu *vcpu,
+                                           struct kvm_host_map *map)
+{
+    if (kvm_vcpu_mapped(map))
+        kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
+}
+
 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
 int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
···
                            int offset, int len);
 int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
                          unsigned long len);
-void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 /**
  * kvm_gpc_init - initialize gfn_to_pfn_cache.
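
A plausible call-site pattern for the new helper, sketched under the
assumption that the surrounding flow matches the existing
kvm_vcpu_map()/kvm_vcpu_unmap() API; demo_write_guest_u32() is invented for
illustration and error handling is minimal.

    /* Hedged sketch: map guest memory through the vCPU (so the vCPU-specific,
     * e.g. SMM-aware, memslots are used), write via the kernel mapping, then
     * mark the backing gfn dirty before unmapping. */
    static int demo_write_guest_u32(struct kvm_vcpu *vcpu, gpa_t gpa, u32 val)
    {
        struct kvm_host_map map;

        if (kvm_vcpu_map(vcpu, gpa, &map))
            return -EFAULT;

        *(u32 *)(map.hva + offset_in_page(gpa)) = val;

        /* New helper: no-op unless the map is live, else marks map->gfn dirty. */
        kvm_vcpu_map_mark_dirty(vcpu, &map);
        kvm_vcpu_unmap(vcpu, &map);
        return 0;
    }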
+1
tools/testing/selftests/kvm/Makefile.kvm
···
 TEST_GEN_PROGS_x86 += x86/userspace_io_test
 TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
 TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
+TEST_GEN_PROGS_x86 += x86/vmx_apicv_updates_test
 TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
 TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
 TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+4
tools/testing/selftests/kvm/include/x86/apic.h
···
 #define APIC_SPIV       0xF0
 #define     APIC_SPIV_FOCUS_DISABLED    (1 << 9)
 #define     APIC_SPIV_APIC_ENABLED      (1 << 8)
+#define APIC_ISR        0x100
 #define APIC_IRR        0x200
 #define APIC_ICR        0x300
 #define APIC_LVTCMCI    0x2f0
···
 #define APIC_TMCCT      0x390
 #define APIC_TDCR       0x3E0
 #define APIC_SELF_IPI   0x3F0
+
+#define APIC_VECTOR_TO_BIT_NUMBER(v)    ((unsigned int)(v) % 32)
+#define APIC_VECTOR_TO_REG_OFFSET(v)    ((unsigned int)(v) / 32 * 0x10)
 
 void apic_disable(void);
 void xapic_enable(void);
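
A quick check of the arithmetic behind the two new helpers, runnable as plain
C: each group of 32 vectors occupies one 16-byte-aligned APIC register, so
vector 0xe0 (the new test's GOOD_IPI_VECTOR) lands at offset 0x70 from
APIC_ISR, bit 0.

    #include <assert.h>

    #define APIC_ISR                        0x100
    #define APIC_VECTOR_TO_BIT_NUMBER(v)    ((unsigned int)(v) % 32)
    #define APIC_VECTOR_TO_REG_OFFSET(v)    ((unsigned int)(v) / 32 * 0x10)

    int main(void)
    {
        assert(APIC_VECTOR_TO_REG_OFFSET(0xe0) == 0x70);    /* 224 / 32 = 7 registers in */
        assert(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(0xe0) == 0x170);
        assert(APIC_VECTOR_TO_BIT_NUMBER(0xe0) == 0);       /* 224 % 32 = 0 */
        return 0;
    }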
+155
tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
// SPDX-License-Identifier: GPL-2.0-only
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"

#define GOOD_IPI_VECTOR    0xe0
#define BAD_IPI_VECTOR     0xf0

static volatile int good_ipis_received;

static void good_ipi_handler(struct ex_regs *regs)
{
    good_ipis_received++;
}

static void bad_ipi_handler(struct ex_regs *regs)
{
    GUEST_FAIL("Received \"bad\" IPI; ICR MMIO write should have been ignored");
}

static void l2_guest_code(void)
{
    x2apic_enable();
    vmcall();

    xapic_enable();
    xapic_write_reg(APIC_ID, 1 << 24);
    vmcall();
}

static void l1_guest_code(struct vmx_pages *vmx_pages)
{
#define L2_GUEST_STACK_SIZE 64
    unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
    uint32_t control;

    GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
    GUEST_ASSERT(load_vmcs(vmx_pages));

    /* Prepare the VMCS for L2 execution. */
    prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
    control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
    control |= CPU_BASED_USE_MSR_BITMAPS;
    vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);

    /* Modify APIC ID to coerce KVM into inhibiting APICv. */
    xapic_enable();
    xapic_write_reg(APIC_ID, 1 << 24);

    /*
     * Generate+receive an IRQ without doing EOI to get an IRQ set in vISR
     * but not SVI.  APICv should be inhibited due to running with a
     * modified APIC ID.
     */
    xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
    GUEST_ASSERT_EQ(xapic_read_reg(APIC_ID), 1 << 24);

    /* Enable IRQs and verify the IRQ was received. */
    sti_nop();
    GUEST_ASSERT_EQ(good_ipis_received, 1);

    /*
     * Run L2 to switch to x2APIC mode, which in turn will uninhibit APICv,
     * as KVM should force the APIC ID back to its default.
     */
    GUEST_ASSERT(!vmlaunch());
    GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
    vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
    GUEST_ASSERT(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD);

    /*
     * Scribble the APIC access page to verify KVM disabled xAPIC
     * virtualization in vmcs01, and to verify that KVM flushes L1's TLB
     * when L2 switches back to accelerated xAPIC mode.
     */
    xapic_write_reg(APIC_ICR2, 0xdeadbeefu);
    xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | BAD_IPI_VECTOR);

    /*
     * Verify the IRQ is still in-service and emit an EOI to verify KVM
     * propagates the highest vISR vector to SVI when APICv is activated
     * (and does so even if APICv was uninhibited while L2 was active).
     */
    GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
                    BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
    x2apic_write_reg(APIC_EOI, 0);
    GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);

    /*
     * Run L2 one more time to switch back to xAPIC mode to verify that KVM
     * handles the x2APIC => xAPIC transition and inhibits APICv while L2
     * is active.
     */
    GUEST_ASSERT(!vmresume());
    GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
    GUEST_ASSERT(!(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD));

    xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
    /* Re-enable IRQs, as VM-Exit clears RFLAGS.IF. */
    sti_nop();
    GUEST_ASSERT_EQ(good_ipis_received, 2);

    GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
                    BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
    xapic_write_reg(APIC_EOI, 0);
    GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
    GUEST_DONE();
}

int main(int argc, char *argv[])
{
    vm_vaddr_t vmx_pages_gva;
    struct vmx_pages *vmx;
    struct kvm_vcpu *vcpu;
    struct kvm_vm *vm;
    struct ucall uc;

    TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));

    vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);

    vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
    prepare_virtualize_apic_accesses(vmx, vm);
    vcpu_args_set(vcpu, 1, vmx_pages_gva);

    virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
    vm_install_exception_handler(vm, BAD_IPI_VECTOR, bad_ipi_handler);
    vm_install_exception_handler(vm, GOOD_IPI_VECTOR, good_ipi_handler);

    vcpu_run(vcpu);
    TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

    switch (get_ucall(vcpu, &uc)) {
    case UCALL_ABORT:
        REPORT_GUEST_ASSERT(uc);
        /* NOT REACHED */
    case UCALL_DONE:
        break;
    default:
        TEST_FAIL("Unexpected ucall %lu", uc.cmd);
    }

    /*
     * Verify at least two IRQs were injected.  Unfortunately, KVM counts
     * re-injected IRQs (e.g. if delivering the IRQ hits an EPT violation),
     * so being more precise isn't possible given the current stats.
     */
    TEST_ASSERT(vcpu_get_stat(vcpu, irq_injections) >= 2,
                "Wanted at least 2 IRQ injections, got %lu\n",
                vcpu_get_stat(vcpu, irq_injections));

    kvm_vm_free(vm);
    return 0;
}
+1 -1
virt/kvm/kvm_main.c
···
                    bool writable)
 {
     struct kvm_follow_pfn kfp = {
-        .slot = gfn_to_memslot(vcpu->kvm, gfn),
+        .slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn),
         .gfn = gfn,
         .flags = writable ? FOLL_WRITE : 0,
         .refcounted_page = &map->pinned_page,