Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'kvm-x86-selftests-6.20' of https://github.com/kvm-x86/linux into HEAD

KVM selftests changes for 6.20

- Add a regression test for TPR<=>CR8 synchronization and IRQ masking.

- Overhaul the selftests' MMU infrastructure to genericize stage-2 MMU support,
and extend x86's infrastructure to support EPT and NPT (for L2 guests).

- Extend several nested VMX tests to also cover nested SVM.

- Add a selftest for nested VMLOAD/VMSAVE.

- Rework the nested dirty log test, originally added as a regression test for
a PML bug where KVM logged L2 GPAs instead of L1 GPAs, to improve test coverage
and, hopefully, to make the test easier to understand and maintain.

+1242 -570
+3 -1
tools/testing/selftests/kvm/Makefile.kvm
··· 89 89 TEST_GEN_PROGS_x86 += x86/monitor_mwait_test 90 90 TEST_GEN_PROGS_x86 += x86/msrs_test 91 91 TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test 92 + TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test 92 93 TEST_GEN_PROGS_x86 += x86/nested_emulation_test 93 94 TEST_GEN_PROGS_x86 += x86/nested_exceptions_test 94 95 TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test 95 96 TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test 96 97 TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test 98 + TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test 97 99 TEST_GEN_PROGS_x86 += x86/platform_info_test 98 100 TEST_GEN_PROGS_x86 += x86/pmu_counters_test 99 101 TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test ··· 117 115 TEST_GEN_PROGS_x86 += x86/userspace_io_test 118 116 TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test 119 117 TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test 120 - TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test 121 118 TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state 122 119 TEST_GEN_PROGS_x86 += x86/vmx_msrs_test 123 120 TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state ··· 125 124 TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test 126 125 TEST_GEN_PROGS_x86 += x86/xapic_ipi_test 127 126 TEST_GEN_PROGS_x86 += x86/xapic_state_test 127 + TEST_GEN_PROGS_x86 += x86/xapic_tpr_test 128 128 TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test 129 129 TEST_GEN_PROGS_x86 += x86/xss_msr_test 130 130 TEST_GEN_PROGS_x86 += x86/debug_regs
+2
tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
··· 2 2 #ifndef SELFTEST_KVM_UTIL_ARCH_H 3 3 #define SELFTEST_KVM_UTIL_ARCH_H 4 4 5 + struct kvm_mmu_arch {}; 6 + 5 7 struct kvm_vm_arch { 6 8 bool has_gic; 7 9 int gic_fd;
+15 -3
tools/testing/selftests/kvm/include/kvm_util.h
··· 88 88 NR_MEM_REGIONS, 89 89 }; 90 90 91 + struct kvm_mmu { 92 + bool pgd_created; 93 + uint64_t pgd; 94 + int pgtable_levels; 95 + 96 + struct kvm_mmu_arch arch; 97 + }; 98 + 91 99 struct kvm_vm { 92 100 int mode; 93 101 unsigned long type; 94 102 int kvm_fd; 95 103 int fd; 96 - unsigned int pgtable_levels; 97 104 unsigned int page_size; 98 105 unsigned int page_shift; 99 106 unsigned int pa_bits; ··· 111 104 struct sparsebit *vpages_valid; 112 105 struct sparsebit *vpages_mapped; 113 106 bool has_irqchip; 114 - bool pgd_created; 115 107 vm_paddr_t ucall_mmio_addr; 116 - vm_paddr_t pgd; 117 108 vm_vaddr_t handlers; 118 109 uint32_t dirty_ring_size; 119 110 uint64_t gpa_tag_mask; 111 + 112 + /* 113 + * "mmu" is the guest's stage-1, with a short name because the vast 114 + * majority of tests only care about the stage-1 MMU. 115 + */ 116 + struct kvm_mmu mmu; 117 + struct kvm_mmu stage2_mmu; 120 118 121 119 struct kvm_vm_arch arch; 122 120
+1
tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
··· 2 2 #ifndef SELFTEST_KVM_UTIL_ARCH_H 3 3 #define SELFTEST_KVM_UTIL_ARCH_H 4 4 5 + struct kvm_mmu_arch {}; 5 6 struct kvm_vm_arch {}; 6 7 7 8 #endif // SELFTEST_KVM_UTIL_ARCH_H
+1
tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
··· 2 2 #ifndef SELFTEST_KVM_UTIL_ARCH_H 3 3 #define SELFTEST_KVM_UTIL_ARCH_H 4 4 5 + struct kvm_mmu_arch {}; 5 6 struct kvm_vm_arch {}; 6 7 7 8 #endif // SELFTEST_KVM_UTIL_ARCH_H
+1
tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
··· 2 2 #ifndef SELFTEST_KVM_UTIL_ARCH_H 3 3 #define SELFTEST_KVM_UTIL_ARCH_H 4 4 5 + struct kvm_mmu_arch {}; 5 6 struct kvm_vm_arch {}; 6 7 7 8 #endif // SELFTEST_KVM_UTIL_ARCH_H
+3
tools/testing/selftests/kvm/include/x86/apic.h
··· 28 28 #define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF) 29 29 #define APIC_TASKPRI 0x80 30 30 #define APIC_PROCPRI 0xA0 31 + #define GET_APIC_PRI(x) (((x) & GENMASK(7, 4)) >> 4) 32 + #define SET_APIC_PRI(x, y) (((x) & ~GENMASK(7, 4)) | (y << 4)) 31 33 #define APIC_EOI 0xB0 32 34 #define APIC_SPIV 0xF0 33 35 #define APIC_SPIV_FOCUS_DISABLED (1 << 9) ··· 69 67 #define APIC_TMICT 0x380 70 68 #define APIC_TMCCT 0x390 71 69 #define APIC_TDCR 0x3E0 70 + #define APIC_SELF_IPI 0x3F0 72 71 73 72 void apic_disable(void); 74 73 void xapic_enable(void);
+22
tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
··· 10 10 11 11 extern bool is_forced_emulation_enabled; 12 12 13 + struct pte_masks { 14 + uint64_t present; 15 + uint64_t writable; 16 + uint64_t user; 17 + uint64_t readable; 18 + uint64_t executable; 19 + uint64_t accessed; 20 + uint64_t dirty; 21 + uint64_t huge; 22 + uint64_t nx; 23 + uint64_t c; 24 + uint64_t s; 25 + 26 + uint64_t always_set; 27 + }; 28 + 29 + struct kvm_mmu_arch { 30 + struct pte_masks pte_masks; 31 + }; 32 + 33 + struct kvm_mmu; 34 + 13 35 struct kvm_vm_arch { 14 36 vm_vaddr_t gdt; 15 37 vm_vaddr_t tss;
+49 -16
tools/testing/selftests/kvm/include/x86/processor.h
··· 201 201 #define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4) 202 202 #define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) 203 203 #define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) 204 + #define X86_FEATURE_V_VMSAVE_VMLOAD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 15) 204 205 #define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) 205 206 #define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30) 206 207 #define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) ··· 363 362 return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); 364 363 } 365 364 366 - /* Page table bitfield declarations */ 367 - #define PTE_PRESENT_MASK BIT_ULL(0) 368 - #define PTE_WRITABLE_MASK BIT_ULL(1) 369 - #define PTE_USER_MASK BIT_ULL(2) 370 - #define PTE_ACCESSED_MASK BIT_ULL(5) 371 - #define PTE_DIRTY_MASK BIT_ULL(6) 372 - #define PTE_LARGE_MASK BIT_ULL(7) 373 - #define PTE_GLOBAL_MASK BIT_ULL(8) 374 - #define PTE_NX_MASK BIT_ULL(63) 375 - 376 365 #define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) 377 366 378 367 #define PAGE_SHIFT 12 ··· 427 436 428 437 static inline uint64_t get_desc64_base(const struct desc64 *desc) 429 438 { 430 - return ((uint64_t)desc->base3 << 32) | 431 - (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); 439 + return (uint64_t)desc->base3 << 32 | 440 + (uint64_t)desc->base2 << 24 | 441 + (uint64_t)desc->base1 << 16 | 442 + (uint64_t)desc->base0; 432 443 } 433 444 434 445 static inline uint64_t rdtsc(void) ··· 1360 1367 return get_kvm_param_bool("ignore_msrs"); 1361 1368 } 1362 1369 1363 - uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, 1364 - int *level); 1365 - uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); 1370 + uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr); 1366 1371 1367 1372 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, 1368 1373 uint64_t a3); ··· 1442 1451 #define 
PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M) 1443 1452 #define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) 1444 1453 1445 - void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); 1454 + #define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present) 1455 + #define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable) 1456 + #define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user) 1457 + #define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable) 1458 + #define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable) 1459 + #define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed) 1460 + #define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty) 1461 + #define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge) 1462 + #define PTE_NX_MASK(mmu) ((mmu)->arch.pte_masks.nx) 1463 + #define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c) 1464 + #define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s) 1465 + #define PTE_ALWAYS_SET_MASK(mmu) ((mmu)->arch.pte_masks.always_set) 1466 + 1467 + /* 1468 + * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present 1469 + * if it's executable or readable, as EPT supports execute-only PTEs, but not 1470 + * write-only PTEs. 1471 + */ 1472 + #define is_present_pte(mmu, pte) \ 1473 + (PTE_PRESENT_MASK(mmu) ? 
\ 1474 + !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \ 1475 + !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu)))) 1476 + #define is_executable_pte(mmu, pte) \ 1477 + ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu)) 1478 + #define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu))) 1479 + #define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu))) 1480 + #define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu))) 1481 + #define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu))) 1482 + #define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu))) 1483 + #define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte)) 1484 + 1485 + void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, 1486 + struct pte_masks *pte_masks); 1487 + 1488 + void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr, 1489 + uint64_t paddr, int level); 1446 1490 void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 1447 1491 uint64_t nr_bytes, int level); 1492 + 1493 + void vm_enable_tdp(struct kvm_vm *vm); 1494 + bool kvm_cpu_has_tdp(void); 1495 + void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size); 1496 + void tdp_identity_map_default_memslots(struct kvm_vm *vm); 1497 + void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size); 1498 + uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa); 1448 1499 1449 1500 /* 1450 1501 * Basic CPU control in CR0
+9
tools/testing/selftests/kvm/include/x86/svm_util.h
··· 27 27 void *msr; /* gva */ 28 28 void *msr_hva; 29 29 uint64_t msr_gpa; 30 + 31 + /* NPT */ 32 + uint64_t ncr3_gpa; 30 33 }; 31 34 32 35 static inline void vmmcall(void) ··· 59 56 struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); 60 57 void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); 61 58 void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); 59 + 60 + static inline bool kvm_cpu_has_npt(void) 61 + { 62 + return kvm_cpu_has(X86_FEATURE_NPT); 63 + } 64 + void vm_enable_npt(struct kvm_vm *vm); 62 65 63 66 int open_sev_dev_path_or_exit(void); 64 67
+3 -13
tools/testing/selftests/kvm/include/x86/vmx.h
··· 520 520 uint64_t vmwrite_gpa; 521 521 void *vmwrite; 522 522 523 - void *eptp_hva; 524 - uint64_t eptp_gpa; 525 - void *eptp; 526 - 527 523 void *apic_access_hva; 528 524 uint64_t apic_access_gpa; 529 525 void *apic_access; 526 + 527 + uint64_t eptp_gpa; 530 528 }; 531 529 532 530 union vmx_basic { ··· 557 559 558 560 bool ept_1g_pages_supported(void); 559 561 560 - void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, 561 - uint64_t nested_paddr, uint64_t paddr); 562 - void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, 563 - uint64_t nested_paddr, uint64_t paddr, uint64_t size); 564 - void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, 565 - uint32_t memslot); 566 - void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, 567 - uint64_t addr, uint64_t size); 568 562 bool kvm_cpu_has_ept(void); 569 - void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm); 563 + void vm_enable_ept(struct kvm_vm *vm); 570 564 void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); 571 565 572 566 #endif /* SELFTEST_KVM_VMX_H */
+19 -19
tools/testing/selftests/kvm/lib/arm64/processor.c
··· 23 23 24 24 static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) 25 25 { 26 - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; 26 + unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; 27 27 uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; 28 28 29 29 return (gva >> shift) & mask; ··· 34 34 unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; 35 35 uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; 36 36 37 - TEST_ASSERT(vm->pgtable_levels == 4, 37 + TEST_ASSERT(vm->mmu.pgtable_levels == 4, 38 38 "Mode %d does not have 4 page table levels", vm->mode); 39 39 40 40 return (gva >> shift) & mask; ··· 45 45 unsigned int shift = (vm->page_shift - 3) + vm->page_shift; 46 46 uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; 47 47 48 - TEST_ASSERT(vm->pgtable_levels >= 3, 48 + TEST_ASSERT(vm->mmu.pgtable_levels >= 3, 49 49 "Mode %d does not have >= 3 page table levels", vm->mode); 50 50 51 51 return (gva >> shift) & mask; ··· 99 99 100 100 static uint64_t ptrs_per_pgd(struct kvm_vm *vm) 101 101 { 102 - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; 102 + unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; 103 103 return 1 << (vm->va_bits - shift); 104 104 } 105 105 ··· 112 112 { 113 113 size_t nr_pages = vm_page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; 114 114 115 - if (vm->pgd_created) 115 + if (vm->mmu.pgd_created) 116 116 return; 117 117 118 - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, 119 - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 120 - vm->memslots[MEM_REGION_PT]); 121 - vm->pgd_created = true; 118 + vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, 119 + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 120 + vm->memslots[MEM_REGION_PT]); 121 + vm->mmu.pgd_created = true; 122 122 } 123 123 124 124 static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ··· 142 142 " paddr: 0x%lx vm->max_gfn: 
0x%lx vm->page_size: 0x%x", 143 143 paddr, vm->max_gfn, vm->page_size); 144 144 145 - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; 145 + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, vaddr) * 8; 146 146 if (!*ptep) 147 147 *ptep = addr_pte(vm, vm_alloc_page_table(vm), 148 148 PGD_TYPE_TABLE | PTE_VALID); 149 149 150 - switch (vm->pgtable_levels) { 150 + switch (vm->mmu.pgtable_levels) { 151 151 case 4: 152 152 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; 153 153 if (!*ptep) ··· 185 185 { 186 186 uint64_t *ptep; 187 187 188 - if (!vm->pgd_created) 188 + if (!vm->mmu.pgd_created) 189 189 goto unmapped_gva; 190 190 191 - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; 191 + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8; 192 192 if (!ptep) 193 193 goto unmapped_gva; 194 194 if (level == 0) 195 195 return ptep; 196 196 197 - switch (vm->pgtable_levels) { 197 + switch (vm->mmu.pgtable_levels) { 198 198 case 4: 199 199 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; 200 200 if (!ptep) ··· 258 258 259 259 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 260 260 { 261 - int level = 4 - (vm->pgtable_levels - 1); 261 + int level = 4 - (vm->mmu.pgtable_levels - 1); 262 262 uint64_t pgd, *ptep; 263 263 264 - if (!vm->pgd_created) 264 + if (!vm->mmu.pgd_created) 265 265 return; 266 266 267 - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { 267 + for (pgd = vm->mmu.pgd; pgd < vm->mmu.pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { 268 268 ptep = addr_gpa2hva(vm, pgd); 269 269 if (!*ptep) 270 270 continue; ··· 345 345 TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); 346 346 } 347 347 348 - ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift); 348 + ttbr0_el1 = vm->mmu.pgd & GENMASK(47, vm->page_shift); 349 349 350 350 /* Configure output size */ 351 351 switch (vm->mode) { ··· 353 353 case VM_MODE_P52V48_16K: 354 354 case VM_MODE_P52V48_64K: 355 
355 tcr_el1 |= TCR_IPS_52_BITS; 356 - ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; 356 + ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->mmu.pgd) << 2; 357 357 break; 358 358 case VM_MODE_P48V48_4K: 359 359 case VM_MODE_P48V48_16K:
+14 -14
tools/testing/selftests/kvm/lib/kvm_util.c
··· 281 281 /* Setup mode specific traits. */ 282 282 switch (vm->mode) { 283 283 case VM_MODE_P52V48_4K: 284 - vm->pgtable_levels = 4; 284 + vm->mmu.pgtable_levels = 4; 285 285 break; 286 286 case VM_MODE_P52V48_64K: 287 - vm->pgtable_levels = 3; 287 + vm->mmu.pgtable_levels = 3; 288 288 break; 289 289 case VM_MODE_P48V48_4K: 290 - vm->pgtable_levels = 4; 290 + vm->mmu.pgtable_levels = 4; 291 291 break; 292 292 case VM_MODE_P48V48_64K: 293 - vm->pgtable_levels = 3; 293 + vm->mmu.pgtable_levels = 3; 294 294 break; 295 295 case VM_MODE_P40V48_4K: 296 296 case VM_MODE_P36V48_4K: 297 - vm->pgtable_levels = 4; 297 + vm->mmu.pgtable_levels = 4; 298 298 break; 299 299 case VM_MODE_P40V48_64K: 300 300 case VM_MODE_P36V48_64K: 301 - vm->pgtable_levels = 3; 301 + vm->mmu.pgtable_levels = 3; 302 302 break; 303 303 case VM_MODE_P52V48_16K: 304 304 case VM_MODE_P48V48_16K: 305 305 case VM_MODE_P40V48_16K: 306 306 case VM_MODE_P36V48_16K: 307 - vm->pgtable_levels = 4; 307 + vm->mmu.pgtable_levels = 4; 308 308 break; 309 309 case VM_MODE_P47V47_16K: 310 310 case VM_MODE_P36V47_16K: 311 - vm->pgtable_levels = 3; 311 + vm->mmu.pgtable_levels = 3; 312 312 break; 313 313 case VM_MODE_PXXVYY_4K: 314 314 #ifdef __x86_64__ ··· 321 321 vm->va_bits); 322 322 323 323 if (vm->va_bits == 57) { 324 - vm->pgtable_levels = 5; 324 + vm->mmu.pgtable_levels = 5; 325 325 } else { 326 326 TEST_ASSERT(vm->va_bits == 48, 327 327 "Unexpected guest virtual address width: %d", 328 328 vm->va_bits); 329 - vm->pgtable_levels = 4; 329 + vm->mmu.pgtable_levels = 4; 330 330 } 331 331 #else 332 332 TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms"); 333 333 #endif 334 334 break; 335 335 case VM_MODE_P47V64_4K: 336 - vm->pgtable_levels = 5; 336 + vm->mmu.pgtable_levels = 5; 337 337 break; 338 338 case VM_MODE_P44V64_4K: 339 - vm->pgtable_levels = 5; 339 + vm->mmu.pgtable_levels = 5; 340 340 break; 341 341 default: 342 342 TEST_FAIL("Unknown guest mode: 0x%x", vm->mode); ··· 1956 1956 
fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); 1957 1957 sparsebit_dump(stream, vm->vpages_mapped, indent + 2); 1958 1958 fprintf(stream, "%*spgd_created: %u\n", indent, "", 1959 - vm->pgd_created); 1960 - if (vm->pgd_created) { 1959 + vm->mmu.pgd_created); 1960 + if (vm->mmu.pgd_created) { 1961 1961 fprintf(stream, "%*sVirtual Translation Tables:\n", 1962 1962 indent + 2, ""); 1963 1963 virt_dump(stream, vm, indent + 4);
+14 -14
tools/testing/selftests/kvm/lib/loongarch/processor.c
··· 50 50 int i; 51 51 vm_paddr_t child, table; 52 52 53 - if (vm->pgd_created) 53 + if (vm->mmu.pgd_created) 54 54 return; 55 55 56 56 child = table = 0; 57 - for (i = 0; i < vm->pgtable_levels; i++) { 57 + for (i = 0; i < vm->mmu.pgtable_levels; i++) { 58 58 invalid_pgtable[i] = child; 59 59 table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN, 60 60 vm->memslots[MEM_REGION_PT]); ··· 62 62 virt_set_pgtable(vm, table, child); 63 63 child = table; 64 64 } 65 - vm->pgd = table; 66 - vm->pgd_created = true; 65 + vm->mmu.pgd = table; 66 + vm->mmu.pgd_created = true; 67 67 } 68 68 69 69 static int virt_pte_none(uint64_t *ptep, int level) ··· 77 77 uint64_t *ptep; 78 78 vm_paddr_t child; 79 79 80 - if (!vm->pgd_created) 80 + if (!vm->mmu.pgd_created) 81 81 goto unmapped_gva; 82 82 83 - child = vm->pgd; 84 - level = vm->pgtable_levels - 1; 83 + child = vm->mmu.pgd; 84 + level = vm->mmu.pgtable_levels - 1; 85 85 while (level > 0) { 86 86 ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; 87 87 if (virt_pte_none(ptep, level)) { ··· 161 161 { 162 162 int level; 163 163 164 - if (!vm->pgd_created) 164 + if (!vm->mmu.pgd_created) 165 165 return; 166 166 167 - level = vm->pgtable_levels - 1; 168 - pte_dump(stream, vm, indent, vm->pgd, level); 167 + level = vm->mmu.pgtable_levels - 1; 168 + pte_dump(stream, vm, indent, vm->mmu.pgd, level); 169 169 } 170 170 171 171 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) ··· 297 297 298 298 width = vm->page_shift - 3; 299 299 300 - switch (vm->pgtable_levels) { 300 + switch (vm->mmu.pgtable_levels) { 301 301 case 4: 302 302 /* pud page shift and width */ 303 303 val = (vm->page_shift + width * 2) << 20 | (width << 25); ··· 309 309 val |= vm->page_shift | width << 5; 310 310 break; 311 311 default: 312 - TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels); 312 + TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->mmu.pgtable_levels); 313 313 } 314 314 315 315 
loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val); 316 316 317 317 /* PGD page shift and width */ 318 - val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6; 318 + val = (vm->page_shift + width * (vm->mmu.pgtable_levels - 1)) | width << 6; 319 319 loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val); 320 - loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd); 320 + loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->mmu.pgd); 321 321 322 322 /* 323 323 * Refill exception runs on real mode
+16 -15
tools/testing/selftests/kvm/lib/riscv/processor.c
··· 55 55 { 56 56 TEST_ASSERT(level > -1, 57 57 "Negative page table level (%d) not possible", level); 58 - TEST_ASSERT(level < vm->pgtable_levels, 58 + TEST_ASSERT(level < vm->mmu.pgtable_levels, 59 59 "Invalid page table level (%d)", level); 60 60 61 61 return (gva & pte_index_mask[level]) >> pte_index_shift[level]; ··· 65 65 { 66 66 size_t nr_pages = vm_page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; 67 67 68 - if (vm->pgd_created) 68 + if (vm->mmu.pgd_created) 69 69 return; 70 70 71 - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, 72 - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 73 - vm->memslots[MEM_REGION_PT]); 74 - vm->pgd_created = true; 71 + vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, 72 + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 73 + vm->memslots[MEM_REGION_PT]); 74 + vm->mmu.pgd_created = true; 75 75 } 76 76 77 77 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) 78 78 { 79 79 uint64_t *ptep, next_ppn; 80 - int level = vm->pgtable_levels - 1; 80 + int level = vm->mmu.pgtable_levels - 1; 81 81 82 82 TEST_ASSERT((vaddr % vm->page_size) == 0, 83 83 "Virtual address not on page boundary,\n" ··· 93 93 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 94 94 paddr, vm->max_gfn, vm->page_size); 95 95 96 - ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8; 96 + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, vaddr, level) * 8; 97 97 if (!*ptep) { 98 98 next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; 99 99 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | ··· 121 121 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) 122 122 { 123 123 uint64_t *ptep; 124 - int level = vm->pgtable_levels - 1; 124 + int level = vm->mmu.pgtable_levels - 1; 125 125 126 - if (!vm->pgd_created) 126 + if (!vm->mmu.pgd_created) 127 127 goto unmapped_gva; 128 128 129 - ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8; 129 + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; 130 130 if (!ptep) 131 131 
goto unmapped_gva; 132 132 level--; ··· 171 171 172 172 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 173 173 { 174 - int level = vm->pgtable_levels - 1; 174 + struct kvm_mmu *mmu = &vm->mmu; 175 + int level = mmu->pgtable_levels - 1; 175 176 uint64_t pgd, *ptep; 176 177 177 - if (!vm->pgd_created) 178 + if (!mmu->pgd_created) 178 179 return; 179 180 180 - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { 181 + for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { 181 182 ptep = addr_gpa2hva(vm, pgd); 182 183 if (!*ptep) 183 184 continue; ··· 207 206 TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); 208 207 } 209 208 210 - satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; 209 + satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; 211 210 satp |= SATP_MODE_48; 212 211 213 212 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
+8 -8
tools/testing/selftests/kvm/lib/s390/processor.c
··· 17 17 TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", 18 18 vm->page_size); 19 19 20 - if (vm->pgd_created) 20 + if (vm->mmu.pgd_created) 21 21 return; 22 22 23 23 paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, ··· 25 25 vm->memslots[MEM_REGION_PT]); 26 26 memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); 27 27 28 - vm->pgd = paddr; 29 - vm->pgd_created = true; 28 + vm->mmu.pgd = paddr; 29 + vm->mmu.pgd_created = true; 30 30 } 31 31 32 32 /* ··· 70 70 gva, vm->max_gfn, vm->page_size); 71 71 72 72 /* Walk through region and segment tables */ 73 - entry = addr_gpa2hva(vm, vm->pgd); 73 + entry = addr_gpa2hva(vm, vm->mmu.pgd); 74 74 for (ri = 1; ri <= 4; ri++) { 75 75 idx = (gva >> (64 - 11 * ri)) & 0x7ffu; 76 76 if (entry[idx] & REGION_ENTRY_INVALID) ··· 94 94 TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", 95 95 vm->page_size); 96 96 97 - entry = addr_gpa2hva(vm, vm->pgd); 97 + entry = addr_gpa2hva(vm, vm->mmu.pgd); 98 98 for (ri = 1; ri <= 4; ri++) { 99 99 idx = (gva >> (64 - 11 * ri)) & 0x7ffu; 100 100 TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), ··· 149 149 150 150 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 151 151 { 152 - if (!vm->pgd_created) 152 + if (!vm->mmu.pgd_created) 153 153 return; 154 154 155 - virt_dump_region(stream, vm, indent, vm->pgd); 155 + virt_dump_region(stream, vm, indent, vm->mmu.pgd); 156 156 } 157 157 158 158 void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) ··· 184 184 185 185 vcpu_sregs_get(vcpu, &sregs); 186 186 sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ 187 - sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ 187 + sregs.crs[1] = vm->mmu.pgd | 0xf; /* Primary region table */ 188 188 vcpu_sregs_set(vcpu, &sregs); 189 189 190 190 vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
+41 -24
tools/testing/selftests/kvm/lib/x86/memstress.c
··· 13 13 #include "kvm_util.h" 14 14 #include "memstress.h" 15 15 #include "processor.h" 16 + #include "svm_util.h" 16 17 #include "vmx.h" 17 18 18 19 void memstress_l2_guest_code(uint64_t vcpu_id) ··· 30 29 " ud2;" 31 30 ); 32 31 33 - static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) 34 - { 35 32 #define L2_GUEST_STACK_SIZE 64 33 + 34 + static void l1_vmx_code(struct vmx_pages *vmx, uint64_t vcpu_id) 35 + { 36 36 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 37 37 unsigned long *rsp; 38 38 ··· 47 45 prepare_vmcs(vmx, memstress_l2_guest_entry, rsp); 48 46 49 47 GUEST_ASSERT(!vmlaunch()); 50 - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 48 + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL); 51 49 GUEST_DONE(); 50 + } 51 + 52 + static void l1_svm_code(struct svm_test_data *svm, uint64_t vcpu_id) 53 + { 54 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 55 + unsigned long *rsp; 56 + 57 + 58 + rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; 59 + *rsp = vcpu_id; 60 + generic_svm_setup(svm, memstress_l2_guest_entry, rsp); 61 + 62 + run_guest(svm->vmcb, svm->vmcb_gpa); 63 + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); 64 + GUEST_DONE(); 65 + } 66 + 67 + 68 + static void memstress_l1_guest_code(void *data, uint64_t vcpu_id) 69 + { 70 + if (this_cpu_has(X86_FEATURE_VMX)) 71 + l1_vmx_code(data, vcpu_id); 72 + else 73 + l1_svm_code(data, vcpu_id); 52 74 } 53 75 54 76 uint64_t memstress_nested_pages(int nr_vcpus) ··· 85 59 return 513 + 10 * nr_vcpus; 86 60 } 87 61 88 - void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) 62 + static void memstress_setup_ept_mappings(struct kvm_vm *vm) 89 63 { 90 64 uint64_t start, end; 91 - 92 - prepare_eptp(vmx, vm); 93 65 94 66 /* 95 67 * Identity map the first 4G and the test region with 1G pages so that 96 68 * KVM can shadow the EPT12 with the maximum huge page size supported 97 69 * by the backing source. 
98 70 */ 99 - nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); 71 + tdp_identity_map_1g(vm, 0, 0x100000000ULL); 100 72 101 73 start = align_down(memstress_args.gpa, PG_SIZE_1G); 102 74 end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); 103 - nested_identity_map_1g(vmx, vm, start, end - start); 75 + tdp_identity_map_1g(vm, start, end - start); 104 76 } 105 77 106 78 void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) 107 79 { 108 - struct vmx_pages *vmx, *vmx0 = NULL; 109 80 struct kvm_regs regs; 110 - vm_vaddr_t vmx_gva; 81 + vm_vaddr_t nested_gva; 111 82 int vcpu_id; 112 83 113 - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); 114 - TEST_REQUIRE(kvm_cpu_has_ept()); 84 + TEST_REQUIRE(kvm_cpu_has_tdp()); 115 85 86 + vm_enable_tdp(vm); 87 + memstress_setup_ept_mappings(vm); 116 88 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 117 - vmx = vcpu_alloc_vmx(vm, &vmx_gva); 118 - 119 - if (vcpu_id == 0) { 120 - memstress_setup_ept(vmx, vm); 121 - vmx0 = vmx; 122 - } else { 123 - /* Share the same EPT table across all vCPUs. */ 124 - vmx->eptp = vmx0->eptp; 125 - vmx->eptp_hva = vmx0->eptp_hva; 126 - vmx->eptp_gpa = vmx0->eptp_gpa; 127 - } 89 + if (kvm_cpu_has(X86_FEATURE_VMX)) 90 + vcpu_alloc_vmx(vm, &nested_gva); 91 + else 92 + vcpu_alloc_svm(vm, &nested_gva); 128 93 129 94 /* 130 95 * Override the vCPU to run memstress_l1_guest_code() which will ··· 124 107 vcpu_regs_get(vcpus[vcpu_id], &regs); 125 108 regs.rip = (unsigned long) memstress_l1_guest_code; 126 109 vcpu_regs_set(vcpus[vcpu_id], &regs); 127 - vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); 110 + vcpu_args_set(vcpus[vcpu_id], 2, nested_gva, vcpu_id); 128 111 } 129 112 }
+177 -56
tools/testing/selftests/kvm/lib/x86/processor.c
··· 8 8 #include "kvm_util.h" 9 9 #include "pmu.h" 10 10 #include "processor.h" 11 + #include "svm_util.h" 11 12 #include "sev.h" 13 + #include "vmx.h" 12 14 13 15 #ifndef NUM_INTERRUPTS 14 16 #define NUM_INTERRUPTS 256 ··· 158 156 return get_kvm_amd_param_bool("npt"); 159 157 } 160 158 159 + static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu, 160 + struct pte_masks *pte_masks) 161 + { 162 + /* If needed, create the top-level page table. */ 163 + if (!mmu->pgd_created) { 164 + mmu->pgd = vm_alloc_page_table(vm); 165 + mmu->pgd_created = true; 166 + mmu->arch.pte_masks = *pte_masks; 167 + } 168 + 169 + TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5, 170 + "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging", 171 + mmu->pgtable_levels); 172 + } 173 + 161 174 void virt_arch_pgd_alloc(struct kvm_vm *vm) 162 175 { 163 176 TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, 164 177 "Unknown or unsupported guest mode: 0x%x", vm->mode); 165 178 166 - /* If needed, create the top-level page table. 
*/ 167 - if (!vm->pgd_created) { 168 - vm->pgd = vm_alloc_page_table(vm); 169 - vm->pgd_created = true; 170 - } 179 + struct pte_masks pte_masks = (struct pte_masks){ 180 + .present = BIT_ULL(0), 181 + .writable = BIT_ULL(1), 182 + .user = BIT_ULL(2), 183 + .accessed = BIT_ULL(5), 184 + .dirty = BIT_ULL(6), 185 + .huge = BIT_ULL(7), 186 + .nx = BIT_ULL(63), 187 + .executable = 0, 188 + .c = vm->arch.c_bit, 189 + .s = vm->arch.s_bit, 190 + }; 191 + 192 + virt_mmu_init(vm, &vm->mmu, &pte_masks); 171 193 } 172 194 173 - static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, 174 - uint64_t vaddr, int level) 195 + void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, 196 + struct pte_masks *pte_masks) 197 + { 198 + TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized"); 199 + 200 + vm->stage2_mmu.pgtable_levels = pgtable_levels; 201 + virt_mmu_init(vm, &vm->stage2_mmu, pte_masks); 202 + } 203 + 204 + static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, 205 + uint64_t *parent_pte, uint64_t vaddr, int level) 175 206 { 176 207 uint64_t pt_gpa = PTE_GET_PA(*parent_pte); 177 208 uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); 178 209 int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; 179 210 180 - TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, 211 + TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte), 181 212 "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", 182 213 level + 1, vaddr); 183 214 ··· 218 183 } 219 184 220 185 static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, 186 + struct kvm_mmu *mmu, 221 187 uint64_t *parent_pte, 222 188 uint64_t vaddr, 223 189 uint64_t paddr, 224 190 int current_level, 225 191 int target_level) 226 192 { 227 - uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); 193 + uint64_t *pte = virt_get_pte(vm, mmu, parent_pte, vaddr, current_level); 228 194 229 195 paddr = vm_untag_gpa(vm, paddr); 230 196 231 - if (!(*pte & 
PTE_PRESENT_MASK)) { 232 - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; 197 + if (!is_present_pte(mmu, pte)) { 198 + *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | 199 + PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | 200 + PTE_ALWAYS_SET_MASK(mmu); 233 201 if (current_level == target_level) 234 - *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); 202 + *pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK); 235 203 else 236 204 *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; 237 205 } else { ··· 246 208 TEST_ASSERT(current_level != target_level, 247 209 "Cannot create hugepage at level: %u, vaddr: 0x%lx", 248 210 current_level, vaddr); 249 - TEST_ASSERT(!(*pte & PTE_LARGE_MASK), 211 + TEST_ASSERT(!is_huge_pte(mmu, pte), 250 212 "Cannot create page table at level: %u, vaddr: 0x%lx", 251 213 current_level, vaddr); 252 214 } 253 215 return pte; 254 216 } 255 217 256 - void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) 218 + void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr, 219 + uint64_t paddr, int level) 257 220 { 258 221 const uint64_t pg_size = PG_LEVEL_SIZE(level); 259 - uint64_t *pte = &vm->pgd; 222 + uint64_t *pte = &mmu->pgd; 260 223 int current_level; 261 224 262 225 TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, ··· 278 239 TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, 279 240 "Unexpected bits in paddr: %lx", paddr); 280 241 242 + TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu), 243 + "X and NX bit masks cannot be used simultaneously"); 244 + 281 245 /* 282 246 * Allocate upper level page tables, if not already present. Return 283 247 * early if a hugepage was created. 
284 248 */ 285 - for (current_level = vm->pgtable_levels; 249 + for (current_level = mmu->pgtable_levels; 286 250 current_level > PG_LEVEL_4K; 287 251 current_level--) { 288 - pte = virt_create_upper_pte(vm, pte, vaddr, paddr, 252 + pte = virt_create_upper_pte(vm, mmu, pte, vaddr, paddr, 289 253 current_level, level); 290 - if (*pte & PTE_LARGE_MASK) 254 + if (is_huge_pte(mmu, pte)) 291 255 return; 292 256 } 293 257 294 258 /* Fill in page table entry. */ 295 - pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); 296 - TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), 259 + pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K); 260 + TEST_ASSERT(!is_present_pte(mmu, pte), 297 261 "PTE already present for 4k page at vaddr: 0x%lx", vaddr); 298 - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); 262 + *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | 263 + PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | 264 + PTE_ALWAYS_SET_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK); 299 265 300 266 /* 301 267 * Neither SEV nor TDX supports shared page tables, so only the final 302 268 * leaf PTE needs manually set the C/S-bit. 
303 269 */ 304 270 if (vm_is_gpa_protected(vm, paddr)) 305 - *pte |= vm->arch.c_bit; 271 + *pte |= PTE_C_BIT_MASK(mmu); 306 272 else 307 - *pte |= vm->arch.s_bit; 273 + *pte |= PTE_S_BIT_MASK(mmu); 308 274 } 309 275 310 276 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) 311 277 { 312 - __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); 278 + __virt_pg_map(vm, &vm->mmu, vaddr, paddr, PG_LEVEL_4K); 313 279 } 314 280 315 281 void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ··· 329 285 nr_bytes, pg_size); 330 286 331 287 for (i = 0; i < nr_pages; i++) { 332 - __virt_pg_map(vm, vaddr, paddr, level); 288 + __virt_pg_map(vm, &vm->mmu, vaddr, paddr, level); 333 289 sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, 334 290 nr_bytes / PAGE_SIZE); 335 291 ··· 338 294 } 339 295 } 340 296 341 - static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) 297 + static bool vm_is_target_pte(struct kvm_mmu *mmu, uint64_t *pte, 298 + int *level, int current_level) 342 299 { 343 - if (*pte & PTE_LARGE_MASK) { 300 + if (is_huge_pte(mmu, pte)) { 344 301 TEST_ASSERT(*level == PG_LEVEL_NONE || 345 302 *level == current_level, 346 303 "Unexpected hugepage at level %d", current_level); ··· 351 306 return *level == current_level; 352 307 } 353 308 354 - uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, 355 - int *level) 309 + static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, 310 + struct kvm_mmu *mmu, 311 + uint64_t vaddr, 312 + int *level) 356 313 { 357 - int va_width = 12 + (vm->pgtable_levels) * 9; 358 - uint64_t *pte = &vm->pgd; 314 + int va_width = 12 + (mmu->pgtable_levels) * 9; 315 + uint64_t *pte = &mmu->pgd; 359 316 int current_level; 360 317 361 318 TEST_ASSERT(!vm->arch.is_pt_protected, 362 319 "Walking page tables of protected guests is impossible"); 363 320 364 - TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels, 321 + TEST_ASSERT(*level >= PG_LEVEL_NONE && 
*level <= mmu->pgtable_levels, 365 322 "Invalid PG_LEVEL_* '%d'", *level); 366 323 367 324 TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, ··· 379 332 (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))), 380 333 "Canonical check failed. The virtual address is invalid."); 381 334 382 - for (current_level = vm->pgtable_levels; 335 + for (current_level = mmu->pgtable_levels; 383 336 current_level > PG_LEVEL_4K; 384 337 current_level--) { 385 - pte = virt_get_pte(vm, pte, vaddr, current_level); 386 - if (vm_is_target_pte(pte, level, current_level)) 338 + pte = virt_get_pte(vm, mmu, pte, vaddr, current_level); 339 + if (vm_is_target_pte(mmu, pte, level, current_level)) 387 340 return pte; 388 341 } 389 342 390 - return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); 343 + return virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K); 391 344 } 392 345 393 - uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) 346 + uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa) 394 347 { 395 348 int level = PG_LEVEL_4K; 396 349 397 - return __vm_get_page_table_entry(vm, vaddr, &level); 350 + return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level); 351 + } 352 + 353 + uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr) 354 + { 355 + int level = PG_LEVEL_4K; 356 + 357 + return __vm_get_page_table_entry(vm, &vm->mmu, vaddr, &level); 398 358 } 399 359 400 360 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 401 361 { 362 + struct kvm_mmu *mmu = &vm->mmu; 402 363 uint64_t *pml4e, *pml4e_start; 403 364 uint64_t *pdpe, *pdpe_start; 404 365 uint64_t *pde, *pde_start; 405 366 uint64_t *pte, *pte_start; 406 367 407 - if (!vm->pgd_created) 368 + if (!mmu->pgd_created) 408 369 return; 409 370 410 371 fprintf(stream, "%*s " ··· 420 365 fprintf(stream, "%*s index hvaddr gpaddr " 421 366 "addr w exec dirty\n", 422 367 indent, ""); 423 - pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); 368 + pml4e_start = (uint64_t *) addr_gpa2hva(vm, mmu->pgd); 
424 369 for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { 425 370 pml4e = &pml4e_start[n1]; 426 - if (!(*pml4e & PTE_PRESENT_MASK)) 371 + if (!is_present_pte(mmu, pml4e)) 427 372 continue; 428 373 fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " 429 374 " %u\n", 430 375 indent, "", 431 376 pml4e - pml4e_start, pml4e, 432 377 addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), 433 - !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); 378 + is_writable_pte(mmu, pml4e), is_nx_pte(mmu, pml4e)); 434 379 435 380 pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); 436 381 for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { 437 382 pdpe = &pdpe_start[n2]; 438 - if (!(*pdpe & PTE_PRESENT_MASK)) 383 + if (!is_present_pte(mmu, pdpe)) 439 384 continue; 440 385 fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " 441 386 "%u %u\n", 442 387 indent, "", 443 388 pdpe - pdpe_start, pdpe, 444 389 addr_hva2gpa(vm, pdpe), 445 - PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), 446 - !!(*pdpe & PTE_NX_MASK)); 390 + PTE_GET_PFN(*pdpe), is_writable_pte(mmu, pdpe), 391 + is_nx_pte(mmu, pdpe)); 447 392 448 393 pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); 449 394 for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { 450 395 pde = &pde_start[n3]; 451 - if (!(*pde & PTE_PRESENT_MASK)) 396 + if (!is_present_pte(mmu, pde)) 452 397 continue; 453 398 fprintf(stream, "%*spde 0x%-3zx %p " 454 399 "0x%-12lx 0x%-10llx %u %u\n", 455 400 indent, "", pde - pde_start, pde, 456 401 addr_hva2gpa(vm, pde), 457 - PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), 458 - !!(*pde & PTE_NX_MASK)); 402 + PTE_GET_PFN(*pde), is_writable_pte(mmu, pde), 403 + is_nx_pte(mmu, pde)); 459 404 460 405 pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); 461 406 for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { 462 407 pte = &pte_start[n4]; 463 - if (!(*pte & PTE_PRESENT_MASK)) 408 + if (!is_present_pte(mmu, pte)) 464 409 continue; 465 410 fprintf(stream, "%*spte 0x%-3zx %p " 466 411 "0x%-12lx 0x%-10llx %u %u " 
··· 469 414 pte - pte_start, pte, 470 415 addr_hva2gpa(vm, pte), 471 416 PTE_GET_PFN(*pte), 472 - !!(*pte & PTE_WRITABLE_MASK), 473 - !!(*pte & PTE_NX_MASK), 474 - !!(*pte & PTE_DIRTY_MASK), 417 + is_writable_pte(mmu, pte), 418 + is_nx_pte(mmu, pte), 419 + is_dirty_pte(mmu, pte), 475 420 ((uint64_t) n1 << 27) 476 421 | ((uint64_t) n2 << 18) 477 422 | ((uint64_t) n3 << 9) ··· 480 425 } 481 426 } 482 427 } 428 + } 429 + 430 + void vm_enable_tdp(struct kvm_vm *vm) 431 + { 432 + if (kvm_cpu_has(X86_FEATURE_VMX)) 433 + vm_enable_ept(vm); 434 + else 435 + vm_enable_npt(vm); 436 + } 437 + 438 + bool kvm_cpu_has_tdp(void) 439 + { 440 + return kvm_cpu_has_ept() || kvm_cpu_has_npt(); 441 + } 442 + 443 + void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, 444 + uint64_t size, int level) 445 + { 446 + size_t page_size = PG_LEVEL_SIZE(level); 447 + size_t npages = size / page_size; 448 + 449 + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); 450 + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); 451 + 452 + while (npages--) { 453 + __virt_pg_map(vm, &vm->stage2_mmu, nested_paddr, paddr, level); 454 + nested_paddr += page_size; 455 + paddr += page_size; 456 + } 457 + } 458 + 459 + void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, 460 + uint64_t size) 461 + { 462 + __tdp_map(vm, nested_paddr, paddr, size, PG_LEVEL_4K); 463 + } 464 + 465 + /* Prepare an identity extended page table that maps all the 466 + * physical pages in VM. 
467 + */ 468 + void tdp_identity_map_default_memslots(struct kvm_vm *vm) 469 + { 470 + uint32_t s, memslot = 0; 471 + sparsebit_idx_t i, last; 472 + struct userspace_mem_region *region = memslot2region(vm, memslot); 473 + 474 + /* Only memslot 0 is mapped here, ensure it's the only one being used */ 475 + for (s = 0; s < NR_MEM_REGIONS; s++) 476 + TEST_ASSERT_EQ(vm->memslots[s], 0); 477 + 478 + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; 479 + last = i + (region->region.memory_size >> vm->page_shift); 480 + for (;;) { 481 + i = sparsebit_next_clear(region->unused_phy_pages, i); 482 + if (i > last) 483 + break; 484 + 485 + tdp_map(vm, (uint64_t)i << vm->page_shift, 486 + (uint64_t)i << vm->page_shift, 1 << vm->page_shift); 487 + } 488 + } 489 + 490 + /* Identity map a region with 1GiB Pages. */ 491 + void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size) 492 + { 493 + __tdp_map(vm, addr, addr, size, PG_LEVEL_1G); 483 494 } 484 495 485 496 /* ··· 618 497 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) 619 498 { 620 499 int level = PG_LEVEL_NONE; 621 - uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); 500 + uint64_t *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); 622 501 623 - TEST_ASSERT(*pte & PTE_PRESENT_MASK, 502 + TEST_ASSERT(is_present_pte(&vm->mmu, pte), 624 503 "Leaf PTE not PRESENT for gva: 0x%08lx", gva); 625 504 626 505 /* ··· 659 538 sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; 660 539 if (kvm_cpu_has(X86_FEATURE_XSAVE)) 661 540 sregs.cr4 |= X86_CR4_OSXSAVE; 662 - if (vm->pgtable_levels == 5) 541 + if (vm->mmu.pgtable_levels == 5) 663 542 sregs.cr4 |= X86_CR4_LA57; 664 543 sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); 665 544 ··· 670 549 kvm_seg_set_kernel_data_64bit(&sregs.gs); 671 550 kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); 672 551 673 - sregs.cr3 = vm->pgd; 552 + sregs.cr3 = vm->mmu.pgd; 674 553 vcpu_sregs_set(vcpu, &sregs); 675 554 } 676 555
+27
tools/testing/selftests/kvm/lib/x86/svm.c
··· 46 46 svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr); 47 47 memset(svm->msr_hva, 0, getpagesize()); 48 48 49 + if (vm->stage2_mmu.pgd_created) 50 + svm->ncr3_gpa = vm->stage2_mmu.pgd; 51 + 49 52 *p_svm_gva = svm_gva; 50 53 return svm; 51 54 } ··· 60 57 seg->attrib = attr; 61 58 seg->limit = limit; 62 59 seg->base = base; 60 + } 61 + 62 + void vm_enable_npt(struct kvm_vm *vm) 63 + { 64 + struct pte_masks pte_masks; 65 + 66 + TEST_ASSERT(kvm_cpu_has_npt(), "KVM doesn't supported nested NPT"); 67 + 68 + /* 69 + * NPTs use the same PTE format, but deliberately drop the C-bit as the 70 + * per-VM shared vs. private information is only meant for stage-1. 71 + */ 72 + pte_masks = vm->mmu.arch.pte_masks; 73 + pte_masks.c = 0; 74 + 75 + /* NPT walks are treated as user accesses, so set the 'user' bit. */ 76 + pte_masks.always_set = pte_masks.user; 77 + 78 + tdp_mmu_init(vm, vm->mmu.pgtable_levels, &pte_masks); 63 79 } 64 80 65 81 void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) ··· 124 102 vmcb->save.rip = (u64)guest_rip; 125 103 vmcb->save.rsp = (u64)guest_rsp; 126 104 guest_regs.rdi = (u64)svm; 105 + 106 + if (svm->ncr3_gpa) { 107 + ctrl->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; 108 + ctrl->nested_cr3 = svm->ncr3_gpa; 109 + } 127 110 } 128 111 129 112 /*
+48 -203
tools/testing/selftests/kvm/lib/x86/vmx.c
··· 10 10 #include "processor.h" 11 11 #include "vmx.h" 12 12 13 - #define PAGE_SHIFT_4K 12 14 - 15 13 #define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 14 + 15 + #define EPTP_MT_SHIFT 0 /* EPTP memtype bits 2:0 */ 16 + #define EPTP_PWL_SHIFT 3 /* EPTP page walk length bits 5:3 */ 17 + #define EPTP_AD_ENABLED_SHIFT 6 /* EPTP AD enabled bit 6 */ 18 + 19 + #define EPTP_WB (X86_MEMTYPE_WB << EPTP_MT_SHIFT) 20 + #define EPTP_PWL_4 (3ULL << EPTP_PWL_SHIFT) /* PWL is (levels - 1) */ 21 + #define EPTP_AD_ENABLED (1ULL << EPTP_AD_ENABLED_SHIFT) 16 22 17 23 bool enable_evmcs; 18 24 19 25 struct hv_enlightened_vmcs *current_evmcs; 20 26 struct hv_vp_assist_page *current_vp_assist; 21 27 22 - struct eptPageTableEntry { 23 - uint64_t readable:1; 24 - uint64_t writable:1; 25 - uint64_t executable:1; 26 - uint64_t memory_type:3; 27 - uint64_t ignore_pat:1; 28 - uint64_t page_size:1; 29 - uint64_t accessed:1; 30 - uint64_t dirty:1; 31 - uint64_t ignored_11_10:2; 32 - uint64_t address:40; 33 - uint64_t ignored_62_52:11; 34 - uint64_t suppress_ve:1; 35 - }; 36 - 37 - struct eptPageTablePointer { 38 - uint64_t memory_type:3; 39 - uint64_t page_walk_length:3; 40 - uint64_t ad_enabled:1; 41 - uint64_t reserved_11_07:5; 42 - uint64_t address:40; 43 - uint64_t reserved_63_52:12; 44 - }; 45 28 int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) 46 29 { 47 30 uint16_t evmcs_ver; ··· 39 56 evmcs_ver & 0xff, evmcs_ver >> 8); 40 57 41 58 return evmcs_ver; 59 + } 60 + 61 + void vm_enable_ept(struct kvm_vm *vm) 62 + { 63 + struct pte_masks pte_masks; 64 + 65 + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); 66 + 67 + /* 68 + * EPTs do not have 'present' or 'user' bits, instead bit 0 is the 69 + * 'readable' bit. 
70 + */ 71 + pte_masks = (struct pte_masks) { 72 + .present = 0, 73 + .user = 0, 74 + .readable = BIT_ULL(0), 75 + .writable = BIT_ULL(1), 76 + .executable = BIT_ULL(2), 77 + .huge = BIT_ULL(7), 78 + .accessed = BIT_ULL(8), 79 + .dirty = BIT_ULL(9), 80 + .nx = 0, 81 + }; 82 + 83 + /* TODO: Add support for 5-level EPT. */ 84 + tdp_mmu_init(vm, 4, &pte_masks); 42 85 } 43 86 44 87 /* Allocate memory regions for nested VMX tests. ··· 115 106 vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); 116 107 vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); 117 108 memset(vmx->vmwrite_hva, 0, getpagesize()); 109 + 110 + if (vm->stage2_mmu.pgd_created) 111 + vmx->eptp_gpa = vm->stage2_mmu.pgd; 118 112 119 113 *p_vmx_gva = vmx_gva; 120 114 return vmx; ··· 208 196 vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); 209 197 210 198 if (vmx->eptp_gpa) { 211 - uint64_t ept_paddr; 212 - struct eptPageTablePointer eptp = { 213 - .memory_type = X86_MEMTYPE_WB, 214 - .page_walk_length = 3, /* + 1 */ 215 - .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), 216 - .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, 217 - }; 199 + uint64_t eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4; 218 200 219 - memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); 220 - vmwrite(EPT_POINTER, ept_paddr); 201 + TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0, 202 + "Illegal bits set in vmx->eptp_gpa"); 203 + 204 + if (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS)) 205 + eptp |= EPTP_AD_ENABLED; 206 + 207 + vmwrite(EPT_POINTER, eptp); 221 208 sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; 222 209 } 223 210 ··· 373 362 init_vmcs_guest_state(guest_rip, guest_rsp); 374 363 } 375 364 376 - static void nested_create_pte(struct kvm_vm *vm, 377 - struct eptPageTableEntry *pte, 378 - uint64_t nested_paddr, 379 - uint64_t paddr, 380 - int current_level, 381 - int target_level) 382 - { 383 - if (!pte->readable) { 384 - pte->writable = true; 385 - pte->readable = true; 
386 - pte->executable = true; 387 - pte->page_size = (current_level == target_level); 388 - if (pte->page_size) 389 - pte->address = paddr >> vm->page_shift; 390 - else 391 - pte->address = vm_alloc_page_table(vm) >> vm->page_shift; 392 - } else { 393 - /* 394 - * Entry already present. Assert that the caller doesn't want 395 - * a hugepage at this level, and that there isn't a hugepage at 396 - * this level. 397 - */ 398 - TEST_ASSERT(current_level != target_level, 399 - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", 400 - current_level, nested_paddr); 401 - TEST_ASSERT(!pte->page_size, 402 - "Cannot create page table at level: %u, nested_paddr: 0x%lx", 403 - current_level, nested_paddr); 404 - } 405 - } 406 - 407 - 408 - void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, 409 - uint64_t nested_paddr, uint64_t paddr, int target_level) 410 - { 411 - const uint64_t page_size = PG_LEVEL_SIZE(target_level); 412 - struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; 413 - uint16_t index; 414 - 415 - TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, 416 - "Unknown or unsupported guest mode: 0x%x", vm->mode); 417 - 418 - TEST_ASSERT((nested_paddr >> 48) == 0, 419 - "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT", 420 - nested_paddr); 421 - TEST_ASSERT((nested_paddr % page_size) == 0, 422 - "Nested physical address not on page boundary,\n" 423 - " nested_paddr: 0x%lx page_size: 0x%lx", 424 - nested_paddr, page_size); 425 - TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, 426 - "Physical address beyond beyond maximum supported,\n" 427 - " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 428 - paddr, vm->max_gfn, vm->page_size); 429 - TEST_ASSERT((paddr % page_size) == 0, 430 - "Physical address not on page boundary,\n" 431 - " paddr: 0x%lx page_size: 0x%lx", 432 - paddr, page_size); 433 - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, 434 - "Physical address beyond beyond maximum supported,\n" 435 - " 
paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 436 - paddr, vm->max_gfn, vm->page_size); 437 - 438 - for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { 439 - index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; 440 - pte = &pt[index]; 441 - 442 - nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); 443 - 444 - if (pte->page_size) 445 - break; 446 - 447 - pt = addr_gpa2hva(vm, pte->address * vm->page_size); 448 - } 449 - 450 - /* 451 - * For now mark these as accessed and dirty because the only 452 - * testcase we have needs that. Can be reconsidered later. 453 - */ 454 - pte->accessed = true; 455 - pte->dirty = true; 456 - 457 - } 458 - 459 - void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, 460 - uint64_t nested_paddr, uint64_t paddr) 461 - { 462 - __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); 463 - } 464 - 465 - /* 466 - * Map a range of EPT guest physical addresses to the VM's physical address 467 - * 468 - * Input Args: 469 - * vm - Virtual Machine 470 - * nested_paddr - Nested guest physical address to map 471 - * paddr - VM Physical Address 472 - * size - The size of the range to map 473 - * level - The level at which to map the range 474 - * 475 - * Output Args: None 476 - * 477 - * Return: None 478 - * 479 - * Within the VM given by vm, creates a nested guest translation for the 480 - * page range starting at nested_paddr to the page range starting at paddr. 
481 - */ 482 - void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, 483 - uint64_t nested_paddr, uint64_t paddr, uint64_t size, 484 - int level) 485 - { 486 - size_t page_size = PG_LEVEL_SIZE(level); 487 - size_t npages = size / page_size; 488 - 489 - TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); 490 - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); 491 - 492 - while (npages--) { 493 - __nested_pg_map(vmx, vm, nested_paddr, paddr, level); 494 - nested_paddr += page_size; 495 - paddr += page_size; 496 - } 497 - } 498 - 499 - void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, 500 - uint64_t nested_paddr, uint64_t paddr, uint64_t size) 501 - { 502 - __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); 503 - } 504 - 505 - /* Prepare an identity extended page table that maps all the 506 - * physical pages in VM. 507 - */ 508 - void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, 509 - uint32_t memslot) 510 - { 511 - sparsebit_idx_t i, last; 512 - struct userspace_mem_region *region = 513 - memslot2region(vm, memslot); 514 - 515 - i = (region->region.guest_phys_addr >> vm->page_shift) - 1; 516 - last = i + (region->region.memory_size >> vm->page_shift); 517 - for (;;) { 518 - i = sparsebit_next_clear(region->unused_phy_pages, i); 519 - if (i > last) 520 - break; 521 - 522 - nested_map(vmx, vm, 523 - (uint64_t)i << vm->page_shift, 524 - (uint64_t)i << vm->page_shift, 525 - 1 << vm->page_shift); 526 - } 527 - } 528 - 529 - /* Identity map a region with 1GiB Pages. 
*/ 530 - void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, 531 - uint64_t addr, uint64_t size) 532 - { 533 - __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); 534 - } 535 - 536 365 bool kvm_cpu_has_ept(void) 537 366 { 538 367 uint64_t ctrl; 368 + 369 + if (!kvm_cpu_has(X86_FEATURE_VMX)) 370 + return false; 539 371 540 372 ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; 541 373 if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) ··· 386 532 387 533 ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; 388 534 return ctrl & SECONDARY_EXEC_ENABLE_EPT; 389 - } 390 - 391 - void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm) 392 - { 393 - TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); 394 - 395 - vmx->eptp = (void *)vm_vaddr_alloc_page(vm); 396 - vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); 397 - vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); 398 535 } 399 536 400 537 void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
+1 -1
tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
··· 619 619 */ 620 620 gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); 621 621 for (i = 0; i < NTEST_PAGES; i++) { 622 - pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); 622 + pte = vm_get_pte(vm, data->test_pages + i * PAGE_SIZE); 623 623 gpa = addr_hva2gpa(vm, pte); 624 624 virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK); 625 625 data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
+293
tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * KVM dirty page logging test 4 + * 5 + * Copyright (C) 2018, Red Hat, Inc. 6 + */ 7 + #include <stdio.h> 8 + #include <stdlib.h> 9 + #include <linux/bitmap.h> 10 + #include <linux/bitops.h> 11 + 12 + #include "test_util.h" 13 + #include "kvm_util.h" 14 + #include "processor.h" 15 + #include "svm_util.h" 16 + #include "vmx.h" 17 + 18 + /* The memory slot index to track dirty pages */ 19 + #define TEST_MEM_SLOT_INDEX 1 20 + 21 + /* 22 + * Allocate four pages total. Two pages are used to verify that KVM marks 23 + * the accessed page/GFN as dirty, but not the "other" page. Times two 24 + * so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA 25 + * (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to 26 + * detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page 27 + * tables, as marking L2's GPA dirty would get a false pass if L1 == L2). 28 + */ 29 + #define TEST_MEM_PAGES 4 30 + 31 + #define TEST_MEM_BASE 0xc0000000 32 + #define TEST_MEM_ALIAS_BASE 0xc0002000 33 + 34 + #define TEST_GUEST_ADDR(base, idx) ((base) + (idx) * PAGE_SIZE) 35 + 36 + #define TEST_GVA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx) 37 + #define TEST_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx) 38 + 39 + #define TEST_ALIAS_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx) 40 + 41 + #define TEST_HVA(vm, idx) addr_gpa2hva(vm, TEST_GPA(idx)) 42 + 43 + #define L2_GUEST_STACK_SIZE 64 44 + 45 + /* Use the page offset bits to communicate the access+fault type. 
*/ 46 + #define TEST_SYNC_READ_FAULT BIT(0) 47 + #define TEST_SYNC_WRITE_FAULT BIT(1) 48 + #define TEST_SYNC_NO_FAULT BIT(2) 49 + 50 + static void l2_guest_code(vm_vaddr_t base) 51 + { 52 + vm_vaddr_t page0 = TEST_GUEST_ADDR(base, 0); 53 + vm_vaddr_t page1 = TEST_GUEST_ADDR(base, 1); 54 + 55 + READ_ONCE(*(u64 *)page0); 56 + GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT); 57 + WRITE_ONCE(*(u64 *)page0, 1); 58 + GUEST_SYNC(page0 | TEST_SYNC_WRITE_FAULT); 59 + READ_ONCE(*(u64 *)page0); 60 + GUEST_SYNC(page0 | TEST_SYNC_NO_FAULT); 61 + 62 + WRITE_ONCE(*(u64 *)page1, 1); 63 + GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT); 64 + WRITE_ONCE(*(u64 *)page1, 1); 65 + GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT); 66 + READ_ONCE(*(u64 *)page1); 67 + GUEST_SYNC(page1 | TEST_SYNC_NO_FAULT); 68 + 69 + /* Exit to L1 and never come back. */ 70 + vmcall(); 71 + } 72 + 73 + static void l2_guest_code_tdp_enabled(void) 74 + { 75 + /* 76 + * Use the aliased virtual addresses when running with TDP to verify 77 + * that KVM correctly handles the case where a page is dirtied via a 78 + * different GPA than would be used by L1. 79 + */ 80 + l2_guest_code(TEST_MEM_ALIAS_BASE); 81 + } 82 + 83 + static void l2_guest_code_tdp_disabled(void) 84 + { 85 + /* 86 + * Use the "normal" virtual addresses when running without TDP enabled, 87 + * in which case L2 will use the same page tables as L1, and thus needs 88 + * to use the same virtual addresses that are mapped into L1. 
89 + */ 90 + l2_guest_code(TEST_MEM_BASE); 91 + } 92 + 93 + void l1_vmx_code(struct vmx_pages *vmx) 94 + { 95 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 96 + void *l2_rip; 97 + 98 + GUEST_ASSERT(vmx->vmcs_gpa); 99 + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); 100 + GUEST_ASSERT(load_vmcs(vmx)); 101 + 102 + if (vmx->eptp_gpa) 103 + l2_rip = l2_guest_code_tdp_enabled; 104 + else 105 + l2_rip = l2_guest_code_tdp_disabled; 106 + 107 + prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); 108 + 109 + GUEST_SYNC(TEST_SYNC_NO_FAULT); 110 + GUEST_ASSERT(!vmlaunch()); 111 + GUEST_SYNC(TEST_SYNC_NO_FAULT); 112 + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL); 113 + GUEST_DONE(); 114 + } 115 + 116 + static void l1_svm_code(struct svm_test_data *svm) 117 + { 118 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 119 + void *l2_rip; 120 + 121 + if (svm->ncr3_gpa) 122 + l2_rip = l2_guest_code_tdp_enabled; 123 + else 124 + l2_rip = l2_guest_code_tdp_disabled; 125 + 126 + generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); 127 + 128 + GUEST_SYNC(TEST_SYNC_NO_FAULT); 129 + run_guest(svm->vmcb, svm->vmcb_gpa); 130 + GUEST_SYNC(TEST_SYNC_NO_FAULT); 131 + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); 132 + GUEST_DONE(); 133 + } 134 + 135 + static void l1_guest_code(void *data) 136 + { 137 + if (this_cpu_has(X86_FEATURE_VMX)) 138 + l1_vmx_code(data); 139 + else 140 + l1_svm_code(data); 141 + } 142 + 143 + static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg, 144 + unsigned long *bmap) 145 + { 146 + vm_vaddr_t gva = arg & ~(PAGE_SIZE - 1); 147 + int page_nr, i; 148 + 149 + /* 150 + * Extract the page number of underlying physical page, which is also 151 + * the _L1_ page number. The dirty bitmap _must_ be updated based on 152 + * the L1 GPA, not L2 GPA, i.e. whether or not L2 used an aliased GPA 153 + * (i.e. 
if TDP enabled for L2) is irrelevant with respect to the dirty 154 + * bitmap and which underlying physical page is accessed. 155 + * 156 + * Note, gva will be '0' if there was no access, i.e. if the purpose of 157 + * the sync is to verify all pages are clean. 158 + */ 159 + if (!gva) 160 + page_nr = 0; 161 + else if (gva >= TEST_MEM_ALIAS_BASE) 162 + page_nr = (gva - TEST_MEM_ALIAS_BASE) >> PAGE_SHIFT; 163 + else 164 + page_nr = (gva - TEST_MEM_BASE) >> PAGE_SHIFT; 165 + TEST_ASSERT(page_nr == 0 || page_nr == 1, 166 + "Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg); 167 + TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT), 168 + "Test bug, gva must be valid if a fault is expected"); 169 + 170 + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); 171 + 172 + /* 173 + * Check all pages to verify the correct physical page was modified (or 174 + * not), and that all pages are clean/dirty as expected. 175 + * 176 + * If a fault of any kind is expected, the target page should be dirty 177 + * as the Dirty bit is set in the gPTE. KVM should create a writable 178 + * SPTE even on a read fault, *and* KVM must mark the GFN as dirty 179 + * when doing so. 180 + */ 181 + for (i = 0; i < TEST_MEM_PAGES; i++) { 182 + if (i == page_nr && (arg & TEST_SYNC_WRITE_FAULT)) 183 + TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1, 184 + "Page %u incorrectly not written by guest", i); 185 + else 186 + TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL, 187 + "Page %u incorrectly written by guest", i); 188 + 189 + if (i == page_nr && !(arg & TEST_SYNC_NO_FAULT)) 190 + TEST_ASSERT(test_bit(i, bmap), 191 + "Page %u incorrectly reported clean on %s fault", 192 + i, arg & TEST_SYNC_READ_FAULT ? 
"read" : "write"); 193 + else 194 + TEST_ASSERT(!test_bit(i, bmap), 195 + "Page %u incorrectly reported dirty", i); 196 + } 197 + } 198 + 199 + static void test_dirty_log(bool nested_tdp) 200 + { 201 + vm_vaddr_t nested_gva = 0; 202 + unsigned long *bmap; 203 + struct kvm_vcpu *vcpu; 204 + struct kvm_vm *vm; 205 + struct ucall uc; 206 + bool done = false; 207 + 208 + pr_info("Nested TDP: %s\n", nested_tdp ? "enabled" : "disabled"); 209 + 210 + /* Create VM */ 211 + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); 212 + if (nested_tdp) 213 + vm_enable_tdp(vm); 214 + 215 + if (kvm_cpu_has(X86_FEATURE_VMX)) 216 + vcpu_alloc_vmx(vm, &nested_gva); 217 + else 218 + vcpu_alloc_svm(vm, &nested_gva); 219 + 220 + vcpu_args_set(vcpu, 1, nested_gva); 221 + 222 + /* Add an extra memory slot for testing dirty logging */ 223 + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 224 + TEST_MEM_BASE, 225 + TEST_MEM_SLOT_INDEX, 226 + TEST_MEM_PAGES, 227 + KVM_MEM_LOG_DIRTY_PAGES); 228 + 229 + /* 230 + * Add an identity map for GVA range [0xc0000000, 0xc0004000). This 231 + * affects both L1 and L2. However... 232 + */ 233 + virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES); 234 + 235 + /* 236 + * ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will 237 + * map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2). 238 + * 239 + * When TDP is disabled, the L2 guest code will still access the same L1 240 + * GPAs as the TDP enabled case. 241 + * 242 + * Set the Dirty bit in the PTEs used by L2 so that KVM will create 243 + * writable SPTEs when handling read faults (if the Dirty bit isn't 244 + * set, KVM must intercept the next write to emulate the Dirty bit 245 + * update). 
246 + */ 247 + if (nested_tdp) { 248 + tdp_identity_map_default_memslots(vm); 249 + tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE); 250 + tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE); 251 + 252 + *tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu); 253 + *tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu); 254 + } else { 255 + *vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu); 256 + *vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu); 257 + } 258 + 259 + bmap = bitmap_zalloc(TEST_MEM_PAGES); 260 + 261 + while (!done) { 262 + memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE); 263 + 264 + vcpu_run(vcpu); 265 + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 266 + 267 + switch (get_ucall(vcpu, &uc)) { 268 + case UCALL_ABORT: 269 + REPORT_GUEST_ASSERT(uc); 270 + /* NOT REACHED */ 271 + case UCALL_SYNC: 272 + test_handle_ucall_sync(vm, uc.args[1], bmap); 273 + break; 274 + case UCALL_DONE: 275 + done = true; 276 + break; 277 + default: 278 + TEST_FAIL("Unknown ucall %lu", uc.cmd); 279 + } 280 + } 281 + } 282 + 283 + int main(int argc, char *argv[]) 284 + { 285 + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM)); 286 + 287 + test_dirty_log(/*nested_tdp=*/false); 288 + 289 + if (kvm_cpu_has_tdp()) 290 + test_dirty_log(/*nested_tdp=*/true); 291 + 292 + return 0; 293 + }
+197
tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2026, Google LLC. 4 + */ 5 + #include "kvm_util.h" 6 + #include "vmx.h" 7 + #include "svm_util.h" 8 + #include "kselftest.h" 9 + 10 + /* 11 + * Allocate two VMCB pages for testing. Both pages have different GVAs (shared 12 + * by both L1 and L2) and L1 GPAs. A single L2 GPA is used such that: 13 + * - L2 GPA == L1 GPA for VMCB0. 14 + * - L2 GPA is mapped to L1 GPA for VMCB1 using NPT in L1. 15 + * 16 + * This allows testing whether the GPA used by VMSAVE/VMLOAD in L2 is 17 + * interpreted as a direct L1 GPA or translated using NPT as an L2 GPA, depends 18 + * on which VMCB is accessed. 19 + */ 20 + #define TEST_MEM_SLOT_INDEX 1 21 + #define TEST_MEM_PAGES 2 22 + #define TEST_MEM_BASE 0xc0000000 23 + 24 + #define TEST_GUEST_ADDR(idx) (TEST_MEM_BASE + (idx) * PAGE_SIZE) 25 + 26 + #define TEST_VMCB_L1_GPA(idx) TEST_GUEST_ADDR(idx) 27 + #define TEST_VMCB_GVA(idx) TEST_GUEST_ADDR(idx) 28 + 29 + #define TEST_VMCB_L2_GPA TEST_VMCB_L1_GPA(0) 30 + 31 + #define L2_GUEST_STACK_SIZE 64 32 + 33 + static void l2_guest_code_vmsave(void) 34 + { 35 + asm volatile("vmsave %0" : : "a"(TEST_VMCB_L2_GPA) : "memory"); 36 + } 37 + 38 + static void l2_guest_code_vmload(void) 39 + { 40 + asm volatile("vmload %0" : : "a"(TEST_VMCB_L2_GPA) : "memory"); 41 + } 42 + 43 + static void l2_guest_code_vmcb(int vmcb_idx) 44 + { 45 + wrmsr(MSR_KERNEL_GS_BASE, 0xaaaa); 46 + l2_guest_code_vmsave(); 47 + 48 + /* Verify the VMCB used by VMSAVE and update KERNEL_GS_BASE to 0xbbbb */ 49 + GUEST_SYNC(vmcb_idx); 50 + 51 + l2_guest_code_vmload(); 52 + GUEST_ASSERT_EQ(rdmsr(MSR_KERNEL_GS_BASE), 0xbbbb); 53 + 54 + /* Reset MSR_KERNEL_GS_BASE */ 55 + wrmsr(MSR_KERNEL_GS_BASE, 0); 56 + l2_guest_code_vmsave(); 57 + 58 + vmmcall(); 59 + } 60 + 61 + static void l2_guest_code_vmcb0(void) 62 + { 63 + l2_guest_code_vmcb(0); 64 + } 65 + 66 + static void l2_guest_code_vmcb1(void) 67 + { 68 + l2_guest_code_vmcb(1); 69 + } 70 + 71 + static void 
l1_guest_code(struct svm_test_data *svm) 72 + { 73 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 74 + 75 + /* Each test case initializes the guest RIP below */ 76 + generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); 77 + 78 + /* Set VMSAVE/VMLOAD intercepts and make sure they work with.. */ 79 + svm->vmcb->control.intercept |= (BIT_ULL(INTERCEPT_VMSAVE) | 80 + BIT_ULL(INTERCEPT_VMLOAD)); 81 + 82 + /* ..VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK cleared.. */ 83 + svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; 84 + 85 + svm->vmcb->save.rip = (u64)l2_guest_code_vmsave; 86 + run_guest(svm->vmcb, svm->vmcb_gpa); 87 + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE); 88 + 89 + svm->vmcb->save.rip = (u64)l2_guest_code_vmload; 90 + run_guest(svm->vmcb, svm->vmcb_gpa); 91 + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD); 92 + 93 + /* ..and VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK set */ 94 + svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; 95 + 96 + svm->vmcb->save.rip = (u64)l2_guest_code_vmsave; 97 + run_guest(svm->vmcb, svm->vmcb_gpa); 98 + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE); 99 + 100 + svm->vmcb->save.rip = (u64)l2_guest_code_vmload; 101 + run_guest(svm->vmcb, svm->vmcb_gpa); 102 + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD); 103 + 104 + /* Now clear the intercepts to test VMSAVE/VMLOAD behavior */ 105 + svm->vmcb->control.intercept &= ~(BIT_ULL(INTERCEPT_VMSAVE) | 106 + BIT_ULL(INTERCEPT_VMLOAD)); 107 + 108 + /* 109 + * Without VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK, the GPA will be 110 + * interpreted as an L1 GPA, so VMCB0 should be used. 
111 + */ 112 + svm->vmcb->save.rip = (u64)l2_guest_code_vmcb0; 113 + svm->vmcb->control.virt_ext &= ~VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; 114 + run_guest(svm->vmcb, svm->vmcb_gpa); 115 + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); 116 + 117 + /* 118 + * With VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK, the GPA will be interpeted as 119 + * an L2 GPA, and translated through the NPT to VMCB1. 120 + */ 121 + svm->vmcb->save.rip = (u64)l2_guest_code_vmcb1; 122 + svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; 123 + run_guest(svm->vmcb, svm->vmcb_gpa); 124 + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); 125 + 126 + GUEST_DONE(); 127 + } 128 + 129 + int main(int argc, char *argv[]) 130 + { 131 + vm_vaddr_t nested_gva = 0; 132 + struct vmcb *test_vmcb[2]; 133 + struct kvm_vcpu *vcpu; 134 + struct kvm_vm *vm; 135 + int i; 136 + 137 + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); 138 + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_NPT)); 139 + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD)); 140 + 141 + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); 142 + vm_enable_tdp(vm); 143 + 144 + vcpu_alloc_svm(vm, &nested_gva); 145 + vcpu_args_set(vcpu, 1, nested_gva); 146 + 147 + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 148 + TEST_MEM_BASE, TEST_MEM_SLOT_INDEX, 149 + TEST_MEM_PAGES, 0); 150 + 151 + for (i = 0; i <= 1; i++) { 152 + virt_map(vm, TEST_VMCB_GVA(i), TEST_VMCB_L1_GPA(i), 1); 153 + test_vmcb[i] = (struct vmcb *)addr_gva2hva(vm, TEST_VMCB_GVA(i)); 154 + } 155 + 156 + tdp_identity_map_default_memslots(vm); 157 + 158 + /* 159 + * L2 GPA == L1_GPA(0), but map it to L1_GPA(1), to allow testing 160 + * whether the L2 GPA is interpreted as an L1 GPA or translated through 161 + * the NPT. 
162 + */ 163 + TEST_ASSERT_EQ(TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(0)); 164 + tdp_map(vm, TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(1), PAGE_SIZE); 165 + 166 + for (;;) { 167 + struct ucall uc; 168 + 169 + vcpu_run(vcpu); 170 + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 171 + 172 + switch (get_ucall(vcpu, &uc)) { 173 + case UCALL_ABORT: 174 + REPORT_GUEST_ASSERT(uc); 175 + case UCALL_SYNC: 176 + i = uc.args[1]; 177 + TEST_ASSERT(i == 0 || i == 1, "Unexpected VMCB idx: %d", i); 178 + 179 + /* 180 + * Check that only the expected VMCB has KERNEL_GS_BASE 181 + * set to 0xaaaa, and update it to 0xbbbb. 182 + */ 183 + TEST_ASSERT_EQ(test_vmcb[i]->save.kernel_gs_base, 0xaaaa); 184 + TEST_ASSERT_EQ(test_vmcb[1-i]->save.kernel_gs_base, 0); 185 + test_vmcb[i]->save.kernel_gs_base = 0xbbbb; 186 + break; 187 + case UCALL_DONE: 188 + goto done; 189 + default: 190 + TEST_FAIL("Unknown ucall %lu", uc.cmd); 191 + } 192 + } 193 + 194 + done: 195 + kvm_vm_free(vm); 196 + return 0; 197 + }
+1 -3
tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
··· 47 47 struct kvm_vcpu *vcpu; 48 48 struct kvm_vm *vm; 49 49 struct ucall uc; 50 - uint64_t *pte; 51 50 uint64_t *hva; 52 51 uint64_t gpa; 53 52 int rc; ··· 72 73 hva = addr_gpa2hva(vm, MEM_REGION_GPA); 73 74 memset(hva, 0, PAGE_SIZE); 74 75 75 - pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); 76 - *pte |= BIT_ULL(MAXPHYADDR); 76 + *vm_get_pte(vm, MEM_REGION_GVA) |= BIT_ULL(MAXPHYADDR); 77 77 78 78 vcpu_run(vcpu); 79 79
-179
tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * KVM dirty page logging test 4 - * 5 - * Copyright (C) 2018, Red Hat, Inc. 6 - */ 7 - #include <stdio.h> 8 - #include <stdlib.h> 9 - #include <linux/bitmap.h> 10 - #include <linux/bitops.h> 11 - 12 - #include "test_util.h" 13 - #include "kvm_util.h" 14 - #include "processor.h" 15 - #include "vmx.h" 16 - 17 - /* The memory slot index to track dirty pages */ 18 - #define TEST_MEM_SLOT_INDEX 1 19 - #define TEST_MEM_PAGES 3 20 - 21 - /* L1 guest test virtual memory offset */ 22 - #define GUEST_TEST_MEM 0xc0000000 23 - 24 - /* L2 guest test virtual memory offset */ 25 - #define NESTED_TEST_MEM1 0xc0001000 26 - #define NESTED_TEST_MEM2 0xc0002000 27 - 28 - static void l2_guest_code(u64 *a, u64 *b) 29 - { 30 - READ_ONCE(*a); 31 - WRITE_ONCE(*a, 1); 32 - GUEST_SYNC(true); 33 - GUEST_SYNC(false); 34 - 35 - WRITE_ONCE(*b, 1); 36 - GUEST_SYNC(true); 37 - WRITE_ONCE(*b, 1); 38 - GUEST_SYNC(true); 39 - GUEST_SYNC(false); 40 - 41 - /* Exit to L1 and never come back. 
*/ 42 - vmcall(); 43 - } 44 - 45 - static void l2_guest_code_ept_enabled(void) 46 - { 47 - l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2); 48 - } 49 - 50 - static void l2_guest_code_ept_disabled(void) 51 - { 52 - /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */ 53 - l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM); 54 - } 55 - 56 - void l1_guest_code(struct vmx_pages *vmx) 57 - { 58 - #define L2_GUEST_STACK_SIZE 64 59 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 60 - void *l2_rip; 61 - 62 - GUEST_ASSERT(vmx->vmcs_gpa); 63 - GUEST_ASSERT(prepare_for_vmx_operation(vmx)); 64 - GUEST_ASSERT(load_vmcs(vmx)); 65 - 66 - if (vmx->eptp_gpa) 67 - l2_rip = l2_guest_code_ept_enabled; 68 - else 69 - l2_rip = l2_guest_code_ept_disabled; 70 - 71 - prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); 72 - 73 - GUEST_SYNC(false); 74 - GUEST_ASSERT(!vmlaunch()); 75 - GUEST_SYNC(false); 76 - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 77 - GUEST_DONE(); 78 - } 79 - 80 - static void test_vmx_dirty_log(bool enable_ept) 81 - { 82 - vm_vaddr_t vmx_pages_gva = 0; 83 - struct vmx_pages *vmx; 84 - unsigned long *bmap; 85 - uint64_t *host_test_mem; 86 - 87 - struct kvm_vcpu *vcpu; 88 - struct kvm_vm *vm; 89 - struct ucall uc; 90 - bool done = false; 91 - 92 - pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled"); 93 - 94 - /* Create VM */ 95 - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); 96 - vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); 97 - vcpu_args_set(vcpu, 1, vmx_pages_gva); 98 - 99 - /* Add an extra memory slot for testing dirty logging */ 100 - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 101 - GUEST_TEST_MEM, 102 - TEST_MEM_SLOT_INDEX, 103 - TEST_MEM_PAGES, 104 - KVM_MEM_LOG_DIRTY_PAGES); 105 - 106 - /* 107 - * Add an identity map for GVA range [0xc0000000, 0xc0002000). This 108 - * affects both L1 and L2. However... 
109 - */ 110 - virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES); 111 - 112 - /* 113 - * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to 114 - * 0xc0000000. 115 - * 116 - * Note that prepare_eptp should be called only L1's GPA map is done, 117 - * meaning after the last call to virt_map. 118 - * 119 - * When EPT is disabled, the L2 guest code will still access the same L1 120 - * GPAs as the EPT enabled case. 121 - */ 122 - if (enable_ept) { 123 - prepare_eptp(vmx, vm); 124 - nested_map_memslot(vmx, vm, 0); 125 - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); 126 - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); 127 - } 128 - 129 - bmap = bitmap_zalloc(TEST_MEM_PAGES); 130 - host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); 131 - 132 - while (!done) { 133 - memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE); 134 - vcpu_run(vcpu); 135 - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 136 - 137 - switch (get_ucall(vcpu, &uc)) { 138 - case UCALL_ABORT: 139 - REPORT_GUEST_ASSERT(uc); 140 - /* NOT REACHED */ 141 - case UCALL_SYNC: 142 - /* 143 - * The nested guest wrote at offset 0x1000 in the memslot, but the 144 - * dirty bitmap must be filled in according to L1 GPA, not L2. 
145 - */ 146 - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); 147 - if (uc.args[1]) { 148 - TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); 149 - TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); 150 - } else { 151 - TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); 152 - TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); 153 - } 154 - 155 - TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); 156 - TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); 157 - TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); 158 - TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); 159 - break; 160 - case UCALL_DONE: 161 - done = true; 162 - break; 163 - default: 164 - TEST_FAIL("Unknown ucall %lu", uc.cmd); 165 - } 166 - } 167 - } 168 - 169 - int main(int argc, char *argv[]) 170 - { 171 - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); 172 - 173 - test_vmx_dirty_log(/*enable_ept=*/false); 174 - 175 - if (kvm_cpu_has_ept()) 176 - test_vmx_dirty_log(/*enable_ept=*/true); 177 - 178 - return 0; 179 - }
+1 -1
tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
··· 90 90 * L1 needs to read its own PML5 table to set up L2. Identity map 91 91 * the PML5 table to facilitate this. 92 92 */ 93 - virt_map(vm, vm->pgd, vm->pgd, 1); 93 + virt_map(vm, vm->mmu.pgd, vm->mmu.pgd, 1); 94 94 95 95 vcpu_alloc_vmx(vm, &vmx_pages_gva); 96 96 vcpu_args_set(vcpu, 1, vmx_pages_gva);
+276
tools/testing/selftests/kvm/x86/xapic_tpr_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <fcntl.h> 3 + #include <stdatomic.h> 4 + #include <stdio.h> 5 + #include <stdlib.h> 6 + #include <string.h> 7 + #include <sys/ioctl.h> 8 + #include <unistd.h> 9 + 10 + #include "apic.h" 11 + #include "kvm_util.h" 12 + #include "processor.h" 13 + #include "test_util.h" 14 + 15 + static bool is_x2apic; 16 + 17 + #define IRQ_VECTOR 0x20 18 + 19 + /* See also the comment at similar assertion in memslot_perf_test.c */ 20 + static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless"); 21 + 22 + static atomic_uint tpr_guest_irq_sync_val; 23 + 24 + static void tpr_guest_irq_sync_flag_reset(void) 25 + { 26 + atomic_store_explicit(&tpr_guest_irq_sync_val, 0, 27 + memory_order_release); 28 + } 29 + 30 + static unsigned int tpr_guest_irq_sync_val_get(void) 31 + { 32 + return atomic_load_explicit(&tpr_guest_irq_sync_val, 33 + memory_order_acquire); 34 + } 35 + 36 + static void tpr_guest_irq_sync_val_inc(void) 37 + { 38 + atomic_fetch_add_explicit(&tpr_guest_irq_sync_val, 1, 39 + memory_order_acq_rel); 40 + } 41 + 42 + static void tpr_guest_irq_handler_xapic(struct ex_regs *regs) 43 + { 44 + tpr_guest_irq_sync_val_inc(); 45 + 46 + xapic_write_reg(APIC_EOI, 0); 47 + } 48 + 49 + static void tpr_guest_irq_handler_x2apic(struct ex_regs *regs) 50 + { 51 + tpr_guest_irq_sync_val_inc(); 52 + 53 + x2apic_write_reg(APIC_EOI, 0); 54 + } 55 + 56 + static void tpr_guest_irq_queue(void) 57 + { 58 + if (is_x2apic) { 59 + x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR); 60 + } else { 61 + uint32_t icr, icr2; 62 + 63 + icr = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | 64 + IRQ_VECTOR; 65 + icr2 = 0; 66 + 67 + xapic_write_reg(APIC_ICR2, icr2); 68 + xapic_write_reg(APIC_ICR, icr); 69 + } 70 + } 71 + 72 + static uint8_t tpr_guest_tpr_get(void) 73 + { 74 + uint32_t taskpri; 75 + 76 + if (is_x2apic) 77 + taskpri = x2apic_read_reg(APIC_TASKPRI); 78 + else 79 + taskpri = xapic_read_reg(APIC_TASKPRI); 80 + 81 + return 
GET_APIC_PRI(taskpri); 82 + } 83 + 84 + static uint8_t tpr_guest_ppr_get(void) 85 + { 86 + uint32_t procpri; 87 + 88 + if (is_x2apic) 89 + procpri = x2apic_read_reg(APIC_PROCPRI); 90 + else 91 + procpri = xapic_read_reg(APIC_PROCPRI); 92 + 93 + return GET_APIC_PRI(procpri); 94 + } 95 + 96 + static uint8_t tpr_guest_cr8_get(void) 97 + { 98 + uint64_t cr8; 99 + 100 + asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8)); 101 + 102 + return cr8 & GENMASK(3, 0); 103 + } 104 + 105 + static void tpr_guest_check_tpr_ppr_cr8_equal(void) 106 + { 107 + uint8_t tpr; 108 + 109 + tpr = tpr_guest_tpr_get(); 110 + 111 + GUEST_ASSERT_EQ(tpr_guest_ppr_get(), tpr); 112 + GUEST_ASSERT_EQ(tpr_guest_cr8_get(), tpr); 113 + } 114 + 115 + static void tpr_guest_code(void) 116 + { 117 + cli(); 118 + 119 + if (is_x2apic) 120 + x2apic_enable(); 121 + else 122 + xapic_enable(); 123 + 124 + GUEST_ASSERT_EQ(tpr_guest_tpr_get(), 0); 125 + tpr_guest_check_tpr_ppr_cr8_equal(); 126 + 127 + tpr_guest_irq_queue(); 128 + 129 + /* TPR = 0 but IRQ masked by IF=0, should not fire */ 130 + udelay(1000); 131 + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 0); 132 + 133 + sti(); 134 + 135 + /* IF=1 now, IRQ should fire */ 136 + while (tpr_guest_irq_sync_val_get() == 0) 137 + cpu_relax(); 138 + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1); 139 + 140 + GUEST_SYNC(true); 141 + tpr_guest_check_tpr_ppr_cr8_equal(); 142 + 143 + tpr_guest_irq_queue(); 144 + 145 + /* IRQ masked by barely high enough TPR now, should not fire */ 146 + udelay(1000); 147 + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1); 148 + 149 + GUEST_SYNC(false); 150 + tpr_guest_check_tpr_ppr_cr8_equal(); 151 + 152 + /* TPR barely low enough now to unmask IRQ, should fire */ 153 + while (tpr_guest_irq_sync_val_get() == 1) 154 + cpu_relax(); 155 + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 2); 156 + 157 + GUEST_DONE(); 158 + } 159 + 160 + static uint8_t lapic_tpr_get(struct kvm_lapic_state *xapic) 161 + { 162 + return GET_APIC_PRI(*((u32 
*)&xapic->regs[APIC_TASKPRI])); 163 + } 164 + 165 + static void lapic_tpr_set(struct kvm_lapic_state *xapic, uint8_t val) 166 + { 167 + u32 *taskpri = (u32 *)&xapic->regs[APIC_TASKPRI]; 168 + 169 + *taskpri = SET_APIC_PRI(*taskpri, val); 170 + } 171 + 172 + static uint8_t sregs_tpr(struct kvm_sregs *sregs) 173 + { 174 + return sregs->cr8 & GENMASK(3, 0); 175 + } 176 + 177 + static void test_tpr_check_tpr_zero(struct kvm_vcpu *vcpu) 178 + { 179 + struct kvm_lapic_state xapic; 180 + 181 + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); 182 + 183 + TEST_ASSERT_EQ(lapic_tpr_get(&xapic), 0); 184 + } 185 + 186 + static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu) 187 + { 188 + struct kvm_sregs sregs; 189 + struct kvm_lapic_state xapic; 190 + 191 + vcpu_sregs_get(vcpu, &sregs); 192 + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); 193 + 194 + TEST_ASSERT_EQ(sregs_tpr(&sregs), lapic_tpr_get(&xapic)); 195 + } 196 + 197 + static void test_tpr_set_tpr_for_irq(struct kvm_vcpu *vcpu, bool mask) 198 + { 199 + struct kvm_lapic_state xapic; 200 + uint8_t tpr; 201 + 202 + static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number"); 203 + tpr = IRQ_VECTOR / 16; 204 + if (!mask) 205 + tpr--; 206 + 207 + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); 208 + lapic_tpr_set(&xapic, tpr); 209 + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); 210 + } 211 + 212 + static void test_tpr(bool __is_x2apic) 213 + { 214 + struct kvm_vcpu *vcpu; 215 + struct kvm_vm *vm; 216 + bool done = false; 217 + 218 + is_x2apic = __is_x2apic; 219 + 220 + vm = vm_create_with_one_vcpu(&vcpu, tpr_guest_code); 221 + if (is_x2apic) { 222 + vm_install_exception_handler(vm, IRQ_VECTOR, 223 + tpr_guest_irq_handler_x2apic); 224 + } else { 225 + vm_install_exception_handler(vm, IRQ_VECTOR, 226 + tpr_guest_irq_handler_xapic); 227 + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_X2APIC); 228 + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); 229 + } 230 + 231 + sync_global_to_guest(vcpu->vm, is_x2apic); 232 + 233 + /* According to the 
SDM/APM the TPR value at reset is 0 */ 234 + test_tpr_check_tpr_zero(vcpu); 235 + test_tpr_check_tpr_cr8_equal(vcpu); 236 + 237 + tpr_guest_irq_sync_flag_reset(); 238 + sync_global_to_guest(vcpu->vm, tpr_guest_irq_sync_val); 239 + 240 + while (!done) { 241 + struct ucall uc; 242 + 243 + alarm(2); 244 + vcpu_run(vcpu); 245 + alarm(0); 246 + 247 + switch (get_ucall(vcpu, &uc)) { 248 + case UCALL_ABORT: 249 + REPORT_GUEST_ASSERT(uc); 250 + break; 251 + case UCALL_DONE: 252 + test_tpr_check_tpr_cr8_equal(vcpu); 253 + done = true; 254 + break; 255 + case UCALL_SYNC: 256 + test_tpr_check_tpr_cr8_equal(vcpu); 257 + test_tpr_set_tpr_for_irq(vcpu, uc.args[1]); 258 + break; 259 + default: 260 + TEST_FAIL("Unknown ucall result 0x%lx", uc.cmd); 261 + break; 262 + } 263 + } 264 + kvm_vm_free(vm); 265 + } 266 + 267 + int main(int argc, char *argv[]) 268 + { 269 + /* 270 + * Use separate VMs for the xAPIC and x2APIC tests so that x2APIC can 271 + * be fully hidden from the guest. KVM disallows changing CPUID after 272 + * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC. 273 + */ 274 + test_tpr(false); 275 + test_tpr(true); 276 + }