Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'kvmarm-6.19' of https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for 6.19

- Support for userspace handling of synchronous external aborts (SEAs),
allowing the VMM to potentially handle the abort in a non-fatal
manner.

- Large rework of the VGIC's list register handling with the goal of
supporting more active/pending IRQs than available list registers in
hardware. In addition, the VGIC now supports EOImode==1 style
deactivations for IRQs, which may occur on a different vCPU from the
one that acked the IRQ.

- Support for FEAT_XNX (user / privileged execute permissions) and
FEAT_HAF (hardware update to the Access Flag) in the software page
table walkers and shadow MMU.

- Allow page table destruction to reschedule, fixing long need_resched
latencies observed when destroying a large VM.

- Minor fixes to KVM and selftests

+2586 -542
+47
Documentation/virt/kvm/api.rst
··· 7286 7286 it will enter with output fields already valid; in the common case, the 7287 7287 ``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``. 7288 7288 Userspace need not do anything if it does not wish to support a TDVMCALL. 7289 + 7290 + :: 7291 + 7292 + /* KVM_EXIT_ARM_SEA */ 7293 + struct { 7294 + #define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0) 7295 + __u64 flags; 7296 + __u64 esr; 7297 + __u64 gva; 7298 + __u64 gpa; 7299 + } arm_sea; 7300 + 7301 + Used on arm64 systems. When the VM capability ``KVM_CAP_ARM_SEA_TO_USER`` is 7302 + enabled, KVM exits to userspace if a guest access causes a synchronous 7303 + external abort (SEA) and the host APEI fails to handle the SEA. 7304 + 7305 + ``esr`` is set to a sanitized value of ESR_EL2 from the exception taken to KVM, 7306 + consisting of the following fields: 7307 + 7308 + - ``ESR_EL2.EC`` 7309 + - ``ESR_EL2.IL`` 7310 + - ``ESR_EL2.FnV`` 7311 + - ``ESR_EL2.EA`` 7312 + - ``ESR_EL2.CM`` 7313 + - ``ESR_EL2.WNR`` 7314 + - ``ESR_EL2.FSC`` 7315 + - ``ESR_EL2.SET`` (when FEAT_RAS is implemented for the VM) 7316 + 7317 + ``gva`` is set to the value of FAR_EL2 from the exception taken to KVM when 7318 + ``ESR_EL2.FnV == 0``. Otherwise, the value of ``gva`` is unknown. 7319 + 7320 + ``gpa`` is set to the faulting IPA from the exception taken to KVM when 7321 + the ``KVM_EXIT_ARM_SEA_FLAG_GPA_VALID`` flag is set. Otherwise, the value of 7322 + ``gpa`` is unknown. 7323 + 7289 7324 :: 7290 7325 7291 7326 /* Fix the size of the union. */ ··· 8737 8702 This capability indicates to userspace whether a PFNMAP memory region 8738 8703 can be safely mapped as cacheable. This relies on the presence of 8739 8704 force write back (FWB) feature support on the hardware. 8705 + 8706 + 7.45 KVM_CAP_ARM_SEA_TO_USER 8707 + ---------------------------- 8708 + 8709 + :Architecture: arm64 8710 + :Target: VM 8711 + :Parameters: none 8712 + :Returns: 0 on success, -EINVAL if unsupported. 
8713 + 8714 + When this capability is enabled, KVM may exit to userspace for SEAs taken to 8715 + EL2 resulting from a guest access. See ``KVM_EXIT_ARM_SEA`` for more 8716 + information. 8740 8717 8741 8718 8. Other capabilities. 8742 8719 ======================
+1
arch/arm64/include/asm/kvm_arm.h
··· 111 111 #define TCR_EL2_DS (1UL << 32) 112 112 #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) 113 113 #define TCR_EL2_HPD (1 << 24) 114 + #define TCR_EL2_HA (1 << 21) 114 115 #define TCR_EL2_TBI (1 << 20) 115 116 #define TCR_EL2_PS_SHIFT 16 116 117 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
+4 -4
arch/arm64/include/asm/kvm_asm.h
··· 79 79 __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, 80 80 __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, 81 81 __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, 82 - __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, 82 + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, 83 83 __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, 84 84 __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm, 85 85 __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm, ··· 246 246 extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); 247 247 248 248 extern void __kvm_timer_set_cntvoff(u64 cntvoff); 249 - extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); 250 - extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); 251 - extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); 249 + extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); 250 + extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); 251 + extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); 252 252 253 253 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); 254 254
+3
arch/arm64/include/asm/kvm_host.h
··· 54 54 #define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8) 55 55 #define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9) 56 56 #define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10) 57 + #define KVM_REQ_VGIC_PROCESS_UPDATE KVM_ARCH_REQ(11) 57 58 58 59 #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ 59 60 KVM_DIRTY_LOG_INITIALLY_SET) ··· 351 350 #define KVM_ARCH_FLAG_GUEST_HAS_SVE 9 352 351 /* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */ 353 352 #define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10 353 + /* Unhandled SEAs are taken to userspace */ 354 + #define KVM_ARCH_FLAG_EXIT_SEA 11 354 355 unsigned long flags; 355 356 356 357 /* VM-wide vCPU feature set */
+2 -1
arch/arm64/include/asm/kvm_hyp.h
··· 77 77 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); 78 78 79 79 u64 __gic_v3_get_lr(unsigned int lr); 80 + void __gic_v3_set_lr(u64 val, int lr); 80 81 81 82 void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); 82 83 void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); 83 84 void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); 84 85 void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); 85 - void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); 86 + void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); 86 87 void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); 87 88 int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); 88 89
+38 -2
arch/arm64/include/asm/kvm_nested.h
··· 120 120 return trans->writable; 121 121 } 122 122 123 - static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans) 123 + static inline bool kvm_has_xnx(struct kvm *kvm) 124 124 { 125 - return !(trans->desc & BIT(54)); 125 + return cpus_have_final_cap(ARM64_HAS_XNX) && 126 + kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP); 127 + } 128 + 129 + static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans) 130 + { 131 + u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc); 132 + 133 + if (!kvm_has_xnx(kvm)) 134 + xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10); 135 + 136 + switch (xn) { 137 + case 0b00: 138 + case 0b01: 139 + return true; 140 + default: 141 + return false; 142 + } 143 + } 144 + 145 + static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans) 146 + { 147 + u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc); 148 + 149 + if (!kvm_has_xnx(kvm)) 150 + xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10); 151 + 152 + switch (xn) { 153 + case 0b00: 154 + case 0b11: 155 + return true; 156 + default: 157 + return false; 158 + } 126 159 } 127 160 128 161 extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, ··· 353 320 bool be; 354 321 bool s2; 355 322 bool pa52bit; 323 + bool ha; 356 324 }; 357 325 358 326 struct s1_walk_result { ··· 403 369 BUG_ON(__c >= NR_CPUS); \ 404 370 (FIX_VNCR - __c); \ 405 371 }) 372 + 373 + int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new); 406 374 407 375 #endif /* __ARM64_KVM_NESTED_H */
+42 -7
arch/arm64/include/asm/kvm_pgtable.h
··· 89 89 90 90 #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) 91 91 92 - #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) 92 + #define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53) 93 93 94 94 #define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) 95 95 ··· 240 240 241 241 /** 242 242 * enum kvm_pgtable_prot - Page-table permissions and attributes. 243 - * @KVM_PGTABLE_PROT_X: Execute permission. 243 + * @KVM_PGTABLE_PROT_UX: Unprivileged execute permission. 244 + * @KVM_PGTABLE_PROT_PX: Privileged execute permission. 245 + * @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission. 244 246 * @KVM_PGTABLE_PROT_W: Write permission. 245 247 * @KVM_PGTABLE_PROT_R: Read permission. 246 248 * @KVM_PGTABLE_PROT_DEVICE: Device attributes. ··· 253 251 * @KVM_PGTABLE_PROT_SW3: Software bit 3. 254 252 */ 255 253 enum kvm_pgtable_prot { 256 - KVM_PGTABLE_PROT_X = BIT(0), 257 - KVM_PGTABLE_PROT_W = BIT(1), 258 - KVM_PGTABLE_PROT_R = BIT(2), 254 + KVM_PGTABLE_PROT_PX = BIT(0), 255 + KVM_PGTABLE_PROT_UX = BIT(1), 256 + KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX | 257 + KVM_PGTABLE_PROT_UX, 258 + KVM_PGTABLE_PROT_W = BIT(2), 259 + KVM_PGTABLE_PROT_R = BIT(3), 259 260 260 - KVM_PGTABLE_PROT_DEVICE = BIT(3), 261 - KVM_PGTABLE_PROT_NORMAL_NC = BIT(4), 261 + KVM_PGTABLE_PROT_DEVICE = BIT(4), 262 + KVM_PGTABLE_PROT_NORMAL_NC = BIT(5), 262 263 263 264 KVM_PGTABLE_PROT_SW0 = BIT(55), 264 265 KVM_PGTABLE_PROT_SW1 = BIT(56), ··· 360 355 return pteref; 361 356 } 362 357 358 + static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) 359 + { 360 + return pteref; 361 + } 362 + 363 363 static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) 364 364 { 365 365 /* ··· 392 382 kvm_pteref_t pteref) 393 383 { 394 384 return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); 385 + } 386 + 387 + static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) 388 + { 389 + return rcu_dereference_raw(pteref); 395 390 } 396 391 397 392 static inline 
int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) ··· 565 550 * to freeing and therefore no TLB invalidation is performed. 566 551 */ 567 552 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); 553 + 554 + /** 555 + * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses. 556 + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 557 + * @addr: Intermediate physical address at which to place the mapping. 558 + * @size: Size of the mapping. 559 + * 560 + * The page-table is assumed to be unreachable by any hardware walkers prior 561 + * to freeing and therefore no TLB invalidation is performed. 562 + */ 563 + void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, 564 + u64 addr, u64 size); 565 + 566 + /** 567 + * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table. 568 + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 569 + * 570 + * It is assumed that the rest of the page-table is freed before this operation. 571 + */ 572 + void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); 568 573 569 574 /** 570 575 * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
+3 -1
arch/arm64/include/asm/kvm_pkvm.h
··· 180 180 181 181 int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 182 182 struct kvm_pgtable_mm_ops *mm_ops); 183 - void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); 183 + void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, 184 + u64 addr, u64 size); 185 + void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); 184 186 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, 185 187 enum kvm_pgtable_prot prot, void *mc, 186 188 enum kvm_pgtable_walk_flags flags);
+6 -1
arch/arm64/include/asm/virt.h
··· 40 40 */ 41 41 #define HVC_FINALISE_EL2 3 42 42 43 + /* 44 + * HVC_GET_ICH_VTR_EL2 - Retrieve the ICH_VTR_EL2 value 45 + */ 46 + #define HVC_GET_ICH_VTR_EL2 4 47 + 43 48 /* Max number of HYP stub hypercalls */ 44 - #define HVC_STUB_HCALL_NR 4 49 + #define HVC_STUB_HCALL_NR 5 45 50 46 51 /* Error returned when an invalid stub number is passed into x0 */ 47 52 #define HVC_STUB_ERR 0xbadca11
+59
arch/arm64/kernel/cpufeature.c
··· 2304 2304 } 2305 2305 #endif 2306 2306 2307 + static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry, 2308 + int scope) 2309 + { 2310 + static const struct midr_range has_vgic_v3[] = { 2311 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), 2312 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), 2313 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO), 2314 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), 2315 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), 2316 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), 2317 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), 2318 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), 2319 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO), 2320 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO), 2321 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX), 2322 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX), 2323 + {}, 2324 + }; 2325 + struct arm_smccc_res res = {}; 2326 + 2327 + BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF); 2328 + BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY); 2329 + if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) && 2330 + !is_midr_in_range_list(has_vgic_v3)) 2331 + return false; 2332 + 2333 + if (!is_hyp_mode_available()) 2334 + return false; 2335 + 2336 + if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY)) 2337 + return true; 2338 + 2339 + if (is_kernel_in_hyp_mode()) 2340 + res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2); 2341 + else 2342 + arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res); 2343 + 2344 + if (res.a0 == HVC_STUB_ERR) 2345 + return false; 2346 + 2347 + return res.a1 & ICH_VTR_EL2_TDS; 2348 + } 2349 + 2307 2350 #ifdef CONFIG_ARM64_BTI 2308 2351 static void bti_enable(const struct arm64_cpu_capabilities *__unused) 2309 2352 { ··· 2858 2815 .matches = has_gic_prio_relaxed_sync, 2859 2816 }, 2860 2817 #endif 2818 + { 2819 + /* 2820 + * Depends on having GICv3 2821 + */ 2822 + .desc = "ICV_DIR_EL1 trapping", 2823 + .capability = ARM64_HAS_ICH_HCR_EL2_TDIR, 2824 + .type = 
ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, 2825 + .matches = can_trap_icv_dir_el1, 2826 + }, 2861 2827 #ifdef CONFIG_ARM64_E0PD 2862 2828 { 2863 2829 .desc = "E0PD", ··· 3140 3088 .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, 3141 3089 .capability = ARM64_HAS_GICV5_LEGACY, 3142 3090 .matches = test_has_gicv5_legacy, 3091 + }, 3092 + { 3093 + .desc = "XNX", 3094 + .capability = ARM64_HAS_XNX, 3095 + .type = ARM64_CPUCAP_SYSTEM_FEATURE, 3096 + .matches = has_cpuid_feature, 3097 + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP) 3143 3098 }, 3144 3099 {}, 3145 3100 };
+5
arch/arm64/kernel/hyp-stub.S
··· 54 54 1: cmp x0, #HVC_FINALISE_EL2 55 55 b.eq __finalise_el2 56 56 57 + cmp x0, #HVC_GET_ICH_VTR_EL2 58 + b.ne 2f 59 + mrs_s x1, SYS_ICH_VTR_EL2 60 + b 9f 61 + 57 62 2: cmp x0, #HVC_SOFT_RESTART 58 63 b.ne 3f 59 64 mov x0, x2
+1
arch/arm64/kernel/image-vars.h
··· 91 91 KVM_NVHE_ALIAS(spectre_bhb_patch_wa3); 92 92 KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb); 93 93 KVM_NVHE_ALIAS(alt_cb_patch_nops); 94 + KVM_NVHE_ALIAS(kvm_compute_ich_hcr_trap_bits); 94 95 95 96 /* Global kernel state accessed by nVHE hyp code. */ 96 97 KVM_NVHE_ALIAS(kvm_vgic_global_state);
+11 -3
arch/arm64/kvm/arm.c
··· 132 132 } 133 133 mutex_unlock(&kvm->lock); 134 134 break; 135 + case KVM_CAP_ARM_SEA_TO_USER: 136 + r = 0; 137 + set_bit(KVM_ARCH_FLAG_EXIT_SEA, &kvm->arch.flags); 138 + break; 135 139 default: 136 140 break; 137 141 } ··· 331 327 case KVM_CAP_IRQFD_RESAMPLE: 332 328 case KVM_CAP_COUNTER_OFFSET: 333 329 case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS: 330 + case KVM_CAP_ARM_SEA_TO_USER: 334 331 r = 1; 335 332 break; 336 333 case KVM_CAP_SET_GUEST_DEBUG2: ··· 445 440 if (!has_vhe()) 446 441 return kzalloc(sz, GFP_KERNEL_ACCOUNT); 447 442 448 - return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO); 443 + return kvzalloc(sz, GFP_KERNEL_ACCOUNT); 449 444 } 450 445 451 446 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) ··· 664 659 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 665 660 { 666 661 if (is_protected_kvm_enabled()) { 667 - kvm_call_hyp(__vgic_v3_save_vmcr_aprs, 668 - &vcpu->arch.vgic_cpu.vgic_v3); 662 + kvm_call_hyp(__vgic_v3_save_aprs, &vcpu->arch.vgic_cpu.vgic_v3); 669 663 kvm_call_hyp_nvhe(__pkvm_vcpu_put); 670 664 } 671 665 ··· 1045 1041 * that a VCPU sees new virtual interrupts. 1046 1042 */ 1047 1043 kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); 1044 + 1045 + /* Process interrupts deactivated through a trap */ 1046 + if (kvm_check_request(KVM_REQ_VGIC_PROCESS_UPDATE, vcpu)) 1047 + kvm_vgic_process_async_update(vcpu); 1048 1048 1049 1049 if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) 1050 1050 kvm_update_stolen_time(vcpu);
+176 -20
arch/arm64/kvm/at.c
··· 346 346 347 347 wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x); 348 348 349 + wi->ha = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF); 350 + wi->ha &= (wi->regime == TR_EL2 ? 351 + FIELD_GET(TCR_EL2_HA, tcr) : 352 + FIELD_GET(TCR_HA, tcr)); 353 + 349 354 return 0; 350 355 351 356 addrsz: ··· 367 362 return -EFAULT; 368 363 } 369 364 365 + static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc, 366 + struct s1_walk_info *wi) 367 + { 368 + u64 val; 369 + int r; 370 + 371 + r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val)); 372 + if (r) 373 + return r; 374 + 375 + if (wi->be) 376 + *desc = be64_to_cpu((__force __be64)val); 377 + else 378 + *desc = le64_to_cpu((__force __le64)val); 379 + 380 + return 0; 381 + } 382 + 383 + static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new, 384 + struct s1_walk_info *wi) 385 + { 386 + if (wi->be) { 387 + old = (__force u64)cpu_to_be64(old); 388 + new = (__force u64)cpu_to_be64(new); 389 + } else { 390 + old = (__force u64)cpu_to_le64(old); 391 + new = (__force u64)cpu_to_le64(new); 392 + } 393 + 394 + return __kvm_at_swap_desc(vcpu->kvm, pa, old, new); 395 + } 396 + 370 397 static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, 371 398 struct s1_walk_result *wr, u64 va) 372 399 { 373 - u64 va_top, va_bottom, baddr, desc; 400 + u64 va_top, va_bottom, baddr, desc, new_desc, ipa; 374 401 int level, stride, ret; 375 402 376 403 level = wi->sl; ··· 412 375 va_top = get_ia_size(wi) - 1; 413 376 414 377 while (1) { 415 - u64 index, ipa; 378 + u64 index; 416 379 417 380 va_bottom = (3 - level) * stride + wi->pgshift; 418 381 index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3); ··· 451 414 return ret; 452 415 } 453 416 454 - ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); 417 + ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi); 455 418 if (ret) { 456 419 fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); 457 420 return ret; 458 421 } 459 422 460 - if 
(wi->be) 461 - desc = be64_to_cpu((__force __be64)desc); 462 - else 463 - desc = le64_to_cpu((__force __le64)desc); 423 + new_desc = desc; 464 424 465 425 /* Invalid descriptor */ 466 426 if (!(desc & BIT(0))) ··· 510 476 baddr = desc_to_oa(wi, desc); 511 477 if (check_output_size(baddr & GENMASK(52, va_bottom), wi)) 512 478 goto addrsz; 479 + 480 + if (wi->ha) 481 + new_desc |= PTE_AF; 482 + 483 + if (new_desc != desc) { 484 + ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi); 485 + if (ret) 486 + return ret; 487 + 488 + desc = new_desc; 489 + } 513 490 514 491 if (!(desc & PTE_AF)) { 515 492 fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false); ··· 1266 1221 wr->pr &= !pan; 1267 1222 } 1268 1223 1269 - static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) 1224 + static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par) 1270 1225 { 1271 1226 struct s1_walk_result wr = {}; 1272 1227 struct s1_walk_info wi = {}; ··· 1291 1246 1292 1247 srcu_read_unlock(&vcpu->kvm->srcu, idx); 1293 1248 1249 + /* 1250 + * Race to update a descriptor -- restart the walk. 
1251 + */ 1252 + if (ret == -EAGAIN) 1253 + return ret; 1294 1254 if (ret) 1295 1255 goto compute_par; 1296 1256 ··· 1329 1279 fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false); 1330 1280 1331 1281 compute_par: 1332 - return compute_par_s1(vcpu, &wi, &wr); 1282 + *par = compute_par_s1(vcpu, &wi, &wr); 1283 + return 0; 1333 1284 } 1334 1285 1335 1286 /* ··· 1458 1407 !(par & SYS_PAR_EL1_S)); 1459 1408 } 1460 1409 1461 - void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) 1410 + int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) 1462 1411 { 1463 1412 u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr); 1413 + int ret; 1464 1414 1465 1415 /* 1466 1416 * If PAR_EL1 reports that AT failed on a S1 permission or access ··· 1473 1421 */ 1474 1422 if ((par & SYS_PAR_EL1_F) && 1475 1423 !par_check_s1_perm_fault(par) && 1476 - !par_check_s1_access_fault(par)) 1477 - par = handle_at_slow(vcpu, op, vaddr); 1424 + !par_check_s1_access_fault(par)) { 1425 + ret = handle_at_slow(vcpu, op, vaddr, &par); 1426 + if (ret) 1427 + return ret; 1428 + } 1478 1429 1479 1430 vcpu_write_sys_reg(vcpu, par, PAR_EL1); 1431 + return 0; 1480 1432 } 1481 1433 1482 - void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) 1434 + int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) 1483 1435 { 1484 1436 u64 par; 1437 + int ret; 1485 1438 1486 1439 /* 1487 1440 * We've trapped, so everything is live on the CPU. 
As we will be ··· 1533 1476 } 1534 1477 1535 1478 /* We failed the translation, let's replay it in slow motion */ 1536 - if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) 1537 - par = handle_at_slow(vcpu, op, vaddr); 1479 + if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) { 1480 + ret = handle_at_slow(vcpu, op, vaddr, &par); 1481 + if (ret) 1482 + return ret; 1483 + } 1538 1484 1539 1485 vcpu_write_sys_reg(vcpu, par, PAR_EL1); 1486 + return 0; 1540 1487 } 1541 1488 1542 - void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) 1489 + int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) 1543 1490 { 1544 1491 struct kvm_s2_trans out = {}; 1545 1492 u64 ipa, par; ··· 1570 1509 break; 1571 1510 default: 1572 1511 WARN_ON_ONCE(1); 1573 - return; 1512 + return 0; 1574 1513 } 1575 1514 1576 1515 __kvm_at_s1e01(vcpu, op, vaddr); 1577 1516 par = vcpu_read_sys_reg(vcpu, PAR_EL1); 1578 1517 if (par & SYS_PAR_EL1_F) 1579 - return; 1518 + return 0; 1580 1519 1581 1520 /* 1582 1521 * If we only have a single stage of translation (EL2&0), exit ··· 1584 1523 */ 1585 1524 if (compute_translation_regime(vcpu, op) == TR_EL20 || 1586 1525 !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC))) 1587 - return; 1526 + return 0; 1588 1527 1589 1528 /* Do the stage-2 translation */ 1590 1529 ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0)); 1591 1530 out.esr = 0; 1592 1531 ret = kvm_walk_nested_s2(vcpu, ipa, &out); 1593 1532 if (ret < 0) 1594 - return; 1533 + return ret; 1595 1534 1596 1535 /* Check the access permission */ 1597 1536 if (!out.esr && ··· 1600 1539 1601 1540 par = compute_par_s12(vcpu, par, &out); 1602 1541 vcpu_write_sys_reg(vcpu, par, PAR_EL1); 1542 + return 0; 1603 1543 } 1604 1544 1605 1545 /* ··· 1698 1636 /* Any other error... 
*/ 1699 1637 return ret; 1700 1638 } 1639 + } 1640 + 1641 + #ifdef CONFIG_ARM64_LSE_ATOMICS 1642 + static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) 1643 + { 1644 + u64 tmp = old; 1645 + int ret = 0; 1646 + 1647 + uaccess_enable_privileged(); 1648 + 1649 + asm volatile(__LSE_PREAMBLE 1650 + "1: cas %[old], %[new], %[addr]\n" 1651 + "2:\n" 1652 + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret]) 1653 + : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret) 1654 + : [new] "r" (new) 1655 + : "memory"); 1656 + 1657 + uaccess_disable_privileged(); 1658 + 1659 + if (ret) 1660 + return ret; 1661 + if (tmp != old) 1662 + return -EAGAIN; 1663 + 1664 + return ret; 1665 + } 1666 + #else 1667 + static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new) 1668 + { 1669 + return -EINVAL; 1670 + } 1671 + #endif 1672 + 1673 + static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new) 1674 + { 1675 + int ret = 1; 1676 + u64 tmp; 1677 + 1678 + uaccess_enable_privileged(); 1679 + 1680 + asm volatile("prfm pstl1strm, %[addr]\n" 1681 + "1: ldxr %[tmp], %[addr]\n" 1682 + "sub %[tmp], %[tmp], %[old]\n" 1683 + "cbnz %[tmp], 3f\n" 1684 + "2: stlxr %w[ret], %[new], %[addr]\n" 1685 + "3:\n" 1686 + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret]) 1687 + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret]) 1688 + : [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp) 1689 + : [old] "r" (old), [new] "r" (new) 1690 + : "memory"); 1691 + 1692 + uaccess_disable_privileged(); 1693 + 1694 + /* STLXR didn't update the descriptor, or the compare failed */ 1695 + if (ret == 1) 1696 + return -EAGAIN; 1697 + 1698 + return ret; 1699 + } 1700 + 1701 + int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new) 1702 + { 1703 + struct kvm_memory_slot *slot; 1704 + unsigned long hva; 1705 + u64 __user *ptep; 1706 + bool writable; 1707 + int offset; 1708 + gfn_t gfn; 1709 + int r; 1710 + 1711 + lockdep_assert(srcu_read_lock_held(&kvm->srcu)); 1712 + 1713 + gfn = ipa >> PAGE_SHIFT; 1714 + offset = 
offset_in_page(ipa); 1715 + slot = gfn_to_memslot(kvm, gfn); 1716 + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); 1717 + if (kvm_is_error_hva(hva)) 1718 + return -EINVAL; 1719 + if (!writable) 1720 + return -EPERM; 1721 + 1722 + ptep = (u64 __user *)hva + offset; 1723 + if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS)) 1724 + r = __lse_swap_desc(ptep, old, new); 1725 + else 1726 + r = __llsc_swap_desc(ptep, old, new); 1727 + 1728 + if (r < 0) 1729 + return r; 1730 + 1731 + mark_page_dirty_in_slot(kvm, slot, gfn); 1732 + return 0; 1701 1733 }
+4 -3
arch/arm64/kvm/hyp/nvhe/hyp-main.c
··· 157 157 host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; 158 158 159 159 host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; 160 + host_cpu_if->vgic_vmcr = hyp_cpu_if->vgic_vmcr; 160 161 for (i = 0; i < hyp_cpu_if->used_lrs; ++i) 161 162 host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i]; 162 163 } ··· 465 464 __vgic_v3_init_lrs(); 466 465 } 467 466 468 - static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) 467 + static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) 469 468 { 470 469 DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); 471 470 472 - __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if)); 471 + __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); 473 472 } 474 473 475 474 static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt) ··· 617 616 HANDLE_FUNC(__kvm_tlb_flush_vmid_range), 618 617 HANDLE_FUNC(__kvm_flush_cpu_context), 619 618 HANDLE_FUNC(__kvm_timer_set_cntvoff), 620 - HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), 619 + HANDLE_FUNC(__vgic_v3_save_aprs), 621 620 HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), 622 621 HANDLE_FUNC(__pkvm_reserve_vm), 623 622 HANDLE_FUNC(__pkvm_unreserve_vm),
+3
arch/arm64/kvm/hyp/nvhe/pkvm.c
··· 337 337 /* CTR_EL0 is always under host control, even for protected VMs. */ 338 338 hyp_vm->kvm.arch.ctr_el0 = host_kvm->arch.ctr_el0; 339 339 340 + /* Preserve the vgic model so that GICv3 emulation works */ 341 + hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model; 342 + 340 343 if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags)) 341 344 set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); 342 345
+5
arch/arm64/kvm/hyp/nvhe/sys_regs.c
··· 444 444 445 445 /* Scalable Vector Registers are restricted. */ 446 446 447 + HOST_HANDLED(SYS_ICC_PMR_EL1), 448 + 447 449 RAZ_WI(SYS_ERRIDR_EL1), 448 450 RAZ_WI(SYS_ERRSELR_EL1), 449 451 RAZ_WI(SYS_ERXFR_EL1), ··· 459 457 460 458 /* Limited Ordering Regions Registers are restricted. */ 461 459 460 + HOST_HANDLED(SYS_ICC_DIR_EL1), 461 + HOST_HANDLED(SYS_ICC_RPR_EL1), 462 462 HOST_HANDLED(SYS_ICC_SGI1R_EL1), 463 463 HOST_HANDLED(SYS_ICC_ASGI1R_EL1), 464 464 HOST_HANDLED(SYS_ICC_SGI0R_EL1), 465 + HOST_HANDLED(SYS_ICC_CTLR_EL1), 465 466 { SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, }, 466 467 467 468 HOST_HANDLED(SYS_CCSIDR_EL1),
+107 -21
arch/arm64/kvm/hyp/pgtable.c
··· 661 661 662 662 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) 663 663 664 + static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr) 665 + { 666 + bool px, ux; 667 + u8 xn; 668 + 669 + px = prot & KVM_PGTABLE_PROT_PX; 670 + ux = prot & KVM_PGTABLE_PROT_UX; 671 + 672 + if (!cpus_have_final_cap(ARM64_HAS_XNX) && px != ux) 673 + return -EINVAL; 674 + 675 + if (px && ux) 676 + xn = 0b00; 677 + else if (!px && ux) 678 + xn = 0b01; 679 + else if (!px && !ux) 680 + xn = 0b10; 681 + else 682 + xn = 0b11; 683 + 684 + *attr &= ~KVM_PTE_LEAF_ATTR_HI_S2_XN; 685 + *attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, xn); 686 + return 0; 687 + } 688 + 664 689 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, 665 690 kvm_pte_t *ptep) 666 691 { 667 692 kvm_pte_t attr; 668 693 u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS; 694 + int r; 669 695 670 696 switch (prot & (KVM_PGTABLE_PROT_DEVICE | 671 697 KVM_PGTABLE_PROT_NORMAL_NC)) { ··· 711 685 attr = KVM_S2_MEMATTR(pgt, NORMAL); 712 686 } 713 687 714 - if (!(prot & KVM_PGTABLE_PROT_X)) 715 - attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; 688 + r = stage2_set_xn_attr(prot, &attr); 689 + if (r) 690 + return r; 716 691 717 692 if (prot & KVM_PGTABLE_PROT_R) 718 693 attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; ··· 742 715 prot |= KVM_PGTABLE_PROT_R; 743 716 if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W) 744 717 prot |= KVM_PGTABLE_PROT_W; 745 - if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN)) 746 - prot |= KVM_PGTABLE_PROT_X; 718 + 719 + switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) { 720 + case 0b00: 721 + prot |= KVM_PGTABLE_PROT_PX | KVM_PGTABLE_PROT_UX; 722 + break; 723 + case 0b01: 724 + prot |= KVM_PGTABLE_PROT_UX; 725 + break; 726 + case 0b11: 727 + prot |= KVM_PGTABLE_PROT_PX; 728 + break; 729 + default: 730 + break; 731 + } 747 732 748 733 return prot; 749 734 } ··· 1329 1290 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, 1330 1291 enum kvm_pgtable_prot 
prot, enum kvm_pgtable_walk_flags flags) 1331 1292 { 1332 - int ret; 1293 + kvm_pte_t xn = 0, set = 0, clr = 0; 1333 1294 s8 level; 1334 - kvm_pte_t set = 0, clr = 0; 1295 + int ret; 1335 1296 1336 1297 if (prot & KVM_PTE_LEAF_ATTR_HI_SW) 1337 1298 return -EINVAL; ··· 1342 1303 if (prot & KVM_PGTABLE_PROT_W) 1343 1304 set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; 1344 1305 1345 - if (prot & KVM_PGTABLE_PROT_X) 1346 - clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; 1306 + ret = stage2_set_xn_attr(prot, &xn); 1307 + if (ret) 1308 + return ret; 1309 + 1310 + set |= xn & KVM_PTE_LEAF_ATTR_HI_S2_XN; 1311 + clr |= ~xn & KVM_PTE_LEAF_ATTR_HI_S2_XN; 1347 1312 1348 1313 ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags); 1349 1314 if (!ret || ret == -EAGAIN) ··· 1578 1535 return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; 1579 1536 } 1580 1537 1581 - static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, 1582 - enum kvm_pgtable_walk_flags visit) 1538 + static int stage2_free_leaf(const struct kvm_pgtable_visit_ctx *ctx) 1583 1539 { 1584 1540 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 1585 1541 1586 - if (!stage2_pte_is_counted(ctx->old)) 1587 - return 0; 1588 - 1589 1542 mm_ops->put_page(ctx->ptep); 1590 - 1591 - if (kvm_pte_table(ctx->old, ctx->level)) 1592 - mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); 1593 - 1594 1543 return 0; 1595 1544 } 1596 1545 1597 - void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 1546 + static int stage2_free_table_post(const struct kvm_pgtable_visit_ctx *ctx) 1598 1547 { 1599 - size_t pgd_sz; 1548 + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 1549 + kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops); 1550 + 1551 + if (mm_ops->page_count(childp) != 1) 1552 + return 0; 1553 + 1554 + /* 1555 + * Drop references and clear the now stale PTE to avoid rewalking the 1556 + * freed page table. 
1557 + */ 1558 + mm_ops->put_page(ctx->ptep); 1559 + mm_ops->put_page(childp); 1560 + kvm_clear_pte(ctx->ptep); 1561 + return 0; 1562 + } 1563 + 1564 + static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, 1565 + enum kvm_pgtable_walk_flags visit) 1566 + { 1567 + if (!stage2_pte_is_counted(ctx->old)) 1568 + return 0; 1569 + 1570 + switch (visit) { 1571 + case KVM_PGTABLE_WALK_LEAF: 1572 + return stage2_free_leaf(ctx); 1573 + case KVM_PGTABLE_WALK_TABLE_POST: 1574 + return stage2_free_table_post(ctx); 1575 + default: 1576 + return -EINVAL; 1577 + } 1578 + } 1579 + 1580 + void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, 1581 + u64 addr, u64 size) 1582 + { 1600 1583 struct kvm_pgtable_walker walker = { 1601 1584 .cb = stage2_free_walker, 1602 1585 .flags = KVM_PGTABLE_WALK_LEAF | 1603 1586 KVM_PGTABLE_WALK_TABLE_POST, 1604 1587 }; 1605 1588 1606 - WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); 1589 + WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker)); 1590 + } 1591 + 1592 + void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) 1593 + { 1594 + size_t pgd_sz; 1595 + 1607 1596 pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; 1608 - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); 1597 + 1598 + /* 1599 + * Since the pgtable is unlinked at this point, and not shared with 1600 + * other walkers, safely dereference pgd with kvm_dereference_pteref_raw() 1601 + */ 1602 + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz); 1609 1603 pgt->pgd = NULL; 1604 + } 1605 + 1606 + void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 1607 + { 1608 + kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits)); 1609 + kvm_pgtable_stage2_destroy_pgd(pgt); 1610 1610 } 1611 1611 1612 1612 void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
+4
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
··· 63 63 return -1; 64 64 } 65 65 66 + /* Handle deactivation as a normal exit */ 67 + if ((fault_ipa - vgic->vgic_cpu_base) >= GIC_CPU_DEACTIVATE) 68 + return 0; 69 + 66 70 rd = kvm_vcpu_dabt_get_rd(vcpu); 67 71 addr = kvm_vgic_global_state.vcpu_hyp_va; 68 72 addr += fault_ipa - vgic->vgic_cpu_base;
+63 -33
arch/arm64/kvm/hyp/vgic-v3-sr.c
··· 14 14 #include <asm/kvm_hyp.h> 15 15 #include <asm/kvm_mmu.h> 16 16 17 + #include "../../vgic/vgic.h" 18 + 17 19 #define vtr_to_max_lr_idx(v) ((v) & 0xf) 18 20 #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) 19 21 #define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) ··· 60 58 unreachable(); 61 59 } 62 60 63 - static void __gic_v3_set_lr(u64 val, int lr) 61 + void __gic_v3_set_lr(u64 val, int lr) 64 62 { 65 63 switch (lr & 0xf) { 66 64 case 0: ··· 198 196 return val; 199 197 } 200 198 199 + static u64 compute_ich_hcr(struct vgic_v3_cpu_if *cpu_if) 200 + { 201 + return cpu_if->vgic_hcr | vgic_ich_hcr_trap_bits(); 202 + } 203 + 201 204 void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) 202 205 { 203 206 u64 used_lrs = cpu_if->used_lrs; ··· 219 212 } 220 213 } 221 214 222 - if (used_lrs || cpu_if->its_vpe.its_vm) { 215 + if (used_lrs) { 223 216 int i; 224 217 u32 elrsr; 225 218 226 219 elrsr = read_gicreg(ICH_ELRSR_EL2); 227 - 228 - write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EL2_En, ICH_HCR_EL2); 229 220 230 221 for (i = 0; i < used_lrs; i++) { 231 222 if (elrsr & (1 << i)) ··· 234 229 __gic_v3_set_lr(0, i); 235 230 } 236 231 } 232 + 233 + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); 234 + 235 + if (cpu_if->vgic_hcr & ICH_HCR_EL2_LRENPIE) { 236 + u64 val = read_gicreg(ICH_HCR_EL2); 237 + cpu_if->vgic_hcr &= ~ICH_HCR_EL2_EOIcount; 238 + cpu_if->vgic_hcr |= val & ICH_HCR_EL2_EOIcount; 239 + } 240 + 241 + write_gicreg(0, ICH_HCR_EL2); 242 + 243 + /* 244 + * Hack alert: On NV, this results in a trap so that the above write 245 + * actually takes effect... No synchronisation is necessary, as we 246 + * only care about the effects when this traps. 
247 + */ 248 + read_gicreg(ICH_MISR_EL2); 237 249 } 238 250 239 251 void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) ··· 258 236 u64 used_lrs = cpu_if->used_lrs; 259 237 int i; 260 238 261 - if (used_lrs || cpu_if->its_vpe.its_vm) { 262 - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); 239 + write_gicreg(compute_ich_hcr(cpu_if), ICH_HCR_EL2); 263 240 264 - for (i = 0; i < used_lrs; i++) 265 - __gic_v3_set_lr(cpu_if->vgic_lr[i], i); 266 - } 241 + for (i = 0; i < used_lrs; i++) 242 + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); 267 243 268 244 /* 269 245 * Ensure that writes to the LRs, and on non-VHE systems ensure that ··· 327 307 } 328 308 329 309 /* 330 - * If we need to trap system registers, we must write 331 - * ICH_HCR_EL2 anyway, even if no interrupts are being 332 - * injected. Note that this also applies if we don't expect 333 - * any system register access (no vgic at all). 310 + * If we need to trap system registers, we must write ICH_HCR_EL2 311 + * anyway, even if no interrupts are being injected. Note that this 312 + * also applies if we don't expect any system register access (no 313 + * vgic at all). In any case, no need to provide MI configuration. 
334 314 */ 335 315 if (static_branch_unlikely(&vgic_v3_cpuif_trap) || 336 316 cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre) 337 - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); 317 + write_gicreg(vgic_ich_hcr_trap_bits() | ICH_HCR_EL2_En, ICH_HCR_EL2); 338 318 } 339 319 340 320 void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) 341 321 { 342 322 u64 val; 343 - 344 - if (!cpu_if->vgic_sre) { 345 - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); 346 - } 347 323 348 324 /* Only restore SRE if the host implements the GICv2 interface */ 349 325 if (static_branch_unlikely(&vgic_v3_has_v2_compat)) { ··· 362 346 write_gicreg(0, ICH_HCR_EL2); 363 347 } 364 348 365 - static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) 349 + void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) 366 350 { 367 351 u64 val; 368 352 u32 nr_pre_bits; ··· 521 505 static void __vgic_v3_write_vmcr(u32 vmcr) 522 506 { 523 507 write_gicreg(vmcr, ICH_VMCR_EL2); 524 - } 525 - 526 - void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) 527 - { 528 - __vgic_v3_save_aprs(cpu_if); 529 - if (cpu_if->vgic_sre) 530 - cpu_if->vgic_vmcr = __vgic_v3_read_vmcr(); 531 508 } 532 509 533 510 void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) ··· 799 790 write_gicreg(hcr, ICH_HCR_EL2); 800 791 } 801 792 802 - static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 793 + static int ___vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 803 794 { 804 795 u32 vid = vcpu_get_reg(vcpu, rt); 805 796 u64 lr_val; ··· 807 798 808 799 /* EOImode == 0, nothing to be done here */ 809 800 if (!(vmcr & ICH_VMCR_EOIM_MASK)) 810 - return; 801 + return 1; 811 802 812 803 /* No deactivate to be performed on an LPI */ 813 804 if (vid >= VGIC_MIN_LPI) 814 - return; 805 + return 1; 815 806 816 807 lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); 817 - if (lr == -1) { 818 - __vgic_v3_bump_eoicount(); 819 - return; 808 + if (lr != -1) { 809 + __vgic_v3_clear_active_lr(lr, 
lr_val); 810 + return 1; 820 811 } 821 812 822 - __vgic_v3_clear_active_lr(lr, lr_val); 813 + return 0; 814 + } 815 + 816 + static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 817 + { 818 + if (!___vgic_v3_write_dir(vcpu, vmcr, rt)) 819 + __vgic_v3_bump_eoicount(); 823 820 } 824 821 825 822 static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) ··· 1260 1245 case SYS_ICC_DIR_EL1: 1261 1246 if (unlikely(is_read)) 1262 1247 return 0; 1248 + /* 1249 + * Full exit if required to handle overflow deactivation, 1250 + * unless we can emulate it in the LRs (likely the majority 1251 + * of the cases). 1252 + */ 1253 + if (vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr & ICH_HCR_EL2_TDIR) { 1254 + int ret; 1255 + 1256 + ret = ___vgic_v3_write_dir(vcpu, __vgic_v3_read_vmcr(), 1257 + kvm_vcpu_sys_get_rt(vcpu)); 1258 + if (ret) 1259 + __kvm_skip_instr(vcpu); 1260 + 1261 + return ret; 1262 + } 1263 1263 fn = __vgic_v3_write_dir; 1264 1264 break; 1265 1265 case SYS_ICC_RPR_EL1:
+124 -8
arch/arm64/kvm/mmu.c
··· 904 904 return 0; 905 905 } 906 906 907 + /* 908 + * Assume that @pgt is valid and unlinked from the KVM MMU to free the 909 + * page-table without taking the kvm_mmu_lock and without performing any 910 + * TLB invalidations. 911 + * 912 + * Also, the range of addresses can be large enough to cause need_resched 913 + * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke 914 + * cond_resched() periodically to prevent hogging the CPU for a long time 915 + * and schedule something else, if required. 916 + */ 917 + static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr, 918 + phys_addr_t end) 919 + { 920 + u64 next; 921 + 922 + do { 923 + next = stage2_range_addr_end(addr, end); 924 + KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr, 925 + next - addr); 926 + if (next != end) 927 + cond_resched(); 928 + } while (addr = next, addr != end); 929 + } 930 + 931 + static void kvm_stage2_destroy(struct kvm_pgtable *pgt) 932 + { 933 + unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr); 934 + 935 + stage2_destroy_range(pgt, 0, BIT(ia_bits)); 936 + KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt); 937 + } 938 + 907 939 /** 908 940 * kvm_init_stage2_mmu - Initialise a S2 MMU structure 909 941 * @kvm: The pointer to the KVM structure ··· 1012 980 return 0; 1013 981 1014 982 out_destroy_pgtable: 1015 - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); 983 + kvm_stage2_destroy(pgt); 1016 984 out_free_pgtable: 1017 985 kfree(pgt); 1018 986 return err; ··· 1113 1081 write_unlock(&kvm->mmu_lock); 1114 1082 1115 1083 if (pgt) { 1116 - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); 1084 + kvm_stage2_destroy(pgt); 1117 1085 kfree(pgt); 1118 1086 } 1119 1087 } ··· 1553 1521 *prot |= kvm_encode_nested_level(nested); 1554 1522 } 1555 1523 1524 + static void adjust_nested_exec_perms(struct kvm *kvm, 1525 + struct kvm_s2_trans *nested, 1526 + enum kvm_pgtable_prot *prot) 1527 + { 1528 + if (!kvm_s2_trans_exec_el0(kvm, nested)) 1529 + *prot &= 
~KVM_PGTABLE_PROT_UX; 1530 + if (!kvm_s2_trans_exec_el1(kvm, nested)) 1531 + *prot &= ~KVM_PGTABLE_PROT_PX; 1532 + } 1533 + 1556 1534 #define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED) 1557 1535 1558 1536 static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, ··· 1614 1572 if (writable) 1615 1573 prot |= KVM_PGTABLE_PROT_W; 1616 1574 1617 - if (exec_fault || 1618 - (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) && 1619 - (!nested || kvm_s2_trans_executable(nested)))) 1575 + if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) 1620 1576 prot |= KVM_PGTABLE_PROT_X; 1577 + 1578 + if (nested) 1579 + adjust_nested_exec_perms(kvm, nested, &prot); 1621 1580 1622 1581 kvm_fault_lock(kvm); 1623 1582 if (mmu_invalidate_retry(kvm, mmu_seq)) { ··· 1894 1851 prot |= KVM_PGTABLE_PROT_NORMAL_NC; 1895 1852 else 1896 1853 prot |= KVM_PGTABLE_PROT_DEVICE; 1897 - } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) && 1898 - (!nested || kvm_s2_trans_executable(nested))) { 1854 + } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) { 1899 1855 prot |= KVM_PGTABLE_PROT_X; 1900 1856 } 1857 + 1858 + if (nested) 1859 + adjust_nested_exec_perms(kvm, nested, &prot); 1901 1860 1902 1861 /* 1903 1862 * Under the premise of getting a FSC_PERM fault, we just need to relax ··· 1944 1899 read_unlock(&vcpu->kvm->mmu_lock); 1945 1900 } 1946 1901 1902 + /* 1903 + * Returns true if the SEA should be handled locally within KVM if the abort 1904 + * is caused by a kernel memory allocation (e.g. stage-2 table memory). 1905 + */ 1906 + static bool host_owns_sea(struct kvm_vcpu *vcpu, u64 esr) 1907 + { 1908 + /* 1909 + * Without FEAT_RAS HCR_EL2.TEA is RES0, meaning any external abort 1910 + * taken from a guest EL to EL2 is due to a host-imposed access (e.g. 1911 + * stage-2 PTW). 1912 + */ 1913 + if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) 1914 + return true; 1915 + 1916 + /* KVM owns the VNCR when the vCPU isn't in a nested context. 
*/ 1917 + if (is_hyp_ctxt(vcpu) && !kvm_vcpu_trap_is_iabt(vcpu) && (esr & ESR_ELx_VNCR)) 1918 + return true; 1919 + 1920 + /* 1921 + * Determining if an external abort during a table walk happened at 1922 + * stage-2 is only possible when S1PTW is set. Otherwise, since KVM 1923 + * sets HCR_EL2.TEA, SEAs due to a stage-1 walk (i.e. accessing the 1924 + * PA of the stage-1 descriptor) can reach here and are reported 1925 + * with a TTW ESR value. 1926 + */ 1927 + return (esr_fsc_is_sea_ttw(esr) && (esr & ESR_ELx_S1PTW)); 1928 + } 1929 + 1947 1930 int kvm_handle_guest_sea(struct kvm_vcpu *vcpu) 1948 1931 { 1932 + struct kvm *kvm = vcpu->kvm; 1933 + struct kvm_run *run = vcpu->run; 1934 + u64 esr = kvm_vcpu_get_esr(vcpu); 1935 + u64 esr_mask = ESR_ELx_EC_MASK | 1936 + ESR_ELx_IL | 1937 + ESR_ELx_FnV | 1938 + ESR_ELx_EA | 1939 + ESR_ELx_CM | 1940 + ESR_ELx_WNR | 1941 + ESR_ELx_FSC; 1942 + u64 ipa; 1943 + 1949 1944 /* 1950 1945 * Give APEI the opportunity to claim the abort before handling it 1951 1946 * within KVM. apei_claim_sea() expects to be called with IRQs enabled. ··· 1994 1909 if (apei_claim_sea(NULL) == 0) 1995 1910 return 1; 1996 1911 1997 - return kvm_inject_serror(vcpu); 1912 + if (host_owns_sea(vcpu, esr) || 1913 + !test_bit(KVM_ARCH_FLAG_EXIT_SEA, &vcpu->kvm->arch.flags)) 1914 + return kvm_inject_serror(vcpu); 1915 + 1916 + /* ESR_ELx.SET is RES0 when FEAT_RAS isn't implemented. */ 1917 + if (kvm_has_ras(kvm)) 1918 + esr_mask |= ESR_ELx_SET_MASK; 1919 + 1920 + /* 1921 + * Exit to userspace, and provide faulting guest virtual and physical 1922 + * addresses in case userspace wants to emulate SEA to guest by 1923 + * writing to FAR_ELx and HPFAR_ELx registers.
1924 + */ 1925 + memset(&run->arm_sea, 0, sizeof(run->arm_sea)); 1926 + run->exit_reason = KVM_EXIT_ARM_SEA; 1927 + run->arm_sea.esr = esr & esr_mask; 1928 + 1929 + if (!(esr & ESR_ELx_FnV)) 1930 + run->arm_sea.gva = kvm_vcpu_get_hfar(vcpu); 1931 + 1932 + ipa = kvm_vcpu_get_fault_ipa(vcpu); 1933 + if (ipa != INVALID_GPA) { 1934 + run->arm_sea.flags |= KVM_EXIT_ARM_SEA_FLAG_GPA_VALID; 1935 + run->arm_sea.gpa = ipa; 1936 + } 1937 + 1938 + return 0; 1998 1939 } 1999 1940 2000 1941 /** ··· 2110 1999 u32 esr; 2111 2000 2112 2001 ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans); 2002 + if (ret == -EAGAIN) { 2003 + ret = 1; 2004 + goto out_unlock; 2005 + } 2006 + 2113 2007 if (ret) { 2114 2008 esr = kvm_s2_trans_esr(&nested_trans); 2115 2009 kvm_inject_s2_fault(vcpu, esr);
+84 -39
arch/arm64/kvm/nested.c
··· 124 124 } 125 125 126 126 struct s2_walk_info { 127 - int (*read_desc)(phys_addr_t pa, u64 *desc, void *data); 128 - void *data; 129 - u64 baddr; 130 - unsigned int max_oa_bits; 131 - unsigned int pgshift; 132 - unsigned int sl; 133 - unsigned int t0sz; 134 - bool be; 127 + u64 baddr; 128 + unsigned int max_oa_bits; 129 + unsigned int pgshift; 130 + unsigned int sl; 131 + unsigned int t0sz; 132 + bool be; 133 + bool ha; 135 134 }; 136 135 137 136 static u32 compute_fsc(int level, u32 fsc) ··· 198 199 return 0; 199 200 } 200 201 202 + static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc, 203 + struct s2_walk_info *wi) 204 + { 205 + u64 val; 206 + int r; 207 + 208 + r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val)); 209 + if (r) 210 + return r; 211 + 212 + /* 213 + * Handle reversedescriptors if endianness differs between the 214 + * host and the guest hypervisor. 215 + */ 216 + if (wi->be) 217 + *desc = be64_to_cpu((__force __be64)val); 218 + else 219 + *desc = le64_to_cpu((__force __le64)val); 220 + 221 + return 0; 222 + } 223 + 224 + static int swap_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 old, u64 new, 225 + struct s2_walk_info *wi) 226 + { 227 + if (wi->be) { 228 + old = (__force u64)cpu_to_be64(old); 229 + new = (__force u64)cpu_to_be64(new); 230 + } else { 231 + old = (__force u64)cpu_to_le64(old); 232 + new = (__force u64)cpu_to_le64(new); 233 + } 234 + 235 + return __kvm_at_swap_desc(vcpu->kvm, pa, old, new); 236 + } 237 + 201 238 /* 202 239 * This is essentially a C-version of the pseudo code from the ARM ARM 203 240 * AArch64.TranslationTableWalk function. 
I strongly recommend looking at ··· 241 206 * 242 207 * Must be called with the kvm->srcu read lock held 243 208 */ 244 - static int walk_nested_s2_pgd(phys_addr_t ipa, 209 + static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa, 245 210 struct s2_walk_info *wi, struct kvm_s2_trans *out) 246 211 { 247 212 int first_block_level, level, stride, input_size, base_lower_bound; 248 213 phys_addr_t base_addr; 249 214 unsigned int addr_top, addr_bottom; 250 - u64 desc; /* page table entry */ 215 + u64 desc, new_desc; /* page table entry */ 251 216 int ret; 252 217 phys_addr_t paddr; 253 218 ··· 292 257 >> (addr_bottom - 3); 293 258 294 259 paddr = base_addr | index; 295 - ret = wi->read_desc(paddr, &desc, wi->data); 260 + ret = read_guest_s2_desc(vcpu, paddr, &desc, wi); 296 261 if (ret < 0) 297 262 return ret; 298 263 299 - /* 300 - * Handle reversedescriptors if endianness differs between the 301 - * host and the guest hypervisor. 302 - */ 303 - if (wi->be) 304 - desc = be64_to_cpu((__force __be64)desc); 305 - else 306 - desc = le64_to_cpu((__force __le64)desc); 264 + new_desc = desc; 307 265 308 266 /* Check for valid descriptor at this point */ 309 - if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) { 267 + if (!(desc & KVM_PTE_VALID)) { 310 268 out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); 311 269 out->desc = desc; 312 270 return 1; 313 271 } 314 272 315 - /* We're at the final level or block translation level */ 316 - if ((desc & 3) == 1 || level == 3) 273 + if (FIELD_GET(KVM_PTE_TYPE, desc) == KVM_PTE_TYPE_BLOCK) { 274 + if (level < 3) 275 + break; 276 + 277 + out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); 278 + out->desc = desc; 279 + return 1; 280 + } 281 + 282 + /* We're at the final level */ 283 + if (level == 3) 317 284 break; 318 285 319 286 if (check_output_size(wi, desc)) { ··· 342 305 return 1; 343 306 } 344 307 345 - if (!(desc & BIT(10))) { 308 + if (wi->ha) 309 + new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF; 310 + 311 + if (new_desc != 
desc) { 312 + ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi); 313 + if (ret) 314 + return ret; 315 + 316 + desc = new_desc; 317 + } 318 + 319 + if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) { 346 320 out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS); 347 321 out->desc = desc; 348 322 return 1; ··· 366 318 (ipa & GENMASK_ULL(addr_bottom - 1, 0)); 367 319 out->output = paddr; 368 320 out->block_size = 1UL << ((3 - level) * stride + wi->pgshift); 369 - out->readable = desc & (0b01 << 6); 370 - out->writable = desc & (0b10 << 6); 321 + out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; 322 + out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; 371 323 out->level = level; 372 324 out->desc = desc; 373 325 return 0; 374 - } 375 - 376 - static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data) 377 - { 378 - struct kvm_vcpu *vcpu = data; 379 - 380 - return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc)); 381 326 } 382 327 383 328 static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) ··· 391 350 /* Global limit for now, should eventually be per-VM */ 392 351 wi->max_oa_bits = min(get_kvm_ipa_limit(), 393 352 ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false)); 353 + 354 + wi->ha = vtcr & VTCR_EL2_HA; 394 355 } 395 356 396 357 int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, ··· 407 364 if (!vcpu_has_nv(vcpu)) 408 365 return 0; 409 366 410 - wi.read_desc = read_guest_s2_desc; 411 - wi.data = vcpu; 412 367 wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); 413 368 414 369 vtcr_to_walk_info(vtcr, &wi); 415 370 416 371 wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE; 417 372 418 - ret = walk_nested_s2_pgd(gipa, &wi, result); 373 + ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result); 419 374 if (ret) 420 375 result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC); 421 376 ··· 829 788 return 0; 830 789 831 790 if (kvm_vcpu_trap_is_iabt(vcpu)) { 832 - forward_fault = !kvm_s2_trans_executable(trans); 791 + if 
(vcpu_mode_priv(vcpu)) 792 + forward_fault = !kvm_s2_trans_exec_el1(vcpu->kvm, trans); 793 + else 794 + forward_fault = !kvm_s2_trans_exec_el0(vcpu->kvm, trans); 833 795 } else { 834 796 bool write_fault = kvm_is_write_fault(vcpu); 835 797 ··· 1599 1555 case SYS_ID_AA64MMFR1_EL1: 1600 1556 val &= ~(ID_AA64MMFR1_EL1_CMOW | 1601 1557 ID_AA64MMFR1_EL1_nTLBPA | 1602 - ID_AA64MMFR1_EL1_ETS | 1603 - ID_AA64MMFR1_EL1_XNX | 1604 - ID_AA64MMFR1_EL1_HAFDBS); 1558 + ID_AA64MMFR1_EL1_ETS); 1559 + 1605 1560 /* FEAT_E2H0 implies no VHE */ 1606 1561 if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) 1607 1562 val &= ~ID_AA64MMFR1_EL1_VH; 1563 + 1564 + val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR1_EL1, HAFDBS, AF); 1608 1565 break; 1609 1566 1610 1567 case SYS_ID_AA64MMFR2_EL1:
+9 -2
arch/arm64/kvm/pkvm.c
··· 344 344 return 0; 345 345 } 346 346 347 - void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 347 + void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, 348 + u64 addr, u64 size) 348 349 { 349 - __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL)); 350 + __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size); 351 + } 352 + 353 + void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) 354 + { 355 + /* Expected to be called after all pKVM mappings have been released. */ 356 + WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root)); 350 357 } 351 358 352 359 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
+27 -8
arch/arm64/kvm/ptdump.c
··· 31 31 .val = PTE_VALID, 32 32 .set = " ", 33 33 .clear = "F", 34 - }, { 34 + }, 35 + { 35 36 .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, 36 37 .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, 37 38 .set = "R", 38 39 .clear = " ", 39 - }, { 40 + }, 41 + { 40 42 .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, 41 43 .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, 42 44 .set = "W", 43 45 .clear = " ", 44 - }, { 46 + }, 47 + { 45 48 .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, 46 - .val = KVM_PTE_LEAF_ATTR_HI_S2_XN, 47 - .set = "NX", 48 - .clear = "x ", 49 - }, { 49 + .val = 0b00UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN), 50 + .set = "px ux ", 51 + }, 52 + { 53 + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, 54 + .val = 0b01UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN), 55 + .set = "PXNux ", 56 + }, 57 + { 58 + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, 59 + .val = 0b10UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN), 60 + .set = "PXNUXN", 61 + }, 62 + { 63 + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, 64 + .val = 0b11UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN), 65 + .set = "px UXN", 66 + }, 67 + { 50 68 .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF, 51 69 .val = KVM_PTE_LEAF_ATTR_LO_S2_AF, 52 70 .set = "AF", 53 71 .clear = " ", 54 - }, { 72 + }, 73 + { 55 74 .mask = PMD_TYPE_MASK, 56 75 .val = PMD_TYPE_SECT, 57 76 .set = "BLK",
+23 -5
arch/arm64/kvm/sys_regs.c
··· 666 666 return true; 667 667 } 668 668 669 + static bool access_gic_dir(struct kvm_vcpu *vcpu, 670 + struct sys_reg_params *p, 671 + const struct sys_reg_desc *r) 672 + { 673 + if (!kvm_has_gicv3(vcpu->kvm)) 674 + return undef_access(vcpu, p, r); 675 + 676 + if (!p->is_write) 677 + return undef_access(vcpu, p, r); 678 + 679 + vgic_v3_deactivate(vcpu, p->regval); 680 + 681 + return true; 682 + } 683 + 669 684 static bool trap_raz_wi(struct kvm_vcpu *vcpu, 670 685 struct sys_reg_params *p, 671 686 const struct sys_reg_desc *r) ··· 3388 3373 { SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access }, 3389 3374 { SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access }, 3390 3375 { SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access }, 3391 - { SYS_DESC(SYS_ICC_DIR_EL1), undef_access }, 3376 + { SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir }, 3392 3377 { SYS_DESC(SYS_ICC_RPR_EL1), undef_access }, 3393 3378 { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, 3394 3379 { SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi }, ··· 3785 3770 { 3786 3771 u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); 3787 3772 3788 - __kvm_at_s1e01(vcpu, op, p->regval); 3773 + if (__kvm_at_s1e01(vcpu, op, p->regval)) 3774 + return false; 3789 3775 3790 3776 return true; 3791 3777 } ··· 3803 3787 return false; 3804 3788 } 3805 3789 3806 - __kvm_at_s1e2(vcpu, op, p->regval); 3790 + if (__kvm_at_s1e2(vcpu, op, p->regval)) 3791 + return false; 3807 3792 3808 3793 return true; 3809 3794 } ··· 3814 3797 { 3815 3798 u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); 3816 3799 3817 - __kvm_at_s12(vcpu, op, p->regval); 3800 + if (__kvm_at_s12(vcpu, op, p->regval)) 3801 + return false; 3818 3802 3819 3803 return true; 3820 3804 } ··· 4516 4498 { CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access }, 4517 4499 { CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access }, 4518 4500 { CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access }, 4519 - { CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access }, 4501 + { CP15_SYS_DESC(SYS_ICC_DIR_EL1), 
access_gic_dir }, 4520 4502 { CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access }, 4521 4503 { CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access }, 4522 4504 { CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
+5 -4
arch/arm64/kvm/vgic/vgic-init.c
··· 198 198 struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); 199 199 int i; 200 200 201 + dist->active_spis = (atomic_t)ATOMIC_INIT(0); 201 202 dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); 202 203 if (!dist->spis) 203 204 return -ENOMEM; ··· 364 363 return ret; 365 364 } 366 365 367 - static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) 366 + static void kvm_vgic_vcpu_reset(struct kvm_vcpu *vcpu) 368 367 { 369 368 if (kvm_vgic_global_state.type == VGIC_V2) 370 - vgic_v2_enable(vcpu); 369 + vgic_v2_reset(vcpu); 371 370 else 372 - vgic_v3_enable(vcpu); 371 + vgic_v3_reset(vcpu); 373 372 } 374 373 375 374 /* ··· 416 415 } 417 416 418 417 kvm_for_each_vcpu(idx, vcpu, kvm) 419 - kvm_vgic_vcpu_enable(vcpu); 418 + kvm_vgic_vcpu_reset(vcpu); 420 419 421 420 ret = kvm_vgic_setup_default_irq_routing(kvm); 422 421 if (ret)
+24
arch/arm64/kvm/vgic/vgic-mmio-v2.c
··· 359 359 vgic_set_vmcr(vcpu, &vmcr); 360 360 } 361 361 362 + static void vgic_mmio_write_dir(struct kvm_vcpu *vcpu, 363 + gpa_t addr, unsigned int len, 364 + unsigned long val) 365 + { 366 + if (kvm_vgic_global_state.type == VGIC_V2) 367 + vgic_v2_deactivate(vcpu, val); 368 + else 369 + vgic_v3_deactivate(vcpu, val); 370 + } 371 + 362 372 static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, 363 373 gpa_t addr, unsigned int len) 364 374 { ··· 492 482 REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, 493 483 vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, 494 484 VGIC_ACCESS_32bit), 485 + REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_CPU_DEACTIVATE, 486 + vgic_mmio_read_raz, vgic_mmio_write_dir, 487 + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 488 + 4, VGIC_ACCESS_32bit), 495 489 }; 496 490 497 491 unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) ··· 506 492 kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); 507 493 508 494 return SZ_4K; 495 + } 496 + 497 + unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev) 498 + { 499 + dev->regions = vgic_v2_cpu_registers; 500 + dev->nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); 501 + 502 + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); 503 + 504 + return KVM_VGIC_V2_CPU_SIZE; 509 505 } 510 506 511 507 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
+1
arch/arm64/kvm/vgic/vgic-mmio.h
··· 213 213 const u32 val); 214 214 215 215 unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); 216 + unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev); 216 217 217 218 unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); 218 219
+222 -73
arch/arm64/kvm/vgic/vgic-v2.c
··· 9 9 #include <kvm/arm_vgic.h> 10 10 #include <asm/kvm_mmu.h> 11 11 12 + #include "vgic-mmio.h" 12 13 #include "vgic.h" 13 14 14 15 static inline void vgic_v2_write_lr(int lr, u32 val) ··· 27 26 vgic_v2_write_lr(i, 0); 28 27 } 29 28 30 - void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) 29 + void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu, 30 + struct ap_list_summary *als) 31 31 { 32 32 struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; 33 33 34 - cpuif->vgic_hcr |= GICH_HCR_UIE; 34 + cpuif->vgic_hcr = GICH_HCR_EN; 35 + 36 + if (irqs_pending_outside_lrs(als)) 37 + cpuif->vgic_hcr |= GICH_HCR_NPIE; 38 + if (irqs_active_outside_lrs(als)) 39 + cpuif->vgic_hcr |= GICH_HCR_LRENPIE; 40 + if (irqs_outside_lrs(als)) 41 + cpuif->vgic_hcr |= GICH_HCR_UIE; 42 + 43 + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP0_MASK) ? 44 + GICH_HCR_VGrp0DIE : GICH_HCR_VGrp0EIE; 45 + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP1_MASK) ? 46 + GICH_HCR_VGrp1DIE : GICH_HCR_VGrp1EIE; 35 47 } 36 48 37 49 static bool lr_signals_eoi_mi(u32 lr_val) ··· 53 39 !(lr_val & GICH_LR_HW); 54 40 } 55 41 56 - /* 57 - * transfer the content of the LRs back into the corresponding ap_list: 58 - * - active bit is transferred as is 59 - * - pending bit is 60 - * - transferred as is in case of edge sensitive IRQs 61 - * - set to the line-level (resample time) for level sensitive IRQs 62 - */ 63 - void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) 42 + static void vgic_v2_fold_lr(struct kvm_vcpu *vcpu, u32 val) 64 43 { 65 - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 66 - struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; 67 - int lr; 44 + u32 cpuid, intid = val & GICH_LR_VIRTUALID; 45 + struct vgic_irq *irq; 46 + bool deactivated; 68 47 69 - DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 48 + /* Extract the source vCPU id from the LR */ 49 + cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val) & 7; 70 50 71 - cpuif->vgic_hcr &= ~GICH_HCR_UIE; 51 + /* Notify fds when the guest 
EOI'ed a level-triggered SPI */ 52 + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) 53 + kvm_notify_acked_irq(vcpu->kvm, 0, 54 + intid - VGIC_NR_PRIVATE_IRQS); 72 55 73 - for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) { 74 - u32 val = cpuif->vgic_lr[lr]; 75 - u32 cpuid, intid = val & GICH_LR_VIRTUALID; 76 - struct vgic_irq *irq; 77 - bool deactivated; 56 + irq = vgic_get_vcpu_irq(vcpu, intid); 78 57 79 - /* Extract the source vCPU id from the LR */ 80 - cpuid = val & GICH_LR_PHYSID_CPUID; 81 - cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; 82 - cpuid &= 7; 83 - 84 - /* Notify fds when the guest EOI'ed a level-triggered SPI */ 85 - if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) 86 - kvm_notify_acked_irq(vcpu->kvm, 0, 87 - intid - VGIC_NR_PRIVATE_IRQS); 88 - 89 - irq = vgic_get_vcpu_irq(vcpu, intid); 90 - 91 - raw_spin_lock(&irq->irq_lock); 92 - 58 + scoped_guard(raw_spinlock, &irq->irq_lock) { 93 59 /* Always preserve the active bit, note deactivation */ 94 60 deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT); 95 61 irq->active = !!(val & GICH_LR_ACTIVE_BIT); ··· 95 101 /* Handle resampling for mapped interrupts if required */ 96 102 vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT); 97 103 98 - raw_spin_unlock(&irq->irq_lock); 99 - vgic_put_irq(vcpu->kvm, irq); 104 + irq->on_lr = false; 105 + } 106 + 107 + vgic_put_irq(vcpu->kvm, irq); 108 + } 109 + 110 + static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq); 111 + 112 + /* 113 + * transfer the content of the LRs back into the corresponding ap_list: 114 + * - active bit is transferred as is 115 + * - pending bit is 116 + * - transferred as is in case of edge sensitive IRQs 117 + * - set to the line-level (resample time) for level sensitive IRQs 118 + */ 119 + void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) 120 + { 121 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 122 + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; 123 + u32 
eoicount = FIELD_GET(GICH_HCR_EOICOUNT, cpuif->vgic_hcr); 124 + struct vgic_irq *irq; 125 + 126 + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 127 + 128 + for (int lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) 129 + vgic_v2_fold_lr(vcpu, cpuif->vgic_lr[lr]); 130 + 131 + /* See the GICv3 equivalent for the EOIcount handling rationale */ 132 + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 133 + u32 lr; 134 + 135 + if (!eoicount) { 136 + break; 137 + } else { 138 + guard(raw_spinlock)(&irq->irq_lock); 139 + 140 + if (!(likely(vgic_target_oracle(irq) == vcpu) && 141 + irq->active)) 142 + continue; 143 + 144 + lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT; 145 + } 146 + 147 + if (lr & GICH_LR_HW) 148 + writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr), 149 + kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE); 150 + vgic_v2_fold_lr(vcpu, lr); 151 + eoicount--; 100 152 } 101 153 102 154 cpuif->used_lrs = 0; 103 155 } 104 156 105 - /* 106 - * Populates the particular LR with the state of a given IRQ: 107 - * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq 108 - * - for a level sensitive IRQ the pending state value is unchanged; 109 - * it is dictated directly by the input level 110 - * 111 - * If @irq describes an SGI with multiple sources, we choose the 112 - * lowest-numbered source VCPU and clear that bit in the source bitmap. 113 - * 114 - * The irq_lock must be held by the caller. 
115 - */ 116 - void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 157 + void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val) 158 + { 159 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 160 + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; 161 + struct kvm_vcpu *target_vcpu = NULL; 162 + bool mmio = false; 163 + struct vgic_irq *irq; 164 + unsigned long flags; 165 + u64 lr = 0; 166 + u8 cpuid; 167 + 168 + /* Snapshot CPUID, and remove it from the INTID */ 169 + cpuid = FIELD_GET(GENMASK_ULL(12, 10), val); 170 + val &= ~GENMASK_ULL(12, 10); 171 + 172 + /* We only deal with DIR when EOIMode==1 */ 173 + if (!(cpuif->vgic_vmcr & GICH_VMCR_EOI_MODE_MASK)) 174 + return; 175 + 176 + /* Make sure we're in the same context as LR handling */ 177 + local_irq_save(flags); 178 + 179 + irq = vgic_get_vcpu_irq(vcpu, val); 180 + if (WARN_ON_ONCE(!irq)) 181 + goto out; 182 + 183 + /* See the corresponding v3 code for the rationale */ 184 + scoped_guard(raw_spinlock, &irq->irq_lock) { 185 + target_vcpu = irq->vcpu; 186 + 187 + /* Not on any ap_list? */ 188 + if (!target_vcpu) 189 + goto put; 190 + 191 + /* 192 + * Urgh. We're deactivating something that we cannot 193 + * observe yet... Big hammer time. 194 + */ 195 + if (irq->on_lr) { 196 + mmio = true; 197 + goto put; 198 + } 199 + 200 + /* SGI: check that the cpuid matches */ 201 + if (val < VGIC_NR_SGIS && irq->active_source != cpuid) { 202 + target_vcpu = NULL; 203 + goto put; 204 + } 205 + 206 + /* (with a Dalek voice) DEACTIVATE!!!! 
*/ 207 + lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT; 208 + } 209 + 210 + if (lr & GICH_LR_HW) 211 + writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr), 212 + kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE); 213 + 214 + vgic_v2_fold_lr(vcpu, lr); 215 + 216 + put: 217 + vgic_put_irq(vcpu->kvm, irq); 218 + 219 + out: 220 + local_irq_restore(flags); 221 + 222 + if (mmio) 223 + vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32)); 224 + 225 + /* Force the ap_list to be pruned */ 226 + if (target_vcpu) 227 + kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu); 228 + } 229 + 230 + static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq) 117 231 { 118 232 u32 val = irq->intid; 119 233 bool allow_pending = true; 234 + 235 + WARN_ON(irq->on_lr); 120 236 121 237 if (irq->active) { 122 238 val |= GICH_LR_ACTIVE_BIT; ··· 267 163 if (allow_pending && irq_is_pending(irq)) { 268 164 val |= GICH_LR_PENDING_BIT; 269 165 166 + if (vgic_irq_is_sgi(irq->intid)) { 167 + u32 src = ffs(irq->source); 168 + 169 + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", 170 + irq->intid)) 171 + return 0; 172 + 173 + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 174 + if (irq->source & ~BIT(src - 1)) 175 + val |= GICH_LR_EOI; 176 + } 177 + } 178 + 179 + /* The GICv2 LR only holds five bits of priority. */ 180 + val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; 181 + 182 + return val; 183 + } 184 + 185 + /* 186 + * Populates the particular LR with the state of a given IRQ: 187 + * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq 188 + * - for a level sensitive IRQ the pending state value is unchanged; 189 + * it is dictated directly by the input level 190 + * 191 + * If @irq describes an SGI with multiple sources, we choose the 192 + * lowest-numbered source VCPU and clear that bit in the source bitmap. 193 + * 194 + * The irq_lock must be held by the caller. 
195 + */ 196 + void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 197 + { 198 + u32 val = vgic_v2_compute_lr(vcpu, irq); 199 + 200 + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; 201 + 202 + if (val & GICH_LR_PENDING_BIT) { 270 203 if (irq->config == VGIC_CONFIG_EDGE) 271 204 irq->pending_latch = false; 272 205 273 206 if (vgic_irq_is_sgi(irq->intid)) { 274 207 u32 src = ffs(irq->source); 275 208 276 - if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", 277 - irq->intid)) 278 - return; 279 - 280 - val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 281 - irq->source &= ~(1 << (src - 1)); 282 - if (irq->source) { 209 + irq->source &= ~BIT(src - 1); 210 + if (irq->source) 283 211 irq->pending_latch = true; 284 - val |= GICH_LR_EOI; 285 - } 286 212 } 287 213 } 288 214 ··· 328 194 /* The GICv2 LR only holds five bits of priority. */ 329 195 val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; 330 196 331 - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; 197 + irq->on_lr = true; 332 198 } 333 199 334 200 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) ··· 391 257 GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; 392 258 } 393 259 394 - void vgic_v2_enable(struct kvm_vcpu *vcpu) 260 + void vgic_v2_reset(struct kvm_vcpu *vcpu) 395 261 { 396 262 /* 397 263 * By forcing VMCR to zero, the GIC will restore the binary ··· 399 265 * anyway. 400 266 */ 401 267 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; 402 - 403 - /* Get the show on the road... 
*/ 404 - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; 405 268 } 406 269 407 270 /* check for overlapping regions and for regions crossing the end of memory */ ··· 420 289 int vgic_v2_map_resources(struct kvm *kvm) 421 290 { 422 291 struct vgic_dist *dist = &kvm->arch.vgic; 292 + unsigned int len; 423 293 int ret = 0; 424 294 425 295 if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || ··· 444 312 return ret; 445 313 } 446 314 315 + len = vgic_v2_init_cpuif_iodev(&dist->cpuif_iodev); 316 + dist->cpuif_iodev.base_addr = dist->vgic_cpu_base; 317 + dist->cpuif_iodev.iodev_type = IODEV_CPUIF; 318 + dist->cpuif_iodev.redist_vcpu = NULL; 319 + 320 + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist->vgic_cpu_base, 321 + len, &dist->cpuif_iodev.dev); 322 + if (ret) 323 + return ret; 324 + 447 325 if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { 448 326 ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, 449 327 kvm_vgic_global_state.vcpu_base, 450 - KVM_VGIC_V2_CPU_SIZE, true); 328 + KVM_VGIC_V2_CPU_SIZE - SZ_4K, true); 451 329 if (ret) { 452 330 kvm_err("Unable to remap VGIC CPU to VCPU\n"); 453 331 return ret; ··· 527 385 528 386 kvm_vgic_global_state.can_emulate_gicv2 = true; 529 387 kvm_vgic_global_state.vcpu_base = info->vcpu.start; 388 + kvm_vgic_global_state.gicc_base = info->gicc_base; 530 389 kvm_vgic_global_state.type = VGIC_V2; 531 390 kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; 532 391 ··· 566 423 567 424 void vgic_v2_save_state(struct kvm_vcpu *vcpu) 568 425 { 426 + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 569 427 void __iomem *base = kvm_vgic_global_state.vctrl_base; 570 428 u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; 571 429 572 430 if (!base) 573 431 return; 574 432 575 - if (used_lrs) { 433 + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); 434 + 435 + if (used_lrs) 576 436 save_lrs(vcpu, base); 577 - writel_relaxed(0, base + GICH_HCR); 437 + 438 + if (cpu_if->vgic_hcr & 
GICH_HCR_LRENPIE) { 439 + u32 val = readl_relaxed(base + GICH_HCR); 440 + 441 + cpu_if->vgic_hcr &= ~GICH_HCR_EOICOUNT; 442 + cpu_if->vgic_hcr |= val & GICH_HCR_EOICOUNT; 578 443 } 444 + 445 + writel_relaxed(0, base + GICH_HCR); 579 446 } 580 447 581 448 void vgic_v2_restore_state(struct kvm_vcpu *vcpu) ··· 598 445 if (!base) 599 446 return; 600 447 601 - if (used_lrs) { 602 - writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); 603 - for (i = 0; i < used_lrs; i++) { 604 - writel_relaxed(cpu_if->vgic_lr[i], 605 - base + GICH_LR0 + (i * 4)); 606 - } 607 - } 448 + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); 449 + 450 + for (i = 0; i < used_lrs; i++) 451 + writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); 608 452 } 609 453 610 454 void vgic_v2_load(struct kvm_vcpu *vcpu) ··· 618 468 { 619 469 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 620 470 621 - cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); 622 471 cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); 623 472 }
+53 -53
arch/arm64/kvm/vgic/vgic-v3-nested.c
··· 70 70 * - on L2 put: perform the inverse transformation, so that the result of L2 71 71 * running becomes visible to L1 in the VNCR-accessible registers. 72 72 * 73 - * - there is nothing to do on L2 entry, as everything will have happened 74 - * on load. However, this is the point where we detect that an interrupt 75 - * targeting L1 and prepare the grand switcheroo. 73 + * - there is nothing to do on L2 entry apart from enabling the vgic, as 74 + * everything will have happened on load. However, this is the point where 75 + * we detect that an interrupt targeting L1 and prepare the grand 76 + * switcheroo. 76 77 * 77 - * - on L2 exit: emulate the HW bit, and deactivate corresponding the L1 78 - * interrupt. The L0 active state will be cleared by the HW if the L1 79 - * interrupt was itself backed by a HW interrupt. 78 + * - on L2 exit: resync the LRs and VMCR, emulate the HW bit, and deactivate 79 + * corresponding the L1 interrupt. The L0 active state will be cleared by 80 + * the HW if the L1 interrupt was itself backed by a HW interrupt. 80 81 * 81 82 * Maintenance Interrupt (MI) management: 82 83 * ··· 94 93 * 95 94 * - because most of the ICH_*_EL2 registers live in the VNCR page, the 96 95 * quality of emulation is poor: L1 can setup the vgic so that an MI would 97 - * immediately fire, and not observe anything until the next exit. Trying 98 - * to read ICH_MISR_EL2 would do the trick, for example. 96 + * immediately fire, and not observe anything until the next exit. 97 + * Similarly, a pending MI is not immediately disabled by clearing 98 + * ICH_HCR_EL2.En. Trying to read ICH_MISR_EL2 would do the trick, for 99 + * example. 
99 100 * 100 101 * System register emulation: 101 102 * ··· 268 265 s_cpu_if->used_lrs = hweight16(shadow_if->lr_map); 269 266 } 270 267 268 + void vgic_v3_flush_nested(struct kvm_vcpu *vcpu) 269 + { 270 + u64 val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); 271 + 272 + write_sysreg_s(val | vgic_ich_hcr_trap_bits(), SYS_ICH_HCR_EL2); 273 + } 274 + 271 275 void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) 272 276 { 273 277 struct shadow_if *shadow_if = get_shadow_if(); 274 278 int i; 275 279 276 280 for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { 277 - u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 278 - struct vgic_irq *irq; 281 + u64 val, host_lr, lr; 279 282 280 - if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) 283 + host_lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); 284 + 285 + /* Propagate the new LR state */ 286 + lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 287 + val = lr & ~ICH_LR_STATE; 288 + val |= host_lr & ICH_LR_STATE; 289 + __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); 290 + 291 + /* 292 + * Deactivation of a HW interrupt: the LR must have the HW 293 + * bit set, have been in a non-invalid state before the run, 294 + * and now be in an invalid state. If any of that doesn't 295 + * hold, we're done with this LR. 296 + */ 297 + if (!((lr & ICH_LR_HW) && (lr & ICH_LR_STATE) && 298 + !(host_lr & ICH_LR_STATE))) 281 299 continue; 282 300 283 301 /* ··· 306 282 * need to emulate the HW effect between the guest hypervisor 307 283 * and the nested guest. 
308 284 */ 309 - irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); 310 - if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ 311 - continue; 312 - 313 - lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); 314 - if (!(lr & ICH_LR_STATE)) 315 - irq->active = false; 316 - 317 - vgic_put_irq(vcpu->kvm, irq); 285 + vgic_v3_deactivate(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); 318 286 } 287 + 288 + /* We need these to be synchronised to generate the MI */ 289 + __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, read_sysreg_s(SYS_ICH_VMCR_EL2)); 290 + __vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, &=, ~ICH_HCR_EL2_EOIcount); 291 + __vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, |=, read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_EOIcount); 292 + 293 + write_sysreg_s(0, SYS_ICH_HCR_EL2); 294 + isb(); 295 + 296 + vgic_v3_nested_update_mi(vcpu); 319 297 } 320 298 321 299 static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu, 322 300 struct vgic_v3_cpu_if *s_cpu_if) 323 301 { 324 302 struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3; 325 - u64 val = 0; 326 303 int i; 327 304 328 - /* 329 - * If we're on a system with a broken vgic that requires 330 - * trapping, propagate the trapping requirements. 331 - * 332 - * Ah, the smell of rotten fruits... 
333 - */ 334 - if (static_branch_unlikely(&vgic_v3_cpuif_trap)) 335 - val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | 336 - ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR); 337 - s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val; 305 + s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); 338 306 s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2); 339 307 s_cpu_if->vgic_sre = host_if->vgic_sre; 340 308 ··· 350 334 __vgic_v3_restore_vmcr_aprs(cpu_if); 351 335 __vgic_v3_activate_traps(cpu_if); 352 336 353 - __vgic_v3_restore_state(cpu_if); 337 + for (int i = 0; i < cpu_if->used_lrs; i++) 338 + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); 354 339 355 340 /* 356 341 * Propagate the number of used LRs for the benefit of the HYP ··· 364 347 { 365 348 struct shadow_if *shadow_if = get_shadow_if(); 366 349 struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif; 367 - u64 val; 368 350 int i; 369 351 370 - __vgic_v3_save_vmcr_aprs(s_cpu_if); 371 - __vgic_v3_deactivate_traps(s_cpu_if); 372 - __vgic_v3_save_state(s_cpu_if); 373 - 374 - /* 375 - * Translate the shadow state HW fields back to the virtual ones 376 - * before copying the shadow struct back to the nested one. 
377 - */ 378 - val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); 379 - val &= ~ICH_HCR_EL2_EOIcount_MASK; 380 - val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); 381 - __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val); 382 - __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr); 352 + __vgic_v3_save_aprs(s_cpu_if); 383 353 384 354 for (i = 0; i < 4; i++) { 385 355 __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]); 386 356 __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]); 387 357 } 388 358 389 - for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { 390 - val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 359 + for (i = 0; i < s_cpu_if->used_lrs; i++) 360 + __gic_v3_set_lr(0, i); 391 361 392 - val &= ~ICH_LR_STATE; 393 - val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE; 394 - 395 - __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); 396 - } 362 + __vgic_v3_deactivate_traps(s_cpu_if); 397 363 398 364 vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; 399 365 }
+330 -100
arch/arm64/kvm/vgic/vgic-v3.c
··· 12 12 #include <asm/kvm_mmu.h> 13 13 #include <asm/kvm_asm.h> 14 14 15 + #include "vgic-mmio.h" 15 16 #include "vgic.h" 16 17 17 18 static bool group0_trap; ··· 21 20 static bool dir_trap; 22 21 static bool gicv4_enable; 23 22 24 - void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) 23 + void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, 24 + struct ap_list_summary *als) 25 25 { 26 26 struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; 27 27 28 - cpuif->vgic_hcr |= ICH_HCR_EL2_UIE; 28 + if (!irqchip_in_kernel(vcpu->kvm)) 29 + return; 30 + 31 + cpuif->vgic_hcr = ICH_HCR_EL2_En; 32 + 33 + if (irqs_pending_outside_lrs(als)) 34 + cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE; 35 + if (irqs_active_outside_lrs(als)) 36 + cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE; 37 + if (irqs_outside_lrs(als)) 38 + cpuif->vgic_hcr |= ICH_HCR_EL2_UIE; 39 + 40 + if (!als->nr_sgi) 41 + cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount; 42 + 43 + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ? 44 + ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE; 45 + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ? 46 + ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE; 47 + 48 + /* 49 + * Dealing with EOImode=1 is a massive source of headache. Not 50 + * only do we need to track that we have active interrupts 51 + * outside of the LRs and force DIR to be trapped, we also 52 + * need to deal with SPIs that can be deactivated on another 53 + * CPU. 54 + * 55 + * On systems that do not implement TDIR, force the bit in the 56 + * shadow state anyway to avoid IPI-ing on these poor sods. 57 + * 58 + * Note that we set the trap irrespective of EOIMode, as that 59 + * can change behind our back without any warning... 
60 + */ 61 + if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) || 62 + irqs_active_outside_lrs(als) || 63 + atomic_read(&vcpu->kvm->arch.vgic.active_spis)) 64 + cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR; 29 65 } 30 66 31 67 static bool lr_signals_eoi_mi(u64 lr_val) ··· 71 33 !(lr_val & ICH_LR_HW); 72 34 } 73 35 74 - void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) 36 + static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val) 75 37 { 76 - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 77 - struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; 78 - u32 model = vcpu->kvm->arch.vgic.vgic_model; 79 - int lr; 38 + struct vgic_irq *irq; 39 + bool is_v2_sgi = false; 40 + bool deactivated; 41 + u32 intid; 80 42 81 - DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 43 + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { 44 + intid = val & ICH_LR_VIRTUAL_ID_MASK; 45 + } else { 46 + intid = val & GICH_LR_VIRTUALID; 47 + is_v2_sgi = vgic_irq_is_sgi(intid); 48 + } 82 49 83 - cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE; 50 + irq = vgic_get_vcpu_irq(vcpu, intid); 51 + if (!irq) /* An LPI could have been unmapped. */ 52 + return; 84 53 85 - for (lr = 0; lr < cpuif->used_lrs; lr++) { 86 - u64 val = cpuif->vgic_lr[lr]; 87 - u32 intid, cpuid; 88 - struct vgic_irq *irq; 89 - bool is_v2_sgi = false; 90 - bool deactivated; 91 - 92 - cpuid = val & GICH_LR_PHYSID_CPUID; 93 - cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; 94 - 95 - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) { 96 - intid = val & ICH_LR_VIRTUAL_ID_MASK; 97 - } else { 98 - intid = val & GICH_LR_VIRTUALID; 99 - is_v2_sgi = vgic_irq_is_sgi(intid); 100 - } 101 - 102 - /* Notify fds when the guest EOI'ed a level-triggered IRQ */ 103 - if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) 104 - kvm_notify_acked_irq(vcpu->kvm, 0, 105 - intid - VGIC_NR_PRIVATE_IRQS); 106 - 107 - irq = vgic_get_vcpu_irq(vcpu, intid); 108 - if (!irq) /* An LPI could have been unmapped. 
*/ 109 - continue; 110 - 111 - raw_spin_lock(&irq->irq_lock); 112 - 113 - /* Always preserve the active bit, note deactivation */ 54 + scoped_guard(raw_spinlock, &irq->irq_lock) { 55 + /* Always preserve the active bit for !LPIs, note deactivation */ 56 + if (irq->intid >= VGIC_MIN_LPI) 57 + val &= ~ICH_LR_ACTIVE_BIT; 114 58 deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT); 115 59 irq->active = !!(val & ICH_LR_ACTIVE_BIT); 116 60 117 - if (irq->active && is_v2_sgi) 118 - irq->active_source = cpuid; 119 - 120 61 /* Edge is the only case where we preserve the pending bit */ 121 62 if (irq->config == VGIC_CONFIG_EDGE && 122 - (val & ICH_LR_PENDING_BIT)) { 63 + (val & ICH_LR_PENDING_BIT)) 123 64 irq->pending_latch = true; 124 - 125 - if (is_v2_sgi) 126 - irq->source |= (1 << cpuid); 127 - } 128 65 129 66 /* 130 67 * Clear soft pending state when level irqs have been acked. ··· 107 94 if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) 108 95 irq->pending_latch = false; 109 96 97 + if (is_v2_sgi) { 98 + u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val); 99 + 100 + if (irq->active) 101 + irq->active_source = cpuid; 102 + 103 + if (val & ICH_LR_PENDING_BIT) 104 + irq->source |= BIT(cpuid); 105 + } 106 + 110 107 /* Handle resampling for mapped interrupts if required */ 111 108 vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT); 112 109 113 - raw_spin_unlock(&irq->irq_lock); 114 - vgic_put_irq(vcpu->kvm, irq); 110 + irq->on_lr = false; 111 + } 112 + 113 + /* Notify fds when the guest EOI'ed a level-triggered SPI, and drop the refcount */ 114 + if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) { 115 + kvm_notify_acked_irq(vcpu->kvm, 0, 116 + intid - VGIC_NR_PRIVATE_IRQS); 117 + atomic_dec_if_positive(&vcpu->kvm->arch.vgic.active_spis); 118 + } 119 + 120 + vgic_put_irq(vcpu->kvm, irq); 121 + } 122 + 123 + static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq); 124 + 125 + static void 
vgic_v3_deactivate_phys(u32 intid) 126 + { 127 + if (cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) 128 + gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI); 129 + else 130 + gic_write_dir(intid); 131 + } 132 + 133 + void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) 134 + { 135 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 136 + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; 137 + u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr); 138 + struct vgic_irq *irq; 139 + 140 + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 141 + 142 + for (int lr = 0; lr < cpuif->used_lrs; lr++) 143 + vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]); 144 + 145 + /* 146 + * EOIMode=0: use EOIcount to emulate deactivation. We are 147 + * guaranteed to deactivate in reverse order of the activation, so 148 + * just pick one active interrupt after the other in the ap_list, 149 + * and replay the deactivation as if the CPU was doing it. We also 150 + * rely on priority drop to have taken place, and the list to be 151 + * sorted by priority. 152 + */ 153 + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 154 + u64 lr; 155 + 156 + /* 157 + * I would have loved to write this using a scoped_guard(), 158 + * but using 'continue' here is a total train wreck. 
159 + */ 160 + if (!eoicount) { 161 + break; 162 + } else { 163 + guard(raw_spinlock)(&irq->irq_lock); 164 + 165 + if (!(likely(vgic_target_oracle(irq) == vcpu) && 166 + irq->active)) 167 + continue; 168 + 169 + lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT; 170 + } 171 + 172 + if (lr & ICH_LR_HW) 173 + vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); 174 + 175 + vgic_v3_fold_lr(vcpu, lr); 176 + eoicount--; 115 177 } 116 178 117 179 cpuif->used_lrs = 0; 118 180 } 119 181 182 + void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val) 183 + { 184 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 185 + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; 186 + u32 model = vcpu->kvm->arch.vgic.vgic_model; 187 + struct kvm_vcpu *target_vcpu = NULL; 188 + bool mmio = false, is_v2_sgi; 189 + struct vgic_irq *irq; 190 + unsigned long flags; 191 + u64 lr = 0; 192 + u8 cpuid; 193 + 194 + /* Snapshot CPUID, and remove it from the INTID */ 195 + cpuid = FIELD_GET(GENMASK_ULL(12, 10), val); 196 + val &= ~GENMASK_ULL(12, 10); 197 + 198 + is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 && 199 + val < VGIC_NR_SGIS); 200 + 201 + /* 202 + * We only deal with DIR when EOIMode==1, and only for SGI, 203 + * PPI or SPI. 204 + */ 205 + if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK) || 206 + val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS) 207 + return; 208 + 209 + /* Make sure we're in the same context as LR handling */ 210 + local_irq_save(flags); 211 + 212 + irq = vgic_get_vcpu_irq(vcpu, val); 213 + if (WARN_ON_ONCE(!irq)) 214 + goto out; 215 + 216 + /* 217 + * EOIMode=1: we must rely on traps to handle deactivate of 218 + * overflowing interrupts, as there is no ordering guarantee and 219 + * EOIcount isn't being incremented. Priority drop will have taken 220 + * place, as ICV_EOIxR_EL1 only affects the APRs and not the LRs. 
221 + * 222 + * Three possibilities: 223 + * 224 + * - The irq is not queued on any CPU, and there is nothing to 225 + * do, 226 + * 227 + * - Or the irq is in an LR, meaning that its state is not 228 + * directly observable. Treat it bluntly by making it as if 229 + * this was a write to GICD_ICACTIVER, which will force an 230 + * exit on all vcpus. If it hurts, don't do that. 231 + * 232 + * - Or the irq is active, but not in an LR, and we can 233 + * directly deactivate it by building a pseudo-LR, fold it, 234 + * and queue a request to prune the resulting ap_list, 235 + * 236 + * Special care must be taken to match the source CPUID when 237 + * deactivating a GICv2 SGI. 238 + */ 239 + scoped_guard(raw_spinlock, &irq->irq_lock) { 240 + target_vcpu = irq->vcpu; 241 + 242 + /* Not on any ap_list? */ 243 + if (!target_vcpu) 244 + goto put; 245 + 246 + /* 247 + * Urgh. We're deactivating something that we cannot 248 + * observe yet... Big hammer time. 249 + */ 250 + if (irq->on_lr) { 251 + mmio = true; 252 + goto put; 253 + } 254 + 255 + /* GICv2 SGI: check that the cpuid matches */ 256 + if (is_v2_sgi && irq->active_source != cpuid) { 257 + target_vcpu = NULL; 258 + goto put; 259 + } 260 + 261 + /* (with a Dalek voice) DEACTIVATE!!!! 
*/ 262 + lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT; 263 + } 264 + 265 + if (lr & ICH_LR_HW) 266 + vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); 267 + 268 + vgic_v3_fold_lr(vcpu, lr); 269 + 270 + put: 271 + vgic_put_irq(vcpu->kvm, irq); 272 + 273 + out: 274 + local_irq_restore(flags); 275 + 276 + if (mmio) 277 + vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32)); 278 + 279 + /* Force the ap_list to be pruned */ 280 + if (target_vcpu) 281 + kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu); 282 + } 283 + 120 284 /* Requires the irq to be locked already */ 121 - void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 285 + static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq) 122 286 { 123 287 u32 model = vcpu->kvm->arch.vgic.vgic_model; 124 288 u64 val = irq->intid; 125 289 bool allow_pending = true, is_v2_sgi; 290 + 291 + WARN_ON(irq->on_lr); 126 292 127 293 is_v2_sgi = (vgic_irq_is_sgi(irq->intid) && 128 294 model == KVM_DEV_TYPE_ARM_VGIC_V2); ··· 342 150 if (allow_pending && irq_is_pending(irq)) { 343 151 val |= ICH_LR_PENDING_BIT; 344 152 153 + if (is_v2_sgi) { 154 + u32 src = ffs(irq->source); 155 + 156 + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", 157 + irq->intid)) 158 + return 0; 159 + 160 + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 161 + if (irq->source & ~BIT(src - 1)) 162 + val |= ICH_LR_EOI; 163 + } 164 + } 165 + 166 + if (irq->group) 167 + val |= ICH_LR_GROUP; 168 + 169 + val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; 170 + 171 + return val; 172 + } 173 + 174 + void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 175 + { 176 + u32 model = vcpu->kvm->arch.vgic.vgic_model; 177 + u64 val = vgic_v3_compute_lr(vcpu, irq); 178 + 179 + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; 180 + 181 + if (val & ICH_LR_PENDING_BIT) { 345 182 if (irq->config == VGIC_CONFIG_EDGE) 346 183 irq->pending_latch = false; 347 184 ··· 378 
157 model == KVM_DEV_TYPE_ARM_VGIC_V2) { 379 158 u32 src = ffs(irq->source); 380 159 381 - if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", 382 - irq->intid)) 383 - return; 384 - 385 - val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 386 - irq->source &= ~(1 << (src - 1)); 387 - if (irq->source) { 160 + irq->source &= ~BIT(src - 1); 161 + if (irq->source) 388 162 irq->pending_latch = true; 389 - val |= ICH_LR_EOI; 390 - } 391 163 } 392 164 } 393 165 ··· 393 179 if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) 394 180 irq->line_level = false; 395 181 396 - if (irq->group) 397 - val |= ICH_LR_GROUP; 398 - 399 - val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; 400 - 401 - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; 182 + irq->on_lr = true; 402 183 } 403 184 404 185 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) ··· 467 258 GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ 468 259 GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) 469 260 470 - void vgic_v3_enable(struct kvm_vcpu *vcpu) 261 + void vgic_v3_reset(struct kvm_vcpu *vcpu) 471 262 { 472 263 struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; 473 264 ··· 497 288 kvm_vgic_global_state.ich_vtr_el2); 498 289 vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits, 499 290 kvm_vgic_global_state.ich_vtr_el2) + 1; 500 - 501 - /* Get the show on the road... 
*/ 502 - vgic_v3->vgic_hcr = ICH_HCR_EL2_En; 503 291 } 504 292 505 293 void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) ··· 508 302 509 303 /* Hide GICv3 sysreg if necessary */ 510 304 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 || 511 - !irqchip_in_kernel(vcpu->kvm)) { 305 + !irqchip_in_kernel(vcpu->kvm)) 512 306 vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | 513 307 ICH_HCR_EL2_TC); 514 - return; 515 - } 516 - 517 - if (group0_trap) 518 - vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0; 519 - if (group1_trap) 520 - vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1; 521 - if (common_trap) 522 - vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC; 523 - if (dir_trap) 524 - vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR; 525 308 } 526 309 527 310 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) ··· 831 636 832 637 static bool vgic_v3_broken_seis(void) 833 638 { 834 - return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) && 835 - is_midr_in_range_list(broken_seis)); 639 + return (is_kernel_in_hyp_mode() && 640 + is_midr_in_range_list(broken_seis) && 641 + (read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS)); 642 + } 643 + 644 + void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt, 645 + __le32 *origptr, __le32 *updptr, 646 + int nr_inst) 647 + { 648 + u32 insn, oinsn, rd; 649 + u64 hcr = 0; 650 + 651 + if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) { 652 + group0_trap = true; 653 + group1_trap = true; 654 + } 655 + 656 + if (vgic_v3_broken_seis()) { 657 + /* We know that these machines have ICH_HCR_EL2.TDIR */ 658 + group0_trap = true; 659 + group1_trap = true; 660 + dir_trap = true; 661 + } 662 + 663 + if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR)) 664 + common_trap = true; 665 + 666 + if (group0_trap) 667 + hcr |= ICH_HCR_EL2_TALL0; 668 + if (group1_trap) 669 + hcr |= ICH_HCR_EL2_TALL1; 670 + if (common_trap) 671 + hcr |= ICH_HCR_EL2_TC; 672 + if (dir_trap) 673 + hcr |= ICH_HCR_EL2_TDIR; 674 + 675 + /* Compute target register */ 
676 + oinsn = le32_to_cpu(*origptr); 677 + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); 678 + 679 + /* movz rd, #(val & 0xffff) */ 680 + insn = aarch64_insn_gen_movewide(rd, 681 + (u16)hcr, 682 + 0, 683 + AARCH64_INSN_VARIANT_64BIT, 684 + AARCH64_INSN_MOVEWIDE_ZERO); 685 + *updptr = cpu_to_le32(insn); 836 686 } 837 687 838 688 /** ··· 891 651 { 892 652 u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); 893 653 bool has_v2; 654 + u64 traps; 894 655 int ret; 895 656 896 657 has_v2 = ich_vtr_el2 >> 63; ··· 950 709 if (has_v2) 951 710 static_branch_enable(&vgic_v3_has_v2_compat); 952 711 953 - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) { 954 - group0_trap = true; 955 - group1_trap = true; 956 - } 957 - 958 712 if (vgic_v3_broken_seis()) { 959 713 kvm_info("GICv3 with broken locally generated SEI\n"); 960 - 961 714 kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS; 962 - group0_trap = true; 963 - group1_trap = true; 964 - if (ich_vtr_el2 & ICH_VTR_EL2_TDS) 965 - dir_trap = true; 966 - else 967 - common_trap = true; 968 715 } 969 716 970 - if (group0_trap || group1_trap || common_trap | dir_trap) { 717 + traps = vgic_ich_hcr_trap_bits(); 718 + if (traps) { 971 719 kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", 972 - group0_trap ? "G0" : "", 973 - group1_trap ? "G1" : "", 974 - common_trap ? "C" : "", 975 - dir_trap ? "D" : ""); 720 + (traps & ICH_HCR_EL2_TALL0) ? "G0" : "", 721 + (traps & ICH_HCR_EL2_TALL1) ? "G1" : "", 722 + (traps & ICH_HCR_EL2_TC) ? "C" : "", 723 + (traps & ICH_HCR_EL2_TDIR) ? "D" : ""); 976 724 static_branch_enable(&vgic_v3_cpuif_trap); 977 725 } 978 726 ··· 1001 771 } 1002 772 1003 773 if (likely(!is_protected_kvm_enabled())) 1004 - kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); 774 + kvm_call_hyp(__vgic_v3_save_aprs, cpu_if); 1005 775 WARN_ON(vgic_v4_put(vcpu)); 1006 776 1007 777 if (has_vhe())
+4 -1
arch/arm64/kvm/vgic/vgic-v4.c
··· 163 163 struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); 164 164 struct irq_desc *desc; 165 165 unsigned long flags; 166 + bool pending; 166 167 int ret; 167 168 168 169 raw_spin_lock_irqsave(&irq->irq_lock, flags); ··· 174 173 irq->hw = false; 175 174 ret = irq_get_irqchip_state(irq->host_irq, 176 175 IRQCHIP_STATE_PENDING, 177 - &irq->pending_latch); 176 + &pending); 178 177 WARN_ON(ret); 178 + 179 + irq->pending_latch = pending; 179 180 180 181 desc = irq_to_desc(irq->host_irq); 181 182 irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
+189 -115
arch/arm64/kvm/vgic/vgic.c
··· 244 244 * 245 245 * Requires the IRQ lock to be held. 246 246 */ 247 - static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) 247 + struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) 248 248 { 249 249 lockdep_assert_held(&irq->irq_lock); 250 250 ··· 272 272 return NULL; 273 273 } 274 274 275 + struct vgic_sort_info { 276 + struct kvm_vcpu *vcpu; 277 + struct vgic_vmcr vmcr; 278 + }; 279 + 275 280 /* 276 281 * The order of items in the ap_lists defines how we'll pack things in LRs as 277 282 * well, the first items in the list being the first things populated in the 278 283 * LRs. 279 284 * 280 - * A hard rule is that active interrupts can never be pushed out of the LRs 281 - * (and therefore take priority) since we cannot reliably trap on deactivation 282 - * of IRQs and therefore they have to be present in the LRs. 283 - * 285 + * Pending, non-active interrupts must be placed at the head of the list. 284 286 * Otherwise things should be sorted by the priority field and the GIC 285 287 * hardware support will take care of preemption of priority groups etc. 288 + * Interrupts that are not deliverable should be at the end of the list. 286 289 * 287 290 * Return negative if "a" sorts before "b", 0 to preserve order, and positive 288 291 * to sort "b" before "a". 
··· 295 292 { 296 293 struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list); 297 294 struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list); 295 + struct vgic_sort_info *info = priv; 296 + struct kvm_vcpu *vcpu = info->vcpu; 298 297 bool penda, pendb; 299 298 int ret; 300 299 ··· 310 305 raw_spin_lock(&irqa->irq_lock); 311 306 raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); 312 307 313 - if (irqa->active || irqb->active) { 314 - ret = (int)irqb->active - (int)irqa->active; 308 + /* Undeliverable interrupts should be last */ 309 + ret = (int)(vgic_target_oracle(irqb) == vcpu) - (int)(vgic_target_oracle(irqa) == vcpu); 310 + if (ret) 315 311 goto out; 316 - } 317 312 318 - penda = irqa->enabled && irq_is_pending(irqa); 319 - pendb = irqb->enabled && irq_is_pending(irqb); 320 - 321 - if (!penda || !pendb) { 322 - ret = (int)pendb - (int)penda; 313 + /* Same thing for interrupts targeting a disabled group */ 314 + ret = (int)(irqb->group ? info->vmcr.grpen1 : info->vmcr.grpen0); 315 + ret -= (int)(irqa->group ? 
info->vmcr.grpen1 : info->vmcr.grpen0); 316 + if (ret) 323 317 goto out; 324 - } 325 318 326 - /* Both pending and enabled, sort by priority */ 327 - ret = irqa->priority - irqb->priority; 319 + penda = irqa->enabled && irq_is_pending(irqa) && !irqa->active; 320 + pendb = irqb->enabled && irq_is_pending(irqb) && !irqb->active; 321 + 322 + ret = (int)pendb - (int)penda; 323 + if (ret) 324 + goto out; 325 + 326 + /* Both pending and enabled, sort by priority (lower number first) */ 327 + ret = (int)irqa->priority - (int)irqb->priority; 328 + if (ret) 329 + goto out; 330 + 331 + /* Finally, HW bit active interrupts have priority over non-HW ones */ 332 + ret = (int)irqb->hw - (int)irqa->hw; 333 + 328 334 out: 329 335 raw_spin_unlock(&irqb->irq_lock); 330 336 raw_spin_unlock(&irqa->irq_lock); ··· 346 330 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) 347 331 { 348 332 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 333 + struct vgic_sort_info info = { .vcpu = vcpu, }; 349 334 350 335 lockdep_assert_held(&vgic_cpu->ap_list_lock); 351 336 352 - list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); 337 + vgic_get_vmcr(vcpu, &info.vmcr); 338 + list_sort(&info, &vgic_cpu->ap_list_head, vgic_irq_cmp); 353 339 } 354 340 355 341 /* ··· 374 356 return false; 375 357 } 376 358 359 + static bool vgic_model_needs_bcst_kick(struct kvm *kvm) 360 + { 361 + /* 362 + * A GICv3 (or GICv3-like) system exposing a GICv3 to the guest 363 + * needs a broadcast kick to set TDIR globally. 364 + * 365 + * For systems that do not have TDIR (ARM's own v8.0 CPUs), the 366 + * shadow TDIR bit is always set, and so is the register's TC bit, 367 + * so no need to kick the CPUs. 368 + */ 369 + return (cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) && 370 + kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3); 371 + } 372 + 377 373 /* 378 374 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list. 
379 375 * Do the queuing if necessary, taking the right locks in the right order. ··· 400 368 unsigned long flags) __releases(&irq->irq_lock) 401 369 { 402 370 struct kvm_vcpu *vcpu; 371 + bool bcast; 403 372 404 373 lockdep_assert_held(&irq->irq_lock); 405 374 ··· 475 442 list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); 476 443 irq->vcpu = vcpu; 477 444 445 + /* A new SPI may result in deactivation trapping on all vcpus */ 446 + bcast = (vgic_model_needs_bcst_kick(vcpu->kvm) && 447 + vgic_valid_spi(vcpu->kvm, irq->intid) && 448 + atomic_fetch_inc(&vcpu->kvm->arch.vgic.active_spis) == 0); 449 + 478 450 raw_spin_unlock(&irq->irq_lock); 479 451 raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); 480 452 481 - kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 482 - kvm_vcpu_kick(vcpu); 453 + if (!bcast) { 454 + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 455 + kvm_vcpu_kick(vcpu); 456 + } else { 457 + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_IRQ_PENDING); 458 + } 483 459 484 460 return true; 485 461 } ··· 840 798 vgic_v3_clear_lr(vcpu, lr); 841 799 } 842 800 843 - static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) 844 - { 845 - if (kvm_vgic_global_state.type == VGIC_V2) 846 - vgic_v2_set_underflow(vcpu); 847 - else 848 - vgic_v3_set_underflow(vcpu); 849 - } 850 - 851 - /* Requires the ap_list_lock to be held. 
*/ 852 - static int compute_ap_list_depth(struct kvm_vcpu *vcpu, 853 - bool *multi_sgi) 801 + static void summarize_ap_list(struct kvm_vcpu *vcpu, 802 + struct ap_list_summary *als) 854 803 { 855 804 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 856 805 struct vgic_irq *irq; 857 - int count = 0; 858 - 859 - *multi_sgi = false; 860 806 861 807 lockdep_assert_held(&vgic_cpu->ap_list_lock); 862 808 809 + *als = (typeof(*als)){}; 810 + 863 811 list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 864 - int w; 812 + guard(raw_spinlock)(&irq->irq_lock); 865 813 866 - raw_spin_lock(&irq->irq_lock); 867 - /* GICv2 SGIs can count for more than one... */ 868 - w = vgic_irq_get_lr_count(irq); 869 - raw_spin_unlock(&irq->irq_lock); 814 + if (unlikely(vgic_target_oracle(irq) != vcpu)) 815 + continue; 870 816 871 - count += w; 872 - *multi_sgi |= (w > 1); 817 + if (!irq->active) 818 + als->nr_pend++; 819 + else 820 + als->nr_act++; 821 + 822 + if (irq->intid < VGIC_NR_SGIS) 823 + als->nr_sgi++; 873 824 } 874 - return count; 875 825 } 876 826 877 - /* Requires the VCPU's ap_list_lock to be held. */ 827 + /* 828 + * Dealing with LR overflow is close to black magic -- dress accordingly. 829 + * 830 + * We have to present an almost infinite number of interrupts through a very 831 + * limited number of registers. Therefore crucial decisions must be made to 832 + * ensure we feed the most relevant interrupts into the LRs, and yet have 833 + * some facilities to let the guest interact with those that are not there. 834 + * 835 + * All considerations below are in the context of interrupts targeting a 836 + * single vcpu with non-idle state (either pending, active, or both), 837 + * colloquially called the ap_list: 838 + * 839 + * - Pending interrupts must have priority over active interrupts. This also 840 + * excludes pending+active interrupts. 
This ensures that a guest can 841 + * perform priority drops on any number of interrupts, and yet be 842 + * presented the next pending one. 843 + * 844 + * - Deactivation of interrupts outside of the LRs must be tracked by using 845 + * either the EOIcount-driven maintenance interrupt or, in some cases, 846 + * trapping the DIR register. 847 + * 848 + * - For EOImode=0, a non-zero EOIcount means walking the ap_list past the 849 + * point that made it into the LRs, and deactivating interrupts that would 850 + * have made it onto the LRs if we had the space. 851 + * 852 + * - The MI-generation bits must be used to try and force an exit when the 853 + * guest has done enough changes to the LRs that we want to reevaluate the 854 + * situation: 855 + * 856 + * - if the total number of pending interrupts exceeds the number of 857 + * LRs, NPIE must be set in order to exit once no pending interrupts 858 + * are present in the LRs, allowing us to populate the next batch. 859 + * 860 + * - if there are active interrupts outside of the LRs, then LRENPIE 861 + * must be set so that we exit on deactivation of one of these, and 862 + * work out which one is to be deactivated. Note that this is not 863 + * enough to deal with EOImode=1, see below. 864 + * 865 + * - if the overall number of interrupts exceeds the number of LRs, 866 + * then UIE must be set to allow refilling of the LRs once the 867 + * majority of them has been processed. 868 + * 869 + * - as usual, MI triggers are only an optimisation, since we cannot 870 + * rely on the MI being delivered in a timely manner... 871 + * 872 + * - EOImode=1 creates some additional problems: 873 + * 874 + * - deactivation can happen in any order, and we cannot rely on 875 + * EOImode=0's coupling of priority-drop and deactivation which 876 + * imposes strict reverse Ack order. This means that DIR must 877 + * trap if we have active interrupts outside of the LRs. 
878 + * 879 + * - deactivation of SPIs can occur on any CPU, while the SPI is only 880 + * present in the ap_list of the CPU that actually ack-ed it. In that 881 + * case, EOIcount doesn't provide enough information, and we must 882 + * resort to trapping DIR even if we don't overflow the LRs. Bonus 883 + * point for not trapping DIR when no SPIs are pending or active in 884 + * the whole VM. 885 + * 886 + * - LPIs do not suffer the same problem as SPIs on deactivation, as we 887 + * have to essentially discard the active state, see below. 888 + * 889 + * - Virtual LPIs have an active state (surprise!), which gets removed on 890 + * priority drop (EOI). However, EOIcount doesn't get bumped when the LPI 891 + * is not present in the LR (surprise again!). Special care must therefore 892 + * be taken to remove the active state from any activated LPI when exiting 893 + * from the guest. This is in a way no different from what happens on the 894 + * physical side. We still rely on the running priority to have been 895 + * removed from the APRs, irrespective of the LPI being present in the LRs 896 + * or not. 897 + * 898 + * - Virtual SGIs directly injected via GICv4.1 must not affect EOIcount, as 899 + * they are not managed in SW and don't have a true active state. So only 900 + * set vSGIEOICount when no SGIs are in the ap_list. 901 + * 902 + * - GICv2 SGIs with multiple sources are injected one source at a time, as 903 + * if they were made pending sequentially. This may mean that we don't 904 + * always present the HPPI if other interrupts with lower priority are 905 + * pending in the LRs. Big deal. 
906 + */ 878 907 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) 879 908 { 880 909 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 910 + struct ap_list_summary als; 881 911 struct vgic_irq *irq; 882 - int count; 883 - bool multi_sgi; 884 - u8 prio = 0xff; 885 - int i = 0; 912 + int count = 0; 886 913 887 914 lockdep_assert_held(&vgic_cpu->ap_list_lock); 888 915 889 - count = compute_ap_list_depth(vcpu, &multi_sgi); 890 - if (count > kvm_vgic_global_state.nr_lr || multi_sgi) 916 + summarize_ap_list(vcpu, &als); 917 + 918 + if (irqs_outside_lrs(&als)) 891 919 vgic_sort_ap_list(vcpu); 892 920 893 - count = 0; 894 - 895 921 list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 896 - raw_spin_lock(&irq->irq_lock); 922 + scoped_guard(raw_spinlock, &irq->irq_lock) { 923 + if (likely(vgic_target_oracle(irq) == vcpu)) { 924 + vgic_populate_lr(vcpu, irq, count++); 925 + } 926 + } 897 927 898 - /* 899 - * If we have multi-SGIs in the pipeline, we need to 900 - * guarantee that they are all seen before any IRQ of 901 - * lower priority. In that case, we need to filter out 902 - * these interrupts by exiting early. This is easy as 903 - * the AP list has been sorted already. 
904 - */ 905 - if (multi_sgi && irq->priority > prio) { 906 - raw_spin_unlock(&irq->irq_lock); 928 + if (count == kvm_vgic_global_state.nr_lr) 907 929 break; 908 - } 909 - 910 - if (likely(vgic_target_oracle(irq) == vcpu)) { 911 - vgic_populate_lr(vcpu, irq, count++); 912 - 913 - if (irq->source) 914 - prio = irq->priority; 915 - } 916 - 917 - raw_spin_unlock(&irq->irq_lock); 918 - 919 - if (count == kvm_vgic_global_state.nr_lr) { 920 - if (!list_is_last(&irq->ap_list, 921 - &vgic_cpu->ap_list_head)) 922 - vgic_set_underflow(vcpu); 923 - break; 924 - } 925 930 } 926 931 927 932 /* Nuke remaining LRs */ 928 - for (i = count ; i < kvm_vgic_global_state.nr_lr; i++) 933 + for (int i = count ; i < kvm_vgic_global_state.nr_lr; i++) 929 934 vgic_clear_lr(vcpu, i); 930 935 931 - if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) 936 + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { 932 937 vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count; 933 - else 938 + vgic_v2_configure_hcr(vcpu, &als); 939 + } else { 934 940 vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count; 941 + vgic_v3_configure_hcr(vcpu, &als); 942 + } 935 943 } 936 944 937 945 static inline bool can_access_vgic_from_kernel(void) ··· 1005 913 /* Sync back the hardware VGIC state into our emulation after a guest's run. 
*/ 1006 914 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 1007 915 { 1008 - int used_lrs; 1009 - 1010 916 /* If nesting, emulate the HW effect from L0 to L1 */ 1011 917 if (vgic_state_is_nested(vcpu)) { 1012 918 vgic_v3_sync_nested(vcpu); ··· 1014 924 if (vcpu_has_nv(vcpu)) 1015 925 vgic_v3_nested_update_mi(vcpu); 1016 926 1017 - /* An empty ap_list_head implies used_lrs == 0 */ 1018 - if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) 1019 - return; 1020 - 1021 927 if (can_access_vgic_from_kernel()) 1022 928 vgic_save_state(vcpu); 1023 929 1024 - if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) 1025 - used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; 1026 - else 1027 - used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; 1028 - 1029 - if (used_lrs) 1030 - vgic_fold_lr_state(vcpu); 930 + vgic_fold_lr_state(vcpu); 1031 931 vgic_prune_ap_list(vcpu); 932 + } 933 + 934 + /* Sync interrupts that were deactivated through a DIR trap */ 935 + void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu) 936 + { 937 + unsigned long flags; 938 + 939 + /* Make sure we're in the same context as LR handling */ 940 + local_irq_save(flags); 941 + vgic_prune_ap_list(vcpu); 942 + local_irq_restore(flags); 1032 943 } 1033 944 1034 945 static inline void vgic_restore_state(struct kvm_vcpu *vcpu) ··· 1056 965 * abort the entry procedure and inject the exception at the 1057 966 * beginning of the run loop. 1058 967 * 1059 - * - Otherwise, do exactly *NOTHING*. The guest state is 1060 - * already loaded, and we can carry on with running it. 968 + * - Otherwise, do exactly *NOTHING* apart from enabling the virtual 969 + * CPU interface. The guest state is already loaded, and we can 970 + * carry on with running it. 1061 971 * 1062 972 * If we have NV, but are not in a nested state, compute the 1063 973 * maintenance interrupt state, as it may fire. 
··· 1067 975 if (kvm_vgic_vcpu_pending_irq(vcpu)) 1068 976 kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu); 1069 977 978 + vgic_v3_flush_nested(vcpu); 1070 979 return; 1071 980 } 1072 981 1073 982 if (vcpu_has_nv(vcpu)) 1074 983 vgic_v3_nested_update_mi(vcpu); 1075 984 1076 - /* 1077 - * If there are no virtual interrupts active or pending for this 1078 - * VCPU, then there is no work to do and we can bail out without 1079 - * taking any lock. There is a potential race with someone injecting 1080 - * interrupts to the VCPU, but it is a benign race as the VCPU will 1081 - * either observe the new interrupt before or after doing this check, 1082 - * and introducing additional synchronization mechanism doesn't change 1083 - * this. 1084 - * 1085 - * Note that we still need to go through the whole thing if anything 1086 - * can be directly injected (GICv4). 1087 - */ 1088 - if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && 1089 - !vgic_supports_direct_irqs(vcpu->kvm)) 1090 - return; 1091 - 1092 985 DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 1093 986 1094 - if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { 1095 - raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); 987 + scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock) 1096 988 vgic_flush_lr_state(vcpu); 1097 - raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); 1098 - } 1099 989 1100 990 if (can_access_vgic_from_kernel()) 1101 991 vgic_restore_state(vcpu);
+39 -4
arch/arm64/kvm/vgic/vgic.h
··· 164 164 return ret; 165 165 } 166 166 167 + void kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt, 168 + __le32 *origptr, __le32 *updptr, int nr_inst); 169 + 170 + static inline u64 vgic_ich_hcr_trap_bits(void) 171 + { 172 + u64 hcr; 173 + 174 + /* All the traps are in the bottom 16bits */ 175 + asm volatile(ALTERNATIVE_CB("movz %0, #0\n", 176 + ARM64_ALWAYS_SYSTEM, 177 + kvm_compute_ich_hcr_trap_bits) 178 + : "=r" (hcr)); 179 + 180 + return hcr; 181 + } 182 + 167 183 /* 168 184 * This struct provides an intermediate representation of the fields contained 169 185 * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC ··· 236 220 u32 event_id; 237 221 }; 238 222 223 + struct ap_list_summary { 224 + unsigned int nr_pend; /* purely pending, not active */ 225 + unsigned int nr_act; /* active, or active+pending */ 226 + unsigned int nr_sgi; /* any SGI */ 227 + }; 228 + 229 + #define irqs_outside_lrs(s) \ 230 + (((s)->nr_pend + (s)->nr_act) > kvm_vgic_global_state.nr_lr) 231 + 232 + #define irqs_pending_outside_lrs(s) \ 233 + ((s)->nr_pend > kvm_vgic_global_state.nr_lr) 234 + 235 + #define irqs_active_outside_lrs(s) \ 236 + ((s)->nr_act && irqs_outside_lrs(s)) 237 + 239 238 int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, 240 239 struct vgic_reg_attr *reg_attr); 241 240 int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, ··· 261 230 struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid); 262 231 struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid); 263 232 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); 233 + struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq); 264 234 bool vgic_get_phys_line_level(struct vgic_irq *irq); 265 235 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); 266 236 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); ··· 277 245 278 246 void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); 279 247 void 
vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); 248 + void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val); 280 249 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); 281 - void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); 250 + void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als); 282 251 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); 283 252 int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, 284 253 int offset, u32 *val); ··· 287 254 int offset, u32 *val); 288 255 void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 289 256 void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 290 - void vgic_v2_enable(struct kvm_vcpu *vcpu); 257 + void vgic_v2_reset(struct kvm_vcpu *vcpu); 291 258 int vgic_v2_probe(const struct gic_kvm_info *info); 292 259 int vgic_v2_map_resources(struct kvm *kvm); 293 260 int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, ··· 319 286 void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); 320 287 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); 321 288 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); 322 - void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); 289 + void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val); 290 + void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als); 323 291 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 324 292 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 325 - void vgic_v3_enable(struct kvm_vcpu *vcpu); 293 + void vgic_v3_reset(struct kvm_vcpu *vcpu); 326 294 int vgic_v3_probe(const struct gic_kvm_info *info); 327 295 int vgic_v3_map_resources(struct kvm *kvm); 328 296 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); ··· 446 412 return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP); 447 413 } 448 414 415 + void 
vgic_v3_flush_nested(struct kvm_vcpu *vcpu); 449 416 void vgic_v3_sync_nested(struct kvm_vcpu *vcpu); 450 417 void vgic_v3_load_nested(struct kvm_vcpu *vcpu); 451 418 void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
+2
arch/arm64/tools/cpucaps
··· 40 40 HAS_GICV5_LEGACY 41 41 HAS_GIC_PRIO_MASKING 42 42 HAS_GIC_PRIO_RELAXED_SYNC 43 + HAS_ICH_HCR_EL2_TDIR 43 44 HAS_HCR_NV1 44 45 HAS_HCX 45 46 HAS_LDAPR ··· 65 64 HAS_VA52 66 65 HAS_VIRT_HOST_EXTN 67 66 HAS_WFXT 67 + HAS_XNX 68 68 HAFT 69 69 HW_DBM 70 70 KVM_HVHE
+5 -2
drivers/irqchip/irq-apple-aic.c
··· 411 411 if (is_kernel_in_hyp_mode() && 412 412 (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) && 413 413 read_sysreg_s(SYS_ICH_MISR_EL2) != 0) { 414 + u64 val; 415 + 414 416 generic_handle_domain_irq(aic_irqc->hw_domain, 415 417 AIC_FIQ_HWIRQ(AIC_VGIC_MI)); 416 418 417 419 if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) && 418 - read_sysreg_s(SYS_ICH_MISR_EL2))) { 419 - pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n"); 420 + (val = read_sysreg_s(SYS_ICH_MISR_EL2)))) { 421 + pr_err_ratelimited("vGIC IRQ fired and not handled by KVM (MISR=%llx), disabling.\n", 422 + val); 420 423 sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0); 421 424 } 422 425 }
+3
drivers/irqchip/irq-gic.c
··· 1459 1459 if (ret) 1460 1460 return; 1461 1461 1462 + gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base; 1463 + 1462 1464 if (static_branch_likely(&supports_deactivate_key)) 1463 1465 vgic_set_kvm_info(&gic_v2_kvm_info); 1464 1466 } ··· 1622 1620 return; 1623 1621 1624 1622 gic_v2_kvm_info.maint_irq = irq; 1623 + gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base; 1625 1624 1626 1625 vgic_set_kvm_info(&gic_v2_kvm_info); 1627 1626 }
+19 -10
include/kvm/arm_vgic.h
··· 59 59 /* virtual control interface mapping, HYP VA */ 60 60 void __iomem *vctrl_hyp; 61 61 62 + /* Physical CPU interface, kernel VA */ 63 + void __iomem *gicc_base; 64 + 62 65 /* Number of implemented list registers */ 63 66 int nr_lr; 64 67 ··· 123 120 124 121 struct vgic_irq { 125 122 raw_spinlock_t irq_lock; /* Protects the content of the struct */ 123 + u32 intid; /* Guest visible INTID */ 126 124 struct rcu_head rcu; 127 125 struct list_head ap_list; 128 126 ··· 138 134 * affinity reg (v3). 139 135 */ 140 136 141 - u32 intid; /* Guest visible INTID */ 142 - bool line_level; /* Level only */ 143 - bool pending_latch; /* The pending latch state used to calculate 144 - * the pending state for both level 145 - * and edge triggered IRQs. */ 146 - bool active; 147 - bool pending_release; /* Used for LPIs only, unreferenced IRQ 137 + bool pending_release:1; /* Used for LPIs only, unreferenced IRQ 148 138 * pending a release */ 149 139 150 - bool enabled; 151 - bool hw; /* Tied to HW IRQ */ 140 + bool pending_latch:1; /* The pending latch state used to calculate 141 + * the pending state for both level 142 + * and edge triggered IRQs. */ 143 + enum vgic_irq_config config:1; /* Level or edge */ 144 + bool line_level:1; /* Level only */ 145 + bool enabled:1; 146 + bool active:1; 147 + bool hw:1; /* Tied to HW IRQ */ 148 + bool on_lr:1; /* Present in a CPU LR */ 152 149 refcount_t refcount; /* Used for LPIs */ 153 150 u32 hwintid; /* HW INTID number */ 154 151 unsigned int host_irq; /* linux irq corresponding to hwintid */ ··· 161 156 u8 active_source; /* GICv2 SGIs only */ 162 157 u8 priority; 163 158 u8 group; /* 0 == group 0, 1 == group 1 */ 164 - enum vgic_irq_config config; /* Level or edge */ 165 159 166 160 struct irq_ops *ops; 167 161 ··· 263 259 /* The GIC maintenance IRQ for nested hypervisors. 
*/ 264 260 u32 mi_intid; 265 261 262 + /* Track the number of in-flight active SPIs */ 263 + atomic_t active_spis; 264 + 266 265 /* base addresses in guest physical address space: */ 267 266 gpa_t vgic_dist_base; /* distributor */ 268 267 union { ··· 287 280 struct vgic_irq *spis; 288 281 289 282 struct vgic_io_device dist_iodev; 283 + struct vgic_io_device cpuif_iodev; 290 284 291 285 bool has_its; 292 286 bool table_write_in_progress; ··· 425 417 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); 426 418 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); 427 419 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid); 420 + void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu); 428 421 429 422 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1); 430 423
+6
include/linux/irqchip/arm-gic.h
··· 86 86 87 87 #define GICH_HCR_EN (1 << 0) 88 88 #define GICH_HCR_UIE (1 << 1) 89 + #define GICH_HCR_LRENPIE (1 << 2) 89 90 #define GICH_HCR_NPIE (1 << 3) 91 + #define GICH_HCR_VGrp0EIE (1 << 4) 92 + #define GICH_HCR_VGrp0DIE (1 << 5) 93 + #define GICH_HCR_VGrp1EIE (1 << 6) 94 + #define GICH_HCR_VGrp1DIE (1 << 7) 95 + #define GICH_HCR_EOICOUNT GENMASK(31, 27) 90 96 91 97 #define GICH_LR_VIRTUALID (0x3ff << 0) 92 98 #define GICH_LR_PHYSID_CPUID_SHIFT (10)
+2
include/linux/irqchip/arm-vgic-info.h
··· 24 24 enum gic_type type; 25 25 /* Virtual CPU interface */ 26 26 struct resource vcpu; 27 + /* GICv2 GICC VA */ 28 + void __iomem *gicc_base; 27 29 /* Interrupt number */ 28 30 unsigned int maint_irq; 29 31 /* No interrupt mask, no need to use the above field */
+10
include/uapi/linux/kvm.h
··· 179 179 #define KVM_EXIT_LOONGARCH_IOCSR 38 180 180 #define KVM_EXIT_MEMORY_FAULT 39 181 181 #define KVM_EXIT_TDX 40 182 + #define KVM_EXIT_ARM_SEA 41 182 183 183 184 /* For KVM_EXIT_INTERNAL_ERROR */ 184 185 /* Emulate instruction failed. */ ··· 474 473 } setup_event_notify; 475 474 }; 476 475 } tdx; 476 + /* KVM_EXIT_ARM_SEA */ 477 + struct { 478 + #define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0) 479 + __u64 flags; 480 + __u64 esr; 481 + __u64 gva; 482 + __u64 gpa; 483 + } arm_sea; 477 484 /* Fix the size of the union. */ 478 485 char padding[256]; 479 486 }; ··· 972 963 #define KVM_CAP_RISCV_MP_STATE_RESET 242 973 964 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 974 965 #define KVM_CAP_GUEST_MEMFD_FLAGS 244 966 + #define KVM_CAP_ARM_SEA_TO_USER 245 975 967 976 968 struct kvm_irq_routing_irqchip { 977 969 __u32 irqchip;
+2
tools/arch/arm64/include/asm/esr.h
··· 141 141 #define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT) 142 142 #define ESR_ELx_AR_SHIFT (14) 143 143 #define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT) 144 + #define ESR_ELx_VNCR_SHIFT (13) 145 + #define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT) 144 146 #define ESR_ELx_CM_SHIFT (8) 145 147 #define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) 146 148
+2
tools/testing/selftests/kvm/Makefile.kvm
··· 158 158 TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) 159 159 TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs 160 160 TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases 161 + TEST_GEN_PROGS_arm64 += arm64/at 161 162 TEST_GEN_PROGS_arm64 += arm64/debug-exceptions 162 163 TEST_GEN_PROGS_arm64 += arm64/hello_el2 163 164 TEST_GEN_PROGS_arm64 += arm64/host_sve ··· 166 165 TEST_GEN_PROGS_arm64 += arm64/external_aborts 167 166 TEST_GEN_PROGS_arm64 += arm64/page_fault_test 168 167 TEST_GEN_PROGS_arm64 += arm64/psci_test 168 + TEST_GEN_PROGS_arm64 += arm64/sea_to_user 169 169 TEST_GEN_PROGS_arm64 += arm64/set_id_regs 170 170 TEST_GEN_PROGS_arm64 += arm64/smccc_filter 171 171 TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
+166
tools/testing/selftests/kvm/arm64/at.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes. 4 + */ 5 + #include "kvm_util.h" 6 + #include "processor.h" 7 + #include "test_util.h" 8 + #include "ucall.h" 9 + 10 + #include <asm/sysreg.h> 11 + 12 + #define TEST_ADDR 0x80000000 13 + 14 + enum { 15 + CLEAR_ACCESS_FLAG, 16 + TEST_ACCESS_FLAG, 17 + }; 18 + 19 + static u64 *ptep_hva; 20 + 21 + #define copy_el2_to_el1(reg) \ 22 + write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12) 23 + 24 + /* Yes, this is an ugly hack */ 25 + #define __at(op, addr) write_sysreg_s(addr, op) 26 + 27 + #define test_at_insn(op, expect_fault) \ 28 + do { \ 29 + u64 par, fsc; \ 30 + bool fault; \ 31 + \ 32 + GUEST_SYNC(CLEAR_ACCESS_FLAG); \ 33 + \ 34 + __at(OP_AT_##op, TEST_ADDR); \ 35 + isb(); \ 36 + par = read_sysreg(par_el1); \ 37 + \ 38 + fault = par & SYS_PAR_EL1_F; \ 39 + fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \ 40 + \ 41 + __GUEST_ASSERT((expect_fault) == fault, \ 42 + "AT "#op": %sexpected fault (par: %lx)1", \ 43 + (expect_fault) ? 
"" : "un", par); \ 44 + if ((expect_fault)) { \ 45 + __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \ 46 + "AT "#op": expected access flag fault (par: %lx)", \ 47 + par); \ 48 + } else { \ 49 + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \ 50 + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \ 51 + GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \ 52 + GUEST_SYNC(TEST_ACCESS_FLAG); \ 53 + } \ 54 + } while (0) 55 + 56 + static void test_at(bool expect_fault) 57 + { 58 + test_at_insn(S1E2R, expect_fault); 59 + test_at_insn(S1E2W, expect_fault); 60 + 61 + /* Reuse the stage-1 MMU context from EL2 at EL1 */ 62 + copy_el2_to_el1(SCTLR); 63 + copy_el2_to_el1(MAIR); 64 + copy_el2_to_el1(TCR); 65 + copy_el2_to_el1(TTBR0); 66 + copy_el2_to_el1(TTBR1); 67 + 68 + /* Disable stage-2 translation and enter a non-host context */ 69 + write_sysreg(0, vtcr_el2); 70 + write_sysreg(0, vttbr_el2); 71 + sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0); 72 + isb(); 73 + 74 + test_at_insn(S1E1R, expect_fault); 75 + test_at_insn(S1E1W, expect_fault); 76 + } 77 + 78 + static void guest_code(void) 79 + { 80 + sysreg_clear_set(tcr_el1, TCR_HA, 0); 81 + isb(); 82 + 83 + test_at(true); 84 + 85 + if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1))) 86 + GUEST_DONE(); 87 + 88 + /* 89 + * KVM's software PTW makes the implementation choice that the AT 90 + * instruction sets the access flag. 91 + */ 92 + sysreg_clear_set(tcr_el1, 0, TCR_HA); 93 + isb(); 94 + test_at(false); 95 + 96 + GUEST_DONE(); 97 + } 98 + 99 + static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) 100 + { 101 + switch (uc->args[1]) { 102 + case CLEAR_ACCESS_FLAG: 103 + /* 104 + * Delete + reinstall the memslot to invalidate stage-2 105 + * mappings of the stage-1 page tables, forcing KVM to 106 + * use the 'slow' AT emulation path. 
107 + * 108 + * This and clearing the access flag from host userspace 109 + * ensures that the access flag cannot be set speculatively 110 + * and is reliably cleared at the time of the AT instruction. 111 + */ 112 + clear_bit(__ffs(PTE_AF), ptep_hva); 113 + vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]); 114 + break; 115 + case TEST_ACCESS_FLAG: 116 + TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva), 117 + "Expected access flag to be set (desc: %lu)", *ptep_hva); 118 + break; 119 + default: 120 + TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]); 121 + } 122 + } 123 + 124 + static void run_test(struct kvm_vcpu *vcpu) 125 + { 126 + struct ucall uc; 127 + 128 + while (true) { 129 + vcpu_run(vcpu); 130 + switch (get_ucall(vcpu, &uc)) { 131 + case UCALL_DONE: 132 + return; 133 + case UCALL_SYNC: 134 + handle_sync(vcpu, &uc); 135 + continue; 136 + case UCALL_ABORT: 137 + REPORT_GUEST_ASSERT(uc); 138 + return; 139 + default: 140 + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); 141 + } 142 + } 143 + } 144 + 145 + int main(void) 146 + { 147 + struct kvm_vcpu_init init; 148 + struct kvm_vcpu *vcpu; 149 + struct kvm_vm *vm; 150 + 151 + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); 152 + 153 + vm = vm_create(1); 154 + 155 + kvm_get_default_vcpu_target(vm, &init); 156 + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); 157 + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); 158 + kvm_arch_vm_finalize_vcpus(vm); 159 + 160 + virt_map(vm, TEST_ADDR, TEST_ADDR, 1); 161 + ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3); 162 + run_test(vcpu); 163 + 164 + kvm_vm_free(vm); 165 + return 0; 166 + }
+331
tools/testing/selftests/kvm/arm64/sea_to_user.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails 4 + * to handle SEA and userspace has opt-ed in KVM_CAP_ARM_SEA_TO_USER. 5 + * 6 + * After reaching userspace with expected arm_sea info, also test userspace 7 + * injecting a synchronous external data abort into the guest. 8 + * 9 + * This test utilizes EINJ to generate a REAL synchronous external data 10 + * abort by consuming a recoverable uncorrectable memory error. Therefore 11 + * the device under test must support EINJ in both firmware and host kernel, 12 + * including the notrigger feature. Otherwise the test will be skipped. 13 + * The under-test platform's APEI should be unable to claim SEA. Otherwise 14 + * the test will also be skipped. 15 + */ 16 + 17 + #include <signal.h> 18 + #include <stdio.h> 19 + #include <stdlib.h> 20 + #include <unistd.h> 21 + 22 + #include "test_util.h" 23 + #include "kvm_util.h" 24 + #include "processor.h" 25 + #include "guest_modes.h" 26 + 27 + #define PAGE_PRESENT (1ULL << 63) 28 + #define PAGE_PHYSICAL 0x007fffffffffffffULL 29 + #define PAGE_ADDR_MASK (~(0xfffULL)) 30 + 31 + /* Group ISV and ISS[23:14]. */ 32 + #define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \ 33 + (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \ 34 + (ESR_ELx_SF) | (ESR_ELx_AR)) 35 + 36 + #define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type" 37 + #define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1" 38 + #define EINJ_MASK "/sys/kernel/debug/apei/einj/param2" 39 + #define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags" 40 + #define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger" 41 + #define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject" 42 + /* Memory Uncorrectable non-fatal. */ 43 + #define ERROR_TYPE_MEMORY_UER 0x10 44 + /* Memory address and mask valid (param1 and param2). */ 45 + #define MASK_MEMORY_UER 0b10 46 + 47 + /* Guest virtual address region = [2G, 3G). 
*/ 48 + #define START_GVA 0x80000000UL 49 + #define VM_MEM_SIZE 0x40000000UL 50 + /* Note: EINJ_OFFSET must < VM_MEM_SIZE. */ 51 + #define EINJ_OFFSET 0x01234badUL 52 + #define EINJ_GVA ((START_GVA) + (EINJ_OFFSET)) 53 + 54 + static vm_paddr_t einj_gpa; 55 + static void *einj_hva; 56 + static uint64_t einj_hpa; 57 + static bool far_invalid; 58 + 59 + static uint64_t translate_to_host_paddr(unsigned long vaddr) 60 + { 61 + uint64_t pinfo; 62 + int64_t offset = vaddr / getpagesize() * sizeof(pinfo); 63 + int fd; 64 + uint64_t page_addr; 65 + uint64_t paddr; 66 + 67 + fd = open("/proc/self/pagemap", O_RDONLY); 68 + if (fd < 0) 69 + ksft_exit_fail_perror("Failed to open /proc/self/pagemap"); 70 + if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) { 71 + close(fd); 72 + ksft_exit_fail_perror("Failed to read /proc/self/pagemap"); 73 + } 74 + 75 + close(fd); 76 + 77 + if ((pinfo & PAGE_PRESENT) == 0) 78 + ksft_exit_fail_perror("Page not present"); 79 + 80 + page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT; 81 + paddr = page_addr + (vaddr & (getpagesize() - 1)); 82 + return paddr; 83 + } 84 + 85 + static void write_einj_entry(const char *einj_path, uint64_t val) 86 + { 87 + char cmd[256] = {0}; 88 + FILE *cmdfile = NULL; 89 + 90 + sprintf(cmd, "echo %#lx > %s", val, einj_path); 91 + cmdfile = popen(cmd, "r"); 92 + 93 + if (pclose(cmdfile) == 0) 94 + ksft_print_msg("echo %#lx > %s - done\n", val, einj_path); 95 + else 96 + ksft_exit_fail_perror("Failed to write EINJ entry"); 97 + } 98 + 99 + static void inject_uer(uint64_t paddr) 100 + { 101 + if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1) 102 + ksft_test_result_skip("EINJ table no available in firmware"); 103 + 104 + if (access(EINJ_ETYPE, R_OK | W_OK) == -1) 105 + ksft_test_result_skip("EINJ module probably not loaded?"); 106 + 107 + write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER); 108 + write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER); 109 + write_einj_entry(EINJ_ADDR, paddr); 110 + 
write_einj_entry(EINJ_MASK, ~0x0UL); 111 + write_einj_entry(EINJ_NOTRIGGER, 1); 112 + write_einj_entry(EINJ_DOIT, 1); 113 + } 114 + 115 + /* 116 + * When host APEI successfully claims the SEA caused by guest_code, kernel 117 + * will send SIGBUS signal with BUS_MCEERR_AR to test thread. 118 + * 119 + * We set up this SIGBUS handler to skip the test for that case. 120 + */ 121 + static void sigbus_signal_handler(int sig, siginfo_t *si, void *v) 122 + { 123 + ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig); 124 + ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n", 125 + si->si_signo, si->si_errno, si->si_code, si->si_addr); 126 + if (si->si_code == BUS_MCEERR_AR) 127 + ksft_test_result_skip("SEA is claimed by host APEI\n"); 128 + else 129 + ksft_test_result_fail("Exit with signal unhandled\n"); 130 + 131 + exit(0); 132 + } 133 + 134 + static void setup_sigbus_handler(void) 135 + { 136 + struct sigaction act; 137 + 138 + memset(&act, 0, sizeof(act)); 139 + sigemptyset(&act.sa_mask); 140 + act.sa_sigaction = sigbus_signal_handler; 141 + act.sa_flags = SA_SIGINFO; 142 + TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0, 143 + "Failed to setup SIGBUS handler"); 144 + } 145 + 146 + static void guest_code(void) 147 + { 148 + uint64_t guest_data; 149 + 150 + /* Consumes error will cause a SEA. 
*/ 151 + guest_data = *(uint64_t *)EINJ_GVA; 152 + 153 + GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n", 154 + EINJ_GVA, guest_data); 155 + } 156 + 157 + static void expect_sea_handler(struct ex_regs *regs) 158 + { 159 + u64 esr = read_sysreg(esr_el1); 160 + u64 far = read_sysreg(far_el1); 161 + bool expect_far_invalid = far_invalid; 162 + 163 + GUEST_PRINTF("Handling Guest SEA\n"); 164 + GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far); 165 + 166 + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); 167 + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); 168 + 169 + if (expect_far_invalid) { 170 + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV); 171 + GUEST_PRINTF("Guest observed garbage value in FAR\n"); 172 + } else { 173 + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0); 174 + GUEST_ASSERT_EQ(far, EINJ_GVA); 175 + } 176 + 177 + GUEST_DONE(); 178 + } 179 + 180 + static void vcpu_inject_sea(struct kvm_vcpu *vcpu) 181 + { 182 + struct kvm_vcpu_events events = {}; 183 + 184 + events.exception.ext_dabt_pending = true; 185 + vcpu_events_set(vcpu, &events); 186 + } 187 + 188 + static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) 189 + { 190 + struct ucall uc; 191 + bool guest_done = false; 192 + struct kvm_run *run = vcpu->run; 193 + u64 esr; 194 + 195 + /* Resume the vCPU after error injection to consume the error. 
*/ 196 + vcpu_run(vcpu); 197 + 198 + ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n", 199 + exit_reason_str(run->exit_reason)); 200 + ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n", 201 + run->arm_sea.esr, run->arm_sea.flags); 202 + ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n", 203 + run->arm_sea.gva, run->arm_sea.gpa); 204 + 205 + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA); 206 + 207 + esr = run->arm_sea.esr; 208 + TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW); 209 + TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); 210 + TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0); 211 + TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0); 212 + TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0); 213 + 214 + if (!(esr & ESR_ELx_FnV)) { 215 + ksft_print_msg("Expect gva to match given FnV bit is 0\n"); 216 + TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA); 217 + } 218 + 219 + if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) { 220 + ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n"); 221 + TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK); 222 + } 223 + 224 + far_invalid = esr & ESR_ELx_FnV; 225 + 226 + /* Inject a SEA into guest and expect handled in SEA handler. */ 227 + vcpu_inject_sea(vcpu); 228 + 229 + /* Expect the guest to reach GUEST_DONE gracefully. 
*/ 230 + do { 231 + vcpu_run(vcpu); 232 + switch (get_ucall(vcpu, &uc)) { 233 + case UCALL_PRINTF: 234 + ksft_print_msg("From guest: %s", uc.buffer); 235 + break; 236 + case UCALL_DONE: 237 + ksft_print_msg("Guest done gracefully!\n"); 238 + guest_done = 1; 239 + break; 240 + case UCALL_ABORT: 241 + ksft_print_msg("Guest aborted!\n"); 242 + guest_done = 1; 243 + REPORT_GUEST_ASSERT(uc); 244 + break; 245 + default: 246 + TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); 247 + } 248 + } while (!guest_done); 249 + } 250 + 251 + static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu) 252 + { 253 + size_t backing_page_size; 254 + size_t guest_page_size; 255 + size_t alignment; 256 + uint64_t num_guest_pages; 257 + vm_paddr_t start_gpa; 258 + enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB; 259 + struct kvm_vm *vm; 260 + 261 + backing_page_size = get_backing_src_pagesz(src_type); 262 + guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; 263 + alignment = max(backing_page_size, guest_page_size); 264 + num_guest_pages = VM_MEM_SIZE / guest_page_size; 265 + 266 + vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code); 267 + vm_init_descriptor_tables(vm); 268 + vcpu_init_descriptor_tables(*vcpu); 269 + 270 + vm_install_sync_handler(vm, 271 + /*vector=*/VECTOR_SYNC_CURRENT, 272 + /*ec=*/ESR_ELx_EC_DABT_CUR, 273 + /*handler=*/expect_sea_handler); 274 + 275 + start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size; 276 + start_gpa = align_down(start_gpa, alignment); 277 + 278 + vm_userspace_mem_region_add( 279 + /*vm=*/vm, 280 + /*src_type=*/src_type, 281 + /*guest_paddr=*/start_gpa, 282 + /*slot=*/1, 283 + /*npages=*/num_guest_pages, 284 + /*flags=*/0); 285 + 286 + virt_map(vm, START_GVA, start_gpa, num_guest_pages); 287 + 288 + ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n", 289 + num_guest_pages, START_GVA, start_gpa); 290 + return vm; 291 + } 292 + 293 + static void vm_inject_memory_uer(struct 
kvm_vm *vm) 294 + { 295 + uint64_t guest_data; 296 + 297 + einj_gpa = addr_gva2gpa(vm, EINJ_GVA); 298 + einj_hva = addr_gva2hva(vm, EINJ_GVA); 299 + 300 + /* Populate certain data before injecting UER. */ 301 + *(uint64_t *)einj_hva = 0xBAADCAFE; 302 + guest_data = *(uint64_t *)einj_hva; 303 + ksft_print_msg("Before EINJect: data=%#lx\n", 304 + guest_data); 305 + 306 + einj_hpa = translate_to_host_paddr((unsigned long)einj_hva); 307 + 308 + ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n", 309 + EINJ_GVA, einj_gpa, einj_hva, einj_hpa); 310 + 311 + inject_uer(einj_hpa); 312 + ksft_print_msg("Memory UER EINJected\n"); 313 + } 314 + 315 + int main(int argc, char *argv[]) 316 + { 317 + struct kvm_vm *vm; 318 + struct kvm_vcpu *vcpu; 319 + 320 + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER)); 321 + 322 + setup_sigbus_handler(); 323 + 324 + vm = vm_create_with_sea_handler(&vcpu); 325 + vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0); 326 + vm_inject_memory_uer(vm); 327 + run_vm(vm, vcpu); 328 + kvm_vm_free(vm); 329 + 330 + return 0; 331 + }
+263 -22
tools/testing/selftests/kvm/arm64/vgic_irq.c
··· 29 29 bool level_sensitive; /* 1 is level, 0 is edge */ 30 30 int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ 31 31 bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ 32 + uint32_t shared_data; 32 33 }; 33 34 34 35 /* ··· 206 205 do { \ 207 206 uint32_t _intid; \ 208 207 _intid = gic_get_and_ack_irq(); \ 209 - GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \ 208 + GUEST_ASSERT(_intid == IAR_SPURIOUS); \ 210 209 } while (0) 211 210 212 211 #define CAT_HELPER(a, b) a ## b ··· 360 359 * interrupts for the whole test. 361 360 */ 362 361 static void test_inject_preemption(struct test_args *args, 363 - uint32_t first_intid, int num, 364 - kvm_inject_cmd cmd) 362 + uint32_t first_intid, int num, 363 + const unsigned long *exclude, 364 + kvm_inject_cmd cmd) 365 365 { 366 366 uint32_t intid, prio, step = KVM_PRIO_STEPS; 367 367 int i; ··· 381 379 for (i = 0; i < num; i++) { 382 380 uint32_t tmp; 383 381 intid = i + first_intid; 382 + 383 + if (exclude && test_bit(i, exclude)) 384 + continue; 385 + 384 386 KVM_INJECT(cmd, intid); 385 387 /* Each successive IRQ will preempt the previous one. */ 386 388 tmp = wait_for_and_activate_irq(); ··· 396 390 /* finish handling the IRQs starting with the highest priority one. 
*/ 397 391 for (i = 0; i < num; i++) { 398 392 intid = num - i - 1 + first_intid; 393 + 394 + if (exclude && test_bit(intid - first_intid, exclude)) 395 + continue; 396 + 399 397 gic_set_eoi(intid); 400 - if (args->eoi_split) 401 - gic_set_dir(intid); 398 + } 399 + 400 + if (args->eoi_split) { 401 + for (i = 0; i < num; i++) { 402 + intid = i + first_intid; 403 + 404 + if (exclude && test_bit(i, exclude)) 405 + continue; 406 + 407 + if (args->eoi_split) 408 + gic_set_dir(intid); 409 + } 402 410 } 403 411 404 412 local_irq_enable(); 405 413 406 - for (i = 0; i < num; i++) 414 + for (i = 0; i < num; i++) { 415 + if (exclude && test_bit(i, exclude)) 416 + continue; 417 + 407 418 GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); 419 + } 408 420 GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); 409 421 GUEST_ASSERT_IAR_EMPTY(); 410 422 ··· 460 436 461 437 static void test_preemption(struct test_args *args, struct kvm_inject_desc *f) 462 438 { 463 - /* 464 - * Test up to 4 levels of preemption. The reason is that KVM doesn't 465 - * currently implement the ability to have more than the number-of-LRs 466 - * number of concurrently active IRQs. The number of LRs implemented is 467 - * IMPLEMENTATION DEFINED, however, it seems that most implement 4. 
468 - */ 439 + /* Timer PPIs cannot be injected from userspace */ 440 + static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) | 441 + BIT(30 - MIN_PPI) | 442 + BIT(28 - MIN_PPI) | 443 + BIT(26 - MIN_PPI)); 444 + 469 445 if (f->sgi) 470 - test_inject_preemption(args, MIN_SGI, 4, f->cmd); 446 + test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd); 471 447 472 448 if (f->ppi) 473 - test_inject_preemption(args, MIN_PPI, 4, f->cmd); 449 + test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd); 474 450 475 451 if (f->spi) 476 - test_inject_preemption(args, MIN_SPI, 4, f->cmd); 452 + test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd); 477 453 } 478 454 479 455 static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f) 480 456 { 481 - /* Test up to 4 active IRQs. Same reason as in test_preemption. */ 482 457 if (f->sgi) 483 - guest_restore_active(args, MIN_SGI, 4, f->cmd); 458 + guest_restore_active(args, MIN_SGI, 16, f->cmd); 484 459 485 460 if (f->ppi) 486 - guest_restore_active(args, MIN_PPI, 4, f->cmd); 461 + guest_restore_active(args, MIN_PPI, 16, f->cmd); 487 462 488 463 if (f->spi) 489 - guest_restore_active(args, MIN_SPI, 4, f->cmd); 464 + guest_restore_active(args, MIN_SPI, 31, f->cmd); 490 465 } 491 466 492 467 static void guest_code(struct test_args *args) ··· 496 473 497 474 gic_init(GIC_V3, 1); 498 475 499 - for (i = 0; i < nr_irqs; i++) 500 - gic_irq_enable(i); 501 - 502 476 for (i = MIN_SPI; i < nr_irqs; i++) 503 477 gic_irq_set_config(i, !level_sensitive); 478 + 479 + for (i = 0; i < nr_irqs; i++) 480 + gic_irq_enable(i); 504 481 505 482 gic_set_eoi_split(args->eoi_split); 506 483 ··· 802 779 kvm_vm_free(vm); 803 780 } 804 781 782 + static void guest_code_asym_dir(struct test_args *args, int cpuid) 783 + { 784 + gic_init(GIC_V3, 2); 785 + 786 + gic_set_eoi_split(1); 787 + gic_set_priority_mask(CPU_PRIO_MASK); 788 + 789 + if (cpuid == 0) { 790 + uint32_t intid; 791 + 792 + local_irq_disable(); 793 + 794 + 
gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO); 795 + gic_irq_enable(MIN_SPI); 796 + gic_irq_set_pending(MIN_SPI); 797 + 798 + intid = wait_for_and_activate_irq(); 799 + GUEST_ASSERT_EQ(intid, MIN_SPI); 800 + 801 + gic_set_eoi(intid); 802 + isb(); 803 + 804 + WRITE_ONCE(args->shared_data, MIN_SPI); 805 + dsb(ishst); 806 + 807 + do { 808 + dsb(ishld); 809 + } while (READ_ONCE(args->shared_data) == MIN_SPI); 810 + GUEST_ASSERT(!gic_irq_get_active(MIN_SPI)); 811 + } else { 812 + do { 813 + dsb(ishld); 814 + } while (READ_ONCE(args->shared_data) != MIN_SPI); 815 + 816 + gic_set_dir(MIN_SPI); 817 + isb(); 818 + 819 + WRITE_ONCE(args->shared_data, 0); 820 + dsb(ishst); 821 + } 822 + 823 + GUEST_DONE(); 824 + } 825 + 826 + static void guest_code_group_en(struct test_args *args, int cpuid) 827 + { 828 + uint32_t intid; 829 + 830 + gic_init(GIC_V3, 2); 831 + 832 + gic_set_eoi_split(0); 833 + gic_set_priority_mask(CPU_PRIO_MASK); 834 + /* SGI0 is G0, which is disabled */ 835 + gic_irq_set_group(0, 0); 836 + 837 + /* Configure all SGIs with decreasing priority */ 838 + for (intid = 0; intid < MIN_PPI; intid++) { 839 + gic_set_priority(intid, (intid + 1) * 8); 840 + gic_irq_enable(intid); 841 + gic_irq_set_pending(intid); 842 + } 843 + 844 + /* Ack and EOI all G1 interrupts */ 845 + for (int i = 1; i < MIN_PPI; i++) { 846 + intid = wait_for_and_activate_irq(); 847 + 848 + GUEST_ASSERT(intid < MIN_PPI); 849 + gic_set_eoi(intid); 850 + isb(); 851 + } 852 + 853 + /* 854 + * Check that SGI0 is still pending, inactive, and that we cannot 855 + * ack anything. 
856 + */ 857 + GUEST_ASSERT(gic_irq_get_pending(0)); 858 + GUEST_ASSERT(!gic_irq_get_active(0)); 859 + GUEST_ASSERT_IAR_EMPTY(); 860 + GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS); 861 + 862 + /* Open the G0 gates, and verify we can ack SGI0 */ 863 + write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1); 864 + isb(); 865 + 866 + do { 867 + intid = read_sysreg_s(SYS_ICC_IAR0_EL1); 868 + } while (intid == IAR_SPURIOUS); 869 + 870 + GUEST_ASSERT(intid == 0); 871 + GUEST_DONE(); 872 + } 873 + 874 + static void guest_code_timer_spi(struct test_args *args, int cpuid) 875 + { 876 + uint32_t intid; 877 + u64 val; 878 + 879 + gic_init(GIC_V3, 2); 880 + 881 + gic_set_eoi_split(1); 882 + gic_set_priority_mask(CPU_PRIO_MASK); 883 + 884 + /* Add a pending SPI so that KVM starts trapping DIR */ 885 + gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO); 886 + gic_irq_set_pending(MIN_SPI + cpuid); 887 + 888 + /* Configure the timer with a higher priority, make it pending */ 889 + gic_set_priority(27, IRQ_DEFAULT_PRIO - 8); 890 + 891 + isb(); 892 + val = read_sysreg(cntvct_el0); 893 + write_sysreg(val, cntv_cval_el0); 894 + write_sysreg(1, cntv_ctl_el0); 895 + isb(); 896 + 897 + GUEST_ASSERT(gic_irq_get_pending(27)); 898 + 899 + /* Enable both interrupts */ 900 + gic_irq_enable(MIN_SPI + cpuid); 901 + gic_irq_enable(27); 902 + 903 + /* The timer must fire */ 904 + intid = wait_for_and_activate_irq(); 905 + GUEST_ASSERT(intid == 27); 906 + 907 + /* Check that we can deassert it */ 908 + write_sysreg(0, cntv_ctl_el0); 909 + isb(); 910 + 911 + GUEST_ASSERT(!gic_irq_get_pending(27)); 912 + 913 + /* 914 + * Priority drop, deactivation -- we expect that the host 915 + * deactivation will have been effective 916 + */ 917 + gic_set_eoi(27); 918 + gic_set_dir(27); 919 + 920 + GUEST_ASSERT(!gic_irq_get_active(27)); 921 + 922 + /* Do it one more time */ 923 + isb(); 924 + val = read_sysreg(cntvct_el0); 925 + write_sysreg(val, cntv_cval_el0); 926 + write_sysreg(1, cntv_ctl_el0); 927 + isb(); 
928 + 929 + GUEST_ASSERT(gic_irq_get_pending(27)); 930 + 931 + /* The timer must fire again */ 932 + intid = wait_for_and_activate_irq(); 933 + GUEST_ASSERT(intid == 27); 934 + 935 + GUEST_DONE(); 936 + } 937 + 938 + static void *test_vcpu_run(void *arg) 939 + { 940 + struct kvm_vcpu *vcpu = arg; 941 + struct ucall uc; 942 + 943 + while (1) { 944 + vcpu_run(vcpu); 945 + 946 + switch (get_ucall(vcpu, &uc)) { 947 + case UCALL_ABORT: 948 + REPORT_GUEST_ASSERT(uc); 949 + break; 950 + case UCALL_DONE: 951 + return NULL; 952 + default: 953 + TEST_FAIL("Unknown ucall %lu", uc.cmd); 954 + } 955 + } 956 + 957 + return NULL; 958 + } 959 + 960 + static void test_vgic_two_cpus(void *gcode) 961 + { 962 + pthread_t thr[2]; 963 + struct kvm_vcpu *vcpus[2]; 964 + struct test_args args = {}; 965 + struct kvm_vm *vm; 966 + vm_vaddr_t args_gva; 967 + int gic_fd, ret; 968 + 969 + vm = vm_create_with_vcpus(2, gcode, vcpus); 970 + 971 + vm_init_descriptor_tables(vm); 972 + vcpu_init_descriptor_tables(vcpus[0]); 973 + vcpu_init_descriptor_tables(vcpus[1]); 974 + 975 + /* Setup the guest args page (so it gets the args). 
*/ 976 + args_gva = vm_vaddr_alloc_page(vm); 977 + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); 978 + vcpu_args_set(vcpus[0], 2, args_gva, 0); 979 + vcpu_args_set(vcpus[1], 2, args_gva, 1); 980 + 981 + gic_fd = vgic_v3_setup(vm, 2, 64); 982 + 983 + ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]); 984 + if (ret) 985 + TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret); 986 + ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]); 987 + if (ret) 988 + TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret); 989 + 990 + pthread_join(thr[0], NULL); 991 + pthread_join(thr[1], NULL); 992 + 993 + close(gic_fd); 994 + kvm_vm_free(vm); 995 + } 996 + 805 997 static void help(const char *name) 806 998 { 807 999 printf( ··· 1073 835 test_vgic(nr_irqs, false /* level */, true /* eoi_split */); 1074 836 test_vgic(nr_irqs, true /* level */, false /* eoi_split */); 1075 837 test_vgic(nr_irqs, true /* level */, true /* eoi_split */); 838 + test_vgic_two_cpus(guest_code_asym_dir); 839 + test_vgic_two_cpus(guest_code_group_en); 840 + test_vgic_two_cpus(guest_code_timer_spi); 1076 841 } else { 1077 842 test_vgic(nr_irqs, level_sensitive, eoi_split); 1078 843 }
+4
tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
··· 118 118 119 119 guest_setup_its_mappings(); 120 120 guest_invalidate_all_rdists(); 121 + 122 + /* SYNC to ensure ITS setup is complete */ 123 + for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++) 124 + its_send_sync_cmd(test_data.cmdq_base_va, cpuid); 121 125 } 122 126 123 127 static void guest_code(size_t nr_lpis)
+1
tools/testing/selftests/kvm/include/arm64/gic.h
··· 57 57 void gic_irq_clear_pending(unsigned int intid); 58 58 bool gic_irq_get_pending(unsigned int intid); 59 59 void gic_irq_set_config(unsigned int intid, bool is_edge); 60 + void gic_irq_set_group(unsigned int intid, bool group); 60 61 61 62 void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, 62 63 vm_paddr_t pend_table);
+1
tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
··· 15 15 void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, 16 16 u32 collection_id, u32 intid); 17 17 void its_send_invall_cmd(void *cmdq_base, u32 collection_id); 18 + void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id); 18 19 19 20 #endif // __SELFTESTS_GIC_V3_ITS_H__
+1
tools/testing/selftests/kvm/include/kvm_util.h
··· 688 688 #endif 689 689 690 690 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); 691 + void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot); 691 692 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); 692 693 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); 693 694 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+6
tools/testing/selftests/kvm/lib/arm64/gic.c
··· 155 155 GUEST_ASSERT(gic_common_ops); 156 156 gic_common_ops->gic_irq_set_config(intid, is_edge); 157 157 } 158 + 159 + void gic_irq_set_group(unsigned int intid, bool group) 160 + { 161 + GUEST_ASSERT(gic_common_ops); 162 + gic_common_ops->gic_irq_set_group(intid, group); 163 + }
+1
tools/testing/selftests/kvm/lib/arm64/gic_private.h
··· 25 25 void (*gic_irq_clear_pending)(uint32_t intid); 26 26 bool (*gic_irq_get_pending)(uint32_t intid); 27 27 void (*gic_irq_set_config)(uint32_t intid, bool is_edge); 28 + void (*gic_irq_set_group)(uint32_t intid, bool group); 28 29 }; 29 30 30 31 extern const struct gic_common_ops gicv3_ops;
+22
tools/testing/selftests/kvm/lib/arm64/gic_v3.c
··· 293 293 } 294 294 } 295 295 296 + static void gicv3_set_group(uint32_t intid, bool grp) 297 + { 298 + uint32_t cpu_or_dist; 299 + uint32_t val; 300 + 301 + cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid(); 302 + val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4); 303 + if (grp) 304 + val |= BIT(intid % 32); 305 + else 306 + val &= ~BIT(intid % 32); 307 + gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val); 308 + } 309 + 296 310 static void gicv3_cpu_init(unsigned int cpu) 297 311 { 298 312 volatile void *sgi_base; 299 313 unsigned int i; 300 314 volatile void *redist_base_cpu; 315 + u64 typer; 301 316 302 317 GUEST_ASSERT(cpu < gicv3_data.nr_cpus); 303 318 304 319 redist_base_cpu = gicr_base_cpu(cpu); 305 320 sgi_base = sgi_base_from_redist(redist_base_cpu); 321 + 322 + /* Verify assumption that GICR_TYPER.Processor_number == cpu */ 323 + typer = readq_relaxed(redist_base_cpu + GICR_TYPER); 324 + GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu); 306 325 307 326 gicv3_enable_redist(redist_base_cpu); 308 327 ··· 347 328 /* Set a default priority threshold */ 348 329 write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); 349 330 331 + /* Disable Group-0 interrupts */ 332 + write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1); 350 333 /* Enable non-secure Group-1 interrupts */ 351 334 write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); 352 335 } ··· 421 400 .gic_irq_clear_pending = gicv3_irq_clear_pending, 422 401 .gic_irq_get_pending = gicv3_irq_get_pending, 423 402 .gic_irq_set_config = gicv3_irq_set_config, 403 + .gic_irq_set_group = gicv3_set_group, 424 404 }; 425 405 426 406 void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+10
tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
··· 253 253 254 254 its_send_cmd(cmdq_base, &cmd); 255 255 } 256 + 257 + void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id) 258 + { 259 + struct its_cmd_block cmd = {}; 260 + 261 + its_encode_cmd(&cmd, GITS_CMD_SYNC); 262 + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); 263 + 264 + its_send_cmd(cmdq_base, &cmd); 265 + }
+11
tools/testing/selftests/kvm/lib/kvm_util.c
··· 1184 1184 ret, errno, slot, flags); 1185 1185 } 1186 1186 1187 + void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot) 1188 + { 1189 + struct userspace_mem_region *region = memslot2region(vm, slot); 1190 + struct kvm_userspace_memory_region2 tmp = region->region; 1191 + 1192 + tmp.memory_size = 0; 1193 + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp); 1194 + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region); 1195 + } 1196 + 1187 1197 /* 1188 1198 * VM Memory Region Move 1189 1199 * ··· 2015 2005 KVM_EXIT_STRING(NOTIFY), 2016 2006 KVM_EXIT_STRING(LOONGARCH_IOCSR), 2017 2007 KVM_EXIT_STRING(MEMORY_FAULT), 2008 + KVM_EXIT_STRING(ARM_SEA), 2018 2009 }; 2019 2010 2020 2011 /*