Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch kvm-arm64/pkvm-np-thp-6.16 into kvmarm-master/next

* kvm-arm64/pkvm-np-thp-6.16: (21 commits)
: .
: Large mapping support for non-protected pKVM guests, courtesy of
: Vincent Donnefort. From the cover letter:
:
: "This series adds support for stage-2 huge mappings (PMD_SIZE) to pKVM
: np-guests, that is installing PMD-level mappings in the stage-2,
: whenever the stage-1 is backed by either Hugetlbfs or THPs."
: .
KVM: arm64: np-guest CMOs with PMD_SIZE fixmap
KVM: arm64: Stage-2 huge mappings for np-guests
KVM: arm64: Add a range to pkvm_mappings
KVM: arm64: Convert pkvm_mappings to interval tree
KVM: arm64: Add a range to __pkvm_host_test_clear_young_guest()
KVM: arm64: Add a range to __pkvm_host_wrprotect_guest()
KVM: arm64: Add a range to __pkvm_host_unshare_guest()
KVM: arm64: Add a range to __pkvm_host_share_guest()
KVM: arm64: Introduce for_each_hyp_page
KVM: arm64: Handle huge mappings for np-guest CMOs
KVM: arm64: Extend pKVM selftest for np-guests
KVM: arm64: Selftest for pKVM transitions
KVM: arm64: Don't WARN from __pkvm_host_share_guest()
KVM: arm64: Add .hyp.data section
KVM: arm64: Unconditionally cross check hyp state
KVM: arm64: Defer EL2 stage-1 mapping on share
KVM: arm64: Move hyp state to hyp_vmemmap
KVM: arm64: Introduce {get,set}_host_state() helpers
KVM: arm64: Use 0b11 for encoding PKVM_NOPAGE
KVM: arm64: Fix pKVM page-tracking comments
...

Signed-off-by: Marc Zyngier <maz@kernel.org>

19 files changed, 737 insertions(+), 252 deletions(-)
arch/arm64/include/asm/kvm_host.h (+7 -5)

···
 #define vcpu_sve_zcr_elx(vcpu)						\
	(unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1)
 
-#define vcpu_sve_state_size(vcpu) ({					\
+#define sve_state_size_from_vl(sve_max_vl) ({				\
	size_t __size_ret;						\
-	unsigned int __vcpu_vq;						\
+	unsigned int __vq;						\
									\
-	if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) {		\
+	if (WARN_ON(!sve_vl_valid(sve_max_vl))) {			\
		__size_ret = 0;						\
	} else {							\
-		__vcpu_vq = vcpu_sve_max_vq(vcpu);			\
-		__size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq);		\
+		__vq = sve_vq_from_vl(sve_max_vl);			\
+		__size_ret = SVE_SIG_REGS_SIZE(__vq);			\
	}								\
									\
	__size_ret;							\
 })
+
+#define vcpu_sve_state_size(vcpu) sve_state_size_from_vl((vcpu)->arch.sve_max_vl)
 
 #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
				 KVM_GUESTDBG_USE_SW_BP | \
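The refactor above only changes where the vector length comes from; the size computation itself is untouched. As a rough standalone sanity check of what it returns, assuming the architectural SVE layout of 32 Z-registers (VQ x 16 bytes each) plus 16 predicate registers and FFR (VQ x 2 bytes each); the factor of 546 below is derived from those register counts, not copied from the SVE_SIG_REGS_SIZE() definition:

#include <stdio.h>

/* sve_vq_from_vl(): vector length in bytes -> number of 128-bit quadwords */
static unsigned int sve_vq_from_vl(unsigned int vl) { return vl / 16; }

/* 32 Z-regs (vq * 16 bytes each) + 16 P-regs + FFR (vq * 2 bytes each) */
static size_t sve_state_size_from_vl(unsigned int vl)
{
	unsigned int vq = sve_vq_from_vl(vl);

	return (32 * 16 + 17 * 2) * (size_t)vq;	/* 546 * vq */
}

int main(void)
{
	/* e.g. a 256-bit vector length: vl = 32 bytes, vq = 2 */
	printf("%zu\n", sve_state_size_from_vl(32));	/* 1092 */
	return 0;
}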
arch/arm64/include/asm/kvm_pgtable.h (+6 -1)

···
 
 #define KVM_PHYS_INVALID		(-1ULL)
 
+#define KVM_PTE_TYPE			BIT(1)
+#define KVM_PTE_TYPE_BLOCK		0
+#define KVM_PTE_TYPE_PAGE		1
+#define KVM_PTE_TYPE_TABLE		1
+
 #define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)
 
 #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
···
  */
 struct kvm_pgtable {
	union {
-		struct rb_root				pkvm_mappings;
+		struct rb_root_cached			pkvm_mappings;
		struct {
			u32				ia_bits;
			s8				start_level;
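The KVM_PTE_TYPE constants are hoisted out of pgtable.c (see the pgtable.c hunk further down) so nVHE code can tell a block leaf from a page leaf; fixmap_clear_slot() in mm.c uses exactly this to pick the TLBI level. A minimal userspace sketch of the bit-1 decode, with the constants mirrored from the hunk above rather than pulled from kernel headers:

#include <stdint.h>
#include <stdio.h>

#define KVM_PTE_VALID		(1ULL << 0)
#define KVM_PTE_TYPE		(1ULL << 1)	/* BIT(1) */
#define KVM_PTE_TYPE_BLOCK	0
#define KVM_PTE_TYPE_PAGE	1

static const char *leaf_kind(uint64_t pte)
{
	if (!(pte & KVM_PTE_VALID))
		return "invalid";
	/* For a valid leaf, bit 1 distinguishes page from block. */
	return ((pte >> 1) & 1) == KVM_PTE_TYPE_PAGE ? "page" : "block";
}

int main(void)
{
	printf("%s\n", leaf_kind(KVM_PTE_VALID));			/* block */
	printf("%s\n", leaf_kind(KVM_PTE_VALID | KVM_PTE_TYPE));	/* page */
	return 0;
}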
arch/arm64/include/asm/kvm_pkvm.h (+8)

···
	return res;
 }
 
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline unsigned long pkvm_selftest_pages(void) { return 32; }
+#else
+static inline unsigned long pkvm_selftest_pages(void) { return 0; }
+#endif
+
 #define KVM_FFA_MBOX_NR_PAGES	1
 
 static inline unsigned long hyp_ffa_proxy_pages(void)
···
	struct rb_node node;
	u64 gfn;
	u64 pfn;
+	u64 nr_pages;
+	u64 __subtree_last;	/* Internal member for interval tree */
 };
 
 int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
arch/arm64/include/asm/sections.h (+1)

···
 extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 extern char __hyp_text_start[], __hyp_text_end[];
+extern char __hyp_data_start[], __hyp_data_end[];
 extern char __hyp_rodata_start[], __hyp_rodata_end[];
 extern char __hyp_reloc_begin[], __hyp_reloc_end[];
 extern char __hyp_bss_start[], __hyp_bss_end[];
arch/arm64/kernel/image-vars.h (+2)

···
 KVM_NVHE_ALIAS(__hyp_text_end);
 KVM_NVHE_ALIAS(__hyp_bss_start);
 KVM_NVHE_ALIAS(__hyp_bss_end);
+KVM_NVHE_ALIAS(__hyp_data_start);
+KVM_NVHE_ALIAS(__hyp_data_end);
 KVM_NVHE_ALIAS(__hyp_rodata_start);
 KVM_NVHE_ALIAS(__hyp_rodata_end);
 
arch/arm64/kernel/vmlinux.lds.S (+15 -3)

···
	*(__kvm_ex_table)					\
	__stop___kvm_ex_table = .;
 
-#define HYPERVISOR_DATA_SECTIONS				\
+#define HYPERVISOR_RODATA_SECTIONS				\
	HYP_SECTION_NAME(.rodata) : {				\
		. = ALIGN(PAGE_SIZE);				\
		__hyp_rodata_start = .;				\
···
		*(HYP_SECTION_NAME(.rodata))			\
		. = ALIGN(PAGE_SIZE);				\
		__hyp_rodata_end = .;				\
+	}
+
+#define HYPERVISOR_DATA_SECTION					\
+	HYP_SECTION_NAME(.data) : {				\
+		. = ALIGN(PAGE_SIZE);				\
+		__hyp_data_start = .;				\
+		*(HYP_SECTION_NAME(.data))			\
+		. = ALIGN(PAGE_SIZE);				\
+		__hyp_data_end = .;				\
	}
 
 #define HYPERVISOR_PERCPU_SECTION				\
···
 #define SBSS_ALIGN			PAGE_SIZE
 #else /* CONFIG_KVM */
 #define HYPERVISOR_EXTABLE
-#define HYPERVISOR_DATA_SECTIONS
+#define HYPERVISOR_RODATA_SECTIONS
+#define HYPERVISOR_DATA_SECTION
 #define HYPERVISOR_PERCPU_SECTION
 #define HYPERVISOR_RELOC_SECTION
 #define SBSS_ALIGN			0
···
	/* everything from this point to __init_begin will be marked RO NX */
	RO_DATA(PAGE_SIZE)
 
-	HYPERVISOR_DATA_SECTIONS
+	HYPERVISOR_RODATA_SECTIONS
 
	.got : { *(.got) }
	/*
···
	_data = .;
	_sdata = .;
	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+
+	HYPERVISOR_DATA_SECTION
 
	/*
	 * Data written with the MMU off but read with the MMU on requires
arch/arm64/kvm/arm.c (+7)

···
		goto out_err;
	}
 
+	err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_start),
+				  kvm_ksym_ref(__hyp_data_end), PAGE_HYP);
+	if (err) {
+		kvm_err("Cannot map .hyp.data section\n");
+		goto out_err;
+	}
+
	err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
				  kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
	if (err) {
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h (+10 -4)

···
 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
-int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot);
-int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
+int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
 int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot);
-int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
-int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm);
+int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm);
+int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm);
 int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu);
 
 bool addr_is_memory(phys_addr_t phys);
···
	else
		write_sysreg(0, vttbr_el2);
 }
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+void pkvm_ownership_selftest(void *base);
+#else
+static inline void pkvm_ownership_selftest(void *base) { }
+#endif
 #endif /* __KVM_NVHE_MEM_PROTECT__ */
arch/arm64/kvm/hyp/include/nvhe/memory.h (+46 -12)

···
 #include <linux/types.h>
 
 /*
- * Bits 0-1 are reserved to track the memory ownership state of each page:
- *   00: The page is owned exclusively by the page-table owner.
- *   01: The page is owned by the page-table owner, but is shared
- *       with another entity.
- *   10: The page is shared with, but not owned by the page-table owner.
- *   11: Reserved for future use (lending).
+ * Bits 0-1 are used to encode the memory ownership state of each page from the
+ * point of view of a pKVM "component" (host, hyp, guest, ... see enum
+ * pkvm_component_id):
+ *   00: The page is owned and exclusively accessible by the component;
+ *   01: The page is owned and accessible by the component, but is also
+ *       accessible by another component;
+ *   10: The page is accessible but not owned by the component;
+ * The storage of this state depends on the component: either in the
+ * hyp_vmemmap for the host and hyp states or in PTE software bits for guests.
  */
 enum pkvm_page_state {
	PKVM_PAGE_OWNED			= 0ULL,
	PKVM_PAGE_SHARED_OWNED		= BIT(0),
	PKVM_PAGE_SHARED_BORROWED	= BIT(1),
-	__PKVM_PAGE_RESERVED		= BIT(0) | BIT(1),
 
-	/* Meta-states which aren't encoded directly in the PTE's SW bits */
-	PKVM_NOPAGE			= BIT(2),
+	/*
+	 * 'Meta-states' are not stored directly in PTE SW bits for guest
+	 * states, but inferred from the context (e.g. invalid PTE entries).
+	 * For the host and hyp, meta-states are stored directly in the
+	 * struct hyp_page.
+	 */
+	PKVM_NOPAGE			= BIT(0) | BIT(1),
 };
-#define PKVM_PAGE_META_STATES_MASK	(~__PKVM_PAGE_RESERVED)
+#define PKVM_PAGE_STATE_MASK		(BIT(0) | BIT(1))
 
 #define PKVM_PAGE_STATE_PROT_MASK	(KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
 static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
···
	u16 refcount;
	u8 order;
 
-	/* Host (non-meta) state. Guarded by the host stage-2 lock. */
-	enum pkvm_page_state host_state : 8;
+	/* Host state. Guarded by the host stage-2 lock. */
+	unsigned __host_state : 4;
+
+	/*
+	 * Complement of the hyp state. Guarded by the hyp stage-1 lock. We use
+	 * the complement so that the initial 0 in __hyp_state_comp (due to the
+	 * entire vmemmap starting off zeroed) encodes PKVM_NOPAGE.
+	 */
+	unsigned __hyp_state_comp : 4;
 
	u32 host_share_guest_count;
 };
···
 #define hyp_page_to_phys(page)  hyp_pfn_to_phys((hyp_page_to_pfn(page)))
 #define hyp_page_to_virt(page)	__hyp_va(hyp_page_to_phys(page))
 #define hyp_page_to_pool(page)	(((struct hyp_page *)page)->pool)
+
+static inline enum pkvm_page_state get_host_state(struct hyp_page *p)
+{
+	return p->__host_state;
+}
+
+static inline void set_host_state(struct hyp_page *p, enum pkvm_page_state state)
+{
+	p->__host_state = state;
+}
+
+static inline enum pkvm_page_state get_hyp_state(struct hyp_page *p)
+{
+	return p->__hyp_state_comp ^ PKVM_PAGE_STATE_MASK;
+}
+
+static inline void set_hyp_state(struct hyp_page *p, enum pkvm_page_state state)
+{
+	p->__hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK;
+}
 
 /*
  * Refcounting for 'struct hyp_page'.
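The complement trick is easy to check in isolation: because PKVM_NOPAGE is now 0b11 and the field stores state ^ 0b11, a freshly zeroed vmemmap entry already decodes as PKVM_NOPAGE. A minimal userspace sketch, with the enum values mirrored from the hunk above (not compiled against kernel headers):

#include <assert.h>
#include <stdio.h>

enum pkvm_page_state {
	PKVM_PAGE_OWNED           = 0,
	PKVM_PAGE_SHARED_OWNED    = 1 << 0,
	PKVM_PAGE_SHARED_BORROWED = 1 << 1,
	PKVM_NOPAGE               = (1 << 0) | (1 << 1),
};
#define PKVM_PAGE_STATE_MASK ((1 << 0) | (1 << 1))

struct hyp_page { unsigned __hyp_state_comp : 4; };

static enum pkvm_page_state get_hyp_state(struct hyp_page *p)
{
	return p->__hyp_state_comp ^ PKVM_PAGE_STATE_MASK;
}

static void set_hyp_state(struct hyp_page *p, enum pkvm_page_state state)
{
	p->__hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK;
}

int main(void)
{
	struct hyp_page p = { 0 };	/* the vmemmap starts off zeroed */

	assert(get_hyp_state(&p) == PKVM_NOPAGE);	/* 0 ^ 0b11 == 0b11 */
	set_hyp_state(&p, PKVM_PAGE_OWNED);
	assert(get_hyp_state(&p) == PKVM_PAGE_OWNED);	/* round-trips */
	printf("zeroed entry decodes as PKVM_NOPAGE\n");
	return 0;
}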
arch/arm64/kvm/hyp/include/nvhe/mm.h (+3 -1)

···
 extern struct kvm_pgtable pkvm_pgtable;
 extern hyp_spinlock_t pkvm_pgd_lock;
 
-int hyp_create_pcpu_fixmap(void);
+int hyp_create_fixmap(void);
 void *hyp_fixmap_map(phys_addr_t phys);
 void hyp_fixmap_unmap(void);
+void *hyp_fixblock_map(phys_addr_t phys, size_t *size);
+void hyp_fixblock_unmap(void);
 
 int hyp_create_idmap(u32 hyp_va_bits);
 int hyp_map_vectors(void);
arch/arm64/kvm/hyp/nvhe/hyp-main.c (+10 -10)

···
 
	hyp_vcpu->vcpu.arch.ctxt	= host_vcpu->arch.ctxt;
 
-	hyp_vcpu->vcpu.arch.sve_state	= kern_hyp_va(host_vcpu->arch.sve_state);
-	/* Limit guest vector length to the maximum supported by the host. */
-	hyp_vcpu->vcpu.arch.sve_max_vl	= min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
-
	hyp_vcpu->vcpu.arch.mdcr_el2	= host_vcpu->arch.mdcr_el2;
	hyp_vcpu->vcpu.arch.hcr_el2	&= ~(HCR_TWI | HCR_TWE);
	hyp_vcpu->vcpu.arch.hcr_el2	|= READ_ONCE(host_vcpu->arch.hcr_el2) &
···
 {
	DECLARE_REG(u64, pfn, host_ctxt, 1);
	DECLARE_REG(u64, gfn, host_ctxt, 2);
-	DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
+	DECLARE_REG(u64, nr_pages, host_ctxt, 3);
+	DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
	struct pkvm_hyp_vcpu *hyp_vcpu;
	int ret = -EINVAL;
 
···
	if (ret)
		goto out;
 
-	ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot);
+	ret = __pkvm_host_share_guest(pfn, gfn, nr_pages, hyp_vcpu, prot);
 out:
	cpu_reg(host_ctxt, 1) = ret;
 }
···
 {
	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
	DECLARE_REG(u64, gfn, host_ctxt, 2);
+	DECLARE_REG(u64, nr_pages, host_ctxt, 3);
	struct pkvm_hyp_vm *hyp_vm;
	int ret = -EINVAL;
 
···
	if (!hyp_vm)
		goto out;
 
-	ret = __pkvm_host_unshare_guest(gfn, hyp_vm);
+	ret = __pkvm_host_unshare_guest(gfn, nr_pages, hyp_vm);
	put_pkvm_hyp_vm(hyp_vm);
 out:
	cpu_reg(host_ctxt, 1) = ret;
···
 {
	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
	DECLARE_REG(u64, gfn, host_ctxt, 2);
+	DECLARE_REG(u64, nr_pages, host_ctxt, 3);
	struct pkvm_hyp_vm *hyp_vm;
	int ret = -EINVAL;
 
···
	if (!hyp_vm)
		goto out;
 
-	ret = __pkvm_host_wrprotect_guest(gfn, hyp_vm);
+	ret = __pkvm_host_wrprotect_guest(gfn, nr_pages, hyp_vm);
	put_pkvm_hyp_vm(hyp_vm);
 out:
	cpu_reg(host_ctxt, 1) = ret;
···
 {
	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
	DECLARE_REG(u64, gfn, host_ctxt, 2);
-	DECLARE_REG(bool, mkold, host_ctxt, 3);
+	DECLARE_REG(u64, nr_pages, host_ctxt, 3);
+	DECLARE_REG(bool, mkold, host_ctxt, 4);
	struct pkvm_hyp_vm *hyp_vm;
	int ret = -EINVAL;
 
···
	if (!hyp_vm)
		goto out;
 
-	ret = __pkvm_host_test_clear_young_guest(gfn, mkold, hyp_vm);
+	ret = __pkvm_host_test_clear_young_guest(gfn, nr_pages, mkold, hyp_vm);
	put_pkvm_hyp_vm(hyp_vm);
 out:
	cpu_reg(host_ctxt, 1) = ret;
arch/arm64/kvm/hyp/nvhe/hyp.lds.S (+2)

···
	BEGIN_HYP_SECTION(.data..percpu)
		PERCPU_INPUT(L1_CACHE_BYTES)
	END_HYP_SECTION
+
	HYP_SECTION(.bss)
+	HYP_SECTION(.data)
 }
arch/arm64/kvm/hyp/nvhe/mem_protect.c (+388 -120)

···
	hyp_spin_unlock(&pkvm_pgd_lock);
 }
 
+#define for_each_hyp_page(__p, __st, __sz)				\
+	for (struct hyp_page *__p = hyp_phys_to_page(__st),		\
+	     *__e = __p + ((__sz) >> PAGE_SHIFT);			\
+	     __p < __e; __p++)
+
 static void *host_s2_zalloc_pages_exact(size_t size)
 {
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
···
	return 0;
 }
 
-static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
-				      enum kvm_pgtable_prot prot)
-{
-	return true;
-}
-
 static void *guest_s2_zalloc_pages_exact(size_t size)
 {
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));
···
	hyp_put_page(&current_vm->pool, addr);
 }
 
+static void __apply_guest_page(void *va, size_t size,
+			       void (*func)(void *addr, size_t size))
+{
+	size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
+	va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
+	size = PAGE_ALIGN(size);
+
+	while (size) {
+		size_t map_size = PAGE_SIZE;
+		void *map;
+
+		if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
+			map = hyp_fixblock_map(__hyp_pa(va), &map_size);
+		else
+			map = hyp_fixmap_map(__hyp_pa(va));
+
+		func(map, map_size);
+
+		if (map_size == PMD_SIZE)
+			hyp_fixblock_unmap();
+		else
+			hyp_fixmap_unmap();
+
+		size -= map_size;
+		va += map_size;
+	}
+}
+
 static void clean_dcache_guest_page(void *va, size_t size)
 {
-	__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
-	hyp_fixmap_unmap();
+	__apply_guest_page(va, size, __clean_dcache_guest_page);
 }
 
 static void invalidate_icache_guest_page(void *va, size_t size)
 {
-	__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
-	hyp_fixmap_unmap();
+	__apply_guest_page(va, size, __invalidate_icache_guest_page);
 }
 
 int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
···
	};
 
	guest_lock_component(vm);
-	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
-					guest_stage2_force_pte_cb);
+	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL);
	guest_unlock_component(vm);
	if (ret)
		return ret;
···
		return -EAGAIN;
 
	if (pte) {
-		WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
+		WARN_ON(addr_is_memory(addr) &&
+			get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE);
		return -EPERM;
	}
 
···
 
 static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
 {
-	phys_addr_t end = addr + size;
-
-	for (; addr < end; addr += PAGE_SIZE)
-		hyp_phys_to_page(addr)->host_state = state;
+	for_each_hyp_page(page, addr, size)
+		set_host_state(page, state);
 }
 
 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
···
 static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
 {
-	u64 end = addr + size;
	int ret;
 
-	ret = check_range_allowed_memory(addr, end);
+	ret = check_range_allowed_memory(addr, addr + size);
	if (ret)
		return ret;
 
	hyp_assert_lock_held(&host_mmu.lock);
-	for (; addr < end; addr += PAGE_SIZE) {
-		if (hyp_phys_to_page(addr)->host_state != state)
+
+	for_each_hyp_page(page, addr, size) {
+		if (get_host_state(page) != state)
			return -EPERM;
	}
 
···
 static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
 {
-	if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
+	if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
 
		if (ret)
···
	return 0;
 }
 
-static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
+static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
 {
-	if (!kvm_pte_valid(pte))
-		return PKVM_NOPAGE;
-
-	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+	for_each_hyp_page(page, phys, size)
+		set_hyp_state(page, state);
 }
 
-static int __hyp_check_page_state_range(u64 addr, u64 size,
-					enum pkvm_page_state state)
+static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
 {
-	struct check_walk_data d = {
-		.desired	= state,
-		.get_page_state	= hyp_get_page_state,
-	};
+	for_each_hyp_page(page, phys, size) {
+		if (get_hyp_state(page) != state)
+			return -EPERM;
+	}
 
-	hyp_assert_lock_held(&pkvm_pgd_lock);
-	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
+	return 0;
 }
 
 static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
···
	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
 }
 
-static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
+static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
					  u64 size, enum pkvm_page_state state)
 {
-	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= guest_get_page_state,
···
 int __pkvm_host_share_hyp(u64 pfn)
 {
	u64 phys = hyp_pfn_to_phys(pfn);
-	void *virt = __hyp_va(phys);
-	enum kvm_pgtable_prot prot;
	u64 size = PAGE_SIZE;
	int ret;
 
···
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
-	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
-		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
-		if (ret)
-			goto unlock;
-	}
+	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
+	if (ret)
+		goto unlock;
 
-	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
-	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
+	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
 
 unlock:
···
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
-	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
+	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
···
		goto unlock;
	}
 
-	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
+	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));
 
 unlock:
···
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
-	enum kvm_pgtable_prot prot;
	int ret;
 
	host_lock_component();
···
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
-	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
-		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
-		if (ret)
-			goto unlock;
-	}
+	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
+	if (ret)
+		goto unlock;
 
-	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
-	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
+	__hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
+	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));
 
 unlock:
···
	host_lock_component();
	hyp_lock_component();
 
-	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
+	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
-	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
-		ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
-		if (ret)
-			goto unlock;
-	}
+	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
+	if (ret)
+		goto unlock;
 
+	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
 
···
 {
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
+	u64 phys = __hyp_pa(start);
	u64 size = end - start;
+	struct hyp_page *p;
	int ret;
 
	host_lock_component();
	hyp_lock_component();
 
-	ret = __host_check_page_state_range(__hyp_pa(start), size,
-					    PKVM_PAGE_SHARED_OWNED);
+	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
 
-	ret = __hyp_check_page_state_range(start, size,
-					   PKVM_PAGE_SHARED_BORROWED);
+	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
 
-	for (cur = start; cur < end; cur += PAGE_SIZE)
-		hyp_page_ref_inc(hyp_virt_to_page(cur));
+	for (cur = start; cur < end; cur += PAGE_SIZE) {
+		p = hyp_virt_to_page(cur);
+		hyp_page_ref_inc(p);
+		if (p->refcount == 1)
+			WARN_ON(pkvm_create_mappings_locked((void *)cur,
+							    (void *)cur + PAGE_SIZE,
+							    PAGE_HYP));
+	}
 
 unlock:
	hyp_unlock_component();
···
 {
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
+	struct hyp_page *p;
 
	host_lock_component();
	hyp_lock_component();
 
-	for (cur = start; cur < end; cur += PAGE_SIZE)
-		hyp_page_ref_dec(hyp_virt_to_page(cur));
+	for (cur = start; cur < end; cur += PAGE_SIZE) {
+		p = hyp_virt_to_page(cur);
+		if (p->refcount == 1)
+			WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE);
+		hyp_page_ref_dec(p);
+	}
 
	hyp_unlock_component();
	host_unlock_component();
···
	return ret;
 }
 
-int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size)
+{
+	size_t block_size;
+
+	if (nr_pages == 1) {
+		*size = PAGE_SIZE;
+		return 0;
+	}
+
+	/* We solely support second to last level huge mapping */
+	block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1);
+
+	if (nr_pages != block_size >> PAGE_SHIFT)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(phys | ipa, block_size))
+		return -EINVAL;
+
+	*size = block_size;
+	return 0;
+}
+
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
 {
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
-	struct hyp_page *page;
+	u64 size;
	int ret;
 
	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;
 
-	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+	ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
+	if (ret)
+		return ret;
+
+	ret = check_range_allowed_memory(phys, phys + size);
	if (ret)
		return ret;
 
	host_lock_component();
	guest_lock_component(vm);
 
-	ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
+	ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;
 
-	page = hyp_phys_to_page(phys);
-	switch (page->host_state) {
-	case PKVM_PAGE_OWNED:
-		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
-		break;
-	case PKVM_PAGE_SHARED_OWNED:
-		if (page->host_share_guest_count)
-			break;
-		/* Only host to np-guest multi-sharing is tolerated */
-		WARN_ON(1);
-		fallthrough;
-	default:
-		ret = -EPERM;
-		goto unlock;
+	for_each_hyp_page(page, phys, size) {
+		switch (get_host_state(page)) {
+		case PKVM_PAGE_OWNED:
+			continue;
+		case PKVM_PAGE_SHARED_OWNED:
+			if (page->host_share_guest_count == U32_MAX) {
+				ret = -EBUSY;
+				goto unlock;
+			}
+
+			/* Only host to np-guest multi-sharing is tolerated */
+			if (page->host_share_guest_count)
+				continue;
+
+			fallthrough;
+		default:
+			ret = -EPERM;
+			goto unlock;
+		}
	}
 
-	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
+	for_each_hyp_page(page, phys, size) {
+		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
+		page->host_share_guest_count++;
+	}
+
+	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
-	page->host_share_guest_count++;
 
 unlock:
	guest_unlock_component(vm);
···
	return ret;
 }
 
-static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
+static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size)
 {
	enum pkvm_page_state state;
-	struct hyp_page *page;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
···
		return ret;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
-	if (level != KVM_PGTABLE_LAST_LEVEL)
+	if (kvm_granule_size(level) != size)
		return -E2BIG;
 
	state = guest_get_page_state(pte, ipa);
···
		return -EPERM;
 
	phys = kvm_pte_to_phys(pte);
-	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+	ret = check_range_allowed_memory(phys, phys + size);
	if (WARN_ON(ret))
		return ret;
 
-	page = hyp_phys_to_page(phys);
-	if (page->host_state != PKVM_PAGE_SHARED_OWNED)
-		return -EPERM;
-	if (WARN_ON(!page->host_share_guest_count))
-		return -EINVAL;
+	for_each_hyp_page(page, phys, size) {
+		if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED)
+			return -EPERM;
+		if (WARN_ON(!page->host_share_guest_count))
+			return -EINVAL;
+	}
 
	*__phys = phys;
 
	return 0;
 }
 
-int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
 {
	u64 ipa = hyp_pfn_to_phys(gfn);
-	struct hyp_page *page;
-	u64 phys;
+	u64 size, phys;
	int ret;
+
+	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
+	if (ret)
+		return ret;
 
	host_lock_component();
	guest_lock_component(vm);
 
-	ret = __check_host_shared_guest(vm, &phys, ipa);
+	ret = __check_host_shared_guest(vm, &phys, ipa, size);
	if (ret)
		goto unlock;
 
-	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
+	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
	if (ret)
		goto unlock;
 
-	page = hyp_phys_to_page(phys);
-	page->host_share_guest_count--;
-	if (!page->host_share_guest_count)
-		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));
+	for_each_hyp_page(page, phys, size) {
+		/* __check_host_shared_guest() protects against underflow */
+		page->host_share_guest_count--;
+		if (!page->host_share_guest_count)
+			set_host_state(page, PKVM_PAGE_OWNED);
+	}
 
 unlock:
	guest_unlock_component(vm);
···
	return ret;
 }
 
-static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
+static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size)
 {
	u64 phys;
	int ret;
···
	host_lock_component();
	guest_lock_component(vm);
 
-	ret = __check_host_shared_guest(vm, &phys, ipa);
+	ret = __check_host_shared_guest(vm, &phys, ipa, size);
 
	guest_unlock_component(vm);
	host_unlock_component();
···
	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;
 
-	assert_host_shared_guest(vm, ipa);
+	assert_host_shared_guest(vm, ipa, PAGE_SIZE);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
	guest_unlock_component(vm);
···
	return ret;
 }
 
-int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
 {
-	u64 ipa = hyp_pfn_to_phys(gfn);
+	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;
 
	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;
 
-	assert_host_shared_guest(vm, ipa);
+	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
+	if (ret)
+		return ret;
+
+	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
-	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
+	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
	guest_unlock_component(vm);
 
	return ret;
 }
 
-int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
+int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm)
 {
-	u64 ipa = hyp_pfn_to_phys(gfn);
+	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;
 
	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;
 
-	assert_host_shared_guest(vm, ipa);
+	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
+	if (ret)
+		return ret;
+
+	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
-	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
+	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
	guest_unlock_component(vm);
 
	return ret;
···
	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;
 
-	assert_host_shared_guest(vm, ipa);
+	assert_host_shared_guest(vm, ipa, PAGE_SIZE);
	guest_lock_component(vm);
	kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
	guest_unlock_component(vm);
 
	return 0;
 }
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+struct pkvm_expected_state {
+	enum pkvm_page_state host;
+	enum pkvm_page_state hyp;
+	enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
+};
+
+static struct pkvm_expected_state selftest_state;
+static struct hyp_page *selftest_page;
+
+static struct pkvm_hyp_vm selftest_vm = {
+	.kvm = {
+		.arch = {
+			.mmu = {
+				.arch = &selftest_vm.kvm.arch,
+				.pgt = &selftest_vm.pgt,
+			},
+		},
+	},
+};
+
+static struct pkvm_hyp_vcpu selftest_vcpu = {
+	.vcpu = {
+		.arch = {
+			.hw_mmu = &selftest_vm.kvm.arch.mmu,
+		},
+		.kvm = &selftest_vm.kvm,
+	},
+};
+
+static void init_selftest_vm(void *virt)
+{
+	struct hyp_page *p = hyp_virt_to_page(virt);
+	int i;
+
+	selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
+	WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
+
+	for (i = 0; i < pkvm_selftest_pages(); i++) {
+		if (p[i].refcount)
+			continue;
+		p[i].refcount = 1;
+		hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+	}
+}
+
+static u64 selftest_ipa(void)
+{
+	return BIT(selftest_vm.pgt.ia_bits - 1);
+}
+
+static void assert_page_state(void)
+{
+	void *virt = hyp_page_to_virt(selftest_page);
+	u64 size = PAGE_SIZE << selftest_page->order;
+	struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
+	u64 phys = hyp_virt_to_phys(virt);
+	u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
+	struct pkvm_hyp_vm *vm;
+
+	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+
+	host_lock_component();
+	WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
+	host_unlock_component();
+
+	hyp_lock_component();
+	WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp));
+	hyp_unlock_component();
+
+	guest_lock_component(&selftest_vm);
+	WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0]));
+	WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1]));
+	guest_unlock_component(&selftest_vm);
+}
+
+#define assert_transition_res(res, fn, ...)		\
+	do {						\
+		WARN_ON(fn(__VA_ARGS__) != res);	\
+		assert_page_state();			\
+	} while (0)
+
+void pkvm_ownership_selftest(void *base)
+{
+	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
+	void *virt = hyp_alloc_pages(&host_s2_pool, 0);
+	struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
+	struct pkvm_hyp_vm *vm = &selftest_vm;
+	u64 phys, size, pfn, gfn;
+
+	WARN_ON(!virt);
+	selftest_page = hyp_virt_to_page(virt);
+	selftest_page->refcount = 0;
+	init_selftest_vm(base);
+
+	size = PAGE_SIZE << selftest_page->order;
+	phys = hyp_virt_to_phys(virt);
+	pfn = hyp_phys_to_pfn(phys);
+	gfn = hyp_phys_to_pfn(selftest_ipa());
+
+	selftest_state.host = PKVM_NOPAGE;
+	selftest_state.hyp = PKVM_PAGE_OWNED;
+	selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
+	assert_page_state();
+	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);
+	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
+	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
+
+	selftest_state.host = PKVM_PAGE_OWNED;
+	selftest_state.hyp = PKVM_NOPAGE;
+	assert_transition_res(0,	__pkvm_hyp_donate_host, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);
+	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
+	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
+
+	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
+	selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
+	assert_transition_res(0,	__pkvm_host_share_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
+
+	assert_transition_res(0,	hyp_pin_shared_mem, virt, virt + size);
+	assert_transition_res(0,	hyp_pin_shared_mem, virt, virt + size);
+	hyp_unpin_shared_mem(virt, virt + size);
+	WARN_ON(hyp_page_count(virt) != 1);
+	assert_transition_res(-EBUSY,	__pkvm_host_unshare_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
+
+	hyp_unpin_shared_mem(virt, virt + size);
+	assert_page_state();
+	WARN_ON(hyp_page_count(virt));
+
+	selftest_state.host = PKVM_PAGE_OWNED;
+	selftest_state.hyp = PKVM_NOPAGE;
+	assert_transition_res(0,	__pkvm_host_unshare_hyp, pfn);
+
+	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
+	selftest_state.hyp = PKVM_NOPAGE;
+	assert_transition_res(0,	__pkvm_host_share_ffa, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
+	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
+
+	selftest_state.host = PKVM_PAGE_OWNED;
+	selftest_state.hyp = PKVM_NOPAGE;
+	assert_transition_res(0,	__pkvm_host_unshare_ffa, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);
+
+	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
+	selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
+	assert_transition_res(0,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
+	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
+	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
+	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
+	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
+
+	selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
+	assert_transition_res(0,	__pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
+	WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);
+
+	selftest_state.guest[0] = PKVM_NOPAGE;
+	assert_transition_res(0,	__pkvm_host_unshare_guest, gfn, 1, vm);
+
+	selftest_state.guest[1] = PKVM_NOPAGE;
+	selftest_state.host = PKVM_PAGE_OWNED;
+	assert_transition_res(0,	__pkvm_host_unshare_guest, gfn + 1, 1, vm);
+
+	selftest_state.host = PKVM_NOPAGE;
+	selftest_state.hyp = PKVM_PAGE_OWNED;
+	assert_transition_res(0,	__pkvm_host_donate_hyp, pfn, 1);
+
+	selftest_page->refcount = 1;
+	hyp_put_page(&host_s2_pool, virt);
+}
+#endif
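The gatekeeper for all the new nr_pages parameters is __guest_check_transition_size(): either exactly one page, or exactly one aligned second-to-last-level block. A standalone model of that check, with PAGE_SIZE and PMD_SIZE assumed to be 4KiB and 2MiB here rather than taken from kernel headers:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define PMD_SIZE	(1ULL << 21)	/* 2MiB with 4KiB granules */

static int guest_check_transition_size(uint64_t phys, uint64_t ipa,
				       uint64_t nr_pages, uint64_t *size)
{
	if (nr_pages == 1) {
		*size = PAGE_SIZE;
		return 0;
	}
	/* Only a whole second-to-last-level block is accepted... */
	if (nr_pages != PMD_SIZE >> PAGE_SHIFT)
		return -EINVAL;
	/* ...and both the PA and the IPA must be block-aligned. */
	if ((phys | ipa) & (PMD_SIZE - 1))
		return -EINVAL;
	*size = PMD_SIZE;
	return 0;
}

int main(void)
{
	uint64_t size;

	printf("%d\n", guest_check_transition_size(0x200000, 0x400000, 512, &size)); /* 0 */
	printf("%d\n", guest_check_transition_size(0x201000, 0x400000, 512, &size)); /* -EINVAL: misaligned */
	printf("%d\n", guest_check_transition_size(0x200000, 0x400000, 8, &size));   /* -EINVAL: not a block */
	return 0;
}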
arch/arm64/kvm/hyp/nvhe/mm.c (+89 -8)

···
	return 0;
 }
 
-void *hyp_fixmap_map(phys_addr_t phys)
+static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys)
 {
-	struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
	kvm_pte_t pte, *ptep = slot->ptep;
 
	pte = *ptep;
···
	return (void *)slot->addr;
 }
 
+void *hyp_fixmap_map(phys_addr_t phys)
+{
+	return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys);
+}
+
 static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
 {
	kvm_pte_t *ptep = slot->ptep;
	u64 addr = slot->addr;
+	u32 level;
+
+	if (FIELD_GET(KVM_PTE_TYPE, *ptep) == KVM_PTE_TYPE_PAGE)
+		level = KVM_PGTABLE_LAST_LEVEL;
+	else
+		level = KVM_PGTABLE_LAST_LEVEL - 1; /* create_fixblock() guarantees PMD level */
 
	WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
 
···
	 * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
	 */
	dsb(ishst);
-	__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
+	__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
	dsb(ish);
	isb();
 }
···
 static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
				   enum kvm_pgtable_walk_flags visit)
 {
-	struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
+	struct hyp_fixmap_slot *slot = (struct hyp_fixmap_slot *)ctx->arg;
 
-	if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
+	if (!kvm_pte_valid(ctx->old) || (ctx->end - ctx->start) != kvm_granule_size(ctx->level))
		return -EINVAL;
 
	slot->addr = ctx->addr;
···
	struct kvm_pgtable_walker walker = {
		.cb	= __create_fixmap_slot_cb,
		.flags	= KVM_PGTABLE_WALK_LEAF,
-		.arg = (void *)cpu,
+		.arg = per_cpu_ptr(&fixmap_slots, cpu),
	};
 
	return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
 }
 
-int hyp_create_pcpu_fixmap(void)
+#if PAGE_SHIFT < 16
+#define HAS_FIXBLOCK
+static struct hyp_fixmap_slot hyp_fixblock_slot;
+static DEFINE_HYP_SPINLOCK(hyp_fixblock_lock);
+#endif
+
+static int create_fixblock(void)
+{
+#ifdef HAS_FIXBLOCK
+	struct kvm_pgtable_walker walker = {
+		.cb	= __create_fixmap_slot_cb,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+		.arg = &hyp_fixblock_slot,
+	};
+	unsigned long addr;
+	phys_addr_t phys;
+	int ret, i;
+
+	/* Find a RAM phys address, PMD aligned */
+	for (i = 0; i < hyp_memblock_nr; i++) {
+		phys = ALIGN(hyp_memory[i].base, PMD_SIZE);
+		if (phys + PMD_SIZE < (hyp_memory[i].base + hyp_memory[i].size))
+			break;
+	}
+
+	if (i >= hyp_memblock_nr)
+		return -EINVAL;
+
+	hyp_spin_lock(&pkvm_pgd_lock);
+	addr = ALIGN(__io_map_base, PMD_SIZE);
+	ret = __pkvm_alloc_private_va_range(addr, PMD_SIZE);
+	if (ret)
+		goto unlock;
+
+	ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PMD_SIZE, phys, PAGE_HYP);
+	if (ret)
+		goto unlock;
+
+	ret = kvm_pgtable_walk(&pkvm_pgtable, addr, PMD_SIZE, &walker);
+
+unlock:
+	hyp_spin_unlock(&pkvm_pgd_lock);
+
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+void *hyp_fixblock_map(phys_addr_t phys, size_t *size)
+{
+#ifdef HAS_FIXBLOCK
+	*size = PMD_SIZE;
+	hyp_spin_lock(&hyp_fixblock_lock);
+	return fixmap_map_slot(&hyp_fixblock_slot, phys);
+#else
+	*size = PAGE_SIZE;
+	return hyp_fixmap_map(phys);
+#endif
+}
+
+void hyp_fixblock_unmap(void)
+{
+#ifdef HAS_FIXBLOCK
+	fixmap_clear_slot(&hyp_fixblock_slot);
+	hyp_spin_unlock(&hyp_fixblock_lock);
+#else
+	hyp_fixmap_unmap();
+#endif
+}
+
+int hyp_create_fixmap(void)
 {
	unsigned long addr, i;
	int ret;
···
			return ret;
	}
 
-	return 0;
+	return create_fixblock();
 }
 
 int hyp_create_idmap(u32 hyp_va_bits)
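The single fixblock slot is what lets __apply_guest_page() in mem_protect.c do cache maintenance in PMD_SIZE chunks instead of page by page. A standalone model of that chunking decision, again assuming 4KiB pages and 2MiB blocks:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	(1ULL << 12)
#define PMD_SIZE	(1ULL << 21)

int main(void)
{
	uint64_t va = PMD_SIZE - 2 * PAGE_SIZE;	/* starts just below a block boundary */
	uint64_t size = PMD_SIZE + 4 * PAGE_SIZE;

	while (size) {
		uint64_t map_size = PAGE_SIZE;

		/* Block-aligned and a whole block left: use the fixblock slot. */
		if (!(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
			map_size = PMD_SIZE;	/* hyp_fixblock_map() path */

		printf("map va=0x%llx size=0x%llx\n",
		       (unsigned long long)va, (unsigned long long)map_size);
		va += map_size;
		size -= map_size;
	}
	/* Output: two pages, one 2MiB block, then two trailing pages. */
	return 0;
}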
arch/arm64/kvm/hyp/nvhe/pkvm.c (+44 -3)

···
	hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
 }
 
+static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+	void *sve_state;
+
+	if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
+		return;
+
+	sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
+	hyp_unpin_shared_mem(sve_state,
+			     sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
+}
+
 static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
			     unsigned int nr_vcpus)
 {
···
			continue;
 
		unpin_host_vcpu(hyp_vcpu->host_vcpu);
+		unpin_host_sve_state(hyp_vcpu);
	}
 }
 
···
	pkvm_init_features_from_host(hyp_vm, host_kvm);
 }
 
-static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
+static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
 {
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
+	unsigned int sve_max_vl;
+	size_t sve_state_size;
+	void *sve_state;
+	int ret = 0;
 
-	if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
+	if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
		vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
+		return 0;
+	}
+
+	/* Limit guest vector length to the maximum supported by the host. */
+	sve_max_vl = min(READ_ONCE(host_vcpu->arch.sve_max_vl), kvm_host_sve_max_vl);
+	sve_state_size = sve_state_size_from_vl(sve_max_vl);
+	sve_state = kern_hyp_va(READ_ONCE(host_vcpu->arch.sve_state));
+
+	if (!sve_state || !sve_state_size) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = hyp_pin_shared_mem(sve_state, sve_state + sve_state_size);
+	if (ret)
+		goto err;
+
+	vcpu->arch.sve_state = sve_state;
+	vcpu->arch.sve_max_vl = sve_max_vl;
+
+	return 0;
+err:
+	clear_bit(KVM_ARM_VCPU_SVE, vcpu->kvm->arch.vcpu_features);
+	return ret;
 }
 
 static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
···
	if (ret)
		goto done;
 
-	pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
+	ret = pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
 done:
	if (ret)
		unpin_host_vcpu(host_vcpu);
arch/arm64/kvm/hyp/nvhe/setup.c (+23 -4)

···
 static void *vm_table_base;
 static void *hyp_pgt_base;
 static void *host_s2_pgt_base;
+static void *selftest_base;
 static void *ffa_proxy_pages;
 static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 static struct hyp_pool hpool;
···
	unsigned long nr_pages;
 
	hyp_early_alloc_init(virt, size);
+
+	nr_pages = pkvm_selftest_pages();
+	selftest_base = hyp_early_alloc_contig(nr_pages);
+	if (nr_pages && !selftest_base)
+		return -ENOMEM;
 
	nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page));
	vmemmap_base = hyp_early_alloc_contig(nr_pages);
···
	if (ret)
		return ret;
 
+	ret = pkvm_create_mappings(__hyp_data_start, __hyp_data_end, PAGE_HYP);
+	if (ret)
+		return ret;
+
	ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
	if (ret)
		return ret;
···
				     enum kvm_pgtable_walk_flags visit)
 {
	enum pkvm_page_state state;
+	struct hyp_page *page;
	phys_addr_t phys;
 
	if (!kvm_pte_valid(ctx->old))
···
	if (!addr_is_memory(phys))
		return -EINVAL;
 
+	page = hyp_phys_to_page(phys);
+
	/*
	 * Adjust the host stage-2 mappings to match the ownership attributes
-	 * configured in the hypervisor stage-1.
+	 * configured in the hypervisor stage-1, and make sure to propagate them
+	 * to the hyp_vmemmap state.
	 */
	state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old));
	switch (state) {
	case PKVM_PAGE_OWNED:
+		set_hyp_state(page, PKVM_PAGE_OWNED);
		return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
	case PKVM_PAGE_SHARED_OWNED:
-		hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED;
+		set_hyp_state(page, PKVM_PAGE_SHARED_OWNED);
+		set_host_state(page, PKVM_PAGE_SHARED_BORROWED);
		break;
	case PKVM_PAGE_SHARED_BORROWED:
-		hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED;
+		set_hyp_state(page, PKVM_PAGE_SHARED_BORROWED);
+		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
		break;
	default:
		return -EINVAL;
···
	if (ret)
		goto out;
 
-	ret = hyp_create_pcpu_fixmap();
+	ret = hyp_create_fixmap();
	if (ret)
		goto out;
 
···
		goto out;
 
	pkvm_hyp_vm_table_init(vm_table_base);
+
+	pkvm_ownership_selftest(selftest_base);
 out:
	/*
	 * We tail-called to here from handle___pkvm_init() and will not return,
arch/arm64/kvm/hyp/pgtable.c (-6)

···
 #include <asm/kvm_pgtable.h>
 #include <asm/stage2_pgtable.h>
 
-
-#define KVM_PTE_TYPE			BIT(1)
-#define KVM_PTE_TYPE_BLOCK		0
-#define KVM_PTE_TYPE_PAGE		1
-#define KVM_PTE_TYPE_TABLE		1
-
 struct kvm_pgtable_walk_data {
	struct kvm_pgtable_walker *walker;
 
arch/arm64/kvm/mmu.c (+5 -1)

···
	if (map_size == PAGE_SIZE)
		return true;
 
+	/* pKVM only supports PMD_SIZE huge-mappings */
+	if (is_protected_kvm_enabled() && map_size != PMD_SIZE)
+		return false;
+
	size = memslot->npages * PAGE_SIZE;
 
	gpa_start = memslot->base_gfn << PAGE_SHIFT;
···
	 * logging_active is guaranteed to never be true for VM_PFNMAP
	 * memslots.
	 */
-	if (logging_active || is_protected_kvm_enabled()) {
+	if (logging_active) {
		force_pte = true;
		vma_shift = PAGE_SHIFT;
	} else {
arch/arm64/kvm/pkvm.c (+71 -74)

···
  */
 
 #include <linux/init.h>
+#include <linux/interval_tree_generic.h>
 #include <linux/kmemleak.h>
 #include <linux/kvm_host.h>
 #include <asm/kvm_mmu.h>
···
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
+	hyp_mem_pages += pkvm_selftest_pages();
	hyp_mem_pages += hyp_ffa_proxy_pages();
 
	/*
···
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
 
···
 }
 device_initcall_sync(finalize_pkvm);
 
-static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
+static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
 {
-	struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
-	struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);
-
-	if (a->gfn < b->gfn)
-		return -1;
-	if (a->gfn > b->gfn)
-		return 1;
-	return 0;
+	return m->gfn * PAGE_SIZE;
 }
 
-static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
+static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
 {
-	struct rb_node *node = root->rb_node, *prev = NULL;
-	struct pkvm_mapping *mapping;
-
-	while (node) {
-		mapping = rb_entry(node, struct pkvm_mapping, node);
-		if (mapping->gfn == gfn)
-			return node;
-		prev = node;
-		node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
-	}
-
-	return prev;
+	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
 }
+
+INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
+		     __pkvm_mapping_start, __pkvm_mapping_end, static,
+		     pkvm_mapping);
 
 /*
- * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
- * of __map inline.
+ * __tmp is updated to iter_first(pkvm_mappings) *before* entering the body of the loop to allow
+ * freeing of __map inline.
  */
 #define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)				\
-	for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings,		\
-							     ((__start) >> PAGE_SHIFT));	\
+	for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings,	\
+								  __start, __end - 1);		\
	     __tmp && ({									\
-		__map = rb_entry(__tmp, struct pkvm_mapping, node);				\
-		__tmp = rb_next(__tmp);								\
+		__map = __tmp;									\
+		__tmp = pkvm_mapping_iter_next(__map, __start, __end - 1);			\
		true;										\
	     });										\
-	)											\
-	if (__map->gfn < ((__start) >> PAGE_SHIFT))						\
-		continue;									\
-	else if (__map->gfn >= ((__end) >> PAGE_SHIFT))						\
-		break;										\
-	else
+	)
 
 int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
 {
-	pgt->pkvm_mappings	= RB_ROOT;
+	pgt->pkvm_mappings	= RB_ROOT_CACHED;
	pgt->mmu		= mmu;
+
+	return 0;
+}
+
+static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
+{
+	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+	pkvm_handle_t handle = kvm->arch.pkvm.handle;
+	struct pkvm_mapping *mapping;
+	int ret;
+
+	if (!handle)
+		return 0;
+
+	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
+		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
+					mapping->nr_pages);
+		if (WARN_ON(ret))
+			return ret;
+		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
+		kfree(mapping);
+	}
 
	return 0;
 }
 
 void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 {
-	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
-	pkvm_handle_t handle = kvm->arch.pkvm.handle;
-	struct pkvm_mapping *mapping;
-	struct rb_node *node;
-
-	if (!handle)
-		return;
-
-	node = rb_first(&pgt->pkvm_mappings);
-	while (node) {
-		mapping = rb_entry(node, struct pkvm_mapping, node);
-		kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
-		node = rb_next(node);
-		rb_erase(&mapping->node, &pgt->pkvm_mappings);
-		kfree(mapping);
-	}
+	__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
 }
 
 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
···
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;
 
-	if (size != PAGE_SIZE)
+	if (size != PAGE_SIZE && size != PMD_SIZE)
		return -EINVAL;
 
	lockdep_assert_held_write(&kvm->mmu_lock);
-	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
-	if (ret) {
-		/* Is the gfn already mapped due to a racing vCPU? */
-		if (ret == -EPERM)
+
+	/*
+	 * Calling stage2_map() on top of existing mappings is either happening because of a race
+	 * with another vCPU, or because we're changing between page and block mappings. As per
+	 * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
+	 */
+	mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
+	if (mapping) {
+		if (size == (mapping->nr_pages * PAGE_SIZE))
			return -EAGAIN;
+
+		/* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
+		ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
+		if (ret)
+			return ret;
+		mapping = NULL;
	}
+
+	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
+	if (WARN_ON(ret))
+		return ret;
 
	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
-	WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));
+	mapping->nr_pages = size / PAGE_SIZE;
+	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
 
	return ret;
 }
 
 int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
-	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
-	pkvm_handle_t handle = kvm->arch.pkvm.handle;
-	struct pkvm_mapping *mapping;
-	int ret = 0;
+	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
 
-	lockdep_assert_held_write(&kvm->mmu_lock);
-	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
-		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
-		if (WARN_ON(ret))
-			break;
-		rb_erase(&mapping->node, &pgt->pkvm_mappings);
-		kfree(mapping);
-	}
-
-	return ret;
+	return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
 }
 
 int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
···
 
	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
-		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
+		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
+					mapping->nr_pages);
		if (WARN_ON(ret))
			break;
	}
···
 
	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
-		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);
+		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
+					  PAGE_SIZE * mapping->nr_pages);
 
	return 0;
 }
···
	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
-					   mkold);
+					   mapping->nr_pages, mkold);
 
	return young;
 }
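One subtlety worth noting: INTERVAL_TREE_DEFINE() works on closed intervals, which is why __pkvm_mapping_end() subtracts one and every iter_first/iter_next call site passes end - 1. A minimal stand-in (plain C, no kernel headers) showing why the inclusive 'last' matters at block boundaries:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

struct pkvm_mapping { uint64_t gfn, nr_pages; };

static uint64_t mapping_start(const struct pkvm_mapping *m)
{
	return m->gfn * PAGE_SIZE;
}

static uint64_t mapping_last(const struct pkvm_mapping *m)
{
	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;	/* inclusive end */
}

/* Closed-interval overlap test, as the generated iterators use. */
static int overlaps(const struct pkvm_mapping *m, uint64_t start, uint64_t last)
{
	return mapping_start(m) <= last && start <= mapping_last(m);
}

int main(void)
{
	struct pkvm_mapping block = { .gfn = 512, .nr_pages = 512 };	/* one 2MiB block */
	uint64_t addr = 512 * PAGE_SIZE;

	/* A single-page query inside the block hits it... */
	printf("%d\n", overlaps(&block, addr, addr + PAGE_SIZE - 1));	/* 1 */
	/* ...while the first byte past the block does not. */
	printf("%d\n", overlaps(&block, addr + 512 * PAGE_SIZE,
				addr + 512 * PAGE_SIZE));		/* 0 */
	return 0;
}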