Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"The ARM changes are largish, but not too scary. And a simple fix for
x86 (bug introduced in 3.19)"

(Paolo says these are the "Final" fixes. We'll see).

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: check LAPIC presence when building apic_map
arm/arm64: KVM: Use kernel mapping to perform invalidation on page fault
arm/arm64: KVM: Invalidate data cache on unmap
arm/arm64: KVM: Use set/way op trapping to track the state of the caches

Linus Torvalds 11 years ago 1f59fe76 f3a34041

+330 -178

15 changed files

expand all collapse all

arch

arm

include

asm

kvm_emulate.h

kvm_host.h

kvm_mmu.h

kvm

arm.c

coproc.c

coproc.h

coproc_a15.c

coproc_a7.c

mmu.c

trace.h

arm64

include

asm

kvm_emulate.h

kvm_host.h

kvm_mmu.h

kvm

sys_regs.c

x86

kvm

lapic.c

+10

arch/arm/include/asm/kvm_emulate.h

reviewed

··· 38 38 vcpu->arch.hcr = HCR_GUEST_MASK; 39 39 } 40 40 41 41 + static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) 42 42 + { 43 43 + return vcpu->arch.hcr; 44 44 + } 45 45 + 46 46 + static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) 47 47 + { 48 48 + vcpu->arch.hcr = hcr; 49 49 + } 50 50 + 41 51 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) 42 52 { 43 53 return 1;

-3

arch/arm/include/asm/kvm_host.h

reviewed

··· 125 125 * Anything that is not used directly from assembly code goes 126 126 * here. 127 127 */ 128 128 - /* dcache set/way operation pending */ 129 129 - int last_pcpu; 130 130 - cpumask_t require_dcache_flush; 131 128 132 129 /* Don't run the guest on this vcpu */ 133 130 bool pause;

+67 -10

arch/arm/include/asm/kvm_mmu.h

reviewed

··· 44 44 45 45 #ifndef __ASSEMBLY__ 46 46 47 47 + #include <linux/highmem.h> 47 48 #include <asm/cacheflush.h> 48 49 #include <asm/pgalloc.h> 49 50 ··· 162 161 return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; 163 162 } 164 163 165 165 - static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, 166 166 - unsigned long size, 167 167 - bool ipa_uncached) 164 164 + static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, 165 165 + unsigned long size, 166 166 + bool ipa_uncached) 168 167 { 169 169 - if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) 170 170 - kvm_flush_dcache_to_poc((void *)hva, size); 171 171 - 172 168 /* 173 169 * If we are going to insert an instruction page and the icache is 174 170 * either VIPT or PIPT, there is a potential problem where the host ··· 177 179 * 178 180 * VIVT caches are tagged using both the ASID and the VMID and doesn't 179 181 * need any kind of flushing (DDI 0406C.b - Page B3-1392). 182 182 + * 183 183 + * We need to do this through a kernel mapping (using the 184 184 + * user-space mapping has proved to be the wrong 185 185 + * solution). For that, we need to kmap one page at a time, 186 186 + * and iterate over the range. 
180 187 */ 181 181 - if (icache_is_pipt()) { 182 182 - __cpuc_coherent_user_range(hva, hva + size); 183 183 - } else if (!icache_is_vivt_asid_tagged()) { 188 188 + 189 189 + bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; 190 190 + 191 191 + VM_BUG_ON(size & PAGE_MASK); 192 192 + 193 193 + if (!need_flush && !icache_is_pipt()) 194 194 + goto vipt_cache; 195 195 + 196 196 + while (size) { 197 197 + void *va = kmap_atomic_pfn(pfn); 198 198 + 199 199 + if (need_flush) 200 200 + kvm_flush_dcache_to_poc(va, PAGE_SIZE); 201 201 + 202 202 + if (icache_is_pipt()) 203 203 + __cpuc_coherent_user_range((unsigned long)va, 204 204 + (unsigned long)va + PAGE_SIZE); 205 205 + 206 206 + size -= PAGE_SIZE; 207 207 + pfn++; 208 208 + 209 209 + kunmap_atomic(va); 210 210 + } 211 211 + 212 212 + vipt_cache: 213 213 + if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { 184 214 /* any kind of VIPT cache */ 185 215 __flush_icache_all(); 186 216 } 187 217 } 188 218 219 219 + static inline void __kvm_flush_dcache_pte(pte_t pte) 220 220 + { 221 221 + void *va = kmap_atomic(pte_page(pte)); 222 222 + 223 223 + kvm_flush_dcache_to_poc(va, PAGE_SIZE); 224 224 + 225 225 + kunmap_atomic(va); 226 226 + } 227 227 + 228 228 + static inline void __kvm_flush_dcache_pmd(pmd_t pmd) 229 229 + { 230 230 + unsigned long size = PMD_SIZE; 231 231 + pfn_t pfn = pmd_pfn(pmd); 232 232 + 233 233 + while (size) { 234 234 + void *va = kmap_atomic_pfn(pfn); 235 235 + 236 236 + kvm_flush_dcache_to_poc(va, PAGE_SIZE); 237 237 + 238 238 + pfn++; 239 239 + size -= PAGE_SIZE; 240 240 + 241 241 + kunmap_atomic(va); 242 242 + } 243 243 + } 244 244 + 245 245 + static inline void __kvm_flush_dcache_pud(pud_t pud) 246 246 + { 247 247 + } 248 248 + 189 249 #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) 190 250 191 191 - void stage2_flush_vm(struct kvm *kvm); 251 251 + void kvm_set_way_flush(struct kvm_vcpu *vcpu); 252 252 + void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); 
192 253 193 254 #endif /* !__ASSEMBLY__ */ 194 255

-10

arch/arm/kvm/arm.c

reviewed

··· 281 281 vcpu->cpu = cpu; 282 282 vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); 283 283 284 284 - /* 285 285 - * Check whether this vcpu requires the cache to be flushed on 286 286 - * this physical CPU. This is a consequence of doing dcache 287 287 - * operations by set/way on this vcpu. We do it here to be in 288 288 - * a non-preemptible section. 289 289 - */ 290 290 - if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush)) 291 291 - flush_cache_all(); /* We'd really want v7_flush_dcache_all() */ 292 292 - 293 284 kvm_arm_set_running_vcpu(vcpu); 294 285 } 295 286 ··· 532 541 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); 533 542 534 543 vcpu->mode = OUTSIDE_GUEST_MODE; 535 535 - vcpu->arch.last_pcpu = smp_processor_id(); 536 544 kvm_guest_exit(); 537 545 trace_kvm_exit(*vcpu_pc(vcpu)); 538 546 /*

+14 -56

arch/arm/kvm/coproc.c

reviewed

··· 189 189 return true; 190 190 } 191 191 192 192 - /* See note at ARM ARM B1.14.4 */ 192 192 + /* 193 193 + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). 194 194 + */ 193 195 static bool access_dcsw(struct kvm_vcpu *vcpu, 194 196 const struct coproc_params *p, 195 197 const struct coproc_reg *r) 196 198 { 197 197 - unsigned long val; 198 198 - int cpu; 199 199 - 200 199 if (!p->is_write) 201 200 return read_from_write_only(vcpu, p); 202 201 203 203 - cpu = get_cpu(); 204 204 - 205 205 - cpumask_setall(&vcpu->arch.require_dcache_flush); 206 206 - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); 207 207 - 208 208 - /* If we were already preempted, take the long way around */ 209 209 - if (cpu != vcpu->arch.last_pcpu) { 210 210 - flush_cache_all(); 211 211 - goto done; 212 212 - } 213 213 - 214 214 - val = *vcpu_reg(vcpu, p->Rt1); 215 215 - 216 216 - switch (p->CRm) { 217 217 - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ 218 218 - case 14: /* DCCISW */ 219 219 - asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val)); 220 220 - break; 221 221 - 222 222 - case 10: /* DCCSW */ 223 223 - asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val)); 224 224 - break; 225 225 - } 226 226 - 227 227 - done: 228 228 - put_cpu(); 229 229 - 202 202 + kvm_set_way_flush(vcpu); 230 203 return true; 231 204 } 232 205 233 206 /* 234 207 * Generic accessor for VM registers. Only called as long as HCR_TVM 235 235 - * is set. 208 208 + * is set. If the guest enables the MMU, we stop trapping the VM 209 209 + * sys_regs and leave it in complete control of the caches. 210 210 + * 211 211 + * Used by the cpu-specific code. 
236 212 */ 237 237 - static bool access_vm_reg(struct kvm_vcpu *vcpu, 238 238 - const struct coproc_params *p, 239 239 - const struct coproc_reg *r) 213 213 + bool access_vm_reg(struct kvm_vcpu *vcpu, 214 214 + const struct coproc_params *p, 215 215 + const struct coproc_reg *r) 240 216 { 217 217 + bool was_enabled = vcpu_has_cache_enabled(vcpu); 218 218 + 241 219 BUG_ON(!p->is_write); 242 220 243 221 vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); 244 222 if (p->is_64bit) 245 223 vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); 246 224 247 247 - return true; 248 248 - } 249 249 - 250 250 - /* 251 251 - * SCTLR accessor. Only called as long as HCR_TVM is set. If the 252 252 - * guest enables the MMU, we stop trapping the VM sys_regs and leave 253 253 - * it in complete control of the caches. 254 254 - * 255 255 - * Used by the cpu-specific code. 256 256 - */ 257 257 - bool access_sctlr(struct kvm_vcpu *vcpu, 258 258 - const struct coproc_params *p, 259 259 - const struct coproc_reg *r) 260 260 - { 261 261 - access_vm_reg(vcpu, p, r); 262 262 - 263 263 - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ 264 264 - vcpu->arch.hcr &= ~HCR_TVM; 265 265 - stage2_flush_vm(vcpu->kvm); 266 266 - } 267 267 - 225 225 + kvm_toggle_cache(vcpu, was_enabled); 268 226 return true; 269 227 } 270 228

+3 -3

arch/arm/kvm/coproc.h

reviewed

··· 153 153 #define is64 .is_64 = true 154 154 #define is32 .is_64 = false 155 155 156 156 - bool access_sctlr(struct kvm_vcpu *vcpu, 157 157 - const struct coproc_params *p, 158 158 - const struct coproc_reg *r); 156 156 + bool access_vm_reg(struct kvm_vcpu *vcpu, 157 157 + const struct coproc_params *p, 158 158 + const struct coproc_reg *r); 159 159 160 160 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */

+1 -1

arch/arm/kvm/coproc_a15.c

reviewed

··· 34 34 static const struct coproc_reg a15_regs[] = { 35 35 /* SCTLR: swapped by interrupt.S. */ 36 36 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 37 37 - access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, 37 37 + access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 }, 38 38 }; 39 39 40 40 static struct kvm_coproc_target_table a15_target_table = {

+1 -1

arch/arm/kvm/coproc_a7.c

reviewed

··· 37 37 static const struct coproc_reg a7_regs[] = { 38 38 /* SCTLR: swapped by interrupt.S. */ 39 39 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 40 40 - access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, 40 40 + access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 }, 41 41 }; 42 42 43 43 static struct kvm_coproc_target_table a7_target_table = {

+144 -20

arch/arm/kvm/mmu.c

reviewed

··· 58 58 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); 59 59 } 60 60 61 61 + /* 62 62 + * D-Cache management functions. They take the page table entries by 63 63 + * value, as they are flushing the cache using the kernel mapping (or 64 64 + * kmap on 32bit). 65 65 + */ 66 66 + static void kvm_flush_dcache_pte(pte_t pte) 67 67 + { 68 68 + __kvm_flush_dcache_pte(pte); 69 69 + } 70 70 + 71 71 + static void kvm_flush_dcache_pmd(pmd_t pmd) 72 72 + { 73 73 + __kvm_flush_dcache_pmd(pmd); 74 74 + } 75 75 + 76 76 + static void kvm_flush_dcache_pud(pud_t pud) 77 77 + { 78 78 + __kvm_flush_dcache_pud(pud); 79 79 + } 80 80 + 61 81 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 62 82 int min, int max) 63 83 { ··· 139 119 put_page(virt_to_page(pmd)); 140 120 } 141 121 122 122 + /* 123 123 + * Unmapping vs dcache management: 124 124 + * 125 125 + * If a guest maps certain memory pages as uncached, all writes will 126 126 + * bypass the data cache and go directly to RAM. However, the CPUs 127 127 + * can still speculate reads (not writes) and fill cache lines with 128 128 + * data. 129 129 + * 130 130 + * Those cache lines will be *clean* cache lines though, so a 131 131 + * clean+invalidate operation is equivalent to an invalidate 132 132 + * operation, because no cache lines are marked dirty. 133 133 + * 134 134 + * Those clean cache lines could be filled prior to an uncached write 135 135 + * by the guest, and the cache coherent IO subsystem would therefore 136 136 + * end up writing old data to disk. 137 137 + * 138 138 + * This is why right after unmapping a page/section and invalidating 139 139 + * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure 140 140 + * the IO subsystem will never hit in the cache. 
141 141 + */ 142 142 static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, 143 143 phys_addr_t addr, phys_addr_t end) 144 144 { ··· 168 128 start_pte = pte = pte_offset_kernel(pmd, addr); 169 129 do { 170 130 if (!pte_none(*pte)) { 131 131 + pte_t old_pte = *pte; 132 132 + 171 133 kvm_set_pte(pte, __pte(0)); 172 172 - put_page(virt_to_page(pte)); 173 134 kvm_tlb_flush_vmid_ipa(kvm, addr); 135 135 + 136 136 + /* No need to invalidate the cache for device mappings */ 137 137 + if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) 138 138 + kvm_flush_dcache_pte(old_pte); 139 139 + 140 140 + put_page(virt_to_page(pte)); 174 141 } 175 142 } while (pte++, addr += PAGE_SIZE, addr != end); 176 143 ··· 196 149 next = kvm_pmd_addr_end(addr, end); 197 150 if (!pmd_none(*pmd)) { 198 151 if (kvm_pmd_huge(*pmd)) { 152 152 + pmd_t old_pmd = *pmd; 153 153 + 199 154 pmd_clear(pmd); 200 155 kvm_tlb_flush_vmid_ipa(kvm, addr); 156 156 + 157 157 + kvm_flush_dcache_pmd(old_pmd); 158 158 + 201 159 put_page(virt_to_page(pmd)); 202 160 } else { 203 161 unmap_ptes(kvm, pmd, addr, next); ··· 225 173 next = kvm_pud_addr_end(addr, end); 226 174 if (!pud_none(*pud)) { 227 175 if (pud_huge(*pud)) { 176 176 + pud_t old_pud = *pud; 177 177 + 228 178 pud_clear(pud); 229 179 kvm_tlb_flush_vmid_ipa(kvm, addr); 180 180 + 181 181 + kvm_flush_dcache_pud(old_pud); 182 182 + 230 183 put_page(virt_to_page(pud)); 231 184 } else { 232 185 unmap_pmds(kvm, pud, addr, next); ··· 266 209 267 210 pte = pte_offset_kernel(pmd, addr); 268 211 do { 269 269 - if (!pte_none(*pte)) { 270 270 - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); 271 271 - kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); 272 272 - } 212 212 + if (!pte_none(*pte) && 213 213 + (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) 214 214 + kvm_flush_dcache_pte(*pte); 273 215 } while (pte++, addr += PAGE_SIZE, addr != end); 274 216 } 275 217 ··· 282 226 do { 283 227 next = kvm_pmd_addr_end(addr, end); 284 228 if (!pmd_none(*pmd)) { 285 285 - if 
(kvm_pmd_huge(*pmd)) { 286 286 - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); 287 287 - kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); 288 288 - } else { 229 229 + if (kvm_pmd_huge(*pmd)) 230 230 + kvm_flush_dcache_pmd(*pmd); 231 231 + else 289 232 stage2_flush_ptes(kvm, pmd, addr, next); 290 290 - } 291 233 } 292 234 } while (pmd++, addr = next, addr != end); 293 235 } ··· 300 246 do { 301 247 next = kvm_pud_addr_end(addr, end); 302 248 if (!pud_none(*pud)) { 303 303 - if (pud_huge(*pud)) { 304 304 - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); 305 305 - kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); 306 306 - } else { 249 249 + if (pud_huge(*pud)) 250 250 + kvm_flush_dcache_pud(*pud); 251 251 + else 307 252 stage2_flush_pmds(kvm, pud, addr, next); 308 308 - } 309 253 } 310 254 } while (pud++, addr = next, addr != end); 311 255 } ··· 330 278 * Go through the stage 2 page tables and invalidate any cache lines 331 279 * backing memory already mapped to the VM. 332 280 */ 333 333 - void stage2_flush_vm(struct kvm *kvm) 281 281 + static void stage2_flush_vm(struct kvm *kvm) 334 282 { 335 283 struct kvm_memslots *slots; 336 284 struct kvm_memory_slot *memslot; ··· 957 905 return !pfn_valid(pfn); 958 906 } 959 907 908 908 + static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, 909 909 + unsigned long size, bool uncached) 910 910 + { 911 911 + __coherent_cache_guest_page(vcpu, pfn, size, uncached); 912 912 + } 913 913 + 960 914 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, 961 915 struct kvm_memory_slot *memslot, unsigned long hva, 962 916 unsigned long fault_status) ··· 1052 994 kvm_set_s2pmd_writable(&new_pmd); 1053 995 kvm_set_pfn_dirty(pfn); 1054 996 } 1055 1055 - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, 1056 1056 - fault_ipa_uncached); 997 997 + coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); 1057 998 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); 1058 999 } else 
{ 1059 1000 pte_t new_pte = pfn_pte(pfn, mem_type); ··· 1060 1003 kvm_set_s2pte_writable(&new_pte); 1061 1004 kvm_set_pfn_dirty(pfn); 1062 1005 } 1063 1063 - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, 1064 1064 - fault_ipa_uncached); 1006 1006 + coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached); 1065 1007 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, 1066 1008 pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); 1067 1009 } ··· 1466 1410 spin_lock(&kvm->mmu_lock); 1467 1411 unmap_stage2_range(kvm, gpa, size); 1468 1412 spin_unlock(&kvm->mmu_lock); 1413 1413 + } 1414 1414 + 1415 1415 + /* 1416 1416 + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). 1417 1417 + * 1418 1418 + * Main problems: 1419 1419 + * - S/W ops are local to a CPU (not broadcast) 1420 1420 + * - We have line migration behind our back (speculation) 1421 1421 + * - System caches don't support S/W at all (damn!) 1422 1422 + * 1423 1423 + * In the face of the above, the best we can do is to try and convert 1424 1424 + * S/W ops to VA ops. Because the guest is not allowed to infer the 1425 1425 + * S/W to PA mapping, it can only use S/W to nuke the whole cache, 1426 1426 + * which is a rather good thing for us. 1427 1427 + * 1428 1428 + * Also, it is only used when turning caches on/off ("The expected 1429 1429 + * usage of the cache maintenance instructions that operate by set/way 1430 1430 + * is associated with the cache maintenance instructions associated 1431 1431 + * with the powerdown and powerup of caches, if this is required by 1432 1432 + * the implementation."). 1433 1433 + * 1434 1434 + * We use the following policy: 1435 1435 + * 1436 1436 + * - If we trap a S/W operation, we enable VM trapping to detect 1437 1437 + * caches being turned on/off, and do a full clean. 1438 1438 + * 1439 1439 + * - We flush the caches on both caches being turned on and off. 
1440 1440 + * 1441 1441 + * - Once the caches are enabled, we stop trapping VM ops. 1442 1442 + */ 1443 1443 + void kvm_set_way_flush(struct kvm_vcpu *vcpu) 1444 1444 + { 1445 1445 + unsigned long hcr = vcpu_get_hcr(vcpu); 1446 1446 + 1447 1447 + /* 1448 1448 + * If this is the first time we do a S/W operation 1449 1449 + * (i.e. HCR_TVM not set) flush the whole memory, and set the 1450 1450 + * VM trapping. 1451 1451 + * 1452 1452 + * Otherwise, rely on the VM trapping to wait for the MMU + 1453 1453 + * Caches to be turned off. At that point, we'll be able to 1454 1454 + * clean the caches again. 1455 1455 + */ 1456 1456 + if (!(hcr & HCR_TVM)) { 1457 1457 + trace_kvm_set_way_flush(*vcpu_pc(vcpu), 1458 1458 + vcpu_has_cache_enabled(vcpu)); 1459 1459 + stage2_flush_vm(vcpu->kvm); 1460 1460 + vcpu_set_hcr(vcpu, hcr | HCR_TVM); 1461 1461 + } 1462 1462 + } 1463 1463 + 1464 1464 + void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) 1465 1465 + { 1466 1466 + bool now_enabled = vcpu_has_cache_enabled(vcpu); 1467 1467 + 1468 1468 + /* 1469 1469 + * If switching the MMU+caches on, need to invalidate the caches. 1470 1470 + * If switching it off, need to clean the caches. 1471 1471 + * Clean + invalidate does the trick always. 1472 1472 + */ 1473 1473 + if (now_enabled != was_enabled) 1474 1474 + stage2_flush_vm(vcpu->kvm); 1475 1475 + 1476 1476 + /* Caches are now on, stop trapping VM ops (until a S/W op) */ 1477 1477 + if (now_enabled) 1478 1478 + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); 1479 1479 + 1480 1480 + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); 1469 1481 }

+39

arch/arm/kvm/trace.h

reviewed

··· 223 223 __entry->vcpu_pc, __entry->r0, __entry->imm) 224 224 ); 225 225 226 226 + TRACE_EVENT(kvm_set_way_flush, 227 227 + TP_PROTO(unsigned long vcpu_pc, bool cache), 228 228 + TP_ARGS(vcpu_pc, cache), 229 229 + 230 230 + TP_STRUCT__entry( 231 231 + __field( unsigned long, vcpu_pc ) 232 232 + __field( bool, cache ) 233 233 + ), 234 234 + 235 235 + TP_fast_assign( 236 236 + __entry->vcpu_pc = vcpu_pc; 237 237 + __entry->cache = cache; 238 238 + ), 239 239 + 240 240 + TP_printk("S/W flush at 0x%016lx (cache %s)", 241 241 + __entry->vcpu_pc, __entry->cache ? "on" : "off") 242 242 + ); 243 243 + 244 244 + TRACE_EVENT(kvm_toggle_cache, 245 245 + TP_PROTO(unsigned long vcpu_pc, bool was, bool now), 246 246 + TP_ARGS(vcpu_pc, was, now), 247 247 + 248 248 + TP_STRUCT__entry( 249 249 + __field( unsigned long, vcpu_pc ) 250 250 + __field( bool, was ) 251 251 + __field( bool, now ) 252 252 + ), 253 253 + 254 254 + TP_fast_assign( 255 255 + __entry->vcpu_pc = vcpu_pc; 256 256 + __entry->was = was; 257 257 + __entry->now = now; 258 258 + ), 259 259 + 260 260 + TP_printk("VM op at 0x%016lx (cache was %s, now %s)", 261 261 + __entry->vcpu_pc, __entry->was ? "on" : "off", 262 262 + __entry->now ? "on" : "off") 263 263 + ); 264 264 + 226 265 #endif /* _TRACE_KVM_H */ 227 266 228 267 #undef TRACE_INCLUDE_PATH

+10

arch/arm64/include/asm/kvm_emulate.h

reviewed

··· 45 45 vcpu->arch.hcr_el2 &= ~HCR_RW; 46 46 } 47 47 48 48 + static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) 49 49 + { 50 50 + return vcpu->arch.hcr_el2; 51 51 + } 52 52 + 53 53 + static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) 54 54 + { 55 55 + vcpu->arch.hcr_el2 = hcr; 56 56 + } 57 57 + 48 58 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) 49 59 { 50 60 return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;

-3

arch/arm64/include/asm/kvm_host.h

reviewed

··· 116 116 * Anything that is not used directly from assembly code goes 117 117 * here. 118 118 */ 119 119 - /* dcache set/way operation pending */ 120 120 - int last_pcpu; 121 121 - cpumask_t require_dcache_flush; 122 119 123 120 /* Don't run the guest */ 124 121 bool pause;

+28 -6

arch/arm64/include/asm/kvm_mmu.h

reviewed

··· 243 243 return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; 244 244 } 245 245 246 246 - static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, 247 247 - unsigned long size, 248 248 - bool ipa_uncached) 246 246 + static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, 247 247 + unsigned long size, 248 248 + bool ipa_uncached) 249 249 { 250 250 + void *va = page_address(pfn_to_page(pfn)); 251 251 + 250 252 if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) 251 251 - kvm_flush_dcache_to_poc((void *)hva, size); 253 253 + kvm_flush_dcache_to_poc(va, size); 252 254 253 255 if (!icache_is_aliasing()) { /* PIPT */ 254 254 - flush_icache_range(hva, hva + size); 256 256 + flush_icache_range((unsigned long)va, 257 257 + (unsigned long)va + size); 255 258 } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ 256 259 /* any kind of VIPT cache */ 257 260 __flush_icache_all(); 258 261 } 259 262 } 260 263 264 264 + static inline void __kvm_flush_dcache_pte(pte_t pte) 265 265 + { 266 266 + struct page *page = pte_page(pte); 267 267 + kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); 268 268 + } 269 269 + 270 270 + static inline void __kvm_flush_dcache_pmd(pmd_t pmd) 271 271 + { 272 272 + struct page *page = pmd_page(pmd); 273 273 + kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); 274 274 + } 275 275 + 276 276 + static inline void __kvm_flush_dcache_pud(pud_t pud) 277 277 + { 278 278 + struct page *page = pud_page(pud); 279 279 + kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); 280 280 + } 281 281 + 261 282 #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) 262 283 263 263 - void stage2_flush_vm(struct kvm *kvm); 284 284 + void kvm_set_way_flush(struct kvm_vcpu *vcpu); 285 285 + void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); 264 286 265 287 #endif /* __ASSEMBLY__ */ 266 288 #endif /* __ARM64_KVM_MMU_H__ */

+10 -65

arch/arm64/kvm/sys_regs.c

reviewed

··· 69 69 return ccsidr; 70 70 } 71 71 72 72 - static void do_dc_cisw(u32 val) 73 73 - { 74 74 - asm volatile("dc cisw, %x0" : : "r" (val)); 75 75 - dsb(ish); 76 76 - } 77 77 - 78 78 - static void do_dc_csw(u32 val) 79 79 - { 80 80 - asm volatile("dc csw, %x0" : : "r" (val)); 81 81 - dsb(ish); 82 82 - } 83 83 - 84 84 - /* See note at ARM ARM B1.14.4 */ 72 72 + /* 73 73 + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). 74 74 + */ 85 75 static bool access_dcsw(struct kvm_vcpu *vcpu, 86 76 const struct sys_reg_params *p, 87 77 const struct sys_reg_desc *r) 88 78 { 89 89 - unsigned long val; 90 90 - int cpu; 91 91 - 92 79 if (!p->is_write) 93 80 return read_from_write_only(vcpu, p); 94 81 95 95 - cpu = get_cpu(); 96 96 - 97 97 - cpumask_setall(&vcpu->arch.require_dcache_flush); 98 98 - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); 99 99 - 100 100 - /* If we were already preempted, take the long way around */ 101 101 - if (cpu != vcpu->arch.last_pcpu) { 102 102 - flush_cache_all(); 103 103 - goto done; 104 104 - } 105 105 - 106 106 - val = *vcpu_reg(vcpu, p->Rt); 107 107 - 108 108 - switch (p->CRm) { 109 109 - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ 110 110 - case 14: /* DCCISW */ 111 111 - do_dc_cisw(val); 112 112 - break; 113 113 - 114 114 - case 10: /* DCCSW */ 115 115 - do_dc_csw(val); 116 116 - break; 117 117 - } 118 118 - 119 119 - done: 120 120 - put_cpu(); 121 121 - 82 82 + kvm_set_way_flush(vcpu); 122 83 return true; 123 84 } 124 85 125 86 /* 126 87 * Generic accessor for VM registers. Only called as long as HCR_TVM 127 127 - * is set. 88 88 + * is set. If the guest enables the MMU, we stop trapping the VM 89 89 + * sys_regs and leave it in complete control of the caches. 
128 90 */ 129 91 static bool access_vm_reg(struct kvm_vcpu *vcpu, 130 92 const struct sys_reg_params *p, 131 93 const struct sys_reg_desc *r) 132 94 { 133 95 unsigned long val; 96 96 + bool was_enabled = vcpu_has_cache_enabled(vcpu); 134 97 135 98 BUG_ON(!p->is_write); 136 99 ··· 106 143 vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; 107 144 } 108 145 109 109 - return true; 110 110 - } 111 111 - 112 112 - /* 113 113 - * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the 114 114 - * guest enables the MMU, we stop trapping the VM sys_regs and leave 115 115 - * it in complete control of the caches. 116 116 - */ 117 117 - static bool access_sctlr(struct kvm_vcpu *vcpu, 118 118 - const struct sys_reg_params *p, 119 119 - const struct sys_reg_desc *r) 120 120 - { 121 121 - access_vm_reg(vcpu, p, r); 122 122 - 123 123 - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ 124 124 - vcpu->arch.hcr_el2 &= ~HCR_TVM; 125 125 - stage2_flush_vm(vcpu->kvm); 126 126 - } 127 127 - 146 146 + kvm_toggle_cache(vcpu, was_enabled); 128 147 return true; 129 148 } 130 149 ··· 322 377 NULL, reset_mpidr, MPIDR_EL1 }, 323 378 /* SCTLR_EL1 */ 324 379 { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), 325 325 - access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, 380 380 + access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, 326 381 /* CPACR_EL1 */ 327 382 { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), 328 383 NULL, reset_val, CPACR_EL1, 0 }, ··· 602 657 * register). 603 658 */ 604 659 static const struct sys_reg_desc cp15_regs[] = { 605 605 - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, 660 660 + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, 606 661 { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, 607 662 { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, 608 663 { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },

arch/x86/kvm/lapic.c

reviewed

··· 192 192 u16 cid, lid; 193 193 u32 ldr, aid; 194 194 195 195 + if (!kvm_apic_present(vcpu)) 196 196 + continue; 197 197 + 195 198 aid = kvm_apic_id(apic); 196 199 ldr = kvm_apic_get_reg(apic, APIC_LDR); 197 200 cid = apic_cluster_id(new, ldr);