Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
"PPC:

- Fix guest time accounting in the host

- Fix large-page backing for radix guests on POWER9

- Fix HPT guests on POWER9 backed by 2M or 1G pages

- Compile fixes for some configs and gcc versions

s390:

- Fix random memory corruption when running as guest2 (e.g. KVM in
LPAR) and starting guest3 (e.g. nested KVM) with many CPUs

- Export forgotten io interrupt delivery statistics counter"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: s390: fix memory overwrites when not using SCA entries
KVM: PPC: Book3S HV: Fix guest time accounting with VIRT_CPU_ACCOUNTING_GEN
KVM: PPC: Book3S HV: Fix VRMA initialization with 2MB or 1GB memory backing
KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler
KVM: s390: provide io interrupt kvm_stat
KVM: PPC: Book3S: Fix compile error that occurs with some gcc versions
KVM: PPC: Fix compile error that occurs when CONFIG_ALTIVEC=n

+57 -35
+43 -26
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 195 195 kmem_cache_free(kvm_pte_cache, ptep); 196 196 } 197 197 198 + /* Like pmd_huge() and pmd_large(), but works regardless of config options */ 199 + static inline int pmd_is_leaf(pmd_t pmd) 200 + { 201 + return !!(pmd_val(pmd) & _PAGE_PTE); 202 + } 203 + 198 204 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, 199 205 unsigned int level, unsigned long mmu_seq) 200 206 { ··· 225 219 else 226 220 new_pmd = pmd_alloc_one(kvm->mm, gpa); 227 221 228 - if (level == 0 && !(pmd && pmd_present(*pmd))) 222 + if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd))) 229 223 new_ptep = kvmppc_pte_alloc(); 230 224 231 225 /* Check if we might have been invalidated; let the guest retry if so */ ··· 250 244 new_pmd = NULL; 251 245 } 252 246 pmd = pmd_offset(pud, gpa); 253 - if (pmd_large(*pmd)) { 254 - /* Someone else has instantiated a large page here; retry */ 255 - ret = -EAGAIN; 256 - goto out_unlock; 257 - } 258 - if (level == 1 && !pmd_none(*pmd)) { 247 + if (pmd_is_leaf(*pmd)) { 248 + unsigned long lgpa = gpa & PMD_MASK; 249 + 250 + /* 251 + * If we raced with another CPU which has just put 252 + * a 2MB pte in after we saw a pte page, try again. 253 + */ 254 + if (level == 0 && !new_ptep) { 255 + ret = -EAGAIN; 256 + goto out_unlock; 257 + } 258 + /* Valid 2MB page here already, remove it */ 259 + old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), 260 + ~0UL, 0, lgpa, PMD_SHIFT); 261 + kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT); 262 + if (old & _PAGE_DIRTY) { 263 + unsigned long gfn = lgpa >> PAGE_SHIFT; 264 + struct kvm_memory_slot *memslot; 265 + memslot = gfn_to_memslot(kvm, gfn); 266 + if (memslot && memslot->dirty_bitmap) 267 + kvmppc_update_dirty_map(memslot, 268 + gfn, PMD_SIZE); 269 + } 270 + } else if (level == 1 && !pmd_none(*pmd)) { 259 271 /* 260 272 * There's a page table page here, but we wanted 261 273 * to install a large page. Tell the caller and let ··· 436 412 } else { 437 413 page = pages[0]; 438 414 pfn = page_to_pfn(page); 439 - if (PageHuge(page)) { 440 - page = compound_head(page); 441 - pte_size <<= compound_order(page); 415 + if (PageCompound(page)) { 416 + pte_size <<= compound_order(compound_head(page)); 442 417 /* See if we can insert a 2MB large-page PTE here */ 443 418 if (pte_size >= PMD_SIZE && 444 - (gpa & PMD_MASK & PAGE_MASK) == 445 - (hva & PMD_MASK & PAGE_MASK)) { 419 + (gpa & (PMD_SIZE - PAGE_SIZE)) == 420 + (hva & (PMD_SIZE - PAGE_SIZE))) { 446 421 level = 1; 447 422 pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); 448 423 } 449 424 } 450 425 /* See if we can provide write access */ 451 426 if (writing) { 452 - /* 453 - * We assume gup_fast has set dirty on the host PTE. 454 - */ 455 427 pgflags |= _PAGE_WRITE; 456 428 } else { 457 429 local_irq_save(flags); 458 430 ptep = find_current_mm_pte(current->mm->pgd, 459 431 hva, NULL, NULL); 460 - if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) 432 + if (ptep && pte_write(*ptep)) 461 433 pgflags |= _PAGE_WRITE; 462 434 local_irq_restore(flags); 463 435 } ··· 479 459 pte = pfn_pte(pfn, __pgprot(pgflags)); 480 460 ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); 481 461 } 482 - if (ret == 0 || ret == -EAGAIN) 483 - ret = RESUME_GUEST; 484 462 485 463 if (page) { 486 - /* 487 - * We drop pages[0] here, not page because page might 488 - * have been set to the head page of a compound, but 489 - * we have to drop the reference on the correct tail 490 - * page to match the get inside gup() 491 - */ 492 - put_page(pages[0]); 464 + if (!ret && (pgflags & _PAGE_WRITE)) 465 + set_page_dirty_lock(page); 466 + put_page(page); 493 467 } 468 + 469 + if (ret == 0 || ret == -EAGAIN) 470 + ret = RESUME_GUEST; 494 471 return ret; 495 472 } 496 473 ··· 661 644 continue; 662 645 pmd = pmd_offset(pud, 0); 663 646 for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { 664 - if (pmd_huge(*pmd)) { 647 + if (pmd_is_leaf(*pmd)) { 665 648 pmd_clear(pmd); 666 649 continue; 667 650 }
+9 -8
arch/powerpc/kvm/book3s_hv.c
··· 2885 2885 */ 2886 2886 trace_hardirqs_on(); 2887 2887 2888 - guest_enter(); 2888 + guest_enter_irqoff(); 2889 2889 2890 2890 srcu_idx = srcu_read_lock(&vc->kvm->srcu); 2891 2891 2892 2892 trap = __kvmppc_vcore_entry(); 2893 2893 2894 2894 srcu_read_unlock(&vc->kvm->srcu, srcu_idx); 2895 - 2896 - guest_exit(); 2897 2895 2898 2896 trace_hardirqs_off(); 2899 2897 set_irq_happened(trap); ··· 2935 2937 kvmppc_set_host_core(pcpu); 2936 2938 2937 2939 local_irq_enable(); 2940 + guest_exit(); 2938 2941 2939 2942 /* Let secondaries go back to the offline loop */ 2940 2943 for (i = 0; i < controlled_threads; ++i) { ··· 3655 3656 goto up_out; 3656 3657 3657 3658 psize = vma_kernel_pagesize(vma); 3658 - porder = __ilog2(psize); 3659 3659 3660 3660 up_read(&current->mm->mmap_sem); 3661 3661 3662 3662 /* We can handle 4k, 64k or 16M pages in the VRMA */ 3663 - err = -EINVAL; 3664 - if (!(psize == 0x1000 || psize == 0x10000 || 3665 - psize == 0x1000000)) 3666 - goto out_srcu; 3663 + if (psize >= 0x1000000) 3664 + psize = 0x1000000; 3665 + else if (psize >= 0x10000) 3666 + psize = 0x10000; 3667 + else 3668 + psize = 0x1000; 3669 + porder = __ilog2(psize); 3667 3670 3668 3671 senc = slb_pgsize_encoding(psize); 3669 3672 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+3 -1
arch/powerpc/kvm/powerpc.c
··· 1345 1345 int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, 1346 1346 unsigned int rt, int is_default_endian) 1347 1347 { 1348 - enum emulation_result emulated; 1348 + enum emulation_result emulated = EMULATE_DONE; 1349 1349 1350 1350 while (vcpu->arch.mmio_vmx_copy_nums) { 1351 1351 emulated = __kvmppc_handle_load(run, vcpu, rt, 8, ··· 1608 1608 1609 1609 kvm_sigset_deactivate(vcpu); 1610 1610 1611 + #ifdef CONFIG_ALTIVEC 1611 1612 out: 1613 + #endif 1612 1614 vcpu_put(vcpu); 1613 1615 return r; 1614 1616 }
+2
arch/s390/kvm/kvm-s390.c
··· 86 86 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, 87 87 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, 88 88 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, 89 + { "deliver_io_interrupt", VCPU_STAT(deliver_io_int) }, 89 90 { "exit_wait_state", VCPU_STAT(exit_wait_state) }, 90 91 { "instruction_epsw", VCPU_STAT(instruction_epsw) }, 91 92 { "instruction_gs", VCPU_STAT(instruction_gs) }, ··· 2147 2146 /* we still need the basic sca for the ipte control */ 2148 2147 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 2149 2148 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 2149 + return; 2150 2150 } 2151 2151 read_lock(&vcpu->kvm->arch.sca_lock); 2152 2152 if (vcpu->kvm->arch.use_esca) {