Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'kvm-4.11-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Radim Krčmář:
"Second batch of KVM changes for the 4.11 merge window:

PPC:
- correct assumption about ASDR on POWER9
- fix MMIO emulation on POWER9

x86:
- add a simple test for ioperm
- clean up TSS (going through KVM tree as the whole undertaking was
caused by VMX's use of TSS)
- fix nVMX interrupt delivery
- fix some performance counters in the guest

... and two cleanup patches"

* tag 'kvm-4.11-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: nVMX: Fix pending events injection
x86/kvm/vmx: remove unused variable in segment_base()
selftests/x86: Add a basic selftest for ioperm
x86/asm: Tidy up TSS limit code
kvm: convert kvm.users_count from atomic_t to refcount_t
KVM: x86: never specify a sample period for virtualized in_tx_cp counters
KVM: PPC: Book3S HV: Don't use ASDR for real-mode HPT faults on POWER9
KVM: PPC: Book3S HV: Fix software walk of guest process page tables

+223 -30
+2 -1
arch/powerpc/include/asm/book3s/64/mmu.h
··· 46 46 47 47 /* Bits in patb0 field */ 48 48 #define PATB_HR (1UL << 63) 49 - #define RPDB_MASK 0x0ffffffffffff00fUL 49 + #define RPDB_MASK 0x0fffffffffffff00UL 50 50 #define RPDB_SHIFT (1UL << 8) 51 51 #define RTS1_SHIFT 61 /* top 2 bits of radix tree size */ 52 52 #define RTS1_MASK (3UL << RTS1_SHIFT) ··· 57 57 /* Bits in patb1 field */ 58 58 #define PATB_GR (1UL << 63) /* guest uses radix; must match HR */ 59 59 #define PRTS_MASK 0x1f /* process table size field */ 60 + #define PRTB_MASK 0x0ffffffffffff000UL 60 61 61 62 /* 62 63 * Limit process table to PAGE_SIZE table. This
+3 -2
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 32 32 u32 pid; 33 33 int ret, level, ps; 34 34 __be64 prte, rpte; 35 + unsigned long ptbl; 35 36 unsigned long root, pte, index; 36 37 unsigned long rts, bits, offset; 37 38 unsigned long gpa; ··· 54 53 return -EINVAL; 55 54 56 55 /* Read partition table to find root of tree for effective PID */ 57 - ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16, 58 - &prte, sizeof(prte)); 56 + ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16); 57 + ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte)); 59 58 if (ret) 60 59 return ret; 61 60
+4 -4
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 1787 1787 /* HPTE not found fault or protection fault? */ 1788 1788 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h 1789 1789 beq 1f /* if not, send it to the guest */ 1790 + andi. r0, r11, MSR_DR /* data relocation enabled? */ 1791 + beq 3f 1790 1792 BEGIN_FTR_SECTION 1791 1793 mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ 1792 1794 b 4f 1793 1795 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1794 - andi. r0, r11, MSR_DR /* data relocation enabled? */ 1795 - beq 3f 1796 1796 clrrdi r0, r4, 28 1797 1797 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ 1798 1798 li r0, BOOK3S_INTERRUPT_DATA_SEGMENT ··· 1879 1879 bne .Lradix_hisi /* for radix, just save ASDR */ 1880 1880 andis. r0, r11, SRR1_ISI_NOPT@h 1881 1881 beq 1f 1882 + andi. r0, r11, MSR_IR /* instruction relocation enabled? */ 1883 + beq 3f 1882 1884 BEGIN_FTR_SECTION 1883 1885 mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ 1884 1886 b 4f 1885 1887 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1886 - andi. r0, r11, MSR_IR /* instruction relocation enabled? */ 1887 - beq 3f 1888 1888 clrrdi r0, r10, 28 1889 1889 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ 1890 1890 li r0, BOOK3S_INTERRUPT_INST_SEGMENT
+11 -7
arch/x86/include/asm/desc.h
··· 205 205 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); 206 206 } 207 207 208 + DECLARE_PER_CPU(bool, __tss_limit_invalid); 209 + 208 210 static inline void force_reload_TR(void) 209 211 { 210 212 struct desc_struct *d = get_cpu_gdt_table(smp_processor_id()); ··· 222 220 write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); 223 221 224 222 load_TR_desc(); 223 + this_cpu_write(__tss_limit_invalid, false); 225 224 } 226 225 227 - DECLARE_PER_CPU(bool, need_tr_refresh); 228 - 229 - static inline void refresh_TR(void) 226 + /* 227 + * Call this if you need the TSS limit to be correct, which should be the case 228 + * if and only if you have TIF_IO_BITMAP set or you're switching to a task 229 + * with TIF_IO_BITMAP set. 230 + */ 231 + static inline void refresh_tss_limit(void) 230 232 { 231 233 DEBUG_LOCKS_WARN_ON(preemptible()); 232 234 233 - if (unlikely(this_cpu_read(need_tr_refresh))) { 235 + if (unlikely(this_cpu_read(__tss_limit_invalid))) 234 236 force_reload_TR(); 235 - this_cpu_write(need_tr_refresh, false); 236 - } 237 237 } 238 238 239 239 /* ··· 254 250 if (unlikely(test_thread_flag(TIF_IO_BITMAP))) 255 251 force_reload_TR(); 256 252 else 257 - this_cpu_write(need_tr_refresh, true); 253 + this_cpu_write(__tss_limit_invalid, true); 258 254 } 259 255 260 256 static inline void native_load_gdt(const struct desc_ptr *dtr)
+7 -1
arch/x86/kernel/ioport.c
··· 48 48 t->io_bitmap_ptr = bitmap; 49 49 set_thread_flag(TIF_IO_BITMAP); 50 50 51 + /* 52 + * Now that we have an IO bitmap, we need our TSS limit to be 53 + * correct. It's fine if we are preempted after doing this: 54 + * with TIF_IO_BITMAP set, context switches will keep our TSS 55 + * limit correct. 56 + */ 51 57 preempt_disable(); 52 - refresh_TR(); 58 + refresh_tss_limit(); 53 59 preempt_enable(); 54 60 } 55 61
+3 -3
arch/x86/kernel/process.c
··· 69 69 }; 70 70 EXPORT_PER_CPU_SYMBOL(cpu_tss); 71 71 72 - DEFINE_PER_CPU(bool, need_tr_refresh); 73 - EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh); 72 + DEFINE_PER_CPU(bool, __tss_limit_invalid); 73 + EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); 74 74 75 75 /* 76 76 * this gets called so that we can store lazy state into memory and copy the ··· 222 222 * Make sure that the TSS limit is correct for the CPU 223 223 * to notice the IO bitmap. 224 224 */ 225 - refresh_TR(); 225 + refresh_tss_limit(); 226 226 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { 227 227 /* 228 228 * Clear any possible leftover bits:
+10 -3
arch/x86/kvm/pmu.c
··· 113 113 .config = config, 114 114 }; 115 115 116 + attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); 117 + 116 118 if (in_tx) 117 119 attr.config |= HSW_IN_TX; 118 - if (in_tx_cp) 120 + if (in_tx_cp) { 121 + /* 122 + * HSW_IN_TX_CHECKPOINTED is not supported with nonzero 123 + * period. Just clear the sample period so at least 124 + * allocating the counter doesn't fail. 125 + */ 126 + attr.sample_period = 0; 119 127 attr.config |= HSW_IN_TX_CHECKPOINTED; 120 - 121 - attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); 128 + } 122 129 123 130 event = perf_event_create_kernel_counter(&attr, -1, current, 124 131 intr ? kvm_perf_overflow_intr :
+6 -3
arch/x86/kvm/vmx.c
··· 2053 2053 static unsigned long segment_base(u16 selector) 2054 2054 { 2055 2055 struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); 2056 - struct desc_struct *d; 2057 2056 struct desc_struct *table; 2058 2057 unsigned long v; 2059 2058 ··· 10641 10642 { 10642 10643 struct vcpu_vmx *vmx = to_vmx(vcpu); 10643 10644 10645 + if (vcpu->arch.exception.pending || 10646 + vcpu->arch.nmi_injected || 10647 + vcpu->arch.interrupt.pending) 10648 + return -EBUSY; 10649 + 10644 10650 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && 10645 10651 vmx->nested.preemption_timer_expired) { 10646 10652 if (vmx->nested.nested_run_pending) ··· 10655 10651 } 10656 10652 10657 10653 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { 10658 - if (vmx->nested.nested_run_pending || 10659 - vcpu->arch.interrupt.pending) 10654 + if (vmx->nested.nested_run_pending) 10660 10655 return -EBUSY; 10661 10656 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 10662 10657 NMI_VECTOR | INTR_TYPE_NMI_INTR |
+2 -1
include/linux/kvm_host.h
··· 26 26 #include <linux/context_tracking.h> 27 27 #include <linux/irqbypass.h> 28 28 #include <linux/swait.h> 29 + #include <linux/refcount.h> 29 30 #include <asm/signal.h> 30 31 31 32 #include <linux/kvm.h> ··· 402 401 #endif 403 402 struct kvm_vm_stat stat; 404 403 struct kvm_arch arch; 405 - atomic_t users_count; 404 + refcount_t users_count; 406 405 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 407 406 struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; 408 407 spinlock_t ring_lock;
+1 -1
tools/testing/selftests/x86/Makefile
··· 5 5 .PHONY: all all_32 all_64 warn_32bit_failure clean 6 6 7 7 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ 8 - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ 8 + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ 9 9 protection_keys test_vdso 10 10 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ 11 11 test_FCMOV test_FCOMI test_FISTTP \
+170
tools/testing/selftests/x86/ioperm.c
··· 1 + /* 2 + * ioperm.c - Test case for ioperm(2) 3 + * Copyright (c) 2015 Andrew Lutomirski 4 + */ 5 + 6 + #define _GNU_SOURCE 7 + #include <err.h> 8 + #include <stdio.h> 9 + #include <stdint.h> 10 + #include <signal.h> 11 + #include <setjmp.h> 12 + #include <stdlib.h> 13 + #include <string.h> 14 + #include <errno.h> 15 + #include <unistd.h> 16 + #include <sys/types.h> 17 + #include <sys/wait.h> 18 + #include <stdbool.h> 19 + #include <sched.h> 20 + #include <sys/io.h> 21 + 22 + static int nerrs = 0; 23 + 24 + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), 25 + int flags) 26 + { 27 + struct sigaction sa; 28 + memset(&sa, 0, sizeof(sa)); 29 + sa.sa_sigaction = handler; 30 + sa.sa_flags = SA_SIGINFO | flags; 31 + sigemptyset(&sa.sa_mask); 32 + if (sigaction(sig, &sa, 0)) 33 + err(1, "sigaction"); 34 + 35 + } 36 + 37 + static void clearhandler(int sig) 38 + { 39 + struct sigaction sa; 40 + memset(&sa, 0, sizeof(sa)); 41 + sa.sa_handler = SIG_DFL; 42 + sigemptyset(&sa.sa_mask); 43 + if (sigaction(sig, &sa, 0)) 44 + err(1, "sigaction"); 45 + } 46 + 47 + static jmp_buf jmpbuf; 48 + 49 + static void sigsegv(int sig, siginfo_t *si, void *ctx_void) 50 + { 51 + siglongjmp(jmpbuf, 1); 52 + } 53 + 54 + static bool try_outb(unsigned short port) 55 + { 56 + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); 57 + if (sigsetjmp(jmpbuf, 1) != 0) { 58 + return false; 59 + } else { 60 + asm volatile ("outb %%al, %w[port]" 61 + : : [port] "Nd" (port), "a" (0)); 62 + return true; 63 + } 64 + clearhandler(SIGSEGV); 65 + } 66 + 67 + static void expect_ok(unsigned short port) 68 + { 69 + if (!try_outb(port)) { 70 + printf("[FAIL]\toutb to 0x%02hx failed\n", port); 71 + exit(1); 72 + } 73 + 74 + printf("[OK]\toutb to 0x%02hx worked\n", port); 75 + } 76 + 77 + static void expect_gp(unsigned short port) 78 + { 79 + if (try_outb(port)) { 80 + printf("[FAIL]\toutb to 0x%02hx worked\n", port); 81 + exit(1); 82 + } 83 + 84 + printf("[OK]\toutb to 0x%02hx failed\n", port); 
85 + } 86 + 87 + int main(void) 88 + { 89 + cpu_set_t cpuset; 90 + CPU_ZERO(&cpuset); 91 + CPU_SET(0, &cpuset); 92 + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) 93 + err(1, "sched_setaffinity to CPU 0"); 94 + 95 + expect_gp(0x80); 96 + expect_gp(0xed); 97 + 98 + /* 99 + * Probe for ioperm support. Note that clearing ioperm bits 100 + * works even as nonroot. 101 + */ 102 + printf("[RUN]\tenable 0x80\n"); 103 + if (ioperm(0x80, 1, 1) != 0) { 104 + printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n", 105 + errno); 106 + return 0; 107 + } 108 + expect_ok(0x80); 109 + expect_gp(0xed); 110 + 111 + printf("[RUN]\tdisable 0x80\n"); 112 + if (ioperm(0x80, 1, 0) != 0) { 113 + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); 114 + return 1; 115 + } 116 + expect_gp(0x80); 117 + expect_gp(0xed); 118 + 119 + /* Make sure that fork() preserves ioperm. */ 120 + if (ioperm(0x80, 1, 1) != 0) { 121 + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); 122 + return 1; 123 + } 124 + 125 + pid_t child = fork(); 126 + if (child == -1) 127 + err(1, "fork"); 128 + 129 + if (child == 0) { 130 + printf("[RUN]\tchild: check that we inherited permissions\n"); 131 + expect_ok(0x80); 132 + expect_gp(0xed); 133 + return 0; 134 + } else { 135 + int status; 136 + if (waitpid(child, &status, 0) != child || 137 + !WIFEXITED(status)) { 138 + printf("[FAIL]\tChild died\n"); 139 + nerrs++; 140 + } else if (WEXITSTATUS(status) != 0) { 141 + printf("[FAIL]\tChild failed\n"); 142 + nerrs++; 143 + } else { 144 + printf("[OK]\tChild succeeded\n"); 145 + } 146 + } 147 + 148 + /* Test the capability checks. 
*/ 149 + 150 + printf("\tDrop privileges\n"); 151 + if (setresuid(1, 1, 1) != 0) { 152 + printf("[WARN]\tDropping privileges failed\n"); 153 + return 0; 154 + } 155 + 156 + printf("[RUN]\tdisable 0x80\n"); 157 + if (ioperm(0x80, 1, 0) != 0) { 158 + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); 159 + return 1; 160 + } 161 + printf("[OK]\tit worked\n"); 162 + 163 + printf("[RUN]\tenable 0x80 again\n"); 164 + if (ioperm(0x80, 1, 1) == 0) { 165 + printf("[FAIL]\tit succeeded but should have failed.\n"); 166 + return 1; 167 + } 168 + printf("[OK]\tit failed\n"); 169 + return 0; 170 + }
+4 -4
virt/kvm/kvm_main.c
··· 619 619 mutex_init(&kvm->lock); 620 620 mutex_init(&kvm->irq_lock); 621 621 mutex_init(&kvm->slots_lock); 622 - atomic_set(&kvm->users_count, 1); 622 + refcount_set(&kvm->users_count, 1); 623 623 INIT_LIST_HEAD(&kvm->devices); 624 624 625 625 r = kvm_arch_init_vm(kvm, type); ··· 749 749 750 750 void kvm_get_kvm(struct kvm *kvm) 751 751 { 752 - atomic_inc(&kvm->users_count); 752 + refcount_inc(&kvm->users_count); 753 753 } 754 754 EXPORT_SYMBOL_GPL(kvm_get_kvm); 755 755 756 756 void kvm_put_kvm(struct kvm *kvm) 757 757 { 758 - if (atomic_dec_and_test(&kvm->users_count)) 758 + if (refcount_dec_and_test(&kvm->users_count)) 759 759 kvm_destroy_vm(kvm); 760 760 } 761 761 EXPORT_SYMBOL_GPL(kvm_put_kvm); ··· 3641 3641 * To avoid the race between open and the removal of the debugfs 3642 3642 * directory we test against the users count. 3643 3643 */ 3644 - if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0)) 3644 + if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) 3645 3645 return -ENOENT; 3646 3646 3647 3647 if (simple_attr_open(inode, file, get, set, fmt)) {