Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
"ARM:
- Fix handling of the 32bit cycle counter
- Fix cycle counter filtering

x86:
- Fix a race leading to double unregistering of user notifiers
- Amend oversight in kvm_arch_set_irq that turned Hyper-V code dead
- Use SRCU around kvm_lapic_set_vapic_addr
- Avoid recursive flushing of asynchronous page faults
- Do not rely on deferred update in KVM_GET_CLOCK, which fixes #GP
- Let userspace know that KVM_GET_CLOCK is useful with master clock;
4.9 changed the return value to better match the guest clock, but
didn't provide means to let guests take advantage of it"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
kvm: x86: merge kvm_arch_set_irq and kvm_arch_set_irq_inatomic
KVM: x86: fix missed SRCU usage in kvm_lapic_set_vapic_addr
KVM: async_pf: avoid recursive flushing of work items
kvm: kvmclock: let KVM_GET_CLOCK return whether the master clock is in use
KVM: Disable irq while unregistering user notifier
KVM: x86: do not go through vcpu in __get_kvmclock_ns
KVM: arm64: Fix the issues when guest PMCCFILTR is configured
arm64: KVM: pmu: Fix AArch32 cycle counter access

+114 -60
+11
Documentation/virtual/kvm/api.txt
···
 conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
 such as migration.

+When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
+set of bits that KVM can return in struct kvm_clock_data's flag member.
+
+The only flag defined now is KVM_CLOCK_TSC_STABLE.  If set, the returned
+value is the exact kvmclock value seen by all VCPUs at the instant
+when KVM_GET_CLOCK was called.  If clear, the returned value is simply
+CLOCK_MONOTONIC plus a constant offset; the offset can be modified
+with KVM_SET_CLOCK.  KVM will try to make all VCPUs follow this clock,
+but the exact value read by each VCPU could differ, because the host
+TSC is not stable.
+
 struct kvm_clock_data {
 	__u64 clock;  /* kvmclock current value */
 	__u32 flags;
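For illustration only (not part of this pull request): a minimal userspace sketch of how a VMM might consume the behaviour documented above. The file-descriptor name vm_fd and the helper report_kvmclock() are assumptions for the example; the fallback define mirrors the value introduced in include/uapi/linux/kvm.h further down.

/*
 * Hedged sketch: query KVM_CAP_ADJUST_CLOCK, read the clock and check
 * whether the returned value is exact (master clock in use).
 */
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

#ifndef KVM_CLOCK_TSC_STABLE
#define KVM_CLOCK_TSC_STABLE 2		/* value added by this series */
#endif

static int report_kvmclock(int vm_fd)
{
	struct kvm_clock_data data = { 0 };
	int caps;

	/* With this series the capability value is a mask of returnable flags. */
	caps = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ADJUST_CLOCK);
	if (caps < 0)
		return -1;

	if (ioctl(vm_fd, KVM_GET_CLOCK, &data) < 0)
		return -1;

	if ((caps & KVM_CLOCK_TSC_STABLE) && (data.flags & KVM_CLOCK_TSC_STABLE))
		printf("kvmclock: %llu ns, exact for all VCPUs (master clock)\n",
		       (unsigned long long)data.clock);
	else
		printf("kvmclock: %llu ns, CLOCK_MONOTONIC plus an offset\n",
		       (unsigned long long)data.clock);
	return 0;
}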
+9 -1
arch/arm64/include/asm/perf_event.h
···
 #define ARMV8_PMU_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
 #define ARMV8_PMU_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */

-#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR	0	/* Software increment event */
+/*
+ * PMUv3 event types: required events
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR			0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL		0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE			0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED			0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES			0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED			0x12

 /*
  * Event filters for PMUv3
+1 -9
arch/arm64/kernel/perf_event.c
···

 /*
  * ARMv8 PMUv3 Performance Events handling code.
- * Common event types.
+ * Common event types (some are defined in asm/perf_event.h).
  */
-
-/* Required events. */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR			0x00
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL		0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE			0x04
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED			0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES			0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED			0x12

 /* At least one of the following is required. */
 #define ARMV8_PMUV3_PERFCTR_INST_RETIRED		0x08
+8 -2
arch/arm64/kvm/sys_regs.c
···

 			idx = ARMV8_PMU_CYCLE_IDX;
 		} else {
-			BUG();
+			return false;
 		}
+	} else if (r->CRn == 0 && r->CRm == 9) {
+		/* PMCCNTR */
+		if (pmu_access_event_counter_el0_disabled(vcpu))
+			return false;
+
+		idx = ARMV8_PMU_CYCLE_IDX;
 	} else if (r->CRn == 14 && (r->CRm & 12) == 8) {
 		/* PMEVCNTRn_EL0 */
 		if (pmu_access_event_counter_el0_disabled(vcpu))
···

 		idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
 	} else {
-		BUG();
+		return false;
 	}

 	if (!pmu_counter_idx_valid(vcpu, idx))
+27 -31
arch/x86/kvm/irq_comm.c
···
 }


+static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
+		    struct kvm *kvm, int irq_source_id, int level,
+		    bool line_status)
+{
+	if (!level)
+		return -1;
+
+	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
+}
+
 int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 			      struct kvm *kvm, int irq_source_id, int level,
 			      bool line_status)
···
 	struct kvm_lapic_irq irq;
 	int r;

-	if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
-		return -EWOULDBLOCK;
+	switch (e->type) {
+	case KVM_IRQ_ROUTING_HV_SINT:
+		return kvm_hv_set_sint(e, kvm, irq_source_id, level,
+				       line_status);

-	if (kvm_msi_route_invalid(kvm, e))
-		return -EINVAL;
+	case KVM_IRQ_ROUTING_MSI:
+		if (kvm_msi_route_invalid(kvm, e))
+			return -EINVAL;

-	kvm_set_msi_irq(kvm, e, &irq);
+		kvm_set_msi_irq(kvm, e, &irq);

-	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
-		return r;
-	else
-		return -EWOULDBLOCK;
+		if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
+			return r;
+		break;
+
+	default:
+		break;
+	}
+
+	return -EWOULDBLOCK;
 }

 int kvm_request_irq_source_id(struct kvm *kvm)
···
 		if (kimn->irq == gsi)
 			kimn->func(kimn, mask);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
-		    struct kvm *kvm, int irq_source_id, int level,
-		    bool line_status)
-{
-	if (!level)
-		return -1;
-
-	return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
 }

 int kvm_set_routing_entry(struct kvm *kvm,
···
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
-		     int irq_source_id, int level, bool line_status)
-{
-	switch (irq->type) {
-	case KVM_IRQ_ROUTING_HV_SINT:
-		return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
-				       line_status);
-	default:
-		return -EWOULDBLOCK;
-	}
 }

 void kvm_arch_irq_routing_update(struct kvm *kvm)
+34 -13
arch/x86/kvm/x86.c
···
 	struct kvm_shared_msrs *locals
 		= container_of(urn, struct kvm_shared_msrs, urn);
 	struct kvm_shared_msr_values *values;
+	unsigned long flags;

+	/*
+	 * Disabling irqs at this point since the following code could be
+	 * interrupted and executed through kvm_arch_hardware_disable()
+	 */
+	local_irq_save(flags);
+	if (locals->registered) {
+		locals->registered = false;
+		user_return_notifier_unregister(urn);
+	}
+	local_irq_restore(flags);
 	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
 		values = &locals->values[slot];
 		if (values->host != values->curr) {
···
 			values->curr = values->host;
 		}
 	}
-	locals->registered = false;
-	user_return_notifier_unregister(urn);
 }
···

 static u64 __get_kvmclock_ns(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
 	struct kvm_arch *ka = &kvm->arch;
-	s64 ns;
+	struct pvclock_vcpu_time_info hv_clock;

-	if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
-		u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-		ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
-	} else {
-		ns = ktime_get_boot_ns() + ka->kvmclock_offset;
+	spin_lock(&ka->pvclock_gtod_sync_lock);
+	if (!ka->use_master_clock) {
+		spin_unlock(&ka->pvclock_gtod_sync_lock);
+		return ktime_get_boot_ns() + ka->kvmclock_offset;
 	}

-	return ns;
+	hv_clock.tsc_timestamp = ka->master_cycle_now;
+	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
+	spin_unlock(&ka->pvclock_gtod_sync_lock);
+
+	kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+			   &hv_clock.tsc_shift,
+			   &hv_clock.tsc_to_system_mul);
+	return __pvclock_read_cycles(&hv_clock, rdtsc());
 }

 u64 get_kvmclock_ns(struct kvm *kvm)
···
 	case KVM_CAP_PIT_STATE2:
 	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
 	case KVM_CAP_XEN_HVM:
-	case KVM_CAP_ADJUST_CLOCK:
 	case KVM_CAP_VCPU_EVENTS:
 	case KVM_CAP_HYPERV:
 	case KVM_CAP_HYPERV_VAPIC:
···
 	case KVM_CAP_PCI_2_3:
 #endif
 		r = 1;
+		break;
+	case KVM_CAP_ADJUST_CLOCK:
+		r = KVM_CLOCK_TSC_STABLE;
 		break;
 	case KVM_CAP_X86_SMM:
 		/* SMBASE is usually relocated above 1M on modern chipsets,
···
 	};
 	case KVM_SET_VAPIC_ADDR: {
 		struct kvm_vapic_addr va;
+		int idx;

 		r = -EINVAL;
 		if (!lapic_in_kernel(vcpu))
···
 		r = -EFAULT;
 		if (copy_from_user(&va, argp, sizeof va))
 			goto out;
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
 	}
 	case KVM_X86_SETUP_MCE: {
···
 		struct kvm_clock_data user_ns;
 		u64 now_ns;

-		now_ns = get_kvmclock_ns(kvm);
+		local_irq_disable();
+		now_ns = __get_kvmclock_ns(kvm);
 		user_ns.clock = now_ns;
-		user_ns.flags = 0;
+		user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
+		local_irq_enable();
 		memset(&user_ns.pad, 0, sizeof(user_ns.pad));

 		r = -EFAULT;
+7
include/uapi/linux/kvm.h
···
 	__u8  pad[16];
 };

+/* For KVM_CAP_ADJUST_CLOCK */
+
+/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
+#define KVM_CLOCK_TSC_STABLE  2
+
 struct kvm_clock_data {
 	__u64 clock;
 	__u32 flags;
 	__u32 pad[9];
 };
+
+/* For KVM_CAP_SW_TLB */

 #define KVM_MMU_FSL_BOOKE_NOHV	0
 #define KVM_MMU_FSL_BOOKE_HV	1
+5 -3
virt/kvm/arm/pmu.c
···
 			continue;
 		type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
 		       & ARMV8_PMU_EVTYPE_EVENT;
-		if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+		if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
 		    && (enable & BIT(i))) {
 			reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
 			reg = lower_32_bits(reg);
···
 	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;

 	/* Software increment event does't need to be backed by a perf event */
-	if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
+	    select_idx != ARMV8_PMU_CYCLE_IDX)
 		return;

 	memset(&attr, 0, sizeof(struct perf_event_attr));
···
 	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
 	attr.exclude_hv = 1; /* Don't count EL2 events */
 	attr.exclude_host = 1; /* Don't count host events */
-	attr.config = eventsel;
+	attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
+		      ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;

 	counter = kvm_pmu_get_counter_value(vcpu, select_idx);
 	/* The initial sample period (overflow count) of an event. */
+12 -1
virt/kvm/async_pf.c
···

 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
+	apf->vcpu = NULL;
 	spin_unlock(&vcpu->async_pf.lock);

 	/*
···

 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 {
+	spin_lock(&vcpu->async_pf.lock);
+
 	/* cancel outstanding work queue item */
 	while (!list_empty(&vcpu->async_pf.queue)) {
 		struct kvm_async_pf *work =
···
 					     typeof(*work), queue);
 		list_del(&work->queue);

+		/*
+		 * We know it's present in vcpu->async_pf.done, do
+		 * nothing here.
+		 */
+		if (!work->vcpu)
+			continue;
+
+		spin_unlock(&vcpu->async_pf.lock);
 #ifdef CONFIG_KVM_ASYNC_PF_SYNC
 		flush_work(&work->work);
 #else
···
 			kmem_cache_free(async_pf_cache, work);
 		}
 #endif
+		spin_lock(&vcpu->async_pf.lock);
 	}

-	spin_lock(&vcpu->async_pf.lock);
 	while (!list_empty(&vcpu->async_pf.done)) {
 		struct kvm_async_pf *work =
 			list_first_entry(&vcpu->async_pf.done,