Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"s390:
- SRCU fix

PPC:
- host crash fixes

x86:
- bugfixes, including making nested posted interrupts really work

Generic:
- tweaks to kvm_stat and to uevents"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: LAPIC: Fix reentrancy issues with preempt notifiers
tools/kvm_stat: add '-f help' to get the available event list
tools/kvm_stat: use variables instead of hard paths in help output
KVM: nVMX: Fix loss of L2's NMI blocking state
KVM: nVMX: Fix posted intr delivery when vcpu is in guest mode
x86: irq: Define a global vector for nested posted interrupts
KVM: x86: do mask out upper bits of PAE CR3
KVM: make pid available for uevents without debugfs
KVM: s390: take srcu lock when getting/setting storage keys
KVM: VMX: remove unused field
KVM: PPC: Book3S HV: Fix host crash on changing HPT size
KVM: PPC: Book3S HV: Enable TM before accessing TM registers

+98 -48
+3 -1
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 164 164 goto out; 165 165 } 166 166 167 - if (kvm->arch.hpt.virt) 167 + if (kvm->arch.hpt.virt) { 168 168 kvmppc_free_hpt(&kvm->arch.hpt); 169 + kvmppc_rmap_reset(kvm); 170 + } 169 171 170 172 err = kvmppc_allocate_hpt(&info, order); 171 173 if (err < 0)
+2
arch/powerpc/kvm/book3s_hv.c
··· 3211 3211 run->fail_entry.hardware_entry_failure_reason = 0; 3212 3212 return -EINVAL; 3213 3213 } 3214 + /* Enable TM so we can read the TM SPRs */ 3215 + mtmsr(mfmsr() | MSR_TM); 3214 3216 current->thread.tm_tfhar = mfspr(SPRN_TFHAR); 3215 3217 current->thread.tm_tfiar = mfspr(SPRN_TFIAR); 3216 3218 current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+6 -2
arch/s390/kvm/kvm-s390.c
··· 1324 1324 { 1325 1325 uint8_t *keys; 1326 1326 uint64_t hva; 1327 - int i, r = 0; 1327 + int srcu_idx, i, r = 0; 1328 1328 1329 1329 if (args->flags != 0) 1330 1330 return -EINVAL; ··· 1342 1342 return -ENOMEM; 1343 1343 1344 1344 down_read(&current->mm->mmap_sem); 1345 + srcu_idx = srcu_read_lock(&kvm->srcu); 1345 1346 for (i = 0; i < args->count; i++) { 1346 1347 hva = gfn_to_hva(kvm, args->start_gfn + i); 1347 1348 if (kvm_is_error_hva(hva)) { ··· 1354 1353 if (r) 1355 1354 break; 1356 1355 } 1356 + srcu_read_unlock(&kvm->srcu, srcu_idx); 1357 1357 up_read(&current->mm->mmap_sem); 1358 1358 1359 1359 if (!r) { ··· 1372 1370 { 1373 1371 uint8_t *keys; 1374 1372 uint64_t hva; 1375 - int i, r = 0; 1373 + int srcu_idx, i, r = 0; 1376 1374 1377 1375 if (args->flags != 0) 1378 1376 return -EINVAL; ··· 1398 1396 goto out; 1399 1397 1400 1398 down_read(&current->mm->mmap_sem); 1399 + srcu_idx = srcu_read_lock(&kvm->srcu); 1401 1400 for (i = 0; i < args->count; i++) { 1402 1401 hva = gfn_to_hva(kvm, args->start_gfn + i); 1403 1402 if (kvm_is_error_hva(hva)) { ··· 1416 1413 if (r) 1417 1414 break; 1418 1415 } 1416 + srcu_read_unlock(&kvm->srcu, srcu_idx); 1419 1417 up_read(&current->mm->mmap_sem); 1420 1418 out: 1421 1419 kvfree(keys);
+1
arch/x86/entry/entry_64.S
··· 705 705 #ifdef CONFIG_HAVE_KVM 706 706 apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi 707 707 apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi 708 + apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi 708 709 #endif 709 710 710 711 #ifdef CONFIG_X86_MCE_THRESHOLD
+2
arch/x86/include/asm/entry_arch.h
··· 25 25 smp_kvm_posted_intr_ipi) 26 26 BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR, 27 27 smp_kvm_posted_intr_wakeup_ipi) 28 + BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR, 29 + smp_kvm_posted_intr_nested_ipi) 28 30 #endif 29 31 30 32 /*
+1
arch/x86/include/asm/hardirq.h
··· 15 15 #ifdef CONFIG_HAVE_KVM 16 16 unsigned int kvm_posted_intr_ipis; 17 17 unsigned int kvm_posted_intr_wakeup_ipis; 18 + unsigned int kvm_posted_intr_nested_ipis; 18 19 #endif 19 20 unsigned int x86_platform_ipis; /* arch dependent */ 20 21 unsigned int apic_perf_irqs;
+2
arch/x86/include/asm/hw_irq.h
··· 30 30 extern asmlinkage void x86_platform_ipi(void); 31 31 extern asmlinkage void kvm_posted_intr_ipi(void); 32 32 extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); 33 + extern asmlinkage void kvm_posted_intr_nested_ipi(void); 33 34 extern asmlinkage void error_interrupt(void); 34 35 extern asmlinkage void irq_work_interrupt(void); 35 36 ··· 63 62 #define trace_reboot_interrupt reboot_interrupt 64 63 #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi 65 64 #define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi 65 + #define trace_kvm_posted_intr_nested_ipi kvm_posted_intr_nested_ipi 66 66 #endif /* CONFIG_TRACING */ 67 67 68 68 #ifdef CONFIG_X86_LOCAL_APIC
+2 -1
arch/x86/include/asm/irq_vectors.h
··· 83 83 */ 84 84 #define X86_PLATFORM_IPI_VECTOR 0xf7 85 85 86 - #define POSTED_INTR_WAKEUP_VECTOR 0xf1 87 86 /* 88 87 * IRQ work vector: 89 88 */ ··· 97 98 /* Vector for KVM to deliver posted interrupt IPI */ 98 99 #ifdef CONFIG_HAVE_KVM 99 100 #define POSTED_INTR_VECTOR 0xf2 101 + #define POSTED_INTR_WAKEUP_VECTOR 0xf1 102 + #define POSTED_INTR_NESTED_VECTOR 0xf0 100 103 #endif 101 104 102 105 /*
+19
arch/x86/kernel/irq.c
··· 155 155 seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); 156 156 seq_puts(p, " Posted-interrupt notification event\n"); 157 157 158 + seq_printf(p, "%*s: ", prec, "NPI"); 159 + for_each_online_cpu(j) 160 + seq_printf(p, "%10u ", 161 + irq_stats(j)->kvm_posted_intr_nested_ipis); 162 + seq_puts(p, " Nested posted-interrupt event\n"); 163 + 158 164 seq_printf(p, "%*s: ", prec, "PIW"); 159 165 for_each_online_cpu(j) 160 166 seq_printf(p, "%10u ", ··· 316 310 entering_ack_irq(); 317 311 inc_irq_stat(kvm_posted_intr_wakeup_ipis); 318 312 kvm_posted_intr_wakeup_handler(); 313 + exiting_irq(); 314 + set_irq_regs(old_regs); 315 + } 316 + 317 + /* 318 + * Handler for POSTED_INTERRUPT_NESTED_VECTOR. 319 + */ 320 + __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) 321 + { 322 + struct pt_regs *old_regs = set_irq_regs(regs); 323 + 324 + entering_ack_irq(); 325 + inc_irq_stat(kvm_posted_intr_nested_ipis); 319 326 exiting_irq(); 320 327 set_irq_regs(old_regs); 321 328 }
+2
arch/x86/kernel/irqinit.c
··· 150 150 alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); 151 151 /* IPI for KVM to deliver interrupt to wake up tasks */ 152 152 alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi); 153 + /* IPI for KVM to deliver nested posted interrupt */ 154 + alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi); 153 155 #endif 154 156 155 157 /* IPI vectors for APIC spurious and error interrupts */
+14 -3
arch/x86/kvm/lapic.c
··· 1495 1495 1496 1496 static void cancel_hv_timer(struct kvm_lapic *apic) 1497 1497 { 1498 + WARN_ON(preemptible()); 1498 1499 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1499 - preempt_disable(); 1500 1500 kvm_x86_ops->cancel_hv_timer(apic->vcpu); 1501 1501 apic->lapic_timer.hv_timer_in_use = false; 1502 - preempt_enable(); 1503 1502 } 1504 1503 1505 1504 static bool start_hv_timer(struct kvm_lapic *apic) ··· 1506 1507 struct kvm_timer *ktimer = &apic->lapic_timer; 1507 1508 int r; 1508 1509 1510 + WARN_ON(preemptible()); 1509 1511 if (!kvm_x86_ops->set_hv_timer) 1510 1512 return false; 1511 1513 ··· 1538 1538 static void start_sw_timer(struct kvm_lapic *apic) 1539 1539 { 1540 1540 struct kvm_timer *ktimer = &apic->lapic_timer; 1541 + 1542 + WARN_ON(preemptible()); 1541 1543 if (apic->lapic_timer.hv_timer_in_use) 1542 1544 cancel_hv_timer(apic); 1543 1545 if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) ··· 1554 1552 1555 1553 static void restart_apic_timer(struct kvm_lapic *apic) 1556 1554 { 1555 + preempt_disable(); 1557 1556 if (!start_hv_timer(apic)) 1558 1557 start_sw_timer(apic); 1558 + preempt_enable(); 1559 1559 } 1560 1560 1561 1561 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) 1562 1562 { 1563 1563 struct kvm_lapic *apic = vcpu->arch.apic; 1564 1564 1565 - WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1565 + preempt_disable(); 1566 + /* If the preempt notifier has already run, it also called apic_timer_expired */ 1567 + if (!apic->lapic_timer.hv_timer_in_use) 1568 + goto out; 1566 1569 WARN_ON(swait_active(&vcpu->wq)); 1567 1570 cancel_hv_timer(apic); 1568 1571 apic_timer_expired(apic); ··· 1576 1569 advance_periodic_target_expiration(apic); 1577 1570 restart_apic_timer(apic); 1578 1571 } 1572 + out: 1573 + preempt_enable(); 1579 1574 } 1580 1575 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); 1581 1576 ··· 1591 1582 { 1592 1583 struct kvm_lapic *apic = vcpu->arch.apic; 1593 1584 1585 + preempt_disable(); 1594 1586 /* Possibly the TSC deadline timer is not enabled yet */ 1595 1587 if (apic->lapic_timer.hv_timer_in_use) 1596 1588 start_sw_timer(apic); 1589 + preempt_enable(); 1597 1590 } 1598 1591 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); 1599 1592
+13 -12
arch/x86/kvm/vmx.c
··· 563 563 struct kvm_vcpu vcpu; 564 564 unsigned long host_rsp; 565 565 u8 fail; 566 - bool nmi_known_unmasked; 567 566 u32 exit_intr_info; 568 567 u32 idt_vectoring_info; 569 568 ulong rflags; ··· 4987 4988 } 4988 4989 } 4989 4990 4990 - static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) 4991 + static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, 4992 + bool nested) 4991 4993 { 4992 4994 #ifdef CONFIG_SMP 4995 + int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; 4996 + 4993 4997 if (vcpu->mode == IN_GUEST_MODE) { 4994 4998 struct vcpu_vmx *vmx = to_vmx(vcpu); 4995 4999 ··· 5010 5008 */ 5011 5009 WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); 5012 5010 5013 - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), 5014 - POSTED_INTR_VECTOR); 5011 + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); 5015 5012 return true; 5016 5013 } 5017 5014 #endif ··· 5025 5024 if (is_guest_mode(vcpu) && 5026 5025 vector == vmx->nested.posted_intr_nv) { 5027 5026 /* the PIR and ON have been set by L1. */ 5028 - kvm_vcpu_trigger_posted_interrupt(vcpu); 5027 + kvm_vcpu_trigger_posted_interrupt(vcpu, true); 5029 5028 /* 5030 5029 * If a posted intr is not recognized by hardware, 5031 5030 * we will accomplish it in the next vmentry. ··· 5059 5058 if (pi_test_and_set_on(&vmx->pi_desc)) 5060 5059 return; 5061 5060 5062 - if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) 5061 + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) 5063 5062 kvm_vcpu_kick(vcpu); 5064 5063 } 5065 5064 ··· 10042 10041 vmcs12->vm_entry_instruction_len); 10043 10042 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 10044 10043 vmcs12->guest_interruptibility_info); 10044 + vmx->loaded_vmcs->nmi_known_unmasked = 10045 + !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); 10045 10046 } else { 10046 10047 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); 10047 10048 } ··· 10068 10065 10069 10066 /* Posted interrupts setting is only taken from vmcs12. */ 10070 10067 if (nested_cpu_has_posted_intr(vmcs12)) { 10071 - /* 10072 - * Note that we use L0's vector here and in 10073 - * vmx_deliver_nested_posted_interrupt. 10074 - */ 10075 10068 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; 10076 10069 vmx->nested.pi_pending = false; 10077 - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); 10070 + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); 10078 10071 } else { 10079 10072 exec_control &= ~PIN_BASED_POSTED_INTR; 10080 10073 } ··· 10941 10942 */ 10942 10943 vmx_flush_tlb(vcpu); 10943 10944 } 10944 - 10945 + /* Restore posted intr vector. */ 10946 + if (nested_cpu_has_posted_intr(vmcs12)) 10947 + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); 10945 10948 10946 10949 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); 10947 10950 vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
+2 -2
arch/x86/kvm/x86.c
··· 597 597 (unsigned long *)&vcpu->arch.regs_avail)) 598 598 return true; 599 599 600 - gfn = (kvm_read_cr3(vcpu) & ~31ul) >> PAGE_SHIFT; 601 - offset = (kvm_read_cr3(vcpu) & ~31ul) & (PAGE_SIZE - 1); 600 + gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT; 601 + offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1); 602 602 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), 603 603 PFERR_USER_MASK | PFERR_WRITE_MASK); 604 604 if (r < 0)
+1
include/linux/kvm_host.h
··· 445 445 struct kvm_stat_data **debugfs_stat_data; 446 446 struct srcu_struct srcu; 447 447 struct srcu_struct irq_srcu; 448 + pid_t userspace_pid; 448 449 }; 449 450 450 451 #define kvm_err(fmt, ...) \
+17 -5
tools/kvm/kvm_stat/kvm_stat
··· 474 474 @staticmethod 475 475 def is_field_wanted(fields_filter, field): 476 476 """Indicate whether field is valid according to fields_filter.""" 477 - if not fields_filter: 477 + if not fields_filter or fields_filter == "help": 478 478 return True 479 479 return re.match(fields_filter, field) is not None 480 480 ··· 1413 1413 1414 1414 Requirements: 1415 1415 - Access to: 1416 - /sys/kernel/debug/kvm 1417 - /sys/kernel/debug/trace/events/* 1416 + %s 1417 + %s/events/* 1418 1418 /proc/pid/task 1419 1419 - /proc/sys/kernel/perf_event_paranoid < 1 if user has no 1420 1420 CAP_SYS_ADMIN and perf events are used. ··· 1434 1434 s set update interval 1435 1435 x toggle reporting of stats for individual child trace events 1436 1436 Press any other key to refresh statistics immediately. 1437 - """ 1437 + """ % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING) 1438 1438 1439 1439 class PlainHelpFormatter(optparse.IndentedHelpFormatter): 1440 1440 def format_description(self, description): ··· 1496 1496 action='store', 1497 1497 default=DEFAULT_REGEX, 1498 1498 dest='fields', 1499 - help='fields to display (regex)', 1499 + help='''fields to display (regex) 1500 + "-f help" for a list of available events''', 1500 1501 ) 1501 1502 optparser.add_option('-p', '--pid', 1502 1503 action='store', ··· 1559 1558 sys.exit('Specified pid does not exist.') 1560 1559 1561 1560 stats = Stats(options) 1561 + 1562 + if options.fields == "help": 1563 + event_list = "\n" 1564 + s = stats.get() 1565 + for key in s.keys(): 1566 + if key.find('(') != -1: 1567 + key = key[0:key.find('(')] 1568 + if event_list.find('\n' + key + '\n') == -1: 1569 + event_list += key + '\n' 1570 + sys.stdout.write(event_list) 1571 + return "" 1562 1572 1563 1573 if options.log: 1564 1574 log(stats)
+11 -22
virt/kvm/kvm_main.c
··· 3883 3883 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) 3884 3884 { 3885 3885 struct kobj_uevent_env *env; 3886 - char *tmp, *pathbuf = NULL; 3887 3886 unsigned long long created, active; 3888 3887 3889 3888 if (!kvm_dev.this_device || !kvm) ··· 3906 3907 add_uevent_var(env, "CREATED=%llu", created); 3907 3908 add_uevent_var(env, "COUNT=%llu", active); 3908 3909 3909 - if (type == KVM_EVENT_CREATE_VM) 3910 + if (type == KVM_EVENT_CREATE_VM) { 3910 3911 add_uevent_var(env, "EVENT=create"); 3911 - else if (type == KVM_EVENT_DESTROY_VM) 3912 + kvm->userspace_pid = task_pid_nr(current); 3913 + } else if (type == KVM_EVENT_DESTROY_VM) { 3912 3914 add_uevent_var(env, "EVENT=destroy"); 3915 + } 3916 + add_uevent_var(env, "PID=%d", kvm->userspace_pid); 3913 3917 3914 3918 if (kvm->debugfs_dentry) { 3915 - char p[ITOA_MAX_LEN]; 3919 + char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); 3916 3920 3917 - snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name); 3918 - tmp = strchrnul(p + 1, '-'); 3919 - *tmp = '\0'; 3920 - add_uevent_var(env, "PID=%s", p); 3921 - pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); 3922 - if (pathbuf) { 3923 - /* sizeof counts the final '\0' */ 3924 - int len = sizeof("STATS_PATH=") - 1; 3925 - const char *pvar = "STATS_PATH="; 3926 - 3927 - tmp = dentry_path_raw(kvm->debugfs_dentry, 3928 - pathbuf + len, 3929 - PATH_MAX - len); 3930 - if (!IS_ERR(tmp)) { 3931 - memcpy(tmp - len, pvar, len); 3932 - env->envp[env->envp_idx++] = tmp - len; 3933 - } 3921 + if (p) { 3922 + tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); 3923 + if (!IS_ERR(tmp)) 3924 + add_uevent_var(env, "STATS_PATH=%s", tmp); 3925 + kfree(p); 3934 3926 } 3935 3927 } 3936 3928 /* no need for checks, since we are adding at most only 5 keys */ 3937 3929 env->envp[env->envp_idx++] = NULL; 3938 3930 kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); 3939 3931 kfree(env); 3940 - kfree(pathbuf); 3941 3932 } 3942 3933 3943 3934 static int kvm_init_debug(void)