Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

KVM: x86: KVM_CAP_SYNC_REGS

This commit implements an enhanced x86 version of S390
KVM_CAP_SYNC_REGS functionality. KVM_CAP_SYNC_REGS "allow[s]
userspace to access certain guest registers without having
to call SET/GET_*REGS". This reduces ioctl overhead which
is particularly important when userspace is making synchronous
guest state modifications (e.g. when emulating and/or intercepting
instructions).

This capability was originally implemented upstream for the S390; the x86
differences follow:
- userspace can select the register sets to be synchronized with kvm_run
using bit-flags in the kvm_valid_regs and kvm_dirty_regs
fields.
- vcpu_events is available in addition to the regs and sregs register
sets.

Signed-off-by: Ken Hofsass <hofsass@google.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
[Removed wrapper around check for reserved kvm_valid_regs. - Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>

authored by

Ken Hofsass and committed by
Radim Krčmář
01643c51 7b7e3952

+145 -16
+40
Documentation/virtual/kvm/api.txt
··· 4110 4110 accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from 4111 4111 the guest. 4112 4112 4113 + 6.74 KVM_CAP_SYNC_REGS 4114 + Architectures: s390, x86 4115 + Target: s390: always enabled, x86: vcpu 4116 + Parameters: none 4117 + Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register 4118 + sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h). 4119 + 4120 + As described above in the kvm_sync_regs struct info in section 5 (kvm_run): 4121 + KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers 4122 + without having to call SET/GET_*REGS". This reduces overhead by eliminating 4123 + repeated ioctl calls for setting and/or getting register values. This is 4124 + particularly important when userspace is making synchronous guest state 4125 + modifications, e.g. when emulating and/or intercepting instructions in 4126 + userspace. 4127 + 4128 + For s390 specifics, please refer to the source code. 4129 + 4130 + For x86: 4131 + - the register sets to be copied out to kvm_run are selectable 4132 + by userspace (rather than all sets being copied out for every exit). 4133 + - vcpu_events are available in addition to regs and sregs. 4134 + 4135 + For x86, the 'kvm_valid_regs' field of struct kvm_run is overloaded to 4136 + function as an input bit-array field set by userspace to indicate the 4137 + specific register sets to be copied out on the next exit. 4138 + 4139 + To indicate when userspace has modified values that should be copied into 4140 + the vCPU, the all-architecture bit-array field 'kvm_dirty_regs' must be set. 4141 + This is done using the same bitflags as for the 'kvm_valid_regs' field. 4142 + If the dirty bit is not set, then the register set values will not be copied 4143 + into the vCPU even if they've been modified. 4144 + 4145 + Unused bitfields in the bitarrays must be set to zero. 
4146 + 4147 + struct kvm_sync_regs { 4148 + struct kvm_regs regs; 4149 + struct kvm_sregs sregs; 4150 + struct kvm_vcpu_events events; 4151 + }; 4152 + 4113 4153 7. Capabilities that can be enabled on VMs 4114 4154 ------------------------------------------ 4115 4155
+18 -1
arch/x86/include/uapi/asm/kvm.h
··· 354 354 __u64 padding[16]; 355 355 }; 356 356 357 - /* definition of registers in kvm_run */ 357 + #define KVM_SYNC_X86_REGS (1UL << 0) 358 + #define KVM_SYNC_X86_SREGS (1UL << 1) 359 + #define KVM_SYNC_X86_EVENTS (1UL << 2) 360 + 361 + #define KVM_SYNC_X86_VALID_FIELDS \ 362 + (KVM_SYNC_X86_REGS| \ 363 + KVM_SYNC_X86_SREGS| \ 364 + KVM_SYNC_X86_EVENTS) 365 + 366 + /* kvm_sync_regs struct included by kvm_run struct */ 358 367 struct kvm_sync_regs { 368 + /* Members of this structure are potentially malicious. 369 + * Care must be taken by code reading, esp. interpreting, 370 + * data fields from them inside KVM to prevent TOCTOU and 371 + * double-fetch types of vulnerabilities. 372 + */ 373 + struct kvm_regs regs; 374 + struct kvm_sregs sregs; 375 + struct kvm_vcpu_events events; 359 376 }; 360 377 361 378 #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
+87 -15
arch/x86/kvm/x86.c
··· 102 102 static void process_nmi(struct kvm_vcpu *vcpu); 103 103 static void enter_smm(struct kvm_vcpu *vcpu); 104 104 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); 105 + static void store_regs(struct kvm_vcpu *vcpu); 106 + static int sync_regs(struct kvm_vcpu *vcpu); 105 107 106 108 struct kvm_x86_ops *kvm_x86_ops __read_mostly; 107 109 EXPORT_SYMBOL_GPL(kvm_x86_ops); ··· 2830 2828 case KVM_CAP_IMMEDIATE_EXIT: 2831 2829 case KVM_CAP_GET_MSR_FEATURES: 2832 2830 r = 1; 2831 + break; 2832 + case KVM_CAP_SYNC_REGS: 2833 + r = KVM_SYNC_X86_VALID_FIELDS; 2833 2834 break; 2834 2835 case KVM_CAP_ADJUST_CLOCK: 2835 2836 r = KVM_CLOCK_TSC_STABLE; ··· 7515 7510 return 0; 7516 7511 } 7517 7512 7518 - 7519 7513 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 7520 7514 { 7521 7515 int r; ··· 7538 7534 ++vcpu->stat.signal_exits; 7539 7535 } 7540 7536 goto out; 7537 + } 7538 + 7539 + if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { 7540 + r = -EINVAL; 7541 + goto out; 7542 + } 7543 + 7544 + if (vcpu->run->kvm_dirty_regs) { 7545 + r = sync_regs(vcpu); 7546 + if (r != 0) 7547 + goto out; 7541 7548 } 7542 7549 7543 7550 /* re-sync apic's tpr */ ··· 7575 7560 7576 7561 out: 7577 7562 kvm_put_guest_fpu(vcpu); 7563 + if (vcpu->run->kvm_valid_regs) 7564 + store_regs(vcpu); 7578 7565 post_kvm_run_save(vcpu); 7579 7566 kvm_sigset_deactivate(vcpu); 7580 7567 ··· 7584 7567 return r; 7585 7568 } 7586 7569 7587 - int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 7570 + static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 7588 7571 { 7589 - vcpu_load(vcpu); 7590 - 7591 7572 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { 7592 7573 /* 7593 7574 * We are here if userspace calls get_regs() in the middle of ··· 7618 7603 7619 7604 regs->rip = kvm_rip_read(vcpu); 7620 7605 regs->rflags = kvm_get_rflags(vcpu); 7606 + } 7621 7607 7608 + int kvm_arch_vcpu_ioctl_get_regs(struct 
kvm_vcpu *vcpu, struct kvm_regs *regs) 7609 + { 7610 + vcpu_load(vcpu); 7611 + __get_regs(vcpu, regs); 7622 7612 vcpu_put(vcpu); 7623 7613 return 0; 7624 7614 } 7625 7615 7626 - int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 7616 + static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 7627 7617 { 7628 - vcpu_load(vcpu); 7629 - 7630 7618 vcpu->arch.emulate_regs_need_sync_from_vcpu = true; 7631 7619 vcpu->arch.emulate_regs_need_sync_to_vcpu = false; 7632 7620 ··· 7658 7640 vcpu->arch.exception.pending = false; 7659 7641 7660 7642 kvm_make_request(KVM_REQ_EVENT, vcpu); 7643 + } 7661 7644 7645 + int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 7646 + { 7647 + vcpu_load(vcpu); 7648 + __set_regs(vcpu, regs); 7662 7649 vcpu_put(vcpu); 7663 7650 return 0; 7664 7651 } ··· 7678 7655 } 7679 7656 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); 7680 7657 7681 - int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 7682 - struct kvm_sregs *sregs) 7658 + static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 7683 7659 { 7684 7660 struct desc_ptr dt; 7685 - 7686 - vcpu_load(vcpu); 7687 7661 7688 7662 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 7689 7663 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); ··· 7712 7692 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) 7713 7693 set_bit(vcpu->arch.interrupt.nr, 7714 7694 (unsigned long *)sregs->interrupt_bitmap); 7695 + } 7715 7696 7697 + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 7698 + struct kvm_sregs *sregs) 7699 + { 7700 + vcpu_load(vcpu); 7701 + __get_sregs(vcpu, sregs); 7716 7702 vcpu_put(vcpu); 7717 7703 return 0; 7718 7704 } ··· 7813 7787 return 0; 7814 7788 } 7815 7789 7816 - int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 7817 - struct kvm_sregs *sregs) 7790 + static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 7818 7791 { 7819 7792 struct msr_data apic_base_msr; 7820 7793 int 
mmu_reset_needed = 0; 7821 7794 int pending_vec, max_bits, idx; 7822 7795 struct desc_ptr dt; 7823 7796 int ret = -EINVAL; 7824 - 7825 - vcpu_load(vcpu); 7826 7797 7827 7798 if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && 7828 7799 (sregs->cr4 & X86_CR4_OSXSAVE)) ··· 7899 7876 7900 7877 ret = 0; 7901 7878 out: 7879 + return ret; 7880 + } 7881 + 7882 + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 7883 + struct kvm_sregs *sregs) 7884 + { 7885 + int ret; 7886 + 7887 + vcpu_load(vcpu); 7888 + ret = __set_sregs(vcpu, sregs); 7902 7889 vcpu_put(vcpu); 7903 7890 return ret; 7904 7891 } ··· 8032 7999 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); 8033 8000 8034 8001 vcpu_put(vcpu); 8002 + return 0; 8003 + } 8004 + 8005 + static void store_regs(struct kvm_vcpu *vcpu) 8006 + { 8007 + BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES); 8008 + 8009 + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS) 8010 + __get_regs(vcpu, &vcpu->run->s.regs.regs); 8011 + 8012 + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS) 8013 + __get_sregs(vcpu, &vcpu->run->s.regs.sregs); 8014 + 8015 + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS) 8016 + kvm_vcpu_ioctl_x86_get_vcpu_events( 8017 + vcpu, &vcpu->run->s.regs.events); 8018 + } 8019 + 8020 + static int sync_regs(struct kvm_vcpu *vcpu) 8021 + { 8022 + if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) 8023 + return -EINVAL; 8024 + 8025 + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { 8026 + __set_regs(vcpu, &vcpu->run->s.regs.regs); 8027 + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; 8028 + } 8029 + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) { 8030 + if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs)) 8031 + return -EINVAL; 8032 + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS; 8033 + } 8034 + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) { 8035 + if (kvm_vcpu_ioctl_x86_set_vcpu_events( 8036 + vcpu, &vcpu->run->s.regs.events)) 8037 + return -EINVAL; 8038 + 
vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS; 8039 + } 8040 + 8035 8041 return 0; 8036 8042 } 8037 8043