Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:

- Doc fixes

- selftests fixes

- Add runstate information to the new Xen support

- Allow compiling out the Xen interface

- 32-bit PAE without EPT bugfix

- NULL pointer dereference bugfix

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: SVM: Clear the CR4 register on reset
KVM: x86/xen: Add support for vCPU runstate information
KVM: x86/xen: Fix return code when clearing vcpu_info and vcpu_time_info
selftests: kvm: Mmap the entire vcpu mmap area
KVM: Documentation: Fix index for KVM_CAP_PPC_DAWR1
KVM: x86: allow compiling out the Xen hypercall interface
KVM: xen: flush deferred static key before checking it
KVM: x86/mmu: Set SPTE_AD_WRPROT_ONLY_MASK if and only if PML is enabled
KVM: x86: hyper-v: Fix Hyper-V context null-ptr-deref
KVM: x86: remove misplaced comment on active_mmu_pages
KVM: Documentation: rectify rst markup in kvm_run->flags
Documentation: kvm: fix messy conversion from .txt to .rst

Linus Torvalds 5 years ago cee407c5 c5a58f87

+633 -76

13 changed files

expand all collapse all

Documentation

virt

kvm

api.rst

arch

x86

include

asm

kvm_host.h

kvm

Kconfig

Makefile

hyperv.c

mmu

mmu_internal.h

svm

svm.c

x86.c

xen.c

xen.h

include

uapi

linux

kvm.h

tools

testing

selftests

kvm

lib

kvm_util.c

x86_64

xen_shinfo_test.c

+60 -55

Documentation/virt/kvm/api.rst

reviewed

··· 3856 3856 -EFAULT if struct kvm_reinject_control cannot be read, 3857 3857 -EINVAL if the supplied shift or flags are invalid, 3858 3858 -ENOMEM if unable to allocate the new HPT, 3859 3859 - -ENOSPC if there was a hash collision 3860 3860 - 3861 3861 - :: 3862 3862 - 3863 3863 - struct kvm_ppc_rmmu_info { 3864 3864 - struct kvm_ppc_radix_geom { 3865 3865 - __u8 page_shift; 3866 3866 - __u8 level_bits[4]; 3867 3867 - __u8 pad[3]; 3868 3868 - } geometries[8]; 3869 3869 - __u32 ap_encodings[8]; 3870 3870 - }; 3871 3871 - 3872 3872 - The geometries[] field gives up to 8 supported geometries for the 3873 3873 - radix page table, in terms of the log base 2 of the smallest page 3874 3874 - size, and the number of bits indexed at each level of the tree, from 3875 3875 - the PTE level up to the PGD level in that order. Any unused entries 3876 3876 - will have 0 in the page_shift field. 3877 3877 - 3878 3878 - The ap_encodings gives the supported page sizes and their AP field 3879 3879 - encodings, encoded with the AP value in the top 3 bits and the log 3880 3880 - base 2 of the page size in the bottom 6 bits. 3881 3881 - 3882 3882 - 4.102 KVM_PPC_RESIZE_HPT_PREPARE 3883 3883 - -------------------------------- 3884 3884 - 3885 3885 - :Capability: KVM_CAP_SPAPR_RESIZE_HPT 3886 3886 - :Architectures: powerpc 3887 3887 - :Type: vm ioctl 3888 3888 - :Parameters: struct kvm_ppc_resize_hpt (in) 3889 3889 - :Returns: 0 on successful completion, 3890 3890 - >0 if a new HPT is being prepared, the value is an estimated 3891 3891 - number of milliseconds until preparation is complete, 3892 3892 - -EFAULT if struct kvm_reinject_control cannot be read, 3893 3893 - -EINVAL if the supplied shift or flags are invalid,when moving existing 3894 3894 - HPT entries to the new HPT, 3895 3895 - -EIO on other error conditions 3896 3859 3897 3860 Used to implement the PAPR extension for runtime resizing of a guest's 3898 3861 Hashed Page Table (HPT). 
Specifically this starts, stops or monitors 3899 3862 the preparation of a new potential HPT for the guest, essentially 3900 3863 implementing the H_RESIZE_HPT_PREPARE hypercall. 3864 3864 + 3865 3865 + :: 3866 3866 + 3867 3867 + struct kvm_ppc_resize_hpt { 3868 3868 + __u64 flags; 3869 3869 + __u32 shift; 3870 3870 + __u32 pad; 3871 3871 + }; 3901 3872 3902 3873 If called with shift > 0 when there is no pending HPT for the guest, 3903 3874 this begins preparation of a new pending HPT of size 2^(shift) bytes. ··· 3897 3926 it returns <= 0. The first call will initiate preparation, subsequent 3898 3927 ones will monitor preparation until it completes or fails. 3899 3928 3900 3900 - :: 3901 3901 - 3902 3902 - struct kvm_ppc_resize_hpt { 3903 3903 - __u64 flags; 3904 3904 - __u32 shift; 3905 3905 - __u32 pad; 3906 3906 - }; 3907 3907 - 3908 3929 4.103 KVM_PPC_RESIZE_HPT_COMMIT 3909 3930 ------------------------------- 3910 3931 ··· 3919 3956 transferred to working with the new HPT, essentially implementing the 3920 3957 H_RESIZE_HPT_COMMIT hypercall. 3921 3958 3959 3959 + :: 3960 3960 + 3961 3961 + struct kvm_ppc_resize_hpt { 3962 3962 + __u64 flags; 3963 3963 + __u32 shift; 3964 3964 + __u32 pad; 3965 3965 + }; 3966 3966 + 3922 3967 This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has 3923 3968 returned 0 with the same parameters. In other cases 3924 3969 KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or ··· 3941 3970 HPT and the previous HPT will be discarded. 3942 3971 3943 3972 On failure, the guest will still be operating on its previous HPT. 
3944 3944 - 3945 3945 - :: 3946 3946 - 3947 3947 - struct kvm_ppc_resize_hpt { 3948 3948 - __u64 flags; 3949 3949 - __u32 shift; 3950 3950 - __u32 pad; 3951 3951 - }; 3952 3973 3953 3974 4.104 KVM_X86_GET_MCE_CAP_SUPPORTED 3954 3975 ----------------------------------- ··· 4878 4915 union { 4879 4916 __u64 gpa; 4880 4917 __u64 pad[4]; 4918 4918 + struct { 4919 4919 + __u64 state; 4920 4920 + __u64 state_entry_time; 4921 4921 + __u64 time_running; 4922 4922 + __u64 time_runnable; 4923 4923 + __u64 time_blocked; 4924 4924 + __u64 time_offline; 4925 4925 + } runstate; 4881 4926 } u; 4882 4927 }; 4883 4928 ··· 4898 4927 Sets the guest physical address of an additional pvclock structure 4899 4928 for a given vCPU. This is typically used for guest vsyscall support. 4900 4929 4930 4930 + KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 4931 4931 + Sets the guest physical address of the vcpu_runstate_info for a given 4932 4932 + vCPU. This is how a Xen guest tracks CPU state such as steal time. 4933 4933 + 4934 4934 + KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 4935 4935 + Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of 4936 4936 + the given vCPU from the .u.runstate.state member of the structure. 4937 4937 + KVM automatically accounts running and runnable time but blocked 4938 4938 + and offline states are only entered explicitly. 4939 4939 + 4940 4940 + KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 4941 4941 + Sets all fields of the vCPU runstate data from the .u.runstate member 4942 4942 + of the structure, including the current runstate. The state_entry_time 4943 4943 + must equal the sum of the other four times. 4944 4944 + 4945 4945 + KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 4946 4946 + This *adds* the contents of the .u.runstate members of the structure 4947 4947 + to the corresponding members of the given vCPU's runstate data, thus 4948 4948 + permitting atomic adjustments to the runstate times. 
The adjustment 4949 4949 + to the state_entry_time must equal the sum of the adjustments to the 4950 4950 + other four times. The state field must be set to -1, or to a valid 4951 4951 + runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked 4952 4952 + or RUNSTATE_offline) to set the current accounted state as of the 4953 4953 + adjusted state_entry_time. 4954 4954 + 4901 4955 4.130 KVM_XEN_VCPU_GET_ATTR 4902 4956 --------------------------- 4903 4957 ··· 4934 4938 4935 4939 Allows Xen vCPU attributes to be read. For the structure and types, 4936 4940 see KVM_XEN_VCPU_SET_ATTR above. 4941 4941 + 4942 4942 + The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used 4943 4943 + with the KVM_XEN_VCPU_GET_ATTR ioctl. 4937 4944 4938 4945 5. The kvm_run structure 4939 4946 ======================== ··· 4999 5000 __u16 flags; 5000 5001 5001 5002 More architecture-specific flags detailing state of the VCPU that may 5002 5002 - affect the device's behavior. Current defined flags: 5003 5003 + affect the device's behavior. Current defined flags:: 5004 5004 + 5003 5005 /* x86, set if the VCPU is in system management mode */ 5004 5006 #define KVM_RUN_X86_SMM (1 << 0) 5005 5007 /* x86, set if bus lock detected in VM */ ··· 6217 6217 notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons. 6218 6218 KVM_RUN_BUS_LOCK flag is used to distinguish between them. 6219 6219 6220 6220 - 7.22 KVM_CAP_PPC_DAWR1 6220 6220 + 7.23 KVM_CAP_PPC_DAWR1 6221 6221 ---------------------- 6222 6222 6223 6223 :Architectures: ppc ··· 6702 6702 #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) 6703 6703 #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) 6704 6704 #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) 6705 6705 + #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) 6705 6706 6706 6707 The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG 6707 6708 ioctl is available, for the guest to set its hypercall page. 
··· 6717 6716 KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors 6718 6717 for event channel upcalls when the evtchn_upcall_pending field of a vcpu's 6719 6718 vcpu_info is set. 6719 6719 + 6720 6720 + The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related 6721 6721 + features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are 6722 6722 + supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.

+6 -3

arch/x86/include/asm/kvm_host.h

reviewed

··· 535 535 /* Xen HVM per vcpu emulation context */ 536 536 struct kvm_vcpu_xen { 537 537 u64 hypercall_rip; 538 538 + u32 current_runstate; 538 539 bool vcpu_info_set; 539 540 bool vcpu_time_info_set; 541 541 + bool runstate_set; 540 542 struct gfn_to_hva_cache vcpu_info_cache; 541 543 struct gfn_to_hva_cache vcpu_time_info_cache; 544 544 + struct gfn_to_hva_cache runstate_cache; 545 545 + u64 last_steal; 546 546 + u64 runstate_entry_time; 547 547 + u64 runstate_times[4]; 542 548 }; 543 549 544 550 struct kvm_vcpu_arch { ··· 945 939 unsigned int indirect_shadow_pages; 946 940 u8 mmu_valid_gen; 947 941 struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; 948 948 - /* 949 949 - * Hash table of struct kvm_mmu_page. 950 950 - */ 951 942 struct list_head active_mmu_pages; 952 943 struct list_head zapped_obsolete_pages; 953 944 struct list_head lpage_disallowed_mmu_pages;

arch/x86/kvm/Kconfig

reviewed

··· 103 103 Provides support for launching Encrypted VMs (SEV) and Encrypted VMs 104 104 with Encrypted State (SEV-ES) on AMD processors. 105 105 106 106 + config KVM_XEN 107 107 + bool "Support for Xen hypercall interface" 108 108 + depends on KVM 109 109 + help 110 110 + Provides KVM support for hosting Xen HVM guests and 111 111 + passing Xen hypercalls to userspace. 112 112 + 113 113 + If in doubt, say "N". 114 114 + 106 115 config KVM_MMU_AUDIT 107 116 bool "Audit KVM MMU" 108 117 depends on KVM && TRACEPOINTS

+2 -1

arch/x86/kvm/Makefile

reviewed

··· 14 14 $(KVM)/dirty_ring.o 15 15 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o 16 16 17 17 - kvm-y += x86.o emulate.o i8259.o irq.o lapic.o xen.o \ 17 17 + kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ 18 18 i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ 19 19 hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \ 20 20 mmu/spte.o 21 21 kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o 22 22 + kvm-$(CONFIG_KVM_XEN) += xen.o 22 23 23 24 kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ 24 25 vmx/evmcs.o vmx/nested.o vmx/posted_intr.o

+1 -1

arch/x86/kvm/hyperv.c

reviewed

··· 159 159 struct kvm_vcpu_hv_synic *synic; 160 160 161 161 vcpu = get_vcpu_by_vpidx(kvm, vpidx); 162 162 - if (!vcpu) 162 162 + if (!vcpu || !to_hv_vcpu(vcpu)) 163 163 return NULL; 164 164 synic = to_hv_synic(vcpu); 165 165 return (synic->active) ? synic : NULL;

+8 -8

arch/x86/kvm/mmu/mmu_internal.h

reviewed

··· 81 81 static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) 82 82 { 83 83 /* 84 84 - * When using the EPT page-modification log, the GPAs in the log 85 85 - * would come from L2 rather than L1. Therefore, we need to rely 86 86 - * on write protection to record dirty pages. This also bypasses 87 87 - * PML, since writes now result in a vmexit. Note, this helper will 88 88 - * tag SPTEs as needing write-protection even if PML is disabled or 89 89 - * unsupported, but that's ok because the tag is consumed if and only 90 90 - * if PML is enabled. Omit the PML check to save a few uops. 84 84 + * When using the EPT page-modification log, the GPAs in the CPU dirty 85 85 + * log would come from L2 rather than L1. Therefore, we need to rely 86 86 + * on write protection to record dirty pages, which bypasses PML, since 87 87 + * writes now result in a vmexit. Note, the check on CPU dirty logging 88 88 + * being enabled is mandatory as the bits used to denote WP-only SPTEs 89 89 + * are reserved for NPT w/ PAE (32-bit KVM). 91 90 */ 92 92 - return vcpu->arch.mmu == &vcpu->arch.guest_mmu; 91 91 + return vcpu->arch.mmu == &vcpu->arch.guest_mmu && 92 92 + kvm_x86_ops.cpu_dirty_log_size; 93 93 } 94 94 95 95 bool is_nx_huge_page_enabled(void);

arch/x86/kvm/svm/svm.c

reviewed

··· 1200 1200 init_sys_seg(&save->ldtr, SEG_TYPE_LDT); 1201 1201 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); 1202 1202 1203 1203 + svm_set_cr4(&svm->vcpu, 0); 1203 1204 svm_set_efer(&svm->vcpu, 0); 1204 1205 save->dr6 = 0xffff0ff0; 1205 1206 kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);

+21 -1

arch/x86/kvm/x86.c

reviewed

··· 2957 2957 struct kvm_host_map map; 2958 2958 struct kvm_steal_time *st; 2959 2959 2960 2960 + if (kvm_xen_msr_enabled(vcpu->kvm)) { 2961 2961 + kvm_xen_runstate_set_running(vcpu); 2962 2962 + return; 2963 2963 + } 2964 2964 + 2960 2965 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) 2961 2966 return; 2962 2967 ··· 3761 3756 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: 3762 3757 r = 1; 3763 3758 break; 3759 3759 + #ifdef CONFIG_KVM_XEN 3764 3760 case KVM_CAP_XEN_HVM: 3765 3761 r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR | 3766 3762 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | 3767 3763 KVM_XEN_HVM_CONFIG_SHARED_INFO; 3764 3764 + if (sched_info_on()) 3765 3765 + r |= KVM_XEN_HVM_CONFIG_RUNSTATE; 3768 3766 break; 3767 3767 + #endif 3769 3768 case KVM_CAP_SYNC_REGS: 3770 3769 r = KVM_SYNC_X86_VALID_FIELDS; 3771 3770 break; ··· 4047 4038 if (vcpu->preempted && !vcpu->arch.guest_state_protected) 4048 4039 vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); 4049 4040 4050 4050 - kvm_steal_time_set_preempted(vcpu); 4041 4041 + if (kvm_xen_msr_enabled(vcpu->kvm)) 4042 4042 + kvm_xen_runstate_set_preempted(vcpu); 4043 4043 + else 4044 4044 + kvm_steal_time_set_preempted(vcpu); 4045 4045 + 4051 4046 static_call(kvm_x86_vcpu_put)(vcpu); 4052 4047 vcpu->arch.last_host_tsc = rdtsc(); 4053 4048 /* ··· 5026 5013 case KVM_GET_SUPPORTED_HV_CPUID: 5027 5014 r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp); 5028 5015 break; 5016 5016 + #ifdef CONFIG_KVM_XEN 5029 5017 case KVM_XEN_VCPU_GET_ATTR: { 5030 5018 struct kvm_xen_vcpu_attr xva; 5031 5019 ··· 5047 5033 r = kvm_xen_vcpu_set_attr(vcpu, &xva); 5048 5034 break; 5049 5035 } 5036 5036 + #endif 5050 5037 default: 5051 5038 r = -EINVAL; 5052 5039 } ··· 5669 5654 kvm->arch.bsp_vcpu_id = arg; 5670 5655 mutex_unlock(&kvm->lock); 5671 5656 break; 5657 5657 + #ifdef CONFIG_KVM_XEN 5672 5658 case KVM_XEN_HVM_CONFIG: { 5673 5659 struct kvm_xen_hvm_config xhc; 5674 5660 r = -EFAULT; ··· 5698 5682 r = kvm_xen_hvm_set_attr(kvm, &xha); 5699 5683 
break; 5700 5684 } 5685 5685 + #endif 5701 5686 case KVM_SET_CLOCK: { 5702 5687 struct kvm_clock_data user_ns; 5703 5688 u64 now_ns; ··· 8057 8040 kvm_mmu_module_exit(); 8058 8041 free_percpu(user_return_msrs); 8059 8042 kmem_cache_destroy(x86_fpu_cache); 8043 8043 + #ifdef CONFIG_KVM_XEN 8044 8044 + static_key_deferred_flush(&kvm_xen_enabled); 8060 8045 WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key)); 8046 8046 + #endif 8061 8047 } 8062 8048 8063 8049 static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)

+290

arch/x86/kvm/xen.c

reviewed

··· 11 11 #include "hyperv.h" 12 12 13 13 #include <linux/kvm_host.h> 14 14 + #include <linux/sched/stat.h> 14 15 15 16 #include <trace/events/kvm.h> 16 17 #include <xen/interface/xen.h> 18 18 + #include <xen/interface/vcpu.h> 17 19 18 20 #include "trace.h" 19 21 ··· 61 59 out: 62 60 srcu_read_unlock(&kvm->srcu, idx); 63 61 return ret; 62 62 + } 63 63 + 64 64 + static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) 65 65 + { 66 66 + struct kvm_vcpu_xen *vx = &v->arch.xen; 67 67 + u64 now = get_kvmclock_ns(v->kvm); 68 68 + u64 delta_ns = now - vx->runstate_entry_time; 69 69 + u64 run_delay = current->sched_info.run_delay; 70 70 + 71 71 + if (unlikely(!vx->runstate_entry_time)) 72 72 + vx->current_runstate = RUNSTATE_offline; 73 73 + 74 74 + /* 75 75 + * Time waiting for the scheduler isn't "stolen" if the 76 76 + * vCPU wasn't running anyway. 77 77 + */ 78 78 + if (vx->current_runstate == RUNSTATE_running) { 79 79 + u64 steal_ns = run_delay - vx->last_steal; 80 80 + 81 81 + delta_ns -= steal_ns; 82 82 + 83 83 + vx->runstate_times[RUNSTATE_runnable] += steal_ns; 84 84 + } 85 85 + vx->last_steal = run_delay; 86 86 + 87 87 + vx->runstate_times[vx->current_runstate] += delta_ns; 88 88 + vx->current_runstate = state; 89 89 + vx->runstate_entry_time = now; 90 90 + } 91 91 + 92 92 + void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) 93 93 + { 94 94 + struct kvm_vcpu_xen *vx = &v->arch.xen; 95 95 + uint64_t state_entry_time; 96 96 + unsigned int offset; 97 97 + 98 98 + kvm_xen_update_runstate(v, state); 99 99 + 100 100 + if (!vx->runstate_set) 101 101 + return; 102 102 + 103 103 + BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); 104 104 + 105 105 + offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time); 106 106 + #ifdef CONFIG_X86_64 107 107 + /* 108 108 + * The only difference is alignment of uint64_t in 32-bit. 
109 109 + * So the first field 'state' is accessed directly using 110 110 + * offsetof() (where its offset happens to be zero), while the 111 111 + * remaining fields which are all uint64_t, start at 'offset' 112 112 + * which we tweak here by adding 4. 113 113 + */ 114 114 + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != 115 115 + offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); 116 116 + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != 117 117 + offsetof(struct compat_vcpu_runstate_info, time) + 4); 118 118 + 119 119 + if (v->kvm->arch.xen.long_mode) 120 120 + offset = offsetof(struct vcpu_runstate_info, state_entry_time); 121 121 + #endif 122 122 + /* 123 123 + * First write the updated state_entry_time at the appropriate 124 124 + * location determined by 'offset'. 125 125 + */ 126 126 + state_entry_time = vx->runstate_entry_time; 127 127 + state_entry_time |= XEN_RUNSTATE_UPDATE; 128 128 + 129 129 + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) != 130 130 + sizeof(state_entry_time)); 131 131 + BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) != 132 132 + sizeof(state_entry_time)); 133 133 + 134 134 + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, 135 135 + &state_entry_time, offset, 136 136 + sizeof(state_entry_time))) 137 137 + return; 138 138 + smp_wmb(); 139 139 + 140 140 + /* 141 141 + * Next, write the new runstate. This is in the *same* place 142 142 + * for 32-bit and 64-bit guests, asserted here for paranoia. 
143 143 + */ 144 144 + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 145 145 + offsetof(struct compat_vcpu_runstate_info, state)); 146 146 + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) != 147 147 + sizeof(vx->current_runstate)); 148 148 + BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) != 149 149 + sizeof(vx->current_runstate)); 150 150 + 151 151 + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, 152 152 + &vx->current_runstate, 153 153 + offsetof(struct vcpu_runstate_info, state), 154 154 + sizeof(vx->current_runstate))) 155 155 + return; 156 156 + 157 157 + /* 158 158 + * Write the actual runstate times immediately after the 159 159 + * runstate_entry_time. 160 160 + */ 161 161 + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != 162 162 + offsetof(struct vcpu_runstate_info, time) - sizeof(u64)); 163 163 + BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != 164 164 + offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64)); 165 165 + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != 166 166 + sizeof(((struct compat_vcpu_runstate_info *)0)->time)); 167 167 + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != 168 168 + sizeof(vx->runstate_times)); 169 169 + 170 170 + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, 171 171 + &vx->runstate_times[0], 172 172 + offset + sizeof(u64), 173 173 + sizeof(vx->runstate_times))) 174 174 + return; 175 175 + 176 176 + smp_wmb(); 177 177 + 178 178 + /* 179 179 + * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's 180 180 + * runstate_entry_time field. 
181 181 + */ 182 182 + 183 183 + state_entry_time &= ~XEN_RUNSTATE_UPDATE; 184 184 + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, 185 185 + &state_entry_time, offset, 186 186 + sizeof(state_entry_time))) 187 187 + return; 64 188 } 65 189 66 190 int __kvm_xen_has_interrupt(struct kvm_vcpu *v) ··· 315 187 /* No compat necessary here. */ 316 188 BUILD_BUG_ON(sizeof(struct vcpu_info) != 317 189 sizeof(struct compat_vcpu_info)); 190 190 + BUILD_BUG_ON(offsetof(struct vcpu_info, time) != 191 191 + offsetof(struct compat_vcpu_info, time)); 318 192 319 193 if (data->u.gpa == GPA_INVALID) { 320 194 vcpu->arch.xen.vcpu_info_set = false; 195 195 + r = 0; 321 196 break; 322 197 } 323 198 ··· 337 206 case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: 338 207 if (data->u.gpa == GPA_INVALID) { 339 208 vcpu->arch.xen.vcpu_time_info_set = false; 209 209 + r = 0; 340 210 break; 341 211 } 342 212 ··· 349 217 vcpu->arch.xen.vcpu_time_info_set = true; 350 218 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); 351 219 } 220 220 + break; 221 221 + 222 222 + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: 223 223 + if (!sched_info_on()) { 224 224 + r = -EOPNOTSUPP; 225 225 + break; 226 226 + } 227 227 + if (data->u.gpa == GPA_INVALID) { 228 228 + vcpu->arch.xen.runstate_set = false; 229 229 + r = 0; 230 230 + break; 231 231 + } 232 232 + 233 233 + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, 234 234 + &vcpu->arch.xen.runstate_cache, 235 235 + data->u.gpa, 236 236 + sizeof(struct vcpu_runstate_info)); 237 237 + if (!r) { 238 238 + vcpu->arch.xen.runstate_set = true; 239 239 + } 240 240 + break; 241 241 + 242 242 + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: 243 243 + if (!sched_info_on()) { 244 244 + r = -EOPNOTSUPP; 245 245 + break; 246 246 + } 247 247 + if (data->u.runstate.state > RUNSTATE_offline) { 248 248 + r = -EINVAL; 249 249 + break; 250 250 + } 251 251 + 252 252 + kvm_xen_update_runstate(vcpu, data->u.runstate.state); 253 253 + r = 0; 254 254 + break; 255 255 + 256 256 + 
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: 257 257 + if (!sched_info_on()) { 258 258 + r = -EOPNOTSUPP; 259 259 + break; 260 260 + } 261 261 + if (data->u.runstate.state > RUNSTATE_offline) { 262 262 + r = -EINVAL; 263 263 + break; 264 264 + } 265 265 + if (data->u.runstate.state_entry_time != 266 266 + (data->u.runstate.time_running + 267 267 + data->u.runstate.time_runnable + 268 268 + data->u.runstate.time_blocked + 269 269 + data->u.runstate.time_offline)) { 270 270 + r = -EINVAL; 271 271 + break; 272 272 + } 273 273 + if (get_kvmclock_ns(vcpu->kvm) < 274 274 + data->u.runstate.state_entry_time) { 275 275 + r = -EINVAL; 276 276 + break; 277 277 + } 278 278 + 279 279 + vcpu->arch.xen.current_runstate = data->u.runstate.state; 280 280 + vcpu->arch.xen.runstate_entry_time = 281 281 + data->u.runstate.state_entry_time; 282 282 + vcpu->arch.xen.runstate_times[RUNSTATE_running] = 283 283 + data->u.runstate.time_running; 284 284 + vcpu->arch.xen.runstate_times[RUNSTATE_runnable] = 285 285 + data->u.runstate.time_runnable; 286 286 + vcpu->arch.xen.runstate_times[RUNSTATE_blocked] = 287 287 + data->u.runstate.time_blocked; 288 288 + vcpu->arch.xen.runstate_times[RUNSTATE_offline] = 289 289 + data->u.runstate.time_offline; 290 290 + vcpu->arch.xen.last_steal = current->sched_info.run_delay; 291 291 + r = 0; 292 292 + break; 293 293 + 294 294 + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: 295 295 + if (!sched_info_on()) { 296 296 + r = -EOPNOTSUPP; 297 297 + break; 298 298 + } 299 299 + if (data->u.runstate.state > RUNSTATE_offline && 300 300 + data->u.runstate.state != (u64)-1) { 301 301 + r = -EINVAL; 302 302 + break; 303 303 + } 304 304 + /* The adjustment must add up */ 305 305 + if (data->u.runstate.state_entry_time != 306 306 + (data->u.runstate.time_running + 307 307 + data->u.runstate.time_runnable + 308 308 + data->u.runstate.time_blocked + 309 309 + data->u.runstate.time_offline)) { 310 310 + r = -EINVAL; 311 311 + break; 312 312 + } 313 313 + 314 314 + if 
(get_kvmclock_ns(vcpu->kvm) < 315 315 + (vcpu->arch.xen.runstate_entry_time + 316 316 + data->u.runstate.state_entry_time)) { 317 317 + r = -EINVAL; 318 318 + break; 319 319 + } 320 320 + 321 321 + vcpu->arch.xen.runstate_entry_time += 322 322 + data->u.runstate.state_entry_time; 323 323 + vcpu->arch.xen.runstate_times[RUNSTATE_running] += 324 324 + data->u.runstate.time_running; 325 325 + vcpu->arch.xen.runstate_times[RUNSTATE_runnable] += 326 326 + data->u.runstate.time_runnable; 327 327 + vcpu->arch.xen.runstate_times[RUNSTATE_blocked] += 328 328 + data->u.runstate.time_blocked; 329 329 + vcpu->arch.xen.runstate_times[RUNSTATE_offline] += 330 330 + data->u.runstate.time_offline; 331 331 + 332 332 + if (data->u.runstate.state <= RUNSTATE_offline) 333 333 + kvm_xen_update_runstate(vcpu, data->u.runstate.state); 334 334 + r = 0; 352 335 break; 353 336 354 337 default: ··· 496 249 else 497 250 data->u.gpa = GPA_INVALID; 498 251 r = 0; 252 252 + break; 253 253 + 254 254 + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: 255 255 + if (!sched_info_on()) { 256 256 + r = -EOPNOTSUPP; 257 257 + break; 258 258 + } 259 259 + if (vcpu->arch.xen.runstate_set) { 260 260 + data->u.gpa = vcpu->arch.xen.runstate_cache.gpa; 261 261 + r = 0; 262 262 + } 263 263 + break; 264 264 + 265 265 + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: 266 266 + if (!sched_info_on()) { 267 267 + r = -EOPNOTSUPP; 268 268 + break; 269 269 + } 270 270 + data->u.runstate.state = vcpu->arch.xen.current_runstate; 271 271 + r = 0; 272 272 + break; 273 273 + 274 274 + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: 275 275 + if (!sched_info_on()) { 276 276 + r = -EOPNOTSUPP; 277 277 + break; 278 278 + } 279 279 + data->u.runstate.state = vcpu->arch.xen.current_runstate; 280 280 + data->u.runstate.state_entry_time = 281 281 + vcpu->arch.xen.runstate_entry_time; 282 282 + data->u.runstate.time_running = 283 283 + vcpu->arch.xen.runstate_times[RUNSTATE_running]; 284 284 + data->u.runstate.time_runnable = 285 285 + 
vcpu->arch.xen.runstate_times[RUNSTATE_runnable]; 286 286 + data->u.runstate.time_blocked = 287 287 + vcpu->arch.xen.runstate_times[RUNSTATE_blocked]; 288 288 + data->u.runstate.time_offline = 289 289 + vcpu->arch.xen.runstate_times[RUNSTATE_offline]; 290 290 + r = 0; 291 291 + break; 292 292 + 293 293 + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: 294 294 + r = -EINVAL; 499 295 break; 500 296 501 297 default:

+62 -2

arch/x86/kvm/xen.h

reviewed

··· 9 9 #ifndef __ARCH_X86_KVM_XEN_H__ 10 10 #define __ARCH_X86_KVM_XEN_H__ 11 11 12 12 + #ifdef CONFIG_KVM_XEN 12 13 #include <linux/jump_label_ratelimit.h> 13 14 14 15 extern struct static_key_false_deferred kvm_xen_enabled; ··· 19 18 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data); 20 19 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); 21 20 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); 22 22 - int kvm_xen_hypercall(struct kvm_vcpu *vcpu); 23 21 int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data); 24 22 int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc); 25 23 void kvm_xen_destroy_vm(struct kvm *kvm); 24 24 + 25 25 + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) 26 26 + { 27 27 + return static_branch_unlikely(&kvm_xen_enabled.key) && 28 28 + kvm->arch.xen_hvm_config.msr; 29 29 + } 26 30 27 31 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm) 28 32 { ··· 44 38 45 39 return 0; 46 40 } 41 41 + #else 42 42 + static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) 43 43 + { 44 44 + return 1; 45 45 + } 47 46 48 48 - /* 32-bit compatibility definitions, also used natively in 32-bit build */ 47 47 + static inline void kvm_xen_destroy_vm(struct kvm *kvm) 48 48 + { 49 49 + } 50 50 + 51 51 + static inline bool kvm_xen_msr_enabled(struct kvm *kvm) 52 52 + { 53 53 + return false; 54 54 + } 55 55 + 56 56 + static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm) 57 57 + { 58 58 + return false; 59 59 + } 60 60 + 61 61 + static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu) 62 62 + { 63 63 + return 0; 64 64 + } 65 65 + #endif 66 66 + 67 67 + int kvm_xen_hypercall(struct kvm_vcpu *vcpu); 68 68 + 49 69 #include <asm/pvclock-abi.h> 50 70 #include <asm/xen/interface.h> 71 71 + #include <xen/interface/vcpu.h> 51 72 73 73 + void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state); 74 74 + 75 75 
+ static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu) 76 76 + { 77 77 + kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running); 78 78 + } 79 79 + 80 80 + static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) 81 81 + { 82 82 + /* 83 83 + * If the vCPU wasn't preempted but took a normal exit for 84 84 + * some reason (hypercalls, I/O, etc.), that is accounted as 85 85 + * still RUNSTATE_running, as the VMM is still operating on 86 86 + * behalf of the vCPU. Only if the VMM does actually block 87 87 + * does it need to enter RUNSTATE_blocked. 88 88 + */ 89 89 + if (vcpu->preempted) 90 90 + kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); 91 91 + } 92 92 + 93 93 + /* 32-bit compatibility definitions, also used natively in 32-bit build */ 52 94 struct compat_arch_vcpu_info { 53 95 unsigned int cr2; 54 96 unsigned int pad[5]; ··· 128 74 struct pvclock_wall_clock wc; 129 75 struct compat_arch_shared_info arch; 130 76 }; 77 77 + 78 78 + struct compat_vcpu_runstate_info { 79 79 + int state; 80 80 + uint64_t state_entry_time; 81 81 + uint64_t time[4]; 82 82 + } __attribute__((packed)); 131 83 132 84 #endif /* __ARCH_X86_KVM_XEN_H__ */

+13

include/uapi/linux/kvm.h

reviewed

··· 1154 1154 #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) 1155 1155 #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) 1156 1156 #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) 1157 1157 + #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) 1157 1158 1158 1159 struct kvm_xen_hvm_config { 1159 1160 __u32 flags; ··· 1622 1621 union { 1623 1622 __u64 gpa; 1624 1623 __u64 pad[8]; 1624 1624 + struct { 1625 1625 + __u64 state; 1626 1626 + __u64 state_entry_time; 1627 1627 + __u64 time_running; 1628 1628 + __u64 time_runnable; 1629 1629 + __u64 time_blocked; 1630 1630 + __u64 time_offline; 1631 1631 + } runstate; 1625 1632 } u; 1626 1633 }; 1627 1634 1628 1635 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ 1629 1636 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 1630 1637 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 1638 1638 + #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 1639 1639 + #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 1640 1640 + #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 1641 1641 + #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 1631 1642 1632 1643 /* Secure Encrypted Virtualization command */ 1633 1644 enum sev_cmd_id {

+4 -2

tools/testing/selftests/kvm/lib/kvm_util.c

reviewed

··· 21 21 #define KVM_UTIL_PGS_PER_HUGEPG 512 22 22 #define KVM_UTIL_MIN_PFN 2 23 23 24 24 + static int vcpu_mmap_sz(void); 25 25 + 24 26 /* Aligns x up to the next multiple of size. Size must be a power of 2. */ 25 27 static void *align(void *x, size_t size) 26 28 { ··· 511 509 vcpu->dirty_gfns = NULL; 512 510 } 513 511 514 514 - ret = munmap(vcpu->state, sizeof(*vcpu->state)); 512 512 + ret = munmap(vcpu->state, vcpu_mmap_sz()); 515 513 TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " 516 514 "errno: %i", ret, errno); 517 515 close(vcpu->fd); ··· 980 978 TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " 981 979 "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", 982 980 vcpu_mmap_sz(), sizeof(*vcpu->state)); 983 983 - vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), 981 981 + vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), 984 982 PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); 985 983 TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " 986 984 "vcpu id: %u errno: %i", vcpuid, errno);

+156 -3

tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c

reviewed

··· 13 13 14 14 #include <stdint.h> 15 15 #include <time.h> 16 16 + #include <sched.h> 17 17 + #include <sys/syscall.h> 16 18 17 19 #define VCPU_ID 5 18 20 21 21 + #define SHINFO_REGION_GVA 0xc0000000ULL 19 22 #define SHINFO_REGION_GPA 0xc0000000ULL 20 23 #define SHINFO_REGION_SLOT 10 21 24 #define PAGE_SIZE 4096 22 25 23 26 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) 27 27 + #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) 28 28 + 29 29 + #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) 24 30 25 31 static struct kvm_vm *vm; 26 32 27 33 #define XEN_HYPERCALL_MSR 0x40000000 34 34 + 35 35 + #define MIN_STEAL_TIME 50000 28 36 29 37 struct pvclock_vcpu_time_info { 30 38 u32 version; ··· 51 43 u32 nsec; 52 44 } __attribute__((__packed__)); 53 45 46 46 + struct vcpu_runstate_info { 47 47 + uint32_t state; 48 48 + uint64_t state_entry_time; 49 49 + uint64_t time[4]; 50 50 + }; 51 51 + 52 52 + #define RUNSTATE_running 0 53 53 + #define RUNSTATE_runnable 1 54 54 + #define RUNSTATE_blocked 2 55 55 + #define RUNSTATE_offline 3 56 56 + 54 57 static void guest_code(void) 55 58 { 59 59 + struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; 60 60 + 61 61 + /* Test having the host set runstates manually */ 62 62 + GUEST_SYNC(RUNSTATE_runnable); 63 63 + GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); 64 64 + GUEST_ASSERT(rs->state == 0); 65 65 + 66 66 + GUEST_SYNC(RUNSTATE_blocked); 67 67 + GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); 68 68 + GUEST_ASSERT(rs->state == 0); 69 69 + 70 70 + GUEST_SYNC(RUNSTATE_offline); 71 71 + GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); 72 72 + GUEST_ASSERT(rs->state == 0); 73 73 + 74 74 + /* Test runstate time adjust */ 75 75 + GUEST_SYNC(4); 76 76 + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); 77 77 + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); 78 78 + 79 79 + /* Test runstate time set */ 80 80 + GUEST_SYNC(5); 81 81 + GUEST_ASSERT(rs->state_entry_time >= 0x8000); 82 82 + 
GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); 83 83 + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); 84 84 + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); 85 85 + 86 86 + /* sched_yield() should result in some 'runnable' time */ 87 87 + GUEST_SYNC(6); 88 88 + GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); 89 89 + 56 90 GUEST_DONE(); 91 91 + } 92 92 + 93 93 + static long get_run_delay(void) 94 94 + { 95 95 + char path[64]; 96 96 + long val[2]; 97 97 + FILE *fp; 98 98 + 99 99 + sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); 100 100 + fp = fopen(path, "r"); 101 101 + fscanf(fp, "%ld %ld ", &val[0], &val[1]); 102 102 + fclose(fp); 103 103 + 104 104 + return val[1]; 57 105 } 58 106 59 107 static int cmp_timespec(struct timespec *a, struct timespec *b) ··· 130 66 { 131 67 struct timespec min_ts, max_ts, vm_ts; 132 68 133 133 - if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & 134 134 - KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { 69 69 + int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); 70 70 + if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { 135 71 print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available"); 136 72 exit(KSFT_SKIP); 137 73 } 74 74 + 75 75 + bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); 138 76 139 77 clock_gettime(CLOCK_REALTIME, &min_ts); 140 78 ··· 146 80 /* Map a region for the shared_info page */ 147 81 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 148 82 SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); 83 83 + virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0); 149 84 150 85 struct kvm_xen_hvm_config hvmc = { 151 86 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, ··· 178 111 }; 179 112 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); 180 113 114 114 + if (do_runstate_tests) { 115 115 + struct kvm_xen_vcpu_attr st = { 116 116 + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 117 117 + .u.gpa = RUNSTATE_ADDR, 118 118 + }; 119 119 + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); 120 120 + } 
121 121 + 122 122 + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);; 123 123 + rs->state = 0x5a; 124 124 + 181 125 for (;;) { 182 126 volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); 183 127 struct ucall uc; ··· 204 126 case UCALL_ABORT: 205 127 TEST_FAIL("%s", (const char *)uc.args[0]); 206 128 /* NOT REACHED */ 207 207 - case UCALL_SYNC: 129 129 + case UCALL_SYNC: { 130 130 + struct kvm_xen_vcpu_attr rst; 131 131 + long rundelay; 132 132 + 133 133 + /* If no runstate support, bail out early */ 134 134 + if (!do_runstate_tests) 135 135 + goto done; 136 136 + 137 137 + TEST_ASSERT(rs->state_entry_time == rs->time[0] + 138 138 + rs->time[1] + rs->time[2] + rs->time[3], 139 139 + "runstate times don't add up"); 140 140 + 141 141 + switch (uc.args[1]) { 142 142 + case RUNSTATE_running...RUNSTATE_offline: 143 143 + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; 144 144 + rst.u.runstate.state = uc.args[1]; 145 145 + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); 146 146 + break; 147 147 + case 4: 148 148 + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; 149 149 + memset(&rst.u, 0, sizeof(rst.u)); 150 150 + rst.u.runstate.state = (uint64_t)-1; 151 151 + rst.u.runstate.time_blocked = 152 152 + 0x5a - rs->time[RUNSTATE_blocked]; 153 153 + rst.u.runstate.time_offline = 154 154 + 0x6b6b - rs->time[RUNSTATE_offline]; 155 155 + rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - 156 156 + rst.u.runstate.time_offline; 157 157 + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); 158 158 + break; 159 159 + 160 160 + case 5: 161 161 + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; 162 162 + memset(&rst.u, 0, sizeof(rst.u)); 163 163 + rst.u.runstate.state = RUNSTATE_running; 164 164 + rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; 165 165 + rst.u.runstate.time_blocked = 0x6b6b; 166 166 + rst.u.runstate.time_offline = 0x5a; 167 167 + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); 168 168 + break; 169 169 + case 6: 170 
170 + /* Yield until scheduler delay exceeds target */ 171 171 + rundelay = get_run_delay() + MIN_STEAL_TIME; 172 172 + do { 173 173 + sched_yield(); 174 174 + } while (get_run_delay() < rundelay); 175 175 + break; 176 176 + } 208 177 break; 178 178 + } 209 179 case UCALL_DONE: 210 180 goto done; 211 181 default: ··· 288 162 TEST_ASSERT(ti2->version && !(ti2->version & 1), 289 163 "Bad time_info version %x", ti->version); 290 164 165 165 + if (do_runstate_tests) { 166 166 + /* 167 167 + * Fetch runstate and check sanity. Strictly speaking in the 168 168 + * general case we might not expect the numbers to be identical 169 169 + * but in this case we know we aren't running the vCPU any more. 170 170 + */ 171 171 + struct kvm_xen_vcpu_attr rst = { 172 172 + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, 173 173 + }; 174 174 + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); 175 175 + 176 176 + TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); 177 177 + TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, 178 178 + "State entry time mismatch"); 179 179 + TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, 180 180 + "Running time mismatch"); 181 181 + TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 182 182 + "Runnable time mismatch"); 183 183 + TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 184 184 + "Blocked time mismatch"); 185 185 + TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 186 186 + "Offline time mismatch"); 187 187 + 188 188 + TEST_ASSERT(rs->state_entry_time == rs->time[0] + 189 189 + rs->time[1] + rs->time[2] + rs->time[3], 190 190 + "runstate times don't add up"); 191 191 + } 291 192 kvm_vm_free(vm); 292 193 return 0; 293 194 }