Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM_GET/SET_* tests
 *
 * Copyright (C) 2018, Red Hat, Inc.
 *
 * Tests for vCPU state save/restore, including nested guest state.
 */
9#include <fcntl.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <sys/ioctl.h>
14
15#include "test_util.h"
16
17#include "kvm_util.h"
18#include "processor.h"
19#include "vmx.h"
20#include "svm_util.h"
21
/* Element count of the on-stack arrays the L1 guest code hands to L2 as its stack. */
#define L2_GUEST_STACK_SIZE 256
23
/*
 * L2 guest body for the SVM flavor of the test.
 *
 * Reports stage 4, exits to L1 with vmcall(), then reports stage 6 between
 * clgi() and stgi(), and finally exits to L1 a second time.
 */
void svm_l2_guest_code(void)
{
	GUEST_SYNC(4);
	/* Exit to L1 */
	vmcall();
	clgi();
	/* Stage 6 is reported after clgi() and before the matching stgi(). */
	GUEST_SYNC(6);
	stgi();
	/* Done, exit to L1 and never come back. */
	vmcall();
}
35
36static void svm_l1_guest_code(struct svm_test_data *svm)
37{
38 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
39 struct vmcb *vmcb = svm->vmcb;
40
41 GUEST_ASSERT(svm->vmcb_gpa);
42 /* Prepare for L2 execution. */
43 generic_svm_setup(svm, svm_l2_guest_code,
44 &l2_guest_stack[L2_GUEST_STACK_SIZE]);
45
46 vmcb->control.int_ctl |= (V_GIF_ENABLE_MASK | V_GIF_MASK);
47
48 GUEST_SYNC(3);
49 run_guest(vmcb, svm->vmcb_gpa);
50 GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
51 GUEST_SYNC(5);
52 vmcb->save.rip += 3;
53 run_guest(vmcb, svm->vmcb_gpa);
54 GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
55 GUEST_SYNC(7);
56}
57
58void vmx_l2_guest_code(void)
59{
60 GUEST_SYNC(6);
61
62 /* Exit to L1 */
63 vmcall();
64
65 /* L1 has now set up a shadow VMCS for us. */
66 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
67 GUEST_SYNC(10);
68 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
69 GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
70 GUEST_SYNC(11);
71 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
72 GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
73 GUEST_SYNC(12);
74
75 /* Done, exit to L1 and never come back. */
76 vmcall();
77}
78
79static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
80{
81 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
82
83 GUEST_ASSERT(vmx_pages->vmcs_gpa);
84 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
85 GUEST_SYNC(3);
86 GUEST_ASSERT(load_vmcs(vmx_pages));
87 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
88
89 GUEST_SYNC(4);
90 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
91
92 prepare_vmcs(vmx_pages, vmx_l2_guest_code,
93 &l2_guest_stack[L2_GUEST_STACK_SIZE]);
94
95 GUEST_SYNC(5);
96 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
97 GUEST_ASSERT(!vmlaunch());
98 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
99 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
100
101 /* Check that the launched state is preserved. */
102 GUEST_ASSERT(vmlaunch());
103
104 GUEST_ASSERT(!vmresume());
105 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
106
107 GUEST_SYNC(7);
108 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
109
110 GUEST_ASSERT(!vmresume());
111 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
112
113 vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
114
115 vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
116 vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
117
118 GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
119 GUEST_ASSERT(vmlaunch());
120 GUEST_SYNC(8);
121 GUEST_ASSERT(vmlaunch());
122 GUEST_ASSERT(vmresume());
123
124 vmwrite(GUEST_RIP, 0xc0ffee);
125 GUEST_SYNC(9);
126 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
127
128 GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
129 GUEST_ASSERT(!vmresume());
130 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
131
132 GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
133 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
134 GUEST_ASSERT(vmlaunch());
135 GUEST_ASSERT(vmresume());
136 GUEST_SYNC(13);
137 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
138 GUEST_ASSERT(vmlaunch());
139 GUEST_ASSERT(vmresume());
140}
141
142static void __attribute__((__flatten__)) guest_code(void *arg)
143{
144 GUEST_SYNC(1);
145
146 if (this_cpu_has(X86_FEATURE_XSAVE)) {
147 u64 supported_xcr0 = this_cpu_supported_xcr0();
148 u8 buffer[PAGE_SIZE];
149
150 memset(buffer, 0xcc, sizeof(buffer));
151
152 /*
153 * Modify state for all supported xfeatures to take them out of
154 * their "init" state, i.e. to make them show up in XSTATE_BV.
155 *
156 * Note off-by-default features, e.g. AMX, are out of scope for
157 * this particular testcase as they have a different ABI.
158 */
159 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
160 asm volatile ("fincstp");
161
162 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
163 asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
164
165 if (supported_xcr0 & XFEATURE_MASK_YMM)
166 asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
167
168 if (supported_xcr0 & XFEATURE_MASK_AVX512) {
169 asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
170 asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
171 asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
172 }
173
174 if (this_cpu_has(X86_FEATURE_MPX)) {
175 u64 bounds[2] = { 10, 0xffffffffull };
176 u64 output[2] = { };
177
178 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
179 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
180
181 /*
182 * Don't bother trying to get BNDCSR into the INUSE
183 * state. MSR_IA32_BNDCFGS doesn't count as it isn't
184 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
185 * modified by XRSTOR. Stuffing XSTATE_BV in the host
186 * is simpler than doing XRSTOR here in the guest.
187 *
188 * However, temporarily enable MPX in BNDCFGS so that
189 * BNDMOV actually loads BND1. If MPX isn't *fully*
190 * enabled, all MPX instructions are treated as NOPs.
191 *
192 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
193 * mnemonics/registers has been removed from gcc and
194 * clang (and was never fully supported by clang).
195 */
196 wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
197 asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
198 /*
199 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
200 * that BND1 actually got loaded.
201 */
202 asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
203 wrmsr(MSR_IA32_BNDCFGS, 0);
204
205 GUEST_ASSERT_EQ(bounds[0], output[0]);
206 GUEST_ASSERT_EQ(bounds[1], output[1]);
207 }
208 if (this_cpu_has(X86_FEATURE_PKU)) {
209 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
210 set_cr4(get_cr4() | X86_CR4_PKE);
211 GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
212
213 wrpkru(-1u);
214 }
215 }
216
217 GUEST_SYNC(2);
218
219 if (arg) {
220 if (this_cpu_has(X86_FEATURE_SVM))
221 svm_l1_guest_code(arg);
222 else
223 vmx_l1_guest_code(arg);
224 }
225
226 GUEST_DONE();
227}
228
229void svm_check_nested_state(int stage, struct kvm_x86_state *state)
230{
231 struct vmcb *vmcb = (struct vmcb *)state->nested.data.svm;
232
233 if (kvm_cpu_has(X86_FEATURE_VGIF)) {
234 if (stage == 4)
235 TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 1);
236 if (stage == 6)
237 TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 0);
238 }
239
240 if (kvm_cpu_has(X86_FEATURE_NRIPS)) {
241 /*
242 * GUEST_SYNC() causes IO emulation in KVM, in which case the
243 * RIP is advanced before exiting to userspace. Hence, the RIP
244 * in the saved state should be the same as nRIP saved by the
245 * CPU in the VMCB.
246 */
247 if (stage == 6)
248 TEST_ASSERT_EQ(vmcb->control.next_rip, state->regs.rip);
249 }
250}
251
252void check_nested_state(int stage, struct kvm_x86_state *state)
253{
254 if (kvm_has_cap(KVM_CAP_NESTED_STATE) && kvm_cpu_has(X86_FEATURE_SVM))
255 svm_check_nested_state(stage, state);
256}
257
258int main(int argc, char *argv[])
259{
260 u64 *xstate_bv, saved_xstate_bv;
261 gva_t nested_gva = 0;
262 struct kvm_cpuid2 empty_cpuid = {};
263 struct kvm_regs regs1, regs2;
264 struct kvm_vcpu *vcpu, *vcpuN;
265 struct kvm_vm *vm;
266 struct kvm_x86_state *state;
267 struct ucall uc;
268 int stage;
269
270 /* Create VM */
271 vm = vm_create_with_one_vcpu(&vcpu, guest_code);
272
273 vcpu_regs_get(vcpu, ®s1);
274
275 if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
276 if (kvm_cpu_has(X86_FEATURE_SVM))
277 vcpu_alloc_svm(vm, &nested_gva);
278 else if (kvm_cpu_has(X86_FEATURE_VMX))
279 vcpu_alloc_vmx(vm, &nested_gva);
280 }
281
282 if (!nested_gva)
283 pr_info("will skip nested state checks\n");
284
285 vcpu_args_set(vcpu, 1, nested_gva);
286
287 for (stage = 1;; stage++) {
288 vcpu_run(vcpu);
289 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
290
291 switch (get_ucall(vcpu, &uc)) {
292 case UCALL_ABORT:
293 REPORT_GUEST_ASSERT(uc);
294 /* NOT REACHED */
295 case UCALL_SYNC:
296 break;
297 case UCALL_DONE:
298 goto done;
299 default:
300 TEST_FAIL("Unknown ucall %lu", uc.cmd);
301 }
302
303 /* UCALL_SYNC is handled here. */
304 TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
305 uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
306 stage, (ulong)uc.args[1]);
307
308 state = vcpu_save_state(vcpu);
309 memset(®s1, 0, sizeof(regs1));
310 vcpu_regs_get(vcpu, ®s1);
311
312 kvm_vm_release(vm);
313
314 check_nested_state(stage, state);
315
316 /* Restore state in a new VM. */
317 vcpu = vm_recreate_with_one_vcpu(vm);
318 vcpu_load_state(vcpu, state);
319
320 /*
321 * Restore XSAVE state in a dummy vCPU, first without doing
322 * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
323 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
324 * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
325 * load only XSAVE state, MSRs in particular have a much more
326 * convoluted ABI.
327 *
328 * Load two versions of XSAVE state: one with the actual guest
329 * XSAVE state, and one with all supported features forced "on"
330 * in xstate_bv, e.g. to ensure that KVM allows loading all
331 * supported features, even if something goes awry in saving
332 * the original snapshot.
333 */
334 xstate_bv = (void *)&((u8 *)state->xsave->region)[512];
335 saved_xstate_bv = *xstate_bv;
336
337 vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
338 vcpu_xsave_set(vcpuN, state->xsave);
339 *xstate_bv = kvm_cpu_supported_xcr0();
340 vcpu_xsave_set(vcpuN, state->xsave);
341
342 vcpu_init_cpuid(vcpuN, &empty_cpuid);
343 vcpu_xsave_set(vcpuN, state->xsave);
344 *xstate_bv = saved_xstate_bv;
345 vcpu_xsave_set(vcpuN, state->xsave);
346
347 kvm_x86_state_cleanup(state);
348
349 memset(®s2, 0, sizeof(regs2));
350 vcpu_regs_get(vcpu, ®s2);
351 TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)),
352 "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
353 (ulong) regs2.rdi, (ulong) regs2.rsi);
354 }
355
356done:
357 kvm_vm_free(vm);
358}