Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * KVM page table test
4 *
5 * Copyright (C) 2021, Huawei, Inc.
6 *
7 * Make sure that THP has been enabled or enough HUGETLB pages with specific
8 * page size have been pre-allocated on your system, if you are planning to
9 * use hugepages to back the guest memory for testing.
10 */
11#include <stdio.h>
12#include <stdlib.h>
13#include <time.h>
14#include <pthread.h>
15#include <semaphore.h>
16
17#include "test_util.h"
18#include "kvm_util.h"
19#include "processor.h"
20#include "guest_modes.h"
21#include "ucall_common.h"
22
/* Slot index of the extra memslot that backs the guest test memory */
#define TEST_MEM_SLOT_INDEX		1

/* Default size(1GB) of the memory for testing */
#define DEFAULT_TEST_MEM_SIZE		(1 << 30)

/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM		0xc0000000

/*
 * Different guest memory accessing stages. The host drives every vCPU
 * through these stages in order; NUM_TEST_STAGES doubles as a sentinel
 * value published before the first real stage is selected.
 */
enum test_stage {
	KVM_BEFORE_MAPPINGS,
	KVM_CREATE_MAPPINGS,
	KVM_UPDATE_MAPPINGS,
	KVM_ADJUST_MAPPINGS,
	NUM_TEST_STAGES,
};

/* Human-readable stage names, indexed by enum test_stage */
static const char * const test_stage_string[] = {
	"KVM_BEFORE_MAPPINGS",
	"KVM_CREATE_MAPPINGS",
	"KVM_UPDATE_MAPPINGS",
	"KVM_ADJUST_MAPPINGS",
};
46
/*
 * Parameters shared between the host and the guest; filled in by
 * pre_init_before_test() and exported with sync_global_to_guest().
 */
struct test_args {
	struct kvm_vm *vm;			/* VM under test */
	u64 guest_test_virt_mem;		/* GVA base of the test region */
	u64 host_page_size;			/* getpagesize() on the host */
	u64 host_num_pages;			/* test size / host page size */
	u64 large_page_size;		/* backing source granularity */
	u64 large_num_pages;		/* test size / large page size */
	u64 host_pages_per_lpage;	/* host pages per large page */
	enum vm_mem_backing_src_type src_type;	/* memory backing type */
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];	/* vCPUs created for the VM */
};
58
/*
 * Guest variables. Use addr_gva2hva() if these variables need
 * to be changed in host.
 */
static enum test_stage guest_test_stage;

/* Host variables */
static u32 nr_vcpus = 1;		/* number of vCPUs to test with (-v) */
static struct test_args test_args;	/* shared host/guest parameters */
static enum test_stage *current_stage;	/* HVA alias of guest_test_stage */
static bool host_quit;			/* tells vCPU worker threads to exit */

/* Whether the test stage is updated, or completed */
static sem_t test_stage_updated;
static sem_t test_stage_completed;

/*
 * Guest physical memory offset of the testing memory slot.
 * This will be set to the topmost valid physical address minus
 * the test memory size.
 */
static u64 guest_test_phys_mem;

/*
 * Guest virtual memory offset of the testing memory slot.
 * Must not conflict with identity mapped test code.
 */
static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
87
/*
 * Guest-side entry point for every vCPU. Loops forever: performs the
 * memory accesses the currently published test stage calls for, then
 * issues GUEST_SYNC(1) so the host worker can account the run.
 * do_write is per-vCPU: even vCPU ids write, odd ids read (set up by
 * vcpu_worker() via vcpu_args_set()).
 */
static void guest_code(bool do_write)
{
	struct test_args *p = &test_args;
	enum test_stage *current_stage = &guest_test_stage;
	u64 addr;
	int i, j;

	while (true) {
		/* Restart from the base of the test region each iteration */
		addr = p->guest_test_virt_mem;

		switch (READ_ONCE(*current_stage)) {
		/*
		 * All vCPU threads will be started in this stage,
		 * where guest code of each vCPU will do nothing.
		 */
		case KVM_BEFORE_MAPPINGS:
			break;

		/*
		 * Before dirty logging, vCPUs concurrently access the first
		 * 8 bytes of each page (host page/large page) within the same
		 * memory region with different accessing types (read/write).
		 * Then KVM will create normal page mappings or huge block
		 * mappings for them.
		 */
		case KVM_CREATE_MAPPINGS:
			for (i = 0; i < p->large_num_pages; i++) {
				if (do_write)
					*(u64 *)addr = 0x0123456789ABCDEF;
				else
					READ_ONCE(*(u64 *)addr);

				addr += p->large_page_size;
			}
			break;

		/*
		 * During dirty logging, KVM will only update attributes of the
		 * normal page mappings from RO to RW if memory backing src type
		 * is anonymous. In other cases, KVM will split the huge block
		 * mappings into normal page mappings if memory backing src type
		 * is THP or HUGETLB.
		 */
		case KVM_UPDATE_MAPPINGS:
			if (p->src_type == VM_MEM_SRC_ANONYMOUS) {
				/* Dirty every host page to exercise RO->RW updates */
				for (i = 0; i < p->host_num_pages; i++) {
					*(u64 *)addr = 0x0123456789ABCDEF;
					addr += p->host_page_size;
				}
				break;
			}

			for (i = 0; i < p->large_num_pages; i++) {
				/*
				 * Write to the first host page in each large
				 * page region, and trigger break of large pages.
				 */
				*(u64 *)addr = 0x0123456789ABCDEF;

				/*
				 * Access the middle host pages in each large
				 * page region. Since dirty logging is enabled,
				 * this will create new mappings at the smallest
				 * granularity.
				 */
				addr += p->large_page_size / 2;
				for (j = 0; j < p->host_pages_per_lpage / 2; j++) {
					READ_ONCE(*(u64 *)addr);
					addr += p->host_page_size;
				}
			}
			break;

		/*
		 * After dirty logging is stopped, vCPUs concurrently read
		 * from every single host page. Then KVM will coalesce the
		 * split page mappings back to block mappings. And a TLB
		 * conflict abort could occur here if TLB entries of the
		 * page mappings are not fully invalidated.
		 */
		case KVM_ADJUST_MAPPINGS:
			for (i = 0; i < p->host_num_pages; i++) {
				READ_ONCE(*(u64 *)addr);
				addr += p->host_page_size;
			}
			break;

		default:
			GUEST_ASSERT(0);
		}

		/* Hand control back to the host worker for this stage */
		GUEST_SYNC(1);
	}
}
182
183static void *vcpu_worker(void *data)
184{
185 struct kvm_vcpu *vcpu = data;
186 bool do_write = !(vcpu->id % 2);
187 struct timespec start;
188 struct timespec ts_diff;
189 enum test_stage stage;
190 int ret;
191
192 vcpu_args_set(vcpu, 1, do_write);
193
194 while (!READ_ONCE(host_quit)) {
195 ret = sem_wait(&test_stage_updated);
196 TEST_ASSERT(ret == 0, "Error in sem_wait");
197
198 if (READ_ONCE(host_quit))
199 return NULL;
200
201 clock_gettime(CLOCK_MONOTONIC, &start);
202 ret = _vcpu_run(vcpu);
203 ts_diff = timespec_elapsed(start);
204
205 TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
206 TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
207 "Invalid guest sync status: exit_reason=%s",
208 exit_reason_str(vcpu->run->exit_reason));
209
210 pr_debug("Got sync event from vCPU %d\n", vcpu->id);
211 stage = READ_ONCE(*current_stage);
212
213 /*
214 * Here we can know the execution time of every
215 * single vcpu running in different test stages.
216 */
217 pr_debug("vCPU %d has completed stage %s\n"
218 "execution time is: %ld.%.9lds\n\n",
219 vcpu->id, test_stage_string[stage],
220 ts_diff.tv_sec, ts_diff.tv_nsec);
221
222 ret = sem_post(&test_stage_completed);
223 TEST_ASSERT(ret == 0, "Error in sem_post");
224 }
225
226 return NULL;
227}
228
/* Command-line configurable parameters for one test run */
struct test_params {
	u64 phys_offset;	/* -p: GPA of the test memslot (0 = top of memory) */
	u64 test_mem_size;	/* -b: size of the test memory region */
	enum vm_mem_backing_src_type src_type;	/* -s: memory backing type */
};
234
235static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
236{
237 int ret;
238 struct test_params *p = arg;
239 enum vm_mem_backing_src_type src_type = p->src_type;
240 u64 large_page_size = get_backing_src_pagesz(src_type);
241 u64 guest_page_size = vm_guest_mode_params[mode].page_size;
242 u64 host_page_size = getpagesize();
243 u64 test_mem_size = p->test_mem_size;
244 u64 guest_num_pages;
245 u64 alignment;
246 void *host_test_mem;
247 struct kvm_vm *vm;
248
249 /* Align up the test memory size */
250 alignment = max(large_page_size, guest_page_size);
251 test_mem_size = (test_mem_size + alignment - 1) & ~(alignment - 1);
252
253 /* Create a VM with enough guest pages */
254 guest_num_pages = test_mem_size / guest_page_size;
255 vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages,
256 guest_code, test_args.vcpus);
257
258 /* Align down GPA of the testing memslot */
259 if (!p->phys_offset)
260 guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
261 guest_page_size;
262 else
263 guest_test_phys_mem = p->phys_offset;
264 guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
265
266 /* Set up the shared data structure test_args */
267 test_args.vm = vm;
268 test_args.guest_test_virt_mem = guest_test_virt_mem;
269 test_args.host_page_size = host_page_size;
270 test_args.host_num_pages = test_mem_size / host_page_size;
271 test_args.large_page_size = large_page_size;
272 test_args.large_num_pages = test_mem_size / large_page_size;
273 test_args.host_pages_per_lpage = large_page_size / host_page_size;
274 test_args.src_type = src_type;
275
276 /* Add an extra memory slot with specified backing src type */
277 vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,
278 TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
279
280 /* Do mapping(GVA->GPA) for the testing memory slot */
281 virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
282
283 /* Cache the HVA pointer of the region */
284 host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem);
285
286 /* Export shared structure test_args to guest */
287 sync_global_to_guest(vm, test_args);
288
289 ret = sem_init(&test_stage_updated, 0, 0);
290 TEST_ASSERT(ret == 0, "Error in sem_init");
291
292 ret = sem_init(&test_stage_completed, 0, 0);
293 TEST_ASSERT(ret == 0, "Error in sem_init");
294
295 current_stage = addr_gva2hva(vm, (gva_t)(&guest_test_stage));
296 *current_stage = NUM_TEST_STAGES;
297
298 pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
299 pr_info("Testing memory backing src type: %s\n",
300 vm_mem_backing_src_alias(src_type)->name);
301 pr_info("Testing memory backing src granularity: 0x%lx\n",
302 large_page_size);
303 pr_info("Testing memory size(aligned): 0x%lx\n", test_mem_size);
304 pr_info("Guest physical test memory offset: 0x%lx\n",
305 guest_test_phys_mem);
306 pr_info("Host virtual test memory offset: 0x%lx\n",
307 (u64)host_test_mem);
308 pr_info("Number of testing vCPUs: %d\n", nr_vcpus);
309
310 return vm;
311}
312
313static void vcpus_complete_new_stage(enum test_stage stage)
314{
315 int ret;
316 int vcpus;
317
318 /* Wake up all the vcpus to run new test stage */
319 for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {
320 ret = sem_post(&test_stage_updated);
321 TEST_ASSERT(ret == 0, "Error in sem_post");
322 }
323 pr_debug("All vcpus have been notified to continue\n");
324
325 /* Wait for all the vcpus to complete new test stage */
326 for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {
327 ret = sem_wait(&test_stage_completed);
328 TEST_ASSERT(ret == 0, "Error in sem_wait");
329
330 pr_debug("%d vcpus have completed stage %s\n",
331 vcpus + 1, test_stage_string[stage]);
332 }
333
334 pr_debug("All vcpus have completed stage %s\n",
335 test_stage_string[stage]);
336}
337
338static void run_test(enum vm_guest_mode mode, void *arg)
339{
340 pthread_t *vcpu_threads;
341 struct kvm_vm *vm;
342 struct timespec start;
343 struct timespec ts_diff;
344 int ret, i;
345
346 /* Create VM with vCPUs and make some pre-initialization */
347 vm = pre_init_before_test(mode, arg);
348
349 vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
350 TEST_ASSERT(vcpu_threads, "Memory allocation failed");
351
352 host_quit = false;
353 *current_stage = KVM_BEFORE_MAPPINGS;
354
355 for (i = 0; i < nr_vcpus; i++)
356 pthread_create(&vcpu_threads[i], NULL, vcpu_worker,
357 test_args.vcpus[i]);
358
359 vcpus_complete_new_stage(*current_stage);
360 pr_info("Started all vCPUs successfully\n");
361
362 /* Test the stage of KVM creating mappings */
363 *current_stage = KVM_CREATE_MAPPINGS;
364
365 clock_gettime(CLOCK_MONOTONIC, &start);
366 vcpus_complete_new_stage(*current_stage);
367 ts_diff = timespec_elapsed(start);
368
369 pr_info("KVM_CREATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",
370 ts_diff.tv_sec, ts_diff.tv_nsec);
371
372 /* Test the stage of KVM updating mappings */
373 vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
374 KVM_MEM_LOG_DIRTY_PAGES);
375
376 *current_stage = KVM_UPDATE_MAPPINGS;
377
378 clock_gettime(CLOCK_MONOTONIC, &start);
379 vcpus_complete_new_stage(*current_stage);
380 ts_diff = timespec_elapsed(start);
381
382 pr_info("KVM_UPDATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",
383 ts_diff.tv_sec, ts_diff.tv_nsec);
384
385 /* Test the stage of KVM adjusting mappings */
386 vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
387
388 *current_stage = KVM_ADJUST_MAPPINGS;
389
390 clock_gettime(CLOCK_MONOTONIC, &start);
391 vcpus_complete_new_stage(*current_stage);
392 ts_diff = timespec_elapsed(start);
393
394 pr_info("KVM_ADJUST_MAPPINGS: total execution time: %ld.%.9lds\n\n",
395 ts_diff.tv_sec, ts_diff.tv_nsec);
396
397 /* Tell the vcpu thread to quit */
398 host_quit = true;
399 for (i = 0; i < nr_vcpus; i++) {
400 ret = sem_post(&test_stage_updated);
401 TEST_ASSERT(ret == 0, "Error in sem_post");
402 }
403
404 for (i = 0; i < nr_vcpus; i++)
405 pthread_join(vcpu_threads[i], NULL);
406
407 ret = sem_destroy(&test_stage_updated);
408 TEST_ASSERT(ret == 0, "Error in sem_destroy");
409
410 ret = sem_destroy(&test_stage_completed);
411 TEST_ASSERT(ret == 0, "Error in sem_destroy");
412
413 free(vcpu_threads);
414 kvm_vm_free(vm);
415}
416
/*
 * Print the usage message for this test.
 * @name: argv[0], the program name shown in the usage line.
 */
static void help(char *name)
{
	puts("");
	printf("usage: %s [-h] [-p offset] [-m mode] "
	       "[-b mem-size] [-v vcpus] [-s mem-type]\n", name);
	puts("");
	printf(" -p: specify guest physical test memory offset\n"
	       " Warning: a low offset can conflict with the loaded test code.\n");
	guest_modes_help();
	printf(" -b: specify size of the memory region for testing. e.g. 10M or 3G.\n"
	       " (default: 1G)\n");
	printf(" -v: specify the number of vCPUs to run\n"
	       " (default: 1)\n");
	backing_src_help("-s");
	puts("");
}
433
434int main(int argc, char *argv[])
435{
436 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
437 struct test_params p = {
438 .test_mem_size = DEFAULT_TEST_MEM_SIZE,
439 .src_type = DEFAULT_VM_MEM_SRC,
440 };
441 int opt;
442
443 guest_modes_append_default();
444
445 while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) {
446 switch (opt) {
447 case 'p':
448 p.phys_offset = strtoull(optarg, NULL, 0);
449 break;
450 case 'm':
451 guest_modes_cmdline(optarg);
452 break;
453 case 'b':
454 p.test_mem_size = parse_size(optarg);
455 break;
456 case 'v':
457 nr_vcpus = atoi_positive("Number of vCPUs", optarg);
458 TEST_ASSERT(nr_vcpus <= max_vcpus,
459 "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
460 break;
461 case 's':
462 p.src_type = parse_backing_src_type(optarg);
463 break;
464 case 'h':
465 default:
466 help(argv[0]);
467 exit(0);
468 }
469 }
470
471 for_each_guest_mode(run_test, &p);
472
473 return 0;
474}