Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * COW (Copy On Write) tests.
4 *
5 * Copyright 2022, Red Hat, Inc.
6 *
7 * Author(s): David Hildenbrand <david@redhat.com>
8 */
9#define _GNU_SOURCE
10#include <stdlib.h>
11#include <string.h>
12#include <stdbool.h>
13#include <stdint.h>
14#include <unistd.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <assert.h>
18#include <linux/mman.h>
19#include <sys/mman.h>
20#include <sys/ioctl.h>
21#include <sys/wait.h>
22#include <linux/memfd.h>
23
24#include "local_config.h"
25#ifdef LOCAL_CONFIG_HAVE_LIBURING
26#include <liburing.h>
27#endif /* LOCAL_CONFIG_HAVE_LIBURING */
28
29#include "../../../../mm/gup_test.h"
30#include "kselftest.h"
31#include "vm_util.h"
32#include "thp_settings.h"
33
/* Base page size of the system. */
static size_t pagesize;
/* fd used for the pagemap_*() queries; presumably /proc/self/pagemap — set up elsewhere. */
static int pagemap_fd;
/* Size of a PMD-sized THP in bytes; 0 when THP is not supported at all. */
static size_t pmdsize;
/* Number of valid entries in thpsizes[] (filled by detect_thp_sizes()). */
static int nr_thpsizes;
/* Detected THP sizes in bytes. */
static size_t thpsizes[20];
/* Number of valid entries in hugetlbsizes[]. */
static int nr_hugetlbsizes;
/* Supported hugetlb sizes in bytes — NOTE(review): populated outside this view. */
static size_t hugetlbsizes[10];
/* fd for the gup_test kernel interface; < 0 when unavailable. */
static int gup_fd;
/* Whether the kernel provides a huge zeropage — set up elsewhere. */
static bool has_huge_zeropage;
43
44static int detect_thp_sizes(size_t sizes[], int max)
45{
46 int count = 0;
47 unsigned long orders;
48 size_t kb;
49 int i;
50
51 /* thp not supported at all. */
52 if (!pmdsize)
53 return 0;
54
55 orders = 1UL << sz2ord(pmdsize, pagesize);
56 orders |= thp_supported_orders();
57
58 for (i = 0; orders && count < max; i++) {
59 if (!(orders & (1UL << i)))
60 continue;
61 orders &= ~(1UL << i);
62 kb = (pagesize >> 10) << i;
63 sizes[count++] = kb * 1024;
64 ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
65 }
66
67 return count;
68}
69
70static bool range_is_swapped(void *addr, size_t size)
71{
72 for (; size; addr += pagesize, size -= pagesize)
73 if (!pagemap_is_swapped(pagemap_fd, addr))
74 return false;
75 return true;
76}
77
78static bool populate_page_checked(char *addr)
79{
80 bool ret;
81
82 FORCE_READ(*addr);
83 ret = pagemap_is_populated(pagemap_fd, addr);
84 if (!ret)
85 ksft_print_msg("Failed to populate page\n");
86
87 return ret;
88}
89
/*
 * Pair of pipes used for parent <-> child handshaking around fork():
 * the child signals readiness via child_ready[], the parent releases
 * the child via parent_ready[].
 */
struct comm_pipes {
	int child_ready[2];
	int parent_ready[2];
};
94
95static int setup_comm_pipes(struct comm_pipes *comm_pipes)
96{
97 if (pipe(comm_pipes->child_ready) < 0) {
98 ksft_perror("pipe() failed");
99 return -errno;
100 }
101 if (pipe(comm_pipes->parent_ready) < 0) {
102 ksft_perror("pipe() failed");
103 close(comm_pipes->child_ready[0]);
104 close(comm_pipes->child_ready[1]);
105 return -errno;
106 }
107
108 return 0;
109}
110
111static void close_comm_pipes(struct comm_pipes *comm_pipes)
112{
113 close(comm_pipes->child_ready[0]);
114 close(comm_pipes->child_ready[1]);
115 close(comm_pipes->parent_ready[0]);
116 close(comm_pipes->parent_ready[1]);
117}
118
119static int child_memcmp_fn(char *mem, size_t size,
120 struct comm_pipes *comm_pipes)
121{
122 char *old = malloc(size);
123 char buf;
124
125 /* Backup the original content. */
126 memcpy(old, mem, size);
127
128 /* Wait until the parent modified the page. */
129 write(comm_pipes->child_ready[1], "0", 1);
130 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
131 ;
132
133 /* See if we still read the old values. */
134 return memcmp(old, mem, size);
135}
136
137static int child_vmsplice_memcmp_fn(char *mem, size_t size,
138 struct comm_pipes *comm_pipes)
139{
140 struct iovec iov = {
141 .iov_base = mem,
142 .iov_len = size,
143 };
144 ssize_t cur, total, transferred;
145 char *old, *new;
146 int fds[2];
147 char buf;
148
149 old = malloc(size);
150 new = malloc(size);
151
152 /* Backup the original content. */
153 memcpy(old, mem, size);
154
155 if (pipe(fds) < 0)
156 return -errno;
157
158 /* Trigger a read-only pin. */
159 transferred = vmsplice(fds[1], &iov, 1, 0);
160 if (transferred < 0)
161 return -errno;
162 if (transferred == 0)
163 return -EINVAL;
164
165 /* Unmap it from our page tables. */
166 if (munmap(mem, size) < 0)
167 return -errno;
168
169 /* Wait until the parent modified it. */
170 write(comm_pipes->child_ready[1], "0", 1);
171 while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
172 ;
173
174 /* See if we still read the old values via the pipe. */
175 for (total = 0; total < transferred; total += cur) {
176 cur = read(fds[0], new + total, transferred - total);
177 if (cur < 0)
178 return -errno;
179 }
180
181 return memcmp(old, new, transferred);
182}
183
/* Child-side payload run after fork(); returns 0 iff no leak was observed. */
typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
185
/*
 * COW test driver: fork() a child running @fn, optionally exercise the
 * mprotect() downgrade/upgrade sequence, modify the memory in the parent,
 * and let the child verify that it cannot observe the modification.
 *
 * @mem/@size: the target range (already populated by the caller).
 * @do_mprotect: mprotect(PROT_READ) + mprotect(PROT_READ|PROT_WRITE)
 *	before writing, to exercise mprotect() write-upgrade optimizations.
 * @fn: child payload; exits with 0 iff no leak was observed.
 * @xfail: treat a detected leak as expected failure (hugetlb + vmsplice).
 */
static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
				  child_fn fn, bool xfail)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		/* Child: run the payload and report its result via exit(). */
		exit(fn(mem, size, &comm_pipes));
	}

	/* Wait until the child snapshotted/pinned the memory. */
	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	if (do_mprotect) {
		/*
		 * mprotect() optimizations might try avoiding
		 * write-faults by directly mapping pages writable.
		 */
		ret = mprotect(mem, size, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			/* Release and reap the child before bailing out. */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}

		ret = mprotect(mem, size, PROT_READ|PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	write(comm_pipes.parent_ready[1], "0", 1);

	/* The child's exit status tells us whether it observed the write. */
	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret) {
		log_test_result(KSFT_PASS);
	} else if (xfail) {
		/*
		 * With hugetlb, some vmsplice() tests are currently expected to
		 * fail because (a) harder to fix and (b) nobody really cares.
		 * Flag them as expected failure for now.
		 */
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_XFAIL);
	} else {
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_FAIL);
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
262
/* Basic COW: fork(), modify in the parent; the child must not see it. */
static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
}

/* Same, but with the mprotect() downgrade/upgrade sequence in the parent. */
static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
}

/* Child vmsplice()s + unmaps, parent modifies (CVE-2020-29374 scenario). */
static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
			      is_hugetlb);
}

/* Same, but with the mprotect() sequence in the parent. */
static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
					    bool is_hugetlb)
{
	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
			      is_hugetlb);
}
285
286static void do_test_vmsplice_in_parent(char *mem, size_t size,
287 bool before_fork, bool xfail)
288{
289 struct iovec iov = {
290 .iov_base = mem,
291 .iov_len = size,
292 };
293 ssize_t cur, total, transferred = 0;
294 struct comm_pipes comm_pipes;
295 char *old, *new;
296 int ret, fds[2];
297 char buf;
298
299 old = malloc(size);
300 new = malloc(size);
301
302 memcpy(old, mem, size);
303
304 ret = setup_comm_pipes(&comm_pipes);
305 if (ret) {
306 log_test_result(KSFT_FAIL);
307 goto free;
308 }
309
310 if (pipe(fds) < 0) {
311 ksft_perror("pipe() failed");
312 log_test_result(KSFT_FAIL);
313 goto close_comm_pipes;
314 }
315
316 if (before_fork) {
317 transferred = vmsplice(fds[1], &iov, 1, 0);
318 if (transferred <= 0) {
319 ksft_perror("vmsplice() failed\n");
320 log_test_result(KSFT_FAIL);
321 goto close_pipe;
322 }
323 }
324
325 ret = fork();
326 if (ret < 0) {
327 ksft_perror("fork() failed\n");
328 log_test_result(KSFT_FAIL);
329 goto close_pipe;
330 } else if (!ret) {
331 write(comm_pipes.child_ready[1], "0", 1);
332 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
333 ;
334 /* Modify page content in the child. */
335 memset(mem, 0xff, size);
336 exit(0);
337 }
338
339 if (!before_fork) {
340 transferred = vmsplice(fds[1], &iov, 1, 0);
341 if (transferred <= 0) {
342 ksft_perror("vmsplice() failed");
343 log_test_result(KSFT_FAIL);
344 wait(&ret);
345 goto close_pipe;
346 }
347 }
348
349 while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
350 ;
351 if (munmap(mem, size) < 0) {
352 ksft_perror("munmap() failed");
353 log_test_result(KSFT_FAIL);
354 goto close_pipe;
355 }
356 write(comm_pipes.parent_ready[1], "0", 1);
357
358 /* Wait until the child is done writing. */
359 wait(&ret);
360 if (!WIFEXITED(ret)) {
361 ksft_perror("wait() failed");
362 log_test_result(KSFT_FAIL);
363 goto close_pipe;
364 }
365
366 /* See if we still read the old values. */
367 for (total = 0; total < transferred; total += cur) {
368 cur = read(fds[0], new + total, transferred - total);
369 if (cur < 0) {
370 ksft_perror("read() failed");
371 log_test_result(KSFT_FAIL);
372 goto close_pipe;
373 }
374 }
375
376 if (!memcmp(old, new, transferred)) {
377 log_test_result(KSFT_PASS);
378 } else if (xfail) {
379 /*
380 * With hugetlb, some vmsplice() tests are currently expected to
381 * fail because (a) harder to fix and (b) nobody really cares.
382 * Flag them as expected failure for now.
383 */
384 ksft_print_msg("Leak from child into parent\n");
385 log_test_result(KSFT_XFAIL);
386 } else {
387 ksft_print_msg("Leak from child into parent\n");
388 log_test_result(KSFT_FAIL);
389 }
390close_pipe:
391 close(fds[0]);
392 close(fds[1]);
393close_comm_pipes:
394 close_comm_pipes(&comm_pipes);
395free:
396 free(old);
397 free(new);
398}
399
/* vmsplice() before fork(): the pages get pinned while still shared. */
static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_vmsplice_in_parent(mem, size, true, is_hugetlb);
}

/* vmsplice() after fork(): taking the pin must trigger unsharing. */
static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_vmsplice_in_parent(mem, size, false, is_hugetlb);
}
409
410#ifdef LOCAL_CONFIG_HAVE_LIBURING
411static void do_test_iouring(char *mem, size_t size, bool use_fork)
412{
413 struct comm_pipes comm_pipes;
414 struct io_uring_cqe *cqe;
415 struct io_uring_sqe *sqe;
416 struct io_uring ring;
417 ssize_t cur, total;
418 struct iovec iov;
419 char *buf, *tmp;
420 int ret, fd;
421 FILE *file;
422
423 ret = setup_comm_pipes(&comm_pipes);
424 if (ret) {
425 log_test_result(KSFT_FAIL);
426 return;
427 }
428
429 file = tmpfile();
430 if (!file) {
431 ksft_perror("tmpfile() failed");
432 log_test_result(KSFT_FAIL);
433 goto close_comm_pipes;
434 }
435 fd = fileno(file);
436 assert(fd);
437
438 tmp = malloc(size);
439 if (!tmp) {
440 ksft_print_msg("malloc() failed\n");
441 log_test_result(KSFT_FAIL);
442 goto close_file;
443 }
444
445 /* Skip on errors, as we might just lack kernel support. */
446 ret = io_uring_queue_init(1, &ring, 0);
447 if (ret < 0) {
448 ksft_print_msg("io_uring_queue_init() failed\n");
449 log_test_result(KSFT_SKIP);
450 goto free_tmp;
451 }
452
453 /*
454 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
455 * | FOLL_LONGTERM the range.
456 *
457 * Skip on errors, as we might just lack kernel support or might not
458 * have sufficient MEMLOCK permissions.
459 */
460 iov.iov_base = mem;
461 iov.iov_len = size;
462 ret = io_uring_register_buffers(&ring, &iov, 1);
463 if (ret) {
464 ksft_print_msg("io_uring_register_buffers() failed\n");
465 log_test_result(KSFT_SKIP);
466 goto queue_exit;
467 }
468
469 if (use_fork) {
470 /*
471 * fork() and keep the child alive until we're done. Note that
472 * we expect the pinned page to not get shared with the child.
473 */
474 ret = fork();
475 if (ret < 0) {
476 ksft_perror("fork() failed");
477 log_test_result(KSFT_FAIL);
478 goto unregister_buffers;
479 } else if (!ret) {
480 write(comm_pipes.child_ready[1], "0", 1);
481 while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
482 ;
483 exit(0);
484 }
485
486 while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
487 ;
488 } else {
489 /*
490 * Map the page R/O into the page table. Enable softdirty
491 * tracking to stop the page from getting mapped R/W immediately
492 * again by mprotect() optimizations. Note that we don't have an
493 * easy way to test if that worked (the pagemap does not export
494 * if the page is mapped R/O vs. R/W).
495 */
496 ret = mprotect(mem, size, PROT_READ);
497 if (ret) {
498 ksft_perror("mprotect() failed");
499 log_test_result(KSFT_FAIL);
500 goto unregister_buffers;
501 }
502
503 clear_softdirty();
504 ret = mprotect(mem, size, PROT_READ | PROT_WRITE);
505 if (ret) {
506 ksft_perror("mprotect() failed");
507 log_test_result(KSFT_FAIL);
508 goto unregister_buffers;
509 }
510 }
511
512 /*
513 * Modify the page and write page content as observed by the fixed
514 * buffer pin to the file so we can verify it.
515 */
516 memset(mem, 0xff, size);
517 sqe = io_uring_get_sqe(&ring);
518 if (!sqe) {
519 ksft_print_msg("io_uring_get_sqe() failed\n");
520 log_test_result(KSFT_FAIL);
521 goto quit_child;
522 }
523 io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
524
525 ret = io_uring_submit(&ring);
526 if (ret < 0) {
527 ksft_print_msg("io_uring_submit() failed\n");
528 log_test_result(KSFT_FAIL);
529 goto quit_child;
530 }
531
532 ret = io_uring_wait_cqe(&ring, &cqe);
533 if (ret < 0) {
534 ksft_print_msg("io_uring_wait_cqe() failed\n");
535 log_test_result(KSFT_FAIL);
536 goto quit_child;
537 }
538
539 if (cqe->res != size) {
540 ksft_print_msg("write_fixed failed\n");
541 log_test_result(KSFT_FAIL);
542 goto quit_child;
543 }
544 io_uring_cqe_seen(&ring, cqe);
545
546 /* Read back the file content to the temporary buffer. */
547 total = 0;
548 while (total < size) {
549 cur = pread(fd, tmp + total, size - total, total);
550 if (cur < 0) {
551 ksft_perror("pread() failed\n");
552 log_test_result(KSFT_FAIL);
553 goto quit_child;
554 }
555 total += cur;
556 }
557
558 /* Finally, check if we read what we expected. */
559 if (!memcmp(mem, tmp, size)) {
560 log_test_result(KSFT_PASS);
561 } else {
562 ksft_print_msg("Longtom R/W pin is not reliable\n");
563 log_test_result(KSFT_FAIL);
564 }
565
566quit_child:
567 if (use_fork) {
568 write(comm_pipes.parent_ready[1], "0", 1);
569 wait(&ret);
570 }
571unregister_buffers:
572 io_uring_unregister_buffers(&ring);
573queue_exit:
574 io_uring_queue_exit(&ring);
575free_tmp:
576 free(tmp);
577close_file:
578 fclose(file);
579close_comm_pipes:
580 close_comm_pipes(&comm_pipes);
581}
582
/* R/O-map a page registered as an iouring fixed buffer, then modify it. */
static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
{
	do_test_iouring(mem, size, false);
}

/* fork() with an iouring fixed buffer; the pin must stay exclusive. */
static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
{
	do_test_iouring(mem, size, true);
}
592
593#endif /* LOCAL_CONFIG_HAVE_LIBURING */
594
/* Variants of the R/O longterm pin test (see do_test_ro_pin()). */
enum ro_pin_test {
	RO_PIN_TEST,			/* pin the range as-is */
	RO_PIN_TEST_SHARED,		/* pin while shared with a live child */
	RO_PIN_TEST_PREVIOUSLY_SHARED,	/* pin after the sharing child quit */
	RO_PIN_TEST_RO_EXCLUSIVE,	/* pin an exclusive, R/O-mapped range */
};
601
/*
 * Take a longterm R/O pin (via the gup_test kernel interface) on @mem
 * after preparing the mapping according to @test, then modify the memory
 * via the page table and verify the modification is visible through the
 * pin (i.e., the pin triggered unsharing first).
 *
 * @fast: use GUP-fast instead of ordinary GUP for the pin.
 */
static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
			   bool fast)
{
	struct pin_longterm_test args;
	struct comm_pipes comm_pipes;
	char *tmp, buf;
	__u64 tmp_val;
	int ret;

	if (gup_fd < 0) {
		ksft_print_msg("gup_test not available\n");
		log_test_result(KSFT_SKIP);
		return;
	}

	tmp = malloc(size);
	if (!tmp) {
		ksft_perror("malloc() failed\n");
		log_test_result(KSFT_FAIL);
		return;
	}

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		goto free_tmp;
	}

	/* Prepare the mapping state the requested variant wants to pin. */
	switch (test) {
	case RO_PIN_TEST:
		break;
	case RO_PIN_TEST_SHARED:
	case RO_PIN_TEST_PREVIOUSLY_SHARED:
		/*
		 * Share the pages with our child. As the pages are not pinned,
		 * this should just work.
		 */
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		} else if (!ret) {
			/* Child: just stay alive until the parent says quit. */
			write(comm_pipes.child_ready[1], "0", 1);
			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
				;
			exit(0);
		}

		/* Wait until our child is ready. */
		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
			;

		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
			/*
			 * Tell the child to quit now and wait until it quit.
			 * The pages should now be mapped R/O into our page
			 * tables, but they are no longer shared.
			 */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			if (!WIFEXITED(ret))
				ksft_print_msg("[INFO] wait() failed\n");
		}
		break;
	case RO_PIN_TEST_RO_EXCLUSIVE:
		/*
		 * Map the page R/O into the page table. Enable softdirty
		 * tracking to stop the page from getting mapped R/W immediately
		 * again by mprotect() optimizations. Note that we don't have an
		 * easy way to test if that worked (the pagemap does not export
		 * if the page is mapped R/O vs. R/W).
		 */
		ret = mprotect(mem, size, PROT_READ);
		clear_softdirty();
		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Take a R/O pin. This should trigger unsharing. */
	args.addr = (__u64)(uintptr_t)mem;
	args.size = size;
	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (ret) {
		/* EINVAL: the kernel cannot pin this range -> skip, not fail. */
		if (errno == EINVAL)
			ret = KSFT_SKIP;
		else
			ret = KSFT_FAIL;
		ksft_perror("PIN_LONGTERM_TEST_START failed");
		log_test_result(ret);
		goto wait;
	}

	/* Modify the page. */
	memset(mem, 0xff, size);

	/*
	 * Read back the content via the pin to the temporary buffer and
	 * test if we observed the modification.
	 */
	tmp_val = (__u64)(uintptr_t)tmp;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
	if (ret) {
		ksft_perror("PIN_LONGTERM_TEST_READ failed");
		log_test_result(KSFT_FAIL);
	} else {
		if (!memcmp(mem, tmp, size)) {
			log_test_result(KSFT_PASS);
		} else {
			ksft_print_msg("Longterm R/O pin is not reliable\n");
			log_test_result(KSFT_FAIL);
		}
	}

	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	if (ret)
		ksft_perror("PIN_LONGTERM_TEST_STOP failed");
wait:
	/* Reap the child that might still be waiting for our go-ahead. */
	switch (test) {
	case RO_PIN_TEST_SHARED:
		write(comm_pipes.parent_ready[1], "0", 1);
		wait(&ret);
		if (!WIFEXITED(ret))
			ksft_perror("wait() failed");
		break;
	default:
		break;
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
free_tmp:
	free(tmp);
}
743
/* R/O pin on a currently-shared, R/O-mapped anon page. */
static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}

/* Same as above, but using GUP-fast. */
static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}

/* R/O pin on an exclusive page that was previously shared. */
static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}

/* Same as above, but using GUP-fast. */
static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
						     bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}

/* R/O pin on an exclusive, R/O-mapped anon page. */
static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
					bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}

/* Same as above, but using GUP-fast. */
static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
					     bool is_hugetlb)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}

/* Test entry point: @hugetlb tells whether @mem is hugetlb-backed. */
typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);
779
780static void do_run_with_base_page(test_fn fn, bool swapout)
781{
782 char *mem;
783 int ret;
784
785 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
786 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
787 if (mem == MAP_FAILED) {
788 ksft_perror("mmap() failed");
789 log_test_result(KSFT_FAIL);
790 return;
791 }
792
793 ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
794 /* Ignore if not around on a kernel. */
795 if (ret && errno != EINVAL) {
796 ksft_perror("MADV_NOHUGEPAGE failed");
797 log_test_result(KSFT_FAIL);
798 goto munmap;
799 }
800
801 /* Populate a base page. */
802 memset(mem, 1, pagesize);
803
804 if (swapout) {
805 madvise(mem, pagesize, MADV_PAGEOUT);
806 if (!pagemap_is_swapped(pagemap_fd, mem)) {
807 ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
808 log_test_result(KSFT_SKIP);
809 goto munmap;
810 }
811 }
812
813 fn(mem, pagesize, false);
814munmap:
815 munmap(mem, pagesize);
816}
817
/* Run @fn on a single, populated base page. */
static void run_with_base_page(test_fn fn, const char *desc)
{
	log_test_start("%s ... with base page", desc);
	do_run_with_base_page(fn, false);
}

/* Run @fn on a base page that was swapped out after populating it. */
static void run_with_base_page_swap(test_fn fn, const char *desc)
{
	log_test_start("%s ... with swapped out base page", desc);
	do_run_with_base_page(fn, true);
}
829
/* How do_run_with_thp() transforms the THP before running the test. */
enum thp_run {
	THP_RUN_PMD,			/* PMD-mapped THP */
	THP_RUN_PMD_SWAPOUT,		/* PMD-mapped THP, then swapped out */
	THP_RUN_PTE,			/* PTE-mapped THP */
	THP_RUN_PTE_SWAPOUT,		/* PTE-mapped THP, then swapped out */
	THP_RUN_SINGLE_PTE,		/* single remaining PTE of a THP */
	THP_RUN_SINGLE_PTE_SWAPOUT,	/* single remaining PTE, swapped out */
	THP_RUN_PARTIAL_MREMAP,		/* half of the THP mremap()'ed away */
	THP_RUN_PARTIAL_SHARED,		/* only part of the THP ever shared */
};
840
/*
 * Populate a THP of size @thpsize, transform it according to @thp_run
 * (PTE-map it, discard all but one subpage, partially mremap() it,
 * partially share it, and/or swap it out), then run @fn on the
 * remaining range.
 */
static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
{
	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
	size_t size, mmap_size, mremap_size;
	int ret;

	/* For alignment purposes, we need twice the thp size. */
	mmap_size = 2 * thpsize;
	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mmap_mem == MAP_FAILED) {
		ksft_perror("mmap() failed");
		log_test_result(KSFT_FAIL);
		return;
	}

	/* We need a THP-aligned memory area. */
	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));

	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
	if (ret) {
		ksft_perror("MADV_HUGEPAGE failed");
		log_test_result(KSFT_FAIL);
		goto munmap;
	}

	/*
	 * Try to populate a THP. Touch the first sub-page and test if
	 * we get the last sub-page populated automatically.
	 */
	mem[0] = 1;
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		ksft_print_msg("Did not get a THP populated\n");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	memset(mem, 1, thpsize);

	size = thpsize;
	switch (thp_run) {
	case THP_RUN_PMD:
	case THP_RUN_PMD_SWAPOUT:
		assert(thpsize == pmdsize);
		break;
	case THP_RUN_PTE:
	case THP_RUN_PTE_SWAPOUT:
		/*
		 * Trigger PTE-mapping the THP by temporarily mapping a single
		 * subpage R/O. This is a noop if the THP is not pmdsize (and
		 * therefore already PTE-mapped).
		 */
		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
		if (ret) {
			ksft_perror("mprotect() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	case THP_RUN_SINGLE_PTE:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		/*
		 * Discard all but a single subpage of that PTE-mapped THP. What
		 * remains is a single PTE mapping a single subpage.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
		if (ret) {
			ksft_perror("MADV_DONTNEED failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = pagesize;
		break;
	case THP_RUN_PARTIAL_MREMAP:
		/*
		 * Remap half of the THP. We need some new memory location
		 * for that.
		 */
		mremap_size = thpsize / 2;
		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (mremap_mem == MAP_FAILED) {
			ksft_perror("mmap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
		if (tmp != mremap_mem) {
			ksft_perror("mremap() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		size = mremap_size;
		break;
	case THP_RUN_PARTIAL_SHARED:
		/*
		 * Share the first page of the THP with a child and quit the
		 * child. This will result in some parts of the THP never
		 * have been shared.
		 */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		ret = fork();
		if (ret < 0) {
			ksft_perror("fork() failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		} else if (!ret) {
			exit(0);
		}
		wait(&ret);
		/* Allow for sharing all pages again. */
		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			goto munmap;
		}
		break;
	default:
		assert(false);
	}

	/* For the *_SWAPOUT variants, additionally swap out the range. */
	switch (thp_run) {
	case THP_RUN_PMD_SWAPOUT:
	case THP_RUN_PTE_SWAPOUT:
	case THP_RUN_SINGLE_PTE_SWAPOUT:
		madvise(mem, size, MADV_PAGEOUT);
		if (!range_is_swapped(mem, size)) {
			ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n");
			log_test_result(KSFT_SKIP);
			goto munmap;
		}
		break;
	default:
		break;
	}

	fn(mem, size, false);
munmap:
	munmap(mmap_mem, mmap_size);
	if (mremap_mem != MAP_FAILED)
		munmap(mremap_mem, mremap_size);
}
995
/*
 * Thin wrappers that log the test variant and invoke do_run_with_thp()
 * with the matching THP_RUN_* mode.
 */
static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD, size);
}

static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}

static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with PTE-mapped THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE, size);
}

static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}

static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}

static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}

static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially mremap()'ed THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}

static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
	log_test_start("%s ... with partially shared THP (%zu kB)",
		desc, size / 1024);
	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}
1051
/*
 * Run @fn on a freshly populated hugetlb page of @hugetlbsize bytes.
 * Skips (rather than fails) when not enough free huge pages exist.
 */
static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
	char *mem, *dummy;

	log_test_start("%s ... with hugetlb (%zu kB)", desc,
		       hugetlbsize / 1024);

	/* Encode log2 of the huge page size into the mmap() flags. */
	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;

	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (mem == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		return;
	}

	/* Populate an huge page. */
	memset(mem, 1, hugetlbsize);

	/*
	 * We need a total of two hugetlb pages to handle COW/unsharing
	 * properly, otherwise we might get zapped by a SIGBUS.
	 */
	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
	if (dummy == MAP_FAILED) {
		ksft_perror("need more free huge pages");
		log_test_result(KSFT_SKIP);
		goto munmap;
	}
	munmap(dummy, hugetlbsize);

	fn(mem, hugetlbsize, true);
munmap:
	munmap(mem, hugetlbsize);
}
1088
/* A named test to be run across all supported page types and sizes. */
struct test_case {
	const char *desc;	/* human-readable description for logging */
	test_fn fn;		/* test entry point */
};
1093
/*
 * Test cases that are specific to anonymous pages: pages in private mappings
 * that may get shared via COW during fork(). Each entry is expanded across
 * base pages, THP variants and hugetlb sizes by the test runner.
 */
static const struct test_case anon_test_cases[] = {
	/*
	 * Basic COW tests for fork() without any GUP. If we miss to break COW,
	 * either the child can observe modifications by the parent or the
	 * other way around.
	 */
	{
		"Basic COW after fork()",
		test_cow_in_parent,
	},
	/*
	 * Basic test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"Basic COW after fork() with mprotect() optimization",
		test_cow_in_parent_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
	 * we miss to break COW, the child observes modifications by the parent.
	 * This is CVE-2020-29374 reported by Jann Horn.
	 */
	{
		"vmsplice() + unmap in child",
		test_vmsplice_in_child,
	},
	/*
	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
	 */
	{
		"vmsplice() + unmap in child with mprotect() optimization",
		test_vmsplice_in_child_mprotect,
	},
	/*
	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
	 * fork(); modify in the child. If we miss to break COW, the parent
	 * observes modifications by the child.
	 */
	{
		"vmsplice() before fork(), unmap in parent after fork()",
		test_vmsplice_before_fork,
	},
	/*
	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
	 * child. If we miss to break COW, the parent observes modifications by
	 * the child.
	 */
	{
		"vmsplice() + unmap in parent after fork()",
		test_vmsplice_after_fork,
	},
#ifdef LOCAL_CONFIG_HAVE_LIBURING
	/*
	 * Take a R/W longterm pin and then map the page R/O into the page
	 * table to trigger a write fault on next access. When modifying the
	 * page, the page content must be visible via the pin.
	 */
	{
		"R/O-mapping a page registered as iouring fixed buffer",
		test_iouring_ro,
	},
	/*
	 * Take a R/W longterm pin and then fork() a child. When modifying the
	 * page, the page content must be visible via the pin. We expect the
	 * pinned page to not get shared with the child.
	 */
	{
		"fork() with an iouring fixed buffer",
		test_iouring_fork,
	},

#endif /* LOCAL_CONFIG_HAVE_LIBURING */
	/*
	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped shared page",
		test_ro_pin_on_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped shared page",
		test_ro_fast_pin_on_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
	 * was previously shared. When modifying the page via the page table,
	 * the page content change must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped previously-shared page",
		test_ro_pin_on_ro_previously_shared,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped previously-shared page",
		test_ro_fast_pin_on_ro_previously_shared,
	},
	/*
	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
	 * When modifying the page via the page table, the page content change
	 * must be visible via the pin.
	 */
	{
		"R/O GUP pin on R/O-mapped exclusive page",
		test_ro_pin_on_ro_exclusive,
	},
	/* Same as above, but using GUP-fast. */
	{
		"R/O GUP-fast pin on R/O-mapped exclusive page",
		test_ro_fast_pin_on_ro_exclusive,
	},
};
1215
1216static void run_anon_test_case(struct test_case const *test_case)
1217{
1218 int i;
1219
1220 run_with_base_page(test_case->fn, test_case->desc);
1221 run_with_base_page_swap(test_case->fn, test_case->desc);
1222 for (i = 0; i < nr_thpsizes; i++) {
1223 size_t size = thpsizes[i];
1224 struct thp_settings settings = *thp_current_settings();
1225
1226 settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER;
1227 settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS;
1228 thp_push_settings(&settings);
1229
1230 if (size == pmdsize) {
1231 run_with_thp(test_case->fn, test_case->desc, size);
1232 run_with_thp_swap(test_case->fn, test_case->desc, size);
1233 }
1234
1235 run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
1236 run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
1237 run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
1238 run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
1239 run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
1240 run_with_partial_shared_thp(test_case->fn, test_case->desc, size);
1241
1242 thp_pop_settings();
1243 }
1244 for (i = 0; i < nr_hugetlbsizes; i++)
1245 run_with_hugetlb(test_case->fn, test_case->desc,
1246 hugetlbsizes[i]);
1247}
1248
1249static void run_anon_test_cases(void)
1250{
1251 int i;
1252
1253 ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");
1254
1255 for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
1256 run_anon_test_case(&anon_test_cases[i]);
1257}
1258
1259static int tests_per_anon_test_case(void)
1260{
1261 int tests = 2 + nr_hugetlbsizes;
1262
1263 tests += 6 * nr_thpsizes;
1264 if (pmdsize)
1265 tests += 2;
1266 return tests;
1267}
1268
/*
 * Variants of the THP collapse test: when MADV_COLLAPSE happens relative to
 * fork(), and which part of the THP gets COW-shared with the child.
 */
enum anon_thp_collapse_test {
	ANON_THP_COLLAPSE_UNSHARED,		/* collapse before fork() */
	ANON_THP_COLLAPSE_FULLY_SHARED,		/* whole THP COW-shared */
	ANON_THP_COLLAPSE_LOWER_SHARED,		/* only lower half COW-shared */
	ANON_THP_COLLAPSE_UPPER_SHARED,		/* only upper half COW-shared */
};
1275
/*
 * Core of the THP collapse COW tests: PTE-map a THP, optionally limit which
 * part the child will COW-share (MADV_DONTFORK), collapse it back to a PMD
 * (MADV_COLLAPSE) either before or after fork(), then modify the parent's
 * copy and check that the child does not observe the modification.
 *
 * The child compares its (shared) memory content via child_memcmp_fn() and
 * reports the result through its exit status; parent and child synchronize
 * through @comm_pipes.
 */
static void do_test_anon_thp_collapse(char *mem, size_t size,
				      enum anon_thp_collapse_test test)
{
	struct comm_pipes comm_pipes;
	char buf;
	int ret;

	ret = setup_comm_pipes(&comm_pipes);
	if (ret) {
		log_test_result(KSFT_FAIL);
		return;
	}

	/*
	 * Trigger PTE-mapping the THP by temporarily mapping a single subpage
	 * R/O, such that we can try collapsing it later.
	 */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}
	/* Restore R/W so the parent can modify the range later on. */
	ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
	if (ret) {
		ksft_perror("mprotect() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	}

	/* Pre-fork() preparation, depending on the test variant. */
	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		/* Collapse before actually COW-sharing the page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* COW-share the full PTE-mapped THP. */
		break;
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/* Don't COW-share the upper part of the THP. */
		ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
		/* Don't COW-share the lower part of the THP. */
		ret = madvise(mem, size / 2, MADV_DONTFORK);
		if (ret) {
			ksft_perror("MADV_DONTFORK failed");
			log_test_result(KSFT_FAIL);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	ret = fork();
	if (ret < 0) {
		ksft_perror("fork() failed");
		log_test_result(KSFT_FAIL);
		goto close_comm_pipes;
	} else if (!ret) {
		/* Child: compare only the COW-shared part of the range. */
		switch (test) {
		case ANON_THP_COLLAPSE_UNSHARED:
		case ANON_THP_COLLAPSE_FULLY_SHARED:
			exit(child_memcmp_fn(mem, size, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_LOWER_SHARED:
			exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
			break;
		case ANON_THP_COLLAPSE_UPPER_SHARED:
			exit(child_memcmp_fn(mem + size / 2, size / 2,
					     &comm_pipes));
			break;
		default:
			assert(false);
		}
	}

	/* Parent: wait until the child signals it is ready. */
	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
		;

	/* Post-fork() collapse, depending on the test variant. */
	switch (test) {
	case ANON_THP_COLLAPSE_UNSHARED:
		break;
	case ANON_THP_COLLAPSE_UPPER_SHARED:
	case ANON_THP_COLLAPSE_LOWER_SHARED:
		/*
		 * Revert MADV_DONTFORK such that we merge the VMAs and are
		 * able to actually collapse.
		 */
		ret = madvise(mem, size, MADV_DOFORK);
		if (ret) {
			ksft_perror("MADV_DOFORK failed");
			log_test_result(KSFT_FAIL);
			/* Unblock and reap the child before bailing out. */
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		/* FALLTHROUGH */
	case ANON_THP_COLLAPSE_FULLY_SHARED:
		/* Collapse before anyone modified the COW-shared page. */
		ret = madvise(mem, size, MADV_COLLAPSE);
		if (ret) {
			ksft_perror("MADV_COLLAPSE failed");
			log_test_result(KSFT_SKIP);
			write(comm_pipes.parent_ready[1], "0", 1);
			wait(&ret);
			goto close_comm_pipes;
		}
		break;
	default:
		assert(false);
	}

	/* Modify the page. */
	memset(mem, 0xff, size);
	/* NOTE(review): short-write/-failure of this wakeup is ignored. */
	write(comm_pipes.parent_ready[1], "0", 1);

	/* A zero child exit status means the child saw unmodified content. */
	wait(&ret);
	if (WIFEXITED(ret))
		ret = WEXITSTATUS(ret);
	else
		ret = -EINVAL;

	if (!ret) {
		log_test_result(KSFT_PASS);
	} else {
		ksft_print_msg("Leak from parent into child\n");
		log_test_result(KSFT_FAIL);
	}
close_comm_pipes:
	close_comm_pipes(&comm_pipes);
}
1419
/* THP collapse test: collapse before fork(), i.e., before any COW-sharing. */
static void test_anon_thp_collapse_unshared(char *mem, size_t size,
					    bool is_hugetlb)
{
	/* The collapse tests are only wired up for anon THP, never hugetlb. */
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}
1426
/* THP collapse test: collapse after COW-sharing the entire THP. */
static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	/* The collapse tests are only wired up for anon THP, never hugetlb. */
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}
1433
/* THP collapse test: collapse after COW-sharing only the lower half. */
static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	/* The collapse tests are only wired up for anon THP, never hugetlb. */
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}
1440
/* THP collapse test: collapse after COW-sharing only the upper half. */
static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
						bool is_hugetlb)
{
	/* The collapse tests are only wired up for anon THP, never hugetlb. */
	assert(!is_hugetlb);
	do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}
1447
1448/*
1449 * Test cases that are specific to anonymous THP: pages in private mappings
1450 * that may get shared via COW during fork().
1451 */
1452static const struct test_case anon_thp_test_cases[] = {
1453 /*
1454 * Basic COW test for fork() without any GUP when collapsing a THP
1455 * before fork().
1456 *
1457 * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
1458 * collapse") might easily get COW handling wrong when not collapsing
1459 * exclusivity information properly.
1460 */
1461 {
1462 "Basic COW after fork() when collapsing before fork()",
1463 test_anon_thp_collapse_unshared,
1464 },
1465 /* Basic COW test, but collapse after COW-sharing a full THP. */
1466 {
1467 "Basic COW after fork() when collapsing after fork() (fully shared)",
1468 test_anon_thp_collapse_fully_shared,
1469 },
1470 /*
1471 * Basic COW test, but collapse after COW-sharing the lower half of a
1472 * THP.
1473 */
1474 {
1475 "Basic COW after fork() when collapsing after fork() (lower shared)",
1476 test_anon_thp_collapse_lower_shared,
1477 },
1478 /*
1479 * Basic COW test, but collapse after COW-sharing the upper half of a
1480 * THP.
1481 */
1482 {
1483 "Basic COW after fork() when collapsing after fork() (upper shared)",
1484 test_anon_thp_collapse_upper_shared,
1485 },
1486};
1487
1488static void run_anon_thp_test_cases(void)
1489{
1490 int i;
1491
1492 if (!pmdsize)
1493 return;
1494
1495 ksft_print_msg("[INFO] Anonymous THP tests\n");
1496
1497 for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
1498 struct test_case const *test_case = &anon_thp_test_cases[i];
1499
1500 log_test_start("%s", test_case->desc);
1501 do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
1502 }
1503}
1504
1505static int tests_per_anon_thp_test_case(void)
1506{
1507 return pmdsize ? 1 : 0;
1508}
1509
/*
 * Test on non-anonymous memory: @mem is a writable private mapping and
 * @smem another (R/O) mapping of the same @size bytes of backing memory.
 */
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
1511
1512static void test_cow(char *mem, const char *smem, size_t size)
1513{
1514 char *old = malloc(size);
1515
1516 /* Backup the original content. */
1517 memcpy(old, smem, size);
1518
1519 /* Modify the page. */
1520 memset(mem, 0xff, size);
1521
1522 /* See if we still read the old values via the other mapping. */
1523 if (!memcmp(smem, old, size)) {
1524 log_test_result(KSFT_PASS);
1525 } else {
1526 ksft_print_msg("Other mapping modified\n");
1527 log_test_result(KSFT_FAIL);
1528 }
1529 free(old);
1530}
1531
/* R/O longterm pin using ordinary (slow) GUP; @smem is unused here. */
static void test_ro_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
}
1536
/* R/O longterm pin using GUP-fast; @smem is unused here. */
static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
{
	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
}
1541
1542static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
1543{
1544 char *mem, *smem;
1545
1546 log_test_start("%s ... with shared zeropage", desc);
1547
1548 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
1549 MAP_PRIVATE | MAP_ANON, -1, 0);
1550 if (mem == MAP_FAILED) {
1551 ksft_perror("mmap() failed");
1552 log_test_result(KSFT_FAIL);
1553 return;
1554 }
1555
1556 smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
1557 if (smem == MAP_FAILED) {
1558 ksft_perror("mmap() failed");
1559 log_test_result(KSFT_FAIL);
1560 goto munmap;
1561 }
1562
1563 /* Read from the page to populate the shared zeropage. */
1564 if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
1565 log_test_result(KSFT_FAIL);
1566 goto munmap;
1567 }
1568
1569 fn(mem, smem, pagesize);
1570munmap:
1571 munmap(mem, pagesize);
1572 if (smem != MAP_FAILED)
1573 munmap(smem, pagesize);
1574}
1575
1576static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
1577{
1578 char *mem, *smem, *mmap_mem, *mmap_smem;
1579 size_t mmap_size;
1580 int ret;
1581
1582 log_test_start("%s ... with huge zeropage", desc);
1583
1584 if (!has_huge_zeropage) {
1585 ksft_print_msg("Huge zeropage not enabled\n");
1586 log_test_result(KSFT_SKIP);
1587 return;
1588 }
1589
1590 /* For alignment purposes, we need twice the thp size. */
1591 mmap_size = 2 * pmdsize;
1592 mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
1593 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1594 if (mmap_mem == MAP_FAILED) {
1595 ksft_perror("mmap() failed");
1596 log_test_result(KSFT_FAIL);
1597 return;
1598 }
1599 mmap_smem = mmap(NULL, mmap_size, PROT_READ,
1600 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1601 if (mmap_smem == MAP_FAILED) {
1602 ksft_perror("mmap() failed");
1603 log_test_result(KSFT_FAIL);
1604 goto munmap;
1605 }
1606
1607 /* We need a THP-aligned memory area. */
1608 mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
1609 smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
1610
1611 ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
1612 if (ret) {
1613 ksft_perror("madvise()");
1614 log_test_result(KSFT_FAIL);
1615 goto munmap;
1616 }
1617 ret = madvise(smem, pmdsize, MADV_HUGEPAGE);
1618 if (ret) {
1619 ksft_perror("madvise()");
1620 log_test_result(KSFT_FAIL);
1621 goto munmap;
1622 }
1623
1624 /*
1625 * Read from the memory to populate the huge shared zeropage. Read from
1626 * the first sub-page and test if we get another sub-page populated
1627 * automatically.
1628 */
1629 if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
1630 log_test_result(KSFT_FAIL);
1631 goto munmap;
1632 }
1633
1634 if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
1635 !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
1636 ksft_test_result_skip("Did not get THPs populated\n");
1637 goto munmap;
1638 }
1639
1640 fn(mem, smem, pmdsize);
1641munmap:
1642 munmap(mmap_mem, mmap_size);
1643 if (mmap_smem != MAP_FAILED)
1644 munmap(mmap_smem, mmap_size);
1645}
1646
1647static void run_with_memfd(non_anon_test_fn fn, const char *desc)
1648{
1649 char *mem, *smem;
1650 int fd;
1651
1652 log_test_start("%s ... with memfd", desc);
1653
1654 fd = memfd_create("test", 0);
1655 if (fd < 0) {
1656 ksft_perror("memfd_create() failed");
1657 log_test_result(KSFT_FAIL);
1658 return;
1659 }
1660
1661 /* File consists of a single page filled with zeroes. */
1662 if (fallocate(fd, 0, 0, pagesize)) {
1663 ksft_perror("fallocate() failed");
1664 log_test_result(KSFT_FAIL);
1665 goto close;
1666 }
1667
1668 /* Create a private mapping of the memfd. */
1669 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1670 if (mem == MAP_FAILED) {
1671 ksft_perror("mmap() failed");
1672 log_test_result(KSFT_FAIL);
1673 goto close;
1674 }
1675 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1676 if (smem == MAP_FAILED) {
1677 ksft_perror("mmap() failed");
1678 log_test_result(KSFT_FAIL);
1679 goto munmap;
1680 }
1681
1682 /* Fault the page in. */
1683 if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
1684 log_test_result(KSFT_FAIL);
1685 goto munmap;
1686 }
1687
1688 fn(mem, smem, pagesize);
1689munmap:
1690 munmap(mem, pagesize);
1691 if (smem != MAP_FAILED)
1692 munmap(smem, pagesize);
1693close:
1694 close(fd);
1695}
1696
1697static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
1698{
1699 char *mem, *smem;
1700 FILE *file;
1701 int fd;
1702
1703 log_test_start("%s ... with tmpfile", desc);
1704
1705 file = tmpfile();
1706 if (!file) {
1707 ksft_perror("tmpfile() failed");
1708 log_test_result(KSFT_FAIL);
1709 return;
1710 }
1711
1712 fd = fileno(file);
1713 if (fd < 0) {
1714 ksft_perror("fileno() failed");
1715 log_test_result(KSFT_SKIP);
1716 return;
1717 }
1718
1719 /* File consists of a single page filled with zeroes. */
1720 if (fallocate(fd, 0, 0, pagesize)) {
1721 ksft_perror("fallocate() failed");
1722 log_test_result(KSFT_FAIL);
1723 goto close;
1724 }
1725
1726 /* Create a private mapping of the memfd. */
1727 mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1728 if (mem == MAP_FAILED) {
1729 ksft_perror("mmap() failed");
1730 log_test_result(KSFT_FAIL);
1731 goto close;
1732 }
1733 smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
1734 if (smem == MAP_FAILED) {
1735 ksft_perror("mmap() failed");
1736 log_test_result(KSFT_FAIL);
1737 goto munmap;
1738 }
1739
1740 /* Fault the page in. */
1741 if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
1742 log_test_result(KSFT_FAIL);
1743 goto munmap;
1744 }
1745
1746 fn(mem, smem, pagesize);
1747munmap:
1748 munmap(mem, pagesize);
1749 if (smem != MAP_FAILED)
1750 munmap(smem, pagesize);
1751close:
1752 fclose(file);
1753}
1754
1755static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
1756 size_t hugetlbsize)
1757{
1758 int flags = MFD_HUGETLB;
1759 char *mem, *smem;
1760 int fd;
1761
1762 log_test_start("%s ... with memfd hugetlb (%zu kB)", desc,
1763 hugetlbsize / 1024);
1764
1765 flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
1766
1767 fd = memfd_create("test", flags);
1768 if (fd < 0) {
1769 ksft_perror("memfd_create() failed");
1770 log_test_result(KSFT_SKIP);
1771 return;
1772 }
1773
1774 /* File consists of a single page filled with zeroes. */
1775 if (fallocate(fd, 0, 0, hugetlbsize)) {
1776 ksft_perror("need more free huge pages");
1777 log_test_result(KSFT_SKIP);
1778 goto close;
1779 }
1780
1781 /* Create a private mapping of the memfd. */
1782 mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
1783 0);
1784 if (mem == MAP_FAILED) {
1785 ksft_perror("need more free huge pages");
1786 log_test_result(KSFT_SKIP);
1787 goto close;
1788 }
1789 smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
1790 if (smem == MAP_FAILED) {
1791 ksft_perror("mmap() failed");
1792 log_test_result(KSFT_FAIL);
1793 goto munmap;
1794 }
1795
1796 /* Fault the page in. */
1797 if (!populate_page_checked(mem) || !populate_page_checked(smem)) {
1798 log_test_result(KSFT_FAIL);
1799 goto munmap;
1800 }
1801
1802 fn(mem, smem, hugetlbsize);
1803munmap:
1804 munmap(mem, hugetlbsize);
1805 if (smem != MAP_FAILED)
1806 munmap(smem, hugetlbsize);
1807close:
1808 close(fd);
1809}
1810
/* A non-anonymous-memory test case: log description plus test function. */
struct non_anon_test_case {
	const char *desc;	/* human-readable name for the log */
	non_anon_test_fn fn;	/* test to run against each backing type */
};
1815
1816/*
1817 * Test cases that target any pages in private mappings that are not anonymous:
1818 * pages that may get shared via COW ndependent of fork(). This includes
1819 * the shared zeropage(s), pagecache pages, ...
1820 */
1821static const struct non_anon_test_case non_anon_test_cases[] = {
1822 /*
1823 * Basic COW test without any GUP. If we miss to break COW, changes are
1824 * visible via other private/shared mappings.
1825 */
1826 {
1827 "Basic COW",
1828 test_cow,
1829 },
1830 /*
1831 * Take a R/O longterm pin. When modifying the page via the page table,
1832 * the page content change must be visible via the pin.
1833 */
1834 {
1835 "R/O longterm GUP pin",
1836 test_ro_pin,
1837 },
1838 /* Same as above, but using GUP-fast. */
1839 {
1840 "R/O longterm GUP-fast pin",
1841 test_ro_fast_pin,
1842 },
1843};
1844
1845static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
1846{
1847 int i;
1848
1849 run_with_zeropage(test_case->fn, test_case->desc);
1850 run_with_memfd(test_case->fn, test_case->desc);
1851 run_with_tmpfile(test_case->fn, test_case->desc);
1852 if (pmdsize)
1853 run_with_huge_zeropage(test_case->fn, test_case->desc);
1854 for (i = 0; i < nr_hugetlbsizes; i++)
1855 run_with_memfd_hugetlb(test_case->fn, test_case->desc,
1856 hugetlbsizes[i]);
1857}
1858
1859static void run_non_anon_test_cases(void)
1860{
1861 int i;
1862
1863 ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");
1864
1865 for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
1866 run_non_anon_test_case(&non_anon_test_cases[i]);
1867}
1868
1869static int tests_per_non_anon_test_case(void)
1870{
1871 int tests = 3 + nr_hugetlbsizes;
1872
1873 if (pmdsize)
1874 tests += 1;
1875 return tests;
1876}
1877
/*
 * Detect page/THP/hugetlb geometry, announce the exact test plan, open the
 * required debugfs/procfs handles and run all three test groups.
 */
int main(int argc, char **argv)
{
	struct thp_settings default_settings;

	ksft_print_header();

	pagesize = getpagesize();
	pmdsize = read_pmd_pagesize();
	if (pmdsize) {
		/* Only if THP is supported. */
		thp_read_settings(&default_settings);
		default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT;
		thp_save_settings();
		thp_push_settings(&default_settings);

		ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
			       pmdsize / 1024);
		nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
	}
	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
						    ARRAY_SIZE(hugetlbsizes));
	has_huge_zeropage = detect_huge_zeropage();

	/* The plan must match the number of results the runs below log. */
	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
		      ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());

	/*
	 * NOTE(review): gup_fd may be -1 when gup_test debugfs is
	 * unavailable; presumably the GUP tests check for that — the check is
	 * not visible in this chunk, confirm.
	 */
	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0)
		ksft_exit_fail_msg("opening pagemap failed\n");

	run_anon_test_cases();
	run_anon_thp_test_cases();
	run_non_anon_test_cases();

	if (pmdsize) {
		/* Only if THP is supported. */
		thp_restore_settings();
	}

	ksft_finished();
}