Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

selftests/mm: add folio_split() and filemap_get_entry() race test

The added folio_split_race_test is a modified C port of the race condition
test from [1]. The test creates shmem huge pages, where the main thread
punches holes in the shmem to cause folio_split() in the kernel and a set
of 16 threads reads the shmem to cause filemap_get_entry() in the kernel.
filemap_get_entry() reads the folio and xarray split by folio_split()
locklessly. The original test[2] is written in rust and uses memfd (shmem
backed). This C port uses shmem directly and use a single process.

Note: the initial rust to C conversion is done by Cursor.

Link: https://lore.kernel.org/all/CAKNNEtw5_kZomhkugedKMPOG-sxs5Q5OLumWJdiWXv+C9Yct0w@mail.gmail.com/ [1]
Link: https://github.com/dfinity/thp-madv-remove-test [2]
Link: https://lkml.kernel.org/r/20260323163717.184107-1-ziy@nvidia.com
Co-developed-by: Bas van Dijk <bas@dfinity.org>
Signed-off-by: Bas van Dijk <bas@dfinity.org>
Co-developed-by: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Signed-off-by: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Signed-off-by: Zi Yan <ziy@nvidia.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Zi Yan and committed by
Andrew Morton
224f1292 54fdcbfe

+301
+1
tools/testing/selftests/mm/.gitignore
··· 61 61 merge 62 62 prctl_thp_disable 63 63 rmap 64 + folio_split_race_test
+1
tools/testing/selftests/mm/Makefile
··· 105 105 TEST_GEN_FILES += guard-regions 106 106 TEST_GEN_FILES += merge 107 107 TEST_GEN_FILES += rmap 108 + TEST_GEN_FILES += folio_split_race_test 108 109 109 110 ifneq ($(ARCH),arm64) 110 111 TEST_GEN_FILES += soft-dirty
+297
tools/testing/selftests/mm/folio_split_race_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * The test creates shmem PMD huge pages, fills all pages with known patterns, 4 + * then continuously verifies non-punched pages with 16 threads. Meanwhile, the 5 + * main thread punches holes via MADV_REMOVE on the shmem. 6 + * 7 + * It tests the race condition between folio_split() and filemap_get_entry(), 8 + * where the hole punches on shmem lead to folio_split() and reading the shmem 9 + * lead to filemap_get_entry(). 10 + */ 11 + 12 + #define _GNU_SOURCE 13 + #include <errno.h> 14 + #include <inttypes.h> 15 + #include <linux/mman.h> 16 + #include <pthread.h> 17 + #include <stdatomic.h> 18 + #include <stdbool.h> 19 + #include <stdint.h> 20 + #include <stdio.h> 21 + #include <stdlib.h> 22 + #include <string.h> 23 + #include <sys/mman.h> 24 + #include <signal.h> 25 + #include <unistd.h> 26 + #include "vm_util.h" 27 + #include "kselftest.h" 28 + #include "thp_settings.h" 29 + 30 + uint64_t page_size; 31 + uint64_t pmd_pagesize; 32 + #define NR_PMD_PAGE 5 33 + #define FILE_SIZE (pmd_pagesize * NR_PMD_PAGE) 34 + #define TOTAL_PAGES (FILE_SIZE / page_size) 35 + 36 + /* Every N-th to N+M-th pages are punched; not aligned with huge page boundaries. */ 37 + #define PUNCH_INTERVAL 50 /* N */ 38 + #define PUNCH_SIZE_FACTOR 3 /* M */ 39 + 40 + #define NUM_READER_THREADS 16 41 + #define FILL_BYTE 0xAF 42 + #define NUM_ITERATIONS 100 43 + 44 + /* Shared control block: control reading threads and record stats */ 45 + struct shared_ctl { 46 + atomic_uint_fast32_t stop; 47 + atomic_uint_fast64_t reader_failures; 48 + atomic_uint_fast64_t reader_verified; 49 + pthread_barrier_t barrier; 50 + }; 51 + 52 + static void fill_page(unsigned char *base, size_t page_idx) 53 + { 54 + unsigned char *page_ptr = base + page_idx * page_size; 55 + uint64_t idx = (uint64_t)page_idx; 56 + 57 + memset(page_ptr, FILL_BYTE, page_size); 58 + memcpy(page_ptr, &idx, sizeof(idx)); 59 + } 60 + 61 + /* Returns true if valid, false if corrupted. */ 62 + static bool check_page(unsigned char *base, uint64_t page_idx) 63 + { 64 + unsigned char *page_ptr = base + page_idx * page_size; 65 + uint64_t expected_idx = (uint64_t)page_idx; 66 + uint64_t got_idx; 67 + 68 + memcpy(&got_idx, page_ptr, 8); 69 + 70 + if (got_idx != expected_idx) { 71 + uint64_t off; 72 + int all_zero = 1; 73 + 74 + for (off = 0; off < page_size; off++) { 75 + if (page_ptr[off] != 0) { 76 + all_zero = 0; 77 + break; 78 + } 79 + } 80 + if (all_zero) { 81 + ksft_print_msg("CORRUPTED: page %" PRIu64 82 + " (huge page %" PRIu64 83 + ") is ALL ZEROS\n", 84 + page_idx, 85 + (page_idx * page_size) / pmd_pagesize); 86 + } else { 87 + ksft_print_msg("CORRUPTED: page %" PRIu64 88 + " (huge page %" PRIu64 89 + "): expected idx %" PRIu64 90 + ", got %" PRIu64 "\n", 91 + page_idx, 92 + (page_idx * page_size) / pmd_pagesize, 93 + page_idx, got_idx); 94 + } 95 + return false; 96 + } 97 + return true; 98 + } 99 + 100 + struct reader_arg { 101 + unsigned char *base; 102 + struct shared_ctl *ctl; 103 + int tid; 104 + atomic_uint_fast64_t *failures; 105 + atomic_uint_fast64_t *verified; 106 + }; 107 + 108 + static void *reader_thread(void *arg) 109 + { 110 + struct reader_arg *ra = (struct reader_arg *)arg; 111 + unsigned char *base = ra->base; 112 + struct shared_ctl *ctl = ra->ctl; 113 + int tid = ra->tid; 114 + atomic_uint_fast64_t *failures = ra->failures; 115 + atomic_uint_fast64_t *verified = ra->verified; 116 + uint64_t page_idx; 117 + 118 + pthread_barrier_wait(&ctl->barrier); 119 + 120 + while (atomic_load_explicit(&ctl->stop, memory_order_acquire) == 0) { 121 + for (page_idx = (size_t)tid; page_idx < TOTAL_PAGES; 122 + page_idx += NUM_READER_THREADS) { 123 + /* 124 + * page_idx % PUNCH_INTERVAL is in [0, PUNCH_INTERVAL), 125 + * skip [0, PUNCH_SIZE_FACTOR) 126 + */ 127 + if (page_idx % PUNCH_INTERVAL < PUNCH_SIZE_FACTOR) 128 + continue; 129 + if (check_page(base, page_idx)) 130 + atomic_fetch_add_explicit(verified, 1, 131 + memory_order_relaxed); 132 + else 133 + atomic_fetch_add_explicit(failures, 1, 134 + memory_order_relaxed); 135 + } 136 + if (atomic_load_explicit(failures, memory_order_relaxed) > 0) 137 + break; 138 + } 139 + 140 + return NULL; 141 + } 142 + 143 + static void create_readers(pthread_t *threads, struct reader_arg *args, 144 + unsigned char *base, struct shared_ctl *ctl) 145 + { 146 + int i; 147 + 148 + for (i = 0; i < NUM_READER_THREADS; i++) { 149 + args[i].base = base; 150 + args[i].ctl = ctl; 151 + args[i].tid = i; 152 + args[i].failures = &ctl->reader_failures; 153 + args[i].verified = &ctl->reader_verified; 154 + if (pthread_create(&threads[i], NULL, reader_thread, 155 + &args[i]) != 0) 156 + ksft_exit_fail_msg("pthread_create failed\n"); 157 + } 158 + } 159 + 160 + /* Run a single iteration. Returns total number of corrupted pages. */ 161 + static uint64_t run_iteration(void) 162 + { 163 + uint64_t reader_failures, reader_verified; 164 + struct reader_arg args[NUM_READER_THREADS]; 165 + pthread_t threads[NUM_READER_THREADS]; 166 + unsigned char *mmap_base; 167 + struct shared_ctl ctl; 168 + uint64_t i; 169 + 170 + memset(&ctl, 0, sizeof(struct shared_ctl)); 171 + 172 + mmap_base = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE, 173 + MAP_SHARED | MAP_ANONYMOUS, -1, 0); 174 + 175 + if (mmap_base == MAP_FAILED) 176 + ksft_exit_fail_msg("mmap failed: %d\n", errno); 177 + 178 + if (madvise(mmap_base, FILE_SIZE, MADV_HUGEPAGE) != 0) 179 + ksft_exit_fail_msg("madvise(MADV_HUGEPAGE) failed: %d\n", 180 + errno); 181 + 182 + for (i = 0; i < TOTAL_PAGES; i++) 183 + fill_page(mmap_base, i); 184 + 185 + if (!check_huge_shmem(mmap_base, NR_PMD_PAGE, pmd_pagesize)) 186 + ksft_exit_fail_msg("No shmem THP is allocated\n"); 187 + 188 + if (pthread_barrier_init(&ctl.barrier, NULL, NUM_READER_THREADS + 1) != 0) 189 + ksft_exit_fail_msg("pthread_barrier_init failed\n"); 190 + 191 + create_readers(threads, args, mmap_base, &ctl); 192 + 193 + /* Wait for all reader threads to be ready before punching holes. */ 194 + pthread_barrier_wait(&ctl.barrier); 195 + 196 + for (i = 0; i < TOTAL_PAGES; i++) { 197 + if (i % PUNCH_INTERVAL != 0) 198 + continue; 199 + if (madvise(mmap_base + i * page_size, 200 + PUNCH_SIZE_FACTOR * page_size, MADV_REMOVE) != 0) { 201 + ksft_exit_fail_msg( 202 + "madvise(MADV_REMOVE) failed on page %" PRIu64 ": %d\n", 203 + i, errno); 204 + } 205 + 206 + i += PUNCH_SIZE_FACTOR - 1; 207 + } 208 + 209 + atomic_store_explicit(&ctl.stop, 1, memory_order_release); 210 + 211 + for (i = 0; i < NUM_READER_THREADS; i++) 212 + pthread_join(threads[i], NULL); 213 + 214 + pthread_barrier_destroy(&ctl.barrier); 215 + 216 + reader_failures = atomic_load_explicit(&ctl.reader_failures, 217 + memory_order_acquire); 218 + reader_verified = atomic_load_explicit(&ctl.reader_verified, 219 + memory_order_acquire); 220 + if (reader_failures) 221 + ksft_print_msg("Child: %" PRIu64 " pages verified, %" PRIu64 " failures\n", 222 + reader_verified, reader_failures); 223 + 224 + munmap(mmap_base, FILE_SIZE); 225 + 226 + return reader_failures; 227 + } 228 + 229 + static void thp_cleanup_handler(int signum) 230 + { 231 + thp_restore_settings(); 232 + /* 233 + * Restore default handler and re-raise the signal to exit. 234 + * This is to ensure the test process exits with the correct 235 + * status code corresponding to the signal. 236 + */ 237 + signal(signum, SIG_DFL); 238 + raise(signum); 239 + } 240 + 241 + static void thp_settings_cleanup(void) 242 + { 243 + thp_restore_settings(); 244 + } 245 + 246 + int main(void) 247 + { 248 + struct thp_settings current_settings; 249 + uint64_t corrupted_pages; 250 + uint64_t iter; 251 + 252 + ksft_print_header(); 253 + 254 + page_size = getpagesize(); 255 + pmd_pagesize = read_pmd_pagesize(); 256 + 257 + if (!thp_available() || !pmd_pagesize) 258 + ksft_exit_skip("Transparent Hugepages not available\n"); 259 + 260 + if (geteuid() != 0) 261 + ksft_exit_skip("Please run the test as root\n"); 262 + 263 + thp_save_settings(); 264 + /* make sure thp settings are restored */ 265 + if (atexit(thp_settings_cleanup) != 0) 266 + ksft_exit_fail_msg("atexit failed\n"); 267 + 268 + signal(SIGINT, thp_cleanup_handler); 269 + signal(SIGTERM, thp_cleanup_handler); 270 + 271 + thp_read_settings(&current_settings); 272 + current_settings.shmem_enabled = SHMEM_ADVISE; 273 + thp_write_settings(&current_settings); 274 + 275 + ksft_set_plan(1); 276 + 277 + ksft_print_msg("folio split race test\n"); 278 + 279 + for (iter = 0; iter < NUM_ITERATIONS; iter++) { 280 + corrupted_pages = run_iteration(); 281 + if (corrupted_pages > 0) 282 + break; 283 + } 284 + 285 + if (iter < NUM_ITERATIONS) 286 + ksft_test_result_fail("FAILED on iteration %" PRIu64 287 + ": %" PRIu64 288 + " pages corrupted by MADV_REMOVE!\n", 289 + iter, corrupted_pages); 290 + else 291 + ksft_test_result_pass("All %d iterations passed\n", 292 + NUM_ITERATIONS); 293 + 294 + ksft_exit(iter == NUM_ITERATIONS); 295 + 296 + return 0; 297 + }
+2
tools/testing/selftests/mm/run_vmtests.sh
··· 515 515 rm -f ${XFS_IMG} 516 516 fi 517 517 518 + CATEGORY="thp" run_test ./folio_split_race_test 519 + 518 520 CATEGORY="migration" run_test ./migration 519 521 520 522 CATEGORY="mkdirty" run_test ./mkdirty