Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

selftests/mm: add process_madvise() tests

Add tests for process_madvise(), focusing on verifying behavior under
various conditions including valid usage and error cases.

[lianux.mm@gmail.com: v7]
Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com
Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com
Link: https://lkml.kernel.org/r/20250721114614.40996-1-lianux.mm@gmail.com
Signed-off-by: wang lian <lianux.mm@gmail.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Suggested-by: Zi Yan <ziy@nvidia.com>
Suggested-by: Mark Brown <broonie@kernel.org>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Tested-by: Zi Yan <ziy@nvidia.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

wang lian and committed by
Andrew Morton
b50e3788 8d58d656

+351
+1
tools/testing/selftests/mm/.gitignore
··· 21 21 transhuge-stress 22 22 pagemap_ioctl 23 23 pfnmap 24 + process_madv 24 25 *.tmp* 25 26 protection_keys 26 27 protection_keys_32
+1
tools/testing/selftests/mm/Makefile
··· 85 85 TEST_GEN_FILES += on-fault-limit 86 86 TEST_GEN_FILES += pagemap_ioctl 87 87 TEST_GEN_FILES += pfnmap 88 + TEST_GEN_FILES += process_madv 88 89 TEST_GEN_FILES += thuge-gen 89 90 TEST_GEN_FILES += transhuge-stress 90 91 TEST_GEN_FILES += uffd-stress
+344
tools/testing/selftests/mm/process_madv.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + 3 + #define _GNU_SOURCE 4 + #include "../kselftest_harness.h" 5 + #include <errno.h> 6 + #include <setjmp.h> 7 + #include <signal.h> 8 + #include <stdbool.h> 9 + #include <stdio.h> 10 + #include <stdlib.h> 11 + #include <string.h> 12 + #include <linux/mman.h> 13 + #include <sys/syscall.h> 14 + #include <unistd.h> 15 + #include <sched.h> 16 + #include "vm_util.h" 17 + 18 + #include "../pidfd/pidfd.h" 19 + 20 + FIXTURE(process_madvise) 21 + { 22 + unsigned long page_size; 23 + pid_t child_pid; 24 + int remote_pidfd; 25 + int pidfd; 26 + }; 27 + 28 + FIXTURE_SETUP(process_madvise) 29 + { 30 + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); 31 + self->pidfd = PIDFD_SELF; 32 + self->remote_pidfd = -1; 33 + self->child_pid = -1; 34 + }; 35 + 36 + FIXTURE_TEARDOWN_PARENT(process_madvise) 37 + { 38 + /* This teardown is guaranteed to run, even if tests SKIP or ASSERT */ 39 + if (self->child_pid > 0) { 40 + kill(self->child_pid, SIGKILL); 41 + waitpid(self->child_pid, NULL, 0); 42 + } 43 + 44 + if (self->remote_pidfd >= 0) 45 + close(self->remote_pidfd); 46 + } 47 + 48 + static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec, 49 + size_t vlen, int advice, unsigned int flags) 50 + { 51 + return syscall(__NR_process_madvise, pidfd, iovec, vlen, advice, flags); 52 + } 53 + 54 + /* 55 + * This test uses PIDFD_SELF to target the current process. The main 56 + * goal is to verify the basic behavior of process_madvise() with 57 + * a vector of non-contiguous memory ranges, not its cross-process 58 + * capabilities. 59 + */ 60 + TEST_F(process_madvise, basic) 61 + { 62 + const unsigned long pagesize = self->page_size; 63 + const int madvise_pages = 4; 64 + struct iovec vec[madvise_pages]; 65 + int pidfd = self->pidfd; 66 + ssize_t ret; 67 + char *map; 68 + 69 + /* 70 + * Create a single large mapping. We will pick pages from this 71 + * mapping to advise on. This ensures we test non-contiguous iovecs. 72 + */ 73 + map = mmap(NULL, pagesize * 10, PROT_READ | PROT_WRITE, 74 + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 75 + if (map == MAP_FAILED) 76 + SKIP(return, "mmap failed, not enough memory.\n"); 77 + 78 + /* Fill the entire region with a known pattern. */ 79 + memset(map, 'A', pagesize * 10); 80 + 81 + /* 82 + * Setup the iovec to point to 4 non-contiguous pages 83 + * within the mapping. 84 + */ 85 + vec[0].iov_base = &map[0 * pagesize]; 86 + vec[0].iov_len = pagesize; 87 + vec[1].iov_base = &map[3 * pagesize]; 88 + vec[1].iov_len = pagesize; 89 + vec[2].iov_base = &map[5 * pagesize]; 90 + vec[2].iov_len = pagesize; 91 + vec[3].iov_base = &map[8 * pagesize]; 92 + vec[3].iov_len = pagesize; 93 + 94 + ret = sys_process_madvise(pidfd, vec, madvise_pages, MADV_DONTNEED, 0); 95 + if (ret == -1 && errno == EPERM) 96 + SKIP(return, 97 + "process_madvise() unsupported or permission denied, try running as root.\n"); 98 + else if (errno == EINVAL) 99 + SKIP(return, 100 + "process_madvise() unsupported or parameter invalid, please check arguments.\n"); 101 + 102 + /* The call should succeed and report the total bytes processed. */ 103 + ASSERT_EQ(ret, madvise_pages * pagesize); 104 + 105 + /* Check that advised pages are now zero. */ 106 + for (int i = 0; i < madvise_pages; i++) { 107 + char *advised_page = (char *)vec[i].iov_base; 108 + 109 + /* Content must be 0, not 'A'. */ 110 + ASSERT_EQ(*advised_page, '\0'); 111 + } 112 + 113 + /* Check that an un-advised page in between is still 'A'. */ 114 + char *unadvised_page = &map[1 * pagesize]; 115 + 116 + for (int i = 0; i < pagesize; i++) 117 + ASSERT_EQ(unadvised_page[i], 'A'); 118 + 119 + /* Cleanup. */ 120 + ASSERT_EQ(munmap(map, pagesize * 10), 0); 121 + } 122 + 123 + /* 124 + * This test deterministically validates process_madvise() with MADV_COLLAPSE 125 + * on a remote process, other advices are difficult to verify reliably. 126 + * 127 + * The test verifies that a memory region in a child process, 128 + * focus on process_madv remote result, only check addresses and lengths. 129 + * The correctness of the MADV_COLLAPSE can be found in the relevant test examples in khugepaged. 130 + */ 131 + TEST_F(process_madvise, remote_collapse) 132 + { 133 + const unsigned long pagesize = self->page_size; 134 + long huge_page_size; 135 + int pipe_info[2]; 136 + ssize_t ret; 137 + struct iovec vec; 138 + 139 + struct child_info { 140 + pid_t pid; 141 + void *map_addr; 142 + } info; 143 + 144 + huge_page_size = read_pmd_pagesize(); 145 + if (huge_page_size <= 0) 146 + SKIP(return, "Could not determine a valid huge page size.\n"); 147 + 148 + ASSERT_EQ(pipe(pipe_info), 0); 149 + 150 + self->child_pid = fork(); 151 + ASSERT_NE(self->child_pid, -1); 152 + 153 + if (self->child_pid == 0) { 154 + char *map; 155 + size_t map_size = 2 * huge_page_size; 156 + 157 + close(pipe_info[0]); 158 + 159 + map = mmap(NULL, map_size, PROT_READ | PROT_WRITE, 160 + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 161 + ASSERT_NE(map, MAP_FAILED); 162 + 163 + /* Fault in as small pages */ 164 + for (size_t i = 0; i < map_size; i += pagesize) 165 + map[i] = 'A'; 166 + 167 + /* Send info and pause */ 168 + info.pid = getpid(); 169 + info.map_addr = map; 170 + ret = write(pipe_info[1], &info, sizeof(info)); 171 + ASSERT_EQ(ret, sizeof(info)); 172 + close(pipe_info[1]); 173 + 174 + pause(); 175 + exit(0); 176 + } 177 + 178 + close(pipe_info[1]); 179 + 180 + /* Receive child info */ 181 + ret = read(pipe_info[0], &info, sizeof(info)); 182 + if (ret <= 0) { 183 + waitpid(self->child_pid, NULL, 0); 184 + SKIP(return, "Failed to read child info from pipe.\n"); 185 + } 186 + ASSERT_EQ(ret, sizeof(info)); 187 + close(pipe_info[0]); 188 + self->child_pid = info.pid; 189 + 190 + self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0); 191 + ASSERT_GE(self->remote_pidfd, 0); 192 + 193 + vec.iov_base = info.map_addr; 194 + vec.iov_len = huge_page_size; 195 + 196 + ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_COLLAPSE, 197 + 0); 198 + if (ret == -1) { 199 + if (errno == EINVAL) 200 + SKIP(return, "PROCESS_MADV_ADVISE is not supported.\n"); 201 + else if (errno == EPERM) 202 + SKIP(return, 203 + "No process_madvise() permissions, try running as root.\n"); 204 + return; 205 + } 206 + 207 + ASSERT_EQ(ret, huge_page_size); 208 + } 209 + 210 + /* 211 + * Test process_madvise() with a pidfd for a process that has already 212 + * exited to ensure correct error handling. 213 + */ 214 + TEST_F(process_madvise, exited_process_pidfd) 215 + { 216 + const unsigned long pagesize = self->page_size; 217 + struct iovec vec; 218 + char *map; 219 + ssize_t ret; 220 + 221 + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 222 + 0); 223 + if (map == MAP_FAILED) 224 + SKIP(return, "mmap failed, not enough memory.\n"); 225 + 226 + vec.iov_base = map; 227 + vec.iov_len = pagesize; 228 + 229 + /* 230 + * Using a pidfd for a process that has already exited should fail 231 + * with ESRCH. 232 + */ 233 + self->child_pid = fork(); 234 + ASSERT_NE(self->child_pid, -1); 235 + 236 + if (self->child_pid == 0) 237 + exit(0); 238 + 239 + self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0); 240 + ASSERT_GE(self->remote_pidfd, 0); 241 + 242 + /* Wait for the child to ensure it has terminated. */ 243 + waitpid(self->child_pid, NULL, 0); 244 + 245 + ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_DONTNEED, 246 + 0); 247 + ASSERT_EQ(ret, -1); 248 + ASSERT_EQ(errno, ESRCH); 249 + } 250 + 251 + /* 252 + * Test process_madvise() with bad pidfds to ensure correct error 253 + * handling. 254 + */ 255 + TEST_F(process_madvise, bad_pidfd) 256 + { 257 + const unsigned long pagesize = self->page_size; 258 + struct iovec vec; 259 + char *map; 260 + ssize_t ret; 261 + 262 + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 263 + 0); 264 + if (map == MAP_FAILED) 265 + SKIP(return, "mmap failed, not enough memory.\n"); 266 + 267 + vec.iov_base = map; 268 + vec.iov_len = pagesize; 269 + 270 + /* Using an invalid fd number (-1) should fail with EBADF. */ 271 + ret = sys_process_madvise(-1, &vec, 1, MADV_DONTNEED, 0); 272 + ASSERT_EQ(ret, -1); 273 + ASSERT_EQ(errno, EBADF); 274 + 275 + /* 276 + * Using a valid fd that is not a pidfd (e.g. stdin) should fail 277 + * with EBADF. 278 + */ 279 + ret = sys_process_madvise(STDIN_FILENO, &vec, 1, MADV_DONTNEED, 0); 280 + ASSERT_EQ(ret, -1); 281 + ASSERT_EQ(errno, EBADF); 282 + } 283 + 284 + /* 285 + * Test that process_madvise() rejects vlen > UIO_MAXIOV. 286 + * The kernel should return -EINVAL when the number of iovecs exceeds 1024. 287 + */ 288 + TEST_F(process_madvise, invalid_vlen) 289 + { 290 + const unsigned long pagesize = self->page_size; 291 + int pidfd = self->pidfd; 292 + struct iovec vec; 293 + char *map; 294 + ssize_t ret; 295 + 296 + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 297 + 0); 298 + if (map == MAP_FAILED) 299 + SKIP(return, "mmap failed, not enough memory.\n"); 300 + 301 + vec.iov_base = map; 302 + vec.iov_len = pagesize; 303 + 304 + ret = sys_process_madvise(pidfd, &vec, 1025, MADV_DONTNEED, 0); 305 + ASSERT_EQ(ret, -1); 306 + ASSERT_EQ(errno, EINVAL); 307 + 308 + /* Cleanup. */ 309 + ASSERT_EQ(munmap(map, pagesize), 0); 310 + } 311 + 312 + /* 313 + * Test process_madvise() with an invalid flag value. Currently, only a flag 314 + * value of 0 is supported. This test is reserved for the future, e.g., if 315 + * synchronous flags are added. 316 + */ 317 + TEST_F(process_madvise, flag) 318 + { 319 + const unsigned long pagesize = self->page_size; 320 + unsigned int invalid_flag; 321 + int pidfd = self->pidfd; 322 + struct iovec vec; 323 + char *map; 324 + ssize_t ret; 325 + 326 + map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 327 + 0); 328 + if (map == MAP_FAILED) 329 + SKIP(return, "mmap failed, not enough memory.\n"); 330 + 331 + vec.iov_base = map; 332 + vec.iov_len = pagesize; 333 + 334 + invalid_flag = 0x80000000; 335 + 336 + ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, invalid_flag); 337 + ASSERT_EQ(ret, -1); 338 + ASSERT_EQ(errno, EINVAL); 339 + 340 + /* Cleanup. */ 341 + ASSERT_EQ(munmap(map, pagesize), 0); 342 + } 343 + 344 + TEST_HARNESS_MAIN
+5
tools/testing/selftests/mm/run_vmtests.sh
··· 65 65 test pagemap_scan IOCTL 66 66 - pfnmap 67 67 tests for VM_PFNMAP handling 68 + - process_madv 69 + test for process_madv 68 70 - cow 69 71 test copy-on-write semantics 70 72 - thp ··· 426 424 427 425 # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests 428 426 CATEGORY="madv_populate" run_test ./madv_populate 427 + 428 + # PROCESS_MADV test 429 + CATEGORY="process_madv" run_test ./process_madv 429 430 430 431 CATEGORY="vma_merge" run_test ./merge 431 432