Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Tests for empty mount namespace creation via UNSHARE_EMPTY_MNTNS
4 *
5 * Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
6 */
7
8#define _GNU_SOURCE
9#include <fcntl.h>
10#include <linux/mount.h>
11#include <linux/stat.h>
12#include <sched.h>
13#include <stdio.h>
14#include <string.h>
15#include <sys/mount.h>
16#include <sys/stat.h>
17#include <sys/types.h>
18#include <sys/wait.h>
19#include <unistd.h>
20
21#include "../utils.h"
22#include "../wrappers.h"
23#include "empty_mntns.h"
24#include "kselftest_harness.h"
25
26static bool unshare_empty_mntns_supported(void)
27{
28 pid_t pid;
29 int status;
30
31 pid = fork();
32 if (pid < 0)
33 return false;
34
35 if (pid == 0) {
36 if (enter_userns())
37 _exit(1);
38
39 if (unshare(UNSHARE_EMPTY_MNTNS) && errno == EINVAL)
40 _exit(1);
41 _exit(0);
42 }
43
44 if (waitpid(pid, &status, 0) != pid)
45 return false;
46
47 if (!WIFEXITED(status))
48 return false;
49
50 return WEXITSTATUS(status) == 0;
51}
52
53
54FIXTURE(empty_mntns) {};
55
56FIXTURE_SETUP(empty_mntns)
57{
58 if (!unshare_empty_mntns_supported())
59 SKIP(return, "UNSHARE_EMPTY_MNTNS not supported");
60}
61
62FIXTURE_TEARDOWN(empty_mntns) {}
63
64/* Verify unshare succeeds, produces exactly 1 mount, and root == cwd */
65TEST_F(empty_mntns, basic)
66{
67 pid_t pid;
68
69 pid = fork();
70 ASSERT_GE(pid, 0);
71
72 if (pid == 0) {
73 uint64_t root_id, cwd_id;
74
75 if (enter_userns())
76 _exit(1);
77
78 if (unshare(UNSHARE_EMPTY_MNTNS))
79 _exit(2);
80
81 if (count_mounts() != 1)
82 _exit(3);
83
84 root_id = get_unique_mnt_id("/");
85 cwd_id = get_unique_mnt_id(".");
86 if (root_id == 0 || cwd_id == 0)
87 _exit(4);
88
89 if (root_id != cwd_id)
90 _exit(5);
91
92 _exit(0);
93 }
94
95 ASSERT_EQ(wait_for_pid(pid), 0);
96}
97
98/*
99 * UNSHARE_EMPTY_MNTNS combined with CLONE_NEWUSER.
100 *
101 * The user namespace must be created first so /proc is still accessible
102 * for writing uid_map/gid_map. The empty mount namespace is created
103 * afterwards.
104 */
105TEST_F(empty_mntns, with_clone_newuser)
106{
107 pid_t pid;
108
109 pid = fork();
110 ASSERT_GE(pid, 0);
111
112 if (pid == 0) {
113 uid_t uid = getuid();
114 gid_t gid = getgid();
115 char map[100];
116
117 if (unshare(CLONE_NEWUSER))
118 _exit(1);
119
120 snprintf(map, sizeof(map), "0 %d 1", uid);
121 if (write_file("/proc/self/uid_map", map))
122 _exit(2);
123
124 if (write_file("/proc/self/setgroups", "deny"))
125 _exit(3);
126
127 snprintf(map, sizeof(map), "0 %d 1", gid);
128 if (write_file("/proc/self/gid_map", map))
129 _exit(4);
130
131 if (unshare(UNSHARE_EMPTY_MNTNS))
132 _exit(5);
133
134 if (count_mounts() != 1)
135 _exit(6);
136
137 _exit(0);
138 }
139
140 ASSERT_EQ(wait_for_pid(pid), 0);
141}
142
143/* UNSHARE_EMPTY_MNTNS combined with other namespace flags */
144TEST_F(empty_mntns, with_other_ns_flags)
145{
146 pid_t pid;
147
148 pid = fork();
149 ASSERT_GE(pid, 0);
150
151 if (pid == 0) {
152 if (enter_userns())
153 _exit(1);
154
155 if (unshare(UNSHARE_EMPTY_MNTNS | CLONE_NEWUTS | CLONE_NEWIPC))
156 _exit(2);
157
158 if (count_mounts() != 1)
159 _exit(3);
160
161 _exit(0);
162 }
163
164 ASSERT_EQ(wait_for_pid(pid), 0);
165}
166
167/* EPERM without proper capabilities */
168TEST_F(empty_mntns, eperm_without_caps)
169{
170 pid_t pid;
171
172 pid = fork();
173 ASSERT_GE(pid, 0);
174
175 if (pid == 0) {
176 /* Skip if already root */
177 if (getuid() == 0)
178 _exit(0);
179
180 if (unshare(UNSHARE_EMPTY_MNTNS) == 0)
181 _exit(1);
182
183 if (errno != EPERM)
184 _exit(2);
185
186 _exit(0);
187 }
188
189 ASSERT_EQ(wait_for_pid(pid), 0);
190}
191
192/* Many source mounts still result in exactly 1 mount */
193TEST_F(empty_mntns, many_source_mounts)
194{
195 pid_t pid;
196
197 pid = fork();
198 ASSERT_GE(pid, 0);
199
200 if (pid == 0) {
201 char tmpdir[] = "/tmp/empty_mntns_test.XXXXXX";
202 int i;
203
204 if (enter_userns())
205 _exit(1);
206
207 if (unshare(CLONE_NEWNS))
208 _exit(2);
209
210 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
211 _exit(3);
212
213 if (!mkdtemp(tmpdir))
214 _exit(4);
215
216 if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M"))
217 _exit(5);
218
219 for (i = 0; i < 5; i++) {
220 char subdir[256];
221
222 snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i);
223 if (mkdir(subdir, 0755) && errno != EEXIST)
224 _exit(6);
225 if (mount(subdir, subdir, NULL, MS_BIND, NULL))
226 _exit(7);
227 }
228
229 if (count_mounts() < 5)
230 _exit(8);
231
232 if (unshare(UNSHARE_EMPTY_MNTNS))
233 _exit(9);
234
235 if (count_mounts() != 1)
236 _exit(10);
237
238 _exit(0);
239 }
240
241 ASSERT_EQ(wait_for_pid(pid), 0);
242}
243
244/* CWD on a different mount gets reset to root */
245TEST_F(empty_mntns, cwd_reset)
246{
247 pid_t pid;
248
249 pid = fork();
250 ASSERT_GE(pid, 0);
251
252 if (pid == 0) {
253 char tmpdir[] = "/tmp/empty_mntns_cwd.XXXXXX";
254 uint64_t root_id, cwd_id;
255 struct statmount *sm;
256
257 if (enter_userns())
258 _exit(1);
259
260 if (unshare(CLONE_NEWNS))
261 _exit(2);
262
263 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
264 _exit(3);
265
266 if (!mkdtemp(tmpdir))
267 _exit(4);
268
269 if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M"))
270 _exit(5);
271
272 if (chdir(tmpdir))
273 _exit(6);
274
275 if (unshare(UNSHARE_EMPTY_MNTNS))
276 _exit(7);
277
278 root_id = get_unique_mnt_id("/");
279 cwd_id = get_unique_mnt_id(".");
280 if (root_id == 0 || cwd_id == 0)
281 _exit(8);
282
283 if (root_id != cwd_id)
284 _exit(9);
285
286 sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, 0);
287 if (!sm)
288 _exit(10);
289
290 if (strcmp(sm->str + sm->mnt_point, "/") != 0)
291 _exit(11);
292
293 free(sm);
294 _exit(0);
295 }
296
297 ASSERT_EQ(wait_for_pid(pid), 0);
298}
299
300/* Verify statmount properties of the root mount */
301TEST_F(empty_mntns, mount_properties)
302{
303 pid_t pid;
304
305 pid = fork();
306 ASSERT_GE(pid, 0);
307
308 if (pid == 0) {
309 struct statmount *sm;
310 uint64_t root_id;
311
312 if (enter_userns())
313 _exit(1);
314
315 if (unshare(UNSHARE_EMPTY_MNTNS))
316 _exit(2);
317
318 root_id = get_unique_mnt_id("/");
319 if (!root_id)
320 _exit(3);
321
322 sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_ROOT |
323 STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE, 0);
324 if (!sm)
325 _exit(4);
326
327 if (!(sm->mask & STATMOUNT_MNT_POINT))
328 _exit(5);
329
330 if (strcmp(sm->str + sm->mnt_point, "/") != 0)
331 _exit(6);
332
333 if (!(sm->mask & STATMOUNT_MNT_BASIC))
334 _exit(7);
335
336 if (sm->mnt_id != root_id)
337 _exit(8);
338
339 free(sm);
340 _exit(0);
341 }
342
343 ASSERT_EQ(wait_for_pid(pid), 0);
344}
345
346/* Consecutive UNSHARE_EMPTY_MNTNS calls produce new namespaces */
347TEST_F(empty_mntns, repeated_unshare)
348{
349 pid_t pid;
350
351 pid = fork();
352 ASSERT_GE(pid, 0);
353
354 if (pid == 0) {
355 uint64_t first_root_id, second_root_id;
356
357 if (enter_userns())
358 _exit(1);
359
360 if (unshare(UNSHARE_EMPTY_MNTNS))
361 _exit(2);
362
363 if (count_mounts() != 1)
364 _exit(3);
365
366 first_root_id = get_unique_mnt_id("/");
367
368 if (unshare(UNSHARE_EMPTY_MNTNS))
369 _exit(4);
370
371 if (count_mounts() != 1)
372 _exit(5);
373
374 second_root_id = get_unique_mnt_id("/");
375
376 if (first_root_id == second_root_id)
377 _exit(6);
378
379 _exit(0);
380 }
381
382 ASSERT_EQ(wait_for_pid(pid), 0);
383}
384
385/* Root mount's parent is itself */
386TEST_F(empty_mntns, root_is_own_parent)
387{
388 pid_t pid;
389
390 pid = fork();
391 ASSERT_GE(pid, 0);
392
393 if (pid == 0) {
394 struct statmount sm;
395 uint64_t root_id;
396
397 if (enter_userns())
398 _exit(1);
399
400 if (unshare(UNSHARE_EMPTY_MNTNS))
401 _exit(2);
402
403 root_id = get_unique_mnt_id("/");
404 if (!root_id)
405 _exit(3);
406
407 if (statmount(root_id, 0, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0) < 0)
408 _exit(4);
409
410 if (!(sm.mask & STATMOUNT_MNT_BASIC))
411 _exit(5);
412
413 if (sm.mnt_parent_id != sm.mnt_id)
414 _exit(6);
415
416 _exit(0);
417 }
418
419 ASSERT_EQ(wait_for_pid(pid), 0);
420}
421
422/* Listmount returns only the root mount */
423TEST_F(empty_mntns, listmount_single_entry)
424{
425 pid_t pid;
426
427 pid = fork();
428 ASSERT_GE(pid, 0);
429
430 if (pid == 0) {
431 uint64_t list[16];
432 ssize_t nr_mounts;
433 uint64_t root_id;
434
435 if (enter_userns())
436 _exit(1);
437
438 if (unshare(UNSHARE_EMPTY_MNTNS))
439 _exit(2);
440
441 nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 16, 0);
442 if (nr_mounts != 1)
443 _exit(3);
444
445 root_id = get_unique_mnt_id("/");
446 if (!root_id)
447 _exit(4);
448
449 if (list[0] != root_id)
450 _exit(5);
451
452 _exit(0);
453 }
454
455 ASSERT_EQ(wait_for_pid(pid), 0);
456}
457
458/*
459 * Mount tmpfs over nullfs root to build a writable filesystem from scratch.
460 * This exercises the intended usage pattern: create an empty mount namespace
461 * (which has a nullfs root), then mount a real filesystem over it.
462 *
463 * Because resolving "/" returns the process root directly (via nd_jump_root)
464 * without following overmounts, we use the new mount API (fsopen/fsmount)
465 * to obtain a mount fd, then fchdir + chroot to enter the new filesystem.
466 */
467TEST_F(empty_mntns, overmount_tmpfs)
468{
469 pid_t pid;
470
471 pid = fork();
472 ASSERT_GE(pid, 0);
473
474 if (pid == 0) {
475 struct statmount *sm;
476 uint64_t root_id, cwd_id;
477 int fd, fsfd, mntfd;
478
479 if (enter_userns())
480 _exit(1);
481
482 if (unshare(UNSHARE_EMPTY_MNTNS))
483 _exit(2);
484
485 if (count_mounts() != 1)
486 _exit(3);
487
488 root_id = get_unique_mnt_id("/");
489 if (!root_id)
490 _exit(4);
491
492 /* Verify root is nullfs */
493 sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0);
494 if (!sm)
495 _exit(5);
496
497 if (!(sm->mask & STATMOUNT_FS_TYPE))
498 _exit(6);
499
500 if (strcmp(sm->str + sm->fs_type, "nullfs") != 0)
501 _exit(7);
502
503 free(sm);
504
505 cwd_id = get_unique_mnt_id(".");
506 if (!cwd_id || root_id != cwd_id)
507 _exit(8);
508
509 /*
510 * nullfs root is immutable. open(O_CREAT) returns ENOENT
511 * because empty_dir_lookup() returns -ENOENT before the
512 * IS_IMMUTABLE permission check in may_o_create() is reached.
513 */
514 fd = open("/test", O_CREAT | O_RDWR, 0644);
515 if (fd >= 0) {
516 close(fd);
517 _exit(9);
518 }
519 if (errno != ENOENT)
520 _exit(10);
521
522 /*
523 * Use the new mount API to create tmpfs and get a mount fd.
524 * We need the fd because after attaching the tmpfs on top of
525 * "/", path resolution of "/" still returns the process root
526 * (nullfs) without following the overmount. The mount fd
527 * lets us fchdir + chroot into the tmpfs.
528 */
529 fsfd = sys_fsopen("tmpfs", 0);
530 if (fsfd < 0)
531 _exit(11);
532
533 if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "1M", 0)) {
534 close(fsfd);
535 _exit(12);
536 }
537
538 if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) {
539 close(fsfd);
540 _exit(13);
541 }
542
543 mntfd = sys_fsmount(fsfd, 0, 0);
544 close(fsfd);
545 if (mntfd < 0)
546 _exit(14);
547
548 if (sys_move_mount(mntfd, "", AT_FDCWD, "/",
549 MOVE_MOUNT_F_EMPTY_PATH)) {
550 close(mntfd);
551 _exit(15);
552 }
553
554 if (count_mounts() != 2) {
555 close(mntfd);
556 _exit(16);
557 }
558
559 /* Enter the tmpfs via the mount fd */
560 if (fchdir(mntfd)) {
561 close(mntfd);
562 _exit(17);
563 }
564
565 if (chroot(".")) {
566 close(mntfd);
567 _exit(18);
568 }
569
570 close(mntfd);
571
572 /* Verify "/" now resolves to tmpfs */
573 root_id = get_unique_mnt_id("/");
574 if (!root_id)
575 _exit(19);
576
577 sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0);
578 if (!sm)
579 _exit(20);
580
581 if (!(sm->mask & STATMOUNT_FS_TYPE))
582 _exit(21);
583
584 if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0)
585 _exit(22);
586
587 free(sm);
588
589 /* Verify tmpfs is writable */
590 fd = open("/testfile", O_CREAT | O_RDWR, 0644);
591 if (fd < 0)
592 _exit(23);
593
594 if (write(fd, "test", 4) != 4) {
595 close(fd);
596 _exit(24);
597 }
598
599 close(fd);
600
601 if (access("/testfile", F_OK))
602 _exit(25);
603
604 _exit(0);
605 }
606
607 ASSERT_EQ(wait_for_pid(pid), 0);
608}
609
610/*
611 * Tests below do not require UNSHARE_EMPTY_MNTNS support.
612 */
613
/*
 * unshare() with the flag value 0x80000000 must be rejected with EINVAL.
 *
 * Child exit codes: 1 = could not enter a user namespace, 2 = unshare
 * unexpectedly succeeded, 3 = failed with an errno other than EINVAL,
 * 0 = success.
 */
TEST(invalid_flags)
{
	pid_t child = fork();

	ASSERT_GE(child, 0);

	if (child == 0) {
		if (enter_userns() != 0)
			_exit(1);

		if (unshare(0x80000000) == 0)
			_exit(2);

		_exit(errno == EINVAL ? 0 : 3);
	}

	ASSERT_EQ(wait_for_pid(child), 0);
}
637
638/* Regular CLONE_NEWNS still copies the full mount tree */
639TEST(clone_newns_full_copy)
640{
641 pid_t pid;
642
643 pid = fork();
644 ASSERT_GE(pid, 0);
645
646 if (pid == 0) {
647 ssize_t nr_mounts_before, nr_mounts_after;
648 char tmpdir[] = "/tmp/empty_mntns_regr.XXXXXX";
649 int i;
650
651 if (enter_userns())
652 _exit(1);
653
654 if (unshare(CLONE_NEWNS))
655 _exit(2);
656
657 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
658 _exit(3);
659
660 if (!mkdtemp(tmpdir))
661 _exit(4);
662
663 if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M"))
664 _exit(5);
665
666 for (i = 0; i < 3; i++) {
667 char subdir[256];
668
669 snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i);
670 if (mkdir(subdir, 0755) && errno != EEXIST)
671 _exit(6);
672 if (mount(subdir, subdir, NULL, MS_BIND, NULL))
673 _exit(7);
674 }
675
676 nr_mounts_before = count_mounts();
677 if (nr_mounts_before < 3)
678 _exit(8);
679
680 if (unshare(CLONE_NEWNS))
681 _exit(9);
682
683 nr_mounts_after = count_mounts();
684 if (nr_mounts_after < nr_mounts_before)
685 _exit(10);
686
687 _exit(0);
688 }
689
690 ASSERT_EQ(wait_for_pid(pid), 0);
691}
692
693/* Other namespace unshares are unaffected */
694TEST(other_ns_unaffected)
695{
696 pid_t pid;
697
698 pid = fork();
699 ASSERT_GE(pid, 0);
700
701 if (pid == 0) {
702 char hostname[256];
703
704 if (enter_userns())
705 _exit(1);
706
707 if (unshare(CLONE_NEWUTS))
708 _exit(2);
709
710 if (sethostname("test-empty-mntns", 16))
711 _exit(3);
712
713 if (gethostname(hostname, sizeof(hostname)))
714 _exit(4);
715
716 if (strcmp(hostname, "test-empty-mntns") != 0)
717 _exit(5);
718
719 _exit(0);
720 }
721
722 ASSERT_EQ(wait_for_pid(pid), 0);
723}
724
/*
 * Harness macro from kselftest_harness.h; presumably expands to the
 * program entry point that runs the tests declared above (confirm against
 * the harness header).
 */
TEST_HARNESS_MAIN