Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'close-range-v5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux

Pull close_range() implementation from Christian Brauner:
"This adds the close_range() syscall. It allows a task to efficiently
close a range of file descriptors, up to and including all of the
calling task's file descriptors.

This is coordinated with the FreeBSD folks, who have copied our
version of this syscall and in the meantime have already merged it in
April 2020:

https://reviews.freebsd.org/D21627
https://svnweb.freebsd.org/base?view=revision&revision=359836

The syscall originally came up in a discussion around the new mount
API and making new file descriptor types cloexec by default. During
this discussion, Al suggested the close_range() syscall.

First, it helps to close all file descriptors of an exec()ing task.
This can be done safely via (quoting Al's example from [1] verbatim):

/* that exec is sensitive */
unshare(CLONE_FILES);
/* we don't want anything past stderr here */
close_range(3, ~0U);
execve(....);

The code snippet above is one way of working around the problem that
file descriptors are not cloexec by default. This is aggravated by the
fact that we can't just switch them over without massively regressing
userspace. For a whole class of programs, having an in-kernel method of
closing all file descriptors is very helpful (e.g. daemons, service
managers, programming language standard libraries, container managers
etc.).

Second, it allows userspace to avoid implementing closing all file
descriptors by parsing through /proc/<pid>/fd/* and calling close() on
each file descriptor and other hacks. From looking at various
large(ish) userspace code bases this or similar patterns are very
common in service managers, container runtimes, and programming
language runtimes/standard libraries such as Python or Rust.

In addition, the syscall will also work for tasks that do not have
procfs mounted and on kernels that do not have procfs support compiled
in. In such situations the only way to make sure that all file
descriptors are closed is to call close() on each file descriptor up
to UINT_MAX, or to resort to RLIMIT_NOFILE/OPEN_MAX trickery.

Based on Linus' suggestion close_range() also comes with a new flag
CLOSE_RANGE_UNSHARE to more elegantly handle file descriptor dropping
right before exec. This would usually be expressed in the sequence:

unshare(CLONE_FILES);
close_range(3, ~0U);

As pointed out by Linus, it might be desirable to have this be a part
of close_range() itself under a new flag CLOSE_RANGE_UNSHARE which
gets especially handy when we're closing all file descriptors above a
certain threshold.

Test-suite as always included"

* tag 'close-range-v5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
tests: add CLOSE_RANGE_UNSHARE tests
close_range: add CLOSE_RANGE_UNSHARE
tests: add close_range() tests
arch: wire-up close_range()
open: add close_range()

+405 -17
+1
arch/alpha/kernel/syscalls/syscall.tbl
··· 475 475 543 common fspick sys_fspick 476 476 544 common pidfd_open sys_pidfd_open 477 477 # 545 reserved for clone3 478 + 546 common close_range sys_close_range 478 479 547 common openat2 sys_openat2 479 480 548 common pidfd_getfd sys_pidfd_getfd 480 481 549 common faccessat2 sys_faccessat2
+1
arch/arm/tools/syscall.tbl
··· 449 449 433 common fspick sys_fspick 450 450 434 common pidfd_open sys_pidfd_open 451 451 435 common clone3 sys_clone3 452 + 436 common close_range sys_close_range 452 453 437 common openat2 sys_openat2 453 454 438 common pidfd_getfd sys_pidfd_getfd 454 455 439 common faccessat2 sys_faccessat2
+2
arch/arm64/include/asm/unistd32.h
··· 879 879 __SYSCALL(__NR_pidfd_open, sys_pidfd_open) 880 880 #define __NR_clone3 435 881 881 __SYSCALL(__NR_clone3, sys_clone3) 882 + #define __NR_close_range 436 883 + __SYSCALL(__NR_close_range, sys_close_range) 882 884 #define __NR_openat2 437 883 885 __SYSCALL(__NR_openat2, sys_openat2) 884 886 #define __NR_pidfd_getfd 438
+1
arch/ia64/kernel/syscalls/syscall.tbl
··· 356 356 433 common fspick sys_fspick 357 357 434 common pidfd_open sys_pidfd_open 358 358 # 435 reserved for clone3 359 + 436 common close_range sys_close_range 359 360 437 common openat2 sys_openat2 360 361 438 common pidfd_getfd sys_pidfd_getfd 361 362 439 common faccessat2 sys_faccessat2
+1
arch/m68k/kernel/syscalls/syscall.tbl
··· 435 435 433 common fspick sys_fspick 436 436 434 common pidfd_open sys_pidfd_open 437 437 435 common clone3 __sys_clone3 438 + 436 common close_range sys_close_range 438 439 437 common openat2 sys_openat2 439 440 438 common pidfd_getfd sys_pidfd_getfd 440 441 439 common faccessat2 sys_faccessat2
+1
arch/microblaze/kernel/syscalls/syscall.tbl
··· 441 441 433 common fspick sys_fspick 442 442 434 common pidfd_open sys_pidfd_open 443 443 435 common clone3 sys_clone3 444 + 436 common close_range sys_close_range 444 445 437 common openat2 sys_openat2 445 446 438 common pidfd_getfd sys_pidfd_getfd 446 447 439 common faccessat2 sys_faccessat2
+1
arch/mips/kernel/syscalls/syscall_n32.tbl
··· 374 374 433 n32 fspick sys_fspick 375 375 434 n32 pidfd_open sys_pidfd_open 376 376 435 n32 clone3 __sys_clone3 377 + 436 n32 close_range sys_close_range 377 378 437 n32 openat2 sys_openat2 378 379 438 n32 pidfd_getfd sys_pidfd_getfd 379 380 439 n32 faccessat2 sys_faccessat2
+1
arch/mips/kernel/syscalls/syscall_n64.tbl
··· 350 350 433 n64 fspick sys_fspick 351 351 434 n64 pidfd_open sys_pidfd_open 352 352 435 n64 clone3 __sys_clone3 353 + 436 n64 close_range sys_close_range 353 354 437 n64 openat2 sys_openat2 354 355 438 n64 pidfd_getfd sys_pidfd_getfd 355 356 439 n64 faccessat2 sys_faccessat2
+1
arch/mips/kernel/syscalls/syscall_o32.tbl
··· 423 423 433 o32 fspick sys_fspick 424 424 434 o32 pidfd_open sys_pidfd_open 425 425 435 o32 clone3 __sys_clone3 426 + 436 o32 close_range sys_close_range 426 427 437 o32 openat2 sys_openat2 427 428 438 o32 pidfd_getfd sys_pidfd_getfd 428 429 439 o32 faccessat2 sys_faccessat2
+1
arch/parisc/kernel/syscalls/syscall.tbl
··· 433 433 433 common fspick sys_fspick 434 434 434 common pidfd_open sys_pidfd_open 435 435 435 common clone3 sys_clone3_wrapper 436 + 436 common close_range sys_close_range 436 437 437 common openat2 sys_openat2 437 438 438 common pidfd_getfd sys_pidfd_getfd 438 439 439 common faccessat2 sys_faccessat2
+1
arch/powerpc/kernel/syscalls/syscall.tbl
··· 525 525 435 32 clone3 ppc_clone3 sys_clone3 526 526 435 64 clone3 sys_clone3 527 527 435 spu clone3 sys_ni_syscall 528 + 436 common close_range sys_close_range 528 529 437 common openat2 sys_openat2 529 530 438 common pidfd_getfd sys_pidfd_getfd 530 531 439 common faccessat2 sys_faccessat2
+1
arch/s390/kernel/syscalls/syscall.tbl
··· 438 438 433 common fspick sys_fspick sys_fspick 439 439 434 common pidfd_open sys_pidfd_open sys_pidfd_open 440 440 435 common clone3 sys_clone3 sys_clone3 441 + 436 common close_range sys_close_range sys_close_range 441 442 437 common openat2 sys_openat2 sys_openat2 442 443 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd 443 444 439 common faccessat2 sys_faccessat2 sys_faccessat2
+1
arch/sh/kernel/syscalls/syscall.tbl
··· 438 438 433 common fspick sys_fspick 439 439 434 common pidfd_open sys_pidfd_open 440 440 # 435 reserved for clone3 441 + 436 common close_range sys_close_range 441 442 437 common openat2 sys_openat2 442 443 438 common pidfd_getfd sys_pidfd_getfd 443 444 439 common faccessat2 sys_faccessat2
+1
arch/sparc/kernel/syscalls/syscall.tbl
··· 481 481 433 common fspick sys_fspick 482 482 434 common pidfd_open sys_pidfd_open 483 483 # 435 reserved for clone3 484 + 436 common close_range sys_close_range 484 485 437 common openat2 sys_openat2 485 486 438 common pidfd_getfd sys_pidfd_getfd 486 487 439 common faccessat2 sys_faccessat2
+1
arch/x86/entry/syscalls/syscall_32.tbl
··· 440 440 433 i386 fspick sys_fspick 441 441 434 i386 pidfd_open sys_pidfd_open 442 442 435 i386 clone3 sys_clone3 443 + 436 i386 close_range sys_close_range 443 444 437 i386 openat2 sys_openat2 444 445 438 i386 pidfd_getfd sys_pidfd_getfd 445 446 439 i386 faccessat2 sys_faccessat2
+1
arch/x86/entry/syscalls/syscall_64.tbl
··· 357 357 433 common fspick sys_fspick 358 358 434 common pidfd_open sys_pidfd_open 359 359 435 common clone3 sys_clone3 360 + 436 common close_range sys_close_range 360 361 437 common openat2 sys_openat2 361 362 438 common pidfd_getfd sys_pidfd_getfd 362 363 439 common faccessat2 sys_faccessat2
+1
arch/xtensa/kernel/syscalls/syscall.tbl
··· 406 406 433 common fspick sys_fspick 407 407 434 common pidfd_open sys_pidfd_open 408 408 435 common clone3 sys_clone3 409 + 436 common close_range sys_close_range 409 410 437 common openat2 sys_openat2 410 411 438 common pidfd_getfd sys_pidfd_getfd 411 412 439 common faccessat2 sys_faccessat2
+110 -11
fs/file.c
··· 10 10 #include <linux/syscalls.h> 11 11 #include <linux/export.h> 12 12 #include <linux/fs.h> 13 + #include <linux/kernel.h> 13 14 #include <linux/mm.h> 14 15 #include <linux/sched/signal.h> 15 16 #include <linux/slab.h> ··· 19 18 #include <linux/bitops.h> 20 19 #include <linux/spinlock.h> 21 20 #include <linux/rcupdate.h> 21 + #include <linux/close_range.h> 22 22 #include <net/sock.h> 23 23 24 24 unsigned int sysctl_nr_open __read_mostly = 1024*1024; ··· 267 265 return i; 268 266 } 269 267 268 + static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds) 269 + { 270 + unsigned int count; 271 + 272 + count = count_open_files(fdt); 273 + if (max_fds < NR_OPEN_DEFAULT) 274 + max_fds = NR_OPEN_DEFAULT; 275 + return min(count, max_fds); 276 + } 277 + 270 278 /* 271 279 * Allocate a new files structure and copy contents from the 272 280 * passed in files structure. 273 281 * errorp will be valid only when the returned files_struct is NULL. 274 282 */ 275 - struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) 283 + struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp) 276 284 { 277 285 struct files_struct *newf; 278 286 struct file **old_fds, **new_fds; ··· 309 297 310 298 spin_lock(&oldf->file_lock); 311 299 old_fdt = files_fdtable(oldf); 312 - open_files = count_open_files(old_fdt); 300 + open_files = sane_fdtable_size(old_fdt, max_fds); 313 301 314 302 /* 315 303 * Check whether we need to allocate a larger fd array and fd set. ··· 340 328 */ 341 329 spin_lock(&oldf->file_lock); 342 330 old_fdt = files_fdtable(oldf); 343 - open_files = count_open_files(old_fdt); 331 + open_files = sane_fdtable_size(old_fdt, max_fds); 344 332 } 345 333 346 334 copy_fd_bitmaps(new_fdt, old_fdt, open_files); ··· 637 625 638 626 EXPORT_SYMBOL(fd_install); 639 627 640 - /* 641 - * The same warnings as for __alloc_fd()/__fd_install() apply here... 
642 - */ 643 - int __close_fd(struct files_struct *files, unsigned fd) 628 + static struct file *pick_file(struct files_struct *files, unsigned fd) 644 629 { 645 - struct file *file; 630 + struct file *file = NULL; 646 631 struct fdtable *fdt; 647 632 648 633 spin_lock(&files->file_lock); ··· 651 642 goto out_unlock; 652 643 rcu_assign_pointer(fdt->fd[fd], NULL); 653 644 __put_unused_fd(files, fd); 654 - spin_unlock(&files->file_lock); 655 - return filp_close(file, files); 656 645 657 646 out_unlock: 658 647 spin_unlock(&files->file_lock); 659 - return -EBADF; 648 + return file; 649 + } 650 + 651 + /* 652 + * The same warnings as for __alloc_fd()/__fd_install() apply here... 653 + */ 654 + int __close_fd(struct files_struct *files, unsigned fd) 655 + { 656 + struct file *file; 657 + 658 + file = pick_file(files, fd); 659 + if (!file) 660 + return -EBADF; 661 + 662 + return filp_close(file, files); 660 663 } 661 664 EXPORT_SYMBOL(__close_fd); /* for ksys_close() */ 665 + 666 + /** 667 + * __close_range() - Close all file descriptors in a given range. 668 + * 669 + * @fd: starting file descriptor to close 670 + * @max_fd: last file descriptor to close 671 + * 672 + * This closes a range of file descriptors. All file descriptors 673 + * from @fd up to and including @max_fd are closed. 
674 + */ 675 + int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) 676 + { 677 + unsigned int cur_max; 678 + struct task_struct *me = current; 679 + struct files_struct *cur_fds = me->files, *fds = NULL; 680 + 681 + if (flags & ~CLOSE_RANGE_UNSHARE) 682 + return -EINVAL; 683 + 684 + if (fd > max_fd) 685 + return -EINVAL; 686 + 687 + rcu_read_lock(); 688 + cur_max = files_fdtable(cur_fds)->max_fds; 689 + rcu_read_unlock(); 690 + 691 + /* cap to last valid index into fdtable */ 692 + cur_max--; 693 + 694 + if (flags & CLOSE_RANGE_UNSHARE) { 695 + int ret; 696 + unsigned int max_unshare_fds = NR_OPEN_MAX; 697 + 698 + /* 699 + * If the requested range is greater than the current maximum, 700 + * we're closing everything so only copy all file descriptors 701 + * beneath the lowest file descriptor. 702 + */ 703 + if (max_fd >= cur_max) 704 + max_unshare_fds = fd; 705 + 706 + ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); 707 + if (ret) 708 + return ret; 709 + 710 + /* 711 + * We used to share our file descriptor table, and have now 712 + * created a private one, make sure we're using it below. 713 + */ 714 + if (fds) 715 + swap(cur_fds, fds); 716 + } 717 + 718 + max_fd = min(max_fd, cur_max); 719 + while (fd <= max_fd) { 720 + struct file *file; 721 + 722 + file = pick_file(cur_fds, fd++); 723 + if (!file) 724 + continue; 725 + 726 + filp_close(file, cur_fds); 727 + cond_resched(); 728 + } 729 + 730 + if (fds) { 731 + /* 732 + * We're done closing the files we were supposed to. Time to install 733 + * the new file descriptor table and drop the old one. 734 + */ 735 + task_lock(me); 736 + me->files = cur_fds; 737 + task_unlock(me); 738 + put_files_struct(fds); 739 + } 740 + 741 + return 0; 742 + } 662 743 663 744 /* 664 745 * variant of __close_fd that gets a ref on the file for later fput.
+17
fs/open.c
··· 1310 1310 return retval; 1311 1311 } 1312 1312 1313 + /** 1314 + * close_range() - Close all file descriptors in a given range. 1315 + * 1316 + * @fd: starting file descriptor to close 1317 + * @max_fd: last file descriptor to close 1318 + * @flags: reserved for future extensions 1319 + * 1320 + * This closes a range of file descriptors. All file descriptors 1321 + * from @fd up to and including @max_fd are closed. 1322 + * Currently, errors to close a given file descriptor are ignored. 1323 + */ 1324 + SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd, 1325 + unsigned int, flags) 1326 + { 1327 + return __close_range(fd, max_fd, flags); 1328 + } 1329 + 1313 1330 /* 1314 1331 * This routine simulates a hangup on the tty, to arrange that users 1315 1332 * are given clean terminals at login time.
+5 -1
include/linux/fdtable.h
··· 22 22 * as this is the granularity returned by copy_fdset(). 23 23 */ 24 24 #define NR_OPEN_DEFAULT BITS_PER_LONG 25 + #define NR_OPEN_MAX ~0U 25 26 26 27 struct fdtable { 27 28 unsigned int max_fds; ··· 110 109 void put_files_struct(struct files_struct *fs); 111 110 void reset_files_struct(struct files_struct *); 112 111 int unshare_files(struct files_struct **); 113 - struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy; 112 + struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; 114 113 void do_close_on_exec(struct files_struct *); 115 114 int iterate_fd(struct files_struct *, unsigned, 116 115 int (*)(const void *, struct file *, unsigned), ··· 122 121 unsigned int fd, struct file *file); 123 122 extern int __close_fd(struct files_struct *files, 124 123 unsigned int fd); 124 + extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); 125 125 extern int __close_fd_get_file(unsigned int fd, struct file **res); 126 + extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, 127 + struct files_struct **new_fdp); 126 128 127 129 extern struct kmem_cache *files_cachep; 128 130
+2
include/linux/syscalls.h
··· 444 444 asmlinkage long sys_openat2(int dfd, const char __user *filename, 445 445 struct open_how *how, size_t size); 446 446 asmlinkage long sys_close(unsigned int fd); 447 + asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd, 448 + unsigned int flags); 447 449 asmlinkage long sys_vhangup(void); 448 450 449 451 /* fs/pipe.c */
+2
include/uapi/asm-generic/unistd.h
··· 850 850 #define __NR_clone3 435 851 851 __SYSCALL(__NR_clone3, sys_clone3) 852 852 #endif 853 + #define __NR_close_range 436 854 + __SYSCALL(__NR_close_range, sys_close_range) 853 855 854 856 #define __NR_openat2 437 855 857 __SYSCALL(__NR_openat2, sys_openat2)
+9
include/uapi/linux/close_range.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + #ifndef _UAPI_LINUX_CLOSE_RANGE_H 3 + #define _UAPI_LINUX_CLOSE_RANGE_H 4 + 5 + /* Unshare the file descriptor table before closing file descriptors. */ 6 + #define CLOSE_RANGE_UNSHARE (1U << 1) 7 + 8 + #endif /* _UAPI_LINUX_CLOSE_RANGE_H */ 9 +
+6 -5
kernel/fork.c
··· 1479 1479 goto out; 1480 1480 } 1481 1481 1482 - newf = dup_fd(oldf, &error); 1482 + newf = dup_fd(oldf, NR_OPEN_MAX, &error); 1483 1483 if (!newf) 1484 1484 goto out; 1485 1485 ··· 2866 2866 /* 2867 2867 * Unshare file descriptor table if it is being shared 2868 2868 */ 2869 - static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) 2869 + int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, 2870 + struct files_struct **new_fdp) 2870 2871 { 2871 2872 struct files_struct *fd = current->files; 2872 2873 int error = 0; 2873 2874 2874 2875 if ((unshare_flags & CLONE_FILES) && 2875 2876 (fd && atomic_read(&fd->count) > 1)) { 2876 - *new_fdp = dup_fd(fd, &error); 2877 + *new_fdp = dup_fd(fd, max_fds, &error); 2877 2878 if (!*new_fdp) 2878 2879 return error; 2879 2880 } ··· 2934 2933 err = unshare_fs(unshare_flags, &new_fs); 2935 2934 if (err) 2936 2935 goto bad_unshare_out; 2937 - err = unshare_fd(unshare_flags, &new_fd); 2936 + err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd); 2938 2937 if (err) 2939 2938 goto bad_unshare_cleanup_fs; 2940 2939 err = unshare_userns(unshare_flags, &new_cred); ··· 3023 3022 struct files_struct *copy = NULL; 3024 3023 int error; 3025 3024 3026 - error = unshare_fd(CLONE_FILES, &copy); 3025 + error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy); 3027 3026 if (error || !copy) { 3028 3027 *displaced = NULL; 3029 3028 return error;
+1
tools/testing/selftests/Makefile
··· 6 6 TARGETS += capabilities 7 7 TARGETS += cgroup 8 8 TARGETS += clone3 9 + TARGETS += core 9 10 TARGETS += cpufreq 10 11 TARGETS += cpu-hotplug 11 12 TARGETS += drivers/dma-buf
+1
tools/testing/selftests/core/.gitignore
··· 1 + close_range_test
+7
tools/testing/selftests/core/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + CFLAGS += -g -I../../../../usr/include/ 3 + 4 + TEST_GEN_PROGS := close_range_test 5 + 6 + include ../lib.mk 7 +
+227
tools/testing/selftests/core/close_range_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #define _GNU_SOURCE 4 + #include <errno.h> 5 + #include <fcntl.h> 6 + #include <linux/kernel.h> 7 + #include <limits.h> 8 + #include <stdbool.h> 9 + #include <stdio.h> 10 + #include <stdlib.h> 11 + #include <string.h> 12 + #include <syscall.h> 13 + #include <unistd.h> 14 + 15 + #include "../kselftest_harness.h" 16 + #include "../clone3/clone3_selftests.h" 17 + 18 + #ifndef __NR_close_range 19 + #define __NR_close_range -1 20 + #endif 21 + 22 + #ifndef CLOSE_RANGE_UNSHARE 23 + #define CLOSE_RANGE_UNSHARE (1U << 1) 24 + #endif 25 + 26 + static inline int sys_close_range(unsigned int fd, unsigned int max_fd, 27 + unsigned int flags) 28 + { 29 + return syscall(__NR_close_range, fd, max_fd, flags); 30 + } 31 + 32 + #ifndef ARRAY_SIZE 33 + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 34 + #endif 35 + 36 + TEST(close_range) 37 + { 38 + int i, ret; 39 + int open_fds[101]; 40 + 41 + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 42 + int fd; 43 + 44 + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 45 + ASSERT_GE(fd, 0) { 46 + if (errno == ENOENT) 47 + XFAIL(return, "Skipping test since /dev/null does not exist"); 48 + } 49 + 50 + open_fds[i] = fd; 51 + } 52 + 53 + EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { 54 + if (errno == ENOSYS) 55 + XFAIL(return, "close_range() syscall not supported"); 56 + } 57 + 58 + EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); 59 + 60 + for (i = 0; i <= 50; i++) 61 + EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 62 + 63 + for (i = 51; i <= 100; i++) 64 + EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 65 + 66 + /* create a couple of gaps */ 67 + close(57); 68 + close(78); 69 + close(81); 70 + close(82); 71 + close(84); 72 + close(90); 73 + 74 + EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0)); 75 + 76 + for (i = 51; i <= 92; i++) 77 + EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 78 + 79 + for (i = 93; i <= 100; i++) 80 + EXPECT_GT(fcntl(open_fds[i], 
F_GETFL), -1); 81 + 82 + /* test that the kernel caps and still closes all fds */ 83 + EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0)); 84 + 85 + for (i = 93; i <= 99; i++) 86 + EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL)); 87 + 88 + EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1); 89 + 90 + EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0)); 91 + 92 + EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL)); 93 + } 94 + 95 + TEST(close_range_unshare) 96 + { 97 + int i, ret, status; 98 + pid_t pid; 99 + int open_fds[101]; 100 + struct clone_args args = { 101 + .flags = CLONE_FILES, 102 + .exit_signal = SIGCHLD, 103 + }; 104 + 105 + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 106 + int fd; 107 + 108 + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 109 + ASSERT_GE(fd, 0) { 110 + if (errno == ENOENT) 111 + XFAIL(return, "Skipping test since /dev/null does not exist"); 112 + } 113 + 114 + open_fds[i] = fd; 115 + } 116 + 117 + pid = sys_clone3(&args, sizeof(args)); 118 + ASSERT_GE(pid, 0); 119 + 120 + if (pid == 0) { 121 + ret = sys_close_range(open_fds[0], open_fds[50], 122 + CLOSE_RANGE_UNSHARE); 123 + if (ret) 124 + exit(EXIT_FAILURE); 125 + 126 + for (i = 0; i <= 50; i++) 127 + if (fcntl(open_fds[i], F_GETFL) != -1) 128 + exit(EXIT_FAILURE); 129 + 130 + for (i = 51; i <= 100; i++) 131 + if (fcntl(open_fds[i], F_GETFL) == -1) 132 + exit(EXIT_FAILURE); 133 + 134 + /* create a couple of gaps */ 135 + close(57); 136 + close(78); 137 + close(81); 138 + close(82); 139 + close(84); 140 + close(90); 141 + 142 + ret = sys_close_range(open_fds[51], open_fds[92], 143 + CLOSE_RANGE_UNSHARE); 144 + if (ret) 145 + exit(EXIT_FAILURE); 146 + 147 + for (i = 51; i <= 92; i++) 148 + if (fcntl(open_fds[i], F_GETFL) != -1) 149 + exit(EXIT_FAILURE); 150 + 151 + for (i = 93; i <= 100; i++) 152 + if (fcntl(open_fds[i], F_GETFL) == -1) 153 + exit(EXIT_FAILURE); 154 + 155 + /* test that the kernel caps and still closes all fds */ 156 + ret = sys_close_range(open_fds[93], open_fds[99], 
157 + CLOSE_RANGE_UNSHARE); 158 + if (ret) 159 + exit(EXIT_FAILURE); 160 + 161 + for (i = 93; i <= 99; i++) 162 + if (fcntl(open_fds[i], F_GETFL) != -1) 163 + exit(EXIT_FAILURE); 164 + 165 + if (fcntl(open_fds[100], F_GETFL) == -1) 166 + exit(EXIT_FAILURE); 167 + 168 + ret = sys_close_range(open_fds[100], open_fds[100], 169 + CLOSE_RANGE_UNSHARE); 170 + if (ret) 171 + exit(EXIT_FAILURE); 172 + 173 + if (fcntl(open_fds[100], F_GETFL) != -1) 174 + exit(EXIT_FAILURE); 175 + 176 + exit(EXIT_SUCCESS); 177 + } 178 + 179 + EXPECT_EQ(waitpid(pid, &status, 0), pid); 180 + EXPECT_EQ(true, WIFEXITED(status)); 181 + EXPECT_EQ(0, WEXITSTATUS(status)); 182 + } 183 + 184 + TEST(close_range_unshare_capped) 185 + { 186 + int i, ret, status; 187 + pid_t pid; 188 + int open_fds[101]; 189 + struct clone_args args = { 190 + .flags = CLONE_FILES, 191 + .exit_signal = SIGCHLD, 192 + }; 193 + 194 + for (i = 0; i < ARRAY_SIZE(open_fds); i++) { 195 + int fd; 196 + 197 + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 198 + ASSERT_GE(fd, 0) { 199 + if (errno == ENOENT) 200 + XFAIL(return, "Skipping test since /dev/null does not exist"); 201 + } 202 + 203 + open_fds[i] = fd; 204 + } 205 + 206 + pid = sys_clone3(&args, sizeof(args)); 207 + ASSERT_GE(pid, 0); 208 + 209 + if (pid == 0) { 210 + ret = sys_close_range(open_fds[0], UINT_MAX, 211 + CLOSE_RANGE_UNSHARE); 212 + if (ret) 213 + exit(EXIT_FAILURE); 214 + 215 + for (i = 0; i <= 100; i++) 216 + if (fcntl(open_fds[i], F_GETFL) != -1) 217 + exit(EXIT_FAILURE); 218 + 219 + exit(EXIT_SUCCESS); 220 + } 221 + 222 + EXPECT_EQ(waitpid(pid, &status, 0), pid); 223 + EXPECT_EQ(true, WIFEXITED(status)); 224 + EXPECT_EQ(0, WEXITSTATUS(status)); 225 + } 226 + 227 + TEST_HARNESS_MAIN