Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'seccomp-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp update from Kees Cook:

- Fix race with WAIT_KILLABLE_RECV (Johannes Nixdorf)

* tag 'seccomp-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
selftests/seccomp: Add a test for the WAIT_KILLABLE_RECV fast reply race
seccomp: Fix a race with WAIT_KILLABLE_RECV if the tracer replies too fast

+136 -7
+5 -7
kernel/seccomp.c
··· 1139 1139 static bool should_sleep_killable(struct seccomp_filter *match, 1140 1140 struct seccomp_knotif *n) 1141 1141 { 1142 - return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT; 1142 + return match->wait_killable_recv && n->state >= SECCOMP_NOTIFY_SENT; 1143 1143 } 1144 1144 1145 1145 static int seccomp_do_user_notification(int this_syscall, ··· 1186 1186 1187 1187 if (err != 0) { 1188 1188 /* 1189 - * Check to see if the notifcation got picked up and 1190 - * whether we should switch to wait killable. 1189 + * Check to see whether we should switch to wait 1190 + * killable. Only return the interrupted error if not. 1191 1191 */ 1192 - if (!wait_killable && should_sleep_killable(match, &n)) 1193 - continue; 1194 - 1195 - goto interrupted; 1192 + if (!(!wait_killable && should_sleep_killable(match, &n))) 1193 + goto interrupted; 1196 1194 } 1197 1195 1198 1196 addfd = list_first_entry_or_null(&n.addfd,
+131
tools/testing/selftests/seccomp/seccomp_bpf.c
··· 24 24 #include <linux/filter.h> 25 25 #include <sys/prctl.h> 26 26 #include <sys/ptrace.h> 27 + #include <sys/time.h> 27 28 #include <sys/user.h> 28 29 #include <linux/prctl.h> 29 30 #include <linux/ptrace.h> ··· 3548 3547 perror("write from signal"); 3549 3548 } 3550 3549 3550 + static void signal_handler_nop(int signal) 3551 + { 3552 + } 3553 + 3551 3554 TEST(user_notification_signal) 3552 3555 { 3553 3556 pid_t pid; ··· 4822 4817 EXPECT_EQ(waitpid(pid, &status, 0), pid); 4823 4818 EXPECT_EQ(true, WIFSIGNALED(status)); 4824 4819 EXPECT_EQ(SIGTERM, WTERMSIG(status)); 4820 + } 4821 + 4822 + /* Ensure signals after the reply do not interrupt */ 4823 + TEST(user_notification_wait_killable_after_reply) 4824 + { 4825 + int i, max_iter = 100000; 4826 + int listener, status; 4827 + int pipe_fds[2]; 4828 + pid_t pid; 4829 + long ret; 4830 + 4831 + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 4832 + ASSERT_EQ(0, ret) 4833 + { 4834 + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 4835 + } 4836 + 4837 + listener = user_notif_syscall( 4838 + __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER | 4839 + SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); 4840 + ASSERT_GE(listener, 0); 4841 + 4842 + /* 4843 + * Used to count invocations. One token is transferred from the child 4844 + * to the parent per syscall invocation, the parent tries to take 4845 + * one token per successful RECV. If the syscall is restarted after 4846 + * RECV the parent will try to get two tokens while the child only 4847 + * provided one. 4848 + */ 4849 + ASSERT_EQ(pipe(pipe_fds), 0); 4850 + 4851 + pid = fork(); 4852 + ASSERT_GE(pid, 0); 4853 + 4854 + if (pid == 0) { 4855 + struct sigaction new_action = { 4856 + .sa_handler = signal_handler_nop, 4857 + .sa_flags = SA_RESTART, 4858 + }; 4859 + struct itimerval timer = { 4860 + .it_value = { .tv_usec = 1000 }, 4861 + .it_interval = { .tv_usec = 1000 }, 4862 + }; 4863 + char c = 'a'; 4864 + 4865 + close(pipe_fds[0]); 4866 + 4867 + /* Setup the sigaction with SA_RESTART */ 4868 + if (sigaction(SIGALRM, &new_action, NULL)) { 4869 + perror("sigaction"); 4870 + exit(1); 4871 + } 4872 + 4873 + /* 4874 + * Kill with SIGALRM repeatedly, to try to hit the race when 4875 + * handling the syscall. 4876 + */ 4877 + if (setitimer(ITIMER_REAL, &timer, NULL) < 0) 4878 + perror("setitimer"); 4879 + 4880 + for (i = 0; i < max_iter; ++i) { 4881 + int fd; 4882 + 4883 + /* Send one token per iteration to catch repeats. */ 4884 + if (write(pipe_fds[1], &c, sizeof(c)) != 1) { 4885 + perror("write"); 4886 + exit(1); 4887 + } 4888 + 4889 + fd = syscall(__NR_dup, 0); 4890 + if (fd < 0) { 4891 + perror("dup"); 4892 + exit(1); 4893 + } 4894 + close(fd); 4895 + } 4896 + 4897 + exit(0); 4898 + } 4899 + 4900 + close(pipe_fds[1]); 4901 + 4902 + for (i = 0; i < max_iter; ++i) { 4903 + struct seccomp_notif req = {}; 4904 + struct seccomp_notif_addfd addfd = {}; 4905 + struct pollfd pfd = { 4906 + .fd = pipe_fds[0], 4907 + .events = POLLIN, 4908 + }; 4909 + char c; 4910 + 4911 + /* 4912 + * Try to receive one token. If it failed, one child syscall 4913 + * was restarted after RECV and needed to be handled twice. 4914 + */ 4915 + ASSERT_EQ(poll(&pfd, 1, 1000), 1) 4916 + kill(pid, SIGKILL); 4917 + 4918 + ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1) 4919 + kill(pid, SIGKILL); 4920 + 4921 + /* 4922 + * Get the notification, reply to it as fast as possible to test 4923 + * whether the child wrongly skips going into the non-preemptible 4924 + * (TASK_KILLABLE) state. 4925 + */ 4926 + do 4927 + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); 4928 + while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */ 4929 + ASSERT_EQ(ret, 0) 4930 + kill(pid, SIGKILL); 4931 + 4932 + addfd.id = req.id; 4933 + addfd.flags = SECCOMP_ADDFD_FLAG_SEND; 4934 + addfd.srcfd = 0; 4935 + ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0) 4936 + kill(pid, SIGKILL); 4937 + } 4938 + 4939 + /* 4940 + * Wait for the process to exit, and make sure the process terminated 4941 + * with a zero exit code.. 4942 + */ 4943 + EXPECT_EQ(waitpid(pid, &status, 0), pid); 4944 + EXPECT_EQ(true, WIFEXITED(status)); 4945 + EXPECT_EQ(0, WEXITSTATUS(status)); 4825 4946 } 4826 4947 4827 4948 struct tsync_vs_thread_leader_args {