Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'vfs-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

- kthread: consolidate kthread exit paths to prevent use-after-free

- iomap:
    - don't mark folio uptodate if read IO has bytes pending
    - don't report direct-io retries to fserror
    - reject delalloc mappings during writeback

- ns: tighten visibility checks

- netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict
sequence

* tag 'vfs-7.0-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
iomap: reject delalloc mappings during writeback
iomap: don't mark folio uptodate if read IO has bytes pending
selftests: fix mntns iteration selftests
nstree: tighten permission checks for listing
nsfs: tighten permission checks for handle opening
nsfs: tighten permission checks for ns iteration ioctls
netfs: Fix unbuffered/DIO writes to dispatch subrequests in strict sequence
kthread: consolidate kthread exit paths to prevent use-after-free
iomap: don't report direct-io retries to fserror

+326 -160
+12 -3
fs/iomap/buffered-io.c
···
 {
 	struct iomap_folio_state *ifs = folio->private;
 	unsigned long flags;
-	bool uptodate = true;
+	bool mark_uptodate = true;
 
 	if (folio_test_uptodate(folio))
 		return;
 
 	if (ifs) {
 		spin_lock_irqsave(&ifs->state_lock, flags);
-		uptodate = ifs_set_range_uptodate(folio, ifs, off, len);
+		/*
+		 * If a read with bytes pending is in progress, we must not call
+		 * folio_mark_uptodate(). The read completion path
+		 * (iomap_read_end()) will call folio_end_read(), which uses XOR
+		 * semantics to set the uptodate bit. If we set it here, the XOR
+		 * in folio_end_read() will clear it, leaving the folio not
+		 * uptodate.
+		 */
+		mark_uptodate = ifs_set_range_uptodate(folio, ifs, off, len) &&
+				!ifs->read_bytes_pending;
 		spin_unlock_irqrestore(&ifs->state_lock, flags);
 	}
 
-	if (uptodate)
+	if (mark_uptodate)
 		folio_mark_uptodate(folio);
 }
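The new comment hinges on a subtlety of folio_end_read(): it clears the lock bit and sets the uptodate bit in a single XOR, which is only correct if the uptodate bit is still clear at that point. A minimal userspace model of the hazard (plain C with stand-in PG_* flags, not kernel code): setting the bit early while a read is still in flight makes the completing read's XOR flip it back off.

#include <stdio.h>

#define PG_LOCKED   (1UL << 0)
#define PG_UPTODATE (1UL << 1)

/* Stand-in for folio_end_read(): unlock and mark uptodate in one XOR,
 * which assumes PG_UPTODATE is currently clear. */
static unsigned long end_read(unsigned long flags, int success)
{
	unsigned long mask = PG_LOCKED;

	if (success)
		mask |= PG_UPTODATE;
	return flags ^ mask;
}

int main(void)
{
	unsigned long flags = PG_LOCKED;	/* read in flight, folio locked */

	flags |= PG_UPTODATE;			/* premature folio_mark_uptodate() */
	flags = end_read(flags, 1);		/* the XOR clears the bit again */
	printf("uptodate after end_read: %s\n",
	       flags & PG_UPTODATE ? "yes" : "no (lost)");
	return 0;
}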
+14 -1
fs/iomap/direct-io.c
···
 	return FSERR_DIRECTIO_READ;
 }
 
+static inline bool should_report_dio_fserror(const struct iomap_dio *dio)
+{
+	switch (dio->error) {
+	case 0:
+	case -EAGAIN:
+	case -ENOTBLK:
+		/* don't send fsnotify for success or magic retry codes */
+		return false;
+	default:
+		return true;
+	}
+}
+
 ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	const struct iomap_dio_ops *dops = dio->dops;
···
 	if (dops && dops->end_io)
 		ret = dops->end_io(iocb, dio->size, ret, dio->flags);
-	if (dio->error)
+	if (should_report_dio_fserror(dio))
 		fserror_report_io(file_inode(iocb->ki_filp),
 				  iomap_dio_err_type(dio), offset, dio->size,
 				  dio->error, GFP_NOFS);
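For context on the filtered values: -EAGAIN and -ENOTBLK are conventionally "retry differently" signals rather than genuine I/O failures, e.g. a nonblocking direct write returns -EAGAIN so the caller can retry where blocking is allowed, so reporting them through the fs error facility would flag successful paths as errors. A hedged userspace sketch of that calling convention (assumes a glibc and kernel with pwritev2()/RWF_NOWAIT support):

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>

/* Try a nonblocking write first; EAGAIN here means "retry where you may
 * block", not "the write failed". */
static ssize_t write_nowait_then_block(int fd, const void *buf, size_t len,
				       off_t off)
{
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	ssize_t ret;

	ret = pwritev2(fd, &iov, 1, off, RWF_NOWAIT);
	if (ret < 0 && errno == EAGAIN)
		ret = pwritev2(fd, &iov, 1, off, 0);	/* blocking retry */
	return ret;
}

int main(void)
{
	char path[] = "/tmp/dio-retry-XXXXXX";
	int fd = mkstemp(path);

	if (fd < 0)
		return 1;
	unlink(path);
	if (write_nowait_then_block(fd, "hello", 5, 0) < 0)
		perror("pwritev2");
	close(fd);
	return 0;
}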
+7 -6
fs/iomap/ioend.c
···
 	WARN_ON_ONCE(!folio->private && map_len < dirty_len);
 
 	switch (wpc->iomap.type) {
-	case IOMAP_INLINE:
-		WARN_ON_ONCE(1);
-		return -EIO;
+	case IOMAP_UNWRITTEN:
+		ioend_flags |= IOMAP_IOEND_UNWRITTEN;
+		break;
+	case IOMAP_MAPPED:
+		break;
 	case IOMAP_HOLE:
 		return map_len;
 	default:
-		break;
+		WARN_ON_ONCE(1);
+		return -EIO;
 	}
 
-	if (wpc->iomap.type == IOMAP_UNWRITTEN)
-		ioend_flags |= IOMAP_IOEND_UNWRITTEN;
 	if (wpc->iomap.flags & IOMAP_F_SHARED)
 		ioend_flags |= IOMAP_IOEND_SHARED;
 	if (folio_test_dropbehind(folio))
+212 -16
fs/netfs/direct_write.c
···
 #include "internal.h"
 
 /*
+ * Perform the cleanup rituals after an unbuffered write is complete.
+ */
+static void netfs_unbuffered_write_done(struct netfs_io_request *wreq)
+{
+	struct netfs_inode *ictx = netfs_inode(wreq->inode);
+
+	_enter("R=%x", wreq->debug_id);
+
+	/* Okay, declare that all I/O is complete. */
+	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
+
+	if (!wreq->error)
+		netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
+
+	if (wreq->origin == NETFS_DIO_WRITE &&
+	    wreq->mapping->nrpages) {
+		/* mmap may have got underfoot and we may now have folios
+		 * locally covering the region we just wrote. Attempt to
+		 * discard the folios, but leave in place any modified locally.
+		 * ->write_iter() is prevented from interfering by the DIO
+		 * counter.
+		 */
+		pgoff_t first = wreq->start >> PAGE_SHIFT;
+		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
+
+		invalidate_inode_pages2_range(wreq->mapping, first, last);
+	}
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_end(wreq->inode);
+
+	_debug("finished");
+	netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
+	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
+
+	if (wreq->iocb) {
+		size_t written = umin(wreq->transferred, wreq->len);
+
+		wreq->iocb->ki_pos += written;
+		if (wreq->iocb->ki_complete) {
+			trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
+			wreq->iocb->ki_complete(wreq->iocb, wreq->error ?: written);
+		}
+		wreq->iocb = VFS_PTR_POISON;
+	}
+
+	netfs_clear_subrequests(wreq);
+}
+
+/*
+ * Collect the subrequest results of unbuffered write subrequests.
+ */
+static void netfs_unbuffered_write_collect(struct netfs_io_request *wreq,
+					   struct netfs_io_stream *stream,
+					   struct netfs_io_subrequest *subreq)
+{
+	trace_netfs_collect_sreq(wreq, subreq);
+
+	spin_lock(&wreq->lock);
+	list_del_init(&subreq->rreq_link);
+	spin_unlock(&wreq->lock);
+
+	wreq->transferred += subreq->transferred;
+	iov_iter_advance(&wreq->buffer.iter, subreq->transferred);
+
+	stream->collected_to = subreq->start + subreq->transferred;
+	wreq->collected_to = stream->collected_to;
+	netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
+
+	trace_netfs_collect_stream(wreq, stream);
+	trace_netfs_collect_state(wreq, wreq->collected_to, 0);
+}
+
+/*
+ * Write data to the server without going through the pagecache and without
+ * writing it to the local cache. We dispatch the subrequests serially and
+ * wait for each to complete before dispatching the next, lest we leave a gap
+ * in the data written due to a failure such as ENOSPC. We could, however
+ * attempt to do preparation such as content encryption for the next subreq
+ * whilst the current is in progress.
+ */
+static int netfs_unbuffered_write(struct netfs_io_request *wreq)
+{
+	struct netfs_io_subrequest *subreq = NULL;
+	struct netfs_io_stream *stream = &wreq->io_streams[0];
+	int ret;
+
+	_enter("%llx", wreq->len);
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_begin(wreq->inode);
+
+	stream->collected_to = wreq->start;
+
+	for (;;) {
+		bool retry = false;
+
+		if (!subreq) {
+			netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred);
+			subreq = stream->construct;
+			stream->construct = NULL;
+			stream->front = NULL;
+		}
+
+		/* Check if (re-)preparation failed. */
+		if (unlikely(test_bit(NETFS_SREQ_FAILED, &subreq->flags))) {
+			netfs_write_subrequest_terminated(subreq, subreq->error);
+			wreq->error = subreq->error;
+			break;
+		}
+
+		iov_iter_truncate(&subreq->io_iter, wreq->len - wreq->transferred);
+		if (!iov_iter_count(&subreq->io_iter))
+			break;
+
+		subreq->len = netfs_limit_iter(&subreq->io_iter, 0,
+					       stream->sreq_max_len,
+					       stream->sreq_max_segs);
+		iov_iter_truncate(&subreq->io_iter, subreq->len);
+		stream->submit_extendable_to = subreq->len;
+
+		trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+		stream->issue_write(subreq);
+
+		/* Async, need to wait. */
+		netfs_wait_for_in_progress_stream(wreq, stream);
+
+		if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+			retry = true;
+		} else if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
+			ret = subreq->error;
+			wreq->error = ret;
+			netfs_see_subrequest(subreq, netfs_sreq_trace_see_failed);
+			subreq = NULL;
+			break;
+		}
+		ret = 0;
+
+		if (!retry) {
+			netfs_unbuffered_write_collect(wreq, stream, subreq);
+			subreq = NULL;
+			if (wreq->transferred >= wreq->len)
+				break;
+			if (!wreq->iocb && signal_pending(current)) {
+				ret = wreq->transferred ? -EINTR : -ERESTARTSYS;
+				trace_netfs_rreq(wreq, netfs_rreq_trace_intr);
+				break;
+			}
+			continue;
+		}
+
+		/* We need to retry the last subrequest, so first reset the
+		 * iterator, taking into account what, if anything, we managed
+		 * to transfer.
+		 */
+		subreq->error = -EAGAIN;
+		trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+		if (subreq->transferred > 0)
+			iov_iter_advance(&wreq->buffer.iter, subreq->transferred);
+
+		if (stream->source == NETFS_UPLOAD_TO_SERVER &&
+		    wreq->netfs_ops->retry_request)
+			wreq->netfs_ops->retry_request(wreq, stream);
+
+		__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+		__clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
+		__clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
+		subreq->io_iter = wreq->buffer.iter;
+		subreq->start = wreq->start + wreq->transferred;
+		subreq->len = wreq->len - wreq->transferred;
+		subreq->transferred = 0;
+		subreq->retry_count += 1;
+		stream->sreq_max_len = UINT_MAX;
+		stream->sreq_max_segs = INT_MAX;
+
+		netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+		stream->prepare_write(subreq);
+
+		__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+		netfs_stat(&netfs_n_wh_retry_write_subreq);
+	}
+
+	netfs_unbuffered_write_done(wreq);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+static void netfs_unbuffered_write_async(struct work_struct *work)
+{
+	struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
+
+	netfs_unbuffered_write(wreq);
+	netfs_put_request(wreq, netfs_rreq_trace_put_complete);
+}
+
+/*
  * Perform an unbuffered write where we may have to do an RMW operation on an
  * encrypted file. This can also be used for direct I/O writes.
  */
···
 			 */
 			wreq->buffer.iter = *iter;
 		}
+
+		wreq->len = iov_iter_count(&wreq->buffer.iter);
 	}
 
 	__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
-	if (async)
-		__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
 
 	/* Copy the data into the bounce buffer and encrypt it. */
 	// TODO
 
 	/* Dispatch the write. */
 	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
-	if (async)
+
+	if (async) {
+		INIT_WORK(&wreq->work, netfs_unbuffered_write_async);
 		wreq->iocb = iocb;
-	wreq->len = iov_iter_count(&wreq->buffer.iter);
-	ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
-	if (ret < 0) {
-		_debug("begin = %zd", ret);
-		goto out;
-	}
-
-	if (!async) {
-		ret = netfs_wait_for_write(wreq);
-		if (ret > 0)
-			iocb->ki_pos += ret;
-	} else {
+		queue_work(system_dfl_wq, &wreq->work);
 		ret = -EIOCBQUEUED;
+	} else {
+		ret = netfs_unbuffered_write(wreq);
+		if (ret < 0) {
+			_debug("begin = %zd", ret);
+		} else {
+			iocb->ki_pos += wreq->transferred;
+			ret = wreq->transferred ?: wreq->error;
+		}
+
+		netfs_put_request(wreq, netfs_rreq_trace_put_complete);
 	}
 
-out:
 	netfs_put_request(wreq, netfs_rreq_trace_put_return);
 	return ret;
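The rewrite above replaces the offloaded-collection model with a strictly serial issue-wait-collect loop: a subrequest is dispatched only after its predecessor has been collected, or retried in place, so a failure such as ENOSPC can never leave a hole with later data already on the server. A compilable sketch of that dispatch pattern (plain C with a stubbed transport; the names are illustrative, not netfs API):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct chunk {
	size_t start, len, transferred;
	int error;
	bool need_retry;
};

/* Stub standing in for issue + wait on the real transport. */
static void issue_and_wait(struct chunk *c)
{
	c->transferred = c->len;	/* pretend the whole chunk made it */
	c->error = 0;
	c->need_retry = false;
}

static int write_serially(size_t total, size_t max_chunk)
{
	size_t done = 0;

	while (done < total) {
		struct chunk c = {
			.start = done,
			.len = total - done < max_chunk ? total - done : max_chunk,
		};

		issue_and_wait(&c);
		if (c.need_retry)
			continue;	/* reissue the same range, not the next one */
		if (c.error)
			return c.error;	/* stop: nothing beyond the gap was issued */
		done += c.transferred;
	}
	return 0;
}

int main(void)
{
	printf("write_serially: %d\n", write_serially(1 << 20, 64 << 10));
	return 0;
}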
+3 -1
fs/netfs/internal.h
···
 				     struct file *file,
 				     loff_t start,
 				     enum netfs_io_origin origin);
+void netfs_prepare_write(struct netfs_io_request *wreq,
+			 struct netfs_io_stream *stream,
+			 loff_t start);
 void netfs_reissue_write(struct netfs_io_stream *stream,
 			 struct netfs_io_subrequest *subreq,
 			 struct iov_iter *source);
···
 			  struct folio **writethrough_cache);
 ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
 			       struct folio *writethrough_cache);
-int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
 
 /*
  * write_retry.c
-21
fs/netfs/write_collect.c
···
 		ictx->ops->invalidate_cache(wreq);
 	}
 
-	if ((wreq->origin == NETFS_UNBUFFERED_WRITE ||
-	     wreq->origin == NETFS_DIO_WRITE) &&
-	    !wreq->error)
-		netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
-
-	if (wreq->origin == NETFS_DIO_WRITE &&
-	    wreq->mapping->nrpages) {
-		/* mmap may have got underfoot and we may now have folios
-		 * locally covering the region we just wrote. Attempt to
-		 * discard the folios, but leave in place any modified locally.
-		 * ->write_iter() is prevented from interfering by the DIO
-		 * counter.
-		 */
-		pgoff_t first = wreq->start >> PAGE_SHIFT;
-		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
-		invalidate_inode_pages2_range(wreq->mapping, first, last);
-	}
-
-	if (wreq->origin == NETFS_DIO_WRITE)
-		inode_dio_end(wreq->inode);
-
 	_debug("finished");
 	netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
 	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
+3 -38
fs/netfs/write_issue.c
···
  * Prepare a write subrequest. We need to allocate a new subrequest
  * if we don't have one.
  */
-static void netfs_prepare_write(struct netfs_io_request *wreq,
-				struct netfs_io_stream *stream,
-				loff_t start)
+void netfs_prepare_write(struct netfs_io_request *wreq,
+			 struct netfs_io_stream *stream,
+			 loff_t start)
 {
 	struct netfs_io_subrequest *subreq;
 	struct iov_iter *wreq_iter = &wreq->buffer.iter;
···
 	ret = netfs_wait_for_write(wreq);
 	netfs_put_request(wreq, netfs_rreq_trace_put_return);
 	return ret;
-}
-
-/*
- * Write data to the server without going through the pagecache and without
- * writing it to the local cache.
- */
-int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
-{
-	struct netfs_io_stream *upload = &wreq->io_streams[0];
-	ssize_t part;
-	loff_t start = wreq->start;
-	int error = 0;
-
-	_enter("%zx", len);
-
-	if (wreq->origin == NETFS_DIO_WRITE)
-		inode_dio_begin(wreq->inode);
-
-	while (len) {
-		// TODO: Prepare content encryption
-
-		_debug("unbuffered %zx", len);
-		part = netfs_advance_write(wreq, upload, start, len, false);
-		start += part;
-		len -= part;
-		rolling_buffer_advance(&wreq->buffer, part);
-		if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags))
-			netfs_wait_for_paused_write(wreq);
-		if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
-			break;
-	}
-
-	netfs_end_issue_write(wreq);
-	_leave(" = %d", error);
-	return error;
 }
 
 /*
+14 -1
fs/nsfs.c
···
 	return false;
 }
 
+static bool may_use_nsfs_ioctl(unsigned int cmd)
+{
+	switch (_IOC_NR(cmd)) {
+	case _IOC_NR(NS_MNT_GET_NEXT):
+		fallthrough;
+	case _IOC_NR(NS_MNT_GET_PREV):
+		return may_see_all_namespaces();
+	}
+	return true;
+}
+
 static long ns_ioctl(struct file *filp, unsigned int ioctl,
 		     unsigned long arg)
 {
···
 	if (!nsfs_ioctl_valid(ioctl))
 		return -ENOIOCTLCMD;
+	if (!may_use_nsfs_ioctl(ioctl))
+		return -EPERM;
 
 	ns = get_proc_ns(file_inode(filp));
 	switch (ioctl) {
···
 		return ERR_PTR(-EOPNOTSUPP);
 	}
 
-	if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) {
+	if (owning_ns && !may_see_all_namespaces()) {
 		ns->ops->put(ns);
 		return ERR_PTR(-EPERM);
 	}
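A hedged userspace probe of the tightened ioctl check (it assumes a uapi linux/nsfs.h recent enough to provide NS_MNT_GET_NEXT and struct mnt_ns_info, the same interface the selftest below exercises): without CAP_SYS_ADMIN in the initial namespaces, the iteration ioctls should now fail with EPERM instead of handing back a sibling-namespace fd.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nsfs.h>		/* NS_MNT_GET_NEXT, struct mnt_ns_info */

int main(void)
{
	struct mnt_ns_info info = {};
	int fd = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Without CAP_SYS_ADMIN in the initial namespaces this should now
	 * fail with EPERM rather than return an iteration handle. */
	if (ioctl(fd, NS_MNT_GET_NEXT, &info) < 0)
		perror("NS_MNT_GET_NEXT");
	close(fd);
	return 0;
}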
+20 -1
include/linux/kthread.h
···
 
 struct mm_struct;
 
+/* opaque kthread data */
+struct kthread;
+
+/*
+ * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
+ * always remain a kthread. For kthreads p->worker_private always
+ * points to a struct kthread. For tasks that are not kthreads
+ * p->worker_private is used to point to other things.
+ *
+ * Return NULL for any task that is not a kthread.
+ */
+static inline struct kthread *tsk_is_kthread(struct task_struct *p)
+{
+	if (p->flags & PF_KTHREAD)
+		return p->worker_private;
+	return NULL;
+}
+
 __printf(4, 5)
 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 					   void *data,
···
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_exit(long result) __noreturn;
+#define kthread_exit(result) do_exit(result)
 void kthread_complete_and_exit(struct completion *, long) __noreturn;
 int kthreads_update_housekeeping(void);
+void kthread_do_exit(struct kthread *, long);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
+2
include/linux/ns_common.h
···
 
 #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
 
+bool may_see_all_namespaces(void);
+
 static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
 {
 	return atomic_read(&ns->__ns_ref_active);
+3 -1
include/trace/events/netfs.h
···
 	EM(netfs_rreq_trace_done,		"DONE   ")	\
 	EM(netfs_rreq_trace_end_copy_to_cache,	"END-C2C")	\
 	EM(netfs_rreq_trace_free,		"FREE   ")	\
+	EM(netfs_rreq_trace_intr,		"INTR   ")	\
 	EM(netfs_rreq_trace_ki_complete,	"KI-CMPL")	\
 	EM(netfs_rreq_trace_recollect,		"RECLLCT")	\
 	EM(netfs_rreq_trace_redirty,		"REDIRTY")	\
···
 	EM(netfs_sreq_trace_put_oom,		"PUT OOM ")	\
 	EM(netfs_sreq_trace_put_wip,		"PUT WIP ")	\
 	EM(netfs_sreq_trace_put_work,		"PUT WORK ")	\
-	E_(netfs_sreq_trace_put_terminated,	"PUT TERM ")
+	EM(netfs_sreq_trace_put_terminated,	"PUT TERM ")	\
+	E_(netfs_sreq_trace_see_failed,		"SEE FAILED ")
 
 #define netfs_folio_traces					\
 	EM(netfs_folio_is_uptodate,		"mod-uptodate")	\
+6
kernel/exit.c
···
 void __noreturn do_exit(long code)
 {
 	struct task_struct *tsk = current;
+	struct kthread *kthread;
 	int group_dead;
 
 	WARN_ON(irqs_disabled());
 	WARN_ON(tsk->plug);
+
+	kthread = tsk_is_kthread(tsk);
+	if (unlikely(kthread))
+		kthread_do_exit(kthread, code);
 
 	kcov_task_exit(tsk);
 	kmsan_task_exit(tsk);
···
 	lockdep_free_task(tsk);
 	do_task_dead();
 }
+EXPORT_SYMBOL(do_exit);
 
 void __noreturn make_task_dead(int signr)
 {
+5 -36
kernel/kthread.c
···
 	return k->worker_private;
 }
 
-/*
- * Variant of to_kthread() that doesn't assume @p is a kthread.
- *
- * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will
- * always remain a kthread. For kthreads p->worker_private always
- * points to a struct kthread. For tasks that are not kthreads
- * p->worker_private is used to point to other things.
- *
- * Return NULL for any task that is not a kthread.
- */
-static inline struct kthread *__to_kthread(struct task_struct *p)
-{
-	void *kthread = p->worker_private;
-	if (kthread && !(p->flags & PF_KTHREAD))
-		kthread = NULL;
-	return kthread;
-}
-
 void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk)
 {
 	struct kthread *kthread = to_kthread(tsk);
···
 bool kthread_should_stop_or_park(void)
 {
-	struct kthread *kthread = __to_kthread(current);
+	struct kthread *kthread = tsk_is_kthread(current);
 
 	if (!kthread)
 		return false;
···
 void *kthread_func(struct task_struct *task)
 {
-	struct kthread *kthread = __to_kthread(task);
+	struct kthread *kthread = tsk_is_kthread(task);
 	if (kthread)
 		return kthread->threadfn;
 	return NULL;
···
 void *kthread_probe_data(struct task_struct *task)
 {
-	struct kthread *kthread = __to_kthread(task);
+	struct kthread *kthread = tsk_is_kthread(task);
 	void *data = NULL;
 
 	if (kthread)
···
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-/**
- * kthread_exit - Cause the current kthread return @result to kthread_stop().
- * @result: The integer value to return to kthread_stop().
- *
- * While kthread_exit can be called directly, it exists so that
- * functions which do some additional work in non-modular code such as
- * module_put_and_kthread_exit can be implemented.
- *
- * Does not return.
- */
-void __noreturn kthread_exit(long result)
+void kthread_do_exit(struct kthread *kthread, long result)
 {
-	struct kthread *kthread = to_kthread(current);
 	kthread->result = result;
 	if (!list_empty(&kthread->affinity_node)) {
 		mutex_lock(&kthread_affinity_lock);
···
 			kthread->preferred_affinity = NULL;
 		}
 	}
-	do_exit(0);
 }
-EXPORT_SYMBOL(kthread_exit);
 
 /**
  * kthread_complete_and_exit - Exit the current kthread.
···
 bool kthread_is_per_cpu(struct task_struct *p)
 {
-	struct kthread *kthread = __to_kthread(p);
+	struct kthread *kthread = tsk_is_kthread(p);
 	if (!kthread)
 		return false;
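Taken together with the do_exit() hook above, the shape of the fix is a funnel: kthread_exit() is now just do_exit(), and do_exit() itself performs the kthread bookkeeping via kthread_do_exit(), so no exit path can tear the task down while the kthread's result and affinity-list linkage are still live. A minimal userspace model of that ordering (all names hypothetical, not kernel code):

#include <stdio.h>
#include <stdlib.h>

struct kthread_state {
	long result;		/* value kthread_stop() would read */
	int on_shared_list;	/* stands in for the affinity-list linkage */
};

struct task {
	int is_kthread;			/* PF_KTHREAD analogue */
	struct kthread_state *priv;	/* worker_private analogue */
};

/* Type-specific bookkeeping: record the result and unlink shared state
 * while it is still valid. */
static void kthread_do_exit_model(struct kthread_state *k, long code)
{
	k->result = code;
	k->on_shared_list = 0;
}

/* The single funnel: every exit path runs the kthread bookkeeping before
 * the generic teardown frees the task's private state. */
static void do_exit_model(struct task *t, long code)
{
	if (t->is_kthread && t->priv)
		kthread_do_exit_model(t->priv, code);
	free(t->priv);		/* generic teardown */
	t->priv = NULL;
}

int main(void)
{
	struct task t = { .is_kthread = 1, .priv = calloc(1, sizeof(*t.priv)) };

	t.priv->on_shared_list = 1;
	do_exit_model(&t, 42);
	printf("no exit path can skip the unlink\n");
	return 0;
}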
+6
kernel/nscommon.c
···
 		return;
 	}
 }
+
+bool may_see_all_namespaces(void)
+{
+	return (task_active_pid_ns(current) == &init_pid_ns) &&
+	       ns_capable_noaudit(init_pid_ns.user_ns, CAP_SYS_ADMIN);
+}
+4 -25
kernel/nstree.c
···
 static inline bool __must_check may_list_ns(const struct klistns *kls,
 					    struct ns_common *ns)
 {
-	if (kls->user_ns) {
-		if (kls->userns_capable)
-			return true;
-	} else {
-		struct ns_common *owner;
-		struct user_namespace *user_ns;
-
-		owner = ns_owner(ns);
-		if (owner)
-			user_ns = to_user_ns(owner);
-		else
-			user_ns = &init_user_ns;
-		if (ns_capable_noaudit(user_ns, CAP_SYS_ADMIN))
-			return true;
-	}
-
+	if (kls->user_ns && kls->userns_capable)
+		return true;
 	if (is_current_namespace(ns))
 		return true;
-
-	if (ns->ns_type != CLONE_NEWUSER)
-		return false;
-
-	if (ns_capable_noaudit(to_user_ns(ns), CAP_SYS_ADMIN))
-		return true;
-
-	return false;
+	return may_see_all_namespaces();
 }
 
 static inline void ns_put(struct ns_common *ns)
···
 	ret = 0;
 	head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head;
-	kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN);
+	kls->userns_capable = may_see_all_namespaces();
 
 	rcu_read_lock();
+15 -10
tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
···
 	__u64 mnt_ns_id[MNT_NS_COUNT];
 };
 
+static inline bool mntns_in_list(__u64 *mnt_ns_id, struct mnt_ns_info *info)
+{
+	for (int i = 0; i < MNT_NS_COUNT; i++) {
+		if (mnt_ns_id[i] == info->mnt_ns_id)
+			return true;
+	}
+	return false;
+}
+
 FIXTURE_SETUP(iterate_mount_namespaces)
 {
 	for (int i = 0; i < MNT_NS_COUNT; i++)
 		self->fd_mnt_ns[i] = -EBADF;
-
-	/*
-	 * Creating a new user namespace let's us guarantee that we only see
-	 * mount namespaces that we did actually create.
-	 */
-	ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
 
 	for (int i = 0; i < MNT_NS_COUNT; i++) {
 		struct mnt_ns_info info = {};
···
 	fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC);
 	ASSERT_GE(fd_mnt_ns_cur, 0);
 
-	for (;; count++) {
+	for (;;) {
 		struct mnt_ns_info info = {};
 		int fd_mnt_ns_next;
 
 		fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
 		if (fd_mnt_ns_next < 0 && errno == ENOENT)
 			break;
+		if (mntns_in_list(self->mnt_ns_id, &info))
+			count++;
 		ASSERT_GE(fd_mnt_ns_next, 0);
 		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
 		fd_mnt_ns_cur = fd_mnt_ns_next;
···
 	fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC);
 	ASSERT_GE(fd_mnt_ns_cur, 0);
 
-	for (;; count++) {
+	for (;;) {
 		struct mnt_ns_info info = {};
 		int fd_mnt_ns_prev;
 
 		fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
 		if (fd_mnt_ns_prev < 0 && errno == ENOENT)
 			break;
+		if (mntns_in_list(self->mnt_ns_id, &info))
+			count++;
 		ASSERT_GE(fd_mnt_ns_prev, 0);
 		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
 		fd_mnt_ns_cur = fd_mnt_ns_prev;
···
 		ASSERT_GE(fd_mnt_ns_next, 0);
 		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
 		fd_mnt_ns_cur = fd_mnt_ns_next;
-		ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
 	}
 }
···
 		ASSERT_GE(fd_mnt_ns_prev, 0);
 		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
 		fd_mnt_ns_cur = fd_mnt_ns_prev;
-		ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
 	}
 }