Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-5.10-2020-10-24' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

- fsize was missed in the previous unification of work flags

- A few fixes cleaning up the creds cases of the work flags unification (Pavel)

- Fix io-wq NUMA affinities for a node that was completely unplugged and then replugged

- Two fallout fixes from the set_fs changes. One local to io_uring, one
for the splice entry point that io_uring uses.

- Linked timeout fixes (Pavel)

- Removal of ->flush() ->files work-around that we don't need anymore
with referenced files (Pavel)

- Various cleanups (Pavel)

* tag 'io_uring-5.10-2020-10-24' of git://git.kernel.dk/linux-block:
splice: change exported internal do_splice() helper to take kernel offset
io_uring: make loop_rw_iter() use original user supplied pointers
io_uring: remove req cancel in ->flush()
io-wq: re-set NUMA node affinities if CPUs come online
io_uring: don't reuse linked_timeout
io_uring: unify fsize with def->work_flags
io_uring: fix racy REQ_F_LINK_TIMEOUT clearing
io_uring: do poll's hash_node init in common code
io_uring: inline io_poll_task_handler()
io_uring: remove extra ->file check in poll prep
io_uring: make cached_cq_overflow non atomic_t
io_uring: inline io_fail_links()
io_uring: kill ref get/drop in personality init
io_uring: flags-based creds init in queue

+193 -120
+62 -6
fs/io-wq.c
··· 19 19 #include <linux/task_work.h> 20 20 #include <linux/blk-cgroup.h> 21 21 #include <linux/audit.h> 22 + #include <linux/cpu.h> 22 23 24 + #include "../kernel/sched/sched.h" 23 25 #include "io-wq.h" 24 26 25 27 #define WORKER_IDLE_TIMEOUT (5 * HZ) ··· 125 123 refcount_t refs; 126 124 struct completion done; 127 125 126 + struct hlist_node cpuhp_node; 127 + 128 128 refcount_t use_refs; 129 129 }; 130 + 131 + static enum cpuhp_state io_wq_online; 130 132 131 133 static bool io_worker_get(struct io_worker *worker) 132 134 { ··· 193 187 worker->blkcg_css = NULL; 194 188 } 195 189 #endif 196 - 190 + if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY) 191 + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; 197 192 return dropped_lock; 198 193 } 199 194 ··· 490 483 if ((work->flags & IO_WQ_WORK_CREDS) && 491 484 worker->cur_creds != work->identity->creds) 492 485 io_wq_switch_creds(worker, work); 493 - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize; 486 + if (work->flags & IO_WQ_WORK_FSIZE) 487 + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize; 488 + else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY) 489 + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; 494 490 io_wq_switch_blkcg(worker, work); 495 491 #ifdef CONFIG_AUDIT 496 492 current->loginuid = work->identity->loginuid; ··· 1097 1087 return ERR_PTR(-ENOMEM); 1098 1088 1099 1089 wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL); 1100 - if (!wq->wqes) { 1101 - kfree(wq); 1102 - return ERR_PTR(-ENOMEM); 1103 - } 1090 + if (!wq->wqes) 1091 + goto err_wq; 1092 + 1093 + ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); 1094 + if (ret) 1095 + goto err_wqes; 1104 1096 1105 1097 wq->free_work = data->free_work; 1106 1098 wq->do_work = data->do_work; ··· 1110 1098 /* caller must already hold a reference to this */ 1111 1099 wq->user = data->user; 1112 1100 1101 + ret = -ENOMEM; 
1113 1102 for_each_node(node) { 1114 1103 struct io_wqe *wqe; 1115 1104 int alloc_node = node; ··· 1154 1141 ret = PTR_ERR(wq->manager); 1155 1142 complete(&wq->done); 1156 1143 err: 1144 + cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); 1157 1145 for_each_node(node) 1158 1146 kfree(wq->wqes[node]); 1147 + err_wqes: 1159 1148 kfree(wq->wqes); 1149 + err_wq: 1160 1150 kfree(wq); 1161 1151 return ERR_PTR(ret); 1162 1152 } ··· 1175 1159 static void __io_wq_destroy(struct io_wq *wq) 1176 1160 { 1177 1161 int node; 1162 + 1163 + cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); 1178 1164 1179 1165 set_bit(IO_WQ_BIT_EXIT, &wq->state); 1180 1166 if (wq->manager) ··· 1205 1187 { 1206 1188 return wq->manager; 1207 1189 } 1190 + 1191 + static bool io_wq_worker_affinity(struct io_worker *worker, void *data) 1192 + { 1193 + struct task_struct *task = worker->task; 1194 + struct rq_flags rf; 1195 + struct rq *rq; 1196 + 1197 + rq = task_rq_lock(task, &rf); 1198 + do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node)); 1199 + task->flags |= PF_NO_SETAFFINITY; 1200 + task_rq_unlock(rq, task, &rf); 1201 + return false; 1202 + } 1203 + 1204 + static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) 1205 + { 1206 + struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); 1207 + int i; 1208 + 1209 + rcu_read_lock(); 1210 + for_each_node(i) 1211 + io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL); 1212 + rcu_read_unlock(); 1213 + return 0; 1214 + } 1215 + 1216 + static __init int io_wq_init(void) 1217 + { 1218 + int ret; 1219 + 1220 + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", 1221 + io_wq_cpu_online, NULL); 1222 + if (ret < 0) 1223 + return ret; 1224 + io_wq_online = ret; 1225 + return 0; 1226 + } 1227 + subsys_initcall(io_wq_init);
+1
fs/io-wq.h
··· 17 17 IO_WQ_WORK_MM = 128, 18 18 IO_WQ_WORK_CREDS = 256, 19 19 IO_WQ_WORK_BLKCG = 512, 20 + IO_WQ_WORK_FSIZE = 1024, 20 21 21 22 IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ 22 23 };
+78 -99
fs/io_uring.c
··· 277 277 unsigned sq_mask; 278 278 unsigned sq_thread_idle; 279 279 unsigned cached_sq_dropped; 280 - atomic_t cached_cq_overflow; 280 + unsigned cached_cq_overflow; 281 281 unsigned long sq_check_overflow; 282 282 283 283 struct list_head defer_list; ··· 585 585 REQ_F_BUFFER_SELECTED_BIT, 586 586 REQ_F_NO_FILE_TABLE_BIT, 587 587 REQ_F_WORK_INITIALIZED_BIT, 588 + REQ_F_LTIMEOUT_ACTIVE_BIT, 588 589 589 590 /* not a real bit, just to check we're not overflowing the space */ 590 591 __REQ_F_LAST_BIT, ··· 615 614 REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), 616 615 /* must not punt to workers */ 617 616 REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), 618 - /* has linked timeout */ 617 + /* has or had linked timeout */ 619 618 REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), 620 619 /* regular file */ 621 620 REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), ··· 629 628 REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), 630 629 /* io_wq_work is initialized */ 631 630 REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), 631 + /* linked timeout is active, i.e. 
prepared by link's head */ 632 + REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT), 632 633 }; 633 634 634 635 struct async_poll { ··· 753 750 unsigned pollout : 1; 754 751 /* op supports buffer selection */ 755 752 unsigned buffer_select : 1; 756 - /* needs rlimit(RLIMIT_FSIZE) assigned */ 757 - unsigned needs_fsize : 1; 758 753 /* must always have async data allocated */ 759 754 unsigned needs_async_data : 1; 760 755 /* size of async data needed, if any */ ··· 776 775 .hash_reg_file = 1, 777 776 .unbound_nonreg_file = 1, 778 777 .pollout = 1, 779 - .needs_fsize = 1, 780 778 .needs_async_data = 1, 781 779 .async_size = sizeof(struct io_async_rw), 782 - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, 780 + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | 781 + IO_WQ_WORK_FSIZE, 783 782 }, 784 783 [IORING_OP_FSYNC] = { 785 784 .needs_file = 1, ··· 790 789 .unbound_nonreg_file = 1, 791 790 .pollin = 1, 792 791 .async_size = sizeof(struct io_async_rw), 793 - .work_flags = IO_WQ_WORK_BLKCG, 792 + .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM, 794 793 }, 795 794 [IORING_OP_WRITE_FIXED] = { 796 795 .needs_file = 1, 797 796 .hash_reg_file = 1, 798 797 .unbound_nonreg_file = 1, 799 798 .pollout = 1, 800 - .needs_fsize = 1, 801 799 .async_size = sizeof(struct io_async_rw), 802 - .work_flags = IO_WQ_WORK_BLKCG, 800 + .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE | 801 + IO_WQ_WORK_MM, 803 802 }, 804 803 [IORING_OP_POLL_ADD] = { 805 804 .needs_file = 1, ··· 857 856 }, 858 857 [IORING_OP_FALLOCATE] = { 859 858 .needs_file = 1, 860 - .needs_fsize = 1, 861 - .work_flags = IO_WQ_WORK_BLKCG, 859 + .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE, 862 860 }, 863 861 [IORING_OP_OPENAT] = { 864 862 .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG | ··· 887 887 .needs_file = 1, 888 888 .unbound_nonreg_file = 1, 889 889 .pollout = 1, 890 - .needs_fsize = 1, 891 890 .async_size = sizeof(struct io_async_rw), 892 - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, 891 + 
.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | 892 + IO_WQ_WORK_FSIZE, 893 893 }, 894 894 [IORING_OP_FADVISE] = { 895 895 .needs_file = 1, ··· 1070 1070 refcount_set(&id->count, 1); 1071 1071 } 1072 1072 1073 + static inline void __io_req_init_async(struct io_kiocb *req) 1074 + { 1075 + memset(&req->work, 0, sizeof(req->work)); 1076 + req->flags |= REQ_F_WORK_INITIALIZED; 1077 + } 1078 + 1073 1079 /* 1074 1080 * Note: must call io_req_init_async() for the first time you 1075 1081 * touch any members of io_wq_work. ··· 1087 1081 if (req->flags & REQ_F_WORK_INITIALIZED) 1088 1082 return; 1089 1083 1090 - memset(&req->work, 0, sizeof(req->work)); 1091 - req->flags |= REQ_F_WORK_INITIALIZED; 1084 + __io_req_init_async(req); 1092 1085 1093 1086 /* Grab a ref if this isn't our static identity */ 1094 1087 req->work.identity = tctx->identity; ··· 1179 1174 struct io_ring_ctx *ctx = req->ctx; 1180 1175 1181 1176 return seq != ctx->cached_cq_tail 1182 - + atomic_read(&ctx->cached_cq_overflow); 1177 + + READ_ONCE(ctx->cached_cq_overflow); 1183 1178 } 1184 1179 1185 1180 return false; ··· 1290 1285 struct io_identity *id = req->work.identity; 1291 1286 struct io_ring_ctx *ctx = req->ctx; 1292 1287 1293 - if (def->needs_fsize && id->fsize != rlimit(RLIMIT_FSIZE)) 1294 - return false; 1288 + if (def->work_flags & IO_WQ_WORK_FSIZE) { 1289 + if (id->fsize != rlimit(RLIMIT_FSIZE)) 1290 + return false; 1291 + req->work.flags |= IO_WQ_WORK_FSIZE; 1292 + } 1295 1293 1296 1294 if (!(req->work.flags & IO_WQ_WORK_FILES) && 1297 1295 (def->work_flags & IO_WQ_WORK_FILES) && ··· 1627 1619 WRITE_ONCE(cqe->res, req->result); 1628 1620 WRITE_ONCE(cqe->flags, req->compl.cflags); 1629 1621 } else { 1622 + ctx->cached_cq_overflow++; 1630 1623 WRITE_ONCE(ctx->rings->cq_overflow, 1631 - atomic_inc_return(&ctx->cached_cq_overflow)); 1624 + ctx->cached_cq_overflow); 1632 1625 } 1633 1626 } 1634 1627 ··· 1671 1662 * then we cannot store the request for later flushing, we need 1672 1663 * to drop it 
on the floor. 1673 1664 */ 1674 - WRITE_ONCE(ctx->rings->cq_overflow, 1675 - atomic_inc_return(&ctx->cached_cq_overflow)); 1665 + ctx->cached_cq_overflow++; 1666 + WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow); 1676 1667 } else { 1677 1668 if (list_empty(&ctx->cq_overflow_list)) { 1678 1669 set_bit(0, &ctx->sq_check_overflow); ··· 1874 1865 link = list_first_entry(&req->link_list, struct io_kiocb, link_list); 1875 1866 if (link->opcode != IORING_OP_LINK_TIMEOUT) 1876 1867 return false; 1868 + /* 1869 + * Can happen if a linked timeout fired and link had been like 1870 + * req -> link t-out -> link t-out [-> ...] 1871 + */ 1872 + if (!(link->flags & REQ_F_LTIMEOUT_ACTIVE)) 1873 + return false; 1877 1874 1878 1875 list_del_init(&link->link_list); 1879 1876 wake_ev = io_link_cancel_timeout(link); ··· 1923 1908 /* 1924 1909 * Called if REQ_F_LINK_HEAD is set, and we fail the head request 1925 1910 */ 1926 - static void __io_fail_links(struct io_kiocb *req) 1911 + static void io_fail_links(struct io_kiocb *req) 1927 1912 { 1928 1913 struct io_ring_ctx *ctx = req->ctx; 1914 + unsigned long flags; 1929 1915 1916 + spin_lock_irqsave(&ctx->completion_lock, flags); 1930 1917 while (!list_empty(&req->link_list)) { 1931 1918 struct io_kiocb *link = list_first_entry(&req->link_list, 1932 1919 struct io_kiocb, link_list); ··· 1950 1933 } 1951 1934 1952 1935 io_commit_cqring(ctx); 1953 - } 1954 - 1955 - static void io_fail_links(struct io_kiocb *req) 1956 - { 1957 - struct io_ring_ctx *ctx = req->ctx; 1958 - unsigned long flags; 1959 - 1960 - spin_lock_irqsave(&ctx->completion_lock, flags); 1961 - __io_fail_links(req); 1962 1936 spin_unlock_irqrestore(&ctx->completion_lock, flags); 1963 1937 1964 1938 io_cqring_ev_posted(ctx); ··· 3117 3109 * For files that don't have ->read_iter() and ->write_iter(), handle them 3118 3110 * by looping over ->read() or ->write() manually. 
3119 3111 */ 3120 - static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb, 3121 - struct iov_iter *iter) 3112 + static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) 3122 3113 { 3114 + struct kiocb *kiocb = &req->rw.kiocb; 3115 + struct file *file = req->file; 3123 3116 ssize_t ret = 0; 3124 3117 3125 3118 /* ··· 3140 3131 if (!iov_iter_is_bvec(iter)) { 3141 3132 iovec = iov_iter_iovec(iter); 3142 3133 } else { 3143 - /* fixed buffers import bvec */ 3144 - iovec.iov_base = kmap(iter->bvec->bv_page) 3145 - + iter->iov_offset; 3146 - iovec.iov_len = min(iter->count, 3147 - iter->bvec->bv_len - iter->iov_offset); 3134 + iovec.iov_base = u64_to_user_ptr(req->rw.addr); 3135 + iovec.iov_len = req->rw.len; 3148 3136 } 3149 3137 3150 3138 if (rw == READ) { ··· 3152 3146 iovec.iov_len, io_kiocb_ppos(kiocb)); 3153 3147 } 3154 3148 3155 - if (iov_iter_is_bvec(iter)) 3156 - kunmap(iter->bvec->bv_page); 3157 - 3158 3149 if (nr < 0) { 3159 3150 if (!ret) 3160 3151 ret = nr; ··· 3160 3157 ret += nr; 3161 3158 if (nr != iovec.iov_len) 3162 3159 break; 3160 + req->rw.len -= nr; 3161 + req->rw.addr += nr; 3163 3162 iov_iter_advance(iter, nr); 3164 3163 } 3165 3164 ··· 3351 3346 if (req->file->f_op->read_iter) 3352 3347 return call_read_iter(req->file, &req->rw.kiocb, iter); 3353 3348 else if (req->file->f_op->read) 3354 - return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter); 3349 + return loop_rw_iter(READ, req, iter); 3355 3350 else 3356 3351 return -EINVAL; 3357 3352 } ··· 3542 3537 if (req->file->f_op->write_iter) 3543 3538 ret2 = call_write_iter(req->file, kiocb, iter); 3544 3539 else if (req->file->f_op->write) 3545 - ret2 = loop_rw_iter(WRITE, req->file, kiocb, iter); 3540 + ret2 = loop_rw_iter(WRITE, req, iter); 3546 3541 else 3547 3542 ret2 = -EINVAL; 3548 3543 ··· 4932 4927 io_commit_cqring(ctx); 4933 4928 } 4934 4929 4935 - static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt) 4936 - { 4937 - 
struct io_ring_ctx *ctx = req->ctx; 4938 - 4939 - if (io_poll_rewait(req, &req->poll)) { 4940 - spin_unlock_irq(&ctx->completion_lock); 4941 - return; 4942 - } 4943 - 4944 - hash_del(&req->hash_node); 4945 - io_poll_complete(req, req->result, 0); 4946 - spin_unlock_irq(&ctx->completion_lock); 4947 - 4948 - *nxt = io_put_req_find_next(req); 4949 - io_cqring_ev_posted(ctx); 4950 - } 4951 - 4952 4930 static void io_poll_task_func(struct callback_head *cb) 4953 4931 { 4954 4932 struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); 4955 4933 struct io_ring_ctx *ctx = req->ctx; 4956 - struct io_kiocb *nxt = NULL; 4934 + struct io_kiocb *nxt; 4957 4935 4958 - io_poll_task_handler(req, &nxt); 4959 - if (nxt) 4960 - __io_req_task_submit(nxt); 4936 + if (io_poll_rewait(req, &req->poll)) { 4937 + spin_unlock_irq(&ctx->completion_lock); 4938 + } else { 4939 + hash_del(&req->hash_node); 4940 + io_poll_complete(req, req->result, 0); 4941 + spin_unlock_irq(&ctx->completion_lock); 4942 + 4943 + nxt = io_put_req_find_next(req); 4944 + io_cqring_ev_posted(ctx); 4945 + if (nxt) 4946 + __io_req_task_submit(nxt); 4947 + } 4948 + 4961 4949 percpu_ref_put(&ctx->refs); 4962 4950 } 4963 4951 ··· 5104 5106 struct io_ring_ctx *ctx = req->ctx; 5105 5107 bool cancel = false; 5106 5108 5109 + INIT_HLIST_NODE(&req->hash_node); 5107 5110 io_init_poll_iocb(poll, mask, wake_func); 5108 5111 poll->file = req->file; 5109 5112 poll->wait.private = req; ··· 5166 5167 5167 5168 req->flags |= REQ_F_POLLED; 5168 5169 req->apoll = apoll; 5169 - INIT_HLIST_NODE(&req->hash_node); 5170 5170 5171 5171 mask = 0; 5172 5172 if (def->pollin) ··· 5347 5349 return -EINVAL; 5348 5350 if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index) 5349 5351 return -EINVAL; 5350 - if (!poll->file) 5351 - return -EBADF; 5352 5352 5353 5353 events = READ_ONCE(sqe->poll32_events); 5354 5354 #ifdef __BIG_ENDIAN ··· 5364 5368 struct io_poll_table ipt; 5365 5369 __poll_t mask; 5366 5370 5367 - 
INIT_HLIST_NODE(&req->hash_node); 5368 5371 ipt.pt._qproc = io_poll_queue_proc; 5369 5372 5370 5373 mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events, ··· 6113 6118 if (!list_empty(&req->link_list)) { 6114 6119 prev = list_entry(req->link_list.prev, struct io_kiocb, 6115 6120 link_list); 6116 - if (refcount_inc_not_zero(&prev->refs)) { 6121 + if (refcount_inc_not_zero(&prev->refs)) 6117 6122 list_del_init(&req->link_list); 6118 - prev->flags &= ~REQ_F_LINK_TIMEOUT; 6119 - } else 6123 + else 6120 6124 prev = NULL; 6121 6125 } 6122 6126 ··· 6172 6178 if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT) 6173 6179 return NULL; 6174 6180 6181 + nxt->flags |= REQ_F_LTIMEOUT_ACTIVE; 6175 6182 req->flags |= REQ_F_LINK_TIMEOUT; 6176 6183 return nxt; 6177 6184 } ··· 6187 6192 again: 6188 6193 linked_timeout = io_prep_linked_timeout(req); 6189 6194 6190 - if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.identity->creds && 6195 + if ((req->flags & REQ_F_WORK_INITIALIZED) && 6196 + (req->work.flags & IO_WQ_WORK_CREDS) && 6191 6197 req->work.identity->creds != current_cred()) { 6192 6198 if (old_creds) 6193 6199 revert_creds(old_creds); ··· 6196 6200 old_creds = NULL; /* restored original creds */ 6197 6201 else 6198 6202 old_creds = override_creds(req->work.identity->creds); 6199 - req->work.flags |= IO_WQ_WORK_CREDS; 6200 6203 } 6201 6204 6202 6205 ret = io_issue_sqe(req, true, cs); ··· 6236 6241 if (nxt) { 6237 6242 req = nxt; 6238 6243 6239 - if (req->flags & REQ_F_FORCE_ASYNC) 6244 + if (req->flags & REQ_F_FORCE_ASYNC) { 6245 + linked_timeout = NULL; 6240 6246 goto punt; 6247 + } 6241 6248 goto again; 6242 6249 } 6243 6250 exit: ··· 6502 6505 if (id) { 6503 6506 struct io_identity *iod; 6504 6507 6505 - io_req_init_async(req); 6506 6508 iod = idr_find(&ctx->personality_idr, id); 6507 6509 if (unlikely(!iod)) 6508 6510 return -EINVAL; 6509 6511 refcount_inc(&iod->count); 6510 - io_put_identity(current->io_uring, req); 6512 + 6513 + 
__io_req_init_async(req); 6511 6514 get_cred(iod->creds); 6512 6515 req->work.identity = iod; 6513 6516 req->work.flags |= IO_WQ_WORK_CREDS; ··· 8683 8686 fput(file); 8684 8687 } 8685 8688 8686 - static void __io_uring_attempt_task_drop(struct file *file) 8687 - { 8688 - struct file *old = xa_load(&current->io_uring->xa, (unsigned long)file); 8689 - 8690 - if (old == file) 8691 - io_uring_del_task_file(file); 8692 - } 8693 - 8694 8689 /* 8695 8690 * Drop task note for this file if we're the only ones that hold it after 8696 8691 * pending fput() 8697 8692 */ 8698 - static void io_uring_attempt_task_drop(struct file *file, bool exiting) 8693 + static void io_uring_attempt_task_drop(struct file *file) 8699 8694 { 8700 8695 if (!current->io_uring) 8701 8696 return; ··· 8695 8706 * fput() is pending, will be 2 if the only other ref is our potential 8696 8707 * task file note. If the task is exiting, drop regardless of count. 8697 8708 */ 8698 - if (!exiting && atomic_long_read(&file->f_count) != 2) 8699 - return; 8700 - 8701 - __io_uring_attempt_task_drop(file); 8709 + if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || 8710 + atomic_long_read(&file->f_count) == 2) 8711 + io_uring_del_task_file(file); 8702 8712 } 8703 8713 8704 8714 void __io_uring_files_cancel(struct files_struct *files) ··· 8755 8767 8756 8768 static int io_uring_flush(struct file *file, void *data) 8757 8769 { 8758 - struct io_ring_ctx *ctx = file->private_data; 8759 - 8760 - /* 8761 - * If the task is going away, cancel work it may have pending 8762 - */ 8763 - if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) 8764 - data = NULL; 8765 - 8766 - io_uring_cancel_task_requests(ctx, data); 8767 - io_uring_attempt_task_drop(file, !data); 8770 + io_uring_attempt_task_drop(file); 8768 8771 return 0; 8769 8772 } 8770 8773
+50 -13
fs/splice.c
··· 1005 1005 /* 1006 1006 * Determine where to splice to/from. 1007 1007 */ 1008 - long do_splice(struct file *in, loff_t __user *off_in, 1009 - struct file *out, loff_t __user *off_out, 1010 - size_t len, unsigned int flags) 1008 + long do_splice(struct file *in, loff_t *off_in, struct file *out, 1009 + loff_t *off_out, size_t len, unsigned int flags) 1011 1010 { 1012 1011 struct pipe_inode_info *ipipe; 1013 1012 struct pipe_inode_info *opipe; ··· 1040 1041 if (off_out) { 1041 1042 if (!(out->f_mode & FMODE_PWRITE)) 1042 1043 return -EINVAL; 1043 - if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1044 - return -EFAULT; 1044 + offset = *off_out; 1045 1045 } else { 1046 1046 offset = out->f_pos; 1047 1047 } ··· 1061 1063 1062 1064 if (!off_out) 1063 1065 out->f_pos = offset; 1064 - else if (copy_to_user(off_out, &offset, sizeof(loff_t))) 1065 - ret = -EFAULT; 1066 + else 1067 + *off_out = offset; 1066 1068 1067 1069 return ret; 1068 1070 } ··· 1073 1075 if (off_in) { 1074 1076 if (!(in->f_mode & FMODE_PREAD)) 1075 1077 return -EINVAL; 1076 - if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1077 - return -EFAULT; 1078 + offset = *off_in; 1078 1079 } else { 1079 1080 offset = in->f_pos; 1080 1081 } ··· 1097 1100 wakeup_pipe_readers(opipe); 1098 1101 if (!off_in) 1099 1102 in->f_pos = offset; 1100 - else if (copy_to_user(off_in, &offset, sizeof(loff_t))) 1101 - ret = -EFAULT; 1103 + else 1104 + *off_in = offset; 1102 1105 1103 1106 return ret; 1104 1107 } 1105 1108 1106 1109 return -EINVAL; 1110 + } 1111 + 1112 + static long __do_splice(struct file *in, loff_t __user *off_in, 1113 + struct file *out, loff_t __user *off_out, 1114 + size_t len, unsigned int flags) 1115 + { 1116 + struct pipe_inode_info *ipipe; 1117 + struct pipe_inode_info *opipe; 1118 + loff_t offset, *__off_in = NULL, *__off_out = NULL; 1119 + long ret; 1120 + 1121 + ipipe = get_pipe_info(in, true); 1122 + opipe = get_pipe_info(out, true); 1123 + 1124 + if (ipipe && off_in) 1125 + return 
-ESPIPE; 1126 + if (opipe && off_out) 1127 + return -ESPIPE; 1128 + 1129 + if (off_out) { 1130 + if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1131 + return -EFAULT; 1132 + __off_out = &offset; 1133 + } 1134 + if (off_in) { 1135 + if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1136 + return -EFAULT; 1137 + __off_in = &offset; 1138 + } 1139 + 1140 + ret = do_splice(in, __off_in, out, __off_out, len, flags); 1141 + if (ret < 0) 1142 + return ret; 1143 + 1144 + if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t))) 1145 + return -EFAULT; 1146 + if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t))) 1147 + return -EFAULT; 1148 + 1149 + return ret; 1107 1150 } 1108 1151 1109 1152 static int iter_to_pipe(struct iov_iter *from, ··· 1340 1303 if (in.file) { 1341 1304 out = fdget(fd_out); 1342 1305 if (out.file) { 1343 - error = do_splice(in.file, off_in, out.file, off_out, 1344 - len, flags); 1306 + error = __do_splice(in.file, off_in, out.file, off_out, 1307 + len, flags); 1345 1308 fdput(out); 1346 1309 } 1347 1310 fdput(in);
+2 -2
include/linux/splice.h
··· 78 78 struct pipe_buffer *); 79 79 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, 80 80 splice_direct_actor *); 81 - extern long do_splice(struct file *in, loff_t __user *off_in, 82 - struct file *out, loff_t __user *off_out, 81 + extern long do_splice(struct file *in, loff_t *off_in, 82 + struct file *out, loff_t *off_out, 83 83 size_t len, unsigned int flags); 84 84 85 85 extern long do_tee(struct file *in, struct file *out, size_t len,