Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-6.11-20240726' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

- Fix a syzbot issue for the msg ring cache added in this release. No
ill effects from this one, but it did make KMSAN unhappy (me)

- Sanitize the NAPI timeout handling by using ktime_t for the values
throughout, rather than converting back and forth (Pavel)

- Fail NAPI registration for IOPOLL rings, as it's not supported (Pavel)

- Fix a theoretical issue with ring polling and cancellations (Pavel)

- Various little cleanups and fixes (Pavel)

* tag 'io_uring-6.11-20240726' of git://git.kernel.dk/linux:
io_uring/napi: pass ktime to io_napi_adjust_timeout
io_uring/napi: use ktime in busy polling
io_uring/msg_ring: fix uninitialized use of target_req->flags
io_uring: align iowq and task request error handling
io_uring: kill REQ_F_CANCEL_SEQ
io_uring: simplify io_uring_cmd return
io_uring: fix io_match_task must_hold
io_uring: don't allow netpolling with SETUP_IOPOLL
io_uring: tighten task exit cancellations

+50 -48
+1 -4
include/linux/io_uring_types.h
··· 404 404 spinlock_t napi_lock; /* napi_list lock */ 405 405 406 406 /* napi busy poll default timeout */ 407 - unsigned int napi_busy_poll_to; 407 + ktime_t napi_busy_poll_dt; 408 408 bool napi_prefer_busy_poll; 409 409 bool napi_enabled; 410 410 ··· 461 461 REQ_F_SUPPORT_NOWAIT_BIT, 462 462 REQ_F_ISREG_BIT, 463 463 REQ_F_POLL_NO_LAZY_BIT, 464 - REQ_F_CANCEL_SEQ_BIT, 465 464 REQ_F_CAN_POLL_BIT, 466 465 REQ_F_BL_EMPTY_BIT, 467 466 REQ_F_BL_NO_RECYCLE_BIT, ··· 535 536 REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), 536 537 /* don't use lazy poll wake for this request */ 537 538 REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), 538 - /* cancel sequence is set and valid */ 539 - REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), 540 539 /* file is pollable */ 541 540 REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), 542 541 /* buffer list was empty after selection of buffer */
+9 -4
io_uring/io_uring.c
··· 1849 1849 } while (1); 1850 1850 1851 1851 /* avoid locking problems by failing it from a clean context */ 1852 - if (ret < 0) 1852 + if (ret) 1853 1853 io_req_task_queue_fail(req, ret); 1854 1854 } 1855 1855 ··· 2416 2416 2417 2417 if (uts) { 2418 2418 struct timespec64 ts; 2419 + ktime_t dt; 2419 2420 2420 2421 if (get_timespec64(&ts, uts)) 2421 2422 return -EFAULT; 2422 2423 2423 - iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); 2424 - io_napi_adjust_timeout(ctx, &iowq, &ts); 2424 + dt = timespec64_to_ktime(ts); 2425 + iowq.timeout = ktime_add(dt, ktime_get()); 2426 + io_napi_adjust_timeout(ctx, &iowq, dt); 2425 2427 } 2426 2428 2427 2429 if (sig) { ··· 3033 3031 bool loop = false; 3034 3032 3035 3033 io_uring_drop_tctx_refs(current); 3034 + if (!tctx_inflight(tctx, !cancel_all)) 3035 + break; 3036 + 3036 3037 /* read completions before cancelations */ 3037 - inflight = tctx_inflight(tctx, !cancel_all); 3038 + inflight = tctx_inflight(tctx, false); 3038 3039 if (!inflight) 3039 3040 break; 3040 3041
+1 -1
io_uring/io_uring.h
··· 43 43 ktime_t timeout; 44 44 45 45 #ifdef CONFIG_NET_RX_BUSY_POLL 46 - unsigned int napi_busy_poll_to; 46 + ktime_t napi_busy_poll_dt; 47 47 bool napi_prefer_busy_poll; 48 48 #endif 49 49 };
+3 -3
io_uring/msg_ring.c
··· 110 110 if (spin_trylock(&ctx->msg_lock)) { 111 111 req = io_alloc_cache_get(&ctx->msg_cache); 112 112 spin_unlock(&ctx->msg_lock); 113 + if (req) 114 + return req; 113 115 } 114 - if (req) 115 - return req; 116 - return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN); 116 + return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); 117 117 } 118 118 119 119 static int io_msg_data_remote(struct io_kiocb *req)
+29 -29
io_uring/napi.c
··· 33 33 return NULL; 34 34 } 35 35 36 + static inline ktime_t net_to_ktime(unsigned long t) 37 + { 38 + /* napi approximating usecs, reverse busy_loop_current_time */ 39 + return ns_to_ktime(t << 10); 40 + } 41 + 36 42 void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) 37 43 { 38 44 struct hlist_head *hash_list; ··· 108 102 __io_napi_remove_stale(ctx); 109 103 } 110 104 111 - static inline bool io_napi_busy_loop_timeout(unsigned long start_time, 112 - unsigned long bp_usec) 105 + static inline bool io_napi_busy_loop_timeout(ktime_t start_time, 106 + ktime_t bp) 113 107 { 114 - if (bp_usec) { 115 - unsigned long end_time = start_time + bp_usec; 116 - unsigned long now = busy_loop_current_time(); 108 + if (bp) { 109 + ktime_t end_time = ktime_add(start_time, bp); 110 + ktime_t now = net_to_ktime(busy_loop_current_time()); 117 111 118 - return time_after(now, end_time); 112 + return ktime_after(now, end_time); 119 113 } 120 114 121 115 return true; ··· 130 124 return true; 131 125 if (io_should_wake(iowq) || io_has_work(iowq->ctx)) 132 126 return true; 133 - if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to)) 127 + if (io_napi_busy_loop_timeout(net_to_ktime(start_time), 128 + iowq->napi_busy_poll_dt)) 134 129 return true; 135 130 136 131 return false; ··· 188 181 */ 189 182 void io_napi_init(struct io_ring_ctx *ctx) 190 183 { 184 + u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC; 185 + 191 186 INIT_LIST_HEAD(&ctx->napi_list); 192 187 spin_lock_init(&ctx->napi_lock); 193 188 ctx->napi_prefer_busy_poll = false; 194 - ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll); 189 + ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); 195 190 } 196 191 197 192 /* ··· 226 217 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) 227 218 { 228 219 const struct io_uring_napi curr = { 229 - .busy_poll_to = ctx->napi_busy_poll_to, 220 + .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), 230 221 .prefer_busy_poll = 
ctx->napi_prefer_busy_poll 231 222 }; 232 223 struct io_uring_napi napi; 233 224 225 + if (ctx->flags & IORING_SETUP_IOPOLL) 226 + return -EINVAL; 234 227 if (copy_from_user(&napi, arg, sizeof(napi))) 235 228 return -EFAULT; 236 229 if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) ··· 241 230 if (copy_to_user(arg, &curr, sizeof(curr))) 242 231 return -EFAULT; 243 232 244 - WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to); 233 + WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); 245 234 WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); 246 235 WRITE_ONCE(ctx->napi_enabled, true); 247 236 return 0; ··· 258 247 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) 259 248 { 260 249 const struct io_uring_napi curr = { 261 - .busy_poll_to = ctx->napi_busy_poll_to, 250 + .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), 262 251 .prefer_busy_poll = ctx->napi_prefer_busy_poll 263 252 }; 264 253 265 254 if (arg && copy_to_user(arg, &curr, sizeof(curr))) 266 255 return -EFAULT; 267 256 268 - WRITE_ONCE(ctx->napi_busy_poll_to, 0); 257 + WRITE_ONCE(ctx->napi_busy_poll_dt, 0); 269 258 WRITE_ONCE(ctx->napi_prefer_busy_poll, false); 270 259 WRITE_ONCE(ctx->napi_enabled, false); 271 260 return 0; ··· 282 271 * the NAPI timeout accordingly. 
283 272 */ 284 273 void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, 285 - struct timespec64 *ts) 274 + ktime_t to_wait) 286 275 { 287 - unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to); 276 + ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt); 288 277 289 - if (ts) { 290 - struct timespec64 poll_to_ts; 278 + if (to_wait) 279 + poll_dt = min(poll_dt, to_wait); 291 280 292 - poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to); 293 - if (timespec64_compare(ts, &poll_to_ts) < 0) { 294 - s64 poll_to_ns = timespec64_to_ns(ts); 295 - if (poll_to_ns > 0) { 296 - u64 val = poll_to_ns + 999; 297 - do_div(val, 1000); 298 - poll_to = val; 299 - } 300 - } 301 - } 302 - 303 - iowq->napi_busy_poll_to = poll_to; 281 + iowq->napi_busy_poll_dt = poll_dt; 304 282 } 305 283 306 284 /* ··· 318 318 LIST_HEAD(napi_list); 319 319 bool is_stale = false; 320 320 321 - if (!READ_ONCE(ctx->napi_busy_poll_to)) 321 + if (!READ_ONCE(ctx->napi_busy_poll_dt)) 322 322 return 0; 323 323 if (list_empty_careful(&ctx->napi_list)) 324 324 return 0;
+5 -5
io_uring/napi.h
··· 18 18 void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock); 19 19 20 20 void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, 21 - struct io_wait_queue *iowq, struct timespec64 *ts); 21 + struct io_wait_queue *iowq, ktime_t to_wait); 22 22 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); 23 23 int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); 24 24 ··· 29 29 30 30 static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, 31 31 struct io_wait_queue *iowq, 32 - struct timespec64 *ts) 32 + ktime_t to_wait) 33 33 { 34 34 if (!io_napi(ctx)) 35 35 return; 36 - __io_napi_adjust_timeout(ctx, iowq, ts); 36 + __io_napi_adjust_timeout(ctx, iowq, to_wait); 37 37 } 38 38 39 39 static inline void io_napi_busy_loop(struct io_ring_ctx *ctx, ··· 55 55 struct io_ring_ctx *ctx = req->ctx; 56 56 struct socket *sock; 57 57 58 - if (!READ_ONCE(ctx->napi_busy_poll_to)) 58 + if (!READ_ONCE(ctx->napi_busy_poll_dt)) 59 59 return; 60 60 61 61 sock = sock_from_file(req->file); ··· 88 88 } 89 89 static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx, 90 90 struct io_wait_queue *iowq, 91 - struct timespec64 *ts) 91 + ktime_t to_wait) 92 92 { 93 93 } 94 94 static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
+1 -1
io_uring/timeout.c
··· 639 639 640 640 static bool io_match_task(struct io_kiocb *head, struct task_struct *task, 641 641 bool cancel_all) 642 - __must_hold(&req->ctx->timeout_lock) 642 + __must_hold(&head->ctx->timeout_lock) 643 643 { 644 644 struct io_kiocb *req; 645 645
+1 -1
io_uring/uring_cmd.c
··· 265 265 req_set_fail(req); 266 266 io_req_uring_cleanup(req, issue_flags); 267 267 io_req_set_res(req, ret, 0); 268 - return ret < 0 ? ret : IOU_OK; 268 + return IOU_OK; 269 269 } 270 270 271 271 int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,