Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-7.1-20260508' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull io_uring fixes from Jens Axboe:

- Ensure that the absolute timeouts for both the command side and the
waiting side honor the caller's time namespace

- Ensure tracked NAPI entries are cleared at unregistration time. The
NAPI polling loop checks the list state rather than the general NAPI
state, so stale entries can lead to NAPI polling even after
unregistration has been done. Once unregistered, all NAPI polling
should be disabled.

- Fix for eventfd recursive invocation handling

* tag 'io_uring-7.1-20260508' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
io_uring/wait: honour caller's time namespace for IORING_ENTER_ABS_TIMER
io_uring/timeout: honour caller's time namespace for IORING_TIMEOUT_ABS
io_uring/eventfd: reset deferred signal state
io_uring/napi: clear tracked NAPI entries on unregister

+53 -24
+1
io_uring/eventfd.c
··· 43 43 { 44 44 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); 45 45 46 + atomic_andnot(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops); 46 47 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); 47 48 io_eventfd_put(ev_fd); 48 49 }
+20 -7
io_uring/napi.c
··· 38 38 return ns_to_ktime(t << 10); 39 39 } 40 40 41 - int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id) 41 + int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id, 42 + unsigned int mode) 42 43 { 43 44 struct hlist_head *hash_list; 44 45 struct io_napi_entry *e; ··· 70 69 * kfree() 71 70 */ 72 71 spin_lock(&ctx->napi_lock); 72 + if (unlikely(READ_ONCE(ctx->napi_track_mode) != mode)) { 73 + spin_unlock(&ctx->napi_lock); 74 + kfree(e); 75 + return -EINVAL; 76 + } 73 77 if (unlikely(io_napi_hash_find(hash_list, napi_id))) { 74 78 spin_unlock(&ctx->napi_lock); 75 79 kfree(e); ··· 202 196 bool (*loop_end)(void *, unsigned long), 203 197 void *loop_end_arg) 204 198 { 205 - if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC) 199 + switch (READ_ONCE(ctx->napi_track_mode)) { 200 + case IO_URING_NAPI_TRACKING_STATIC: 206 201 return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); 207 - return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); 202 + case IO_URING_NAPI_TRACKING_DYNAMIC: 203 + return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg); 204 + default: 205 + return false; 206 + } 208 207 } 209 208 210 209 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, ··· 284 273 default: 285 274 return -EINVAL; 286 275 } 287 - /* clean the napi list for new settings */ 276 + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); 288 277 io_napi_free(ctx); 289 - WRITE_ONCE(ctx->napi_track_mode, napi->op_param); 290 278 /* cap NAPI at 10 msec of spin time */ 291 279 napi->busy_poll_to = min(10000, napi->busy_poll_to); 292 280 WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC); 293 281 WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll); 282 + WRITE_ONCE(ctx->napi_track_mode, napi->op_param); 294 283 return 0; 295 284 } 296 285 ··· 326 315 case IO_URING_NAPI_STATIC_ADD_ID: 327 316 if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) 328 317 
return -EINVAL; 329 - return __io_napi_add_id(ctx, napi.op_param); 318 + return __io_napi_add_id(ctx, napi.op_param, 319 + IO_URING_NAPI_TRACKING_STATIC); 330 320 case IO_URING_NAPI_STATIC_DEL_ID: 331 321 if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC) 332 322 return -EINVAL; ··· 355 343 if (arg && copy_to_user(arg, &curr, sizeof(curr))) 356 344 return -EFAULT; 357 345 346 + WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); 358 347 WRITE_ONCE(ctx->napi_busy_poll_dt, 0); 359 348 WRITE_ONCE(ctx->napi_prefer_busy_poll, false); 360 - WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE); 349 + io_napi_free(ctx); 361 350 return 0; 362 351 } 363 352
+5 -3
io_uring/napi.h
··· 15 15 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg); 16 16 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg); 17 17 18 - int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id); 18 + int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id, 19 + unsigned int mode); 19 20 20 21 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq); 21 22 int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx); ··· 44 43 { 45 44 struct io_ring_ctx *ctx = req->ctx; 46 45 struct socket *sock; 46 + unsigned int mode = IO_URING_NAPI_TRACKING_DYNAMIC; 47 47 48 - if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC) 48 + if (READ_ONCE(ctx->napi_track_mode) != mode) 49 49 return; 50 50 51 51 sock = sock_from_file(req->file); 52 52 if (sock && sock->sk) 53 - __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id)); 53 + __io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id), mode); 54 54 } 55 55 56 56 #else
+22 -13
io_uring/timeout.c
··· 3 3 #include <linux/errno.h> 4 4 #include <linux/file.h> 5 5 #include <linux/io_uring.h> 6 + #include <linux/time_namespace.h> 6 7 7 8 #include <trace/events/io_uring.h> 8 9 ··· 36 35 bool ltimeout; 37 36 }; 38 37 38 + static clockid_t io_flags_to_clock(unsigned flags) 39 + { 40 + switch (flags & IORING_TIMEOUT_CLOCK_MASK) { 41 + case IORING_TIMEOUT_BOOTTIME: 42 + return CLOCK_BOOTTIME; 43 + case IORING_TIMEOUT_REALTIME: 44 + return CLOCK_REALTIME; 45 + default: 46 + /* can't happen, vetted at prep time */ 47 + WARN_ON_ONCE(1); 48 + fallthrough; 49 + case 0: 50 + return CLOCK_MONOTONIC; 51 + } 52 + } 53 + 39 54 static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags) 40 55 { 41 56 struct timespec64 ts; ··· 60 43 *time = ns_to_ktime(arg); 61 44 if (*time < 0) 62 45 return -EINVAL; 63 - return 0; 46 + goto out; 64 47 } 65 48 66 49 if (get_timespec64(&ts, u64_to_user_ptr(arg))) ··· 68 51 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 69 52 return -EINVAL; 70 53 *time = timespec64_to_ktime(ts); 54 + out: 55 + if (flags & IORING_TIMEOUT_ABS) 56 + *time = timens_ktime_to_host(io_flags_to_clock(flags), *time); 71 57 return 0; 72 58 } 73 59 ··· 419 399 420 400 static clockid_t io_timeout_get_clock(struct io_timeout_data *data) 421 401 { 422 - switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { 423 - case IORING_TIMEOUT_BOOTTIME: 424 - return CLOCK_BOOTTIME; 425 - case IORING_TIMEOUT_REALTIME: 426 - return CLOCK_REALTIME; 427 - default: 428 - /* can't happen, vetted at prep time */ 429 - WARN_ON_ONCE(1); 430 - fallthrough; 431 - case 0: 432 - return CLOCK_MONOTONIC; 433 - } 402 + return io_flags_to_clock(data->flags); 434 403 } 435 404 436 405 static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
+5 -1
io_uring/wait.c
··· 5 5 #include <linux/kernel.h> 6 6 #include <linux/sched/signal.h> 7 7 #include <linux/io_uring.h> 8 + #include <linux/time_namespace.h> 8 9 9 10 #include <trace/events/io_uring.h> 10 11 ··· 230 229 231 230 if (ext_arg->ts_set) { 232 231 iowq.timeout = timespec64_to_ktime(ext_arg->ts); 233 - if (!(flags & IORING_ENTER_ABS_TIMER)) 232 + if (flags & IORING_ENTER_ABS_TIMER) 233 + iowq.timeout = timens_ktime_to_host(ctx->clockid, 234 + iowq.timeout); 235 + else 234 236 iowq.timeout = ktime_add(iowq.timeout, start_time); 235 237 } 236 238