Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'io_uring-6.18-20251023' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull io_uring fixes from Jens Axboe:

- Add MAINTAINERS entry for zcrx, mostly so that netdev gets
automatically CC'ed by default on any changes there too.

- Fix for the SQPOLL busy vs work time accounting.

The old code used getrusage(), which was both broken from a thread point
of view (we only care about the SQPOLL thread itself) and vastly
overkill, as only the systime was used. On top of that, also be a bit
smarter about when it's queried, as the accounting used excessive CPU
before this change. Marked for stable as well.

- Fix provided ring buffer auto commit for uring_cmd.

- Fix a few style issues and sparse annotation for a lock.

* tag 'io_uring-6.18-20251023' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
io_uring: fix buffer auto-commit for multishot uring_cmd
io_uring: correct __must_hold annotation in io_install_fixed_file
io_uring zcrx: add MAINTAINERS entry
io_uring: Fix code indentation error
io_uring/sqpoll: be smarter on when to update the stime usage
io_uring/sqpoll: switch away from getrusage() for CPU accounting
io_uring: fix incorrect unlikely() usage in io_waitid_prep()

+85 -39
+9
MAINTAINERS
··· 13116 13116 F: include/uapi/linux/io_uring/ 13117 13117 F: io_uring/ 13118 13118 13119 + IO_URING ZCRX 13120 + M: Pavel Begunkov <asml.silence@gmail.com> 13121 + L: io-uring@vger.kernel.org 13122 + L: netdev@vger.kernel.org 13123 + T: git https://github.com/isilence/linux.git zcrx/for-next 13124 + T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git 13125 + S: Maintained 13126 + F: io_uring/zcrx.* 13127 + 13119 13128 IPMI SUBSYSTEM 13120 13129 M: Corey Minyard <corey@minyard.net> 13121 13130 L: openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
+4 -4
io_uring/fdinfo.c
··· 59 59 { 60 60 struct io_overflow_cqe *ocqe; 61 61 struct io_rings *r = ctx->rings; 62 - struct rusage sq_usage; 63 62 unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; 64 63 unsigned int sq_head = READ_ONCE(r->sq.head); 65 64 unsigned int sq_tail = READ_ONCE(r->sq.tail); ··· 151 152 * thread termination. 152 153 */ 153 154 if (tsk) { 155 + u64 usec; 156 + 154 157 get_task_struct(tsk); 155 158 rcu_read_unlock(); 156 - getrusage(tsk, RUSAGE_SELF, &sq_usage); 159 + usec = io_sq_cpu_usec(tsk); 157 160 put_task_struct(tsk); 158 161 sq_pid = sq->task_pid; 159 162 sq_cpu = sq->sq_cpu; 160 - sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 161 - + sq_usage.ru_stime.tv_usec); 163 + sq_total_time = usec; 162 164 sq_work_time = sq->work_time; 163 165 } else { 164 166 rcu_read_unlock();
+1 -1
io_uring/filetable.c
··· 57 57 58 58 static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, 59 59 u32 slot_index) 60 - __must_hold(&req->ctx->uring_lock) 60 + __must_hold(&ctx->uring_lock) 61 61 { 62 62 struct io_rsrc_node *node; 63 63
+1 -1
io_uring/io_uring.c
··· 879 879 } 880 880 881 881 static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe, 882 - struct io_big_cqe *big_cqe) 882 + struct io_big_cqe *big_cqe) 883 883 { 884 884 struct io_overflow_cqe *ocqe; 885 885
+22 -11
io_uring/kbuf.c
··· 155 155 return 1; 156 156 } 157 157 158 + static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags) 159 + { 160 + /* 161 + * If we came in unlocked, we have no choice but to consume the 162 + * buffer here, otherwise nothing ensures that the buffer won't 163 + * get used by others. This does mean it'll be pinned until the 164 + * IO completes, coming in unlocked means we're being called from 165 + * io-wq context and there may be further retries in async hybrid 166 + * mode. For the locked case, the caller must call commit when 167 + * the transfer completes (or if we get -EAGAIN and must poll of 168 + * retry). 169 + */ 170 + if (issue_flags & IO_URING_F_UNLOCKED) 171 + return true; 172 + 173 + /* uring_cmd commits kbuf upfront, no need to auto-commit */ 174 + if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD) 175 + return true; 176 + return false; 177 + } 178 + 158 179 static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, 159 180 struct io_buffer_list *bl, 160 181 unsigned int issue_flags) ··· 202 181 sel.buf_list = bl; 203 182 sel.addr = u64_to_user_ptr(buf->addr); 204 183 205 - if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { 206 - /* 207 - * If we came in unlocked, we have no choice but to consume the 208 - * buffer here, otherwise nothing ensures that the buffer won't 209 - * get used by others. This does mean it'll be pinned until the 210 - * IO completes, coming in unlocked means we're being called from 211 - * io-wq context and there may be further retries in async hybrid 212 - * mode. For the locked case, the caller must call commit when 213 - * the transfer completes (or if we get -EAGAIN and must poll of 214 - * retry). 215 - */ 184 + if (io_should_commit(req, issue_flags)) { 216 185 io_kbuf_commit(req, sel.buf_list, *len, 1); 217 186 sel.buf_list = NULL; 218 187 }
+1 -1
io_uring/net.c
··· 383 383 return 0; 384 384 385 385 if (sr->flags & IORING_SEND_VECTORIZED) 386 - return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); 386 + return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); 387 387 388 388 return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); 389 389 }
+45 -20
io_uring/sqpoll.c
··· 11 11 #include <linux/audit.h> 12 12 #include <linux/security.h> 13 13 #include <linux/cpuset.h> 14 + #include <linux/sched/cputime.h> 14 15 #include <linux/io_uring.h> 15 16 16 17 #include <uapi/linux/io_uring.h> ··· 170 169 return READ_ONCE(sqd->state); 171 170 } 172 171 173 - static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) 172 + struct io_sq_time { 173 + bool started; 174 + u64 usec; 175 + }; 176 + 177 + u64 io_sq_cpu_usec(struct task_struct *tsk) 178 + { 179 + u64 utime, stime; 180 + 181 + task_cputime_adjusted(tsk, &utime, &stime); 182 + do_div(stime, 1000); 183 + return stime; 184 + } 185 + 186 + static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist) 187 + { 188 + if (!ist->started) 189 + return; 190 + ist->started = false; 191 + sqd->work_time += io_sq_cpu_usec(current) - ist->usec; 192 + } 193 + 194 + static void io_sq_start_worktime(struct io_sq_time *ist) 195 + { 196 + if (ist->started) 197 + return; 198 + ist->started = true; 199 + ist->usec = io_sq_cpu_usec(current); 200 + } 201 + 202 + static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd, 203 + bool cap_entries, struct io_sq_time *ist) 174 204 { 175 205 unsigned int to_submit; 176 206 int ret = 0; ··· 213 181 214 182 if (to_submit || !wq_list_empty(&ctx->iopoll_list)) { 215 183 const struct cred *creds = NULL; 184 + 185 + io_sq_start_worktime(ist); 216 186 217 187 if (ctx->sq_creds != current_cred()) 218 188 creds = override_creds(ctx->sq_creds); ··· 289 255 return retry_list || !llist_empty(&tctx->task_list); 290 256 } 291 257 292 - static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start) 293 - { 294 - struct rusage end; 295 - 296 - getrusage(current, RUSAGE_SELF, &end); 297 - end.ru_stime.tv_sec -= start->ru_stime.tv_sec; 298 - end.ru_stime.tv_usec -= start->ru_stime.tv_usec; 299 - 300 - sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000; 301 - } 302 - 303 258 static int 
io_sq_thread(void *data) 304 259 { 305 260 struct llist_node *retry_list = NULL; 306 261 struct io_sq_data *sqd = data; 307 262 struct io_ring_ctx *ctx; 308 - struct rusage start; 309 263 unsigned long timeout = 0; 310 264 char buf[TASK_COMM_LEN] = {}; 311 265 DEFINE_WAIT(wait); ··· 331 309 mutex_lock(&sqd->lock); 332 310 while (1) { 333 311 bool cap_entries, sqt_spin = false; 312 + struct io_sq_time ist = { }; 334 313 335 314 if (io_sqd_events_pending(sqd) || signal_pending(current)) { 336 315 if (io_sqd_handle_event(sqd)) ··· 340 317 } 341 318 342 319 cap_entries = !list_is_singular(&sqd->ctx_list); 343 - getrusage(current, RUSAGE_SELF, &start); 344 320 list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { 345 - int ret = __io_sq_thread(ctx, cap_entries); 321 + int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist); 346 322 347 323 if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list))) 348 324 sqt_spin = true; ··· 349 327 if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE)) 350 328 sqt_spin = true; 351 329 352 - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) 353 - if (io_napi(ctx)) 330 + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { 331 + if (io_napi(ctx)) { 332 + io_sq_start_worktime(&ist); 354 333 io_napi_sqpoll_busy_poll(ctx); 334 + } 335 + } 336 + 337 + io_sq_update_worktime(sqd, &ist); 355 338 356 339 if (sqt_spin || !time_after(jiffies, timeout)) { 357 - if (sqt_spin) { 358 - io_sq_update_worktime(sqd, &start); 340 + if (sqt_spin) 359 341 timeout = jiffies + sqd->sq_thread_idle; 360 - } 361 342 if (unlikely(need_resched())) { 362 343 mutex_unlock(&sqd->lock); 363 344 cond_resched();
+1
io_uring/sqpoll.h
··· 29 29 void io_put_sq_data(struct io_sq_data *sqd); 30 30 void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); 31 31 int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); 32 + u64 io_sq_cpu_usec(struct task_struct *tsk); 32 33 33 34 static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) 34 35 {
+1 -1
io_uring/waitid.c
··· 250 250 return -EINVAL; 251 251 252 252 iwa = io_uring_alloc_async_data(NULL, req); 253 - if (!unlikely(iwa)) 253 + if (unlikely(!iwa)) 254 254 return -ENOMEM; 255 255 iwa->req = req; 256 256