Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus-20190516' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
"A small set of fixes for io_uring.

This contains:

- smp_rmb() cleanup for io_cqring_events() (Jackie)

- io_cqring_wait() simplification (Jackie)

- removal of dead 'ev_flags' passing (me)

- SQ poll CPU affinity verification fix (me)

- SQ poll wait fix (Roman)

- SQE command prep cleanup and fix (Stefan)"

* tag 'for-linus-20190516' of git://git.kernel.dk/linux-block:
io_uring: use wait_event_interruptible for cq_wait conditional wait
io_uring: adjust smp_rmb inside io_cqring_events
io_uring: fix infinite wait in kthread_park() on io_finish_async()
io_uring: remove 'ev_flags' argument
io_uring: fix failure to verify SQ_AFF cpu
io_uring: fix race condition reading SQE data

+31 -57
+31 -57
fs/io_uring.c
··· 231 231 struct task_struct *sqo_thread; /* if using sq thread polling */ 232 232 struct mm_struct *sqo_mm; 233 233 wait_queue_head_t sqo_wait; 234 - unsigned sqo_stop; 235 234 236 235 struct { 237 236 /* CQ ring */ ··· 328 329 #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ 329 330 #define REQ_F_FIXED_FILE 4 /* ctx owns file */ 330 331 #define REQ_F_SEQ_PREV 8 /* sequential with previous */ 331 - #define REQ_F_PREPPED 16 /* prep already done */ 332 - #define REQ_F_IO_DRAIN 32 /* drain existing IO first */ 333 - #define REQ_F_IO_DRAINED 64 /* drain done */ 332 + #define REQ_F_IO_DRAIN 16 /* drain existing IO first */ 333 + #define REQ_F_IO_DRAINED 32 /* drain done */ 334 334 u64 user_data; 335 335 u32 error; /* iopoll result from callback */ 336 336 u32 sequence; ··· 488 490 } 489 491 490 492 static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, 491 - long res, unsigned ev_flags) 493 + long res) 492 494 { 493 495 struct io_uring_cqe *cqe; 494 496 ··· 501 503 if (cqe) { 502 504 WRITE_ONCE(cqe->user_data, ki_user_data); 503 505 WRITE_ONCE(cqe->res, res); 504 - WRITE_ONCE(cqe->flags, ev_flags); 506 + WRITE_ONCE(cqe->flags, 0); 505 507 } else { 506 508 unsigned overflow = READ_ONCE(ctx->cq_ring->overflow); 507 509 ··· 520 522 } 521 523 522 524 static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data, 523 - long res, unsigned ev_flags) 525 + long res) 524 526 { 525 527 unsigned long flags; 526 528 527 529 spin_lock_irqsave(&ctx->completion_lock, flags); 528 - io_cqring_fill_event(ctx, user_data, res, ev_flags); 530 + io_cqring_fill_event(ctx, user_data, res); 529 531 io_commit_cqring(ctx); 530 532 spin_unlock_irqrestore(&ctx->completion_lock, flags); 531 533 ··· 627 629 req = list_first_entry(done, struct io_kiocb, list); 628 630 list_del(&req->list); 629 631 630 - io_cqring_fill_event(ctx, req->user_data, req->error, 0); 632 + io_cqring_fill_event(ctx, req->user_data, req->error); 631 633 (*nr_events)++; 632 634 633 
635 if (refcount_dec_and_test(&req->refs)) { ··· 775 777 776 778 kiocb_end_write(kiocb); 777 779 778 - io_cqring_add_event(req->ctx, req->user_data, res, 0); 780 + io_cqring_add_event(req->ctx, req->user_data, res); 779 781 io_put_req(req); 780 782 } 781 783 ··· 894 896 895 897 if (!req->file) 896 898 return -EBADF; 897 - /* For -EAGAIN retry, everything is already prepped */ 898 - if (req->flags & REQ_F_PREPPED) 899 - return 0; 900 899 901 900 if (force_nonblock && !io_file_supports_async(req->file)) 902 901 force_nonblock = false; ··· 936 941 return -EINVAL; 937 942 kiocb->ki_complete = io_complete_rw; 938 943 } 939 - req->flags |= REQ_F_PREPPED; 940 944 return 0; 941 945 } 942 946 ··· 1210 1216 if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) 1211 1217 return -EINVAL; 1212 1218 1213 - io_cqring_add_event(ctx, user_data, err, 0); 1219 + io_cqring_add_event(ctx, user_data, err); 1214 1220 io_put_req(req); 1215 1221 return 0; 1216 1222 } ··· 1221 1227 1222 1228 if (!req->file) 1223 1229 return -EBADF; 1224 - /* Prep already done (EAGAIN retry) */ 1225 - if (req->flags & REQ_F_PREPPED) 1226 - return 0; 1227 1230 1228 1231 if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) 1229 1232 return -EINVAL; 1230 1233 if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) 1231 1234 return -EINVAL; 1232 1235 1233 - req->flags |= REQ_F_PREPPED; 1234 1236 return 0; 1235 1237 } 1236 1238 ··· 1255 1265 end > 0 ? 
end : LLONG_MAX, 1256 1266 fsync_flags & IORING_FSYNC_DATASYNC); 1257 1267 1258 - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); 1268 + io_cqring_add_event(req->ctx, sqe->user_data, ret); 1259 1269 io_put_req(req); 1260 1270 return 0; 1261 1271 } ··· 1267 1277 1268 1278 if (!req->file) 1269 1279 return -EBADF; 1270 - /* Prep already done (EAGAIN retry) */ 1271 - if (req->flags & REQ_F_PREPPED) 1272 - return 0; 1273 1280 1274 1281 if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) 1275 1282 return -EINVAL; 1276 1283 if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) 1277 1284 return -EINVAL; 1278 1285 1279 - req->flags |= REQ_F_PREPPED; 1280 1286 return ret; 1281 1287 } 1282 1288 ··· 1299 1313 1300 1314 ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags); 1301 1315 1302 - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); 1316 + io_cqring_add_event(req->ctx, sqe->user_data, ret); 1303 1317 io_put_req(req); 1304 1318 return 0; 1305 1319 } ··· 1357 1371 } 1358 1372 spin_unlock_irq(&ctx->completion_lock); 1359 1373 1360 - io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); 1374 + io_cqring_add_event(req->ctx, sqe->user_data, ret); 1361 1375 io_put_req(req); 1362 1376 return 0; 1363 1377 } ··· 1366 1380 __poll_t mask) 1367 1381 { 1368 1382 req->poll.done = true; 1369 - io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0); 1383 + io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask)); 1370 1384 io_commit_cqring(ctx); 1371 1385 } 1372 1386 ··· 1686 1700 io_put_req(req); 1687 1701 1688 1702 if (ret) { 1689 - io_cqring_add_event(ctx, sqe->user_data, ret, 0); 1703 + io_cqring_add_event(ctx, sqe->user_data, ret); 1690 1704 io_put_req(req); 1691 1705 } 1692 1706 ··· 1991 2005 continue; 1992 2006 } 1993 2007 1994 - io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0); 2008 + io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret); 1995 2009 } 1996 2010 1997 2011 if (statep) ··· 2014 2028 set_fs(USER_DS); 2015 2029 2016 2030 
timeout = inflight = 0; 2017 - while (!kthread_should_stop() && !ctx->sqo_stop) { 2031 + while (!kthread_should_park()) { 2018 2032 bool all_fixed, mm_fault = false; 2019 2033 int i; 2020 2034 ··· 2076 2090 smp_mb(); 2077 2091 2078 2092 if (!io_get_sqring(ctx, &sqes[0])) { 2079 - if (kthread_should_stop()) { 2093 + if (kthread_should_park()) { 2080 2094 finish_wait(&ctx->sqo_wait, &wait); 2081 2095 break; 2082 2096 } ··· 2126 2140 mmput(cur_mm); 2127 2141 } 2128 2142 2129 - if (kthread_should_park()) 2130 - kthread_parkme(); 2143 + kthread_parkme(); 2131 2144 2132 2145 return 0; 2133 2146 } ··· 2155 2170 2156 2171 ret = io_submit_sqe(ctx, &s, statep); 2157 2172 if (ret) 2158 - io_cqring_add_event(ctx, s.sqe->user_data, ret, 0); 2173 + io_cqring_add_event(ctx, s.sqe->user_data, ret); 2159 2174 } 2160 2175 io_commit_sqring(ctx); 2161 2176 ··· 2167 2182 2168 2183 static unsigned io_cqring_events(struct io_cq_ring *ring) 2169 2184 { 2185 + /* See comment at the top of this file */ 2186 + smp_rmb(); 2170 2187 return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head); 2171 2188 } 2172 2189 ··· 2181 2194 { 2182 2195 struct io_cq_ring *ring = ctx->cq_ring; 2183 2196 sigset_t ksigmask, sigsaved; 2184 - DEFINE_WAIT(wait); 2185 2197 int ret; 2186 2198 2187 - /* See comment at the top of this file */ 2188 - smp_rmb(); 2189 2199 if (io_cqring_events(ring) >= min_events) 2190 2200 return 0; 2191 2201 ··· 2200 2216 return ret; 2201 2217 } 2202 2218 2203 - do { 2204 - prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE); 2205 - 2206 - ret = 0; 2207 - /* See comment at the top of this file */ 2208 - smp_rmb(); 2209 - if (io_cqring_events(ring) >= min_events) 2210 - break; 2211 - 2212 - schedule(); 2213 - 2219 + ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); 2220 + if (ret == -ERESTARTSYS) 2214 2221 ret = -EINTR; 2215 - if (signal_pending(current)) 2216 - break; 2217 - } while (1); 2218 - 2219 - finish_wait(&ctx->wait, &wait); 2220 2222 2221 2223 
if (sig) 2222 2224 restore_user_sigmask(sig, &sigsaved); ··· 2243 2273 static void io_sq_thread_stop(struct io_ring_ctx *ctx) 2244 2274 { 2245 2275 if (ctx->sqo_thread) { 2246 - ctx->sqo_stop = 1; 2247 - mb(); 2276 + /* 2277 + * The park is a bit of a work-around, without it we get 2278 + * warning spews on shutdown with SQPOLL set and affinity 2279 + * set to a single CPU. 2280 + */ 2248 2281 kthread_park(ctx->sqo_thread); 2249 2282 kthread_stop(ctx->sqo_thread); 2250 2283 ctx->sqo_thread = NULL; ··· 2440 2467 ctx->sq_thread_idle = HZ; 2441 2468 2442 2469 if (p->flags & IORING_SETUP_SQ_AFF) { 2443 - int cpu = array_index_nospec(p->sq_thread_cpu, 2444 - nr_cpu_ids); 2470 + int cpu = p->sq_thread_cpu; 2445 2471 2446 2472 ret = -EINVAL; 2473 + if (cpu >= nr_cpu_ids) 2474 + goto err; 2447 2475 if (!cpu_online(cpu)) 2448 2476 goto err; 2449 2477