Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-linus-20190428' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"A set of io_uring fixes that should go into this release. In
particular, this contains:

- The mutex lock vs ctx ref count fix (me)

- Removal of a dead variable (me)

- Two race fixes (Stefan)

- Ring head/tail condition fix for poll full SQ detection (Stefan)"
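On the last fix in that list: the SQ ring head and tail are free-running 32-bit counters that are masked only when used as array indices, never when compared, so "ring is full" has to be an unsigned-distance test that survives counter wraparound. A minimal userspace sketch of the distinction (RING_ENTRIES and ring_full() are illustrative stand-ins, not kernel code); the diff below makes exactly this change in io_uring_poll():

#include <assert.h>
#include <stdint.h>

#define RING_ENTRIES 8u   /* illustrative power-of-two ring size */

/*
 * Free-running u32 counters, as io_uring uses: they are masked only
 * when indexing the ring, so "full" is an unsigned distance, which
 * stays correct even after the counters wrap past UINT32_MAX.
 */
static int ring_full(uint32_t head, uint32_t tail)
{
        return tail - head == RING_ENTRIES;
}

int main(void)
{
        uint32_t head = UINT32_MAX - 3;          /* just before the wrap */
        uint32_t tail = head + RING_ENTRIES;     /* wraps past zero */

        assert(ring_full(head, tail));   /* distance test: correctly full */
        assert(tail + 1 != head);        /* adjacency test fails to see it */
        return 0;
}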

* tag 'for-linus-20190428' of git://git.kernel.dk/linux-block:
io_uring: remove 'state' argument from io_{read,write} path
io_uring: fix poll full SQ detection
io_uring: fix race condition when sq threads goes sleeping
io_uring: fix race condition reading SQ entries
io_uring: fail io_uring_register(2) on a dying io_uring instance
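The "sq threads goes sleeping" fix is a classic store-load ordering problem: before sleeping, the kernel thread stores IORING_SQ_NEED_WAKEUP and then loads the SQ tail to re-check for work, while userspace stores the new tail and then loads the flags to decide whether it must call io_uring_enter(2) to wake the thread. smp_wmb() only orders stores against later stores; ordering a store against a later load needs a full barrier, hence the upgrade to smp_mb() in the diff below. A sketch of the pattern using C11 atomics rather than kernel primitives (struct sq_shared and both helpers are illustrative, not the real uAPI):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define NEED_WAKEUP 1u

/* Hypothetical stand-in for the shared SQ ring state. */
struct sq_shared {
        _Atomic uint32_t tail;   /* advanced by the userspace producer */
        _Atomic uint32_t flags;  /* NEED_WAKEUP is published here */
};

/*
 * Kernel-thread side: publish the wakeup flag, then re-check the tail.
 * The seq_cst fence plays the role of smp_mb(): it forbids the flag
 * store from being reordered after the tail load, so at least one side
 * always observes the other's update.
 */
static bool may_sleep(struct sq_shared *sq, uint32_t cached_head)
{
        atomic_fetch_or_explicit(&sq->flags, NEED_WAKEUP,
                                 memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);  /* store-load barrier */
        return atomic_load_explicit(&sq->tail,
                                    memory_order_relaxed) == cached_head;
}

/*
 * Userspace side mirrors it: publish the new tail, fence, then read the
 * flags to decide whether a wakeup syscall is required.
 */
static bool need_wakeup_syscall(struct sq_shared *sq, uint32_t new_tail)
{
        atomic_store_explicit(&sq->tail, new_tail, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        return atomic_load_explicit(&sq->flags,
                                    memory_order_relaxed) & NEED_WAKEUP;
}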

+26 -16
fs/io_uring.c
@@ -740,7 +740,7 @@
 }
 
 static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
-                      bool force_nonblock, struct io_submit_state *state)
+                      bool force_nonblock)
 {
         const struct io_uring_sqe *sqe = s->sqe;
         struct io_ring_ctx *ctx = req->ctx;
@@ -938,7 +938,7 @@
 }
 
 static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
-                   bool force_nonblock, struct io_submit_state *state)
+                   bool force_nonblock)
 {
         struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
         struct kiocb *kiocb = &req->rw;
@@ -947,7 +947,7 @@
         size_t iov_count;
         int ret;
 
-        ret = io_prep_rw(req, s, force_nonblock, state);
+        ret = io_prep_rw(req, s, force_nonblock);
         if (ret)
                 return ret;
         file = kiocb->ki_filp;
@@ -985,7 +985,7 @@
 }
 
 static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
-                    bool force_nonblock, struct io_submit_state *state)
+                    bool force_nonblock)
 {
         struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
         struct kiocb *kiocb = &req->rw;
@@ -994,7 +994,7 @@
         size_t iov_count;
         int ret;
 
-        ret = io_prep_rw(req, s, force_nonblock, state);
+        ret = io_prep_rw(req, s, force_nonblock);
         if (ret)
                 return ret;
 
@@ -1336,8 +1336,7 @@
 }
 
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
-                           const struct sqe_submit *s, bool force_nonblock,
-                           struct io_submit_state *state)
+                           const struct sqe_submit *s, bool force_nonblock)
 {
         int ret, opcode;
 
@@ -1352,18 +1351,18 @@
         case IORING_OP_READV:
                 if (unlikely(s->sqe->buf_index))
                         return -EINVAL;
-                ret = io_read(req, s, force_nonblock, state);
+                ret = io_read(req, s, force_nonblock);
                 break;
         case IORING_OP_WRITEV:
                 if (unlikely(s->sqe->buf_index))
                         return -EINVAL;
-                ret = io_write(req, s, force_nonblock, state);
+                ret = io_write(req, s, force_nonblock);
                 break;
         case IORING_OP_READ_FIXED:
-                ret = io_read(req, s, force_nonblock, state);
+                ret = io_read(req, s, force_nonblock);
                 break;
         case IORING_OP_WRITE_FIXED:
-                ret = io_write(req, s, force_nonblock, state);
+                ret = io_write(req, s, force_nonblock);
                 break;
         case IORING_OP_FSYNC:
                 ret = io_fsync(req, s->sqe, force_nonblock);
@@ -1456,7 +1455,7 @@
         s->has_user = cur_mm != NULL;
         s->needs_lock = true;
         do {
-                ret = __io_submit_sqe(ctx, req, s, false, NULL);
+                ret = __io_submit_sqe(ctx, req, s, false);
                 /*
                  * We can get EAGAIN for polled IO even though
                  * we're forcing a sync submission from here,
@@ -1622,7 +1621,7 @@
         if (unlikely(ret))
                 goto out;
 
-        ret = __io_submit_sqe(ctx, req, s, true, state);
+        ret = __io_submit_sqe(ctx, req, s, true);
         if (ret == -EAGAIN) {
                 struct io_uring_sqe *sqe_copy;
 
@@ -1738,7 +1737,8 @@
         head = ctx->cached_sq_head;
         /* See comment at the top of this file */
         smp_rmb();
-        if (head == READ_ONCE(ring->r.tail))
+        /* make sure SQ entry isn't read before tail */
+        if (head == smp_load_acquire(&ring->r.tail))
                 return false;
 
         head = READ_ONCE(ring->array[head & ctx->sq_mask]);
@@ -1864,7 +1864,8 @@
 
                         /* Tell userspace we may need a wakeup call */
                         ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP;
-                        smp_wmb();
+                        /* make sure to read SQ tail after writing flags */
+                        smp_mb();
 
                         if (!io_get_sqring(ctx, &sqes[0])) {
                                 if (kthread_should_stop()) {
@@ -2575,7 +2576,8 @@
         poll_wait(file, &ctx->cq_wait, wait);
         /* See comment at the top of this file */
         smp_rmb();
-        if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head)
+        if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head !=
+            ctx->sq_ring->ring_entries)
                 mask |= EPOLLOUT | EPOLLWRNORM;
         if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail)
                 mask |= EPOLLIN | EPOLLRDNORM;
@@ -2935,6 +2937,14 @@
         __acquires(ctx->uring_lock)
 {
         int ret;
+
+        /*
+         * We're inside the ring mutex, if the ref is already dying, then
+         * someone else killed the ctx or is already going through
+         * io_uring_register().
+         */
+        if (percpu_ref_is_dying(&ctx->refs))
+                return -ENXIO;
 
         percpu_ref_kill(&ctx->refs);
 
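The smp_load_acquire() change in io_get_sqring() is the consumer half of a release/acquire pairing: userspace fills in the SQE and then publishes the new tail with a release store, and the kernel's acquire load of the tail guarantees that its subsequent reads of the array and SQE cannot be satisfied before the tail check. A self-contained sketch of that pairing in C11 atomics (the ring layout here is simplified and illustrative, not the real io_uring ABI):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define RING_SIZE 8u
#define RING_MASK (RING_SIZE - 1)

struct sqe { uint64_t user_data; };        /* simplified entry payload */

struct sq_ring {
        _Atomic uint32_t tail;             /* free-running producer index */
        uint32_t array[RING_SIZE];         /* indirection, as in io_uring */
        struct sqe sqes[RING_SIZE];
};

/*
 * Producer (userspace): fill the entry first, then publish the tail with
 * release semantics so the entry contents happen-before the tail store.
 */
static void sq_publish(struct sq_ring *sq, uint32_t tail, uint64_t data)
{
        sq->sqes[tail & RING_MASK].user_data = data;
        sq->array[tail & RING_MASK] = tail & RING_MASK;
        atomic_store_explicit(&sq->tail, tail + 1, memory_order_release);
}

/*
 * Consumer (the kernel side in the real code): the acquire load pairs
 * with the release store above, so if the tail has advanced, the entry
 * it covers is guaranteed to be visible -- the property the fix restores
 * by replacing READ_ONCE() with smp_load_acquire().
 */
static bool sq_consume(struct sq_ring *sq, uint32_t head, struct sqe *out)
{
        if (head == atomic_load_explicit(&sq->tail, memory_order_acquire))
                return false;              /* ring is empty */
        *out = sq->sqes[sq->array[head & RING_MASK]];
        return true;
}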