Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus-2019-10-26' of git://git.kernel.dk/linux-block

Pull block and io_uring fixes from Jens Axboe:
"A bit bigger than usual at this point in time, mostly due to some good
bug hunting work by Pavel that resulted in three io_uring fixes from
him and two from me. Anyway, this pull request contains:

- Revert of the submit-and-wait optimization for io_uring, it can't
always be done safely. It depends on commands always making
progress on their own, which isn't necessarily the case outside of
strict file IO. (me)

- Series of two patches from me and three from Pavel, fixing issues
with shared data and sequencing for io_uring.

- Lastly, two timeout sequence fixes for io_uring (zhangyi)

- Two nbd patches fixing races (Josef)

- libahci regulator_get_optional() fix (Mark)"

* tag 'for-linus-2019-10-26' of git://git.kernel.dk/linux-block:
nbd: verify socket is supported during setup
ata: libahci_platform: Fix regulator_get_optional() misuse
nbd: handle racing with error'ed out commands
nbd: protect cmd->status with cmd->lock
io_uring: fix bad inflight accounting for SETUP_IOPOLL|SETUP_SQTHREAD
io_uring: used cached copies of sq->dropped and cq->overflow
io_uring: Fix race for sqes with userspace
io_uring: Fix broken links with offloading
io_uring: Fix corrupted user_data
io_uring: correct timeout req sequence when inserting a new entry
io_uring : correct timeout req sequence when waiting timeout
io_uring: revert "io_uring: optimize submit_and_wait API"

+161 -125
+14 -24
drivers/ata/libahci_platform.c
··· 153 153 { 154 154 int rc, i; 155 155 156 - if (hpriv->ahci_regulator) { 157 - rc = regulator_enable(hpriv->ahci_regulator); 158 - if (rc) 159 - return rc; 160 - } 156 + rc = regulator_enable(hpriv->ahci_regulator); 157 + if (rc) 158 + return rc; 161 159 162 - if (hpriv->phy_regulator) { 163 - rc = regulator_enable(hpriv->phy_regulator); 164 - if (rc) 165 - goto disable_ahci_pwrs; 166 - } 160 + rc = regulator_enable(hpriv->phy_regulator); 161 + if (rc) 162 + goto disable_ahci_pwrs; 167 163 168 164 for (i = 0; i < hpriv->nports; i++) { 169 165 if (!hpriv->target_pwrs[i]) ··· 177 181 if (hpriv->target_pwrs[i]) 178 182 regulator_disable(hpriv->target_pwrs[i]); 179 183 180 - if (hpriv->phy_regulator) 181 - regulator_disable(hpriv->phy_regulator); 184 + regulator_disable(hpriv->phy_regulator); 182 185 disable_ahci_pwrs: 183 - if (hpriv->ahci_regulator) 184 - regulator_disable(hpriv->ahci_regulator); 186 + regulator_disable(hpriv->ahci_regulator); 185 187 return rc; 186 188 } 187 189 EXPORT_SYMBOL_GPL(ahci_platform_enable_regulators); ··· 201 207 regulator_disable(hpriv->target_pwrs[i]); 202 208 } 203 209 204 - if (hpriv->ahci_regulator) 205 - regulator_disable(hpriv->ahci_regulator); 206 - if (hpriv->phy_regulator) 207 - regulator_disable(hpriv->phy_regulator); 210 + regulator_disable(hpriv->ahci_regulator); 211 + regulator_disable(hpriv->phy_regulator); 208 212 } 209 213 EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators); 210 214 /** ··· 351 359 struct regulator *target_pwr; 352 360 int rc = 0; 353 361 354 - target_pwr = regulator_get_optional(dev, "target"); 362 + target_pwr = regulator_get(dev, "target"); 355 363 356 364 if (!IS_ERR(target_pwr)) 357 365 hpriv->target_pwrs[port] = target_pwr; ··· 428 436 hpriv->clks[i] = clk; 429 437 } 430 438 431 - hpriv->ahci_regulator = devm_regulator_get_optional(dev, "ahci"); 439 + hpriv->ahci_regulator = devm_regulator_get(dev, "ahci"); 432 440 if (IS_ERR(hpriv->ahci_regulator)) { 433 441 rc = PTR_ERR(hpriv->ahci_regulator); 434 - if (rc == -EPROBE_DEFER) 442 + if (rc != 0) 435 443 goto err_out; 436 - rc = 0; 437 - hpriv->ahci_regulator = NULL; 438 444 } 439 445 440 - hpriv->phy_regulator = devm_regulator_get_optional(dev, "phy"); 446 + hpriv->phy_regulator = devm_regulator_get(dev, "phy"); 441 447 if (IS_ERR(hpriv->phy_regulator)) { 442 448 rc = PTR_ERR(hpriv->phy_regulator); 443 449 if (rc == -EPROBE_DEFER)
+34 -7
drivers/block/nbd.c
··· 385 385 struct nbd_device *nbd = cmd->nbd; 386 386 struct nbd_config *config; 387 387 388 + if (!mutex_trylock(&cmd->lock)) 389 + return BLK_EH_RESET_TIMER; 390 + 388 391 if (!refcount_inc_not_zero(&nbd->config_refs)) { 389 392 cmd->status = BLK_STS_TIMEOUT; 393 + mutex_unlock(&cmd->lock); 390 394 goto done; 391 395 } 392 396 config = nbd->config; 393 - 394 - if (!mutex_trylock(&cmd->lock)) { 395 - nbd_config_put(nbd); 396 - return BLK_EH_RESET_TIMER; 397 - } 398 397 399 398 if (config->num_connections > 1) { 400 399 dev_err_ratelimited(nbd_to_dev(nbd), ··· 710 711 ret = -ENOENT; 711 712 goto out; 712 713 } 714 + if (cmd->status != BLK_STS_OK) { 715 + dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n", 716 + req); 717 + ret = -ENOENT; 718 + goto out; 719 + } 713 720 if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { 714 721 dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", 715 722 req); ··· 797 792 { 798 793 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 799 794 795 + mutex_lock(&cmd->lock); 800 796 cmd->status = BLK_STS_IOERR; 797 + mutex_unlock(&cmd->lock); 798 + 801 799 blk_mq_complete_request(req); 802 800 return true; 803 801 } ··· 980 972 return ret; 981 973 } 982 974 975 + static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd, 976 + int *err) 977 + { 978 + struct socket *sock; 979 + 980 + *err = 0; 981 + sock = sockfd_lookup(fd, err); 982 + if (!sock) 983 + return NULL; 984 + 985 + if (sock->ops->shutdown == sock_no_shutdown) { 986 + dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n"); 987 + *err = -EINVAL; 988 + return NULL; 989 + } 990 + 991 + return sock; 992 + } 993 + 983 994 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, 984 995 bool netlink) 985 996 { ··· 1008 981 struct nbd_sock *nsock; 1009 982 int err; 1010 983 1011 - sock = sockfd_lookup(arg, &err); 984 + sock = nbd_get_socket(nbd, arg, &err); 1012 985 if (!sock) 1013 986 return err; 1014 987 ··· 1060 1033 int i; 1061 1034 int err; 1062 1035 1063 - sock = sockfd_lookup(arg, &err); 1036 + sock = nbd_get_socket(nbd, arg, &err); 1064 1037 if (!sock) 1065 1038 return err; 1066 1039
+113 -94
fs/io_uring.c
··· 197 197 unsigned sq_entries; 198 198 unsigned sq_mask; 199 199 unsigned sq_thread_idle; 200 + unsigned cached_sq_dropped; 200 201 struct io_uring_sqe *sq_sqes; 201 202 202 203 struct list_head defer_list; ··· 213 212 214 213 struct { 215 214 unsigned cached_cq_tail; 215 + atomic_t cached_cq_overflow; 216 216 unsigned cq_entries; 217 217 unsigned cq_mask; 218 218 struct wait_queue_head cq_wait; ··· 422 420 static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, 423 421 struct io_kiocb *req) 424 422 { 425 - return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped; 423 + return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped 424 + + atomic_read(&ctx->cached_cq_overflow); 426 425 } 427 426 428 427 static inline bool io_sequence_defer(struct io_ring_ctx *ctx, ··· 570 567 WRITE_ONCE(cqe->res, res); 571 568 WRITE_ONCE(cqe->flags, 0); 572 569 } else { 573 - unsigned overflow = READ_ONCE(ctx->rings->cq_overflow); 574 - 575 - WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1); 570 + WRITE_ONCE(ctx->rings->cq_overflow, 571 + atomic_inc_return(&ctx->cached_cq_overflow)); 576 572 } 577 573 } 578 574 ··· 737 735 return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head); 738 736 } 739 737 738 + static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) 739 + { 740 + struct io_rings *rings = ctx->rings; 741 + 742 + /* make sure SQ entry isn't read before tail */ 743 + return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; 744 + } 745 + 740 746 /* 741 747 * Find and free completed poll iocbs 742 748 */ ··· 874 864 mutex_unlock(&ctx->uring_lock); 875 865 } 876 866 877 - static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, 878 - long min) 867 + static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, 868 + long min) 879 869 { 880 - int iters, ret = 0; 870 + int iters = 0, ret = 0; 881 871 882 - /* 883 - * We disallow the app entering submit/complete with polling, but we 884 - * still need to lock the ring to prevent racing with polled issue 885 - * that got punted to a workqueue. 886 - */ 887 - mutex_lock(&ctx->uring_lock); 888 - 889 - iters = 0; 890 872 do { 891 873 int tmin = 0; 892 874 ··· 914 912 ret = 0; 915 913 } while (min && !*nr_events && !need_resched()); 916 914 915 + return ret; 916 + } 917 + 918 + static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, 919 + long min) 920 + { 921 + int ret; 922 + 923 + /* 924 + * We disallow the app entering submit/complete with polling, but we 925 + * still need to lock the ring to prevent racing with polled issue 926 + * that got punted to a workqueue. 927 + */ 928 + mutex_lock(&ctx->uring_lock); 929 + ret = __io_iopoll_check(ctx, nr_events, min); 917 930 mutex_unlock(&ctx->uring_lock); 918 931 return ret; 919 932 } ··· 1894 1877 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) 1895 1878 { 1896 1879 struct io_ring_ctx *ctx; 1897 - struct io_kiocb *req; 1880 + struct io_kiocb *req, *prev; 1898 1881 unsigned long flags; 1899 1882 1900 1883 req = container_of(timer, struct io_kiocb, timeout.timer); ··· 1902 1885 atomic_inc(&ctx->cq_timeouts); 1903 1886 1904 1887 spin_lock_irqsave(&ctx->completion_lock, flags); 1888 + /* 1889 + * Adjust the reqs sequence before the current one because it 1890 + * will consume a slot in the cq_ring and the the cq_tail pointer 1891 + * will be increased, otherwise other timeout reqs may return in 1892 + * advance without waiting for enough wait_nr. 1893 + */ 1894 + prev = req; 1895 + list_for_each_entry_continue_reverse(prev, &ctx->timeout_list, list) 1896 + prev->sequence++; 1905 1897 list_del(&req->list); 1906 1898 1907 1899 io_cqring_fill_event(ctx, req->user_data, -ETIME); ··· 1929 1903 struct io_ring_ctx *ctx = req->ctx; 1930 1904 struct list_head *entry; 1931 1905 struct timespec64 ts; 1906 + unsigned span = 0; 1932 1907 1933 1908 if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) 1934 1909 return -EINVAL; ··· 1978 1951 if (ctx->cached_sq_head < nxt_sq_head) 1979 1952 tmp += UINT_MAX; 1980 1953 1981 - if (tmp >= tmp_nxt) 1954 + if (tmp > tmp_nxt) 1982 1955 break; 1956 + 1957 + /* 1958 + * Sequence of reqs after the insert one and itself should 1959 + * be adjusted because each timeout req consumes a slot. 1960 + */ 1961 + span++; 1962 + nxt->sequence++; 1983 1963 } 1964 + req->sequence -= span; 1984 1965 list_add(&req->list, entry); 1985 1966 spin_unlock_irq(&ctx->completion_lock); 1986 1967 ··· 2327 2292 } 2328 2293 2329 2294 static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, 2330 - struct sqe_submit *s, bool force_nonblock) 2295 + struct sqe_submit *s) 2331 2296 { 2332 2297 int ret; 2333 2298 2334 - ret = __io_submit_sqe(ctx, req, s, force_nonblock); 2299 + ret = __io_submit_sqe(ctx, req, s, true); 2335 2300 2336 2301 /* 2337 2302 * We async punt it if the file wasn't marked NOWAIT, or if the file ··· 2378 2343 } 2379 2344 2380 2345 static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, 2381 - struct sqe_submit *s, bool force_nonblock) 2346 + struct sqe_submit *s) 2382 2347 { 2383 2348 int ret; 2384 2349 ··· 2391 2356 return 0; 2392 2357 } 2393 2358 2394 - return __io_queue_sqe(ctx, req, s, force_nonblock); 2359 + return __io_queue_sqe(ctx, req, s); 2395 2360 } 2396 2361 2397 2362 static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req, 2398 - struct sqe_submit *s, struct io_kiocb *shadow, 2399 - bool force_nonblock) 2363 + struct sqe_submit *s, struct io_kiocb *shadow) 2400 2364 { 2401 2365 int ret; 2402 2366 int need_submit = false; 2403 2367 2404 2368 if (!shadow) 2405 - return io_queue_sqe(ctx, req, s, force_nonblock); 2369 + return io_queue_sqe(ctx, req, s); 2406 2370 2407 2371 /* 2408 2372 * Mark the first IO in link list as DRAIN, let all the following ··· 2430 2396 spin_unlock_irq(&ctx->completion_lock); 2431 2397 2432 2398 if (need_submit) 2433 - return __io_queue_sqe(ctx, req, s, force_nonblock); 2399 + return __io_queue_sqe(ctx, req, s); 2434 2400 2435 2401 return 0; 2436 2402 } ··· 2438 2404 #define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK) 2439 2405 2440 2406 static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, 2441 - struct io_submit_state *state, struct io_kiocb **link, 2442 - bool force_nonblock) 2407 + struct io_submit_state *state, struct io_kiocb **link) 2443 2408 { 2444 2409 struct io_uring_sqe *sqe_copy; 2445 2410 struct io_kiocb *req; ··· 2464 2431 io_cqring_add_event(ctx, s->sqe->user_data, ret); 2465 2432 return; 2466 2433 } 2434 + 2435 + req->user_data = s->sqe->user_data; 2467 2436 2468 2437 /* 2469 2438 * If we already have a head request, queue this one for async ··· 2493 2458 INIT_LIST_HEAD(&req->link_list); 2494 2459 *link = req; 2495 2460 } else { 2496 - io_queue_sqe(ctx, req, s, force_nonblock); 2461 + io_queue_sqe(ctx, req, s); 2497 2462 } 2498 2463 } 2499 2464 ··· 2573 2538 2574 2539 /* drop invalid entries */ 2575 2540 ctx->cached_sq_head++; 2576 - rings->sq_dropped++; 2541 + ctx->cached_sq_dropped++; 2542 + WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped); 2577 2543 return false; 2578 2544 } 2579 2545 2580 - static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, 2581 - unsigned int nr, bool has_user, bool mm_fault) 2546 + static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, 2547 + bool has_user, bool mm_fault) 2582 2548 { 2583 2549 struct io_submit_state state, *statep = NULL; 2584 2550 struct io_kiocb *link = NULL; ··· 2593 2557 } 2594 2558 2595 2559 for (i = 0; i < nr; i++) { 2560 + struct sqe_submit s; 2561 + 2562 + if (!io_get_sqring(ctx, &s)) 2563 + break; 2564 + 2596 2565 /* 2597 2566 * If previous wasn't linked and we have a linked command, 2598 2567 * that's the end of the chain. Submit the previous link. 2599 2568 */ 2600 2569 if (!prev_was_link && link) { 2601 - io_queue_link_head(ctx, link, &link->submit, shadow_req, 2602 - true); 2570 + io_queue_link_head(ctx, link, &link->submit, shadow_req); 2603 2571 link = NULL; 2604 2572 shadow_req = NULL; 2605 2573 } 2606 - prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0; 2574 + prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0; 2607 2575 2608 - if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) { 2576 + if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) { 2609 2577 if (!shadow_req) { 2610 2578 shadow_req = io_get_req(ctx, NULL); 2611 2579 if (unlikely(!shadow_req)) ··· 2617 2577 shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN); 2618 2578 refcount_dec(&shadow_req->refs); 2619 2579 } 2620 - shadow_req->sequence = sqes[i].sequence; 2580 + shadow_req->sequence = s.sequence; 2621 2581 } 2622 2582 2623 2583 out: 2624 2584 if (unlikely(mm_fault)) { 2625 - io_cqring_add_event(ctx, sqes[i].sqe->user_data, 2585 + io_cqring_add_event(ctx, s.sqe->user_data, 2626 2586 -EFAULT); 2627 2587 } else { 2628 - sqes[i].has_user = has_user; 2629 - sqes[i].needs_lock = true; 2630 - sqes[i].needs_fixed_file = true; 2631 - io_submit_sqe(ctx, &sqes[i], statep, &link, true); 2588 + s.has_user = has_user; 2589 + s.needs_lock = true; 2590 + s.needs_fixed_file = true; 2591 + io_submit_sqe(ctx, &s, statep, &link); 2632 2592 submitted++; 2633 2593 } 2634 2594 } 2635 2595 2636 2596 if (link) 2637 - io_queue_link_head(ctx, link, &link->submit, shadow_req, true); 2597 + io_queue_link_head(ctx, link, &link->submit, shadow_req); 2638 2598 if (statep) 2639 2599 io_submit_state_end(&state); 2640 2600 ··· 2643 2603 2644 2604 static int io_sq_thread(void *data) 2645 2605 { 2646 - struct sqe_submit sqes[IO_IOPOLL_BATCH]; 2647 2606 struct io_ring_ctx *ctx = data; 2648 2607 struct mm_struct *cur_mm = NULL; 2649 2608 mm_segment_t old_fs; ··· 2657 2618 2658 2619 timeout = inflight = 0; 2659 2620 while (!kthread_should_park()) { 2660 - bool all_fixed, mm_fault = false; 2661 - int i; 2621 + bool mm_fault = false; 2622 + unsigned int to_submit; 2662 2623 2663 2624 if (inflight) { 2664 2625 unsigned nr_events = 0; 2665 2626 2666 2627 if (ctx->flags & IORING_SETUP_IOPOLL) { 2667 - io_iopoll_check(ctx, &nr_events, 0); 2628 + /* 2629 + * inflight is the count of the maximum possible 2630 + * entries we submitted, but it can be smaller 2631 + * if we dropped some of them. If we don't have 2632 + * poll entries available, then we know that we 2633 + * have nothing left to poll for. Reset the 2634 + * inflight count to zero in that case. 2635 + */ 2636 + mutex_lock(&ctx->uring_lock); 2637 + if (!list_empty(&ctx->poll_list)) 2638 + __io_iopoll_check(ctx, &nr_events, 0); 2639 + else 2640 + inflight = 0; 2641 + mutex_unlock(&ctx->uring_lock); 2668 2642 } else { 2669 2643 /* 2670 2644 * Normal IO, just pretend everything completed. ··· 2691 2639 timeout = jiffies + ctx->sq_thread_idle; 2692 2640 } 2693 2641 2694 - if (!io_get_sqring(ctx, &sqes[0])) { 2642 + to_submit = io_sqring_entries(ctx); 2643 + if (!to_submit) { 2695 2644 /* 2696 2645 * We're polling. If we're within the defined idle 2697 2646 * period, then let us spin without work before going ··· 2723 2670 /* make sure to read SQ tail after writing flags */ 2724 2671 smp_mb(); 2725 2672 2726 - if (!io_get_sqring(ctx, &sqes[0])) { 2673 + to_submit = io_sqring_entries(ctx); 2674 + if (!to_submit) { 2727 2675 if (kthread_should_park()) { 2728 2676 finish_wait(&ctx->sqo_wait, &wait); 2729 2677 break; ··· 2742 2688 ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; 2743 2689 } 2744 2690 2745 - i = 0; 2746 - all_fixed = true; 2747 - do { 2748 - if (all_fixed && io_sqe_needs_user(sqes[i].sqe)) 2749 - all_fixed = false; 2750 - 2751 - i++; 2752 - if (i == ARRAY_SIZE(sqes)) 2753 - break; 2754 - } while (io_get_sqring(ctx, &sqes[i])); 2755 - 2756 2691 /* Unless all new commands are FIXED regions, grab mm */ 2757 - if (!all_fixed && !cur_mm) { 2692 + if (!cur_mm) { 2758 2693 mm_fault = !mmget_not_zero(ctx->sqo_mm); 2759 2694 if (!mm_fault) { 2760 2695 use_mm(ctx->sqo_mm); ··· 2751 2708 } 2752 2709 } 2753 2710 2754 - inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL, 2755 - mm_fault); 2711 + to_submit = min(to_submit, ctx->sq_entries); 2712 + inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL, 2713 + mm_fault); 2756 2714 2757 2715 /* Commit SQ ring head once we've consumed all SQEs */ 2758 2716 io_commit_sqring(ctx); ··· 2770 2726 return 0; 2771 2727 } 2772 2728 2773 - static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit, 2774 - bool block_for_last) 2729 + static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) 2775 2730 { 2776 2731 struct io_submit_state state, *statep = NULL; 2777 2732 struct io_kiocb *link = NULL; ··· 2784 2741 } 2785 2742 2786 2743 for (i = 0; i < to_submit; i++) { 2787 - bool force_nonblock = true; 2788 2744 struct sqe_submit s; 2789 2745 2790 2746 if (!io_get_sqring(ctx, &s)) ··· 2794 2752 * that's the end of the chain. Submit the previous link. 2795 2753 */ 2796 2754 if (!prev_was_link && link) { 2797 - io_queue_link_head(ctx, link, &link->submit, shadow_req, 2798 - force_nonblock); 2755 + io_queue_link_head(ctx, link, &link->submit, shadow_req); 2799 2756 link = NULL; 2800 2757 shadow_req = NULL; 2801 2758 } ··· 2816 2775 s.needs_lock = false; 2817 2776 s.needs_fixed_file = false; 2818 2777 submit++; 2819 - 2820 - /* 2821 - * The caller will block for events after submit, submit the 2822 - * last IO non-blocking. This is either the only IO it's 2823 - * submitting, or it already submitted the previous ones. This 2824 - * improves performance by avoiding an async punt that we don't 2825 - * need to do. 2826 - */ 2827 - if (block_for_last && submit == to_submit) 2828 - force_nonblock = false; 2829 - 2830 - io_submit_sqe(ctx, &s, statep, &link, force_nonblock); 2778 + io_submit_sqe(ctx, &s, statep, &link); 2831 2779 } 2832 - io_commit_sqring(ctx); 2833 2780 2834 2781 if (link) 2835 - io_queue_link_head(ctx, link, &link->submit, shadow_req, 2836 - !block_for_last); 2782 + io_queue_link_head(ctx, link, &link->submit, shadow_req); 2837 2783 if (statep) 2838 2784 io_submit_state_end(statep); 2785 + 2786 + io_commit_sqring(ctx); 2839 2787 2840 2788 return submit; 2841 2789 } ··· 3666 3636 wake_up(&ctx->sqo_wait); 3667 3637 submitted = to_submit; 3668 3638 } else if (to_submit) { 3669 - bool block_for_last = false; 3670 - 3671 3639 to_submit = min(to_submit, ctx->sq_entries); 3672 3640 3673 - /* 3674 - * Allow last submission to block in a series, IFF the caller 3675 - * asked to wait for events and we don't currently have 3676 - * enough. This potentially avoids an async punt. 3677 - */ 3678 - if (to_submit == min_complete && 3679 - io_cqring_events(ctx->rings) < min_complete) 3680 - block_for_last = true; 3681 - 3682 3641 mutex_lock(&ctx->uring_lock); 3683 - submitted = io_ring_submit(ctx, to_submit, block_for_last); 3642 + submitted = io_ring_submit(ctx, to_submit); 3684 3643 mutex_unlock(&ctx->uring_lock); 3685 3644 } 3686 3645 if (flags & IORING_ENTER_GETEVENTS) {