Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"Just a collection of fixes that have been queued up since the initial
merge window pull request, the majority of which are targeted for
stable as well.

One bio_set fix that fixes an issue with the dm adoption of cached bio
structs that got introduced in this merge window"

* tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block:
block: Fix potential deadlock in blk_ia_range_sysfs_show()
block: fix bio_clone_blkg_association() to associate with proper blkcg_gq
block: remove useless BUG_ON() in blk_mq_put_tag()
blk-mq: do not update io_ticks with passthrough requests
block: make bioset_exit() fully resilient against being called twice
block: use bio_queue_enter instead of blk_queue_enter in bio_poll
block: document BLK_STS_AGAIN usage
block: take destination bvec offsets into account in bio_copy_data_iter
blk-iolatency: Fix inflight count imbalances and IO hangs on offline
blk-mq: don't touch ->tagset in blk_mq_get_sq_hctx

+83 -80
+6 -3
block/bio.c
··· 722 722 bio_alloc_cache_prune(cache, -1U); 723 723 } 724 724 free_percpu(bs->cache); 725 + bs->cache = NULL; 725 726 } 726 727 727 728 /** ··· 1367 1366 struct bio_vec src_bv = bio_iter_iovec(src, *src_iter); 1368 1367 struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter); 1369 1368 unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len); 1370 - void *src_buf; 1369 + void *src_buf = bvec_kmap_local(&src_bv); 1370 + void *dst_buf = bvec_kmap_local(&dst_bv); 1371 1371 1372 - src_buf = bvec_kmap_local(&src_bv); 1373 - memcpy_to_bvec(&dst_bv, src_buf); 1372 + memcpy(dst_buf, src_buf, bytes); 1373 + 1374 + kunmap_local(dst_buf); 1374 1375 kunmap_local(src_buf); 1375 1376 1376 1377 bio_advance_iter_single(src, src_iter, bytes);
+2 -6
block/blk-cgroup.c
··· 1974 1974 */ 1975 1975 void bio_clone_blkg_association(struct bio *dst, struct bio *src) 1976 1976 { 1977 - if (src->bi_blkg) { 1978 - if (dst->bi_blkg) 1979 - blkg_put(dst->bi_blkg); 1980 - blkg_get(src->bi_blkg); 1981 - dst->bi_blkg = src->bi_blkg; 1982 - } 1977 + if (src->bi_blkg) 1978 + bio_associate_blkg_from_css(dst, bio_blkcg_css(src)); 1983 1979 } 1984 1980 EXPORT_SYMBOL_GPL(bio_clone_blkg_association); 1985 1981
+1 -1
block/blk-core.c
··· 939 939 940 940 blk_flush_plug(current->plug, false); 941 941 942 - if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) 942 + if (bio_queue_enter(bio)) 943 943 return 0; 944 944 if (queue_is_mq(q)) { 945 945 ret = blk_mq_poll(q, cookie, iob, flags);
+1 -6
block/blk-ia-ranges.c
··· 54 54 container_of(attr, struct blk_ia_range_sysfs_entry, attr); 55 55 struct blk_independent_access_range *iar = 56 56 container_of(kobj, struct blk_independent_access_range, kobj); 57 - ssize_t ret; 58 57 59 - mutex_lock(&iar->queue->sysfs_lock); 60 - ret = entry->show(iar, buf); 61 - mutex_unlock(&iar->queue->sysfs_lock); 62 - 63 - return ret; 58 + return entry->show(iar, buf); 64 59 } 65 60 66 61 static const struct sysfs_ops blk_ia_range_sysfs_ops = {
+64 -58
block/blk-iolatency.c
··· 87 87 struct blk_iolatency { 88 88 struct rq_qos rqos; 89 89 struct timer_list timer; 90 - atomic_t enabled; 90 + 91 + /* 92 + * ->enabled is the master enable switch gating the throttling logic and 93 + * inflight tracking. The number of cgroups which have iolat enabled is 94 + * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly 95 + * from ->enable_work with the request_queue frozen. For details, See 96 + * blkiolatency_enable_work_fn(). 97 + */ 98 + bool enabled; 99 + atomic_t enable_cnt; 100 + struct work_struct enable_work; 91 101 }; 92 102 93 103 static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) 94 104 { 95 105 return container_of(rqos, struct blk_iolatency, rqos); 96 - } 97 - 98 - static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat) 99 - { 100 - return atomic_read(&blkiolat->enabled) > 0; 101 106 } 102 107 103 108 struct child_latency_info { ··· 469 464 struct blkcg_gq *blkg = bio->bi_blkg; 470 465 bool issue_as_root = bio_issue_as_root_blkg(bio); 471 466 472 - if (!blk_iolatency_enabled(blkiolat)) 467 + if (!blkiolat->enabled) 473 468 return; 474 469 475 470 while (blkg && blkg->parent) { ··· 599 594 u64 window_start; 600 595 u64 now; 601 596 bool issue_as_root = bio_issue_as_root_blkg(bio); 602 - bool enabled = false; 603 597 int inflight = 0; 604 598 605 599 blkg = bio->bi_blkg; ··· 609 605 if (!iolat) 610 606 return; 611 607 612 - enabled = blk_iolatency_enabled(iolat->blkiolat); 613 - if (!enabled) 608 + if (!iolat->blkiolat->enabled) 614 609 return; 615 610 616 611 now = ktime_to_ns(ktime_get()); ··· 648 645 struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); 649 646 650 647 del_timer_sync(&blkiolat->timer); 648 + flush_work(&blkiolat->enable_work); 651 649 blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency); 652 650 kfree(blkiolat); 653 651 } ··· 720 716 rcu_read_unlock(); 721 717 } 722 718 719 + /** 720 + * blkiolatency_enable_work_fn - Enable or disable iolatency on the device 721 + * @work: enable_work of the blk_iolatency of interest 722 + * 723 + * iolatency needs to keep track of the number of in-flight IOs per cgroup. This 724 + * is relatively expensive as it involves walking up the hierarchy twice for 725 + * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we 726 + * want to disable the in-flight tracking. 727 + * 728 + * We have to make sure that the counting is balanced - we don't want to leak 729 + * the in-flight counts by disabling accounting in the completion path while IOs 730 + * are in flight. This is achieved by ensuring that no IO is in flight by 731 + * freezing the queue while flipping ->enabled. As this requires a sleepable 732 + * context, ->enabled flipping is punted to this work function. 733 + */ 734 + static void blkiolatency_enable_work_fn(struct work_struct *work) 735 + { 736 + struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency, 737 + enable_work); 738 + bool enabled; 739 + 740 + /* 741 + * There can only be one instance of this function running for @blkiolat 742 + * and it's guaranteed to be executed at least once after the latest 743 + * ->enabled_cnt modification. Acting on the latest ->enable_cnt is 744 + * sufficient. 745 + * 746 + * Also, we know @blkiolat is safe to access as ->enable_work is flushed 747 + * in blkcg_iolatency_exit(). 748 + */ 749 + enabled = atomic_read(&blkiolat->enable_cnt); 750 + if (enabled != blkiolat->enabled) { 751 + blk_mq_freeze_queue(blkiolat->rqos.q); 752 + blkiolat->enabled = enabled; 753 + blk_mq_unfreeze_queue(blkiolat->rqos.q); 754 + } 755 + } 756 + 723 757 int blk_iolatency_init(struct request_queue *q) 724 758 { 725 759 struct blk_iolatency *blkiolat; ··· 783 741 } 784 742 785 743 timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); 744 + INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn); 786 745 787 746 return 0; 788 747 } 789 748 790 - /* 791 - * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise 792 - * return 0. 793 - */ 794 - static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) 749 + static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) 795 750 { 796 751 struct iolatency_grp *iolat = blkg_to_lat(blkg); 752 + struct blk_iolatency *blkiolat = iolat->blkiolat; 797 753 u64 oldval = iolat->min_lat_nsec; 798 754 799 755 iolat->min_lat_nsec = val; ··· 799 759 iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, 800 760 BLKIOLATENCY_MAX_WIN_SIZE); 801 761 802 - if (!oldval && val) 803 - return 1; 762 + if (!oldval && val) { 763 + if (atomic_inc_return(&blkiolat->enable_cnt) == 1) 764 + schedule_work(&blkiolat->enable_work); 765 + } 804 766 if (oldval && !val) { 805 767 blkcg_clear_delay(blkg); 806 - return -1; 768 + if (atomic_dec_return(&blkiolat->enable_cnt) == 0) 769 + schedule_work(&blkiolat->enable_work); 807 770 } 808 - return 0; 809 771 } 810 772 811 773 static void iolatency_clear_scaling(struct blkcg_gq *blkg) ··· 839 797 u64 lat_val = 0; 840 798 u64 oldval; 841 799 int ret; 842 - int enable = 0; 843 800 844 801 ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); 845 802 if (ret) ··· 873 832 blkg = ctx.blkg; 874 833 oldval = iolat->min_lat_nsec; 875 834 876 - enable = iolatency_set_min_lat_nsec(blkg, lat_val); 877 - if (enable) { 878 - if (!blk_get_queue(blkg->q)) { 879 - ret = -ENODEV; 880 - goto out; 881 - } 882 - 883 - blkg_get(blkg); 884 - } 885 - 886 - if (oldval != iolat->min_lat_nsec) { 835 + iolatency_set_min_lat_nsec(blkg, lat_val); 836 + if (oldval != iolat->min_lat_nsec) 887 837 iolatency_clear_scaling(blkg); 888 - } 889 - 890 838 ret = 0; 891 839 out: 892 840 blkg_conf_finish(&ctx); 893 - if (ret == 0 && enable) { 894 - struct iolatency_grp *tmp = blkg_to_lat(blkg); 895 - struct blk_iolatency *blkiolat = tmp->blkiolat; 896 - 897 - blk_mq_freeze_queue(blkg->q); 898 - 899 - if (enable == 1) 900 - atomic_inc(&blkiolat->enabled); 901 - else if (enable == -1) 902 - atomic_dec(&blkiolat->enabled); 903 - else 904 - WARN_ON_ONCE(1); 905 - 906 - blk_mq_unfreeze_queue(blkg->q); 907 - 908 - blkg_put(blkg); 909 - blk_put_queue(blkg->q); 910 - } 911 841 return ret ?: nbytes; 912 842 } 913 843 ··· 1017 1005 { 1018 1006 struct iolatency_grp *iolat = pd_to_lat(pd); 1019 1007 struct blkcg_gq *blkg = lat_to_blkg(iolat); 1020 - struct blk_iolatency *blkiolat = iolat->blkiolat; 1021 - int ret; 1022 1008 1023 - ret = iolatency_set_min_lat_nsec(blkg, 0); 1024 - if (ret == 1) 1025 - atomic_inc(&blkiolat->enabled); 1026 - if (ret == -1) 1027 - atomic_dec(&blkiolat->enabled); 1009 + iolatency_set_min_lat_nsec(blkg, 0); 1028 1010 iolatency_clear_scaling(blkg); 1029 1011 } 1030 1012
-1
block/blk-mq-tag.c
··· 228 228 BUG_ON(real_tag >= tags->nr_tags); 229 229 sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu); 230 230 } else { 231 - BUG_ON(tag >= tags->nr_reserved_tags); 232 231 sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu); 233 232 } 234 233 }
+5 -5
block/blk-mq.c
··· 133 133 { 134 134 struct mq_inflight *mi = priv; 135 135 136 - if ((!mi->part->bd_partno || rq->part == mi->part) && 136 + if (rq->part && blk_do_io_stat(rq) && 137 + (!mi->part->bd_partno || rq->part == mi->part) && 137 138 blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) 138 139 mi->inflight[rq_data_dir(rq)]++; 139 140 ··· 2175 2174 */ 2176 2175 static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) 2177 2176 { 2178 - struct blk_mq_hw_ctx *hctx; 2179 - 2177 + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); 2180 2178 /* 2181 2179 * If the IO scheduler does not respect hardware queues when 2182 2180 * dispatching, we just don't bother with multiple HW queues and ··· 2183 2183 * just causes lock contention inside the scheduler and pointless cache 2184 2184 * bouncing. 2185 2185 */ 2186 - hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT, 2187 - raw_smp_processor_id()); 2186 + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx); 2187 + 2188 2188 if (!blk_mq_hctx_stopped(hctx)) 2189 2189 return hctx; 2190 2190 return NULL;
+4
include/linux/blk_types.h
··· 105 105 /* hack for device mapper, don't use elsewhere: */ 106 106 #define BLK_STS_DM_REQUEUE ((__force blk_status_t)11) 107 107 108 + /* 109 + * BLK_STS_AGAIN should only be returned if RQF_NOWAIT is set 110 + * and the bio would block (cf bio_wouldblock_error()) 111 + */ 108 112 #define BLK_STS_AGAIN ((__force blk_status_t)12) 109 113 110 114 /*