Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

block: handle zone management operations completions

The functions blk_zone_wplug_handle_reset_or_finish() and
blk_zone_wplug_handle_reset_all() both modify the zone write pointer
offset of zone write plugs that are the target of a reset, reset all or
finish zone management operation. However, these functions make this
modification before the BIO is executed, so if the zone operation fails,
the modified zone write pointer offsets are left invalid.

Avoid this by modifying the zone write pointer offset of a zone write
plug that is the target of a zone management operation only when the
operation completes. To do so, modify blk_zone_bio_endio() to call the
new function blk_zone_mgmt_bio_endio(), which in turn calls
blk_zone_reset_all_bio_endio(), blk_zone_reset_bio_endio() or
blk_zone_finish_bio_endio(), depending on the operation of the completed
BIO, to update the write pointer offset of the affected zone write
plug(s). These per-operation functions are called only if the BIO
execution was successful.

Fixes: dd291d77cc90 ("block: Introduce zone write plugging")
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Damien Le Moal and committed by
Jens Axboe
efae226c f68ff6bc

+104 -49
+90 -49
block/blk-zoned.c
··· 71 71 struct gendisk *disk; 72 72 }; 73 73 74 + static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) 75 + { 76 + return 1U << disk->zone_wplugs_hash_bits; 77 + } 78 + 74 79 /* 75 80 * Zone write plug flags bits: 76 81 * - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged, ··· 703 698 disk_report_zones_cb, &args); 704 699 } 705 700 706 - static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio, 707 - unsigned int wp_offset) 701 + static void blk_zone_reset_bio_endio(struct bio *bio) 708 702 { 709 703 struct gendisk *disk = bio->bi_bdev->bd_disk; 710 - sector_t sector = bio->bi_iter.bi_sector; 711 704 struct blk_zone_wplug *zwplug; 712 - unsigned long flags; 713 - 714 - /* Conventional zones cannot be reset nor finished. */ 715 - if (!bdev_zone_is_seq(bio->bi_bdev, sector)) { 716 - bio_io_error(bio); 717 - return true; 718 - } 719 705 720 706 /* 721 - * No-wait reset or finish BIOs do not make much sense as the callers 722 - * issue these as blocking operations in most cases. To avoid issues 723 - * the BIO execution potentially failing with BLK_STS_AGAIN, warn about 724 - * REQ_NOWAIT being set and ignore that flag. 725 - */ 726 - if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) 727 - bio->bi_opf &= ~REQ_NOWAIT; 728 - 729 - /* 730 - * If we have a zone write plug, set its write pointer offset to 0 731 - * (reset case) or to the zone size (finish case). This will abort all 732 - * BIOs plugged for the target zone. It is fine as resetting or 733 - * finishing zones while writes are still in-flight will result in the 707 + * If we have a zone write plug, set its write pointer offset to 0. 708 + * This will abort all BIOs plugged for the target zone. It is fine as 709 + * resetting zones while writes are still in-flight will result in the 734 710 * writes failing anyway. 
735 711 */ 736 - zwplug = disk_get_zone_wplug(disk, sector); 712 + zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); 737 713 if (zwplug) { 714 + unsigned long flags; 715 + 738 716 spin_lock_irqsave(&zwplug->lock, flags); 739 - disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset); 717 + disk_zone_wplug_set_wp_offset(disk, zwplug, 0); 740 718 spin_unlock_irqrestore(&zwplug->lock, flags); 741 719 disk_put_zone_wplug(zwplug); 742 720 } 743 - 744 - return false; 745 721 } 746 722 747 - static bool blk_zone_wplug_handle_reset_all(struct bio *bio) 723 + static void blk_zone_reset_all_bio_endio(struct bio *bio) 748 724 { 749 725 struct gendisk *disk = bio->bi_bdev->bd_disk; 750 726 struct blk_zone_wplug *zwplug; 751 727 unsigned long flags; 752 - sector_t sector; 728 + unsigned int i; 753 729 754 - /* 755 - * Set the write pointer offset of all zone write plugs to 0. This will 756 - * abort all plugged BIOs. It is fine as resetting zones while writes 757 - * are still in-flight will result in the writes failing anyway. 758 - */ 759 - for (sector = 0; sector < get_capacity(disk); 760 - sector += disk->queue->limits.chunk_sectors) { 761 - zwplug = disk_get_zone_wplug(disk, sector); 762 - if (zwplug) { 730 + /* Update the condition of all zone write plugs. 
*/ 731 + rcu_read_lock(); 732 + for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) { 733 + hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i], 734 + node) { 763 735 spin_lock_irqsave(&zwplug->lock, flags); 764 736 disk_zone_wplug_set_wp_offset(disk, zwplug, 0); 765 737 spin_unlock_irqrestore(&zwplug->lock, flags); 766 - disk_put_zone_wplug(zwplug); 767 738 } 768 739 } 740 + rcu_read_unlock(); 741 + } 769 742 770 - return false; 743 + static void blk_zone_finish_bio_endio(struct bio *bio) 744 + { 745 + struct block_device *bdev = bio->bi_bdev; 746 + struct gendisk *disk = bdev->bd_disk; 747 + struct blk_zone_wplug *zwplug; 748 + 749 + /* 750 + * If we have a zone write plug, set its write pointer offset to the 751 + * zone size. This will abort all BIOs plugged for the target zone. It 752 + * is fine as resetting zones while writes are still in-flight will 753 + * result in the writes failing anyway. 754 + */ 755 + zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); 756 + if (zwplug) { 757 + unsigned long flags; 758 + 759 + spin_lock_irqsave(&zwplug->lock, flags); 760 + disk_zone_wplug_set_wp_offset(disk, zwplug, 761 + bdev_zone_sectors(bdev)); 762 + spin_unlock_irqrestore(&zwplug->lock, flags); 763 + disk_put_zone_wplug(zwplug); 764 + } 765 + } 766 + 767 + void blk_zone_mgmt_bio_endio(struct bio *bio) 768 + { 769 + /* If the BIO failed, we have nothing to do. 
*/ 770 + if (bio->bi_status != BLK_STS_OK) 771 + return; 772 + 773 + switch (bio_op(bio)) { 774 + case REQ_OP_ZONE_RESET: 775 + blk_zone_reset_bio_endio(bio); 776 + return; 777 + case REQ_OP_ZONE_RESET_ALL: 778 + blk_zone_reset_all_bio_endio(bio); 779 + return; 780 + case REQ_OP_ZONE_FINISH: 781 + blk_zone_finish_bio_endio(bio); 782 + return; 783 + default: 784 + return; 785 + } 771 786 } 772 787 773 788 static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk, ··· 1131 1106 disk_put_zone_wplug(zwplug); 1132 1107 } 1133 1108 1109 + static bool blk_zone_wplug_handle_zone_mgmt(struct bio *bio) 1110 + { 1111 + if (bio_op(bio) != REQ_OP_ZONE_RESET_ALL && 1112 + !bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { 1113 + /* 1114 + * Zone reset and zone finish operations do not apply to 1115 + * conventional zones. 1116 + */ 1117 + bio_io_error(bio); 1118 + return true; 1119 + } 1120 + 1121 + /* 1122 + * No-wait zone management BIOs do not make much sense as the callers 1123 + * issue these as blocking operations in most cases. To avoid issues 1124 + * with the BIO execution potentially failing with BLK_STS_AGAIN, warn 1125 + * about REQ_NOWAIT being set and ignore that flag. 
1126 + */ 1127 + if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) 1128 + bio->bi_opf &= ~REQ_NOWAIT; 1129 + 1130 + return false; 1131 + } 1132 + 1134 1133 /** 1135 1134 * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging 1136 1135 * @bio: The BIO being submitted ··· 1202 1153 case REQ_OP_WRITE_ZEROES: 1203 1154 return blk_zone_wplug_handle_write(bio, nr_segs); 1204 1155 case REQ_OP_ZONE_RESET: 1205 - return blk_zone_wplug_handle_reset_or_finish(bio, 0); 1206 1156 case REQ_OP_ZONE_FINISH: 1207 - return blk_zone_wplug_handle_reset_or_finish(bio, 1208 - bdev_zone_sectors(bdev)); 1209 1157 case REQ_OP_ZONE_RESET_ALL: 1210 - return blk_zone_wplug_handle_reset_all(bio); 1158 + return blk_zone_wplug_handle_zone_mgmt(bio); 1211 1159 default: 1212 1160 return false; 1213 1161 } ··· 1376 1330 put_zwplug: 1377 1331 /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ 1378 1332 disk_put_zone_wplug(zwplug); 1379 - } 1380 - 1381 - static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) 1382 - { 1383 - return 1U << disk->zone_wplugs_hash_bits; 1384 1333 } 1385 1334 1386 1335 void disk_init_zone_resources(struct gendisk *disk)
+14
block/blk.h
··· 489 489 void blk_zone_write_plug_bio_merged(struct bio *bio); 490 490 void blk_zone_write_plug_init_request(struct request *rq); 491 491 void blk_zone_append_update_request_bio(struct request *rq, struct bio *bio); 492 + void blk_zone_mgmt_bio_endio(struct bio *bio); 492 493 void blk_zone_write_plug_bio_endio(struct bio *bio); 493 494 static inline void blk_zone_bio_endio(struct bio *bio) 494 495 { 496 + /* 497 + * Zone management BIOs may impact zone write plugs (e.g. a zone reset 498 + * changes a zone write plug zone write pointer offset), but these 499 + * operations do not go through zone write plugging as they may operate 500 + * on zones that do not have a zone write 501 + * plug. blk_zone_mgmt_bio_endio() handles the potential changes to zone 502 + * write plugs that are present. 503 + */ 504 + if (op_is_zone_mgmt(bio_op(bio))) { 505 + blk_zone_mgmt_bio_endio(bio); 506 + return; 507 + } 508 + 495 509 /* 496 510 * For write BIOs to zoned devices, signal the completion of the BIO so 497 511 * that the next write BIO can be submitted by zone write plugging.