Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'cached-zones' into for-6.19/block

This patch series implements cached report zones using information
from the block layer zone write plugs and new zone condition tracking.
This avoids having to execute slow report zones commands on the device
when, for instance, mounting file systems, which can significantly speed
things up, especially in setups with multiple SMR HDDs (e.g. a BTRFS
RAID volume).

The first patch improves handling of zone management commands. Patch 2
fixes zone resource updates and the following 3 patches clean up the zone
code in preparation for introducing cached zone report support.
Patches 6 to 13 implement cached report zones and make it available to
users with a new ioctl() command.

Finally, patches 14 and 15 introduce the use of cached report zones in
the mount operation of XFS and BTRFS.

Link: https://lore.kernel.org/linux-block/20251104212249.1075412-1-dlemoal@kernel.org/
Signed-off-by: Jens Axboe <axboe@kernel.dk>

* cached-zones:
xfs: use blkdev_report_zones_cached()
btrfs: use blkdev_report_zones_cached()
block: add zone write plug condition to debugfs zone_wplugs
block: improve zone_wplugs debugfs attribute output
block: introduce BLKREPORTZONESV2 ioctl
block: introduce blkdev_report_zones_cached()
block: introduce blkdev_get_zone_info()
block: refactor blkdev_report_zones() code
block: track zone conditions
block: use zone condition to determine conventional zones
block: reorganize struct blk_zone_wplug
block: introduce disk_report_zone()
block: cleanup blkdev_report_zones()
block: freeze queue when updating zone resources
block: handle zone management operations completions

+792 -284
+635 -179
block/blk-zoned.c
··· 33 33 ZONE_COND_NAME(READONLY), 34 34 ZONE_COND_NAME(FULL), 35 35 ZONE_COND_NAME(OFFLINE), 36 + ZONE_COND_NAME(ACTIVE), 36 37 }; 37 38 #undef ZONE_COND_NAME 38 39 39 40 /* 40 41 * Per-zone write plug. 41 42 * @node: hlist_node structure for managing the plug using a hash table. 43 + * @bio_list: The list of BIOs that are currently plugged. 44 + * @bio_work: Work struct to handle issuing of plugged BIOs 45 + * @rcu_head: RCU head to free zone write plugs with an RCU grace period. 46 + * @disk: The gendisk the plug belongs to. 47 + * @lock: Spinlock to atomically manipulate the plug. 42 48 * @ref: Zone write plug reference counter. A zone write plug reference is 43 49 * always at least 1 when the plug is hashed in the disk plug hash table. 44 50 * The reference is incremented whenever a new BIO needing plugging is ··· 54 48 * reference is dropped whenever the zone of the zone write plug is reset, 55 49 * finished and when the zone becomes full (last write BIO to the zone 56 50 * completes). 57 - * @lock: Spinlock to atomically manipulate the plug. 58 51 * @flags: Flags indicating the plug state. 59 52 * @zone_no: The number of the zone the plug is managing. 60 53 * @wp_offset: The zone write pointer location relative to the start of the zone 61 54 * as a number of 512B sectors. 62 - * @bio_list: The list of BIOs that are currently plugged. 63 - * @bio_work: Work struct to handle issuing of plugged BIOs 64 - * @rcu_head: RCU head to free zone write plugs with an RCU grace period. 65 - * @disk: The gendisk the plug belongs to. 
55 + * @cond: Condition of the zone 66 56 */ 67 57 struct blk_zone_wplug { 68 58 struct hlist_node node; 69 - refcount_t ref; 70 - spinlock_t lock; 71 - unsigned int flags; 72 - unsigned int zone_no; 73 - unsigned int wp_offset; 74 59 struct bio_list bio_list; 75 60 struct work_struct bio_work; 76 61 struct rcu_head rcu_head; 77 62 struct gendisk *disk; 63 + spinlock_t lock; 64 + refcount_t ref; 65 + unsigned int flags; 66 + unsigned int zone_no; 67 + unsigned int wp_offset; 68 + enum blk_zone_cond cond; 78 69 }; 70 + 71 + static inline bool disk_need_zone_resources(struct gendisk *disk) 72 + { 73 + /* 74 + * All request-based zoned devices need zone resources so that the 75 + * block layer can automatically handle write BIO plugging. BIO-based 76 + * device drivers (e.g. DM devices) are normally responsible for 77 + * handling zone write ordering and do not need zone resources, unless 78 + * the driver requires zone append emulation. 79 + */ 80 + return queue_is_mq(disk->queue) || 81 + queue_emulates_zone_append(disk->queue); 82 + } 83 + 84 + static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) 85 + { 86 + return 1U << disk->zone_wplugs_hash_bits; 87 + } 79 88 80 89 /* 81 90 * Zone write plug flags bits: ··· 130 109 } 131 110 EXPORT_SYMBOL_GPL(blk_zone_cond_str); 132 111 133 - struct disk_report_zones_cb_args { 134 - struct gendisk *disk; 135 - report_zones_cb user_cb; 136 - void *user_data; 112 + static void blk_zone_set_cond(u8 *zones_cond, unsigned int zno, 113 + enum blk_zone_cond cond) 114 + { 115 + if (!zones_cond) 116 + return; 117 + 118 + switch (cond) { 119 + case BLK_ZONE_COND_IMP_OPEN: 120 + case BLK_ZONE_COND_EXP_OPEN: 121 + case BLK_ZONE_COND_CLOSED: 122 + zones_cond[zno] = BLK_ZONE_COND_ACTIVE; 123 + return; 124 + case BLK_ZONE_COND_NOT_WP: 125 + case BLK_ZONE_COND_EMPTY: 126 + case BLK_ZONE_COND_FULL: 127 + case BLK_ZONE_COND_OFFLINE: 128 + case BLK_ZONE_COND_READONLY: 129 + default: 130 + zones_cond[zno] = cond; 131 + return; 
132 + } 133 + } 134 + 135 + static void disk_zone_set_cond(struct gendisk *disk, sector_t sector, 136 + enum blk_zone_cond cond) 137 + { 138 + u8 *zones_cond; 139 + 140 + rcu_read_lock(); 141 + zones_cond = rcu_dereference(disk->zones_cond); 142 + if (zones_cond) { 143 + unsigned int zno = disk_zone_no(disk, sector); 144 + 145 + /* 146 + * The condition of a conventional, readonly and offline zones 147 + * never changes, so do nothing if the target zone is in one of 148 + * these conditions. 149 + */ 150 + switch (zones_cond[zno]) { 151 + case BLK_ZONE_COND_NOT_WP: 152 + case BLK_ZONE_COND_READONLY: 153 + case BLK_ZONE_COND_OFFLINE: 154 + break; 155 + default: 156 + blk_zone_set_cond(zones_cond, zno, cond); 157 + break; 158 + } 159 + } 160 + rcu_read_unlock(); 161 + } 162 + 163 + /** 164 + * bdev_zone_is_seq - check if a sector belongs to a sequential write zone 165 + * @bdev: block device to check 166 + * @sector: sector number 167 + * 168 + * Check if @sector on @bdev is contained in a sequential write required zone. 169 + */ 170 + bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector) 171 + { 172 + struct gendisk *disk = bdev->bd_disk; 173 + unsigned int zno = disk_zone_no(disk, sector); 174 + bool is_seq = false; 175 + u8 *zones_cond; 176 + 177 + if (!bdev_is_zoned(bdev)) 178 + return false; 179 + 180 + rcu_read_lock(); 181 + zones_cond = rcu_dereference(disk->zones_cond); 182 + if (zones_cond && zno < disk->nr_zones) 183 + is_seq = zones_cond[zno] != BLK_ZONE_COND_NOT_WP; 184 + rcu_read_unlock(); 185 + 186 + return is_seq; 187 + } 188 + EXPORT_SYMBOL_GPL(bdev_zone_is_seq); 189 + 190 + /* 191 + * Zone report arguments for block device drivers report_zones operation. 192 + * @cb: report_zones_cb callback for each reported zone. 193 + * @data: Private data passed to report_zones_cb. 
194 + */ 195 + struct blk_report_zones_args { 196 + report_zones_cb cb; 197 + void *data; 198 + bool report_active; 137 199 }; 138 200 139 - static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk, 140 - struct blk_zone *zone); 141 - 142 - static int disk_report_zones_cb(struct blk_zone *zone, unsigned int idx, 143 - void *data) 201 + static int blkdev_do_report_zones(struct block_device *bdev, sector_t sector, 202 + unsigned int nr_zones, 203 + struct blk_report_zones_args *args) 144 204 { 145 - struct disk_report_zones_cb_args *args = data; 146 - struct gendisk *disk = args->disk; 205 + struct gendisk *disk = bdev->bd_disk; 147 206 148 - if (disk->zone_wplugs_hash) 149 - disk_zone_wplug_sync_wp_offset(disk, zone); 207 + if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) 208 + return -EOPNOTSUPP; 150 209 151 - if (!args->user_cb) 210 + if (!nr_zones || sector >= get_capacity(disk)) 152 211 return 0; 153 212 154 - return args->user_cb(zone, idx, args->user_data); 213 + return disk->fops->report_zones(disk, sector, nr_zones, args); 155 214 } 156 215 157 216 /** ··· 256 155 int blkdev_report_zones(struct block_device *bdev, sector_t sector, 257 156 unsigned int nr_zones, report_zones_cb cb, void *data) 258 157 { 259 - struct gendisk *disk = bdev->bd_disk; 260 - sector_t capacity = get_capacity(disk); 261 - struct disk_report_zones_cb_args args = { 262 - .disk = disk, 263 - .user_cb = cb, 264 - .user_data = data, 158 + struct blk_report_zones_args args = { 159 + .cb = cb, 160 + .data = data, 265 161 }; 266 162 267 - if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) 268 - return -EOPNOTSUPP; 269 - 270 - if (!nr_zones || sector >= capacity) 271 - return 0; 272 - 273 - return disk->fops->report_zones(disk, sector, nr_zones, 274 - disk_report_zones_cb, &args); 163 + return blkdev_do_report_zones(bdev, sector, nr_zones, &args); 275 164 } 276 165 EXPORT_SYMBOL_GPL(blkdev_report_zones); 277 166 ··· 357 266 } 358 267 359 268 /* 360 
- * BLKREPORTZONE ioctl processing. 269 + * Mask of valid input flags for BLKREPORTZONEV2 ioctl. 270 + */ 271 + #define BLK_ZONE_REPV2_INPUT_FLAGS BLK_ZONE_REP_CACHED 272 + 273 + /* 274 + * BLKREPORTZONE and BLKREPORTZONEV2 ioctl processing. 361 275 * Called from blkdev_ioctl. 362 276 */ 363 277 int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, ··· 386 290 return -EINVAL; 387 291 388 292 args.zones = argp + sizeof(struct blk_zone_report); 389 - ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones, 390 - blkdev_copy_zone_to_user, &args); 293 + 294 + switch (cmd) { 295 + case BLKREPORTZONE: 296 + ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones, 297 + blkdev_copy_zone_to_user, &args); 298 + break; 299 + case BLKREPORTZONEV2: 300 + if (rep.flags & ~BLK_ZONE_REPV2_INPUT_FLAGS) 301 + return -EINVAL; 302 + ret = blkdev_report_zones_cached(bdev, rep.sector, rep.nr_zones, 303 + blkdev_copy_zone_to_user, &args); 304 + break; 305 + default: 306 + return -EINVAL; 307 + } 308 + 391 309 if (ret < 0) 392 310 return ret; 393 311 ··· 511 401 { 512 402 struct blk_zone_wplug *zwplg; 513 403 unsigned long flags; 404 + u8 *zones_cond; 514 405 unsigned int idx = 515 406 hash_32(zwplug->zone_no, disk->zone_wplugs_hash_bits); 516 407 ··· 527 416 return false; 528 417 } 529 418 } 419 + 420 + /* 421 + * Set the zone condition: if we do not yet have a zones_cond array 422 + * attached to the disk, then this is a zone write plug insert from the 423 + * first call to blk_revalidate_disk_zones(), in which case the zone is 424 + * necessarilly in the active condition. 
425 + */ 426 + zones_cond = rcu_dereference_check(disk->zones_cond, 427 + lockdep_is_held(&disk->zone_wplugs_lock)); 428 + if (zones_cond) 429 + zwplug->cond = zones_cond[zwplug->zone_no]; 430 + else 431 + zwplug->cond = BLK_ZONE_COND_ACTIVE; 432 + 530 433 hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]); 531 434 atomic_inc(&disk->nr_zone_wplugs); 532 435 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); ··· 640 515 641 516 /* 642 517 * Mark the zone write plug as unhashed and drop the extra reference we 643 - * took when the plug was inserted in the hash table. 518 + * took when the plug was inserted in the hash table. Also update the 519 + * disk zone condition array with the current condition of the zone 520 + * write plug. 644 521 */ 645 522 zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED; 646 523 spin_lock_irqsave(&disk->zone_wplugs_lock, flags); 524 + blk_zone_set_cond(rcu_dereference_check(disk->zones_cond, 525 + lockdep_is_held(&disk->zone_wplugs_lock)), 526 + zwplug->zone_no, zwplug->cond); 647 527 hlist_del_init_rcu(&zwplug->node); 648 528 atomic_dec(&disk->nr_zone_wplugs); 649 529 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); ··· 751 621 } 752 622 753 623 /* 624 + * Update a zone write plug condition based on the write pointer offset. 625 + */ 626 + static void disk_zone_wplug_update_cond(struct gendisk *disk, 627 + struct blk_zone_wplug *zwplug) 628 + { 629 + lockdep_assert_held(&zwplug->lock); 630 + 631 + if (disk_zone_wplug_is_full(disk, zwplug)) 632 + zwplug->cond = BLK_ZONE_COND_FULL; 633 + else if (!zwplug->wp_offset) 634 + zwplug->cond = BLK_ZONE_COND_EMPTY; 635 + else 636 + zwplug->cond = BLK_ZONE_COND_ACTIVE; 637 + } 638 + 639 + /* 754 640 * Set a zone write plug write pointer offset to the specified value. 
755 641 * This aborts all plugged BIOs, which is fine as this function is called for 756 642 * a zone reset operation, a zone finish operation or if the zone needs a wp ··· 781 635 /* Update the zone write pointer and abort all plugged BIOs. */ 782 636 zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE; 783 637 zwplug->wp_offset = wp_offset; 638 + disk_zone_wplug_update_cond(disk, zwplug); 639 + 784 640 disk_zone_wplug_abort(zwplug); 785 641 786 642 /* ··· 836 688 disk_put_zone_wplug(zwplug); 837 689 } 838 690 839 - static int disk_zone_sync_wp_offset(struct gendisk *disk, sector_t sector) 691 + /** 692 + * disk_report_zone - Report one zone 693 + * @disk: Target disk 694 + * @zone: The zone to report 695 + * @idx: The index of the zone in the overall zone report 696 + * @args: report zones callback and data 697 + * 698 + * Description: 699 + * Helper function for block device drivers to report one zone of a zone 700 + * report initiated with blkdev_report_zones(). The zone being reported is 701 + * specified by @zone and used to update, if necessary, the zone write plug 702 + * information for the zone. If @args specifies a user callback function, 703 + * this callback is executed. 704 + */ 705 + int disk_report_zone(struct gendisk *disk, struct blk_zone *zone, 706 + unsigned int idx, struct blk_report_zones_args *args) 840 707 { 841 - struct disk_report_zones_cb_args args = { 842 - .disk = disk, 843 - }; 708 + if (args->report_active) { 709 + /* 710 + * If we come here, then this is a report zones as a fallback 711 + * for a cached report. So collapse the implicit open, explicit 712 + * open and closed conditions into the active zone condition. 
713 + */ 714 + switch (zone->cond) { 715 + case BLK_ZONE_COND_IMP_OPEN: 716 + case BLK_ZONE_COND_EXP_OPEN: 717 + case BLK_ZONE_COND_CLOSED: 718 + zone->cond = BLK_ZONE_COND_ACTIVE; 719 + break; 720 + default: 721 + break; 722 + } 723 + } 844 724 845 - return disk->fops->report_zones(disk, sector, 1, 846 - disk_report_zones_cb, &args); 725 + if (disk->zone_wplugs_hash) 726 + disk_zone_wplug_sync_wp_offset(disk, zone); 727 + 728 + if (args && args->cb) 729 + return args->cb(zone, idx, args->data); 730 + 731 + return 0; 732 + } 733 + EXPORT_SYMBOL_GPL(disk_report_zone); 734 + 735 + static int blkdev_report_zone_cb(struct blk_zone *zone, unsigned int idx, 736 + void *data) 737 + { 738 + memcpy(data, zone, sizeof(struct blk_zone)); 739 + return 0; 847 740 } 848 741 849 - static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio, 850 - unsigned int wp_offset) 742 + static int blkdev_report_zone_fallback(struct block_device *bdev, 743 + sector_t sector, struct blk_zone *zone) 744 + { 745 + struct blk_report_zones_args args = { 746 + .cb = blkdev_report_zone_cb, 747 + .data = zone, 748 + .report_active = true, 749 + }; 750 + 751 + return blkdev_do_report_zones(bdev, sector, 1, &args); 752 + } 753 + 754 + /** 755 + * blkdev_get_zone_info - Get a single zone information from cached data 756 + * @bdev: Target block device 757 + * @sector: Sector contained by the target zone 758 + * @zone: zone structure to return the zone information 759 + * 760 + * Description: 761 + * Get the zone information for the zone containing @sector using the zone 762 + * write plug of the target zone, if one exist, or the disk zone condition 763 + * array otherwise. The zone condition may be reported as being 764 + * the BLK_ZONE_COND_ACTIVE condition for a zone that is in the implicit 765 + * open, explicit open or closed condition. 766 + * 767 + * Returns 0 on success and a negative error code on failure. 
768 + */ 769 + int blkdev_get_zone_info(struct block_device *bdev, sector_t sector, 770 + struct blk_zone *zone) 771 + { 772 + struct gendisk *disk = bdev->bd_disk; 773 + sector_t zone_sectors = bdev_zone_sectors(bdev); 774 + struct blk_zone_wplug *zwplug; 775 + unsigned long flags; 776 + u8 *zones_cond; 777 + 778 + if (!bdev_is_zoned(bdev)) 779 + return -EOPNOTSUPP; 780 + 781 + if (sector >= get_capacity(disk)) 782 + return -EINVAL; 783 + 784 + memset(zone, 0, sizeof(*zone)); 785 + sector = ALIGN_DOWN(sector, zone_sectors); 786 + 787 + rcu_read_lock(); 788 + zones_cond = rcu_dereference(disk->zones_cond); 789 + if (!disk->zone_wplugs_hash || !zones_cond) { 790 + rcu_read_unlock(); 791 + return blkdev_report_zone_fallback(bdev, sector, zone); 792 + } 793 + zone->cond = zones_cond[disk_zone_no(disk, sector)]; 794 + rcu_read_unlock(); 795 + 796 + zone->start = sector; 797 + zone->len = zone_sectors; 798 + 799 + /* 800 + * If this is a conventional zone, we do not have a zone write plug and 801 + * can report the zone immediately. 802 + */ 803 + if (zone->cond == BLK_ZONE_COND_NOT_WP) { 804 + zone->type = BLK_ZONE_TYPE_CONVENTIONAL; 805 + zone->capacity = zone_sectors; 806 + zone->wp = ULLONG_MAX; 807 + return 0; 808 + } 809 + 810 + /* 811 + * This is a sequential write required zone. If the zone is read-only or 812 + * offline, only set the zone write pointer to an invalid value and 813 + * report the zone. 814 + */ 815 + zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; 816 + if (disk_zone_is_last(disk, zone)) 817 + zone->capacity = disk->last_zone_capacity; 818 + else 819 + zone->capacity = disk->zone_capacity; 820 + 821 + if (zone->cond == BLK_ZONE_COND_READONLY || 822 + zone->cond == BLK_ZONE_COND_OFFLINE) { 823 + zone->wp = ULLONG_MAX; 824 + return 0; 825 + } 826 + 827 + /* 828 + * If the zone does not have a zone write plug, it is either full or 829 + * empty, as we otherwise would have a zone write plug for it. 
In this 830 + * case, set the write pointer accordingly and report the zone. 831 + * Otherwise, if we have a zone write plug, use it. 832 + */ 833 + zwplug = disk_get_zone_wplug(disk, sector); 834 + if (!zwplug) { 835 + if (zone->cond == BLK_ZONE_COND_FULL) 836 + zone->wp = ULLONG_MAX; 837 + else 838 + zone->wp = sector; 839 + return 0; 840 + } 841 + 842 + spin_lock_irqsave(&zwplug->lock, flags); 843 + if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) { 844 + spin_unlock_irqrestore(&zwplug->lock, flags); 845 + disk_put_zone_wplug(zwplug); 846 + return blkdev_report_zone_fallback(bdev, sector, zone); 847 + } 848 + zone->cond = zwplug->cond; 849 + zone->wp = sector + zwplug->wp_offset; 850 + spin_unlock_irqrestore(&zwplug->lock, flags); 851 + 852 + disk_put_zone_wplug(zwplug); 853 + 854 + return 0; 855 + } 856 + EXPORT_SYMBOL_GPL(blkdev_get_zone_info); 857 + 858 + /** 859 + * blkdev_report_zones_cached - Get cached zones information 860 + * @bdev: Target block device 861 + * @sector: Sector from which to report zones 862 + * @nr_zones: Maximum number of zones to report 863 + * @cb: Callback function called for each reported zone 864 + * @data: Private data for the callback function 865 + * 866 + * Description: 867 + * Similar to blkdev_report_zones() but instead of calling into the low level 868 + * device driver to get the zone report from the device, use 869 + * blkdev_get_zone_info() to generate the report from the disk zone write 870 + * plugs and zones condition array. Since calling this function without a 871 + * callback does not make sense, @cb must be specified. 
872 + */ 873 + int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector, 874 + unsigned int nr_zones, report_zones_cb cb, void *data) 875 + { 876 + struct gendisk *disk = bdev->bd_disk; 877 + sector_t capacity = get_capacity(disk); 878 + sector_t zone_sectors = bdev_zone_sectors(bdev); 879 + unsigned int idx = 0; 880 + struct blk_zone zone; 881 + int ret; 882 + 883 + if (!cb || !bdev_is_zoned(bdev) || 884 + WARN_ON_ONCE(!disk->fops->report_zones)) 885 + return -EOPNOTSUPP; 886 + 887 + if (!nr_zones || sector >= capacity) 888 + return 0; 889 + 890 + /* 891 + * If we do not have any zone write plug resources, fallback to using 892 + * the regular zone report. 893 + */ 894 + if (!disk_need_zone_resources(disk)) { 895 + struct blk_report_zones_args args = { 896 + .cb = cb, 897 + .data = data, 898 + .report_active = true, 899 + }; 900 + 901 + return blkdev_do_report_zones(bdev, sector, nr_zones, &args); 902 + } 903 + 904 + for (sector = ALIGN_DOWN(sector, zone_sectors); 905 + sector < capacity && idx < nr_zones; 906 + sector += zone_sectors, idx++) { 907 + ret = blkdev_get_zone_info(bdev, sector, &zone); 908 + if (ret) 909 + return ret; 910 + 911 + ret = cb(&zone, idx, data); 912 + if (ret) 913 + return ret; 914 + } 915 + 916 + return idx; 917 + } 918 + EXPORT_SYMBOL_GPL(blkdev_report_zones_cached); 919 + 920 + static void blk_zone_reset_bio_endio(struct bio *bio) 851 921 { 852 922 struct gendisk *disk = bio->bi_bdev->bd_disk; 853 923 sector_t sector = bio->bi_iter.bi_sector; 854 924 struct blk_zone_wplug *zwplug; 855 - unsigned long flags; 856 - 857 - /* Conventional zones cannot be reset nor finished. */ 858 - if (!bdev_zone_is_seq(bio->bi_bdev, sector)) { 859 - bio_io_error(bio); 860 - return true; 861 - } 862 925 863 926 /* 864 - * No-wait reset or finish BIOs do not make much sense as the callers 865 - * issue these as blocking operations in most cases. 
To avoid issues 866 - * the BIO execution potentially failing with BLK_STS_AGAIN, warn about 867 - * REQ_NOWAIT being set and ignore that flag. 868 - */ 869 - if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) 870 - bio->bi_opf &= ~REQ_NOWAIT; 871 - 872 - /* 873 - * If we have a zone write plug, set its write pointer offset to 0 874 - * (reset case) or to the zone size (finish case). This will abort all 875 - * BIOs plugged for the target zone. It is fine as resetting or 876 - * finishing zones while writes are still in-flight will result in the 927 + * If we have a zone write plug, set its write pointer offset to 0. 928 + * This will abort all BIOs plugged for the target zone. It is fine as 929 + * resetting zones while writes are still in-flight will result in the 877 930 * writes failing anyway. 878 931 */ 879 932 zwplug = disk_get_zone_wplug(disk, sector); 880 933 if (zwplug) { 934 + unsigned long flags; 935 + 881 936 spin_lock_irqsave(&zwplug->lock, flags); 882 - disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset); 937 + disk_zone_wplug_set_wp_offset(disk, zwplug, 0); 883 938 spin_unlock_irqrestore(&zwplug->lock, flags); 884 939 disk_put_zone_wplug(zwplug); 940 + } else { 941 + disk_zone_set_cond(disk, sector, BLK_ZONE_COND_EMPTY); 885 942 } 886 - 887 - return false; 888 943 } 889 944 890 - static bool blk_zone_wplug_handle_reset_all(struct bio *bio) 945 + static void blk_zone_reset_all_bio_endio(struct bio *bio) 891 946 { 892 947 struct gendisk *disk = bio->bi_bdev->bd_disk; 948 + sector_t capacity = get_capacity(disk); 893 949 struct blk_zone_wplug *zwplug; 894 950 unsigned long flags; 895 951 sector_t sector; 952 + unsigned int i; 896 953 897 - /* 898 - * Set the write pointer offset of all zone write plugs to 0. This will 899 - * abort all plugged BIOs. It is fine as resetting zones while writes 900 - * are still in-flight will result in the writes failing anyway. 
901 - */ 902 - for (sector = 0; sector < get_capacity(disk); 903 - sector += disk->queue->limits.chunk_sectors) { 904 - zwplug = disk_get_zone_wplug(disk, sector); 905 - if (zwplug) { 954 + /* Update the condition of all zone write plugs. */ 955 + rcu_read_lock(); 956 + for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) { 957 + hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i], 958 + node) { 906 959 spin_lock_irqsave(&zwplug->lock, flags); 907 960 disk_zone_wplug_set_wp_offset(disk, zwplug, 0); 908 961 spin_unlock_irqrestore(&zwplug->lock, flags); 909 - disk_put_zone_wplug(zwplug); 910 962 } 911 963 } 964 + rcu_read_unlock(); 912 965 913 - return false; 966 + /* Update the cached zone conditions. */ 967 + for (sector = 0; sector < capacity; 968 + sector += bdev_zone_sectors(bio->bi_bdev)) 969 + disk_zone_set_cond(disk, sector, BLK_ZONE_COND_EMPTY); 970 + } 971 + 972 + static void blk_zone_finish_bio_endio(struct bio *bio) 973 + { 974 + struct block_device *bdev = bio->bi_bdev; 975 + struct gendisk *disk = bdev->bd_disk; 976 + sector_t sector = bio->bi_iter.bi_sector; 977 + struct blk_zone_wplug *zwplug; 978 + 979 + /* 980 + * If we have a zone write plug, set its write pointer offset to the 981 + * zone size. This will abort all BIOs plugged for the target zone. It 982 + * is fine as resetting zones while writes are still in-flight will 983 + * result in the writes failing anyway. 984 + */ 985 + zwplug = disk_get_zone_wplug(disk, sector); 986 + if (zwplug) { 987 + unsigned long flags; 988 + 989 + spin_lock_irqsave(&zwplug->lock, flags); 990 + disk_zone_wplug_set_wp_offset(disk, zwplug, 991 + bdev_zone_sectors(bdev)); 992 + spin_unlock_irqrestore(&zwplug->lock, flags); 993 + disk_put_zone_wplug(zwplug); 994 + } else { 995 + disk_zone_set_cond(disk, sector, BLK_ZONE_COND_FULL); 996 + } 997 + } 998 + 999 + void blk_zone_mgmt_bio_endio(struct bio *bio) 1000 + { 1001 + /* If the BIO failed, we have nothing to do. 
*/ 1002 + if (bio->bi_status != BLK_STS_OK) 1003 + return; 1004 + 1005 + switch (bio_op(bio)) { 1006 + case REQ_OP_ZONE_RESET: 1007 + blk_zone_reset_bio_endio(bio); 1008 + return; 1009 + case REQ_OP_ZONE_RESET_ALL: 1010 + blk_zone_reset_all_bio_endio(bio); 1011 + return; 1012 + case REQ_OP_ZONE_FINISH: 1013 + blk_zone_finish_bio_endio(bio); 1014 + return; 1015 + default: 1016 + return; 1017 + } 914 1018 } 915 1019 916 1020 static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk, ··· 1236 836 */ 1237 837 void blk_zone_write_plug_bio_merged(struct bio *bio) 1238 838 { 839 + struct gendisk *disk = bio->bi_bdev->bd_disk; 1239 840 struct blk_zone_wplug *zwplug; 1240 841 unsigned long flags; 1241 842 ··· 1258 857 * have at least one request and one BIO referencing the zone write 1259 858 * plug. So this should not fail. 1260 859 */ 1261 - zwplug = disk_get_zone_wplug(bio->bi_bdev->bd_disk, 1262 - bio->bi_iter.bi_sector); 860 + zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); 1263 861 if (WARN_ON_ONCE(!zwplug)) 1264 862 return; 1265 863 1266 864 spin_lock_irqsave(&zwplug->lock, flags); 1267 865 zwplug->wp_offset += bio_sectors(bio); 866 + disk_zone_wplug_update_cond(disk, zwplug); 1268 867 spin_unlock_irqrestore(&zwplug->lock, flags); 1269 868 } 1270 869 ··· 1323 922 /* Drop the reference taken by disk_zone_wplug_add_bio(). */ 1324 923 blk_queue_exit(q); 1325 924 zwplug->wp_offset += bio_sectors(bio); 925 + disk_zone_wplug_update_cond(disk, zwplug); 1326 926 1327 927 req_back_sector += bio_sectors(bio); 1328 928 } ··· 1387 985 1388 986 /* Advance the zone write pointer offset. 
*/ 1389 987 zwplug->wp_offset += bio_sectors(bio); 988 + disk_zone_wplug_update_cond(disk, zwplug); 1390 989 1391 990 return true; 1392 991 } ··· 1509 1106 disk_put_zone_wplug(zwplug); 1510 1107 } 1511 1108 1109 + static bool blk_zone_wplug_handle_zone_mgmt(struct bio *bio) 1110 + { 1111 + if (bio_op(bio) != REQ_OP_ZONE_RESET_ALL && 1112 + !bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) { 1113 + /* 1114 + * Zone reset and zone finish operations do not apply to 1115 + * conventional zones. 1116 + */ 1117 + bio_io_error(bio); 1118 + return true; 1119 + } 1120 + 1121 + /* 1122 + * No-wait zone management BIOs do not make much sense as the callers 1123 + * issue these as blocking operations in most cases. To avoid issues 1124 + * with the BIO execution potentially failing with BLK_STS_AGAIN, warn 1125 + * about REQ_NOWAIT being set and ignore that flag. 1126 + */ 1127 + if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) 1128 + bio->bi_opf &= ~REQ_NOWAIT; 1129 + 1130 + return false; 1131 + } 1132 + 1512 1133 /** 1513 1134 * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging 1514 1135 * @bio: The BIO being submitted ··· 1580 1153 case REQ_OP_WRITE_ZEROES: 1581 1154 return blk_zone_wplug_handle_write(bio, nr_segs); 1582 1155 case REQ_OP_ZONE_RESET: 1583 - return blk_zone_wplug_handle_reset_or_finish(bio, 0); 1584 1156 case REQ_OP_ZONE_FINISH: 1585 - return blk_zone_wplug_handle_reset_or_finish(bio, 1586 - bdev_zone_sectors(bdev)); 1587 1157 case REQ_OP_ZONE_RESET_ALL: 1588 - return blk_zone_wplug_handle_reset_all(bio); 1158 + return blk_zone_wplug_handle_zone_mgmt(bio); 1589 1159 default: 1590 1160 return false; 1591 1161 } ··· 1756 1332 disk_put_zone_wplug(zwplug); 1757 1333 } 1758 1334 1759 - static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) 1760 - { 1761 - return 1U << disk->zone_wplugs_hash_bits; 1762 - } 1763 - 1764 1335 void disk_init_zone_resources(struct gendisk *disk) 1765 1336 { 1766 1337 
spin_lock_init(&disk->zone_wplugs_lock); ··· 1836 1417 disk->zone_wplugs_hash_bits = 0; 1837 1418 } 1838 1419 1839 - static unsigned int disk_set_conv_zones_bitmap(struct gendisk *disk, 1840 - unsigned long *bitmap) 1420 + static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond) 1841 1421 { 1842 - unsigned int nr_conv_zones = 0; 1843 1422 unsigned long flags; 1844 1423 1845 1424 spin_lock_irqsave(&disk->zone_wplugs_lock, flags); 1846 - if (bitmap) 1847 - nr_conv_zones = bitmap_weight(bitmap, disk->nr_zones); 1848 - bitmap = rcu_replace_pointer(disk->conv_zones_bitmap, bitmap, 1849 - lockdep_is_held(&disk->zone_wplugs_lock)); 1425 + zones_cond = rcu_replace_pointer(disk->zones_cond, zones_cond, 1426 + lockdep_is_held(&disk->zone_wplugs_lock)); 1850 1427 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); 1851 1428 1852 - kfree_rcu_mightsleep(bitmap); 1853 - 1854 - return nr_conv_zones; 1429 + kfree_rcu_mightsleep(zones_cond); 1855 1430 } 1856 1431 1857 1432 void disk_free_zone_resources(struct gendisk *disk) ··· 1869 1456 mempool_destroy(disk->zone_wplugs_pool); 1870 1457 disk->zone_wplugs_pool = NULL; 1871 1458 1872 - disk_set_conv_zones_bitmap(disk, NULL); 1459 + disk_set_zones_cond_array(disk, NULL); 1873 1460 disk->zone_capacity = 0; 1874 1461 disk->last_zone_capacity = 0; 1875 1462 disk->nr_zones = 0; 1876 1463 } 1877 1464 1878 - static inline bool disk_need_zone_resources(struct gendisk *disk) 1879 - { 1880 - /* 1881 - * All mq zoned devices need zone resources so that the block layer 1882 - * can automatically handle write BIO plugging. BIO-based device drivers 1883 - * (e.g. DM devices) are normally responsible for handling zone write 1884 - * ordering and do not need zone resources, unless the driver requires 1885 - * zone append emulation. 
1886 - */ 1887 - return queue_is_mq(disk->queue) || 1888 - queue_emulates_zone_append(disk->queue); 1889 - } 1465 + struct blk_revalidate_zone_args { 1466 + struct gendisk *disk; 1467 + u8 *zones_cond; 1468 + unsigned int nr_zones; 1469 + unsigned int nr_conv_zones; 1470 + unsigned int zone_capacity; 1471 + unsigned int last_zone_capacity; 1472 + sector_t sector; 1473 + }; 1890 1474 1891 1475 static int disk_revalidate_zone_resources(struct gendisk *disk, 1892 - unsigned int nr_zones) 1476 + struct blk_revalidate_zone_args *args) 1893 1477 { 1894 1478 struct queue_limits *lim = &disk->queue->limits; 1895 1479 unsigned int pool_size; 1480 + 1481 + args->disk = disk; 1482 + args->nr_zones = 1483 + DIV_ROUND_UP_ULL(get_capacity(disk), lim->chunk_sectors); 1484 + 1485 + /* Cached zone conditions: 1 byte per zone */ 1486 + args->zones_cond = kzalloc(args->nr_zones, GFP_NOIO); 1487 + if (!args->zones_cond) 1488 + return -ENOMEM; 1896 1489 1897 1490 if (!disk_need_zone_resources(disk)) 1898 1491 return 0; ··· 1909 1490 */ 1910 1491 pool_size = max(lim->max_open_zones, lim->max_active_zones); 1911 1492 if (!pool_size) 1912 - pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_zones); 1493 + pool_size = 1494 + min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, args->nr_zones); 1913 1495 1914 1496 if (!disk->zone_wplugs_hash) 1915 1497 return disk_alloc_zone_resources(disk, pool_size); 1916 1498 1917 1499 return 0; 1918 1500 } 1919 - 1920 - struct blk_revalidate_zone_args { 1921 - struct gendisk *disk; 1922 - unsigned long *conv_zones_bitmap; 1923 - unsigned int nr_zones; 1924 - unsigned int zone_capacity; 1925 - unsigned int last_zone_capacity; 1926 - sector_t sector; 1927 - }; 1928 1501 1929 1502 /* 1930 1503 * Update the disk zone resources information and device queue limits. 
··· 1926 1515 struct blk_revalidate_zone_args *args) 1927 1516 { 1928 1517 struct request_queue *q = disk->queue; 1929 - unsigned int nr_seq_zones, nr_conv_zones; 1930 - unsigned int pool_size; 1518 + unsigned int nr_seq_zones; 1519 + unsigned int pool_size, memflags; 1931 1520 struct queue_limits lim; 1932 - 1933 - disk->nr_zones = args->nr_zones; 1934 - disk->zone_capacity = args->zone_capacity; 1935 - disk->last_zone_capacity = args->last_zone_capacity; 1936 - nr_conv_zones = 1937 - disk_set_conv_zones_bitmap(disk, args->conv_zones_bitmap); 1938 - if (nr_conv_zones >= disk->nr_zones) { 1939 - pr_warn("%s: Invalid number of conventional zones %u / %u\n", 1940 - disk->disk_name, nr_conv_zones, disk->nr_zones); 1941 - return -ENODEV; 1942 - } 1521 + int ret = 0; 1943 1522 1944 1523 lim = queue_limits_start_update(q); 1945 1524 1525 + memflags = blk_mq_freeze_queue(q); 1526 + 1527 + disk->nr_zones = args->nr_zones; 1528 + if (args->nr_conv_zones >= disk->nr_zones) { 1529 + pr_warn("%s: Invalid number of conventional zones %u / %u\n", 1530 + disk->disk_name, args->nr_conv_zones, disk->nr_zones); 1531 + ret = -ENODEV; 1532 + goto unfreeze; 1533 + } 1534 + 1535 + disk->zone_capacity = args->zone_capacity; 1536 + disk->last_zone_capacity = args->last_zone_capacity; 1537 + disk_set_zones_cond_array(disk, args->zones_cond); 1538 + 1946 1539 /* 1947 - * Some devices can advertize zone resource limits that are larger than 1540 + * Some devices can advertise zone resource limits that are larger than 1948 1541 * the number of sequential zones of the zoned block device, e.g. a 1949 1542 * small ZNS namespace. For such case, assume that the zoned device has 1950 1543 * no zone resource limits. 
1951 1544 */ 1952 - nr_seq_zones = disk->nr_zones - nr_conv_zones; 1545 + nr_seq_zones = disk->nr_zones - args->nr_conv_zones; 1953 1546 if (lim.max_open_zones >= nr_seq_zones) 1954 1547 lim.max_open_zones = 0; 1955 1548 if (lim.max_active_zones >= nr_seq_zones) ··· 1983 1568 } 1984 1569 1985 1570 commit: 1986 - return queue_limits_commit_update_frozen(q, &lim); 1571 + ret = queue_limits_commit_update(q, &lim); 1572 + 1573 + unfreeze: 1574 + if (ret) 1575 + disk_free_zone_resources(disk); 1576 + 1577 + blk_mq_unfreeze_queue(q, memflags); 1578 + 1579 + return ret; 1580 + } 1581 + 1582 + static int blk_revalidate_zone_cond(struct blk_zone *zone, unsigned int idx, 1583 + struct blk_revalidate_zone_args *args) 1584 + { 1585 + enum blk_zone_cond cond = zone->cond; 1586 + 1587 + /* Check that the zone condition is consistent with the zone type. */ 1588 + switch (cond) { 1589 + case BLK_ZONE_COND_NOT_WP: 1590 + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) 1591 + goto invalid_condition; 1592 + break; 1593 + case BLK_ZONE_COND_IMP_OPEN: 1594 + case BLK_ZONE_COND_EXP_OPEN: 1595 + case BLK_ZONE_COND_CLOSED: 1596 + case BLK_ZONE_COND_EMPTY: 1597 + case BLK_ZONE_COND_FULL: 1598 + case BLK_ZONE_COND_OFFLINE: 1599 + case BLK_ZONE_COND_READONLY: 1600 + if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) 1601 + goto invalid_condition; 1602 + break; 1603 + default: 1604 + pr_warn("%s: Invalid zone condition 0x%X\n", 1605 + args->disk->disk_name, cond); 1606 + return -ENODEV; 1607 + } 1608 + 1609 + blk_zone_set_cond(args->zones_cond, idx, cond); 1610 + 1611 + return 0; 1612 + 1613 + invalid_condition: 1614 + pr_warn("%s: Invalid zone condition 0x%x for type 0x%x\n", 1615 + args->disk->disk_name, cond, zone->type); 1616 + 1617 + return -ENODEV; 1987 1618 } 1988 1619 1989 1620 static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, ··· 2046 1585 if (disk_zone_is_last(disk, zone)) 2047 1586 args->last_zone_capacity = zone->capacity; 2048 1587 2049 - if 
(!disk_need_zone_resources(disk)) 2050 - return 0; 2051 - 2052 - if (!args->conv_zones_bitmap) { 2053 - args->conv_zones_bitmap = 2054 - bitmap_zalloc(args->nr_zones, GFP_NOIO); 2055 - if (!args->conv_zones_bitmap) 2056 - return -ENOMEM; 2057 - } 2058 - 2059 - set_bit(idx, args->conv_zones_bitmap); 1588 + args->nr_conv_zones++; 2060 1589 2061 1590 return 0; 2062 1591 } ··· 2144 1693 return -ENODEV; 2145 1694 } 2146 1695 1696 + /* Check zone condition */ 1697 + ret = blk_revalidate_zone_cond(zone, idx, args); 1698 + if (ret) 1699 + return ret; 1700 + 2147 1701 /* Check zone type */ 2148 1702 switch (zone->type) { 2149 1703 case BLK_ZONE_TYPE_CONVENTIONAL: ··· 2189 1733 sector_t zone_sectors = q->limits.chunk_sectors; 2190 1734 sector_t capacity = get_capacity(disk); 2191 1735 struct blk_revalidate_zone_args args = { }; 2192 - unsigned int noio_flag; 1736 + unsigned int memflags, noio_flag; 1737 + struct blk_report_zones_args rep_args = { 1738 + .cb = blk_revalidate_zone_cb, 1739 + .data = &args, 1740 + }; 2193 1741 int ret = -ENOMEM; 2194 1742 2195 1743 if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) ··· 2216 1756 * Ensure that all memory allocations in this context are done as if 2217 1757 * GFP_NOIO was specified. 
2218 1758 */ 2219 - args.disk = disk; 2220 - args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors); 2221 1759 noio_flag = memalloc_noio_save(); 2222 - ret = disk_revalidate_zone_resources(disk, args.nr_zones); 1760 + ret = disk_revalidate_zone_resources(disk, &args); 2223 1761 if (ret) { 2224 1762 memalloc_noio_restore(noio_flag); 2225 1763 return ret; 2226 1764 } 2227 1765 2228 - ret = disk->fops->report_zones(disk, 0, UINT_MAX, 2229 - blk_revalidate_zone_cb, &args); 1766 + ret = disk->fops->report_zones(disk, 0, UINT_MAX, &rep_args); 2230 1767 if (!ret) { 2231 1768 pr_warn("%s: No zones reported\n", disk->disk_name); 2232 1769 ret = -ENODEV; ··· 2240 1783 ret = -ENODEV; 2241 1784 } 2242 1785 2243 - /* 2244 - * Set the new disk zone parameters only once the queue is frozen and 2245 - * all I/Os are completed. 2246 - */ 2247 1786 if (ret > 0) 2248 - ret = disk_update_zone_resources(disk, &args); 2249 - else 2250 - pr_warn("%s: failed to revalidate zones\n", disk->disk_name); 2251 - if (ret) { 2252 - unsigned int memflags = blk_mq_freeze_queue(q); 1787 + return disk_update_zone_resources(disk, &args); 2253 1788 2254 - disk_free_zone_resources(disk); 2255 - blk_mq_unfreeze_queue(q, memflags); 2256 - } 1789 + pr_warn("%s: failed to revalidate zones\n", disk->disk_name); 1790 + 1791 + memflags = blk_mq_freeze_queue(q); 1792 + disk_free_zone_resources(disk); 1793 + blk_mq_unfreeze_queue(q, memflags); 2257 1794 2258 1795 return ret; 2259 1796 } ··· 2268 1817 int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector, 2269 1818 sector_t nr_sects, gfp_t gfp_mask) 2270 1819 { 1820 + struct gendisk *disk = bdev->bd_disk; 2271 1821 int ret; 2272 1822 2273 1823 if (WARN_ON_ONCE(!bdev_is_zoned(bdev))) ··· 2284 1832 * pointer. Undo this using a report zone to update the zone write 2285 1833 * pointer to the correct current value. 
2286 1834 */ 2287 - ret = disk_zone_sync_wp_offset(bdev->bd_disk, sector); 1835 + ret = disk->fops->report_zones(disk, sector, 1, NULL); 2288 1836 if (ret != 1) 2289 1837 return ret < 0 ? ret : -EIO; 2290 1838 ··· 2303 1851 unsigned int zwp_wp_offset, zwp_flags; 2304 1852 unsigned int zwp_zone_no, zwp_ref; 2305 1853 unsigned int zwp_bio_list_size; 1854 + enum blk_zone_cond zwp_cond; 2306 1855 unsigned long flags; 2307 1856 2308 1857 spin_lock_irqsave(&zwplug->lock, flags); 2309 1858 zwp_zone_no = zwplug->zone_no; 2310 1859 zwp_flags = zwplug->flags; 2311 1860 zwp_ref = refcount_read(&zwplug->ref); 1861 + zwp_cond = zwplug->cond; 2312 1862 zwp_wp_offset = zwplug->wp_offset; 2313 1863 zwp_bio_list_size = bio_list_size(&zwplug->bio_list); 2314 1864 spin_unlock_irqrestore(&zwplug->lock, flags); 2315 1865 2316 - seq_printf(m, "%u 0x%x %u %u %u\n", zwp_zone_no, zwp_flags, zwp_ref, 2317 - zwp_wp_offset, zwp_bio_list_size); 1866 + seq_printf(m, 1867 + "Zone no: %u, flags: 0x%x, ref: %u, cond: %s, wp ofst: %u, pending BIO: %u\n", 1868 + zwp_zone_no, zwp_flags, zwp_ref, blk_zone_cond_str(zwp_cond), 1869 + zwp_wp_offset, zwp_bio_list_size); 2318 1870 } 2319 1871 2320 1872 int queue_zone_wplugs_show(void *data, struct seq_file *m)
+14
block/blk.h
··· 489 489 void blk_zone_write_plug_bio_merged(struct bio *bio); 490 490 void blk_zone_write_plug_init_request(struct request *rq); 491 491 void blk_zone_append_update_request_bio(struct request *rq, struct bio *bio); 492 + void blk_zone_mgmt_bio_endio(struct bio *bio); 492 493 void blk_zone_write_plug_bio_endio(struct bio *bio); 493 494 static inline void blk_zone_bio_endio(struct bio *bio) 494 495 { 496 + /* 497 + * Zone management BIOs may impact zone write plugs (e.g. a zone reset 498 + * changes a zone write plug zone write pointer offset), but these 499 + * operations do not go through zone write plugging as they may operate 500 + * on zones that do not have a zone write 501 + * plug. blk_zone_mgmt_bio_endio() handles the potential changes to zone 502 + * write plugs that are present. 503 + */ 504 + if (op_is_zone_mgmt(bio_op(bio))) { 505 + blk_zone_mgmt_bio_endio(bio); 506 + return; 507 + } 508 + 495 509 /* 496 510 * For write BIOs to zoned devices, signal the completion of the BIO so 497 511 * that the next write BIO can be submitted by zone write plugging.
+1
block/ioctl.c
··· 581 581 case BLKGETDISKSEQ: 582 582 return put_u64(argp, bdev->bd_disk->diskseq); 583 583 case BLKREPORTZONE: 584 + case BLKREPORTZONEV2: 584 585 return blkdev_report_zones_ioctl(bdev, cmd, arg); 585 586 case BLKRESETZONE: 586 587 case BLKOPENZONE:
+2 -1
drivers/block/null_blk/null_blk.h
··· 143 143 int null_register_zoned_dev(struct nullb *nullb); 144 144 void null_free_zoned_dev(struct nullb_device *dev); 145 145 int null_report_zones(struct gendisk *disk, sector_t sector, 146 - unsigned int nr_zones, report_zones_cb cb, void *data); 146 + unsigned int nr_zones, 147 + struct blk_report_zones_args *args); 147 148 blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op, 148 149 sector_t sector, sector_t nr_sectors); 149 150 size_t null_zone_valid_read_len(struct nullb *nullb,
+2 -2
drivers/block/null_blk/zoned.c
··· 191 191 } 192 192 193 193 int null_report_zones(struct gendisk *disk, sector_t sector, 194 - unsigned int nr_zones, report_zones_cb cb, void *data) 194 + unsigned int nr_zones, struct blk_report_zones_args *args) 195 195 { 196 196 struct nullb *nullb = disk->private_data; 197 197 struct nullb_device *dev = nullb->dev; ··· 225 225 blkz.capacity = zone->capacity; 226 226 null_unlock_zone(dev, zone); 227 227 228 - error = cb(&blkz, i, data); 228 + error = disk_report_zone(disk, &blkz, i, args); 229 229 if (error) 230 230 return error; 231 231 }
+2 -2
drivers/block/ublk_drv.c
··· 367 367 } 368 368 369 369 static int ublk_report_zones(struct gendisk *disk, sector_t sector, 370 - unsigned int nr_zones, report_zones_cb cb, void *data) 370 + unsigned int nr_zones, struct blk_report_zones_args *args) 371 371 { 372 372 struct ublk_device *ub = disk->private_data; 373 373 unsigned int zone_size_sectors = disk->queue->limits.chunk_sectors; ··· 430 430 if (!zone->len) 431 431 break; 432 432 433 - ret = cb(zone, i, data); 433 + ret = disk_report_zone(disk, zone, i, args); 434 434 if (ret) 435 435 goto out; 436 436
+6 -5
drivers/block/virtio_blk.c
··· 584 584 585 585 static int virtblk_parse_zone(struct virtio_blk *vblk, 586 586 struct virtio_blk_zone_descriptor *entry, 587 - unsigned int idx, report_zones_cb cb, void *data) 587 + unsigned int idx, 588 + struct blk_report_zones_args *args) 588 589 { 589 590 struct blk_zone zone = { }; 590 591 ··· 651 650 * The callback below checks the validity of the reported 652 651 * entry data, no need to further validate it here. 653 652 */ 654 - return cb(&zone, idx, data); 653 + return disk_report_zone(vblk->disk, &zone, idx, args); 655 654 } 656 655 657 656 static int virtblk_report_zones(struct gendisk *disk, sector_t sector, 658 - unsigned int nr_zones, report_zones_cb cb, 659 - void *data) 657 + unsigned int nr_zones, 658 + struct blk_report_zones_args *args) 660 659 { 661 660 struct virtio_blk *vblk = disk->private_data; 662 661 struct virtio_blk_zone_report *report; ··· 694 693 695 694 for (i = 0; i < nz && zone_idx < nr_zones; i++) { 696 695 ret = virtblk_parse_zone(vblk, &report->zones[i], 697 - zone_idx, cb, data); 696 + zone_idx, args); 698 697 if (ret) 699 698 goto fail_report; 700 699
+2 -2
drivers/block/zloop.c
··· 647 647 } 648 648 649 649 static int zloop_report_zones(struct gendisk *disk, sector_t sector, 650 - unsigned int nr_zones, report_zones_cb cb, void *data) 650 + unsigned int nr_zones, struct blk_report_zones_args *args) 651 651 { 652 652 struct zloop_device *zlo = disk->private_data; 653 653 struct blk_zone blkz = {}; ··· 687 687 688 688 mutex_unlock(&zone->lock); 689 689 690 - ret = cb(&blkz, i, data); 690 + ret = disk_report_zone(disk, &blkz, i, args); 691 691 if (ret) 692 692 return ret; 693 693 }
+30 -24
drivers/md/dm-zone.c
··· 17 17 * For internal zone reports bypassing the top BIO submission path. 18 18 */ 19 19 static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t, 20 - sector_t sector, unsigned int nr_zones, 21 - report_zones_cb cb, void *data) 20 + unsigned int nr_zones, 21 + struct dm_report_zones_args *args) 22 22 { 23 - struct gendisk *disk = md->disk; 24 - int ret; 25 - struct dm_report_zones_args args = { 26 - .next_sector = sector, 27 - .orig_data = data, 28 - .orig_cb = cb, 29 - }; 30 - 31 23 do { 32 24 struct dm_target *tgt; 25 + int ret; 33 26 34 - tgt = dm_table_find_target(t, args.next_sector); 27 + tgt = dm_table_find_target(t, args->next_sector); 35 28 if (WARN_ON_ONCE(!tgt->type->report_zones)) 36 29 return -EIO; 37 30 38 - args.tgt = tgt; 39 - ret = tgt->type->report_zones(tgt, &args, 40 - nr_zones - args.zone_idx); 31 + args->tgt = tgt; 32 + ret = tgt->type->report_zones(tgt, args, 33 + nr_zones - args->zone_idx); 41 34 if (ret < 0) 42 35 return ret; 43 - } while (args.zone_idx < nr_zones && 44 - args.next_sector < get_capacity(disk)); 36 + } while (args->zone_idx < nr_zones && 37 + args->next_sector < get_capacity(md->disk)); 45 38 46 - return args.zone_idx; 39 + return args->zone_idx; 47 40 } 48 41 49 42 /* ··· 45 52 * generally implemented by targets using dm_report_zones(). 
46 53 */ 47 54 int dm_blk_report_zones(struct gendisk *disk, sector_t sector, 48 - unsigned int nr_zones, report_zones_cb cb, void *data) 55 + unsigned int nr_zones, 56 + struct blk_report_zones_args *args) 49 57 { 50 58 struct mapped_device *md = disk->private_data; 51 59 struct dm_table *map; ··· 70 76 map = zone_revalidate_map; 71 77 } 72 78 73 - if (map) 74 - ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, 75 - data); 79 + if (map) { 80 + struct dm_report_zones_args dm_args = { 81 + .disk = md->disk, 82 + .next_sector = sector, 83 + .rep_args = args, 84 + }; 85 + ret = dm_blk_do_report_zones(md, map, nr_zones, &dm_args); 86 + } 76 87 77 88 if (put_table) 78 89 dm_put_live_table(md, srcu_idx); ··· 112 113 } 113 114 114 115 args->next_sector = zone->start + zone->len; 115 - return args->orig_cb(zone, args->zone_idx++, args->orig_data); 116 + 117 + return disk_report_zone(args->disk, zone, args->zone_idx++, 118 + args->rep_args); 116 119 } 117 120 118 121 /* ··· 493 492 sector_t sector, unsigned int nr_zones, 494 493 unsigned long *need_reset) 495 494 { 495 + struct dm_report_zones_args args = { 496 + .disk = md->disk, 497 + .next_sector = sector, 498 + .cb = dm_zone_need_reset_cb, 499 + .data = need_reset, 500 + }; 496 501 int ret; 497 502 498 - ret = dm_blk_do_report_zones(md, t, sector, nr_zones, 499 - dm_zone_need_reset_cb, need_reset); 503 + ret = dm_blk_do_report_zones(md, t, nr_zones, &args); 500 504 if (ret != nr_zones) { 501 505 DMERR("Get %s zone reset bitmap failed\n", 502 506 md->disk->disk_name);
+2 -1
drivers/md/dm.h
··· 109 109 void dm_zone_endio(struct dm_io *io, struct bio *clone); 110 110 #ifdef CONFIG_BLK_DEV_ZONED 111 111 int dm_blk_report_zones(struct gendisk *disk, sector_t sector, 112 - unsigned int nr_zones, report_zones_cb cb, void *data); 112 + unsigned int nr_zones, 113 + struct blk_report_zones_args *args); 113 114 bool dm_is_zone_write(struct mapped_device *md, struct bio *bio); 114 115 int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, 115 116 sector_t sector, unsigned int nr_zones,
+2 -3
drivers/nvme/host/core.c
··· 2599 2599 2600 2600 #ifdef CONFIG_BLK_DEV_ZONED 2601 2601 static int nvme_report_zones(struct gendisk *disk, sector_t sector, 2602 - unsigned int nr_zones, report_zones_cb cb, void *data) 2602 + unsigned int nr_zones, struct blk_report_zones_args *args) 2603 2603 { 2604 - return nvme_ns_report_zones(disk->private_data, sector, nr_zones, cb, 2605 - data); 2604 + return nvme_ns_report_zones(disk->private_data, sector, nr_zones, args); 2606 2605 } 2607 2606 #else 2608 2607 #define nvme_report_zones NULL
+2 -2
drivers/nvme/host/multipath.c
··· 576 576 577 577 #ifdef CONFIG_BLK_DEV_ZONED 578 578 static int nvme_ns_head_report_zones(struct gendisk *disk, sector_t sector, 579 - unsigned int nr_zones, report_zones_cb cb, void *data) 579 + unsigned int nr_zones, struct blk_report_zones_args *args) 580 580 { 581 581 struct nvme_ns_head *head = disk->private_data; 582 582 struct nvme_ns *ns; ··· 585 585 srcu_idx = srcu_read_lock(&head->srcu); 586 586 ns = nvme_find_path(head); 587 587 if (ns) 588 - ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data); 588 + ret = nvme_ns_report_zones(ns, sector, nr_zones, args); 589 589 srcu_read_unlock(&head->srcu, srcu_idx); 590 590 return ret; 591 591 }
+1 -1
drivers/nvme/host/nvme.h
··· 1108 1108 }; 1109 1109 1110 1110 int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, 1111 - unsigned int nr_zones, report_zones_cb cb, void *data); 1111 + unsigned int nr_zones, struct blk_report_zones_args *args); 1112 1112 int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf, 1113 1113 struct nvme_zone_info *zi); 1114 1114 void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+5 -5
drivers/nvme/host/zns.c
··· 148 148 149 149 static int nvme_zone_parse_entry(struct nvme_ns *ns, 150 150 struct nvme_zone_descriptor *entry, 151 - unsigned int idx, report_zones_cb cb, 152 - void *data) 151 + unsigned int idx, 152 + struct blk_report_zones_args *args) 153 153 { 154 154 struct nvme_ns_head *head = ns->head; 155 155 struct blk_zone zone = { }; ··· 169 169 else 170 170 zone.wp = nvme_lba_to_sect(head, le64_to_cpu(entry->wp)); 171 171 172 - return cb(&zone, idx, data); 172 + return disk_report_zone(ns->disk, &zone, idx, args); 173 173 } 174 174 175 175 int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, 176 - unsigned int nr_zones, report_zones_cb cb, void *data) 176 + unsigned int nr_zones, struct blk_report_zones_args *args) 177 177 { 178 178 struct nvme_zone_report *report; 179 179 struct nvme_command c = { }; ··· 213 213 214 214 for (i = 0; i < nz && zone_idx < nr_zones; i++) { 215 215 ret = nvme_zone_parse_entry(ns, &report->entries[i], 216 - zone_idx, cb, data); 216 + zone_idx, args); 217 217 if (ret) 218 218 goto out_free; 219 219 zone_idx++;
+1 -1
drivers/scsi/sd.h
··· 240 240 unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, 241 241 struct scsi_sense_hdr *sshdr); 242 242 int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, 243 - unsigned int nr_zones, report_zones_cb cb, void *data); 243 + unsigned int nr_zones, struct blk_report_zones_args *args); 244 244 245 245 #else /* CONFIG_BLK_DEV_ZONED */ 246 246
+7 -13
drivers/scsi/sd_zbc.c
··· 35 35 * @buf: SCSI zone descriptor. 36 36 * @idx: Index of the zone relative to the first zone reported by the current 37 37 * sd_zbc_report_zones() call. 38 - * @cb: Callback function pointer. 39 - * @data: Second argument passed to @cb. 38 + * @args: report zones arguments (callback, etc) 40 39 * 41 40 * Return: Value returned by @cb. 42 41 * ··· 43 44 * call @cb(blk_zone, @data). 44 45 */ 45 46 static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64], 46 - unsigned int idx, report_zones_cb cb, void *data) 47 + unsigned int idx, struct blk_report_zones_args *args) 47 48 { 48 49 struct scsi_device *sdp = sdkp->device; 49 50 struct blk_zone zone = { 0 }; 50 51 sector_t start_lba, gran; 51 - int ret; 52 52 53 53 if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf))) 54 54 return -EINVAL; ··· 85 87 else 86 88 zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24])); 87 89 88 - ret = cb(&zone, idx, data); 89 - if (ret) 90 - return ret; 91 - 92 - return 0; 90 + return disk_report_zone(sdkp->disk, &zone, idx, args); 93 91 } 94 92 95 93 /** ··· 211 217 * @disk: Disk to report zones for. 212 218 * @sector: Start sector. 213 219 * @nr_zones: Maximum number of zones to report. 214 - * @cb: Callback function called to report zone information. 215 - * @data: Second argument passed to @cb. 220 + * @args: Callback arguments. 216 221 * 217 222 * Called by the block layer to iterate over zone information. See also the 218 223 * disk->fops->report_zones() calls in block/blk-zoned.c. 
219 224 */ 220 225 int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, 221 - unsigned int nr_zones, report_zones_cb cb, void *data) 226 + unsigned int nr_zones, 227 + struct blk_report_zones_args *args) 222 228 { 223 229 struct scsi_disk *sdkp = scsi_disk(disk); 224 230 sector_t lba = sectors_to_logical(sdkp->device, sector); ··· 277 283 } 278 284 279 285 ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx, 280 - cb, data); 286 + args); 281 287 if (ret) 282 288 goto out; 283 289
+6 -5
fs/btrfs/zoned.c
··· 264 264 } 265 265 } 266 266 267 - ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, 268 - copy_zone_info_cb, zones); 267 + ret = blkdev_report_zones_cached(device->bdev, pos >> SECTOR_SHIFT, 268 + *nr_zones, copy_zone_info_cb, zones); 269 269 if (ret < 0) { 270 270 btrfs_err(device->fs_info, 271 271 "zoned: failed to read zone %llu on %s (devid %llu)", ··· 494 494 case BLK_ZONE_COND_IMP_OPEN: 495 495 case BLK_ZONE_COND_EXP_OPEN: 496 496 case BLK_ZONE_COND_CLOSED: 497 + case BLK_ZONE_COND_ACTIVE: 497 498 __set_bit(nreported, zone_info->active_zones); 498 499 nactive++; 499 500 break; ··· 897 896 if (sb_zone + 1 >= nr_zones) 898 897 return -ENOENT; 899 898 900 - ret = blkdev_report_zones(bdev, zone_start_sector(sb_zone, bdev), 901 - BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb, 902 - zones); 899 + ret = blkdev_report_zones_cached(bdev, zone_start_sector(sb_zone, bdev), 900 + BTRFS_NR_SB_LOG_ZONES, 901 + copy_zone_info_cb, zones); 903 902 if (ret < 0) 904 903 return ret; 905 904 if (unlikely(ret != BTRFS_NR_SB_LOG_ZONES))
+1
fs/xfs/libxfs/xfs_zones.c
··· 95 95 case BLK_ZONE_COND_IMP_OPEN: 96 96 case BLK_ZONE_COND_EXP_OPEN: 97 97 case BLK_ZONE_COND_CLOSED: 98 + case BLK_ZONE_COND_ACTIVE: 98 99 return xfs_zone_validate_wp(zone, rtg, write_pointer); 99 100 case BLK_ZONE_COND_FULL: 100 101 return xfs_zone_validate_full(zone, rtg, write_pointer);
+1 -1
fs/xfs/xfs_zone_alloc.c
··· 1250 1250 trace_xfs_zones_mount(mp); 1251 1251 1252 1252 if (bdev_is_zoned(bt->bt_bdev)) { 1253 - error = blkdev_report_zones(bt->bt_bdev, 1253 + error = blkdev_report_zones_cached(bt->bt_bdev, 1254 1254 XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart), 1255 1255 mp->m_sb.sb_rgcount, xfs_get_zone_info_cb, &iz); 1256 1256 if (error < 0)
+20 -29
include/linux/blkdev.h
··· 38 38 struct kiocb; 39 39 struct pr_ops; 40 40 struct rq_qos; 41 + struct blk_report_zones_args; 41 42 struct blk_queue_stats; 42 43 struct blk_stat_callback; 43 44 struct blk_crypto_profile; ··· 196 195 unsigned int nr_zones; 197 196 unsigned int zone_capacity; 198 197 unsigned int last_zone_capacity; 199 - unsigned long __rcu *conv_zones_bitmap; 198 + u8 __rcu *zones_cond; 200 199 unsigned int zone_wplugs_hash_bits; 201 200 atomic_t nr_zone_wplugs; 202 201 spinlock_t zone_wplugs_lock; ··· 433 432 typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, 434 433 void *data); 435 434 435 + int disk_report_zone(struct gendisk *disk, struct blk_zone *zone, 436 + unsigned int idx, struct blk_report_zones_args *args); 437 + 438 + int blkdev_get_zone_info(struct block_device *bdev, sector_t sector, 439 + struct blk_zone *zone); 440 + 436 441 #define BLK_ALL_ZONES ((unsigned int)-1) 437 442 int blkdev_report_zones(struct block_device *bdev, sector_t sector, 443 + unsigned int nr_zones, report_zones_cb cb, void *data); 444 + int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector, 438 445 unsigned int nr_zones, report_zones_cb cb, void *data); 439 446 int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, 440 447 sector_t sectors, sector_t nr_sectors); ··· 930 921 { 931 922 return disk_zone_capacity(bdev->bd_disk, pos); 932 923 } 924 + 925 + bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector); 926 + 933 927 #else /* CONFIG_BLK_DEV_ZONED */ 934 928 static inline unsigned int disk_nr_zones(struct gendisk *disk) 935 929 { 936 930 return 0; 931 + } 932 + 933 + static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector) 934 + { 935 + return false; 937 936 } 938 937 939 938 static inline bool bio_needs_zone_write_plugging(struct bio *bio) ··· 1546 1529 return bdev_is_zone_start(bdev, sector); 1547 1530 } 1548 1531 1549 - /** 1550 - * bdev_zone_is_seq - check if a sector belongs to a sequential write 
zone 1551 - * @bdev: block device to check 1552 - * @sector: sector number 1553 - * 1554 - * Check if @sector on @bdev is contained in a sequential write required zone. 1555 - */ 1556 - static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector) 1557 - { 1558 - bool is_seq = false; 1559 - 1560 - #if IS_ENABLED(CONFIG_BLK_DEV_ZONED) 1561 - if (bdev_is_zoned(bdev)) { 1562 - struct gendisk *disk = bdev->bd_disk; 1563 - unsigned long *bitmap; 1564 - 1565 - rcu_read_lock(); 1566 - bitmap = rcu_dereference(disk->conv_zones_bitmap); 1567 - is_seq = !bitmap || 1568 - !test_bit(disk_zone_no(disk, sector), bitmap); 1569 - rcu_read_unlock(); 1570 - } 1571 - #endif 1572 - 1573 - return is_seq; 1574 - } 1575 - 1576 1532 int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector, 1577 1533 sector_t nr_sects, gfp_t gfp_mask); 1578 1534 ··· 1652 1662 /* this callback is with swap_lock and sometimes page table lock held */ 1653 1663 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1654 1664 int (*report_zones)(struct gendisk *, sector_t sector, 1655 - unsigned int nr_zones, report_zones_cb cb, void *data); 1665 + unsigned int nr_zones, 1666 + struct blk_report_zones_args *args); 1656 1667 char *(*devnode)(struct gendisk *disk, umode_t *mode); 1657 1668 /* returns the length of the identifier or a negative errno: */ 1658 1669 int (*get_unique_id)(struct gendisk *disk, u8 id[16],
+8 -2
include/linux/device-mapper.h
··· 538 538 #ifdef CONFIG_BLK_DEV_ZONED 539 539 struct dm_report_zones_args { 540 540 struct dm_target *tgt; 541 + struct gendisk *disk; 541 542 sector_t next_sector; 542 543 543 - void *orig_data; 544 - report_zones_cb orig_cb; 545 544 unsigned int zone_idx; 545 + 546 + /* for block layer ->report_zones */ 547 + struct blk_report_zones_args *rep_args; 548 + 549 + /* for internal users */ 550 + report_zones_cb cb; 551 + void *data; 546 552 547 553 /* must be filled by ->report_zones before calling dm_report_zones_cb */ 548 554 sector_t start;
+41 -5
include/uapi/linux/blkzoned.h
··· 48 48 * FINISH ZONE command. 49 49 * @BLK_ZONE_COND_READONLY: The zone is read-only. 50 50 * @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written). 51 + * @BLK_ZONE_COND_ACTIVE: The zone is either implicitly open, explicitly open, 52 + * or closed. 51 53 * 52 54 * The Zone Condition state machine in the ZBC/ZAC standards maps the above 53 55 * definitions as: ··· 63 61 * 64 62 * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should 65 63 * be considered invalid. 64 + * 65 + * The condition BLK_ZONE_COND_ACTIVE is used only with cached zone reports. 66 + * It is used to report any of the BLK_ZONE_COND_IMP_OPEN, 67 + * BLK_ZONE_COND_EXP_OPEN and BLK_ZONE_COND_CLOSED conditions. Conversely, a 68 + * regular zone report will never report a zone condition using 69 + * BLK_ZONE_COND_ACTIVE and instead use the conditions BLK_ZONE_COND_IMP_OPEN, 70 + * BLK_ZONE_COND_EXP_OPEN or BLK_ZONE_COND_CLOSED as reported by the device. 66 71 */ 67 72 enum blk_zone_cond { 68 73 BLK_ZONE_COND_NOT_WP = 0x0, ··· 80 71 BLK_ZONE_COND_READONLY = 0xD, 81 72 BLK_ZONE_COND_FULL = 0xE, 82 73 BLK_ZONE_COND_OFFLINE = 0xF, 74 + 75 + BLK_ZONE_COND_ACTIVE = 0xFF, 83 76 }; 84 77 85 78 /** 86 79 * enum blk_zone_report_flags - Feature flags of reported zone descriptors. 87 80 * 88 - * @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field. 81 + * @BLK_ZONE_REP_CAPACITY: Output only. Indicates that zone descriptors in a 82 + * zone report have a valid capacity field. 83 + * @BLK_ZONE_REP_CACHED: Input only. Indicates that the zone report should be 84 + * generated using cached zone information. In this case, 85 + * the implicit open, explicit open and closed zone 86 + * conditions are all reported with the 87 + * BLK_ZONE_COND_ACTIVE condition. 
89 88 */ 90 89 enum blk_zone_report_flags { 91 - BLK_ZONE_REP_CAPACITY = (1 << 0), 90 + /* Output flags */ 91 + BLK_ZONE_REP_CAPACITY = (1U << 0), 92 + 93 + /* Input flags */ 94 + BLK_ZONE_REP_CACHED = (1U << 31), 92 95 }; 93 96 94 97 /** ··· 143 122 * @sector: starting sector of report 144 123 * @nr_zones: IN maximum / OUT actual 145 124 * @flags: one or more flags as defined by enum blk_zone_report_flags. 125 + * @flags: one or more flags as defined by enum blk_zone_report_flags. 126 + * With BLKREPORTZONE, this field is ignored as an input and is valid 127 + * only as an output. Using BLKREPORTZONEV2, this field is used as both 128 + * input and output. 146 129 * @zones: Space to hold @nr_zones @zones entries on reply. 147 130 * 148 131 * The array of at most @nr_zones must follow this structure in memory. ··· 173 148 /** 174 149 * Zoned block device ioctl's: 175 150 * 176 - * @BLKREPORTZONE: Get zone information. Takes a zone report as argument. 177 - * The zone report will start from the zone containing the 178 - * sector specified in the report request structure. 151 + * @BLKREPORTZONE: Get zone information from a zoned device. Takes a zone report 152 + * as argument. The zone report will start from the zone 153 + * containing the sector specified in struct blk_zone_report. 154 + * The flags field of struct blk_zone_report is used as an 155 + * output only and ignored as an input. 156 + * DEPRECATED, use BLKREPORTZONEV2 instead. 157 + * @BLKREPORTZONEV2: Same as @BLKREPORTZONE but uses the flags field of 158 + * struct blk_zone_report as an input, allowing to get a zone 159 + * report using cached zone information if the flag 160 + * BLK_ZONE_REP_CACHED is set. In such case, the zone report 161 + * may include zones with the condition @BLK_ZONE_COND_ACTIVE 162 + * (c.f. the description of this condition above for more 163 + * details). 179 164 * @BLKRESETZONE: Reset the write pointer of the zones in the specified 180 165 * sector range. 
The sector range must be zone aligned. 181 166 * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors. ··· 204 169 #define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range) 205 170 #define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range) 206 171 #define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range) 172 + #define BLKREPORTZONEV2 _IOWR(0x12, 142, struct blk_zone_report) 207 173 208 174 #endif /* _UAPI_BLKZONED_H */
+1 -1
include/uapi/linux/fs.h
··· 298 298 #define BLKROTATIONAL _IO(0x12,126) 299 299 #define BLKZEROOUT _IO(0x12,127) 300 300 #define BLKGETDISKSEQ _IOR(0x12,128,__u64) 301 - /* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */ 301 + /* 130-136 and 142 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */ 302 302 /* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */ 303 303 #define BLKTRACESETUP2 _IOWR(0x12, 142, struct blk_user_trace_setup2) 304 304