Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

dm: limit swapping tables for devices with zone write plugs

dm_revalidate_zones() only allowed new or previously unzoned devices to
call blk_revalidate_disk_zones(). If the device was already zoned,
disk->nr_zones would always equal md->nr_zones, so dm_revalidate_zones()
returned without doing any work. This would make the zoned settings for
the device not match the new table. If the device had zone write plug
resources, it could run into errors like bdev_zone_is_seq() reading
invalid memory because disk->conv_zones_bitmap was the wrong size.

If the device doesn't have any zone write plug resources, calling
blk_revalidate_disk_zones() will always correctly update the device. If
blk_revalidate_disk_zones() fails, it can still overwrite or clear the
current disk->nr_zones value. In this case, DM must restore the previous
value of disk->nr_zones, so that the zoned settings will continue to
match the previous table that DM fell back to.

If the device already has zone write plug resources,
blk_revalidate_disk_zones() will not correctly update them if it is
called for arbitrary zoned device changes. Since there is not much need
for this ability, the easiest solution is to disallow any table reloads
that change the zoned settings for devices that already have zone write
plug resources. Specifically, if a device already has zone write plug
resources allocated, it can only switch to another zoned table that also
emulates zone append. Also, it cannot change the device size or the zone
size. However, a device can still switch to an error target.

Fixes: bb37d77239af2 ("dm: introduce zone append emulation")
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Tested-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

authored by

Benjamin Marzinski and committed by
Mikulas Patocka
121218be 37f53a2c

+73 -14
+36 -5
drivers/md/dm-table.c
··· 1491 1491 return true; 1492 1492 } 1493 1493 1494 + bool dm_table_is_wildcard(struct dm_table *t) 1495 + { 1496 + for (unsigned int i = 0; i < t->num_targets; i++) { 1497 + struct dm_target *ti = dm_table_get_target(t, i); 1498 + 1499 + if (!dm_target_is_wildcard(ti->type)) 1500 + return false; 1501 + } 1502 + 1503 + return true; 1504 + } 1505 + 1494 1506 static int device_not_zoned(struct dm_target *ti, struct dm_dev *dev, 1495 1507 sector_t start, sector_t len, void *data) 1496 1508 { ··· 1843 1831 return true; 1844 1832 } 1845 1833 1834 + bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size, 1835 + sector_t new_size) 1836 + { 1837 + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && dm_has_zone_plugs(t->md) && 1838 + old_size != new_size) { 1839 + DMWARN("%s: device has zone write plug resources. " 1840 + "Cannot change size", 1841 + dm_device_name(t->md)); 1842 + return false; 1843 + } 1844 + return true; 1845 + } 1846 + 1846 1847 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, 1847 1848 struct queue_limits *limits) 1848 1849 { ··· 1893 1868 limits->features &= ~BLK_FEAT_DAX; 1894 1869 1895 1870 /* For a zoned table, setup the zone related queue attributes. */ 1896 - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && 1897 - (limits->features & BLK_FEAT_ZONED)) { 1898 - r = dm_set_zones_restrictions(t, q, limits); 1899 - if (r) 1900 - return r; 1871 + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { 1872 + if (limits->features & BLK_FEAT_ZONED) { 1873 + r = dm_set_zones_restrictions(t, q, limits); 1874 + if (r) 1875 + return r; 1876 + } else if (dm_has_zone_plugs(t->md)) { 1877 + DMWARN("%s: device has zone write plug resources. " 1878 + "Cannot switch to non-zoned table.", 1879 + dm_device_name(t->md)); 1880 + return -EINVAL; 1881 + } 1901 1882 } 1902 1883 1903 1884 if (dm_table_supports_atomic_writes(t))
+26 -9
drivers/md/dm-zone.c
··· 160 160 { 161 161 struct mapped_device *md = t->md; 162 162 struct gendisk *disk = md->disk; 163 + unsigned int nr_zones = disk->nr_zones; 163 164 int ret; 164 165 165 166 if (!get_capacity(disk)) 166 167 return 0; 167 168 168 - /* Revalidate only if something changed. */ 169 - if (!disk->nr_zones || disk->nr_zones != md->nr_zones) { 170 - DMINFO("%s using %s zone append", 171 - disk->disk_name, 172 - queue_emulates_zone_append(q) ? "emulated" : "native"); 173 - md->nr_zones = 0; 174 - } 175 - 176 - if (md->nr_zones) 169 + /* 170 + * Do not revalidate if zone write plug resources have already 171 + * been allocated. 172 + */ 173 + if (dm_has_zone_plugs(md)) 177 174 return 0; 175 + 176 + DMINFO("%s using %s zone append", disk->disk_name, 177 + queue_emulates_zone_append(q) ? "emulated" : "native"); 178 178 179 179 /* 180 180 * Our table is not live yet. So the call to dm_get_live_table() ··· 189 189 190 190 if (ret) { 191 191 DMERR("Revalidate zones failed %d", ret); 192 + disk->nr_zones = nr_zones; 192 193 return ret; 193 194 } 194 195 ··· 386 385 lim->max_open_zones = 0; 387 386 lim->max_active_zones = 0; 388 387 lim->max_hw_zone_append_sectors = 0; 388 + lim->max_zone_append_sectors = 0; 389 389 lim->zone_write_granularity = 0; 390 390 lim->chunk_sectors = 0; 391 391 lim->features &= ~BLK_FEAT_ZONED; 392 392 return 0; 393 393 } 394 394 395 + if (get_capacity(disk) && dm_has_zone_plugs(t->md)) { 396 + if (q->limits.chunk_sectors != lim->chunk_sectors) { 397 + DMWARN("%s: device has zone write plug resources. " 398 + "Cannot change zone size", 399 + disk->disk_name); 400 + return -EINVAL; 401 + } 402 + if (lim->max_hw_zone_append_sectors != 0 && 403 + !dm_table_is_wildcard(t)) { 404 + DMWARN("%s: device has zone write plug resources. 
" 405 + "New table must emulate zone append", 406 + disk->disk_name); 407 + return -EINVAL; 408 + } 409 + } 395 410 /* 396 411 * Warn once (when the capacity is not yet set) if the mapped device is 397 412 * partially using zone resources of the target devices as that leads to
+6
drivers/md/dm.c
··· 2429 2429 size = dm_table_get_size(t); 2430 2430 2431 2431 old_size = dm_get_size(md); 2432 + 2433 + if (!dm_table_supports_size_change(t, old_size, size)) { 2434 + old_map = ERR_PTR(-EINVAL); 2435 + goto out; 2436 + } 2437 + 2432 2438 set_capacity(md->disk, size); 2433 2439 2434 2440 ret = dm_table_set_restrictions(t, md->queue, limits);
+5
drivers/md/dm.h
··· 58 58 void (*fn)(void *), void *context); 59 59 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); 60 60 bool dm_table_has_no_data_devices(struct dm_table *table); 61 + bool dm_table_is_wildcard(struct dm_table *t); 61 62 int dm_calculate_queue_limits(struct dm_table *table, 62 63 struct queue_limits *limits); 63 64 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, ··· 73 72 struct dm_target *dm_table_get_immutable_target(struct dm_table *t); 74 73 struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); 75 74 bool dm_table_request_based(struct dm_table *t); 75 + bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size, 76 + sector_t new_size); 76 77 77 78 void dm_lock_md_type(struct mapped_device *md); 78 79 void dm_unlock_md_type(struct mapped_device *md); ··· 114 111 int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, 115 112 sector_t sector, unsigned int nr_zones, 116 113 unsigned long *need_reset); 114 + #define dm_has_zone_plugs(md) ((md)->disk->zone_wplugs_hash != NULL) 117 115 #else 118 116 #define dm_blk_report_zones NULL 119 117 static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) 120 118 { 121 119 return false; 122 120 } 121 + #define dm_has_zone_plugs(md) false 123 122 #endif 124 123 125 124 /*