Merge tag 'block-6.14-20250228' of git://git.kernel.dk/linux

+1 -1

block/bio.c

··· 77 77 struct kmem_cache *slab; 78 78 unsigned int slab_ref; 79 79 unsigned int slab_size; 80 - char name[8]; 80 + char name[12]; 81 81 }; 82 82 static DEFINE_MUTEX(bio_slab_lock); 83 83 static DEFINE_XARRAY(bio_slabs);

+1 -1

block/blk-merge.c

··· 329 329 330 330 if (nsegs < lim->max_segments && 331 331 bytes + bv.bv_len <= max_bytes && 332 - bv.bv_offset + bv.bv_len <= PAGE_SIZE) { 332 + bv.bv_offset + bv.bv_len <= lim->min_segment_size) { 333 333 nsegs++; 334 334 bytes += bv.bv_len; 335 335 } else {

+11 -3

block/blk-settings.c

··· 246 246 { 247 247 unsigned int max_hw_sectors; 248 248 unsigned int logical_block_sectors; 249 + unsigned long seg_size; 249 250 int err; 250 251 251 252 /* ··· 304 303 max_hw_sectors = min_not_zero(lim->max_hw_sectors, 305 304 lim->max_dev_sectors); 306 305 if (lim->max_user_sectors) { 307 - if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) 306 + if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE) 308 307 return -EINVAL; 309 308 lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors); 310 309 } else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) { ··· 342 341 */ 343 342 if (!lim->seg_boundary_mask) 344 343 lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; 345 - if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1)) 344 + if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1)) 346 345 return -EINVAL; 347 346 348 347 /* ··· 363 362 */ 364 363 if (!lim->max_segment_size) 365 364 lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; 366 - if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) 365 + if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE)) 367 366 return -EINVAL; 368 367 } 368 + 369 + /* setup min segment size for building new segment in fast path */ 370 + if (lim->seg_boundary_mask > lim->max_segment_size - 1) 371 + seg_size = lim->max_segment_size; 372 + else 373 + seg_size = lim->seg_boundary_mask + 1; 374 + lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE); 369 375 370 376 /* 371 377 * We require drivers to at least do logical block aligned I/O, but

+69 -7

block/blk-zoned.c

··· 410 410 } 411 411 } 412 412 hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]); 413 + atomic_inc(&disk->nr_zone_wplugs); 413 414 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); 414 415 415 416 return true; 416 417 } 417 418 418 - static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, 419 - sector_t sector) 419 + static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk, 420 + sector_t sector) 420 421 { 421 422 unsigned int zno = disk_zone_no(disk, sector); 422 423 unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits); ··· 436 435 rcu_read_unlock(); 437 436 438 437 return NULL; 438 + } 439 + 440 + static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, 441 + sector_t sector) 442 + { 443 + if (!atomic_read(&disk->nr_zone_wplugs)) 444 + return NULL; 445 + 446 + return disk_get_hashed_zone_wplug(disk, sector); 439 447 } 440 448 441 449 static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head) ··· 513 503 zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED; 514 504 spin_lock_irqsave(&disk->zone_wplugs_lock, flags); 515 505 hlist_del_init_rcu(&zwplug->node); 506 + atomic_dec(&disk->nr_zone_wplugs); 516 507 spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); 517 508 disk_put_zone_wplug(zwplug); 518 509 } ··· 604 593 { 605 594 struct bio *bio; 606 595 596 + if (bio_list_empty(&zwplug->bio_list)) 597 + return; 598 + 599 + pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n", 600 + zwplug->disk->disk_name, zwplug->zone_no); 607 601 while ((bio = bio_list_pop(&zwplug->bio_list))) 608 602 blk_zone_wplug_bio_io_error(zwplug, bio); 609 603 } ··· 1056 1040 return true; 1057 1041 } 1058 1042 1043 + static void blk_zone_wplug_handle_native_zone_append(struct bio *bio) 1044 + { 1045 + struct gendisk *disk = bio->bi_bdev->bd_disk; 1046 + struct blk_zone_wplug *zwplug; 1047 + unsigned long flags; 1048 + 1049 + /* 1050 + * We have native support for zone append operations, so we are not 
1051 + * going to handle @bio through plugging. However, we may already have a 1052 + * zone write plug for the target zone if that zone was previously 1053 + * partially written using regular writes. In such case, we risk leaving 1054 + * the plug in the disk hash table if the zone is fully written using 1055 + * zone append operations. Avoid this by removing the zone write plug. 1056 + */ 1057 + zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); 1058 + if (likely(!zwplug)) 1059 + return; 1060 + 1061 + spin_lock_irqsave(&zwplug->lock, flags); 1062 + 1063 + /* 1064 + * We are about to remove the zone write plug. But if the user 1065 + * (mistakenly) has issued regular writes together with native zone 1066 + * append, we must abort the writes as otherwise the plugged BIOs would 1067 + * not be executed by the plug BIO work as disk_get_zone_wplug() will 1068 + * return NULL after the plug is removed. Aborting the plugged write 1069 + * BIOs is consistent with the fact that these writes will most likely 1070 + * fail anyway as there are no ordering guarantees between zone append 1071 + * operations and regular write operations. 
1072 + */ 1073 + if (!bio_list_empty(&zwplug->bio_list)) { 1074 + pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n", 1075 + disk->disk_name, zwplug->zone_no); 1076 + disk_zone_wplug_abort(zwplug); 1077 + } 1078 + disk_remove_zone_wplug(disk, zwplug); 1079 + spin_unlock_irqrestore(&zwplug->lock, flags); 1080 + 1081 + disk_put_zone_wplug(zwplug); 1082 + } 1083 + 1059 1084 /** 1060 1085 * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging 1061 1086 * @bio: The BIO being submitted ··· 1153 1096 */ 1154 1097 switch (bio_op(bio)) { 1155 1098 case REQ_OP_ZONE_APPEND: 1156 - if (!bdev_emulates_zone_append(bdev)) 1099 + if (!bdev_emulates_zone_append(bdev)) { 1100 + blk_zone_wplug_handle_native_zone_append(bio); 1157 1101 return false; 1102 + } 1158 1103 fallthrough; 1159 1104 case REQ_OP_WRITE: 1160 1105 case REQ_OP_WRITE_ZEROES: ··· 1343 1284 { 1344 1285 unsigned int i; 1345 1286 1287 + atomic_set(&disk->nr_zone_wplugs, 0); 1346 1288 disk->zone_wplugs_hash_bits = 1347 1289 min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS); 1348 1290 ··· 1398 1338 } 1399 1339 } 1400 1340 1341 + WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs)); 1401 1342 kfree(disk->zone_wplugs_hash); 1402 1343 disk->zone_wplugs_hash = NULL; 1403 1344 disk->zone_wplugs_hash_bits = 0; ··· 1611 1550 } 1612 1551 1613 1552 /* 1614 - * We need to track the write pointer of all zones that are not 1615 - * empty nor full. So make sure we have a zone write plug for 1616 - * such zone if the device has a zone write plug hash table. 1553 + * If the device needs zone append emulation, we need to track the 1554 + * write pointer of all zones that are not empty nor full. So make sure 1555 + * we have a zone write plug for such zone if the device has a zone 1556 + * write plug hash table. 
1617 1557 */ 1618 - if (!disk->zone_wplugs_hash) 1558 + if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash) 1619 1559 return 0; 1620 1560 1621 1561 disk_zone_wplug_sync_wp_offset(disk, zone);

+7 -2

block/blk.h

··· 14 14 struct elevator_type; 15 15 16 16 #define BLK_DEV_MAX_SECTORS (LLONG_MAX >> 9) 17 + #define BLK_MIN_SEGMENT_SIZE 4096 17 18 18 19 /* Max future timer expiry for timeouts */ 19 20 #define BLK_MAX_TIMEOUT (5 * HZ) ··· 359 358 static inline bool bio_may_need_split(struct bio *bio, 360 359 const struct queue_limits *lim) 361 360 { 362 - return lim->chunk_sectors || bio->bi_vcnt != 1 || 363 - bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE; 361 + if (lim->chunk_sectors) 362 + return true; 363 + if (bio->bi_vcnt != 1) 364 + return true; 365 + return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > 366 + lim->min_segment_size; 364 367 } 365 368 366 369 /**

+5 -3

include/linux/blkdev.h

··· 196 196 unsigned int zone_capacity; 197 197 unsigned int last_zone_capacity; 198 198 unsigned long __rcu *conv_zones_bitmap; 199 - unsigned int zone_wplugs_hash_bits; 200 - spinlock_t zone_wplugs_lock; 199 + unsigned int zone_wplugs_hash_bits; 200 + atomic_t nr_zone_wplugs; 201 + spinlock_t zone_wplugs_lock; 201 202 struct mempool_s *zone_wplugs_pool; 202 - struct hlist_head *zone_wplugs_hash; 203 + struct hlist_head *zone_wplugs_hash; 203 204 struct workqueue_struct *zone_wplugs_wq; 204 205 #endif /* CONFIG_BLK_DEV_ZONED */ 205 206 ··· 368 367 unsigned int max_sectors; 369 368 unsigned int max_user_sectors; 370 369 unsigned int max_segment_size; 370 + unsigned int min_segment_size; 371 371 unsigned int physical_block_size; 372 372 unsigned int logical_block_size; 373 373 unsigned int alignment_offset;

Configure Feed

Configure Feed