Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
"A few more fixes to zoned mode and one regression fix for chunk limit:

- Zoned mode fixes:
  - fix how wait/wake up is done when finishing a zone
  - fix zone append limit in emulated mode
  - fix mount on devices with conventional zones

- fix a regression: the user-settable data chunk limit was accidentally
  lowered, which causes allocation problems on some profiles (raid0,
  raid1)"

* tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: fix the max chunk size and stripe length calculation
btrfs: zoned: fix mounting with conventional zones
btrfs: zoned: set pseudo max append zone limit in zone emulation mode
btrfs: zoned: fix API misuse of zone finish waiting

+60 -54
-2
fs/btrfs/ctree.h
··· 1088 1088 1089 1089 spinlock_t zone_active_bgs_lock; 1090 1090 struct list_head zone_active_bgs; 1091 - /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */ 1092 - wait_queue_head_t zone_finish_wait; 1093 1091 1094 1092 /* Updates are not protected by any lock */ 1095 1093 struct btrfs_commit_stats commit_stats;
-1
fs/btrfs/disk-io.c
··· 3068 3068 init_waitqueue_head(&fs_info->transaction_blocked_wait); 3069 3069 init_waitqueue_head(&fs_info->async_submit_wait); 3070 3070 init_waitqueue_head(&fs_info->delayed_iputs_wait); 3071 - init_waitqueue_head(&fs_info->zone_finish_wait); 3072 3071 3073 3072 /* Usable values until the real ones are cached from the superblock */ 3074 3073 fs_info->nodesize = 4096;
+3 -4
fs/btrfs/inode.c
··· 1644 1644 done_offset = end; 1645 1645 1646 1646 if (done_offset == start) { 1647 - struct btrfs_fs_info *info = inode->root->fs_info; 1648 - 1649 - wait_var_event(&info->zone_finish_wait, 1650 - !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags)); 1647 + wait_on_bit_io(&inode->root->fs_info->flags, 1648 + BTRFS_FS_NEED_ZONE_FINISH, 1649 + TASK_UNINTERRUPTIBLE); 1651 1650 continue; 1652 1651 } 1653 1652
+1 -1
fs/btrfs/space-info.c
··· 199 199 ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK); 200 200 201 201 if (flags & BTRFS_BLOCK_GROUP_DATA) 202 - return SZ_1G; 202 + return BTRFS_MAX_DATA_CHUNK_SIZE; 203 203 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 204 204 return SZ_32M; 205 205
+3
fs/btrfs/volumes.c
··· 5267 5267 ctl->stripe_size); 5268 5268 } 5269 5269 5270 + /* Stripe size should not go beyond 1G. */ 5271 + ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G); 5272 + 5270 5273 /* Align to BTRFS_STRIPE_LEN */ 5271 5274 ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN); 5272 5275 ctl->chunk_size = ctl->stripe_size * data_stripes;
+53 -46
fs/btrfs/zoned.c
··· 421 421 * since btrfs adds the pages one by one to a bio, and btrfs cannot 422 422 * increase the metadata reservation even if it increases the number of 423 423 * extents, it is safe to stick with the limit. 424 + * 425 + * With the zoned emulation, we can have non-zoned device on the zoned 426 + * mode. In this case, we don't have a valid max zone append size. So, 427 + * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size. 424 428 */ 425 - zone_info->max_zone_append_size = 426 - min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT, 427 - (u64)bdev_max_segments(bdev) << PAGE_SHIFT); 429 + if (bdev_is_zoned(bdev)) { 430 + zone_info->max_zone_append_size = min_t(u64, 431 + (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT, 432 + (u64)bdev_max_segments(bdev) << PAGE_SHIFT); 433 + } else { 434 + zone_info->max_zone_append_size = 435 + (u64)bdev_max_segments(bdev) << PAGE_SHIFT; 436 + } 428 437 if (!IS_ALIGNED(nr_sectors, zone_sectors)) 429 438 zone_info->nr_zones++; 430 439 ··· 1187 1178 * offset. 1188 1179 */ 1189 1180 static int calculate_alloc_pointer(struct btrfs_block_group *cache, 1190 - u64 *offset_ret) 1181 + u64 *offset_ret, bool new) 1191 1182 { 1192 1183 struct btrfs_fs_info *fs_info = cache->fs_info; 1193 1184 struct btrfs_root *root; ··· 1196 1187 struct btrfs_key found_key; 1197 1188 int ret; 1198 1189 u64 length; 1190 + 1191 + /* 1192 + * Avoid tree lookups for a new block group, there's no use for it. 1193 + * It must always be 0. 1194 + * 1195 + * Also, we have a lock chain of extent buffer lock -> chunk mutex. 1196 + * For new a block group, this function is called from 1197 + * btrfs_make_block_group() which is already taking the chunk mutex. 1198 + * Thus, we cannot call calculate_alloc_pointer() which takes extent 1199 + * buffer locks to avoid deadlock. 
1200 + */ 1201 + if (new) { 1202 + *offset_ret = 0; 1203 + return 0; 1204 + } 1199 1205 1200 1206 path = btrfs_alloc_path(); 1201 1207 if (!path) ··· 1347 1323 else 1348 1324 num_conventional++; 1349 1325 1326 + /* 1327 + * Consider a zone as active if we can allow any number of 1328 + * active zones. 1329 + */ 1330 + if (!device->zone_info->max_active_zones) 1331 + __set_bit(i, active); 1332 + 1350 1333 if (!is_sequential) { 1351 1334 alloc_offsets[i] = WP_CONVENTIONAL; 1352 1335 continue; ··· 1420 1389 __set_bit(i, active); 1421 1390 break; 1422 1391 } 1423 - 1424 - /* 1425 - * Consider a zone as active if we can allow any number of 1426 - * active zones. 1427 - */ 1428 - if (!device->zone_info->max_active_zones) 1429 - __set_bit(i, active); 1430 1392 } 1431 1393 1432 1394 if (num_sequential > 0) 1433 1395 cache->seq_zone = true; 1434 1396 1435 1397 if (num_conventional > 0) { 1436 - /* 1437 - * Avoid calling calculate_alloc_pointer() for new BG. It 1438 - * is no use for new BG. It must be always 0. 1439 - * 1440 - * Also, we have a lock chain of extent buffer lock -> 1441 - * chunk mutex. For new BG, this function is called from 1442 - * btrfs_make_block_group() which is already taking the 1443 - * chunk mutex. Thus, we cannot call 1444 - * calculate_alloc_pointer() which takes extent buffer 1445 - * locks to avoid deadlock. 
1446 - */ 1447 - 1448 1398 /* Zone capacity is always zone size in emulation */ 1449 1399 cache->zone_capacity = cache->length; 1450 - if (new) { 1451 - cache->alloc_offset = 0; 1452 - goto out; 1453 - } 1454 - ret = calculate_alloc_pointer(cache, &last_alloc); 1455 - if (ret || map->num_stripes == num_conventional) { 1456 - if (!ret) 1457 - cache->alloc_offset = last_alloc; 1458 - else 1459 - btrfs_err(fs_info, 1400 + ret = calculate_alloc_pointer(cache, &last_alloc, new); 1401 + if (ret) { 1402 + btrfs_err(fs_info, 1460 1403 "zoned: failed to determine allocation offset of bg %llu", 1461 - cache->start); 1404 + cache->start); 1405 + goto out; 1406 + } else if (map->num_stripes == num_conventional) { 1407 + cache->alloc_offset = last_alloc; 1408 + cache->zone_is_active = 1; 1462 1409 goto out; 1463 1410 } 1464 1411 } ··· 1504 1495 goto out; 1505 1496 } 1506 1497 1507 - if (cache->zone_is_active) { 1508 - btrfs_get_block_group(cache); 1509 - spin_lock(&fs_info->zone_active_bgs_lock); 1510 - list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs); 1511 - spin_unlock(&fs_info->zone_active_bgs_lock); 1512 - } 1513 - 1514 1498 out: 1515 1499 if (cache->alloc_offset > fs_info->zone_size) { 1516 1500 btrfs_err(fs_info, ··· 1528 1526 ret = -EIO; 1529 1527 } 1530 1528 1531 - if (!ret) 1529 + if (!ret) { 1532 1530 cache->meta_write_pointer = cache->alloc_offset + cache->start; 1533 - 1534 - if (ret) { 1531 + if (cache->zone_is_active) { 1532 + btrfs_get_block_group(cache); 1533 + spin_lock(&fs_info->zone_active_bgs_lock); 1534 + list_add_tail(&cache->active_bg_list, 1535 + &fs_info->zone_active_bgs); 1536 + spin_unlock(&fs_info->zone_active_bgs_lock); 1537 + } 1538 + } else { 1535 1539 kfree(cache->physical_map); 1536 1540 cache->physical_map = NULL; 1537 1541 } ··· 2015 2007 /* For active_bg_list */ 2016 2008 btrfs_put_block_group(block_group); 2017 2009 2018 - clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); 2019 - wake_up_all(&fs_info->zone_finish_wait); 
2010 + clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); 2020 2011 2021 2012 return 0; 2022 2013 }