Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.14-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
"A few fixes and one patch to help with some block layer API cleanups:

- skip missing device when running fstrim

- fix unpersisted i_size on fsync after expanding truncate

- fix lock inversion problem when doing qgroup extent tracing

- replace bdgrab/bdput usage, replace gendisk by block_device"

* tag 'for-5.14-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: store a block_device in struct btrfs_ordered_extent
btrfs: fix lock inversion problem when doing qgroup extent tracing
btrfs: check for missing device in btrfs_trim_fs
btrfs: fix unpersisted i_size on fsync after expanding truncate

+79 -47
+3 -3
fs/btrfs/backref.c
··· 1488 1488 int btrfs_find_all_roots(struct btrfs_trans_handle *trans, 1489 1489 struct btrfs_fs_info *fs_info, u64 bytenr, 1490 1490 u64 time_seq, struct ulist **roots, 1491 - bool ignore_offset) 1491 + bool ignore_offset, bool skip_commit_root_sem) 1492 1492 { 1493 1493 int ret; 1494 1494 1495 - if (!trans) 1495 + if (!trans && !skip_commit_root_sem) 1496 1496 down_read(&fs_info->commit_root_sem); 1497 1497 ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr, 1498 1498 time_seq, roots, ignore_offset); 1499 - if (!trans) 1499 + if (!trans && !skip_commit_root_sem) 1500 1500 up_read(&fs_info->commit_root_sem); 1501 1501 return ret; 1502 1502 }
+2 -1
fs/btrfs/backref.h
··· 47 47 const u64 *extent_item_pos, bool ignore_offset); 48 48 int btrfs_find_all_roots(struct btrfs_trans_handle *trans, 49 49 struct btrfs_fs_info *fs_info, u64 bytenr, 50 - u64 time_seq, struct ulist **roots, bool ignore_offset); 50 + u64 time_seq, struct ulist **roots, bool ignore_offset, 51 + bool skip_commit_root_sem); 51 52 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 52 53 u32 name_len, unsigned long name_off, 53 54 struct extent_buffer *eb_in, u64 parent,
+2 -2
fs/btrfs/delayed-ref.c
··· 974 974 kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); 975 975 976 976 if (qrecord_inserted) 977 - btrfs_qgroup_trace_extent_post(fs_info, record); 977 + btrfs_qgroup_trace_extent_post(trans, record); 978 978 979 979 return 0; 980 980 } ··· 1069 1069 1070 1070 1071 1071 if (qrecord_inserted) 1072 - return btrfs_qgroup_trace_extent_post(fs_info, record); 1072 + return btrfs_qgroup_trace_extent_post(trans, record); 1073 1073 return 0; 1074 1074 } 1075 1075
+3
fs/btrfs/extent-tree.c
··· 6019 6019 mutex_lock(&fs_info->fs_devices->device_list_mutex); 6020 6020 devices = &fs_info->fs_devices->devices; 6021 6021 list_for_each_entry(device, devices, dev_list) { 6022 + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) 6023 + continue; 6024 + 6022 6025 ret = btrfs_trim_free_extents(device, &group_trimmed); 6023 6026 if (ret) { 6024 6027 dev_failed++;
+1 -1
fs/btrfs/inode.c
··· 2992 2992 goto out; 2993 2993 } 2994 2994 2995 - if (ordered_extent->disk) 2995 + if (ordered_extent->bdev) 2996 2996 btrfs_rewrite_logical_zoned(ordered_extent); 2997 2997 2998 2998 btrfs_free_io_failure_record(inode, start, end);
-2
fs/btrfs/ordered-data.c
··· 190 190 entry->truncated_len = (u64)-1; 191 191 entry->qgroup_rsv = ret; 192 192 entry->physical = (u64)-1; 193 - entry->disk = NULL; 194 - entry->partno = (u8)-1; 195 193 196 194 ASSERT(type == BTRFS_ORDERED_REGULAR || 197 195 type == BTRFS_ORDERED_NOCOW ||
+1 -2
fs/btrfs/ordered-data.h
··· 145 145 * command in a workqueue context 146 146 */ 147 147 u64 physical; 148 - struct gendisk *disk; 149 - u8 partno; 148 + struct block_device *bdev; 150 149 }; 151 150 152 151 /*
+30 -8
fs/btrfs/qgroup.c
··· 1704 1704 return 0; 1705 1705 } 1706 1706 1707 - int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 1707 + int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, 1708 1708 struct btrfs_qgroup_extent_record *qrecord) 1709 1709 { 1710 1710 struct ulist *old_root; 1711 1711 u64 bytenr = qrecord->bytenr; 1712 1712 int ret; 1713 1713 1714 - ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false); 1714 + /* 1715 + * We are always called in a context where we are already holding a 1716 + * transaction handle. Often we are called when adding a data delayed 1717 + * reference from btrfs_truncate_inode_items() (truncating or unlinking), 1718 + * in which case we will be holding a write lock on extent buffer from a 1719 + * subvolume tree. In this case we can't allow btrfs_find_all_roots() to 1720 + * acquire fs_info->commit_root_sem, because that is a higher level lock 1721 + * that must be acquired before locking any extent buffers. 1722 + * 1723 + * So we want btrfs_find_all_roots() to not acquire the commit_root_sem 1724 + * but we can't pass it a non-NULL transaction handle, because otherwise 1725 + * it would not use commit roots and would lock extent buffers, causing 1726 + * a deadlock if it ends up trying to read lock the same extent buffer 1727 + * that was previously write locked at btrfs_truncate_inode_items(). 1728 + * 1729 + * So pass a NULL transaction handle to btrfs_find_all_roots() and 1730 + * explicitly tell it to not acquire the commit_root_sem - if we are 1731 + * holding a transaction handle we don't need its protection. 
1732 + */ 1733 + ASSERT(trans != NULL); 1734 + 1735 + ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root, 1736 + false, true); 1715 1737 if (ret < 0) { 1716 - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1717 - btrfs_warn(fs_info, 1738 + trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1739 + btrfs_warn(trans->fs_info, 1718 1740 "error accounting new delayed refs extent (err code: %d), quota inconsistent", 1719 1741 ret); 1720 1742 return 0; ··· 1780 1758 kfree(record); 1781 1759 return 0; 1782 1760 } 1783 - return btrfs_qgroup_trace_extent_post(fs_info, record); 1761 + return btrfs_qgroup_trace_extent_post(trans, record); 1784 1762 } 1785 1763 1786 1764 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, ··· 2651 2629 /* Search commit root to find old_roots */ 2652 2630 ret = btrfs_find_all_roots(NULL, fs_info, 2653 2631 record->bytenr, 0, 2654 - &record->old_roots, false); 2632 + &record->old_roots, false, false); 2655 2633 if (ret < 0) 2656 2634 goto cleanup; 2657 2635 } ··· 2667 2645 * current root. It's safe inside commit_transaction(). 2668 2646 */ 2669 2647 ret = btrfs_find_all_roots(trans, fs_info, 2670 - record->bytenr, BTRFS_SEQ_LAST, &new_roots, false); 2648 + record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false); 2671 2649 if (ret < 0) 2672 2650 goto cleanup; 2673 2651 if (qgroup_to_skip) { ··· 3201 3179 num_bytes = found.offset; 3202 3180 3203 3181 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 3204 - &roots, false); 3182 + &roots, false, false); 3205 3183 if (ret < 0) 3206 3184 goto out; 3207 3185 /* For rescan, just pass old_roots as NULL */
+1 -1
fs/btrfs/qgroup.h
··· 298 298 * using current root, then we can move all expensive backref walk out of 299 299 * transaction committing, but not now as qgroup accounting will be wrong again. 300 300 */ 301 - int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 301 + int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, 302 302 struct btrfs_qgroup_extent_record *qrecord); 303 303 304 304 /*
+10 -10
fs/btrfs/tests/qgroup-tests.c
··· 224 224 * quota. 225 225 */ 226 226 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, 227 - false); 227 + false, false); 228 228 if (ret) { 229 229 ulist_free(old_roots); 230 230 test_err("couldn't find old roots: %d", ret); ··· 237 237 return ret; 238 238 239 239 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, 240 - false); 240 + false, false); 241 241 if (ret) { 242 242 ulist_free(old_roots); 243 243 ulist_free(new_roots); ··· 261 261 new_roots = NULL; 262 262 263 263 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, 264 - false); 264 + false, false); 265 265 if (ret) { 266 266 ulist_free(old_roots); 267 267 test_err("couldn't find old roots: %d", ret); ··· 273 273 return -EINVAL; 274 274 275 275 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, 276 - false); 276 + false, false); 277 277 if (ret) { 278 278 ulist_free(old_roots); 279 279 ulist_free(new_roots); ··· 325 325 } 326 326 327 327 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, 328 - false); 328 + false, false); 329 329 if (ret) { 330 330 ulist_free(old_roots); 331 331 test_err("couldn't find old roots: %d", ret); ··· 338 338 return ret; 339 339 340 340 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, 341 - false); 341 + false, false); 342 342 if (ret) { 343 343 ulist_free(old_roots); 344 344 ulist_free(new_roots); ··· 360 360 } 361 361 362 362 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, 363 - false); 363 + false, false); 364 364 if (ret) { 365 365 ulist_free(old_roots); 366 366 test_err("couldn't find old roots: %d", ret); ··· 373 373 return ret; 374 374 375 375 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, 376 - false); 376 + false, false); 377 377 if (ret) { 378 378 ulist_free(old_roots); 379 379 ulist_free(new_roots); ··· 401 401 } 402 402 403 403 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, 404 - false); 404 
+ false, false); 405 405 if (ret) { 406 406 ulist_free(old_roots); 407 407 test_err("couldn't find old roots: %d", ret); ··· 414 414 return ret; 415 415 416 416 ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, 417 - false); 417 + false, false); 418 418 if (ret) { 419 419 ulist_free(old_roots); 420 420 ulist_free(new_roots);
+22 -9
fs/btrfs/tree-log.c
··· 5526 5526 spin_lock(&inode->lock); 5527 5527 inode->logged_trans = trans->transid; 5528 5528 /* 5529 - * Don't update last_log_commit if we logged that an inode exists 5530 - * after it was loaded to memory (full_sync bit set). 5531 - * This is to prevent data loss when we do a write to the inode, 5532 - * then the inode gets evicted after all delalloc was flushed, 5533 - * then we log it exists (due to a rename for example) and then 5534 - * fsync it. This last fsync would do nothing (not logging the 5535 - * extents previously written). 5529 + * Don't update last_log_commit if we logged that an inode exists. 5530 + * We do this for two reasons: 5531 + * 5532 + * 1) We might have had buffered writes to this inode that were 5533 + * flushed and had their ordered extents completed in this 5534 + * transaction, but we did not previously log the inode with 5535 + * LOG_INODE_ALL. Later the inode was evicted and after that 5536 + * it was loaded again and this LOG_INODE_EXISTS log operation 5537 + * happened. We must make sure that if an explicit fsync against 5538 + * the inode is performed later, it logs the new extents, an 5539 + * updated inode item, etc, and syncs the log. The same logic 5540 + * applies to direct IO writes instead of buffered writes. 5541 + * 5542 + * 2) When we log the inode with LOG_INODE_EXISTS, its inode item 5543 + * is logged with an i_size of 0 or whatever value was logged 5544 + * before. If later the i_size of the inode is increased by a 5545 + * truncate operation, the log is synced through an fsync of 5546 + * some other inode and then finally an explicit fsync against 5547 + * this inode is made, we must make sure this fsync logs the 5548 + * inode with the new i_size, the hole between old i_size and 5549 + * the new i_size, and syncs the log. 
5536 5550 */ 5537 - if (inode_only != LOG_INODE_EXISTS || 5538 - !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags)) 5551 + if (inode_only != LOG_INODE_EXISTS) 5539 5552 inode->last_log_commit = inode->last_sub_trans; 5540 5553 spin_unlock(&inode->lock); 5541 5554 }
+4 -8
fs/btrfs/zoned.c
··· 1349 1349 return; 1350 1350 1351 1351 ordered->physical = physical; 1352 - ordered->disk = bio->bi_bdev->bd_disk; 1353 - ordered->partno = bio->bi_bdev->bd_partno; 1352 + ordered->bdev = bio->bi_bdev; 1354 1353 1355 1354 btrfs_put_ordered_extent(ordered); 1356 1355 } ··· 1361 1362 struct extent_map_tree *em_tree; 1362 1363 struct extent_map *em; 1363 1364 struct btrfs_ordered_sum *sum; 1364 - struct block_device *bdev; 1365 1365 u64 orig_logical = ordered->disk_bytenr; 1366 1366 u64 *logical = NULL; 1367 1367 int nr, stripe_len; 1368 1368 1369 1369 /* Zoned devices should not have partitions. So, we can assume it is 0 */ 1370 - ASSERT(ordered->partno == 0); 1371 - bdev = bdgrab(ordered->disk->part0); 1372 - if (WARN_ON(!bdev)) 1370 + ASSERT(!bdev_is_partition(ordered->bdev)); 1371 + if (WARN_ON(!ordered->bdev)) 1373 1372 return; 1374 1373 1375 - if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev, 1374 + if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev, 1376 1375 ordered->physical, &logical, &nr, 1377 1376 &stripe_len))) 1378 1377 goto out; ··· 1399 1402 1400 1403 out: 1401 1404 kfree(logical); 1402 - bdput(bdev); 1403 1405 } 1404 1406 1405 1407 bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,