Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.17-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
"A few fixes and error handling improvements:

- fix deadlock between quota disable and qgroup rescan worker

- fix use-after-free after failure to create a snapshot

- skip warning on unmount after log cleanup failure

- don't start transaction for scrub if the fs is mounted read-only

- tree checker verifies item sizes"

* tag 'for-5.17-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: skip reserved bytes warning on unmount after log cleanup failure
btrfs: fix use of uninitialized variable at rm device ioctl
btrfs: fix use-after-free after failure to create a snapshot
btrfs: tree-checker: check item_size for dev_item
btrfs: tree-checker: check item_size for inode_item
btrfs: fix deadlock between quota disable and qgroup rescan worker
btrfs: don't start transaction for scrub if the fs is mounted read-only

+128 -9
+37 -2
fs/btrfs/block-group.c
··· 124 124 { 125 125 if (refcount_dec_and_test(&cache->refs)) { 126 126 WARN_ON(cache->pinned > 0); 127 - WARN_ON(cache->reserved > 0); 127 + /* 128 + * If there was a failure to cleanup a log tree, very likely due 129 + * to an IO failure on a writeback attempt of one or more of its 130 + * extent buffers, we could not do proper (and cheap) unaccounting 131 + * of their reserved space, so don't warn on reserved > 0 in that 132 + * case. 133 + */ 134 + if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) || 135 + !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info)) 136 + WARN_ON(cache->reserved > 0); 128 137 129 138 /* 130 139 * A block_group shouldn't be on the discard_list anymore. ··· 2553 2544 int ret; 2554 2545 bool dirty_bg_running; 2555 2546 2547 + /* 2548 + * This can only happen when we are doing read-only scrub on read-only 2549 + * mount. 2550 + * In that case we should not start a new transaction on read-only fs. 2551 + * Thus here we skip all chunk allocations. 2552 + */ 2553 + if (sb_rdonly(fs_info->sb)) { 2554 + mutex_lock(&fs_info->ro_block_group_mutex); 2555 + ret = inc_block_group_ro(cache, 0); 2556 + mutex_unlock(&fs_info->ro_block_group_mutex); 2557 + return ret; 2558 + } 2559 + 2556 2560 do { 2557 2561 trans = btrfs_join_transaction(root); 2558 2562 if (IS_ERR(trans)) ··· 3996 3974 * important and indicates a real bug if this happens. 3997 3975 */ 3998 3976 if (WARN_ON(space_info->bytes_pinned > 0 || 3999 - space_info->bytes_reserved > 0 || 4000 3977 space_info->bytes_may_use > 0)) 4001 3978 btrfs_dump_space_info(info, space_info, 0, 0); 3979 + 3980 + /* 3981 + * If there was a failure to cleanup a log tree, very likely due 3982 + * to an IO failure on a writeback attempt of one or more of its 3983 + * extent buffers, we could not do proper (and cheap) unaccounting 3984 + * of their reserved space, so don't warn on bytes_reserved > 0 in 3985 + * that case. 3986 + */ 3987 + if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) || 3988 + !BTRFS_FS_LOG_CLEANUP_ERROR(info)) { 3989 + if (WARN_ON(space_info->bytes_reserved > 0)) 3990 + btrfs_dump_space_info(info, space_info, 0, 0); 3991 + } 3992 + 4002 3993 WARN_ON(space_info->reclaim_size > 0); 4003 3994 list_del(&space_info->list); 4004 3995 btrfs_sysfs_remove_space_info(space_info);
+6
fs/btrfs/ctree.h
··· 145 145 BTRFS_FS_STATE_DUMMY_FS_INFO, 146 146 147 147 BTRFS_FS_STATE_NO_CSUMS, 148 + 149 + /* Indicates there was an error cleaning up a log tree. */ 150 + BTRFS_FS_STATE_LOG_CLEANUP_ERROR, 148 151 }; 149 152 150 153 #define BTRFS_BACKREF_REV_MAX 256 ··· 3596 3593 3597 3594 #define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \ 3598 3595 &(fs_info)->fs_state))) 3596 + #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \ 3597 + (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ 3598 + &(fs_info)->fs_state))) 3599 3599 3600 3600 __printf(5, 6) 3601 3601 __cold
+2 -5
fs/btrfs/ioctl.c
··· 805 805 goto fail; 806 806 } 807 807 808 - spin_lock(&fs_info->trans_lock); 809 - list_add(&pending_snapshot->list, 810 - &trans->transaction->pending_snapshots); 811 - spin_unlock(&fs_info->trans_lock); 808 + trans->pending_snapshot = pending_snapshot; 812 809 813 810 ret = btrfs_commit_transaction(trans); 814 811 if (ret) ··· 3351 3354 struct block_device *bdev = NULL; 3352 3355 fmode_t mode; 3353 3356 int ret; 3354 - bool cancel; 3357 + bool cancel = false; 3355 3358 3356 3359 if (!capable(CAP_SYS_ADMIN)) 3357 3360 return -EPERM;
+19 -2
fs/btrfs/qgroup.c
··· 1185 1185 struct btrfs_trans_handle *trans = NULL; 1186 1186 int ret = 0; 1187 1187 1188 + /* 1189 + * We need to have subvol_sem write locked, to prevent races between 1190 + * concurrent tasks trying to disable quotas, because we will unlock 1191 + * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. 1192 + */ 1193 + lockdep_assert_held_write(&fs_info->subvol_sem); 1194 + 1188 1195 mutex_lock(&fs_info->qgroup_ioctl_lock); 1189 1196 if (!fs_info->quota_root) 1190 1197 goto out; 1198 + 1199 + /* 1200 + * Request qgroup rescan worker to complete and wait for it. This wait 1201 + * must be done before transaction start for quota disable since it may 1202 + * deadlock with transaction by the qgroup rescan worker. 1203 + */ 1204 + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1205 + btrfs_qgroup_wait_for_completion(fs_info, false); 1191 1206 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1192 1207 1193 1208 /* ··· 1220 1205 if (IS_ERR(trans)) { 1221 1206 ret = PTR_ERR(trans); 1222 1207 trans = NULL; 1208 + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1223 1209 goto out; 1224 1210 } 1225 1211 1226 1212 if (!fs_info->quota_root) 1227 1213 goto out; 1228 1214 1229 - clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1230 - btrfs_qgroup_wait_for_completion(fs_info, false); 1231 1215 spin_lock(&fs_info->qgroup_lock); 1232 1216 quota_root = fs_info->quota_root; 1233 1217 fs_info->quota_root = NULL; ··· 3397 3383 btrfs_warn(fs_info, 3398 3384 "qgroup rescan init failed, qgroup is not enabled"); 3399 3385 ret = -EINVAL; 3386 + } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 3387 + /* Quota disable is in progress */ 3388 + ret = -EBUSY; 3400 3389 } 3401 3390 3402 3391 if (ret) {
+24
fs/btrfs/transaction.c
··· 2000 2000 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); 2001 2001 } 2002 2002 2003 + /* 2004 + * Add a pending snapshot associated with the given transaction handle to the 2005 + * respective handle. This must be called after the transaction commit started 2006 + * and while holding fs_info->trans_lock. 2007 + * This serves to guarantee a caller of btrfs_commit_transaction() that it can 2008 + * safely free the pending snapshot pointer in case btrfs_commit_transaction() 2009 + * returns an error. 2010 + */ 2011 + static void add_pending_snapshot(struct btrfs_trans_handle *trans) 2012 + { 2013 + struct btrfs_transaction *cur_trans = trans->transaction; 2014 + 2015 + if (!trans->pending_snapshot) 2016 + return; 2017 + 2018 + lockdep_assert_held(&trans->fs_info->trans_lock); 2019 + ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START); 2020 + 2021 + list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots); 2022 + } 2023 + 2003 2024 int btrfs_commit_transaction(struct btrfs_trans_handle *trans) 2004 2025 { 2005 2026 struct btrfs_fs_info *fs_info = trans->fs_info; ··· 2093 2072 spin_lock(&fs_info->trans_lock); 2094 2073 if (cur_trans->state >= TRANS_STATE_COMMIT_START) { 2095 2074 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED; 2075 + 2076 + add_pending_snapshot(trans); 2096 2077 2097 2078 spin_unlock(&fs_info->trans_lock); 2098 2079 refcount_inc(&cur_trans->use_count); ··· 2186 2163 * COMMIT_DOING so make sure to wait for num_writers to == 1 again. 2187 2164 */ 2188 2165 spin_lock(&fs_info->trans_lock); 2166 + add_pending_snapshot(trans); 2189 2167 cur_trans->state = TRANS_STATE_COMMIT_DOING; 2190 2168 spin_unlock(&fs_info->trans_lock); 2191 2169 wait_event(cur_trans->writer_wait,
+2
fs/btrfs/transaction.h
··· 123 123 struct btrfs_transaction *transaction; 124 124 struct btrfs_block_rsv *block_rsv; 125 125 struct btrfs_block_rsv *orig_rsv; 126 + /* Set by a task that wants to create a snapshot. */ 127 + struct btrfs_pending_snapshot *pending_snapshot; 126 128 refcount_t use_count; 127 129 unsigned int type; 128 130 /*
+15
fs/btrfs/tree-checker.c
··· 965 965 struct btrfs_key *key, int slot) 966 966 { 967 967 struct btrfs_dev_item *ditem; 968 + const u32 item_size = btrfs_item_size(leaf, slot); 968 969 969 970 if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) { 970 971 dev_item_err(leaf, slot, ··· 973 972 key->objectid, BTRFS_DEV_ITEMS_OBJECTID); 974 973 return -EUCLEAN; 975 974 } 975 + 976 + if (unlikely(item_size != sizeof(*ditem))) { 977 + dev_item_err(leaf, slot, "invalid item size: has %u expect %zu", 978 + item_size, sizeof(*ditem)); 979 + return -EUCLEAN; 980 + } 981 + 976 982 ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); 977 983 if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) { 978 984 dev_item_err(leaf, slot, ··· 1015 1007 struct btrfs_inode_item *iitem; 1016 1008 u64 super_gen = btrfs_super_generation(fs_info->super_copy); 1017 1009 u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); 1010 + const u32 item_size = btrfs_item_size(leaf, slot); 1018 1011 u32 mode; 1019 1012 int ret; 1020 1013 u32 flags; ··· 1024 1015 ret = check_inode_key(leaf, key, slot); 1025 1016 if (unlikely(ret < 0)) 1026 1017 return ret; 1018 + 1019 + if (unlikely(item_size != sizeof(*iitem))) { 1020 + generic_err(leaf, slot, "invalid item size: has %u expect %zu", 1021 + item_size, sizeof(*iitem)); 1022 + return -EUCLEAN; 1023 + } 1027 1024 1028 1025 iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); 1029 1026
+23
fs/btrfs/tree-log.c
··· 3414 3414 if (log->node) { 3415 3415 ret = walk_log_tree(trans, log, &wc); 3416 3416 if (ret) { 3417 + /* 3418 + * We weren't able to traverse the entire log tree, the 3419 + * typical scenario is getting an -EIO when reading an 3420 + * extent buffer of the tree, due to a previous writeback 3421 + * failure of it. 3422 + */ 3423 + set_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, 3424 + &log->fs_info->fs_state); 3425 + 3426 + /* 3427 + * Some extent buffers of the log tree may still be dirty 3428 + * and not yet written back to storage, because we may 3429 + * have updates to a log tree without syncing a log tree, 3430 + * such as during rename and link operations. So flush 3431 + * them out and wait for their writeback to complete, so 3432 + * that we properly cleanup their state and pages. 3433 + */ 3434 + btrfs_write_marked_extents(log->fs_info, 3435 + &log->dirty_log_pages, 3436 + EXTENT_DIRTY | EXTENT_NEW); 3437 + btrfs_wait_tree_log_extents(log, 3438 + EXTENT_DIRTY | EXTENT_NEW); 3439 + 3417 3440 if (trans) 3418 3441 btrfs_abort_transaction(trans, ret); 3419 3442 else