Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-5.10-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
"A few fixes for various warnings that accumulated over the past two weeks:

- tree-checker: add missing return values for some errors

- lockdep fixes:
    - when reading qgroup config and starting quota rescan
    - reverse order of quota ioctl lock and VFS freeze lock

- avoid accessing potentially stale fs info during device scan,
  reported by syzbot

- add scope NOFS protection around qgroup relation changes

- check for running transaction before flushing qgroups

- fix tracking of new delalloc ranges for some cases"

* tag 'for-5.10-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: fix lockdep splat when enabling and disabling qgroups
btrfs: do nofs allocations when adding and removing qgroup relations
btrfs: fix lockdep splat when reading qgroup config on mount
btrfs: tree-checker: add missing returns after data_ref alignment checks
btrfs: don't access possibly stale fs_info data for printing duplicate device
btrfs: tree-checker: add missing return after error in root_item
btrfs: qgroup: don't commit transaction when we already hold the handle
btrfs: fix missing delalloc new bit for new delalloc ranges

+158 -73
+4 -1
fs/btrfs/ctree.h
··· 878 878 */ 879 879 struct ulist *qgroup_ulist; 880 880 881 - /* protect user change for quota operations */ 881 + /* 882 + * Protect user change for quota operations. If a transaction is needed, 883 + * it must be started before locking this lock. 884 + */ 882 885 struct mutex qgroup_ioctl_lock; 883 886 884 887 /* list of dirty qgroups to be written at next commit */
-57
fs/btrfs/file.c
··· 452 452 } 453 453 } 454 454 455 - static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, 456 - const u64 start, 457 - const u64 len, 458 - struct extent_state **cached_state) 459 - { 460 - u64 search_start = start; 461 - const u64 end = start + len - 1; 462 - 463 - while (search_start < end) { 464 - const u64 search_len = end - search_start + 1; 465 - struct extent_map *em; 466 - u64 em_len; 467 - int ret = 0; 468 - 469 - em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); 470 - if (IS_ERR(em)) 471 - return PTR_ERR(em); 472 - 473 - if (em->block_start != EXTENT_MAP_HOLE) 474 - goto next; 475 - 476 - em_len = em->len; 477 - if (em->start < search_start) 478 - em_len -= search_start - em->start; 479 - if (em_len > search_len) 480 - em_len = search_len; 481 - 482 - ret = set_extent_bit(&inode->io_tree, search_start, 483 - search_start + em_len - 1, 484 - EXTENT_DELALLOC_NEW, 485 - NULL, cached_state, GFP_NOFS); 486 - next: 487 - search_start = extent_map_end(em); 488 - free_extent_map(em); 489 - if (ret) 490 - return ret; 491 - } 492 - return 0; 493 - } 494 - 495 455 /* 496 456 * after copy_from_user, pages need to be dirtied and we need to make 497 457 * sure holes are created between the current EOF and the start of ··· 487 527 clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block, 488 528 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 489 529 0, 0, cached); 490 - 491 - if (!btrfs_is_free_space_inode(inode)) { 492 - if (start_pos >= isize && 493 - !(inode->flags & BTRFS_INODE_PREALLOC)) { 494 - /* 495 - * There can't be any extents following eof in this case 496 - * so just set the delalloc new bit for the range 497 - * directly. 
498 - */ 499 - extra_bits |= EXTENT_DELALLOC_NEW; 500 - } else { 501 - err = btrfs_find_new_delalloc_bytes(inode, start_pos, 502 - num_bytes, cached); 503 - if (err) 504 - return err; 505 - } 506 - } 507 530 508 531 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 509 532 extra_bits, cached);
+58
fs/btrfs/inode.c
··· 2253 2253 return 0; 2254 2254 } 2255 2255 2256 + static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, 2257 + const u64 start, 2258 + const u64 len, 2259 + struct extent_state **cached_state) 2260 + { 2261 + u64 search_start = start; 2262 + const u64 end = start + len - 1; 2263 + 2264 + while (search_start < end) { 2265 + const u64 search_len = end - search_start + 1; 2266 + struct extent_map *em; 2267 + u64 em_len; 2268 + int ret = 0; 2269 + 2270 + em = btrfs_get_extent(inode, NULL, 0, search_start, search_len); 2271 + if (IS_ERR(em)) 2272 + return PTR_ERR(em); 2273 + 2274 + if (em->block_start != EXTENT_MAP_HOLE) 2275 + goto next; 2276 + 2277 + em_len = em->len; 2278 + if (em->start < search_start) 2279 + em_len -= search_start - em->start; 2280 + if (em_len > search_len) 2281 + em_len = search_len; 2282 + 2283 + ret = set_extent_bit(&inode->io_tree, search_start, 2284 + search_start + em_len - 1, 2285 + EXTENT_DELALLOC_NEW, 2286 + NULL, cached_state, GFP_NOFS); 2287 + next: 2288 + search_start = extent_map_end(em); 2289 + free_extent_map(em); 2290 + if (ret) 2291 + return ret; 2292 + } 2293 + return 0; 2294 + } 2295 + 2256 2296 int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, 2257 2297 unsigned int extra_bits, 2258 2298 struct extent_state **cached_state) 2259 2299 { 2260 2300 WARN_ON(PAGE_ALIGNED(end)); 2301 + 2302 + if (start >= i_size_read(&inode->vfs_inode) && 2303 + !(inode->flags & BTRFS_INODE_PREALLOC)) { 2304 + /* 2305 + * There can't be any extents following eof in this case so just 2306 + * set the delalloc new bit for the range directly. 2307 + */ 2308 + extra_bits |= EXTENT_DELALLOC_NEW; 2309 + } else { 2310 + int ret; 2311 + 2312 + ret = btrfs_find_new_delalloc_bytes(inode, start, 2313 + end + 1 - start, 2314 + cached_state); 2315 + if (ret) 2316 + return ret; 2317 + } 2318 + 2261 2319 return set_extent_delalloc(&inode->io_tree, start, end, extra_bits, 2262 2320 cached_state); 2263 2321 }
+78 -10
fs/btrfs/qgroup.c
··· 11 11 #include <linux/slab.h> 12 12 #include <linux/workqueue.h> 13 13 #include <linux/btrfs.h> 14 + #include <linux/sched/mm.h> 14 15 15 16 #include "ctree.h" 16 17 #include "transaction.h" ··· 498 497 break; 499 498 } 500 499 out: 500 + btrfs_free_path(path); 501 501 fs_info->qgroup_flags |= flags; 502 502 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 503 503 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 504 504 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 505 505 ret >= 0) 506 506 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 507 - btrfs_free_path(path); 508 507 509 508 if (ret < 0) { 510 509 ulist_free(fs_info->qgroup_ulist); ··· 937 936 struct btrfs_key found_key; 938 937 struct btrfs_qgroup *qgroup = NULL; 939 938 struct btrfs_trans_handle *trans = NULL; 939 + struct ulist *ulist = NULL; 940 940 int ret = 0; 941 941 int slot; 942 942 ··· 945 943 if (fs_info->quota_root) 946 944 goto out; 947 945 948 - fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 949 - if (!fs_info->qgroup_ulist) { 946 + ulist = ulist_alloc(GFP_KERNEL); 947 + if (!ulist) { 950 948 ret = -ENOMEM; 951 949 goto out; 952 950 } ··· 954 952 ret = btrfs_sysfs_add_qgroups(fs_info); 955 953 if (ret < 0) 956 954 goto out; 955 + 956 + /* 957 + * Unlock qgroup_ioctl_lock before starting the transaction. This is to 958 + * avoid lock acquisition inversion problems (reported by lockdep) between 959 + * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we 960 + * start a transaction. 961 + * After we started the transaction lock qgroup_ioctl_lock again and 962 + * check if someone else created the quota root in the meanwhile. If so, 963 + * just return success and release the transaction handle. 964 + * 965 + * Also we don't need to worry about someone else calling 966 + * btrfs_sysfs_add_qgroups() after we unlock and getting an error because 967 + * that function returns 0 (success) when the sysfs entries already exist. 
968 + */ 969 + mutex_unlock(&fs_info->qgroup_ioctl_lock); 970 + 957 971 /* 958 972 * 1 for quota root item 959 973 * 1 for BTRFS_QGROUP_STATUS item ··· 979 961 * would be a lot of overkill. 980 962 */ 981 963 trans = btrfs_start_transaction(tree_root, 2); 964 + 965 + mutex_lock(&fs_info->qgroup_ioctl_lock); 982 966 if (IS_ERR(trans)) { 983 967 ret = PTR_ERR(trans); 984 968 trans = NULL; 985 969 goto out; 986 970 } 971 + 972 + if (fs_info->quota_root) 973 + goto out; 974 + 975 + fs_info->qgroup_ulist = ulist; 976 + ulist = NULL; 987 977 988 978 /* 989 979 * initially create the quota tree ··· 1150 1124 if (ret) { 1151 1125 ulist_free(fs_info->qgroup_ulist); 1152 1126 fs_info->qgroup_ulist = NULL; 1153 - if (trans) 1154 - btrfs_end_transaction(trans); 1155 1127 btrfs_sysfs_del_qgroups(fs_info); 1156 1128 } 1157 1129 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1130 + if (ret && trans) 1131 + btrfs_end_transaction(trans); 1132 + else if (trans) 1133 + ret = btrfs_end_transaction(trans); 1134 + ulist_free(ulist); 1158 1135 return ret; 1159 1136 } 1160 1137 ··· 1170 1141 mutex_lock(&fs_info->qgroup_ioctl_lock); 1171 1142 if (!fs_info->quota_root) 1172 1143 goto out; 1144 + mutex_unlock(&fs_info->qgroup_ioctl_lock); 1173 1145 1174 1146 /* 1175 1147 * 1 For the root item 1176 1148 * 1177 1149 * We should also reserve enough items for the quota tree deletion in 1178 1150 * btrfs_clean_quota_tree but this is not done. 1151 + * 1152 + * Also, we must always start a transaction without holding the mutex 1153 + * qgroup_ioctl_lock, see btrfs_quota_enable(). 
1179 1154 */ 1180 1155 trans = btrfs_start_transaction(fs_info->tree_root, 1); 1156 + 1157 + mutex_lock(&fs_info->qgroup_ioctl_lock); 1181 1158 if (IS_ERR(trans)) { 1182 1159 ret = PTR_ERR(trans); 1160 + trans = NULL; 1183 1161 goto out; 1184 1162 } 1163 + 1164 + if (!fs_info->quota_root) 1165 + goto out; 1185 1166 1186 1167 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1187 1168 btrfs_qgroup_wait_for_completion(fs_info, false); ··· 1206 1167 ret = btrfs_clean_quota_tree(trans, quota_root); 1207 1168 if (ret) { 1208 1169 btrfs_abort_transaction(trans, ret); 1209 - goto end_trans; 1170 + goto out; 1210 1171 } 1211 1172 1212 1173 ret = btrfs_del_root(trans, &quota_root->root_key); 1213 1174 if (ret) { 1214 1175 btrfs_abort_transaction(trans, ret); 1215 - goto end_trans; 1176 + goto out; 1216 1177 } 1217 1178 1218 1179 list_del(&quota_root->dirty_list); ··· 1224 1185 1225 1186 btrfs_put_root(quota_root); 1226 1187 1227 - end_trans: 1228 - ret = btrfs_end_transaction(trans); 1229 1188 out: 1230 1189 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1190 + if (ret && trans) 1191 + btrfs_end_transaction(trans); 1192 + else if (trans) 1193 + ret = btrfs_end_transaction(trans); 1194 + 1231 1195 return ret; 1232 1196 } 1233 1197 ··· 1366 1324 struct btrfs_qgroup *member; 1367 1325 struct btrfs_qgroup_list *list; 1368 1326 struct ulist *tmp; 1327 + unsigned int nofs_flag; 1369 1328 int ret = 0; 1370 1329 1371 1330 /* Check the level of src and dst first */ 1372 1331 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1373 1332 return -EINVAL; 1374 1333 1334 + /* We hold a transaction handle open, must do a NOFS allocation. 
*/ 1335 + nofs_flag = memalloc_nofs_save(); 1375 1336 tmp = ulist_alloc(GFP_KERNEL); 1337 + memalloc_nofs_restore(nofs_flag); 1376 1338 if (!tmp) 1377 1339 return -ENOMEM; 1378 1340 ··· 1433 1387 struct btrfs_qgroup_list *list; 1434 1388 struct ulist *tmp; 1435 1389 bool found = false; 1390 + unsigned int nofs_flag; 1436 1391 int ret = 0; 1437 1392 int ret2; 1438 1393 1394 + /* We hold a transaction handle open, must do a NOFS allocation. */ 1395 + nofs_flag = memalloc_nofs_save(); 1439 1396 tmp = ulist_alloc(GFP_KERNEL); 1397 + memalloc_nofs_restore(nofs_flag); 1440 1398 if (!tmp) 1441 1399 return -ENOMEM; 1442 1400 ··· 3562 3512 { 3563 3513 struct btrfs_trans_handle *trans; 3564 3514 int ret; 3515 + bool can_commit = true; 3565 3516 3566 3517 /* 3567 3518 * We don't want to run flush again and again, so if there is a running ··· 3573 3522 !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); 3574 3523 return 0; 3575 3524 } 3525 + 3526 + /* 3527 + * If current process holds a transaction, we shouldn't flush, as we 3528 + * assume all space reservation happens before a transaction handle is 3529 + * held. 3530 + * 3531 + * But there are cases like btrfs_delayed_item_reserve_metadata() where 3532 + * we try to reserve space with one transction handle already held. 3533 + * In that case we can't commit transaction, but at least try to end it 3534 + * and hope the started data writes can free some space. 3535 + */ 3536 + if (current->journal_info && 3537 + current->journal_info != BTRFS_SEND_TRANS_STUB) 3538 + can_commit = false; 3576 3539 3577 3540 ret = btrfs_start_delalloc_snapshot(root); 3578 3541 if (ret < 0) ··· 3599 3534 goto out; 3600 3535 } 3601 3536 3602 - ret = btrfs_commit_transaction(trans); 3537 + if (can_commit) 3538 + ret = btrfs_commit_transaction(trans); 3539 + else 3540 + ret = btrfs_end_transaction(trans); 3603 3541 out: 3604 3542 clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); 3605 3543 wake_up(&root->qgroup_flush_wait);
+8 -4
fs/btrfs/tests/inode-tests.c
··· 983 983 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 984 984 BTRFS_MAX_EXTENT_SIZE >> 1, 985 985 (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, 986 - EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); 986 + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | 987 + EXTENT_UPTODATE, 0, 0, NULL); 987 988 if (ret) { 988 989 test_err("clear_extent_bit returned %d", ret); 989 990 goto out; ··· 1051 1050 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 1052 1051 BTRFS_MAX_EXTENT_SIZE + sectorsize, 1053 1052 BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 1054 - EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); 1053 + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | 1054 + EXTENT_UPTODATE, 0, 0, NULL); 1055 1055 if (ret) { 1056 1056 test_err("clear_extent_bit returned %d", ret); 1057 1057 goto out; ··· 1084 1082 1085 1083 /* Empty */ 1086 1084 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, 1087 - EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); 1085 + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | 1086 + EXTENT_UPTODATE, 0, 0, NULL); 1088 1087 if (ret) { 1089 1088 test_err("clear_extent_bit returned %d", ret); 1090 1089 goto out; ··· 1100 1097 out: 1101 1098 if (ret) 1102 1099 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, 1103 - EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL); 1100 + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | 1101 + EXTENT_UPTODATE, 0, 0, NULL); 1104 1102 iput(inode); 1105 1103 btrfs_free_dummy_root(root); 1106 1104 btrfs_free_dummy_fs_info(fs_info);
+3
fs/btrfs/tree-checker.c
··· 1068 1068 "invalid root item size, have %u expect %zu or %u", 1069 1069 btrfs_item_size_nr(leaf, slot), sizeof(ri), 1070 1070 btrfs_legacy_root_item_size()); 1071 + return -EUCLEAN; 1071 1072 } 1072 1073 1073 1074 /* ··· 1424 1423 "invalid item size, have %u expect aligned to %zu for key type %u", 1425 1424 btrfs_item_size_nr(leaf, slot), 1426 1425 sizeof(*dref), key->type); 1426 + return -EUCLEAN; 1427 1427 } 1428 1428 if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) { 1429 1429 generic_err(leaf, slot, ··· 1453 1451 extent_err(leaf, slot, 1454 1452 "invalid extent data backref offset, have %llu expect aligned to %u", 1455 1453 offset, leaf->fs_info->sectorsize); 1454 + return -EUCLEAN; 1456 1455 } 1457 1456 } 1458 1457 return 0;
+7 -1
fs/btrfs/volumes.c
··· 940 940 if (device->bdev != path_bdev) { 941 941 bdput(path_bdev); 942 942 mutex_unlock(&fs_devices->device_list_mutex); 943 - btrfs_warn_in_rcu(device->fs_info, 943 + /* 944 + * device->fs_info may not be reliable here, so 945 + * pass in a NULL instead. This avoids a 946 + * possible use-after-free when the fs_info and 947 + * fs_info->sb are already torn down. 948 + */ 949 + btrfs_warn_in_rcu(NULL, 944 950 "duplicate device %s devid %llu generation %llu scanned by %s (%d)", 945 951 path, devid, found_transid, 946 952 current->comm,