Merge tag 'xfs-6.12-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'xfs-6.12-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:

- A few small typo fixes

- fstests xfs/538 DEBUG-only fix

- Performance fix on blockgc on COW'ed files, by skipping trims on
cowblock inodes currently opened for write

- Prevent cowblocks from being freed from under dirty pagecache during unshare

- Update MAINTAINERS file to list the new maintainer

* tag 'xfs-6.12-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: fix a typo
xfs: don't free cowblocks from under dirty pagecache on unshare
xfs: skip background cowblock trims on inodes open for write
xfs: support lowmode allocations in xfs_bmap_exact_minlen_extent_alloc
xfs: call xfs_bmap_exact_minlen_extent_alloc from xfs_bmap_btalloc
xfs: don't ifdef around the exact minlen allocations
xfs: fold xfs_bmap_alloc_userdata into xfs_bmapi_allocate
xfs: distinguish extra split from real ENOSPC from xfs_attr_node_try_addname
xfs: distinguish extra split from real ENOSPC from xfs_attr3_leaf_split
xfs: return bool from xfs_attr3_leaf_add
xfs: merge xfs_attr_leaf_try_add into xfs_attr_leaf_addname
xfs: Use try_cmpxchg() in xlog_cil_insert_pcp_aggregate()
xfs: scrub: convert comma to semicolon
xfs: Remove empty declartion in header file
MAINTAINERS: add Carlos Maiolino as XFS release manager

Linus Torvalds 2 years ago 825ec756 d3d15566

+211 -265

15 changed files

expand all collapse all

MAINTAINERS

xfs

libxfs

xfs_alloc.c

xfs_alloc.h

xfs_attr.c

xfs_attr_leaf.c

xfs_attr_leaf.h

xfs_bmap.c

xfs_da_btree.c

scrub

ialloc_repair.c

xfs_icache.c

xfs_log.h

xfs_log_cil.c

xfs_log_recover.c

xfs_reflink.c

xfs_reflink.h

+1 -1

MAINTAINERS

reviewed

··· 25404 25404 F: include/xen/swiotlb-xen.h 25405 25405 25406 25406 XFS FILESYSTEM 25407 25407 - M: Chandan Babu R <chandan.babu@oracle.com> 25407 25407 + M: Carlos Maiolino <cem@kernel.org> 25408 25408 R: Darrick J. Wong <djwong@kernel.org> 25409 25409 L: linux-xfs@vger.kernel.org 25410 25410 S: Supported

+2 -5

fs/xfs/libxfs/xfs_alloc.c

reviewed

··· 2766 2766 xfs_defer_item_unpause(tp, aarp->dfp); 2767 2767 } 2768 2768 2769 2769 - #ifdef DEBUG 2770 2769 /* 2771 2770 * Check if an AGF has a free extent record whose length is equal to 2772 2771 * args->minlen. ··· 2805 2806 2806 2807 return error; 2807 2808 } 2808 2808 - #endif 2809 2809 2810 2810 /* 2811 2811 * Decide whether to use this allocation group for this allocation. ··· 2878 2880 if (!xfs_alloc_space_available(args, need, alloc_flags)) 2879 2881 goto out_agbp_relse; 2880 2882 2881 2881 - #ifdef DEBUG 2882 2882 - if (args->alloc_minlen_only) { 2883 2883 + if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) { 2883 2884 int stat; 2884 2885 2885 2886 error = xfs_exact_minlen_extent_available(args, agbp, &stat); 2886 2887 if (error || !stat) 2887 2888 goto out_agbp_relse; 2888 2889 } 2889 2889 - #endif 2890 2890 + 2890 2891 /* 2891 2892 * Make the freelist shorter if it's too long. 2892 2893 *

+1 -3

fs/xfs/libxfs/xfs_alloc.h

reviewed

··· 53 53 int datatype; /* mask defining data type treatment */ 54 54 char wasdel; /* set if allocation was prev delayed */ 55 55 char wasfromfl; /* set if allocation is from freelist */ 56 56 + bool alloc_minlen_only; /* allocate exact minlen extent */ 56 57 struct xfs_owner_info oinfo; /* owner of blocks being allocated */ 57 58 enum xfs_ag_resv_type resv; /* block reservation to use */ 58 58 - #ifdef DEBUG 59 59 - bool alloc_minlen_only; /* allocate exact minlen extent */ 60 60 - #endif 61 59 } xfs_alloc_arg_t; 62 60 63 61 /*

+85 -113

fs/xfs/libxfs/xfs_attr.c

reviewed

··· 51 51 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); 52 52 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); 53 53 STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp); 54 54 - STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args); 55 54 56 55 /* 57 56 * Internal routines when attribute list is more than one block. ··· 436 437 return xfs_attr_hashname(name, namelen); 437 438 } 438 439 440 440 + /* Save the current remote block info and clear the current pointers. */ 441 441 + static void 442 442 + xfs_attr_save_rmt_blk( 443 443 + struct xfs_da_args *args) 444 444 + { 445 445 + args->blkno2 = args->blkno; 446 446 + args->index2 = args->index; 447 447 + args->rmtblkno2 = args->rmtblkno; 448 448 + args->rmtblkcnt2 = args->rmtblkcnt; 449 449 + args->rmtvaluelen2 = args->rmtvaluelen; 450 450 + args->rmtblkno = 0; 451 451 + args->rmtblkcnt = 0; 452 452 + args->rmtvaluelen = 0; 453 453 + } 454 454 + 455 455 + /* Set stored info about a remote block */ 456 456 + static void 457 457 + xfs_attr_restore_rmt_blk( 458 458 + struct xfs_da_args *args) 459 459 + { 460 460 + args->blkno = args->blkno2; 461 461 + args->index = args->index2; 462 462 + args->rmtblkno = args->rmtblkno2; 463 463 + args->rmtblkcnt = args->rmtblkcnt2; 464 464 + args->rmtvaluelen = args->rmtvaluelen2; 465 465 + } 466 466 + 439 467 /* 440 468 * PPTR_REPLACE operations require the caller to set the old and new names and 441 469 * values explicitly. Update the canonical fields to the new name and value ··· 508 482 return replace_state; 509 483 } 510 484 485 485 + /* 486 486 + * Try to add an attribute to an inode in leaf form. 
487 487 + */ 511 488 static int 512 489 xfs_attr_leaf_addname( 513 490 struct xfs_attr_intent *attr) 514 491 { 515 492 struct xfs_da_args *args = attr->xattri_da_args; 493 493 + struct xfs_buf *bp; 516 494 int error; 517 495 518 496 ASSERT(xfs_attr_is_leaf(args->dp)); 519 497 520 520 - /* 521 521 - * Use the leaf buffer we may already hold locked as a result of 522 522 - * a sf-to-leaf conversion. 523 523 - */ 524 524 - error = xfs_attr_leaf_try_add(args); 525 525 - 526 526 - if (error == -ENOSPC) { 527 527 - error = xfs_attr3_leaf_to_node(args); 528 528 - if (error) 529 529 - return error; 530 530 - 531 531 - /* 532 532 - * We're not in leaf format anymore, so roll the transaction and 533 533 - * retry the add to the newly allocated node block. 534 534 - */ 535 535 - attr->xattri_dela_state = XFS_DAS_NODE_ADD; 536 536 - goto out; 537 537 - } 498 498 + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); 538 499 if (error) 539 500 return error; 501 501 + 502 502 + /* 503 503 + * Look up the xattr name to set the insertion point for the new xattr. 504 504 + */ 505 505 + error = xfs_attr3_leaf_lookup_int(bp, args); 506 506 + switch (error) { 507 507 + case -ENOATTR: 508 508 + if (args->op_flags & XFS_DA_OP_REPLACE) 509 509 + goto out_brelse; 510 510 + break; 511 511 + case -EEXIST: 512 512 + if (!(args->op_flags & XFS_DA_OP_REPLACE)) 513 513 + goto out_brelse; 514 514 + 515 515 + trace_xfs_attr_leaf_replace(args); 516 516 + /* 517 517 + * Save the existing remote attr state so that the current 518 518 + * values reflect the state of the new attribute we are about to 519 519 + * add, not the attribute we just found and will remove later. 520 520 + */ 521 521 + xfs_attr_save_rmt_blk(args); 522 522 + break; 523 523 + case 0: 524 524 + break; 525 525 + default: 526 526 + goto out_brelse; 527 527 + } 540 528 541 529 /* 542 530 * We need to commit and roll if we need to allocate remote xattr blocks 543 531 * or perform more xattr manipulations. 
Otherwise there is nothing more 544 532 * to do and we can return success. 545 533 */ 546 546 - if (args->rmtblkno) 534 534 + if (!xfs_attr3_leaf_add(bp, args)) { 535 535 + error = xfs_attr3_leaf_to_node(args); 536 536 + if (error) 537 537 + return error; 538 538 + 539 539 + attr->xattri_dela_state = XFS_DAS_NODE_ADD; 540 540 + } else if (args->rmtblkno) { 547 541 attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT; 548 548 - else 549 549 - attr->xattri_dela_state = xfs_attr_complete_op(attr, 550 550 - XFS_DAS_LEAF_REPLACE); 551 551 - out: 542 542 + } else { 543 543 + attr->xattri_dela_state = 544 544 + xfs_attr_complete_op(attr, XFS_DAS_LEAF_REPLACE); 545 545 + } 546 546 + 552 547 trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp); 548 548 + return 0; 549 549 + 550 550 + out_brelse: 551 551 + xfs_trans_brelse(args->trans, bp); 553 552 return error; 554 553 } 555 554 ··· 597 546 return error; 598 547 599 548 error = xfs_attr_node_try_addname(attr); 600 600 - if (error == -ENOSPC) { 549 549 + if (error == 1) { 601 550 error = xfs_attr3_leaf_to_node(args); 602 551 if (error) 603 552 return error; ··· 1221 1170 * External routines when attribute list is one block 1222 1171 *========================================================================*/ 1223 1172 1224 1224 - /* Save the current remote block info and clear the current pointers. 
*/ 1225 1225 - static void 1226 1226 - xfs_attr_save_rmt_blk( 1227 1227 - struct xfs_da_args *args) 1228 1228 - { 1229 1229 - args->blkno2 = args->blkno; 1230 1230 - args->index2 = args->index; 1231 1231 - args->rmtblkno2 = args->rmtblkno; 1232 1232 - args->rmtblkcnt2 = args->rmtblkcnt; 1233 1233 - args->rmtvaluelen2 = args->rmtvaluelen; 1234 1234 - args->rmtblkno = 0; 1235 1235 - args->rmtblkcnt = 0; 1236 1236 - args->rmtvaluelen = 0; 1237 1237 - } 1238 1238 - 1239 1239 - /* Set stored info about a remote block */ 1240 1240 - static void 1241 1241 - xfs_attr_restore_rmt_blk( 1242 1242 - struct xfs_da_args *args) 1243 1243 - { 1244 1244 - args->blkno = args->blkno2; 1245 1245 - args->index = args->index2; 1246 1246 - args->rmtblkno = args->rmtblkno2; 1247 1247 - args->rmtblkcnt = args->rmtblkcnt2; 1248 1248 - args->rmtvaluelen = args->rmtvaluelen2; 1249 1249 - } 1250 1250 - 1251 1251 - /* 1252 1252 - * Tries to add an attribute to an inode in leaf form 1253 1253 - * 1254 1254 - * This function is meant to execute as part of a delayed operation and leaves 1255 1255 - * the transaction handling to the caller. On success the attribute is added 1256 1256 - * and the inode and transaction are left dirty. If there is not enough space, 1257 1257 - * the attr data is converted to node format and -ENOSPC is returned. Caller is 1258 1258 - * responsible for handling the dirty inode and transaction or adding the attr 1259 1259 - * in node format. 1260 1260 - */ 1261 1261 - STATIC int 1262 1262 - xfs_attr_leaf_try_add( 1263 1263 - struct xfs_da_args *args) 1264 1264 - { 1265 1265 - struct xfs_buf *bp; 1266 1266 - int error; 1267 1267 - 1268 1268 - error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); 1269 1269 - if (error) 1270 1270 - return error; 1271 1271 - 1272 1272 - /* 1273 1273 - * Look up the xattr name to set the insertion point for the new xattr. 
1274 1274 - */ 1275 1275 - error = xfs_attr3_leaf_lookup_int(bp, args); 1276 1276 - switch (error) { 1277 1277 - case -ENOATTR: 1278 1278 - if (args->op_flags & XFS_DA_OP_REPLACE) 1279 1279 - goto out_brelse; 1280 1280 - break; 1281 1281 - case -EEXIST: 1282 1282 - if (!(args->op_flags & XFS_DA_OP_REPLACE)) 1283 1283 - goto out_brelse; 1284 1284 - 1285 1285 - trace_xfs_attr_leaf_replace(args); 1286 1286 - /* 1287 1287 - * Save the existing remote attr state so that the current 1288 1288 - * values reflect the state of the new attribute we are about to 1289 1289 - * add, not the attribute we just found and will remove later. 1290 1290 - */ 1291 1291 - xfs_attr_save_rmt_blk(args); 1292 1292 - break; 1293 1293 - case 0: 1294 1294 - break; 1295 1295 - default: 1296 1296 - goto out_brelse; 1297 1297 - } 1298 1298 - 1299 1299 - return xfs_attr3_leaf_add(bp, args); 1300 1300 - 1301 1301 - out_brelse: 1302 1302 - xfs_trans_brelse(args->trans, bp); 1303 1303 - return error; 1304 1304 - } 1305 1305 - 1306 1173 /* 1307 1174 * Return EEXIST if attr is found, or ENOATTR if not 1308 1175 */ ··· 1386 1417 /* 1387 1418 * Add a name to a Btree-format attribute list. 1388 1419 * 1389 1389 - * This will involve walking down the Btree, and may involve splitting 1390 1390 - * leaf nodes and even splitting intermediate nodes up to and including 1391 1391 - * the root node (a special case of an intermediate node). 1420 1420 + * This will involve walking down the Btree, and may involve splitting leaf 1421 1421 + * nodes and even splitting intermediate nodes up to and including the root 1422 1422 + * node (a special case of an intermediate node). 1423 1423 + * 1424 1424 + * If the tree was still in single leaf format and needs to converted to 1425 1425 + * real node format return 1 and let the caller handle that. 
1392 1426 */ 1393 1427 static int 1394 1428 xfs_attr_node_try_addname( ··· 1399 1427 { 1400 1428 struct xfs_da_state *state = attr->xattri_da_state; 1401 1429 struct xfs_da_state_blk *blk; 1402 1402 - int error; 1430 1430 + int error = 0; 1403 1431 1404 1432 trace_xfs_attr_node_addname(state->args); 1405 1433 1406 1434 blk = &state->path.blk[state->path.active-1]; 1407 1435 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); 1408 1436 1409 1409 - error = xfs_attr3_leaf_add(blk->bp, state->args); 1410 1410 - if (error == -ENOSPC) { 1437 1437 + if (!xfs_attr3_leaf_add(blk->bp, state->args)) { 1411 1438 if (state->path.active == 1) { 1412 1439 /* 1413 1440 * Its really a single leaf node, but it had 1414 1441 * out-of-line values so it looked like it *might* 1415 1442 * have been a b-tree. Let the caller deal with this. 1416 1443 */ 1444 1444 + error = 1; 1417 1445 goto out; 1418 1446 } 1419 1447

+22 -18

fs/xfs/libxfs/xfs_attr_leaf.c

reviewed

··· 47 47 */ 48 48 STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args, 49 49 xfs_dablk_t which_block, struct xfs_buf **bpp); 50 50 - STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer, 50 50 + STATIC void xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer, 51 51 struct xfs_attr3_icleaf_hdr *ichdr, 52 52 struct xfs_da_args *args, int freemap_index); 53 53 STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args, ··· 995 995 xfs_attr_sethash(&nargs); 996 996 error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ 997 997 ASSERT(error == -ENOATTR); 998 998 - error = xfs_attr3_leaf_add(bp, &nargs); 999 999 - ASSERT(error != -ENOSPC); 1000 1000 - if (error) 1001 1001 - goto out; 998 998 + if (!xfs_attr3_leaf_add(bp, &nargs)) 999 999 + ASSERT(0); 1002 1000 sfe = xfs_attr_sf_nextentry(sfe); 1003 1001 } 1004 1002 error = 0; ··· 1331 1333 1332 1334 /* 1333 1335 * Split the leaf node, rebalance, then add the new entry. 1336 1336 + * 1337 1337 + * Returns 0 if the entry was added, 1 if a further split is needed or a 1338 1338 + * negative error number otherwise. 
1334 1339 */ 1335 1340 int 1336 1341 xfs_attr3_leaf_split( ··· 1341 1340 struct xfs_da_state_blk *oldblk, 1342 1341 struct xfs_da_state_blk *newblk) 1343 1342 { 1344 1344 - xfs_dablk_t blkno; 1345 1345 - int error; 1343 1343 + bool added; 1344 1344 + xfs_dablk_t blkno; 1345 1345 + int error; 1346 1346 1347 1347 trace_xfs_attr_leaf_split(state->args); 1348 1348 ··· 1378 1376 */ 1379 1377 if (state->inleaf) { 1380 1378 trace_xfs_attr_leaf_add_old(state->args); 1381 1381 - error = xfs_attr3_leaf_add(oldblk->bp, state->args); 1379 1379 + added = xfs_attr3_leaf_add(oldblk->bp, state->args); 1382 1380 } else { 1383 1381 trace_xfs_attr_leaf_add_new(state->args); 1384 1384 - error = xfs_attr3_leaf_add(newblk->bp, state->args); 1382 1382 + added = xfs_attr3_leaf_add(newblk->bp, state->args); 1385 1383 } 1386 1384 1387 1385 /* ··· 1389 1387 */ 1390 1388 oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); 1391 1389 newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); 1392 1392 - return error; 1390 1390 + if (!added) 1391 1391 + return 1; 1392 1392 + return 0; 1393 1393 } 1394 1394 1395 1395 /* 1396 1396 * Add a name to the leaf attribute list structure. 1397 1397 */ 1398 1398 - int 1398 1398 + bool 1399 1399 xfs_attr3_leaf_add( 1400 1400 struct xfs_buf *bp, 1401 1401 struct xfs_da_args *args) ··· 1406 1402 struct xfs_attr3_icleaf_hdr ichdr; 1407 1403 int tablesize; 1408 1404 int entsize; 1405 1405 + bool added = true; 1409 1406 int sum; 1410 1407 int tmp; 1411 1408 int i; ··· 1435 1430 if (ichdr.freemap[i].base < ichdr.firstused) 1436 1431 tmp += sizeof(xfs_attr_leaf_entry_t); 1437 1432 if (ichdr.freemap[i].size >= tmp) { 1438 1438 - tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i); 1433 1433 + xfs_attr3_leaf_add_work(bp, &ichdr, args, i); 1439 1434 goto out_log_hdr; 1440 1435 } 1441 1436 sum += ichdr.freemap[i].size; ··· 1447 1442 * no good and we should just give up. 
1448 1443 */ 1449 1444 if (!ichdr.holes && sum < entsize) 1450 1450 - return -ENOSPC; 1445 1445 + return false; 1451 1446 1452 1447 /* 1453 1448 * Compact the entries to coalesce free space. ··· 1460 1455 * free region, in freemap[0]. If it is not big enough, give up. 1461 1456 */ 1462 1457 if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { 1463 1463 - tmp = -ENOSPC; 1458 1458 + added = false; 1464 1459 goto out_log_hdr; 1465 1460 } 1466 1461 1467 1467 - tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); 1462 1462 + xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); 1468 1463 1469 1464 out_log_hdr: 1470 1465 xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr); 1471 1466 xfs_trans_log_buf(args->trans, bp, 1472 1467 XFS_DA_LOGRANGE(leaf, &leaf->hdr, 1473 1468 xfs_attr3_leaf_hdr_size(leaf))); 1474 1474 - return tmp; 1469 1469 + return added; 1475 1470 } 1476 1471 1477 1472 /* 1478 1473 * Add a name to a leaf attribute list structure. 1479 1474 */ 1480 1480 - STATIC int 1475 1475 + STATIC void 1481 1476 xfs_attr3_leaf_add_work( 1482 1477 struct xfs_buf *bp, 1483 1478 struct xfs_attr3_icleaf_hdr *ichdr, ··· 1595 1590 } 1596 1591 } 1597 1592 ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); 1598 1598 - return 0; 1599 1593 } 1600 1594 1601 1595 /*

+1 -1

fs/xfs/libxfs/xfs_attr_leaf.h

reviewed

··· 76 76 int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf, 77 77 struct xfs_da_args *args); 78 78 int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args); 79 79 - int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer, 79 79 + bool xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer, 80 80 struct xfs_da_args *args); 81 81 int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer, 82 82 struct xfs_da_args *args);

+44 -96

fs/xfs/libxfs/xfs_bmap.c

reviewed

··· 3477 3477 xfs_bmap_alloc_account(ap); 3478 3478 } 3479 3479 3480 3480 - #ifdef DEBUG 3481 3480 static int 3482 3481 xfs_bmap_exact_minlen_extent_alloc( 3483 3483 - struct xfs_bmalloca *ap) 3482 3482 + struct xfs_bmalloca *ap, 3483 3483 + struct xfs_alloc_arg *args) 3484 3484 { 3485 3485 - struct xfs_mount *mp = ap->ip->i_mount; 3486 3486 - struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp }; 3487 3487 - xfs_fileoff_t orig_offset; 3488 3488 - xfs_extlen_t orig_length; 3489 3489 - int error; 3490 3490 - 3491 3491 - ASSERT(ap->length); 3492 3492 - 3493 3485 if (ap->minlen != 1) { 3494 3494 - ap->blkno = NULLFSBLOCK; 3495 3495 - ap->length = 0; 3486 3486 + args->fsbno = NULLFSBLOCK; 3496 3487 return 0; 3497 3488 } 3498 3489 3499 3499 - orig_offset = ap->offset; 3500 3500 - orig_length = ap->length; 3501 3501 - 3502 3502 - args.alloc_minlen_only = 1; 3503 3503 - 3504 3504 - xfs_bmap_compute_alignments(ap, &args); 3490 3490 + args->alloc_minlen_only = 1; 3491 3491 + args->minlen = args->maxlen = ap->minlen; 3492 3492 + args->total = ap->total; 3505 3493 3506 3494 /* 3507 3495 * Unlike the longest extent available in an AG, we don't track ··· 3499 3511 * we need not be concerned about a drop in performance in 3500 3512 * "debug only" code paths. 
3501 3513 */ 3502 3502 - ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0); 3514 3514 + ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0); 3503 3515 3504 3504 - args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; 3505 3505 - args.minlen = args.maxlen = ap->minlen; 3506 3506 - args.total = ap->total; 3507 3507 - 3508 3508 - args.alignment = 1; 3509 3509 - args.minalignslop = 0; 3510 3510 - 3511 3511 - args.minleft = ap->minleft; 3512 3512 - args.wasdel = ap->wasdel; 3513 3513 - args.resv = XFS_AG_RESV_NONE; 3514 3514 - args.datatype = ap->datatype; 3515 3515 - 3516 3516 - error = xfs_alloc_vextent_first_ag(&args, ap->blkno); 3517 3517 - if (error) 3518 3518 - return error; 3519 3519 - 3520 3520 - if (args.fsbno != NULLFSBLOCK) { 3521 3521 - xfs_bmap_process_allocated_extent(ap, &args, orig_offset, 3522 3522 - orig_length); 3523 3523 - } else { 3524 3524 - ap->blkno = NULLFSBLOCK; 3525 3525 - ap->length = 0; 3526 3526 - } 3527 3527 - 3528 3528 - return 0; 3516 3516 + /* 3517 3517 + * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG 3518 3518 + * iteration and then drops args->total to args->minlen, which might be 3519 3519 + * required to find an allocation for the transaction reservation when 3520 3520 + * the file system is very full. 3521 3521 + */ 3522 3522 + return xfs_bmap_btalloc_low_space(ap, args); 3529 3523 } 3530 3530 - #else 3531 3531 - 3532 3532 - #define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED) 3533 3533 - 3534 3534 - #endif 3535 3524 3536 3525 /* 3537 3526 * If we are not low on available data blocks and we are allocating at ··· 3766 3801 /* Trim the allocation back to the maximum an AG can fit. 
*/ 3767 3802 args.maxlen = min(ap->length, mp->m_ag_max_usable); 3768 3803 3769 3769 - if ((ap->datatype & XFS_ALLOC_USERDATA) && 3770 3770 - xfs_inode_is_filestream(ap->ip)) 3804 3804 + if (unlikely(XFS_TEST_ERROR(false, mp, 3805 3805 + XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) 3806 3806 + error = xfs_bmap_exact_minlen_extent_alloc(ap, &args); 3807 3807 + else if ((ap->datatype & XFS_ALLOC_USERDATA) && 3808 3808 + xfs_inode_is_filestream(ap->ip)) 3771 3809 error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align); 3772 3810 else 3773 3811 error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align); ··· 4145 4177 } 4146 4178 4147 4179 static int 4148 4148 - xfs_bmap_alloc_userdata( 4149 4149 - struct xfs_bmalloca *bma) 4150 4150 - { 4151 4151 - struct xfs_mount *mp = bma->ip->i_mount; 4152 4152 - int whichfork = xfs_bmapi_whichfork(bma->flags); 4153 4153 - int error; 4154 4154 - 4155 4155 - /* 4156 4156 - * Set the data type being allocated. For the data fork, the first data 4157 4157 - * in the file is treated differently to all other allocations. For the 4158 4158 - * attribute fork, we only need to ensure the allocated range is not on 4159 4159 - * the busy list. 
4160 4160 - */ 4161 4161 - bma->datatype = XFS_ALLOC_NOBUSY; 4162 4162 - if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { 4163 4163 - bma->datatype |= XFS_ALLOC_USERDATA; 4164 4164 - if (bma->offset == 0) 4165 4165 - bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; 4166 4166 - 4167 4167 - if (mp->m_dalign && bma->length >= mp->m_dalign) { 4168 4168 - error = xfs_bmap_isaeof(bma, whichfork); 4169 4169 - if (error) 4170 4170 - return error; 4171 4171 - } 4172 4172 - 4173 4173 - if (XFS_IS_REALTIME_INODE(bma->ip)) 4174 4174 - return xfs_bmap_rtalloc(bma); 4175 4175 - } 4176 4176 - 4177 4177 - if (unlikely(XFS_TEST_ERROR(false, mp, 4178 4178 - XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) 4179 4179 - return xfs_bmap_exact_minlen_extent_alloc(bma); 4180 4180 - 4181 4181 - return xfs_bmap_btalloc(bma); 4182 4182 - } 4183 4183 - 4184 4184 - static int 4185 4180 xfs_bmapi_allocate( 4186 4181 struct xfs_bmalloca *bma) 4187 4182 { ··· 4161 4230 else 4162 4231 bma->minlen = 1; 4163 4232 4164 4164 - if (bma->flags & XFS_BMAPI_METADATA) { 4165 4165 - if (unlikely(XFS_TEST_ERROR(false, mp, 4166 4166 - XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) 4167 4167 - error = xfs_bmap_exact_minlen_extent_alloc(bma); 4168 4168 - else 4169 4169 - error = xfs_bmap_btalloc(bma); 4170 4170 - } else { 4171 4171 - error = xfs_bmap_alloc_userdata(bma); 4233 4233 + if (!(bma->flags & XFS_BMAPI_METADATA)) { 4234 4234 + /* 4235 4235 + * For the data and COW fork, the first data in the file is 4236 4236 + * treated differently to all other allocations. For the 4237 4237 + * attribute fork, we only need to ensure the allocated range 4238 4238 + * is not on the busy list. 
4239 4239 + */ 4240 4240 + bma->datatype = XFS_ALLOC_NOBUSY; 4241 4241 + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { 4242 4242 + bma->datatype |= XFS_ALLOC_USERDATA; 4243 4243 + if (bma->offset == 0) 4244 4244 + bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; 4245 4245 + 4246 4246 + if (mp->m_dalign && bma->length >= mp->m_dalign) { 4247 4247 + error = xfs_bmap_isaeof(bma, whichfork); 4248 4248 + if (error) 4249 4249 + return error; 4250 4250 + } 4251 4251 + } 4172 4252 } 4253 4253 + 4254 4254 + if ((bma->datatype & XFS_ALLOC_USERDATA) && 4255 4255 + XFS_IS_REALTIME_INODE(bma->ip)) 4256 4256 + error = xfs_bmap_rtalloc(bma); 4257 4257 + else 4258 4258 + error = xfs_bmap_btalloc(bma); 4173 4259 if (error) 4174 4260 return error; 4175 4261 if (bma->blkno == NULLFSBLOCK)

+3 -2

fs/xfs/libxfs/xfs_da_btree.c

reviewed

··· 593 593 switch (oldblk->magic) { 594 594 case XFS_ATTR_LEAF_MAGIC: 595 595 error = xfs_attr3_leaf_split(state, oldblk, newblk); 596 596 - if ((error != 0) && (error != -ENOSPC)) { 596 596 + if (error < 0) 597 597 return error; /* GROT: attr is inconsistent */ 598 598 - } 599 598 if (!error) { 600 599 addblk = newblk; 601 600 break; ··· 616 617 error = xfs_attr3_leaf_split(state, newblk, 617 618 &state->extrablk); 618 619 } 620 620 + if (error == 1) 621 621 + return -ENOSPC; 619 622 if (error) 620 623 return error; /* GROT: attr inconsistent */ 621 624 addblk = newblk;

+2 -2

fs/xfs/scrub/ialloc_repair.c

reviewed

··· 657 657 * Start by setting up the inobt staging cursor. 658 658 */ 659 659 fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, 660 660 - XFS_IBT_BLOCK(sc->mp)), 660 660 + XFS_IBT_BLOCK(sc->mp)); 661 661 xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno, 662 662 XFS_AG_RESV_NONE); 663 663 ri->new_inobt.bload.claim_block = xrep_ibt_claim_block; ··· 678 678 resv = XFS_AG_RESV_NONE; 679 679 680 680 fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, 681 681 - XFS_FIBT_BLOCK(sc->mp)), 681 681 + XFS_FIBT_BLOCK(sc->mp)); 682 682 xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT, 683 683 fsbno, resv); 684 684 ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;

+23 -14

fs/xfs/xfs_icache.c

reviewed

··· 1280 1280 } 1281 1281 1282 1282 /* 1283 1283 - * Set ourselves up to free CoW blocks from this file. If it's already clean 1284 1284 - * then we can bail out quickly, but otherwise we must back off if the file 1285 1285 - * is undergoing some kind of write. 1283 1283 + * Prepare to free COW fork blocks from an inode. 1286 1284 */ 1287 1285 static bool 1288 1286 xfs_prep_free_cowblocks( 1289 1289 - struct xfs_inode *ip) 1287 1287 + struct xfs_inode *ip, 1288 1288 + struct xfs_icwalk *icw) 1290 1289 { 1290 1290 + bool sync; 1291 1291 + 1292 1292 + sync = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC); 1293 1293 + 1291 1294 /* 1292 1295 * Just clear the tag if we have an empty cow fork or none at all. It's 1293 1296 * possible the inode was fully unshared since it was originally tagged. ··· 1302 1299 } 1303 1300 1304 1301 /* 1305 1305 - * If the mapping is dirty or under writeback we cannot touch the 1306 1306 - * CoW fork. Leave it alone if we're in the midst of a directio. 1302 1302 + * A cowblocks trim of an inode can have a significant effect on 1303 1303 + * fragmentation even when a reasonable COW extent size hint is set. 1304 1304 + * Therefore, we prefer to not process cowblocks unless they are clean 1305 1305 + * and idle. We can never process a cowblocks inode that is dirty or has 1306 1306 + * in-flight I/O under any circumstances, because outstanding writeback 1307 1307 + * or dio expects targeted COW fork blocks exist through write 1308 1308 + * completion where they can be remapped into the data fork. 1309 1309 + * 1310 1310 + * Therefore, the heuristic used here is to never process inodes 1311 1311 + * currently opened for write from background (i.e. non-sync) scans. For 1312 1312 + * sync scans, use the pagecache/dio state of the inode to ensure we 1313 1313 + * never free COW fork blocks out from under pending I/O. 
1307 1314 */ 1308 1308 - if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || 1309 1309 - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || 1310 1310 - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || 1311 1311 - atomic_read(&VFS_I(ip)->i_dio_count)) 1315 1315 + if (!sync && inode_is_open_for_write(VFS_I(ip))) 1312 1316 return false; 1313 1313 - 1314 1314 - return true; 1317 1317 + return xfs_can_free_cowblocks(ip); 1315 1318 } 1316 1319 1317 1320 /* ··· 1346 1337 if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS)) 1347 1338 return 0; 1348 1339 1349 1349 - if (!xfs_prep_free_cowblocks(ip)) 1340 1340 + if (!xfs_prep_free_cowblocks(ip, icw)) 1350 1341 return 0; 1351 1342 1352 1343 if (!xfs_icwalk_match(ip, icw)) ··· 1375 1366 * Check again, nobody else should be able to dirty blocks or change 1376 1367 * the reflink iflag now that we have the first two locks held. 1377 1368 */ 1378 1378 - if (xfs_prep_free_cowblocks(ip)) 1369 1369 + if (xfs_prep_free_cowblocks(ip, icw)) 1379 1370 ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); 1380 1371 return ret; 1381 1372 }

-2

fs/xfs/xfs_log.h

reviewed

··· 158 158 159 159 bool xlog_force_shutdown(struct xlog *log, uint32_t shutdown_flags); 160 160 161 161 - int xfs_attr_use_log_assist(struct xfs_mount *mp); 162 162 - 163 161 #endif /* __XFS_LOG_H__ */

+4 -7

fs/xfs/xfs_log_cil.c

reviewed

··· 156 156 struct xfs_cil *cil, 157 157 struct xfs_cil_ctx *ctx) 158 158 { 159 159 - struct xlog_cil_pcp *cilpcp; 160 159 int cpu; 161 160 int count = 0; 162 161 ··· 170 171 * structures that could have a nonzero space_used. 171 172 */ 172 173 for_each_cpu(cpu, &ctx->cil_pcpmask) { 173 173 - int old, prev; 174 174 + struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); 175 175 + int old = READ_ONCE(cilpcp->space_used); 174 176 175 175 - cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); 176 176 - do { 177 177 - old = cilpcp->space_used; 178 178 - prev = cmpxchg(&cilpcp->space_used, old, 0); 179 179 - } while (old != prev); 177 177 + while (!try_cmpxchg(&cilpcp->space_used, &old, 0)) 178 178 + ; 180 179 count += old; 181 180 } 182 181 atomic_add(count, &ctx->space_used);

+1 -1

fs/xfs/xfs_log_recover.c

reviewed

··· 1849 1849 * from the transaction. However, we can't do that until after we've 1850 1850 * replayed all the other items because they may be dependent on the 1851 1851 * cancelled buffer and replaying the cancelled buffer can remove it 1852 1852 - * form the cancelled buffer table. Hence they have tobe done last. 1852 1852 + * form the cancelled buffer table. Hence they have to be done last. 1853 1853 * 1854 1854 * 3. Inode allocation buffers must be replayed before inode items that 1855 1855 * read the buffer and replay changes into it. For filesystems using the

fs/xfs/xfs_reflink.c

reviewed

··· 1595 1595 1596 1596 ASSERT(xfs_is_reflink_inode(ip)); 1597 1597 1598 1598 + if (!xfs_can_free_cowblocks(ip)) 1599 1599 + return 0; 1600 1600 + 1598 1601 error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag); 1599 1602 if (error || needs_flag) 1600 1603 return error;

+19

fs/xfs/xfs_reflink.h

reviewed

··· 6 6 #ifndef __XFS_REFLINK_H 7 7 #define __XFS_REFLINK_H 1 8 8 9 9 + /* 10 10 + * Check whether it is safe to free COW fork blocks from an inode. It is unsafe 11 11 + * to do so when an inode has dirty cache or I/O in-flight, even if no shared 12 12 + * extents exist in the data fork, because outstanding I/O may target blocks 13 13 + * that were speculatively allocated to the COW fork. 14 14 + */ 15 15 + static inline bool 16 16 + xfs_can_free_cowblocks(struct xfs_inode *ip) 17 17 + { 18 18 + struct inode *inode = VFS_I(ip); 19 19 + 20 20 + if ((inode->i_state & I_DIRTY_PAGES) || 21 21 + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || 22 22 + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK) || 23 23 + atomic_read(&inode->i_dio_count)) 24 24 + return false; 25 25 + return true; 26 26 + } 27 27 + 9 28 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, 10 29 struct xfs_bmbt_irec *irec, bool *shared); 11 30 int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,