Merge tag 'xfs-fixes-6.16-rc5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge tag 'xfs-fixes-6.16-rc5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:

- Fix umount hang with unflushable inodes (and add new tracepoint used
for debugging this)

- Fix ABBA deadlock in xfs_reclaim_inode() vs xfs_ifree_cluster()

- Fix dquot buffer pin deadlock

* tag 'xfs-fixes-6.16-rc5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: add FALLOC_FL_ALLOCATE_RANGE to supported flags mask
xfs: fix unmount hang with unflushable inodes stuck in the AIL
xfs: factor out stale buffer item completion
xfs: rearrange code in xfs_buf_item.c
xfs: add tracepoints for stale pinned inode state debug
xfs: avoid dquot buffer pin deadlock
xfs: catch stale AGF/AGI metadata
xfs: xfs_ifree_cluster vs xfs_iflush_shutdown_abort deadlock
xfs: actually use the xfs_growfs_check_rtgeom tracepoint
xfs: Improve error handling in xfs_mru_cache_create()
xfs: move xfs_submit_zoned_bio a bit
xfs: use xfs_readonly_buftarg in xfs_remount_rw
xfs: remove NULL pointer checks in xfs_mru_cache_insert
xfs: check for shutdown before going to sleep in xfs_select_zone

Linus Torvalds 11 months ago d32e907d b4911fb0

+321 -288

19 changed files

expand all collapse all

xfs

libxfs

xfs_alloc.c

xfs_ialloc.c

xfs_buf.c

xfs_buf.h

xfs_buf_item.c

xfs_buf_item.h

xfs_dquot.c

xfs_file.c

xfs_icache.c

xfs_inode.c

xfs_inode_item.c

xfs_log_cil.c

xfs_mru_cache.c

xfs_qm.c

xfs_rtalloc.c

xfs_super.c

xfs_trace.h

xfs_trans.c

xfs_zone_alloc.c

+33 -8

fs/xfs/libxfs/xfs_alloc.c

reviewed

··· 3444 3444 3445 3445 set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); 3446 3446 } 3447 3447 + 3447 3448 #ifdef DEBUG 3448 3448 - else if (!xfs_is_shutdown(mp)) { 3449 3449 - ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); 3450 3450 - ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); 3451 3451 - ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); 3452 3452 - ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); 3453 3453 - ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level)); 3454 3454 - ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level)); 3449 3449 + /* 3450 3450 + * It's possible for the AGF to be out of sync if the block device is 3451 3451 + * silently dropping writes. This can happen in fstests with dmflakey 3452 3452 + * enabled, which allows the buffer to be cleaned and reclaimed by 3453 3453 + * memory pressure and then re-read from disk here. We will get a 3454 3454 + * stale version of the AGF from disk, and nothing good can happen from 3455 3455 + * here. Hence if we detect this situation, immediately shut down the 3456 3456 + * filesystem. 3457 3457 + * 3458 3458 + * This can also happen if we are already in the middle of a forced 3459 3459 + * shutdown, so don't bother checking if we are already shut down. 
3460 3460 + */ 3461 3461 + if (!xfs_is_shutdown(pag_mount(pag))) { 3462 3462 + bool ok = true; 3463 3463 + 3464 3464 + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); 3465 3465 + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); 3466 3466 + ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks); 3467 3467 + ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount); 3468 3468 + ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest); 3469 3469 + ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level); 3470 3470 + ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level); 3471 3471 + 3472 3472 + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { 3473 3473 + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); 3474 3474 + xfs_trans_brelse(tp, agfbp); 3475 3475 + xfs_force_shutdown(pag_mount(pag), 3476 3476 + SHUTDOWN_CORRUPT_ONDISK); 3477 3477 + return -EFSCORRUPTED; 3478 3478 + } 3455 3479 } 3456 3456 - #endif 3480 3480 + #endif /* DEBUG */ 3481 3481 + 3457 3482 if (agfbpp) 3458 3483 *agfbpp = agfbp; 3459 3484 else

+27 -4

fs/xfs/libxfs/xfs_ialloc.c

reviewed

··· 2801 2801 set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); 2802 2802 } 2803 2803 2804 2804 + #ifdef DEBUG 2804 2805 /* 2805 2805 - * It's possible for these to be out of sync if 2806 2806 - * we are in the middle of a forced shutdown. 2806 2806 + * It's possible for the AGF to be out of sync if the block device is 2807 2807 + * silently dropping writes. This can happen in fstests with dmflakey 2808 2808 + * enabled, which allows the buffer to be cleaned and reclaimed by 2809 2809 + * memory pressure and then re-read from disk here. We will get a 2810 2810 + * stale version of the AGF from disk, and nothing good can happen from 2811 2811 + * here. Hence if we detect this situation, immediately shut down the 2812 2812 + * filesystem. 2813 2813 + * 2814 2814 + * This can also happen if we are already in the middle of a forced 2815 2815 + * shutdown, so don't bother checking if we are already shut down. 2807 2816 */ 2808 2808 - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || 2809 2809 - xfs_is_shutdown(pag_mount(pag))); 2817 2817 + if (!xfs_is_shutdown(pag_mount(pag))) { 2818 2818 + bool ok = true; 2819 2819 + 2820 2820 + ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount); 2821 2821 + ok &= pag->pagi_count == be32_to_cpu(agi->agi_count); 2822 2822 + 2823 2823 + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { 2824 2824 + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 2825 2825 + xfs_trans_brelse(tp, agibp); 2826 2826 + xfs_force_shutdown(pag_mount(pag), 2827 2827 + SHUTDOWN_CORRUPT_ONDISK); 2828 2828 + return -EFSCORRUPTED; 2829 2829 + } 2830 2830 + } 2831 2831 + #endif /* DEBUG */ 2832 2832 + 2810 2833 if (agibpp) 2811 2834 *agibpp = agibp; 2812 2835 else

-38

fs/xfs/xfs_buf.c

reviewed

··· 2082 2082 return error; 2083 2083 } 2084 2084 2085 2085 - /* 2086 2086 - * Push a single buffer on a delwri queue. 2087 2087 - * 2088 2088 - * The purpose of this function is to submit a single buffer of a delwri queue 2089 2089 - * and return with the buffer still on the original queue. 2090 2090 - * 2091 2091 - * The buffer locking and queue management logic between _delwri_pushbuf() and 2092 2092 - * _delwri_queue() guarantee that the buffer cannot be queued to another list 2093 2093 - * before returning. 2094 2094 - */ 2095 2095 - int 2096 2096 - xfs_buf_delwri_pushbuf( 2097 2097 - struct xfs_buf *bp, 2098 2098 - struct list_head *buffer_list) 2099 2099 - { 2100 2100 - int error; 2101 2101 - 2102 2102 - ASSERT(bp->b_flags & _XBF_DELWRI_Q); 2103 2103 - 2104 2104 - trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); 2105 2105 - 2106 2106 - xfs_buf_lock(bp); 2107 2107 - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); 2108 2108 - bp->b_flags |= XBF_WRITE; 2109 2109 - xfs_buf_submit(bp); 2110 2110 - 2111 2111 - /* 2112 2112 - * The buffer is now locked, under I/O but still on the original delwri 2113 2113 - * queue. Wait for I/O completion, restore the DELWRI_Q flag and 2114 2114 - * return with the buffer unlocked and still on the original queue. 2115 2115 - */ 2116 2116 - error = xfs_buf_iowait(bp); 2117 2117 - bp->b_flags |= _XBF_DELWRI_Q; 2118 2118 - xfs_buf_unlock(bp); 2119 2119 - 2120 2120 - return error; 2121 2121 - } 2122 2122 - 2123 2085 void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) 2124 2086 { 2125 2087 /*

-1

fs/xfs/xfs_buf.h

reviewed

··· 326 326 void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); 327 327 extern int xfs_buf_delwri_submit(struct list_head *); 328 328 extern int xfs_buf_delwri_submit_nowait(struct list_head *); 329 329 - extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); 330 329 331 330 static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp) 332 331 {

+180 -117

fs/xfs/xfs_buf_item.c

reviewed

··· 32 32 return container_of(lip, struct xfs_buf_log_item, bli_item); 33 33 } 34 34 35 35 + static void 36 36 + xfs_buf_item_get_format( 37 37 + struct xfs_buf_log_item *bip, 38 38 + int count) 39 39 + { 40 40 + ASSERT(bip->bli_formats == NULL); 41 41 + bip->bli_format_count = count; 42 42 + 43 43 + if (count == 1) { 44 44 + bip->bli_formats = &bip->__bli_format; 45 45 + return; 46 46 + } 47 47 + 48 48 + bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), 49 49 + GFP_KERNEL | __GFP_NOFAIL); 50 50 + } 51 51 + 52 52 + static void 53 53 + xfs_buf_item_free_format( 54 54 + struct xfs_buf_log_item *bip) 55 55 + { 56 56 + if (bip->bli_formats != &bip->__bli_format) { 57 57 + kfree(bip->bli_formats); 58 58 + bip->bli_formats = NULL; 59 59 + } 60 60 + } 61 61 + 62 62 + static void 63 63 + xfs_buf_item_free( 64 64 + struct xfs_buf_log_item *bip) 65 65 + { 66 66 + xfs_buf_item_free_format(bip); 67 67 + kvfree(bip->bli_item.li_lv_shadow); 68 68 + kmem_cache_free(xfs_buf_item_cache, bip); 69 69 + } 70 70 + 71 71 + /* 72 72 + * xfs_buf_item_relse() is called when the buf log item is no longer needed. 73 73 + */ 74 74 + static void 75 75 + xfs_buf_item_relse( 76 76 + struct xfs_buf_log_item *bip) 77 77 + { 78 78 + struct xfs_buf *bp = bip->bli_buf; 79 79 + 80 80 + trace_xfs_buf_item_relse(bp, _RET_IP_); 81 81 + 82 82 + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); 83 83 + ASSERT(atomic_read(&bip->bli_refcount) == 0); 84 84 + 85 85 + bp->b_log_item = NULL; 86 86 + xfs_buf_rele(bp); 87 87 + xfs_buf_item_free(bip); 88 88 + } 89 89 + 35 90 /* Is this log iovec plausibly large enough to contain the buffer log format? */ 36 91 bool 37 92 xfs_buf_log_check_iovec( ··· 445 390 } 446 391 447 392 /* 393 393 + * For a stale BLI, process all the necessary completions that must be 394 394 + * performed when the final BLI reference goes away. 
The buffer will be 395 395 + * referenced and locked here - we return to the caller with the buffer still 396 396 + * referenced and locked for them to finalise processing of the buffer. 397 397 + */ 398 398 + static void 399 399 + xfs_buf_item_finish_stale( 400 400 + struct xfs_buf_log_item *bip) 401 401 + { 402 402 + struct xfs_buf *bp = bip->bli_buf; 403 403 + struct xfs_log_item *lip = &bip->bli_item; 404 404 + 405 405 + ASSERT(bip->bli_flags & XFS_BLI_STALE); 406 406 + ASSERT(xfs_buf_islocked(bp)); 407 407 + ASSERT(bp->b_flags & XBF_STALE); 408 408 + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); 409 409 + ASSERT(list_empty(&lip->li_trans)); 410 410 + ASSERT(!bp->b_transp); 411 411 + 412 412 + if (bip->bli_flags & XFS_BLI_STALE_INODE) { 413 413 + xfs_buf_item_done(bp); 414 414 + xfs_buf_inode_iodone(bp); 415 415 + ASSERT(list_empty(&bp->b_li_list)); 416 416 + return; 417 417 + } 418 418 + 419 419 + /* 420 420 + * We may or may not be on the AIL here, xfs_trans_ail_delete() will do 421 421 + * the right thing regardless of the situation in which we are called. 422 422 + */ 423 423 + xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); 424 424 + xfs_buf_item_relse(bip); 425 425 + ASSERT(bp->b_log_item == NULL); 426 426 + } 427 427 + 428 428 + /* 448 429 * This is called to unpin the buffer associated with the buf log item which was 449 430 * previously pinned with a call to xfs_buf_item_pin(). We enter this function 450 431 * with a buffer pin count, a buffer reference and a BLI reference. ··· 529 438 } 530 439 531 440 if (stale) { 532 532 - ASSERT(bip->bli_flags & XFS_BLI_STALE); 533 533 - ASSERT(xfs_buf_islocked(bp)); 534 534 - ASSERT(bp->b_flags & XBF_STALE); 535 535 - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); 536 536 - ASSERT(list_empty(&lip->li_trans)); 537 537 - ASSERT(!bp->b_transp); 538 538 - 539 441 trace_xfs_buf_item_unpin_stale(bip); 540 442 541 443 /* ··· 539 455 * processing is complete. 
540 456 */ 541 457 xfs_buf_rele(bp); 542 542 - 543 543 - /* 544 544 - * If we get called here because of an IO error, we may or may 545 545 - * not have the item on the AIL. xfs_trans_ail_delete() will 546 546 - * take care of that situation. xfs_trans_ail_delete() drops 547 547 - * the AIL lock. 548 548 - */ 549 549 - if (bip->bli_flags & XFS_BLI_STALE_INODE) { 550 550 - xfs_buf_item_done(bp); 551 551 - xfs_buf_inode_iodone(bp); 552 552 - ASSERT(list_empty(&bp->b_li_list)); 553 553 - } else { 554 554 - xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); 555 555 - xfs_buf_item_relse(bp); 556 556 - ASSERT(bp->b_log_item == NULL); 557 557 - } 458 458 + xfs_buf_item_finish_stale(bip); 558 459 xfs_buf_relse(bp); 559 460 return; 560 461 } ··· 612 543 * Drop the buffer log item refcount and take appropriate action. This helper 613 544 * determines whether the bli must be freed or not, since a decrement to zero 614 545 * does not necessarily mean the bli is unused. 615 615 - * 616 616 - * Return true if the bli is freed, false otherwise. 617 546 */ 618 618 - bool 547 547 + void 619 548 xfs_buf_item_put( 620 549 struct xfs_buf_log_item *bip) 621 550 { 622 622 - struct xfs_log_item *lip = &bip->bli_item; 623 623 - bool aborted; 624 624 - bool dirty; 551 551 + 552 552 + ASSERT(xfs_buf_islocked(bip->bli_buf)); 625 553 626 554 /* drop the bli ref and return if it wasn't the last one */ 627 555 if (!atomic_dec_and_test(&bip->bli_refcount)) 628 628 - return false; 556 556 + return; 557 557 + 558 558 + /* If the BLI is in the AIL, then it is still dirty and in use */ 559 559 + if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) { 560 560 + ASSERT(bip->bli_flags & XFS_BLI_DIRTY); 561 561 + return; 562 562 + } 629 563 630 564 /* 631 631 - * We dropped the last ref and must free the item if clean or aborted. 632 632 - * If the bli is dirty and non-aborted, the buffer was clean in the 633 633 - * transaction but still awaiting writeback from previous changes. 
In 634 634 - * that case, the bli is freed on buffer writeback completion. 565 565 + * In shutdown conditions, we can be asked to free a dirty BLI that 566 566 + * isn't in the AIL. This can occur due to a checkpoint aborting a BLI 567 567 + * instead of inserting it into the AIL at checkpoint IO completion. If 568 568 + * there's another bli reference (e.g. a btree cursor holds a clean 569 569 + * reference) and it is released via xfs_trans_brelse(), we can get here 570 570 + * with that aborted, dirty BLI. In this case, it is safe to free the 571 571 + * dirty BLI immediately, as it is not in the AIL and there are no 572 572 + * other references to it. 573 573 + * 574 574 + * We should never get here with a stale BLI via that path as 575 575 + * xfs_trans_brelse() specifically holds onto stale buffers rather than 576 576 + * releasing them. 635 577 */ 636 636 - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || 637 637 - xlog_is_shutdown(lip->li_log); 638 638 - dirty = bip->bli_flags & XFS_BLI_DIRTY; 639 639 - if (dirty && !aborted) 640 640 - return false; 641 641 - 642 642 - /* 643 643 - * The bli is aborted or clean. An aborted item may be in the AIL 644 644 - * regardless of dirty state. For example, consider an aborted 645 645 - * transaction that invalidated a dirty bli and cleared the dirty 646 646 - * state. 647 647 - */ 648 648 - if (aborted) 649 649 - xfs_trans_ail_delete(lip, 0); 650 650 - xfs_buf_item_relse(bip->bli_buf); 651 651 - return true; 578 578 + ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) || 579 579 + test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags)); 580 580 + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 581 581 + xfs_buf_item_relse(bip); 652 582 } 653 583 654 584 /* ··· 668 600 * if necessary but do not unlock the buffer. This is for support of 669 601 * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't 670 602 * free the item. 
603 603 + * 604 604 + * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must* 605 605 + * perform a completion abort of any objects attached to the buffer for IO 606 606 + * tracking purposes. This generally only happens in shutdown situations, 607 607 + * normally xfs_buf_item_unpin() will drop the last BLI reference and perform 608 608 + * completion processing. However, because transaction completion can race with 609 609 + * checkpoint completion during a shutdown, this release context may end up 610 610 + * being the last active reference to the BLI and so needs to perform this 611 611 + * cleanup. 671 612 */ 672 613 STATIC void 673 614 xfs_buf_item_release( ··· 684 607 { 685 608 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 686 609 struct xfs_buf *bp = bip->bli_buf; 687 687 - bool released; 688 610 bool hold = bip->bli_flags & XFS_BLI_HOLD; 689 611 bool stale = bip->bli_flags & XFS_BLI_STALE; 690 690 - #if defined(DEBUG) || defined(XFS_WARN) 691 691 - bool ordered = bip->bli_flags & XFS_BLI_ORDERED; 692 692 - bool dirty = bip->bli_flags & XFS_BLI_DIRTY; 693 612 bool aborted = test_bit(XFS_LI_ABORTED, 694 613 &lip->li_flags); 614 614 + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; 615 615 + #if defined(DEBUG) || defined(XFS_WARN) 616 616 + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; 695 617 #endif 696 618 697 619 trace_xfs_buf_item_release(bip); 620 620 + 621 621 + ASSERT(xfs_buf_islocked(bp)); 698 622 699 623 /* 700 624 * The bli dirty state should match whether the blf has logged segments ··· 712 634 bp->b_transp = NULL; 713 635 bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); 714 636 637 637 + /* If there are other references, then we have nothing to do. */ 638 638 + if (!atomic_dec_and_test(&bip->bli_refcount)) 639 639 + goto out_release; 640 640 + 715 641 /* 716 716 - * Unref the item and unlock the buffer unless held or stale. 
Stale 717 717 - * buffers remain locked until final unpin unless the bli is freed by 718 718 - * the unref call. The latter implies shutdown because buffer 719 719 - * invalidation dirties the bli and transaction. 642 642 + * Stale buffer completion frees the BLI, unlocks and releases the 643 643 + * buffer. Neither the BLI or buffer are safe to reference after this 644 644 + * call, so there's nothing more we need to do here. 645 645 + * 646 646 + * If we get here with a stale buffer and references to the BLI remain, 647 647 + * we must not unlock the buffer as the last BLI reference owns lock 648 648 + * context, not us. 720 649 */ 721 721 - released = xfs_buf_item_put(bip); 722 722 - if (hold || (stale && !released)) 650 650 + if (stale) { 651 651 + xfs_buf_item_finish_stale(bip); 652 652 + xfs_buf_relse(bp); 653 653 + ASSERT(!hold); 723 654 return; 724 724 - ASSERT(!stale || aborted); 655 655 + } 656 656 + 657 657 + /* 658 658 + * Dirty or clean, aborted items are done and need to be removed from 659 659 + * the AIL and released. This frees the BLI, but leaves the buffer 660 660 + * locked and referenced. 661 661 + */ 662 662 + if (aborted || xlog_is_shutdown(lip->li_log)) { 663 663 + ASSERT(list_empty(&bip->bli_buf->b_li_list)); 664 664 + xfs_buf_item_done(bp); 665 665 + goto out_release; 666 666 + } 667 667 + 668 668 + /* 669 669 + * Clean, unreferenced BLIs can be immediately freed, leaving the buffer 670 670 + * locked and referenced. 671 671 + * 672 672 + * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback. 673 673 + */ 674 674 + if (!dirty) 675 675 + xfs_buf_item_relse(bip); 676 676 + else 677 677 + ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags)); 678 678 + 679 679 + /* Not safe to reference the BLI from here */ 680 680 + out_release: 681 681 + /* 682 682 + * If we get here with a stale buffer, we must not unlock the 683 683 + * buffer as the last BLI reference owns lock context, not us. 
684 684 + */ 685 685 + if (stale || hold) 686 686 + return; 725 687 xfs_buf_relse(bp); 726 688 } 727 689 ··· 846 728 .iop_committed = xfs_buf_item_committed, 847 729 .iop_push = xfs_buf_item_push, 848 730 }; 849 849 - 850 850 - STATIC void 851 851 - xfs_buf_item_get_format( 852 852 - struct xfs_buf_log_item *bip, 853 853 - int count) 854 854 - { 855 855 - ASSERT(bip->bli_formats == NULL); 856 856 - bip->bli_format_count = count; 857 857 - 858 858 - if (count == 1) { 859 859 - bip->bli_formats = &bip->__bli_format; 860 860 - return; 861 861 - } 862 862 - 863 863 - bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), 864 864 - GFP_KERNEL | __GFP_NOFAIL); 865 865 - } 866 866 - 867 867 - STATIC void 868 868 - xfs_buf_item_free_format( 869 869 - struct xfs_buf_log_item *bip) 870 870 - { 871 871 - if (bip->bli_formats != &bip->__bli_format) { 872 872 - kfree(bip->bli_formats); 873 873 - bip->bli_formats = NULL; 874 874 - } 875 875 - } 876 731 877 732 /* 878 733 * Allocate a new buf log item to go with the given buffer. ··· 1067 976 return false; 1068 977 } 1069 978 1070 1070 - STATIC void 1071 1071 - xfs_buf_item_free( 1072 1072 - struct xfs_buf_log_item *bip) 1073 1073 - { 1074 1074 - xfs_buf_item_free_format(bip); 1075 1075 - kvfree(bip->bli_item.li_lv_shadow); 1076 1076 - kmem_cache_free(xfs_buf_item_cache, bip); 1077 1077 - } 1078 1078 - 1079 1079 - /* 1080 1080 - * xfs_buf_item_relse() is called when the buf log item is no longer needed. 
1081 1081 - */ 1082 1082 - void 1083 1083 - xfs_buf_item_relse( 1084 1084 - struct xfs_buf *bp) 1085 1085 - { 1086 1086 - struct xfs_buf_log_item *bip = bp->b_log_item; 1087 1087 - 1088 1088 - trace_xfs_buf_item_relse(bp, _RET_IP_); 1089 1089 - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); 1090 1090 - 1091 1091 - if (atomic_read(&bip->bli_refcount)) 1092 1092 - return; 1093 1093 - bp->b_log_item = NULL; 1094 1094 - xfs_buf_rele(bp); 1095 1095 - xfs_buf_item_free(bip); 1096 1096 - } 1097 1097 - 1098 979 void 1099 980 xfs_buf_item_done( 1100 981 struct xfs_buf *bp) ··· 1086 1023 xfs_trans_ail_delete(&bp->b_log_item->bli_item, 1087 1024 (bp->b_flags & _XBF_LOGRECOVERY) ? 0 : 1088 1025 SHUTDOWN_CORRUPT_INCORE); 1089 1089 - xfs_buf_item_relse(bp); 1026 1026 + xfs_buf_item_relse(bp->b_log_item); 1090 1027 }

+1 -2

fs/xfs/xfs_buf_item.h

reviewed

··· 49 49 50 50 int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 51 51 void xfs_buf_item_done(struct xfs_buf *bp); 52 52 - void xfs_buf_item_relse(struct xfs_buf *); 53 53 - bool xfs_buf_item_put(struct xfs_buf_log_item *); 52 52 + void xfs_buf_item_put(struct xfs_buf_log_item *bip); 54 53 void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); 55 54 bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); 56 55 void xfs_buf_inode_iodone(struct xfs_buf *);

+1 -3

fs/xfs/xfs_dquot.c

reviewed

··· 1398 1398 1399 1399 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1400 1400 ASSERT(!completion_done(&dqp->q_flush)); 1401 1401 + ASSERT(atomic_read(&dqp->q_pincount) == 0); 1401 1402 1402 1403 trace_xfs_dqflush(dqp); 1403 1403 - 1404 1404 - xfs_qm_dqunpin_wait(dqp); 1405 1405 - 1406 1404 fa = xfs_qm_dqflush_check(dqp); 1407 1405 if (fa) { 1408 1406 xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",

+4 -3

fs/xfs/xfs_file.c

reviewed

··· 1335 1335 } 1336 1336 1337 1337 #define XFS_FALLOC_FL_SUPPORTED \ 1338 1338 - (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 1339 1339 - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ 1340 1340 - FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) 1338 1338 + (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \ 1339 1339 + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \ 1340 1340 + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \ 1341 1341 + FALLOC_FL_UNSHARE_RANGE) 1341 1342 1342 1343 STATIC long 1343 1344 __xfs_file_fallocate(

fs/xfs/xfs_icache.c

reviewed

··· 979 979 */ 980 980 if (xlog_is_shutdown(ip->i_mount->m_log)) { 981 981 xfs_iunpin_wait(ip); 982 982 + /* 983 983 + * Avoid a ABBA deadlock on the inode cluster buffer vs 984 984 + * concurrent xfs_ifree_cluster() trying to mark the inode 985 985 + * stale. We don't need the inode locked to run the flush abort 986 986 + * code, but the flush abort needs to lock the cluster buffer. 987 987 + */ 988 988 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 982 989 xfs_iflush_shutdown_abort(ip); 990 990 + xfs_ilock(ip, XFS_ILOCK_EXCL); 983 991 goto reclaim; 984 992 } 985 993 if (xfs_ipincount(ip))

+1 -1

fs/xfs/xfs_inode.c

reviewed

··· 1635 1635 iip = ip->i_itemp; 1636 1636 if (__xfs_iflags_test(ip, XFS_IFLUSHING)) { 1637 1637 ASSERT(!list_empty(&iip->ili_item.li_bio_list)); 1638 1638 - ASSERT(iip->ili_last_fields); 1638 1638 + ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log)); 1639 1639 goto out_iunlock; 1640 1640 } 1641 1641

+4 -1

fs/xfs/xfs_inode_item.c

reviewed

··· 758 758 * completed and items removed from the AIL before the next push 759 759 * attempt. 760 760 */ 761 761 + trace_xfs_inode_push_stale(ip, _RET_IP_); 761 762 return XFS_ITEM_PINNED; 762 763 } 763 764 764 764 - if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) 765 765 + if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) { 766 766 + trace_xfs_inode_push_pinned(ip, _RET_IP_); 765 767 return XFS_ITEM_PINNED; 768 768 + } 766 769 767 770 if (xfs_iflags_test(ip, XFS_IFLUSHING)) 768 771 return XFS_ITEM_FLUSHING;

+3 -1

fs/xfs/xfs_log_cil.c

reviewed

··· 793 793 struct xfs_log_item *lip = lv->lv_item; 794 794 xfs_lsn_t item_lsn; 795 795 796 796 - if (aborted) 796 796 + if (aborted) { 797 797 + trace_xlog_ail_insert_abort(lip); 797 798 set_bit(XFS_LI_ABORTED, &lip->li_flags); 799 799 + } 798 800 799 801 if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { 800 802 lip->li_ops->iop_release(lip);

+4 -15

fs/xfs/xfs_mru_cache.c

reviewed

··· 320 320 xfs_mru_cache_free_func_t free_func) 321 321 { 322 322 struct xfs_mru_cache *mru = NULL; 323 323 - int err = 0, grp; 323 323 + int grp; 324 324 unsigned int grp_time; 325 325 326 326 if (mrup) ··· 341 341 mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists), 342 342 GFP_KERNEL | __GFP_NOFAIL); 343 343 if (!mru->lists) { 344 344 - err = -ENOMEM; 345 345 - goto exit; 344 344 + kfree(mru); 345 345 + return -ENOMEM; 346 346 } 347 347 348 348 for (grp = 0; grp < mru->grp_count; grp++) ··· 361 361 mru->free_func = free_func; 362 362 mru->data = data; 363 363 *mrup = mru; 364 364 - 365 365 - exit: 366 366 - if (err && mru && mru->lists) 367 367 - kfree(mru->lists); 368 368 - if (err && mru) 369 369 - kfree(mru); 370 370 - 371 371 - return err; 364 364 + return 0; 372 365 } 373 366 374 367 /* ··· 417 424 struct xfs_mru_cache_elem *elem) 418 425 { 419 426 int error = -EINVAL; 420 420 - 421 421 - ASSERT(mru && mru->lists); 422 422 - if (!mru || !mru->lists) 423 423 - goto out_free; 424 427 425 428 error = -ENOMEM; 426 429 if (radix_tree_preload(GFP_KERNEL))

+19 -67

fs/xfs/xfs_qm.c

reviewed

··· 134 134 135 135 dqp->q_flags |= XFS_DQFLAG_FREEING; 136 136 137 137 + xfs_qm_dqunpin_wait(dqp); 137 138 xfs_dqflock(dqp); 138 139 139 140 /* ··· 466 465 struct xfs_dquot *dqp = container_of(item, 467 466 struct xfs_dquot, q_lru); 468 467 struct xfs_qm_isolate *isol = arg; 468 468 + enum lru_status ret = LRU_SKIP; 469 469 470 470 if (!xfs_dqlock_nowait(dqp)) 471 471 goto out_miss_busy; ··· 478 476 */ 479 477 if (dqp->q_flags & XFS_DQFLAG_FREEING) 480 478 goto out_miss_unlock; 479 479 + 480 480 + /* 481 481 + * If the dquot is pinned or dirty, rotate it to the end of the LRU to 482 482 + * give some time for it to be cleaned before we try to isolate it 483 483 + * again. 484 484 + */ 485 485 + ret = LRU_ROTATE; 486 486 + if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) { 487 487 + goto out_miss_unlock; 488 488 + } 481 489 482 490 /* 483 491 * This dquot has acquired a reference in the meantime remove it from ··· 504 492 } 505 493 506 494 /* 507 507 - * If the dquot is dirty, flush it. If it's already being flushed, just 508 508 - * skip it so there is time for the IO to complete before we try to 509 509 - * reclaim it again on the next LRU pass. 495 495 + * The dquot may still be under IO, in which case the flush lock will be 496 496 + * held. If we can't get the flush lock now, just skip over the dquot as 497 497 + * if it was dirty. 
510 498 */ 511 499 if (!xfs_dqflock_nowait(dqp)) 512 500 goto out_miss_unlock; 513 501 514 514 - if (XFS_DQ_IS_DIRTY(dqp)) { 515 515 - struct xfs_buf *bp = NULL; 516 516 - int error; 517 517 - 518 518 - trace_xfs_dqreclaim_dirty(dqp); 519 519 - 520 520 - /* we have to drop the LRU lock to flush the dquot */ 521 521 - spin_unlock(&lru->lock); 522 522 - 523 523 - error = xfs_dquot_use_attached_buf(dqp, &bp); 524 524 - if (!bp || error == -EAGAIN) { 525 525 - xfs_dqfunlock(dqp); 526 526 - goto out_unlock_dirty; 527 527 - } 528 528 - 529 529 - /* 530 530 - * dqflush completes dqflock on error, and the delwri ioend 531 531 - * does it on success. 532 532 - */ 533 533 - error = xfs_qm_dqflush(dqp, bp); 534 534 - if (error) 535 535 - goto out_unlock_dirty; 536 536 - 537 537 - xfs_buf_delwri_queue(bp, &isol->buffers); 538 538 - xfs_buf_relse(bp); 539 539 - goto out_unlock_dirty; 540 540 - } 541 541 - 502 502 + ASSERT(!XFS_DQ_IS_DIRTY(dqp)); 542 503 xfs_dquot_detach_buf(dqp); 543 504 xfs_dqfunlock(dqp); 544 505 ··· 533 548 out_miss_busy: 534 549 trace_xfs_dqreclaim_busy(dqp); 535 550 XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); 536 536 - return LRU_SKIP; 537 537 - 538 538 - out_unlock_dirty: 539 539 - trace_xfs_dqreclaim_busy(dqp); 540 540 - XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); 541 541 - xfs_dqunlock(dqp); 542 542 - return LRU_RETRY; 551 551 + return ret; 543 552 } 544 553 545 554 static unsigned long ··· 1465 1486 struct xfs_dquot *dqp, 1466 1487 void *data) 1467 1488 { 1468 1468 - struct xfs_mount *mp = dqp->q_mount; 1469 1489 struct list_head *buffer_list = data; 1470 1490 struct xfs_buf *bp = NULL; 1471 1491 int error = 0; ··· 1475 1497 if (!XFS_DQ_IS_DIRTY(dqp)) 1476 1498 goto out_unlock; 1477 1499 1478 1478 - /* 1479 1479 - * The only way the dquot is already flush locked by the time quotacheck 1480 1480 - * gets here is if reclaim flushed it before the dqadjust walk dirtied 1481 1481 - * it for the final time. 
Quotacheck collects all dquot bufs in the 1482 1482 - * local delwri queue before dquots are dirtied, so reclaim can't have 1483 1483 - * possibly queued it for I/O. The only way out is to push the buffer to 1484 1484 - * cycle the flush lock. 1485 1485 - */ 1486 1486 - if (!xfs_dqflock_nowait(dqp)) { 1487 1487 - /* buf is pinned in-core by delwri list */ 1488 1488 - error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno, 1489 1489 - mp->m_quotainfo->qi_dqchunklen, 0, &bp); 1490 1490 - if (error) 1491 1491 - goto out_unlock; 1492 1492 - 1493 1493 - if (!(bp->b_flags & _XBF_DELWRI_Q)) { 1494 1494 - error = -EAGAIN; 1495 1495 - xfs_buf_relse(bp); 1496 1496 - goto out_unlock; 1497 1497 - } 1498 1498 - xfs_buf_unlock(bp); 1499 1499 - 1500 1500 - xfs_buf_delwri_pushbuf(bp, buffer_list); 1501 1501 - xfs_buf_rele(bp); 1502 1502 - 1503 1503 - error = -EAGAIN; 1504 1504 - goto out_unlock; 1505 1505 - } 1500 1500 + xfs_qm_dqunpin_wait(dqp); 1501 1501 + xfs_dqflock(dqp); 1506 1502 1507 1503 error = xfs_dquot_use_attached_buf(dqp, &bp); 1508 1504 if (error)

fs/xfs/xfs_rtalloc.c

reviewed

··· 1259 1259 1260 1260 kfree(nmp); 1261 1261 1262 1262 + trace_xfs_growfs_check_rtgeom(mp, min_logfsbs); 1263 1263 + 1262 1264 if (min_logfsbs > mp->m_sb.sb_logblocks) 1263 1265 return -EINVAL; 1264 1266

+2 -3

fs/xfs/xfs_super.c

reviewed

··· 2020 2020 int error; 2021 2021 2022 2022 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && 2023 2023 - bdev_read_only(mp->m_logdev_targp->bt_bdev)) { 2023 2023 + xfs_readonly_buftarg(mp->m_logdev_targp)) { 2024 2024 xfs_warn(mp, 2025 2025 "ro->rw transition prohibited by read-only logdev"); 2026 2026 return -EACCES; 2027 2027 } 2028 2028 2029 2029 - if (mp->m_rtdev_targp && 2030 2030 - bdev_read_only(mp->m_rtdev_targp->bt_bdev)) { 2029 2029 + if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { 2031 2030 xfs_warn(mp, 2032 2031 "ro->rw transition prohibited by read-only rtdev"); 2033 2032 return -EACCES;

+8 -2

fs/xfs/xfs_trace.h

reviewed

··· 778 778 DEFINE_BUF_EVENT(xfs_buf_delwri_queue); 779 779 DEFINE_BUF_EVENT(xfs_buf_delwri_queued); 780 780 DEFINE_BUF_EVENT(xfs_buf_delwri_split); 781 781 - DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); 782 781 DEFINE_BUF_EVENT(xfs_buf_get_uncached); 783 782 DEFINE_BUF_EVENT(xfs_buf_item_relse); 784 783 DEFINE_BUF_EVENT(xfs_buf_iodone_async); ··· 1146 1147 __field(xfs_ino_t, ino) 1147 1148 __field(int, count) 1148 1149 __field(int, pincount) 1150 1150 + __field(unsigned long, iflags) 1149 1151 __field(unsigned long, caller_ip) 1150 1152 ), 1151 1153 TP_fast_assign( ··· 1154 1154 __entry->ino = ip->i_ino; 1155 1155 __entry->count = atomic_read(&VFS_I(ip)->i_count); 1156 1156 __entry->pincount = atomic_read(&ip->i_pincount); 1157 1157 + __entry->iflags = ip->i_flags; 1157 1158 __entry->caller_ip = caller_ip; 1158 1159 ), 1159 1159 - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", 1160 1160 + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS", 1160 1161 MAJOR(__entry->dev), MINOR(__entry->dev), 1161 1162 __entry->ino, 1162 1163 __entry->count, 1163 1164 __entry->pincount, 1165 1165 + __entry->iflags, 1164 1166 (char *)__entry->caller_ip) 1165 1167 ) 1166 1168 ··· 1252 1250 DEFINE_IREF_EVENT(xfs_inode_pin); 1253 1251 DEFINE_IREF_EVENT(xfs_inode_unpin); 1254 1252 DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); 1253 1253 + DEFINE_IREF_EVENT(xfs_inode_push_pinned); 1254 1254 + DEFINE_IREF_EVENT(xfs_inode_push_stale); 1255 1255 1256 1256 DECLARE_EVENT_CLASS(xfs_namespace_class, 1257 1257 TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), ··· 1658 1654 DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); 1659 1655 DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); 1660 1656 DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); 1657 1657 + DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); 1658 1658 + DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); 1661 1659 1662 1660 DECLARE_EVENT_CLASS(xfs_ail_class, 1663 1661 TP_PROTO(struct xfs_log_item *lip, 
xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),

+3 -1

fs/xfs/xfs_trans.c

reviewed

··· 742 742 743 743 list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { 744 744 xfs_trans_del_item(lip); 745 745 - if (abort) 745 745 + if (abort) { 746 746 + trace_xfs_trans_free_abort(lip); 746 747 set_bit(XFS_LI_ABORTED, &lip->li_flags); 748 748 + } 747 749 if (lip->li_ops->iop_release) 748 750 lip->li_ops->iop_release(lip); 749 751 }

+21 -21

fs/xfs/xfs_zone_alloc.c

reviewed

··· 727 727 for (;;) { 728 728 prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE); 729 729 oz = xfs_select_zone_nowait(mp, write_hint, pack_tight); 730 730 - if (oz) 730 730 + if (oz || xfs_is_shutdown(mp)) 731 731 break; 732 732 schedule(); 733 733 } ··· 775 775 776 776 if (xfs_rtb_to_rgbno(mp, xfs_daddr_to_rtb(mp, sector)) == 0) 777 777 ioend->io_flags |= IOMAP_IOEND_BOUNDARY; 778 778 - } 779 779 - 780 780 - static void 781 781 - xfs_submit_zoned_bio( 782 782 - struct iomap_ioend *ioend, 783 783 - struct xfs_open_zone *oz, 784 784 - bool is_seq) 785 785 - { 786 786 - ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; 787 787 - ioend->io_private = oz; 788 788 - atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ 789 789 - 790 790 - if (is_seq) { 791 791 - ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; 792 792 - ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; 793 793 - } else { 794 794 - xfs_mark_rtg_boundary(ioend); 795 795 - } 796 796 - 797 797 - submit_bio(&ioend->io_bio); 798 778 } 799 779 800 780 /* ··· 869 889 } 870 890 item->oz = oz; 871 891 xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); 892 892 + } 893 893 + 894 894 + static void 895 895 + xfs_submit_zoned_bio( 896 896 + struct iomap_ioend *ioend, 897 897 + struct xfs_open_zone *oz, 898 898 + bool is_seq) 899 899 + { 900 900 + ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; 901 901 + ioend->io_private = oz; 902 902 + atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ 903 903 + 904 904 + if (is_seq) { 905 905 + ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; 906 906 + ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; 907 907 + } else { 908 908 + xfs_mark_rtg_boundary(ioend); 909 909 + } 910 910 + 911 911 + submit_bio(&ioend->io_bio); 872 912 } 873 913 874 914 void