Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-6.5-merge-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull more xfs updates from Darrick Wong:

- Fix some ordering problems with log items during log recovery

- Don't deadlock the system by trying to flush busy freed extents while
holding on to busy freed extents

- Improve validation of log geometry parameters when reading the
primary superblock

- Validate the length field in the AGF header

- Fix recordset filtering bugs when GETFSMAP is called again to return
more results because the result set didn't previously fit in the
caller's buffer

- Fix integer overflows in GETFSMAP when working with rt volumes larger
than 2^32 fsblocks

- Fix GETFSMAP reporting the undefined space beyond the last rtextent

- Fix filtering bugs in GETFSMAP's log device backend if the log ever
becomes longer than 2^32 fsblocks

- Improve validation of file offsets in the GETFSMAP range parameters

- Fix an off-by-one bug in the pmem media failure notification
computation

- Validate the length field in the AGI header too

* tag 'xfs-6.5-merge-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: Remove unneeded semicolon
xfs: AGI length should be bounds checked
xfs: fix the calculation for "end" and "length"
xfs: fix xfs_btree_query_range callers to initialize btree rec fully
xfs: validate fsmap offsets specified in the query keys
xfs: fix logdev fsmap query result filtering
xfs: clean up the rtbitmap fsmap backend
xfs: fix getfsmap reporting past the last rt extent
xfs: fix integer overflows in the fsmap rtbitmap and logdev backends
xfs: fix interval filtering in multi-step fsmap queries
xfs: fix bounds check in xfs_defer_agfl_block()
xfs: AGF length has never been bounds checked
xfs: journal geometry is not properly bounds checked
xfs: don't block in busy flushing when freeing extents
xfs: allow extent free intents to be retried
xfs: pass alloc flags through to xfs_extent_busy_flush()
xfs: use deferred frees for btree block freeing
xfs: don't reverse order of items in bulk AIL insertion
xfs: remove redundant initializations of pointers drop_leaf and save_leaf

+590 -335
+1 -1
fs/xfs/libxfs/xfs_ag.c
··· 985 985 goto resv_err; 986 986 987 987 err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, 988 - true); 988 + XFS_AG_RESV_NONE, true); 989 989 if (err2) 990 990 goto resv_err; 991 991
+187 -104
fs/xfs/libxfs/xfs_alloc.c
··· 1536 1536 */ 1537 1537 STATIC int 1538 1538 xfs_alloc_ag_vextent_near( 1539 - struct xfs_alloc_arg *args) 1539 + struct xfs_alloc_arg *args, 1540 + uint32_t alloc_flags) 1540 1541 { 1541 1542 struct xfs_alloc_cur acur = {}; 1542 1543 int error; /* error code */ ··· 1556 1555 if (args->agbno > args->max_agbno) 1557 1556 args->agbno = args->max_agbno; 1558 1557 1558 + /* Retry once quickly if we find busy extents before blocking. */ 1559 + alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH; 1559 1560 restart: 1560 1561 len = 0; 1561 1562 ··· 1613 1610 */ 1614 1611 if (!acur.len) { 1615 1612 if (acur.busy) { 1613 + /* 1614 + * Our only valid extents must have been busy. Flush and 1615 + * retry the allocation again. If we get an -EAGAIN 1616 + * error, we're being told that a deadlock was avoided 1617 + * and the current transaction needs committing before 1618 + * the allocation can be retried. 1619 + */ 1616 1620 trace_xfs_alloc_near_busy(args); 1617 - xfs_extent_busy_flush(args->mp, args->pag, 1618 - acur.busy_gen); 1621 + error = xfs_extent_busy_flush(args->tp, args->pag, 1622 + acur.busy_gen, alloc_flags); 1623 + if (error) 1624 + goto out; 1625 + 1626 + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; 1619 1627 goto restart; 1620 1628 } 1621 1629 trace_xfs_alloc_size_neither(args); ··· 1649 1635 * and of the form k * prod + mod unless there's nothing that large. 1650 1636 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 
1651 1637 */ 1652 - STATIC int /* error */ 1638 + static int 1653 1639 xfs_alloc_ag_vextent_size( 1654 - xfs_alloc_arg_t *args) /* allocation argument structure */ 1640 + struct xfs_alloc_arg *args, 1641 + uint32_t alloc_flags) 1655 1642 { 1656 - struct xfs_agf *agf = args->agbp->b_addr; 1657 - struct xfs_btree_cur *bno_cur; /* cursor for bno btree */ 1658 - struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */ 1659 - int error; /* error result */ 1660 - xfs_agblock_t fbno; /* start of found freespace */ 1661 - xfs_extlen_t flen; /* length of found freespace */ 1662 - int i; /* temp status variable */ 1663 - xfs_agblock_t rbno; /* returned block number */ 1664 - xfs_extlen_t rlen; /* length of returned extent */ 1665 - bool busy; 1666 - unsigned busy_gen; 1643 + struct xfs_agf *agf = args->agbp->b_addr; 1644 + struct xfs_btree_cur *bno_cur; 1645 + struct xfs_btree_cur *cnt_cur; 1646 + xfs_agblock_t fbno; /* start of found freespace */ 1647 + xfs_extlen_t flen; /* length of found freespace */ 1648 + xfs_agblock_t rbno; /* returned block number */ 1649 + xfs_extlen_t rlen; /* length of returned extent */ 1650 + bool busy; 1651 + unsigned busy_gen; 1652 + int error; 1653 + int i; 1667 1654 1655 + /* Retry once quickly if we find busy extents before blocking. */ 1656 + alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH; 1668 1657 restart: 1669 1658 /* 1670 1659 * Allocate and initialize a cursor for the by-size btree. ··· 1725 1708 error = xfs_btree_increment(cnt_cur, 0, &i); 1726 1709 if (error) 1727 1710 goto error0; 1728 - if (i == 0) { 1729 - /* 1730 - * Our only valid extents must have been busy. 1731 - * Make it unbusy by forcing the log out and 1732 - * retrying. 1733 - */ 1734 - xfs_btree_del_cursor(cnt_cur, 1735 - XFS_BTREE_NOERROR); 1736 - trace_xfs_alloc_size_busy(args); 1737 - xfs_extent_busy_flush(args->mp, 1738 - args->pag, busy_gen); 1739 - goto restart; 1740 - } 1711 + if (i) 1712 + continue; 1713 + 1714 + /* 1715 + * Our only valid extents must have been busy. 
Flush and 1716 + * retry the allocation again. If we get an -EAGAIN 1717 + * error, we're being told that a deadlock was avoided 1718 + * and the current transaction needs committing before 1719 + * the allocation can be retried. 1720 + */ 1721 + trace_xfs_alloc_size_busy(args); 1722 + error = xfs_extent_busy_flush(args->tp, args->pag, 1723 + busy_gen, alloc_flags); 1724 + if (error) 1725 + goto error0; 1726 + 1727 + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; 1728 + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1729 + goto restart; 1741 1730 } 1742 1731 } 1743 1732 ··· 1823 1800 args->len = rlen; 1824 1801 if (rlen < args->minlen) { 1825 1802 if (busy) { 1826 - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1803 + /* 1804 + * Our only valid extents must have been busy. Flush and 1805 + * retry the allocation again. If we get an -EAGAIN 1806 + * error, we're being told that a deadlock was avoided 1807 + * and the current transaction needs committing before 1808 + * the allocation can be retried. 
1809 + */ 1827 1810 trace_xfs_alloc_size_busy(args); 1828 - xfs_extent_busy_flush(args->mp, args->pag, busy_gen); 1811 + error = xfs_extent_busy_flush(args->tp, args->pag, 1812 + busy_gen, alloc_flags); 1813 + if (error) 1814 + goto error0; 1815 + 1816 + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; 1817 + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1829 1818 goto restart; 1830 1819 } 1831 1820 goto out_nominleft; ··· 2470 2435 xfs_defer_agfl_block( 2471 2436 struct xfs_trans *tp, 2472 2437 xfs_agnumber_t agno, 2473 - xfs_fsblock_t agbno, 2438 + xfs_agblock_t agbno, 2474 2439 struct xfs_owner_info *oinfo) 2475 2440 { 2476 2441 struct xfs_mount *mp = tp->t_mountp; 2477 2442 struct xfs_extent_free_item *xefi; 2443 + xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno); 2478 2444 2479 2445 ASSERT(xfs_extfree_item_cache != NULL); 2480 2446 ASSERT(oinfo != NULL); 2481 2447 2448 + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno))) 2449 + return -EFSCORRUPTED; 2450 + 2482 2451 xefi = kmem_cache_zalloc(xfs_extfree_item_cache, 2483 2452 GFP_KERNEL | __GFP_NOFAIL); 2484 - xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); 2453 + xefi->xefi_startblock = fsbno; 2485 2454 xefi->xefi_blockcount = 1; 2486 2455 xefi->xefi_owner = oinfo->oi_owner; 2487 - 2488 - if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock))) 2489 - return -EFSCORRUPTED; 2456 + xefi->xefi_agresv = XFS_AG_RESV_AGFL; 2490 2457 2491 2458 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); 2492 2459 ··· 2507 2470 xfs_fsblock_t bno, 2508 2471 xfs_filblks_t len, 2509 2472 const struct xfs_owner_info *oinfo, 2473 + enum xfs_ag_resv_type type, 2510 2474 bool skip_discard) 2511 2475 { 2512 2476 struct xfs_extent_free_item *xefi; ··· 2528 2490 ASSERT(agbno + len <= mp->m_sb.sb_agblocks); 2529 2491 #endif 2530 2492 ASSERT(xfs_extfree_item_cache != NULL); 2493 + ASSERT(type != XFS_AG_RESV_AGFL); 2531 2494 2532 2495 if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) 2533 2496 return 
-EFSCORRUPTED; ··· 2537 2498 GFP_KERNEL | __GFP_NOFAIL); 2538 2499 xefi->xefi_startblock = bno; 2539 2500 xefi->xefi_blockcount = (xfs_extlen_t)len; 2501 + xefi->xefi_agresv = type; 2540 2502 if (skip_discard) 2541 2503 xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; 2542 2504 if (oinfo) { ··· 2608 2568 int /* error */ 2609 2569 xfs_alloc_fix_freelist( 2610 2570 struct xfs_alloc_arg *args, /* allocation argument structure */ 2611 - int flags) /* XFS_ALLOC_FLAG_... */ 2571 + uint32_t alloc_flags) 2612 2572 { 2613 2573 struct xfs_mount *mp = args->mp; 2614 2574 struct xfs_perag *pag = args->pag; ··· 2624 2584 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 2625 2585 2626 2586 if (!xfs_perag_initialised_agf(pag)) { 2627 - error = xfs_alloc_read_agf(pag, tp, flags, &agbp); 2587 + error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp); 2628 2588 if (error) { 2629 2589 /* Couldn't lock the AGF so skip this AG. */ 2630 2590 if (error == -EAGAIN) ··· 2640 2600 */ 2641 2601 if (xfs_perag_prefers_metadata(pag) && 2642 2602 (args->datatype & XFS_ALLOC_USERDATA) && 2643 - (flags & XFS_ALLOC_FLAG_TRYLOCK)) { 2644 - ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2603 + (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) { 2604 + ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING)); 2645 2605 goto out_agbp_relse; 2646 2606 } 2647 2607 2648 2608 need = xfs_alloc_min_freelist(mp, pag); 2649 - if (!xfs_alloc_space_available(args, need, flags | 2609 + if (!xfs_alloc_space_available(args, need, alloc_flags | 2650 2610 XFS_ALLOC_FLAG_CHECK)) 2651 2611 goto out_agbp_relse; 2652 2612 ··· 2655 2615 * Can fail if we're not blocking on locks, and it's held. 2656 2616 */ 2657 2617 if (!agbp) { 2658 - error = xfs_alloc_read_agf(pag, tp, flags, &agbp); 2618 + error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp); 2659 2619 if (error) { 2660 2620 /* Couldn't lock the AGF so skip this AG. */ 2661 2621 if (error == -EAGAIN) ··· 2670 2630 2671 2631 /* If there isn't enough total space or single-extent, reject it. 
*/ 2672 2632 need = xfs_alloc_min_freelist(mp, pag); 2673 - if (!xfs_alloc_space_available(args, need, flags)) 2633 + if (!xfs_alloc_space_available(args, need, alloc_flags)) 2674 2634 goto out_agbp_relse; 2675 2635 2676 2636 #ifdef DEBUG ··· 2708 2668 */ 2709 2669 memset(&targs, 0, sizeof(targs)); 2710 2670 /* struct copy below */ 2711 - if (flags & XFS_ALLOC_FLAG_NORMAP) 2671 + if (alloc_flags & XFS_ALLOC_FLAG_NORMAP) 2712 2672 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; 2713 2673 else 2714 2674 targs.oinfo = XFS_RMAP_OINFO_AG; 2715 - while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { 2675 + while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) && 2676 + pag->pagf_flcount > need) { 2716 2677 error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0); 2717 2678 if (error) 2718 2679 goto out_agbp_relse; ··· 2741 2700 targs.resv = XFS_AG_RESV_AGFL; 2742 2701 2743 2702 /* Allocate as many blocks as possible at once. */ 2744 - error = xfs_alloc_ag_vextent_size(&targs); 2703 + error = xfs_alloc_ag_vextent_size(&targs, alloc_flags); 2745 2704 if (error) 2746 2705 goto out_agflbp_relse; 2747 2706 ··· 2751 2710 * on a completely full ag. 2752 2711 */ 2753 2712 if (targs.agbno == NULLAGBLOCK) { 2754 - if (flags & XFS_ALLOC_FLAG_FREEING) 2713 + if (alloc_flags & XFS_ALLOC_FLAG_FREEING) 2755 2714 break; 2756 2715 goto out_agflbp_relse; 2757 2716 } ··· 2957 2916 } 2958 2917 2959 2918 /* 2919 + * Check that this AGF/AGI header's sequence number and length matches the AG 2920 + * number and size in fsblocks. 2921 + */ 2922 + xfs_failaddr_t 2923 + xfs_validate_ag_length( 2924 + struct xfs_buf *bp, 2925 + uint32_t seqno, 2926 + uint32_t length) 2927 + { 2928 + struct xfs_mount *mp = bp->b_mount; 2929 + /* 2930 + * During growfs operations, the perag is not fully initialised, 2931 + * so we can't use it for any useful checking. 
growfs ensures we can't 2932 + * use it by using uncached buffers that don't have the perag attached 2933 + * so we can detect and avoid this problem. 2934 + */ 2935 + if (bp->b_pag && seqno != bp->b_pag->pag_agno) 2936 + return __this_address; 2937 + 2938 + /* 2939 + * Only the last AG in the filesystem is allowed to be shorter 2940 + * than the AG size recorded in the superblock. 2941 + */ 2942 + if (length != mp->m_sb.sb_agblocks) { 2943 + /* 2944 + * During growfs, the new last AG can get here before we 2945 + * have updated the superblock. Give it a pass on the seqno 2946 + * check. 2947 + */ 2948 + if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1) 2949 + return __this_address; 2950 + if (length < XFS_MIN_AG_BLOCKS) 2951 + return __this_address; 2952 + if (length > mp->m_sb.sb_agblocks) 2953 + return __this_address; 2954 + } 2955 + 2956 + return NULL; 2957 + } 2958 + 2959 + /* 2960 2960 * Verify the AGF is consistent. 2961 2961 * 2962 2962 * We do not verify the AGFL indexes in the AGF are fully consistent here ··· 3016 2934 { 3017 2935 struct xfs_mount *mp = bp->b_mount; 3018 2936 struct xfs_agf *agf = bp->b_addr; 2937 + xfs_failaddr_t fa; 2938 + uint32_t agf_seqno = be32_to_cpu(agf->agf_seqno); 2939 + uint32_t agf_length = be32_to_cpu(agf->agf_length); 3019 2940 3020 2941 if (xfs_has_crc(mp)) { 3021 2942 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) ··· 3030 2945 if (!xfs_verify_magic(bp, agf->agf_magicnum)) 3031 2946 return __this_address; 3032 2947 3033 - if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 3034 - be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 3035 - be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && 3036 - be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && 3037 - be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) 2948 + if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum))) 3038 2949 return __this_address; 3039 2950 3040 - if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks) 2951 + 
/* 2952 + * Both agf_seqno and agf_length need to validated before anything else 2953 + * block number related in the AGF or AGFL can be checked. 2954 + */ 2955 + fa = xfs_validate_ag_length(bp, agf_seqno, agf_length); 2956 + if (fa) 2957 + return fa; 2958 + 2959 + if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp)) 2960 + return __this_address; 2961 + if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp)) 2962 + return __this_address; 2963 + if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp)) 3041 2964 return __this_address; 3042 2965 3043 2966 if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) || 3044 - be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length)) 2967 + be32_to_cpu(agf->agf_freeblks) > agf_length) 3045 2968 return __this_address; 3046 2969 3047 2970 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || ··· 3060 2967 mp->m_alloc_maxlevels) 3061 2968 return __this_address; 3062 2969 3063 - if (xfs_has_rmapbt(mp) && 3064 - (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 3065 - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > 3066 - mp->m_rmap_maxlevels)) 3067 - return __this_address; 3068 - 3069 - if (xfs_has_rmapbt(mp) && 3070 - be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length)) 3071 - return __this_address; 3072 - 3073 - /* 3074 - * during growfs operations, the perag is not fully initialised, 3075 - * so we can't use it for any useful checking. growfs ensures we can't 3076 - * use it by using uncached buffers that don't have the perag attached 3077 - * so we can detect and avoid this problem. 
3078 - */ 3079 - if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 3080 - return __this_address; 3081 - 3082 2970 if (xfs_has_lazysbcount(mp) && 3083 - be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2971 + be32_to_cpu(agf->agf_btreeblks) > agf_length) 3084 2972 return __this_address; 3085 2973 3086 - if (xfs_has_reflink(mp) && 3087 - be32_to_cpu(agf->agf_refcount_blocks) > 3088 - be32_to_cpu(agf->agf_length)) 3089 - return __this_address; 2974 + if (xfs_has_rmapbt(mp)) { 2975 + if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length) 2976 + return __this_address; 3090 2977 3091 - if (xfs_has_reflink(mp) && 3092 - (be32_to_cpu(agf->agf_refcount_level) < 1 || 3093 - be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)) 3094 - return __this_address; 2978 + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2979 + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > 2980 + mp->m_rmap_maxlevels) 2981 + return __this_address; 2982 + } 2983 + 2984 + if (xfs_has_reflink(mp)) { 2985 + if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length) 2986 + return __this_address; 2987 + 2988 + if (be32_to_cpu(agf->agf_refcount_level) < 1 || 2989 + be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels) 2990 + return __this_address; 2991 + } 3095 2992 3096 2993 return NULL; 3097 2994 } ··· 3309 3226 static int 3310 3227 xfs_alloc_vextent_prepare_ag( 3311 3228 struct xfs_alloc_arg *args, 3312 - uint32_t flags) 3229 + uint32_t alloc_flags) 3313 3230 { 3314 3231 bool need_pag = !args->pag; 3315 3232 int error; ··· 3318 3235 args->pag = xfs_perag_get(args->mp, args->agno); 3319 3236 3320 3237 args->agbp = NULL; 3321 - error = xfs_alloc_fix_freelist(args, flags); 3238 + error = xfs_alloc_fix_freelist(args, alloc_flags); 3322 3239 if (error) { 3323 3240 trace_xfs_alloc_vextent_nofix(args); 3324 3241 if (need_pag) ··· 3440 3357 { 3441 3358 struct xfs_mount *mp = args->mp; 3442 3359 xfs_agnumber_t minimum_agno; 3360 + uint32_t alloc_flags = 0; 3443 
3361 int error; 3444 3362 3445 3363 ASSERT(args->pag != NULL); ··· 3459 3375 return error; 3460 3376 } 3461 3377 3462 - error = xfs_alloc_vextent_prepare_ag(args, 0); 3378 + error = xfs_alloc_vextent_prepare_ag(args, alloc_flags); 3463 3379 if (!error && args->agbp) 3464 - error = xfs_alloc_ag_vextent_size(args); 3380 + error = xfs_alloc_ag_vextent_size(args, alloc_flags); 3465 3381 3466 3382 return xfs_alloc_vextent_finish(args, minimum_agno, error, false); 3467 3383 } ··· 3490 3406 xfs_agnumber_t minimum_agno, 3491 3407 xfs_agnumber_t start_agno, 3492 3408 xfs_agblock_t target_agbno, 3493 - uint32_t flags) 3409 + uint32_t alloc_flags) 3494 3410 { 3495 3411 struct xfs_mount *mp = args->mp; 3496 3412 xfs_agnumber_t restart_agno = minimum_agno; 3497 3413 xfs_agnumber_t agno; 3498 3414 int error = 0; 3499 3415 3500 - if (flags & XFS_ALLOC_FLAG_TRYLOCK) 3416 + if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) 3501 3417 restart_agno = 0; 3502 3418 restart: 3503 3419 for_each_perag_wrap_range(mp, start_agno, restart_agno, 3504 3420 mp->m_sb.sb_agcount, agno, args->pag) { 3505 3421 args->agno = agno; 3506 - error = xfs_alloc_vextent_prepare_ag(args, flags); 3422 + error = xfs_alloc_vextent_prepare_ag(args, alloc_flags); 3507 3423 if (error) 3508 3424 break; 3509 3425 if (!args->agbp) { ··· 3517 3433 */ 3518 3434 if (args->agno == start_agno && target_agbno) { 3519 3435 args->agbno = target_agbno; 3520 - error = xfs_alloc_ag_vextent_near(args); 3436 + error = xfs_alloc_ag_vextent_near(args, alloc_flags); 3521 3437 } else { 3522 3438 args->agbno = 0; 3523 - error = xfs_alloc_ag_vextent_size(args); 3439 + error = xfs_alloc_ag_vextent_size(args, alloc_flags); 3524 3440 } 3525 3441 break; 3526 3442 } ··· 3537 3453 * constraining flags by the caller, drop them and retry the allocation 3538 3454 * without any constraints being set. 
3539 3455 */ 3540 - if (flags) { 3541 - flags = 0; 3456 + if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) { 3457 + alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK; 3542 3458 restart_agno = minimum_agno; 3543 3459 goto restart; 3544 3460 } ··· 3566 3482 xfs_agnumber_t start_agno; 3567 3483 xfs_agnumber_t rotorstep = xfs_rotorstep; 3568 3484 bool bump_rotor = false; 3485 + uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK; 3569 3486 int error; 3570 3487 3571 3488 ASSERT(args->pag == NULL); ··· 3593 3508 3594 3509 start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target)); 3595 3510 error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno, 3596 - XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK); 3511 + XFS_FSB_TO_AGBNO(mp, target), alloc_flags); 3597 3512 3598 3513 if (bump_rotor) { 3599 3514 if (args->agno == start_agno) ··· 3620 3535 struct xfs_mount *mp = args->mp; 3621 3536 xfs_agnumber_t minimum_agno; 3622 3537 xfs_agnumber_t start_agno; 3538 + uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK; 3623 3539 int error; 3624 3540 3625 3541 ASSERT(args->pag == NULL); ··· 3639 3553 3640 3554 start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target)); 3641 3555 error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno, 3642 - XFS_FSB_TO_AGBNO(mp, target), 0); 3556 + XFS_FSB_TO_AGBNO(mp, target), alloc_flags); 3643 3557 return xfs_alloc_vextent_finish(args, minimum_agno, error, true); 3644 3558 } 3645 3559 ··· 3692 3606 struct xfs_mount *mp = args->mp; 3693 3607 xfs_agnumber_t minimum_agno; 3694 3608 bool needs_perag = args->pag == NULL; 3609 + uint32_t alloc_flags = 0; 3695 3610 int error; 3696 3611 3697 3612 if (!needs_perag) ··· 3713 3626 if (needs_perag) 3714 3627 args->pag = xfs_perag_grab(mp, args->agno); 3715 3628 3716 - error = xfs_alloc_vextent_prepare_ag(args, 0); 3629 + error = xfs_alloc_vextent_prepare_ag(args, alloc_flags); 3717 3630 if (!error && args->agbp) 3718 - error = xfs_alloc_ag_vextent_near(args); 3631 + error = xfs_alloc_ag_vextent_near(args, 
alloc_flags); 3719 3632 3720 3633 return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag); 3721 3634 } ··· 3843 3756 xfs_alloc_query_range_fn fn, 3844 3757 void *priv) 3845 3758 { 3846 - union xfs_btree_irec low_brec; 3847 - union xfs_btree_irec high_brec; 3848 - struct xfs_alloc_query_range_info query; 3759 + union xfs_btree_irec low_brec = { .a = *low_rec }; 3760 + union xfs_btree_irec high_brec = { .a = *high_rec }; 3761 + struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn }; 3849 3762 3850 3763 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 3851 - low_brec.a = *low_rec; 3852 - high_brec.a = *high_rec; 3853 - query.priv = priv; 3854 - query.fn = fn; 3855 3764 return xfs_btree_query_range(cur, &low_brec, &high_brec, 3856 3765 xfs_alloc_query_range_helper, &query); 3857 3766 }
+15 -9
fs/xfs/libxfs/xfs_alloc.h
··· 19 19 /* 20 20 * Flags for xfs_alloc_fix_freelist. 21 21 */ 22 - #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ 23 - #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ 24 - #define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ 25 - #define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ 26 - #define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ 22 + #define XFS_ALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */ 23 + #define XFS_ALLOC_FLAG_FREEING (1U << 1) /* indicate caller is freeing extents*/ 24 + #define XFS_ALLOC_FLAG_NORMAP (1U << 2) /* don't modify the rmapbt */ 25 + #define XFS_ALLOC_FLAG_NOSHRINK (1U << 3) /* don't shrink the freelist */ 26 + #define XFS_ALLOC_FLAG_CHECK (1U << 4) /* test only, don't modify args */ 27 + #define XFS_ALLOC_FLAG_TRYFLUSH (1U << 5) /* don't wait in busy extent flush */ 27 28 28 29 /* 29 30 * Argument structure for xfs_alloc routines. ··· 196 195 struct xfs_buf **bpp); 197 196 int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t, 198 197 struct xfs_buf *, struct xfs_owner_info *); 199 - int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); 198 + int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags); 200 199 int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag, 201 200 struct xfs_buf **agbp); 202 201 ··· 233 232 234 233 int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, 235 234 xfs_filblks_t len, const struct xfs_owner_info *oinfo, 236 - bool skip_discard); 235 + enum xfs_ag_resv_type type, bool skip_discard); 237 236 238 237 /* 239 238 * List of extents to be free "later". 
··· 246 245 xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ 247 246 struct xfs_perag *xefi_pag; 248 247 unsigned int xefi_flags; 248 + enum xfs_ag_resv_type xefi_agresv; 249 249 }; 250 250 251 251 void xfs_extent_free_get_group(struct xfs_mount *mp, ··· 261 259 struct xfs_trans *tp, 262 260 xfs_fsblock_t bno, 263 261 xfs_filblks_t len, 264 - const struct xfs_owner_info *oinfo) 262 + const struct xfs_owner_info *oinfo, 263 + enum xfs_ag_resv_type type) 265 264 { 266 - return __xfs_free_extent_later(tp, bno, len, oinfo, false); 265 + return __xfs_free_extent_later(tp, bno, len, oinfo, type, false); 267 266 } 268 267 269 268 ··· 272 269 273 270 int __init xfs_extfree_intent_init_cache(void); 274 271 void xfs_extfree_intent_destroy_cache(void); 272 + 273 + xfs_failaddr_t xfs_validate_ag_length(struct xfs_buf *bp, uint32_t seqno, 274 + uint32_t length); 275 275 276 276 #endif /* __XFS_ALLOC_H__ */
-2
fs/xfs/libxfs/xfs_attr_leaf.c
··· 2293 2293 2294 2294 trace_xfs_attr_leaf_unbalance(state->args); 2295 2295 2296 - drop_leaf = drop_blk->bp->b_addr; 2297 - save_leaf = save_blk->bp->b_addr; 2298 2296 xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf); 2299 2297 xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf); 2300 2298 entry = xfs_attr3_leaf_entryp(drop_leaf);
+5 -3
fs/xfs/libxfs/xfs_bmap.c
··· 574 574 return error; 575 575 576 576 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 577 - error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); 577 + error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, 578 + XFS_AG_RESV_NONE); 578 579 if (error) 579 580 return error; 580 581 ··· 5237 5236 } else { 5238 5237 error = __xfs_free_extent_later(tp, del->br_startblock, 5239 5238 del->br_blockcount, NULL, 5240 - (bflags & XFS_BMAPI_NODISCARD) || 5241 - del->br_state == XFS_EXT_UNWRITTEN); 5239 + XFS_AG_RESV_NONE, 5240 + ((bflags & XFS_BMAPI_NODISCARD) || 5241 + del->br_state == XFS_EXT_UNWRITTEN)); 5242 5242 if (error) 5243 5243 goto done; 5244 5244 }
+2 -1
fs/xfs/libxfs/xfs_bmap_btree.c
··· 271 271 int error; 272 272 273 273 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); 274 - error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); 274 + error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, 275 + XFS_AG_RESV_NONE); 275 276 if (error) 276 277 return error; 277 278
+15 -17
fs/xfs/libxfs/xfs_ialloc.c
··· 1853 1853 /* not sparse, calculate extent info directly */ 1854 1854 return xfs_free_extent_later(tp, 1855 1855 XFS_AGB_TO_FSB(mp, agno, sagbno), 1856 - M_IGEO(mp)->ialloc_blks, 1857 - &XFS_RMAP_OINFO_INODES); 1856 + M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, 1857 + XFS_AG_RESV_NONE); 1858 1858 } 1859 1859 1860 1860 /* holemask is only 16-bits (fits in an unsigned long) */ ··· 1899 1899 ASSERT(agbno % mp->m_sb.sb_spino_align == 0); 1900 1900 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); 1901 1901 error = xfs_free_extent_later(tp, 1902 - XFS_AGB_TO_FSB(mp, agno, agbno), 1903 - contigblk, &XFS_RMAP_OINFO_INODES); 1902 + XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, 1903 + &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE); 1904 1904 if (error) 1905 1905 return error; 1906 1906 ··· 2486 2486 2487 2487 static xfs_failaddr_t 2488 2488 xfs_agi_verify( 2489 - struct xfs_buf *bp) 2489 + struct xfs_buf *bp) 2490 2490 { 2491 - struct xfs_mount *mp = bp->b_mount; 2492 - struct xfs_agi *agi = bp->b_addr; 2493 - int i; 2491 + struct xfs_mount *mp = bp->b_mount; 2492 + struct xfs_agi *agi = bp->b_addr; 2493 + xfs_failaddr_t fa; 2494 + uint32_t agi_seqno = be32_to_cpu(agi->agi_seqno); 2495 + uint32_t agi_length = be32_to_cpu(agi->agi_length); 2496 + int i; 2494 2497 2495 2498 if (xfs_has_crc(mp)) { 2496 2499 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) ··· 2510 2507 if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) 2511 2508 return __this_address; 2512 2509 2510 + fa = xfs_validate_ag_length(bp, agi_seqno, agi_length); 2511 + if (fa) 2512 + return fa; 2513 + 2513 2514 if (be32_to_cpu(agi->agi_level) < 1 || 2514 2515 be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels) 2515 2516 return __this_address; ··· 2521 2514 if (xfs_has_finobt(mp) && 2522 2515 (be32_to_cpu(agi->agi_free_level) < 1 || 2523 2516 be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels)) 2524 - return __this_address; 2525 - 2526 - /* 2527 - * during growfs operations, the 
perag is not fully initialised, 2528 - * so we can't use it for any useful checking. growfs ensures we can't 2529 - * use it by using uncached buffers that don't have the perag attached 2530 - * so we can detect and avoid this problem. 2531 - */ 2532 - if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) 2533 2517 return __this_address; 2534 2518 2535 2519 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
+1 -2
fs/xfs/libxfs/xfs_ialloc_btree.c
··· 160 160 161 161 xfs_inobt_mod_blockcount(cur, -1); 162 162 fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); 163 - return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, 164 - XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, 163 + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, 165 164 &XFS_RMAP_OINFO_INOBT, resv); 166 165 } 167 166
+13 -9
fs/xfs/libxfs/xfs_refcount.c
··· 1152 1152 cur->bc_ag.pag->pag_agno, 1153 1153 tmp.rc_startblock); 1154 1154 error = xfs_free_extent_later(cur->bc_tp, fsbno, 1155 - tmp.rc_blockcount, NULL); 1155 + tmp.rc_blockcount, NULL, 1156 + XFS_AG_RESV_NONE); 1156 1157 if (error) 1157 1158 goto out_error; 1158 1159 } ··· 1214 1213 cur->bc_ag.pag->pag_agno, 1215 1214 ext.rc_startblock); 1216 1215 error = xfs_free_extent_later(cur->bc_tp, fsbno, 1217 - ext.rc_blockcount, NULL); 1216 + ext.rc_blockcount, NULL, 1217 + XFS_AG_RESV_NONE); 1218 1218 if (error) 1219 1219 goto out_error; 1220 1220 } ··· 1921 1919 struct xfs_buf *agbp; 1922 1920 struct xfs_refcount_recovery *rr, *n; 1923 1921 struct list_head debris; 1924 - union xfs_btree_irec low; 1925 - union xfs_btree_irec high; 1922 + union xfs_btree_irec low = { 1923 + .rc.rc_domain = XFS_REFC_DOMAIN_COW, 1924 + }; 1925 + union xfs_btree_irec high = { 1926 + .rc.rc_domain = XFS_REFC_DOMAIN_COW, 1927 + .rc.rc_startblock = -1U, 1928 + }; 1926 1929 xfs_fsblock_t fsb; 1927 1930 int error; 1928 1931 ··· 1958 1951 cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); 1959 1952 1960 1953 /* Find all the leftover CoW staging extents. */ 1961 - memset(&low, 0, sizeof(low)); 1962 - memset(&high, 0, sizeof(high)); 1963 - low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW; 1964 - high.rc.rc_startblock = -1U; 1965 1954 error = xfs_btree_query_range(cur, &low, &high, 1966 1955 xfs_refcount_recover_extent, &debris); 1967 1956 xfs_btree_del_cursor(cur, error); ··· 1984 1981 1985 1982 /* Free the block. */ 1986 1983 error = xfs_free_extent_later(tp, fsb, 1987 - rr->rr_rrec.rc_blockcount, NULL); 1984 + rr->rr_rrec.rc_blockcount, NULL, 1985 + XFS_AG_RESV_NONE); 1988 1986 if (error) 1989 1987 goto out_trans; 1990 1988
+1 -7
fs/xfs/libxfs/xfs_refcount_btree.c
··· 106 106 struct xfs_buf *agbp = cur->bc_ag.agbp; 107 107 struct xfs_agf *agf = agbp->b_addr; 108 108 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); 109 - int error; 110 109 111 110 trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, 112 111 XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); 113 112 be32_add_cpu(&agf->agf_refcount_blocks, -1); 114 113 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); 115 - error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, 116 - XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, 114 + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, 117 115 &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); 118 - if (error) 119 - return error; 120 - 121 - return error; 122 116 } 123 117 124 118 STATIC int
+3 -7
fs/xfs/libxfs/xfs_rmap.c
··· 2389 2389 xfs_rmap_query_range_fn fn, 2390 2390 void *priv) 2391 2391 { 2392 - union xfs_btree_irec low_brec; 2393 - union xfs_btree_irec high_brec; 2394 - struct xfs_rmap_query_range_info query; 2392 + union xfs_btree_irec low_brec = { .r = *low_rec }; 2393 + union xfs_btree_irec high_brec = { .r = *high_rec }; 2394 + struct xfs_rmap_query_range_info query = { .priv = priv, .fn = fn }; 2395 2395 2396 - low_brec.r = *low_rec; 2397 - high_brec.r = *high_rec; 2398 - query.priv = priv; 2399 - query.fn = fn; 2400 2396 return xfs_btree_query_range(cur, &low_brec, &high_brec, 2401 2397 xfs_rmap_query_range_helper, &query); 2402 2398 }
+55 -1
fs/xfs/libxfs/xfs_sb.c
··· 412 412 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 413 413 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 414 414 sbp->sb_inodesize != (1 << sbp->sb_inodelog) || 415 - sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || 416 415 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || 417 416 XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || 418 417 XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || ··· 427 428 sbp->sb_shared_vn != 0)) { 428 429 xfs_notice(mp, "SB sanity check failed"); 429 430 return -EFSCORRUPTED; 431 + } 432 + 433 + /* 434 + * Logs that are too large are not supported at all. Reject them 435 + * outright. Logs that are too small are tolerated on v4 filesystems, 436 + * but we can only check that when mounting the log. Hence we skip 437 + * those checks here. 438 + */ 439 + if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) { 440 + xfs_notice(mp, 441 + "Log size 0x%x blocks too large, maximum size is 0x%llx blocks", 442 + sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS); 443 + return -EFSCORRUPTED; 444 + } 445 + 446 + if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) { 447 + xfs_warn(mp, 448 + "log size 0x%llx bytes too large, maximum size is 0x%llx bytes", 449 + XFS_FSB_TO_B(mp, sbp->sb_logblocks), 450 + XFS_MAX_LOG_BYTES); 451 + return -EFSCORRUPTED; 452 + } 453 + 454 + /* 455 + * Do not allow filesystems with corrupted log sector or stripe units to 456 + * be mounted. We cannot safely size the iclogs or write to the log if 457 + * the log stripe unit is not valid. 
458 + */ 459 + if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) { 460 + if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) { 461 + xfs_notice(mp, 462 + "log sector size in bytes/log2 (0x%x/0x%x) must match", 463 + sbp->sb_logsectsize, 1U << sbp->sb_logsectlog); 464 + return -EFSCORRUPTED; 465 + } 466 + } else if (sbp->sb_logsectsize || sbp->sb_logsectlog) { 467 + xfs_notice(mp, 468 + "log sector size in bytes/log2 (0x%x/0x%x) are not zero", 469 + sbp->sb_logsectsize, sbp->sb_logsectlog); 470 + return -EFSCORRUPTED; 471 + } 472 + 473 + if (sbp->sb_logsunit > 1) { 474 + if (sbp->sb_logsunit % sbp->sb_blocksize) { 475 + xfs_notice(mp, 476 + "log stripe unit 0x%x bytes must be a multiple of block size", 477 + sbp->sb_logsunit); 478 + return -EFSCORRUPTED; 479 + } 480 + if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) { 481 + xfs_notice(mp, 482 + "log stripe unit 0x%x bytes over maximum size (0x%x bytes)", 483 + sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE); 484 + return -EFSCORRUPTED; 485 + } 430 486 } 431 487 432 488 /* Validate the realtime geometry; stolen from xfs_repair */
+31 -5
fs/xfs/xfs_extent_busy.c
··· 566 566 567 567 /* 568 568 * Flush out all busy extents for this AG. 569 + * 570 + * If the current transaction is holding busy extents, the caller may not want 571 + * to wait for committed busy extents to resolve. If we are being told just to 572 + * try a flush or progress has been made since we last skipped a busy extent, 573 + * return immediately to allow the caller to try again. 574 + * 575 + * If we are freeing extents, we might actually be holding the only free extents 576 + * in the transaction busy list and the log force won't resolve that situation. 577 + * In this case, we must return -EAGAIN to avoid a deadlock by informing the 578 + * caller it needs to commit the busy extents it holds before retrying the 579 + * extent free operation. 569 580 */ 570 - void 581 + int 571 582 xfs_extent_busy_flush( 572 - struct xfs_mount *mp, 583 + struct xfs_trans *tp, 573 584 struct xfs_perag *pag, 574 - unsigned busy_gen) 585 + unsigned busy_gen, 586 + uint32_t alloc_flags) 575 587 { 576 588 DEFINE_WAIT (wait); 577 589 int error; 578 590 579 - error = xfs_log_force(mp, XFS_LOG_SYNC); 591 + error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); 580 592 if (error) 581 - return; 593 + return error; 582 594 595 + /* Avoid deadlocks on uncommitted busy extents. */ 596 + if (!list_empty(&tp->t_busy)) { 597 + if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH) 598 + return 0; 599 + 600 + if (busy_gen != READ_ONCE(pag->pagb_gen)) 601 + return 0; 602 + 603 + if (alloc_flags & XFS_ALLOC_FLAG_FREEING) 604 + return -EAGAIN; 605 + } 606 + 607 + /* Wait for committed busy extents to resolve. */ 583 608 do { 584 609 prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); 585 610 if (busy_gen != READ_ONCE(pag->pagb_gen)) ··· 613 588 } while (1); 614 589 615 590 finish_wait(&pag->pagb_wait, &wait); 591 + return 0; 616 592 } 617 593 618 594 void
+3 -3
fs/xfs/xfs_extent_busy.h
··· 51 51 xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno, 52 52 xfs_extlen_t *len, unsigned *busy_gen); 53 53 54 - void 55 - xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag, 56 - unsigned busy_gen); 54 + int 55 + xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag, 56 + unsigned busy_gen, uint32_t alloc_flags); 57 57 58 58 void 59 59 xfs_extent_busy_wait_all(struct xfs_mount *mp);
+71 -4
fs/xfs/xfs_extfree_item.c
··· 337 337 } 338 338 339 339 /* 340 + * Fill the EFD with all extents from the EFI when we need to roll the 341 + * transaction and continue with a new EFI. 342 + * 343 + * This simply copies all the extents in the EFI to the EFD rather than make 344 + * assumptions about which extents in the EFI have already been processed. We 345 + * currently keep the xefi list in the same order as the EFI extent list, but 346 + * that may not always be the case. Copying everything avoids leaving a landmine 347 + * where we fail to cancel all the extents in an EFI if the xefi list is 348 + * processed in a different order to the extents in the EFI. 349 + */ 350 + static void 351 + xfs_efd_from_efi( 352 + struct xfs_efd_log_item *efdp) 353 + { 354 + struct xfs_efi_log_item *efip = efdp->efd_efip; 355 + uint i; 356 + 357 + ASSERT(efip->efi_format.efi_nextents > 0); 358 + ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents); 359 + 360 + for (i = 0; i < efip->efi_format.efi_nextents; i++) { 361 + efdp->efd_format.efd_extents[i] = 362 + efip->efi_format.efi_extents[i]; 363 + } 364 + efdp->efd_next_extent = efip->efi_format.efi_nextents; 365 + } 366 + 367 + /* 340 368 * Free an extent and log it to the EFD. Note that the transaction is marked 341 369 * dirty regardless of whether the extent free succeeds or fails to support the 342 370 * EFI/EFD lifecycle rules. ··· 393 365 agbno, xefi->xefi_blockcount); 394 366 395 367 error = __xfs_free_extent(tp, xefi->xefi_pag, agbno, 396 - xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, 368 + xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv, 397 369 xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); 398 370 399 371 /* ··· 405 377 */ 406 378 tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE; 407 379 set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); 380 + 381 + /* 382 + * If we need a new transaction to make progress, the caller will log a 383 + * new EFI with the current contents. 
It will also log an EFD to cancel 384 + * the existing EFI, and so we need to copy all the unprocessed extents 385 + * in this EFI to the EFD so this works correctly. 386 + */ 387 + if (error == -EAGAIN) { 388 + xfs_efd_from_efi(efdp); 389 + return error; 390 + } 408 391 409 392 next_extent = efdp->efd_next_extent; 410 393 ASSERT(next_extent < efdp->efd_format.efd_nextents); ··· 533 494 xefi = container_of(item, struct xfs_extent_free_item, xefi_list); 534 495 535 496 error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); 497 + 498 + /* 499 + * Don't free the XEFI if we need a new transaction to complete 500 + * processing of it. 501 + */ 502 + if (error == -EAGAIN) 503 + return error; 536 504 537 505 xfs_extent_free_put_group(xefi); 538 506 kmem_cache_free(xfs_extfree_item_cache, xefi); ··· 666 620 struct xfs_trans *tp; 667 621 int i; 668 622 int error = 0; 623 + bool requeue_only = false; 669 624 670 625 /* 671 626 * First check the validity of the extents described by the ··· 691 644 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 692 645 struct xfs_extent_free_item fake = { 693 646 .xefi_owner = XFS_RMAP_OWN_UNKNOWN, 647 + .xefi_agresv = XFS_AG_RESV_NONE, 694 648 }; 695 649 struct xfs_extent *extp; 696 650 ··· 700 652 fake.xefi_startblock = extp->ext_start; 701 653 fake.xefi_blockcount = extp->ext_len; 702 654 703 - xfs_extent_free_get_group(mp, &fake); 704 - error = xfs_trans_free_extent(tp, efdp, &fake); 705 - xfs_extent_free_put_group(&fake); 655 + if (!requeue_only) { 656 + xfs_extent_free_get_group(mp, &fake); 657 + error = xfs_trans_free_extent(tp, efdp, &fake); 658 + xfs_extent_free_put_group(&fake); 659 + } 660 + 661 + /* 662 + * If we can't free the extent without potentially deadlocking, 663 + * requeue the rest of the extents to a new EFI so that they get 664 + * run again later with a new transaction context. 
665 + */ 666 + if (error == -EAGAIN || requeue_only) { 667 + error = xfs_free_extent_later(tp, fake.xefi_startblock, 668 + fake.xefi_blockcount, 669 + &XFS_RMAP_OINFO_ANY_OWNER, 670 + fake.xefi_agresv); 671 + if (!error) { 672 + requeue_only = true; 673 + continue; 674 + } 675 + } 676 + 706 677 if (error == -EFSCORRUPTED) 707 678 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 708 679 extp, sizeof(*extp));
+139 -122
fs/xfs/xfs_fsmap.c
··· 160 160 struct xfs_buf *agf_bp; /* AGF, for refcount queries */ 161 161 struct xfs_perag *pag; /* AG info, if applicable */ 162 162 xfs_daddr_t next_daddr; /* next daddr we expect */ 163 + /* daddr of low fsmap key when we're using the rtbitmap */ 164 + xfs_daddr_t low_daddr; 163 165 u64 missing_owner; /* owner of holes */ 164 166 u32 dev; /* device id */ 165 - struct xfs_rmap_irec low; /* low rmap key */ 167 + /* 168 + * Low rmap key for the query. If low.rm_blockcount is nonzero, this 169 + * is the second (or later) call to retrieve the recordset in pieces. 170 + * xfs_getfsmap_rec_before_start will compare all records retrieved 171 + * by the rmapbt query to filter out any records that start before 172 + * the last record. 173 + */ 174 + struct xfs_rmap_irec low; 166 175 struct xfs_rmap_irec high; /* high rmap key */ 167 176 bool last; /* last extent? */ 168 177 }; ··· 246 237 xfs_fsmap_from_internal(rec, xfm); 247 238 } 248 239 240 + static inline bool 241 + xfs_getfsmap_rec_before_start( 242 + struct xfs_getfsmap_info *info, 243 + const struct xfs_rmap_irec *rec, 244 + xfs_daddr_t rec_daddr) 245 + { 246 + if (info->low_daddr != -1ULL) 247 + return rec_daddr < info->low_daddr; 248 + if (info->low.rm_blockcount) 249 + return xfs_rmap_compare(rec, &info->low) < 0; 250 + return false; 251 + } 252 + 249 253 /* 250 254 * Format a reverse mapping for getfsmap, having translated rm_startblock 251 - * into the appropriate daddr units. 255 + * into the appropriate daddr units. Pass in a nonzero @len_daddr if the 256 + * length could be larger than rm_blockcount in struct xfs_rmap_irec. 
252 257 */ 253 258 STATIC int 254 259 xfs_getfsmap_helper( 255 260 struct xfs_trans *tp, 256 261 struct xfs_getfsmap_info *info, 257 262 const struct xfs_rmap_irec *rec, 258 - xfs_daddr_t rec_daddr) 263 + xfs_daddr_t rec_daddr, 264 + xfs_daddr_t len_daddr) 259 265 { 260 266 struct xfs_fsmap fmr; 261 267 struct xfs_mount *mp = tp->t_mountp; ··· 280 256 if (fatal_signal_pending(current)) 281 257 return -EINTR; 282 258 259 + if (len_daddr == 0) 260 + len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount); 261 + 283 262 /* 284 263 * Filter out records that start before our startpoint, if the 285 264 * caller requested that. 286 265 */ 287 - if (xfs_rmap_compare(rec, &info->low) < 0) { 288 - rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); 266 + if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) { 267 + rec_daddr += len_daddr; 289 268 if (info->next_daddr < rec_daddr) 290 269 info->next_daddr = rec_daddr; 291 270 return 0; ··· 307 280 308 281 info->head->fmh_entries++; 309 282 310 - rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); 283 + rec_daddr += len_daddr; 311 284 if (info->next_daddr < rec_daddr) 312 285 info->next_daddr = rec_daddr; 313 286 return 0; ··· 347 320 if (error) 348 321 return error; 349 322 fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset); 350 - fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount); 323 + fmr.fmr_length = len_daddr; 351 324 if (rec->rm_flags & XFS_RMAP_UNWRITTEN) 352 325 fmr.fmr_flags |= FMR_OF_PREALLOC; 353 326 if (rec->rm_flags & XFS_RMAP_ATTR_FORK) ··· 364 337 365 338 xfs_getfsmap_format(mp, &fmr, info); 366 339 out: 367 - rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); 340 + rec_daddr += len_daddr; 368 341 if (info->next_daddr < rec_daddr) 369 342 info->next_daddr = rec_daddr; 370 343 return 0; ··· 385 358 fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock); 386 359 rec_daddr = XFS_FSB_TO_DADDR(mp, fsb); 387 360 388 - return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); 361 + return 
xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0); 389 362 } 390 363 391 364 /* Transform a bnobt irec into a fsmap */ ··· 409 382 irec.rm_offset = 0; 410 383 irec.rm_flags = 0; 411 384 412 - return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr); 385 + return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0); 413 386 } 414 387 415 388 /* Set rmap flags based on the getfsmap flags */ ··· 436 409 { 437 410 struct xfs_mount *mp = tp->t_mountp; 438 411 struct xfs_rmap_irec rmap; 439 - int error; 412 + xfs_daddr_t rec_daddr, len_daddr; 413 + xfs_fsblock_t start_fsb, end_fsb; 414 + uint64_t eofs; 440 415 441 - /* Set up search keys */ 442 - info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); 443 - info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); 444 - error = xfs_fsmap_owner_to_rmap(&info->low, keys); 445 - if (error) 446 - return error; 447 - info->low.rm_blockcount = 0; 448 - xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); 416 + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 417 + if (keys[0].fmr_physical >= eofs) 418 + return 0; 419 + start_fsb = XFS_BB_TO_FSBT(mp, 420 + keys[0].fmr_physical + keys[0].fmr_length); 421 + end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 449 422 450 - error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1); 451 - if (error) 452 - return error; 453 - info->high.rm_startblock = -1U; 454 - info->high.rm_owner = ULLONG_MAX; 455 - info->high.rm_offset = ULLONG_MAX; 456 - info->high.rm_blockcount = 0; 457 - info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; 458 - info->missing_owner = XFS_FMR_OWN_FREE; 423 + /* Adjust the low key if we are continuing from where we left off. 
*/ 424 + if (keys[0].fmr_length > 0) 425 + info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); 459 426 460 - trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low); 461 - trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high); 427 + trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb); 428 + trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb); 462 429 463 - if (keys[0].fmr_physical > 0) 430 + if (start_fsb > 0) 464 431 return 0; 465 432 466 433 /* Fabricate an rmap entry for the external log device. */ ··· 464 443 rmap.rm_offset = 0; 465 444 rmap.rm_flags = 0; 466 445 467 - return xfs_getfsmap_helper(tp, info, &rmap, 0); 446 + rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock); 447 + len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount); 448 + return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr); 468 449 } 469 450 470 451 #ifdef CONFIG_XFS_RT ··· 480 457 { 481 458 struct xfs_getfsmap_info *info = priv; 482 459 struct xfs_rmap_irec irec; 483 - xfs_daddr_t rec_daddr; 460 + xfs_rtblock_t rtbno; 461 + xfs_daddr_t rec_daddr, len_daddr; 484 462 485 - irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize; 486 - rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock); 487 - irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize; 463 + rtbno = rec->ar_startext * mp->m_sb.sb_rextsize; 464 + rec_daddr = XFS_FSB_TO_BB(mp, rtbno); 465 + irec.rm_startblock = rtbno; 466 + 467 + rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize; 468 + len_daddr = XFS_FSB_TO_BB(mp, rtbno); 469 + irec.rm_blockcount = rtbno; 470 + 488 471 irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ 489 472 irec.rm_offset = 0; 490 473 irec.rm_flags = 0; 491 474 492 - return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); 475 + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr); 493 476 } 494 477 495 - /* Execute a getfsmap query against the realtime device. */ 478 + /* Execute a getfsmap query against the realtime device rtbitmap. 
*/ 496 479 STATIC int 497 - __xfs_getfsmap_rtdev( 480 + xfs_getfsmap_rtdev_rtbitmap( 498 481 struct xfs_trans *tp, 499 482 const struct xfs_fsmap *keys, 500 - int (*query_fn)(struct xfs_trans *, 501 - struct xfs_getfsmap_info *), 502 483 struct xfs_getfsmap_info *info) 503 484 { 504 - struct xfs_mount *mp = tp->t_mountp; 505 - xfs_fsblock_t start_fsb; 506 - xfs_fsblock_t end_fsb; 507 - uint64_t eofs; 508 - int error = 0; 509 485 510 - eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 511 - if (keys[0].fmr_physical >= eofs) 512 - return 0; 513 - start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); 514 - end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 515 - 516 - /* Set up search keys */ 517 - info->low.rm_startblock = start_fsb; 518 - error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); 519 - if (error) 520 - return error; 521 - info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); 522 - info->low.rm_blockcount = 0; 523 - xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); 524 - 525 - info->high.rm_startblock = end_fsb; 526 - error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); 527 - if (error) 528 - return error; 529 - info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset); 530 - info->high.rm_blockcount = 0; 531 - xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); 532 - 533 - trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low); 534 - trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high); 535 - 536 - return query_fn(tp, info); 537 - } 538 - 539 - /* Actually query the realtime bitmap. 
*/ 540 - STATIC int 541 - xfs_getfsmap_rtdev_rtbitmap_query( 542 - struct xfs_trans *tp, 543 - struct xfs_getfsmap_info *info) 544 - { 545 486 struct xfs_rtalloc_rec alow = { 0 }; 546 487 struct xfs_rtalloc_rec ahigh = { 0 }; 547 488 struct xfs_mount *mp = tp->t_mountp; 489 + xfs_rtblock_t start_rtb; 490 + xfs_rtblock_t end_rtb; 491 + uint64_t eofs; 548 492 int error; 493 + 494 + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize); 495 + if (keys[0].fmr_physical >= eofs) 496 + return 0; 497 + start_rtb = XFS_BB_TO_FSBT(mp, 498 + keys[0].fmr_physical + keys[0].fmr_length); 499 + end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); 500 + 501 + info->missing_owner = XFS_FMR_OWN_UNKNOWN; 502 + 503 + /* Adjust the low key if we are continuing from where we left off. */ 504 + if (keys[0].fmr_length > 0) { 505 + info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb); 506 + if (info->low_daddr >= eofs) 507 + return 0; 508 + } 509 + 510 + trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb); 511 + trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb); 549 512 550 513 xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); 551 514 ··· 539 530 * Set up query parameters to return free rtextents covering the range 540 531 * we want. 541 532 */ 542 - alow.ar_startext = info->low.rm_startblock; 543 - ahigh.ar_startext = info->high.rm_startblock; 533 + alow.ar_startext = start_rtb; 534 + ahigh.ar_startext = end_rtb; 544 535 do_div(alow.ar_startext, mp->m_sb.sb_rextsize); 545 536 if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize)) 546 537 ahigh.ar_startext++; ··· 562 553 err: 563 554 xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); 564 555 return error; 565 - } 566 - 567 - /* Execute a getfsmap query against the realtime device rtbitmap. 
*/ 568 - STATIC int 569 - xfs_getfsmap_rtdev_rtbitmap( 570 - struct xfs_trans *tp, 571 - const struct xfs_fsmap *keys, 572 - struct xfs_getfsmap_info *info) 573 - { 574 - info->missing_owner = XFS_FMR_OWN_UNKNOWN; 575 - return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query, 576 - info); 577 556 } 578 557 #endif /* CONFIG_XFS_RT */ 579 558 ··· 603 606 error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); 604 607 if (error) 605 608 return error; 606 - info->low.rm_blockcount = 0; 609 + info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length); 607 610 xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); 611 + 612 + /* Adjust the low key if we are continuing from where we left off. */ 613 + if (info->low.rm_blockcount == 0) { 614 + /* empty */ 615 + } else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) || 616 + (info->low.rm_flags & (XFS_RMAP_ATTR_FORK | 617 + XFS_RMAP_BMBT_BLOCK | 618 + XFS_RMAP_UNWRITTEN))) { 619 + info->low.rm_startblock += info->low.rm_blockcount; 620 + info->low.rm_owner = 0; 621 + info->low.rm_offset = 0; 622 + 623 + start_fsb += info->low.rm_blockcount; 624 + if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs) 625 + return 0; 626 + } else { 627 + info->low.rm_offset += info->low.rm_blockcount; 628 + } 608 629 609 630 info->high.rm_startblock = -1U; 610 631 info->high.rm_owner = ULLONG_MAX; ··· 674 659 * Set the AG low key to the start of the AG prior to 675 660 * moving on to the next AG. 
676 661 */ 677 - if (pag->pag_agno == start_ag) { 678 - info->low.rm_startblock = 0; 679 - info->low.rm_owner = 0; 680 - info->low.rm_offset = 0; 681 - info->low.rm_flags = 0; 682 - } 662 + if (pag->pag_agno == start_ag) 663 + memset(&info->low, 0, sizeof(info->low)); 683 664 684 665 /* 685 666 * If this is the last AG, report any gap at the end of it ··· 802 791 struct xfs_fsmap *low_key, 803 792 struct xfs_fsmap *high_key) 804 793 { 794 + if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { 795 + if (low_key->fmr_offset) 796 + return false; 797 + } 798 + if (high_key->fmr_flags != -1U && 799 + (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | 800 + FMR_OF_EXTENT_MAP))) { 801 + if (high_key->fmr_offset && high_key->fmr_offset != -1ULL) 802 + return false; 803 + } 804 + if (high_key->fmr_length && high_key->fmr_length != -1ULL) 805 + return false; 806 + 805 807 if (low_key->fmr_device > high_key->fmr_device) 806 808 return false; 807 809 if (low_key->fmr_device < high_key->fmr_device) ··· 858 834 * ---------------- 859 835 * There are multiple levels of keys and counters at work here: 860 836 * xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in; 861 - * these reflect fs-wide sector addrs. 837 + * these reflect fs-wide sector addrs. 862 838 * dkeys -- fmh_keys used to query each device; 863 - * these are fmh_keys but w/ the low key 864 - * bumped up by fmr_length. 839 + * these are fmh_keys but w/ the low key 840 + * bumped up by fmr_length. 865 841 * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this 866 842 * is how we detect gaps in the fsmap 867 843 records and report them. 868 844 * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from 869 - * dkeys; used to query the metadata. 845 + * dkeys; used to query the metadata. 
870 846 */ 871 847 int 872 848 xfs_getfsmap( ··· 886 862 return -EINVAL; 887 863 if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) || 888 864 !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) 865 + return -EINVAL; 866 + if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1])) 889 867 return -EINVAL; 890 868 891 869 use_rmap = xfs_has_rmapbt(mp) && ··· 927 901 * blocks could be mapped to several other files/offsets. 928 902 * According to rmapbt record ordering, the minimal next 929 903 * possible record for the block range is the next starting 930 - * offset in the same inode. Therefore, bump the file offset to 931 - * continue the search appropriately. For all other low key 932 - * mapping types (attr blocks, metadata), bump the physical 933 - * offset as there can be no other mapping for the same physical 934 - * block range. 904 + * offset in the same inode. Therefore, each fsmap backend bumps 905 + * the file offset to continue the search appropriately. For 906 + * all other low key mapping types (attr blocks, metadata), each 907 + * fsmap backend bumps the physical offset as there can be no 908 + * other mapping for the same physical block range. 
935 909 */ 936 910 dkeys[0] = head->fmh_keys[0]; 937 - if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { 938 - dkeys[0].fmr_physical += dkeys[0].fmr_length; 939 - dkeys[0].fmr_owner = 0; 940 - if (dkeys[0].fmr_offset) 941 - return -EINVAL; 942 - } else 943 - dkeys[0].fmr_offset += dkeys[0].fmr_length; 944 - dkeys[0].fmr_length = 0; 945 911 memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); 946 - 947 - if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) 948 - return -EINVAL; 949 912 950 913 info.next_daddr = head->fmh_keys[0].fmr_physical + 951 914 head->fmh_keys[0].fmr_length; ··· 975 960 info.dev = handlers[i].dev; 976 961 info.last = false; 977 962 info.pag = NULL; 963 + info.low_daddr = -1ULL; 964 + info.low.rm_blockcount = 0; 978 965 error = handlers[i].fn(tp, dkeys, &info); 979 966 if (error) 980 967 break;
+15 -32
fs/xfs/xfs_log.c
··· 639 639 int num_bblks) 640 640 { 641 641 struct xlog *log; 642 - bool fatal = xfs_has_crc(mp); 643 642 int error = 0; 644 643 int min_logfsbs; 645 644 ··· 662 663 mp->m_log = log; 663 664 664 665 /* 665 - * Validate the given log space and drop a critical message via syslog 666 - * if the log size is too small that would lead to some unexpected 667 - * situations in transaction log space reservation stage. 666 + * Now that we have set up the log and its internal geometry 667 + * parameters, we can validate the given log space and drop a critical 668 + * message via syslog if the log size is too small. A log that is too 669 + * small can lead to unexpected situations in transaction log space 670 + * reservation stage. The superblock verifier has already validated all 671 + * the other log geometry constraints, so we don't have to check those 672 + * here. 668 673 * 669 - * Note: we can't just reject the mount if the validation fails. This 670 - * would mean that people would have to downgrade their kernel just to 671 - * remedy the situation as there is no way to grow the log (short of 672 - * black magic surgery with xfs_db). 674 + * Note: For v4 filesystems, we can't just reject the mount if the 675 + * validation fails. This would mean that people would have to 676 + * downgrade their kernel just to remedy the situation as there is no 677 + * way to grow the log (short of black magic surgery with xfs_db). 673 678 * 674 - * We can, however, reject mounts for CRC format filesystems, as the 679 + * We can, however, reject mounts for V5 format filesystems, as the 675 680 * mkfs binary being used to make the filesystem should never create a 676 681 * filesystem with a log that is too small. 
677 682 */ 678 683 min_logfsbs = xfs_log_calc_minimum_size(mp); 679 - 680 684 if (mp->m_sb.sb_logblocks < min_logfsbs) { 681 685 xfs_warn(mp, 682 686 "Log size %d blocks too small, minimum size is %d blocks", 683 687 mp->m_sb.sb_logblocks, min_logfsbs); 684 - error = -EINVAL; 685 - } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) { 686 - xfs_warn(mp, 687 - "Log size %d blocks too large, maximum size is %lld blocks", 688 - mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS); 689 - error = -EINVAL; 690 - } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) { 691 - xfs_warn(mp, 692 - "log size %lld bytes too large, maximum size is %lld bytes", 693 - XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), 694 - XFS_MAX_LOG_BYTES); 695 - error = -EINVAL; 696 - } else if (mp->m_sb.sb_logsunit > 1 && 697 - mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) { 698 - xfs_warn(mp, 699 - "log stripe unit %u bytes must be a multiple of block size", 700 - mp->m_sb.sb_logsunit); 701 - error = -EINVAL; 702 - fatal = true; 703 - } 704 - if (error) { 688 + 705 689 /* 706 690 * Log check errors are always fatal on v5; or whenever bad 707 691 * metadata leads to a crash. 708 692 */ 709 - if (fatal) { 693 + if (xfs_has_crc(mp)) { 710 694 xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!"); 711 695 ASSERT(0); 696 + error = -EINVAL; 712 697 goto out_free_log; 713 698 } 714 699 xfs_crit(mp, "Log size out of supported range.");
+5 -4
fs/xfs/xfs_notify_failure.c
··· 114 114 int error = 0; 115 115 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr); 116 116 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno); 117 - xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen); 117 + xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp, 118 + daddr + bblen - 1); 118 119 xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno); 119 120 120 121 error = xfs_trans_alloc_empty(mp, &tp); ··· 211 210 ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1; 212 211 213 212 /* Ignore the range out of filesystem area */ 214 - if (offset + len < ddev_start) 213 + if (offset + len - 1 < ddev_start) 215 214 return -ENXIO; 216 215 if (offset > ddev_end) 217 216 return -ENXIO; ··· 223 222 len -= ddev_start - offset; 224 223 offset = 0; 225 224 } 226 - if (offset + len > ddev_end) 227 - len -= ddev_end - offset; 225 + if (offset + len - 1 > ddev_end) 226 + len = ddev_end - offset + 1; 228 227 229 228 return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len), 230 229 mf_flags);
+2 -1
fs/xfs/xfs_reflink.c
··· 617 617 del.br_blockcount); 618 618 619 619 error = xfs_free_extent_later(*tpp, del.br_startblock, 620 - del.br_blockcount, NULL); 620 + del.br_blockcount, NULL, 621 + XFS_AG_RESV_NONE); 621 622 if (error) 622 623 break; 623 624
+25
fs/xfs/xfs_trace.h
··· 3623 3623 DEFINE_FSMAP_EVENT(xfs_fsmap_high_key); 3624 3624 DEFINE_FSMAP_EVENT(xfs_fsmap_mapping); 3625 3625 3626 + DECLARE_EVENT_CLASS(xfs_fsmap_linear_class, 3627 + TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), 3628 + TP_ARGS(mp, keydev, bno), 3629 + TP_STRUCT__entry( 3630 + __field(dev_t, dev) 3631 + __field(dev_t, keydev) 3632 + __field(xfs_fsblock_t, bno) 3633 + ), 3634 + TP_fast_assign( 3635 + __entry->dev = mp->m_super->s_dev; 3636 + __entry->keydev = new_decode_dev(keydev); 3637 + __entry->bno = bno; 3638 + ), 3639 + TP_printk("dev %d:%d keydev %d:%d bno 0x%llx", 3640 + MAJOR(__entry->dev), MINOR(__entry->dev), 3641 + MAJOR(__entry->keydev), MINOR(__entry->keydev), 3642 + __entry->bno) 3643 + ) 3644 + #define DEFINE_FSMAP_LINEAR_EVENT(name) \ 3645 + DEFINE_EVENT(xfs_fsmap_linear_class, name, \ 3646 + TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \ 3647 + TP_ARGS(mp, keydev, bno)) 3648 + DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear); 3649 + DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear); 3650 + 3626 3651 DECLARE_EVENT_CLASS(xfs_getfsmap_class, 3627 3652 TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), 3628 3653 TP_ARGS(mp, fsmap),
+1 -1
fs/xfs/xfs_trans_ail.c
··· 823 823 trace_xfs_ail_insert(lip, 0, lsn); 824 824 } 825 825 lip->li_lsn = lsn; 826 - list_add(&lip->li_ail, &tmp); 826 + list_add_tail(&lip->li_ail, &tmp); 827 827 } 828 828 829 829 if (!list_empty(&tmp))