Merge tag 'xfs-rmap-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

+5

fs/xfs/Makefile

··· 39 39 xfs_btree.o \ 40 40 xfs_da_btree.o \ 41 41 xfs_da_format.o \ 42 + xfs_defer.o \ 42 43 xfs_dir2.o \ 43 44 xfs_dir2_block.o \ 44 45 xfs_dir2_data.o \ ··· 52 51 xfs_inode_fork.o \ 53 52 xfs_inode_buf.o \ 54 53 xfs_log_rlimit.o \ 54 + xfs_rmap.o \ 55 + xfs_rmap_btree.o \ 55 56 xfs_sb.o \ 56 57 xfs_symlink_remote.o \ 57 58 xfs_trans_resv.o \ ··· 103 100 xfs_extfree_item.o \ 104 101 xfs_icreate_item.o \ 105 102 xfs_inode_item.o \ 103 + xfs_rmap_item.o \ 106 104 xfs_log_recover.o \ 107 105 xfs_trans_ail.o \ 108 106 xfs_trans_buf.o \ 109 107 xfs_trans_extfree.o \ 110 108 xfs_trans_inode.o \ 109 + xfs_trans_rmap.o \ 111 110 112 111 # optional features 113 112 xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \

+136 -13

fs/xfs/libxfs/xfs_alloc.c

··· 24 24 #include "xfs_bit.h" 25 25 #include "xfs_sb.h" 26 26 #include "xfs_mount.h" 27 + #include "xfs_defer.h" 27 28 #include "xfs_inode.h" 28 29 #include "xfs_btree.h" 30 + #include "xfs_rmap.h" 29 31 #include "xfs_alloc_btree.h" 30 32 #include "xfs_alloc.h" 31 33 #include "xfs_extent_busy.h" ··· 50 48 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 51 49 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 52 50 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 51 + 52 + xfs_extlen_t 53 + xfs_prealloc_blocks( 54 + struct xfs_mount *mp) 55 + { 56 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 57 + return XFS_RMAP_BLOCK(mp) + 1; 58 + if (xfs_sb_version_hasfinobt(&mp->m_sb)) 59 + return XFS_FIBT_BLOCK(mp) + 1; 60 + return XFS_IBT_BLOCK(mp) + 1; 61 + } 62 + 63 + /* 64 + * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of 65 + * AGF buffer (PV 947395), we place constraints on the relationship among 66 + * actual allocations for data blocks, freelist blocks, and potential file data 67 + * bmap btree blocks. However, these restrictions may result in no actual space 68 + * allocated for a delayed extent, for example, a data block in a certain AG is 69 + * allocated but there is no additional block for the additional bmap btree 70 + * block due to a split of the bmap btree of the file. The result of this may 71 + * lead to an infinite loop when the file gets flushed to disk and all delayed 72 + * extents need to be actually allocated. To get around this, we explicitly set 73 + * aside a few blocks which will not be reserved in delayed allocation. 74 + * 75 + * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist 76 + * and 4 more to handle a potential split of the file's bmap btree. 77 + * 78 + * When rmap is enabled, we must also be able to handle two rmap btree inserts 79 + * to record both the file data extent and a new bmbt block. 
The bmbt block 80 + * might not be in the same AG as the file data extent. In the worst case 81 + * the bmap btree splits multiple levels and all the new blocks come from 82 + * different AGs, so set aside enough to handle rmap btree splits in all AGs. 83 + */ 84 + unsigned int 85 + xfs_alloc_set_aside( 86 + struct xfs_mount *mp) 87 + { 88 + unsigned int blocks; 89 + 90 + blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); 91 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 92 + blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels; 93 + return blocks; 94 + } 95 + 96 + /* 97 + * When deciding how much space to allocate out of an AG, we limit the 98 + * allocation maximum size to the size the AG. However, we cannot use all the 99 + * blocks in the AG - some are permanently used by metadata. These 100 + * blocks are generally: 101 + * - the AG superblock, AGF, AGI and AGFL 102 + * - the AGF (bno and cnt) and AGI btree root blocks, and optionally 103 + * the AGI free inode and rmap btree root blocks. 104 + * - blocks on the AGFL according to xfs_alloc_set_aside() limits 105 + * - the rmapbt root block 106 + * 107 + * The AG headers are sector sized, so the amount of space they take up is 108 + * dependent on filesystem geometry. The others are all single blocks. 109 + */ 110 + unsigned int 111 + xfs_alloc_ag_max_usable( 112 + struct xfs_mount *mp) 113 + { 114 + unsigned int blocks; 115 + 116 + blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */ 117 + blocks += XFS_ALLOC_AGFL_RESERVE; 118 + blocks += 3; /* AGF, AGI btree root blocks */ 119 + if (xfs_sb_version_hasfinobt(&mp->m_sb)) 120 + blocks++; /* finobt root block */ 121 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 122 + blocks++; /* rmap root block */ 123 + 124 + return mp->m_sb.sb_agblocks - blocks; 125 + } 53 126 54 127 /* 55 128 * Lookup the record equal to [bno, len] in the btree given by cur. 
··· 712 635 ASSERT(args->len <= args->maxlen); 713 636 ASSERT(!args->wasfromfl || !args->isfl); 714 637 ASSERT(args->agbno % args->alignment == 0); 638 + 639 + /* if not file data, insert new block into the reverse map btree */ 640 + if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { 641 + error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 642 + args->agbno, args->len, &args->oinfo); 643 + if (error) 644 + return error; 645 + } 715 646 716 647 if (!args->wasfromfl) { 717 648 error = xfs_alloc_update_counters(args->tp, args->pag, ··· 1662 1577 /* 1663 1578 * Free the extent starting at agno/bno for length. 1664 1579 */ 1665 - STATIC int /* error */ 1580 + STATIC int 1666 1581 xfs_free_ag_extent( 1667 - xfs_trans_t *tp, /* transaction pointer */ 1668 - xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 1669 - xfs_agnumber_t agno, /* allocation group number */ 1670 - xfs_agblock_t bno, /* starting block number */ 1671 - xfs_extlen_t len, /* length of extent */ 1672 - int isfl) /* set if is freelist blocks - no sb acctg */ 1582 + xfs_trans_t *tp, 1583 + xfs_buf_t *agbp, 1584 + xfs_agnumber_t agno, 1585 + xfs_agblock_t bno, 1586 + xfs_extlen_t len, 1587 + struct xfs_owner_info *oinfo, 1588 + int isfl) 1673 1589 { 1674 1590 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1675 1591 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ ··· 1687 1601 xfs_extlen_t nlen; /* new length of freespace */ 1688 1602 xfs_perag_t *pag; /* per allocation group data */ 1689 1603 1604 + bno_cur = cnt_cur = NULL; 1690 1605 mp = tp->t_mountp; 1606 + 1607 + if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { 1608 + error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1609 + if (error) 1610 + goto error0; 1611 + } 1612 + 1691 1613 /* 1692 1614 * Allocate and initialize a cursor for the by-block btree. 
1693 1615 */ 1694 1616 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); 1695 - cnt_cur = NULL; 1696 1617 /* 1697 1618 * Look for a neighboring block on the left (lower block numbers) 1698 1619 * that is contiguous with this space. ··· 1968 1875 /* space needed by-size freespace btree */ 1969 1876 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, 1970 1877 mp->m_ag_maxlevels); 1878 + /* space needed reverse mapping used space btree */ 1879 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 1880 + min_free += min_t(unsigned int, 1881 + pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, 1882 + mp->m_rmap_maxlevels); 1971 1883 1972 1884 return min_free; 1973 1885 } ··· 2090 1992 * anything other than extra overhead when we need to put more blocks 2091 1993 * back on the free list? Maybe we should only do this when space is 2092 1994 * getting low or the AGFL is more than half full? 1995 + * 1996 + * The NOSHRINK flag prevents the AGFL from being shrunk if it's too 1997 + * big; the NORMAP flag prevents AGFL expand/shrink operations from 1998 + * updating the rmapbt. Both flags are used in xfs_repair while we're 1999 + * rebuilding the rmapbt, and neither are used by the kernel. They're 2000 + * both required to ensure that rmaps are correctly recorded for the 2001 + * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and 2002 + * repair/rmap.c in xfsprogs for details. 
2093 2003 */ 2094 - while (pag->pagf_flcount > need) { 2004 + memset(&targs, 0, sizeof(targs)); 2005 + if (flags & XFS_ALLOC_FLAG_NORMAP) 2006 + xfs_rmap_skip_owner_update(&targs.oinfo); 2007 + else 2008 + xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG); 2009 + while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { 2095 2010 struct xfs_buf *bp; 2096 2011 2097 2012 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); 2098 2013 if (error) 2099 2014 goto out_agbp_relse; 2100 - error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1); 2015 + error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 2016 + &targs.oinfo, 1); 2101 2017 if (error) 2102 2018 goto out_agbp_relse; 2103 2019 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); 2104 2020 xfs_trans_binval(tp, bp); 2105 2021 } 2106 2022 2107 - memset(&targs, 0, sizeof(targs)); 2108 2023 targs.tp = tp; 2109 2024 targs.mp = mp; 2110 2025 targs.agbp = agbp; ··· 2382 2271 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2383 2272 return false; 2384 2273 2274 + if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2275 + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS) 2276 + return false; 2277 + 2385 2278 /* 2386 2279 * during growfs operations, the perag is not fully initialised, 2387 2280 * so we can't use it for any useful checking. 
growfs ensures we can't ··· 2517 2402 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); 2518 2403 pag->pagf_levels[XFS_BTNUM_CNTi] = 2519 2404 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 2405 + pag->pagf_levels[XFS_BTNUM_RMAPi] = 2406 + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); 2520 2407 spin_lock_init(&pag->pagb_lock); 2521 2408 pag->pagb_count = 0; 2522 2409 pag->pagb_tree = RB_ROOT; ··· 2808 2691 xfs_free_extent( 2809 2692 struct xfs_trans *tp, /* transaction pointer */ 2810 2693 xfs_fsblock_t bno, /* starting block number of extent */ 2811 - xfs_extlen_t len) /* length of extent */ 2694 + xfs_extlen_t len, /* length of extent */ 2695 + struct xfs_owner_info *oinfo) /* extent owner */ 2812 2696 { 2813 2697 struct xfs_mount *mp = tp->t_mountp; 2814 2698 struct xfs_buf *agbp; ··· 2818 2700 int error; 2819 2701 2820 2702 ASSERT(len != 0); 2703 + 2704 + if (XFS_TEST_ERROR(false, mp, 2705 + XFS_ERRTAG_FREE_EXTENT, 2706 + XFS_RANDOM_FREE_EXTENT)) 2707 + return -EIO; 2821 2708 2822 2709 error = xfs_free_extent_fix_freelist(tp, agno, &agbp); 2823 2710 if (error) ··· 2835 2712 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), 2836 2713 err); 2837 2714 2838 - error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, 0); 2715 + error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0); 2839 2716 if (error) 2840 2717 goto err; 2841 2718

+14 -38

fs/xfs/libxfs/xfs_alloc.h

··· 54 54 */ 55 55 #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ 56 56 #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ 57 - 58 - /* 59 - * In order to avoid ENOSPC-related deadlock caused by 60 - * out-of-order locking of AGF buffer (PV 947395), we place 61 - * constraints on the relationship among actual allocations for 62 - * data blocks, freelist blocks, and potential file data bmap 63 - * btree blocks. However, these restrictions may result in no 64 - * actual space allocated for a delayed extent, for example, a data 65 - * block in a certain AG is allocated but there is no additional 66 - * block for the additional bmap btree block due to a split of the 67 - * bmap btree of the file. The result of this may lead to an 68 - * infinite loop in xfssyncd when the file gets flushed to disk and 69 - * all delayed extents need to be actually allocated. To get around 70 - * this, we explicitly set aside a few blocks which will not be 71 - * reserved in delayed allocation. Considering the minimum number of 72 - * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap 73 - * btree requires 1 fsb, so we set the number of set-aside blocks 74 - * to 4 + 4*agcount. 75 - */ 76 - #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) 77 - 78 - /* 79 - * When deciding how much space to allocate out of an AG, we limit the 80 - * allocation maximum size to the size the AG. However, we cannot use all the 81 - * blocks in the AG - some are permanently used by metadata. These 82 - * blocks are generally: 83 - * - the AG superblock, AGF, AGI and AGFL 84 - * - the AGF (bno and cnt) and AGI btree root blocks 85 - * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits 86 - * 87 - * The AG headers are sector sized, so the amount of space they take up is 88 - * dependent on filesystem geometry. The others are all single blocks. 
89 - */ 90 - #define XFS_ALLOC_AG_MAX_USABLE(mp) \ 91 - ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) 57 + #define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ 58 + #define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ 92 59 93 60 94 61 /* ··· 90 123 char isfl; /* set if is freelist blocks - !acctg */ 91 124 char userdata; /* mask defining userdata treatment */ 92 125 xfs_fsblock_t firstblock; /* io first block allocated */ 126 + struct xfs_owner_info oinfo; /* owner of blocks being allocated */ 93 127 } xfs_alloc_arg_t; 94 128 95 129 /* ··· 99 131 #define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ 100 132 #define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ 101 133 #define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ 134 + 135 + /* freespace limit calculations */ 136 + #define XFS_ALLOC_AGFL_RESERVE 4 137 + unsigned int xfs_alloc_set_aside(struct xfs_mount *mp); 138 + unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp); 102 139 103 140 xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, 104 141 struct xfs_perag *pag, xfs_extlen_t need); ··· 181 208 */ 182 209 int /* error */ 183 210 xfs_free_extent( 184 - struct xfs_trans *tp, /* transaction pointer */ 185 - xfs_fsblock_t bno, /* starting block number of extent */ 186 - xfs_extlen_t len); /* length of extent */ 211 + struct xfs_trans *tp, /* transaction pointer */ 212 + xfs_fsblock_t bno, /* starting block number of extent */ 213 + xfs_extlen_t len, /* length of extent */ 214 + struct xfs_owner_info *oinfo);/* extent owner */ 187 215 188 216 int /* error */ 189 217 xfs_alloc_lookup_ge( ··· 205 231 int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); 206 232 int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno, 207 233 struct xfs_buf **agbp); 234 + 235 + xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp); 208 236 209 237 #endif /* 
__XFS_ALLOC_H__ */

-12

fs/xfs/libxfs/xfs_alloc_btree.c

··· 212 212 } 213 213 214 214 STATIC void 215 - xfs_allocbt_init_rec_from_key( 216 - union xfs_btree_key *key, 217 - union xfs_btree_rec *rec) 218 - { 219 - ASSERT(key->alloc.ar_startblock != 0); 220 - 221 - rec->alloc.ar_startblock = key->alloc.ar_startblock; 222 - rec->alloc.ar_blockcount = key->alloc.ar_blockcount; 223 - } 224 - 225 - STATIC void 226 215 xfs_allocbt_init_rec_from_cur( 227 216 struct xfs_btree_cur *cur, 228 217 union xfs_btree_rec *rec) ··· 395 406 .get_minrecs = xfs_allocbt_get_minrecs, 396 407 .get_maxrecs = xfs_allocbt_get_maxrecs, 397 408 .init_key_from_rec = xfs_allocbt_init_key_from_rec, 398 - .init_rec_from_key = xfs_allocbt_init_rec_from_key, 399 409 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, 400 410 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, 401 411 .key_diff = xfs_allocbt_key_diff,

+36 -35

fs/xfs/libxfs/xfs_attr.c

··· 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_bit.h" 25 25 #include "xfs_mount.h" 26 + #include "xfs_defer.h" 26 27 #include "xfs_da_format.h" 27 28 #include "xfs_da_btree.h" 28 29 #include "xfs_attr_sf.h" ··· 204 203 { 205 204 struct xfs_mount *mp = dp->i_mount; 206 205 struct xfs_da_args args; 207 - struct xfs_bmap_free flist; 206 + struct xfs_defer_ops dfops; 208 207 struct xfs_trans_res tres; 209 208 xfs_fsblock_t firstblock; 210 209 int rsvd = (flags & ATTR_ROOT) != 0; ··· 222 221 args.value = value; 223 222 args.valuelen = valuelen; 224 223 args.firstblock = &firstblock; 225 - args.flist = &flist; 224 + args.dfops = &dfops; 226 225 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; 227 226 args.total = xfs_attr_calc_size(&args, &local); 228 227 ··· 317 316 * It won't fit in the shortform, transform to a leaf block. 318 317 * GROT: another possible req'mt for a double-split btree op. 319 318 */ 320 - xfs_bmap_init(args.flist, args.firstblock); 319 + xfs_defer_init(args.dfops, args.firstblock); 321 320 error = xfs_attr_shortform_to_leaf(&args); 322 321 if (!error) 323 - error = xfs_bmap_finish(&args.trans, args.flist, dp); 322 + error = xfs_defer_finish(&args.trans, args.dfops, dp); 324 323 if (error) { 325 324 args.trans = NULL; 326 - xfs_bmap_cancel(&flist); 325 + xfs_defer_cancel(&dfops); 327 326 goto out; 328 327 } 329 328 ··· 383 382 { 384 383 struct xfs_mount *mp = dp->i_mount; 385 384 struct xfs_da_args args; 386 - struct xfs_bmap_free flist; 385 + struct xfs_defer_ops dfops; 387 386 xfs_fsblock_t firstblock; 388 387 int error; 389 388 ··· 400 399 return error; 401 400 402 401 args.firstblock = &firstblock; 403 - args.flist = &flist; 402 + args.dfops = &dfops; 404 403 405 404 /* 406 405 * we have no control over the attribute names that userspace passes us ··· 585 584 * Commit that transaction so that the node_addname() call 586 585 * can manage its own transactions. 
587 586 */ 588 - xfs_bmap_init(args->flist, args->firstblock); 587 + xfs_defer_init(args->dfops, args->firstblock); 589 588 error = xfs_attr3_leaf_to_node(args); 590 589 if (!error) 591 - error = xfs_bmap_finish(&args->trans, args->flist, dp); 590 + error = xfs_defer_finish(&args->trans, args->dfops, dp); 592 591 if (error) { 593 592 args->trans = NULL; 594 - xfs_bmap_cancel(args->flist); 593 + xfs_defer_cancel(args->dfops); 595 594 return error; 596 595 } 597 596 ··· 675 674 * If the result is small enough, shrink it all into the inode. 676 675 */ 677 676 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 678 - xfs_bmap_init(args->flist, args->firstblock); 677 + xfs_defer_init(args->dfops, args->firstblock); 679 678 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 680 679 /* bp is gone due to xfs_da_shrink_inode */ 681 680 if (!error) 682 - error = xfs_bmap_finish(&args->trans, 683 - args->flist, dp); 681 + error = xfs_defer_finish(&args->trans, 682 + args->dfops, dp); 684 683 if (error) { 685 684 args->trans = NULL; 686 - xfs_bmap_cancel(args->flist); 685 + xfs_defer_cancel(args->dfops); 687 686 return error; 688 687 } 689 688 } ··· 738 737 * If the result is small enough, shrink it all into the inode. 
739 738 */ 740 739 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 741 - xfs_bmap_init(args->flist, args->firstblock); 740 + xfs_defer_init(args->dfops, args->firstblock); 742 741 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 743 742 /* bp is gone due to xfs_da_shrink_inode */ 744 743 if (!error) 745 - error = xfs_bmap_finish(&args->trans, args->flist, dp); 744 + error = xfs_defer_finish(&args->trans, args->dfops, dp); 746 745 if (error) { 747 746 args->trans = NULL; 748 - xfs_bmap_cancel(args->flist); 747 + xfs_defer_cancel(args->dfops); 749 748 return error; 750 749 } 751 750 } ··· 864 863 */ 865 864 xfs_da_state_free(state); 866 865 state = NULL; 867 - xfs_bmap_init(args->flist, args->firstblock); 866 + xfs_defer_init(args->dfops, args->firstblock); 868 867 error = xfs_attr3_leaf_to_node(args); 869 868 if (!error) 870 - error = xfs_bmap_finish(&args->trans, 871 - args->flist, dp); 869 + error = xfs_defer_finish(&args->trans, 870 + args->dfops, dp); 872 871 if (error) { 873 872 args->trans = NULL; 874 - xfs_bmap_cancel(args->flist); 873 + xfs_defer_cancel(args->dfops); 875 874 goto out; 876 875 } 877 876 ··· 892 891 * in the index/blkno/rmtblkno/rmtblkcnt fields and 893 892 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. 894 893 */ 895 - xfs_bmap_init(args->flist, args->firstblock); 894 + xfs_defer_init(args->dfops, args->firstblock); 896 895 error = xfs_da3_split(state); 897 896 if (!error) 898 - error = xfs_bmap_finish(&args->trans, args->flist, dp); 897 + error = xfs_defer_finish(&args->trans, args->dfops, dp); 899 898 if (error) { 900 899 args->trans = NULL; 901 - xfs_bmap_cancel(args->flist); 900 + xfs_defer_cancel(args->dfops); 902 901 goto out; 903 902 } 904 903 } else { ··· 991 990 * Check to see if the tree needs to be collapsed. 
992 991 */ 993 992 if (retval && (state->path.active > 1)) { 994 - xfs_bmap_init(args->flist, args->firstblock); 993 + xfs_defer_init(args->dfops, args->firstblock); 995 994 error = xfs_da3_join(state); 996 995 if (!error) 997 - error = xfs_bmap_finish(&args->trans, 998 - args->flist, dp); 996 + error = xfs_defer_finish(&args->trans, 997 + args->dfops, dp); 999 998 if (error) { 1000 999 args->trans = NULL; 1001 - xfs_bmap_cancel(args->flist); 1000 + xfs_defer_cancel(args->dfops); 1002 1001 goto out; 1003 1002 } 1004 1003 } ··· 1114 1113 * Check to see if the tree needs to be collapsed. 1115 1114 */ 1116 1115 if (retval && (state->path.active > 1)) { 1117 - xfs_bmap_init(args->flist, args->firstblock); 1116 + xfs_defer_init(args->dfops, args->firstblock); 1118 1117 error = xfs_da3_join(state); 1119 1118 if (!error) 1120 - error = xfs_bmap_finish(&args->trans, args->flist, dp); 1119 + error = xfs_defer_finish(&args->trans, args->dfops, dp); 1121 1120 if (error) { 1122 1121 args->trans = NULL; 1123 - xfs_bmap_cancel(args->flist); 1122 + xfs_defer_cancel(args->dfops); 1124 1123 goto out; 1125 1124 } 1126 1125 /* ··· 1147 1146 goto out; 1148 1147 1149 1148 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1150 - xfs_bmap_init(args->flist, args->firstblock); 1149 + xfs_defer_init(args->dfops, args->firstblock); 1151 1150 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 1152 1151 /* bp is gone due to xfs_da_shrink_inode */ 1153 1152 if (!error) 1154 - error = xfs_bmap_finish(&args->trans, 1155 - args->flist, dp); 1153 + error = xfs_defer_finish(&args->trans, 1154 + args->dfops, dp); 1156 1155 if (error) { 1157 1156 args->trans = NULL; 1158 - xfs_bmap_cancel(args->flist); 1157 + xfs_defer_cancel(args->dfops); 1159 1158 goto out; 1160 1159 } 1161 1160 } else

+2 -2

fs/xfs/libxfs/xfs_attr_leaf.c

··· 792 792 nargs.dp = dp; 793 793 nargs.geo = args->geo; 794 794 nargs.firstblock = args->firstblock; 795 - nargs.flist = args->flist; 795 + nargs.dfops = args->dfops; 796 796 nargs.total = args->total; 797 797 nargs.whichfork = XFS_ATTR_FORK; 798 798 nargs.trans = args->trans; ··· 922 922 nargs.geo = args->geo; 923 923 nargs.dp = dp; 924 924 nargs.firstblock = args->firstblock; 925 - nargs.flist = args->flist; 925 + nargs.dfops = args->dfops; 926 926 nargs.total = args->total; 927 927 nargs.whichfork = XFS_ATTR_FORK; 928 928 nargs.trans = args->trans;

+10 -9

fs/xfs/libxfs/xfs_attr_remote.c

··· 24 24 #include "xfs_trans_resv.h" 25 25 #include "xfs_bit.h" 26 26 #include "xfs_mount.h" 27 + #include "xfs_defer.h" 27 28 #include "xfs_da_format.h" 28 29 #include "xfs_da_btree.h" 29 30 #include "xfs_inode.h" ··· 461 460 * extent and then crash then the block may not contain the 462 461 * correct metadata after log recovery occurs. 463 462 */ 464 - xfs_bmap_init(args->flist, args->firstblock); 463 + xfs_defer_init(args->dfops, args->firstblock); 465 464 nmap = 1; 466 465 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 467 466 blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, 468 - args->total, &map, &nmap, args->flist); 467 + args->total, &map, &nmap, args->dfops); 469 468 if (!error) 470 - error = xfs_bmap_finish(&args->trans, args->flist, dp); 469 + error = xfs_defer_finish(&args->trans, args->dfops, dp); 471 470 if (error) { 472 471 args->trans = NULL; 473 - xfs_bmap_cancel(args->flist); 472 + xfs_defer_cancel(args->dfops); 474 473 return error; 475 474 } 476 475 ··· 504 503 505 504 ASSERT(blkcnt > 0); 506 505 507 - xfs_bmap_init(args->flist, args->firstblock); 506 + xfs_defer_init(args->dfops, args->firstblock); 508 507 nmap = 1; 509 508 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 510 509 blkcnt, &map, &nmap, ··· 604 603 blkcnt = args->rmtblkcnt; 605 604 done = 0; 606 605 while (!done) { 607 - xfs_bmap_init(args->flist, args->firstblock); 606 + xfs_defer_init(args->dfops, args->firstblock); 608 607 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 609 608 XFS_BMAPI_ATTRFORK, 1, args->firstblock, 610 - args->flist, &done); 609 + args->dfops, &done); 611 610 if (!error) 612 - error = xfs_bmap_finish(&args->trans, args->flist, 611 + error = xfs_defer_finish(&args->trans, args->dfops, 613 612 args->dp); 614 613 if (error) { 615 614 args->trans = NULL; 616 - xfs_bmap_cancel(args->flist); 615 + xfs_defer_cancel(args->dfops); 617 616 return error; 618 617 } 619 618

+132 -109

fs/xfs/libxfs/xfs_bmap.c

··· 24 24 #include "xfs_bit.h" 25 25 #include "xfs_sb.h" 26 26 #include "xfs_mount.h" 27 + #include "xfs_defer.h" 27 28 #include "xfs_da_format.h" 28 29 #include "xfs_da_btree.h" 29 30 #include "xfs_dir2.h" ··· 46 45 #include "xfs_symlink.h" 47 46 #include "xfs_attr_leaf.h" 48 47 #include "xfs_filestream.h" 48 + #include "xfs_rmap.h" 49 49 50 50 51 51 kmem_zone_t *xfs_bmap_free_item_zone; ··· 572 570 */ 573 571 void 574 572 xfs_bmap_add_free( 575 - struct xfs_mount *mp, /* mount point structure */ 576 - struct xfs_bmap_free *flist, /* list of extents */ 577 - xfs_fsblock_t bno, /* fs block number of extent */ 578 - xfs_filblks_t len) /* length of extent */ 573 + struct xfs_mount *mp, 574 + struct xfs_defer_ops *dfops, 575 + xfs_fsblock_t bno, 576 + xfs_filblks_t len, 577 + struct xfs_owner_info *oinfo) 579 578 { 580 - struct xfs_bmap_free_item *new; /* new element */ 579 + struct xfs_extent_free_item *new; /* new element */ 581 580 #ifdef DEBUG 582 581 xfs_agnumber_t agno; 583 582 xfs_agblock_t agbno; ··· 595 592 ASSERT(agbno + len <= mp->m_sb.sb_agblocks); 596 593 #endif 597 594 ASSERT(xfs_bmap_free_item_zone != NULL); 595 + 598 596 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); 599 - new->xbfi_startblock = bno; 600 - new->xbfi_blockcount = (xfs_extlen_t)len; 601 - list_add(&new->xbfi_list, &flist->xbf_flist); 602 - flist->xbf_count++; 603 - } 604 - 605 - /* 606 - * Remove the entry "free" from the free item list. Prev points to the 607 - * previous entry, unless "free" is the head of the list. 608 - */ 609 - void 610 - xfs_bmap_del_free( 611 - struct xfs_bmap_free *flist, /* free item list header */ 612 - struct xfs_bmap_free_item *free) /* list item to be freed */ 613 - { 614 - list_del(&free->xbfi_list); 615 - flist->xbf_count--; 616 - kmem_zone_free(xfs_bmap_free_item_zone, free); 617 - } 618 - 619 - /* 620 - * Free up any items left in the list. 
621 - */ 622 - void 623 - xfs_bmap_cancel( 624 - struct xfs_bmap_free *flist) /* list of bmap_free_items */ 625 - { 626 - struct xfs_bmap_free_item *free; /* free list item */ 627 - 628 - if (flist->xbf_count == 0) 629 - return; 630 - while (!list_empty(&flist->xbf_flist)) { 631 - free = list_first_entry(&flist->xbf_flist, 632 - struct xfs_bmap_free_item, xbfi_list); 633 - xfs_bmap_del_free(flist, free); 634 - } 635 - ASSERT(flist->xbf_count == 0); 597 + new->xefi_startblock = bno; 598 + new->xefi_blockcount = (xfs_extlen_t)len; 599 + if (oinfo) 600 + new->xefi_oinfo = *oinfo; 601 + else 602 + xfs_rmap_skip_owner_update(&new->xefi_oinfo); 603 + trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0, 604 + XFS_FSB_TO_AGBNO(mp, bno), len); 605 + xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); 636 606 } 637 607 638 608 /* ··· 635 659 xfs_mount_t *mp; /* mount point structure */ 636 660 __be64 *pp; /* ptr to block address */ 637 661 struct xfs_btree_block *rblock;/* root btree block */ 662 + struct xfs_owner_info oinfo; 638 663 639 664 mp = ip->i_mount; 640 665 ifp = XFS_IFORK_PTR(ip, whichfork); ··· 659 682 cblock = XFS_BUF_TO_BLOCK(cbp); 660 683 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) 661 684 return error; 662 - xfs_bmap_add_free(mp, cur->bc_private.b.flist, cbno, 1); 685 + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 686 + xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo); 663 687 ip->i_d.di_nblocks--; 664 688 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); 665 689 xfs_trans_binval(tp, cbp); ··· 683 705 xfs_trans_t *tp, /* transaction pointer */ 684 706 xfs_inode_t *ip, /* incore inode pointer */ 685 707 xfs_fsblock_t *firstblock, /* first-block-allocated */ 686 - xfs_bmap_free_t *flist, /* blocks freed in xaction */ 708 + struct xfs_defer_ops *dfops, /* blocks freed in xaction */ 687 709 xfs_btree_cur_t **curp, /* cursor returned to caller */ 688 710 int wasdel, /* converting a delayed alloc 
*/ 689 711 int *logflagsp, /* inode logging flags */ ··· 732 754 */ 733 755 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 734 756 cur->bc_private.b.firstblock = *firstblock; 735 - cur->bc_private.b.flist = flist; 757 + cur->bc_private.b.dfops = dfops; 736 758 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; 737 759 /* 738 760 * Convert to a btree with two levels, one record in root. ··· 741 763 memset(&args, 0, sizeof(args)); 742 764 args.tp = tp; 743 765 args.mp = mp; 766 + xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork); 744 767 args.firstblock = *firstblock; 745 768 if (*firstblock == NULLFSBLOCK) { 746 769 args.type = XFS_ALLOCTYPE_START_BNO; 747 770 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); 748 - } else if (flist->xbf_low) { 771 + } else if (dfops->dop_low) { 749 772 args.type = XFS_ALLOCTYPE_START_BNO; 750 773 args.fsbno = *firstblock; 751 774 } else { ··· 767 788 ASSERT(args.fsbno != NULLFSBLOCK); 768 789 ASSERT(*firstblock == NULLFSBLOCK || 769 790 args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) || 770 - (flist->xbf_low && 791 + (dfops->dop_low && 771 792 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock))); 772 793 *firstblock = cur->bc_private.b.firstblock = args.fsbno; 773 794 cur->bc_private.b.allocated++; ··· 888 909 memset(&args, 0, sizeof(args)); 889 910 args.tp = tp; 890 911 args.mp = ip->i_mount; 912 + xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0); 891 913 args.firstblock = *firstblock; 892 914 /* 893 915 * Allocate a block. 
We know we need only one, since the ··· 953 973 xfs_trans_t *tp, /* transaction pointer */ 954 974 xfs_inode_t *ip, /* incore inode pointer */ 955 975 xfs_fsblock_t *firstblock, /* first block allocated */ 956 - xfs_bmap_free_t *flist, /* blocks to free at commit */ 976 + struct xfs_defer_ops *dfops, /* blocks to free at commit */ 957 977 int *flags) /* inode logging flags */ 958 978 { 959 979 xfs_btree_cur_t *cur; /* btree cursor */ ··· 966 986 *flags |= XFS_ILOG_DBROOT; 967 987 else { 968 988 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); 969 - cur->bc_private.b.flist = flist; 989 + cur->bc_private.b.dfops = dfops; 970 990 cur->bc_private.b.firstblock = *firstblock; 971 991 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) 972 992 goto error0; ··· 996 1016 xfs_trans_t *tp, /* transaction pointer */ 997 1017 xfs_inode_t *ip, /* incore inode pointer */ 998 1018 xfs_fsblock_t *firstblock, /* first block allocated */ 999 - xfs_bmap_free_t *flist, /* blocks to free at commit */ 1019 + struct xfs_defer_ops *dfops, /* blocks to free at commit */ 1000 1020 int *flags) /* inode logging flags */ 1001 1021 { 1002 1022 xfs_btree_cur_t *cur; /* bmap btree cursor */ ··· 1005 1025 if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) 1006 1026 return 0; 1007 1027 cur = NULL; 1008 - error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0, 1028 + error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0, 1009 1029 flags, XFS_DATA_FORK); 1010 1030 if (cur) { 1011 1031 cur->bc_private.b.allocated = 0; ··· 1031 1051 xfs_trans_t *tp, /* transaction pointer */ 1032 1052 xfs_inode_t *ip, /* incore inode pointer */ 1033 1053 xfs_fsblock_t *firstblock, /* first block allocated */ 1034 - xfs_bmap_free_t *flist, /* blocks to free at commit */ 1054 + struct xfs_defer_ops *dfops, /* blocks to free at commit */ 1035 1055 int *flags) /* inode logging flags */ 1036 1056 { 1037 1057 xfs_da_args_t dargs; /* args for dir/attr code */ ··· 1044 
1064 dargs.geo = ip->i_mount->m_dir_geo; 1045 1065 dargs.dp = ip; 1046 1066 dargs.firstblock = firstblock; 1047 - dargs.flist = flist; 1067 + dargs.dfops = dfops; 1048 1068 dargs.total = dargs.geo->fsbcount; 1049 1069 dargs.whichfork = XFS_DATA_FORK; 1050 1070 dargs.trans = tp; ··· 1072 1092 int rsvd) /* xact may use reserved blks */ 1073 1093 { 1074 1094 xfs_fsblock_t firstblock; /* 1st block/ag allocated */ 1075 - xfs_bmap_free_t flist; /* freed extent records */ 1095 + struct xfs_defer_ops dfops; /* freed extent records */ 1076 1096 xfs_mount_t *mp; /* mount structure */ 1077 1097 xfs_trans_t *tp; /* transaction pointer */ 1078 1098 int blks; /* space reservation */ ··· 1138 1158 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 1139 1159 ip->i_afp->if_flags = XFS_IFEXTENTS; 1140 1160 logflags = 0; 1141 - xfs_bmap_init(&flist, &firstblock); 1161 + xfs_defer_init(&dfops, &firstblock); 1142 1162 switch (ip->i_d.di_format) { 1143 1163 case XFS_DINODE_FMT_LOCAL: 1144 - error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist, 1164 + error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops, 1145 1165 &logflags); 1146 1166 break; 1147 1167 case XFS_DINODE_FMT_EXTENTS: 1148 1168 error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock, 1149 - &flist, &logflags); 1169 + &dfops, &logflags); 1150 1170 break; 1151 1171 case XFS_DINODE_FMT_BTREE: 1152 - error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist, 1172 + error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops, 1153 1173 &logflags); 1154 1174 break; 1155 1175 default: ··· 1178 1198 xfs_log_sb(tp); 1179 1199 } 1180 1200 1181 - error = xfs_bmap_finish(&tp, &flist, NULL); 1201 + error = xfs_defer_finish(&tp, &dfops, NULL); 1182 1202 if (error) 1183 1203 goto bmap_cancel; 1184 1204 error = xfs_trans_commit(tp); ··· 1186 1206 return error; 1187 1207 1188 1208 bmap_cancel: 1189 - xfs_bmap_cancel(&flist); 1209 + xfs_defer_cancel(&dfops); 1190 1210 trans_cancel: 1191 1211 
xfs_trans_cancel(tp); 1192 1212 xfs_iunlock(ip, XFS_ILOCK_EXCL); ··· 1983 2003 1984 2004 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 1985 2005 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 1986 - bma->firstblock, bma->flist, 2006 + bma->firstblock, bma->dfops, 1987 2007 &bma->cur, 1, &tmp_rval, whichfork); 1988 2008 rval |= tmp_rval; 1989 2009 if (error) ··· 2067 2087 2068 2088 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 2069 2089 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2070 - bma->firstblock, bma->flist, &bma->cur, 1, 2090 + bma->firstblock, bma->dfops, &bma->cur, 1, 2071 2091 &tmp_rval, whichfork); 2072 2092 rval |= tmp_rval; 2073 2093 if (error) ··· 2136 2156 2137 2157 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 2138 2158 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2139 - bma->firstblock, bma->flist, &bma->cur, 2159 + bma->firstblock, bma->dfops, &bma->cur, 2140 2160 1, &tmp_rval, whichfork); 2141 2161 rval |= tmp_rval; 2142 2162 if (error) ··· 2179 2199 ASSERT(0); 2180 2200 } 2181 2201 2202 + /* add reverse mapping */ 2203 + error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new); 2204 + if (error) 2205 + goto done; 2206 + 2182 2207 /* convert to a btree if necessary */ 2183 2208 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 2184 2209 int tmp_logflags; /* partial log flag return val */ 2185 2210 2186 2211 ASSERT(bma->cur == NULL); 2187 2212 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2188 - bma->firstblock, bma->flist, &bma->cur, 2213 + bma->firstblock, bma->dfops, &bma->cur, 2189 2214 da_old > 0, &tmp_logflags, whichfork); 2190 2215 bma->logflags |= tmp_logflags; 2191 2216 if (error) ··· 2232 2247 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 2233 2248 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2234 2249 xfs_fsblock_t *first, /* pointer to firstblock variable */ 2235 - xfs_bmap_free_t *flist, /* list of extents to be freed */ 2250 + struct xfs_defer_ops *dfops, /* list 
of extents to be freed */ 2236 2251 int *logflagsp) /* inode logging flags */ 2237 2252 { 2238 2253 xfs_btree_cur_t *cur; /* btree cursor */ ··· 2720 2735 ASSERT(0); 2721 2736 } 2722 2737 2738 + /* update reverse mappings */ 2739 + error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); 2740 + if (error) 2741 + goto done; 2742 + 2723 2743 /* convert to a btree if necessary */ 2724 2744 if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { 2725 2745 int tmp_logflags; /* partial log flag return val */ 2726 2746 2727 2747 ASSERT(cur == NULL); 2728 - error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur, 2748 + error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, 2729 2749 0, &tmp_logflags, XFS_DATA_FORK); 2730 2750 *logflagsp |= tmp_logflags; 2731 2751 if (error) ··· 3117 3127 break; 3118 3128 } 3119 3129 3130 + /* add reverse mapping */ 3131 + error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new); 3132 + if (error) 3133 + goto done; 3134 + 3120 3135 /* convert to a btree if necessary */ 3121 3136 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 3122 3137 int tmp_logflags; /* partial log flag return val */ 3123 3138 3124 3139 ASSERT(bma->cur == NULL); 3125 3140 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 3126 - bma->firstblock, bma->flist, &bma->cur, 3141 + bma->firstblock, bma->dfops, &bma->cur, 3127 3142 0, &tmp_logflags, whichfork); 3128 3143 bma->logflags |= tmp_logflags; 3129 3144 if (error) ··· 3686 3691 args.tp = ap->tp; 3687 3692 args.mp = mp; 3688 3693 args.fsbno = ap->blkno; 3694 + xfs_rmap_skip_owner_update(&args.oinfo); 3689 3695 3690 3696 /* Trim the allocation back to the maximum an AG can fit. 
*/ 3691 - args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); 3697 + args.maxlen = MIN(ap->length, mp->m_ag_max_usable); 3692 3698 args.firstblock = *ap->firstblock; 3693 3699 blen = 0; 3694 3700 if (nullfb) { ··· 3704 3708 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); 3705 3709 if (error) 3706 3710 return error; 3707 - } else if (ap->flist->xbf_low) { 3711 + } else if (ap->dfops->dop_low) { 3708 3712 if (xfs_inode_is_filestream(ap->ip)) 3709 3713 args.type = XFS_ALLOCTYPE_FIRST_AG; 3710 3714 else ··· 3737 3741 * is >= the stripe unit and the allocation offset is 3738 3742 * at the end of file. 3739 3743 */ 3740 - if (!ap->flist->xbf_low && ap->aeof) { 3744 + if (!ap->dfops->dop_low && ap->aeof) { 3741 3745 if (!ap->offset) { 3742 3746 args.alignment = stripe_align; 3743 3747 atype = args.type; ··· 3830 3834 args.minleft = 0; 3831 3835 if ((error = xfs_alloc_vextent(&args))) 3832 3836 return error; 3833 - ap->flist->xbf_low = 1; 3837 + ap->dfops->dop_low = true; 3834 3838 } 3835 3839 if (args.fsbno != NULLFSBLOCK) { 3836 3840 /* ··· 3840 3844 ASSERT(*ap->firstblock == NULLFSBLOCK || 3841 3845 XFS_FSB_TO_AGNO(mp, *ap->firstblock) == 3842 3846 XFS_FSB_TO_AGNO(mp, args.fsbno) || 3843 - (ap->flist->xbf_low && 3847 + (ap->dfops->dop_low && 3844 3848 XFS_FSB_TO_AGNO(mp, *ap->firstblock) < 3845 3849 XFS_FSB_TO_AGNO(mp, args.fsbno))); 3846 3850 ··· 3848 3852 if (*ap->firstblock == NULLFSBLOCK) 3849 3853 *ap->firstblock = args.fsbno; 3850 3854 ASSERT(nullfb || fb_agno == args.agno || 3851 - (ap->flist->xbf_low && fb_agno < args.agno)); 3855 + (ap->dfops->dop_low && fb_agno < args.agno)); 3852 3856 ap->length = args.len; 3853 3857 ap->ip->i_d.di_nblocks += args.len; 3854 3858 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); ··· 4315 4319 if (error) 4316 4320 return error; 4317 4321 4318 - if (bma->flist->xbf_low) 4322 + if (bma->dfops->dop_low) 4319 4323 bma->minleft = 0; 4320 4324 if (bma->cur) 4321 4325 bma->cur->bc_private.b.firstblock = *bma->firstblock; 
··· 4324 4328 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { 4325 4329 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); 4326 4330 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4327 - bma->cur->bc_private.b.flist = bma->flist; 4331 + bma->cur->bc_private.b.dfops = bma->dfops; 4328 4332 } 4329 4333 /* 4330 4334 * Bump the number of extents we've allocated ··· 4405 4409 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, 4406 4410 bma->ip, whichfork); 4407 4411 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4408 - bma->cur->bc_private.b.flist = bma->flist; 4412 + bma->cur->bc_private.b.dfops = bma->dfops; 4409 4413 } 4410 4414 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4411 4415 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; ··· 4422 4426 } 4423 4427 4424 4428 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, 4425 - &bma->cur, mval, bma->firstblock, bma->flist, 4429 + &bma->cur, mval, bma->firstblock, bma->dfops, 4426 4430 &tmp_logflags); 4427 4431 /* 4428 4432 * Log the inode core unconditionally in the unwritten extent conversion ··· 4476 4480 xfs_extlen_t total, /* total blocks needed */ 4477 4481 struct xfs_bmbt_irec *mval, /* output: map values */ 4478 4482 int *nmap, /* i/o: mval size/count */ 4479 - struct xfs_bmap_free *flist) /* i/o: list extents to free */ 4483 + struct xfs_defer_ops *dfops) /* i/o: list extents to free */ 4480 4484 { 4481 4485 struct xfs_mount *mp = ip->i_mount; 4482 4486 struct xfs_ifork *ifp; ··· 4566 4570 bma.ip = ip; 4567 4571 bma.total = total; 4568 4572 bma.userdata = 0; 4569 - bma.flist = flist; 4573 + bma.dfops = dfops; 4570 4574 bma.firstblock = firstblock; 4571 4575 4572 4576 while (bno < end && n < *nmap) { ··· 4680 4684 XFS_FSB_TO_AGNO(mp, *firstblock) == 4681 4685 XFS_FSB_TO_AGNO(mp, 4682 4686 bma.cur->bc_private.b.firstblock) || 4683 - (flist->xbf_low && 4687 + (dfops->dop_low && 4684 4688 XFS_FSB_TO_AGNO(mp, *firstblock) < 4685 4689 XFS_FSB_TO_AGNO(mp, 4686 4690 
bma.cur->bc_private.b.firstblock))); ··· 4764 4768 xfs_inode_t *ip, /* incore inode pointer */ 4765 4769 xfs_trans_t *tp, /* current transaction pointer */ 4766 4770 xfs_extnum_t *idx, /* extent number to update/delete */ 4767 - xfs_bmap_free_t *flist, /* list of extents to be freed */ 4771 + struct xfs_defer_ops *dfops, /* list of extents to be freed */ 4768 4772 xfs_btree_cur_t *cur, /* if null, not a btree */ 4769 4773 xfs_bmbt_irec_t *del, /* data to remove from extents */ 4770 4774 int *logflagsp, /* inode logging flags */ ··· 4866 4870 nblks = 0; 4867 4871 do_fx = 0; 4868 4872 } 4873 + 4869 4874 /* 4870 4875 * Set flag value to use in switch statement. 4871 4876 * Left-contig is 2, right-contig is 1. ··· 5049 5052 ++*idx; 5050 5053 break; 5051 5054 } 5055 + 5056 + /* remove reverse mapping */ 5057 + if (!delay) { 5058 + error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del); 5059 + if (error) 5060 + goto done; 5061 + } 5062 + 5052 5063 /* 5053 5064 * If we need to, add to list of extents to delete. 5054 5065 */ 5055 5066 if (do_fx) 5056 - xfs_bmap_add_free(mp, flist, del->br_startblock, 5057 - del->br_blockcount); 5067 + xfs_bmap_add_free(mp, dfops, del->br_startblock, 5068 + del->br_blockcount, NULL); 5058 5069 /* 5059 5070 * Adjust inode # blocks in the file. 5060 5071 */ ··· 5102 5097 xfs_extnum_t nexts, /* number of extents max */ 5103 5098 xfs_fsblock_t *firstblock, /* first allocated block 5104 5099 controls a.g. 
for allocs */ 5105 - xfs_bmap_free_t *flist, /* i/o: list extents to free */ 5100 + struct xfs_defer_ops *dfops, /* i/o: list extents to free */ 5106 5101 int *done) /* set if not done yet */ 5107 5102 { 5108 5103 xfs_btree_cur_t *cur; /* bmap btree cursor */ ··· 5175 5170 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 5176 5171 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5177 5172 cur->bc_private.b.firstblock = *firstblock; 5178 - cur->bc_private.b.flist = flist; 5173 + cur->bc_private.b.dfops = dfops; 5179 5174 cur->bc_private.b.flags = 0; 5180 5175 } else 5181 5176 cur = NULL; ··· 5184 5179 /* 5185 5180 * Synchronize by locking the bitmap inode. 5186 5181 */ 5187 - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); 5182 + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); 5188 5183 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); 5184 + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); 5185 + xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); 5189 5186 } 5190 5187 5191 5188 extno = 0; ··· 5269 5262 } 5270 5263 del.br_state = XFS_EXT_UNWRITTEN; 5271 5264 error = xfs_bmap_add_extent_unwritten_real(tp, ip, 5272 - &lastx, &cur, &del, firstblock, flist, 5265 + &lastx, &cur, &del, firstblock, dfops, 5273 5266 &logflags); 5274 5267 if (error) 5275 5268 goto error0; ··· 5328 5321 lastx--; 5329 5322 error = xfs_bmap_add_extent_unwritten_real(tp, 5330 5323 ip, &lastx, &cur, &prev, 5331 - firstblock, flist, &logflags); 5324 + firstblock, dfops, &logflags); 5332 5325 if (error) 5333 5326 goto error0; 5334 5327 goto nodelete; ··· 5337 5330 del.br_state = XFS_EXT_UNWRITTEN; 5338 5331 error = xfs_bmap_add_extent_unwritten_real(tp, 5339 5332 ip, &lastx, &cur, &del, 5340 - firstblock, flist, &logflags); 5333 + firstblock, dfops, &logflags); 5341 5334 if (error) 5342 5335 goto error0; 5343 5336 goto nodelete; ··· 5395 5388 } else if (cur) 5396 5389 cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; 5397 5390 5398 - error = xfs_bmap_del_extent(ip, tp, 
&lastx, flist, cur, &del, 5391 + error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del, 5399 5392 &tmp_logflags, whichfork); 5400 5393 logflags |= tmp_logflags; 5401 5394 if (error) ··· 5429 5422 */ 5430 5423 if (xfs_bmap_needs_btree(ip, whichfork)) { 5431 5424 ASSERT(cur == NULL); 5432 - error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, 5425 + error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, 5433 5426 &cur, 0, &tmp_logflags, whichfork); 5434 5427 logflags |= tmp_logflags; 5435 5428 if (error) ··· 5596 5589 struct xfs_bmbt_rec_host *gotp, 5597 5590 struct xfs_btree_cur *cur, 5598 5591 int *logflags, 5599 - enum shift_direction direction) 5592 + enum shift_direction direction, 5593 + struct xfs_defer_ops *dfops) 5600 5594 { 5601 5595 struct xfs_ifork *ifp; 5602 5596 struct xfs_mount *mp; ··· 5645 5637 /* check whether to merge the extent or shift it down */ 5646 5638 if (xfs_bmse_can_merge(&adj_irec, &got, 5647 5639 offset_shift_fsb)) { 5648 - return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5649 - *current_ext, gotp, adj_irecp, 5650 - cur, logflags); 5640 + error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5641 + *current_ext, gotp, adj_irecp, 5642 + cur, logflags); 5643 + if (error) 5644 + return error; 5645 + adj_irec = got; 5646 + goto update_rmap; 5651 5647 } 5652 5648 } else { 5653 5649 startoff = got.br_startoff + offset_shift_fsb; ··· 5688 5676 (*current_ext)--; 5689 5677 xfs_bmbt_set_startoff(gotp, startoff); 5690 5678 *logflags |= XFS_ILOG_CORE; 5679 + adj_irec = got; 5691 5680 if (!cur) { 5692 5681 *logflags |= XFS_ILOG_DEXT; 5693 - return 0; 5682 + goto update_rmap; 5694 5683 } 5695 5684 5696 5685 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock, ··· 5701 5688 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5702 5689 5703 5690 got.br_startoff = startoff; 5704 - return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, 5705 - got.br_blockcount, got.br_state); 5691 + error = xfs_bmbt_update(cur, 
got.br_startoff, got.br_startblock, 5692 + got.br_blockcount, got.br_state); 5693 + if (error) 5694 + return error; 5695 + 5696 + update_rmap: 5697 + /* update reverse mapping */ 5698 + error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec); 5699 + if (error) 5700 + return error; 5701 + adj_irec.br_startoff = startoff; 5702 + return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec); 5706 5703 } 5707 5704 5708 5705 /* ··· 5734 5711 int *done, 5735 5712 xfs_fileoff_t stop_fsb, 5736 5713 xfs_fsblock_t *firstblock, 5737 - struct xfs_bmap_free *flist, 5714 + struct xfs_defer_ops *dfops, 5738 5715 enum shift_direction direction, 5739 5716 int num_exts) 5740 5717 { ··· 5779 5756 if (ifp->if_flags & XFS_IFBROOT) { 5780 5757 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5781 5758 cur->bc_private.b.firstblock = *firstblock; 5782 - cur->bc_private.b.flist = flist; 5759 + cur->bc_private.b.dfops = dfops; 5783 5760 cur->bc_private.b.flags = 0; 5784 5761 } 5785 5762 ··· 5840 5817 while (nexts++ < num_exts) { 5841 5818 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5842 5819 &current_ext, gotp, cur, &logflags, 5843 - direction); 5820 + direction, dfops); 5844 5821 if (error) 5845 5822 goto del_cursor; 5846 5823 /* ··· 5888 5865 struct xfs_inode *ip, 5889 5866 xfs_fileoff_t split_fsb, 5890 5867 xfs_fsblock_t *firstfsb, 5891 - struct xfs_bmap_free *free_list) 5868 + struct xfs_defer_ops *dfops) 5892 5869 { 5893 5870 int whichfork = XFS_DATA_FORK; 5894 5871 struct xfs_btree_cur *cur = NULL; ··· 5950 5927 if (ifp->if_flags & XFS_IFBROOT) { 5951 5928 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5952 5929 cur->bc_private.b.firstblock = *firstfsb; 5953 - cur->bc_private.b.flist = free_list; 5930 + cur->bc_private.b.dfops = dfops; 5954 5931 cur->bc_private.b.flags = 0; 5955 5932 error = xfs_bmbt_lookup_eq(cur, got.br_startoff, 5956 5933 got.br_startblock, ··· 6003 5980 int tmp_logflags; /* partial log flag return val */ 6004 5981 6005 5982 
ASSERT(cur == NULL); 6006 - error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list, 5983 + error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops, 6007 5984 &cur, 0, &tmp_logflags, whichfork); 6008 5985 logflags |= tmp_logflags; 6009 5986 } ··· 6027 6004 { 6028 6005 struct xfs_mount *mp = ip->i_mount; 6029 6006 struct xfs_trans *tp; 6030 - struct xfs_bmap_free free_list; 6007 + struct xfs_defer_ops dfops; 6031 6008 xfs_fsblock_t firstfsb; 6032 6009 int error; 6033 6010 ··· 6039 6016 xfs_ilock(ip, XFS_ILOCK_EXCL); 6040 6017 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 6041 6018 6042 - xfs_bmap_init(&free_list, &firstfsb); 6019 + xfs_defer_init(&dfops, &firstfsb); 6043 6020 6044 6021 error = xfs_bmap_split_extent_at(tp, ip, split_fsb, 6045 - &firstfsb, &free_list); 6022 + &firstfsb, &dfops); 6046 6023 if (error) 6047 6024 goto out; 6048 6025 6049 - error = xfs_bmap_finish(&tp, &free_list, NULL); 6026 + error = xfs_defer_finish(&tp, &dfops, NULL); 6050 6027 if (error) 6051 6028 goto out; 6052 6029 6053 6030 return xfs_trans_commit(tp); 6054 6031 6055 6032 out: 6056 - xfs_bmap_cancel(&free_list); 6033 + xfs_defer_cancel(&dfops); 6057 6034 xfs_trans_cancel(tp); 6058 6035 return error; 6059 6036 }

+12 -42

fs/xfs/libxfs/xfs_bmap.h

··· 32 32 */ 33 33 struct xfs_bmalloca { 34 34 xfs_fsblock_t *firstblock; /* i/o first block allocated */ 35 - struct xfs_bmap_free *flist; /* bmap freelist */ 35 + struct xfs_defer_ops *dfops; /* bmap freelist */ 36 36 struct xfs_trans *tp; /* transaction pointer */ 37 37 struct xfs_inode *ip; /* incore inode pointer */ 38 38 struct xfs_bmbt_irec prev; /* extent before the new one */ ··· 62 62 * List of extents to be free "later". 63 63 * The list is kept sorted on xbf_startblock. 64 64 */ 65 - struct xfs_bmap_free_item 65 + struct xfs_extent_free_item 66 66 { 67 - xfs_fsblock_t xbfi_startblock;/* starting fs block number */ 68 - xfs_extlen_t xbfi_blockcount;/* number of blocks in extent */ 69 - struct list_head xbfi_list; 67 + xfs_fsblock_t xefi_startblock;/* starting fs block number */ 68 + xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ 69 + struct list_head xefi_list; 70 + struct xfs_owner_info xefi_oinfo; /* extent owner */ 70 71 }; 71 - 72 - /* 73 - * Header for free extent list. 74 - * 75 - * xbf_low is used by the allocator to activate the lowspace algorithm - 76 - * when free space is running low the extent allocator may choose to 77 - * allocate an extent from an AG without leaving sufficient space for 78 - * a btree split when inserting the new extent. In this case the allocator 79 - * will enable the lowspace algorithm which is supposed to allow further 80 - * allocations (such as btree splits and newroots) to allocate from 81 - * sequential AGs. In order to avoid locking AGs out of order the lowspace 82 - * algorithm will start searching for free space from AG 0. If the correct 83 - * transaction reservations have been made then this algorithm will eventually 84 - * find all the space it needs. 
85 - */ 86 - typedef struct xfs_bmap_free 87 - { 88 - struct list_head xbf_flist; /* list of to-be-free extents */ 89 - int xbf_count; /* count of items on list */ 90 - int xbf_low; /* alloc in low mode */ 91 - } xfs_bmap_free_t; 92 72 93 73 #define XFS_BMAP_MAX_NMAP 4 94 74 ··· 119 139 #define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL) 120 140 #define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL) 121 141 122 - static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) 123 - { 124 - INIT_LIST_HEAD(&flp->xbf_flist); 125 - flp->xbf_count = 0; 126 - flp->xbf_low = 0; 127 - *fbp = NULLFSBLOCK; 128 - } 129 - 130 142 /* 131 143 * Flags for xfs_bmap_add_extent*. 132 144 */ ··· 165 193 166 194 int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); 167 195 void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); 168 - void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_bmap_free *flist, 169 - xfs_fsblock_t bno, xfs_filblks_t len); 170 - void xfs_bmap_cancel(struct xfs_bmap_free *flist); 171 - int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, 172 - struct xfs_inode *ip); 196 + void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 197 + xfs_fsblock_t bno, xfs_filblks_t len, 198 + struct xfs_owner_info *oinfo); 173 199 void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); 174 200 int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, 175 201 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); ··· 188 218 xfs_fileoff_t bno, xfs_filblks_t len, int flags, 189 219 xfs_fsblock_t *firstblock, xfs_extlen_t total, 190 220 struct xfs_bmbt_irec *mval, int *nmap, 191 - struct xfs_bmap_free *flist); 221 + struct xfs_defer_ops *dfops); 192 222 int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, 193 223 xfs_fileoff_t bno, xfs_filblks_t len, int flags, 194 224 xfs_extnum_t nexts, xfs_fsblock_t *firstblock, 195 - struct xfs_bmap_free *flist, int *done); 225 + struct 
xfs_defer_ops *dfops, int *done); 196 226 int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, 197 227 xfs_extnum_t num); 198 228 uint xfs_default_attroffset(struct xfs_inode *ip); 199 229 int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 200 230 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, 201 231 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, 202 - struct xfs_bmap_free *flist, enum shift_direction direction, 232 + struct xfs_defer_ops *dfops, enum shift_direction direction, 203 233 int num_exts); 204 234 int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); 205 235

+13 -19

fs/xfs/libxfs/xfs_bmap_btree.c

··· 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_bit.h" 25 25 #include "xfs_mount.h" 26 + #include "xfs_defer.h" 26 27 #include "xfs_inode.h" 27 28 #include "xfs_trans.h" 28 29 #include "xfs_inode_item.h" ··· 35 34 #include "xfs_quota.h" 36 35 #include "xfs_trace.h" 37 36 #include "xfs_cksum.h" 37 + #include "xfs_rmap.h" 38 38 39 39 /* 40 40 * Determine the extent state. ··· 408 406 cur->bc_private.b.ip, cur->bc_private.b.whichfork); 409 407 410 408 /* 411 - * Copy the firstblock, flist, and flags values, 409 + * Copy the firstblock, dfops, and flags values, 412 410 * since init cursor doesn't get them. 413 411 */ 414 412 new->bc_private.b.firstblock = cur->bc_private.b.firstblock; 415 - new->bc_private.b.flist = cur->bc_private.b.flist; 413 + new->bc_private.b.dfops = cur->bc_private.b.dfops; 416 414 new->bc_private.b.flags = cur->bc_private.b.flags; 417 415 418 416 return new; ··· 425 423 { 426 424 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || 427 425 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); 428 - ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist); 426 + ASSERT(dst->bc_private.b.dfops == src->bc_private.b.dfops); 429 427 430 428 dst->bc_private.b.allocated += src->bc_private.b.allocated; 431 429 dst->bc_private.b.firstblock = src->bc_private.b.firstblock; ··· 448 446 args.mp = cur->bc_mp; 449 447 args.fsbno = cur->bc_private.b.firstblock; 450 448 args.firstblock = args.fsbno; 449 + xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino, 450 + cur->bc_private.b.whichfork); 451 451 452 452 if (args.fsbno == NULLFSBLOCK) { 453 453 args.fsbno = be64_to_cpu(start->l); ··· 466 462 * block allocation here and corrupt the filesystem. 
467 463 */ 468 464 args.minleft = args.tp->t_blk_res; 469 - } else if (cur->bc_private.b.flist->xbf_low) { 465 + } else if (cur->bc_private.b.dfops->dop_low) { 470 466 args.type = XFS_ALLOCTYPE_START_BNO; 471 467 } else { 472 468 args.type = XFS_ALLOCTYPE_NEAR_BNO; ··· 494 490 error = xfs_alloc_vextent(&args); 495 491 if (error) 496 492 goto error0; 497 - cur->bc_private.b.flist->xbf_low = 1; 493 + cur->bc_private.b.dfops->dop_low = true; 498 494 } 499 495 if (args.fsbno == NULLFSBLOCK) { 500 496 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); ··· 529 525 struct xfs_inode *ip = cur->bc_private.b.ip; 530 526 struct xfs_trans *tp = cur->bc_tp; 531 527 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); 528 + struct xfs_owner_info oinfo; 532 529 533 - xfs_bmap_add_free(mp, cur->bc_private.b.flist, fsbno, 1); 530 + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork); 531 + xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo); 534 532 ip->i_d.di_nblocks--; 535 533 536 534 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ··· 603 597 { 604 598 key->bmbt.br_startoff = 605 599 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt)); 606 - } 607 - 608 - STATIC void 609 - xfs_bmbt_init_rec_from_key( 610 - union xfs_btree_key *key, 611 - union xfs_btree_rec *rec) 612 - { 613 - ASSERT(key->bmbt.br_startoff != 0); 614 - 615 - xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff), 616 - 0, 0, XFS_EXT_NORM); 617 600 } 618 601 619 602 STATIC void ··· 755 760 .get_minrecs = xfs_bmbt_get_minrecs, 756 761 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, 757 762 .init_key_from_rec = xfs_bmbt_init_key_from_rec, 758 - .init_rec_from_key = xfs_bmbt_init_rec_from_key, 759 763 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, 760 764 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, 761 765 .key_diff = xfs_bmbt_key_diff, ··· 794 800 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); 795 801 cur->bc_private.b.ip = ip; 796 802 cur->bc_private.b.firstblock 
= NULLFSBLOCK; 797 - cur->bc_private.b.flist = NULL; 803 + cur->bc_private.b.dfops = NULL; 798 804 cur->bc_private.b.allocated = 0; 799 805 cur->bc_private.b.flags = 0; 800 806 cur->bc_private.b.whichfork = whichfork;

+765 -151

fs/xfs/libxfs/xfs_btree.c

··· 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_bit.h" 25 25 #include "xfs_mount.h" 26 + #include "xfs_defer.h" 26 27 #include "xfs_inode.h" 27 28 #include "xfs_trans.h" 28 29 #include "xfs_inode_item.h" ··· 44 43 * Btree magic numbers. 45 44 */ 46 45 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 47 - { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 46 + { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 48 47 XFS_FIBT_MAGIC }, 49 - { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, 48 + { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, 50 49 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } 51 50 }; 52 51 #define xfs_btree_magic(cur) \ 53 52 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] 54 - 55 53 56 54 STATIC int /* error (0 or EFSCORRUPTED) */ 57 55 xfs_btree_check_lblock( ··· 428 428 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 429 429 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 430 430 * inside the btree block is done using indices starting at one, not zero! 431 + * 432 + * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing 433 + * overlapping intervals. In such a tree, records are still sorted lowest to 434 + * highest and indexed by the smallest key value that refers to the record. 435 + * However, nodes are different: each pointer has two associated keys -- one 436 + * indexing the lowest key available in the block(s) below (the same behavior 437 + * as the key in a regular btree) and another indexing the highest key 438 + * available in the block(s) below. Because records are /not/ sorted by the 439 + * highest key, all leaf block updates require us to compute the highest key 440 + * that matches any record in the leaf and to recursively update the high keys 441 + * in the nodes going further up in the tree, if necessary. 
Nodes look like 442 + * this: 443 + * 444 + * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 445 + * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... | 446 + * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 447 + * 448 + * To perform an interval query on an overlapped tree, perform the usual 449 + * depth-first search and use the low and high keys to decide if we can skip 450 + * that particular node. If a leaf node is reached, return the records that 451 + * intersect the interval. Note that an interval query may return numerous 452 + * entries. For a non-overlapped tree, simply search for the record associated 453 + * with the lowest key and iterate forward until a non-matching record is 454 + * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by 455 + * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in 456 + * more detail. 457 + * 458 + * Why do we care about overlapping intervals? Let's say you have a bunch of 459 + * reverse mapping records on a reflink filesystem: 460 + * 461 + * 1: +- file A startblock B offset C length D -----------+ 462 + * 2: +- file E startblock F offset G length H --------------+ 463 + * 3: +- file I startblock F offset J length K --+ 464 + * 4: +- file L... --+ 465 + * 466 + * Now say we want to map block (B+D) into file A at offset (C+D). Ideally, 467 + * we'd simply increment the length of record 1. But how do we find the record 468 + * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return 469 + * record 3 because the keys are ordered first by startblock. An interval 470 + * query would return records 1 and 2 because they both overlap (B+D-1), and 471 + * from that we can pick out record 1 as the appropriate left neighbor. 472 + * 473 + * In the non-overlapped case you can do a LE lookup and decrement the cursor 474 + * because a record's interval must end before the next record. 
431 475 */ 432 476 433 477 /* ··· 523 479 } 524 480 525 481 /* 482 + * Calculate offset of the n-th high key in a btree block. 483 + */ 484 + STATIC size_t 485 + xfs_btree_high_key_offset( 486 + struct xfs_btree_cur *cur, 487 + int n) 488 + { 489 + return xfs_btree_block_len(cur) + 490 + (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2); 491 + } 492 + 493 + /* 526 494 * Calculate offset of the n-th block pointer in a btree block. 527 495 */ 528 496 STATIC size_t ··· 572 516 { 573 517 return (union xfs_btree_key *) 574 518 ((char *)block + xfs_btree_key_offset(cur, n)); 519 + } 520 + 521 + /* 522 + * Return a pointer to the n-th high key in the btree block. 523 + */ 524 + STATIC union xfs_btree_key * 525 + xfs_btree_high_key_addr( 526 + struct xfs_btree_cur *cur, 527 + int n, 528 + struct xfs_btree_block *block) 529 + { 530 + return (union xfs_btree_key *) 531 + ((char *)block + xfs_btree_high_key_offset(cur, n)); 575 532 } 576 533 577 534 /* ··· 1212 1143 break; 1213 1144 case XFS_BTNUM_BMAP: 1214 1145 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1146 + break; 1147 + case XFS_BTNUM_RMAP: 1148 + xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF); 1215 1149 break; 1216 1150 default: 1217 1151 ASSERT(0); ··· 1951 1879 return error; 1952 1880 } 1953 1881 1882 + /* Find the high key storage area from a regular key. 
*/ 1883 + STATIC union xfs_btree_key * 1884 + xfs_btree_high_key_from_key( 1885 + struct xfs_btree_cur *cur, 1886 + union xfs_btree_key *key) 1887 + { 1888 + ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 1889 + return (union xfs_btree_key *)((char *)key + 1890 + (cur->bc_ops->key_len / 2)); 1891 + } 1892 + 1893 + /* Determine the low (and high if overlapped) keys of a leaf block */ 1894 + STATIC void 1895 + xfs_btree_get_leaf_keys( 1896 + struct xfs_btree_cur *cur, 1897 + struct xfs_btree_block *block, 1898 + union xfs_btree_key *key) 1899 + { 1900 + union xfs_btree_key max_hkey; 1901 + union xfs_btree_key hkey; 1902 + union xfs_btree_rec *rec; 1903 + union xfs_btree_key *high; 1904 + int n; 1905 + 1906 + rec = xfs_btree_rec_addr(cur, 1, block); 1907 + cur->bc_ops->init_key_from_rec(key, rec); 1908 + 1909 + if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 1910 + 1911 + cur->bc_ops->init_high_key_from_rec(&max_hkey, rec); 1912 + for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 1913 + rec = xfs_btree_rec_addr(cur, n, block); 1914 + cur->bc_ops->init_high_key_from_rec(&hkey, rec); 1915 + if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey) 1916 + > 0) 1917 + max_hkey = hkey; 1918 + } 1919 + 1920 + high = xfs_btree_high_key_from_key(cur, key); 1921 + memcpy(high, &max_hkey, cur->bc_ops->key_len / 2); 1922 + } 1923 + } 1924 + 1925 + /* Determine the low (and high if overlapped) keys of a node block */ 1926 + STATIC void 1927 + xfs_btree_get_node_keys( 1928 + struct xfs_btree_cur *cur, 1929 + struct xfs_btree_block *block, 1930 + union xfs_btree_key *key) 1931 + { 1932 + union xfs_btree_key *hkey; 1933 + union xfs_btree_key *max_hkey; 1934 + union xfs_btree_key *high; 1935 + int n; 1936 + 1937 + if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 1938 + memcpy(key, xfs_btree_key_addr(cur, 1, block), 1939 + cur->bc_ops->key_len / 2); 1940 + 1941 + max_hkey = xfs_btree_high_key_addr(cur, 1, block); 1942 + for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 1943 + hkey = 
xfs_btree_high_key_addr(cur, n, block); 1944 + if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0) 1945 + max_hkey = hkey; 1946 + } 1947 + 1948 + high = xfs_btree_high_key_from_key(cur, key); 1949 + memcpy(high, max_hkey, cur->bc_ops->key_len / 2); 1950 + } else { 1951 + memcpy(key, xfs_btree_key_addr(cur, 1, block), 1952 + cur->bc_ops->key_len); 1953 + } 1954 + } 1955 + 1956 + /* Derive the keys for any btree block. */ 1957 + STATIC void 1958 + xfs_btree_get_keys( 1959 + struct xfs_btree_cur *cur, 1960 + struct xfs_btree_block *block, 1961 + union xfs_btree_key *key) 1962 + { 1963 + if (be16_to_cpu(block->bb_level) == 0) 1964 + xfs_btree_get_leaf_keys(cur, block, key); 1965 + else 1966 + xfs_btree_get_node_keys(cur, block, key); 1967 + } 1968 + 1954 1969 /* 1955 - * Update keys at all levels from here to the root along the cursor's path. 1970 + * Decide if we need to update the parent keys of a btree block. For 1971 + * a standard btree this is only necessary if we're updating the first 1972 + * record/key. For an overlapping btree, we must always update the 1973 + * keys because the highest key can be in any of the records or keys 1974 + * in the block. 1975 + */ 1976 + static inline bool 1977 + xfs_btree_needs_key_update( 1978 + struct xfs_btree_cur *cur, 1979 + int ptr) 1980 + { 1981 + return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1; 1982 + } 1983 + 1984 + /* 1985 + * Update the low and high parent keys of the given level, progressing 1986 + * towards the root. If force_all is false, stop if the keys for a given 1987 + * level do not need updating. 
1956 1988 */ 1957 1989 STATIC int 1958 - xfs_btree_updkey( 1990 + __xfs_btree_updkeys( 1959 1991 struct xfs_btree_cur *cur, 1960 - union xfs_btree_key *keyp, 1992 + int level, 1993 + struct xfs_btree_block *block, 1994 + struct xfs_buf *bp0, 1995 + bool force_all) 1996 + { 1997 + union xfs_btree_bigkey key; /* keys from current level */ 1998 + union xfs_btree_key *lkey; /* keys from the next level up */ 1999 + union xfs_btree_key *hkey; 2000 + union xfs_btree_key *nlkey; /* keys from the next level up */ 2001 + union xfs_btree_key *nhkey; 2002 + struct xfs_buf *bp; 2003 + int ptr; 2004 + 2005 + ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2006 + 2007 + /* Exit if there aren't any parent levels to update. */ 2008 + if (level + 1 >= cur->bc_nlevels) 2009 + return 0; 2010 + 2011 + trace_xfs_btree_updkeys(cur, level, bp0); 2012 + 2013 + lkey = (union xfs_btree_key *)&key; 2014 + hkey = xfs_btree_high_key_from_key(cur, lkey); 2015 + xfs_btree_get_keys(cur, block, lkey); 2016 + for (level++; level < cur->bc_nlevels; level++) { 2017 + #ifdef DEBUG 2018 + int error; 2019 + #endif 2020 + block = xfs_btree_get_block(cur, level, &bp); 2021 + trace_xfs_btree_updkeys(cur, level, bp); 2022 + #ifdef DEBUG 2023 + error = xfs_btree_check_block(cur, block, level, bp); 2024 + if (error) { 2025 + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2026 + return error; 2027 + } 2028 + #endif 2029 + ptr = cur->bc_ptrs[level]; 2030 + nlkey = xfs_btree_key_addr(cur, ptr, block); 2031 + nhkey = xfs_btree_high_key_addr(cur, ptr, block); 2032 + if (!force_all && 2033 + !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 || 2034 + cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0)) 2035 + break; 2036 + xfs_btree_copy_keys(cur, nlkey, lkey, 1); 2037 + xfs_btree_log_keys(cur, bp, ptr, ptr); 2038 + if (level + 1 >= cur->bc_nlevels) 2039 + break; 2040 + xfs_btree_get_node_keys(cur, block, lkey); 2041 + } 2042 + 2043 + return 0; 2044 + } 2045 + 2046 + /* Update all the keys from some level in cursor back to 
the root. */ 2047 + STATIC int 2048 + xfs_btree_updkeys_force( 2049 + struct xfs_btree_cur *cur, 2050 + int level) 2051 + { 2052 + struct xfs_buf *bp; 2053 + struct xfs_btree_block *block; 2054 + 2055 + block = xfs_btree_get_block(cur, level, &bp); 2056 + return __xfs_btree_updkeys(cur, level, block, bp, true); 2057 + } 2058 + 2059 + /* 2060 + * Update the parent keys of the given level, progressing towards the root. 2061 + */ 2062 + STATIC int 2063 + xfs_btree_update_keys( 2064 + struct xfs_btree_cur *cur, 1961 2065 int level) 1962 2066 { 1963 2067 struct xfs_btree_block *block; 1964 2068 struct xfs_buf *bp; 1965 2069 union xfs_btree_key *kp; 2070 + union xfs_btree_key key; 1966 2071 int ptr; 2072 + 2073 + ASSERT(level >= 0); 2074 + 2075 + block = xfs_btree_get_block(cur, level, &bp); 2076 + if (cur->bc_flags & XFS_BTREE_OVERLAPPING) 2077 + return __xfs_btree_updkeys(cur, level, block, bp, false); 1967 2078 1968 2079 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1969 2080 XFS_BTREE_TRACE_ARGIK(cur, level, keyp); 1970 - 1971 - ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1); 1972 2081 1973 2082 /* 1974 2083 * Go up the tree from this level toward the root. ··· 2157 1904 * Stop when we reach a level where the cursor isn't pointing 2158 1905 * at the first entry in the block. 2159 1906 */ 2160 - for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 1907 + xfs_btree_get_keys(cur, block, &key); 1908 + for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 2161 1909 #ifdef DEBUG 2162 1910 int error; 2163 1911 #endif ··· 2172 1918 #endif 2173 1919 ptr = cur->bc_ptrs[level]; 2174 1920 kp = xfs_btree_key_addr(cur, ptr, block); 2175 - xfs_btree_copy_keys(cur, kp, keyp, 1); 1921 + xfs_btree_copy_keys(cur, kp, &key, 1); 2176 1922 xfs_btree_log_keys(cur, bp, ptr, ptr); 2177 1923 } 2178 1924 ··· 2224 1970 ptr, LASTREC_UPDATE); 2225 1971 } 2226 1972 2227 - /* Updating first rec in leaf. Pass new key value up to our parent. 
*/ 2228 - if (ptr == 1) { 2229 - union xfs_btree_key key; 2230 - 2231 - cur->bc_ops->init_key_from_rec(&key, rec); 2232 - error = xfs_btree_updkey(cur, &key, 1); 1973 + /* Pass new key value up to our parent. */ 1974 + if (xfs_btree_needs_key_update(cur, ptr)) { 1975 + error = xfs_btree_update_keys(cur, 0); 2233 1976 if (error) 2234 1977 goto error0; 2235 1978 } ··· 2249 1998 int level, 2250 1999 int *stat) /* success/failure */ 2251 2000 { 2252 - union xfs_btree_key key; /* btree key */ 2253 2001 struct xfs_buf *lbp; /* left buffer pointer */ 2254 2002 struct xfs_btree_block *left; /* left btree block */ 2255 2003 int lrecs; /* left record count */ 2256 2004 struct xfs_buf *rbp; /* right buffer pointer */ 2257 2005 struct xfs_btree_block *right; /* right btree block */ 2006 + struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2258 2007 int rrecs; /* right record count */ 2259 2008 union xfs_btree_ptr lptr; /* left btree pointer */ 2260 2009 union xfs_btree_key *rkp = NULL; /* right btree key */ 2261 2010 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2262 2011 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2263 2012 int error; /* error return value */ 2013 + int i; 2264 2014 2265 2015 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2266 2016 XFS_BTREE_TRACE_ARGI(cur, level); ··· 2391 2139 xfs_btree_rec_addr(cur, 2, right), 2392 2140 -1, rrecs); 2393 2141 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2394 - 2395 - /* 2396 - * If it's the first record in the block, we'll need a key 2397 - * structure to pass up to the next level (updkey). 2398 - */ 2399 - cur->bc_ops->init_key_from_rec(&key, 2400 - xfs_btree_rec_addr(cur, 1, right)); 2401 - rkp = &key; 2402 2142 } 2403 2143 2404 - /* Update the parent key values of right. */ 2405 - error = xfs_btree_updkey(cur, rkp, level + 1); 2144 + /* 2145 + * Using a temporary cursor, update the parent key values of the 2146 + * block on the left. 
2147 + */ 2148 + if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2149 + error = xfs_btree_dup_cursor(cur, &tcur); 2150 + if (error) 2151 + goto error0; 2152 + i = xfs_btree_firstrec(tcur, level); 2153 + XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); 2154 + 2155 + error = xfs_btree_decrement(tcur, level, &i); 2156 + if (error) 2157 + goto error1; 2158 + 2159 + /* Update the parent high keys of the left block, if needed. */ 2160 + error = xfs_btree_update_keys(tcur, level); 2161 + if (error) 2162 + goto error1; 2163 + 2164 + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2165 + } 2166 + 2167 + /* Update the parent keys of the right block. */ 2168 + error = xfs_btree_update_keys(cur, level); 2406 2169 if (error) 2407 2170 goto error0; 2408 2171 ··· 2436 2169 error0: 2437 2170 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2438 2171 return error; 2172 + 2173 + error1: 2174 + XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); 2175 + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2176 + return error; 2439 2177 } 2440 2178 2441 2179 /* ··· 2453 2181 int level, 2454 2182 int *stat) /* success/failure */ 2455 2183 { 2456 - union xfs_btree_key key; /* btree key */ 2457 2184 struct xfs_buf *lbp; /* left buffer pointer */ 2458 2185 struct xfs_btree_block *left; /* left btree block */ 2459 2186 struct xfs_buf *rbp; /* right buffer pointer */ ··· 2561 2290 /* Now put the new data in, and log it. 
*/ 2562 2291 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2563 2292 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2564 - 2565 - cur->bc_ops->init_key_from_rec(&key, rrp); 2566 - rkp = &key; 2567 - 2568 - ASSERT(cur->bc_ops->recs_inorder(cur, rrp, 2569 - xfs_btree_rec_addr(cur, 2, right))); 2570 2293 } 2571 2294 2572 2295 /* ··· 2580 2315 if (error) 2581 2316 goto error0; 2582 2317 i = xfs_btree_lastrec(tcur, level); 2583 - XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 2318 + XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); 2584 2319 2585 2320 error = xfs_btree_increment(tcur, level, &i); 2586 2321 if (error) 2587 2322 goto error1; 2588 2323 2589 - error = xfs_btree_updkey(tcur, rkp, level + 1); 2324 + /* Update the parent high keys of the left block, if needed. */ 2325 + if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2326 + error = xfs_btree_update_keys(cur, level); 2327 + if (error) 2328 + goto error1; 2329 + } 2330 + 2331 + /* Update the parent keys of the right block. */ 2332 + error = xfs_btree_update_keys(tcur, level); 2590 2333 if (error) 2591 2334 goto error1; 2592 2335 ··· 2695 2422 2696 2423 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2697 2424 2425 + /* Adjust numrecs for the later get_*_keys() calls. */ 2426 + lrecs -= rrecs; 2427 + xfs_btree_set_numrecs(left, lrecs); 2428 + xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); 2429 + 2698 2430 /* 2699 2431 * Copy btree block entries from the left block over to the 2700 2432 * new block, the right. Update the right block and log the ··· 2725 2447 } 2726 2448 #endif 2727 2449 2450 + /* Copy the keys & pointers to the new block. 
*/ 2728 2451 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2729 2452 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2730 2453 2731 2454 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2732 2455 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2733 2456 2734 - /* Grab the keys to the entries moved to the right block */ 2735 - xfs_btree_copy_keys(cur, key, rkp, 1); 2457 + /* Stash the keys of the new block for later insertion. */ 2458 + xfs_btree_get_node_keys(cur, right, key); 2736 2459 } else { 2737 2460 /* It's a leaf. Move records. */ 2738 2461 union xfs_btree_rec *lrp; /* left record pointer */ ··· 2742 2463 lrp = xfs_btree_rec_addr(cur, src_index, left); 2743 2464 rrp = xfs_btree_rec_addr(cur, 1, right); 2744 2465 2466 + /* Copy records to the new block. */ 2745 2467 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2746 2468 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2747 2469 2748 - cur->bc_ops->init_key_from_rec(key, 2749 - xfs_btree_rec_addr(cur, 1, right)); 2470 + /* Stash the keys of the new block for later insertion. */ 2471 + xfs_btree_get_leaf_keys(cur, right, key); 2750 2472 } 2751 - 2752 2473 2753 2474 /* 2754 2475 * Find the left block number by looking in the buffer. 2755 - * Adjust numrecs, sibling pointers. 2476 + * Adjust sibling pointers. 
2756 2477 */ 2757 2478 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2758 2479 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2759 2480 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2760 2481 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2761 - 2762 - lrecs -= rrecs; 2763 - xfs_btree_set_numrecs(left, lrecs); 2764 - xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); 2765 2482 2766 2483 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2767 2484 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); ··· 2774 2499 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2775 2500 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2776 2501 } 2502 + 2503 + /* Update the parent high keys of the left block, if needed. */ 2504 + if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2505 + error = xfs_btree_update_keys(cur, level); 2506 + if (error) 2507 + goto error0; 2508 + } 2509 + 2777 2510 /* 2778 2511 * If the cursor is really in the right block, move it there. 2779 2512 * If it's just pointing past the last entry in left, then we'll ··· 3085 2802 bp = lbp; 3086 2803 nptr = 2; 3087 2804 } 2805 + 3088 2806 /* Fill in the new block's btree header and log it. */ 3089 2807 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 3090 2808 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); ··· 3094 2810 3095 2811 /* Fill in the key data in the new root. */ 3096 2812 if (xfs_btree_get_level(left) > 0) { 3097 - xfs_btree_copy_keys(cur, 3098 - xfs_btree_key_addr(cur, 1, new), 3099 - xfs_btree_key_addr(cur, 1, left), 1); 3100 - xfs_btree_copy_keys(cur, 3101 - xfs_btree_key_addr(cur, 2, new), 3102 - xfs_btree_key_addr(cur, 1, right), 1); 2813 + /* 2814 + * Get the keys for the left block's keys and put them directly 2815 + * in the parent block. Do the same for the right block. 
2816 + */ 2817 + xfs_btree_get_node_keys(cur, left, 2818 + xfs_btree_key_addr(cur, 1, new)); 2819 + xfs_btree_get_node_keys(cur, right, 2820 + xfs_btree_key_addr(cur, 2, new)); 3103 2821 } else { 3104 - cur->bc_ops->init_key_from_rec( 3105 - xfs_btree_key_addr(cur, 1, new), 3106 - xfs_btree_rec_addr(cur, 1, left)); 3107 - cur->bc_ops->init_key_from_rec( 3108 - xfs_btree_key_addr(cur, 2, new), 3109 - xfs_btree_rec_addr(cur, 1, right)); 2822 + /* 2823 + * Get the keys for the left block's records and put them 2824 + * directly in the parent block. Do the same for the right 2825 + * block. 2826 + */ 2827 + xfs_btree_get_leaf_keys(cur, left, 2828 + xfs_btree_key_addr(cur, 1, new)); 2829 + xfs_btree_get_leaf_keys(cur, right, 2830 + xfs_btree_key_addr(cur, 2, new)); 3110 2831 } 3111 2832 xfs_btree_log_keys(cur, nbp, 1, 2); 3112 2833 ··· 3147 2858 int *index, /* new tree index */ 3148 2859 union xfs_btree_ptr *nptr, /* new btree ptr */ 3149 2860 struct xfs_btree_cur **ncur, /* new btree cursor */ 3150 - union xfs_btree_rec *nrec, /* new record */ 2861 + union xfs_btree_key *key, /* key of new block */ 3151 2862 int *stat) 3152 2863 { 3153 - union xfs_btree_key key; /* new btree key value */ 3154 2864 int error = 0; 3155 2865 3156 2866 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && ··· 3159 2871 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 3160 2872 /* A root block that can be made bigger. */ 3161 2873 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); 2874 + *stat = 1; 3162 2875 } else { 3163 2876 /* A root block that needs replacing */ 3164 2877 int logflags = 0; ··· 3195 2906 * If this works we have to re-set our variables because we 3196 2907 * could be in a different block now. 
3197 2908 */ 3198 - error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); 2909 + error = xfs_btree_split(cur, level, nptr, key, ncur, stat); 3199 2910 if (error || *stat == 0) 3200 2911 return error; 3201 2912 3202 2913 3203 2914 *index = cur->bc_ptrs[level]; 3204 - cur->bc_ops->init_rec_from_key(&key, nrec); 3205 2915 return 0; 3206 2916 } 3207 2917 ··· 3213 2925 struct xfs_btree_cur *cur, /* btree cursor */ 3214 2926 int level, /* level to insert record at */ 3215 2927 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 3216 - union xfs_btree_rec *recp, /* i/o: record data inserted */ 2928 + union xfs_btree_rec *rec, /* record to insert */ 2929 + union xfs_btree_key *key, /* i/o: block key for ptrp */ 3217 2930 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 3218 2931 int *stat) /* success/failure */ 3219 2932 { 3220 2933 struct xfs_btree_block *block; /* btree block */ 3221 2934 struct xfs_buf *bp; /* buffer for block */ 3222 - union xfs_btree_key key; /* btree key */ 3223 2935 union xfs_btree_ptr nptr; /* new block ptr */ 3224 2936 struct xfs_btree_cur *ncur; /* new btree cursor */ 3225 - union xfs_btree_rec nrec; /* new record count */ 2937 + union xfs_btree_bigkey nkey; /* new block key */ 2938 + union xfs_btree_key *lkey; 3226 2939 int optr; /* old key/record index */ 3227 2940 int ptr; /* key/record index */ 3228 2941 int numrecs;/* number of records */ ··· 3231 2942 #ifdef DEBUG 3232 2943 int i; 3233 2944 #endif 2945 + xfs_daddr_t old_bn; 3234 2946 3235 2947 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3236 - XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); 2948 + XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec); 3237 2949 3238 2950 ncur = NULL; 2951 + lkey = (union xfs_btree_key *)&nkey; 3239 2952 3240 2953 /* 3241 2954 * If we have an external root pointer, and we've made it to the ··· 3260 2969 return 0; 3261 2970 } 3262 2971 3263 - /* Make a key out of the record data to be inserted, and save it. 
*/ 3264 - cur->bc_ops->init_key_from_rec(&key, recp); 3265 - 3266 2972 optr = ptr; 3267 2973 3268 2974 XFS_BTREE_STATS_INC(cur, insrec); 3269 2975 3270 2976 /* Get pointers to the btree buffer and block. */ 3271 2977 block = xfs_btree_get_block(cur, level, &bp); 2978 + old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL; 3272 2979 numrecs = xfs_btree_get_numrecs(block); 3273 2980 3274 2981 #ifdef DEBUG ··· 3277 2988 /* Check that the new entry is being inserted in the right place. */ 3278 2989 if (ptr <= numrecs) { 3279 2990 if (level == 0) { 3280 - ASSERT(cur->bc_ops->recs_inorder(cur, recp, 2991 + ASSERT(cur->bc_ops->recs_inorder(cur, rec, 3281 2992 xfs_btree_rec_addr(cur, ptr, block))); 3282 2993 } else { 3283 - ASSERT(cur->bc_ops->keys_inorder(cur, &key, 2994 + ASSERT(cur->bc_ops->keys_inorder(cur, key, 3284 2995 xfs_btree_key_addr(cur, ptr, block))); 3285 2996 } 3286 2997 } ··· 3293 3004 xfs_btree_set_ptr_null(cur, &nptr); 3294 3005 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 3295 3006 error = xfs_btree_make_block_unfull(cur, level, numrecs, 3296 - &optr, &ptr, &nptr, &ncur, &nrec, stat); 3007 + &optr, &ptr, &nptr, &ncur, lkey, stat); 3297 3008 if (error || *stat == 0) 3298 3009 goto error0; 3299 3010 } ··· 3343 3054 #endif 3344 3055 3345 3056 /* Now put the new data in, bump numrecs and log it. */ 3346 - xfs_btree_copy_keys(cur, kp, &key, 1); 3057 + xfs_btree_copy_keys(cur, kp, key, 1); 3347 3058 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3348 3059 numrecs++; 3349 3060 xfs_btree_set_numrecs(block, numrecs); ··· 3364 3075 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3365 3076 3366 3077 /* Now put the new data in, bump numrecs and log it. */ 3367 - xfs_btree_copy_recs(cur, rp, recp, 1); 3078 + xfs_btree_copy_recs(cur, rp, rec, 1); 3368 3079 xfs_btree_set_numrecs(block, ++numrecs); 3369 3080 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3370 3081 #ifdef DEBUG ··· 3378 3089 /* Log the new number of records in the btree header. 
*/ 3379 3090 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3380 3091 3381 - /* If we inserted at the start of a block, update the parents' keys. */ 3382 - if (optr == 1) { 3383 - error = xfs_btree_updkey(cur, &key, level + 1); 3092 + /* 3093 + * If we just inserted into a new tree block, we have to 3094 + * recalculate nkey here because nkey is out of date. 3095 + * 3096 + * Otherwise we're just updating an existing block (having shoved 3097 + * some records into the new tree block), so use the regular key 3098 + * update mechanism. 3099 + */ 3100 + if (bp && bp->b_bn != old_bn) { 3101 + xfs_btree_get_keys(cur, block, lkey); 3102 + } else if (xfs_btree_needs_key_update(cur, optr)) { 3103 + error = xfs_btree_update_keys(cur, level); 3384 3104 if (error) 3385 3105 goto error0; 3386 3106 } ··· 3399 3101 * we are at the far right edge of the tree, update it. 3400 3102 */ 3401 3103 if (xfs_btree_is_lastrec(cur, block, level)) { 3402 - cur->bc_ops->update_lastrec(cur, block, recp, 3104 + cur->bc_ops->update_lastrec(cur, block, rec, 3403 3105 ptr, LASTREC_INSREC); 3404 3106 } 3405 3107 ··· 3409 3111 */ 3410 3112 *ptrp = nptr; 3411 3113 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3412 - *recp = nrec; 3114 + xfs_btree_copy_keys(cur, key, lkey, 1); 3413 3115 *curp = ncur; 3414 3116 } 3415 3117 ··· 3440 3142 union xfs_btree_ptr nptr; /* new block number (split result) */ 3441 3143 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3442 3144 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3145 + union xfs_btree_bigkey bkey; /* key of block to insert */ 3146 + union xfs_btree_key *key; 3443 3147 union xfs_btree_rec rec; /* record to insert */ 3444 3148 3445 3149 level = 0; 3446 3150 ncur = NULL; 3447 3151 pcur = cur; 3152 + key = (union xfs_btree_key *)&bkey; 3448 3153 3449 3154 xfs_btree_set_ptr_null(cur, &nptr); 3155 + 3156 + /* Make a key out of the record data to be inserted, and save it. 
*/ 3450 3157 cur->bc_ops->init_rec_from_cur(cur, &rec); 3158 + cur->bc_ops->init_key_from_rec(key, &rec); 3451 3159 3452 3160 /* 3453 3161 * Loop going up the tree, starting at the leaf level. ··· 3465 3161 * Insert nrec/nptr into this level of the tree. 3466 3162 * Note if we fail, nptr will be null. 3467 3163 */ 3468 - error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); 3164 + error = xfs_btree_insrec(pcur, level, &nptr, &rec, key, 3165 + &ncur, &i); 3469 3166 if (error) { 3470 3167 if (pcur != cur) 3471 3168 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); ··· 3690 3385 struct xfs_buf *bp; /* buffer for block */ 3691 3386 int error; /* error return value */ 3692 3387 int i; /* loop counter */ 3693 - union xfs_btree_key key; /* storage for keyp */ 3694 - union xfs_btree_key *keyp = &key; /* passed to the next level */ 3695 3388 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3696 3389 struct xfs_buf *lbp; /* left buffer pointer */ 3697 3390 struct xfs_btree_block *left; /* left btree block */ ··· 3760 3457 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3761 3458 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3762 3459 } 3763 - 3764 - /* 3765 - * If it's the first record in the block, we'll need to pass a 3766 - * key up to the next level (updkey). 3767 - */ 3768 - if (ptr == 1) 3769 - keyp = xfs_btree_key_addr(cur, 1, block); 3770 3460 } else { 3771 3461 /* It's a leaf. operate on records */ 3772 3462 if (ptr < numrecs) { ··· 3767 3471 xfs_btree_rec_addr(cur, ptr + 1, block), 3768 3472 -1, numrecs - ptr); 3769 3473 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3770 - } 3771 - 3772 - /* 3773 - * If it's the first record in the block, we'll need a key 3774 - * structure to pass up to the next level (updkey). 
3775 - */ 3776 - if (ptr == 1) { 3777 - cur->bc_ops->init_key_from_rec(&key, 3778 - xfs_btree_rec_addr(cur, 1, block)); 3779 - keyp = &key; 3780 3474 } 3781 3475 } 3782 3476 ··· 3834 3548 * If we deleted the leftmost entry in the block, update the 3835 3549 * key values above us in the tree. 3836 3550 */ 3837 - if (ptr == 1) { 3838 - error = xfs_btree_updkey(cur, keyp, level + 1); 3551 + if (xfs_btree_needs_key_update(cur, ptr)) { 3552 + error = xfs_btree_update_keys(cur, level); 3839 3553 if (error) 3840 3554 goto error0; 3841 3555 } ··· 4164 3878 if (level > 0) 4165 3879 cur->bc_ptrs[level]--; 4166 3880 3881 + /* 3882 + * We combined blocks, so we have to update the parent keys if the 3883 + * btree supports overlapped intervals. However, bc_ptrs[level + 1] 3884 + * points to the old block so that the caller knows which record to 3885 + * delete. Therefore, the caller must be savvy enough to call updkeys 3886 + * for us if we return stat == 2. The other exit points from this 3887 + * function don't require deletions further up the tree, so they can 3888 + * call updkeys directly. 3889 + */ 3890 + 4167 3891 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 4168 3892 /* Return value means the next level up has something to do. */ 4169 3893 *stat = 2; ··· 4199 3903 int error; /* error return value */ 4200 3904 int level; 4201 3905 int i; 3906 + bool joined = false; 4202 3907 4203 3908 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 4204 3909 ··· 4211 3914 */ 4212 3915 for (level = 0, i = 2; i == 2; level++) { 4213 3916 error = xfs_btree_delrec(cur, level, &i); 3917 + if (error) 3918 + goto error0; 3919 + if (i == 2) 3920 + joined = true; 3921 + } 3922 + 3923 + /* 3924 + * If we combined blocks as part of deleting the record, delrec won't 3925 + * have updated the parent high keys so we have to do that here. 
3926 + */ 3927 + if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) { 3928 + error = xfs_btree_updkeys_force(cur, 0); 4214 3929 if (error) 4215 3930 goto error0; 4216 3931 } ··· 4287 3978 return 0; 4288 3979 } 4289 3980 3981 + /* Visit a block in a btree. */ 3982 + STATIC int 3983 + xfs_btree_visit_block( 3984 + struct xfs_btree_cur *cur, 3985 + int level, 3986 + xfs_btree_visit_blocks_fn fn, 3987 + void *data) 3988 + { 3989 + struct xfs_btree_block *block; 3990 + struct xfs_buf *bp; 3991 + union xfs_btree_ptr rptr; 3992 + int error; 3993 + 3994 + /* do right sibling readahead */ 3995 + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 3996 + block = xfs_btree_get_block(cur, level, &bp); 3997 + 3998 + /* process the block */ 3999 + error = fn(cur, level, data); 4000 + if (error) 4001 + return error; 4002 + 4003 + /* now read rh sibling block for next iteration */ 4004 + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4005 + if (xfs_btree_ptr_is_null(cur, &rptr)) 4006 + return -ENOENT; 4007 + 4008 + return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4009 + } 4010 + 4011 + 4012 + /* Visit every block in a btree. 
*/ 4013 + int 4014 + xfs_btree_visit_blocks( 4015 + struct xfs_btree_cur *cur, 4016 + xfs_btree_visit_blocks_fn fn, 4017 + void *data) 4018 + { 4019 + union xfs_btree_ptr lptr; 4020 + int level; 4021 + struct xfs_btree_block *block = NULL; 4022 + int error = 0; 4023 + 4024 + cur->bc_ops->init_ptr_from_cur(cur, &lptr); 4025 + 4026 + /* for each level */ 4027 + for (level = cur->bc_nlevels - 1; level >= 0; level--) { 4028 + /* grab the left hand block */ 4029 + error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); 4030 + if (error) 4031 + return error; 4032 + 4033 + /* readahead the left most block for the next level down */ 4034 + if (level > 0) { 4035 + union xfs_btree_ptr *ptr; 4036 + 4037 + ptr = xfs_btree_ptr_addr(cur, 1, block); 4038 + xfs_btree_readahead_ptr(cur, ptr, 1); 4039 + 4040 + /* save for the next iteration of the loop */ 4041 + lptr = *ptr; 4042 + } 4043 + 4044 + /* for each buffer in the level */ 4045 + do { 4046 + error = xfs_btree_visit_block(cur, level, fn, data); 4047 + } while (!error); 4048 + 4049 + if (error != -ENOENT) 4050 + return error; 4051 + } 4052 + 4053 + return 0; 4054 + } 4055 + 4290 4056 /* 4291 4057 * Change the owner of a btree. 4292 4058 * ··· 4386 4002 * just queue the modified buffer as delayed write buffer so the transaction 4387 4003 * recovery completion writes the changes to disk. 
4388 4004 */ 4005 + struct xfs_btree_block_change_owner_info { 4006 + __uint64_t new_owner; 4007 + struct list_head *buffer_list; 4008 + }; 4009 + 4389 4010 static int 4390 4011 xfs_btree_block_change_owner( 4391 4012 struct xfs_btree_cur *cur, 4392 4013 int level, 4393 - __uint64_t new_owner, 4394 - struct list_head *buffer_list) 4014 + void *data) 4395 4015 { 4016 + struct xfs_btree_block_change_owner_info *bbcoi = data; 4396 4017 struct xfs_btree_block *block; 4397 4018 struct xfs_buf *bp; 4398 - union xfs_btree_ptr rptr; 4399 - 4400 - /* do right sibling readahead */ 4401 - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 4402 4019 4403 4020 /* modify the owner */ 4404 4021 block = xfs_btree_get_block(cur, level, &bp); 4405 4022 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4406 - block->bb_u.l.bb_owner = cpu_to_be64(new_owner); 4023 + block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); 4407 4024 else 4408 - block->bb_u.s.bb_owner = cpu_to_be32(new_owner); 4025 + block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); 4409 4026 4410 4027 /* 4411 4028 * If the block is a root block hosted in an inode, we might not have a ··· 4420 4035 xfs_trans_ordered_buf(cur->bc_tp, bp); 4421 4036 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4422 4037 } else { 4423 - xfs_buf_delwri_queue(bp, buffer_list); 4038 + xfs_buf_delwri_queue(bp, bbcoi->buffer_list); 4424 4039 } 4425 4040 } else { 4426 4041 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4427 4042 ASSERT(level == cur->bc_nlevels - 1); 4428 4043 } 4429 4044 4430 - /* now read rh sibling block for next iteration */ 4431 - xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4432 - if (xfs_btree_ptr_is_null(cur, &rptr)) 4433 - return -ENOENT; 4434 - 4435 - return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4045 + return 0; 4436 4046 } 4437 4047 4438 4048 int ··· 4436 4056 __uint64_t new_owner, 4437 4057 struct list_head *buffer_list) 4438 4058 { 4439 - union xfs_btree_ptr lptr; 4440 - int level; 4441 - 
struct xfs_btree_block *block = NULL; 4442 - int error = 0; 4059 + struct xfs_btree_block_change_owner_info bbcoi; 4443 4060 4444 - cur->bc_ops->init_ptr_from_cur(cur, &lptr); 4061 + bbcoi.new_owner = new_owner; 4062 + bbcoi.buffer_list = buffer_list; 4445 4063 4446 - /* for each level */ 4447 - for (level = cur->bc_nlevels - 1; level >= 0; level--) { 4448 - /* grab the left hand block */ 4449 - error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); 4450 - if (error) 4451 - return error; 4452 - 4453 - /* readahead the left most block for the next level down */ 4454 - if (level > 0) { 4455 - union xfs_btree_ptr *ptr; 4456 - 4457 - ptr = xfs_btree_ptr_addr(cur, 1, block); 4458 - xfs_btree_readahead_ptr(cur, ptr, 1); 4459 - 4460 - /* save for the next iteration of the loop */ 4461 - lptr = *ptr; 4462 - } 4463 - 4464 - /* for each buffer in the level */ 4465 - do { 4466 - error = xfs_btree_block_change_owner(cur, level, 4467 - new_owner, 4468 - buffer_list); 4469 - } while (!error); 4470 - 4471 - if (error != -ENOENT) 4472 - return error; 4473 - } 4474 - 4475 - return 0; 4064 + return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner, 4065 + &bbcoi); 4476 4066 } 4477 4067 4478 4068 /** ··· 4520 4170 for (level = 1; maxblocks > 1; level++) 4521 4171 maxblocks = (maxblocks + limits[1] - 1) / limits[1]; 4522 4172 return level; 4173 + } 4174 + 4175 + /* 4176 + * Query a regular btree for all records overlapping a given interval. 4177 + * Start with a LE lookup of the key of low_rec and return all records 4178 + * until we find a record with a key greater than the key of high_rec. 
4179 + */ 4180 + STATIC int 4181 + xfs_btree_simple_query_range( 4182 + struct xfs_btree_cur *cur, 4183 + union xfs_btree_key *low_key, 4184 + union xfs_btree_key *high_key, 4185 + xfs_btree_query_range_fn fn, 4186 + void *priv) 4187 + { 4188 + union xfs_btree_rec *recp; 4189 + union xfs_btree_key rec_key; 4190 + __int64_t diff; 4191 + int stat; 4192 + bool firstrec = true; 4193 + int error; 4194 + 4195 + ASSERT(cur->bc_ops->init_high_key_from_rec); 4196 + ASSERT(cur->bc_ops->diff_two_keys); 4197 + 4198 + /* 4199 + * Find the leftmost record. The btree cursor must be set 4200 + * to the low record used to generate low_key. 4201 + */ 4202 + stat = 0; 4203 + error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); 4204 + if (error) 4205 + goto out; 4206 + 4207 + while (stat) { 4208 + /* Find the record. */ 4209 + error = xfs_btree_get_rec(cur, &recp, &stat); 4210 + if (error || !stat) 4211 + break; 4212 + cur->bc_ops->init_high_key_from_rec(&rec_key, recp); 4213 + 4214 + /* Skip if high_key(rec) < low_key. */ 4215 + if (firstrec) { 4216 + firstrec = false; 4217 + diff = cur->bc_ops->diff_two_keys(cur, low_key, 4218 + &rec_key); 4219 + if (diff > 0) 4220 + goto advloop; 4221 + } 4222 + 4223 + /* Stop if high_key < low_key(rec). */ 4224 + diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); 4225 + if (diff > 0) 4226 + break; 4227 + 4228 + /* Callback */ 4229 + error = fn(cur, recp, priv); 4230 + if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT) 4231 + break; 4232 + 4233 + advloop: 4234 + /* Move on to the next record. */ 4235 + error = xfs_btree_increment(cur, 0, &stat); 4236 + if (error) 4237 + break; 4238 + } 4239 + 4240 + out: 4241 + return error; 4242 + } 4243 + 4244 + /* 4245 + * Query an overlapped interval btree for all records overlapping a given 4246 + * interval. This function roughly follows the algorithm given in 4247 + * "Interval Trees" of _Introduction to Algorithms_, which is section 4248 + * 14.3 in the 2nd and 3rd editions. 
4249 + * 4250 + * First, generate keys for the low and high records passed in. 4251 + * 4252 + * For any leaf node, generate the high and low keys for the record. 4253 + * If the record keys overlap with the query low/high keys, pass the 4254 + * record to the function iterator. 4255 + * 4256 + * For any internal node, compare the low and high keys of each 4257 + * pointer against the query low/high keys. If there's an overlap, 4258 + * follow the pointer. 4259 + * 4260 + * As an optimization, we stop scanning a block when we find a low key 4261 + * that is greater than the query's high key. 4262 + */ 4263 + STATIC int 4264 + xfs_btree_overlapped_query_range( 4265 + struct xfs_btree_cur *cur, 4266 + union xfs_btree_key *low_key, 4267 + union xfs_btree_key *high_key, 4268 + xfs_btree_query_range_fn fn, 4269 + void *priv) 4270 + { 4271 + union xfs_btree_ptr ptr; 4272 + union xfs_btree_ptr *pp; 4273 + union xfs_btree_key rec_key; 4274 + union xfs_btree_key rec_hkey; 4275 + union xfs_btree_key *lkp; 4276 + union xfs_btree_key *hkp; 4277 + union xfs_btree_rec *recp; 4278 + struct xfs_btree_block *block; 4279 + __int64_t ldiff; 4280 + __int64_t hdiff; 4281 + int level; 4282 + struct xfs_buf *bp; 4283 + int i; 4284 + int error; 4285 + 4286 + /* Load the root of the btree. */ 4287 + level = cur->bc_nlevels - 1; 4288 + cur->bc_ops->init_ptr_from_cur(cur, &ptr); 4289 + error = xfs_btree_lookup_get_block(cur, level, &ptr, &block); 4290 + if (error) 4291 + return error; 4292 + xfs_btree_get_block(cur, level, &bp); 4293 + trace_xfs_btree_overlapped_query_range(cur, level, bp); 4294 + #ifdef DEBUG 4295 + error = xfs_btree_check_block(cur, block, level, bp); 4296 + if (error) 4297 + goto out; 4298 + #endif 4299 + cur->bc_ptrs[level] = 1; 4300 + 4301 + while (level < cur->bc_nlevels) { 4302 + block = xfs_btree_get_block(cur, level, &bp); 4303 + 4304 + /* End of node, pop back towards the root. 
*/ 4305 + if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { 4306 + pop_up: 4307 + if (level < cur->bc_nlevels - 1) 4308 + cur->bc_ptrs[level + 1]++; 4309 + level++; 4310 + continue; 4311 + } 4312 + 4313 + if (level == 0) { 4314 + /* Handle a leaf node. */ 4315 + recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 4316 + 4317 + cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); 4318 + ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, 4319 + low_key); 4320 + 4321 + cur->bc_ops->init_key_from_rec(&rec_key, recp); 4322 + hdiff = cur->bc_ops->diff_two_keys(cur, high_key, 4323 + &rec_key); 4324 + 4325 + /* 4326 + * If (record's high key >= query's low key) and 4327 + * (query's high key >= record's low key), then 4328 + * this record overlaps the query range; callback. 4329 + */ 4330 + if (ldiff >= 0 && hdiff >= 0) { 4331 + error = fn(cur, recp, priv); 4332 + if (error < 0 || 4333 + error == XFS_BTREE_QUERY_RANGE_ABORT) 4334 + break; 4335 + } else if (hdiff < 0) { 4336 + /* Record is larger than high key; pop. */ 4337 + goto pop_up; 4338 + } 4339 + cur->bc_ptrs[level]++; 4340 + continue; 4341 + } 4342 + 4343 + /* Handle an internal node. */ 4344 + lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); 4345 + hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); 4346 + pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); 4347 + 4348 + ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); 4349 + hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); 4350 + 4351 + /* 4352 + * If (pointer's high key >= query's low key) and 4353 + * (query's high key >= pointer's low key), then 4354 + * this record overlaps the query range; follow pointer. 
4355 + */ 4356 + if (ldiff >= 0 && hdiff >= 0) { 4357 + level--; 4358 + error = xfs_btree_lookup_get_block(cur, level, pp, 4359 + &block); 4360 + if (error) 4361 + goto out; 4362 + xfs_btree_get_block(cur, level, &bp); 4363 + trace_xfs_btree_overlapped_query_range(cur, level, bp); 4364 + #ifdef DEBUG 4365 + error = xfs_btree_check_block(cur, block, level, bp); 4366 + if (error) 4367 + goto out; 4368 + #endif 4369 + cur->bc_ptrs[level] = 1; 4370 + continue; 4371 + } else if (hdiff < 0) { 4372 + /* The low key is larger than the upper range; pop. */ 4373 + goto pop_up; 4374 + } 4375 + cur->bc_ptrs[level]++; 4376 + } 4377 + 4378 + out: 4379 + /* 4380 + * If we don't end this function with the cursor pointing at a record 4381 + * block, a subsequent non-error cursor deletion will not release 4382 + * node-level buffers, causing a buffer leak. This is quite possible 4383 + * with a zero-results range query, so release the buffers if we 4384 + * failed to return any results. 4385 + */ 4386 + if (cur->bc_bufs[0] == NULL) { 4387 + for (i = 0; i < cur->bc_nlevels; i++) { 4388 + if (cur->bc_bufs[i]) { 4389 + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 4390 + cur->bc_bufs[i] = NULL; 4391 + cur->bc_ptrs[i] = 0; 4392 + cur->bc_ra[i] = 0; 4393 + } 4394 + } 4395 + } 4396 + 4397 + return error; 4398 + } 4399 + 4400 + /* 4401 + * Query a btree for all records overlapping a given interval of keys. The 4402 + * supplied function will be called with each record found; return one of the 4403 + * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error 4404 + * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a 4405 + * negative error code. 
4406 + */ 4407 + int 4408 + xfs_btree_query_range( 4409 + struct xfs_btree_cur *cur, 4410 + union xfs_btree_irec *low_rec, 4411 + union xfs_btree_irec *high_rec, 4412 + xfs_btree_query_range_fn fn, 4413 + void *priv) 4414 + { 4415 + union xfs_btree_rec rec; 4416 + union xfs_btree_key low_key; 4417 + union xfs_btree_key high_key; 4418 + 4419 + /* Find the keys of both ends of the interval. */ 4420 + cur->bc_rec = *high_rec; 4421 + cur->bc_ops->init_rec_from_cur(cur, &rec); 4422 + cur->bc_ops->init_key_from_rec(&high_key, &rec); 4423 + 4424 + cur->bc_rec = *low_rec; 4425 + cur->bc_ops->init_rec_from_cur(cur, &rec); 4426 + cur->bc_ops->init_key_from_rec(&low_key, &rec); 4427 + 4428 + /* Enforce low key < high key. */ 4429 + if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0) 4430 + return -EINVAL; 4431 + 4432 + if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 4433 + return xfs_btree_simple_query_range(cur, &low_key, 4434 + &high_key, fn, priv); 4435 + return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, 4436 + fn, priv); 4523 4437 }

+70 -18

fs/xfs/libxfs/xfs_btree.h

··· 19 19 #define __XFS_BTREE_H__ 20 20 21 21 struct xfs_buf; 22 - struct xfs_bmap_free; 22 + struct xfs_defer_ops; 23 23 struct xfs_inode; 24 24 struct xfs_mount; 25 25 struct xfs_trans; ··· 38 38 }; 39 39 40 40 union xfs_btree_key { 41 - xfs_bmbt_key_t bmbt; 42 - xfs_bmdr_key_t bmbr; /* bmbt root block */ 43 - xfs_alloc_key_t alloc; 44 - xfs_inobt_key_t inobt; 41 + struct xfs_bmbt_key bmbt; 42 + xfs_bmdr_key_t bmbr; /* bmbt root block */ 43 + xfs_alloc_key_t alloc; 44 + struct xfs_inobt_key inobt; 45 + struct xfs_rmap_key rmap; 46 + }; 47 + 48 + /* 49 + * In-core key that holds both low and high keys for overlapped btrees. 50 + * The two keys are packed next to each other on disk, so do the same 51 + * in memory. Preserve the existing xfs_btree_key as a single key to 52 + * avoid the mental model breakage that would happen if we passed a 53 + * bigkey into a function that operates on a single key. 54 + */ 55 + union xfs_btree_bigkey { 56 + struct xfs_bmbt_key bmbt; 57 + xfs_bmdr_key_t bmbr; /* bmbt root block */ 58 + xfs_alloc_key_t alloc; 59 + struct xfs_inobt_key inobt; 60 + struct { 61 + struct xfs_rmap_key rmap; 62 + struct xfs_rmap_key rmap_hi; 63 + }; 45 64 }; 46 65 47 66 union xfs_btree_rec { 48 - xfs_bmbt_rec_t bmbt; 49 - xfs_bmdr_rec_t bmbr; /* bmbt root block */ 50 - xfs_alloc_rec_t alloc; 51 - xfs_inobt_rec_t inobt; 67 + struct xfs_bmbt_rec bmbt; 68 + xfs_bmdr_rec_t bmbr; /* bmbt root block */ 69 + struct xfs_alloc_rec alloc; 70 + struct xfs_inobt_rec inobt; 71 + struct xfs_rmap_rec rmap; 52 72 }; 53 73 54 74 /* ··· 83 63 #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) 84 64 #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 85 65 #define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) 66 + #define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) 86 67 87 68 /* 88 69 * For logging record fields. 
··· 116 95 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ 117 96 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ 118 97 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ 98 + case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(__mp, rmap, stat); break; \ 119 99 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 120 100 } \ 121 101 } while (0) ··· 137 115 __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ 138 116 case XFS_BTNUM_FINO: \ 139 117 __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ 118 + case XFS_BTNUM_RMAP: \ 119 + __XFS_BTREE_STATS_ADD(__mp, rmap, stat, val); break; \ 140 120 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 141 121 } \ 142 122 } while (0) 143 123 144 - #define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ 124 + #define XFS_BTREE_MAXLEVELS 9 /* max of all btrees */ 145 125 146 126 struct xfs_btree_ops { 147 127 /* size of the key and record structures */ ··· 182 158 /* init values of btree structures */ 183 159 void (*init_key_from_rec)(union xfs_btree_key *key, 184 160 union xfs_btree_rec *rec); 185 - void (*init_rec_from_key)(union xfs_btree_key *key, 186 - union xfs_btree_rec *rec); 187 161 void (*init_rec_from_cur)(struct xfs_btree_cur *cur, 188 162 union xfs_btree_rec *rec); 189 163 void (*init_ptr_from_cur)(struct xfs_btree_cur *cur, 190 164 union xfs_btree_ptr *ptr); 165 + void (*init_high_key_from_rec)(union xfs_btree_key *key, 166 + union xfs_btree_rec *rec); 191 167 192 168 /* difference between key value and cursor value */ 193 169 __int64_t (*key_diff)(struct xfs_btree_cur *cur, 194 170 union xfs_btree_key *key); 171 + 172 + /* 173 + * Difference between key2 and key1 -- positive if key1 > key2, 174 + * negative if key1 < key2, and zero if equal. 
175 + */ 176 + __int64_t (*diff_two_keys)(struct xfs_btree_cur *cur, 177 + union xfs_btree_key *key1, 178 + union xfs_btree_key *key2); 195 179 196 180 const struct xfs_buf_ops *buf_ops; 197 181 ··· 224 192 #define LASTREC_DELREC 2 225 193 226 194 195 + union xfs_btree_irec { 196 + struct xfs_alloc_rec_incore a; 197 + struct xfs_bmbt_irec b; 198 + struct xfs_inobt_rec_incore i; 199 + struct xfs_rmap_irec r; 200 + }; 201 + 227 202 /* 228 203 * Btree cursor structure. 229 204 * This collects all information needed by the btree code in one place. ··· 241 202 struct xfs_mount *bc_mp; /* file system mount struct */ 242 203 const struct xfs_btree_ops *bc_ops; 243 204 uint bc_flags; /* btree features - below */ 244 - union { 245 - xfs_alloc_rec_incore_t a; 246 - xfs_bmbt_irec_t b; 247 - xfs_inobt_rec_incore_t i; 248 - } bc_rec; /* current insert/search record value */ 205 + union xfs_btree_irec bc_rec; /* current insert/search record value */ 249 206 struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ 250 207 int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ 251 208 __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ ··· 253 218 union { 254 219 struct { /* needed for BNO, CNT, INO */ 255 220 struct xfs_buf *agbp; /* agf/agi buffer pointer */ 221 + struct xfs_defer_ops *dfops; /* deferred updates */ 256 222 xfs_agnumber_t agno; /* ag number */ 257 223 } a; 258 224 struct { /* needed for BMAP */ 259 225 struct xfs_inode *ip; /* pointer to our inode */ 260 - struct xfs_bmap_free *flist; /* list to free after */ 226 + struct xfs_defer_ops *dfops; /* deferred updates */ 261 227 xfs_fsblock_t firstblock; /* 1st blk allocated */ 262 228 int allocated; /* count of alloced */ 263 229 short forksize; /* fork's inode space */ ··· 274 238 #define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ 275 239 #define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ 276 240 #define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree 
blocks */ 241 + #define XFS_BTREE_OVERLAPPING (1<<4) /* overlapping intervals */ 277 242 278 243 279 244 #define XFS_BTREE_NOERROR 0 ··· 513 476 bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); 514 477 uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, 515 478 unsigned long len); 479 + 480 + /* return codes */ 481 + #define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ 482 + #define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */ 483 + typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, 484 + union xfs_btree_rec *rec, void *priv); 485 + 486 + int xfs_btree_query_range(struct xfs_btree_cur *cur, 487 + union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec, 488 + xfs_btree_query_range_fn fn, void *priv); 489 + 490 + typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level, 491 + void *data); 492 + int xfs_btree_visit_blocks(struct xfs_btree_cur *cur, 493 + xfs_btree_visit_blocks_fn fn, void *data); 516 494 517 495 #endif /* __XFS_BTREE_H__ */

+3 -3

fs/xfs/libxfs/xfs_da_btree.c

··· 2029 2029 error = xfs_bmapi_write(tp, dp, *bno, count, 2030 2030 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 2031 2031 args->firstblock, args->total, &map, &nmap, 2032 - args->flist); 2032 + args->dfops); 2033 2033 if (error) 2034 2034 return error; 2035 2035 ··· 2052 2052 error = xfs_bmapi_write(tp, dp, b, c, 2053 2053 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2054 2054 args->firstblock, args->total, 2055 - &mapp[mapi], &nmap, args->flist); 2055 + &mapp[mapi], &nmap, args->dfops); 2056 2056 if (error) 2057 2057 goto out_free_map; 2058 2058 if (nmap < 1) ··· 2362 2362 */ 2363 2363 error = xfs_bunmapi(tp, dp, dead_blkno, count, 2364 2364 xfs_bmapi_aflag(w), 0, args->firstblock, 2365 - args->flist, &done); 2365 + args->dfops, &done); 2366 2366 if (error == -ENOSPC) { 2367 2367 if (w != XFS_DATA_FORK) 2368 2368 break;

+2 -2

fs/xfs/libxfs/xfs_da_btree.h

··· 19 19 #ifndef __XFS_DA_BTREE_H__ 20 20 #define __XFS_DA_BTREE_H__ 21 21 22 - struct xfs_bmap_free; 22 + struct xfs_defer_ops; 23 23 struct xfs_inode; 24 24 struct xfs_trans; 25 25 struct zone; ··· 70 70 xfs_ino_t inumber; /* input/output inode number */ 71 71 struct xfs_inode *dp; /* directory inode to manipulate */ 72 72 xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */ 73 - struct xfs_bmap_free *flist; /* ptr to freelist for bmap_finish */ 73 + struct xfs_defer_ops *dfops; /* ptr to freelist for bmap_finish */ 74 74 struct xfs_trans *trans; /* current trans (changes over time) */ 75 75 xfs_extlen_t total; /* total blocks needed, for 1st bmap */ 76 76 int whichfork; /* data or attribute fork */

+1

fs/xfs/libxfs/xfs_da_format.h

··· 629 629 struct xfs_attr_sf_hdr { /* constant-structure header block */ 630 630 __be16 totsize; /* total bytes in shortform list */ 631 631 __u8 count; /* count of active entries */ 632 + __u8 padding; 632 633 } hdr; 633 634 struct xfs_attr_sf_entry { 634 635 __uint8_t namelen; /* actual length of name (no NULL) */

+463

fs/xfs/libxfs/xfs_defer.c

··· 1 + /* 2 + * Copyright (C) 2016 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_log_format.h" 25 + #include "xfs_trans_resv.h" 26 + #include "xfs_bit.h" 27 + #include "xfs_sb.h" 28 + #include "xfs_mount.h" 29 + #include "xfs_defer.h" 30 + #include "xfs_trans.h" 31 + #include "xfs_trace.h" 32 + 33 + /* 34 + * Deferred Operations in XFS 35 + * 36 + * Due to the way locking rules work in XFS, certain transactions (block 37 + * mapping and unmapping, typically) have permanent reservations so that 38 + * we can roll the transaction to adhere to AG locking order rules and 39 + * to unlock buffers between metadata updates. Prior to rmap/reflink, 40 + * the mapping code had a mechanism to perform these deferrals for 41 + * extents that were going to be freed; this code makes that facility 42 + * more generic. 
43 + * 44 + * When adding the reverse mapping and reflink features, it became 45 + * necessary to perform complex remapping multi-transactions to comply 46 + * with AG locking order rules, and to be able to spread a single 47 + * refcount update operation (an operation on an n-block extent can 48 + * update as many as n records!) among multiple transactions. XFS can 49 + * roll a transaction to facilitate this, but using this facility 50 + * requires us to log "intent" items in case log recovery needs to 51 + * redo the operation, and to log "done" items to indicate that redo 52 + * is not necessary. 53 + * 54 + * Deferred work is tracked in xfs_defer_pending items. Each pending 55 + * item tracks one type of deferred work. Incoming work items (which 56 + * have not yet had an intent logged) are attached to a pending item 57 + * on the dop_intake list, where they wait for the caller to finish 58 + * the deferred operations. 59 + * 60 + * Finishing a set of deferred operations is an involved process. To 61 + * start, we define "rolling a deferred-op transaction" as follows: 62 + * 63 + * > For each xfs_defer_pending item on the dop_intake list, 64 + * - Sort the work items in AG order. XFS locking 65 + * order rules require us to lock buffers in AG order. 66 + * - Create a log intent item for that type. 67 + * - Attach it to the pending item. 68 + * - Move the pending item from the dop_intake list to the 69 + * dop_pending list. 70 + * > Roll the transaction. 71 + * 72 + * NOTE: To avoid exceeding the transaction reservation, we limit the 73 + * number of items that we attach to a given xfs_defer_pending. 74 + * 75 + * The actual finishing process looks like this: 76 + * 77 + * > For each xfs_defer_pending in the dop_pending list, 78 + * - Roll the deferred-op transaction as above. 79 + * - Create a log done item for that type, and attach it to the 80 + * log intent item. 
81 + * - For each work item attached to the log intent item, 82 + * * Perform the described action. 83 + * * Attach the work item to the log done item. 84 + * 85 + * The key here is that we must log an intent item for all pending 86 + * work items every time we roll the transaction, and that we must log 87 + * a done item as soon as the work is completed. With this mechanism 88 + * we can perform complex remapping operations, chaining intent items 89 + * as needed. 90 + * 91 + * This is an example of remapping the extent (E, E+B) into file X at 92 + * offset A and dealing with the extent (C, C+B) already being mapped 93 + * there: 94 + * +-------------------------------------------------+ 95 + * | Unmap file X startblock C offset A length B | t0 96 + * | Intent to reduce refcount for extent (C, B) | 97 + * | Intent to remove rmap (X, C, A, B) | 98 + * | Intent to free extent (D, 1) (bmbt block) | 99 + * | Intent to map (X, A, B) at startblock E | 100 + * +-------------------------------------------------+ 101 + * | Map file X startblock E offset A length B | t1 102 + * | Done mapping (X, E, A, B) | 103 + * | Intent to increase refcount for extent (E, B) | 104 + * | Intent to add rmap (X, E, A, B) | 105 + * +-------------------------------------------------+ 106 + * | Reduce refcount for extent (C, B) | t2 107 + * | Done reducing refcount for extent (C, B) | 108 + * | Increase refcount for extent (E, B) | 109 + * | Done increasing refcount for extent (E, B) | 110 + * | Intent to free extent (C, B) | 111 + * | Intent to free extent (F, 1) (refcountbt block) | 112 + * | Intent to remove rmap (F, 1, REFC) | 113 + * +-------------------------------------------------+ 114 + * | Remove rmap (X, C, A, B) | t3 115 + * | Done removing rmap (X, C, A, B) | 116 + * | Add rmap (X, E, A, B) | 117 + * | Done adding rmap (X, E, A, B) | 118 + * | Remove rmap (F, 1, REFC) | 119 + * | Done removing rmap (F, 1, REFC) | 120 + * +-------------------------------------------------+ 121 + * 
| Free extent (C, B) | t4 122 + * | Done freeing extent (C, B) | 123 + * | Free extent (D, 1) | 124 + * | Done freeing extent (D, 1) | 125 + * | Free extent (F, 1) | 126 + * | Done freeing extent (F, 1) | 127 + * +-------------------------------------------------+ 128 + * 129 + * If we should crash before t2 commits, log recovery replays 130 + * the following intent items: 131 + * 132 + * - Intent to reduce refcount for extent (C, B) 133 + * - Intent to remove rmap (X, C, A, B) 134 + * - Intent to free extent (D, 1) (bmbt block) 135 + * - Intent to increase refcount for extent (E, B) 136 + * - Intent to add rmap (X, E, A, B) 137 + * 138 + * In the process of recovering, it should also generate and take care 139 + * of these intent items: 140 + * 141 + * - Intent to free extent (C, B) 142 + * - Intent to free extent (F, 1) (refcountbt block) 143 + * - Intent to remove rmap (F, 1, REFC) 144 + */ 145 + 146 + static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX]; 147 + 148 + /* 149 + * For each pending item in the intake list, log its intent item and the 150 + * associated extents, then add the entire intake list to the end of 151 + * the pending list. 152 + */ 153 + STATIC void 154 + xfs_defer_intake_work( 155 + struct xfs_trans *tp, 156 + struct xfs_defer_ops *dop) 157 + { 158 + struct list_head *li; 159 + struct xfs_defer_pending *dfp; 160 + 161 + list_for_each_entry(dfp, &dop->dop_intake, dfp_list) { 162 + trace_xfs_defer_intake_work(tp->t_mountp, dfp); 163 + dfp->dfp_intent = dfp->dfp_type->create_intent(tp, 164 + dfp->dfp_count); 165 + list_sort(tp->t_mountp, &dfp->dfp_work, 166 + dfp->dfp_type->diff_items); 167 + list_for_each(li, &dfp->dfp_work) 168 + dfp->dfp_type->log_item(tp, dfp->dfp_intent, li); 169 + } 170 + 171 + list_splice_tail_init(&dop->dop_intake, &dop->dop_pending); 172 + } 173 + 174 + /* Abort all the intents that were committed. 
*/ 175 + STATIC void 176 + xfs_defer_trans_abort( 177 + struct xfs_trans *tp, 178 + struct xfs_defer_ops *dop, 179 + int error) 180 + { 181 + struct xfs_defer_pending *dfp; 182 + 183 + trace_xfs_defer_trans_abort(tp->t_mountp, dop); 184 + /* 185 + * If the transaction was committed, drop the intent reference 186 + * since we're bailing out of here. The other reference is 187 + * dropped when the intent hits the AIL. If the transaction 188 + * was not committed, the intent is freed by the intent item 189 + * unlock handler on abort. 190 + */ 191 + if (!dop->dop_committed) 192 + return; 193 + 194 + /* Abort intent items. */ 195 + list_for_each_entry(dfp, &dop->dop_pending, dfp_list) { 196 + trace_xfs_defer_pending_abort(tp->t_mountp, dfp); 197 + if (dfp->dfp_committed) 198 + dfp->dfp_type->abort_intent(dfp->dfp_intent); 199 + } 200 + 201 + /* Shut down FS. */ 202 + xfs_force_shutdown(tp->t_mountp, (error == -EFSCORRUPTED) ? 203 + SHUTDOWN_CORRUPT_INCORE : SHUTDOWN_META_IO_ERROR); 204 + } 205 + 206 + /* Roll a transaction so we can do some deferred op processing. */ 207 + STATIC int 208 + xfs_defer_trans_roll( 209 + struct xfs_trans **tp, 210 + struct xfs_defer_ops *dop, 211 + struct xfs_inode *ip) 212 + { 213 + int i; 214 + int error; 215 + 216 + /* Log all the joined inodes except the one we passed in. */ 217 + for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) { 218 + if (dop->dop_inodes[i] == ip) 219 + continue; 220 + xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); 221 + } 222 + 223 + trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); 224 + 225 + /* Roll the transaction. */ 226 + error = xfs_trans_roll(tp, ip); 227 + if (error) { 228 + trace_xfs_defer_trans_roll_error((*tp)->t_mountp, dop, error); 229 + xfs_defer_trans_abort(*tp, dop, error); 230 + return error; 231 + } 232 + dop->dop_committed = true; 233 + 234 + /* Rejoin the joined inodes except the one we passed in. 
*/ 235 + for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) { 236 + if (dop->dop_inodes[i] == ip) 237 + continue; 238 + xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); 239 + } 240 + 241 + return error; 242 + } 243 + 244 + /* Do we have any work items to finish? */ 245 + bool 246 + xfs_defer_has_unfinished_work( 247 + struct xfs_defer_ops *dop) 248 + { 249 + return !list_empty(&dop->dop_pending) || !list_empty(&dop->dop_intake); 250 + } 251 + 252 + /* 253 + * Add this inode to the deferred op. Each joined inode is relogged 254 + * each time we roll the transaction, in addition to any inode passed 255 + * to xfs_defer_finish(). 256 + */ 257 + int 258 + xfs_defer_join( 259 + struct xfs_defer_ops *dop, 260 + struct xfs_inode *ip) 261 + { 262 + int i; 263 + 264 + for (i = 0; i < XFS_DEFER_OPS_NR_INODES; i++) { 265 + if (dop->dop_inodes[i] == ip) 266 + return 0; 267 + else if (dop->dop_inodes[i] == NULL) { 268 + dop->dop_inodes[i] = ip; 269 + return 0; 270 + } 271 + } 272 + 273 + return -EFSCORRUPTED; 274 + } 275 + 276 + /* 277 + * Finish all the pending work. This involves logging intent items for 278 + * any work items that wandered in since the last transaction roll (if 279 + * one has even happened), rolling the transaction, and finishing the 280 + * work items in the first item on the logged-and-pending list. 281 + * 282 + * If an inode is provided, relog it to the new transaction. 283 + */ 284 + int 285 + xfs_defer_finish( 286 + struct xfs_trans **tp, 287 + struct xfs_defer_ops *dop, 288 + struct xfs_inode *ip) 289 + { 290 + struct xfs_defer_pending *dfp; 291 + struct list_head *li; 292 + struct list_head *n; 293 + void *done_item = NULL; 294 + void *state; 295 + int error = 0; 296 + void (*cleanup_fn)(struct xfs_trans *, void *, int); 297 + 298 + ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 299 + 300 + trace_xfs_defer_finish((*tp)->t_mountp, dop); 301 + 302 + /* Until we run out of pending work to finish... 
*/ 303 + while (xfs_defer_has_unfinished_work(dop)) { 304 + /* Log intents for work items sitting in the intake. */ 305 + xfs_defer_intake_work(*tp, dop); 306 + 307 + /* Roll the transaction. */ 308 + error = xfs_defer_trans_roll(tp, dop, ip); 309 + if (error) 310 + goto out; 311 + 312 + /* Mark all pending intents as committed. */ 313 + list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) { 314 + if (dfp->dfp_committed) 315 + break; 316 + trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp); 317 + dfp->dfp_committed = true; 318 + } 319 + 320 + /* Log an intent-done item for the first pending item. */ 321 + dfp = list_first_entry(&dop->dop_pending, 322 + struct xfs_defer_pending, dfp_list); 323 + trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp); 324 + done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent, 325 + dfp->dfp_count); 326 + cleanup_fn = dfp->dfp_type->finish_cleanup; 327 + 328 + /* Finish the work items. */ 329 + state = NULL; 330 + list_for_each_safe(li, n, &dfp->dfp_work) { 331 + list_del(li); 332 + dfp->dfp_count--; 333 + error = dfp->dfp_type->finish_item(*tp, dop, li, 334 + done_item, &state); 335 + if (error) { 336 + /* 337 + * Clean up after ourselves and jump out. 338 + * xfs_defer_cancel will take care of freeing 339 + * all these lists and stuff. 340 + */ 341 + if (cleanup_fn) 342 + cleanup_fn(*tp, state, error); 343 + xfs_defer_trans_abort(*tp, dop, error); 344 + goto out; 345 + } 346 + } 347 + /* Done with the dfp, free it. */ 348 + list_del(&dfp->dfp_list); 349 + kmem_free(dfp); 350 + 351 + if (cleanup_fn) 352 + cleanup_fn(*tp, state, error); 353 + } 354 + 355 + out: 356 + if (error) 357 + trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error); 358 + else 359 + trace_xfs_defer_finish_done((*tp)->t_mountp, dop); 360 + return error; 361 + } 362 + 363 + /* 364 + * Free up any items left in the list. 
365 + */ 366 + void 367 + xfs_defer_cancel( 368 + struct xfs_defer_ops *dop) 369 + { 370 + struct xfs_defer_pending *dfp; 371 + struct xfs_defer_pending *pli; 372 + struct list_head *pwi; 373 + struct list_head *n; 374 + 375 + trace_xfs_defer_cancel(NULL, dop); 376 + 377 + /* 378 + * Free the pending items. Caller should already have arranged 379 + * for the intent items to be released. 380 + */ 381 + list_for_each_entry_safe(dfp, pli, &dop->dop_intake, dfp_list) { 382 + trace_xfs_defer_intake_cancel(NULL, dfp); 383 + list_del(&dfp->dfp_list); 384 + list_for_each_safe(pwi, n, &dfp->dfp_work) { 385 + list_del(pwi); 386 + dfp->dfp_count--; 387 + dfp->dfp_type->cancel_item(pwi); 388 + } 389 + ASSERT(dfp->dfp_count == 0); 390 + kmem_free(dfp); 391 + } 392 + list_for_each_entry_safe(dfp, pli, &dop->dop_pending, dfp_list) { 393 + trace_xfs_defer_pending_cancel(NULL, dfp); 394 + list_del(&dfp->dfp_list); 395 + list_for_each_safe(pwi, n, &dfp->dfp_work) { 396 + list_del(pwi); 397 + dfp->dfp_count--; 398 + dfp->dfp_type->cancel_item(pwi); 399 + } 400 + ASSERT(dfp->dfp_count == 0); 401 + kmem_free(dfp); 402 + } 403 + } 404 + 405 + /* Add an item for later deferred processing. */ 406 + void 407 + xfs_defer_add( 408 + struct xfs_defer_ops *dop, 409 + enum xfs_defer_ops_type type, 410 + struct list_head *li) 411 + { 412 + struct xfs_defer_pending *dfp = NULL; 413 + 414 + /* 415 + * Add the item to a pending item at the end of the intake list. 416 + * If the last pending item has the same type, reuse it. Else, 417 + * create a new pending item at the end of the intake list.
418 + */ 419 + if (!list_empty(&dop->dop_intake)) { 420 + dfp = list_last_entry(&dop->dop_intake, 421 + struct xfs_defer_pending, dfp_list); 422 + if (dfp->dfp_type->type != type || 423 + (dfp->dfp_type->max_items && 424 + dfp->dfp_count >= dfp->dfp_type->max_items)) 425 + dfp = NULL; 426 + } 427 + if (!dfp) { 428 + dfp = kmem_alloc(sizeof(struct xfs_defer_pending), 429 + KM_SLEEP | KM_NOFS); 430 + dfp->dfp_type = defer_op_types[type]; 431 + dfp->dfp_committed = false; 432 + dfp->dfp_intent = NULL; 433 + dfp->dfp_count = 0; 434 + INIT_LIST_HEAD(&dfp->dfp_work); 435 + list_add_tail(&dfp->dfp_list, &dop->dop_intake); 436 + } 437 + 438 + list_add_tail(li, &dfp->dfp_work); 439 + dfp->dfp_count++; 440 + } 441 + 442 + /* Register an operation type descriptor in the defer_op_types table, indexed by type->type. */ 443 + void 444 + xfs_defer_init_op_type( 445 + const struct xfs_defer_op_type *type) 446 + { 447 + defer_op_types[type->type] = type; 448 + } 449 + 450 + /* Initialize a deferred operations structure (empty intake/pending lists, no joined inodes) and reset *fbp to NULLFSBLOCK. */ 451 + void 452 + xfs_defer_init( 453 + struct xfs_defer_ops *dop, 454 + xfs_fsblock_t *fbp) 455 + { 456 + dop->dop_committed = false; 457 + dop->dop_low = false; 458 + memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); 459 + *fbp = NULLFSBLOCK; 460 + INIT_LIST_HEAD(&dop->dop_intake); 461 + INIT_LIST_HEAD(&dop->dop_pending); 462 + trace_xfs_defer_init(NULL, dop); 463 + }

+97

fs/xfs/libxfs/xfs_defer.h

··· 1 + /* 2 + * Copyright (C) 2016 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __XFS_DEFER_H__ 21 + #define __XFS_DEFER_H__ 22 + 23 + struct xfs_defer_op_type; 24 + 25 + /* 26 + * Save a log intent item and a list of extents, so that we can replay 27 + * whatever action had to happen to the extent list and file the log done 28 + * item. 29 + */ 30 + struct xfs_defer_pending { 31 + const struct xfs_defer_op_type *dfp_type; /* function pointers */ 32 + struct list_head dfp_list; /* pending items */ 33 + bool dfp_committed; /* committed trans? */ 34 + void *dfp_intent; /* log intent item */ 35 + struct list_head dfp_work; /* work items */ 36 + unsigned int dfp_count; /* # work items on dfp_work */ 37 + }; 38 + 39 + /* 40 + * Header for deferred operation list (struct xfs_defer_ops, declared after the op type enum below). 41 + * 42 + * dop_low is used by the allocator to activate the lowspace algorithm - 43 + * when free space is running low the extent allocator may choose to 44 + * allocate an extent from an AG without leaving sufficient space for 45 + * a btree split when inserting the new extent.
In this case the allocator 46 + * will enable the lowspace algorithm which is supposed to allow further 47 + * allocations (such as btree splits and newroots) to allocate from 48 + * sequential AGs. In order to avoid locking AGs out of order the lowspace 49 + * algorithm will start searching for free space from AG 0. If the correct 50 + * transaction reservations have been made then this algorithm will eventually 51 + * find all the space it needs. 52 + */ 53 + enum xfs_defer_ops_type { 54 + XFS_DEFER_OPS_TYPE_RMAP, 55 + XFS_DEFER_OPS_TYPE_FREE, 56 + XFS_DEFER_OPS_TYPE_MAX, 57 + }; 58 + 59 + #define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ 60 + 61 + struct xfs_defer_ops { 62 + bool dop_committed; /* did any trans commit? */ 63 + bool dop_low; /* alloc in low mode */ 64 + struct list_head dop_intake; /* unlogged pending work */ 65 + struct list_head dop_pending; /* logged pending work */ 66 + 67 + /* relog these inodes with each roll */ 68 + struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; 69 + }; 70 + 71 + void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, 72 + struct list_head *h); 73 + int xfs_defer_finish(struct xfs_trans **tp, struct xfs_defer_ops *dop, 74 + struct xfs_inode *ip); 75 + void xfs_defer_cancel(struct xfs_defer_ops *dop); 76 + void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); 77 + bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); 78 + int xfs_defer_join(struct xfs_defer_ops *dop, struct xfs_inode *ip); 79 + 80 + /* Description of a deferred type.
*/ 81 + struct xfs_defer_op_type { 82 + enum xfs_defer_ops_type type; 83 + unsigned int max_items; /* max work items per pending item; 0 means no limit */ 84 + void (*abort_intent)(void *); 85 + void *(*create_done)(struct xfs_trans *, void *, unsigned int); 86 + int (*finish_item)(struct xfs_trans *, struct xfs_defer_ops *, 87 + struct list_head *, void *, void **); 88 + void (*finish_cleanup)(struct xfs_trans *, void *, int); 89 + void (*cancel_item)(struct list_head *); 90 + int (*diff_items)(void *, struct list_head *, struct list_head *); 91 + void *(*create_intent)(struct xfs_trans *, uint); 92 + void (*log_item)(struct xfs_trans *, void *, struct list_head *); 93 + }; 94 + 95 + void xfs_defer_init_op_type(const struct xfs_defer_op_type *type); 96 + 97 + #endif /* __XFS_DEFER_H__ */

+8 -7

fs/xfs/libxfs/xfs_dir2.c

··· 21 21 #include "xfs_log_format.h" 22 22 #include "xfs_trans_resv.h" 23 23 #include "xfs_mount.h" 24 + #include "xfs_defer.h" 24 25 #include "xfs_da_format.h" 25 26 #include "xfs_da_btree.h" 26 27 #include "xfs_inode.h" ··· 260 259 struct xfs_name *name, 261 260 xfs_ino_t inum, /* new entry inode number */ 262 261 xfs_fsblock_t *first, /* bmap's firstblock */ 263 - xfs_bmap_free_t *flist, /* bmap's freeblock list */ 262 + struct xfs_defer_ops *dfops, /* bmap's freeblock list */ 264 263 xfs_extlen_t total) /* bmap's total block count */ 265 264 { 266 265 struct xfs_da_args *args; ··· 287 286 args->inumber = inum; 288 287 args->dp = dp; 289 288 args->firstblock = first; 290 - args->flist = flist; 289 + args->dfops = dfops; 291 290 args->total = total; 292 291 args->whichfork = XFS_DATA_FORK; 293 292 args->trans = tp; ··· 437 436 struct xfs_name *name, 438 437 xfs_ino_t ino, 439 438 xfs_fsblock_t *first, /* bmap's firstblock */ 440 - xfs_bmap_free_t *flist, /* bmap's freeblock list */ 439 + struct xfs_defer_ops *dfops, /* bmap's freeblock list */ 441 440 xfs_extlen_t total) /* bmap's total block count */ 442 441 { 443 442 struct xfs_da_args *args; ··· 459 458 args->inumber = ino; 460 459 args->dp = dp; 461 460 args->firstblock = first; 462 - args->flist = flist; 461 + args->dfops = dfops; 463 462 args->total = total; 464 463 args->whichfork = XFS_DATA_FORK; 465 464 args->trans = tp; ··· 499 498 struct xfs_name *name, /* name of entry to replace */ 500 499 xfs_ino_t inum, /* new inode number */ 501 500 xfs_fsblock_t *first, /* bmap's firstblock */ 502 - xfs_bmap_free_t *flist, /* bmap's freeblock list */ 501 + struct xfs_defer_ops *dfops, /* bmap's freeblock list */ 503 502 xfs_extlen_t total) /* bmap's total block count */ 504 503 { 505 504 struct xfs_da_args *args; ··· 524 523 args->inumber = inum; 525 524 args->dp = dp; 526 525 args->firstblock = first; 527 - args->flist = flist; 526 + args->dfops = dfops; 528 527 args->total = total; 529 528 args->whichfork = 
XFS_DATA_FORK; 530 529 args->trans = tp; ··· 681 680 682 681 /* Unmap the fsblock(s). */ 683 682 error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0, 684 - args->firstblock, args->flist, &done); 683 + args->firstblock, args->dfops, &done); 685 684 if (error) { 686 685 /* 687 686 * ENOSPC actually can happen if we're in a removename with no

+4 -4

fs/xfs/libxfs/xfs_dir2.h

··· 18 18 #ifndef __XFS_DIR2_H__ 19 19 #define __XFS_DIR2_H__ 20 20 21 - struct xfs_bmap_free; 21 + struct xfs_defer_ops; 22 22 struct xfs_da_args; 23 23 struct xfs_inode; 24 24 struct xfs_mount; ··· 129 129 extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, 130 130 struct xfs_name *name, xfs_ino_t inum, 131 131 xfs_fsblock_t *first, 132 - struct xfs_bmap_free *flist, xfs_extlen_t tot); 132 + struct xfs_defer_ops *dfops, xfs_extlen_t tot); 133 133 extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, 134 134 struct xfs_name *name, xfs_ino_t *inum, 135 135 struct xfs_name *ci_name); 136 136 extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, 137 137 struct xfs_name *name, xfs_ino_t ino, 138 138 xfs_fsblock_t *first, 139 - struct xfs_bmap_free *flist, xfs_extlen_t tot); 139 + struct xfs_defer_ops *dfops, xfs_extlen_t tot); 140 140 extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, 141 141 struct xfs_name *name, xfs_ino_t inum, 142 142 xfs_fsblock_t *first, 143 - struct xfs_bmap_free *flist, xfs_extlen_t tot); 143 + struct xfs_defer_ops *dfops, xfs_extlen_t tot); 144 144 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, 145 145 struct xfs_name *name); 146 146

+119 -12

fs/xfs/libxfs/xfs_format.h

··· 455 455 } 456 456 457 457 #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ 458 + #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ 458 459 #define XFS_SB_FEAT_RO_COMPAT_ALL \ 459 - (XFS_SB_FEAT_RO_COMPAT_FINOBT) 460 + (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ 461 + XFS_SB_FEAT_RO_COMPAT_RMAPBT) 460 462 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL 461 463 static inline bool 462 464 xfs_sb_has_ro_compat_feature( ··· 540 538 (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID); 541 539 } 542 540 541 + static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp) 542 + { 543 + return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && 544 + (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT); 545 + } 546 + 543 547 /* 544 548 * end of superblock version macros 545 549 */ ··· 606 598 #define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) 607 599 608 600 /* 609 - * Btree number 0 is bno, 1 is cnt. This value gives the size of the 601 + * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the 610 602 * arrays below. 611 603 */ 612 - #define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) 604 + #define XFS_BTNUM_AGF ((int)XFS_BTNUM_RMAPi + 1) 613 605 614 606 /* 615 607 * The second word of agf_levels in the first a.g. overlaps the EFS ··· 626 618 __be32 agf_seqno; /* sequence # starting from 0 */ 627 619 __be32 agf_length; /* size in blocks of a.g. 
*/ 628 620 /* 629 - * Freespace information 621 + * Freespace and rmap information 630 622 */ 631 623 __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ 632 - __be32 agf_spare0; /* spare field */ 633 624 __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ 634 - __be32 agf_spare1; /* spare field */ 635 625 636 626 __be32 agf_flfirst; /* first freelist block's index */ 637 627 __be32 agf_fllast; /* last freelist block's index */ ··· 1314 1308 #define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) 1315 1309 1316 1310 /* 1317 - * The first data block of an AG depends on whether the filesystem was formatted 1318 - * with the finobt feature. If so, account for the finobt reserved root btree 1319 - * block. 1311 + * Reverse mapping btree format definitions 1312 + * 1313 + * There is a btree for the reverse map per allocation group 1320 1314 */ 1321 - #define XFS_PREALLOC_BLOCKS(mp) \ 1315 + #define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */ 1316 + 1317 + /* 1318 + * Ownership info for an extent. This is used to create reverse-mapping 1319 + * entries. 1320 + */ 1321 + #define XFS_OWNER_INFO_ATTR_FORK (1 << 0) 1322 + #define XFS_OWNER_INFO_BMBT_BLOCK (1 << 1) 1323 + struct xfs_owner_info { 1324 + uint64_t oi_owner; 1325 + xfs_fileoff_t oi_offset; 1326 + unsigned int oi_flags; 1327 + }; 1328 + 1329 + /* 1330 + * Special owner types. 1331 + * 1332 + * Seeing as we only support up to 8EB, we have the upper bit of the owner field 1333 + * to tell us we have a special owner value. We use these for static metadata 1334 + * allocated at mkfs/growfs time, as well as for freespace management metadata. 
1335 + */ 1336 + #define XFS_RMAP_OWN_NULL (-1ULL) /* No owner, for growfs */ 1337 + #define XFS_RMAP_OWN_UNKNOWN (-2ULL) /* Unknown owner, for EFI recovery */ 1338 + #define XFS_RMAP_OWN_FS (-3ULL) /* static fs metadata */ 1339 + #define XFS_RMAP_OWN_LOG (-4ULL) /* static fs metadata */ 1340 + #define XFS_RMAP_OWN_AG (-5ULL) /* AG freespace btree blocks */ 1341 + #define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */ 1342 + #define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */ 1343 + #define XFS_RMAP_OWN_MIN (-8ULL) /* guard */ 1344 + 1345 + #define XFS_RMAP_NON_INODE_OWNER(owner) (!!((owner) & (1ULL << 63))) 1346 + 1347 + /* 1348 + * Data record structure 1349 + */ 1350 + struct xfs_rmap_rec { 1351 + __be32 rm_startblock; /* extent start block */ 1352 + __be32 rm_blockcount; /* extent length */ 1353 + __be64 rm_owner; /* extent owner */ 1354 + __be64 rm_offset; /* offset within the owner */ 1355 + }; 1356 + 1357 + /* 1358 + * rmap btree record 1359 + * rm_offset:63 is the attribute fork flag 1360 + * rm_offset:62 is the bmbt block flag 1361 + * rm_offset:61 is the unwritten extent flag (same as l0:63 in bmbt) 1362 + * rm_offset:54-60 aren't used and should be zero 1363 + * rm_offset:0-53 is the block offset within the inode 1364 + */ 1365 + #define XFS_RMAP_OFF_ATTR_FORK ((__uint64_t)1ULL << 63) 1366 + #define XFS_RMAP_OFF_BMBT_BLOCK ((__uint64_t)1ULL << 62) 1367 + #define XFS_RMAP_OFF_UNWRITTEN ((__uint64_t)1ULL << 61) 1368 + 1369 + #define XFS_RMAP_LEN_MAX ((__uint32_t)~0U) 1370 + #define XFS_RMAP_OFF_FLAGS (XFS_RMAP_OFF_ATTR_FORK | \ 1371 + XFS_RMAP_OFF_BMBT_BLOCK | \ 1372 + XFS_RMAP_OFF_UNWRITTEN) 1373 + #define XFS_RMAP_OFF_MASK ((__uint64_t)0x3FFFFFFFFFFFFFULL) 1374 + 1375 + #define XFS_RMAP_OFF(off) ((off) & XFS_RMAP_OFF_MASK) 1376 + 1377 + #define XFS_RMAP_IS_BMBT_BLOCK(off) (!!((off) & XFS_RMAP_OFF_BMBT_BLOCK)) 1378 + #define XFS_RMAP_IS_ATTR_FORK(off) (!!((off) & XFS_RMAP_OFF_ATTR_FORK)) 1379 + #define XFS_RMAP_IS_UNWRITTEN(off) (!!((off) &
XFS_RMAP_OFF_UNWRITTEN)) 1380 + 1381 + #define RMAPBT_STARTBLOCK_BITLEN 32 1382 + #define RMAPBT_BLOCKCOUNT_BITLEN 32 1383 + #define RMAPBT_OWNER_BITLEN 64 1384 + #define RMAPBT_ATTRFLAG_BITLEN 1 1385 + #define RMAPBT_BMBTFLAG_BITLEN 1 1386 + #define RMAPBT_EXNTFLAG_BITLEN 1 1387 + #define RMAPBT_UNUSED_OFFSET_BITLEN 7 1388 + #define RMAPBT_OFFSET_BITLEN 54 1389 + 1390 + #define XFS_RMAP_ATTR_FORK (1 << 0) 1391 + #define XFS_RMAP_BMBT_BLOCK (1 << 1) 1392 + #define XFS_RMAP_UNWRITTEN (1 << 2) 1393 + #define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ 1394 + XFS_RMAP_BMBT_BLOCK) 1395 + #define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) 1396 + struct xfs_rmap_irec { 1397 + xfs_agblock_t rm_startblock; /* extent start block */ 1398 + xfs_extlen_t rm_blockcount; /* extent length */ 1399 + __uint64_t rm_owner; /* extent owner */ 1400 + __uint64_t rm_offset; /* offset within the owner */ 1401 + unsigned int rm_flags; /* state flags */ 1402 + }; 1403 + 1404 + /* 1405 + * Key structure 1406 + * 1407 + * We don't use the length for lookups 1408 + */ 1409 + struct xfs_rmap_key { 1410 + __be32 rm_startblock; /* extent start block */ 1411 + __be64 rm_owner; /* extent owner */ 1412 + __be64 rm_offset; /* offset within the owner */ 1413 + } __attribute__((packed)); 1414 + 1415 + /* btree pointer type */ 1416 + typedef __be32 xfs_rmap_ptr_t; 1417 + 1418 + #define XFS_RMAP_BLOCK(mp) \ 1322 1419 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ 1323 1420 XFS_FIBT_BLOCK(mp) + 1 : \ 1324 1421 XFS_IBT_BLOCK(mp) + 1) 1325 - 1326 - 1327 1422 1328 1423 /* 1329 1424 * BMAP Btree format definitions

+1

fs/xfs/libxfs/xfs_fs.h

··· 206 206 #define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ 207 207 #define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ 208 208 #define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */ 209 + #define XFS_FSOP_GEOM_FLAGS_RMAPBT 0x80000 /* Reverse mapping btree */ 209 210 210 211 /* 211 212 * Minimum and maximum sizes need for growth checks.

+14 -9

fs/xfs/libxfs/xfs_ialloc.c

··· 24 24 #include "xfs_bit.h" 25 25 #include "xfs_sb.h" 26 26 #include "xfs_mount.h" 27 + #include "xfs_defer.h" 27 28 #include "xfs_inode.h" 28 29 #include "xfs_btree.h" 29 30 #include "xfs_ialloc.h" ··· 40 39 #include "xfs_icache.h" 41 40 #include "xfs_trace.h" 42 41 #include "xfs_log.h" 42 + #include "xfs_rmap.h" 43 43 44 44 45 45 /* ··· 616 614 args.tp = tp; 617 615 args.mp = tp->t_mountp; 618 616 args.fsbno = NULLFSBLOCK; 617 + xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INODES); 619 618 620 619 #ifdef DEBUG 621 620 /* randomly do sparse inode allocations */ ··· 1820 1817 struct xfs_mount *mp, 1821 1818 xfs_agnumber_t agno, 1822 1819 struct xfs_inobt_rec_incore *rec, 1823 - struct xfs_bmap_free *flist) 1820 + struct xfs_defer_ops *dfops) 1824 1821 { 1825 1822 xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino); 1826 1823 int startidx, endidx; 1827 1824 int nextbit; 1828 1825 xfs_agblock_t agbno; 1829 1826 int contigblk; 1827 + struct xfs_owner_info oinfo; 1830 1828 DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS); 1829 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 1831 1830 1832 1831 if (!xfs_inobt_issparse(rec->ir_holemask)) { 1833 1832 /* not sparse, calculate extent info directly */ 1834 - xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, sagbno), 1835 - mp->m_ialloc_blks); 1833 + xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, sagbno), 1834 + mp->m_ialloc_blks, &oinfo); 1836 1835 return; 1837 1836 } 1838 1837 ··· 1877 1872 1878 1873 ASSERT(agbno % mp->m_sb.sb_spino_align == 0); 1879 1874 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); 1880 - xfs_bmap_add_free(mp, flist, XFS_AGB_TO_FSB(mp, agno, agbno), 1881 - contigblk); 1875 + xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, agbno), 1876 + contigblk, &oinfo); 1882 1877 1883 1878 /* reset range to current bit and carry on... 
*/ 1884 1879 startidx = endidx = nextbit; ··· 1894 1889 struct xfs_trans *tp, 1895 1890 struct xfs_buf *agbp, 1896 1891 xfs_agino_t agino, 1897 - struct xfs_bmap_free *flist, 1892 + struct xfs_defer_ops *dfops, 1898 1893 struct xfs_icluster *xic, 1899 1894 struct xfs_inobt_rec_incore *orec) 1900 1895 { ··· 1981 1976 goto error0; 1982 1977 } 1983 1978 1984 - xfs_difree_inode_chunk(mp, agno, &rec, flist); 1979 + xfs_difree_inode_chunk(mp, agno, &rec, dfops); 1985 1980 } else { 1986 1981 xic->deleted = 0; 1987 1982 ··· 2126 2121 xfs_difree( 2127 2122 struct xfs_trans *tp, /* transaction pointer */ 2128 2123 xfs_ino_t inode, /* inode to be freed */ 2129 - struct xfs_bmap_free *flist, /* extents to free */ 2124 + struct xfs_defer_ops *dfops, /* extents to free */ 2130 2125 struct xfs_icluster *xic) /* cluster info if deleted */ 2131 2126 { 2132 2127 /* REFERENCED */ ··· 2178 2173 /* 2179 2174 * Fix up the inode allocation btree. 2180 2175 */ 2181 - error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec); 2176 + error = xfs_difree_inobt(mp, tp, agbp, agino, dfops, xic, &rec); 2182 2177 if (error) 2183 2178 goto error0; 2184 2179

+1 -1

fs/xfs/libxfs/xfs_ialloc.h

··· 95 95 xfs_difree( 96 96 struct xfs_trans *tp, /* transaction pointer */ 97 97 xfs_ino_t inode, /* inode to be freed */ 98 - struct xfs_bmap_free *flist, /* extents to free */ 98 + struct xfs_defer_ops *dfops, /* extents to free */ 99 99 struct xfs_icluster *ifree); /* cluster info if deleted */ 100 100 101 101 /*

+7 -11

fs/xfs/libxfs/xfs_ialloc_btree.c

··· 32 32 #include "xfs_trace.h" 33 33 #include "xfs_cksum.h" 34 34 #include "xfs_trans.h" 35 + #include "xfs_rmap.h" 35 36 36 37 37 38 STATIC int ··· 97 96 memset(&args, 0, sizeof(args)); 98 97 args.tp = cur->bc_tp; 99 98 args.mp = cur->bc_mp; 99 + xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INOBT); 100 100 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); 101 101 args.minlen = 1; 102 102 args.maxlen = 1; ··· 127 125 struct xfs_btree_cur *cur, 128 126 struct xfs_buf *bp) 129 127 { 128 + struct xfs_owner_info oinfo; 129 + 130 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 130 131 return xfs_free_extent(cur->bc_tp, 131 - XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1); 132 + XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 133 + &oinfo); 132 134 } 133 135 134 136 STATIC int ··· 149 143 union xfs_btree_rec *rec) 150 144 { 151 145 key->inobt.ir_startino = rec->inobt.ir_startino; 152 - } 153 - 154 - STATIC void 155 - xfs_inobt_init_rec_from_key( 156 - union xfs_btree_key *key, 157 - union xfs_btree_rec *rec) 158 - { 159 - rec->inobt.ir_startino = key->inobt.ir_startino; 160 146 } 161 147 162 148 STATIC void ··· 312 314 .get_minrecs = xfs_inobt_get_minrecs, 313 315 .get_maxrecs = xfs_inobt_get_maxrecs, 314 316 .init_key_from_rec = xfs_inobt_init_key_from_rec, 315 - .init_rec_from_key = xfs_inobt_init_rec_from_key, 316 317 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 317 318 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, 318 319 .key_diff = xfs_inobt_key_diff, ··· 333 336 .get_minrecs = xfs_inobt_get_minrecs, 334 337 .get_maxrecs = xfs_inobt_get_maxrecs, 335 338 .init_key_from_rec = xfs_inobt_init_key_from_rec, 336 - .init_rec_from_key = xfs_inobt_init_rec_from_key, 337 339 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 338 340 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, 339 341 .key_diff = xfs_inobt_key_diff,

+1

fs/xfs/libxfs/xfs_inode_buf.c

··· 22 22 #include "xfs_log_format.h" 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_mount.h" 25 + #include "xfs_defer.h" 25 26 #include "xfs_inode.h" 26 27 #include "xfs_error.h" 27 28 #include "xfs_cksum.h"

+61 -2

fs/xfs/libxfs/xfs_log_format.h

··· 110 110 #define XLOG_REG_TYPE_COMMIT 18 111 111 #define XLOG_REG_TYPE_TRANSHDR 19 112 112 #define XLOG_REG_TYPE_ICREATE 20 113 - #define XLOG_REG_TYPE_MAX 20 113 + #define XLOG_REG_TYPE_RUI_FORMAT 21 114 + #define XLOG_REG_TYPE_RUD_FORMAT 22 115 + #define XLOG_REG_TYPE_MAX 22 114 116 115 117 /* 116 118 * Flags to log operation header ··· 229 227 #define XFS_LI_DQUOT 0x123d 230 228 #define XFS_LI_QUOTAOFF 0x123e 231 229 #define XFS_LI_ICREATE 0x123f 230 + #define XFS_LI_RUI 0x1240 /* rmap update intent */ 231 + #define XFS_LI_RUD 0x1241 232 232 233 233 #define XFS_LI_TYPE_DESC \ 234 234 { XFS_LI_EFI, "XFS_LI_EFI" }, \ ··· 240 236 { XFS_LI_BUF, "XFS_LI_BUF" }, \ 241 237 { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \ 242 238 { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }, \ 243 - { XFS_LI_ICREATE, "XFS_LI_ICREATE" } 239 + { XFS_LI_ICREATE, "XFS_LI_ICREATE" }, \ 240 + { XFS_LI_RUI, "XFS_LI_RUI" }, \ 241 + { XFS_LI_RUD, "XFS_LI_RUD" } 244 242 245 243 /* 246 244 * Inode Log Item Format definitions. ··· 608 602 __uint64_t efd_efi_id; /* id of corresponding efi */ 609 603 xfs_extent_64_t efd_extents[1]; /* array of extents freed */ 610 604 } xfs_efd_log_format_64_t; 605 + 606 + /* 607 + * RUI/RUD (reverse mapping) log format definitions 608 + */ 609 + struct xfs_map_extent { 610 + __uint64_t me_owner; 611 + __uint64_t me_startblock; 612 + __uint64_t me_startoff; 613 + __uint32_t me_len; 614 + __uint32_t me_flags; 615 + }; 616 + 617 + /* rmap me_flags: upper bits are flags, lower byte is type code */ 618 + #define XFS_RMAP_EXTENT_MAP 1 619 + #define XFS_RMAP_EXTENT_UNMAP 3 620 + #define XFS_RMAP_EXTENT_CONVERT 5 621 + #define XFS_RMAP_EXTENT_ALLOC 7 622 + #define XFS_RMAP_EXTENT_FREE 8 623 + #define XFS_RMAP_EXTENT_TYPE_MASK 0xFF 624 + 625 + #define XFS_RMAP_EXTENT_ATTR_FORK (1U << 31) 626 + #define XFS_RMAP_EXTENT_BMBT_BLOCK (1U << 30) 627 + #define XFS_RMAP_EXTENT_UNWRITTEN (1U << 29) 628 + 629 + #define XFS_RMAP_EXTENT_FLAGS (XFS_RMAP_EXTENT_TYPE_MASK | \ 630 +
XFS_RMAP_EXTENT_ATTR_FORK | \ 631 + XFS_RMAP_EXTENT_BMBT_BLOCK | \ 632 + XFS_RMAP_EXTENT_UNWRITTEN) 633 + 634 + /* 635 + * This is the structure used to lay out an rui log item in the 636 + * log. The rui_extents field is a variable size array whose 637 + * size is given by rui_nextents. 638 + */ 639 + struct xfs_rui_log_format { 640 + __uint16_t rui_type; /* rui log item type */ 641 + __uint16_t rui_size; /* size of this item */ 642 + __uint32_t rui_nextents; /* # extents to rmap */ 643 + __uint64_t rui_id; /* rui identifier */ 644 + struct xfs_map_extent rui_extents[1]; /* array of extents to rmap */ 645 + }; 646 + 647 + /* 648 + * This is the structure used to lay out an rud log item in the 649 + * log. The rud carries no extent array of its own; it refers to the 650 + * corresponding rui by rud_rui_id. 651 + */ 652 + struct xfs_rud_log_format { 653 + __uint16_t rud_type; /* rud log item type */ 654 + __uint16_t rud_size; /* size of this item */ 655 + __uint32_t __pad; 656 + __uint64_t rud_rui_id; /* id of corresponding rui */ 657 + }; 611 658 612 659 /* 613 660 * Dquot Log format definitions.

+1399

fs/xfs/libxfs/xfs_rmap.c

··· 1 + /* 2 + * Copyright (c) 2014 Red Hat, Inc. 3 + * All Rights Reserved. 4 + * 5 + * This program is free software; you can redistribute it and/or 6 + * modify it under the terms of the GNU General Public License as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it would be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program; if not, write the Free Software Foundation, 16 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 + */ 18 + #include "xfs.h" 19 + #include "xfs_fs.h" 20 + #include "xfs_shared.h" 21 + #include "xfs_format.h" 22 + #include "xfs_log_format.h" 23 + #include "xfs_trans_resv.h" 24 + #include "xfs_bit.h" 25 + #include "xfs_sb.h" 26 + #include "xfs_mount.h" 27 + #include "xfs_defer.h" 28 + #include "xfs_da_format.h" 29 + #include "xfs_da_btree.h" 30 + #include "xfs_btree.h" 31 + #include "xfs_trans.h" 32 + #include "xfs_alloc.h" 33 + #include "xfs_rmap.h" 34 + #include "xfs_rmap_btree.h" 35 + #include "xfs_trans_space.h" 36 + #include "xfs_trace.h" 37 + #include "xfs_error.h" 38 + #include "xfs_extent_busy.h" 39 + #include "xfs_bmap.h" 40 + #include "xfs_inode.h" 41 + 42 + /* 43 + * Lookup the first record less than or equal to [bno, len, owner, offset] 44 + * in the btree given by cur. 
45 + */ 46 + int 47 + xfs_rmap_lookup_le( 48 + struct xfs_btree_cur *cur, 49 + xfs_agblock_t bno, 50 + xfs_extlen_t len, 51 + uint64_t owner, 52 + uint64_t offset, 53 + unsigned int flags, 54 + int *stat) 55 + { 56 + cur->bc_rec.r.rm_startblock = bno; 57 + cur->bc_rec.r.rm_blockcount = len; 58 + cur->bc_rec.r.rm_owner = owner; 59 + cur->bc_rec.r.rm_offset = offset; 60 + cur->bc_rec.r.rm_flags = flags; 61 + return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 62 + } 63 + 64 + /* 65 + * Lookup the record exactly matching [bno, len, owner, offset] 66 + * in the btree given by cur. 67 + */ 68 + int 69 + xfs_rmap_lookup_eq( 70 + struct xfs_btree_cur *cur, 71 + xfs_agblock_t bno, 72 + xfs_extlen_t len, 73 + uint64_t owner, 74 + uint64_t offset, 75 + unsigned int flags, 76 + int *stat) 77 + { 78 + cur->bc_rec.r.rm_startblock = bno; 79 + cur->bc_rec.r.rm_blockcount = len; 80 + cur->bc_rec.r.rm_owner = owner; 81 + cur->bc_rec.r.rm_offset = offset; 82 + cur->bc_rec.r.rm_flags = flags; 83 + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 84 + } 85 + 86 + /* 87 + * Update the record referred to by cur to the value given 88 + * by [bno, len, owner, offset]. 89 + * This either works (return 0) or gets an EFSCORRUPTED error. 
90 + */ 91 + STATIC int 92 + xfs_rmap_update( 93 + struct xfs_btree_cur *cur, 94 + struct xfs_rmap_irec *irec) 95 + { 96 + union xfs_btree_rec rec; 97 + int error; 98 + 99 + trace_xfs_rmap_update(cur->bc_mp, cur->bc_private.a.agno, 100 + irec->rm_startblock, irec->rm_blockcount, 101 + irec->rm_owner, irec->rm_offset, irec->rm_flags); 102 + 103 + rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock); 104 + rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount); 105 + rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner); 106 + rec.rmap.rm_offset = cpu_to_be64( 107 + xfs_rmap_irec_offset_pack(irec)); 108 + error = xfs_btree_update(cur, &rec); 109 + if (error) 110 + trace_xfs_rmap_update_error(cur->bc_mp, 111 + cur->bc_private.a.agno, error, _RET_IP_); 112 + return error; 113 + } 114 + 115 + int 116 + xfs_rmap_insert( 117 + struct xfs_btree_cur *rcur, 118 + xfs_agblock_t agbno, 119 + xfs_extlen_t len, 120 + uint64_t owner, 121 + uint64_t offset, 122 + unsigned int flags) 123 + { 124 + int i; 125 + int error; 126 + 127 + trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_private.a.agno, agbno, 128 + len, owner, offset, flags); 129 + 130 + error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); 131 + if (error) 132 + goto done; 133 + XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 0, done); 134 + 135 + rcur->bc_rec.r.rm_startblock = agbno; 136 + rcur->bc_rec.r.rm_blockcount = len; 137 + rcur->bc_rec.r.rm_owner = owner; 138 + rcur->bc_rec.r.rm_offset = offset; 139 + rcur->bc_rec.r.rm_flags = flags; 140 + error = xfs_btree_insert(rcur, &i); 141 + if (error) 142 + goto done; 143 + XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done); 144 + done: 145 + if (error) 146 + trace_xfs_rmap_insert_error(rcur->bc_mp, 147 + rcur->bc_private.a.agno, error, _RET_IP_); 148 + return error; 149 + } 150 + 151 + static int 152 + xfs_rmap_btrec_to_irec( 153 + union xfs_btree_rec *rec, 154 + struct xfs_rmap_irec *irec) 155 + { 156 + irec->rm_flags = 0; 157 + irec->rm_startblock = 
be32_to_cpu(rec->rmap.rm_startblock); 158 + irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount); 159 + irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner); 160 + return xfs_rmap_irec_offset_unpack(be64_to_cpu(rec->rmap.rm_offset), 161 + irec); 162 + } 163 + 164 + /* 165 + * Get the data from the pointed-to record. 166 + */ 167 + int 168 + xfs_rmap_get_rec( 169 + struct xfs_btree_cur *cur, 170 + struct xfs_rmap_irec *irec, 171 + int *stat) 172 + { 173 + union xfs_btree_rec *rec; 174 + int error; 175 + 176 + error = xfs_btree_get_rec(cur, &rec, stat); 177 + if (error || !*stat) 178 + return error; 179 + 180 + return xfs_rmap_btrec_to_irec(rec, irec); 181 + } 182 + 183 + /* 184 + * Find the extent in the rmap btree and remove it. 185 + * 186 + * The record we find should always be an exact match for the extent that we're 187 + * looking for, since we insert them into the btree without modification. 188 + * 189 + * Special Case #1: when growing the filesystem, we "free" an extent when 190 + * growing the last AG. This extent is new space and so it is not tracked as 191 + * used space in the btree. The growfs code will pass in an owner of 192 + * XFS_RMAP_OWN_NULL to indicate that it expected that there is no owner of this 193 + * extent. We verify that - the extent lookup result in a record that does not 194 + * overlap. 195 + * 196 + * Special Case #2: EFIs do not record the owner of the extent, so when 197 + * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap 198 + * btree to ignore the owner (i.e. wildcard match) so we don't trigger 199 + * corruption checks during log recovery. 
200 + */ 201 + STATIC int 202 + xfs_rmap_unmap( 203 + struct xfs_btree_cur *cur, 204 + xfs_agblock_t bno, 205 + xfs_extlen_t len, 206 + bool unwritten, 207 + struct xfs_owner_info *oinfo) 208 + { 209 + struct xfs_mount *mp = cur->bc_mp; 210 + struct xfs_rmap_irec ltrec; 211 + uint64_t ltoff; 212 + int error = 0; 213 + int i; 214 + uint64_t owner; 215 + uint64_t offset; 216 + unsigned int flags; 217 + bool ignore_off; 218 + 219 + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); 220 + ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) || 221 + (flags & XFS_RMAP_BMBT_BLOCK); 222 + if (unwritten) 223 + flags |= XFS_RMAP_UNWRITTEN; 224 + trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len, 225 + unwritten, oinfo); 226 + 227 + /* 228 + * We should always have a left record because there's a static record 229 + * for the AG headers at rm_startblock == 0 created by mkfs/growfs that 230 + * will not ever be removed from the tree. 231 + */ 232 + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i); 233 + if (error) 234 + goto out_error; 235 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); 236 + 237 + error = xfs_rmap_get_rec(cur, &ltrec, &i); 238 + if (error) 239 + goto out_error; 240 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); 241 + trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, 242 + cur->bc_private.a.agno, ltrec.rm_startblock, 243 + ltrec.rm_blockcount, ltrec.rm_owner, 244 + ltrec.rm_offset, ltrec.rm_flags); 245 + ltoff = ltrec.rm_offset; 246 + 247 + /* 248 + * For growfs, the incoming extent must be beyond the left record we 249 + * just found as it is new space and won't be used by anyone. This is 250 + * just a corruption check as we don't actually do anything with this 251 + * extent. Note that we need to use >= instead of > because it might 252 + * be the case that the "left" extent goes all the way to EOFS. 
253 + */ 254 + if (owner == XFS_RMAP_OWN_NULL) { 255 + XFS_WANT_CORRUPTED_GOTO(mp, bno >= ltrec.rm_startblock + 256 + ltrec.rm_blockcount, out_error); 257 + goto out_done; 258 + } 259 + 260 + /* Make sure the unwritten flag matches. */ 261 + XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == 262 + (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); 263 + 264 + /* Make sure the extent we found covers the entire freeing range. */ 265 + XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && 266 + ltrec.rm_startblock + ltrec.rm_blockcount >= 267 + bno + len, out_error); 268 + 269 + /* Make sure the owner matches what we expect to find in the tree. */ 270 + XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || 271 + XFS_RMAP_NON_INODE_OWNER(owner), out_error); 272 + 273 + /* Check the offset, if necessary. */ 274 + if (!XFS_RMAP_NON_INODE_OWNER(owner)) { 275 + if (flags & XFS_RMAP_BMBT_BLOCK) { 276 + XFS_WANT_CORRUPTED_GOTO(mp, 277 + ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK, 278 + out_error); 279 + } else { 280 + XFS_WANT_CORRUPTED_GOTO(mp, 281 + ltrec.rm_offset <= offset, out_error); 282 + XFS_WANT_CORRUPTED_GOTO(mp, 283 + ltoff + ltrec.rm_blockcount >= offset + len, 284 + out_error); 285 + } 286 + } 287 + 288 + if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { 289 + /* exact match, simply remove the record from rmap tree */ 290 + trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, 291 + ltrec.rm_startblock, ltrec.rm_blockcount, 292 + ltrec.rm_owner, ltrec.rm_offset, 293 + ltrec.rm_flags); 294 + error = xfs_btree_delete(cur, &i); 295 + if (error) 296 + goto out_error; 297 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); 298 + } else if (ltrec.rm_startblock == bno) { 299 + /* 300 + * overlap left hand side of extent: move the start, trim the 301 + * length and update the current record. 
302 + * 303 + * ltbno ltlen 304 + * Orig: |oooooooooooooooooooo| 305 + * Freeing: |fffffffff| 306 + * Result: |rrrrrrrrrr| 307 + * bno len 308 + */ 309 + ltrec.rm_startblock += len; 310 + ltrec.rm_blockcount -= len; 311 + if (!ignore_off) 312 + ltrec.rm_offset += len; 313 + error = xfs_rmap_update(cur, &ltrec); 314 + if (error) 315 + goto out_error; 316 + } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) { 317 + /* 318 + * overlap right hand side of extent: trim the length and update 319 + * the current record. 320 + * 321 + * ltbno ltlen 322 + * Orig: |oooooooooooooooooooo| 323 + * Freeing: |fffffffff| 324 + * Result: |rrrrrrrrrr| 325 + * bno len 326 + */ 327 + ltrec.rm_blockcount -= len; 328 + error = xfs_rmap_update(cur, &ltrec); 329 + if (error) 330 + goto out_error; 331 + } else { 332 + 333 + /* 334 + * overlap middle of extent: trim the length of the existing 335 + * record to the length of the new left-extent size, increment 336 + * the insertion position so we can insert a new record 337 + * containing the remaining right-extent space. 
338 + * 339 + * ltbno ltlen 340 + * Orig: |oooooooooooooooooooo| 341 + * Freeing: |fffffffff| 342 + * Result: |rrrrr| |rrrr| 343 + * bno len 344 + */ 345 + xfs_extlen_t orig_len = ltrec.rm_blockcount; 346 + 347 + ltrec.rm_blockcount = bno - ltrec.rm_startblock; 348 + error = xfs_rmap_update(cur, &ltrec); 349 + if (error) 350 + goto out_error; 351 + 352 + error = xfs_btree_increment(cur, 0, &i); 353 + if (error) 354 + goto out_error; 355 + 356 + cur->bc_rec.r.rm_startblock = bno + len; 357 + cur->bc_rec.r.rm_blockcount = orig_len - len - 358 + ltrec.rm_blockcount; 359 + cur->bc_rec.r.rm_owner = ltrec.rm_owner; 360 + if (ignore_off) 361 + cur->bc_rec.r.rm_offset = 0; 362 + else 363 + cur->bc_rec.r.rm_offset = offset + len; 364 + cur->bc_rec.r.rm_flags = flags; 365 + trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, 366 + cur->bc_rec.r.rm_startblock, 367 + cur->bc_rec.r.rm_blockcount, 368 + cur->bc_rec.r.rm_owner, 369 + cur->bc_rec.r.rm_offset, 370 + cur->bc_rec.r.rm_flags); 371 + error = xfs_btree_insert(cur, &i); 372 + if (error) 373 + goto out_error; 374 + } 375 + 376 + out_done: 377 + trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len, 378 + unwritten, oinfo); 379 + out_error: 380 + if (error) 381 + trace_xfs_rmap_unmap_error(mp, cur->bc_private.a.agno, 382 + error, _RET_IP_); 383 + return error; 384 + } 385 + 386 + /* 387 + * Remove a reference to an extent in the rmap btree. 
388 + */ 389 + int 390 + xfs_rmap_free( 391 + struct xfs_trans *tp, 392 + struct xfs_buf *agbp, 393 + xfs_agnumber_t agno, 394 + xfs_agblock_t bno, 395 + xfs_extlen_t len, 396 + struct xfs_owner_info *oinfo) 397 + { 398 + struct xfs_mount *mp = tp->t_mountp; 399 + struct xfs_btree_cur *cur; 400 + int error; 401 + 402 + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 403 + return 0; 404 + 405 + cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); 406 + 407 + error = xfs_rmap_unmap(cur, bno, len, false, oinfo); 408 + if (error) 409 + goto out_error; 410 + 411 + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 412 + return 0; 413 + 414 + out_error: 415 + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 416 + return error; 417 + } 418 + 419 + /* 420 + * A mergeable rmap must have the same owner and the same values for 421 + * the unwritten, attr_fork, and bmbt flags. The startblock and 422 + * offset are checked separately. 423 + */ 424 + static bool 425 + xfs_rmap_is_mergeable( 426 + struct xfs_rmap_irec *irec, 427 + uint64_t owner, 428 + unsigned int flags) 429 + { 430 + if (irec->rm_owner == XFS_RMAP_OWN_NULL) 431 + return false; 432 + if (irec->rm_owner != owner) 433 + return false; 434 + if ((flags & XFS_RMAP_UNWRITTEN) ^ 435 + (irec->rm_flags & XFS_RMAP_UNWRITTEN)) 436 + return false; 437 + if ((flags & XFS_RMAP_ATTR_FORK) ^ 438 + (irec->rm_flags & XFS_RMAP_ATTR_FORK)) 439 + return false; 440 + if ((flags & XFS_RMAP_BMBT_BLOCK) ^ 441 + (irec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 442 + return false; 443 + return true; 444 + } 445 + 446 + /* 447 + * When we allocate a new block, the first thing we do is add a reference to 448 + * the extent in the rmap btree. This takes the form of a [agbno, length, 449 + * owner, offset] record. Flags are encoded in the high bits of the offset 450 + * field. 
451 + */ 452 + STATIC int 453 + xfs_rmap_map( 454 + struct xfs_btree_cur *cur, 455 + xfs_agblock_t bno, 456 + xfs_extlen_t len, 457 + bool unwritten, 458 + struct xfs_owner_info *oinfo) 459 + { 460 + struct xfs_mount *mp = cur->bc_mp; 461 + struct xfs_rmap_irec ltrec; 462 + struct xfs_rmap_irec gtrec; 463 + int have_gt; 464 + int have_lt; 465 + int error = 0; 466 + int i; 467 + uint64_t owner; 468 + uint64_t offset; 469 + unsigned int flags = 0; 470 + bool ignore_off; 471 + 472 + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); 473 + ASSERT(owner != 0); 474 + ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) || 475 + (flags & XFS_RMAP_BMBT_BLOCK); 476 + if (unwritten) 477 + flags |= XFS_RMAP_UNWRITTEN; 478 + trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, 479 + unwritten, oinfo); 480 + 481 + /* 482 + * For the initial lookup, look for an exact match or the left-adjacent 483 + * record for our insertion point. This will also give us the record for 484 + * start block contiguity tests. 485 + */ 486 + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, 487 + &have_lt); 488 + if (error) 489 + goto out_error; 490 + XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error); 491 + 492 + error = xfs_rmap_get_rec(cur, &ltrec, &have_lt); 493 + if (error) 494 + goto out_error; 495 + XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error); 496 + trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, 497 + cur->bc_private.a.agno, ltrec.rm_startblock, 498 + ltrec.rm_blockcount, ltrec.rm_owner, 499 + ltrec.rm_offset, ltrec.rm_flags); 500 + 501 + if (!xfs_rmap_is_mergeable(&ltrec, owner, flags)) 502 + have_lt = 0; 503 + 504 + XFS_WANT_CORRUPTED_GOTO(mp, 505 + have_lt == 0 || 506 + ltrec.rm_startblock + ltrec.rm_blockcount <= bno, out_error); 507 + 508 + /* 509 + * Increment the cursor to see if we have a right-adjacent record to our 510 + * insertion point. This will give us the record for end block 511 + * contiguity tests. 
512 + */ 513 + error = xfs_btree_increment(cur, 0, &have_gt); 514 + if (error) 515 + goto out_error; 516 + if (have_gt) { 517 + error = xfs_rmap_get_rec(cur, &gtrec, &have_gt); 518 + if (error) 519 + goto out_error; 520 + XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error); 521 + XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock, 522 + out_error); 523 + trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, 524 + cur->bc_private.a.agno, gtrec.rm_startblock, 525 + gtrec.rm_blockcount, gtrec.rm_owner, 526 + gtrec.rm_offset, gtrec.rm_flags); 527 + if (!xfs_rmap_is_mergeable(&gtrec, owner, flags)) 528 + have_gt = 0; 529 + } 530 + 531 + /* 532 + * Note: cursor currently points one record to the right of ltrec, even 533 + * if there is no record in the tree to the right. 534 + */ 535 + if (have_lt && 536 + ltrec.rm_startblock + ltrec.rm_blockcount == bno && 537 + (ignore_off || ltrec.rm_offset + ltrec.rm_blockcount == offset)) { 538 + /* 539 + * left edge contiguous, merge into left record. 540 + * 541 + * ltbno ltlen 542 + * orig: |ooooooooo| 543 + * adding: |aaaaaaaaa| 544 + * result: |rrrrrrrrrrrrrrrrrrr| 545 + * bno len 546 + */ 547 + ltrec.rm_blockcount += len; 548 + if (have_gt && 549 + bno + len == gtrec.rm_startblock && 550 + (ignore_off || offset + len == gtrec.rm_offset) && 551 + (unsigned long)ltrec.rm_blockcount + len + 552 + gtrec.rm_blockcount <= XFS_RMAP_LEN_MAX) { 553 + /* 554 + * right edge also contiguous, delete right record 555 + * and merge into left record. 
556 + * 557 + * ltbno ltlen gtbno gtlen 558 + * orig: |ooooooooo| |ooooooooo| 559 + * adding: |aaaaaaaaa| 560 + * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr| 561 + */ 562 + ltrec.rm_blockcount += gtrec.rm_blockcount; 563 + trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, 564 + gtrec.rm_startblock, 565 + gtrec.rm_blockcount, 566 + gtrec.rm_owner, 567 + gtrec.rm_offset, 568 + gtrec.rm_flags); 569 + error = xfs_btree_delete(cur, &i); 570 + if (error) 571 + goto out_error; 572 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); 573 + } 574 + 575 + /* point the cursor back to the left record and update */ 576 + error = xfs_btree_decrement(cur, 0, &have_gt); 577 + if (error) 578 + goto out_error; 579 + error = xfs_rmap_update(cur, &ltrec); 580 + if (error) 581 + goto out_error; 582 + } else if (have_gt && 583 + bno + len == gtrec.rm_startblock && 584 + (ignore_off || offset + len == gtrec.rm_offset)) { 585 + /* 586 + * right edge contiguous, merge into right record. 587 + * 588 + * gtbno gtlen 589 + * Orig: |ooooooooo| 590 + * adding: |aaaaaaaaa| 591 + * Result: |rrrrrrrrrrrrrrrrrrr| 592 + * bno len 593 + */ 594 + gtrec.rm_startblock = bno; 595 + gtrec.rm_blockcount += len; 596 + if (!ignore_off) 597 + gtrec.rm_offset = offset; 598 + error = xfs_rmap_update(cur, &gtrec); 599 + if (error) 600 + goto out_error; 601 + } else { 602 + /* 603 + * no contiguous edge with identical owner, insert 604 + * new record at current cursor position. 
605 + */ 606 + cur->bc_rec.r.rm_startblock = bno; 607 + cur->bc_rec.r.rm_blockcount = len; 608 + cur->bc_rec.r.rm_owner = owner; 609 + cur->bc_rec.r.rm_offset = offset; 610 + cur->bc_rec.r.rm_flags = flags; 611 + trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len, 612 + owner, offset, flags); 613 + error = xfs_btree_insert(cur, &i); 614 + if (error) 615 + goto out_error; 616 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); 617 + } 618 + 619 + trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len, 620 + unwritten, oinfo); 621 + out_error: 622 + if (error) 623 + trace_xfs_rmap_map_error(mp, cur->bc_private.a.agno, 624 + error, _RET_IP_); 625 + return error; 626 + } 627 + 628 + /* 629 + * Add a reference to an extent in the rmap btree. 630 + */ 631 + int 632 + xfs_rmap_alloc( 633 + struct xfs_trans *tp, 634 + struct xfs_buf *agbp, 635 + xfs_agnumber_t agno, 636 + xfs_agblock_t bno, 637 + xfs_extlen_t len, 638 + struct xfs_owner_info *oinfo) 639 + { 640 + struct xfs_mount *mp = tp->t_mountp; 641 + struct xfs_btree_cur *cur; 642 + int error; 643 + 644 + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 645 + return 0; 646 + 647 + cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); 648 + error = xfs_rmap_map(cur, bno, len, false, oinfo); 649 + if (error) 650 + goto out_error; 651 + 652 + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 653 + return 0; 654 + 655 + out_error: 656 + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 657 + return error; 658 + } 659 + 660 + #define RMAP_LEFT_CONTIG (1 << 0) 661 + #define RMAP_RIGHT_CONTIG (1 << 1) 662 + #define RMAP_LEFT_FILLING (1 << 2) 663 + #define RMAP_RIGHT_FILLING (1 << 3) 664 + #define RMAP_LEFT_VALID (1 << 6) 665 + #define RMAP_RIGHT_VALID (1 << 7) 666 + 667 + #define LEFT r[0] 668 + #define RIGHT r[1] 669 + #define PREV r[2] 670 + #define NEW r[3] 671 + 672 + /* 673 + * Convert an unwritten extent to a real extent or vice versa. 674 + * Does not handle overlapping extents. 
675 + */ 676 + STATIC int 677 + xfs_rmap_convert( 678 + struct xfs_btree_cur *cur, 679 + xfs_agblock_t bno, 680 + xfs_extlen_t len, 681 + bool unwritten, 682 + struct xfs_owner_info *oinfo) 683 + { 684 + struct xfs_mount *mp = cur->bc_mp; 685 + struct xfs_rmap_irec r[4]; /* neighbor extent entries */ 686 + /* left is 0, right is 1, prev is 2 */ 687 + /* new is 3 */ 688 + uint64_t owner; 689 + uint64_t offset; 690 + uint64_t new_endoff; 691 + unsigned int oldext; 692 + unsigned int newext; 693 + unsigned int flags = 0; 694 + int i; 695 + int state = 0; 696 + int error; 697 + 698 + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); 699 + ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) || 700 + (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); 701 + oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; 702 + new_endoff = offset + len; 703 + trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len, 704 + unwritten, oinfo); 705 + 706 + /* 707 + * For the initial lookup, look for an exact match or the left-adjacent 708 + * record for our insertion point. This will also give us the record for 709 + * start block contiguity tests. 710 + */ 711 + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i); 712 + if (error) 713 + goto done; 714 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 715 + 716 + error = xfs_rmap_get_rec(cur, &PREV, &i); 717 + if (error) 718 + goto done; 719 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 720 + trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, 721 + cur->bc_private.a.agno, PREV.rm_startblock, 722 + PREV.rm_blockcount, PREV.rm_owner, 723 + PREV.rm_offset, PREV.rm_flags); 724 + 725 + ASSERT(PREV.rm_offset <= offset); 726 + ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff); 727 + ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext); 728 + newext = ~oldext & XFS_RMAP_UNWRITTEN; 729 + 730 + /* 731 + * Set flags determining what part of the previous oldext allocation 732 + * extent is being replaced by a newext allocation. 
733 + */ 734 + if (PREV.rm_offset == offset) 735 + state |= RMAP_LEFT_FILLING; 736 + if (PREV.rm_offset + PREV.rm_blockcount == new_endoff) 737 + state |= RMAP_RIGHT_FILLING; 738 + 739 + /* 740 + * Decrement the cursor to see if we have a left-adjacent record to our 741 + * insertion point. This will give us the record for end block 742 + * contiguity tests. 743 + */ 744 + error = xfs_btree_decrement(cur, 0, &i); 745 + if (error) 746 + goto done; 747 + if (i) { 748 + state |= RMAP_LEFT_VALID; 749 + error = xfs_rmap_get_rec(cur, &LEFT, &i); 750 + if (error) 751 + goto done; 752 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 753 + XFS_WANT_CORRUPTED_GOTO(mp, 754 + LEFT.rm_startblock + LEFT.rm_blockcount <= bno, 755 + done); 756 + trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, 757 + cur->bc_private.a.agno, LEFT.rm_startblock, 758 + LEFT.rm_blockcount, LEFT.rm_owner, 759 + LEFT.rm_offset, LEFT.rm_flags); 760 + if (LEFT.rm_startblock + LEFT.rm_blockcount == bno && 761 + LEFT.rm_offset + LEFT.rm_blockcount == offset && 762 + xfs_rmap_is_mergeable(&LEFT, owner, newext)) 763 + state |= RMAP_LEFT_CONTIG; 764 + } 765 + 766 + /* 767 + * Increment the cursor to see if we have a right-adjacent record to our 768 + * insertion point. This will give us the record for end block 769 + * contiguity tests. 
770 + */ 771 + error = xfs_btree_increment(cur, 0, &i); 772 + if (error) 773 + goto done; 774 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 775 + error = xfs_btree_increment(cur, 0, &i); 776 + if (error) 777 + goto done; 778 + if (i) { 779 + state |= RMAP_RIGHT_VALID; 780 + error = xfs_rmap_get_rec(cur, &RIGHT, &i); 781 + if (error) 782 + goto done; 783 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 784 + XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock, 785 + done); 786 + trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, 787 + cur->bc_private.a.agno, RIGHT.rm_startblock, 788 + RIGHT.rm_blockcount, RIGHT.rm_owner, 789 + RIGHT.rm_offset, RIGHT.rm_flags); 790 + if (bno + len == RIGHT.rm_startblock && 791 + offset + len == RIGHT.rm_offset && 792 + xfs_rmap_is_mergeable(&RIGHT, owner, newext)) 793 + state |= RMAP_RIGHT_CONTIG; 794 + } 795 + 796 + /* check that left + prev + right is not too long */ 797 + if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | 798 + RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) == 799 + (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | 800 + RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) && 801 + (unsigned long)LEFT.rm_blockcount + len + 802 + RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) 803 + state &= ~RMAP_RIGHT_CONTIG; 804 + 805 + trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state, 806 + _RET_IP_); 807 + 808 + /* reset the cursor back to PREV */ 809 + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i); 810 + if (error) 811 + goto done; 812 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 813 + 814 + /* 815 + * Switch out based on the FILLING and CONTIG state bits. 816 + */ 817 + switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | 818 + RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) { 819 + case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | 820 + RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: 821 + /* 822 + * Setting all of a previous oldext extent to newext. 823 + * The left and right neighbors are both contiguous with new. 
824 + */ 825 + error = xfs_btree_increment(cur, 0, &i); 826 + if (error) 827 + goto done; 828 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 829 + trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, 830 + RIGHT.rm_startblock, RIGHT.rm_blockcount, 831 + RIGHT.rm_owner, RIGHT.rm_offset, 832 + RIGHT.rm_flags); 833 + error = xfs_btree_delete(cur, &i); 834 + if (error) 835 + goto done; 836 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 837 + error = xfs_btree_decrement(cur, 0, &i); 838 + if (error) 839 + goto done; 840 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 841 + trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, 842 + PREV.rm_startblock, PREV.rm_blockcount, 843 + PREV.rm_owner, PREV.rm_offset, 844 + PREV.rm_flags); 845 + error = xfs_btree_delete(cur, &i); 846 + if (error) 847 + goto done; 848 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 849 + error = xfs_btree_decrement(cur, 0, &i); 850 + if (error) 851 + goto done; 852 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 853 + NEW = LEFT; 854 + NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount; 855 + error = xfs_rmap_update(cur, &NEW); 856 + if (error) 857 + goto done; 858 + break; 859 + 860 + case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG: 861 + /* 862 + * Setting all of a previous oldext extent to newext. 863 + * The left neighbor is contiguous, the right is not. 
864 + */ 865 + trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, 866 + PREV.rm_startblock, PREV.rm_blockcount, 867 + PREV.rm_owner, PREV.rm_offset, 868 + PREV.rm_flags); 869 + error = xfs_btree_delete(cur, &i); 870 + if (error) 871 + goto done; 872 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 873 + error = xfs_btree_decrement(cur, 0, &i); 874 + if (error) 875 + goto done; 876 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 877 + NEW = LEFT; 878 + NEW.rm_blockcount += PREV.rm_blockcount; 879 + error = xfs_rmap_update(cur, &NEW); 880 + if (error) 881 + goto done; 882 + break; 883 + 884 + case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: 885 + /* 886 + * Setting all of a previous oldext extent to newext. 887 + * The right neighbor is contiguous, the left is not. 888 + */ 889 + error = xfs_btree_increment(cur, 0, &i); 890 + if (error) 891 + goto done; 892 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 893 + trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, 894 + RIGHT.rm_startblock, RIGHT.rm_blockcount, 895 + RIGHT.rm_owner, RIGHT.rm_offset, 896 + RIGHT.rm_flags); 897 + error = xfs_btree_delete(cur, &i); 898 + if (error) 899 + goto done; 900 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 901 + error = xfs_btree_decrement(cur, 0, &i); 902 + if (error) 903 + goto done; 904 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 905 + NEW = PREV; 906 + NEW.rm_blockcount = len + RIGHT.rm_blockcount; 907 + NEW.rm_flags = newext; 908 + error = xfs_rmap_update(cur, &NEW); 909 + if (error) 910 + goto done; 911 + break; 912 + 913 + case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING: 914 + /* 915 + * Setting all of a previous oldext extent to newext. 916 + * Neither the left nor right neighbors are contiguous with 917 + * the new one. 
918 + */ 919 + NEW = PREV; 920 + NEW.rm_flags = newext; 921 + error = xfs_rmap_update(cur, &NEW); 922 + if (error) 923 + goto done; 924 + break; 925 + 926 + case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG: 927 + /* 928 + * Setting the first part of a previous oldext extent to newext. 929 + * The left neighbor is contiguous. 930 + */ 931 + NEW = PREV; 932 + NEW.rm_offset += len; 933 + NEW.rm_startblock += len; 934 + NEW.rm_blockcount -= len; 935 + error = xfs_rmap_update(cur, &NEW); 936 + if (error) 937 + goto done; 938 + error = xfs_btree_decrement(cur, 0, &i); 939 + if (error) 940 + goto done; 941 + NEW = LEFT; 942 + NEW.rm_blockcount += len; 943 + error = xfs_rmap_update(cur, &NEW); 944 + if (error) 945 + goto done; 946 + break; 947 + 948 + case RMAP_LEFT_FILLING: 949 + /* 950 + * Setting the first part of a previous oldext extent to newext. 951 + * The left neighbor is not contiguous. 952 + */ 953 + NEW = PREV; 954 + NEW.rm_startblock += len; 955 + NEW.rm_offset += len; 956 + NEW.rm_blockcount -= len; 957 + error = xfs_rmap_update(cur, &NEW); 958 + if (error) 959 + goto done; 960 + NEW.rm_startblock = bno; 961 + NEW.rm_owner = owner; 962 + NEW.rm_offset = offset; 963 + NEW.rm_blockcount = len; 964 + NEW.rm_flags = newext; 965 + cur->bc_rec.r = NEW; 966 + trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, 967 + len, owner, offset, newext); 968 + error = xfs_btree_insert(cur, &i); 969 + if (error) 970 + goto done; 971 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 972 + break; 973 + 974 + case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG: 975 + /* 976 + * Setting the last part of a previous oldext extent to newext. 977 + * The right neighbor is contiguous with the new allocation. 
978 + */ 979 + NEW = PREV; 980 + NEW.rm_blockcount -= len; 981 + error = xfs_rmap_update(cur, &NEW); 982 + if (error) 983 + goto done; 984 + error = xfs_btree_increment(cur, 0, &i); 985 + if (error) 986 + goto done; 987 + NEW = RIGHT; 988 + NEW.rm_offset = offset; 989 + NEW.rm_startblock = bno; 990 + NEW.rm_blockcount += len; 991 + error = xfs_rmap_update(cur, &NEW); 992 + if (error) 993 + goto done; 994 + break; 995 + 996 + case RMAP_RIGHT_FILLING: 997 + /* 998 + * Setting the last part of a previous oldext extent to newext. 999 + * The right neighbor is not contiguous. 1000 + */ 1001 + NEW = PREV; 1002 + NEW.rm_blockcount -= len; 1003 + error = xfs_rmap_update(cur, &NEW); 1004 + if (error) 1005 + goto done; 1006 + error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset, 1007 + oldext, &i); 1008 + if (error) 1009 + goto done; 1010 + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1011 + NEW.rm_startblock = bno; 1012 + NEW.rm_owner = owner; 1013 + NEW.rm_offset = offset; 1014 + NEW.rm_blockcount = len; 1015 + NEW.rm_flags = newext; 1016 + cur->bc_rec.r = NEW; 1017 + trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, 1018 + len, owner, offset, newext); 1019 + error = xfs_btree_insert(cur, &i); 1020 + if (error) 1021 + goto done; 1022 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1023 + break; 1024 + 1025 + case 0: 1026 + /* 1027 + * Setting the middle part of a previous oldext extent to 1028 + * newext. Contiguity is impossible here. 1029 + * One extent becomes three extents. 
1030 + */ 1031 + /* new right extent - oldext */ 1032 + NEW.rm_startblock = bno + len; 1033 + NEW.rm_owner = owner; 1034 + NEW.rm_offset = new_endoff; 1035 + NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount - 1036 + new_endoff; 1037 + NEW.rm_flags = PREV.rm_flags; 1038 + error = xfs_rmap_update(cur, &NEW); 1039 + if (error) 1040 + goto done; 1041 + /* new left extent - oldext */ 1042 + NEW = PREV; 1043 + NEW.rm_blockcount = offset - PREV.rm_offset; 1044 + cur->bc_rec.r = NEW; 1045 + trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, 1046 + NEW.rm_startblock, NEW.rm_blockcount, 1047 + NEW.rm_owner, NEW.rm_offset, 1048 + NEW.rm_flags); 1049 + error = xfs_btree_insert(cur, &i); 1050 + if (error) 1051 + goto done; 1052 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1053 + /* 1054 + * Reset the cursor to the position of the new extent 1055 + * we are about to insert as we can't trust it after 1056 + * the previous insert. 1057 + */ 1058 + error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset, 1059 + oldext, &i); 1060 + if (error) 1061 + goto done; 1062 + XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1063 + /* new middle extent - newext */ 1064 + cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN; 1065 + cur->bc_rec.r.rm_flags |= newext; 1066 + trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len, 1067 + owner, offset, newext); 1068 + error = xfs_btree_insert(cur, &i); 1069 + if (error) 1070 + goto done; 1071 + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1072 + break; 1073 + 1074 + case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: 1075 + case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: 1076 + case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG: 1077 + case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG: 1078 + case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG: 1079 + case RMAP_LEFT_CONTIG: 1080 + case RMAP_RIGHT_CONTIG: 1081 + /* 1082 + * These cases are all impossible. 
1083 + */ 1084 + ASSERT(0); 1085 + } 1086 + 1087 + trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len, 1088 + unwritten, oinfo); 1089 + done: 1090 + if (error) 1091 + trace_xfs_rmap_convert_error(cur->bc_mp, 1092 + cur->bc_private.a.agno, error, _RET_IP_); 1093 + return error; 1094 + } 1095 + 1096 + #undef NEW 1097 + #undef LEFT 1098 + #undef RIGHT 1099 + #undef PREV 1100 + 1101 + struct xfs_rmap_query_range_info { 1102 + xfs_rmap_query_range_fn fn; 1103 + void *priv; 1104 + }; 1105 + 1106 + /* Format btree record and pass to our callback. */ 1107 + STATIC int 1108 + xfs_rmap_query_range_helper( 1109 + struct xfs_btree_cur *cur, 1110 + union xfs_btree_rec *rec, 1111 + void *priv) 1112 + { 1113 + struct xfs_rmap_query_range_info *query = priv; 1114 + struct xfs_rmap_irec irec; 1115 + int error; 1116 + 1117 + error = xfs_rmap_btrec_to_irec(rec, &irec); 1118 + if (error) 1119 + return error; 1120 + return query->fn(cur, &irec, query->priv); 1121 + } 1122 + 1123 + /* Find all rmaps between two keys. */ 1124 + int 1125 + xfs_rmap_query_range( 1126 + struct xfs_btree_cur *cur, 1127 + struct xfs_rmap_irec *low_rec, 1128 + struct xfs_rmap_irec *high_rec, 1129 + xfs_rmap_query_range_fn fn, 1130 + void *priv) 1131 + { 1132 + union xfs_btree_irec low_brec; 1133 + union xfs_btree_irec high_brec; 1134 + struct xfs_rmap_query_range_info query; 1135 + 1136 + low_brec.r = *low_rec; 1137 + high_brec.r = *high_rec; 1138 + query.priv = priv; 1139 + query.fn = fn; 1140 + return xfs_btree_query_range(cur, &low_brec, &high_brec, 1141 + xfs_rmap_query_range_helper, &query); 1142 + } 1143 + 1144 + /* Clean up after calling xfs_rmap_finish_one. */ 1145 + void 1146 + xfs_rmap_finish_one_cleanup( 1147 + struct xfs_trans *tp, 1148 + struct xfs_btree_cur *rcur, 1149 + int error) 1150 + { 1151 + struct xfs_buf *agbp; 1152 + 1153 + if (rcur == NULL) 1154 + return; 1155 + agbp = rcur->bc_private.a.agbp; 1156 + xfs_btree_del_cursor(rcur, error ? 
XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 1157 + if (error) 1158 + xfs_trans_brelse(tp, agbp); 1159 + } 1160 + 1161 + /* 1162 + * Process one of the deferred rmap operations. We pass back the 1163 + * btree cursor to maintain our lock on the rmapbt between calls. 1164 + * This saves time and eliminates a buffer deadlock between the 1165 + * superblock and the AGF because we'll always grab them in the same 1166 + * order. 1167 + */ 1168 + int 1169 + xfs_rmap_finish_one( 1170 + struct xfs_trans *tp, 1171 + enum xfs_rmap_intent_type type, 1172 + __uint64_t owner, 1173 + int whichfork, 1174 + xfs_fileoff_t startoff, 1175 + xfs_fsblock_t startblock, 1176 + xfs_filblks_t blockcount, 1177 + xfs_exntst_t state, 1178 + struct xfs_btree_cur **pcur) 1179 + { 1180 + struct xfs_mount *mp = tp->t_mountp; 1181 + struct xfs_btree_cur *rcur; 1182 + struct xfs_buf *agbp = NULL; 1183 + int error = 0; 1184 + xfs_agnumber_t agno; 1185 + struct xfs_owner_info oinfo; 1186 + xfs_agblock_t bno; 1187 + bool unwritten; 1188 + 1189 + agno = XFS_FSB_TO_AGNO(mp, startblock); 1190 + ASSERT(agno != NULLAGNUMBER); 1191 + bno = XFS_FSB_TO_AGBNO(mp, startblock); 1192 + 1193 + trace_xfs_rmap_deferred(mp, agno, type, bno, owner, whichfork, 1194 + startoff, blockcount, state); 1195 + 1196 + if (XFS_TEST_ERROR(false, mp, 1197 + XFS_ERRTAG_RMAP_FINISH_ONE, 1198 + XFS_RANDOM_RMAP_FINISH_ONE)) 1199 + return -EIO; 1200 + 1201 + /* 1202 + * If we haven't gotten a cursor or the cursor AG doesn't match 1203 + * the startblock, get one now. 1204 + */ 1205 + rcur = *pcur; 1206 + if (rcur != NULL && rcur->bc_private.a.agno != agno) { 1207 + xfs_rmap_finish_one_cleanup(tp, rcur, 0); 1208 + rcur = NULL; 1209 + *pcur = NULL; 1210 + } 1211 + if (rcur == NULL) { 1212 + /* 1213 + * Refresh the freelist before we start changing the 1214 + * rmapbt, because a shape change could cause us to 1215 + * allocate blocks. 
1216 + */ 1217 + error = xfs_free_extent_fix_freelist(tp, agno, &agbp); 1218 + if (error) 1219 + return error; 1220 + if (!agbp) 1221 + return -EFSCORRUPTED; 1222 + 1223 + rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); 1224 + if (!rcur) { 1225 + error = -ENOMEM; 1226 + goto out_cur; 1227 + } 1228 + } 1229 + *pcur = rcur; 1230 + 1231 + xfs_rmap_ino_owner(&oinfo, owner, whichfork, startoff); 1232 + unwritten = state == XFS_EXT_UNWRITTEN; 1233 + bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, startblock); 1234 + 1235 + switch (type) { 1236 + case XFS_RMAP_ALLOC: 1237 + case XFS_RMAP_MAP: 1238 + error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo); 1239 + break; 1240 + case XFS_RMAP_FREE: 1241 + case XFS_RMAP_UNMAP: 1242 + error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten, 1243 + &oinfo); 1244 + break; 1245 + case XFS_RMAP_CONVERT: 1246 + error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten, 1247 + &oinfo); 1248 + break; 1249 + default: 1250 + ASSERT(0); 1251 + error = -EFSCORRUPTED; 1252 + } 1253 + return error; 1254 + 1255 + out_cur: 1256 + xfs_trans_brelse(tp, agbp); 1257 + 1258 + return error; 1259 + } 1260 + 1261 + /* 1262 + * Don't defer an rmap if we aren't an rmap filesystem. 1263 + */ 1264 + static bool 1265 + xfs_rmap_update_is_needed( 1266 + struct xfs_mount *mp) 1267 + { 1268 + return xfs_sb_version_hasrmapbt(&mp->m_sb); 1269 + } 1270 + 1271 + /* 1272 + * Record a rmap intent; the list is kept sorted first by AG and then by 1273 + * increasing age. 
1274 + */ 1275 + static int 1276 + __xfs_rmap_add( 1277 + struct xfs_mount *mp, 1278 + struct xfs_defer_ops *dfops, 1279 + enum xfs_rmap_intent_type type, 1280 + __uint64_t owner, 1281 + int whichfork, 1282 + struct xfs_bmbt_irec *bmap) 1283 + { 1284 + struct xfs_rmap_intent *ri; 1285 + 1286 + trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, bmap->br_startblock), 1287 + type, 1288 + XFS_FSB_TO_AGBNO(mp, bmap->br_startblock), 1289 + owner, whichfork, 1290 + bmap->br_startoff, 1291 + bmap->br_blockcount, 1292 + bmap->br_state); 1293 + 1294 + ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS); 1295 + INIT_LIST_HEAD(&ri->ri_list); 1296 + ri->ri_type = type; 1297 + ri->ri_owner = owner; 1298 + ri->ri_whichfork = whichfork; 1299 + ri->ri_bmap = *bmap; 1300 + 1301 + xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list); 1302 + return 0; 1303 + } 1304 + 1305 + /* Map an extent into a file. */ 1306 + int 1307 + xfs_rmap_map_extent( 1308 + struct xfs_mount *mp, 1309 + struct xfs_defer_ops *dfops, 1310 + struct xfs_inode *ip, 1311 + int whichfork, 1312 + struct xfs_bmbt_irec *PREV) 1313 + { 1314 + if (!xfs_rmap_update_is_needed(mp)) 1315 + return 0; 1316 + 1317 + return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino, 1318 + whichfork, PREV); 1319 + } 1320 + 1321 + /* Unmap an extent out of a file. */ 1322 + int 1323 + xfs_rmap_unmap_extent( 1324 + struct xfs_mount *mp, 1325 + struct xfs_defer_ops *dfops, 1326 + struct xfs_inode *ip, 1327 + int whichfork, 1328 + struct xfs_bmbt_irec *PREV) 1329 + { 1330 + if (!xfs_rmap_update_is_needed(mp)) 1331 + return 0; 1332 + 1333 + return __xfs_rmap_add(mp, dfops, XFS_RMAP_UNMAP, ip->i_ino, 1334 + whichfork, PREV); 1335 + } 1336 + 1337 + /* Convert a data fork extent from unwritten to real or vice versa. 
*/ 1338 + int 1339 + xfs_rmap_convert_extent( 1340 + struct xfs_mount *mp, 1341 + struct xfs_defer_ops *dfops, 1342 + struct xfs_inode *ip, 1343 + int whichfork, 1344 + struct xfs_bmbt_irec *PREV) 1345 + { 1346 + if (!xfs_rmap_update_is_needed(mp)) 1347 + return 0; 1348 + 1349 + return __xfs_rmap_add(mp, dfops, XFS_RMAP_CONVERT, ip->i_ino, 1350 + whichfork, PREV); 1351 + } 1352 + 1353 + /* Schedule the creation of an rmap for non-file data. */ 1354 + int 1355 + xfs_rmap_alloc_extent( 1356 + struct xfs_mount *mp, 1357 + struct xfs_defer_ops *dfops, 1358 + xfs_agnumber_t agno, 1359 + xfs_agblock_t bno, 1360 + xfs_extlen_t len, 1361 + __uint64_t owner) 1362 + { 1363 + struct xfs_bmbt_irec bmap; 1364 + 1365 + if (!xfs_rmap_update_is_needed(mp)) 1366 + return 0; 1367 + 1368 + bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno); 1369 + bmap.br_blockcount = len; 1370 + bmap.br_startoff = 0; 1371 + bmap.br_state = XFS_EXT_NORM; 1372 + 1373 + return __xfs_rmap_add(mp, dfops, XFS_RMAP_ALLOC, owner, 1374 + XFS_DATA_FORK, &bmap); 1375 + } 1376 + 1377 + /* Schedule the deletion of an rmap for non-file data. */ 1378 + int 1379 + xfs_rmap_free_extent( 1380 + struct xfs_mount *mp, 1381 + struct xfs_defer_ops *dfops, 1382 + xfs_agnumber_t agno, 1383 + xfs_agblock_t bno, 1384 + xfs_extlen_t len, 1385 + __uint64_t owner) 1386 + { 1387 + struct xfs_bmbt_irec bmap; 1388 + 1389 + if (!xfs_rmap_update_is_needed(mp)) 1390 + return 0; 1391 + 1392 + bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno); 1393 + bmap.br_blockcount = len; 1394 + bmap.br_startoff = 0; 1395 + bmap.br_state = XFS_EXT_NORM; 1396 + 1397 + return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner, 1398 + XFS_DATA_FORK, &bmap); 1399 + }

+209

fs/xfs/libxfs/xfs_rmap.h

··· 1 + /* 2 + * Copyright (C) 2016 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __XFS_RMAP_H__ 21 + #define __XFS_RMAP_H__ 22 + 23 + static inline void 24 + xfs_rmap_ag_owner( 25 + struct xfs_owner_info *oi, 26 + uint64_t owner) 27 + { 28 + oi->oi_owner = owner; 29 + oi->oi_offset = 0; 30 + oi->oi_flags = 0; 31 + } 32 + 33 + static inline void 34 + xfs_rmap_ino_bmbt_owner( 35 + struct xfs_owner_info *oi, 36 + xfs_ino_t ino, 37 + int whichfork) 38 + { 39 + oi->oi_owner = ino; 40 + oi->oi_offset = 0; 41 + oi->oi_flags = XFS_OWNER_INFO_BMBT_BLOCK; 42 + if (whichfork == XFS_ATTR_FORK) 43 + oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK; 44 + } 45 + 46 + static inline void 47 + xfs_rmap_ino_owner( 48 + struct xfs_owner_info *oi, 49 + xfs_ino_t ino, 50 + int whichfork, 51 + xfs_fileoff_t offset) 52 + { 53 + oi->oi_owner = ino; 54 + oi->oi_offset = offset; 55 + oi->oi_flags = 0; 56 + if (whichfork == XFS_ATTR_FORK) 57 + oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK; 58 + } 59 + 60 + static inline void 61 + xfs_rmap_skip_owner_update( 62 + struct xfs_owner_info *oi) 63 + { 64 + oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; 65 + } 66 + 67 + /* Reverse mapping functions. 
*/ 68 + 69 + struct xfs_buf; 70 + 71 + static inline __u64 72 + xfs_rmap_irec_offset_pack( 73 + const struct xfs_rmap_irec *irec) 74 + { 75 + __u64 x; 76 + 77 + x = XFS_RMAP_OFF(irec->rm_offset); 78 + if (irec->rm_flags & XFS_RMAP_ATTR_FORK) 79 + x |= XFS_RMAP_OFF_ATTR_FORK; 80 + if (irec->rm_flags & XFS_RMAP_BMBT_BLOCK) 81 + x |= XFS_RMAP_OFF_BMBT_BLOCK; 82 + if (irec->rm_flags & XFS_RMAP_UNWRITTEN) 83 + x |= XFS_RMAP_OFF_UNWRITTEN; 84 + return x; 85 + } 86 + 87 + static inline int 88 + xfs_rmap_irec_offset_unpack( 89 + __u64 offset, 90 + struct xfs_rmap_irec *irec) 91 + { 92 + if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS)) 93 + return -EFSCORRUPTED; 94 + irec->rm_offset = XFS_RMAP_OFF(offset); 95 + if (offset & XFS_RMAP_OFF_ATTR_FORK) 96 + irec->rm_flags |= XFS_RMAP_ATTR_FORK; 97 + if (offset & XFS_RMAP_OFF_BMBT_BLOCK) 98 + irec->rm_flags |= XFS_RMAP_BMBT_BLOCK; 99 + if (offset & XFS_RMAP_OFF_UNWRITTEN) 100 + irec->rm_flags |= XFS_RMAP_UNWRITTEN; 101 + return 0; 102 + } 103 + 104 + static inline void 105 + xfs_owner_info_unpack( 106 + struct xfs_owner_info *oinfo, 107 + uint64_t *owner, 108 + uint64_t *offset, 109 + unsigned int *flags) 110 + { 111 + unsigned int r = 0; 112 + 113 + *owner = oinfo->oi_owner; 114 + *offset = oinfo->oi_offset; 115 + if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) 116 + r |= XFS_RMAP_ATTR_FORK; 117 + if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) 118 + r |= XFS_RMAP_BMBT_BLOCK; 119 + *flags = r; 120 + } 121 + 122 + static inline void 123 + xfs_owner_info_pack( 124 + struct xfs_owner_info *oinfo, 125 + uint64_t owner, 126 + uint64_t offset, 127 + unsigned int flags) 128 + { 129 + oinfo->oi_owner = owner; 130 + oinfo->oi_offset = XFS_RMAP_OFF(offset); 131 + oinfo->oi_flags = 0; 132 + if (flags & XFS_RMAP_ATTR_FORK) 133 + oinfo->oi_flags |= XFS_OWNER_INFO_ATTR_FORK; 134 + if (flags & XFS_RMAP_BMBT_BLOCK) 135 + oinfo->oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; 136 + } 137 + 138 + int xfs_rmap_alloc(struct xfs_trans *tp, struct 
xfs_buf *agbp, 139 + xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, 140 + struct xfs_owner_info *oinfo); 141 + int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp, 142 + xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, 143 + struct xfs_owner_info *oinfo); 144 + 145 + int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, 146 + xfs_extlen_t len, uint64_t owner, uint64_t offset, 147 + unsigned int flags, int *stat); 148 + int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno, 149 + xfs_extlen_t len, uint64_t owner, uint64_t offset, 150 + unsigned int flags, int *stat); 151 + int xfs_rmap_insert(struct xfs_btree_cur *rcur, xfs_agblock_t agbno, 152 + xfs_extlen_t len, uint64_t owner, uint64_t offset, 153 + unsigned int flags); 154 + int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec, 155 + int *stat); 156 + 157 + typedef int (*xfs_rmap_query_range_fn)( 158 + struct xfs_btree_cur *cur, 159 + struct xfs_rmap_irec *rec, 160 + void *priv); 161 + 162 + int xfs_rmap_query_range(struct xfs_btree_cur *cur, 163 + struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec, 164 + xfs_rmap_query_range_fn fn, void *priv); 165 + 166 + enum xfs_rmap_intent_type { 167 + XFS_RMAP_MAP, 168 + XFS_RMAP_MAP_SHARED, 169 + XFS_RMAP_UNMAP, 170 + XFS_RMAP_UNMAP_SHARED, 171 + XFS_RMAP_CONVERT, 172 + XFS_RMAP_CONVERT_SHARED, 173 + XFS_RMAP_ALLOC, 174 + XFS_RMAP_FREE, 175 + }; 176 + 177 + struct xfs_rmap_intent { 178 + struct list_head ri_list; 179 + enum xfs_rmap_intent_type ri_type; 180 + __uint64_t ri_owner; 181 + int ri_whichfork; 182 + struct xfs_bmbt_irec ri_bmap; 183 + }; 184 + 185 + /* functions for updating the rmapbt based on bmbt map/unmap operations */ 186 + int xfs_rmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 187 + struct xfs_inode *ip, int whichfork, 188 + struct xfs_bmbt_irec *imap); 189 + int xfs_rmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 190 + struct 
xfs_inode *ip, int whichfork, 191 + struct xfs_bmbt_irec *imap); 192 + int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 193 + struct xfs_inode *ip, int whichfork, 194 + struct xfs_bmbt_irec *imap); 195 + int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 196 + xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, 197 + __uint64_t owner); 198 + int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 199 + xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, 200 + __uint64_t owner); 201 + 202 + void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, 203 + struct xfs_btree_cur *rcur, int error); 204 + int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type, 205 + __uint64_t owner, int whichfork, xfs_fileoff_t startoff, 206 + xfs_fsblock_t startblock, xfs_filblks_t blockcount, 207 + xfs_exntst_t state, struct xfs_btree_cur **pcur); 208 + 209 + #endif /* __XFS_RMAP_H__ */

+511

fs/xfs/libxfs/xfs_rmap_btree.c

/*
 * Copyright (c) 2014 Red Hat, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"

/*
 * Reverse map btree.
 *
 * This is a per-ag tree used to track the owner(s) of a given extent. With
 * reflink it is possible for there to be multiple owners, which is a departure
 * from classic XFS. Owner records for data extents are inserted when the
 * extent is mapped and removed when an extent is unmapped. Owner records for
 * all other block types (i.e. metadata) are inserted when an extent is
 * allocated and removed when an extent is freed. There can only be one owner
 * of a metadata extent, usually an inode or some other metadata structure like
 * an AG btree.
 *
 * The rmap btree is part of the free space management, so blocks for the tree
 * are sourced from the agfl. Hence we need transaction reservation support for
 * this tree so that the freelist is always large enough. This also impacts on
 * the minimum space we need to leave free in the AG.
 *
 * The tree is ordered by [ag block, owner, offset]. This is a large key size,
 * but it is the only way to enforce unique keys when a block can be owned by
 * multiple files at any offset. There's no need to order/search by extent
 * size for online updating/management of the tree. It is intended that most
 * reverse lookups will be to find the owner(s) of a particular block, or to
 * try to recover tree and file data from corrupt primary metadata.
 */

/*
 * Create a second cursor on the same mount, transaction, AGF buffer and AG
 * as @cur.
 */
static struct xfs_btree_cur *
xfs_rmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
			cur->bc_private.a.agbp, cur->bc_private.a.agno);
}

/*
 * Point the AGF at a new root block and adjust the recorded tree height by
 * @inc, in both the on-disk AGF and the in-core perag, then log the AGF
 * root and level fields.
 */
STATIC void
xfs_rmapbt_set_root(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			inc)
{
	struct xfs_buf		*agbp = cur->bc_private.a.agbp;
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
	xfs_agnumber_t		seqno = be32_to_cpu(agf->agf_seqno);
	int			btnum = cur->bc_btnum;
	struct xfs_perag	*pag = xfs_perag_get(cur->bc_mp, seqno);

	ASSERT(ptr->s != 0);

	agf->agf_roots[btnum] = ptr->s;
	be32_add_cpu(&agf->agf_levels[btnum], inc);
	pag->pagf_levels[btnum] += inc;
	xfs_perag_put(pag);

	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}

/*
 * Get one block for the btree from the AG free list.
 *
 * On success *stat is 1 and new->s holds the block number; if the free list
 * handed back NULLAGBLOCK, *stat is 0 and @new is untouched.  @start is not
 * used here.  A nonzero return is the error from xfs_alloc_get_freelist().
 */
STATIC int
xfs_rmapbt_alloc_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*start,
	union xfs_btree_ptr	*new,
	int			*stat)
{
	int			error;
	xfs_agblock_t		bno;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);

	/* Allocate the new block from the freelist. If we can't, give up. */
	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
				       &bno, 1);
	if (error) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
		return error;
	}

	trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
			bno, 1);
	if (bno == NULLAGBLOCK) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		*stat = 0;
		return 0;
	}

	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1,
			false);

	xfs_trans_agbtree_delta(cur->bc_tp, 1);
	new->s = cpu_to_be32(bno);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
}

/*
 * Give a btree block back to the AG free list, then record it as a busy
 * extent (with XFS_EXTENT_BUSY_SKIP_DISCARD) and account for the one-block
 * decrease in the transaction.
 */
STATIC int
xfs_rmapbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_buf		*agbp = cur->bc_private.a.agbp;
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
	xfs_agblock_t		bno;
	int			error;

	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
	trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
			bno, 1);
	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
	if (error)
		return error;

	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
			      XFS_EXTENT_BUSY_SKIP_DISCARD);
	xfs_trans_agbtree_delta(cur->bc_tp, -1);

	return 0;
}

/* Minimum records per block: index 0 for level 0, index 1 for all others. */
STATIC int
xfs_rmapbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_rmap_mnr[level != 0];
}

/* Maximum records per block: index 0 for level 0, index 1 for all others. */
STATIC int
xfs_rmapbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_rmap_mxr[level != 0];
}

/* The (low) key of a record is its startblock, owner and offset, verbatim. */
STATIC void
xfs_rmapbt_init_key_from_rec(
	union xfs_btree_key	*key,
	union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = rec->rmap.rm_offset;
}

/*
 * The high key for a reverse mapping record can be computed by shifting
 * the startblock and offset to the highest value that would still map
 * to that record. In practice this means that we add blockcount-1 to
 * the startblock for all records, and if the record is for a data/attr
 * fork mapping, we add blockcount-1 to the offset too.
 */
STATIC void
xfs_rmapbt_init_high_key_from_rec(
	union xfs_btree_key	*key,
	union xfs_btree_rec	*rec)
{
	__uint64_t		off;
	int			adj;

	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = rec->rmap.rm_offset;
	/* Non-inode owners and bmbt blocks keep the record's offset as-is. */
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	off = be64_to_cpu(key->rmap.rm_offset);
	/* Bump the offset part only; bits outside XFS_RMAP_OFF_MASK are kept. */
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}

/* Build an on-disk (big-endian) record from the cursor's in-core record. */
STATIC void
xfs_rmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}

/* Load the root block pointer for this btree from the cursor's AGF. */
STATIC void
xfs_rmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);

	ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
	ASSERT(agf->agf_roots[cur->bc_btnum] != 0);

	ptr->s = agf->agf_roots[cur->bc_btnum];
}

/*
 * Compare an on-disk key against the cursor's in-core record, by
 * startblock, then owner, then offset.  The result is positive when @key
 * sorts after the record, negative when before, zero when equal.  The
 * key's offset is compared after XFS_RMAP_OFF(); the record's rm_offset
 * is used as-is.
 */
STATIC __int64_t
xfs_rmapbt_key_diff(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*key)
{
	struct xfs_rmap_irec	*rec = &cur->bc_rec.r;
	struct xfs_rmap_key	*kp = &key->rmap;
	__u64			x, y;
	__int64_t		d;

	d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
	if (d)
		return d;

	x = be64_to_cpu(kp->rm_owner);
	y = rec->rm_owner;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
	y = rec->rm_offset;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}

/*
 * Compare two on-disk keys by startblock, then owner, then offset (both
 * offsets passed through XFS_RMAP_OFF()).  Positive when @k1 sorts after
 * @k2, negative when before, zero when equal.
 */
STATIC __int64_t
xfs_rmapbt_diff_two_keys(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*k1,
	union xfs_btree_key	*k2)
{
	struct xfs_rmap_key	*kp1 = &k1->rmap;
	struct xfs_rmap_key	*kp2 = &k2->rmap;
	__int64_t		d;
	__u64			x, y;

	d = (__int64_t)be32_to_cpu(kp1->rm_startblock) -
		       be32_to_cpu(kp2->rm_startblock);
	if (d)
		return d;

	x = be64_to_cpu(kp1->rm_owner);
	y = be64_to_cpu(kp2->rm_owner);
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
	y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}

/*
 * Structural checks shared by the read and write verifiers; returns true
 * if the block looks like a valid rmap btree block.
 */
static bool
xfs_rmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	unsigned int		level;

	/*
	 * magic number and level verification
	 *
	 * During growfs operations, we can't verify the exact level or owner as
	 * the perag is not fully initialised and hence not attached to the
	 * buffer. In this case, check against the maximum tree depth.
	 *
	 * Similarly, during log recovery we will have a perag structure
	 * attached, but the agf information will not yet have been initialised
	 * from the on disk AGF. Again, we can only check against maximum limits
	 * in this case.
	 */
	if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
		return false;

	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
		return false;
	if (!xfs_btree_sblock_v5hdr_verify(bp))
		return false;

	level = be16_to_cpu(block->bb_level);
	if (pag && pag->pagf_init) {
		if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
			return false;
	} else if (level >= mp->m_rmap_maxlevels)
		return false;

	return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}

/*
 * Read verifier: a CRC mismatch is reported as -EFSBADCRC, a structural
 * failure as -EFSCORRUPTED.
 */
static void
xfs_rmapbt_read_verify(
	struct xfs_buf	*bp)
{
	if (!xfs_btree_sblock_verify_crc(bp))
		xfs_buf_ioerror(bp, -EFSBADCRC);
	else if (!xfs_rmapbt_verify(bp))
		xfs_buf_ioerror(bp, -EFSCORRUPTED);

	if (bp->b_error) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp);
	}
}

/*
 * Write verifier: refuse structurally bad blocks with -EFSCORRUPTED,
 * otherwise recompute the block CRC.
 */
static void
xfs_rmapbt_write_verify(
	struct xfs_buf	*bp)
{
	if (!xfs_rmapbt_verify(bp)) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;
	}
	xfs_btree_sblock_calc_crc(bp);

}

const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
	.name			= "xfs_rmapbt",
	.verify_read		= xfs_rmapbt_read_verify,
	.verify_write		= xfs_rmapbt_write_verify,
};

#if defined(DEBUG) || defined(XFS_WARN)
/*
 * Return 1 if @k1 sorts at or before @k2 by (startblock, owner, offset),
 * 0 otherwise.  Fully equal keys count as in order.
 */
STATIC int
xfs_rmapbt_keys_inorder(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*k1,
	union xfs_btree_key	*k2)
{
	__uint32_t		x;
	__uint32_t		y;
	__uint64_t		a;
	__uint64_t		b;

	x = be32_to_cpu(k1->rmap.rm_startblock);
	y = be32_to_cpu(k2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(k1->rmap.rm_owner);
	b = be64_to_cpu(k2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
	b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

/*
 * Return 1 if @r1 sorts at or before @r2 by (startblock, owner, offset),
 * 0 otherwise.  Records equal in all three fields count as in order.
 */
STATIC int
xfs_rmapbt_recs_inorder(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*r1,
	union xfs_btree_rec	*r2)
{
	__uint32_t		x;
	__uint32_t		y;
	__uint64_t		a;
	__uint64_t		b;

	x = be32_to_cpu(r1->rmap.rm_startblock);
	y = be32_to_cpu(r2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(r1->rmap.rm_owner);
	b = be64_to_cpu(r2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
	b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}
#endif	/* DEBUG || XFS_WARN */

/*
 * Operations table for the rmap btree.  key_len covers two keys because
 * each entry carries a low key and a high key.
 */
static const struct xfs_btree_ops xfs_rmapbt_ops = {
	.rec_len		= sizeof(struct xfs_rmap_rec),
	.key_len		= 2 * sizeof(struct xfs_rmap_key),

	.dup_cursor		= xfs_rmapbt_dup_cursor,
	.set_root		= xfs_rmapbt_set_root,
	.alloc_block		= xfs_rmapbt_alloc_block,
	.free_block		= xfs_rmapbt_free_block,
	.get_minrecs		= xfs_rmapbt_get_minrecs,
	.get_maxrecs		= xfs_rmapbt_get_maxrecs,
	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rmapbt_key_diff,
	.buf_ops		= &xfs_rmapbt_buf_ops,
	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
#if defined(DEBUG) || defined(XFS_WARN)
	.keys_inorder		= xfs_rmapbt_keys_inorder,
	.recs_inorder		= xfs_rmapbt_recs_inorder,
#endif
};

/*
 * Allocate a new reverse mapping btree cursor.
 *
 * The cursor is zero-allocated from xfs_btree_cur_zone and bound to the
 * given mount, transaction, AGF buffer and AG number; the tree height is
 * read from the AGF.
 */
struct xfs_btree_cur *
xfs_rmapbt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	xfs_agnumber_t		agno)
{
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
	struct xfs_btree_cur	*cur;

	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
	cur->bc_tp = tp;
	cur->bc_mp = mp;
	/* Overlapping btree; 2 keys per pointer. */
	cur->bc_btnum = XFS_BTNUM_RMAP;
	cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
	cur->bc_blocklog = mp->m_sb.sb_blocklog;
	cur->bc_ops = &xfs_rmapbt_ops;
	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);

	cur->bc_private.a.agbp = agbp;
	cur->bc_private.a.agno = agno;

	return cur;
}

/*
 * Calculate number of records in an rmap btree block.
 *
 * @blocklen is the full block size; the block header is subtracted here.
 * Leaf blocks hold records; other blocks hold a key pair plus a pointer
 * per entry.
 */
int
xfs_rmapbt_maxrecs(
	struct xfs_mount	*mp,
	int			blocklen,
	int			leaf)
{
	blocklen -= XFS_RMAP_BLOCK_LEN;

	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	return blocklen /
		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
}

/* Compute the maximum height of an rmap btree. */
void
xfs_rmapbt_compute_maxlevels(
	struct xfs_mount		*mp)
{
	mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
			mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
}

+61

fs/xfs/libxfs/xfs_rmap_btree.h

/*
 * Copyright (c) 2014 Red Hat, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#ifndef __XFS_RMAP_BTREE_H__
#define __XFS_RMAP_BTREE_H__

struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;

/* rmaps only exist on crc enabled filesystems */
#define XFS_RMAP_BLOCK_LEN	XFS_BTREE_SBLOCK_CRC_LEN

/*
 * Record, key, and pointer address macros for btree blocks.
 *
 * (note that some of these may appear unused, but they are used in userspace)
 *
 * All indexes are 1-based.  As laid out by these macros, a block is the
 * XFS_RMAP_BLOCK_LEN header followed either by an array of records, or by
 * an array of (low key, high key) pairs with the pointer array starting
 * after room for @maxrecs key pairs.
 */
#define XFS_RMAP_REC_ADDR(block, index) \
	((struct xfs_rmap_rec *) \
		((char *)(block) + XFS_RMAP_BLOCK_LEN + \
		 (((index) - 1) * sizeof(struct xfs_rmap_rec))))

/* Address of the low key of entry @index. */
#define XFS_RMAP_KEY_ADDR(block, index) \
	((struct xfs_rmap_key *) \
		((char *)(block) + XFS_RMAP_BLOCK_LEN + \
		 ((index) - 1) * 2 * sizeof(struct xfs_rmap_key)))

/* Address of the high key of entry @index: one key past the low key. */
#define XFS_RMAP_HIGH_KEY_ADDR(block, index) \
	((struct xfs_rmap_key *) \
		((char *)(block) + XFS_RMAP_BLOCK_LEN + \
		 sizeof(struct xfs_rmap_key) + \
		 ((index) - 1) * 2 * sizeof(struct xfs_rmap_key)))

/* Address of pointer @index, which sits after @maxrecs key pairs. */
#define XFS_RMAP_PTR_ADDR(block, index, maxrecs) \
	((xfs_rmap_ptr_t *) \
		((char *)(block) + XFS_RMAP_BLOCK_LEN + \
		 (maxrecs) * 2 * sizeof(struct xfs_rmap_key) + \
		 ((index) - 1) * sizeof(xfs_rmap_ptr_t)))

struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
				struct xfs_trans *tp, struct xfs_buf *bp,
				xfs_agnumber_t agno);
int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);

#endif	/* __XFS_RMAP_BTREE_H__ */

+9

fs/xfs/libxfs/xfs_sb.c

··· 24 24 #include "xfs_bit.h" 25 25 #include "xfs_sb.h" 26 26 #include "xfs_mount.h" 27 + #include "xfs_defer.h" 27 28 #include "xfs_inode.h" 28 29 #include "xfs_ialloc.h" 29 30 #include "xfs_alloc.h" ··· 37 36 #include "xfs_alloc_btree.h" 38 37 #include "xfs_ialloc_btree.h" 39 38 #include "xfs_log.h" 39 + #include "xfs_rmap_btree.h" 40 40 41 41 /* 42 42 * Physical superblock buffer manipulations. Shared with libxfs in userspace. ··· 731 729 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; 732 730 mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; 733 731 732 + mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1); 733 + mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0); 734 + mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; 735 + mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; 736 + 734 737 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 735 738 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 736 739 sbp->sb_inopblock); ··· 745 738 mp->m_ialloc_min_blks = sbp->sb_spino_align; 746 739 else 747 740 mp->m_ialloc_min_blks = mp->m_ialloc_blks; 741 + mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 742 + mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); 748 743 } 749 744 750 745 /*

+2

fs/xfs/libxfs/xfs_shared.h

··· 38 38 extern const struct xfs_buf_ops xfs_agf_buf_ops; 39 39 extern const struct xfs_buf_ops xfs_agfl_buf_ops; 40 40 extern const struct xfs_buf_ops xfs_allocbt_buf_ops; 41 + extern const struct xfs_buf_ops xfs_rmapbt_buf_ops; 41 42 extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; 42 43 extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; 43 44 extern const struct xfs_buf_ops xfs_bmbt_buf_ops; ··· 117 116 #define XFS_INO_BTREE_REF 3 118 117 #define XFS_ALLOC_BTREE_REF 2 119 118 #define XFS_BMAP_BTREE_REF 2 119 + #define XFS_RMAP_BTREE_REF 2 120 120 #define XFS_DIR_BTREE_REF 2 121 121 #define XFS_INO_REF 2 122 122 #define XFS_ATTR_BTREE_REF 1

+43 -19

fs/xfs/libxfs/xfs_trans_resv.c

··· 64 64 } 65 65 66 66 /* 67 + * Per-extent log reservation for the btree changes involved in freeing or 68 + * allocating an extent. In classic XFS there were two trees that will be 69 + * modified (bnobt + cntbt). With rmap enabled, there are three trees 70 + * (rmapbt). The number of blocks reserved is based on the formula: 71 + * 72 + * num trees * ((2 blocks/level * max depth) - 1) 73 + * 74 + * Keep in mind that max depth is calculated separately for each type of tree. 75 + */ 76 + static uint 77 + xfs_allocfree_log_count( 78 + struct xfs_mount *mp, 79 + uint num_ops) 80 + { 81 + uint blocks; 82 + 83 + blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1); 84 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 85 + blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); 86 + 87 + return blocks; 88 + } 89 + 90 + /* 67 91 * Logging inodes is really tricksy. They are logged in memory format, 68 92 * which means that what we write into the log doesn't directly translate into 69 93 * the amount of space they use on disk. ··· 150 126 */ 151 127 STATIC uint 152 128 xfs_calc_finobt_res( 153 - struct xfs_mount *mp, 129 + struct xfs_mount *mp, 154 130 int alloc, 155 131 int modify) 156 132 { ··· 161 137 162 138 res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); 163 139 if (alloc) 164 - res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 140 + res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 165 141 XFS_FSB_TO_B(mp, 1)); 166 142 if (modify) 167 143 res += (uint)XFS_FSB_TO_B(mp, 1); ··· 177 153 * item logged to try to account for the overhead of the transaction mechanism. 178 154 * 179 155 * Note: Most of the reservations underestimate the number of allocation 180 - * groups into which they could free extents in the xfs_bmap_finish() call. 156 + * groups into which they could free extents in the xfs_defer_finish() call. 181 157 * This is because the number in the worst case is quite high and quite 182 - * unusual. 
In order to fix this we need to change xfs_bmap_finish() to free 158 + * unusual. In order to fix this we need to change xfs_defer_finish() to free 183 159 * extents in only a single AG at a time. This will require changes to the 184 160 * EFI code as well, however, so that the EFI for the extents not freed is 185 161 * logged again in each transaction. See SGI PV #261917. ··· 212 188 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 213 189 XFS_FSB_TO_B(mp, 1)) + 214 190 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 215 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 191 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), 216 192 XFS_FSB_TO_B(mp, 1))), 217 193 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + 218 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 194 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), 219 195 XFS_FSB_TO_B(mp, 1)))); 220 196 } 221 197 ··· 241 217 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, 242 218 XFS_FSB_TO_B(mp, 1))), 243 219 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 244 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), 220 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), 245 221 XFS_FSB_TO_B(mp, 1)) + 246 222 xfs_calc_buf_res(5, 0) + 247 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 223 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 248 224 XFS_FSB_TO_B(mp, 1)) + 249 225 xfs_calc_buf_res(2 + mp->m_ialloc_blks + 250 226 mp->m_in_maxlevels, 0))); ··· 271 247 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), 272 248 XFS_FSB_TO_B(mp, 1))), 273 249 (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + 274 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3), 250 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 3), 275 251 XFS_FSB_TO_B(mp, 1)))); 276 252 } 277 253 ··· 310 286 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), 311 287 XFS_FSB_TO_B(mp, 1))), 312 288 (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 313 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 289 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 314 290 
XFS_FSB_TO_B(mp, 1)))); 315 291 } 316 292 ··· 348 324 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), 349 325 XFS_FSB_TO_B(mp, 1))), 350 326 (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + 351 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 327 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), 352 328 XFS_FSB_TO_B(mp, 1)))); 353 329 } 354 330 ··· 395 371 mp->m_sb.sb_sectsize + 396 372 xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + 397 373 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 398 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 374 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 399 375 XFS_FSB_TO_B(mp, 1)); 400 376 } 401 377 ··· 423 399 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + 424 400 mp->m_sb.sb_sectsize + 425 401 xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 426 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 402 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 427 403 XFS_FSB_TO_B(mp, 1)) + 428 404 xfs_calc_finobt_res(mp, 0, 0); 429 405 } ··· 507 483 xfs_calc_buf_res(1, 0) + 508 484 xfs_calc_buf_res(2 + mp->m_ialloc_blks + 509 485 mp->m_in_maxlevels, 0) + 510 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 486 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 511 487 XFS_FSB_TO_B(mp, 1)) + 512 488 xfs_calc_finobt_res(mp, 0, 1); 513 489 } ··· 537 513 struct xfs_mount *mp) 538 514 { 539 515 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 540 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 516 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 541 517 XFS_FSB_TO_B(mp, 1)); 542 518 } 543 519 ··· 559 535 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 560 536 XFS_FSB_TO_B(mp, 1)) + 561 537 xfs_calc_inode_res(mp, 1) + 562 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 538 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 563 539 XFS_FSB_TO_B(mp, 1)); 564 540 } 565 541 ··· 635 611 xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + 636 612 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, 
XFS_DATA_FORK) + 1, 637 613 XFS_FSB_TO_B(mp, 1)) + 638 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), 614 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 639 615 XFS_FSB_TO_B(mp, 1)); 640 616 } 641 617 ··· 658 634 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), 659 635 XFS_FSB_TO_B(mp, 1))), 660 636 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 661 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), 637 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), 662 638 XFS_FSB_TO_B(mp, 1)))); 663 639 } 664 640 ··· 725 701 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 726 702 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), 727 703 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + 728 - xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), 704 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), 729 705 XFS_FSB_TO_B(mp, 1)))); 730 706 } 731 707

-10

fs/xfs/libxfs/xfs_trans_resv.h

··· 68 68 #define M_RES(mp) (&(mp)->m_resv) 69 69 70 70 /* 71 - * Per-extent log reservation for the allocation btree changes 72 - * involved in freeing or allocating an extent. 73 - * 2 trees * (2 blocks/level * max depth - 1) * block size 74 - */ 75 - #define XFS_ALLOCFREE_LOG_RES(mp,nx) \ 76 - ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1))) 77 - #define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ 78 - ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1))) 79 - 80 - /* 81 71 * Per-directory log reservation for any directory change. 82 72 * dir blocks: (1 btree block per level + data block + free block) * dblock size 83 73 * bmap btree: (levels + 2) * max depth * block size

+2 -2

fs/xfs/libxfs/xfs_types.h

··· 108 108 } xfs_lookup_t; 109 109 110 110 typedef enum { 111 - XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, 112 - XFS_BTNUM_FINOi, XFS_BTNUM_MAX 111 + XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi, 112 + XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX 113 113 } xfs_btnum_t; 114 114 115 115 struct xfs_name {

+28 -111

fs/xfs/xfs_bmap_util.c

··· 25 25 #include "xfs_bit.h" 26 26 #include "xfs_mount.h" 27 27 #include "xfs_da_format.h" 28 + #include "xfs_defer.h" 28 29 #include "xfs_inode.h" 29 30 #include "xfs_btree.h" 30 31 #include "xfs_trans.h" ··· 41 40 #include "xfs_trace.h" 42 41 #include "xfs_icache.h" 43 42 #include "xfs_log.h" 43 + #include "xfs_rmap_btree.h" 44 44 45 45 /* Kernel only BMAP related definitions and functions */ 46 46 ··· 79 77 block << (mp->m_super->s_blocksize_bits - 9), 80 78 count_fsb << (mp->m_super->s_blocksize_bits - 9), 81 79 GFP_NOFS, true); 82 - } 83 - 84 - /* Sort bmap items by AG. */ 85 - static int 86 - xfs_bmap_free_list_cmp( 87 - void *priv, 88 - struct list_head *a, 89 - struct list_head *b) 90 - { 91 - struct xfs_mount *mp = priv; 92 - struct xfs_bmap_free_item *ra; 93 - struct xfs_bmap_free_item *rb; 94 - 95 - ra = container_of(a, struct xfs_bmap_free_item, xbfi_list); 96 - rb = container_of(b, struct xfs_bmap_free_item, xbfi_list); 97 - return XFS_FSB_TO_AGNO(mp, ra->xbfi_startblock) - 98 - XFS_FSB_TO_AGNO(mp, rb->xbfi_startblock); 99 - } 100 - 101 - /* 102 - * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 103 - * caller. Frees all the extents that need freeing, which must be done 104 - * last due to locking considerations. We never free any extents in 105 - * the first transaction. 106 - * 107 - * If an inode *ip is provided, rejoin it to the transaction if 108 - * the transaction was committed. 
109 - */ 110 - int /* error */ 111 - xfs_bmap_finish( 112 - struct xfs_trans **tp, /* transaction pointer addr */ 113 - struct xfs_bmap_free *flist, /* i/o: list extents to free */ 114 - struct xfs_inode *ip) 115 - { 116 - struct xfs_efd_log_item *efd; /* extent free data */ 117 - struct xfs_efi_log_item *efi; /* extent free intention */ 118 - int error; /* error return value */ 119 - int committed;/* xact committed or not */ 120 - struct xfs_bmap_free_item *free; /* free extent item */ 121 - 122 - ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 123 - if (flist->xbf_count == 0) 124 - return 0; 125 - 126 - list_sort((*tp)->t_mountp, &flist->xbf_flist, xfs_bmap_free_list_cmp); 127 - 128 - efi = xfs_trans_get_efi(*tp, flist->xbf_count); 129 - list_for_each_entry(free, &flist->xbf_flist, xbfi_list) 130 - xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock, 131 - free->xbfi_blockcount); 132 - 133 - error = __xfs_trans_roll(tp, ip, &committed); 134 - if (error) { 135 - /* 136 - * If the transaction was committed, drop the EFD reference 137 - * since we're bailing out of here. The other reference is 138 - * dropped when the EFI hits the AIL. 139 - * 140 - * If the transaction was not committed, the EFI is freed by the 141 - * EFI item unlock handler on abort. Also, we have a new 142 - * transaction so we should return committed=1 even though we're 143 - * returning an error. 144 - */ 145 - if (committed) { 146 - xfs_efi_release(efi); 147 - xfs_force_shutdown((*tp)->t_mountp, 148 - SHUTDOWN_META_IO_ERROR); 149 - } 150 - return error; 151 - } 152 - 153 - /* 154 - * Get an EFD and free each extent in the list, logging to the EFD in 155 - * the process. The remaining bmap free list is cleaned up by the caller 156 - * on error. 
157 - */ 158 - efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count); 159 - while (!list_empty(&flist->xbf_flist)) { 160 - free = list_first_entry(&flist->xbf_flist, 161 - struct xfs_bmap_free_item, xbfi_list); 162 - error = xfs_trans_free_extent(*tp, efd, free->xbfi_startblock, 163 - free->xbfi_blockcount); 164 - if (error) 165 - return error; 166 - 167 - xfs_bmap_del_free(flist, free); 168 - } 169 - 170 - return 0; 171 80 } 172 81 173 82 int ··· 127 214 /* 128 215 * Lock out modifications to both the RT bitmap and summary inodes 129 216 */ 130 - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); 217 + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); 131 218 xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); 132 - xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); 219 + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); 133 220 xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); 134 221 135 222 /* ··· 686 773 xfs_bmbt_irec_t imap; 687 774 int nimaps = 1; 688 775 xfs_fsblock_t firstblock; 689 - xfs_bmap_free_t flist; 776 + struct xfs_defer_ops dfops; 690 777 691 778 /* 692 779 * Map the range first and check that it is a delalloc extent ··· 717 804 WARN_ON(imap.br_blockcount == 0); 718 805 719 806 /* 720 - * Note: while we initialise the firstblock/flist pair, they 807 + * Note: while we initialise the firstblock/dfops pair, they 721 808 * should never be used because blocks should never be 722 809 * allocated or freed for a delalloc extent and hence we 723 810 * don't need to cancel or finish them after the xfs_bunmapi() call. 
724 811 */ 725 - xfs_bmap_init(&flist, &firstblock); 812 + xfs_defer_init(&dfops, &firstblock); 726 813 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, 727 - &flist, &done); 814 + &dfops, &done); 728 815 if (error) 729 816 break; 730 817 731 - ASSERT(!flist.xbf_count && list_empty(&flist.xbf_flist)); 818 + ASSERT(!xfs_defer_has_unfinished_work(&dfops)); 732 819 next_block: 733 820 start_fsb++; 734 821 remaining--; ··· 885 972 int rt; 886 973 xfs_trans_t *tp; 887 974 xfs_bmbt_irec_t imaps[1], *imapp; 888 - xfs_bmap_free_t free_list; 975 + struct xfs_defer_ops dfops; 889 976 uint qblocks, resblks, resrtextents; 890 977 int error; 891 978 ··· 976 1063 977 1064 xfs_trans_ijoin(tp, ip, 0); 978 1065 979 - xfs_bmap_init(&free_list, &firstfsb); 1066 + xfs_defer_init(&dfops, &firstfsb); 980 1067 error = xfs_bmapi_write(tp, ip, startoffset_fsb, 981 1068 allocatesize_fsb, alloc_type, &firstfsb, 982 - resblks, imapp, &nimaps, &free_list); 1069 + resblks, imapp, &nimaps, &dfops); 983 1070 if (error) 984 1071 goto error0; 985 1072 986 1073 /* 987 1074 * Complete the transaction 988 1075 */ 989 - error = xfs_bmap_finish(&tp, &free_list, NULL); 1076 + error = xfs_defer_finish(&tp, &dfops, NULL); 990 1077 if (error) 991 1078 goto error0; 992 1079 ··· 1009 1096 return error; 1010 1097 1011 1098 error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 1012 - xfs_bmap_cancel(&free_list); 1099 + xfs_defer_cancel(&dfops); 1013 1100 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 1014 1101 1015 1102 error1: /* Just cancel transaction */ ··· 1027 1114 { 1028 1115 struct xfs_mount *mp = ip->i_mount; 1029 1116 struct xfs_trans *tp; 1030 - struct xfs_bmap_free free_list; 1117 + struct xfs_defer_ops dfops; 1031 1118 xfs_fsblock_t firstfsb; 1032 1119 uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1033 1120 int error; ··· 1046 1133 1047 1134 xfs_trans_ijoin(tp, ip, 0); 1048 1135 1049 - xfs_bmap_init(&free_list, &firstfsb); 1136 + 
xfs_defer_init(&dfops, &firstfsb); 1050 1137 error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb, 1051 - &free_list, done); 1138 + &dfops, done); 1052 1139 if (error) 1053 1140 goto out_bmap_cancel; 1054 1141 1055 - error = xfs_bmap_finish(&tp, &free_list, NULL); 1142 + error = xfs_defer_finish(&tp, &dfops, ip); 1056 1143 if (error) 1057 1144 goto out_bmap_cancel; 1058 1145 ··· 1062 1149 return error; 1063 1150 1064 1151 out_bmap_cancel: 1065 - xfs_bmap_cancel(&free_list); 1152 + xfs_defer_cancel(&dfops); 1066 1153 out_trans_cancel: 1067 1154 xfs_trans_cancel(tp); 1068 1155 goto out_unlock; ··· 1251 1338 struct xfs_mount *mp = ip->i_mount; 1252 1339 struct xfs_trans *tp; 1253 1340 int error; 1254 - struct xfs_bmap_free free_list; 1341 + struct xfs_defer_ops dfops; 1255 1342 xfs_fsblock_t first_block; 1256 1343 xfs_fileoff_t stop_fsb; 1257 1344 xfs_fileoff_t next_fsb; ··· 1329 1416 1330 1417 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1331 1418 1332 - xfs_bmap_init(&free_list, &first_block); 1419 + xfs_defer_init(&dfops, &first_block); 1333 1420 1334 1421 /* 1335 1422 * We are using the write transaction in which max 2 bmbt 1336 1423 * updates are allowed 1337 1424 */ 1338 1425 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb, 1339 - &done, stop_fsb, &first_block, &free_list, 1426 + &done, stop_fsb, &first_block, &dfops, 1340 1427 direction, XFS_BMAP_MAX_SHIFT_EXTENTS); 1341 1428 if (error) 1342 1429 goto out_bmap_cancel; 1343 1430 1344 - error = xfs_bmap_finish(&tp, &free_list, NULL); 1431 + error = xfs_defer_finish(&tp, &dfops, NULL); 1345 1432 if (error) 1346 1433 goto out_bmap_cancel; 1347 1434 ··· 1351 1438 return error; 1352 1439 1353 1440 out_bmap_cancel: 1354 - xfs_bmap_cancel(&free_list); 1441 + xfs_defer_cancel(&dfops); 1355 1442 out_trans_cancel: 1356 1443 xfs_trans_cancel(tp); 1357 1444 return error; ··· 1534 1621 int taforkblks = 0; 1535 1622 __uint64_t tmp; 1536 1623 int lock_flags; 1624 + 1625 + /* XXX: we can't do this with 
rmap, will fix later */ 1626 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 1627 + return -EOPNOTSUPP; 1537 1628 1538 1629 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 1539 1630 if (!tempifp) {

+1 -3

fs/xfs/xfs_bmap_util.h

··· 21 21 /* Kernel only BMAP related definitions and functions */ 22 22 23 23 struct xfs_bmbt_irec; 24 - struct xfs_bmap_free_item; 24 + struct xfs_extent_free_item; 25 25 struct xfs_ifork; 26 26 struct xfs_inode; 27 27 struct xfs_mount; ··· 40 40 xfs_bmap_format_t formatter, void *arg); 41 41 42 42 /* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */ 43 - void xfs_bmap_del_free(struct xfs_bmap_free *flist, 44 - struct xfs_bmap_free_item *free); 45 43 int xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp, 46 44 struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz, 47 45 int rt, int eof, int delay, int convert,

+1 -1

fs/xfs/xfs_discard.c

··· 179 179 * matter as trimming blocks is an advisory interface. 180 180 */ 181 181 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || 182 - range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || 182 + range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) || 183 183 range.len < mp->m_sb.sb_blocksize) 184 184 return -EINVAL; 185 185

+7 -6

fs/xfs/xfs_dquot.c

··· 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_bit.h" 25 25 #include "xfs_mount.h" 26 + #include "xfs_defer.h" 26 27 #include "xfs_inode.h" 27 28 #include "xfs_bmap.h" 28 29 #include "xfs_bmap_util.h" ··· 308 307 xfs_buf_t **O_bpp) 309 308 { 310 309 xfs_fsblock_t firstblock; 311 - xfs_bmap_free_t flist; 310 + struct xfs_defer_ops dfops; 312 311 xfs_bmbt_irec_t map; 313 312 int nmaps, error; 314 313 xfs_buf_t *bp; ··· 321 320 /* 322 321 * Initialize the bmap freelist prior to calling bmapi code. 323 322 */ 324 - xfs_bmap_init(&flist, &firstblock); 323 + xfs_defer_init(&dfops, &firstblock); 325 324 xfs_ilock(quotip, XFS_ILOCK_EXCL); 326 325 /* 327 326 * Return if this type of quotas is turned off while we didn't ··· 337 336 error = xfs_bmapi_write(tp, quotip, offset_fsb, 338 337 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 339 338 &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), 340 - &map, &nmaps, &flist); 339 + &map, &nmaps, &dfops); 341 340 if (error) 342 341 goto error0; 343 342 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); ··· 369 368 dqp->dq_flags & XFS_DQ_ALLTYPES, bp); 370 369 371 370 /* 372 - * xfs_bmap_finish() may commit the current transaction and 371 + * xfs_defer_finish() may commit the current transaction and 373 372 * start a second transaction if the freelist is not empty. 374 373 * 375 374 * Since we still want to modify this buffer, we need to ··· 383 382 384 383 xfs_trans_bhold(tp, bp); 385 384 386 - error = xfs_bmap_finish(tpp, &flist, NULL); 385 + error = xfs_defer_finish(tpp, &dfops, NULL); 387 386 if (error) 388 387 goto error1; 389 388 ··· 399 398 return 0; 400 399 401 400 error1: 402 - xfs_bmap_cancel(&flist); 401 + xfs_defer_cancel(&dfops); 403 402 error0: 404 403 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 405 404

+5 -1

fs/xfs/xfs_error.h

··· 90 90 #define XFS_ERRTAG_STRATCMPL_IOERR 19 91 91 #define XFS_ERRTAG_DIOWRITE_IOERR 20 92 92 #define XFS_ERRTAG_BMAPIFORMAT 21 93 - #define XFS_ERRTAG_MAX 22 93 + #define XFS_ERRTAG_FREE_EXTENT 22 94 + #define XFS_ERRTAG_RMAP_FINISH_ONE 23 95 + #define XFS_ERRTAG_MAX 24 94 96 95 97 /* 96 98 * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. ··· 119 117 #define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10) 120 118 #define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) 121 119 #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT 120 + #define XFS_RANDOM_FREE_EXTENT 1 121 + #define XFS_RANDOM_RMAP_FINISH_ONE 1 122 122 123 123 #ifdef DEBUG 124 124 extern int xfs_error_test_active;

+69

fs/xfs/xfs_extfree_item.c

··· 20 20 #include "xfs_format.h" 21 21 #include "xfs_log_format.h" 22 22 #include "xfs_trans_resv.h" 23 + #include "xfs_bit.h" 23 24 #include "xfs_mount.h" 24 25 #include "xfs_trans.h" 25 26 #include "xfs_trans_priv.h" 26 27 #include "xfs_buf_item.h" 27 28 #include "xfs_extfree_item.h" 28 29 #include "xfs_log.h" 30 + #include "xfs_btree.h" 31 + #include "xfs_rmap.h" 29 32 30 33 31 34 kmem_zone_t *xfs_efi_zone; ··· 488 485 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 489 486 490 487 return efdp; 488 + } 489 + 490 + /* 491 + * Process an extent free intent item that was recovered from 492 + * the log. We need to free the extents that it describes. 493 + */ 494 + int 495 + xfs_efi_recover( 496 + struct xfs_mount *mp, 497 + struct xfs_efi_log_item *efip) 498 + { 499 + struct xfs_efd_log_item *efdp; 500 + struct xfs_trans *tp; 501 + int i; 502 + int error = 0; 503 + xfs_extent_t *extp; 504 + xfs_fsblock_t startblock_fsb; 505 + struct xfs_owner_info oinfo; 506 + 507 + ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); 508 + 509 + /* 510 + * First check the validity of the extents described by the 511 + * EFI. If any are bad, then assume that all are bad and 512 + * just toss the EFI. 513 + */ 514 + for (i = 0; i < efip->efi_format.efi_nextents; i++) { 515 + extp = &efip->efi_format.efi_extents[i]; 516 + startblock_fsb = XFS_BB_TO_FSB(mp, 517 + XFS_FSB_TO_DADDR(mp, extp->ext_start)); 518 + if (startblock_fsb == 0 || 519 + extp->ext_len == 0 || 520 + startblock_fsb >= mp->m_sb.sb_dblocks || 521 + extp->ext_len >= mp->m_sb.sb_agblocks) { 522 + /* 523 + * This will pull the EFI from the AIL and 524 + * free the memory associated with it. 
525 + */ 526 + set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 527 + xfs_efi_release(efip); 528 + return -EIO; 529 + } 530 + } 531 + 532 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 533 + if (error) 534 + return error; 535 + efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 536 + 537 + xfs_rmap_skip_owner_update(&oinfo); 538 + for (i = 0; i < efip->efi_format.efi_nextents; i++) { 539 + extp = &efip->efi_format.efi_extents[i]; 540 + error = xfs_trans_free_extent(tp, efdp, extp->ext_start, 541 + extp->ext_len, &oinfo); 542 + if (error) 543 + goto abort_error; 544 + 545 + } 546 + 547 + set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 548 + error = xfs_trans_commit(tp); 549 + return error; 550 + 551 + abort_error: 552 + xfs_trans_cancel(tp); 553 + return error; 491 554 }

+3

fs/xfs/xfs_extfree_item.h

··· 98 98 void xfs_efi_item_free(xfs_efi_log_item_t *); 99 99 void xfs_efi_release(struct xfs_efi_log_item *); 100 100 101 + int xfs_efi_recover(struct xfs_mount *mp, 102 + struct xfs_efi_log_item *efip); 103 + 101 104 #endif /* __XFS_EXTFREE_ITEM_H__ */

+2 -1

fs/xfs/xfs_filestream.c

··· 22 22 #include "xfs_trans_resv.h" 23 23 #include "xfs_sb.h" 24 24 #include "xfs_mount.h" 25 + #include "xfs_defer.h" 25 26 #include "xfs_inode.h" 26 27 #include "xfs_bmap.h" 27 28 #include "xfs_bmap_util.h" ··· 386 385 } 387 386 388 387 flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | 389 - (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); 388 + (ap->dfops->dop_low ? XFS_PICK_LOWSPACE : 0); 390 389 391 390 err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); 392 391

+96 -10

fs/xfs/xfs_fsops.c

··· 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_sb.h" 25 25 #include "xfs_mount.h" 26 + #include "xfs_defer.h" 26 27 #include "xfs_da_format.h" 27 28 #include "xfs_da_btree.h" 28 29 #include "xfs_inode.h" ··· 33 32 #include "xfs_btree.h" 34 33 #include "xfs_alloc_btree.h" 35 34 #include "xfs_alloc.h" 35 + #include "xfs_rmap_btree.h" 36 36 #include "xfs_ialloc.h" 37 37 #include "xfs_fsops.h" 38 38 #include "xfs_itable.h" ··· 42 40 #include "xfs_trace.h" 43 41 #include "xfs_log.h" 44 42 #include "xfs_filestream.h" 43 + #include "xfs_rmap.h" 45 44 46 45 /* 47 46 * File system operations ··· 106 103 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 107 104 XFS_FSOP_GEOM_FLAGS_FINOBT : 0) | 108 105 (xfs_sb_version_hassparseinodes(&mp->m_sb) ? 109 - XFS_FSOP_GEOM_FLAGS_SPINODES : 0); 106 + XFS_FSOP_GEOM_FLAGS_SPINODES : 0) | 107 + (xfs_sb_version_hasrmapbt(&mp->m_sb) ? 108 + XFS_FSOP_GEOM_FLAGS_RMAPBT : 0); 110 109 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 111 110 mp->m_sb.sb_logsectsize : BBSIZE; 112 111 geo->rtsectsize = mp->m_sb.sb_blocksize; ··· 244 239 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); 245 240 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); 246 241 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); 242 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { 243 + agf->agf_roots[XFS_BTNUM_RMAPi] = 244 + cpu_to_be32(XFS_RMAP_BLOCK(mp)); 245 + agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); 246 + } 247 + 247 248 agf->agf_flfirst = cpu_to_be32(1); 248 249 agf->agf_fllast = 0; 249 250 agf->agf_flcount = 0; 250 - tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); 251 + tmpsize = agsize - mp->m_ag_prealloc_blocks; 251 252 agf->agf_freeblks = cpu_to_be32(tmpsize); 252 253 agf->agf_longest = cpu_to_be32(tmpsize); 253 254 if (xfs_sb_version_hascrc(&mp->m_sb)) ··· 350 339 agno, 0); 351 340 352 341 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 353 - arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 342 + arec->ar_startblock = 
cpu_to_be32(mp->m_ag_prealloc_blocks); 354 343 arec->ar_blockcount = cpu_to_be32( 355 344 agsize - be32_to_cpu(arec->ar_startblock)); 356 345 ··· 379 368 agno, 0); 380 369 381 370 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 382 - arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 371 + arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 383 372 arec->ar_blockcount = cpu_to_be32( 384 373 agsize - be32_to_cpu(arec->ar_startblock)); 385 374 nfree += be32_to_cpu(arec->ar_blockcount); ··· 388 377 xfs_buf_relse(bp); 389 378 if (error) 390 379 goto error0; 380 + 381 + /* RMAP btree root block */ 382 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { 383 + struct xfs_rmap_rec *rrec; 384 + struct xfs_btree_block *block; 385 + 386 + bp = xfs_growfs_get_hdr_buf(mp, 387 + XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)), 388 + BTOBB(mp->m_sb.sb_blocksize), 0, 389 + &xfs_rmapbt_buf_ops); 390 + if (!bp) { 391 + error = -ENOMEM; 392 + goto error0; 393 + } 394 + 395 + xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0, 396 + agno, XFS_BTREE_CRC_BLOCKS); 397 + block = XFS_BUF_TO_BLOCK(bp); 398 + 399 + 400 + /* 401 + * Mark the AG header regions as static metadata. The BNO 402 + * btree block is the first block after the headers, so 403 + * its location defines the size of the region the static 404 + * metadata consumes. 
405 + * 406 + * Note: unlike mkfs, we never have to account for log 407 + * space when growing the data regions 408 + */ 409 + rrec = XFS_RMAP_REC_ADDR(block, 1); 410 + rrec->rm_startblock = 0; 411 + rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp)); 412 + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS); 413 + rrec->rm_offset = 0; 414 + be16_add_cpu(&block->bb_numrecs, 1); 415 + 416 + /* account freespace btree root blocks */ 417 + rrec = XFS_RMAP_REC_ADDR(block, 2); 418 + rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp)); 419 + rrec->rm_blockcount = cpu_to_be32(2); 420 + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); 421 + rrec->rm_offset = 0; 422 + be16_add_cpu(&block->bb_numrecs, 1); 423 + 424 + /* account inode btree root blocks */ 425 + rrec = XFS_RMAP_REC_ADDR(block, 3); 426 + rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp)); 427 + rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) - 428 + XFS_IBT_BLOCK(mp)); 429 + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT); 430 + rrec->rm_offset = 0; 431 + be16_add_cpu(&block->bb_numrecs, 1); 432 + 433 + /* account for rmap btree root */ 434 + rrec = XFS_RMAP_REC_ADDR(block, 4); 435 + rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp)); 436 + rrec->rm_blockcount = cpu_to_be32(1); 437 + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); 438 + rrec->rm_offset = 0; 439 + be16_add_cpu(&block->bb_numrecs, 1); 440 + 441 + error = xfs_bwrite(bp); 442 + xfs_buf_relse(bp); 443 + if (error) 444 + goto error0; 445 + } 391 446 392 447 /* 393 448 * INO btree root block ··· 512 435 * There are new blocks in the old last a.g. 513 436 */ 514 437 if (new) { 438 + struct xfs_owner_info oinfo; 439 + 515 440 /* 516 441 * Change the agi length. 517 442 */ ··· 541 462 be32_to_cpu(agi->agi_length)); 542 463 543 464 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); 465 + 544 466 /* 545 467 * Free the new space. 468 + * 469 + * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that 470 + * this doesn't actually exist in the rmap btree. 
546 471 */ 547 - error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno, 548 - be32_to_cpu(agf->agf_length) - new), new); 549 - if (error) { 472 + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); 473 + error = xfs_free_extent(tp, 474 + XFS_AGB_TO_FSB(mp, agno, 475 + be32_to_cpu(agf->agf_length) - new), 476 + new, &oinfo); 477 + if (error) 550 478 goto error0; 551 - } 552 479 } 553 480 554 481 /* ··· 586 501 } else 587 502 mp->m_maxicount = 0; 588 503 xfs_set_low_space_thresholds(mp); 504 + mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 589 505 590 506 /* update secondary superblocks. */ 591 507 for (agno = 1; agno < nagcount; agno++) { ··· 724 638 cnt->allocino = percpu_counter_read_positive(&mp->m_icount); 725 639 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); 726 640 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - 727 - XFS_ALLOC_SET_ASIDE(mp); 641 + mp->m_alloc_set_aside; 728 642 729 643 spin_lock(&mp->m_sb_lock); 730 644 cnt->freertx = mp->m_sb.sb_frextents; ··· 812 726 error = -ENOSPC; 813 727 do { 814 728 free = percpu_counter_sum(&mp->m_fdblocks) - 815 - XFS_ALLOC_SET_ASIDE(mp); 729 + mp->m_alloc_set_aside; 816 730 if (!free) 817 731 break; 818 732

+50 -49

fs/xfs/xfs_inode.c

··· 25 25 #include "xfs_trans_resv.h" 26 26 #include "xfs_sb.h" 27 27 #include "xfs_mount.h" 28 + #include "xfs_defer.h" 28 29 #include "xfs_inode.h" 29 30 #include "xfs_da_format.h" 30 31 #include "xfs_da_btree.h" ··· 1123 1122 struct xfs_inode *ip = NULL; 1124 1123 struct xfs_trans *tp = NULL; 1125 1124 int error; 1126 - xfs_bmap_free_t free_list; 1125 + struct xfs_defer_ops dfops; 1127 1126 xfs_fsblock_t first_block; 1128 1127 bool unlock_dp_on_error = false; 1129 1128 prid_t prid; ··· 1183 1182 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); 1184 1183 unlock_dp_on_error = true; 1185 1184 1186 - xfs_bmap_init(&free_list, &first_block); 1185 + xfs_defer_init(&dfops, &first_block); 1187 1186 1188 1187 /* 1189 1188 * Reserve disk quota and the inode. ··· 1220 1219 unlock_dp_on_error = false; 1221 1220 1222 1221 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1223 - &first_block, &free_list, resblks ? 1222 + &first_block, &dfops, resblks ? 1224 1223 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1225 1224 if (error) { 1226 1225 ASSERT(error != -ENOSPC); ··· 1254 1253 */ 1255 1254 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1256 1255 1257 - error = xfs_bmap_finish(&tp, &free_list, NULL); 1256 + error = xfs_defer_finish(&tp, &dfops, NULL); 1258 1257 if (error) 1259 1258 goto out_bmap_cancel; 1260 1259 ··· 1270 1269 return 0; 1271 1270 1272 1271 out_bmap_cancel: 1273 - xfs_bmap_cancel(&free_list); 1272 + xfs_defer_cancel(&dfops); 1274 1273 out_trans_cancel: 1275 1274 xfs_trans_cancel(tp); 1276 1275 out_release_inode: ··· 1402 1401 xfs_mount_t *mp = tdp->i_mount; 1403 1402 xfs_trans_t *tp; 1404 1403 int error; 1405 - xfs_bmap_free_t free_list; 1404 + struct xfs_defer_ops dfops; 1406 1405 xfs_fsblock_t first_block; 1407 1406 int resblks; 1408 1407 ··· 1453 1452 goto error_return; 1454 1453 } 1455 1454 1456 - xfs_bmap_init(&free_list, &first_block); 1455 + xfs_defer_init(&dfops, &first_block); 1457 1456 1458 1457 /* 1459 1458 * Handle initial link state of O_TMPFILE inode 
··· 1465 1464 } 1466 1465 1467 1466 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1468 - &first_block, &free_list, resblks); 1467 + &first_block, &dfops, resblks); 1469 1468 if (error) 1470 1469 goto error_return; 1471 1470 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); ··· 1483 1482 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 1484 1483 xfs_trans_set_sync(tp); 1485 1484 1486 - error = xfs_bmap_finish(&tp, &free_list, NULL); 1485 + error = xfs_defer_finish(&tp, &dfops, NULL); 1487 1486 if (error) { 1488 - xfs_bmap_cancel(&free_list); 1487 + xfs_defer_cancel(&dfops); 1489 1488 goto error_return; 1490 1489 } 1491 1490 ··· 1527 1526 { 1528 1527 struct xfs_mount *mp = ip->i_mount; 1529 1528 struct xfs_trans *tp = *tpp; 1530 - xfs_bmap_free_t free_list; 1529 + struct xfs_defer_ops dfops; 1531 1530 xfs_fsblock_t first_block; 1532 1531 xfs_fileoff_t first_unmap_block; 1533 1532 xfs_fileoff_t last_block; ··· 1563 1562 ASSERT(first_unmap_block < last_block); 1564 1563 unmap_len = last_block - first_unmap_block + 1; 1565 1564 while (!done) { 1566 - xfs_bmap_init(&free_list, &first_block); 1565 + xfs_defer_init(&dfops, &first_block); 1567 1566 error = xfs_bunmapi(tp, ip, 1568 1567 first_unmap_block, unmap_len, 1569 1568 xfs_bmapi_aflag(whichfork), 1570 1569 XFS_ITRUNC_MAX_EXTENTS, 1571 - &first_block, &free_list, 1570 + &first_block, &dfops, 1572 1571 &done); 1573 1572 if (error) 1574 1573 goto out_bmap_cancel; ··· 1577 1576 * Duplicate the transaction that has the permanent 1578 1577 * reservation and commit the old transaction. 1579 1578 */ 1580 - error = xfs_bmap_finish(&tp, &free_list, ip); 1579 + error = xfs_defer_finish(&tp, &dfops, ip); 1581 1580 if (error) 1582 1581 goto out_bmap_cancel; 1583 1582 ··· 1603 1602 * the transaction can be properly aborted. We just need to make sure 1604 1603 * we're not holding any resources that we were not when we came in. 
1605 1604 */ 1606 - xfs_bmap_cancel(&free_list); 1605 + xfs_defer_cancel(&dfops); 1607 1606 goto out; 1608 1607 } 1609 1608 ··· 1744 1743 xfs_inactive_ifree( 1745 1744 struct xfs_inode *ip) 1746 1745 { 1747 - xfs_bmap_free_t free_list; 1746 + struct xfs_defer_ops dfops; 1748 1747 xfs_fsblock_t first_block; 1749 1748 struct xfs_mount *mp = ip->i_mount; 1750 1749 struct xfs_trans *tp; ··· 1781 1780 xfs_ilock(ip, XFS_ILOCK_EXCL); 1782 1781 xfs_trans_ijoin(tp, ip, 0); 1783 1782 1784 - xfs_bmap_init(&free_list, &first_block); 1785 - error = xfs_ifree(tp, ip, &free_list); 1783 + xfs_defer_init(&dfops, &first_block); 1784 + error = xfs_ifree(tp, ip, &dfops); 1786 1785 if (error) { 1787 1786 /* 1788 1787 * If we fail to free the inode, shut down. The cancel ··· 1808 1807 * Just ignore errors at this point. There is nothing we can do except 1809 1808 * to try to keep going. Make sure it's not a silent error. 1810 1809 */ 1811 - error = xfs_bmap_finish(&tp, &free_list, NULL); 1810 + error = xfs_defer_finish(&tp, &dfops, NULL); 1812 1811 if (error) { 1813 - xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", 1812 + xfs_notice(mp, "%s: xfs_defer_finish returned error %d", 1814 1813 __func__, error); 1815 - xfs_bmap_cancel(&free_list); 1814 + xfs_defer_cancel(&dfops); 1816 1815 } 1817 1816 error = xfs_trans_commit(tp); 1818 1817 if (error) ··· 2368 2367 xfs_ifree( 2369 2368 xfs_trans_t *tp, 2370 2369 xfs_inode_t *ip, 2371 - xfs_bmap_free_t *flist) 2370 + struct xfs_defer_ops *dfops) 2372 2371 { 2373 2372 int error; 2374 2373 struct xfs_icluster xic = { 0 }; ··· 2387 2386 if (error) 2388 2387 return error; 2389 2388 2390 - error = xfs_difree(tp, ip->i_ino, flist, &xic); 2389 + error = xfs_difree(tp, ip->i_ino, dfops, &xic); 2391 2390 if (error) 2392 2391 return error; 2393 2392 ··· 2475 2474 * directory entry. 
2476 2475 * 2477 2476 * This is still safe from a transactional point of view - it is not until we 2478 - * get to xfs_bmap_finish() that we have the possibility of multiple 2477 + * get to xfs_defer_finish() that we have the possibility of multiple 2479 2478 * transactions in this operation. Hence as long as we remove the directory 2480 2479 * entry and drop the link count in the first transaction of the remove 2481 2480 * operation, there are no transactional constraints on the ordering here. ··· 2490 2489 xfs_trans_t *tp = NULL; 2491 2490 int is_dir = S_ISDIR(VFS_I(ip)->i_mode); 2492 2491 int error = 0; 2493 - xfs_bmap_free_t free_list; 2492 + struct xfs_defer_ops dfops; 2494 2493 xfs_fsblock_t first_block; 2495 2494 uint resblks; 2496 2495 ··· 2572 2571 if (error) 2573 2572 goto out_trans_cancel; 2574 2573 2575 - xfs_bmap_init(&free_list, &first_block); 2574 + xfs_defer_init(&dfops, &first_block); 2576 2575 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2577 - &first_block, &free_list, resblks); 2576 + &first_block, &dfops, resblks); 2578 2577 if (error) { 2579 2578 ASSERT(error != -ENOENT); 2580 2579 goto out_bmap_cancel; ··· 2588 2587 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2589 2588 xfs_trans_set_sync(tp); 2590 2589 2591 - error = xfs_bmap_finish(&tp, &free_list, NULL); 2590 + error = xfs_defer_finish(&tp, &dfops, NULL); 2592 2591 if (error) 2593 2592 goto out_bmap_cancel; 2594 2593 ··· 2602 2601 return 0; 2603 2602 2604 2603 out_bmap_cancel: 2605 - xfs_bmap_cancel(&free_list); 2604 + xfs_defer_cancel(&dfops); 2606 2605 out_trans_cancel: 2607 2606 xfs_trans_cancel(tp); 2608 2607 std_return: ··· 2663 2662 static int 2664 2663 xfs_finish_rename( 2665 2664 struct xfs_trans *tp, 2666 - struct xfs_bmap_free *free_list) 2665 + struct xfs_defer_ops *dfops) 2667 2666 { 2668 2667 int error; 2669 2668 ··· 2674 2673 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2675 2674 xfs_trans_set_sync(tp); 2676 2675 2677 - error = 
xfs_bmap_finish(&tp, free_list, NULL); 2676 + error = xfs_defer_finish(&tp, dfops, NULL); 2678 2677 if (error) { 2679 - xfs_bmap_cancel(free_list); 2678 + xfs_defer_cancel(dfops); 2680 2679 xfs_trans_cancel(tp); 2681 2680 return error; 2682 2681 } ··· 2698 2697 struct xfs_inode *dp2, 2699 2698 struct xfs_name *name2, 2700 2699 struct xfs_inode *ip2, 2701 - struct xfs_bmap_free *free_list, 2700 + struct xfs_defer_ops *dfops, 2702 2701 xfs_fsblock_t *first_block, 2703 2702 int spaceres) 2704 2703 { ··· 2710 2709 /* Swap inode number for dirent in first parent */ 2711 2710 error = xfs_dir_replace(tp, dp1, name1, 2712 2711 ip2->i_ino, 2713 - first_block, free_list, spaceres); 2712 + first_block, dfops, spaceres); 2714 2713 if (error) 2715 2714 goto out_trans_abort; 2716 2715 2717 2716 /* Swap inode number for dirent in second parent */ 2718 2717 error = xfs_dir_replace(tp, dp2, name2, 2719 2718 ip1->i_ino, 2720 - first_block, free_list, spaceres); 2719 + first_block, dfops, spaceres); 2721 2720 if (error) 2722 2721 goto out_trans_abort; 2723 2722 ··· 2732 2731 if (S_ISDIR(VFS_I(ip2)->i_mode)) { 2733 2732 error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, 2734 2733 dp1->i_ino, first_block, 2735 - free_list, spaceres); 2734 + dfops, spaceres); 2736 2735 if (error) 2737 2736 goto out_trans_abort; 2738 2737 ··· 2759 2758 if (S_ISDIR(VFS_I(ip1)->i_mode)) { 2760 2759 error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, 2761 2760 dp2->i_ino, first_block, 2762 - free_list, spaceres); 2761 + dfops, spaceres); 2763 2762 if (error) 2764 2763 goto out_trans_abort; 2765 2764 ··· 2798 2797 } 2799 2798 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2800 2799 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 2801 - return xfs_finish_rename(tp, free_list); 2800 + return xfs_finish_rename(tp, dfops); 2802 2801 2803 2802 out_trans_abort: 2804 - xfs_bmap_cancel(free_list); 2803 + xfs_defer_cancel(dfops); 2805 2804 xfs_trans_cancel(tp); 2806 2805 return error; 2807 2806 } ··· 
2856 2855 { 2857 2856 struct xfs_mount *mp = src_dp->i_mount; 2858 2857 struct xfs_trans *tp; 2859 - struct xfs_bmap_free free_list; 2858 + struct xfs_defer_ops dfops; 2860 2859 xfs_fsblock_t first_block; 2861 2860 struct xfs_inode *wip = NULL; /* whiteout inode */ 2862 2861 struct xfs_inode *inodes[__XFS_SORT_INODES]; ··· 2945 2944 goto out_trans_cancel; 2946 2945 } 2947 2946 2948 - xfs_bmap_init(&free_list, &first_block); 2947 + xfs_defer_init(&dfops, &first_block); 2949 2948 2950 2949 /* RENAME_EXCHANGE is unique from here on. */ 2951 2950 if (flags & RENAME_EXCHANGE) 2952 2951 return xfs_cross_rename(tp, src_dp, src_name, src_ip, 2953 2952 target_dp, target_name, target_ip, 2954 - &free_list, &first_block, spaceres); 2953 + &dfops, &first_block, spaceres); 2955 2954 2956 2955 /* 2957 2956 * Set up the target. ··· 2973 2972 */ 2974 2973 error = xfs_dir_createname(tp, target_dp, target_name, 2975 2974 src_ip->i_ino, &first_block, 2976 - &free_list, spaceres); 2975 + &dfops, spaceres); 2977 2976 if (error) 2978 2977 goto out_bmap_cancel; 2979 2978 ··· 3013 3012 */ 3014 3013 error = xfs_dir_replace(tp, target_dp, target_name, 3015 3014 src_ip->i_ino, 3016 - &first_block, &free_list, spaceres); 3015 + &first_block, &dfops, spaceres); 3017 3016 if (error) 3018 3017 goto out_bmap_cancel; 3019 3018 ··· 3048 3047 */ 3049 3048 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 3050 3049 target_dp->i_ino, 3051 - &first_block, &free_list, spaceres); 3050 + &first_block, &dfops, spaceres); 3052 3051 ASSERT(error != -EEXIST); 3053 3052 if (error) 3054 3053 goto out_bmap_cancel; ··· 3087 3086 */ 3088 3087 if (wip) { 3089 3088 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, 3090 - &first_block, &free_list, spaceres); 3089 + &first_block, &dfops, spaceres); 3091 3090 } else 3092 3091 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 3093 - &first_block, &free_list, spaceres); 3092 + &first_block, &dfops, spaceres); 3094 3093 if (error) 3095 3094 goto 
out_bmap_cancel; 3096 3095 ··· 3125 3124 if (new_parent) 3126 3125 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 3127 3126 3128 - error = xfs_finish_rename(tp, &free_list); 3127 + error = xfs_finish_rename(tp, &dfops); 3129 3128 if (wip) 3130 3129 IRELE(wip); 3131 3130 return error; 3132 3131 3133 3132 out_bmap_cancel: 3134 - xfs_bmap_cancel(&free_list); 3133 + xfs_defer_cancel(&dfops); 3135 3134 out_trans_cancel: 3136 3135 xfs_trans_cancel(tp); 3137 3136 out_release_wip:

+2 -2

fs/xfs/xfs_inode.h

··· 27 27 struct xfs_dinode; 28 28 struct xfs_inode; 29 29 struct xfs_buf; 30 - struct xfs_bmap_free; 30 + struct xfs_defer_ops; 31 31 struct xfs_bmbt_irec; 32 32 struct xfs_inode_log_item; 33 33 struct xfs_mount; ··· 398 398 399 399 uint xfs_ip2xflags(struct xfs_inode *); 400 400 int xfs_ifree(struct xfs_trans *, xfs_inode_t *, 401 - struct xfs_bmap_free *); 401 + struct xfs_defer_ops *); 402 402 int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, 403 403 int, xfs_fsize_t); 404 404 void xfs_iext_realloc(xfs_inode_t *, int, int);

+6

fs/xfs/xfs_ioctl.c

··· 387 387 { 388 388 int error = -ENOMEM; 389 389 attrlist_cursor_kern_t *cursor; 390 + struct xfs_fsop_attrlist_handlereq __user *p = arg; 390 391 xfs_fsop_attrlist_handlereq_t al_hreq; 391 392 struct dentry *dentry; 392 393 char *kbuf; ··· 419 418 al_hreq.flags, cursor); 420 419 if (error) 421 420 goto out_kfree; 421 + 422 + if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { 423 + error = -EFAULT; 424 + goto out_kfree; 425 + } 422 426 423 427 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) 424 428 error = -EFAULT;

+16 -15

fs/xfs/xfs_iomap.c

··· 23 23 #include "xfs_log_format.h" 24 24 #include "xfs_trans_resv.h" 25 25 #include "xfs_mount.h" 26 + #include "xfs_defer.h" 26 27 #include "xfs_inode.h" 27 28 #include "xfs_btree.h" 28 29 #include "xfs_bmap_btree.h" ··· 129 128 int quota_flag; 130 129 int rt; 131 130 xfs_trans_t *tp; 132 - xfs_bmap_free_t free_list; 131 + struct xfs_defer_ops dfops; 133 132 uint qblocks, resblks, resrtextents; 134 133 int error; 135 134 int lockmode; ··· 232 231 * From this point onwards we overwrite the imap pointer that the 233 232 * caller gave to us. 234 233 */ 235 - xfs_bmap_init(&free_list, &firstfsb); 234 + xfs_defer_init(&dfops, &firstfsb); 236 235 nimaps = 1; 237 236 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 238 237 bmapi_flags, &firstfsb, resblks, imap, 239 - &nimaps, &free_list); 238 + &nimaps, &dfops); 240 239 if (error) 241 240 goto out_bmap_cancel; 242 241 243 242 /* 244 243 * Complete the transaction 245 244 */ 246 - error = xfs_bmap_finish(&tp, &free_list, NULL); 245 + error = xfs_defer_finish(&tp, &dfops, NULL); 247 246 if (error) 248 247 goto out_bmap_cancel; 249 248 ··· 267 266 return error; 268 267 269 268 out_bmap_cancel: 270 - xfs_bmap_cancel(&free_list); 269 + xfs_defer_cancel(&dfops); 271 270 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 272 271 out_trans_cancel: 273 272 xfs_trans_cancel(tp); ··· 686 685 xfs_fileoff_t offset_fsb, last_block; 687 686 xfs_fileoff_t end_fsb, map_start_fsb; 688 687 xfs_fsblock_t first_block; 689 - xfs_bmap_free_t free_list; 688 + struct xfs_defer_ops dfops; 690 689 xfs_filblks_t count_fsb; 691 690 xfs_trans_t *tp; 692 691 int nimaps; ··· 728 727 xfs_ilock(ip, XFS_ILOCK_EXCL); 729 728 xfs_trans_ijoin(tp, ip, 0); 730 729 731 - xfs_bmap_init(&free_list, &first_block); 730 + xfs_defer_init(&dfops, &first_block); 732 731 733 732 /* 734 733 * it is possible that the extents have changed since ··· 784 783 error = xfs_bmapi_write(tp, ip, map_start_fsb, 785 784 count_fsb, 0, &first_block, 786 
785 nres, imap, &nimaps, 787 - &free_list); 786 + &dfops); 788 787 if (error) 789 788 goto trans_cancel; 790 789 791 - error = xfs_bmap_finish(&tp, &free_list, NULL); 790 + error = xfs_defer_finish(&tp, &dfops, NULL); 792 791 if (error) 793 792 goto trans_cancel; 794 793 ··· 822 821 } 823 822 824 823 trans_cancel: 825 - xfs_bmap_cancel(&free_list); 824 + xfs_defer_cancel(&dfops); 826 825 xfs_trans_cancel(tp); 827 826 error0: 828 827 xfs_iunlock(ip, XFS_ILOCK_EXCL); ··· 843 842 int nimaps; 844 843 xfs_trans_t *tp; 845 844 xfs_bmbt_irec_t imap; 846 - xfs_bmap_free_t free_list; 845 + struct xfs_defer_ops dfops; 847 846 xfs_fsize_t i_size; 848 847 uint resblks; 849 848 int error; ··· 887 886 /* 888 887 * Modify the unwritten extent state of the buffer. 889 888 */ 890 - xfs_bmap_init(&free_list, &firstfsb); 889 + xfs_defer_init(&dfops, &firstfsb); 891 890 nimaps = 1; 892 891 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 893 892 XFS_BMAPI_CONVERT, &firstfsb, resblks, 894 - &imap, &nimaps, &free_list); 893 + &imap, &nimaps, &dfops); 895 894 if (error) 896 895 goto error_on_bmapi_transaction; 897 896 ··· 910 909 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 911 910 } 912 911 913 - error = xfs_bmap_finish(&tp, &free_list, NULL); 912 + error = xfs_defer_finish(&tp, &dfops, NULL); 914 913 if (error) 915 914 goto error_on_bmapi_transaction; 916 915 ··· 937 936 return 0; 938 937 939 938 error_on_bmapi_transaction: 940 - xfs_bmap_cancel(&free_list); 939 + xfs_defer_cancel(&dfops); 941 940 xfs_trans_cancel(tp); 942 941 xfs_iunlock(ip, XFS_ILOCK_EXCL); 943 942 return error;

+242 -100

fs/xfs/xfs_log_recover.c

··· 43 43 #include "xfs_bmap_btree.h" 44 44 #include "xfs_error.h" 45 45 #include "xfs_dir2.h" 46 + #include "xfs_rmap_item.h" 46 47 47 48 #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) 48 49 ··· 1912 1911 case XFS_LI_QUOTAOFF: 1913 1912 case XFS_LI_EFD: 1914 1913 case XFS_LI_EFI: 1914 + case XFS_LI_RUI: 1915 + case XFS_LI_RUD: 1915 1916 trace_xfs_log_recover_item_reorder_tail(log, 1916 1917 trans, item, pass); 1917 1918 list_move_tail(&item->ri_list, &inode_list); ··· 2231 2228 case XFS_ABTC_CRC_MAGIC: 2232 2229 case XFS_ABTB_MAGIC: 2233 2230 case XFS_ABTC_MAGIC: 2231 + case XFS_RMAP_CRC_MAGIC: 2234 2232 case XFS_IBT_CRC_MAGIC: 2235 2233 case XFS_IBT_MAGIC: { 2236 2234 struct xfs_btree_block *btb = blk; ··· 2399 2395 case XFS_BMAP_CRC_MAGIC: 2400 2396 case XFS_BMAP_MAGIC: 2401 2397 bp->b_ops = &xfs_bmbt_buf_ops; 2398 + break; 2399 + case XFS_RMAP_CRC_MAGIC: 2400 + bp->b_ops = &xfs_rmapbt_buf_ops; 2402 2401 break; 2403 2402 default: 2404 2403 xfs_warn(mp, "Bad btree block magic!"); ··· 3422 3415 } 3423 3416 3424 3417 /* 3418 + * This routine is called to create an in-core extent rmap update 3419 + * item from the rui format structure which was logged on disk. 3420 + * It allocates an in-core rui, copies the extents from the format 3421 + * structure into it, and adds the rui to the AIL with the given 3422 + * LSN. 
3423 + */ 3424 + STATIC int 3425 + xlog_recover_rui_pass2( 3426 + struct xlog *log, 3427 + struct xlog_recover_item *item, 3428 + xfs_lsn_t lsn) 3429 + { 3430 + int error; 3431 + struct xfs_mount *mp = log->l_mp; 3432 + struct xfs_rui_log_item *ruip; 3433 + struct xfs_rui_log_format *rui_formatp; 3434 + 3435 + rui_formatp = item->ri_buf[0].i_addr; 3436 + 3437 + ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); 3438 + error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format); 3439 + if (error) { 3440 + xfs_rui_item_free(ruip); 3441 + return error; 3442 + } 3443 + atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); 3444 + 3445 + spin_lock(&log->l_ailp->xa_lock); 3446 + /* 3447 + * The RUI has two references. One for the RUD and one for RUI to ensure 3448 + * it makes it into the AIL. Insert the RUI into the AIL directly and 3449 + * drop the RUI reference. Note that xfs_trans_ail_update() drops the 3450 + * AIL lock. 3451 + */ 3452 + xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn); 3453 + xfs_rui_release(ruip); 3454 + return 0; 3455 + } 3456 + 3457 + 3458 + /* 3459 + * This routine is called when an RUD format structure is found in a committed 3460 + * transaction in the log. Its purpose is to cancel the corresponding RUI if it 3461 + * was still in the log. To do this it searches the AIL for the RUI with an id 3462 + * equal to that in the RUD format structure. If we find it we drop the RUD 3463 + * reference, which removes the RUI from the AIL and frees it. 
3464 + */ 3465 + STATIC int 3466 + xlog_recover_rud_pass2( 3467 + struct xlog *log, 3468 + struct xlog_recover_item *item) 3469 + { 3470 + struct xfs_rud_log_format *rud_formatp; 3471 + struct xfs_rui_log_item *ruip = NULL; 3472 + struct xfs_log_item *lip; 3473 + __uint64_t rui_id; 3474 + struct xfs_ail_cursor cur; 3475 + struct xfs_ail *ailp = log->l_ailp; 3476 + 3477 + rud_formatp = item->ri_buf[0].i_addr; 3478 + ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format)); 3479 + rui_id = rud_formatp->rud_rui_id; 3480 + 3481 + /* 3482 + * Search for the RUI with the id in the RUD format structure in the 3483 + * AIL. 3484 + */ 3485 + spin_lock(&ailp->xa_lock); 3486 + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 3487 + while (lip != NULL) { 3488 + if (lip->li_type == XFS_LI_RUI) { 3489 + ruip = (struct xfs_rui_log_item *)lip; 3490 + if (ruip->rui_format.rui_id == rui_id) { 3491 + /* 3492 + * Drop the RUD reference to the RUI. This 3493 + * removes the RUI from the AIL and frees it. 3494 + */ 3495 + spin_unlock(&ailp->xa_lock); 3496 + xfs_rui_release(ruip); 3497 + spin_lock(&ailp->xa_lock); 3498 + break; 3499 + } 3500 + } 3501 + lip = xfs_trans_ail_cursor_next(ailp, &cur); 3502 + } 3503 + 3504 + xfs_trans_ail_cursor_done(&cur); 3505 + spin_unlock(&ailp->xa_lock); 3506 + 3507 + return 0; 3508 + } 3509 + 3510 + /* 3425 3511 * This routine is called when an inode create format structure is found in a 3426 3512 * committed transaction in the log. It's purpose is to initialise the inodes 3427 3513 * being allocated on disk. 
This requires us to get inode cluster buffers that ··· 3739 3639 case XFS_LI_EFI: 3740 3640 case XFS_LI_EFD: 3741 3641 case XFS_LI_QUOTAOFF: 3642 + case XFS_LI_RUI: 3643 + case XFS_LI_RUD: 3742 3644 default: 3743 3645 break; 3744 3646 } ··· 3764 3662 case XFS_LI_EFD: 3765 3663 case XFS_LI_DQUOT: 3766 3664 case XFS_LI_ICREATE: 3665 + case XFS_LI_RUI: 3666 + case XFS_LI_RUD: 3767 3667 /* nothing to do in pass 1 */ 3768 3668 return 0; 3769 3669 default: ··· 3796 3692 return xlog_recover_efi_pass2(log, item, trans->r_lsn); 3797 3693 case XFS_LI_EFD: 3798 3694 return xlog_recover_efd_pass2(log, item); 3695 + case XFS_LI_RUI: 3696 + return xlog_recover_rui_pass2(log, item, trans->r_lsn); 3697 + case XFS_LI_RUD: 3698 + return xlog_recover_rud_pass2(log, item); 3799 3699 case XFS_LI_DQUOT: 3800 3700 return xlog_recover_dquot_pass2(log, buffer_list, item, 3801 3701 trans->r_lsn); ··· 4272 4164 return 0; 4273 4165 } 4274 4166 4275 - /* 4276 - * Process an extent free intent item that was recovered from 4277 - * the log. We need to free the extents that it describes. 4278 - */ 4167 + /* Recover the EFI if necessary. */ 4279 4168 STATIC int 4280 4169 xlog_recover_process_efi( 4281 - xfs_mount_t *mp, 4282 - xfs_efi_log_item_t *efip) 4170 + struct xfs_mount *mp, 4171 + struct xfs_ail *ailp, 4172 + struct xfs_log_item *lip) 4283 4173 { 4284 - xfs_efd_log_item_t *efdp; 4285 - xfs_trans_t *tp; 4286 - int i; 4287 - int error = 0; 4288 - xfs_extent_t *extp; 4289 - xfs_fsblock_t startblock_fsb; 4290 - 4291 - ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); 4174 + struct xfs_efi_log_item *efip; 4175 + int error; 4292 4176 4293 4177 /* 4294 - * First check the validity of the extents described by the 4295 - * EFI. If any are bad, then assume that all are bad and 4296 - * just toss the EFI. 4178 + * Skip EFIs that we've already processed. 
4297 4179 */ 4298 - for (i = 0; i < efip->efi_format.efi_nextents; i++) { 4299 - extp = &(efip->efi_format.efi_extents[i]); 4300 - startblock_fsb = XFS_BB_TO_FSB(mp, 4301 - XFS_FSB_TO_DADDR(mp, extp->ext_start)); 4302 - if ((startblock_fsb == 0) || 4303 - (extp->ext_len == 0) || 4304 - (startblock_fsb >= mp->m_sb.sb_dblocks) || 4305 - (extp->ext_len >= mp->m_sb.sb_agblocks)) { 4306 - /* 4307 - * This will pull the EFI from the AIL and 4308 - * free the memory associated with it. 4309 - */ 4310 - set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 4311 - xfs_efi_release(efip); 4312 - return -EIO; 4313 - } 4314 - } 4180 + efip = container_of(lip, struct xfs_efi_log_item, efi_item); 4181 + if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) 4182 + return 0; 4315 4183 4316 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 4317 - if (error) 4318 - return error; 4319 - efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 4184 + spin_unlock(&ailp->xa_lock); 4185 + error = xfs_efi_recover(mp, efip); 4186 + spin_lock(&ailp->xa_lock); 4320 4187 4321 - for (i = 0; i < efip->efi_format.efi_nextents; i++) { 4322 - extp = &(efip->efi_format.efi_extents[i]); 4323 - error = xfs_trans_free_extent(tp, efdp, extp->ext_start, 4324 - extp->ext_len); 4325 - if (error) 4326 - goto abort_error; 4327 - 4328 - } 4329 - 4330 - set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 4331 - error = xfs_trans_commit(tp); 4332 - return error; 4333 - 4334 - abort_error: 4335 - xfs_trans_cancel(tp); 4336 4188 return error; 4337 4189 } 4338 4190 4191 + /* Release the EFI since we're cancelling everything. 
*/ 4192 + STATIC void 4193 + xlog_recover_cancel_efi( 4194 + struct xfs_mount *mp, 4195 + struct xfs_ail *ailp, 4196 + struct xfs_log_item *lip) 4197 + { 4198 + struct xfs_efi_log_item *efip; 4199 + 4200 + efip = container_of(lip, struct xfs_efi_log_item, efi_item); 4201 + 4202 + spin_unlock(&ailp->xa_lock); 4203 + xfs_efi_release(efip); 4204 + spin_lock(&ailp->xa_lock); 4205 + } 4206 + 4207 + /* Recover the RUI if necessary. */ 4208 + STATIC int 4209 + xlog_recover_process_rui( 4210 + struct xfs_mount *mp, 4211 + struct xfs_ail *ailp, 4212 + struct xfs_log_item *lip) 4213 + { 4214 + struct xfs_rui_log_item *ruip; 4215 + int error; 4216 + 4217 + /* 4218 + * Skip RUIs that we've already processed. 4219 + */ 4220 + ruip = container_of(lip, struct xfs_rui_log_item, rui_item); 4221 + if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags)) 4222 + return 0; 4223 + 4224 + spin_unlock(&ailp->xa_lock); 4225 + error = xfs_rui_recover(mp, ruip); 4226 + spin_lock(&ailp->xa_lock); 4227 + 4228 + return error; 4229 + } 4230 + 4231 + /* Release the RUI since we're cancelling everything. */ 4232 + STATIC void 4233 + xlog_recover_cancel_rui( 4234 + struct xfs_mount *mp, 4235 + struct xfs_ail *ailp, 4236 + struct xfs_log_item *lip) 4237 + { 4238 + struct xfs_rui_log_item *ruip; 4239 + 4240 + ruip = container_of(lip, struct xfs_rui_log_item, rui_item); 4241 + 4242 + spin_unlock(&ailp->xa_lock); 4243 + xfs_rui_release(ruip); 4244 + spin_lock(&ailp->xa_lock); 4245 + } 4246 + 4247 + /* Is this log item a deferred action intent? */ 4248 + static inline bool xlog_item_is_intent(struct xfs_log_item *lip) 4249 + { 4250 + switch (lip->li_type) { 4251 + case XFS_LI_EFI: 4252 + case XFS_LI_RUI: 4253 + return true; 4254 + default: 4255 + return false; 4256 + } 4257 + } 4258 + 4339 4259 /* 4340 - * When this is called, all of the EFIs which did not have 4341 - * corresponding EFDs should be in the AIL. What we do now 4342 - * is free the extents associated with each one. 
4260 + * When this is called, all of the log intent items which did not have 4261 + * corresponding log done items should be in the AIL. What we do now 4262 + * is update the data structures associated with each one. 4343 4263 * 4344 - * Since we process the EFIs in normal transactions, they 4345 - * will be removed at some point after the commit. This prevents 4346 - * us from just walking down the list processing each one. 4347 - * We'll use a flag in the EFI to skip those that we've already 4348 - * processed and use the AIL iteration mechanism's generation 4349 - * count to try to speed this up at least a bit. 4264 + * Since we process the log intent items in normal transactions, they 4265 + * will be removed at some point after the commit. This prevents us 4266 + * from just walking down the list processing each one. We'll use a 4267 + * flag in the intent item to skip those that we've already processed 4268 + * and use the AIL iteration mechanism's generation count to try to 4269 + * speed this up at least a bit. 4350 4270 * 4351 - * When we start, we know that the EFIs are the only things in 4352 - * the AIL. As we process them, however, other items are added 4353 - * to the AIL. Since everything added to the AIL must come after 4354 - * everything already in the AIL, we stop processing as soon as 4355 - * we see something other than an EFI in the AIL. 4271 + * When we start, we know that the intents are the only things in the 4272 + * AIL. As we process them, however, other items are added to the 4273 + * AIL. 
4356 4274 */ 4357 4275 STATIC int 4358 - xlog_recover_process_efis( 4276 + xlog_recover_process_intents( 4359 4277 struct xlog *log) 4360 4278 { 4361 4279 struct xfs_log_item *lip; 4362 - struct xfs_efi_log_item *efip; 4363 4280 int error = 0; 4364 4281 struct xfs_ail_cursor cur; 4365 4282 struct xfs_ail *ailp; 4283 + xfs_lsn_t last_lsn; 4366 4284 4367 4285 ailp = log->l_ailp; 4368 4286 spin_lock(&ailp->xa_lock); 4369 4287 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 4288 + last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); 4370 4289 while (lip != NULL) { 4371 4290 /* 4372 - * We're done when we see something other than an EFI. 4373 - * There should be no EFIs left in the AIL now. 4291 + * We're done when we see something other than an intent. 4292 + * There should be no intents left in the AIL now. 4374 4293 */ 4375 - if (lip->li_type != XFS_LI_EFI) { 4294 + if (!xlog_item_is_intent(lip)) { 4376 4295 #ifdef DEBUG 4377 4296 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) 4378 - ASSERT(lip->li_type != XFS_LI_EFI); 4297 + ASSERT(!xlog_item_is_intent(lip)); 4379 4298 #endif 4380 4299 break; 4381 4300 } 4382 4301 4383 4302 /* 4384 - * Skip EFIs that we've already processed. 4303 + * We should never see a redo item with a LSN higher than 4304 + * the last transaction we found in the log at the start 4305 + * of recovery. 
4385 4306 */ 4386 - efip = container_of(lip, struct xfs_efi_log_item, efi_item); 4387 - if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) { 4388 - lip = xfs_trans_ail_cursor_next(ailp, &cur); 4389 - continue; 4390 - } 4307 + ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); 4391 4308 4392 - spin_unlock(&ailp->xa_lock); 4393 - error = xlog_recover_process_efi(log->l_mp, efip); 4394 - spin_lock(&ailp->xa_lock); 4309 + switch (lip->li_type) { 4310 + case XFS_LI_EFI: 4311 + error = xlog_recover_process_efi(log->l_mp, ailp, lip); 4312 + break; 4313 + case XFS_LI_RUI: 4314 + error = xlog_recover_process_rui(log->l_mp, ailp, lip); 4315 + break; 4316 + } 4395 4317 if (error) 4396 4318 goto out; 4397 4319 lip = xfs_trans_ail_cursor_next(ailp, &cur); ··· 4433 4295 } 4434 4296 4435 4297 /* 4436 - * A cancel occurs when the mount has failed and we're bailing out. Release all 4437 - * pending EFIs so they don't pin the AIL. 4298 + * A cancel occurs when the mount has failed and we're bailing out. 4299 + * Release all pending log intent items so they don't pin the AIL. 4438 4300 */ 4439 4301 STATIC int 4440 - xlog_recover_cancel_efis( 4302 + xlog_recover_cancel_intents( 4441 4303 struct xlog *log) 4442 4304 { 4443 4305 struct xfs_log_item *lip; 4444 - struct xfs_efi_log_item *efip; 4445 4306 int error = 0; 4446 4307 struct xfs_ail_cursor cur; 4447 4308 struct xfs_ail *ailp; ··· 4450 4313 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 4451 4314 while (lip != NULL) { 4452 4315 /* 4453 - * We're done when we see something other than an EFI. 4454 - * There should be no EFIs left in the AIL now. 4316 + * We're done when we see something other than an intent. 4317 + * There should be no intents left in the AIL now. 
4455 4318 */ 4456 - if (lip->li_type != XFS_LI_EFI) { 4319 + if (!xlog_item_is_intent(lip)) { 4457 4320 #ifdef DEBUG 4458 4321 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) 4459 - ASSERT(lip->li_type != XFS_LI_EFI); 4322 + ASSERT(!xlog_item_is_intent(lip)); 4460 4323 #endif 4461 4324 break; 4462 4325 } 4463 4326 4464 - efip = container_of(lip, struct xfs_efi_log_item, efi_item); 4465 - 4466 - spin_unlock(&ailp->xa_lock); 4467 - xfs_efi_release(efip); 4468 - spin_lock(&ailp->xa_lock); 4327 + switch (lip->li_type) { 4328 + case XFS_LI_EFI: 4329 + xlog_recover_cancel_efi(log->l_mp, ailp, lip); 4330 + break; 4331 + case XFS_LI_RUI: 4332 + xlog_recover_cancel_rui(log->l_mp, ailp, lip); 4333 + break; 4334 + } 4469 4335 4470 4336 lip = xfs_trans_ail_cursor_next(ailp, &cur); 4471 4337 } ··· 5163 5023 xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); 5164 5024 return error; 5165 5025 } 5026 + mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 5166 5027 5167 5028 xlog_recover_check_summary(log); 5168 5029 ··· 5280 5139 */ 5281 5140 if (log->l_flags & XLOG_RECOVERY_NEEDED) { 5282 5141 int error; 5283 - error = xlog_recover_process_efis(log); 5142 + error = xlog_recover_process_intents(log); 5284 5143 if (error) { 5285 - xfs_alert(log->l_mp, "Failed to recover EFIs"); 5144 + xfs_alert(log->l_mp, "Failed to recover intents"); 5286 5145 return error; 5287 5146 } 5147 + 5288 5148 /* 5289 - * Sync the log to get all the EFIs out of the AIL. 5149 + * Sync the log to get all the intents out of the AIL. 5290 5150 * This isn't absolutely necessary, but it helps in 5291 5151 * case the unlink transactions would have problems 5292 - * pushing the EFIs out of the way. 5152 + * pushing the intents out of the way. 
5293 5153 */ 5294 5154 xfs_log_force(log->l_mp, XFS_LOG_SYNC); 5295 5155 ··· 5315 5173 int error = 0; 5316 5174 5317 5175 if (log->l_flags & XLOG_RECOVERY_NEEDED) 5318 - error = xlog_recover_cancel_efis(log); 5176 + error = xlog_recover_cancel_intents(log); 5319 5177 5320 5178 return error; 5321 5179 }

+6 -1

fs/xfs/xfs_mount.c

··· 24 24 #include "xfs_bit.h" 25 25 #include "xfs_sb.h" 26 26 #include "xfs_mount.h" 27 + #include "xfs_defer.h" 27 28 #include "xfs_da_format.h" 28 29 #include "xfs_da_btree.h" 29 30 #include "xfs_inode.h" ··· 42 41 #include "xfs_trace.h" 43 42 #include "xfs_icache.h" 44 43 #include "xfs_sysfs.h" 44 + #include "xfs_rmap_btree.h" 45 45 46 46 47 47 static DEFINE_MUTEX(xfs_uuid_table_mutex); ··· 232 230 233 231 if (maxagi) 234 232 *maxagi = index; 233 + 234 + mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); 235 235 return 0; 236 236 237 237 out_unwind: ··· 683 679 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 684 680 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 685 681 xfs_ialloc_compute_maxlevels(mp); 682 + xfs_rmapbt_compute_maxlevels(mp); 686 683 687 684 xfs_set_maxicount(mp); 688 685 ··· 1221 1216 batch = XFS_FDBLOCKS_BATCH; 1222 1217 1223 1218 __percpu_counter_add(&mp->m_fdblocks, delta, batch); 1224 - if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp), 1219 + if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside, 1225 1220 XFS_FDBLOCKS_BATCH) >= 0) { 1226 1221 /* we had space! */ 1227 1222 return 0;

+6

fs/xfs/xfs_mount.h

··· 116 116 uint m_bmap_dmnr[2]; /* min bmap btree records */ 117 117 uint m_inobt_mxr[2]; /* max inobt btree records */ 118 118 uint m_inobt_mnr[2]; /* min inobt btree records */ 119 + uint m_rmap_mxr[2]; /* max rmap btree records */ 120 + uint m_rmap_mnr[2]; /* min rmap btree records */ 119 121 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 120 122 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 121 123 uint m_in_maxlevels; /* max inobt btree levels. */ 124 + uint m_rmap_maxlevels; /* max rmap btree levels */ 125 + xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ 126 + uint m_alloc_set_aside; /* space we can't use */ 127 + uint m_ag_max_usable; /* max space per AG */ 122 128 struct radix_tree_root m_perag_tree; /* per-ag accounting info */ 123 129 spinlock_t m_perag_lock; /* lock for m_perag_tree */ 124 130 struct mutex m_growlock; /* growfs mutex */

+3

fs/xfs/xfs_ondisk.h

··· 49 49 XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); 50 50 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); 51 51 XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); 52 + XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20); 53 + XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24); 52 54 XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8); 53 55 XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8); 54 56 XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4); 55 57 XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8); 56 58 XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4); 59 + XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4); 57 60 58 61 /* dir/attr trees */ 59 62 XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);

+536

fs/xfs/xfs_rmap_item.c

··· 1 + /* 2 + * Copyright (C) 2016 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_format.h" 23 + #include "xfs_log_format.h" 24 + #include "xfs_trans_resv.h" 25 + #include "xfs_bit.h" 26 + #include "xfs_mount.h" 27 + #include "xfs_defer.h" 28 + #include "xfs_trans.h" 29 + #include "xfs_trans_priv.h" 30 + #include "xfs_buf_item.h" 31 + #include "xfs_rmap_item.h" 32 + #include "xfs_log.h" 33 + #include "xfs_rmap.h" 34 + 35 + 36 + kmem_zone_t *xfs_rui_zone; 37 + kmem_zone_t *xfs_rud_zone; 38 + 39 + static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip) 40 + { 41 + return container_of(lip, struct xfs_rui_log_item, rui_item); 42 + } 43 + 44 + void 45 + xfs_rui_item_free( 46 + struct xfs_rui_log_item *ruip) 47 + { 48 + if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) 49 + kmem_free(ruip); 50 + else 51 + kmem_zone_free(xfs_rui_zone, ruip); 52 + } 53 + 54 + /* 55 + * This returns the number of iovecs needed to log the given rui item. 56 + * We only need 1 iovec for an rui item. It just logs the rui_log_format 57 + * structure. 
58 + */ 59 + static inline int 60 + xfs_rui_item_sizeof( 61 + struct xfs_rui_log_item *ruip) 62 + { 63 + return sizeof(struct xfs_rui_log_format) + 64 + (ruip->rui_format.rui_nextents - 1) * 65 + sizeof(struct xfs_map_extent); 66 + } 67 + 68 + STATIC void 69 + xfs_rui_item_size( 70 + struct xfs_log_item *lip, 71 + int *nvecs, 72 + int *nbytes) 73 + { 74 + *nvecs += 1; 75 + *nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip)); 76 + } 77 + 78 + /* 79 + * This is called to fill in the vector of log iovecs for the 80 + * given rui log item. We use only 1 iovec, and we point that 81 + * at the rui_log_format structure embedded in the rui item. 82 + * It is at this point that we assert that all of the extent 83 + * slots in the rui item have been filled. 84 + */ 85 + STATIC void 86 + xfs_rui_item_format( 87 + struct xfs_log_item *lip, 88 + struct xfs_log_vec *lv) 89 + { 90 + struct xfs_rui_log_item *ruip = RUI_ITEM(lip); 91 + struct xfs_log_iovec *vecp = NULL; 92 + 93 + ASSERT(atomic_read(&ruip->rui_next_extent) == 94 + ruip->rui_format.rui_nextents); 95 + 96 + ruip->rui_format.rui_type = XFS_LI_RUI; 97 + ruip->rui_format.rui_size = 1; 98 + 99 + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, 100 + xfs_rui_item_sizeof(ruip)); 101 + } 102 + 103 + /* 104 + * Pinning has no meaning for an rui item, so just return. 105 + */ 106 + STATIC void 107 + xfs_rui_item_pin( 108 + struct xfs_log_item *lip) 109 + { 110 + } 111 + 112 + /* 113 + * The unpin operation is the last place an RUI is manipulated in the log. It is 114 + * either inserted in the AIL or aborted in the event of a log I/O error. In 115 + * either case, the RUI transaction has been successfully committed to make it 116 + * this far. Therefore, we expect whoever committed the RUI to either construct 117 + * and commit the RUD or drop the RUD's reference in the event of error. Simply 118 + * drop the log's RUI reference now that the log is done with it. 
119 + */ 120 + STATIC void 121 + xfs_rui_item_unpin( 122 + struct xfs_log_item *lip, 123 + int remove) 124 + { 125 + struct xfs_rui_log_item *ruip = RUI_ITEM(lip); 126 + 127 + xfs_rui_release(ruip); 128 + } 129 + 130 + /* 131 + * RUI items have no locking or pushing. However, since RUIs are pulled from 132 + * the AIL when their corresponding RUDs are committed to disk, their situation 133 + * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller 134 + * will eventually flush the log. This should help in getting the RUI out of 135 + * the AIL. 136 + */ 137 + STATIC uint 138 + xfs_rui_item_push( 139 + struct xfs_log_item *lip, 140 + struct list_head *buffer_list) 141 + { 142 + return XFS_ITEM_PINNED; 143 + } 144 + 145 + /* 146 + * The RUI has been either committed or aborted if the transaction has been 147 + * cancelled. If the transaction was cancelled, an RUD isn't going to be 148 + * constructed and thus we free the RUI here directly. 149 + */ 150 + STATIC void 151 + xfs_rui_item_unlock( 152 + struct xfs_log_item *lip) 153 + { 154 + if (lip->li_flags & XFS_LI_ABORTED) 155 + xfs_rui_item_free(RUI_ITEM(lip)); 156 + } 157 + 158 + /* 159 + * The RUI is logged only once and cannot be moved in the log, so simply return 160 + * the lsn at which it's been logged. 161 + */ 162 + STATIC xfs_lsn_t 163 + xfs_rui_item_committed( 164 + struct xfs_log_item *lip, 165 + xfs_lsn_t lsn) 166 + { 167 + return lsn; 168 + } 169 + 170 + /* 171 + * The RUI dependency tracking op doesn't do squat. It can't because 172 + * it doesn't know where the free extent is coming from. The dependency 173 + * tracking has to be handled by the "enclosing" metadata object. For 174 + * example, for inodes, the inode is locked throughout the extent freeing 175 + * so the dependency should be recorded there. 
176 + */ 177 + STATIC void 178 + xfs_rui_item_committing( 179 + struct xfs_log_item *lip, 180 + xfs_lsn_t lsn) 181 + { 182 + } 183 + 184 + /* 185 + * This is the ops vector shared by all rui log items. 186 + */ 187 + static const struct xfs_item_ops xfs_rui_item_ops = { 188 + .iop_size = xfs_rui_item_size, 189 + .iop_format = xfs_rui_item_format, 190 + .iop_pin = xfs_rui_item_pin, 191 + .iop_unpin = xfs_rui_item_unpin, 192 + .iop_unlock = xfs_rui_item_unlock, 193 + .iop_committed = xfs_rui_item_committed, 194 + .iop_push = xfs_rui_item_push, 195 + .iop_committing = xfs_rui_item_committing, 196 + }; 197 + 198 + /* 199 + * Allocate and initialize an rui item with the given number of extents. 200 + */ 201 + struct xfs_rui_log_item * 202 + xfs_rui_init( 203 + struct xfs_mount *mp, 204 + uint nextents) 205 + 206 + { 207 + struct xfs_rui_log_item *ruip; 208 + uint size; 209 + 210 + ASSERT(nextents > 0); 211 + if (nextents > XFS_RUI_MAX_FAST_EXTENTS) { 212 + size = (uint)(sizeof(struct xfs_rui_log_item) + 213 + ((nextents - 1) * sizeof(struct xfs_map_extent))); 214 + ruip = kmem_zalloc(size, KM_SLEEP); 215 + } else { 216 + ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP); 217 + } 218 + 219 + xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); 220 + ruip->rui_format.rui_nextents = nextents; 221 + ruip->rui_format.rui_id = (uintptr_t)(void *)ruip; 222 + atomic_set(&ruip->rui_next_extent, 0); 223 + atomic_set(&ruip->rui_refcount, 2); 224 + 225 + return ruip; 226 + } 227 + 228 + /* 229 + * Copy an RUI format buffer from the given buf, and into the destination 230 + * RUI format structure. The RUI/RUD items were designed not to need any 231 + * special alignment handling. 
232 + */ 233 + int 234 + xfs_rui_copy_format( 235 + struct xfs_log_iovec *buf, 236 + struct xfs_rui_log_format *dst_rui_fmt) 237 + { 238 + struct xfs_rui_log_format *src_rui_fmt; 239 + uint len; 240 + 241 + src_rui_fmt = buf->i_addr; 242 + len = sizeof(struct xfs_rui_log_format) + 243 + (src_rui_fmt->rui_nextents - 1) * 244 + sizeof(struct xfs_map_extent); 245 + 246 + if (buf->i_len != len) 247 + return -EFSCORRUPTED; 248 + 249 + memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len); 250 + return 0; 251 + } 252 + 253 + /* 254 + * Freeing the RUI requires that we remove it from the AIL if it has already 255 + * been placed there. However, the RUI may not yet have been placed in the AIL 256 + * when called by xfs_rui_release() from RUD processing due to the ordering of 257 + * committed vs unpin operations in bulk insert operations. Hence the reference 258 + * count to ensure only the last caller frees the RUI. 259 + */ 260 + void 261 + xfs_rui_release( 262 + struct xfs_rui_log_item *ruip) 263 + { 264 + if (atomic_dec_and_test(&ruip->rui_refcount)) { 265 + xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR); 266 + xfs_rui_item_free(ruip); 267 + } 268 + } 269 + 270 + static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip) 271 + { 272 + return container_of(lip, struct xfs_rud_log_item, rud_item); 273 + } 274 + 275 + STATIC void 276 + xfs_rud_item_size( 277 + struct xfs_log_item *lip, 278 + int *nvecs, 279 + int *nbytes) 280 + { 281 + *nvecs += 1; 282 + *nbytes += sizeof(struct xfs_rud_log_format); 283 + } 284 + 285 + /* 286 + * This is called to fill in the vector of log iovecs for the 287 + * given rud log item. We use only 1 iovec, and we point that 288 + * at the rud_log_format structure embedded in the rud item. 289 + * It is at this point that we assert that all of the extent 290 + * slots in the rud item have been filled. 
291 + */ 292 + STATIC void 293 + xfs_rud_item_format( 294 + struct xfs_log_item *lip, 295 + struct xfs_log_vec *lv) 296 + { 297 + struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 298 + struct xfs_log_iovec *vecp = NULL; 299 + 300 + rudp->rud_format.rud_type = XFS_LI_RUD; 301 + rudp->rud_format.rud_size = 1; 302 + 303 + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format, 304 + sizeof(struct xfs_rud_log_format)); 305 + } 306 + 307 + /* 308 + * Pinning has no meaning for an rud item, so just return. 309 + */ 310 + STATIC void 311 + xfs_rud_item_pin( 312 + struct xfs_log_item *lip) 313 + { 314 + } 315 + 316 + /* 317 + * Since pinning has no meaning for an rud item, unpinning does 318 + * not either. 319 + */ 320 + STATIC void 321 + xfs_rud_item_unpin( 322 + struct xfs_log_item *lip, 323 + int remove) 324 + { 325 + } 326 + 327 + /* 328 + * There isn't much you can do to push on an rud item. It is simply stuck 329 + * waiting for the log to be flushed to disk. 330 + */ 331 + STATIC uint 332 + xfs_rud_item_push( 333 + struct xfs_log_item *lip, 334 + struct list_head *buffer_list) 335 + { 336 + return XFS_ITEM_PINNED; 337 + } 338 + 339 + /* 340 + * The RUD is either committed or aborted if the transaction is cancelled. If 341 + * the transaction is cancelled, drop our reference to the RUI and free the 342 + * RUD. 343 + */ 344 + STATIC void 345 + xfs_rud_item_unlock( 346 + struct xfs_log_item *lip) 347 + { 348 + struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 349 + 350 + if (lip->li_flags & XFS_LI_ABORTED) { 351 + xfs_rui_release(rudp->rud_ruip); 352 + kmem_zone_free(xfs_rud_zone, rudp); 353 + } 354 + } 355 + 356 + /* 357 + * When the rud item is committed to disk, all we need to do is delete our 358 + * reference to our partner rui item and then free ourselves. Since we're 359 + * freeing ourselves we must return -1 to keep the transaction code from 360 + * further referencing this item. 
361 + */ 362 + STATIC xfs_lsn_t 363 + xfs_rud_item_committed( 364 + struct xfs_log_item *lip, 365 + xfs_lsn_t lsn) 366 + { 367 + struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 368 + 369 + /* 370 + * Drop the RUI reference regardless of whether the RUD has been 371 + * aborted. Once the RUD transaction is constructed, it is the sole 372 + * responsibility of the RUD to release the RUI (even if the RUI is 373 + * aborted due to log I/O error). 374 + */ 375 + xfs_rui_release(rudp->rud_ruip); 376 + kmem_zone_free(xfs_rud_zone, rudp); 377 + 378 + return (xfs_lsn_t)-1; 379 + } 380 + 381 + /* 382 + * The RUD dependency tracking op doesn't do squat. It can't because 383 + * it doesn't know where the free extent is coming from. The dependency 384 + * tracking has to be handled by the "enclosing" metadata object. For 385 + * example, for inodes, the inode is locked throughout the extent freeing 386 + * so the dependency should be recorded there. 387 + */ 388 + STATIC void 389 + xfs_rud_item_committing( 390 + struct xfs_log_item *lip, 391 + xfs_lsn_t lsn) 392 + { 393 + } 394 + 395 + /* 396 + * This is the ops vector shared by all rud log items. 397 + */ 398 + static const struct xfs_item_ops xfs_rud_item_ops = { 399 + .iop_size = xfs_rud_item_size, 400 + .iop_format = xfs_rud_item_format, 401 + .iop_pin = xfs_rud_item_pin, 402 + .iop_unpin = xfs_rud_item_unpin, 403 + .iop_unlock = xfs_rud_item_unlock, 404 + .iop_committed = xfs_rud_item_committed, 405 + .iop_push = xfs_rud_item_push, 406 + .iop_committing = xfs_rud_item_committing, 407 + }; 408 + 409 + /* 410 + * Allocate and initialize an rud item paired with the given rui item.
411 + */ 412 + struct xfs_rud_log_item * 413 + xfs_rud_init( 414 + struct xfs_mount *mp, 415 + struct xfs_rui_log_item *ruip) 416 + 417 + { 418 + struct xfs_rud_log_item *rudp; 419 + 420 + rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); 421 + xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); 422 + rudp->rud_ruip = ruip; 423 + rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; 424 + 425 + return rudp; 426 + } 427 + 428 + /* 429 + * Process an rmap update intent item that was recovered from the log. 430 + * We need to update the rmapbt. 431 + */ 432 + int 433 + xfs_rui_recover( 434 + struct xfs_mount *mp, 435 + struct xfs_rui_log_item *ruip) 436 + { 437 + int i; 438 + int error = 0; 439 + struct xfs_map_extent *rmap; 440 + xfs_fsblock_t startblock_fsb; 441 + bool op_ok; 442 + struct xfs_rud_log_item *rudp; 443 + enum xfs_rmap_intent_type type; 444 + int whichfork; 445 + xfs_exntst_t state; 446 + struct xfs_trans *tp; 447 + struct xfs_btree_cur *rcur = NULL; 448 + 449 + ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags)); 450 + 451 + /* 452 + * First check the validity of the extents described by the 453 + * RUI. If any are bad, then assume that all are bad and 454 + * just toss the RUI. 
455 + */ 456 + for (i = 0; i < ruip->rui_format.rui_nextents; i++) { 457 + rmap = &ruip->rui_format.rui_extents[i]; 458 + startblock_fsb = XFS_BB_TO_FSB(mp, 459 + XFS_FSB_TO_DADDR(mp, rmap->me_startblock)); 460 + switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { 461 + case XFS_RMAP_EXTENT_MAP: 462 + case XFS_RMAP_EXTENT_UNMAP: 463 + case XFS_RMAP_EXTENT_CONVERT: 464 + case XFS_RMAP_EXTENT_ALLOC: 465 + case XFS_RMAP_EXTENT_FREE: 466 + op_ok = true; 467 + break; 468 + default: 469 + op_ok = false; 470 + break; 471 + } 472 + if (!op_ok || startblock_fsb == 0 || 473 + rmap->me_len == 0 || 474 + startblock_fsb >= mp->m_sb.sb_dblocks || 475 + rmap->me_len >= mp->m_sb.sb_agblocks || 476 + (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) { 477 + /* 478 + * This will pull the RUI from the AIL and 479 + * free the memory associated with it. 480 + */ 481 + set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags); 482 + xfs_rui_release(ruip); 483 + return -EIO; 484 + } 485 + } 486 + 487 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 488 + if (error) 489 + return error; 490 + rudp = xfs_trans_get_rud(tp, ruip); 491 + 492 + for (i = 0; i < ruip->rui_format.rui_nextents; i++) { 493 + rmap = &ruip->rui_format.rui_extents[i]; 494 + state = (rmap->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ? 495 + XFS_EXT_UNWRITTEN : XFS_EXT_NORM; 496 + whichfork = (rmap->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ? 
497 + XFS_ATTR_FORK : XFS_DATA_FORK; 498 + switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { 499 + case XFS_RMAP_EXTENT_MAP: 500 + type = XFS_RMAP_MAP; 501 + break; 502 + case XFS_RMAP_EXTENT_UNMAP: 503 + type = XFS_RMAP_UNMAP; 504 + break; 505 + case XFS_RMAP_EXTENT_CONVERT: 506 + type = XFS_RMAP_CONVERT; 507 + break; 508 + case XFS_RMAP_EXTENT_ALLOC: 509 + type = XFS_RMAP_ALLOC; 510 + break; 511 + case XFS_RMAP_EXTENT_FREE: 512 + type = XFS_RMAP_FREE; 513 + break; 514 + default: 515 + error = -EFSCORRUPTED; 516 + goto abort_error; 517 + } 518 + error = xfs_trans_log_finish_rmap_update(tp, rudp, type, 519 + rmap->me_owner, whichfork, 520 + rmap->me_startoff, rmap->me_startblock, 521 + rmap->me_len, state, &rcur); 522 + if (error) 523 + goto abort_error; 524 + 525 + } 526 + 527 + xfs_rmap_finish_one_cleanup(tp, rcur, error); 528 + set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags); 529 + error = xfs_trans_commit(tp); 530 + return error; 531 + 532 + abort_error: 533 + xfs_rmap_finish_one_cleanup(tp, rcur, error); 534 + xfs_trans_cancel(tp); 535 + return error; 536 + }

+95

fs/xfs/xfs_rmap_item.h

··· 1 + /* 2 + * Copyright (C) 2016 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #ifndef __XFS_RMAP_ITEM_H__ 21 + #define __XFS_RMAP_ITEM_H__ 22 + 23 + /* 24 + * There are (currently) five pairs of rmap btree redo item types: map, unmap, 25 + * convert, alloc, and free. The common abbreviations for these are RUI (rmap update 26 + * intent) and RUD (rmap update done). The redo item type is encoded in the 27 + * flags field of each xfs_map_extent. 28 + * 29 + * *I items should be recorded in the *first* of a series of rolled 30 + * transactions, and the *D items should be recorded in the same transaction 31 + * that records the associated rmapbt updates. Typically, the first 32 + * transaction will record a bmbt update, followed by some number of 33 + * transactions containing rmapbt updates, and finally transactions with any 34 + * bnobt/cntbt updates. 35 + * 36 + * Should the system crash after the commit of the first transaction but 37 + * before the commit of the final transaction in a series, log recovery will 38 + * use the redo information recorded by the intent items to replay the 39 + * (rmapbt/bnobt/cntbt) metadata updates in the non-first transaction.
40 + */ 41 + 42 + /* kernel only RUI/RUD definitions */ 43 + 44 + struct xfs_mount; 45 + struct kmem_zone; 46 + 47 + /* 48 + * Max number of extents in fast allocation path. 49 + */ 50 + #define XFS_RUI_MAX_FAST_EXTENTS 16 51 + 52 + /* 53 + * Define RUI flag bits. Manipulated by set/clear/test_bit operators. 54 + */ 55 + #define XFS_RUI_RECOVERED 1 56 + 57 + /* 58 + * This is the "rmap update intent" log item. It is used to log the fact that 59 + * some reverse mappings need to change. It is used in conjunction with the 60 + * "rmap update done" log item described below. 61 + * 62 + * These log items follow the same rules as struct xfs_efi_log_item; see the 63 + * comments about that structure (in xfs_extfree_item.h) for more details. 64 + */ 65 + struct xfs_rui_log_item { 66 + struct xfs_log_item rui_item; 67 + atomic_t rui_refcount; 68 + atomic_t rui_next_extent; 69 + unsigned long rui_flags; /* misc flags */ 70 + struct xfs_rui_log_format rui_format; 71 + }; 72 + 73 + /* 74 + * This is the "rmap update done" log item. It is used to log the fact that 75 + * some rmapbt updates mentioned in an earlier rui item have been performed. 76 + */ 77 + struct xfs_rud_log_item { 78 + struct xfs_log_item rud_item; 79 + struct xfs_rui_log_item *rud_ruip; 80 + struct xfs_rud_log_format rud_format; 81 + }; 82 + 83 + extern struct kmem_zone *xfs_rui_zone; 84 + extern struct kmem_zone *xfs_rud_zone; 85 + 86 + struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint); 87 + struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *, 88 + struct xfs_rui_log_item *); 89 + int xfs_rui_copy_format(struct xfs_log_iovec *buf, 90 + struct xfs_rui_log_format *dst_rui_fmt); 91 + void xfs_rui_item_free(struct xfs_rui_log_item *); 92 + void xfs_rui_release(struct xfs_rui_log_item *); 93 + int xfs_rui_recover(struct xfs_mount *mp, struct xfs_rui_log_item *ruip); 94 + 95 + #endif /* __XFS_RMAP_ITEM_H__ */

+6 -5

fs/xfs/xfs_rtalloc.c

··· 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_bit.h" 25 25 #include "xfs_mount.h" 26 + #include "xfs_defer.h" 26 27 #include "xfs_inode.h" 27 28 #include "xfs_bmap.h" 28 29 #include "xfs_bmap_util.h" ··· 770 769 xfs_daddr_t d; /* disk block address */ 771 770 int error; /* error return value */ 772 771 xfs_fsblock_t firstblock;/* first block allocated in xaction */ 773 - struct xfs_bmap_free flist; /* list of freed blocks */ 772 + struct xfs_defer_ops dfops; /* list of freed blocks */ 774 773 xfs_fsblock_t fsbno; /* filesystem block for bno */ 775 774 struct xfs_bmbt_irec map; /* block map output */ 776 775 int nmap; /* number of block maps */ ··· 795 794 xfs_ilock(ip, XFS_ILOCK_EXCL); 796 795 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 797 796 798 - xfs_bmap_init(&flist, &firstblock); 797 + xfs_defer_init(&dfops, &firstblock); 799 798 /* 800 799 * Allocate blocks to the bitmap file. 801 800 */ 802 801 nmap = 1; 803 802 error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, 804 803 XFS_BMAPI_METADATA, &firstblock, 805 - resblks, &map, &nmap, &flist); 804 + resblks, &map, &nmap, &dfops); 806 805 if (!error && nmap < 1) 807 806 error = -ENOSPC; 808 807 if (error) ··· 810 809 /* 811 810 * Free any blocks freed up in the transaction, then commit. 812 811 */ 813 - error = xfs_bmap_finish(&tp, &flist, NULL); 812 + error = xfs_defer_finish(&tp, &dfops, NULL); 814 813 if (error) 815 814 goto out_bmap_cancel; 816 815 error = xfs_trans_commit(tp); ··· 863 862 return 0; 864 863 865 864 out_bmap_cancel: 866 - xfs_bmap_cancel(&flist); 865 + xfs_defer_cancel(&dfops); 867 866 out_trans_cancel: 868 867 xfs_trans_cancel(tp); 869 868 return error;

+1

fs/xfs/xfs_stats.c

··· 61 61 { "bmbt2", XFSSTAT_END_BMBT_V2 }, 62 62 { "ibt2", XFSSTAT_END_IBT_V2 }, 63 63 { "fibt2", XFSSTAT_END_FIBT_V2 }, 64 + { "rmapbt", XFSSTAT_END_RMAP_V2 }, 64 65 /* we print both series of quota information together */ 65 66 { "qm", XFSSTAT_END_QM }, 66 67 };

+17 -1

fs/xfs/xfs_stats.h

··· 197 197 __uint32_t xs_fibt_2_alloc; 198 198 __uint32_t xs_fibt_2_free; 199 199 __uint32_t xs_fibt_2_moves; 200 - #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6) 200 + #define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2+15) 201 + __uint32_t xs_rmap_2_lookup; 202 + __uint32_t xs_rmap_2_compare; 203 + __uint32_t xs_rmap_2_insrec; 204 + __uint32_t xs_rmap_2_delrec; 205 + __uint32_t xs_rmap_2_newroot; 206 + __uint32_t xs_rmap_2_killroot; 207 + __uint32_t xs_rmap_2_increment; 208 + __uint32_t xs_rmap_2_decrement; 209 + __uint32_t xs_rmap_2_lshift; 210 + __uint32_t xs_rmap_2_rshift; 211 + __uint32_t xs_rmap_2_split; 212 + __uint32_t xs_rmap_2_join; 213 + __uint32_t xs_rmap_2_alloc; 214 + __uint32_t xs_rmap_2_free; 215 + __uint32_t xs_rmap_2_moves; 216 + #define XFSSTAT_END_XQMSTAT (XFSSTAT_END_RMAP_V2+6) 201 217 __uint32_t xs_qm_dqreclaims; 202 218 __uint32_t xs_qm_dqreclaim_misses; 203 219 __uint32_t xs_qm_dquot_dups;

+28 -2

fs/xfs/xfs_super.c

··· 46 46 #include "xfs_quota.h" 47 47 #include "xfs_sysfs.h" 48 48 #include "xfs_ondisk.h" 49 + #include "xfs_rmap_item.h" 49 50 50 51 #include <linux/namei.h> 51 52 #include <linux/init.h> ··· 1076 1075 statp->f_blocks = sbp->sb_dblocks - lsize; 1077 1076 spin_unlock(&mp->m_sb_lock); 1078 1077 1079 - statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1078 + statp->f_bfree = fdblocks - mp->m_alloc_set_aside; 1080 1079 statp->f_bavail = statp->f_bfree; 1081 1080 1082 1081 fakeinos = statp->f_bfree << sbp->sb_inopblog; ··· 1574 1573 } 1575 1574 } 1576 1575 1576 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 1577 + xfs_alert(mp, 1578 + "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!"); 1579 + 1577 1580 error = xfs_mountfs(mp); 1578 1581 if (error) 1579 1582 goto out_filestream_unmount; ··· 1702 1697 goto out_free_ioend_bioset; 1703 1698 1704 1699 xfs_bmap_free_item_zone = kmem_zone_init( 1705 - sizeof(struct xfs_bmap_free_item), 1700 + sizeof(struct xfs_extent_free_item), 1706 1701 "xfs_bmap_free_item"); 1707 1702 if (!xfs_bmap_free_item_zone) 1708 1703 goto out_destroy_log_ticket_zone; ··· 1770 1765 if (!xfs_icreate_zone) 1771 1766 goto out_destroy_ili_zone; 1772 1767 1768 + xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item), 1769 + "xfs_rud_item"); 1770 + if (!xfs_rud_zone) 1771 + goto out_destroy_icreate_zone; 1772 + 1773 + xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) + 1774 + ((XFS_RUI_MAX_FAST_EXTENTS - 1) * 1775 + sizeof(struct xfs_map_extent))), 1776 + "xfs_rui_item"); 1777 + if (!xfs_rui_zone) 1778 + goto out_destroy_rud_zone; 1779 + 1773 1780 return 0; 1774 1781 1782 + out_destroy_rud_zone: 1783 + kmem_zone_destroy(xfs_rud_zone); 1784 + out_destroy_icreate_zone: 1785 + kmem_zone_destroy(xfs_icreate_zone); 1775 1786 out_destroy_ili_zone: 1776 1787 kmem_zone_destroy(xfs_ili_zone); 1777 1788 out_destroy_inode_zone: ··· 1826 1805 * destroy caches. 
1827 1806 */ 1828 1807 rcu_barrier(); 1808 + kmem_zone_destroy(xfs_rui_zone); 1809 + kmem_zone_destroy(xfs_rud_zone); 1829 1810 kmem_zone_destroy(xfs_icreate_zone); 1830 1811 kmem_zone_destroy(xfs_ili_zone); 1831 1812 kmem_zone_destroy(xfs_inode_zone); ··· 1876 1853 1877 1854 printk(KERN_INFO XFS_VERSION_STRING " with " 1878 1855 XFS_BUILD_OPTIONS " enabled\n"); 1856 + 1857 + xfs_extent_free_init_defer_op(); 1858 + xfs_rmap_update_init_defer_op(); 1879 1859 1880 1860 xfs_dir_startup(); 1881 1861

+13 -12

fs/xfs/xfs_symlink.c

··· 26 26 #include "xfs_mount.h" 27 27 #include "xfs_da_format.h" 28 28 #include "xfs_da_btree.h" 29 + #include "xfs_defer.h" 29 30 #include "xfs_dir2.h" 30 31 #include "xfs_inode.h" 31 32 #include "xfs_ialloc.h" ··· 173 172 struct xfs_inode *ip = NULL; 174 173 int error = 0; 175 174 int pathlen; 176 - struct xfs_bmap_free free_list; 175 + struct xfs_defer_ops dfops; 177 176 xfs_fsblock_t first_block; 178 177 bool unlock_dp_on_error = false; 179 178 xfs_fileoff_t first_fsb; ··· 270 269 * Initialize the bmap freelist prior to calling either 271 270 * bmapi or the directory create code. 272 271 */ 273 - xfs_bmap_init(&free_list, &first_block); 272 + xfs_defer_init(&dfops, &first_block); 274 273 275 274 /* 276 275 * Allocate an inode for the symlink. ··· 314 313 315 314 error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks, 316 315 XFS_BMAPI_METADATA, &first_block, resblks, 317 - mval, &nmaps, &free_list); 316 + mval, &nmaps, &dfops); 318 317 if (error) 319 318 goto out_bmap_cancel; 320 319 ··· 362 361 * Create the directory entry for the symlink. 
363 362 */ 364 363 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 365 - &first_block, &free_list, resblks); 364 + &first_block, &dfops, resblks); 366 365 if (error) 367 366 goto out_bmap_cancel; 368 367 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); ··· 377 376 xfs_trans_set_sync(tp); 378 377 } 379 378 380 - error = xfs_bmap_finish(&tp, &free_list, NULL); 379 + error = xfs_defer_finish(&tp, &dfops, NULL); 381 380 if (error) 382 381 goto out_bmap_cancel; 383 382 ··· 393 392 return 0; 394 393 395 394 out_bmap_cancel: 396 - xfs_bmap_cancel(&free_list); 395 + xfs_defer_cancel(&dfops); 397 396 out_trans_cancel: 398 397 xfs_trans_cancel(tp); 399 398 out_release_inode: ··· 427 426 int done; 428 427 int error; 429 428 xfs_fsblock_t first_block; 430 - xfs_bmap_free_t free_list; 429 + struct xfs_defer_ops dfops; 431 430 int i; 432 431 xfs_mount_t *mp; 433 432 xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; ··· 466 465 * Find the block(s) so we can inval and unmap them. 467 466 */ 468 467 done = 0; 469 - xfs_bmap_init(&free_list, &first_block); 468 + xfs_defer_init(&dfops, &first_block); 470 469 nmaps = ARRAY_SIZE(mval); 471 470 error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), 472 471 mval, &nmaps, 0); ··· 486 485 xfs_trans_binval(tp, bp); 487 486 } 488 487 /* 489 - * Unmap the dead block(s) to the free_list. 488 + * Unmap the dead block(s) to the dfops. 490 489 */ 491 490 error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps, 492 - &first_block, &free_list, &done); 491 + &first_block, &dfops, &done); 493 492 if (error) 494 493 goto error_bmap_cancel; 495 494 ASSERT(done); 496 495 /* 497 496 * Commit the first transaction. This logs the EFI and the inode. 
498 497 */ 499 - error = xfs_bmap_finish(&tp, &free_list, ip); 498 + error = xfs_defer_finish(&tp, &dfops, ip); 500 499 if (error) 501 500 goto error_bmap_cancel; 502 501 /* ··· 526 525 return 0; 527 526 528 527 error_bmap_cancel: 529 - xfs_bmap_cancel(&free_list); 528 + xfs_defer_cancel(&dfops); 530 529 error_trans_cancel: 531 530 xfs_trans_cancel(tp); 532 531 error_unlock:

+2

fs/xfs/xfs_trace.c

··· 22 22 #include "xfs_log_format.h" 23 23 #include "xfs_trans_resv.h" 24 24 #include "xfs_mount.h" 25 + #include "xfs_defer.h" 25 26 #include "xfs_da_format.h" 27 + #include "xfs_defer.h" 26 28 #include "xfs_inode.h" 27 29 #include "xfs_btree.h" 28 30 #include "xfs_da_btree.h"

+374

fs/xfs/xfs_trace.h

··· 38 38 struct xfs_buf_log_format; 39 39 struct xfs_inode_log_format; 40 40 struct xfs_bmbt_irec; 41 + struct xfs_btree_cur; 41 42 42 43 DECLARE_EVENT_CLASS(xfs_attr_list_class, 43 44 TP_PROTO(struct xfs_attr_list_context *ctx), ··· 2185 2184 DEFINE_DISCARD_EVENT(xfs_discard_toosmall); 2186 2185 DEFINE_DISCARD_EVENT(xfs_discard_exclude); 2187 2186 DEFINE_DISCARD_EVENT(xfs_discard_busy); 2187 + 2188 + /* btree cursor events */ 2189 + DECLARE_EVENT_CLASS(xfs_btree_cur_class, 2190 + TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), 2191 + TP_ARGS(cur, level, bp), 2192 + TP_STRUCT__entry( 2193 + __field(dev_t, dev) 2194 + __field(xfs_btnum_t, btnum) 2195 + __field(int, level) 2196 + __field(int, nlevels) 2197 + __field(int, ptr) 2198 + __field(xfs_daddr_t, daddr) 2199 + ), 2200 + TP_fast_assign( 2201 + __entry->dev = cur->bc_mp->m_super->s_dev; 2202 + __entry->btnum = cur->bc_btnum; 2203 + __entry->level = level; 2204 + __entry->nlevels = cur->bc_nlevels; 2205 + __entry->ptr = cur->bc_ptrs[level]; 2206 + __entry->daddr = bp ? 
bp->b_bn : -1; 2207 + ), 2208 + TP_printk("dev %d:%d btnum %d level %d/%d ptr %d daddr 0x%llx", 2209 + MAJOR(__entry->dev), MINOR(__entry->dev), 2210 + __entry->btnum, 2211 + __entry->level, 2212 + __entry->nlevels, 2213 + __entry->ptr, 2214 + (unsigned long long)__entry->daddr) 2215 + ) 2216 + 2217 + #define DEFINE_BTREE_CUR_EVENT(name) \ 2218 + DEFINE_EVENT(xfs_btree_cur_class, name, \ 2219 + TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), \ 2220 + TP_ARGS(cur, level, bp)) 2221 + DEFINE_BTREE_CUR_EVENT(xfs_btree_updkeys); 2222 + DEFINE_BTREE_CUR_EVENT(xfs_btree_overlapped_query_range); 2223 + 2224 + /* deferred ops */ 2225 + struct xfs_defer_pending; 2226 + struct xfs_defer_intake; 2227 + struct xfs_defer_ops; 2228 + 2229 + DECLARE_EVENT_CLASS(xfs_defer_class, 2230 + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop), 2231 + TP_ARGS(mp, dop), 2232 + TP_STRUCT__entry( 2233 + __field(dev_t, dev) 2234 + __field(void *, dop) 2235 + __field(bool, committed) 2236 + __field(bool, low) 2237 + ), 2238 + TP_fast_assign( 2239 + __entry->dev = mp ? mp->m_super->s_dev : 0; 2240 + __entry->dop = dop; 2241 + __entry->committed = dop->dop_committed; 2242 + __entry->low = dop->dop_low; 2243 + ), 2244 + TP_printk("dev %d:%d ops %p committed %d low %d\n", 2245 + MAJOR(__entry->dev), MINOR(__entry->dev), 2246 + __entry->dop, 2247 + __entry->committed, 2248 + __entry->low) 2249 + ) 2250 + #define DEFINE_DEFER_EVENT(name) \ 2251 + DEFINE_EVENT(xfs_defer_class, name, \ 2252 + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop), \ 2253 + TP_ARGS(mp, dop)) 2254 + 2255 + DECLARE_EVENT_CLASS(xfs_defer_error_class, 2256 + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error), 2257 + TP_ARGS(mp, dop, error), 2258 + TP_STRUCT__entry( 2259 + __field(dev_t, dev) 2260 + __field(void *, dop) 2261 + __field(bool, committed) 2262 + __field(bool, low) 2263 + __field(int, error) 2264 + ), 2265 + TP_fast_assign( 2266 + __entry->dev = mp ? 
mp->m_super->s_dev : 0; 2267 + __entry->dop = dop; 2268 + __entry->committed = dop->dop_committed; 2269 + __entry->low = dop->dop_low; 2270 + __entry->error = error; 2271 + ), 2272 + TP_printk("dev %d:%d ops %p committed %d low %d err %d\n", 2273 + MAJOR(__entry->dev), MINOR(__entry->dev), 2274 + __entry->dop, 2275 + __entry->committed, 2276 + __entry->low, 2277 + __entry->error) 2278 + ) 2279 + #define DEFINE_DEFER_ERROR_EVENT(name) \ 2280 + DEFINE_EVENT(xfs_defer_error_class, name, \ 2281 + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error), \ 2282 + TP_ARGS(mp, dop, error)) 2283 + 2284 + DECLARE_EVENT_CLASS(xfs_defer_pending_class, 2285 + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp), 2286 + TP_ARGS(mp, dfp), 2287 + TP_STRUCT__entry( 2288 + __field(dev_t, dev) 2289 + __field(int, type) 2290 + __field(void *, intent) 2291 + __field(bool, committed) 2292 + __field(int, nr) 2293 + ), 2294 + TP_fast_assign( 2295 + __entry->dev = mp ? mp->m_super->s_dev : 0; 2296 + __entry->type = dfp->dfp_type->type; 2297 + __entry->intent = dfp->dfp_intent; 2298 + __entry->committed = dfp->dfp_committed; 2299 + __entry->nr = dfp->dfp_count; 2300 + ), 2301 + TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n", 2302 + MAJOR(__entry->dev), MINOR(__entry->dev), 2303 + __entry->type, 2304 + __entry->intent, 2305 + __entry->committed, 2306 + __entry->nr) 2307 + ) 2308 + #define DEFINE_DEFER_PENDING_EVENT(name) \ 2309 + DEFINE_EVENT(xfs_defer_pending_class, name, \ 2310 + TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp), \ 2311 + TP_ARGS(mp, dfp)) 2312 + 2313 + DECLARE_EVENT_CLASS(xfs_phys_extent_deferred_class, 2314 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 2315 + int type, xfs_agblock_t agbno, xfs_extlen_t len), 2316 + TP_ARGS(mp, agno, type, agbno, len), 2317 + TP_STRUCT__entry( 2318 + __field(dev_t, dev) 2319 + __field(xfs_agnumber_t, agno) 2320 + __field(int, type) 2321 + __field(xfs_agblock_t, agbno) 2322 + 
__field(xfs_extlen_t, len) 2323 + ), 2324 + TP_fast_assign( 2325 + __entry->dev = mp->m_super->s_dev; 2326 + __entry->agno = agno; 2327 + __entry->type = type; 2328 + __entry->agbno = agbno; 2329 + __entry->len = len; 2330 + ), 2331 + TP_printk("dev %d:%d op %d agno %u agbno %u len %u", 2332 + MAJOR(__entry->dev), MINOR(__entry->dev), 2333 + __entry->type, 2334 + __entry->agno, 2335 + __entry->agbno, 2336 + __entry->len) 2337 + ); 2338 + #define DEFINE_PHYS_EXTENT_DEFERRED_EVENT(name) \ 2339 + DEFINE_EVENT(xfs_phys_extent_deferred_class, name, \ 2340 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 2341 + int type, \ 2342 + xfs_agblock_t bno, \ 2343 + xfs_extlen_t len), \ 2344 + TP_ARGS(mp, agno, type, bno, len)) 2345 + 2346 + DECLARE_EVENT_CLASS(xfs_map_extent_deferred_class, 2347 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 2348 + int op, 2349 + xfs_agblock_t agbno, 2350 + xfs_ino_t ino, 2351 + int whichfork, 2352 + xfs_fileoff_t offset, 2353 + xfs_filblks_t len, 2354 + xfs_exntst_t state), 2355 + TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state), 2356 + TP_STRUCT__entry( 2357 + __field(dev_t, dev) 2358 + __field(xfs_agnumber_t, agno) 2359 + __field(xfs_ino_t, ino) 2360 + __field(xfs_agblock_t, agbno) 2361 + __field(int, whichfork) 2362 + __field(xfs_fileoff_t, l_loff) 2363 + __field(xfs_filblks_t, l_len) 2364 + __field(xfs_exntst_t, l_state) 2365 + __field(int, op) 2366 + ), 2367 + TP_fast_assign( 2368 + __entry->dev = mp->m_super->s_dev; 2369 + __entry->agno = agno; 2370 + __entry->ino = ino; 2371 + __entry->agbno = agbno; 2372 + __entry->whichfork = whichfork; 2373 + __entry->l_loff = offset; 2374 + __entry->l_len = len; 2375 + __entry->l_state = state; 2376 + __entry->op = op; 2377 + ), 2378 + TP_printk("dev %d:%d op %d agno %u agbno %u owner %lld %s offset %llu len %llu state %d", 2379 + MAJOR(__entry->dev), MINOR(__entry->dev), 2380 + __entry->op, 2381 + __entry->agno, 2382 + __entry->agbno, 2383 + __entry->ino, 2384 + 
__entry->whichfork == XFS_ATTR_FORK ? "attr" : "data", 2385 + __entry->l_loff, 2386 + __entry->l_len, 2387 + __entry->l_state) 2388 + ); 2389 + #define DEFINE_MAP_EXTENT_DEFERRED_EVENT(name) \ 2390 + DEFINE_EVENT(xfs_map_extent_deferred_class, name, \ 2391 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 2392 + int op, \ 2393 + xfs_agblock_t agbno, \ 2394 + xfs_ino_t ino, \ 2395 + int whichfork, \ 2396 + xfs_fileoff_t offset, \ 2397 + xfs_filblks_t len, \ 2398 + xfs_exntst_t state), \ 2399 + TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state)) 2400 + 2401 + DEFINE_DEFER_EVENT(xfs_defer_init); 2402 + DEFINE_DEFER_EVENT(xfs_defer_cancel); 2403 + DEFINE_DEFER_EVENT(xfs_defer_trans_roll); 2404 + DEFINE_DEFER_EVENT(xfs_defer_trans_abort); 2405 + DEFINE_DEFER_EVENT(xfs_defer_finish); 2406 + DEFINE_DEFER_EVENT(xfs_defer_finish_done); 2407 + 2408 + DEFINE_DEFER_ERROR_EVENT(xfs_defer_trans_roll_error); 2409 + DEFINE_DEFER_ERROR_EVENT(xfs_defer_finish_error); 2410 + DEFINE_DEFER_ERROR_EVENT(xfs_defer_op_finish_error); 2411 + 2412 + DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_work); 2413 + DEFINE_DEFER_PENDING_EVENT(xfs_defer_intake_cancel); 2414 + DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_commit); 2415 + DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel); 2416 + DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish); 2417 + DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort); 2418 + 2419 + #define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT 2420 + DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer); 2421 + DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred); 2422 + 2423 + /* rmap tracepoints */ 2424 + DECLARE_EVENT_CLASS(xfs_rmap_class, 2425 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 2426 + xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, 2427 + struct xfs_owner_info *oinfo), 2428 + TP_ARGS(mp, agno, agbno, len, unwritten, oinfo), 2429 + TP_STRUCT__entry( 2430 + __field(dev_t, dev) 2431 + 
__field(xfs_agnumber_t, agno) 2432 + __field(xfs_agblock_t, agbno) 2433 + __field(xfs_extlen_t, len) 2434 + __field(uint64_t, owner) 2435 + __field(uint64_t, offset) 2436 + __field(unsigned long, flags) 2437 + ), 2438 + TP_fast_assign( 2439 + __entry->dev = mp->m_super->s_dev; 2440 + __entry->agno = agno; 2441 + __entry->agbno = agbno; 2442 + __entry->len = len; 2443 + __entry->owner = oinfo->oi_owner; 2444 + __entry->offset = oinfo->oi_offset; 2445 + __entry->flags = oinfo->oi_flags; 2446 + if (unwritten) 2447 + __entry->flags |= XFS_RMAP_UNWRITTEN; 2448 + ), 2449 + TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%lx", 2450 + MAJOR(__entry->dev), MINOR(__entry->dev), 2451 + __entry->agno, 2452 + __entry->agbno, 2453 + __entry->len, 2454 + __entry->owner, 2455 + __entry->offset, 2456 + __entry->flags) 2457 + ); 2458 + #define DEFINE_RMAP_EVENT(name) \ 2459 + DEFINE_EVENT(xfs_rmap_class, name, \ 2460 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 2461 + xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \ 2462 + struct xfs_owner_info *oinfo), \ 2463 + TP_ARGS(mp, agno, agbno, len, unwritten, oinfo)) 2464 + 2465 + /* simple AG-based error/%ip tracepoint class */ 2466 + DECLARE_EVENT_CLASS(xfs_ag_error_class, 2467 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, 2468 + unsigned long caller_ip), 2469 + TP_ARGS(mp, agno, error, caller_ip), 2470 + TP_STRUCT__entry( 2471 + __field(dev_t, dev) 2472 + __field(xfs_agnumber_t, agno) 2473 + __field(int, error) 2474 + __field(unsigned long, caller_ip) 2475 + ), 2476 + TP_fast_assign( 2477 + __entry->dev = mp->m_super->s_dev; 2478 + __entry->agno = agno; 2479 + __entry->error = error; 2480 + __entry->caller_ip = caller_ip; 2481 + ), 2482 + TP_printk("dev %d:%d agno %u error %d caller %ps", 2483 + MAJOR(__entry->dev), MINOR(__entry->dev), 2484 + __entry->agno, 2485 + __entry->error, 2486 + (char *)__entry->caller_ip) 2487 + ); 2488 + 2489 + #define 
DEFINE_AG_ERROR_EVENT(name) \ 2490 + DEFINE_EVENT(xfs_ag_error_class, name, \ 2491 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \ 2492 + unsigned long caller_ip), \ 2493 + TP_ARGS(mp, agno, error, caller_ip)) 2494 + 2495 + DEFINE_RMAP_EVENT(xfs_rmap_unmap); 2496 + DEFINE_RMAP_EVENT(xfs_rmap_unmap_done); 2497 + DEFINE_AG_ERROR_EVENT(xfs_rmap_unmap_error); 2498 + DEFINE_RMAP_EVENT(xfs_rmap_map); 2499 + DEFINE_RMAP_EVENT(xfs_rmap_map_done); 2500 + DEFINE_AG_ERROR_EVENT(xfs_rmap_map_error); 2501 + DEFINE_RMAP_EVENT(xfs_rmap_convert); 2502 + DEFINE_RMAP_EVENT(xfs_rmap_convert_done); 2503 + DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_error); 2504 + DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_state); 2505 + 2506 + DECLARE_EVENT_CLASS(xfs_rmapbt_class, 2507 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 2508 + xfs_agblock_t agbno, xfs_extlen_t len, 2509 + uint64_t owner, uint64_t offset, unsigned int flags), 2510 + TP_ARGS(mp, agno, agbno, len, owner, offset, flags), 2511 + TP_STRUCT__entry( 2512 + __field(dev_t, dev) 2513 + __field(xfs_agnumber_t, agno) 2514 + __field(xfs_agblock_t, agbno) 2515 + __field(xfs_extlen_t, len) 2516 + __field(uint64_t, owner) 2517 + __field(uint64_t, offset) 2518 + __field(unsigned int, flags) 2519 + ), 2520 + TP_fast_assign( 2521 + __entry->dev = mp->m_super->s_dev; 2522 + __entry->agno = agno; 2523 + __entry->agbno = agbno; 2524 + __entry->len = len; 2525 + __entry->owner = owner; 2526 + __entry->offset = offset; 2527 + __entry->flags = flags; 2528 + ), 2529 + TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x", 2530 + MAJOR(__entry->dev), MINOR(__entry->dev), 2531 + __entry->agno, 2532 + __entry->agbno, 2533 + __entry->len, 2534 + __entry->owner, 2535 + __entry->offset, 2536 + __entry->flags) 2537 + ); 2538 + #define DEFINE_RMAPBT_EVENT(name) \ 2539 + DEFINE_EVENT(xfs_rmapbt_class, name, \ 2540 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 2541 + xfs_agblock_t agbno, xfs_extlen_t len, \ 
2542 + uint64_t owner, uint64_t offset, unsigned int flags), \ 2543 + TP_ARGS(mp, agno, agbno, len, owner, offset, flags)) 2544 + 2545 + #define DEFINE_RMAP_DEFERRED_EVENT DEFINE_MAP_EXTENT_DEFERRED_EVENT 2546 + DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer); 2547 + DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred); 2548 + 2549 + DEFINE_BUSY_EVENT(xfs_rmapbt_alloc_block); 2550 + DEFINE_BUSY_EVENT(xfs_rmapbt_free_block); 2551 + DEFINE_RMAPBT_EVENT(xfs_rmap_update); 2552 + DEFINE_RMAPBT_EVENT(xfs_rmap_insert); 2553 + DEFINE_RMAPBT_EVENT(xfs_rmap_delete); 2554 + DEFINE_AG_ERROR_EVENT(xfs_rmap_insert_error); 2555 + DEFINE_AG_ERROR_EVENT(xfs_rmap_delete_error); 2556 + DEFINE_AG_ERROR_EVENT(xfs_rmap_update_error); 2557 + DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result); 2558 + DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result); 2559 + DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result); 2188 2560 2189 2561 #endif /* _TRACE_XFS_H */ 2190 2562

+19 -7

fs/xfs/xfs_trans.h

··· 33 33 struct xfs_trans_res; 34 34 struct xfs_dquot_acct; 35 35 struct xfs_busy_extent; 36 + struct xfs_rud_log_item; 37 + struct xfs_rui_log_item; 38 + struct xfs_btree_cur; 36 39 37 40 typedef struct xfs_log_item { 38 41 struct list_head li_ail; /* AIL pointers */ ··· 213 210 void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); 214 211 void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); 215 212 void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 216 - struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); 217 - void xfs_trans_log_efi_extent(xfs_trans_t *, 218 - struct xfs_efi_log_item *, 219 - xfs_fsblock_t, 220 - xfs_extlen_t); 221 - struct xfs_efd_log_item *xfs_trans_get_efd(xfs_trans_t *, 213 + 214 + void xfs_extent_free_init_defer_op(void); 215 + struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *, 222 216 struct xfs_efi_log_item *, 223 217 uint); 224 218 int xfs_trans_free_extent(struct xfs_trans *, 225 219 struct xfs_efd_log_item *, xfs_fsblock_t, 226 - xfs_extlen_t); 220 + xfs_extlen_t, struct xfs_owner_info *); 227 221 int xfs_trans_commit(struct xfs_trans *); 228 222 int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *); 229 223 int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); ··· 235 235 236 236 extern kmem_zone_t *xfs_trans_zone; 237 237 extern kmem_zone_t *xfs_log_item_desc_zone; 238 + 239 + /* rmap updates */ 240 + enum xfs_rmap_intent_type; 241 + 242 + void xfs_rmap_update_init_defer_op(void); 243 + struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp, 244 + struct xfs_rui_log_item *ruip); 245 + int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp, 246 + struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type, 247 + __uint64_t owner, int whichfork, xfs_fileoff_t startoff, 248 + xfs_fsblock_t startblock, xfs_filblks_t blockcount, 249 + xfs_exntst_t state, struct xfs_btree_cur **pcur); 238 250 239 251 #endif /* __XFS_TRANS_H__ */

+153 -62

fs/xfs/xfs_trans_extfree.c

··· 21 21 #include "xfs_format.h" 22 22 #include "xfs_log_format.h" 23 23 #include "xfs_trans_resv.h" 24 + #include "xfs_bit.h" 24 25 #include "xfs_mount.h" 26 + #include "xfs_defer.h" 25 27 #include "xfs_trans.h" 26 28 #include "xfs_trans_priv.h" 27 29 #include "xfs_extfree_item.h" 28 30 #include "xfs_alloc.h" 29 - 30 - /* 31 - * This routine is called to allocate an "extent free intention" 32 - * log item that will hold nextents worth of extents. The 33 - * caller must use all nextents extents, because we are not 34 - * flexible about this at all. 35 - */ 36 - xfs_efi_log_item_t * 37 - xfs_trans_get_efi(xfs_trans_t *tp, 38 - uint nextents) 39 - { 40 - xfs_efi_log_item_t *efip; 41 - 42 - ASSERT(tp != NULL); 43 - ASSERT(nextents > 0); 44 - 45 - efip = xfs_efi_init(tp->t_mountp, nextents); 46 - ASSERT(efip != NULL); 47 - 48 - /* 49 - * Get a log_item_desc to point at the new item. 50 - */ 51 - xfs_trans_add_item(tp, &efip->efi_item); 52 - return efip; 53 - } 54 - 55 - /* 56 - * This routine is called to indicate that the described 57 - * extent is to be logged as needing to be freed. It should 58 - * be called once for each extent to be freed. 59 - */ 60 - void 61 - xfs_trans_log_efi_extent(xfs_trans_t *tp, 62 - xfs_efi_log_item_t *efip, 63 - xfs_fsblock_t start_block, 64 - xfs_extlen_t ext_len) 65 - { 66 - uint next_extent; 67 - xfs_extent_t *extp; 68 - 69 - tp->t_flags |= XFS_TRANS_DIRTY; 70 - efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; 71 - 72 - /* 73 - * atomic_inc_return gives us the value after the increment; 74 - * we want to use it as an array index so we need to subtract 1 from 75 - * it. 
76 - */ 77 - next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; 78 - ASSERT(next_extent < efip->efi_format.efi_nextents); 79 - extp = &(efip->efi_format.efi_extents[next_extent]); 80 - extp->ext_start = start_block; 81 - extp->ext_len = ext_len; 82 - } 83 - 31 + #include "xfs_bmap.h" 32 + #include "xfs_trace.h" 84 33 85 34 /* 86 35 * This routine is called to allocate an "extent free done" ··· 37 88 * caller must use all nextents extents, because we are not 38 89 * flexible about this at all. 39 90 */ 40 - xfs_efd_log_item_t * 41 - xfs_trans_get_efd(xfs_trans_t *tp, 42 - xfs_efi_log_item_t *efip, 43 - uint nextents) 91 + struct xfs_efd_log_item * 92 + xfs_trans_get_efd(struct xfs_trans *tp, 93 + struct xfs_efi_log_item *efip, 94 + uint nextents) 44 95 { 45 - xfs_efd_log_item_t *efdp; 96 + struct xfs_efd_log_item *efdp; 46 97 47 98 ASSERT(tp != NULL); 48 99 ASSERT(nextents > 0); ··· 67 118 struct xfs_trans *tp, 68 119 struct xfs_efd_log_item *efdp, 69 120 xfs_fsblock_t start_block, 70 - xfs_extlen_t ext_len) 121 + xfs_extlen_t ext_len, 122 + struct xfs_owner_info *oinfo) 71 123 { 124 + struct xfs_mount *mp = tp->t_mountp; 72 125 uint next_extent; 126 + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); 127 + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, start_block); 73 128 struct xfs_extent *extp; 74 129 int error; 75 130 76 - error = xfs_free_extent(tp, start_block, ext_len); 131 + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); 132 + 133 + error = xfs_free_extent(tp, start_block, ext_len, oinfo); 77 134 78 135 /* 79 136 * Mark the transaction dirty, even on error. This ensures the ··· 99 144 efdp->efd_next_extent++; 100 145 101 146 return error; 147 + } 148 + 149 + /* Sort bmap items by AG. 
*/ 150 + static int 151 + xfs_extent_free_diff_items( 152 + void *priv, 153 + struct list_head *a, 154 + struct list_head *b) 155 + { 156 + struct xfs_mount *mp = priv; 157 + struct xfs_extent_free_item *ra; 158 + struct xfs_extent_free_item *rb; 159 + 160 + ra = container_of(a, struct xfs_extent_free_item, xefi_list); 161 + rb = container_of(b, struct xfs_extent_free_item, xefi_list); 162 + return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - 163 + XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); 164 + } 165 + 166 + /* Get an EFI. */ 167 + STATIC void * 168 + xfs_extent_free_create_intent( 169 + struct xfs_trans *tp, 170 + unsigned int count) 171 + { 172 + struct xfs_efi_log_item *efip; 173 + 174 + ASSERT(tp != NULL); 175 + ASSERT(count > 0); 176 + 177 + efip = xfs_efi_init(tp->t_mountp, count); 178 + ASSERT(efip != NULL); 179 + 180 + /* 181 + * Get a log_item_desc to point at the new item. 182 + */ 183 + xfs_trans_add_item(tp, &efip->efi_item); 184 + return efip; 185 + } 186 + 187 + /* Log a free extent to the intent item. */ 188 + STATIC void 189 + xfs_extent_free_log_item( 190 + struct xfs_trans *tp, 191 + void *intent, 192 + struct list_head *item) 193 + { 194 + struct xfs_efi_log_item *efip = intent; 195 + struct xfs_extent_free_item *free; 196 + uint next_extent; 197 + struct xfs_extent *extp; 198 + 199 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 200 + 201 + tp->t_flags |= XFS_TRANS_DIRTY; 202 + efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY; 203 + 204 + /* 205 + * atomic_inc_return gives us the value after the increment; 206 + * we want to use it as an array index so we need to subtract 1 from 207 + * it. 
208 + */ 209 + next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; 210 + ASSERT(next_extent < efip->efi_format.efi_nextents); 211 + extp = &efip->efi_format.efi_extents[next_extent]; 212 + extp->ext_start = free->xefi_startblock; 213 + extp->ext_len = free->xefi_blockcount; 214 + } 215 + 216 + /* Get an EFD so we can process all the free extents. */ 217 + STATIC void * 218 + xfs_extent_free_create_done( 219 + struct xfs_trans *tp, 220 + void *intent, 221 + unsigned int count) 222 + { 223 + return xfs_trans_get_efd(tp, intent, count); 224 + } 225 + 226 + /* Process a free extent. */ 227 + STATIC int 228 + xfs_extent_free_finish_item( 229 + struct xfs_trans *tp, 230 + struct xfs_defer_ops *dop, 231 + struct list_head *item, 232 + void *done_item, 233 + void **state) 234 + { 235 + struct xfs_extent_free_item *free; 236 + int error; 237 + 238 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 239 + error = xfs_trans_free_extent(tp, done_item, 240 + free->xefi_startblock, 241 + free->xefi_blockcount, 242 + &free->xefi_oinfo); 243 + kmem_free(free); 244 + return error; 245 + } 246 + 247 + /* Abort all pending EFIs. */ 248 + STATIC void 249 + xfs_extent_free_abort_intent( 250 + void *intent) 251 + { 252 + xfs_efi_release(intent); 253 + } 254 + 255 + /* Cancel a free extent. 
*/ 256 + STATIC void 257 + xfs_extent_free_cancel_item( 258 + struct list_head *item) 259 + { 260 + struct xfs_extent_free_item *free; 261 + 262 + free = container_of(item, struct xfs_extent_free_item, xefi_list); 263 + kmem_free(free); 264 + } 265 + 266 + static const struct xfs_defer_op_type xfs_extent_free_defer_type = { 267 + .type = XFS_DEFER_OPS_TYPE_FREE, 268 + .max_items = XFS_EFI_MAX_FAST_EXTENTS, 269 + .diff_items = xfs_extent_free_diff_items, 270 + .create_intent = xfs_extent_free_create_intent, 271 + .abort_intent = xfs_extent_free_abort_intent, 272 + .log_item = xfs_extent_free_log_item, 273 + .create_done = xfs_extent_free_create_done, 274 + .finish_item = xfs_extent_free_finish_item, 275 + .cancel_item = xfs_extent_free_cancel_item, 276 + }; 277 + 278 + /* Register the deferred op type. */ 279 + void 280 + xfs_extent_free_init_defer_op(void) 281 + { 282 + xfs_defer_init_op_type(&xfs_extent_free_defer_type); 102 283 }

+271

fs/xfs/xfs_trans_rmap.c

··· 1 + /* 2 + * Copyright (C) 2016 Oracle. All Rights Reserved. 3 + * 4 + * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 2 9 + * of the License, or (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it would be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write the Free Software Foundation, 18 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 + */ 20 + #include "xfs.h" 21 + #include "xfs_fs.h" 22 + #include "xfs_shared.h" 23 + #include "xfs_format.h" 24 + #include "xfs_log_format.h" 25 + #include "xfs_trans_resv.h" 26 + #include "xfs_mount.h" 27 + #include "xfs_defer.h" 28 + #include "xfs_trans.h" 29 + #include "xfs_trans_priv.h" 30 + #include "xfs_rmap_item.h" 31 + #include "xfs_alloc.h" 32 + #include "xfs_rmap.h" 33 + 34 + /* Set the map extent flags for this reverse mapping. 
*/ 35 + static void 36 + xfs_trans_set_rmap_flags( 37 + struct xfs_map_extent *rmap, 38 + enum xfs_rmap_intent_type type, 39 + int whichfork, 40 + xfs_exntst_t state) 41 + { 42 + rmap->me_flags = 0; 43 + if (state == XFS_EXT_UNWRITTEN) 44 + rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; 45 + if (whichfork == XFS_ATTR_FORK) 46 + rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; 47 + switch (type) { 48 + case XFS_RMAP_MAP: 49 + rmap->me_flags |= XFS_RMAP_EXTENT_MAP; 50 + break; 51 + case XFS_RMAP_UNMAP: 52 + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; 53 + break; 54 + case XFS_RMAP_CONVERT: 55 + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; 56 + break; 57 + case XFS_RMAP_ALLOC: 58 + rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; 59 + break; 60 + case XFS_RMAP_FREE: 61 + rmap->me_flags |= XFS_RMAP_EXTENT_FREE; 62 + break; 63 + default: 64 + ASSERT(0); 65 + } 66 + } 67 + 68 + struct xfs_rud_log_item * 69 + xfs_trans_get_rud( 70 + struct xfs_trans *tp, 71 + struct xfs_rui_log_item *ruip) 72 + { 73 + struct xfs_rud_log_item *rudp; 74 + 75 + rudp = xfs_rud_init(tp->t_mountp, ruip); 76 + xfs_trans_add_item(tp, &rudp->rud_item); 77 + return rudp; 78 + } 79 + 80 + /* 81 + * Finish an rmap update and log it to the RUD. Note that the transaction is 82 + * marked dirty regardless of whether the rmap update succeeds or fails to 83 + * support the RUI/RUD lifecycle rules. 84 + */ 85 + int 86 + xfs_trans_log_finish_rmap_update( 87 + struct xfs_trans *tp, 88 + struct xfs_rud_log_item *rudp, 89 + enum xfs_rmap_intent_type type, 90 + __uint64_t owner, 91 + int whichfork, 92 + xfs_fileoff_t startoff, 93 + xfs_fsblock_t startblock, 94 + xfs_filblks_t blockcount, 95 + xfs_exntst_t state, 96 + struct xfs_btree_cur **pcur) 97 + { 98 + int error; 99 + 100 + error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, 101 + startblock, blockcount, state, pcur); 102 + 103 + /* 104 + * Mark the transaction dirty, even on error. This ensures the 105 + * transaction is aborted, which: 106 + * 107 + * 1.) 
releases the RUI and frees the RUD 108 + * 2.) shuts down the filesystem 109 + */ 110 + tp->t_flags |= XFS_TRANS_DIRTY; 111 + rudp->rud_item.li_desc->lid_flags |= XFS_LID_DIRTY; 112 + 113 + return error; 114 + } 115 + 116 + /* Sort rmap intents by AG. */ 117 + static int 118 + xfs_rmap_update_diff_items( 119 + void *priv, 120 + struct list_head *a, 121 + struct list_head *b) 122 + { 123 + struct xfs_mount *mp = priv; 124 + struct xfs_rmap_intent *ra; 125 + struct xfs_rmap_intent *rb; 126 + 127 + ra = container_of(a, struct xfs_rmap_intent, ri_list); 128 + rb = container_of(b, struct xfs_rmap_intent, ri_list); 129 + return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - 130 + XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); 131 + } 132 + 133 + /* Get an RUI. */ 134 + STATIC void * 135 + xfs_rmap_update_create_intent( 136 + struct xfs_trans *tp, 137 + unsigned int count) 138 + { 139 + struct xfs_rui_log_item *ruip; 140 + 141 + ASSERT(tp != NULL); 142 + ASSERT(count > 0); 143 + 144 + ruip = xfs_rui_init(tp->t_mountp, count); 145 + ASSERT(ruip != NULL); 146 + 147 + /* 148 + * Get a log_item_desc to point at the new item. 149 + */ 150 + xfs_trans_add_item(tp, &ruip->rui_item); 151 + return ruip; 152 + } 153 + 154 + /* Log rmap updates in the intent item. */ 155 + STATIC void 156 + xfs_rmap_update_log_item( 157 + struct xfs_trans *tp, 158 + void *intent, 159 + struct list_head *item) 160 + { 161 + struct xfs_rui_log_item *ruip = intent; 162 + struct xfs_rmap_intent *rmap; 163 + uint next_extent; 164 + struct xfs_map_extent *map; 165 + 166 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 167 + 168 + tp->t_flags |= XFS_TRANS_DIRTY; 169 + ruip->rui_item.li_desc->lid_flags |= XFS_LID_DIRTY; 170 + 171 + /* 172 + * atomic_inc_return gives us the value after the increment; 173 + * we want to use it as an array index so we need to subtract 1 from 174 + * it. 
175 + */ 176 + next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; 177 + ASSERT(next_extent < ruip->rui_format.rui_nextents); 178 + map = &ruip->rui_format.rui_extents[next_extent]; 179 + map->me_owner = rmap->ri_owner; 180 + map->me_startblock = rmap->ri_bmap.br_startblock; 181 + map->me_startoff = rmap->ri_bmap.br_startoff; 182 + map->me_len = rmap->ri_bmap.br_blockcount; 183 + xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, 184 + rmap->ri_bmap.br_state); 185 + } 186 + 187 + /* Get an RUD so we can process all the deferred rmap updates. */ 188 + STATIC void * 189 + xfs_rmap_update_create_done( 190 + struct xfs_trans *tp, 191 + void *intent, 192 + unsigned int count) 193 + { 194 + return xfs_trans_get_rud(tp, intent); 195 + } 196 + 197 + /* Process a deferred rmap update. */ 198 + STATIC int 199 + xfs_rmap_update_finish_item( 200 + struct xfs_trans *tp, 201 + struct xfs_defer_ops *dop, 202 + struct list_head *item, 203 + void *done_item, 204 + void **state) 205 + { 206 + struct xfs_rmap_intent *rmap; 207 + int error; 208 + 209 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 210 + error = xfs_trans_log_finish_rmap_update(tp, done_item, 211 + rmap->ri_type, 212 + rmap->ri_owner, rmap->ri_whichfork, 213 + rmap->ri_bmap.br_startoff, 214 + rmap->ri_bmap.br_startblock, 215 + rmap->ri_bmap.br_blockcount, 216 + rmap->ri_bmap.br_state, 217 + (struct xfs_btree_cur **)state); 218 + kmem_free(rmap); 219 + return error; 220 + } 221 + 222 + /* Clean up after processing deferred rmaps. */ 223 + STATIC void 224 + xfs_rmap_update_finish_cleanup( 225 + struct xfs_trans *tp, 226 + void *state, 227 + int error) 228 + { 229 + struct xfs_btree_cur *rcur = state; 230 + 231 + xfs_rmap_finish_one_cleanup(tp, rcur, error); 232 + } 233 + 234 + /* Abort all pending RUIs. */ 235 + STATIC void 236 + xfs_rmap_update_abort_intent( 237 + void *intent) 238 + { 239 + xfs_rui_release(intent); 240 + } 241 + 242 + /* Cancel a deferred rmap update. 
*/ 243 + STATIC void 244 + xfs_rmap_update_cancel_item( 245 + struct list_head *item) 246 + { 247 + struct xfs_rmap_intent *rmap; 248 + 249 + rmap = container_of(item, struct xfs_rmap_intent, ri_list); 250 + kmem_free(rmap); 251 + } 252 + 253 + static const struct xfs_defer_op_type xfs_rmap_update_defer_type = { 254 + .type = XFS_DEFER_OPS_TYPE_RMAP, 255 + .max_items = XFS_RUI_MAX_FAST_EXTENTS, 256 + .diff_items = xfs_rmap_update_diff_items, 257 + .create_intent = xfs_rmap_update_create_intent, 258 + .abort_intent = xfs_rmap_update_abort_intent, 259 + .log_item = xfs_rmap_update_log_item, 260 + .create_done = xfs_rmap_update_create_done, 261 + .finish_item = xfs_rmap_update_finish_item, 262 + .finish_cleanup = xfs_rmap_update_finish_cleanup, 263 + .cancel_item = xfs_rmap_update_cancel_item, 264 + }; 265 + 266 + /* Register the deferred op type. */ 267 + void 268 + xfs_rmap_update_init_defer_op(void) 269 + { 270 + xfs_defer_init_op_type(&xfs_rmap_update_defer_type); 271 + }

Configure Feed

Configure Feed