Merge tag 'xfs-6.11-merge-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

+12 -14

Documentation/ABI/testing/sysfs-fs-xfs

··· 1 1 What: /sys/fs/xfs/<disk>/log/log_head_lsn 2 2 Date: July 2014 3 3 KernelVersion: 3.17 4 - Contact: xfs@oss.sgi.com 4 + Contact: linux-xfs@vger.kernel.org 5 5 Description: 6 6 The log sequence number (LSN) of the current head of the 7 7 log. The LSN is exported in "cycle:basic block" format. ··· 10 10 What: /sys/fs/xfs/<disk>/log/log_tail_lsn 11 11 Date: July 2014 12 12 KernelVersion: 3.17 13 - Contact: xfs@oss.sgi.com 13 + Contact: linux-xfs@vger.kernel.org 14 14 Description: 15 15 The log sequence number (LSN) of the current tail of the 16 16 log. The LSN is exported in "cycle:basic block" format. 17 17 18 - What: /sys/fs/xfs/<disk>/log/reserve_grant_head 19 - Date: July 2014 20 - KernelVersion: 3.17 21 - Contact: xfs@oss.sgi.com 18 + What: /sys/fs/xfs/<disk>/log/reserve_grant_head_bytes 19 + Date: June 2024 20 + KernelVersion: 6.11 21 + Contact: linux-xfs@vger.kernel.org 22 22 Description: 23 23 The current state of the log reserve grant head. It 24 24 represents the total log reservation of all currently 25 - outstanding transactions. The grant head is exported in 26 - "cycle:bytes" format. 25 + outstanding transactions in bytes. 27 26 Users: xfstests 28 27 29 - What: /sys/fs/xfs/<disk>/log/write_grant_head 30 - Date: July 2014 31 - KernelVersion: 3.17 32 - Contact: xfs@oss.sgi.com 28 + What: /sys/fs/xfs/<disk>/log/write_grant_head_bytes 29 + Date: June 2024 30 + KernelVersion: 6.11 31 + Contact: linux-xfs@vger.kernel.org 33 32 Description: 34 33 The current state of the log write grant head. It 35 34 represents the total log reservation of all currently 36 35 outstanding transactions, including regrants due to 37 - rolling transactions. The grant head is exported in 38 - "cycle:bytes" format. 36 + rolling transactions in bytes. 39 37 Users: xfstests

+12

fs/xfs/Kconfig

··· 217 217 218 218 Say N unless you are an XFS developer, or you play one on TV. 219 219 220 + config XFS_DEBUG_EXPENSIVE 221 + bool "XFS expensive debugging checks" 222 + depends on XFS_FS && XFS_DEBUG 223 + help 224 + Say Y here to get an XFS build with expensive debugging checks 225 + enabled. These checks may affect performance significantly. 226 + 227 + Note that the resulting code will be HUGER and SLOWER, and probably 228 + not useful unless you are debugging a particular problem. 229 + 230 + Say N unless you are an XFS developer, or you play one on TV. 231 + 220 232 config XFS_ASSERT_FATAL 221 233 bool "XFS fatal asserts" 222 234 default y

+1

fs/xfs/Makefile

··· 40 40 xfs_iext_tree.o \ 41 41 xfs_inode_fork.o \ 42 42 xfs_inode_buf.o \ 43 + xfs_inode_util.o \ 43 44 xfs_log_rlimit.o \ 44 45 xfs_ag_resv.o \ 45 46 xfs_parent.o \

+1 -1

fs/xfs/libxfs/xfs_ag.c

··· 1008 1008 goto resv_err; 1009 1009 1010 1010 err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, 1011 - XFS_AG_RESV_NONE, true); 1011 + XFS_AG_RESV_NONE, XFS_FREE_EXTENT_SKIP_DISCARD); 1012 1012 if (err2) 1013 1013 goto resv_err; 1014 1014

-19

fs/xfs/libxfs/xfs_ag_resv.h

··· 33 33 } 34 34 } 35 35 36 - /* 37 - * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from 38 - * the AGFL, they are allocated one at a time and the reservation updates don't 39 - * require a transaction. 40 - */ 41 - static inline void 42 - xfs_ag_resv_rmapbt_alloc( 43 - struct xfs_mount *mp, 44 - xfs_agnumber_t agno) 45 - { 46 - struct xfs_alloc_arg args = { NULL }; 47 - struct xfs_perag *pag; 48 - 49 - args.len = 1; 50 - pag = xfs_perag_get(mp, agno); 51 - xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args); 52 - xfs_perag_put(pag); 53 - } 54 - 55 36 #endif /* __XFS_AG_RESV_H__ */

+139 -96

fs/xfs/libxfs/xfs_alloc.c

··· 27 27 #include "xfs_ag_resv.h" 28 28 #include "xfs_bmap.h" 29 29 #include "xfs_health.h" 30 + #include "xfs_extfree_item.h" 30 31 31 32 struct kmem_cache *xfs_extfree_item_cache; 32 33 ··· 467 466 } 468 467 469 468 /* 469 + * Determine if the cursor points to the block that contains the right-most 470 + * block of records in the by-count btree. This block contains the largest 471 + * contiguous free extent in the AG, so if we modify a record in this block we 472 + * need to call xfs_alloc_fixup_longest() once the modifications are done to 473 + * ensure the agf->agf_longest field is kept up to date with the longest free 474 + * extent tracked by the by-count btree. 475 + */ 476 + static bool 477 + xfs_alloc_cursor_at_lastrec( 478 + struct xfs_btree_cur *cnt_cur) 479 + { 480 + struct xfs_btree_block *block; 481 + union xfs_btree_ptr ptr; 482 + struct xfs_buf *bp; 483 + 484 + block = xfs_btree_get_block(cnt_cur, 0, &bp); 485 + 486 + xfs_btree_get_sibling(cnt_cur, block, &ptr, XFS_BB_RIGHTSIB); 487 + return xfs_btree_ptr_is_null(cnt_cur, &ptr); 488 + } 489 + 490 + /* 491 + * Find the rightmost record of the cntbt, and return the longest free space 492 + * recorded in it. Simply set both the block number and the length to their 493 + * maximum values before searching. 
494 + */ 495 + static int 496 + xfs_cntbt_longest( 497 + struct xfs_btree_cur *cnt_cur, 498 + xfs_extlen_t *longest) 499 + { 500 + struct xfs_alloc_rec_incore irec; 501 + union xfs_btree_rec *rec; 502 + int stat = 0; 503 + int error; 504 + 505 + memset(&cnt_cur->bc_rec, 0xFF, sizeof(cnt_cur->bc_rec)); 506 + error = xfs_btree_lookup(cnt_cur, XFS_LOOKUP_LE, &stat); 507 + if (error) 508 + return error; 509 + if (!stat) { 510 + /* totally empty tree */ 511 + *longest = 0; 512 + return 0; 513 + } 514 + 515 + error = xfs_btree_get_rec(cnt_cur, &rec, &stat); 516 + if (error) 517 + return error; 518 + if (XFS_IS_CORRUPT(cnt_cur->bc_mp, !stat)) { 519 + xfs_btree_mark_sick(cnt_cur); 520 + return -EFSCORRUPTED; 521 + } 522 + 523 + xfs_alloc_btrec_to_irec(rec, &irec); 524 + *longest = irec.ar_blockcount; 525 + return 0; 526 + } 527 + 528 + /* 529 + * Update the longest contiguous free extent in the AG from the by-count cursor 530 + * that is passed to us. This should be done at the end of any allocation or 531 + * freeing operation that touches the longest extent in the btree. 532 + * 533 + * Needing to update the longest extent can be determined by calling 534 + * xfs_alloc_cursor_at_lastrec() after the cursor is positioned for record 535 + * modification but before the modification begins. 536 + */ 537 + static int 538 + xfs_alloc_fixup_longest( 539 + struct xfs_btree_cur *cnt_cur) 540 + { 541 + struct xfs_perag *pag = cnt_cur->bc_ag.pag; 542 + struct xfs_buf *bp = cnt_cur->bc_ag.agbp; 543 + struct xfs_agf *agf = bp->b_addr; 544 + xfs_extlen_t longest = 0; 545 + int error; 546 + 547 + /* Lookup last rec in order to update AGF. 
*/ 548 + error = xfs_cntbt_longest(cnt_cur, &longest); 549 + if (error) 550 + return error; 551 + 552 + pag->pagf_longest = longest; 553 + agf->agf_longest = cpu_to_be32(pag->pagf_longest); 554 + xfs_alloc_log_agf(cnt_cur->bc_tp, bp, XFS_AGF_LONGEST); 555 + 556 + return 0; 557 + } 558 + 559 + /* 470 560 * Update the two btrees, logically removing from freespace the extent 471 561 * starting at rbno, rlen blocks. The extent is contained within the 472 562 * actual (current) free extent fbno for flen blocks. ··· 581 489 xfs_extlen_t nflen1=0; /* first new free length */ 582 490 xfs_extlen_t nflen2=0; /* second new free length */ 583 491 struct xfs_mount *mp; 492 + bool fixup_longest = false; 584 493 585 494 mp = cnt_cur->bc_mp; 586 495 ··· 670 577 nfbno2 = rbno + rlen; 671 578 nflen2 = (fbno + flen) - nfbno2; 672 579 } 580 + 581 + if (xfs_alloc_cursor_at_lastrec(cnt_cur)) 582 + fixup_longest = true; 583 + 673 584 /* 674 585 * Delete the entry from the by-size btree. 675 586 */ ··· 751 654 return -EFSCORRUPTED; 752 655 } 753 656 } 657 + 658 + if (fixup_longest) 659 + return xfs_alloc_fixup_longest(cnt_cur); 660 + 754 661 return 0; 755 662 } 756 663 ··· 2033 1932 /* 2034 1933 * Free the extent starting at agno/bno for length. 2035 1934 */ 2036 - STATIC int 1935 + int 2037 1936 xfs_free_ag_extent( 2038 1937 struct xfs_trans *tp, 2039 1938 struct xfs_buf *agbp, ··· 2057 1956 int i; 2058 1957 int error; 2059 1958 struct xfs_perag *pag = agbp->b_pag; 1959 + bool fixup_longest = false; 2060 1960 2061 1961 bno_cur = cnt_cur = NULL; 2062 1962 mp = tp->t_mountp; ··· 2321 2219 } 2322 2220 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 2323 2221 bno_cur = NULL; 2222 + 2324 2223 /* 2325 2224 * In all cases we need to insert the new freespace in the by-size tree. 2225 + * 2226 + * If this new freespace is being inserted in the block that contains 2227 + * the largest free space in the btree, make sure we also fix up the 2228 + * agf->agf_longest tracker field. 
2326 2229 */ 2327 2230 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 2328 2231 goto error0; ··· 2336 2229 error = -EFSCORRUPTED; 2337 2230 goto error0; 2338 2231 } 2232 + if (xfs_alloc_cursor_at_lastrec(cnt_cur)) 2233 + fixup_longest = true; 2339 2234 if ((error = xfs_btree_insert(cnt_cur, &i))) 2340 2235 goto error0; 2341 2236 if (XFS_IS_CORRUPT(mp, i != 1)) { ··· 2345 2236 error = -EFSCORRUPTED; 2346 2237 goto error0; 2347 2238 } 2239 + if (fixup_longest) { 2240 + error = xfs_alloc_fixup_longest(cnt_cur); 2241 + if (error) 2242 + goto error0; 2243 + } 2244 + 2348 2245 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 2349 2246 cnt_cur = NULL; 2350 2247 ··· 2537 2422 return true; 2538 2423 } 2539 2424 2540 - int 2541 - xfs_free_agfl_block( 2542 - struct xfs_trans *tp, 2543 - xfs_agnumber_t agno, 2544 - xfs_agblock_t agbno, 2545 - struct xfs_buf *agbp, 2546 - struct xfs_owner_info *oinfo) 2547 - { 2548 - int error; 2549 - struct xfs_buf *bp; 2550 - 2551 - error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo, 2552 - XFS_AG_RESV_AGFL); 2553 - if (error) 2554 - return error; 2555 - 2556 - error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp, 2557 - XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno), 2558 - tp->t_mountp->m_bsize, 0, &bp); 2559 - if (error) 2560 - return error; 2561 - xfs_trans_binval(tp, bp); 2562 - 2563 - return 0; 2564 - } 2565 - 2566 2425 /* 2567 2426 * Check the agfl fields of the agf for inconsistency or corruption. 2568 2427 * ··· 2625 2536 } 2626 2537 2627 2538 /* 2628 - * Defer an AGFL block free. This is effectively equivalent to 2629 - * xfs_free_extent_later() with some special handling particular to AGFL blocks. 2630 - * 2631 - * Deferring AGFL frees helps prevent log reservation overruns due to too many 2632 - * allocation operations in a transaction. AGFL frees are prone to this problem 2633 - * because for one they are always freed one at a time. 
Further, an immediate 2634 - * AGFL block free can cause a btree join and require another block free before 2635 - * the real allocation can proceed. Deferring the free disconnects freeing up 2636 - * the AGFL slot from freeing the block. 2637 - */ 2638 - static int 2639 - xfs_defer_agfl_block( 2640 - struct xfs_trans *tp, 2641 - xfs_agnumber_t agno, 2642 - xfs_agblock_t agbno, 2643 - struct xfs_owner_info *oinfo) 2644 - { 2645 - struct xfs_mount *mp = tp->t_mountp; 2646 - struct xfs_extent_free_item *xefi; 2647 - xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno); 2648 - 2649 - ASSERT(xfs_extfree_item_cache != NULL); 2650 - ASSERT(oinfo != NULL); 2651 - 2652 - if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno))) 2653 - return -EFSCORRUPTED; 2654 - 2655 - xefi = kmem_cache_zalloc(xfs_extfree_item_cache, 2656 - GFP_KERNEL | __GFP_NOFAIL); 2657 - xefi->xefi_startblock = fsbno; 2658 - xefi->xefi_blockcount = 1; 2659 - xefi->xefi_owner = oinfo->oi_owner; 2660 - xefi->xefi_agresv = XFS_AG_RESV_AGFL; 2661 - 2662 - trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); 2663 - 2664 - xfs_extent_free_get_group(mp, xefi); 2665 - xfs_defer_add(tp, &xefi->xefi_list, &xfs_agfl_free_defer_type); 2666 - return 0; 2667 - } 2668 - 2669 - /* 2670 2539 * Add the extent to the list of extents to be free at transaction end. 2671 2540 * The list is maintained sorted (by block number). 
2672 2541 */ ··· 2635 2588 xfs_filblks_t len, 2636 2589 const struct xfs_owner_info *oinfo, 2637 2590 enum xfs_ag_resv_type type, 2638 - bool skip_discard, 2591 + unsigned int free_flags, 2639 2592 struct xfs_defer_pending **dfpp) 2640 2593 { 2641 2594 struct xfs_extent_free_item *xefi; 2642 2595 struct xfs_mount *mp = tp->t_mountp; 2643 - #ifdef DEBUG 2644 - xfs_agnumber_t agno; 2645 - xfs_agblock_t agbno; 2646 2596 2647 - ASSERT(bno != NULLFSBLOCK); 2648 - ASSERT(len > 0); 2649 2597 ASSERT(len <= XFS_MAX_BMBT_EXTLEN); 2650 2598 ASSERT(!isnullstartblock(bno)); 2651 - agno = XFS_FSB_TO_AGNO(mp, bno); 2652 - agbno = XFS_FSB_TO_AGBNO(mp, bno); 2653 - ASSERT(agno < mp->m_sb.sb_agcount); 2654 - ASSERT(agbno < mp->m_sb.sb_agblocks); 2655 - ASSERT(len < mp->m_sb.sb_agblocks); 2656 - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); 2657 - #endif 2658 - ASSERT(xfs_extfree_item_cache != NULL); 2659 - ASSERT(type != XFS_AG_RESV_AGFL); 2599 + ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS)); 2660 2600 2661 2601 if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) 2662 2602 return -EFSCORRUPTED; ··· 2653 2619 xefi->xefi_startblock = bno; 2654 2620 xefi->xefi_blockcount = (xfs_extlen_t)len; 2655 2621 xefi->xefi_agresv = type; 2656 - if (skip_discard) 2622 + if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD) 2657 2623 xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; 2658 2624 if (oinfo) { 2659 2625 ASSERT(oinfo->oi_offset == 0); ··· 2666 2632 } else { 2667 2633 xefi->xefi_owner = XFS_RMAP_OWN_NULL; 2668 2634 } 2669 - trace_xfs_bmap_free_defer(mp, 2670 - XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, 2671 - XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); 2672 2635 2673 - xfs_extent_free_get_group(mp, xefi); 2674 - *dfpp = xfs_defer_add(tp, &xefi->xefi_list, &xfs_extent_free_defer_type); 2636 + xfs_extent_free_defer_add(tp, xefi, dfpp); 2675 2637 return 0; 2676 2638 } 2677 2639 ··· 2678 2648 xfs_filblks_t len, 2679 2649 const struct xfs_owner_info *oinfo, 2680 2650 enum xfs_ag_resv_type type, 2681 - 
bool skip_discard) 2651 + unsigned int free_flags) 2682 2652 { 2683 2653 struct xfs_defer_pending *dontcare = NULL; 2684 2654 2685 - return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard, 2655 + return xfs_defer_extent_free(tp, bno, len, oinfo, type, free_flags, 2686 2656 &dontcare); 2687 2657 } 2688 2658 ··· 2707 2677 int 2708 2678 xfs_alloc_schedule_autoreap( 2709 2679 const struct xfs_alloc_arg *args, 2710 - bool skip_discard, 2680 + unsigned int free_flags, 2711 2681 struct xfs_alloc_autoreap *aarp) 2712 2682 { 2713 2683 int error; 2714 2684 2715 2685 error = xfs_defer_extent_free(args->tp, args->fsbno, args->len, 2716 - &args->oinfo, args->resv, skip_discard, &aarp->dfp); 2686 + &args->oinfo, args->resv, free_flags, &aarp->dfp); 2717 2687 if (error) 2718 2688 return error; 2719 2689 ··· 2925 2895 if (error) 2926 2896 goto out_agbp_relse; 2927 2897 2928 - /* defer agfl frees */ 2929 - error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); 2898 + /* 2899 + * Defer the AGFL block free. 2900 + * 2901 + * This helps to prevent log reservation overruns due to too 2902 + * many allocation operations in a transaction. AGFL frees are 2903 + * prone to this problem because for one they are always freed 2904 + * one at a time. Further, an immediate AGFL block free can 2905 + * cause a btree join and require another block free before the 2906 + * real allocation can proceed. 2907 + * Deferring the free disconnects freeing up the AGFL slot from 2908 + * freeing the block. 2909 + */ 2910 + error = xfs_free_extent_later(tp, 2911 + XFS_AGB_TO_FSB(mp, args->agno, bno), 1, 2912 + &targs.oinfo, XFS_AG_RESV_AGFL, 0); 2930 2913 if (error) 2931 2914 goto out_agbp_relse; 2932 2915 }

+11 -7

fs/xfs/libxfs/xfs_alloc.h

··· 80 80 int xfs_alloc_put_freelist(struct xfs_perag *pag, struct xfs_trans *tp, 81 81 struct xfs_buf *agfbp, struct xfs_buf *agflbp, 82 82 xfs_agblock_t bno, int btreeblk); 83 + int xfs_free_ag_extent(struct xfs_trans *tp, struct xfs_buf *agbp, 84 + xfs_agnumber_t agno, xfs_agblock_t bno, 85 + xfs_extlen_t len, const struct xfs_owner_info *oinfo, 86 + enum xfs_ag_resv_type type); 83 87 84 88 /* 85 89 * Compute and fill in value of m_alloc_maxlevels. ··· 198 194 struct xfs_buf **agfbpp); 199 195 int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp, 200 196 struct xfs_buf **bpp); 201 - int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t, 202 - struct xfs_buf *, struct xfs_owner_info *); 203 197 int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags); 204 198 int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag, 205 199 struct xfs_buf **agbp); ··· 235 233 236 234 int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, 237 235 xfs_filblks_t len, const struct xfs_owner_info *oinfo, 238 - enum xfs_ag_resv_type type, bool skip_discard); 236 + enum xfs_ag_resv_type type, unsigned int free_flags); 237 + 238 + /* Don't issue a discard for the blocks freed. */ 239 + #define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0) 240 + 241 + #define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD) 239 242 240 243 /* 241 244 * List of extents to be free "later". 
··· 256 249 enum xfs_ag_resv_type xefi_agresv; 257 250 }; 258 251 259 - void xfs_extent_free_get_group(struct xfs_mount *mp, 260 - struct xfs_extent_free_item *xefi); 261 - 262 252 #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ 263 253 #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ 264 254 #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ ··· 266 262 }; 267 263 268 264 int xfs_alloc_schedule_autoreap(const struct xfs_alloc_arg *args, 269 - bool skip_discard, struct xfs_alloc_autoreap *aarp); 265 + unsigned int free_flags, struct xfs_alloc_autoreap *aarp); 270 266 void xfs_alloc_cancel_autoreap(struct xfs_trans *tp, 271 267 struct xfs_alloc_autoreap *aarp); 272 268 void xfs_alloc_commit_autoreap(struct xfs_trans *tp,

-64

fs/xfs/libxfs/xfs_alloc_btree.c

··· 115 115 return 0; 116 116 } 117 117 118 - /* 119 - * Update the longest extent in the AGF 120 - */ 121 - STATIC void 122 - xfs_allocbt_update_lastrec( 123 - struct xfs_btree_cur *cur, 124 - const struct xfs_btree_block *block, 125 - const union xfs_btree_rec *rec, 126 - int ptr, 127 - int reason) 128 - { 129 - struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; 130 - struct xfs_perag *pag; 131 - __be32 len; 132 - int numrecs; 133 - 134 - ASSERT(!xfs_btree_is_bno(cur->bc_ops)); 135 - 136 - switch (reason) { 137 - case LASTREC_UPDATE: 138 - /* 139 - * If this is the last leaf block and it's the last record, 140 - * then update the size of the longest extent in the AG. 141 - */ 142 - if (ptr != xfs_btree_get_numrecs(block)) 143 - return; 144 - len = rec->alloc.ar_blockcount; 145 - break; 146 - case LASTREC_INSREC: 147 - if (be32_to_cpu(rec->alloc.ar_blockcount) <= 148 - be32_to_cpu(agf->agf_longest)) 149 - return; 150 - len = rec->alloc.ar_blockcount; 151 - break; 152 - case LASTREC_DELREC: 153 - numrecs = xfs_btree_get_numrecs(block); 154 - if (ptr <= numrecs) 155 - return; 156 - ASSERT(ptr == numrecs + 1); 157 - 158 - if (numrecs) { 159 - xfs_alloc_rec_t *rrp; 160 - 161 - rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs); 162 - len = rrp->ar_blockcount; 163 - } else { 164 - len = 0; 165 - } 166 - 167 - break; 168 - default: 169 - ASSERT(0); 170 - return; 171 - } 172 - 173 - agf->agf_longest = len; 174 - pag = cur->bc_ag.agbp->b_pag; 175 - pag->pagf_longest = be32_to_cpu(len); 176 - xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST); 177 - } 178 - 179 118 STATIC int 180 119 xfs_allocbt_get_minrecs( 181 120 struct xfs_btree_cur *cur, ··· 432 493 .set_root = xfs_allocbt_set_root, 433 494 .alloc_block = xfs_allocbt_alloc_block, 434 495 .free_block = xfs_allocbt_free_block, 435 - .update_lastrec = xfs_allocbt_update_lastrec, 436 496 .get_minrecs = xfs_allocbt_get_minrecs, 437 497 .get_maxrecs = xfs_allocbt_get_maxrecs, 438 498 .init_key_from_rec = 
xfs_allocbt_init_key_from_rec, ··· 449 511 const struct xfs_btree_ops xfs_cntbt_ops = { 450 512 .name = "cnt", 451 513 .type = XFS_BTREE_TYPE_AG, 452 - .geom_flags = XFS_BTGEO_LASTREC_UPDATE, 453 514 454 515 .rec_len = sizeof(xfs_alloc_rec_t), 455 516 .key_len = sizeof(xfs_alloc_key_t), ··· 462 525 .set_root = xfs_allocbt_set_root, 463 526 .alloc_block = xfs_allocbt_alloc_block, 464 527 .free_block = xfs_allocbt_free_block, 465 - .update_lastrec = xfs_allocbt_update_lastrec, 466 528 .get_minrecs = xfs_allocbt_get_minrecs, 467 529 .get_maxrecs = xfs_allocbt_get_maxrecs, 468 530 .init_key_from_rec = xfs_allocbt_init_key_from_rec,

+51 -4

fs/xfs/libxfs/xfs_bmap.c

··· 39 39 #include "xfs_health.h" 40 40 #include "xfs_bmap_item.h" 41 41 #include "xfs_symlink_remote.h" 42 + #include "xfs_inode_util.h" 42 43 43 44 struct kmem_cache *xfs_bmap_intent_cache; 44 45 ··· 605 604 606 605 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 607 606 error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, 608 - XFS_AG_RESV_NONE, false); 607 + XFS_AG_RESV_NONE, 0); 609 608 if (error) 610 609 return error; 611 610 ··· 5381 5380 error = xfs_rtfree_blocks(tp, del->br_startblock, 5382 5381 del->br_blockcount); 5383 5382 } else { 5383 + unsigned int efi_flags = 0; 5384 + 5385 + if ((bflags & XFS_BMAPI_NODISCARD) || 5386 + del->br_state == XFS_EXT_UNWRITTEN) 5387 + efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD; 5388 + 5384 5389 error = xfs_free_extent_later(tp, del->br_startblock, 5385 5390 del->br_blockcount, NULL, 5386 - XFS_AG_RESV_NONE, 5387 - ((bflags & XFS_BMAPI_NODISCARD) || 5388 - del->br_state == XFS_EXT_UNWRITTEN)); 5391 + XFS_AG_RESV_NONE, efi_flags); 5389 5392 } 5390 5393 if (error) 5391 5394 return error; ··· 6458 6453 }; 6459 6454 6460 6455 return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query); 6456 + } 6457 + 6458 + /* Helper function to extract extent size hint from inode */ 6459 + xfs_extlen_t 6460 + xfs_get_extsz_hint( 6461 + struct xfs_inode *ip) 6462 + { 6463 + /* 6464 + * No point in aligning allocations if we need to COW to actually 6465 + * write to them. 6466 + */ 6467 + if (xfs_is_always_cow_inode(ip)) 6468 + return 0; 6469 + if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize) 6470 + return ip->i_extsize; 6471 + if (XFS_IS_REALTIME_INODE(ip) && 6472 + ip->i_mount->m_sb.sb_rextsize > 1) 6473 + return ip->i_mount->m_sb.sb_rextsize; 6474 + return 0; 6475 + } 6476 + 6477 + /* 6478 + * Helper function to extract CoW extent size hint from inode. 6479 + * Between the extent size hint and the CoW extent size hint, we 6480 + * return the greater of the two. 
If the value is zero (automatic), 6481 + * use the default size. 6482 + */ 6483 + xfs_extlen_t 6484 + xfs_get_cowextsz_hint( 6485 + struct xfs_inode *ip) 6486 + { 6487 + xfs_extlen_t a, b; 6488 + 6489 + a = 0; 6490 + if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 6491 + a = ip->i_cowextsize; 6492 + b = xfs_get_extsz_hint(ip); 6493 + 6494 + a = max(a, b); 6495 + if (a == 0) 6496 + return XFS_DEFAULT_COWEXTSZ_HINT; 6497 + return a; 6461 6498 }

+3

fs/xfs/libxfs/xfs_bmap.h

··· 296 296 int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn, 297 297 void *priv); 298 298 299 + xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); 300 + xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); 301 + 299 302 #endif /* __XFS_BMAP_H__ */

+1 -1

fs/xfs/libxfs/xfs_bmap_btree.c

··· 282 282 283 283 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); 284 284 error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, 285 - XFS_AG_RESV_NONE, false); 285 + XFS_AG_RESV_NONE, 0); 286 286 if (error) 287 287 return error; 288 288

-51

fs/xfs/libxfs/xfs_btree.c

··· 1331 1331 xfs_btree_owner(cur)); 1332 1332 } 1333 1333 1334 - /* 1335 - * Return true if ptr is the last record in the btree and 1336 - * we need to track updates to this record. The decision 1337 - * will be further refined in the update_lastrec method. 1338 - */ 1339 - STATIC int 1340 - xfs_btree_is_lastrec( 1341 - struct xfs_btree_cur *cur, 1342 - struct xfs_btree_block *block, 1343 - int level) 1344 - { 1345 - union xfs_btree_ptr ptr; 1346 - 1347 - if (level > 0) 1348 - return 0; 1349 - if (!(cur->bc_ops->geom_flags & XFS_BTGEO_LASTREC_UPDATE)) 1350 - return 0; 1351 - 1352 - xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1353 - if (!xfs_btree_ptr_is_null(cur, &ptr)) 1354 - return 0; 1355 - return 1; 1356 - } 1357 - 1358 1334 STATIC void 1359 1335 xfs_btree_buf_to_ptr( 1360 1336 struct xfs_btree_cur *cur, ··· 2395 2419 /* Fill in the new contents and log them. */ 2396 2420 xfs_btree_copy_recs(cur, rp, rec, 1); 2397 2421 xfs_btree_log_recs(cur, bp, ptr, ptr); 2398 - 2399 - /* 2400 - * If we are tracking the last record in the tree and 2401 - * we are at the far right edge of the tree, update it. 2402 - */ 2403 - if (xfs_btree_is_lastrec(cur, block, 0)) { 2404 - cur->bc_ops->update_lastrec(cur, block, rec, 2405 - ptr, LASTREC_UPDATE); 2406 - } 2407 2422 2408 2423 /* Pass new key value up to our parent. */ 2409 2424 if (xfs_btree_needs_key_update(cur, ptr)) { ··· 3585 3618 } 3586 3619 3587 3620 /* 3588 - * If we are tracking the last record in the tree and 3589 - * we are at the far right edge of the tree, update it. 3590 - */ 3591 - if (xfs_btree_is_lastrec(cur, block, level)) { 3592 - cur->bc_ops->update_lastrec(cur, block, rec, 3593 - ptr, LASTREC_INSREC); 3594 - } 3595 - 3596 - /* 3597 3621 * Return the new block number, if any. 3598 3622 * If there is one, give back a record value and a cursor too. 
3599 3623 */ ··· 3940 3982 */ 3941 3983 xfs_btree_set_numrecs(block, --numrecs); 3942 3984 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3943 - 3944 - /* 3945 - * If we are tracking the last record in the tree and 3946 - * we are at the far right edge of the tree, update it. 3947 - */ 3948 - if (xfs_btree_is_lastrec(cur, block, level)) { 3949 - cur->bc_ops->update_lastrec(cur, block, NULL, 3950 - ptr, LASTREC_DELREC); 3951 - } 3952 3985 3953 3986 /* 3954 3987 * We're at the root level. First, shrink the root block in-memory.

+1 -15

fs/xfs/libxfs/xfs_btree.h

··· 154 154 int *stat); 155 155 int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp); 156 156 157 - /* update last record information */ 158 - void (*update_lastrec)(struct xfs_btree_cur *cur, 159 - const struct xfs_btree_block *block, 160 - const union xfs_btree_rec *rec, 161 - int ptr, int reason); 162 - 163 157 /* records in block/level */ 164 158 int (*get_minrecs)(struct xfs_btree_cur *cur, int level); 165 159 int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); ··· 216 222 }; 217 223 218 224 /* btree geometry flags */ 219 - #define XFS_BTGEO_LASTREC_UPDATE (1U << 0) /* track last rec externally */ 220 - #define XFS_BTGEO_OVERLAPPING (1U << 1) /* overlapping intervals */ 221 - 222 - /* 223 - * Reasons for the update_lastrec method to be called. 224 - */ 225 - #define LASTREC_UPDATE 0 226 - #define LASTREC_INSREC 1 227 - #define LASTREC_DELREC 2 225 + #define XFS_BTGEO_OVERLAPPING (1U << 0) /* overlapping intervals */ 228 226 229 227 230 228 union xfs_btree_irec {

+3 -1

fs/xfs/libxfs/xfs_defer.c

··· 12 12 #include "xfs_mount.h" 13 13 #include "xfs_defer.h" 14 14 #include "xfs_trans.h" 15 + #include "xfs_trans_priv.h" 15 16 #include "xfs_buf_item.h" 16 17 #include "xfs_inode.h" 17 18 #include "xfs_inode_item.h" 18 19 #include "xfs_trace.h" 19 20 #include "xfs_icache.h" 20 21 #include "xfs_log.h" 22 + #include "xfs_log_priv.h" 21 23 #include "xfs_rmap.h" 22 24 #include "xfs_refcount.h" 23 25 #include "xfs_bmap.h" ··· 558 556 * the log threshold once per call. 559 557 */ 560 558 if (threshold_lsn == NULLCOMMITLSN) { 561 - threshold_lsn = xlog_grant_push_threshold(log, 0); 559 + threshold_lsn = xfs_ail_get_push_target(log->l_ailp); 562 560 if (threshold_lsn == NULLCOMMITLSN) 563 561 break; 564 562 }

+658 -3

fs/xfs/libxfs/xfs_dir2.c

··· 19 19 #include "xfs_error.h" 20 20 #include "xfs_trace.h" 21 21 #include "xfs_health.h" 22 + #include "xfs_bmap_btree.h" 23 + #include "xfs_trans_space.h" 24 + #include "xfs_parent.h" 25 + #include "xfs_ag.h" 26 + #include "xfs_ialloc.h" 22 27 23 28 const struct xfs_name xfs_name_dotdot = { 24 29 .name = (const unsigned char *)"..", ··· 589 584 */ 590 585 int 591 586 xfs_dir_canenter( 592 - xfs_trans_t *tp, 593 - xfs_inode_t *dp, 594 - struct xfs_name *name) /* name of entry to add */ 587 + struct xfs_trans *tp, 588 + struct xfs_inode *dp, 589 + const struct xfs_name *name) /* name of entry to add */ 595 590 { 596 591 return xfs_dir_createname(tp, dp, name, 0, 0); 597 592 } ··· 760 755 if (unlikely(xfs_has_asciici(args->dp->i_mount))) 761 756 return xfs_ascii_ci_compname(args, name, len); 762 757 return xfs_da_compname(args, name, len); 758 + } 759 + 760 + #ifdef CONFIG_XFS_LIVE_HOOKS 761 + /* 762 + * Use a static key here to reduce the overhead of directory live update hooks. 763 + * If the compiler supports jump labels, the static branch will be replaced by 764 + * a nop sled when there are no hook users. Online fsck is currently the only 765 + * caller, so this is a reasonable tradeoff. 766 + * 767 + * Note: Patching the kernel code requires taking the cpu hotplug lock. Other 768 + * parts of the kernel allocate memory with that lock held, which means that 769 + * XFS callers cannot hold any locks that might be used by memory reclaim or 770 + * writeback when calling the static_branch_{inc,dec} functions. 771 + */ 772 + DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch); 773 + 774 + void 775 + xfs_dir_hook_disable(void) 776 + { 777 + xfs_hooks_switch_off(&xfs_dir_hooks_switch); 778 + } 779 + 780 + void 781 + xfs_dir_hook_enable(void) 782 + { 783 + xfs_hooks_switch_on(&xfs_dir_hooks_switch); 784 + } 785 + 786 + /* Call hooks for a directory update relating to a child dirent update. 
*/ 787 + inline void 788 + xfs_dir_update_hook( 789 + struct xfs_inode *dp, 790 + struct xfs_inode *ip, 791 + int delta, 792 + const struct xfs_name *name) 793 + { 794 + if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) { 795 + struct xfs_dir_update_params p = { 796 + .dp = dp, 797 + .ip = ip, 798 + .delta = delta, 799 + .name = name, 800 + }; 801 + struct xfs_mount *mp = ip->i_mount; 802 + 803 + xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p); 804 + } 805 + } 806 + 807 + /* Call the specified function during a directory update. */ 808 + int 809 + xfs_dir_hook_add( 810 + struct xfs_mount *mp, 811 + struct xfs_dir_hook *hook) 812 + { 813 + return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook); 814 + } 815 + 816 + /* Stop calling the specified function during a directory update. */ 817 + void 818 + xfs_dir_hook_del( 819 + struct xfs_mount *mp, 820 + struct xfs_dir_hook *hook) 821 + { 822 + xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook); 823 + } 824 + 825 + /* Configure directory update hook functions. */ 826 + void 827 + xfs_dir_hook_setup( 828 + struct xfs_dir_hook *hook, 829 + notifier_fn_t mod_fn) 830 + { 831 + xfs_hook_setup(&hook->dirent_hook, mod_fn); 832 + } 833 + #endif /* CONFIG_XFS_LIVE_HOOKS */ 834 + 835 + /* 836 + * Given a directory @dp, a newly allocated inode @ip, and a @name, link @ip 837 + * into @dp under the given @name. If @ip is a directory, it will be 838 + * initialized. Both inodes must have the ILOCK held and the transaction must 839 + * have sufficient blocks reserved. 
840 + */ 841 + int 842 + xfs_dir_create_child( 843 + struct xfs_trans *tp, 844 + unsigned int resblks, 845 + struct xfs_dir_update *du) 846 + { 847 + struct xfs_inode *dp = du->dp; 848 + const struct xfs_name *name = du->name; 849 + struct xfs_inode *ip = du->ip; 850 + int error; 851 + 852 + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); 853 + xfs_assert_ilocked(dp, XFS_ILOCK_EXCL); 854 + 855 + error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks); 856 + if (error) { 857 + ASSERT(error != -ENOSPC); 858 + return error; 859 + } 860 + 861 + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 862 + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 863 + 864 + if (S_ISDIR(VFS_I(ip)->i_mode)) { 865 + error = xfs_dir_init(tp, ip, dp); 866 + if (error) 867 + return error; 868 + 869 + xfs_bumplink(tp, dp); 870 + } 871 + 872 + /* 873 + * If we have parent pointers, we need to add the attribute containing 874 + * the parent information now. 875 + */ 876 + if (du->ppargs) { 877 + error = xfs_parent_addname(tp, du->ppargs, dp, name, ip); 878 + if (error) 879 + return error; 880 + } 881 + 882 + xfs_dir_update_hook(dp, ip, 1, name); 883 + return 0; 884 + } 885 + 886 + /* 887 + * Given a directory @dp, an existing non-directory inode @ip, and a @name, 888 + * link @ip into @dp under the given @name. Both inodes must have the ILOCK 889 + * held. 
890 + */ 891 + int 892 + xfs_dir_add_child( 893 + struct xfs_trans *tp, 894 + unsigned int resblks, 895 + struct xfs_dir_update *du) 896 + { 897 + struct xfs_inode *dp = du->dp; 898 + const struct xfs_name *name = du->name; 899 + struct xfs_inode *ip = du->ip; 900 + struct xfs_mount *mp = tp->t_mountp; 901 + int error; 902 + 903 + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); 904 + xfs_assert_ilocked(dp, XFS_ILOCK_EXCL); 905 + ASSERT(!S_ISDIR(VFS_I(ip)->i_mode)); 906 + 907 + if (!resblks) { 908 + error = xfs_dir_canenter(tp, dp, name); 909 + if (error) 910 + return error; 911 + } 912 + 913 + /* 914 + * Handle initial link state of O_TMPFILE inode 915 + */ 916 + if (VFS_I(ip)->i_nlink == 0) { 917 + struct xfs_perag *pag; 918 + 919 + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 920 + error = xfs_iunlink_remove(tp, pag, ip); 921 + xfs_perag_put(pag); 922 + if (error) 923 + return error; 924 + } 925 + 926 + error = xfs_dir_createname(tp, dp, name, ip->i_ino, resblks); 927 + if (error) 928 + return error; 929 + 930 + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 931 + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 932 + 933 + xfs_bumplink(tp, ip); 934 + 935 + /* 936 + * If we have parent pointers, we now need to add the parent record to 937 + * the attribute fork of the inode. If this is the initial parent 938 + * attribute, we need to create it correctly, otherwise we can just add 939 + * the parent to the inode. 940 + */ 941 + if (du->ppargs) { 942 + error = xfs_parent_addname(tp, du->ppargs, dp, name, ip); 943 + if (error) 944 + return error; 945 + } 946 + 947 + xfs_dir_update_hook(dp, ip, 1, name); 948 + return 0; 949 + } 950 + 951 + /* 952 + * Given a directory @dp, a child @ip, and a @name, remove the (@name, @ip) 953 + * entry from the directory. Both inodes must have the ILOCK held. 
954 + */ 955 + int 956 + xfs_dir_remove_child( 957 + struct xfs_trans *tp, 958 + unsigned int resblks, 959 + struct xfs_dir_update *du) 960 + { 961 + struct xfs_inode *dp = du->dp; 962 + const struct xfs_name *name = du->name; 963 + struct xfs_inode *ip = du->ip; 964 + int error; 965 + 966 + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); 967 + xfs_assert_ilocked(dp, XFS_ILOCK_EXCL); 968 + 969 + /* 970 + * If we're removing a directory perform some additional validation. 971 + */ 972 + if (S_ISDIR(VFS_I(ip)->i_mode)) { 973 + ASSERT(VFS_I(ip)->i_nlink >= 2); 974 + if (VFS_I(ip)->i_nlink != 2) 975 + return -ENOTEMPTY; 976 + if (!xfs_dir_isempty(ip)) 977 + return -ENOTEMPTY; 978 + 979 + /* Drop the link from ip's "..". */ 980 + error = xfs_droplink(tp, dp); 981 + if (error) 982 + return error; 983 + 984 + /* Drop the "." link from ip to self. */ 985 + error = xfs_droplink(tp, ip); 986 + if (error) 987 + return error; 988 + 989 + /* 990 + * Point the unlinked child directory's ".." entry to the root 991 + * directory to eliminate back-references to inodes that may 992 + * get freed before the child directory is closed. If the fs 993 + * gets shrunk, this can lead to dirent inode validation errors. 994 + */ 995 + if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) { 996 + error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, 997 + tp->t_mountp->m_sb.sb_rootino, 0); 998 + if (error) 999 + return error; 1000 + } 1001 + } else { 1002 + /* 1003 + * When removing a non-directory we need to log the parent 1004 + * inode here. For a directory this is done implicitly 1005 + * by the xfs_droplink call for the ".." entry. 1006 + */ 1007 + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1008 + } 1009 + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1010 + 1011 + /* Drop the link from dp to ip. 
*/ 1012 + error = xfs_droplink(tp, ip); 1013 + if (error) 1014 + return error; 1015 + 1016 + error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks); 1017 + if (error) { 1018 + ASSERT(error != -ENOENT); 1019 + return error; 1020 + } 1021 + 1022 + /* Remove parent pointer. */ 1023 + if (du->ppargs) { 1024 + error = xfs_parent_removename(tp, du->ppargs, dp, name, ip); 1025 + if (error) 1026 + return error; 1027 + } 1028 + 1029 + xfs_dir_update_hook(dp, ip, -1, name); 1030 + return 0; 1031 + } 1032 + 1033 + /* 1034 + * Exchange the entry (@name1, @ip1) in directory @dp1 with the entry (@name2, 1035 + * @ip2) in directory @dp2, and update '..' @ip1 and @ip2's entries as needed. 1036 + * @ip1 and @ip2 need not be of the same type. 1037 + * 1038 + * All inodes must have the ILOCK held, and both entries must already exist. 1039 + */ 1040 + int 1041 + xfs_dir_exchange_children( 1042 + struct xfs_trans *tp, 1043 + struct xfs_dir_update *du1, 1044 + struct xfs_dir_update *du2, 1045 + unsigned int spaceres) 1046 + { 1047 + struct xfs_inode *dp1 = du1->dp; 1048 + const struct xfs_name *name1 = du1->name; 1049 + struct xfs_inode *ip1 = du1->ip; 1050 + struct xfs_inode *dp2 = du2->dp; 1051 + const struct xfs_name *name2 = du2->name; 1052 + struct xfs_inode *ip2 = du2->ip; 1053 + int ip1_flags = 0; 1054 + int ip2_flags = 0; 1055 + int dp2_flags = 0; 1056 + int error; 1057 + 1058 + /* Swap inode number for dirent in first parent */ 1059 + error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres); 1060 + if (error) 1061 + return error; 1062 + 1063 + /* Swap inode number for dirent in second parent */ 1064 + error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres); 1065 + if (error) 1066 + return error; 1067 + 1068 + /* 1069 + * If we're renaming one or more directories across different parents, 1070 + * update the respective ".." entries (and link counts) to match the new 1071 + * parents. 
1072 + */ 1073 + if (dp1 != dp2) { 1074 + dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 1075 + 1076 + if (S_ISDIR(VFS_I(ip2)->i_mode)) { 1077 + error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, 1078 + dp1->i_ino, spaceres); 1079 + if (error) 1080 + return error; 1081 + 1082 + /* transfer ip2 ".." reference to dp1 */ 1083 + if (!S_ISDIR(VFS_I(ip1)->i_mode)) { 1084 + error = xfs_droplink(tp, dp2); 1085 + if (error) 1086 + return error; 1087 + xfs_bumplink(tp, dp1); 1088 + } 1089 + 1090 + /* 1091 + * Although ip1 isn't changed here, userspace needs 1092 + * to be warned about the change, so that applications 1093 + * relying on it (like backup ones), will properly 1094 + * notify the change 1095 + */ 1096 + ip1_flags |= XFS_ICHGTIME_CHG; 1097 + ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 1098 + } 1099 + 1100 + if (S_ISDIR(VFS_I(ip1)->i_mode)) { 1101 + error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, 1102 + dp2->i_ino, spaceres); 1103 + if (error) 1104 + return error; 1105 + 1106 + /* transfer ip1 ".." 
reference to dp2 */ 1107 + if (!S_ISDIR(VFS_I(ip2)->i_mode)) { 1108 + error = xfs_droplink(tp, dp1); 1109 + if (error) 1110 + return error; 1111 + xfs_bumplink(tp, dp2); 1112 + } 1113 + 1114 + /* 1115 + * Although ip2 isn't changed here, userspace needs 1116 + * to be warned about the change, so that applications 1117 + * relying on it (like backup ones), will properly 1118 + * notify the change 1119 + */ 1120 + ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 1121 + ip2_flags |= XFS_ICHGTIME_CHG; 1122 + } 1123 + } 1124 + 1125 + if (ip1_flags) { 1126 + xfs_trans_ichgtime(tp, ip1, ip1_flags); 1127 + xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); 1128 + } 1129 + if (ip2_flags) { 1130 + xfs_trans_ichgtime(tp, ip2, ip2_flags); 1131 + xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); 1132 + } 1133 + if (dp2_flags) { 1134 + xfs_trans_ichgtime(tp, dp2, dp2_flags); 1135 + xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE); 1136 + } 1137 + xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1138 + xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 1139 + 1140 + /* Schedule parent pointer replacements */ 1141 + if (du1->ppargs) { 1142 + error = xfs_parent_replacename(tp, du1->ppargs, dp1, name1, 1143 + dp2, name2, ip1); 1144 + if (error) 1145 + return error; 1146 + } 1147 + 1148 + if (du2->ppargs) { 1149 + error = xfs_parent_replacename(tp, du2->ppargs, dp2, name2, 1150 + dp1, name1, ip2); 1151 + if (error) 1152 + return error; 1153 + } 1154 + 1155 + /* 1156 + * Inform our hook clients that we've finished an exchange operation as 1157 + * follows: removed the source and target files from their directories; 1158 + * added the target to the source directory; and added the source to 1159 + * the target directory. All inodes are locked, so it's ok to model a 1160 + * rename this way so long as we say we deleted entries before we add 1161 + * new ones. 
1162 + */ 1163 + xfs_dir_update_hook(dp1, ip1, -1, name1); 1164 + xfs_dir_update_hook(dp2, ip2, -1, name2); 1165 + xfs_dir_update_hook(dp1, ip2, 1, name1); 1166 + xfs_dir_update_hook(dp2, ip1, 1, name2); 1167 + return 0; 1168 + } 1169 + 1170 + /* 1171 + * Given an entry (@src_name, @src_ip) in directory @src_dp, make the entry 1172 + * @target_name in directory @target_dp point to @src_ip and remove the 1173 + * original entry, cleaning up everything left behind. 1174 + * 1175 + * Cleanup involves dropping a link count on @target_ip, and either removing 1176 + * the (@src_name, @src_ip) entry from @src_dp or simply replacing the entry 1177 + * with (@src_name, @wip) if a whiteout inode @wip is supplied. 1178 + * 1179 + * All inodes must have the ILOCK held. We assume that if @src_ip is a 1180 + * directory then its '..' doesn't already point to @target_dp, and that @wip 1181 + * is a freshly allocated whiteout. 1182 + */ 1183 + int 1184 + xfs_dir_rename_children( 1185 + struct xfs_trans *tp, 1186 + struct xfs_dir_update *du_src, 1187 + struct xfs_dir_update *du_tgt, 1188 + unsigned int spaceres, 1189 + struct xfs_dir_update *du_wip) 1190 + { 1191 + struct xfs_mount *mp = tp->t_mountp; 1192 + struct xfs_inode *src_dp = du_src->dp; 1193 + const struct xfs_name *src_name = du_src->name; 1194 + struct xfs_inode *src_ip = du_src->ip; 1195 + struct xfs_inode *target_dp = du_tgt->dp; 1196 + const struct xfs_name *target_name = du_tgt->name; 1197 + struct xfs_inode *target_ip = du_tgt->ip; 1198 + bool new_parent = (src_dp != target_dp); 1199 + bool src_is_directory; 1200 + int error; 1201 + 1202 + src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); 1203 + 1204 + /* 1205 + * Check for expected errors before we dirty the transaction 1206 + * so we can return an error without a transaction abort. 1207 + */ 1208 + if (target_ip == NULL) { 1209 + /* 1210 + * If there's no space reservation, check the entry will 1211 + * fit before actually inserting it. 
1212 + */ 1213 + if (!spaceres) { 1214 + error = xfs_dir_canenter(tp, target_dp, target_name); 1215 + if (error) 1216 + return error; 1217 + } 1218 + } else { 1219 + /* 1220 + * If target exists and it's a directory, check that whether 1221 + * it can be destroyed. 1222 + */ 1223 + if (S_ISDIR(VFS_I(target_ip)->i_mode) && 1224 + (!xfs_dir_isempty(target_ip) || 1225 + (VFS_I(target_ip)->i_nlink > 2))) 1226 + return -EEXIST; 1227 + } 1228 + 1229 + /* 1230 + * Directory entry creation below may acquire the AGF. Remove 1231 + * the whiteout from the unlinked list first to preserve correct 1232 + * AGI/AGF locking order. This dirties the transaction so failures 1233 + * after this point will abort and log recovery will clean up the 1234 + * mess. 1235 + * 1236 + * For whiteouts, we need to bump the link count on the whiteout 1237 + * inode. After this point, we have a real link, clear the tmpfile 1238 + * state flag from the inode so it doesn't accidentally get misused 1239 + * in future. 1240 + */ 1241 + if (du_wip->ip) { 1242 + struct xfs_perag *pag; 1243 + 1244 + ASSERT(VFS_I(du_wip->ip)->i_nlink == 0); 1245 + 1246 + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, du_wip->ip->i_ino)); 1247 + error = xfs_iunlink_remove(tp, pag, du_wip->ip); 1248 + xfs_perag_put(pag); 1249 + if (error) 1250 + return error; 1251 + 1252 + xfs_bumplink(tp, du_wip->ip); 1253 + } 1254 + 1255 + /* 1256 + * Set up the target. 1257 + */ 1258 + if (target_ip == NULL) { 1259 + /* 1260 + * If target does not exist and the rename crosses 1261 + * directories, adjust the target directory link count 1262 + * to account for the ".." reference from the new entry. 
1263 + */ 1264 + error = xfs_dir_createname(tp, target_dp, target_name, 1265 + src_ip->i_ino, spaceres); 1266 + if (error) 1267 + return error; 1268 + 1269 + xfs_trans_ichgtime(tp, target_dp, 1270 + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1271 + 1272 + if (new_parent && src_is_directory) { 1273 + xfs_bumplink(tp, target_dp); 1274 + } 1275 + } else { /* target_ip != NULL */ 1276 + /* 1277 + * Link the source inode under the target name. 1278 + * If the source inode is a directory and we are moving 1279 + * it across directories, its ".." entry will be 1280 + * inconsistent until we replace that down below. 1281 + * 1282 + * In case there is already an entry with the same 1283 + * name at the destination directory, remove it first. 1284 + */ 1285 + error = xfs_dir_replace(tp, target_dp, target_name, 1286 + src_ip->i_ino, spaceres); 1287 + if (error) 1288 + return error; 1289 + 1290 + xfs_trans_ichgtime(tp, target_dp, 1291 + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1292 + 1293 + /* 1294 + * Decrement the link count on the target since the target 1295 + * dir no longer points to it. 1296 + */ 1297 + error = xfs_droplink(tp, target_ip); 1298 + if (error) 1299 + return error; 1300 + 1301 + if (src_is_directory) { 1302 + /* 1303 + * Drop the link from the old "." entry. 1304 + */ 1305 + error = xfs_droplink(tp, target_ip); 1306 + if (error) 1307 + return error; 1308 + } 1309 + } /* target_ip != NULL */ 1310 + 1311 + /* 1312 + * Remove the source. 1313 + */ 1314 + if (new_parent && src_is_directory) { 1315 + /* 1316 + * Rewrite the ".." entry to point to the new 1317 + * directory. 1318 + */ 1319 + error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 1320 + target_dp->i_ino, spaceres); 1321 + ASSERT(error != -EEXIST); 1322 + if (error) 1323 + return error; 1324 + } 1325 + 1326 + /* 1327 + * We always want to hit the ctime on the source inode. 
1328 + * 1329 + * This isn't strictly required by the standards since the source 1330 + * inode isn't really being changed, but old unix file systems did 1331 + * it and some incremental backup programs won't work without it. 1332 + */ 1333 + xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); 1334 + xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); 1335 + 1336 + /* 1337 + * Adjust the link count on src_dp. This is necessary when 1338 + * renaming a directory, either within one parent when 1339 + * the target existed, or across two parent directories. 1340 + */ 1341 + if (src_is_directory && (new_parent || target_ip != NULL)) { 1342 + 1343 + /* 1344 + * Decrement link count on src_directory since the 1345 + * entry that's moved no longer points to it. 1346 + */ 1347 + error = xfs_droplink(tp, src_dp); 1348 + if (error) 1349 + return error; 1350 + } 1351 + 1352 + /* 1353 + * For whiteouts, we only need to update the source dirent with the 1354 + * inode number of the whiteout inode rather than removing it 1355 + * altogether. 1356 + */ 1357 + if (du_wip->ip) 1358 + error = xfs_dir_replace(tp, src_dp, src_name, du_wip->ip->i_ino, 1359 + spaceres); 1360 + else 1361 + error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 1362 + spaceres); 1363 + if (error) 1364 + return error; 1365 + 1366 + xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1367 + xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 1368 + if (new_parent) 1369 + xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 1370 + 1371 + /* Schedule parent pointer updates. 
*/ 1372 + if (du_wip->ppargs) { 1373 + error = xfs_parent_addname(tp, du_wip->ppargs, src_dp, 1374 + src_name, du_wip->ip); 1375 + if (error) 1376 + return error; 1377 + } 1378 + 1379 + if (du_src->ppargs) { 1380 + error = xfs_parent_replacename(tp, du_src->ppargs, src_dp, 1381 + src_name, target_dp, target_name, src_ip); 1382 + if (error) 1383 + return error; 1384 + } 1385 + 1386 + if (du_tgt->ppargs) { 1387 + error = xfs_parent_removename(tp, du_tgt->ppargs, target_dp, 1388 + target_name, target_ip); 1389 + if (error) 1390 + return error; 1391 + } 1392 + 1393 + /* 1394 + * Inform our hook clients that we've finished a rename operation as 1395 + * follows: removed the source and target files from their directories; 1396 + * that we've added the source to the target directory; and finally 1397 + * that we've added the whiteout, if there was one. All inodes are 1398 + * locked, so it's ok to model a rename this way so long as we say we 1399 + * deleted entries before we add new ones. 1400 + */ 1401 + if (target_ip) 1402 + xfs_dir_update_hook(target_dp, target_ip, -1, target_name); 1403 + xfs_dir_update_hook(src_dp, src_ip, -1, src_name); 1404 + xfs_dir_update_hook(target_dp, src_ip, 1, target_name); 1405 + if (du_wip->ip) 1406 + xfs_dir_update_hook(src_dp, du_wip->ip, 1, src_name); 1407 + return 0; 763 1408 }

+48 -1

fs/xfs/libxfs/xfs_dir2.h

··· 74 74 const struct xfs_name *name, xfs_ino_t inum, 75 75 xfs_extlen_t tot); 76 76 extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, 77 - struct xfs_name *name); 77 + const struct xfs_name *name); 78 78 79 79 int xfs_dir_lookup_args(struct xfs_da_args *args); 80 80 int xfs_dir_createname_args(struct xfs_da_args *args); ··· 308 308 c -= 'A' - 'a'; 309 309 return c; 310 310 } 311 + 312 + struct xfs_dir_update_params { 313 + const struct xfs_inode *dp; 314 + const struct xfs_inode *ip; 315 + const struct xfs_name *name; 316 + int delta; 317 + }; 318 + 319 + #ifdef CONFIG_XFS_LIVE_HOOKS 320 + void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip, 321 + int delta, const struct xfs_name *name); 322 + 323 + struct xfs_dir_hook { 324 + struct xfs_hook dirent_hook; 325 + }; 326 + 327 + void xfs_dir_hook_disable(void); 328 + void xfs_dir_hook_enable(void); 329 + 330 + int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook); 331 + void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook); 332 + void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn); 333 + #else 334 + # define xfs_dir_update_hook(dp, ip, delta, name) ((void)0) 335 + #endif /* CONFIG_XFS_LIVE_HOOKS */ 336 + 337 + struct xfs_parent_args; 338 + 339 + struct xfs_dir_update { 340 + struct xfs_inode *dp; 341 + const struct xfs_name *name; 342 + struct xfs_inode *ip; 343 + struct xfs_parent_args *ppargs; 344 + }; 345 + 346 + int xfs_dir_create_child(struct xfs_trans *tp, unsigned int resblks, 347 + struct xfs_dir_update *du); 348 + int xfs_dir_add_child(struct xfs_trans *tp, unsigned int resblks, 349 + struct xfs_dir_update *du); 350 + int xfs_dir_remove_child(struct xfs_trans *tp, unsigned int resblks, 351 + struct xfs_dir_update *du); 352 + 353 + int xfs_dir_exchange_children(struct xfs_trans *tp, struct xfs_dir_update *du1, 354 + struct xfs_dir_update *du2, unsigned int spaceres); 355 + int xfs_dir_rename_children(struct xfs_trans 
*tp, struct xfs_dir_update *du_src, 356 + struct xfs_dir_update *du_tgt, unsigned int spaceres, 357 + struct xfs_dir_update *du_wip); 311 358 312 359 #endif /* __XFS_DIR2_H__ */

+26 -5

fs/xfs/libxfs/xfs_dir2_data.c

··· 178 178 while (offset < end) { 179 179 struct xfs_dir2_data_unused *dup = bp->b_addr + offset; 180 180 struct xfs_dir2_data_entry *dep = bp->b_addr + offset; 181 + unsigned int reclen; 182 + 183 + /* 184 + * Are the remaining bytes large enough to hold an 185 + * unused entry? 186 + */ 187 + if (offset > end - xfs_dir2_data_unusedsize(1)) 188 + return __this_address; 181 189 182 190 /* 183 191 * If it's unused, look for the space in the bestfree table. ··· 195 187 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 196 188 xfs_failaddr_t fa; 197 189 190 + reclen = xfs_dir2_data_unusedsize( 191 + be16_to_cpu(dup->length)); 198 192 if (lastfree != 0) 199 193 return __this_address; 200 - if (offset + be16_to_cpu(dup->length) > end) 194 + if (be16_to_cpu(dup->length) != reclen) 195 + return __this_address; 196 + if (offset + reclen > end) 201 197 return __this_address; 202 198 if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != 203 199 offset) ··· 219 207 be16_to_cpu(bf[2].length)) 220 208 return __this_address; 221 209 } 222 - offset += be16_to_cpu(dup->length); 210 + offset += reclen; 223 211 lastfree = 1; 224 212 continue; 225 213 } 214 + 215 + /* 216 + * This is not an unused entry. Are the remaining bytes 217 + * large enough for a dirent with a single-byte name? 218 + */ 219 + if (offset > end - xfs_dir2_data_entsize(mp, 1)) 220 + return __this_address; 221 + 226 222 /* 227 223 * It's a real entry. Validate the fields. 
228 224 * If this is a block directory then make sure it's ··· 239 219 */ 240 220 if (dep->namelen == 0) 241 221 return __this_address; 242 - if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber))) 222 + reclen = xfs_dir2_data_entsize(mp, dep->namelen); 223 + if (offset + reclen > end) 243 224 return __this_address; 244 - if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end) 225 + if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber))) 245 226 return __this_address; 246 227 if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset) 247 228 return __this_address; ··· 266 245 if (i >= be32_to_cpu(btp->count)) 267 246 return __this_address; 268 247 } 269 - offset += xfs_dir2_data_entsize(mp, dep->namelen); 248 + offset += reclen; 270 249 } 271 250 /* 272 251 * Need to have seen all the entries and all the bestfree slots.

+7

fs/xfs/libxfs/xfs_dir2_priv.h

··· 190 190 struct dir_context *ctx, size_t bufsize); 191 191 192 192 static inline unsigned int 193 + xfs_dir2_data_unusedsize( 194 + unsigned int len) 195 + { 196 + return round_up(len, XFS_DIR2_DATA_ALIGN); 197 + } 198 + 199 + static inline unsigned int 193 200 xfs_dir2_data_entsize( 194 201 struct xfs_mount *mp, 195 202 unsigned int namelen)

+4 -5

fs/xfs/libxfs/xfs_format.h

··· 90 90 #define XFSLABEL_MAX 12 91 91 92 92 /* 93 - * Superblock - in core version. Must match the ondisk version below. 94 - * Must be padded to 64 bit alignment. 93 + * Superblock - in core version. Must be padded to 64 bit alignment. 95 94 */ 96 95 typedef struct xfs_sb { 97 96 uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ ··· 177 178 /* must be padded to 64 bit alignment */ 178 179 } xfs_sb_t; 179 180 180 - #define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) 181 - 182 181 /* 183 - * Superblock - on disk version. Must match the in core version above. 182 + * Superblock - on disk version. 184 183 * Must be padded to 64 bit alignment. 185 184 */ 186 185 struct xfs_dsb { ··· 261 264 262 265 /* must be padded to 64 bit alignment */ 263 266 }; 267 + 268 + #define XFS_SB_CRC_OFF offsetof(struct xfs_dsb, sb_crc) 264 269 265 270 /* 266 271 * Misc. Flags - warning - these will be cleared by xfs_repair unless

+17 -3

fs/xfs/libxfs/xfs_ialloc.c

··· 1946 1946 } 1947 1947 return -ENOSPC; 1948 1948 } 1949 + 1950 + /* 1951 + * Protect against obviously corrupt allocation btree records. Later 1952 + * xfs_iget checks will catch re-allocation of other active in-memory 1953 + * and on-disk inodes. If we don't catch reallocating the parent inode 1954 + * here we will deadlock in xfs_iget() so we have to do these checks 1955 + * first. 1956 + */ 1957 + if (ino == parent || !xfs_verify_dir_ino(mp, ino)) { 1958 + xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino); 1959 + xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino), 1960 + XFS_SICK_AG_INOBT); 1961 + return -EFSCORRUPTED; 1962 + } 1963 + 1949 1964 *new_ino = ino; 1950 1965 return 0; 1951 1966 } ··· 1990 1975 return xfs_free_extent_later(tp, 1991 1976 XFS_AGB_TO_FSB(mp, agno, sagbno), 1992 1977 M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, 1993 - XFS_AG_RESV_NONE, false); 1978 + XFS_AG_RESV_NONE, 0); 1994 1979 } 1995 1980 1996 1981 /* holemask is only 16-bits (fits in an unsigned long) */ ··· 2036 2021 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); 2037 2022 error = xfs_free_extent_later(tp, 2038 2023 XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, 2039 - &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, 2040 - false); 2024 + &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, 0); 2041 2025 if (error) 2042 2026 return error; 2043 2027

+1 -1

fs/xfs/libxfs/xfs_ialloc_btree.c

··· 170 170 xfs_inobt_mod_blockcount(cur, -1); 171 171 fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); 172 172 return xfs_free_extent_later(cur->bc_tp, fsbno, 1, 173 - &XFS_RMAP_OINFO_INOBT, resv, false); 173 + &XFS_RMAP_OINFO_INOBT, resv, 0); 174 174 } 175 175 176 176 STATIC int

+749

fs/xfs/libxfs/xfs_inode_util.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 + * All Rights Reserved. 5 + */ 6 + #include <linux/iversion.h> 7 + #include "xfs.h" 8 + #include "xfs_fs.h" 9 + #include "xfs_shared.h" 10 + #include "xfs_format.h" 11 + #include "xfs_log_format.h" 12 + #include "xfs_trans_resv.h" 13 + #include "xfs_sb.h" 14 + #include "xfs_mount.h" 15 + #include "xfs_inode.h" 16 + #include "xfs_inode_util.h" 17 + #include "xfs_trans.h" 18 + #include "xfs_ialloc.h" 19 + #include "xfs_health.h" 20 + #include "xfs_bmap.h" 21 + #include "xfs_error.h" 22 + #include "xfs_trace.h" 23 + #include "xfs_ag.h" 24 + #include "xfs_iunlink_item.h" 25 + #include "xfs_inode_item.h" 26 + 27 + uint16_t 28 + xfs_flags2diflags( 29 + struct xfs_inode *ip, 30 + unsigned int xflags) 31 + { 32 + /* can't set PREALLOC this way, just preserve it */ 33 + uint16_t di_flags = 34 + (ip->i_diflags & XFS_DIFLAG_PREALLOC); 35 + 36 + if (xflags & FS_XFLAG_IMMUTABLE) 37 + di_flags |= XFS_DIFLAG_IMMUTABLE; 38 + if (xflags & FS_XFLAG_APPEND) 39 + di_flags |= XFS_DIFLAG_APPEND; 40 + if (xflags & FS_XFLAG_SYNC) 41 + di_flags |= XFS_DIFLAG_SYNC; 42 + if (xflags & FS_XFLAG_NOATIME) 43 + di_flags |= XFS_DIFLAG_NOATIME; 44 + if (xflags & FS_XFLAG_NODUMP) 45 + di_flags |= XFS_DIFLAG_NODUMP; 46 + if (xflags & FS_XFLAG_NODEFRAG) 47 + di_flags |= XFS_DIFLAG_NODEFRAG; 48 + if (xflags & FS_XFLAG_FILESTREAM) 49 + di_flags |= XFS_DIFLAG_FILESTREAM; 50 + if (S_ISDIR(VFS_I(ip)->i_mode)) { 51 + if (xflags & FS_XFLAG_RTINHERIT) 52 + di_flags |= XFS_DIFLAG_RTINHERIT; 53 + if (xflags & FS_XFLAG_NOSYMLINKS) 54 + di_flags |= XFS_DIFLAG_NOSYMLINKS; 55 + if (xflags & FS_XFLAG_EXTSZINHERIT) 56 + di_flags |= XFS_DIFLAG_EXTSZINHERIT; 57 + if (xflags & FS_XFLAG_PROJINHERIT) 58 + di_flags |= XFS_DIFLAG_PROJINHERIT; 59 + } else if (S_ISREG(VFS_I(ip)->i_mode)) { 60 + if (xflags & FS_XFLAG_REALTIME) 61 + di_flags |= XFS_DIFLAG_REALTIME; 62 + if (xflags & FS_XFLAG_EXTSIZE) 63 + di_flags |= 
XFS_DIFLAG_EXTSIZE; 64 + } 65 + 66 + return di_flags; 67 + } 68 + 69 + uint64_t 70 + xfs_flags2diflags2( 71 + struct xfs_inode *ip, 72 + unsigned int xflags) 73 + { 74 + uint64_t di_flags2 = 75 + (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK | 76 + XFS_DIFLAG2_BIGTIME | 77 + XFS_DIFLAG2_NREXT64)); 78 + 79 + if (xflags & FS_XFLAG_DAX) 80 + di_flags2 |= XFS_DIFLAG2_DAX; 81 + if (xflags & FS_XFLAG_COWEXTSIZE) 82 + di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; 83 + 84 + return di_flags2; 85 + } 86 + 87 + uint32_t 88 + xfs_ip2xflags( 89 + struct xfs_inode *ip) 90 + { 91 + uint32_t flags = 0; 92 + 93 + if (ip->i_diflags & XFS_DIFLAG_ANY) { 94 + if (ip->i_diflags & XFS_DIFLAG_REALTIME) 95 + flags |= FS_XFLAG_REALTIME; 96 + if (ip->i_diflags & XFS_DIFLAG_PREALLOC) 97 + flags |= FS_XFLAG_PREALLOC; 98 + if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) 99 + flags |= FS_XFLAG_IMMUTABLE; 100 + if (ip->i_diflags & XFS_DIFLAG_APPEND) 101 + flags |= FS_XFLAG_APPEND; 102 + if (ip->i_diflags & XFS_DIFLAG_SYNC) 103 + flags |= FS_XFLAG_SYNC; 104 + if (ip->i_diflags & XFS_DIFLAG_NOATIME) 105 + flags |= FS_XFLAG_NOATIME; 106 + if (ip->i_diflags & XFS_DIFLAG_NODUMP) 107 + flags |= FS_XFLAG_NODUMP; 108 + if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) 109 + flags |= FS_XFLAG_RTINHERIT; 110 + if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT) 111 + flags |= FS_XFLAG_PROJINHERIT; 112 + if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS) 113 + flags |= FS_XFLAG_NOSYMLINKS; 114 + if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) 115 + flags |= FS_XFLAG_EXTSIZE; 116 + if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) 117 + flags |= FS_XFLAG_EXTSZINHERIT; 118 + if (ip->i_diflags & XFS_DIFLAG_NODEFRAG) 119 + flags |= FS_XFLAG_NODEFRAG; 120 + if (ip->i_diflags & XFS_DIFLAG_FILESTREAM) 121 + flags |= FS_XFLAG_FILESTREAM; 122 + } 123 + 124 + if (ip->i_diflags2 & XFS_DIFLAG2_ANY) { 125 + if (ip->i_diflags2 & XFS_DIFLAG2_DAX) 126 + flags |= FS_XFLAG_DAX; 127 + if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 128 + flags |= FS_XFLAG_COWEXTSIZE; 129 + } 130 + 131 + 
if (xfs_inode_has_attr_fork(ip)) 132 + flags |= FS_XFLAG_HASATTR; 133 + return flags; 134 + } 135 + 136 + prid_t 137 + xfs_get_initial_prid(struct xfs_inode *dp) 138 + { 139 + if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT) 140 + return dp->i_projid; 141 + 142 + /* Assign to the root project by default. */ 143 + return 0; 144 + } 145 + 146 + /* Propagate di_flags from a parent inode to a child inode. */ 147 + static inline void 148 + xfs_inode_inherit_flags( 149 + struct xfs_inode *ip, 150 + const struct xfs_inode *pip) 151 + { 152 + unsigned int di_flags = 0; 153 + xfs_failaddr_t failaddr; 154 + umode_t mode = VFS_I(ip)->i_mode; 155 + 156 + if (S_ISDIR(mode)) { 157 + if (pip->i_diflags & XFS_DIFLAG_RTINHERIT) 158 + di_flags |= XFS_DIFLAG_RTINHERIT; 159 + if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 160 + di_flags |= XFS_DIFLAG_EXTSZINHERIT; 161 + ip->i_extsize = pip->i_extsize; 162 + } 163 + if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT) 164 + di_flags |= XFS_DIFLAG_PROJINHERIT; 165 + } else if (S_ISREG(mode)) { 166 + if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && 167 + xfs_has_realtime(ip->i_mount)) 168 + di_flags |= XFS_DIFLAG_REALTIME; 169 + if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 170 + di_flags |= XFS_DIFLAG_EXTSIZE; 171 + ip->i_extsize = pip->i_extsize; 172 + } 173 + } 174 + if ((pip->i_diflags & XFS_DIFLAG_NOATIME) && 175 + xfs_inherit_noatime) 176 + di_flags |= XFS_DIFLAG_NOATIME; 177 + if ((pip->i_diflags & XFS_DIFLAG_NODUMP) && 178 + xfs_inherit_nodump) 179 + di_flags |= XFS_DIFLAG_NODUMP; 180 + if ((pip->i_diflags & XFS_DIFLAG_SYNC) && 181 + xfs_inherit_sync) 182 + di_flags |= XFS_DIFLAG_SYNC; 183 + if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) && 184 + xfs_inherit_nosymlinks) 185 + di_flags |= XFS_DIFLAG_NOSYMLINKS; 186 + if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) && 187 + xfs_inherit_nodefrag) 188 + di_flags |= XFS_DIFLAG_NODEFRAG; 189 + if (pip->i_diflags & XFS_DIFLAG_FILESTREAM) 190 + di_flags |= XFS_DIFLAG_FILESTREAM; 191 + 192 + ip->i_diflags 
|= di_flags; 193 + 194 + /* 195 + * Inode verifiers on older kernels only check that the extent size 196 + * hint is an integer multiple of the rt extent size on realtime files. 197 + * They did not check the hint alignment on a directory with both 198 + * rtinherit and extszinherit flags set. If the misaligned hint is 199 + * propagated from a directory into a new realtime file, new file 200 + * allocations will fail due to math errors in the rt allocator and/or 201 + * trip the verifiers. Validate the hint settings in the new file so 202 + * that we don't let broken hints propagate. 203 + */ 204 + failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, 205 + VFS_I(ip)->i_mode, ip->i_diflags); 206 + if (failaddr) { 207 + ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | 208 + XFS_DIFLAG_EXTSZINHERIT); 209 + ip->i_extsize = 0; 210 + } 211 + } 212 + 213 + /* Propagate di_flags2 from a parent inode to a child inode. */ 214 + static inline void 215 + xfs_inode_inherit_flags2( 216 + struct xfs_inode *ip, 217 + const struct xfs_inode *pip) 218 + { 219 + xfs_failaddr_t failaddr; 220 + 221 + if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { 222 + ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; 223 + ip->i_cowextsize = pip->i_cowextsize; 224 + } 225 + if (pip->i_diflags2 & XFS_DIFLAG2_DAX) 226 + ip->i_diflags2 |= XFS_DIFLAG2_DAX; 227 + 228 + /* Don't let invalid cowextsize hints propagate. */ 229 + failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, 230 + VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); 231 + if (failaddr) { 232 + ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; 233 + ip->i_cowextsize = 0; 234 + } 235 + } 236 + 237 + /* 238 + * If we need to create attributes immediately after allocating the inode, 239 + * initialise an empty attribute fork right now. We use the default fork offset 240 + * for attributes here as we don't know exactly what size or how many 241 + * attributes we might be adding. 
We can do this safely here because we know 242 + * the data fork is completely empty and this saves us from needing to run a 243 + * separate transaction to set the fork offset in the immediate future. 244 + * 245 + * If we have parent pointers and the caller hasn't told us that the file will 246 + * never be linked into a directory tree, we /must/ create the attr fork. 247 + */ 248 + static inline bool 249 + xfs_icreate_want_attrfork( 250 + struct xfs_mount *mp, 251 + const struct xfs_icreate_args *args) 252 + { 253 + if (args->flags & XFS_ICREATE_INIT_XATTRS) 254 + return true; 255 + 256 + if (!(args->flags & XFS_ICREATE_UNLINKABLE) && xfs_has_parent(mp)) 257 + return true; 258 + 259 + return false; 260 + } 261 + 262 + /* Initialise an inode's attributes. */ 263 + void 264 + xfs_inode_init( 265 + struct xfs_trans *tp, 266 + const struct xfs_icreate_args *args, 267 + struct xfs_inode *ip) 268 + { 269 + struct xfs_inode *pip = args->pip; 270 + struct inode *dir = pip ? VFS_I(pip) : NULL; 271 + struct xfs_mount *mp = tp->t_mountp; 272 + struct inode *inode = VFS_I(ip); 273 + unsigned int flags; 274 + int times = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG | 275 + XFS_ICHGTIME_ACCESS; 276 + 277 + if (args->flags & XFS_ICREATE_TMPFILE) 278 + set_nlink(inode, 0); 279 + else if (S_ISDIR(args->mode)) 280 + set_nlink(inode, 2); 281 + else 282 + set_nlink(inode, 1); 283 + inode->i_rdev = args->rdev; 284 + 285 + if (!args->idmap || pip == NULL) { 286 + /* creating a tree root, sb rooted, or detached file */ 287 + inode->i_uid = GLOBAL_ROOT_UID; 288 + inode->i_gid = GLOBAL_ROOT_GID; 289 + ip->i_projid = 0; 290 + inode->i_mode = args->mode; 291 + } else { 292 + /* creating a child in the directory tree */ 293 + if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { 294 + inode_fsuid_set(inode, args->idmap); 295 + inode->i_gid = dir->i_gid; 296 + inode->i_mode = args->mode; 297 + } else { 298 + inode_init_owner(args->idmap, inode, dir, args->mode); 299 + } 300 + 301 + /* 302 + * 
If the group ID of the new file does not match the effective 303 + * group ID or one of the supplementary group IDs, the S_ISGID 304 + * bit is cleared (and only if the irix_sgid_inherit 305 + * compatibility variable is set). 306 + */ 307 + if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && 308 + !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode))) 309 + inode->i_mode &= ~S_ISGID; 310 + 311 + ip->i_projid = pip ? xfs_get_initial_prid(pip) : 0; 312 + } 313 + 314 + ip->i_disk_size = 0; 315 + ip->i_df.if_nextents = 0; 316 + ASSERT(ip->i_nblocks == 0); 317 + 318 + ip->i_extsize = 0; 319 + ip->i_diflags = 0; 320 + 321 + if (xfs_has_v3inodes(mp)) { 322 + inode_set_iversion(inode, 1); 323 + ip->i_cowextsize = 0; 324 + times |= XFS_ICHGTIME_CREATE; 325 + } 326 + 327 + xfs_trans_ichgtime(tp, ip, times); 328 + 329 + flags = XFS_ILOG_CORE; 330 + switch (args->mode & S_IFMT) { 331 + case S_IFIFO: 332 + case S_IFCHR: 333 + case S_IFBLK: 334 + case S_IFSOCK: 335 + ip->i_df.if_format = XFS_DINODE_FMT_DEV; 336 + flags |= XFS_ILOG_DEV; 337 + break; 338 + case S_IFREG: 339 + case S_IFDIR: 340 + if (pip && (pip->i_diflags & XFS_DIFLAG_ANY)) 341 + xfs_inode_inherit_flags(ip, pip); 342 + if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) 343 + xfs_inode_inherit_flags2(ip, pip); 344 + fallthrough; 345 + case S_IFLNK: 346 + ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 347 + ip->i_df.if_bytes = 0; 348 + ip->i_df.if_data = NULL; 349 + break; 350 + default: 351 + ASSERT(0); 352 + } 353 + 354 + if (xfs_icreate_want_attrfork(mp, args)) { 355 + ip->i_forkoff = xfs_default_attroffset(ip) >> 3; 356 + xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); 357 + 358 + if (!xfs_has_attr(mp)) { 359 + spin_lock(&mp->m_sb_lock); 360 + xfs_add_attr(mp); 361 + spin_unlock(&mp->m_sb_lock); 362 + xfs_log_sb(tp); 363 + } 364 + } 365 + 366 + xfs_trans_log_inode(tp, ip, flags); 367 + } 368 + 369 + /* 370 + * In-Core Unlinked List Lookups 371 + * ============================= 372 + * 373 + * Every inode 
is supposed to be reachable from some other piece of metadata 374 + * with the exception of the root directory. Inodes with a connection to a 375 + * file descriptor but not linked from anywhere in the on-disk directory tree 376 + * are collectively known as unlinked inodes, though the filesystem itself 377 + * maintains links to these inodes so that on-disk metadata are consistent. 378 + * 379 + * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI 380 + * header contains a number of buckets that point to an inode, and each inode 381 + * record has a pointer to the next inode in the hash chain. This 382 + * singly-linked list causes scaling problems in the iunlink remove function 383 + * because we must walk that list to find the inode that points to the inode 384 + * being removed from the unlinked hash bucket list. 385 + * 386 + * Hence we keep an in-memory doubly-linked list to link each inode on an 387 + * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer 388 + * based lists would require having 64 list heads in the perag, one for each 389 + * list. This is expensive in terms of memory (think millions of AGs) and cache 390 + * misses on lookups. Instead, use the fact that inodes on the unlinked list 391 + * must be referenced at the VFS level to keep them on the list and hence we 392 + * have an existence guarantee for inodes on the unlinked list. 393 + * 394 + * Given we have an existence guarantee, we can use lockless inode cache lookups 395 + * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode 396 + * for the doubly-linked unlinked list, and we don't need any extra locking to 397 + * keep the list safe as all manipulations are done under the AGI buffer lock. 398 + * Keeping the list up to date does not require memory allocation, just finding 399 + * the XFS inode and updating the next/prev unlinked list aginos. 400 + */ 401 + 402 + /* 403 + * Update the prev pointer of the next agino.
Returns -ENOLINK if the inode 404 + * is not in cache. 405 + */ 406 + static int 407 + xfs_iunlink_update_backref( 408 + struct xfs_perag *pag, 409 + xfs_agino_t prev_agino, 410 + xfs_agino_t next_agino) 411 + { 412 + struct xfs_inode *ip; 413 + 414 + /* No update necessary if we are at the end of the list. */ 415 + if (next_agino == NULLAGINO) 416 + return 0; 417 + 418 + ip = xfs_iunlink_lookup(pag, next_agino); 419 + if (!ip) 420 + return -ENOLINK; 421 + 422 + ip->i_prev_unlinked = prev_agino; 423 + return 0; 424 + } 425 + 426 + /* 427 + * Point the AGI unlinked bucket at an inode and log the results. The caller 428 + * is responsible for validating the old value. 429 + */ 430 + STATIC int 431 + xfs_iunlink_update_bucket( 432 + struct xfs_trans *tp, 433 + struct xfs_perag *pag, 434 + struct xfs_buf *agibp, 435 + unsigned int bucket_index, 436 + xfs_agino_t new_agino) 437 + { 438 + struct xfs_agi *agi = agibp->b_addr; 439 + xfs_agino_t old_value; 440 + int offset; 441 + 442 + ASSERT(xfs_verify_agino_or_null(pag, new_agino)); 443 + 444 + old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); 445 + trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, 446 + old_value, new_agino); 447 + 448 + /* 449 + * We should never find the head of the list already set to the value 450 + * passed in because either we're adding or removing ourselves from the 451 + * head of the list. 
452 + */ 453 + if (old_value == new_agino) { 454 + xfs_buf_mark_corrupt(agibp); 455 + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 456 + return -EFSCORRUPTED; 457 + } 458 + 459 + agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); 460 + offset = offsetof(struct xfs_agi, agi_unlinked) + 461 + (sizeof(xfs_agino_t) * bucket_index); 462 + xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); 463 + return 0; 464 + } 465 + 466 + static int 467 + xfs_iunlink_insert_inode( 468 + struct xfs_trans *tp, 469 + struct xfs_perag *pag, 470 + struct xfs_buf *agibp, 471 + struct xfs_inode *ip) 472 + { 473 + struct xfs_mount *mp = tp->t_mountp; 474 + struct xfs_agi *agi = agibp->b_addr; 475 + xfs_agino_t next_agino; 476 + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 477 + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 478 + int error; 479 + 480 + /* 481 + * Get the index into the agi hash table for the list this inode will 482 + * go on. Make sure the pointer isn't garbage and that this inode 483 + * isn't already on the list. 484 + */ 485 + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 486 + if (next_agino == agino || 487 + !xfs_verify_agino_or_null(pag, next_agino)) { 488 + xfs_buf_mark_corrupt(agibp); 489 + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 490 + return -EFSCORRUPTED; 491 + } 492 + 493 + /* 494 + * Update the prev pointer in the next inode to point back to this 495 + * inode. 496 + */ 497 + error = xfs_iunlink_update_backref(pag, agino, next_agino); 498 + if (error == -ENOLINK) 499 + error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); 500 + if (error) 501 + return error; 502 + 503 + if (next_agino != NULLAGINO) { 504 + /* 505 + * There is already another inode in the bucket, so point this 506 + * inode to the current head of the list. 
507 + */ 508 + error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); 509 + if (error) 510 + return error; 511 + ip->i_next_unlinked = next_agino; 512 + } 513 + 514 + /* Point the head of the list to point to this inode. */ 515 + ip->i_prev_unlinked = NULLAGINO; 516 + return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); 517 + } 518 + 519 + /* 520 + * This is called when the inode's link count has gone to 0 or we are creating 521 + * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. 522 + * 523 + * We place the on-disk inode on a list in the AGI. It will be pulled from this 524 + * list when the inode is freed. 525 + */ 526 + int 527 + xfs_iunlink( 528 + struct xfs_trans *tp, 529 + struct xfs_inode *ip) 530 + { 531 + struct xfs_mount *mp = tp->t_mountp; 532 + struct xfs_perag *pag; 533 + struct xfs_buf *agibp; 534 + int error; 535 + 536 + ASSERT(VFS_I(ip)->i_nlink == 0); 537 + ASSERT(VFS_I(ip)->i_mode != 0); 538 + trace_xfs_iunlink(ip); 539 + 540 + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 541 + 542 + /* Get the agi buffer first. It ensures lock ordering on the list. */ 543 + error = xfs_read_agi(pag, tp, 0, &agibp); 544 + if (error) 545 + goto out; 546 + 547 + error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); 548 + out: 549 + xfs_perag_put(pag); 550 + return error; 551 + } 552 + 553 + static int 554 + xfs_iunlink_remove_inode( 555 + struct xfs_trans *tp, 556 + struct xfs_perag *pag, 557 + struct xfs_buf *agibp, 558 + struct xfs_inode *ip) 559 + { 560 + struct xfs_mount *mp = tp->t_mountp; 561 + struct xfs_agi *agi = agibp->b_addr; 562 + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 563 + xfs_agino_t head_agino; 564 + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 565 + int error; 566 + 567 + trace_xfs_iunlink_remove(ip); 568 + 569 + /* 570 + * Get the index into the agi hash table for the list this inode will 571 + * go on. Make sure the head pointer isn't garbage. 
572 + */ 573 + head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 574 + if (!xfs_verify_agino(pag, head_agino)) { 575 + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 576 + agi, sizeof(*agi)); 577 + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 578 + return -EFSCORRUPTED; 579 + } 580 + 581 + /* 582 + * Set our inode's next_unlinked pointer to NULL and then return 583 + * the old pointer value so that we can update whatever was previous 584 + * to us in the list to point to whatever was next in the list. 585 + */ 586 + error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); 587 + if (error) 588 + return error; 589 + 590 + /* 591 + * Update the prev pointer in the next inode to point back to previous 592 + * inode in the chain. 593 + */ 594 + error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, 595 + ip->i_next_unlinked); 596 + if (error == -ENOLINK) 597 + error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, 598 + ip->i_next_unlinked); 599 + if (error) 600 + return error; 601 + 602 + if (head_agino != agino) { 603 + struct xfs_inode *prev_ip; 604 + 605 + prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); 606 + if (!prev_ip) { 607 + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); 608 + return -EFSCORRUPTED; 609 + } 610 + 611 + error = xfs_iunlink_log_inode(tp, prev_ip, pag, 612 + ip->i_next_unlinked); 613 + prev_ip->i_next_unlinked = ip->i_next_unlinked; 614 + } else { 615 + /* Point the head of the list to the next unlinked inode. */ 616 + error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, 617 + ip->i_next_unlinked); 618 + } 619 + 620 + ip->i_next_unlinked = NULLAGINO; 621 + ip->i_prev_unlinked = 0; 622 + return error; 623 + } 624 + 625 + /* 626 + * Pull the on-disk inode from the AGI unlinked list. 
627 + */ 628 + int 629 + xfs_iunlink_remove( 630 + struct xfs_trans *tp, 631 + struct xfs_perag *pag, 632 + struct xfs_inode *ip) 633 + { 634 + struct xfs_buf *agibp; 635 + int error; 636 + 637 + trace_xfs_iunlink_remove(ip); 638 + 639 + /* Get the agi buffer first. It ensures lock ordering on the list. */ 640 + error = xfs_read_agi(pag, tp, 0, &agibp); 641 + if (error) 642 + return error; 643 + 644 + return xfs_iunlink_remove_inode(tp, pag, agibp, ip); 645 + } 646 + 647 + /* 648 + * Decrement the link count on an inode & log the change. If this causes the 649 + * link count to go to zero, move the inode to AGI unlinked list so that it can 650 + * be freed when the last active reference goes away via xfs_inactive(). 651 + */ 652 + int 653 + xfs_droplink( 654 + struct xfs_trans *tp, 655 + struct xfs_inode *ip) 656 + { 657 + struct inode *inode = VFS_I(ip); 658 + 659 + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 660 + 661 + if (inode->i_nlink == 0) { 662 + xfs_info_ratelimited(tp->t_mountp, 663 + "Inode 0x%llx link count dropped below zero. Pinning link count.", 664 + ip->i_ino); 665 + set_nlink(inode, XFS_NLINK_PINNED); 666 + } 667 + if (inode->i_nlink != XFS_NLINK_PINNED) 668 + drop_nlink(inode); 669 + 670 + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 671 + 672 + if (inode->i_nlink) 673 + return 0; 674 + 675 + return xfs_iunlink(tp, ip); 676 + } 677 + 678 + /* 679 + * Increment the link count on an inode & log the change. 680 + */ 681 + void 682 + xfs_bumplink( 683 + struct xfs_trans *tp, 684 + struct xfs_inode *ip) 685 + { 686 + struct inode *inode = VFS_I(ip); 687 + 688 + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 689 + 690 + if (inode->i_nlink == XFS_NLINK_PINNED - 1) 691 + xfs_info_ratelimited(tp->t_mountp, 692 + "Inode 0x%llx link count exceeded maximum. 
Pinning link count.", 693 + ip->i_ino); 694 + if (inode->i_nlink != XFS_NLINK_PINNED) 695 + inc_nlink(inode); 696 + 697 + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 698 + } 699 + 700 + /* Free an inode in the ondisk index and zero it out. */ 701 + int 702 + xfs_inode_uninit( 703 + struct xfs_trans *tp, 704 + struct xfs_perag *pag, 705 + struct xfs_inode *ip, 706 + struct xfs_icluster *xic) 707 + { 708 + struct xfs_mount *mp = ip->i_mount; 709 + int error; 710 + 711 + /* 712 + * Free the inode first so that we guarantee that the AGI lock is going 713 + * to be taken before we remove the inode from the unlinked list. This 714 + * makes the AGI lock -> unlinked list modification order the same as 715 + * used in O_TMPFILE creation. 716 + */ 717 + error = xfs_difree(tp, pag, ip->i_ino, xic); 718 + if (error) 719 + return error; 720 + 721 + error = xfs_iunlink_remove(tp, pag, ip); 722 + if (error) 723 + return error; 724 + 725 + /* 726 + * Free any local-format data sitting around before we reset the 727 + * data fork to extents format. Note that the attr fork data has 728 + * already been freed by xfs_attr_inactive. 729 + */ 730 + if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { 731 + kfree(ip->i_df.if_data); 732 + ip->i_df.if_data = NULL; 733 + ip->i_df.if_bytes = 0; 734 + } 735 + 736 + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 737 + ip->i_diflags = 0; 738 + ip->i_diflags2 = mp->m_ino_geo.new_diflags2; 739 + ip->i_forkoff = 0; /* mark the attr fork not in use */ 740 + ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 741 + 742 + /* 743 + * Bump the generation count so no one will be confused 744 + * by reincarnations of this inode. 745 + */ 746 + VFS_I(ip)->i_generation++; 747 + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 748 + return 0; 749 + }

+62

fs/xfs/libxfs/xfs_inode_util.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 4 + * All Rights Reserved. 5 + */ 6 + #ifndef __XFS_INODE_UTIL_H__ 7 + #define __XFS_INODE_UTIL_H__ 8 + 9 + struct xfs_icluster; 10 + 11 + uint16_t xfs_flags2diflags(struct xfs_inode *ip, unsigned int xflags); 12 + uint64_t xfs_flags2diflags2(struct xfs_inode *ip, unsigned int xflags); 13 + uint32_t xfs_dic2xflags(struct xfs_inode *ip); 14 + uint32_t xfs_ip2xflags(struct xfs_inode *ip); 15 + 16 + prid_t xfs_get_initial_prid(struct xfs_inode *dp); 17 + 18 + /* 19 + * File creation context. 20 + * 21 + * Due to our only partial reliance on the VFS to propagate uid and gid values 22 + * according to accepted Unix behaviors, callers must initialize idmap to the 23 + * correct idmapping structure to get the correct inheritance behaviors when 24 + * XFS_MOUNT_GRPID is set. 25 + * 26 + * To create files detached from the directory tree (e.g. quota inodes), set 27 + * idmap to NULL. To create a tree root, set pip to NULL. 28 + */ 29 + struct xfs_icreate_args { 30 + struct mnt_idmap *idmap; 31 + struct xfs_inode *pip; /* parent inode or null */ 32 + dev_t rdev; 33 + umode_t mode; 34 + 35 + #define XFS_ICREATE_TMPFILE (1U << 0) /* create an unlinked file */ 36 + #define XFS_ICREATE_INIT_XATTRS (1U << 1) /* will set xattrs immediately */ 37 + #define XFS_ICREATE_UNLINKABLE (1U << 2) /* cannot link into dir tree */ 38 + uint16_t flags; 39 + }; 40 + 41 + /* 42 + * Flags for xfs_trans_ichgtime(). 
43 + */ 44 + #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ 45 + #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ 46 + #define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ 47 + #define XFS_ICHGTIME_ACCESS 0x8 /* last access timestamp */ 48 + void xfs_trans_ichgtime(struct xfs_trans *tp, struct xfs_inode *ip, int flags); 49 + 50 + void xfs_inode_init(struct xfs_trans *tp, const struct xfs_icreate_args *args, 51 + struct xfs_inode *ip); 52 + 53 + int xfs_inode_uninit(struct xfs_trans *tp, struct xfs_perag *pag, 54 + struct xfs_inode *ip, struct xfs_icluster *xic); 55 + 56 + int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip); 57 + int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag, 58 + struct xfs_inode *ip); 59 + int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip); 60 + void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip); 61 + 62 + #endif /* __XFS_INODE_UTIL_H__ */

+1

fs/xfs/libxfs/xfs_ondisk.h

··· 85 85 XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12); 86 86 */ 87 87 88 + XFS_CHECK_OFFSET(struct xfs_dsb, sb_crc, 224); 88 89 XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, valuelen, 0); 89 90 XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, namelen, 2); 90 91 XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, nameval, 3);

+49 -107

fs/xfs/libxfs/xfs_refcount.c

··· 24 24 #include "xfs_rmap.h" 25 25 #include "xfs_ag.h" 26 26 #include "xfs_health.h" 27 + #include "xfs_refcount_item.h" 27 28 28 29 struct kmem_cache *xfs_refcount_intent_cache; 29 30 ··· 52 51 xfs_agblock_t bno, 53 52 int *stat) 54 53 { 55 - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, 54 + trace_xfs_refcount_lookup(cur, 56 55 xfs_refcount_encode_startblock(bno, domain), 57 56 XFS_LOOKUP_LE); 58 57 cur->bc_rec.rc.rc_startblock = bno; ··· 72 71 xfs_agblock_t bno, 73 72 int *stat) 74 73 { 75 - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, 74 + trace_xfs_refcount_lookup(cur, 76 75 xfs_refcount_encode_startblock(bno, domain), 77 76 XFS_LOOKUP_GE); 78 77 cur->bc_rec.rc.rc_startblock = bno; ··· 92 91 xfs_agblock_t bno, 93 92 int *stat) 94 93 { 95 - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, 94 + trace_xfs_refcount_lookup(cur, 96 95 xfs_refcount_encode_startblock(bno, domain), 97 96 XFS_LOOKUP_LE); 98 97 cur->bc_rec.rc.rc_startblock = bno; ··· 184 183 if (fa) 185 184 return xfs_refcount_complain_bad_rec(cur, fa, irec); 186 185 187 - trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); 186 + trace_xfs_refcount_get(cur, irec); 188 187 return 0; 189 188 } 190 189 ··· 202 201 uint32_t start; 203 202 int error; 204 203 205 - trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); 204 + trace_xfs_refcount_update(cur, irec); 206 205 207 206 start = xfs_refcount_encode_startblock(irec->rc_startblock, 208 207 irec->rc_domain); ··· 212 211 213 212 error = xfs_btree_update(cur, &rec); 214 213 if (error) 215 - trace_xfs_refcount_update_error(cur->bc_mp, 216 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 214 + trace_xfs_refcount_update_error(cur, error, _RET_IP_); 217 215 return error; 218 216 } 219 217 ··· 229 229 { 230 230 int error; 231 231 232 - trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); 232 + trace_xfs_refcount_insert(cur, irec); 233 233 234 234 
cur->bc_rec.rc.rc_startblock = irec->rc_startblock; 235 235 cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; ··· 247 247 248 248 out_error: 249 249 if (error) 250 - trace_xfs_refcount_insert_error(cur->bc_mp, 251 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 250 + trace_xfs_refcount_insert_error(cur, error, _RET_IP_); 252 251 return error; 253 252 } 254 253 ··· 274 275 error = -EFSCORRUPTED; 275 276 goto out_error; 276 277 } 277 - trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec); 278 + trace_xfs_refcount_delete(cur, &irec); 278 279 error = xfs_btree_delete(cur, i); 279 280 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 280 281 xfs_btree_mark_sick(cur); ··· 287 288 &found_rec); 288 289 out_error: 289 290 if (error) 290 - trace_xfs_refcount_delete_error(cur->bc_mp, 291 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 291 + trace_xfs_refcount_delete_error(cur, error, _RET_IP_); 292 292 return error; 293 293 } 294 294 ··· 411 413 return 0; 412 414 413 415 *shape_changed = true; 414 - trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 415 - &rcext, agbno); 416 + trace_xfs_refcount_split_extent(cur, &rcext, agbno); 416 417 417 418 /* Establish the right extent. 
*/ 418 419 tmp = rcext; ··· 435 438 return error; 436 439 437 440 out_error: 438 - trace_xfs_refcount_split_extent_error(cur->bc_mp, 439 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 441 + trace_xfs_refcount_split_extent_error(cur, error, _RET_IP_); 440 442 return error; 441 443 } 442 444 ··· 454 458 int error; 455 459 int found_rec; 456 460 457 - trace_xfs_refcount_merge_center_extents(cur->bc_mp, 458 - cur->bc_ag.pag->pag_agno, left, center, right); 461 + trace_xfs_refcount_merge_center_extents(cur, left, center, right); 459 462 460 463 ASSERT(left->rc_domain == center->rc_domain); 461 464 ASSERT(right->rc_domain == center->rc_domain); ··· 517 522 return error; 518 523 519 524 out_error: 520 - trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 521 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 525 + trace_xfs_refcount_merge_center_extents_error(cur, error, _RET_IP_); 522 526 return error; 523 527 } 524 528 ··· 535 541 int error; 536 542 int found_rec; 537 543 538 - trace_xfs_refcount_merge_left_extent(cur->bc_mp, 539 - cur->bc_ag.pag->pag_agno, left, cleft); 544 + trace_xfs_refcount_merge_left_extent(cur, left, cleft); 540 545 541 546 ASSERT(left->rc_domain == cleft->rc_domain); 542 547 ··· 582 589 return error; 583 590 584 591 out_error: 585 - trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 586 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 592 + trace_xfs_refcount_merge_left_extent_error(cur, error, _RET_IP_); 587 593 return error; 588 594 } 589 595 ··· 599 607 int error; 600 608 int found_rec; 601 609 602 - trace_xfs_refcount_merge_right_extent(cur->bc_mp, 603 - cur->bc_ag.pag->pag_agno, cright, right); 610 + trace_xfs_refcount_merge_right_extent(cur, cright, right); 604 611 605 612 ASSERT(right->rc_domain == cright->rc_domain); 606 613 ··· 649 658 return error; 650 659 651 660 out_error: 652 - trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, 653 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 661 + 
trace_xfs_refcount_merge_right_extent_error(cur, error, _RET_IP_); 654 662 return error; 655 663 } 656 664 ··· 738 748 cleft->rc_refcount = 1; 739 749 cleft->rc_domain = domain; 740 750 } 741 - trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 742 - left, cleft, agbno); 751 + trace_xfs_refcount_find_left_extent(cur, left, cleft, agbno); 743 752 return error; 744 753 745 754 out_error: 746 - trace_xfs_refcount_find_left_extent_error(cur->bc_mp, 747 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 755 + trace_xfs_refcount_find_left_extent_error(cur, error, _RET_IP_); 748 756 return error; 749 757 } 750 758 ··· 831 843 cright->rc_refcount = 1; 832 844 cright->rc_domain = domain; 833 845 } 834 - trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 835 - cright, right, agbno + aglen); 846 + trace_xfs_refcount_find_right_extent(cur, cright, right, 847 + agbno + aglen); 836 848 return error; 837 849 838 850 out_error: 839 - trace_xfs_refcount_find_right_extent_error(cur->bc_mp, 840 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 851 + trace_xfs_refcount_find_right_extent_error(cur, error, _RET_IP_); 841 852 return error; 842 853 } 843 854 ··· 1135 1148 tmp.rc_refcount = 1 + adj; 1136 1149 tmp.rc_domain = XFS_REFC_DOMAIN_SHARED; 1137 1150 1138 - trace_xfs_refcount_modify_extent(cur->bc_mp, 1139 - cur->bc_ag.pag->pag_agno, &tmp); 1151 + trace_xfs_refcount_modify_extent(cur, &tmp); 1140 1152 1141 1153 /* 1142 1154 * Either cover the hole (increment) or ··· 1159 1173 tmp.rc_startblock); 1160 1174 error = xfs_free_extent_later(cur->bc_tp, fsbno, 1161 1175 tmp.rc_blockcount, NULL, 1162 - XFS_AG_RESV_NONE, false); 1176 + XFS_AG_RESV_NONE, 0); 1163 1177 if (error) 1164 1178 goto out_error; 1165 1179 } ··· 1200 1214 if (ext.rc_refcount == MAXREFCOUNT) 1201 1215 goto skip; 1202 1216 ext.rc_refcount += adj; 1203 - trace_xfs_refcount_modify_extent(cur->bc_mp, 1204 - cur->bc_ag.pag->pag_agno, &ext); 1217 + trace_xfs_refcount_modify_extent(cur, 
&ext); 1205 1218 cur->bc_refc.nr_ops++; 1206 1219 if (ext.rc_refcount > 1) { 1207 1220 error = xfs_refcount_update(cur, &ext); ··· 1222 1237 ext.rc_startblock); 1223 1238 error = xfs_free_extent_later(cur->bc_tp, fsbno, 1224 1239 ext.rc_blockcount, NULL, 1225 - XFS_AG_RESV_NONE, false); 1240 + XFS_AG_RESV_NONE, 0); 1226 1241 if (error) 1227 1242 goto out_error; 1228 1243 } ··· 1239 1254 1240 1255 return error; 1241 1256 out_error: 1242 - trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1243 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1257 + trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_); 1244 1258 return error; 1245 1259 } 1246 1260 ··· 1256 1272 int error; 1257 1273 1258 1274 if (adj == XFS_REFCOUNT_ADJUST_INCREASE) 1259 - trace_xfs_refcount_increase(cur->bc_mp, 1260 - cur->bc_ag.pag->pag_agno, *agbno, *aglen); 1275 + trace_xfs_refcount_increase(cur, *agbno, *aglen); 1261 1276 else 1262 - trace_xfs_refcount_decrease(cur->bc_mp, 1263 - cur->bc_ag.pag->pag_agno, *agbno, *aglen); 1277 + trace_xfs_refcount_decrease(cur, *agbno, *aglen); 1264 1278 1265 1279 /* 1266 1280 * Ensure that no rcextents cross the boundary of the adjustment range. ··· 1297 1315 return 0; 1298 1316 1299 1317 out_error: 1300 - trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1301 - error, _RET_IP_); 1318 + trace_xfs_refcount_adjust_error(cur, error, _RET_IP_); 1302 1319 return error; 1303 - } 1304 - 1305 - /* Clean up after calling xfs_refcount_finish_one. 
*/ 1306 - void 1307 - xfs_refcount_finish_one_cleanup( 1308 - struct xfs_trans *tp, 1309 - struct xfs_btree_cur *rcur, 1310 - int error) 1311 - { 1312 - struct xfs_buf *agbp; 1313 - 1314 - if (rcur == NULL) 1315 - return; 1316 - agbp = rcur->bc_ag.agbp; 1317 - xfs_btree_del_cursor(rcur, error); 1318 - if (error) 1319 - xfs_trans_brelse(tp, agbp); 1320 1320 } 1321 1321 1322 1322 /* ··· 1342 1378 struct xfs_btree_cur **pcur) 1343 1379 { 1344 1380 struct xfs_mount *mp = tp->t_mountp; 1345 - struct xfs_btree_cur *rcur; 1381 + struct xfs_btree_cur *rcur = *pcur; 1346 1382 struct xfs_buf *agbp = NULL; 1347 1383 int error = 0; 1348 1384 xfs_agblock_t bno; ··· 1351 1387 1352 1388 bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock); 1353 1389 1354 - trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock), 1355 - ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock), 1356 - ri->ri_blockcount); 1390 + trace_xfs_refcount_deferred(mp, ri); 1357 1391 1358 1392 if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) 1359 1393 return -EIO; ··· 1360 1398 * If we haven't gotten a cursor or the cursor AG doesn't match 1361 1399 * the startblock, get one now. 
1362 1400 */ 1363 - rcur = *pcur; 1364 1401 if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) { 1365 1402 nr_ops = rcur->bc_refc.nr_ops; 1366 1403 shape_changes = rcur->bc_refc.shape_changes; 1367 - xfs_refcount_finish_one_cleanup(tp, rcur, 0); 1404 + xfs_btree_del_cursor(rcur, 0); 1368 1405 rcur = NULL; 1369 1406 *pcur = NULL; 1370 1407 } ··· 1373 1412 if (error) 1374 1413 return error; 1375 1414 1376 - rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag); 1415 + *pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, 1416 + ri->ri_pag); 1377 1417 rcur->bc_refc.nr_ops = nr_ops; 1378 1418 rcur->bc_refc.shape_changes = shape_changes; 1379 1419 } 1380 - *pcur = rcur; 1381 1420 1382 1421 switch (ri->ri_type) { 1383 1422 case XFS_REFCOUNT_INCREASE: ··· 1413 1452 return -EFSCORRUPTED; 1414 1453 } 1415 1454 if (!error && ri->ri_blockcount > 0) 1416 - trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno, 1417 - ri->ri_type, bno, ri->ri_blockcount); 1455 + trace_xfs_refcount_finish_one_leftover(mp, ri); 1418 1456 return error; 1419 1457 } 1420 1458 ··· 1429 1469 { 1430 1470 struct xfs_refcount_intent *ri; 1431 1471 1432 - trace_xfs_refcount_defer(tp->t_mountp, 1433 - XFS_FSB_TO_AGNO(tp->t_mountp, startblock), 1434 - type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), 1435 - blockcount); 1436 - 1437 1472 ri = kmem_cache_alloc(xfs_refcount_intent_cache, 1438 1473 GFP_KERNEL | __GFP_NOFAIL); 1439 1474 INIT_LIST_HEAD(&ri->ri_list); ··· 1436 1481 ri->ri_startblock = startblock; 1437 1482 ri->ri_blockcount = blockcount; 1438 1483 1439 - xfs_refcount_update_get_group(tp->t_mountp, ri); 1440 - xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type); 1484 + xfs_refcount_defer_add(tp, ri); 1441 1485 } 1442 1486 1443 1487 /* ··· 1491 1537 int have; 1492 1538 int error; 1493 1539 1494 - trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1495 - agbno, aglen); 1540 + trace_xfs_refcount_find_shared(cur, agbno, aglen); 1496 1541 1497 1542 /* By 
default, skip the whole range */ 1498 1543 *fbno = NULLAGBLOCK; ··· 1578 1625 } 1579 1626 1580 1627 done: 1581 - trace_xfs_refcount_find_shared_result(cur->bc_mp, 1582 - cur->bc_ag.pag->pag_agno, *fbno, *flen); 1628 + trace_xfs_refcount_find_shared_result(cur, *fbno, *flen); 1583 1629 1584 1630 out_error: 1585 1631 if (error) 1586 - trace_xfs_refcount_find_shared_error(cur->bc_mp, 1587 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1632 + trace_xfs_refcount_find_shared_error(cur, error, _RET_IP_); 1588 1633 return error; 1589 1634 } 1590 1635 ··· 1688 1737 tmp.rc_refcount = 1; 1689 1738 tmp.rc_domain = XFS_REFC_DOMAIN_COW; 1690 1739 1691 - trace_xfs_refcount_modify_extent(cur->bc_mp, 1692 - cur->bc_ag.pag->pag_agno, &tmp); 1740 + trace_xfs_refcount_modify_extent(cur, &tmp); 1693 1741 1694 1742 error = xfs_refcount_insert(cur, &tmp, 1695 1743 &found_tmp); ··· 1719 1769 } 1720 1770 1721 1771 ext.rc_refcount = 0; 1722 - trace_xfs_refcount_modify_extent(cur->bc_mp, 1723 - cur->bc_ag.pag->pag_agno, &ext); 1772 + trace_xfs_refcount_modify_extent(cur, &ext); 1724 1773 error = xfs_refcount_delete(cur, &found_rec); 1725 1774 if (error) 1726 1775 goto out_error; ··· 1735 1786 1736 1787 return error; 1737 1788 out_error: 1738 - trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1739 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1789 + trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_); 1740 1790 return error; 1741 1791 } 1742 1792 ··· 1781 1833 return 0; 1782 1834 1783 1835 out_error: 1784 - trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1785 - error, _RET_IP_); 1836 + trace_xfs_refcount_adjust_cow_error(cur, error, _RET_IP_); 1786 1837 return error; 1787 1838 } 1788 1839 ··· 1794 1847 xfs_agblock_t agbno, 1795 1848 xfs_extlen_t aglen) 1796 1849 { 1797 - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, 1798 - agbno, aglen); 1850 + trace_xfs_refcount_cow_increase(rcur, agbno, aglen); 1799 1851 1800 1852 /* Add refcount 
btree reservation */ 1801 1853 return xfs_refcount_adjust_cow(rcur, agbno, aglen, ··· 1810 1864 xfs_agblock_t agbno, 1811 1865 xfs_extlen_t aglen) 1812 1866 { 1813 - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, 1814 - agbno, aglen); 1867 + trace_xfs_refcount_cow_decrease(rcur, agbno, aglen); 1815 1868 1816 1869 /* Remove refcount btree reservation */ 1817 1870 return xfs_refcount_adjust_cow(rcur, agbno, aglen, ··· 1955 2010 if (error) 1956 2011 goto out_free; 1957 2012 1958 - trace_xfs_refcount_recover_extent(mp, pag->pag_agno, 1959 - &rr->rr_rrec); 1960 - 1961 2013 /* Free the orphan record */ 1962 2014 fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, 1963 2015 rr->rr_rrec.rc_startblock); ··· 1964 2022 /* Free the block. */ 1965 2023 error = xfs_free_extent_later(tp, fsb, 1966 2024 rr->rr_rrec.rc_blockcount, NULL, 1967 - XFS_AG_RESV_NONE, false); 2025 + XFS_AG_RESV_NONE, 0); 1968 2026 if (error) 1969 2027 goto out_trans; 1970 2028

+6 -5

fs/xfs/libxfs/xfs_refcount.h

··· 48 48 XFS_REFCOUNT_FREE_COW, 49 49 }; 50 50 51 + #define XFS_REFCOUNT_INTENT_STRINGS \ 52 + { XFS_REFCOUNT_INCREASE, "incr" }, \ 53 + { XFS_REFCOUNT_DECREASE, "decr" }, \ 54 + { XFS_REFCOUNT_ALLOC_COW, "alloc_cow" }, \ 55 + { XFS_REFCOUNT_FREE_COW, "free_cow" } 56 + 51 57 struct xfs_refcount_intent { 52 58 struct list_head ri_list; 53 59 struct xfs_perag *ri_pag; ··· 74 68 return true; 75 69 } 76 70 77 - void xfs_refcount_update_get_group(struct xfs_mount *mp, 78 - struct xfs_refcount_intent *ri); 79 - 80 71 void xfs_refcount_increase_extent(struct xfs_trans *tp, 81 72 struct xfs_bmbt_irec *irec); 82 73 void xfs_refcount_decrease_extent(struct xfs_trans *tp, 83 74 struct xfs_bmbt_irec *irec); 84 75 85 - extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, 86 - struct xfs_btree_cur *rcur, int error); 87 76 extern int xfs_refcount_finish_one(struct xfs_trans *tp, 88 77 struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur); 89 78

+1 -1

fs/xfs/libxfs/xfs_refcount_btree.c

··· 109 109 be32_add_cpu(&agf->agf_refcount_blocks, -1); 110 110 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); 111 111 return xfs_free_extent_later(cur->bc_tp, fsbno, 1, 112 - &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, false); 112 + &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, 0); 113 113 } 114 114 115 115 STATIC int

+93 -175

fs/xfs/libxfs/xfs_rmap.c

··· 24 24 #include "xfs_inode.h" 25 25 #include "xfs_ag.h" 26 26 #include "xfs_health.h" 27 + #include "xfs_rmap_item.h" 27 28 28 29 struct kmem_cache *xfs_rmap_intent_cache; 29 30 ··· 101 100 union xfs_btree_rec rec; 102 101 int error; 103 102 104 - trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, 105 - irec->rm_startblock, irec->rm_blockcount, 103 + trace_xfs_rmap_update(cur, irec->rm_startblock, irec->rm_blockcount, 106 104 irec->rm_owner, irec->rm_offset, irec->rm_flags); 107 105 108 106 rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock); ··· 111 111 xfs_rmap_irec_offset_pack(irec)); 112 112 error = xfs_btree_update(cur, &rec); 113 113 if (error) 114 - trace_xfs_rmap_update_error(cur->bc_mp, 115 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 114 + trace_xfs_rmap_update_error(cur, error, _RET_IP_); 116 115 return error; 117 116 } 118 117 ··· 127 128 int i; 128 129 int error; 129 130 130 - trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno, 131 - len, owner, offset, flags); 131 + trace_xfs_rmap_insert(rcur, agbno, len, owner, offset, flags); 132 132 133 133 error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); 134 134 if (error) ··· 153 155 } 154 156 done: 155 157 if (error) 156 - trace_xfs_rmap_insert_error(rcur->bc_mp, 157 - rcur->bc_ag.pag->pag_agno, error, _RET_IP_); 158 + trace_xfs_rmap_insert_error(rcur, error, _RET_IP_); 158 159 return error; 159 160 } 160 161 ··· 169 172 int i; 170 173 int error; 171 174 172 - trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno, 173 - len, owner, offset, flags); 175 + trace_xfs_rmap_delete(rcur, agbno, len, owner, offset, flags); 174 176 175 177 error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); 176 178 if (error) ··· 190 194 } 191 195 done: 192 196 if (error) 193 - trace_xfs_rmap_delete_error(rcur->bc_mp, 194 - rcur->bc_ag.pag->pag_agno, error, _RET_IP_); 197 + trace_xfs_rmap_delete_error(rcur, error, _RET_IP_); 195 198 return error; 
196 199 } 197 200 ··· 337 342 { 338 343 struct xfs_find_left_neighbor_info *info = priv; 339 344 340 - trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp, 341 - cur->bc_ag.pag->pag_agno, rec->rm_startblock, 345 + trace_xfs_rmap_find_left_neighbor_candidate(cur, rec->rm_startblock, 342 346 rec->rm_blockcount, rec->rm_owner, rec->rm_offset, 343 347 rec->rm_flags); 344 348 ··· 387 393 info.high.rm_blockcount = 0; 388 394 info.irec = irec; 389 395 390 - trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp, 391 - cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags); 396 + trace_xfs_rmap_find_left_neighbor_query(cur, bno, 0, owner, offset, 397 + flags); 392 398 393 399 /* 394 400 * Historically, we always used the range query to walk every reverse ··· 419 425 return error; 420 426 421 427 *stat = 1; 422 - trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, 423 - cur->bc_ag.pag->pag_agno, irec->rm_startblock, 428 + trace_xfs_rmap_find_left_neighbor_result(cur, irec->rm_startblock, 424 429 irec->rm_blockcount, irec->rm_owner, irec->rm_offset, 425 430 irec->rm_flags); 426 431 return 0; ··· 434 441 { 435 442 struct xfs_find_left_neighbor_info *info = priv; 436 443 437 - trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp, 438 - cur->bc_ag.pag->pag_agno, rec->rm_startblock, 444 + trace_xfs_rmap_lookup_le_range_candidate(cur, rec->rm_startblock, 439 445 rec->rm_blockcount, rec->rm_owner, rec->rm_offset, 440 446 rec->rm_flags); 441 447 ··· 481 489 *stat = 0; 482 490 info.irec = irec; 483 491 484 - trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno, 485 - bno, 0, owner, offset, flags); 492 + trace_xfs_rmap_lookup_le_range(cur, bno, 0, owner, offset, flags); 486 493 487 494 /* 488 495 * Historically, we always used the range query to walk every reverse ··· 512 521 return error; 513 522 514 523 *stat = 1; 515 - trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, 516 - cur->bc_ag.pag->pag_agno, irec->rm_startblock, 524 + 
trace_xfs_rmap_lookup_le_range_result(cur, irec->rm_startblock, 517 525 irec->rm_blockcount, irec->rm_owner, irec->rm_offset, 518 526 irec->rm_flags); 519 527 return 0; ··· 624 634 (flags & XFS_RMAP_BMBT_BLOCK); 625 635 if (unwritten) 626 636 flags |= XFS_RMAP_UNWRITTEN; 627 - trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len, 628 - unwritten, oinfo); 637 + trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo); 629 638 630 639 /* 631 640 * We should always have a left record because there's a static record ··· 640 651 goto out_error; 641 652 } 642 653 643 - trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, 644 - cur->bc_ag.pag->pag_agno, ltrec.rm_startblock, 645 - ltrec.rm_blockcount, ltrec.rm_owner, 646 - ltrec.rm_offset, ltrec.rm_flags); 654 + trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock, 655 + ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, 656 + ltrec.rm_flags); 647 657 ltoff = ltrec.rm_offset; 648 658 649 659 /* ··· 709 721 710 722 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { 711 723 /* exact match, simply remove the record from rmap tree */ 712 - trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, 713 - ltrec.rm_startblock, ltrec.rm_blockcount, 714 - ltrec.rm_owner, ltrec.rm_offset, 715 - ltrec.rm_flags); 724 + trace_xfs_rmap_delete(cur, ltrec.rm_startblock, 725 + ltrec.rm_blockcount, ltrec.rm_owner, 726 + ltrec.rm_offset, ltrec.rm_flags); 716 727 error = xfs_btree_delete(cur, &i); 717 728 if (error) 718 729 goto out_error; ··· 787 800 else 788 801 cur->bc_rec.r.rm_offset = offset + len; 789 802 cur->bc_rec.r.rm_flags = flags; 790 - trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, 791 - cur->bc_rec.r.rm_startblock, 803 + trace_xfs_rmap_insert(cur, cur->bc_rec.r.rm_startblock, 792 804 cur->bc_rec.r.rm_blockcount, 793 805 cur->bc_rec.r.rm_owner, 794 806 cur->bc_rec.r.rm_offset, ··· 798 812 } 799 813 800 814 out_done: 801 - trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len, 802 - unwritten, 
oinfo); 815 + trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo); 803 816 out_error: 804 817 if (error) 805 - trace_xfs_rmap_unmap_error(mp, cur->bc_ag.pag->pag_agno, 806 - error, _RET_IP_); 818 + trace_xfs_rmap_unmap_error(cur, error, _RET_IP_); 807 819 return error; 808 820 } 809 821 ··· 971 987 (flags & XFS_RMAP_BMBT_BLOCK); 972 988 if (unwritten) 973 989 flags |= XFS_RMAP_UNWRITTEN; 974 - trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len, 975 - unwritten, oinfo); 990 + trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo); 976 991 ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); 977 992 978 993 /* ··· 984 1001 if (error) 985 1002 goto out_error; 986 1003 if (have_lt) { 987 - trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, 988 - cur->bc_ag.pag->pag_agno, ltrec.rm_startblock, 1004 + trace_xfs_rmap_lookup_le_range_result(cur, ltrec.rm_startblock, 989 1005 ltrec.rm_blockcount, ltrec.rm_owner, 990 1006 ltrec.rm_offset, ltrec.rm_flags); 991 1007 ··· 1022 1040 error = -EFSCORRUPTED; 1023 1041 goto out_error; 1024 1042 } 1025 - trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, 1026 - cur->bc_ag.pag->pag_agno, gtrec.rm_startblock, 1027 - gtrec.rm_blockcount, gtrec.rm_owner, 1028 - gtrec.rm_offset, gtrec.rm_flags); 1043 + trace_xfs_rmap_find_right_neighbor_result(cur, 1044 + gtrec.rm_startblock, gtrec.rm_blockcount, 1045 + gtrec.rm_owner, gtrec.rm_offset, 1046 + gtrec.rm_flags); 1029 1047 if (!xfs_rmap_is_mergeable(&gtrec, owner, flags)) 1030 1048 have_gt = 0; 1031 1049 } ··· 1062 1080 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr| 1063 1081 */ 1064 1082 ltrec.rm_blockcount += gtrec.rm_blockcount; 1065 - trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, 1066 - gtrec.rm_startblock, 1067 - gtrec.rm_blockcount, 1068 - gtrec.rm_owner, 1069 - gtrec.rm_offset, 1070 - gtrec.rm_flags); 1083 + trace_xfs_rmap_delete(cur, gtrec.rm_startblock, 1084 + gtrec.rm_blockcount, gtrec.rm_owner, 1085 + gtrec.rm_offset, gtrec.rm_flags); 1071 1086 error = 
xfs_btree_delete(cur, &i); 1072 1087 if (error) 1073 1088 goto out_error; ··· 1111 1132 cur->bc_rec.r.rm_owner = owner; 1112 1133 cur->bc_rec.r.rm_offset = offset; 1113 1134 cur->bc_rec.r.rm_flags = flags; 1114 - trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len, 1115 - owner, offset, flags); 1135 + trace_xfs_rmap_insert(cur, bno, len, owner, offset, flags); 1116 1136 error = xfs_btree_insert(cur, &i); 1117 1137 if (error) 1118 1138 goto out_error; ··· 1122 1144 } 1123 1145 } 1124 1146 1125 - trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len, 1126 - unwritten, oinfo); 1147 + trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo); 1127 1148 out_error: 1128 1149 if (error) 1129 - trace_xfs_rmap_map_error(mp, cur->bc_ag.pag->pag_agno, 1130 - error, _RET_IP_); 1150 + trace_xfs_rmap_map_error(cur, error, _RET_IP_); 1131 1151 return error; 1132 1152 } 1133 1153 ··· 1199 1223 (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); 1200 1224 oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; 1201 1225 new_endoff = offset + len; 1202 - trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len, 1203 - unwritten, oinfo); 1226 + trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo); 1204 1227 1205 1228 /* 1206 1229 * For the initial lookup, look for an exact match or the left-adjacent ··· 1215 1240 goto done; 1216 1241 } 1217 1242 1218 - trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, 1219 - cur->bc_ag.pag->pag_agno, PREV.rm_startblock, 1220 - PREV.rm_blockcount, PREV.rm_owner, 1221 - PREV.rm_offset, PREV.rm_flags); 1243 + trace_xfs_rmap_lookup_le_range_result(cur, PREV.rm_startblock, 1244 + PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, 1245 + PREV.rm_flags); 1222 1246 1223 1247 ASSERT(PREV.rm_offset <= offset); 1224 1248 ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff); ··· 1258 1284 error = -EFSCORRUPTED; 1259 1285 goto done; 1260 1286 } 1261 - trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, 1262 - cur->bc_ag.pag->pag_agno, 
LEFT.rm_startblock, 1263 - LEFT.rm_blockcount, LEFT.rm_owner, 1264 - LEFT.rm_offset, LEFT.rm_flags); 1287 + trace_xfs_rmap_find_left_neighbor_result(cur, 1288 + LEFT.rm_startblock, LEFT.rm_blockcount, 1289 + LEFT.rm_owner, LEFT.rm_offset, LEFT.rm_flags); 1265 1290 if (LEFT.rm_startblock + LEFT.rm_blockcount == bno && 1266 1291 LEFT.rm_offset + LEFT.rm_blockcount == offset && 1267 1292 xfs_rmap_is_mergeable(&LEFT, owner, newext)) ··· 1298 1325 error = -EFSCORRUPTED; 1299 1326 goto done; 1300 1327 } 1301 - trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, 1302 - cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock, 1303 - RIGHT.rm_blockcount, RIGHT.rm_owner, 1304 - RIGHT.rm_offset, RIGHT.rm_flags); 1328 + trace_xfs_rmap_find_right_neighbor_result(cur, 1329 + RIGHT.rm_startblock, RIGHT.rm_blockcount, 1330 + RIGHT.rm_owner, RIGHT.rm_offset, 1331 + RIGHT.rm_flags); 1305 1332 if (bno + len == RIGHT.rm_startblock && 1306 1333 offset + len == RIGHT.rm_offset && 1307 1334 xfs_rmap_is_mergeable(&RIGHT, owner, newext)) ··· 1317 1344 RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) 1318 1345 state &= ~RMAP_RIGHT_CONTIG; 1319 1346 1320 - trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state, 1321 - _RET_IP_); 1347 + trace_xfs_rmap_convert_state(cur, state, _RET_IP_); 1322 1348 1323 1349 /* reset the cursor back to PREV */ 1324 1350 error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i); ··· 1348 1376 error = -EFSCORRUPTED; 1349 1377 goto done; 1350 1378 } 1351 - trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, 1352 - RIGHT.rm_startblock, RIGHT.rm_blockcount, 1353 - RIGHT.rm_owner, RIGHT.rm_offset, 1354 - RIGHT.rm_flags); 1379 + trace_xfs_rmap_delete(cur, RIGHT.rm_startblock, 1380 + RIGHT.rm_blockcount, RIGHT.rm_owner, 1381 + RIGHT.rm_offset, RIGHT.rm_flags); 1355 1382 error = xfs_btree_delete(cur, &i); 1356 1383 if (error) 1357 1384 goto done; ··· 1367 1396 error = -EFSCORRUPTED; 1368 1397 goto done; 1369 1398 } 1370 - trace_xfs_rmap_delete(mp, 
cur->bc_ag.pag->pag_agno, 1371 - PREV.rm_startblock, PREV.rm_blockcount, 1372 - PREV.rm_owner, PREV.rm_offset, 1373 - PREV.rm_flags); 1399 + trace_xfs_rmap_delete(cur, PREV.rm_startblock, 1400 + PREV.rm_blockcount, PREV.rm_owner, 1401 + PREV.rm_offset, PREV.rm_flags); 1374 1402 error = xfs_btree_delete(cur, &i); 1375 1403 if (error) 1376 1404 goto done; ··· 1398 1428 * Setting all of a previous oldext extent to newext. 1399 1429 * The left neighbor is contiguous, the right is not. 1400 1430 */ 1401 - trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, 1402 - PREV.rm_startblock, PREV.rm_blockcount, 1403 - PREV.rm_owner, PREV.rm_offset, 1404 - PREV.rm_flags); 1431 + trace_xfs_rmap_delete(cur, PREV.rm_startblock, 1432 + PREV.rm_blockcount, PREV.rm_owner, 1433 + PREV.rm_offset, PREV.rm_flags); 1405 1434 error = xfs_btree_delete(cur, &i); 1406 1435 if (error) 1407 1436 goto done; ··· 1437 1468 error = -EFSCORRUPTED; 1438 1469 goto done; 1439 1470 } 1440 - trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno, 1441 - RIGHT.rm_startblock, RIGHT.rm_blockcount, 1442 - RIGHT.rm_owner, RIGHT.rm_offset, 1443 - RIGHT.rm_flags); 1471 + trace_xfs_rmap_delete(cur, RIGHT.rm_startblock, 1472 + RIGHT.rm_blockcount, RIGHT.rm_owner, 1473 + RIGHT.rm_offset, RIGHT.rm_flags); 1444 1474 error = xfs_btree_delete(cur, &i); 1445 1475 if (error) 1446 1476 goto done; ··· 1517 1549 NEW.rm_blockcount = len; 1518 1550 NEW.rm_flags = newext; 1519 1551 cur->bc_rec.r = NEW; 1520 - trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, 1521 - len, owner, offset, newext); 1552 + trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext); 1522 1553 error = xfs_btree_insert(cur, &i); 1523 1554 if (error) 1524 1555 goto done; ··· 1575 1608 NEW.rm_blockcount = len; 1576 1609 NEW.rm_flags = newext; 1577 1610 cur->bc_rec.r = NEW; 1578 - trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, 1579 - len, owner, offset, newext); 1611 + trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext); 1580 1612 
error = xfs_btree_insert(cur, &i); 1581 1613 if (error) 1582 1614 goto done; ··· 1606 1640 NEW = PREV; 1607 1641 NEW.rm_blockcount = offset - PREV.rm_offset; 1608 1642 cur->bc_rec.r = NEW; 1609 - trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, 1610 - NEW.rm_startblock, NEW.rm_blockcount, 1611 - NEW.rm_owner, NEW.rm_offset, 1643 + trace_xfs_rmap_insert(cur, NEW.rm_startblock, 1644 + NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, 1612 1645 NEW.rm_flags); 1613 1646 error = xfs_btree_insert(cur, &i); 1614 1647 if (error) ··· 1634 1669 /* new middle extent - newext */ 1635 1670 cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN; 1636 1671 cur->bc_rec.r.rm_flags |= newext; 1637 - trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len, 1638 - owner, offset, newext); 1672 + trace_xfs_rmap_insert(cur, bno, len, owner, offset, newext); 1639 1673 error = xfs_btree_insert(cur, &i); 1640 1674 if (error) 1641 1675 goto done; ··· 1658 1694 ASSERT(0); 1659 1695 } 1660 1696 1661 - trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len, 1662 - unwritten, oinfo); 1697 + trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo); 1663 1698 done: 1664 1699 if (error) 1665 - trace_xfs_rmap_convert_error(cur->bc_mp, 1666 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1700 + trace_xfs_rmap_convert_error(cur, error, _RET_IP_); 1667 1701 return error; 1668 1702 } 1669 1703 ··· 1697 1735 (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); 1698 1736 oldext = unwritten ? 
XFS_RMAP_UNWRITTEN : 0; 1699 1737 new_endoff = offset + len; 1700 - trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len, 1701 - unwritten, oinfo); 1738 + trace_xfs_rmap_convert(cur, bno, len, unwritten, oinfo); 1702 1739 1703 1740 /* 1704 1741 * For the initial lookup, look for and exact match or the left-adjacent ··· 1766 1805 error = -EFSCORRUPTED; 1767 1806 goto done; 1768 1807 } 1769 - trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, 1770 - cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock, 1771 - RIGHT.rm_blockcount, RIGHT.rm_owner, 1772 - RIGHT.rm_offset, RIGHT.rm_flags); 1808 + trace_xfs_rmap_find_right_neighbor_result(cur, 1809 + RIGHT.rm_startblock, RIGHT.rm_blockcount, 1810 + RIGHT.rm_owner, RIGHT.rm_offset, 1811 + RIGHT.rm_flags); 1773 1812 if (xfs_rmap_is_mergeable(&RIGHT, owner, newext)) 1774 1813 state |= RMAP_RIGHT_CONTIG; 1775 1814 } ··· 1783 1822 RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) 1784 1823 state &= ~RMAP_RIGHT_CONTIG; 1785 1824 1786 - trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state, 1787 - _RET_IP_); 1825 + trace_xfs_rmap_convert_state(cur, state, _RET_IP_); 1788 1826 /* 1789 1827 * Switch out based on the FILLING and CONTIG state bits. 
1790 1828 */ ··· 2081 2121 ASSERT(0); 2082 2122 } 2083 2123 2084 - trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len, 2085 - unwritten, oinfo); 2124 + trace_xfs_rmap_convert_done(cur, bno, len, unwritten, oinfo); 2086 2125 done: 2087 2126 if (error) 2088 - trace_xfs_rmap_convert_error(cur->bc_mp, 2089 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 2127 + trace_xfs_rmap_convert_error(cur, error, _RET_IP_); 2090 2128 return error; 2091 2129 } 2092 2130 ··· 2122 2164 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); 2123 2165 if (unwritten) 2124 2166 flags |= XFS_RMAP_UNWRITTEN; 2125 - trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len, 2126 - unwritten, oinfo); 2167 + trace_xfs_rmap_unmap(cur, bno, len, unwritten, oinfo); 2127 2168 2128 2169 /* 2129 2170 * We should always have a left record because there's a static record ··· 2278 2321 goto out_error; 2279 2322 } 2280 2323 2281 - trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len, 2282 - unwritten, oinfo); 2324 + trace_xfs_rmap_unmap_done(cur, bno, len, unwritten, oinfo); 2283 2325 out_error: 2284 2326 if (error) 2285 - trace_xfs_rmap_unmap_error(cur->bc_mp, 2286 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 2327 + trace_xfs_rmap_unmap_error(cur, error, _RET_IP_); 2287 2328 return error; 2288 2329 } 2289 2330 ··· 2316 2361 xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); 2317 2362 if (unwritten) 2318 2363 flags |= XFS_RMAP_UNWRITTEN; 2319 - trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len, 2320 - unwritten, oinfo); 2364 + trace_xfs_rmap_map(cur, bno, len, unwritten, oinfo); 2321 2365 2322 2366 /* Is there a left record that abuts our range? 
*/ 2323 2367 error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, flags, ··· 2341 2387 error = -EFSCORRUPTED; 2342 2388 goto out_error; 2343 2389 } 2344 - trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, 2345 - cur->bc_ag.pag->pag_agno, gtrec.rm_startblock, 2346 - gtrec.rm_blockcount, gtrec.rm_owner, 2347 - gtrec.rm_offset, gtrec.rm_flags); 2390 + trace_xfs_rmap_find_right_neighbor_result(cur, 2391 + gtrec.rm_startblock, gtrec.rm_blockcount, 2392 + gtrec.rm_owner, gtrec.rm_offset, 2393 + gtrec.rm_flags); 2348 2394 2349 2395 if (!xfs_rmap_is_mergeable(&gtrec, owner, flags)) 2350 2396 have_gt = 0; ··· 2436 2482 goto out_error; 2437 2483 } 2438 2484 2439 - trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len, 2440 - unwritten, oinfo); 2485 + trace_xfs_rmap_map_done(cur, bno, len, unwritten, oinfo); 2441 2486 out_error: 2442 2487 if (error) 2443 - trace_xfs_rmap_map_error(cur->bc_mp, 2444 - cur->bc_ag.pag->pag_agno, error, _RET_IP_); 2488 + trace_xfs_rmap_map_error(cur, error, _RET_IP_); 2445 2489 return error; 2446 2490 } 2447 2491 ··· 2524 2572 return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query); 2525 2573 } 2526 2574 2527 - /* Clean up after calling xfs_rmap_finish_one. */ 2528 - void 2529 - xfs_rmap_finish_one_cleanup( 2530 - struct xfs_trans *tp, 2531 - struct xfs_btree_cur *rcur, 2532 - int error) 2533 - { 2534 - struct xfs_buf *agbp; 2535 - 2536 - if (rcur == NULL) 2537 - return; 2538 - agbp = rcur->bc_ag.agbp; 2539 - xfs_btree_del_cursor(rcur, error); 2540 - if (error) 2541 - xfs_trans_brelse(tp, agbp); 2542 - } 2543 - 2544 2575 /* Commit an rmap operation into the ondisk tree. 
*/ 2545 2576 int 2546 2577 __xfs_rmap_finish_intent( ··· 2569 2634 struct xfs_rmap_intent *ri, 2570 2635 struct xfs_btree_cur **pcur) 2571 2636 { 2572 - struct xfs_mount *mp = tp->t_mountp; 2573 - struct xfs_btree_cur *rcur; 2574 - struct xfs_buf *agbp = NULL; 2575 - int error = 0; 2576 2637 struct xfs_owner_info oinfo; 2638 + struct xfs_mount *mp = tp->t_mountp; 2639 + struct xfs_btree_cur *rcur = *pcur; 2640 + struct xfs_buf *agbp = NULL; 2577 2641 xfs_agblock_t bno; 2578 2642 bool unwritten; 2643 + int error = 0; 2579 2644 2580 - bno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock); 2581 - 2582 - trace_xfs_rmap_deferred(mp, ri->ri_pag->pag_agno, ri->ri_type, bno, 2583 - ri->ri_owner, ri->ri_whichfork, 2584 - ri->ri_bmap.br_startoff, ri->ri_bmap.br_blockcount, 2585 - ri->ri_bmap.br_state); 2645 + trace_xfs_rmap_deferred(mp, ri); 2586 2646 2587 2647 if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) 2588 2648 return -EIO; ··· 2586 2656 * If we haven't gotten a cursor or the cursor AG doesn't match 2587 2657 * the startblock, get one now. 
2588 2658 */ 2589 - rcur = *pcur; 2590 2659 if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) { 2591 - xfs_rmap_finish_one_cleanup(tp, rcur, 0); 2660 + xfs_btree_del_cursor(rcur, 0); 2592 2661 rcur = NULL; 2593 2662 *pcur = NULL; 2594 2663 } ··· 2607 2678 return -EFSCORRUPTED; 2608 2679 } 2609 2680 2610 - rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag); 2681 + *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag); 2611 2682 } 2612 - *pcur = rcur; 2613 2683 2614 2684 xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork, 2615 2685 ri->ri_bmap.br_startoff); ··· 2650 2722 { 2651 2723 struct xfs_rmap_intent *ri; 2652 2724 2653 - trace_xfs_rmap_defer(tp->t_mountp, 2654 - XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), 2655 - type, 2656 - XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock), 2657 - owner, whichfork, 2658 - bmap->br_startoff, 2659 - bmap->br_blockcount, 2660 - bmap->br_state); 2661 - 2662 2725 ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL); 2663 2726 INIT_LIST_HEAD(&ri->ri_list); 2664 2727 ri->ri_type = type; ··· 2657 2738 ri->ri_whichfork = whichfork; 2658 2739 ri->ri_bmap = *bmap; 2659 2740 2660 - xfs_rmap_update_get_group(tp->t_mountp, ri); 2661 - xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type); 2741 + xfs_rmap_defer_add(tp, ri); 2662 2742 } 2663 2743 2664 2744 /* Map an extent into a file. */

+10 -5

fs/xfs/libxfs/xfs_rmap.h

··· 157 157 XFS_RMAP_FREE, 158 158 }; 159 159 160 + #define XFS_RMAP_INTENT_STRINGS \ 161 + { XFS_RMAP_MAP, "map" }, \ 162 + { XFS_RMAP_MAP_SHARED, "map_shared" }, \ 163 + { XFS_RMAP_UNMAP, "unmap" }, \ 164 + { XFS_RMAP_UNMAP_SHARED, "unmap_shared" }, \ 165 + { XFS_RMAP_CONVERT, "cvt" }, \ 166 + { XFS_RMAP_CONVERT_SHARED, "cvt_shared" }, \ 167 + { XFS_RMAP_ALLOC, "alloc" }, \ 168 + { XFS_RMAP_FREE, "free" } 169 + 160 170 struct xfs_rmap_intent { 161 171 struct list_head ri_list; 162 172 enum xfs_rmap_intent_type ri_type; ··· 175 165 struct xfs_bmbt_irec ri_bmap; 176 166 struct xfs_perag *ri_pag; 177 167 }; 178 - 179 - void xfs_rmap_update_get_group(struct xfs_mount *mp, 180 - struct xfs_rmap_intent *ri); 181 168 182 169 /* functions for updating the rmapbt based on bmbt map/unmap operations */ 183 170 void xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, ··· 189 182 void xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno, 190 183 xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner); 191 184 192 - void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, 193 - struct xfs_btree_cur *rcur, int error); 194 185 int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri, 195 186 struct xfs_btree_cur **pcur); 196 187 int __xfs_rmap_finish_intent(struct xfs_btree_cur *rcur,

+6 -1

fs/xfs/libxfs/xfs_rmap_btree.c

··· 88 88 struct xfs_buf *agbp = cur->bc_ag.agbp; 89 89 struct xfs_agf *agf = agbp->b_addr; 90 90 struct xfs_perag *pag = cur->bc_ag.pag; 91 + struct xfs_alloc_arg args = { .len = 1 }; 91 92 int error; 92 93 xfs_agblock_t bno; 93 94 ··· 108 107 be32_add_cpu(&agf->agf_rmap_blocks, 1); 109 108 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); 110 109 111 - xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno); 110 + /* 111 + * Since rmapbt blocks are sourced from the AGFL, they are allocated one 112 + * at a time and the reservation updates don't require a transaction. 113 + */ 114 + xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args); 112 115 113 116 *stat = 1; 114 117 return 0;

-7

fs/xfs/libxfs/xfs_shared.h

··· 177 177 #define XFS_REFC_BTREE_REF 1 178 178 #define XFS_SSB_REF 0 179 179 180 - /* 181 - * Flags for xfs_trans_ichgtime(). 182 - */ 183 - #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ 184 - #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ 185 - #define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ 186 - 187 180 /* Computed inode geometry for the filesystem. */ 188 181 struct xfs_ino_geometry { 189 182 /* Maximum inode count in this filesystem. */

+2

fs/xfs/libxfs/xfs_trans_inode.c

··· 68 68 inode_set_mtime_to_ts(inode, tv); 69 69 if (flags & XFS_ICHGTIME_CHG) 70 70 inode_set_ctime_to_ts(inode, tv); 71 + if (flags & XFS_ICHGTIME_ACCESS) 72 + inode_set_atime_to_ts(inode, tv); 71 73 if (flags & XFS_ICHGTIME_CREATE) 72 74 ip->i_crtime = tv; 73 75 }

-1

fs/xfs/libxfs/xfs_trans_resv.c

··· 22 22 #include "xfs_rtbitmap.h" 23 23 #include "xfs_attr_item.h" 24 24 #include "xfs_log.h" 25 - #include "xfs_da_format.h" 26 25 27 26 #define _ALLOC true 28 27 #define _FREE false

+1

fs/xfs/scrub/common.c

··· 26 26 #include "xfs_da_format.h" 27 27 #include "xfs_da_btree.h" 28 28 #include "xfs_dir2_priv.h" 29 + #include "xfs_dir2.h" 29 30 #include "xfs_attr.h" 30 31 #include "xfs_reflink.h" 31 32 #include "xfs_ag.h"

+3 -2

fs/xfs/scrub/newbt.c

··· 160 160 if (args->tp) { 161 161 ASSERT(xnr->oinfo.oi_offset == 0); 162 162 163 - error = xfs_alloc_schedule_autoreap(args, true, &resv->autoreap); 163 + error = xfs_alloc_schedule_autoreap(args, 164 + XFS_FREE_EXTENT_SKIP_DISCARD, &resv->autoreap); 164 165 if (error) 165 166 goto out_pag; 166 167 } ··· 415 414 */ 416 415 fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno); 417 416 error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo, 418 - xnr->resv, true); 417 + xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD); 419 418 if (error) 420 419 return error; 421 420

-1

fs/xfs/scrub/quota_repair.c

··· 12 12 #include "xfs_defer.h" 13 13 #include "xfs_btree.h" 14 14 #include "xfs_bit.h" 15 - #include "xfs_format.h" 16 15 #include "xfs_log_format.h" 17 16 #include "xfs_trans.h" 18 17 #include "xfs_sb.h"

+4 -3

fs/xfs/scrub/reap.c

··· 451 451 452 452 xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp); 453 453 error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL, 454 - rs->resv, true); 454 + rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD); 455 455 if (error) 456 456 return error; 457 457 ··· 477 477 * system with large EFIs. 478 478 */ 479 479 error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo, 480 - rs->resv, true); 480 + rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD); 481 481 if (error) 482 482 return error; 483 483 ··· 943 943 xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT, 944 944 -(int64_t)imap->br_blockcount); 945 945 return xfs_free_extent_later(sc->tp, imap->br_startblock, 946 - imap->br_blockcount, NULL, XFS_AG_RESV_NONE, true); 946 + imap->br_blockcount, NULL, XFS_AG_RESV_NONE, 947 + XFS_FREE_EXTENT_SKIP_DISCARD); 947 948 } 948 949 949 950 /*

+11 -10

fs/xfs/scrub/tempfile.c

··· 40 40 struct xfs_scrub *sc, 41 41 uint16_t mode) 42 42 { 43 + struct xfs_icreate_args args = { 44 + .pip = sc->mp->m_rootip, 45 + .mode = mode, 46 + .flags = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE, 47 + }; 43 48 struct xfs_mount *mp = sc->mp; 44 49 struct xfs_trans *tp = NULL; 45 - struct xfs_dquot *udqp = NULL; 46 - struct xfs_dquot *gdqp = NULL; 47 - struct xfs_dquot *pdqp = NULL; 50 + struct xfs_dquot *udqp; 51 + struct xfs_dquot *gdqp; 52 + struct xfs_dquot *pdqp; 48 53 struct xfs_trans_res *tres; 49 54 struct xfs_inode *dp = mp->m_rootip; 50 55 xfs_ino_t ino; ··· 70 65 * inode should be completely root owned so that we don't fail due to 71 66 * quota limits. 72 67 */ 73 - error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, 74 - XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp); 68 + error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp); 75 69 if (error) 76 70 return error; 77 71 ··· 91 87 error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); 92 88 if (error) 93 89 goto out_trans_cancel; 94 - error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0, 95 - 0, false, &sc->tempip); 90 + error = xfs_icreate(tp, ino, &args, &sc->tempip); 96 91 if (error) 97 92 goto out_trans_cancel; 98 93 99 - /* Change the ownership of the inode to root. */ 100 - VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID; 101 - VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID; 94 + /* We don't touch file data, so drop the realtime flags. */ 102 95 sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); 103 96 xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE); 104 97

+4

fs/xfs/xfs.h

··· 10 10 #define DEBUG 1 11 11 #endif 12 12 13 + #ifdef CONFIG_XFS_DEBUG_EXPENSIVE 14 + #define DEBUG_EXPENSIVE 1 15 + #endif 16 + 13 17 #ifdef CONFIG_XFS_ASSERT_FATAL 14 18 #define XFS_ASSERT_FATAL 1 15 19 #endif

+1 -5

fs/xfs/xfs_bmap_item.c

··· 324 324 struct xfs_mount *mp, 325 325 struct xfs_bmap_intent *bi) 326 326 { 327 - xfs_agnumber_t agno; 328 - 329 327 if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork)) 330 328 return; 331 - 332 - agno = XFS_FSB_TO_AGNO(mp, bi->bi_bmap.br_startblock); 333 329 334 330 /* 335 331 * Bump the intent count on behalf of the deferred rmap and refcount ··· 334 338 * intent drops the intent count, ensuring that the intent count 335 339 * remains nonzero across the transaction roll. 336 340 */ 337 - bi->bi_pag = xfs_perag_intent_get(mp, agno); 341 + bi->bi_pag = xfs_perag_intent_get(mp, bi->bi_bmap.br_startblock); 338 342 } 339 343 340 344 /* Add this deferred BUI to the transaction. */

+14 -8

fs/xfs/xfs_bmap_util.c

··· 808 808 xfs_off_t offset, 809 809 xfs_off_t len) 810 810 { 811 - struct xfs_mount *mp = ip->i_mount; 812 811 struct inode *inode = VFS_I(ip); 813 812 xfs_off_t rounding, start, end; 814 813 int error; 815 814 816 - rounding = max_t(xfs_off_t, mp->m_sb.sb_blocksize, PAGE_SIZE); 817 - start = round_down(offset, rounding); 818 - end = round_up(offset + len, rounding) - 1; 815 + /* 816 + * Make sure we extend the flush out to extent alignment 817 + * boundaries so any extent range overlapping the start/end 818 + * of the modification we are about to do is clean and idle. 819 + */ 820 + rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE); 821 + start = rounddown_64(offset, rounding); 822 + end = roundup_64(offset + len, rounding) - 1; 819 823 820 824 error = filemap_write_and_wait_range(inode->i_mapping, start, end); 821 825 if (error) ··· 902 898 struct xfs_inode *ip, 903 899 loff_t offset) 904 900 { 905 - struct xfs_mount *mp = ip->i_mount; 901 + unsigned int rounding; 906 902 int error; 907 903 908 904 /* ··· 920 916 * with the full range of the operation. If we don't, a COW writeback 921 917 * completion could race with an insert, front merge with the start 922 918 * extent (after split) during the shift and corrupt the file. Start 923 - * with the block just prior to the start to stabilize the boundary. 919 + * with the allocation unit just prior to the start to stabilize the 920 + * boundary. 924 921 */ 925 - offset = round_down(offset, mp->m_sb.sb_blocksize); 922 + rounding = xfs_inode_alloc_unitsize(ip); 923 + offset = rounddown_64(offset, rounding); 926 924 if (offset) 927 - offset -= mp->m_sb.sb_blocksize; 925 + offset -= rounding; 928 926 929 927 /* 930 928 * Writeback and invalidate cache for the remainder of the file as we're

+32

fs/xfs/xfs_buf_item.c

··· 22 22 #include "xfs_trace.h" 23 23 #include "xfs_log.h" 24 24 #include "xfs_log_priv.h" 25 + #include "xfs_error.h" 25 26 26 27 27 28 struct kmem_cache *xfs_buf_item_cache; ··· 782 781 return lsn; 783 782 } 784 783 784 + #ifdef DEBUG_EXPENSIVE 785 + static int 786 + xfs_buf_item_precommit( 787 + struct xfs_trans *tp, 788 + struct xfs_log_item *lip) 789 + { 790 + struct xfs_buf_log_item *bip = BUF_ITEM(lip); 791 + struct xfs_buf *bp = bip->bli_buf; 792 + struct xfs_mount *mp = bp->b_mount; 793 + xfs_failaddr_t fa; 794 + 795 + if (!bp->b_ops || !bp->b_ops->verify_struct) 796 + return 0; 797 + if (bip->bli_flags & XFS_BLI_STALE) 798 + return 0; 799 + 800 + fa = bp->b_ops->verify_struct(bp); 801 + if (fa) { 802 + xfs_buf_verifier_error(bp, -EFSCORRUPTED, bp->b_ops->name, 803 + bp->b_addr, BBTOB(bp->b_length), fa); 804 + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 805 + ASSERT(fa == NULL); 806 + } 807 + 808 + return 0; 809 + } 810 + #else 811 + # define xfs_buf_item_precommit NULL 812 + #endif 813 + 785 814 static const struct xfs_item_ops xfs_buf_item_ops = { 786 815 .iop_size = xfs_buf_item_size, 816 + .iop_precommit = xfs_buf_item_precommit, 787 817 .iop_format = xfs_buf_item_format, 788 818 .iop_pin = xfs_buf_item_pin, 789 819 .iop_unpin = xfs_buf_item_unpin,

+279 -24

fs/xfs/xfs_discard.c

··· 20 20 #include "xfs_log.h" 21 21 #include "xfs_ag.h" 22 22 #include "xfs_health.h" 23 + #include "xfs_rtbitmap.h" 23 24 24 25 /* 25 26 * Notes on an efficient, low latency fstrim algorithm ··· 323 322 * we found in the last batch as the key to start the next. 324 323 */ 325 324 static int 326 - xfs_trim_extents( 325 + xfs_trim_perag_extents( 327 326 struct xfs_perag *pag, 328 327 xfs_agblock_t start, 329 328 xfs_agblock_t end, ··· 384 383 385 384 } 386 385 386 + static int 387 + xfs_trim_datadev_extents( 388 + struct xfs_mount *mp, 389 + xfs_daddr_t start, 390 + xfs_daddr_t end, 391 + xfs_extlen_t minlen, 392 + uint64_t *blocks_trimmed) 393 + { 394 + xfs_agnumber_t start_agno, end_agno; 395 + xfs_agblock_t start_agbno, end_agbno; 396 + xfs_daddr_t ddev_end; 397 + struct xfs_perag *pag; 398 + int last_error = 0, error; 399 + 400 + ddev_end = min_t(xfs_daddr_t, end, 401 + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1); 402 + 403 + start_agno = xfs_daddr_to_agno(mp, start); 404 + start_agbno = xfs_daddr_to_agbno(mp, start); 405 + end_agno = xfs_daddr_to_agno(mp, ddev_end); 406 + end_agbno = xfs_daddr_to_agbno(mp, ddev_end); 407 + 408 + for_each_perag_range(mp, start_agno, end_agno, pag) { 409 + xfs_agblock_t agend = pag->block_count; 410 + 411 + if (start_agno == end_agno) 412 + agend = end_agbno; 413 + error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen, 414 + blocks_trimmed); 415 + if (error) 416 + last_error = error; 417 + 418 + if (xfs_trim_should_stop()) { 419 + xfs_perag_rele(pag); 420 + break; 421 + } 422 + start_agbno = 0; 423 + } 424 + 425 + return last_error; 426 + } 427 + 428 + #ifdef CONFIG_XFS_RT 429 + struct xfs_trim_rtdev { 430 + /* list of rt extents to free */ 431 + struct list_head extent_list; 432 + 433 + /* pointer to count of blocks trimmed */ 434 + uint64_t *blocks_trimmed; 435 + 436 + /* minimum length that caller allows us to trim */ 437 + xfs_rtblock_t minlen_fsb; 438 + 439 + /* restart point for the rtbitmap walk */ 440 + 
xfs_rtxnum_t restart_rtx; 441 + 442 + /* stopping point for the current rtbitmap walk */ 443 + xfs_rtxnum_t stop_rtx; 444 + }; 445 + 446 + struct xfs_rtx_busy { 447 + struct list_head list; 448 + xfs_rtblock_t bno; 449 + xfs_rtblock_t length; 450 + }; 451 + 452 + static void 453 + xfs_discard_free_rtdev_extents( 454 + struct xfs_trim_rtdev *tr) 455 + { 456 + struct xfs_rtx_busy *busyp, *n; 457 + 458 + list_for_each_entry_safe(busyp, n, &tr->extent_list, list) { 459 + list_del_init(&busyp->list); 460 + kfree(busyp); 461 + } 462 + } 463 + 464 + /* 465 + * Walk the discard list and issue discards on all the busy extents in the 466 + * list. We plug and chain the bios so that we only need a single completion 467 + * call to clear all the busy extents once the discards are complete. 468 + */ 469 + static int 470 + xfs_discard_rtdev_extents( 471 + struct xfs_mount *mp, 472 + struct xfs_trim_rtdev *tr) 473 + { 474 + struct block_device *bdev = mp->m_rtdev_targp->bt_bdev; 475 + struct xfs_rtx_busy *busyp; 476 + struct bio *bio = NULL; 477 + struct blk_plug plug; 478 + xfs_rtblock_t start = NULLRTBLOCK, length = 0; 479 + int error = 0; 480 + 481 + blk_start_plug(&plug); 482 + list_for_each_entry(busyp, &tr->extent_list, list) { 483 + if (start == NULLRTBLOCK) 484 + start = busyp->bno; 485 + length += busyp->length; 486 + 487 + trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length); 488 + 489 + error = __blkdev_issue_discard(bdev, 490 + XFS_FSB_TO_BB(mp, busyp->bno), 491 + XFS_FSB_TO_BB(mp, busyp->length), 492 + GFP_NOFS, &bio); 493 + if (error) 494 + break; 495 + } 496 + xfs_discard_free_rtdev_extents(tr); 497 + 498 + if (bio) { 499 + error = submit_bio_wait(bio); 500 + if (error == -EOPNOTSUPP) 501 + error = 0; 502 + if (error) 503 + xfs_info(mp, 504 + "discard failed for rtextent [0x%llx,%llu], error %d", 505 + (unsigned long long)start, 506 + (unsigned long long)length, 507 + error); 508 + bio_put(bio); 509 + } 510 + blk_finish_plug(&plug); 511 + 512 + return error; 
513 + } 514 + 515 + static int 516 + xfs_trim_gather_rtextent( 517 + struct xfs_mount *mp, 518 + struct xfs_trans *tp, 519 + const struct xfs_rtalloc_rec *rec, 520 + void *priv) 521 + { 522 + struct xfs_trim_rtdev *tr = priv; 523 + struct xfs_rtx_busy *busyp; 524 + xfs_rtblock_t rbno, rlen; 525 + 526 + if (rec->ar_startext > tr->stop_rtx) { 527 + /* 528 + * If we've scanned a large number of rtbitmap blocks, update 529 + * the cursor to point at this extent so we restart the next 530 + * batch from this extent. 531 + */ 532 + tr->restart_rtx = rec->ar_startext; 533 + return -ECANCELED; 534 + } 535 + 536 + rbno = xfs_rtx_to_rtb(mp, rec->ar_startext); 537 + rlen = xfs_rtx_to_rtb(mp, rec->ar_extcount); 538 + 539 + /* Ignore too small. */ 540 + if (rlen < tr->minlen_fsb) { 541 + trace_xfs_discard_rttoosmall(mp, rbno, rlen); 542 + return 0; 543 + } 544 + 545 + busyp = kzalloc(sizeof(struct xfs_rtx_busy), GFP_KERNEL); 546 + if (!busyp) 547 + return -ENOMEM; 548 + 549 + busyp->bno = rbno; 550 + busyp->length = rlen; 551 + INIT_LIST_HEAD(&busyp->list); 552 + list_add_tail(&busyp->list, &tr->extent_list); 553 + *tr->blocks_trimmed += rlen; 554 + 555 + tr->restart_rtx = rec->ar_startext + rec->ar_extcount; 556 + return 0; 557 + } 558 + 559 + static int 560 + xfs_trim_rtdev_extents( 561 + struct xfs_mount *mp, 562 + xfs_daddr_t start, 563 + xfs_daddr_t end, 564 + xfs_daddr_t minlen, 565 + uint64_t *blocks_trimmed) 566 + { 567 + struct xfs_rtalloc_rec low = { }; 568 + struct xfs_rtalloc_rec high = { }; 569 + struct xfs_trim_rtdev tr = { 570 + .blocks_trimmed = blocks_trimmed, 571 + .minlen_fsb = XFS_BB_TO_FSB(mp, minlen), 572 + }; 573 + struct xfs_trans *tp; 574 + xfs_daddr_t rtdev_daddr; 575 + int error; 576 + 577 + INIT_LIST_HEAD(&tr.extent_list); 578 + 579 + /* Shift the start and end downwards to match the rt device. 
*/ 580 + rtdev_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 581 + if (start > rtdev_daddr) 582 + start -= rtdev_daddr; 583 + else 584 + start = 0; 585 + 586 + if (end <= rtdev_daddr) 587 + return 0; 588 + end -= rtdev_daddr; 589 + 590 + error = xfs_trans_alloc_empty(mp, &tp); 591 + if (error) 592 + return error; 593 + 594 + end = min_t(xfs_daddr_t, end, 595 + XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1); 596 + 597 + /* Convert the rt blocks to rt extents */ 598 + low.ar_startext = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start)); 599 + high.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end)); 600 + 601 + /* 602 + * Walk the free ranges between low and high. The query_range function 603 + * trims the extents returned. 604 + */ 605 + do { 606 + tr.stop_rtx = low.ar_startext + (mp->m_sb.sb_blocksize * NBBY); 607 + xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP); 608 + error = xfs_rtalloc_query_range(mp, tp, &low, &high, 609 + xfs_trim_gather_rtextent, &tr); 610 + 611 + if (error == -ECANCELED) 612 + error = 0; 613 + if (error) { 614 + xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP); 615 + xfs_discard_free_rtdev_extents(&tr); 616 + break; 617 + } 618 + 619 + if (list_empty(&tr.extent_list)) { 620 + xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP); 621 + break; 622 + } 623 + 624 + error = xfs_discard_rtdev_extents(mp, &tr); 625 + xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP); 626 + if (error) 627 + break; 628 + 629 + low.ar_startext = tr.restart_rtx; 630 + } while (!xfs_trim_should_stop() && low.ar_startext <= high.ar_startext); 631 + 632 + xfs_trans_cancel(tp); 633 + return error; 634 + } 635 + #else 636 + # define xfs_trim_rtdev_extents(m,s,e,n,b) (-EOPNOTSUPP) 637 + #endif /* CONFIG_XFS_RT */ 638 + 387 639 /* 388 640 * trim a range of the filesystem. 389 641 * ··· 645 391 * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format 646 392 * is a linear address range. 
Hence we need to use DADDR based conversions and 647 393 * comparisons for determining the correct offset and regions to trim. 394 + * 395 + * The realtime device is mapped into the FITRIM "address space" immediately 396 + * after the data device. 648 397 */ 649 398 int 650 399 xfs_ioc_trim( 651 400 struct xfs_mount *mp, 652 401 struct fstrim_range __user *urange) 653 402 { 654 - struct xfs_perag *pag; 655 403 unsigned int granularity = 656 404 bdev_discard_granularity(mp->m_ddev_targp->bt_bdev); 405 + struct block_device *rt_bdev = NULL; 657 406 struct fstrim_range range; 658 407 xfs_daddr_t start, end; 659 408 xfs_extlen_t minlen; 660 - xfs_agnumber_t start_agno, end_agno; 661 - xfs_agblock_t start_agbno, end_agbno; 409 + xfs_rfsblock_t max_blocks; 662 410 uint64_t blocks_trimmed = 0; 663 411 int error, last_error = 0; 664 412 665 413 if (!capable(CAP_SYS_ADMIN)) 666 414 return -EPERM; 667 - if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) 415 + if (mp->m_rtdev_targp && 416 + bdev_max_discard_sectors(mp->m_rtdev_targp->bt_bdev)) 417 + rt_bdev = mp->m_rtdev_targp->bt_bdev; 418 + if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev) && !rt_bdev) 668 419 return -EOPNOTSUPP; 420 + 421 + if (rt_bdev) 422 + granularity = max(granularity, 423 + bdev_discard_granularity(rt_bdev)); 669 424 670 425 /* 671 426 * We haven't recovered the log, so we cannot use our bnobt-guided ··· 696 433 * used by the fstrim application. In the end it really doesn't 697 434 * matter as trimming blocks is an advisory interface. 
698 435 */ 699 - if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || 436 + max_blocks = mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks; 437 + if (range.start >= XFS_FSB_TO_B(mp, max_blocks) || 700 438 range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) || 701 439 range.len < mp->m_sb.sb_blocksize) 702 440 return -EINVAL; 703 441 704 442 start = BTOBB(range.start); 705 - end = min_t(xfs_daddr_t, start + BTOBBT(range.len), 706 - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) - 1; 443 + end = start + BTOBBT(range.len) - 1; 707 444 708 - start_agno = xfs_daddr_to_agno(mp, start); 709 - start_agbno = xfs_daddr_to_agbno(mp, start); 710 - end_agno = xfs_daddr_to_agno(mp, end); 711 - end_agbno = xfs_daddr_to_agbno(mp, end); 712 - 713 - for_each_perag_range(mp, start_agno, end_agno, pag) { 714 - xfs_agblock_t agend = pag->block_count; 715 - 716 - if (start_agno == end_agno) 717 - agend = end_agbno; 718 - error = xfs_trim_extents(pag, start_agbno, agend, minlen, 445 + if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) { 446 + error = xfs_trim_datadev_extents(mp, start, end, minlen, 719 447 &blocks_trimmed); 720 448 if (error) 721 449 last_error = error; 450 + } 722 451 723 - if (xfs_trim_should_stop()) { 724 - xfs_perag_rele(pag); 725 - break; 726 - } 727 - start_agbno = 0; 452 + if (rt_bdev && !xfs_trim_should_stop()) { 453 + error = xfs_trim_rtdev_extents(mp, start, end, minlen, 454 + &blocks_trimmed); 455 + if (error) 456 + last_error = error; 728 457 } 729 458 730 459 if (last_error)

+31

fs/xfs/xfs_dquot_item.c

··· 17 17 #include "xfs_trans_priv.h" 18 18 #include "xfs_qm.h" 19 19 #include "xfs_log.h" 20 + #include "xfs_error.h" 20 21 21 22 static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) 22 23 { ··· 194 193 return xfs_qm_dquot_logitem_release(lip); 195 194 } 196 195 196 + #ifdef DEBUG_EXPENSIVE 197 + static int 198 + xfs_qm_dquot_logitem_precommit( 199 + struct xfs_trans *tp, 200 + struct xfs_log_item *lip) 201 + { 202 + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; 203 + struct xfs_mount *mp = dqp->q_mount; 204 + struct xfs_disk_dquot ddq = { }; 205 + xfs_failaddr_t fa; 206 + 207 + xfs_dquot_to_disk(&ddq, dqp); 208 + fa = xfs_dquot_verify(mp, &ddq, dqp->q_id); 209 + if (fa) { 210 + XFS_CORRUPTION_ERROR("Bad dquot during logging", 211 + XFS_ERRLEVEL_LOW, mp, &ddq, sizeof(ddq)); 212 + xfs_alert(mp, 213 + "Metadata corruption detected at %pS, dquot 0x%x", 214 + fa, dqp->q_id); 215 + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 216 + ASSERT(fa == NULL); 217 + } 218 + 219 + return 0; 220 + } 221 + #else 222 + # define xfs_qm_dquot_logitem_precommit NULL 223 + #endif 224 + 197 225 static const struct xfs_item_ops xfs_dquot_item_ops = { 198 226 .iop_size = xfs_qm_dquot_logitem_size, 227 + .iop_precommit = xfs_qm_dquot_logitem_precommit, 199 228 .iop_format = xfs_qm_dquot_logitem_format, 200 229 .iop_pin = xfs_qm_dquot_logitem_pin, 201 230 .iop_unpin = xfs_qm_dquot_logitem_unpin,

+4 -4

fs/xfs/xfs_drain.c

··· 94 94 } 95 95 96 96 /* 97 - * Get a passive reference to an AG and declare an intent to update its 98 - * metadata. 97 + * Get a passive reference to the AG that contains a fsbno and declare an intent 98 + * to update its metadata. 99 99 */ 100 100 struct xfs_perag * 101 101 xfs_perag_intent_get( 102 102 struct xfs_mount *mp, 103 - xfs_agnumber_t agno) 103 + xfs_fsblock_t fsbno) 104 104 { 105 105 struct xfs_perag *pag; 106 106 107 - pag = xfs_perag_get(mp, agno); 107 + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, fsbno)); 108 108 if (!pag) 109 109 return NULL; 110 110

+3 -2

fs/xfs/xfs_drain.h

··· 62 62 * until the item is finished or cancelled. 63 63 */ 64 64 struct xfs_perag *xfs_perag_intent_get(struct xfs_mount *mp, 65 - xfs_agnumber_t agno); 65 + xfs_fsblock_t fsbno); 66 66 void xfs_perag_intent_put(struct xfs_perag *pag); 67 67 68 68 void xfs_perag_intent_hold(struct xfs_perag *pag); ··· 76 76 #define xfs_defer_drain_free(dr) ((void)0) 77 77 #define xfs_defer_drain_init(dr) ((void)0) 78 78 79 - #define xfs_perag_intent_get(mp, agno) xfs_perag_get((mp), (agno)) 79 + #define xfs_perag_intent_get(mp, fsbno) \ 80 + xfs_perag_get((mp), XFS_FSB_TO_AGNO(mp, fsbno)) 80 81 #define xfs_perag_intent_put(pag) xfs_perag_put(pag) 81 82 82 83 static inline void xfs_perag_intent_hold(struct xfs_perag *pag) { }

+56 -63

fs/xfs/xfs_extfree_item.c

··· 303 303 .iop_intent = xfs_efd_item_intent, 304 304 }; 305 305 306 + static inline struct xfs_extent_free_item *xefi_entry(const struct list_head *e) 307 + { 308 + return list_entry(e, struct xfs_extent_free_item, xefi_list); 309 + } 310 + 306 311 /* 307 312 * Fill the EFD with all extents from the EFI when we need to roll the 308 313 * transaction and continue with a new EFI. ··· 336 331 efdp->efd_next_extent = efip->efi_format.efi_nextents; 337 332 } 338 333 334 + static void 335 + xfs_efd_add_extent( 336 + struct xfs_efd_log_item *efdp, 337 + struct xfs_extent_free_item *xefi) 338 + { 339 + struct xfs_extent *extp; 340 + 341 + ASSERT(efdp->efd_next_extent < efdp->efd_format.efd_nextents); 342 + 343 + extp = &efdp->efd_format.efd_extents[efdp->efd_next_extent]; 344 + extp->ext_start = xefi->xefi_startblock; 345 + extp->ext_len = xefi->xefi_blockcount; 346 + 347 + efdp->efd_next_extent++; 348 + } 349 + 339 350 /* Sort bmap items by AG. */ 340 351 static int 341 352 xfs_extent_free_diff_items( ··· 359 338 const struct list_head *a, 360 339 const struct list_head *b) 361 340 { 362 - struct xfs_extent_free_item *ra; 363 - struct xfs_extent_free_item *rb; 364 - 365 - ra = container_of(a, struct xfs_extent_free_item, xefi_list); 366 - rb = container_of(b, struct xfs_extent_free_item, xefi_list); 341 + struct xfs_extent_free_item *ra = xefi_entry(a); 342 + struct xfs_extent_free_item *rb = xefi_entry(b); 367 343 368 344 return ra->xefi_pag->pag_agno - rb->xefi_pag->pag_agno; 369 345 } ··· 436 418 return &efdp->efd_item; 437 419 } 438 420 439 - /* Take a passive ref to the AG containing the space we're freeing. */ 421 + /* Add this deferred EFI to the transaction. 
*/ 440 422 void 441 - xfs_extent_free_get_group( 442 - struct xfs_mount *mp, 443 - struct xfs_extent_free_item *xefi) 423 + xfs_extent_free_defer_add( 424 + struct xfs_trans *tp, 425 + struct xfs_extent_free_item *xefi, 426 + struct xfs_defer_pending **dfpp) 444 427 { 445 - xfs_agnumber_t agno; 428 + struct xfs_mount *mp = tp->t_mountp; 446 429 447 - agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock); 448 - xefi->xefi_pag = xfs_perag_intent_get(mp, agno); 430 + trace_xfs_extent_free_defer(mp, xefi); 431 + 432 + xefi->xefi_pag = xfs_perag_intent_get(mp, xefi->xefi_startblock); 433 + if (xefi->xefi_agresv == XFS_AG_RESV_AGFL) 434 + *dfpp = xfs_defer_add(tp, &xefi->xefi_list, 435 + &xfs_agfl_free_defer_type); 436 + else 437 + *dfpp = xfs_defer_add(tp, &xefi->xefi_list, 438 + &xfs_extent_free_defer_type); 449 439 } 450 440 451 - /* Release a passive AG ref after some freeing work. */ 452 - static inline void 453 - xfs_extent_free_put_group( 454 - struct xfs_extent_free_item *xefi) 441 + /* Cancel a free extent. */ 442 + STATIC void 443 + xfs_extent_free_cancel_item( 444 + struct list_head *item) 455 445 { 446 + struct xfs_extent_free_item *xefi = xefi_entry(item); 447 + 456 448 xfs_perag_intent_put(xefi->xefi_pag); 449 + kmem_cache_free(xfs_extfree_item_cache, xefi); 457 450 } 458 451 459 452 /* Process a free extent. 
*/ ··· 476 447 struct xfs_btree_cur **state) 477 448 { 478 449 struct xfs_owner_info oinfo = { }; 479 - struct xfs_extent_free_item *xefi; 450 + struct xfs_extent_free_item *xefi = xefi_entry(item); 480 451 struct xfs_efd_log_item *efdp = EFD_ITEM(done); 481 452 struct xfs_mount *mp = tp->t_mountp; 482 - struct xfs_extent *extp; 483 - uint next_extent; 484 453 xfs_agblock_t agbno; 485 454 int error = 0; 486 455 487 - xefi = container_of(item, struct xfs_extent_free_item, xefi_list); 488 456 agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); 489 457 490 458 oinfo.oi_owner = xefi->xefi_owner; ··· 490 464 if (xefi->xefi_flags & XFS_EFI_BMBT_BLOCK) 491 465 oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; 492 466 493 - trace_xfs_bmap_free_deferred(tp->t_mountp, xefi->xefi_pag->pag_agno, 0, 494 - agbno, xefi->xefi_blockcount); 467 + trace_xfs_extent_free_deferred(mp, xefi); 495 468 496 469 /* 497 470 * If we need a new transaction to make progress, the caller will log a ··· 507 482 return error; 508 483 } 509 484 510 - /* Add the work we finished to the EFD, even though nobody uses that */ 511 - next_extent = efdp->efd_next_extent; 512 - ASSERT(next_extent < efdp->efd_format.efd_nextents); 513 - extp = &(efdp->efd_format.efd_extents[next_extent]); 514 - extp->ext_start = xefi->xefi_startblock; 515 - extp->ext_len = xefi->xefi_blockcount; 516 - efdp->efd_next_extent++; 517 - 518 - xfs_extent_free_put_group(xefi); 519 - kmem_cache_free(xfs_extfree_item_cache, xefi); 485 + xfs_efd_add_extent(efdp, xefi); 486 + xfs_extent_free_cancel_item(item); 520 487 return error; 521 488 } 522 489 ··· 518 501 struct xfs_log_item *intent) 519 502 { 520 503 xfs_efi_release(EFI_ITEM(intent)); 521 - } 522 - 523 - /* Cancel a free extent. 
*/ 524 - STATIC void 525 - xfs_extent_free_cancel_item( 526 - struct list_head *item) 527 - { 528 - struct xfs_extent_free_item *xefi; 529 - 530 - xefi = container_of(item, struct xfs_extent_free_item, xefi_list); 531 - 532 - xfs_extent_free_put_group(xefi); 533 - kmem_cache_free(xfs_extfree_item_cache, xefi); 534 504 } 535 505 536 506 /* ··· 534 530 struct xfs_owner_info oinfo = { }; 535 531 struct xfs_mount *mp = tp->t_mountp; 536 532 struct xfs_efd_log_item *efdp = EFD_ITEM(done); 537 - struct xfs_extent_free_item *xefi; 538 - struct xfs_extent *extp; 533 + struct xfs_extent_free_item *xefi = xefi_entry(item); 539 534 struct xfs_buf *agbp; 540 535 int error; 541 536 xfs_agblock_t agbno; 542 - uint next_extent; 543 537 544 - xefi = container_of(item, struct xfs_extent_free_item, xefi_list); 545 538 ASSERT(xefi->xefi_blockcount == 1); 546 539 agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); 547 540 oinfo.oi_owner = xefi->xefi_owner; 548 541 549 - trace_xfs_agfl_free_deferred(mp, xefi->xefi_pag->pag_agno, 0, agbno, 550 - xefi->xefi_blockcount); 542 + trace_xfs_agfl_free_deferred(mp, xefi); 551 543 552 544 error = xfs_alloc_read_agf(xefi->xefi_pag, tp, 0, &agbp); 553 545 if (!error) 554 - error = xfs_free_agfl_block(tp, xefi->xefi_pag->pag_agno, 555 - agbno, agbp, &oinfo); 546 + error = xfs_free_ag_extent(tp, agbp, xefi->xefi_pag->pag_agno, 547 + agbno, 1, &oinfo, XFS_AG_RESV_AGFL); 556 548 557 - next_extent = efdp->efd_next_extent; 558 - ASSERT(next_extent < efdp->efd_format.efd_nextents); 559 - extp = &(efdp->efd_format.efd_extents[next_extent]); 560 - extp->ext_start = xefi->xefi_startblock; 561 - extp->ext_len = xefi->xefi_blockcount; 562 - efdp->efd_next_extent++; 563 - 564 - xfs_extent_free_put_group(xefi); 565 - kmem_cache_free(xfs_extfree_item_cache, xefi); 549 + xfs_efd_add_extent(efdp, xefi); 550 + xfs_extent_free_cancel_item(&xefi->xefi_list); 566 551 return error; 567 552 } 568 553 ··· 578 585 xefi->xefi_blockcount = extp->ext_len; 579 586 
xefi->xefi_agresv = XFS_AG_RESV_NONE; 580 587 xefi->xefi_owner = XFS_RMAP_OWN_UNKNOWN; 581 - xfs_extent_free_get_group(mp, xefi); 588 + xefi->xefi_pag = xfs_perag_intent_get(mp, extp->ext_start); 582 589 583 590 xfs_defer_add_item(dfp, &xefi->xefi_list); 584 591 }

+6

fs/xfs/xfs_extfree_item.h

··· 88 88 extern struct kmem_cache *xfs_efi_cache; 89 89 extern struct kmem_cache *xfs_efd_cache; 90 90 91 + struct xfs_extent_free_item; 92 + 93 + void xfs_extent_free_defer_add(struct xfs_trans *tp, 94 + struct xfs_extent_free_item *xefi, 95 + struct xfs_defer_pending **dfpp); 96 + 91 97 #endif /* __XFS_EXTFREE_ITEM_H__ */

+76 -65

fs/xfs/xfs_file.c

··· 213 213 if (ret) 214 214 return ret; 215 215 216 - if (*lock_mode == XFS_IOLOCK_EXCL) 217 - return 0; 218 - if (!xfs_iflags_test(ip, XFS_IREMAPPING)) 219 - return 0; 216 + /* 217 + * If a reflink remap is in progress we always need to take the iolock 218 + * exclusively to wait for it to finish. 219 + */ 220 + if (*lock_mode == XFS_IOLOCK_SHARED && 221 + xfs_iflags_test(ip, XFS_IREMAPPING)) { 222 + xfs_iunlock(ip, *lock_mode); 223 + *lock_mode = XFS_IOLOCK_EXCL; 224 + return xfs_ilock_iocb(iocb, *lock_mode); 225 + } 220 226 221 - xfs_iunlock(ip, *lock_mode); 222 - *lock_mode = XFS_IOLOCK_EXCL; 223 - return xfs_ilock_iocb(iocb, *lock_mode); 224 - } 225 - 226 - static unsigned int 227 - xfs_ilock_for_write_fault( 228 - struct xfs_inode *ip) 229 - { 230 - /* get a shared lock if no remapping in progress */ 231 - xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 232 - if (!xfs_iflags_test(ip, XFS_IREMAPPING)) 233 - return XFS_MMAPLOCK_SHARED; 234 - 235 - /* wait for remapping to complete */ 236 - xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 237 - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 238 - return XFS_MMAPLOCK_EXCL; 227 + return 0; 239 228 } 240 229 241 230 STATIC ssize_t ··· 1236 1247 return vfs_setpos(file, offset, inode->i_sb->s_maxbytes); 1237 1248 } 1238 1249 1239 - #ifdef CONFIG_FS_DAX 1240 1250 static inline vm_fault_t 1241 - xfs_dax_fault( 1251 + xfs_dax_fault_locked( 1242 1252 struct vm_fault *vmf, 1243 1253 unsigned int order, 1244 - bool write_fault, 1245 - pfn_t *pfn) 1254 + bool write_fault) 1246 1255 { 1247 - return dax_iomap_fault(vmf, order, pfn, NULL, 1256 + vm_fault_t ret; 1257 + pfn_t pfn; 1258 + 1259 + if (!IS_ENABLED(CONFIG_FS_DAX)) { 1260 + ASSERT(0); 1261 + return VM_FAULT_SIGBUS; 1262 + } 1263 + ret = dax_iomap_fault(vmf, order, &pfn, NULL, 1248 1264 (write_fault && !vmf->cow_page) ? 
1249 1265 &xfs_dax_write_iomap_ops : 1250 1266 &xfs_read_iomap_ops); 1267 + if (ret & VM_FAULT_NEEDDSYNC) 1268 + ret = dax_finish_sync_fault(vmf, order, pfn); 1269 + return ret; 1251 1270 } 1252 - #else 1253 - static inline vm_fault_t 1254 - xfs_dax_fault( 1271 + 1272 + static vm_fault_t 1273 + xfs_dax_read_fault( 1255 1274 struct vm_fault *vmf, 1256 - unsigned int order, 1257 - bool write_fault, 1258 - pfn_t *pfn) 1275 + unsigned int order) 1259 1276 { 1260 - ASSERT(0); 1261 - return VM_FAULT_SIGBUS; 1277 + struct xfs_inode *ip = XFS_I(file_inode(vmf->vma->vm_file)); 1278 + vm_fault_t ret; 1279 + 1280 + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 1281 + ret = xfs_dax_fault_locked(vmf, order, false); 1282 + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 1283 + 1284 + return ret; 1262 1285 } 1263 - #endif 1286 + 1287 + static vm_fault_t 1288 + xfs_write_fault( 1289 + struct vm_fault *vmf, 1290 + unsigned int order) 1291 + { 1292 + struct inode *inode = file_inode(vmf->vma->vm_file); 1293 + struct xfs_inode *ip = XFS_I(inode); 1294 + unsigned int lock_mode = XFS_MMAPLOCK_SHARED; 1295 + vm_fault_t ret; 1296 + 1297 + sb_start_pagefault(inode->i_sb); 1298 + file_update_time(vmf->vma->vm_file); 1299 + 1300 + /* 1301 + * Normally we only need the shared mmaplock, but if a reflink remap is 1302 + * in progress we take the exclusive lock to wait for the remap to 1303 + * finish before taking a write fault. 1304 + */ 1305 + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 1306 + if (xfs_iflags_test(ip, XFS_IREMAPPING)) { 1307 + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 1308 + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 1309 + lock_mode = XFS_MMAPLOCK_EXCL; 1310 + } 1311 + 1312 + if (IS_DAX(inode)) 1313 + ret = xfs_dax_fault_locked(vmf, order, true); 1314 + else 1315 + ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops); 1316 + xfs_iunlock(ip, lock_mode); 1317 + 1318 + sb_end_pagefault(inode->i_sb); 1319 + return ret; 1320 + } 1264 1321 1265 1322 /* 1266 1323 * Locking for serialisation of IO during page faults. 
This results in a lock ··· 1325 1290 bool write_fault) 1326 1291 { 1327 1292 struct inode *inode = file_inode(vmf->vma->vm_file); 1328 - struct xfs_inode *ip = XFS_I(inode); 1329 - vm_fault_t ret; 1330 - unsigned int lock_mode = 0; 1331 1293 1332 - trace_xfs_filemap_fault(ip, order, write_fault); 1333 - 1334 - if (write_fault) { 1335 - sb_start_pagefault(inode->i_sb); 1336 - file_update_time(vmf->vma->vm_file); 1337 - } 1338 - 1339 - if (IS_DAX(inode) || write_fault) 1340 - lock_mode = xfs_ilock_for_write_fault(XFS_I(inode)); 1341 - 1342 - if (IS_DAX(inode)) { 1343 - pfn_t pfn; 1344 - 1345 - ret = xfs_dax_fault(vmf, order, write_fault, &pfn); 1346 - if (ret & VM_FAULT_NEEDDSYNC) 1347 - ret = dax_finish_sync_fault(vmf, order, pfn); 1348 - } else if (write_fault) { 1349 - ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops); 1350 - } else { 1351 - ret = filemap_fault(vmf); 1352 - } 1353 - 1354 - if (lock_mode) 1355 - xfs_iunlock(XFS_I(inode), lock_mode); 1294 + trace_xfs_filemap_fault(XFS_I(inode), order, write_fault); 1356 1295 1357 1296 if (write_fault) 1358 - sb_end_pagefault(inode->i_sb); 1359 - return ret; 1297 + return xfs_write_fault(vmf, order); 1298 + if (IS_DAX(inode)) 1299 + return xfs_dax_read_fault(vmf, order); 1300 + return filemap_fault(vmf); 1360 1301 } 1361 1302 1362 1303 static inline bool

-1

fs/xfs/xfs_handle.c

··· 21 21 #include "xfs_attr.h" 22 22 #include "xfs_ioctl.h" 23 23 #include "xfs_parent.h" 24 - #include "xfs_da_btree.h" 25 24 #include "xfs_handle.h" 26 25 #include "xfs_health.h" 27 26 #include "xfs_icache.h"

+159 -1325

fs/xfs/xfs_inode.c

··· 42 42 #include "xfs_pnfs.h" 43 43 #include "xfs_parent.h" 44 44 #include "xfs_xattr.h" 45 - #include "xfs_sb.h" 45 + #include "xfs_inode_util.h" 46 46 47 47 struct kmem_cache *xfs_inode_cache; 48 - 49 - /* 50 - * helper function to extract extent size hint from inode 51 - */ 52 - xfs_extlen_t 53 - xfs_get_extsz_hint( 54 - struct xfs_inode *ip) 55 - { 56 - /* 57 - * No point in aligning allocations if we need to COW to actually 58 - * write to them. 59 - */ 60 - if (xfs_is_always_cow_inode(ip)) 61 - return 0; 62 - if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize) 63 - return ip->i_extsize; 64 - if (XFS_IS_REALTIME_INODE(ip) && 65 - ip->i_mount->m_sb.sb_rextsize > 1) 66 - return ip->i_mount->m_sb.sb_rextsize; 67 - return 0; 68 - } 69 - 70 - /* 71 - * Helper function to extract CoW extent size hint from inode. 72 - * Between the extent size hint and the CoW extent size hint, we 73 - * return the greater of the two. If the value is zero (automatic), 74 - * use the default size. 
75 - */ 76 - xfs_extlen_t 77 - xfs_get_cowextsz_hint( 78 - struct xfs_inode *ip) 79 - { 80 - xfs_extlen_t a, b; 81 - 82 - a = 0; 83 - if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 84 - a = ip->i_cowextsize; 85 - b = xfs_get_extsz_hint(ip); 86 - 87 - a = max(a, b); 88 - if (a == 0) 89 - return XFS_DEFAULT_COWEXTSZ_HINT; 90 - return a; 91 - } 92 48 93 49 /* 94 50 * These two are wrapper routines around the xfs_ilock() routine used to ··· 523 567 } 524 568 } 525 569 526 - uint 527 - xfs_ip2xflags( 528 - struct xfs_inode *ip) 529 - { 530 - uint flags = 0; 531 - 532 - if (ip->i_diflags & XFS_DIFLAG_ANY) { 533 - if (ip->i_diflags & XFS_DIFLAG_REALTIME) 534 - flags |= FS_XFLAG_REALTIME; 535 - if (ip->i_diflags & XFS_DIFLAG_PREALLOC) 536 - flags |= FS_XFLAG_PREALLOC; 537 - if (ip->i_diflags & XFS_DIFLAG_IMMUTABLE) 538 - flags |= FS_XFLAG_IMMUTABLE; 539 - if (ip->i_diflags & XFS_DIFLAG_APPEND) 540 - flags |= FS_XFLAG_APPEND; 541 - if (ip->i_diflags & XFS_DIFLAG_SYNC) 542 - flags |= FS_XFLAG_SYNC; 543 - if (ip->i_diflags & XFS_DIFLAG_NOATIME) 544 - flags |= FS_XFLAG_NOATIME; 545 - if (ip->i_diflags & XFS_DIFLAG_NODUMP) 546 - flags |= FS_XFLAG_NODUMP; 547 - if (ip->i_diflags & XFS_DIFLAG_RTINHERIT) 548 - flags |= FS_XFLAG_RTINHERIT; 549 - if (ip->i_diflags & XFS_DIFLAG_PROJINHERIT) 550 - flags |= FS_XFLAG_PROJINHERIT; 551 - if (ip->i_diflags & XFS_DIFLAG_NOSYMLINKS) 552 - flags |= FS_XFLAG_NOSYMLINKS; 553 - if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) 554 - flags |= FS_XFLAG_EXTSIZE; 555 - if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) 556 - flags |= FS_XFLAG_EXTSZINHERIT; 557 - if (ip->i_diflags & XFS_DIFLAG_NODEFRAG) 558 - flags |= FS_XFLAG_NODEFRAG; 559 - if (ip->i_diflags & XFS_DIFLAG_FILESTREAM) 560 - flags |= FS_XFLAG_FILESTREAM; 561 - } 562 - 563 - if (ip->i_diflags2 & XFS_DIFLAG2_ANY) { 564 - if (ip->i_diflags2 & XFS_DIFLAG2_DAX) 565 - flags |= FS_XFLAG_DAX; 566 - if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 567 - flags |= FS_XFLAG_COWEXTSIZE; 568 - } 569 - 570 - if 
(xfs_inode_has_attr_fork(ip)) 571 - flags |= FS_XFLAG_HASATTR; 572 - return flags; 573 - } 574 - 575 570 /* 576 571 * Lookups up an inode from "name". If ci_name is not NULL, then a CI match 577 572 * is allowed, otherwise it has to be an exact match. If a CI match is found, ··· 564 657 return error; 565 658 } 566 659 567 - /* Propagate di_flags from a parent inode to a child inode. */ 568 - static void 569 - xfs_inode_inherit_flags( 570 - struct xfs_inode *ip, 571 - const struct xfs_inode *pip) 572 - { 573 - unsigned int di_flags = 0; 574 - xfs_failaddr_t failaddr; 575 - umode_t mode = VFS_I(ip)->i_mode; 576 - 577 - if (S_ISDIR(mode)) { 578 - if (pip->i_diflags & XFS_DIFLAG_RTINHERIT) 579 - di_flags |= XFS_DIFLAG_RTINHERIT; 580 - if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 581 - di_flags |= XFS_DIFLAG_EXTSZINHERIT; 582 - ip->i_extsize = pip->i_extsize; 583 - } 584 - if (pip->i_diflags & XFS_DIFLAG_PROJINHERIT) 585 - di_flags |= XFS_DIFLAG_PROJINHERIT; 586 - } else if (S_ISREG(mode)) { 587 - if ((pip->i_diflags & XFS_DIFLAG_RTINHERIT) && 588 - xfs_has_realtime(ip->i_mount)) 589 - di_flags |= XFS_DIFLAG_REALTIME; 590 - if (pip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 591 - di_flags |= XFS_DIFLAG_EXTSIZE; 592 - ip->i_extsize = pip->i_extsize; 593 - } 594 - } 595 - if ((pip->i_diflags & XFS_DIFLAG_NOATIME) && 596 - xfs_inherit_noatime) 597 - di_flags |= XFS_DIFLAG_NOATIME; 598 - if ((pip->i_diflags & XFS_DIFLAG_NODUMP) && 599 - xfs_inherit_nodump) 600 - di_flags |= XFS_DIFLAG_NODUMP; 601 - if ((pip->i_diflags & XFS_DIFLAG_SYNC) && 602 - xfs_inherit_sync) 603 - di_flags |= XFS_DIFLAG_SYNC; 604 - if ((pip->i_diflags & XFS_DIFLAG_NOSYMLINKS) && 605 - xfs_inherit_nosymlinks) 606 - di_flags |= XFS_DIFLAG_NOSYMLINKS; 607 - if ((pip->i_diflags & XFS_DIFLAG_NODEFRAG) && 608 - xfs_inherit_nodefrag) 609 - di_flags |= XFS_DIFLAG_NODEFRAG; 610 - if (pip->i_diflags & XFS_DIFLAG_FILESTREAM) 611 - di_flags |= XFS_DIFLAG_FILESTREAM; 612 - 613 - ip->i_diflags |= di_flags; 614 - 615 
- /* 616 - * Inode verifiers on older kernels only check that the extent size 617 - * hint is an integer multiple of the rt extent size on realtime files. 618 - * They did not check the hint alignment on a directory with both 619 - * rtinherit and extszinherit flags set. If the misaligned hint is 620 - * propagated from a directory into a new realtime file, new file 621 - * allocations will fail due to math errors in the rt allocator and/or 622 - * trip the verifiers. Validate the hint settings in the new file so 623 - * that we don't let broken hints propagate. 624 - */ 625 - failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize, 626 - VFS_I(ip)->i_mode, ip->i_diflags); 627 - if (failaddr) { 628 - ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | 629 - XFS_DIFLAG_EXTSZINHERIT); 630 - ip->i_extsize = 0; 631 - } 632 - } 633 - 634 - /* Propagate di_flags2 from a parent inode to a child inode. */ 635 - static void 636 - xfs_inode_inherit_flags2( 637 - struct xfs_inode *ip, 638 - const struct xfs_inode *pip) 639 - { 640 - xfs_failaddr_t failaddr; 641 - 642 - if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { 643 - ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; 644 - ip->i_cowextsize = pip->i_cowextsize; 645 - } 646 - if (pip->i_diflags2 & XFS_DIFLAG2_DAX) 647 - ip->i_diflags2 |= XFS_DIFLAG2_DAX; 648 - 649 - /* Don't let invalid cowextsize hints propagate. */ 650 - failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, 651 - VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2); 652 - if (failaddr) { 653 - ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; 654 - ip->i_cowextsize = 0; 655 - } 656 - } 657 - 658 660 /* 659 661 * Initialise a newly allocated inode and return the in-core inode to the 660 662 * caller locked exclusively. 
··· 571 755 * Caller is responsible for unlocking the inode manually upon return 572 756 */ 573 757 int 574 - xfs_init_new_inode( 575 - struct mnt_idmap *idmap, 758 + xfs_icreate( 576 759 struct xfs_trans *tp, 577 - struct xfs_inode *pip, 578 760 xfs_ino_t ino, 579 - umode_t mode, 580 - xfs_nlink_t nlink, 581 - dev_t rdev, 582 - prid_t prid, 583 - bool init_xattrs, 761 + const struct xfs_icreate_args *args, 584 762 struct xfs_inode **ipp) 585 763 { 586 - struct inode *dir = pip ? VFS_I(pip) : NULL; 587 764 struct xfs_mount *mp = tp->t_mountp; 588 - struct xfs_inode *ip; 589 - unsigned int flags; 765 + struct xfs_inode *ip = NULL; 590 766 int error; 591 - struct timespec64 tv; 592 - struct inode *inode; 593 - 594 - /* 595 - * Protect against obviously corrupt allocation btree records. Later 596 - * xfs_iget checks will catch re-allocation of other active in-memory 597 - * and on-disk inodes. If we don't catch reallocating the parent inode 598 - * here we will deadlock in xfs_iget() so we have to do these checks 599 - * first. 
600 - */ 601 - if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) { 602 - xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino); 603 - xfs_agno_mark_sick(mp, XFS_INO_TO_AGNO(mp, ino), 604 - XFS_SICK_AG_INOBT); 605 - return -EFSCORRUPTED; 606 - } 607 767 608 768 /* 609 769 * Get the in-core inode with the lock held exclusively to prevent ··· 590 798 return error; 591 799 592 800 ASSERT(ip != NULL); 593 - inode = VFS_I(ip); 594 - set_nlink(inode, nlink); 595 - inode->i_rdev = rdev; 596 - ip->i_projid = prid; 597 - 598 - if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { 599 - inode_fsuid_set(inode, idmap); 600 - inode->i_gid = dir->i_gid; 601 - inode->i_mode = mode; 602 - } else { 603 - inode_init_owner(idmap, inode, dir, mode); 604 - } 605 - 606 - /* 607 - * If the group ID of the new file does not match the effective group 608 - * ID or one of the supplementary group IDs, the S_ISGID bit is cleared 609 - * (and only if the irix_sgid_inherit compatibility variable is set). 
610 - */ 611 - if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && 612 - !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode))) 613 - inode->i_mode &= ~S_ISGID; 614 - 615 - ip->i_disk_size = 0; 616 - ip->i_df.if_nextents = 0; 617 - ASSERT(ip->i_nblocks == 0); 618 - 619 - tv = inode_set_ctime_current(inode); 620 - inode_set_mtime_to_ts(inode, tv); 621 - inode_set_atime_to_ts(inode, tv); 622 - 623 - ip->i_extsize = 0; 624 - ip->i_diflags = 0; 625 - 626 - if (xfs_has_v3inodes(mp)) { 627 - inode_set_iversion(inode, 1); 628 - ip->i_cowextsize = 0; 629 - ip->i_crtime = tv; 630 - } 631 - 632 - flags = XFS_ILOG_CORE; 633 - switch (mode & S_IFMT) { 634 - case S_IFIFO: 635 - case S_IFCHR: 636 - case S_IFBLK: 637 - case S_IFSOCK: 638 - ip->i_df.if_format = XFS_DINODE_FMT_DEV; 639 - flags |= XFS_ILOG_DEV; 640 - break; 641 - case S_IFREG: 642 - case S_IFDIR: 643 - if (pip && (pip->i_diflags & XFS_DIFLAG_ANY)) 644 - xfs_inode_inherit_flags(ip, pip); 645 - if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY)) 646 - xfs_inode_inherit_flags2(ip, pip); 647 - fallthrough; 648 - case S_IFLNK: 649 - ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 650 - ip->i_df.if_bytes = 0; 651 - ip->i_df.if_data = NULL; 652 - break; 653 - default: 654 - ASSERT(0); 655 - } 656 - 657 - /* 658 - * If we need to create attributes immediately after allocating the 659 - * inode, initialise an empty attribute fork right now. We use the 660 - * default fork offset for attributes here as we don't know exactly what 661 - * size or how many attributes we might be adding. We can do this 662 - * safely here because we know the data fork is completely empty and 663 - * this saves us from needing to run a separate transaction to set the 664 - * fork offset in the immediate future. 
665 - */ 666 - if (init_xattrs) { 667 - ip->i_forkoff = xfs_default_attroffset(ip) >> 3; 668 - xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); 669 - 670 - if (!xfs_has_attr(mp)) { 671 - spin_lock(&mp->m_sb_lock); 672 - xfs_add_attr(mp); 673 - spin_unlock(&mp->m_sb_lock); 674 - xfs_log_sb(tp); 675 - } 676 - } 677 - 678 - /* 679 - * Log the new values stuffed into the inode. 680 - */ 681 801 xfs_trans_ijoin(tp, ip, 0); 682 - xfs_trans_log_inode(tp, ip, flags); 802 + xfs_inode_init(tp, args, ip); 683 803 684 804 /* now that we have an i_mode we can setup the inode structure */ 685 805 xfs_setup_inode(ip); ··· 600 896 return 0; 601 897 } 602 898 603 - /* 604 - * Decrement the link count on an inode & log the change. If this causes the 605 - * link count to go to zero, move the inode to AGI unlinked list so that it can 606 - * be freed when the last active reference goes away via xfs_inactive(). 607 - */ 899 + /* Return dquots for the ids that will be assigned to a new file. */ 608 900 int 609 - xfs_droplink( 610 - struct xfs_trans *tp, 611 - struct xfs_inode *ip) 901 + xfs_icreate_dqalloc( 902 + const struct xfs_icreate_args *args, 903 + struct xfs_dquot **udqpp, 904 + struct xfs_dquot **gdqpp, 905 + struct xfs_dquot **pdqpp) 612 906 { 613 - struct inode *inode = VFS_I(ip); 907 + struct inode *dir = VFS_I(args->pip); 908 + kuid_t uid = GLOBAL_ROOT_UID; 909 + kgid_t gid = GLOBAL_ROOT_GID; 910 + prid_t prid = 0; 911 + unsigned int flags = XFS_QMOPT_QUOTALL; 614 912 615 - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 616 - 617 - if (inode->i_nlink == 0) { 618 - xfs_info_ratelimited(tp->t_mountp, 619 - "Inode 0x%llx link count dropped below zero. Pinning link count.", 620 - ip->i_ino); 621 - set_nlink(inode, XFS_NLINK_PINNED); 913 + if (args->idmap) { 914 + /* 915 + * The uid/gid computation code must match what the VFS uses to 916 + * assign i_[ug]id. INHERIT adjusts the gid computation for 917 + * setgid/grpid systems. 
918 + */ 919 + uid = mapped_fsuid(args->idmap, i_user_ns(dir)); 920 + gid = mapped_fsgid(args->idmap, i_user_ns(dir)); 921 + prid = xfs_get_initial_prid(args->pip); 922 + flags |= XFS_QMOPT_INHERIT; 622 923 } 623 - if (inode->i_nlink != XFS_NLINK_PINNED) 624 - drop_nlink(inode); 625 924 626 - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 925 + *udqpp = *gdqpp = *pdqpp = NULL; 627 926 628 - if (inode->i_nlink) 629 - return 0; 630 - 631 - return xfs_iunlink(tp, ip); 927 + return xfs_qm_vop_dqalloc(args->pip, uid, gid, prid, flags, udqpp, 928 + gdqpp, pdqpp); 632 929 } 633 - 634 - /* 635 - * Increment the link count on an inode & log the change. 636 - */ 637 - void 638 - xfs_bumplink( 639 - struct xfs_trans *tp, 640 - struct xfs_inode *ip) 641 - { 642 - struct inode *inode = VFS_I(ip); 643 - 644 - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 645 - 646 - if (inode->i_nlink == XFS_NLINK_PINNED - 1) 647 - xfs_info_ratelimited(tp->t_mountp, 648 - "Inode 0x%llx link count exceeded maximum. Pinning link count.", 649 - ip->i_ino); 650 - if (inode->i_nlink != XFS_NLINK_PINNED) 651 - inc_nlink(inode); 652 - 653 - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 654 - } 655 - 656 - #ifdef CONFIG_XFS_LIVE_HOOKS 657 - /* 658 - * Use a static key here to reduce the overhead of directory live update hooks. 659 - * If the compiler supports jump labels, the static branch will be replaced by 660 - * a nop sled when there are no hook users. Online fsck is currently the only 661 - * caller, so this is a reasonable tradeoff. 662 - * 663 - * Note: Patching the kernel code requires taking the cpu hotplug lock. Other 664 - * parts of the kernel allocate memory with that lock held, which means that 665 - * XFS callers cannot hold any locks that might be used by memory reclaim or 666 - * writeback when calling the static_branch_{inc,dec} functions. 
667 - */ 668 - DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch); 669 - 670 - void 671 - xfs_dir_hook_disable(void) 672 - { 673 - xfs_hooks_switch_off(&xfs_dir_hooks_switch); 674 - } 675 - 676 - void 677 - xfs_dir_hook_enable(void) 678 - { 679 - xfs_hooks_switch_on(&xfs_dir_hooks_switch); 680 - } 681 - 682 - /* Call hooks for a directory update relating to a child dirent update. */ 683 - inline void 684 - xfs_dir_update_hook( 685 - struct xfs_inode *dp, 686 - struct xfs_inode *ip, 687 - int delta, 688 - const struct xfs_name *name) 689 - { 690 - if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) { 691 - struct xfs_dir_update_params p = { 692 - .dp = dp, 693 - .ip = ip, 694 - .delta = delta, 695 - .name = name, 696 - }; 697 - struct xfs_mount *mp = ip->i_mount; 698 - 699 - xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p); 700 - } 701 - } 702 - 703 - /* Call the specified function during a directory update. */ 704 - int 705 - xfs_dir_hook_add( 706 - struct xfs_mount *mp, 707 - struct xfs_dir_hook *hook) 708 - { 709 - return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook); 710 - } 711 - 712 - /* Stop calling the specified function during a directory update. */ 713 - void 714 - xfs_dir_hook_del( 715 - struct xfs_mount *mp, 716 - struct xfs_dir_hook *hook) 717 - { 718 - xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook); 719 - } 720 - 721 - /* Configure directory update hook functions. 
*/ 722 - void 723 - xfs_dir_hook_setup( 724 - struct xfs_dir_hook *hook, 725 - notifier_fn_t mod_fn) 726 - { 727 - xfs_hook_setup(&hook->dirent_hook, mod_fn); 728 - } 729 - #endif /* CONFIG_XFS_LIVE_HOOKS */ 730 930 731 931 int 732 932 xfs_create( 733 - struct mnt_idmap *idmap, 734 - struct xfs_inode *dp, 933 + const struct xfs_icreate_args *args, 735 934 struct xfs_name *name, 736 - umode_t mode, 737 - dev_t rdev, 738 - bool init_xattrs, 739 - xfs_inode_t **ipp) 935 + struct xfs_inode **ipp) 740 936 { 741 - int is_dir = S_ISDIR(mode); 937 + struct xfs_inode *dp = args->pip; 938 + struct xfs_dir_update du = { 939 + .dp = dp, 940 + .name = name, 941 + }; 742 942 struct xfs_mount *mp = dp->i_mount; 743 - struct xfs_inode *ip = NULL; 744 943 struct xfs_trans *tp = NULL; 745 - int error; 746 - bool unlock_dp_on_error = false; 747 - prid_t prid; 748 - struct xfs_dquot *udqp = NULL; 749 - struct xfs_dquot *gdqp = NULL; 750 - struct xfs_dquot *pdqp = NULL; 944 + struct xfs_dquot *udqp; 945 + struct xfs_dquot *gdqp; 946 + struct xfs_dquot *pdqp; 751 947 struct xfs_trans_res *tres; 752 - uint resblks; 753 948 xfs_ino_t ino; 754 - struct xfs_parent_args *ppargs; 949 + bool unlock_dp_on_error = false; 950 + bool is_dir = S_ISDIR(args->mode); 951 + uint resblks; 952 + int error; 755 953 756 954 trace_xfs_create(dp, name); 757 955 ··· 662 1056 if (xfs_ifork_zapped(dp, XFS_DATA_FORK)) 663 1057 return -EIO; 664 1058 665 - prid = xfs_get_initial_prid(dp); 666 - 667 - /* 668 - * Make sure that we have allocated dquot(s) on disk. 669 - */ 670 - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), 671 - mapped_fsgid(idmap, &init_user_ns), prid, 672 - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 673 - &udqp, &gdqp, &pdqp); 1059 + /* Make sure that we have allocated dquot(s) on disk. 
*/ 1060 + error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp); 674 1061 if (error) 675 1062 return error; 676 1063 ··· 675 1076 tres = &M_RES(mp)->tr_create; 676 1077 } 677 1078 678 - error = xfs_parent_start(mp, &ppargs); 1079 + error = xfs_parent_start(mp, &du.ppargs); 679 1080 if (error) 680 1081 goto out_release_dquots; 681 1082 ··· 704 1105 * entry pointing to them, but a directory also the "." entry 705 1106 * pointing to itself. 706 1107 */ 707 - error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); 1108 + error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); 708 1109 if (!error) 709 - error = xfs_init_new_inode(idmap, tp, dp, ino, mode, 710 - is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip); 1110 + error = xfs_icreate(tp, ino, args, &du.ip); 711 1111 if (error) 712 1112 goto out_trans_cancel; 713 1113 ··· 719 1121 */ 720 1122 xfs_trans_ijoin(tp, dp, 0); 721 1123 722 - error = xfs_dir_createname(tp, dp, name, ip->i_ino, 723 - resblks - XFS_IALLOC_SPACE_RES(mp)); 724 - if (error) { 725 - ASSERT(error != -ENOSPC); 1124 + error = xfs_dir_create_child(tp, resblks, &du); 1125 + if (error) 726 1126 goto out_trans_cancel; 727 - } 728 - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 729 - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 730 - 731 - if (is_dir) { 732 - error = xfs_dir_init(tp, ip, dp); 733 - if (error) 734 - goto out_trans_cancel; 735 - 736 - xfs_bumplink(tp, dp); 737 - } 738 - 739 - /* 740 - * If we have parent pointers, we need to add the attribute containing 741 - * the parent information now. 742 - */ 743 - if (ppargs) { 744 - error = xfs_parent_addname(tp, ppargs, dp, name, ip); 745 - if (error) 746 - goto out_trans_cancel; 747 - } 748 - 749 - /* 750 - * Create ip with a reference from dp, and add '.' and '..' references 751 - * if it's a directory. 
752 - */ 753 - xfs_dir_update_hook(dp, ip, 1, name); 754 1127 755 1128 /* 756 1129 * If this is a synchronous mount, make sure that the ··· 736 1167 * These ids of the inode couldn't have changed since the new 737 1168 * inode has been locked ever since it was created. 738 1169 */ 739 - xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1170 + xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp); 740 1171 741 1172 error = xfs_trans_commit(tp); 742 1173 if (error) ··· 746 1177 xfs_qm_dqrele(gdqp); 747 1178 xfs_qm_dqrele(pdqp); 748 1179 749 - *ipp = ip; 750 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 1180 + *ipp = du.ip; 1181 + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); 751 1182 xfs_iunlock(dp, XFS_ILOCK_EXCL); 752 - xfs_parent_finish(mp, ppargs); 1183 + xfs_parent_finish(mp, du.ppargs); 753 1184 return 0; 754 1185 755 1186 out_trans_cancel: ··· 760 1191 * setup of the inode and release the inode. This prevents recursive 761 1192 * transactions and deadlocks from xfs_inactive. 762 1193 */ 763 - if (ip) { 764 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 765 - xfs_finish_inode_setup(ip); 766 - xfs_irele(ip); 1194 + if (du.ip) { 1195 + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); 1196 + xfs_finish_inode_setup(du.ip); 1197 + xfs_irele(du.ip); 767 1198 } 768 1199 out_parent: 769 - xfs_parent_finish(mp, ppargs); 1200 + xfs_parent_finish(mp, du.ppargs); 770 1201 out_release_dquots: 771 1202 xfs_qm_dqrele(udqp); 772 1203 xfs_qm_dqrele(gdqp); ··· 779 1210 780 1211 int 781 1212 xfs_create_tmpfile( 782 - struct mnt_idmap *idmap, 783 - struct xfs_inode *dp, 784 - umode_t mode, 785 - bool init_xattrs, 1213 + const struct xfs_icreate_args *args, 786 1214 struct xfs_inode **ipp) 787 1215 { 1216 + struct xfs_inode *dp = args->pip; 788 1217 struct xfs_mount *mp = dp->i_mount; 789 1218 struct xfs_inode *ip = NULL; 790 1219 struct xfs_trans *tp = NULL; 791 - int error; 792 - prid_t prid; 793 - struct xfs_dquot *udqp = NULL; 794 - struct xfs_dquot *gdqp = NULL; 795 - struct xfs_dquot *pdqp = NULL; 1220 + struct 
xfs_dquot *udqp; 1221 + struct xfs_dquot *gdqp; 1222 + struct xfs_dquot *pdqp; 796 1223 struct xfs_trans_res *tres; 797 - uint resblks; 798 1224 xfs_ino_t ino; 1225 + uint resblks; 1226 + int error; 1227 + 1228 + ASSERT(args->flags & XFS_ICREATE_TMPFILE); 799 1229 800 1230 if (xfs_is_shutdown(mp)) 801 1231 return -EIO; 802 1232 803 - prid = xfs_get_initial_prid(dp); 804 - 805 - /* 806 - * Make sure that we have allocated dquot(s) on disk. 807 - */ 808 - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), 809 - mapped_fsgid(idmap, &init_user_ns), prid, 810 - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 811 - &udqp, &gdqp, &pdqp); 1233 + /* Make sure that we have allocated dquot(s) on disk. */ 1234 + error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp); 812 1235 if (error) 813 1236 return error; 814 1237 ··· 812 1251 if (error) 813 1252 goto out_release_dquots; 814 1253 815 - error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); 1254 + error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); 816 1255 if (!error) 817 - error = xfs_init_new_inode(idmap, tp, dp, ino, mode, 818 - 0, 0, prid, init_xattrs, &ip); 1256 + error = xfs_icreate(tp, ino, args, &ip); 819 1257 if (error) 820 1258 goto out_trans_cancel; 821 1259 ··· 871 1311 struct xfs_inode *sip, 872 1312 struct xfs_name *target_name) 873 1313 { 1314 + struct xfs_dir_update du = { 1315 + .dp = tdp, 1316 + .name = target_name, 1317 + .ip = sip, 1318 + }; 874 1319 struct xfs_mount *mp = tdp->i_mount; 875 1320 struct xfs_trans *tp; 876 1321 int error, nospace_error = 0; 877 1322 int resblks; 878 - struct xfs_parent_args *ppargs; 879 1323 880 1324 trace_xfs_link(tdp, target_name); 881 1325 ··· 898 1334 if (error) 899 1335 goto std_return; 900 1336 901 - error = xfs_parent_start(mp, &ppargs); 1337 + error = xfs_parent_start(mp, &du.ppargs); 902 1338 if (error) 903 1339 goto std_return; 904 1340 ··· 913 1349 * pointers are enabled because we can't back out if the xattrs must 914 1350 * grow. 
915 1351 */ 916 - if (ppargs && nospace_error) { 1352 + if (du.ppargs && nospace_error) { 917 1353 error = nospace_error; 918 1354 goto error_return; 919 1355 } ··· 940 1376 } 941 1377 } 942 1378 943 - if (!resblks) { 944 - error = xfs_dir_canenter(tp, tdp, target_name); 945 - if (error) 946 - goto error_return; 947 - } 948 - 949 - /* 950 - * Handle initial link state of O_TMPFILE inode 951 - */ 952 - if (VFS_I(sip)->i_nlink == 0) { 953 - struct xfs_perag *pag; 954 - 955 - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sip->i_ino)); 956 - error = xfs_iunlink_remove(tp, pag, sip); 957 - xfs_perag_put(pag); 958 - if (error) 959 - goto error_return; 960 - } 961 - 962 - error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 963 - resblks); 1379 + error = xfs_dir_add_child(tp, resblks, &du); 964 1380 if (error) 965 1381 goto error_return; 966 - xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 967 - xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 968 - 969 - xfs_bumplink(tp, sip); 970 - 971 - /* 972 - * If we have parent pointers, we now need to add the parent record to 973 - * the attribute fork of the inode. If this is the initial parent 974 - * attribute, we need to create it correctly, otherwise we can just add 975 - * the parent to the inode. 
976 - */ 977 - if (ppargs) { 978 - error = xfs_parent_addname(tp, ppargs, tdp, target_name, sip); 979 - if (error) 980 - goto error_return; 981 - } 982 - 983 - xfs_dir_update_hook(tdp, sip, 1, target_name); 984 1382 985 1383 /* 986 1384 * If this is a synchronous mount, make sure that the ··· 955 1429 error = xfs_trans_commit(tp); 956 1430 xfs_iunlock(tdp, XFS_ILOCK_EXCL); 957 1431 xfs_iunlock(sip, XFS_ILOCK_EXCL); 958 - xfs_parent_finish(mp, ppargs); 1432 + xfs_parent_finish(mp, du.ppargs); 959 1433 return error; 960 1434 961 1435 error_return: ··· 963 1437 xfs_iunlock(tdp, XFS_ILOCK_EXCL); 964 1438 xfs_iunlock(sip, XFS_ILOCK_EXCL); 965 1439 out_parent: 966 - xfs_parent_finish(mp, ppargs); 1440 + xfs_parent_finish(mp, du.ppargs); 967 1441 std_return: 968 1442 if (error == -ENOSPC && nospace_error) 969 1443 error = nospace_error; ··· 1550 2024 } 1551 2025 1552 2026 /* 1553 - * In-Core Unlinked List Lookups 1554 - * ============================= 1555 - * 1556 - * Every inode is supposed to be reachable from some other piece of metadata 1557 - * with the exception of the root directory. Inodes with a connection to a 1558 - * file descriptor but not linked from anywhere in the on-disk directory tree 1559 - * are collectively known as unlinked inodes, though the filesystem itself 1560 - * maintains links to these inodes so that on-disk metadata are consistent. 1561 - * 1562 - * XFS implements a per-AG on-disk hash table of unlinked inodes. The AGI 1563 - * header contains a number of buckets that point to an inode, and each inode 1564 - * record has a pointer to the next inode in the hash chain. This 1565 - * singly-linked list causes scaling problems in the iunlink remove function 1566 - * because we must walk that list to find the inode that points to the inode 1567 - * being removed from the unlinked hash bucket list. 1568 - * 1569 - * Hence we keep an in-memory double linked list to link each inode on an 1570 - * unlinked list. 
Because there are 64 unlinked lists per AGI, keeping pointer 1571 - * based lists would require having 64 list heads in the perag, one for each 1572 - * list. This is expensive in terms of memory (think millions of AGs) and cache 1573 - * misses on lookups. Instead, use the fact that inodes on the unlinked list 1574 - * must be referenced at the VFS level to keep them on the list and hence we 1575 - * have an existence guarantee for inodes on the unlinked list. 1576 - * 1577 - * Given we have an existence guarantee, we can use lockless inode cache lookups 1578 - * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode 1579 - * for the double linked unlinked list, and we don't need any extra locking to 1580 - * keep the list safe as all manipulations are done under the AGI buffer lock. 1581 - * Keeping the list up to date does not require memory allocation, just finding 1582 - * the XFS inode and updating the next/prev unlinked list aginos. 1583 - */ 1584 - 1585 - /* 1586 2027 * Find an inode on the unlinked list. This does not take references to the 1587 2028 * inode as we have existence guarantees by holding the AGI buffer lock and that 1588 2029 * only unlinked, referenced inodes can be on the unlinked inode list. If we ··· 1584 2091 } 1585 2092 1586 2093 /* 1587 - * Update the prev pointer of the next agino. Returns -ENOLINK if the inode 1588 - * is not in cache. 1589 - */ 1590 - static int 1591 - xfs_iunlink_update_backref( 1592 - struct xfs_perag *pag, 1593 - xfs_agino_t prev_agino, 1594 - xfs_agino_t next_agino) 1595 - { 1596 - struct xfs_inode *ip; 1597 - 1598 - /* No update necessary if we are at the end of the list. */ 1599 - if (next_agino == NULLAGINO) 1600 - return 0; 1601 - 1602 - ip = xfs_iunlink_lookup(pag, next_agino); 1603 - if (!ip) 1604 - return -ENOLINK; 1605 - 1606 - ip->i_prev_unlinked = prev_agino; 1607 - return 0; 1608 - } 1609 - 1610 - /* 1611 - * Point the AGI unlinked bucket at an inode and log the results. 
The caller 1612 - * is responsible for validating the old value. 1613 - */ 1614 - STATIC int 1615 - xfs_iunlink_update_bucket( 1616 - struct xfs_trans *tp, 1617 - struct xfs_perag *pag, 1618 - struct xfs_buf *agibp, 1619 - unsigned int bucket_index, 1620 - xfs_agino_t new_agino) 1621 - { 1622 - struct xfs_agi *agi = agibp->b_addr; 1623 - xfs_agino_t old_value; 1624 - int offset; 1625 - 1626 - ASSERT(xfs_verify_agino_or_null(pag, new_agino)); 1627 - 1628 - old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); 1629 - trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, 1630 - old_value, new_agino); 1631 - 1632 - /* 1633 - * We should never find the head of the list already set to the value 1634 - * passed in because either we're adding or removing ourselves from the 1635 - * head of the list. 1636 - */ 1637 - if (old_value == new_agino) { 1638 - xfs_buf_mark_corrupt(agibp); 1639 - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 1640 - return -EFSCORRUPTED; 1641 - } 1642 - 1643 - agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); 1644 - offset = offsetof(struct xfs_agi, agi_unlinked) + 1645 - (sizeof(xfs_agino_t) * bucket_index); 1646 - xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1); 1647 - return 0; 1648 - } 1649 - 1650 - /* 1651 2094 * Load the inode @next_agino into the cache and set its prev_unlinked pointer 1652 2095 * to @prev_agino. Caller must hold the AGI to synchronize with other changes 1653 2096 * to the unlinked list. 
1654 2097 */ 1655 - STATIC int 2098 + int 1656 2099 xfs_iunlink_reload_next( 1657 2100 struct xfs_trans *tp, 1658 2101 struct xfs_buf *agibp, ··· 1642 2213 xfs_iflags_set(next_ip, XFS_IQUOTAUNCHECKED); 1643 2214 xfs_irele(next_ip); 1644 2215 return error; 1645 - } 1646 - 1647 - static int 1648 - xfs_iunlink_insert_inode( 1649 - struct xfs_trans *tp, 1650 - struct xfs_perag *pag, 1651 - struct xfs_buf *agibp, 1652 - struct xfs_inode *ip) 1653 - { 1654 - struct xfs_mount *mp = tp->t_mountp; 1655 - struct xfs_agi *agi = agibp->b_addr; 1656 - xfs_agino_t next_agino; 1657 - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 1658 - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 1659 - int error; 1660 - 1661 - /* 1662 - * Get the index into the agi hash table for the list this inode will 1663 - * go on. Make sure the pointer isn't garbage and that this inode 1664 - * isn't already on the list. 1665 - */ 1666 - next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 1667 - if (next_agino == agino || 1668 - !xfs_verify_agino_or_null(pag, next_agino)) { 1669 - xfs_buf_mark_corrupt(agibp); 1670 - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 1671 - return -EFSCORRUPTED; 1672 - } 1673 - 1674 - /* 1675 - * Update the prev pointer in the next inode to point back to this 1676 - * inode. 1677 - */ 1678 - error = xfs_iunlink_update_backref(pag, agino, next_agino); 1679 - if (error == -ENOLINK) 1680 - error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); 1681 - if (error) 1682 - return error; 1683 - 1684 - if (next_agino != NULLAGINO) { 1685 - /* 1686 - * There is already another inode in the bucket, so point this 1687 - * inode to the current head of the list. 1688 - */ 1689 - error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); 1690 - if (error) 1691 - return error; 1692 - ip->i_next_unlinked = next_agino; 1693 - } 1694 - 1695 - /* Point the head of the list to point to this inode. 
*/ 1696 - ip->i_prev_unlinked = NULLAGINO; 1697 - return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); 1698 - } 1699 - 1700 - /* 1701 - * This is called when the inode's link count has gone to 0 or we are creating 1702 - * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0. 1703 - * 1704 - * We place the on-disk inode on a list in the AGI. It will be pulled from this 1705 - * list when the inode is freed. 1706 - */ 1707 - int 1708 - xfs_iunlink( 1709 - struct xfs_trans *tp, 1710 - struct xfs_inode *ip) 1711 - { 1712 - struct xfs_mount *mp = tp->t_mountp; 1713 - struct xfs_perag *pag; 1714 - struct xfs_buf *agibp; 1715 - int error; 1716 - 1717 - ASSERT(VFS_I(ip)->i_nlink == 0); 1718 - ASSERT(VFS_I(ip)->i_mode != 0); 1719 - trace_xfs_iunlink(ip); 1720 - 1721 - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1722 - 1723 - /* Get the agi buffer first. It ensures lock ordering on the list. */ 1724 - error = xfs_read_agi(pag, tp, 0, &agibp); 1725 - if (error) 1726 - goto out; 1727 - 1728 - error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); 1729 - out: 1730 - xfs_perag_put(pag); 1731 - return error; 1732 - } 1733 - 1734 - static int 1735 - xfs_iunlink_remove_inode( 1736 - struct xfs_trans *tp, 1737 - struct xfs_perag *pag, 1738 - struct xfs_buf *agibp, 1739 - struct xfs_inode *ip) 1740 - { 1741 - struct xfs_mount *mp = tp->t_mountp; 1742 - struct xfs_agi *agi = agibp->b_addr; 1743 - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 1744 - xfs_agino_t head_agino; 1745 - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 1746 - int error; 1747 - 1748 - trace_xfs_iunlink_remove(ip); 1749 - 1750 - /* 1751 - * Get the index into the agi hash table for the list this inode will 1752 - * go on. Make sure the head pointer isn't garbage. 
1753 - */ 1754 - head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); 1755 - if (!xfs_verify_agino(pag, head_agino)) { 1756 - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 1757 - agi, sizeof(*agi)); 1758 - xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); 1759 - return -EFSCORRUPTED; 1760 - } 1761 - 1762 - /* 1763 - * Set our inode's next_unlinked pointer to NULL and then return 1764 - * the old pointer value so that we can update whatever was previous 1765 - * to us in the list to point to whatever was next in the list. 1766 - */ 1767 - error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); 1768 - if (error) 1769 - return error; 1770 - 1771 - /* 1772 - * Update the prev pointer in the next inode to point back to previous 1773 - * inode in the chain. 1774 - */ 1775 - error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, 1776 - ip->i_next_unlinked); 1777 - if (error == -ENOLINK) 1778 - error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, 1779 - ip->i_next_unlinked); 1780 - if (error) 1781 - return error; 1782 - 1783 - if (head_agino != agino) { 1784 - struct xfs_inode *prev_ip; 1785 - 1786 - prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); 1787 - if (!prev_ip) { 1788 - xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); 1789 - return -EFSCORRUPTED; 1790 - } 1791 - 1792 - error = xfs_iunlink_log_inode(tp, prev_ip, pag, 1793 - ip->i_next_unlinked); 1794 - prev_ip->i_next_unlinked = ip->i_next_unlinked; 1795 - } else { 1796 - /* Point the head of the list to the next unlinked inode. */ 1797 - error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, 1798 - ip->i_next_unlinked); 1799 - } 1800 - 1801 - ip->i_next_unlinked = NULLAGINO; 1802 - ip->i_prev_unlinked = 0; 1803 - return error; 1804 - } 1805 - 1806 - /* 1807 - * Pull the on-disk inode from the AGI unlinked list. 
1808 - */ 1809 - int 1810 - xfs_iunlink_remove( 1811 - struct xfs_trans *tp, 1812 - struct xfs_perag *pag, 1813 - struct xfs_inode *ip) 1814 - { 1815 - struct xfs_buf *agibp; 1816 - int error; 1817 - 1818 - trace_xfs_iunlink_remove(ip); 1819 - 1820 - /* Get the agi buffer first. It ensures lock ordering on the list. */ 1821 - error = xfs_read_agi(pag, tp, 0, &agibp); 1822 - if (error) 1823 - return error; 1824 - 1825 - return xfs_iunlink_remove_inode(tp, pag, agibp, ip); 1826 2216 } 1827 2217 1828 2218 /* ··· 1862 2614 1863 2615 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1864 2616 1865 - /* 1866 - * Free the inode first so that we guarantee that the AGI lock is going 1867 - * to be taken before we remove the inode from the unlinked list. This 1868 - * makes the AGI lock -> unlinked list modification order the same as 1869 - * used in O_TMPFILE creation. 1870 - */ 1871 - error = xfs_difree(tp, pag, ip->i_ino, &xic); 2617 + error = xfs_inode_uninit(tp, pag, ip, &xic); 1872 2618 if (error) 1873 2619 goto out; 1874 2620 1875 - error = xfs_iunlink_remove(tp, pag, ip); 1876 - if (error) 1877 - goto out; 1878 - 1879 - /* 1880 - * Free any local-format data sitting around before we reset the 1881 - * data fork to extents format. Note that the attr fork data has 1882 - * already been freed by xfs_attr_inactive. 
1883 - */ 1884 - if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { 1885 - kfree(ip->i_df.if_data); 1886 - ip->i_df.if_data = NULL; 1887 - ip->i_df.if_bytes = 0; 1888 - } 1889 - 1890 - VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ 1891 - ip->i_diflags = 0; 1892 - ip->i_diflags2 = mp->m_ino_geo.new_diflags2; 1893 - ip->i_forkoff = 0; /* mark the attr fork not in use */ 1894 - ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; 1895 2621 if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS)) 1896 2622 xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS); 1897 2623 ··· 1873 2651 spin_lock(&iip->ili_lock); 1874 2652 iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER); 1875 2653 spin_unlock(&iip->ili_lock); 1876 - 1877 - /* 1878 - * Bump the generation count so no one will be confused 1879 - * by reincarnations of this inode. 1880 - */ 1881 - VFS_I(ip)->i_generation++; 1882 - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1883 2654 1884 2655 if (xic.deleted) 1885 2656 error = xfs_ifree_cluster(tp, pag, ip, &xic); ··· 1957 2742 struct xfs_name *name, 1958 2743 struct xfs_inode *ip) 1959 2744 { 2745 + struct xfs_dir_update du = { 2746 + .dp = dp, 2747 + .name = name, 2748 + .ip = ip, 2749 + }; 1960 2750 struct xfs_mount *mp = dp->i_mount; 1961 2751 struct xfs_trans *tp = NULL; 1962 2752 int is_dir = S_ISDIR(VFS_I(ip)->i_mode); 1963 2753 int dontcare; 1964 2754 int error = 0; 1965 2755 uint resblks; 1966 - struct xfs_parent_args *ppargs; 1967 2756 1968 2757 trace_xfs_remove(dp, name); 1969 2758 ··· 1984 2765 if (error) 1985 2766 goto std_return; 1986 2767 1987 - error = xfs_parent_start(mp, &ppargs); 2768 + error = xfs_parent_start(mp, &du.ppargs); 1988 2769 if (error) 1989 2770 goto std_return; 1990 2771 ··· 2007 2788 goto out_parent; 2008 2789 } 2009 2790 2010 - /* 2011 - * If we're removing a directory perform some additional validation. 
2012 - */ 2013 - if (is_dir) { 2014 - ASSERT(VFS_I(ip)->i_nlink >= 2); 2015 - if (VFS_I(ip)->i_nlink != 2) { 2016 - error = -ENOTEMPTY; 2017 - goto out_trans_cancel; 2018 - } 2019 - if (!xfs_dir_isempty(ip)) { 2020 - error = -ENOTEMPTY; 2021 - goto out_trans_cancel; 2022 - } 2023 - 2024 - /* Drop the link from ip's "..". */ 2025 - error = xfs_droplink(tp, dp); 2026 - if (error) 2027 - goto out_trans_cancel; 2028 - 2029 - /* Drop the "." link from ip to self. */ 2030 - error = xfs_droplink(tp, ip); 2031 - if (error) 2032 - goto out_trans_cancel; 2033 - 2034 - /* 2035 - * Point the unlinked child directory's ".." entry to the root 2036 - * directory to eliminate back-references to inodes that may 2037 - * get freed before the child directory is closed. If the fs 2038 - * gets shrunk, this can lead to dirent inode validation errors. 2039 - */ 2040 - if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) { 2041 - error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, 2042 - tp->t_mountp->m_sb.sb_rootino, 0); 2043 - if (error) 2044 - goto out_trans_cancel; 2045 - } 2046 - } else { 2047 - /* 2048 - * When removing a non-directory we need to log the parent 2049 - * inode here. For a directory this is done implicitly 2050 - * by the xfs_droplink call for the ".." entry. 2051 - */ 2052 - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2053 - } 2054 - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2055 - 2056 - /* Drop the link from dp to ip. */ 2057 - error = xfs_droplink(tp, ip); 2791 + error = xfs_dir_remove_child(tp, resblks, &du); 2058 2792 if (error) 2059 2793 goto out_trans_cancel; 2060 - 2061 - error = xfs_dir_removename(tp, dp, name, ip->i_ino, resblks); 2062 - if (error) { 2063 - ASSERT(error != -ENOENT); 2064 - goto out_trans_cancel; 2065 - } 2066 - 2067 - /* Remove parent pointer. 
*/ 2068 - if (ppargs) { 2069 - error = xfs_parent_removename(tp, ppargs, dp, name, ip); 2070 - if (error) 2071 - goto out_trans_cancel; 2072 - } 2073 - 2074 - /* 2075 - * Drop the link from dp to ip, and if ip was a directory, remove the 2076 - * '.' and '..' references since we freed the directory. 2077 - */ 2078 - xfs_dir_update_hook(dp, ip, -1, name); 2079 2794 2080 2795 /* 2081 2796 * If this is a synchronous mount, make sure that the ··· 2028 2875 2029 2876 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2030 2877 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2031 - xfs_parent_finish(mp, ppargs); 2878 + xfs_parent_finish(mp, du.ppargs); 2032 2879 return 0; 2033 2880 2034 2881 out_trans_cancel: ··· 2037 2884 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2038 2885 xfs_iunlock(dp, XFS_ILOCK_EXCL); 2039 2886 out_parent: 2040 - xfs_parent_finish(mp, ppargs); 2887 + xfs_parent_finish(mp, du.ppargs); 2041 2888 std_return: 2042 2889 return error; 2043 2890 } ··· 2117 2964 } 2118 2965 } 2119 2966 2120 - static int 2121 - xfs_finish_rename( 2122 - struct xfs_trans *tp) 2123 - { 2124 - /* 2125 - * If this is a synchronous mount, make sure that the rename transaction 2126 - * goes to disk before returning to the user. 
2127 - */ 2128 - if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp)) 2129 - xfs_trans_set_sync(tp); 2130 - 2131 - return xfs_trans_commit(tp); 2132 - } 2133 - 2134 - /* 2135 - * xfs_cross_rename() 2136 - * 2137 - * responsible for handling RENAME_EXCHANGE flag in renameat2() syscall 2138 - */ 2139 - STATIC int 2140 - xfs_cross_rename( 2141 - struct xfs_trans *tp, 2142 - struct xfs_inode *dp1, 2143 - struct xfs_name *name1, 2144 - struct xfs_inode *ip1, 2145 - struct xfs_parent_args *ip1_ppargs, 2146 - struct xfs_inode *dp2, 2147 - struct xfs_name *name2, 2148 - struct xfs_inode *ip2, 2149 - struct xfs_parent_args *ip2_ppargs, 2150 - int spaceres) 2151 - { 2152 - int error = 0; 2153 - int ip1_flags = 0; 2154 - int ip2_flags = 0; 2155 - int dp2_flags = 0; 2156 - 2157 - /* Swap inode number for dirent in first parent */ 2158 - error = xfs_dir_replace(tp, dp1, name1, ip2->i_ino, spaceres); 2159 - if (error) 2160 - goto out_trans_abort; 2161 - 2162 - /* Swap inode number for dirent in second parent */ 2163 - error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, spaceres); 2164 - if (error) 2165 - goto out_trans_abort; 2166 - 2167 - /* 2168 - * If we're renaming one or more directories across different parents, 2169 - * update the respective ".." entries (and link counts) to match the new 2170 - * parents. 2171 - */ 2172 - if (dp1 != dp2) { 2173 - dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 2174 - 2175 - if (S_ISDIR(VFS_I(ip2)->i_mode)) { 2176 - error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot, 2177 - dp1->i_ino, spaceres); 2178 - if (error) 2179 - goto out_trans_abort; 2180 - 2181 - /* transfer ip2 ".." 
reference to dp1 */ 2182 - if (!S_ISDIR(VFS_I(ip1)->i_mode)) { 2183 - error = xfs_droplink(tp, dp2); 2184 - if (error) 2185 - goto out_trans_abort; 2186 - xfs_bumplink(tp, dp1); 2187 - } 2188 - 2189 - /* 2190 - * Although ip1 isn't changed here, userspace needs 2191 - * to be warned about the change, so that applications 2192 - * relying on it (like backup ones), will properly 2193 - * notify the change 2194 - */ 2195 - ip1_flags |= XFS_ICHGTIME_CHG; 2196 - ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 2197 - } 2198 - 2199 - if (S_ISDIR(VFS_I(ip1)->i_mode)) { 2200 - error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot, 2201 - dp2->i_ino, spaceres); 2202 - if (error) 2203 - goto out_trans_abort; 2204 - 2205 - /* transfer ip1 ".." reference to dp2 */ 2206 - if (!S_ISDIR(VFS_I(ip2)->i_mode)) { 2207 - error = xfs_droplink(tp, dp1); 2208 - if (error) 2209 - goto out_trans_abort; 2210 - xfs_bumplink(tp, dp2); 2211 - } 2212 - 2213 - /* 2214 - * Although ip2 isn't changed here, userspace needs 2215 - * to be warned about the change, so that applications 2216 - * relying on it (like backup ones), will properly 2217 - * notify the change 2218 - */ 2219 - ip1_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 2220 - ip2_flags |= XFS_ICHGTIME_CHG; 2221 - } 2222 - } 2223 - 2224 - /* Schedule parent pointer replacements */ 2225 - if (ip1_ppargs) { 2226 - error = xfs_parent_replacename(tp, ip1_ppargs, dp1, name1, dp2, 2227 - name2, ip1); 2228 - if (error) 2229 - goto out_trans_abort; 2230 - } 2231 - 2232 - if (ip2_ppargs) { 2233 - error = xfs_parent_replacename(tp, ip2_ppargs, dp2, name2, dp1, 2234 - name1, ip2); 2235 - if (error) 2236 - goto out_trans_abort; 2237 - } 2238 - 2239 - if (ip1_flags) { 2240 - xfs_trans_ichgtime(tp, ip1, ip1_flags); 2241 - xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); 2242 - } 2243 - if (ip2_flags) { 2244 - xfs_trans_ichgtime(tp, ip2, ip2_flags); 2245 - xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); 2246 - } 2247 - if (dp2_flags) { 2248 - 
xfs_trans_ichgtime(tp, dp2, dp2_flags); 2249 - xfs_trans_log_inode(tp, dp2, XFS_ILOG_CORE); 2250 - } 2251 - xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2252 - xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 2253 - 2254 - /* 2255 - * Inform our hook clients that we've finished an exchange operation as 2256 - * follows: removed the source and target files from their directories; 2257 - * added the target to the source directory; and added the source to 2258 - * the target directory. All inodes are locked, so it's ok to model a 2259 - * rename this way so long as we say we deleted entries before we add 2260 - * new ones. 2261 - */ 2262 - xfs_dir_update_hook(dp1, ip1, -1, name1); 2263 - xfs_dir_update_hook(dp2, ip2, -1, name2); 2264 - xfs_dir_update_hook(dp1, ip2, 1, name1); 2265 - xfs_dir_update_hook(dp2, ip1, 1, name2); 2266 - 2267 - return xfs_finish_rename(tp); 2268 - 2269 - out_trans_abort: 2270 - xfs_trans_cancel(tp); 2271 - return error; 2272 - } 2273 - 2274 2967 /* 2275 2968 * xfs_rename_alloc_whiteout() 2276 2969 * ··· 2132 3133 struct xfs_inode *dp, 2133 3134 struct xfs_inode **wip) 2134 3135 { 3136 + struct xfs_icreate_args args = { 3137 + .idmap = idmap, 3138 + .pip = dp, 3139 + .mode = S_IFCHR | WHITEOUT_MODE, 3140 + .flags = XFS_ICREATE_TMPFILE, 3141 + }; 2135 3142 struct xfs_inode *tmpfile; 2136 3143 struct qstr name; 2137 3144 int error; 2138 3145 2139 - error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE, 2140 - xfs_has_parent(dp->i_mount), &tmpfile); 3146 + error = xfs_create_tmpfile(&args, &tmpfile); 2141 3147 if (error) 2142 3148 return error; 2143 3149 ··· 2182 3178 struct xfs_inode *target_ip, 2183 3179 unsigned int flags) 2184 3180 { 3181 + struct xfs_dir_update du_src = { 3182 + .dp = src_dp, 3183 + .name = src_name, 3184 + .ip = src_ip, 3185 + }; 3186 + struct xfs_dir_update du_tgt = { 3187 + .dp = target_dp, 3188 + .name = target_name, 3189 + .ip = target_ip, 3190 + }; 3191 + struct xfs_dir_update du_wip = { }; 
2185 3192 struct xfs_mount *mp = src_dp->i_mount; 2186 3193 struct xfs_trans *tp; 2187 - struct xfs_inode *wip = NULL; /* whiteout inode */ 2188 3194 struct xfs_inode *inodes[__XFS_SORT_INODES]; 2189 - struct xfs_parent_args *src_ppargs = NULL; 2190 - struct xfs_parent_args *tgt_ppargs = NULL; 2191 - struct xfs_parent_args *wip_ppargs = NULL; 2192 3195 int i; 2193 3196 int num_inodes = __XFS_SORT_INODES; 2194 3197 bool new_parent = (src_dp != target_dp); ··· 2215 3204 * appropriately. 2216 3205 */ 2217 3206 if (flags & RENAME_WHITEOUT) { 2218 - error = xfs_rename_alloc_whiteout(idmap, src_name, 2219 - target_dp, &wip); 3207 + error = xfs_rename_alloc_whiteout(idmap, src_name, target_dp, 3208 + &du_wip.ip); 2220 3209 if (error) 2221 3210 return error; 2222 3211 ··· 2224 3213 src_name->type = XFS_DIR3_FT_CHRDEV; 2225 3214 } 2226 3215 2227 - xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, 2228 - inodes, &num_inodes); 3216 + xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, du_wip.ip, 3217 + inodes, &num_inodes); 2229 3218 2230 - error = xfs_parent_start(mp, &src_ppargs); 3219 + error = xfs_parent_start(mp, &du_src.ppargs); 2231 3220 if (error) 2232 3221 goto out_release_wip; 2233 3222 2234 - if (wip) { 2235 - error = xfs_parent_start(mp, &wip_ppargs); 3223 + if (du_wip.ip) { 3224 + error = xfs_parent_start(mp, &du_wip.ppargs); 2236 3225 if (error) 2237 3226 goto out_src_ppargs; 2238 3227 } 2239 3228 2240 3229 if (target_ip) { 2241 - error = xfs_parent_start(mp, &tgt_ppargs); 3230 + error = xfs_parent_start(mp, &du_tgt.ppargs); 2242 3231 if (error) 2243 3232 goto out_wip_ppargs; 2244 3233 } ··· 2246 3235 retry: 2247 3236 nospace_error = 0; 2248 3237 spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL, 2249 - target_name->len, wip != NULL); 3238 + target_name->len, du_wip.ip != NULL); 2250 3239 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp); 2251 3240 if (error == -ENOSPC) { 2252 3241 nospace_error = 
error; ··· 2261 3250 * We don't allow reservationless renaming when parent pointers are 2262 3251 * enabled because we can't back out if the xattrs must grow. 2263 3252 */ 2264 - if (src_ppargs && nospace_error) { 3253 + if (du_src.ppargs && nospace_error) { 2265 3254 error = nospace_error; 2266 3255 xfs_trans_cancel(tp); 2267 3256 goto out_tgt_ppargs; ··· 2293 3282 xfs_trans_ijoin(tp, src_ip, 0); 2294 3283 if (target_ip) 2295 3284 xfs_trans_ijoin(tp, target_ip, 0); 2296 - if (wip) 2297 - xfs_trans_ijoin(tp, wip, 0); 3285 + if (du_wip.ip) 3286 + xfs_trans_ijoin(tp, du_wip.ip, 0); 2298 3287 2299 3288 /* 2300 3289 * If we are using project inheritance, we only allow renames ··· 2309 3298 2310 3299 /* RENAME_EXCHANGE is unique from here on. */ 2311 3300 if (flags & RENAME_EXCHANGE) { 2312 - error = xfs_cross_rename(tp, src_dp, src_name, src_ip, 2313 - src_ppargs, target_dp, target_name, target_ip, 2314 - tgt_ppargs, spaceres); 2315 - nospace_error = 0; 2316 - goto out_unlock; 3301 + error = xfs_dir_exchange_children(tp, &du_src, &du_tgt, 3302 + spaceres); 3303 + if (error) 3304 + goto out_trans_cancel; 3305 + goto out_commit; 2317 3306 } 2318 3307 2319 3308 /* ··· 2346 3335 * We don't allow quotaless renaming when parent pointers are enabled 2347 3336 * because we can't back out if the xattrs must grow. 2348 3337 */ 2349 - if (src_ppargs && nospace_error) { 3338 + if (du_src.ppargs && nospace_error) { 2350 3339 error = nospace_error; 2351 3340 goto out_trans_cancel; 2352 - } 2353 - 2354 - /* 2355 - * Check for expected errors before we dirty the transaction 2356 - * so we can return an error without a transaction abort. 2357 - */ 2358 - if (target_ip == NULL) { 2359 - /* 2360 - * If there's no space reservation, check the entry will 2361 - * fit before actually inserting it. 
2362 - */ 2363 - if (!spaceres) { 2364 - error = xfs_dir_canenter(tp, target_dp, target_name); 2365 - if (error) 2366 - goto out_trans_cancel; 2367 - } 2368 - } else { 2369 - /* 2370 - * If target exists and it's a directory, check that whether 2371 - * it can be destroyed. 2372 - */ 2373 - if (S_ISDIR(VFS_I(target_ip)->i_mode) && 2374 - (!xfs_dir_isempty(target_ip) || 2375 - (VFS_I(target_ip)->i_nlink > 2))) { 2376 - error = -EEXIST; 2377 - goto out_trans_cancel; 2378 - } 2379 3341 } 2380 3342 2381 3343 /* ··· 2362 3378 * target_ip is either null or an empty directory. 2363 3379 */ 2364 3380 for (i = 0; i < num_inodes && inodes[i] != NULL; i++) { 2365 - if (inodes[i] == wip || 3381 + if (inodes[i] == du_wip.ip || 2366 3382 (inodes[i] == target_ip && 2367 3383 (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) { 2368 3384 struct xfs_perag *pag; ··· 2377 3393 } 2378 3394 } 2379 3395 2380 - /* 2381 - * Directory entry creation below may acquire the AGF. Remove 2382 - * the whiteout from the unlinked list first to preserve correct 2383 - * AGI/AGF locking order. This dirties the transaction so failures 2384 - * after this point will abort and log recovery will clean up the 2385 - * mess. 2386 - * 2387 - * For whiteouts, we need to bump the link count on the whiteout 2388 - * inode. After this point, we have a real link, clear the tmpfile 2389 - * state flag from the inode so it doesn't accidentally get misused 2390 - * in future. 2391 - */ 2392 - if (wip) { 2393 - struct xfs_perag *pag; 2394 - 2395 - ASSERT(VFS_I(wip)->i_nlink == 0); 2396 - 2397 - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, wip->i_ino)); 2398 - error = xfs_iunlink_remove(tp, pag, wip); 2399 - xfs_perag_put(pag); 2400 - if (error) 2401 - goto out_trans_cancel; 2402 - 2403 - xfs_bumplink(tp, wip); 2404 - VFS_I(wip)->i_state &= ~I_LINKABLE; 2405 - } 2406 - 2407 - /* 2408 - * Set up the target. 
2409 - */ 2410 - if (target_ip == NULL) { 2411 - /* 2412 - * If target does not exist and the rename crosses 2413 - * directories, adjust the target directory link count 2414 - * to account for the ".." reference from the new entry. 2415 - */ 2416 - error = xfs_dir_createname(tp, target_dp, target_name, 2417 - src_ip->i_ino, spaceres); 2418 - if (error) 2419 - goto out_trans_cancel; 2420 - 2421 - xfs_trans_ichgtime(tp, target_dp, 2422 - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2423 - 2424 - if (new_parent && src_is_directory) { 2425 - xfs_bumplink(tp, target_dp); 2426 - } 2427 - } else { /* target_ip != NULL */ 2428 - /* 2429 - * Link the source inode under the target name. 2430 - * If the source inode is a directory and we are moving 2431 - * it across directories, its ".." entry will be 2432 - * inconsistent until we replace that down below. 2433 - * 2434 - * In case there is already an entry with the same 2435 - * name at the destination directory, remove it first. 2436 - */ 2437 - error = xfs_dir_replace(tp, target_dp, target_name, 2438 - src_ip->i_ino, spaceres); 2439 - if (error) 2440 - goto out_trans_cancel; 2441 - 2442 - xfs_trans_ichgtime(tp, target_dp, 2443 - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2444 - 2445 - /* 2446 - * Decrement the link count on the target since the target 2447 - * dir no longer points to it. 2448 - */ 2449 - error = xfs_droplink(tp, target_ip); 2450 - if (error) 2451 - goto out_trans_cancel; 2452 - 2453 - if (src_is_directory) { 2454 - /* 2455 - * Drop the link from the old "." entry. 2456 - */ 2457 - error = xfs_droplink(tp, target_ip); 2458 - if (error) 2459 - goto out_trans_cancel; 2460 - } 2461 - } /* target_ip != NULL */ 2462 - 2463 - /* 2464 - * Remove the source. 2465 - */ 2466 - if (new_parent && src_is_directory) { 2467 - /* 2468 - * Rewrite the ".." entry to point to the new 2469 - * directory. 
2470 - */ 2471 - error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 2472 - target_dp->i_ino, spaceres); 2473 - ASSERT(error != -EEXIST); 2474 - if (error) 2475 - goto out_trans_cancel; 2476 - } 2477 - 2478 - /* 2479 - * We always want to hit the ctime on the source inode. 2480 - * 2481 - * This isn't strictly required by the standards since the source 2482 - * inode isn't really being changed, but old unix file systems did 2483 - * it and some incremental backup programs won't work without it. 2484 - */ 2485 - xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); 2486 - xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); 2487 - 2488 - /* 2489 - * Adjust the link count on src_dp. This is necessary when 2490 - * renaming a directory, either within one parent when 2491 - * the target existed, or across two parent directories. 2492 - */ 2493 - if (src_is_directory && (new_parent || target_ip != NULL)) { 2494 - 2495 - /* 2496 - * Decrement link count on src_directory since the 2497 - * entry that's moved no longer points to it. 2498 - */ 2499 - error = xfs_droplink(tp, src_dp); 2500 - if (error) 2501 - goto out_trans_cancel; 2502 - } 2503 - 2504 - /* 2505 - * For whiteouts, we only need to update the source dirent with the 2506 - * inode number of the whiteout inode rather than removing it 2507 - * altogether. 2508 - */ 2509 - if (wip) 2510 - error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, 2511 - spaceres); 2512 - else 2513 - error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 2514 - spaceres); 2515 - 3396 + error = xfs_dir_rename_children(tp, &du_src, &du_tgt, spaceres, 3397 + &du_wip); 2516 3398 if (error) 2517 3399 goto out_trans_cancel; 2518 3400 2519 - /* Schedule parent pointer updates. 
*/ 2520 - if (wip_ppargs) { 2521 - error = xfs_parent_addname(tp, wip_ppargs, src_dp, src_name, 2522 - wip); 2523 - if (error) 2524 - goto out_trans_cancel; 3401 + if (du_wip.ip) { 3402 + /* 3403 + * Now we have a real link, clear the "I'm a tmpfile" state 3404 + * flag from the inode so it doesn't accidentally get misused in 3405 + * future. 3406 + */ 3407 + VFS_I(du_wip.ip)->i_state &= ~I_LINKABLE; 2525 3408 } 2526 3409 2527 - if (src_ppargs) { 2528 - error = xfs_parent_replacename(tp, src_ppargs, src_dp, 2529 - src_name, target_dp, target_name, src_ip); 2530 - if (error) 2531 - goto out_trans_cancel; 2532 - } 2533 - 2534 - if (tgt_ppargs) { 2535 - error = xfs_parent_removename(tp, tgt_ppargs, target_dp, 2536 - target_name, target_ip); 2537 - if (error) 2538 - goto out_trans_cancel; 2539 - } 2540 - 2541 - xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2542 - xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 2543 - if (new_parent) 2544 - xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 2545 - 3410 + out_commit: 2546 3411 /* 2547 - * Inform our hook clients that we've finished a rename operation as 2548 - * follows: removed the source and target files from their directories; 2549 - * that we've added the source to the target directory; and finally 2550 - * that we've added the whiteout, if there was one. All inodes are 2551 - * locked, so it's ok to model a rename this way so long as we say we 2552 - * deleted entries before we add new ones. 3412 + * If this is a synchronous mount, make sure that the rename 3413 + * transaction goes to disk before returning to the user. 
2553 3414 */ 2554 - if (target_ip) 2555 - xfs_dir_update_hook(target_dp, target_ip, -1, target_name); 2556 - xfs_dir_update_hook(src_dp, src_ip, -1, src_name); 2557 - xfs_dir_update_hook(target_dp, src_ip, 1, target_name); 2558 - if (wip) 2559 - xfs_dir_update_hook(src_dp, wip, 1, src_name); 3415 + if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp)) 3416 + xfs_trans_set_sync(tp); 2560 3417 2561 - error = xfs_finish_rename(tp); 3418 + error = xfs_trans_commit(tp); 2562 3419 nospace_error = 0; 2563 3420 goto out_unlock; 2564 3421 ··· 2408 3583 out_unlock: 2409 3584 xfs_iunlock_rename(inodes, num_inodes); 2410 3585 out_tgt_ppargs: 2411 - xfs_parent_finish(mp, tgt_ppargs); 3586 + xfs_parent_finish(mp, du_tgt.ppargs); 2412 3587 out_wip_ppargs: 2413 - xfs_parent_finish(mp, wip_ppargs); 3588 + xfs_parent_finish(mp, du_wip.ppargs); 2414 3589 out_src_ppargs: 2415 - xfs_parent_finish(mp, src_ppargs); 3590 + xfs_parent_finish(mp, du_src.ppargs); 2416 3591 out_release_wip: 2417 - if (wip) 2418 - xfs_irele(wip); 3592 + if (du_wip.ip) 3593 + xfs_irele(du_wip.ip); 2419 3594 if (error == -ENOSPC && nospace_error) 2420 3595 error = nospace_error; 2421 3596 return error; ··· 2555 3730 iip->ili_last_fields = iip->ili_fields; 2556 3731 iip->ili_fields = 0; 2557 3732 iip->ili_fsync_fields = 0; 3733 + set_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags); 2558 3734 spin_unlock(&iip->ili_lock); 2559 3735 2560 3736 /* ··· 3118 4292 blocks = ip->i_mount->m_sb.sb_rextsize; 3119 4293 3120 4294 return XFS_FSB_TO_B(ip->i_mount, blocks); 4295 + } 4296 + 4297 + /* Should we always be using copy on write for file writes? */ 4298 + bool 4299 + xfs_is_always_cow_inode( 4300 + struct xfs_inode *ip) 4301 + { 4302 + return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount); 3121 4303 }

+18 -52

fs/xfs/xfs_inode.h

··· 8 8 9 9 #include "xfs_inode_buf.h" 10 10 #include "xfs_inode_fork.h" 11 + #include "xfs_inode_util.h" 11 12 12 13 /* 13 14 * Kernel only inode definitions ··· 271 270 return ret; 272 271 } 273 272 274 - static inline prid_t 275 - xfs_get_initial_prid(struct xfs_inode *dp) 276 - { 277 - if (dp->i_diflags & XFS_DIFLAG_PROJINHERIT) 278 - return dp->i_projid; 279 - 280 - return XFS_PROJID_DEFAULT; 281 - } 282 - 283 273 static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) 284 274 { 285 275 return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; ··· 282 290 283 291 return ip == mp->m_rbmip || ip == mp->m_rsumip || 284 292 xfs_is_quota_inode(&mp->m_sb, ip->i_ino); 293 + } 294 + 295 + bool xfs_is_always_cow_inode(struct xfs_inode *ip); 296 + 297 + static inline bool xfs_is_cow_inode(struct xfs_inode *ip) 298 + { 299 + return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); 285 300 } 286 301 287 302 /* ··· 516 517 int xfs_inactive(struct xfs_inode *ip); 517 518 int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name, 518 519 struct xfs_inode **ipp, struct xfs_name *ci_name); 519 - int xfs_create(struct mnt_idmap *idmap, 520 - struct xfs_inode *dp, struct xfs_name *name, 521 - umode_t mode, dev_t rdev, bool need_xattr, 522 - struct xfs_inode **ipp); 523 - int xfs_create_tmpfile(struct mnt_idmap *idmap, 524 - struct xfs_inode *dp, umode_t mode, bool init_xattrs, 520 + int xfs_create(const struct xfs_icreate_args *iargs, 521 + struct xfs_name *name, struct xfs_inode **ipp); 522 + int xfs_create_tmpfile(const struct xfs_icreate_args *iargs, 525 523 struct xfs_inode **ipp); 526 524 int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 527 525 struct xfs_inode *ip); ··· 538 542 uint xfs_ilock_data_map_shared(struct xfs_inode *); 539 543 uint xfs_ilock_attr_map_shared(struct xfs_inode *); 540 544 541 - uint xfs_ip2xflags(struct xfs_inode *); 542 545 int xfs_ifree(struct xfs_trans *, struct xfs_inode *); 543 546 int xfs_itruncate_extents_flags(struct 
xfs_trans **, 544 547 struct xfs_inode *, int, xfs_fsize_t, int); ··· 551 556 void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, 552 557 struct xfs_inode *ip1, uint ip1_mode); 553 558 554 - xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); 555 - xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); 556 - 557 - int xfs_init_new_inode(struct mnt_idmap *idmap, struct xfs_trans *tp, 558 - struct xfs_inode *pip, xfs_ino_t ino, umode_t mode, 559 - xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_xattrs, 560 - struct xfs_inode **ipp); 559 + int xfs_icreate(struct xfs_trans *tp, xfs_ino_t ino, 560 + const struct xfs_icreate_args *args, struct xfs_inode **ipp); 561 561 562 562 static inline int 563 563 xfs_itruncate_extents( ··· 606 616 607 617 bool xfs_inode_needs_inactive(struct xfs_inode *ip); 608 618 609 - int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip); 610 - int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag, 611 - struct xfs_inode *ip); 612 619 struct xfs_inode *xfs_iunlink_lookup(struct xfs_perag *pag, xfs_agino_t agino); 620 + int xfs_iunlink_reload_next(struct xfs_trans *tp, struct xfs_buf *agibp, 621 + xfs_agino_t prev_agino, xfs_agino_t next_agino); 613 622 614 623 void xfs_end_io(struct work_struct *work); 615 624 616 625 int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); 617 626 void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); 618 627 void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2); 619 - int xfs_droplink(struct xfs_trans *tp, struct xfs_inode *ip); 620 - void xfs_bumplink(struct xfs_trans *tp, struct xfs_inode *ip); 621 628 void xfs_lock_inodes(struct xfs_inode **ips, int inodes, uint lock_mode); 622 629 void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes); 623 630 ··· 632 645 xfs_filblks_t *dblocks, xfs_filblks_t *rblocks); 633 646 unsigned int xfs_inode_alloc_unitsize(struct xfs_inode *ip); 634 647 635 - struct 
xfs_dir_update_params { 636 - const struct xfs_inode *dp; 637 - const struct xfs_inode *ip; 638 - const struct xfs_name *name; 639 - int delta; 640 - }; 641 - 642 - #ifdef CONFIG_XFS_LIVE_HOOKS 643 - void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip, 644 - int delta, const struct xfs_name *name); 645 - 646 - struct xfs_dir_hook { 647 - struct xfs_hook dirent_hook; 648 - }; 649 - 650 - void xfs_dir_hook_disable(void); 651 - void xfs_dir_hook_enable(void); 652 - 653 - int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook); 654 - void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook); 655 - void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn); 656 - #else 657 - # define xfs_dir_update_hook(dp, ip, delta, name) ((void)0) 658 - #endif /* CONFIG_XFS_LIVE_HOOKS */ 648 + int xfs_icreate_dqalloc(const struct xfs_icreate_args *args, 649 + struct xfs_dquot **udqpp, struct xfs_dquot **gdqpp, 650 + struct xfs_dquot **pdqpp); 659 651 660 652 #endif /* __XFS_INODE_H__ */

+37 -1

fs/xfs/xfs_inode_item.c

··· 37 37 return INODE_ITEM(lip)->ili_inode->i_ino; 38 38 } 39 39 40 + #ifdef DEBUG_EXPENSIVE 41 + static void 42 + xfs_inode_item_precommit_check( 43 + struct xfs_inode *ip) 44 + { 45 + struct xfs_mount *mp = ip->i_mount; 46 + struct xfs_dinode *dip; 47 + xfs_failaddr_t fa; 48 + 49 + dip = kzalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | GFP_NOFS); 50 + if (!dip) { 51 + ASSERT(dip != NULL); 52 + return; 53 + } 54 + 55 + xfs_inode_to_disk(ip, dip, 0); 56 + xfs_dinode_calc_crc(mp, dip); 57 + fa = xfs_dinode_verify(mp, ip->i_ino, dip); 58 + if (fa) { 59 + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, 60 + sizeof(*dip), fa); 61 + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 62 + ASSERT(fa == NULL); 63 + } 64 + kfree(dip); 65 + } 66 + #else 67 + # define xfs_inode_item_precommit_check(ip) ((void)0) 68 + #endif 69 + 40 70 /* 41 71 * Prior to finally logging the inode, we have to ensure that all the 42 72 * per-modification inode state changes are applied. This includes VFS inode ··· 198 168 */ 199 169 iip->ili_fields |= (flags | iip->ili_last_fields); 200 170 spin_unlock(&iip->ili_lock); 171 + 172 + xfs_inode_item_precommit_check(ip); 201 173 202 174 /* 203 175 * We are done with the log item transaction dirty state, so clear it so ··· 965 933 } 966 934 iip->ili_last_fields = 0; 967 935 iip->ili_flush_lsn = 0; 936 + clear_bit(XFS_LI_FLUSHING, &lip->li_flags); 968 937 spin_unlock(&iip->ili_lock); 969 938 xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING); 970 939 if (drop_buffer) ··· 1024 991 { 1025 992 struct xfs_log_item *lip; 1026 993 1027 - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) 994 + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { 1028 995 set_bit(XFS_LI_FAILED, &lip->li_flags); 996 + clear_bit(XFS_LI_FLUSHING, &lip->li_flags); 997 + } 1029 998 } 1030 999 1031 1000 /* ··· 1046 1011 iip->ili_flush_lsn = 0; 1047 1012 iip->ili_item.li_buf = NULL; 1048 1013 list_del_init(&iip->ili_item.li_bio_list); 1014 + clear_bit(XFS_LI_FLUSHING, 
&iip->ili_item.li_flags); 1049 1015 } 1050 1016 1051 1017 /*

-60

fs/xfs/xfs_ioctl.c

··· 469 469 return 0; 470 470 } 471 471 472 - STATIC uint16_t 473 - xfs_flags2diflags( 474 - struct xfs_inode *ip, 475 - unsigned int xflags) 476 - { 477 - /* can't set PREALLOC this way, just preserve it */ 478 - uint16_t di_flags = 479 - (ip->i_diflags & XFS_DIFLAG_PREALLOC); 480 - 481 - if (xflags & FS_XFLAG_IMMUTABLE) 482 - di_flags |= XFS_DIFLAG_IMMUTABLE; 483 - if (xflags & FS_XFLAG_APPEND) 484 - di_flags |= XFS_DIFLAG_APPEND; 485 - if (xflags & FS_XFLAG_SYNC) 486 - di_flags |= XFS_DIFLAG_SYNC; 487 - if (xflags & FS_XFLAG_NOATIME) 488 - di_flags |= XFS_DIFLAG_NOATIME; 489 - if (xflags & FS_XFLAG_NODUMP) 490 - di_flags |= XFS_DIFLAG_NODUMP; 491 - if (xflags & FS_XFLAG_NODEFRAG) 492 - di_flags |= XFS_DIFLAG_NODEFRAG; 493 - if (xflags & FS_XFLAG_FILESTREAM) 494 - di_flags |= XFS_DIFLAG_FILESTREAM; 495 - if (S_ISDIR(VFS_I(ip)->i_mode)) { 496 - if (xflags & FS_XFLAG_RTINHERIT) 497 - di_flags |= XFS_DIFLAG_RTINHERIT; 498 - if (xflags & FS_XFLAG_NOSYMLINKS) 499 - di_flags |= XFS_DIFLAG_NOSYMLINKS; 500 - if (xflags & FS_XFLAG_EXTSZINHERIT) 501 - di_flags |= XFS_DIFLAG_EXTSZINHERIT; 502 - if (xflags & FS_XFLAG_PROJINHERIT) 503 - di_flags |= XFS_DIFLAG_PROJINHERIT; 504 - } else if (S_ISREG(VFS_I(ip)->i_mode)) { 505 - if (xflags & FS_XFLAG_REALTIME) 506 - di_flags |= XFS_DIFLAG_REALTIME; 507 - if (xflags & FS_XFLAG_EXTSIZE) 508 - di_flags |= XFS_DIFLAG_EXTSIZE; 509 - } 510 - 511 - return di_flags; 512 - } 513 - 514 - STATIC uint64_t 515 - xfs_flags2diflags2( 516 - struct xfs_inode *ip, 517 - unsigned int xflags) 518 - { 519 - uint64_t di_flags2 = 520 - (ip->i_diflags2 & (XFS_DIFLAG2_REFLINK | 521 - XFS_DIFLAG2_BIGTIME | 522 - XFS_DIFLAG2_NREXT64)); 523 - 524 - if (xflags & FS_XFLAG_DAX) 525 - di_flags2 |= XFS_DIFLAG2_DAX; 526 - if (xflags & FS_XFLAG_COWEXTSIZE) 527 - di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; 528 - 529 - return di_flags2; 530 - } 531 - 532 472 static int 533 473 xfs_ioctl_setattr_xflags( 534 474 struct xfs_trans *tp,

+34 -37

fs/xfs/xfs_iomap.c

··· 717 717 return true; 718 718 } 719 719 720 + /* 721 + * Extents not yet cached requires exclusive access, don't block for 722 + * IOMAP_NOWAIT. 723 + * 724 + * This is basically an opencoded xfs_ilock_data_map_shared() call, but with 725 + * support for IOMAP_NOWAIT. 726 + */ 720 727 static int 721 728 xfs_ilock_for_iomap( 722 729 struct xfs_inode *ip, 723 730 unsigned flags, 724 731 unsigned *lockmode) 725 732 { 726 - unsigned int mode = *lockmode; 727 - bool is_write = flags & (IOMAP_WRITE | IOMAP_ZERO); 728 - 729 - /* 730 - * COW writes may allocate delalloc space or convert unwritten COW 731 - * extents, so we need to make sure to take the lock exclusively here. 732 - */ 733 - if (xfs_is_cow_inode(ip) && is_write) 734 - mode = XFS_ILOCK_EXCL; 735 - 736 - /* 737 - * Extents not yet cached requires exclusive access, don't block. This 738 - * is an opencoded xfs_ilock_data_map_shared() call but with 739 - * non-blocking behaviour. 740 - */ 741 - if (xfs_need_iread_extents(&ip->i_df)) { 742 - if (flags & IOMAP_NOWAIT) 743 - return -EAGAIN; 744 - mode = XFS_ILOCK_EXCL; 745 - } 746 - 747 - relock: 748 733 if (flags & IOMAP_NOWAIT) { 749 - if (!xfs_ilock_nowait(ip, mode)) 734 + if (xfs_need_iread_extents(&ip->i_df)) 735 + return -EAGAIN; 736 + if (!xfs_ilock_nowait(ip, *lockmode)) 750 737 return -EAGAIN; 751 738 } else { 752 - xfs_ilock(ip, mode); 739 + if (xfs_need_iread_extents(&ip->i_df)) 740 + *lockmode = XFS_ILOCK_EXCL; 741 + xfs_ilock(ip, *lockmode); 753 742 } 754 743 755 - /* 756 - * The reflink iflag could have changed since the earlier unlocked 757 - * check, so if we got ILOCK_SHARED for a write and but we're now a 758 - * reflink inode we have to switch to ILOCK_EXCL and relock. 
759 - */ 760 - if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) { 761 - xfs_iunlock(ip, mode); 762 - mode = XFS_ILOCK_EXCL; 763 - goto relock; 764 - } 765 - 766 - *lockmode = mode; 767 744 return 0; 768 745 } 769 746 ··· 778 801 int nimaps = 1, error = 0; 779 802 bool shared = false; 780 803 u16 iomap_flags = 0; 781 - unsigned int lockmode = XFS_ILOCK_SHARED; 804 + unsigned int lockmode; 782 805 u64 seq; 783 806 784 807 ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); ··· 794 817 if (offset + length > i_size_read(inode)) 795 818 iomap_flags |= IOMAP_F_DIRTY; 796 819 820 + /* 821 + * COW writes may allocate delalloc space or convert unwritten COW 822 + * extents, so we need to make sure to take the lock exclusively here. 823 + */ 824 + if (xfs_is_cow_inode(ip)) 825 + lockmode = XFS_ILOCK_EXCL; 826 + else 827 + lockmode = XFS_ILOCK_SHARED; 828 + 829 + relock: 797 830 error = xfs_ilock_for_iomap(ip, flags, &lockmode); 798 831 if (error) 799 832 return error; 833 + 834 + /* 835 + * The reflink iflag could have changed since the earlier unlocked 836 + * check, check it again and relock if needed. 837 + */ 838 + if (xfs_is_cow_inode(ip) && lockmode == XFS_ILOCK_SHARED) { 839 + xfs_iunlock(ip, lockmode); 840 + lockmode = XFS_ILOCK_EXCL; 841 + goto relock; 842 + } 800 843 801 844 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, 802 845 &nimaps, 0);

+28 -23

fs/xfs/xfs_iops.c

··· 28 28 #include "xfs_ioctl.h" 29 29 #include "xfs_xattr.h" 30 30 #include "xfs_file.h" 31 + #include "xfs_bmap.h" 31 32 32 33 #include <linux/posix_acl.h> 33 34 #include <linux/security.h> ··· 160 159 if (dir->i_sb->s_security) 161 160 return true; 162 161 #endif 163 - if (xfs_has_parent(XFS_I(dir)->i_mount)) 164 - return true; 165 162 return false; 166 163 } 167 164 ··· 173 174 dev_t rdev, 174 175 struct file *tmpfile) /* unnamed file */ 175 176 { 176 - struct inode *inode; 177 - struct xfs_inode *ip = NULL; 178 - struct posix_acl *default_acl, *acl; 179 - struct xfs_name name; 180 - int error; 177 + struct xfs_icreate_args args = { 178 + .idmap = idmap, 179 + .pip = XFS_I(dir), 180 + .rdev = rdev, 181 + .mode = mode, 182 + }; 183 + struct inode *inode; 184 + struct xfs_inode *ip = NULL; 185 + struct posix_acl *default_acl, *acl; 186 + struct xfs_name name; 187 + int error; 181 188 182 189 /* 183 190 * Irix uses Missed'em'V split, but doesn't want to see 184 191 * the upper 5 bits of (14bit) major. 
185 192 */ 186 - if (S_ISCHR(mode) || S_ISBLK(mode)) { 187 - if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) 193 + if (S_ISCHR(args.mode) || S_ISBLK(args.mode)) { 194 + if (unlikely(!sysv_valid_dev(args.rdev) || 195 + MAJOR(args.rdev) & ~0x1ff)) 188 196 return -EINVAL; 189 197 } else { 190 - rdev = 0; 198 + args.rdev = 0; 191 199 } 192 200 193 - error = posix_acl_create(dir, &mode, &default_acl, &acl); 201 + error = posix_acl_create(dir, &args.mode, &default_acl, &acl); 194 202 if (error) 195 203 return error; 196 204 197 205 /* Verify mode is valid also for tmpfile case */ 198 - error = xfs_dentry_mode_to_name(&name, dentry, mode); 206 + error = xfs_dentry_mode_to_name(&name, dentry, args.mode); 199 207 if (unlikely(error)) 200 208 goto out_free_acl; 201 209 202 210 if (!tmpfile) { 203 - error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev, 204 - xfs_create_need_xattr(dir, default_acl, acl), 205 - &ip); 211 + if (xfs_create_need_xattr(dir, default_acl, acl)) 212 + args.flags |= XFS_ICREATE_INIT_XATTRS; 213 + 214 + error = xfs_create(&args, &name, &ip); 206 215 } else { 207 - bool init_xattrs = false; 216 + args.flags |= XFS_ICREATE_TMPFILE; 208 217 209 218 /* 210 - * If this temporary file will be linkable, set up the file 211 - * with an attr fork to receive a parent pointer. 219 + * If this temporary file will not be linkable, don't bother 220 + * creating an attr fork to receive a parent pointer. 212 221 */ 213 - if (!(tmpfile->f_flags & O_EXCL) && 214 - xfs_has_parent(XFS_I(dir)->i_mount)) 215 - init_xattrs = true; 222 + if (tmpfile->f_flags & O_EXCL) 223 + args.flags |= XFS_ICREATE_UNLINKABLE; 216 224 217 - error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, 218 - init_xattrs, &ip); 225 + error = xfs_create_tmpfile(&args, &ip); 219 226 } 220 227 if (unlikely(error)) 221 228 goto out_free_acl;

-2

fs/xfs/xfs_linux.h

··· 135 135 */ 136 136 #define __this_address ({ __label__ __here; __here: barrier(); &&__here; }) 137 137 138 - #define XFS_PROJID_DEFAULT 0 139 - 140 138 #define howmany(x, y) (((x)+((y)-1))/(y)) 141 139 142 140 static inline void delay(long ticks)

+120 -391

fs/xfs/xfs_log.c

··· 30 30 struct xfs_buftarg *log_target, 31 31 xfs_daddr_t blk_offset, 32 32 int num_bblks); 33 - STATIC int 34 - xlog_space_left( 35 - struct xlog *log, 36 - atomic64_t *head); 37 33 STATIC void 38 34 xlog_dealloc_log( 39 35 struct xlog *log); ··· 47 51 struct xlog_ticket *ticket, 48 52 int *logoffsetp); 49 53 STATIC void 50 - xlog_grant_push_ail( 51 - struct xlog *log, 52 - int need_bytes); 53 - STATIC void 54 54 xlog_sync( 55 55 struct xlog *log, 56 56 struct xlog_in_core *iclog, 57 57 struct xlog_ticket *ticket); 58 58 #if defined(DEBUG) 59 - STATIC void 60 - xlog_verify_grant_tail( 61 - struct xlog *log); 62 59 STATIC void 63 60 xlog_verify_iclog( 64 61 struct xlog *log, ··· 62 73 struct xlog *log, 63 74 struct xlog_in_core *iclog); 64 75 #else 65 - #define xlog_verify_grant_tail(a) 66 76 #define xlog_verify_iclog(a,b,c) 67 77 #define xlog_verify_tail_lsn(a,b) 68 78 #endif ··· 129 141 return buf; 130 142 } 131 143 132 - static void 144 + static inline void 133 145 xlog_grant_sub_space( 134 - struct xlog *log, 135 - atomic64_t *head, 136 - int bytes) 146 + struct xlog_grant_head *head, 147 + int64_t bytes) 137 148 { 138 - int64_t head_val = atomic64_read(head); 139 - int64_t new, old; 149 + atomic64_sub(bytes, &head->grant); 150 + } 140 151 141 - do { 142 - int cycle, space; 143 - 144 - xlog_crack_grant_head_val(head_val, &cycle, &space); 145 - 146 - space -= bytes; 147 - if (space < 0) { 148 - space += log->l_logsize; 149 - cycle--; 150 - } 151 - 152 - old = head_val; 153 - new = xlog_assign_grant_head_val(cycle, space); 154 - head_val = atomic64_cmpxchg(head, old, new); 155 - } while (head_val != old); 152 + static inline void 153 + xlog_grant_add_space( 154 + struct xlog_grant_head *head, 155 + int64_t bytes) 156 + { 157 + atomic64_add(bytes, &head->grant); 156 158 } 157 159 158 160 static void 159 - xlog_grant_add_space( 160 - struct xlog *log, 161 - atomic64_t *head, 162 - int bytes) 163 - { 164 - int64_t head_val = atomic64_read(head); 165 - int64_t new, 
old; 166 - 167 - do { 168 - int tmp; 169 - int cycle, space; 170 - 171 - xlog_crack_grant_head_val(head_val, &cycle, &space); 172 - 173 - tmp = log->l_logsize - space; 174 - if (tmp > bytes) 175 - space += bytes; 176 - else { 177 - space = bytes - tmp; 178 - cycle++; 179 - } 180 - 181 - old = head_val; 182 - new = xlog_assign_grant_head_val(cycle, space); 183 - head_val = atomic64_cmpxchg(head, old, new); 184 - } while (head_val != old); 185 - } 186 - 187 - STATIC void 188 161 xlog_grant_head_init( 189 162 struct xlog_grant_head *head) 190 163 { 191 - xlog_assign_grant_head(&head->grant, 1, 0); 164 + atomic64_set(&head->grant, 0); 192 165 INIT_LIST_HEAD(&head->waiters); 193 166 spin_lock_init(&head->lock); 167 + } 168 + 169 + void 170 + xlog_grant_return_space( 171 + struct xlog *log, 172 + xfs_lsn_t old_head, 173 + xfs_lsn_t new_head) 174 + { 175 + int64_t diff = xlog_lsn_sub(log, new_head, old_head); 176 + 177 + xlog_grant_sub_space(&log->l_reserve_head, diff); 178 + xlog_grant_sub_space(&log->l_write_head, diff); 179 + } 180 + 181 + /* 182 + * Return the space in the log between the tail and the head. In the case where 183 + * we have overrun available reservation space, return 0. The memory barrier 184 + * pairs with the smp_wmb() in xlog_cil_ail_insert() to ensure that grant head 185 + * vs tail space updates are seen in the correct order and hence avoid 186 + * transients as space is transferred from the grant heads to the AIL on commit 187 + * completion. 
188 + */ 189 + static uint64_t 190 + xlog_grant_space_left( 191 + struct xlog *log, 192 + struct xlog_grant_head *head) 193 + { 194 + int64_t free_bytes; 195 + 196 + smp_rmb(); /* paired with smp_wmb in xlog_cil_ail_insert() */ 197 + free_bytes = log->l_logsize - READ_ONCE(log->l_tail_space) - 198 + atomic64_read(&head->grant); 199 + if (free_bytes > 0) 200 + return free_bytes; 201 + return 0; 194 202 } 195 203 196 204 STATIC void ··· 226 242 { 227 243 struct xlog_ticket *tic; 228 244 int need_bytes; 229 - bool woken_task = false; 230 245 231 246 list_for_each_entry(tic, &head->waiters, t_queue) { 232 - 233 - /* 234 - * There is a chance that the size of the CIL checkpoints in 235 - * progress at the last AIL push target calculation resulted in 236 - * limiting the target to the log head (l_last_sync_lsn) at the 237 - * time. This may not reflect where the log head is now as the 238 - * CIL checkpoints may have completed. 239 - * 240 - * Hence when we are woken here, it may be that the head of the 241 - * log that has moved rather than the tail. As the tail didn't 242 - * move, there still won't be space available for the 243 - * reservation we require. However, if the AIL has already 244 - * pushed to the target defined by the old log head location, we 245 - * will hang here waiting for something else to update the AIL 246 - * push target. 247 - * 248 - * Therefore, if there isn't space to wake the first waiter on 249 - * the grant head, we need to push the AIL again to ensure the 250 - * target reflects both the current log tail and log head 251 - * position before we wait for the tail to move again. 
252 - */ 253 - 254 247 need_bytes = xlog_ticket_reservation(log, head, tic); 255 - if (*free_bytes < need_bytes) { 256 - if (!woken_task) 257 - xlog_grant_push_ail(log, need_bytes); 248 + if (*free_bytes < need_bytes) 258 249 return false; 259 - } 260 250 261 251 *free_bytes -= need_bytes; 262 252 trace_xfs_log_grant_wake_up(log, tic); 263 253 wake_up_process(tic->t_task); 264 - woken_task = true; 265 254 } 266 255 267 256 return true; ··· 253 296 do { 254 297 if (xlog_is_shutdown(log)) 255 298 goto shutdown; 256 - xlog_grant_push_ail(log, need_bytes); 257 299 258 300 __set_current_state(TASK_UNINTERRUPTIBLE); 259 301 spin_unlock(&head->lock); 260 302 261 303 XFS_STATS_INC(log->l_mp, xs_sleep_logspace); 304 + 305 + /* Push on the AIL to free up all the log space. */ 306 + xfs_ail_push_all(log->l_ailp); 262 307 263 308 trace_xfs_log_grant_sleep(log, tic); 264 309 schedule(); ··· 269 310 spin_lock(&head->lock); 270 311 if (xlog_is_shutdown(log)) 271 312 goto shutdown; 272 - } while (xlog_space_left(log, &head->grant) < need_bytes); 313 + } while (xlog_grant_space_left(log, head) < need_bytes); 273 314 274 315 list_del_init(&tic->t_queue); 275 316 return 0; ··· 314 355 * otherwise try to get some space for this transaction. 315 356 */ 316 357 *need_bytes = xlog_ticket_reservation(log, head, tic); 317 - free_bytes = xlog_space_left(log, &head->grant); 358 + free_bytes = xlog_grant_space_left(log, head); 318 359 if (!list_empty_careful(&head->waiters)) { 319 360 spin_lock(&head->lock); 320 361 if (!xlog_grant_head_wake(log, head, &free_bytes) || ··· 377 418 * of rolling transactions in the log easily. 
378 419 */ 379 420 tic->t_tid++; 380 - 381 - xlog_grant_push_ail(log, tic->t_unit_res); 382 - 383 421 tic->t_curr_res = tic->t_unit_res; 384 422 if (tic->t_cnt > 0) 385 423 return 0; ··· 388 432 if (error) 389 433 goto out_error; 390 434 391 - xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); 435 + xlog_grant_add_space(&log->l_write_head, need_bytes); 392 436 trace_xfs_log_regrant_exit(log, tic); 393 - xlog_verify_grant_tail(log); 394 437 return 0; 395 438 396 439 out_error: ··· 432 477 ASSERT(*ticp == NULL); 433 478 tic = xlog_ticket_alloc(log, unit_bytes, cnt, permanent); 434 479 *ticp = tic; 435 - 436 - xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt 437 - : tic->t_unit_res); 438 - 439 480 trace_xfs_log_reserve(log, tic); 440 - 441 481 error = xlog_grant_head_check(log, &log->l_reserve_head, tic, 442 482 &need_bytes); 443 483 if (error) 444 484 goto out_error; 445 485 446 - xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes); 447 - xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); 486 + xlog_grant_add_space(&log->l_reserve_head, need_bytes); 487 + xlog_grant_add_space(&log->l_write_head, need_bytes); 448 488 trace_xfs_log_reserve_exit(log, tic); 449 - xlog_verify_grant_tail(log); 450 489 return 0; 451 490 452 491 out_error: ··· 520 571 struct xlog_in_core *iclog, 521 572 struct xlog_ticket *ticket) 522 573 { 523 - xfs_lsn_t tail_lsn; 524 574 bool last_ref; 525 575 526 576 lockdep_assert_held(&log->l_icloglock); ··· 534 586 if ((iclog->ic_state == XLOG_STATE_WANT_SYNC || 535 587 (iclog->ic_flags & XLOG_ICL_NEED_FUA)) && 536 588 !iclog->ic_header.h_tail_lsn) { 537 - tail_lsn = xlog_assign_tail_lsn(log->l_mp); 538 - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); 589 + iclog->ic_header.h_tail_lsn = 590 + cpu_to_be64(atomic64_read(&log->l_tail_lsn)); 539 591 } 540 592 541 593 last_ref = atomic_dec_and_test(&iclog->ic_refcnt); ··· 1097 1149 ASSERT(!xlog_in_recovery(log)); 1098 1150 1099 1151 
spin_lock(&log->l_write_head.lock); 1100 - free_bytes = xlog_space_left(log, &log->l_write_head.grant); 1152 + free_bytes = xlog_grant_space_left(log, &log->l_write_head); 1101 1153 xlog_grant_head_wake(log, &log->l_write_head, &free_bytes); 1102 1154 spin_unlock(&log->l_write_head.lock); 1103 1155 } ··· 1106 1158 ASSERT(!xlog_in_recovery(log)); 1107 1159 1108 1160 spin_lock(&log->l_reserve_head.lock); 1109 - free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); 1161 + free_bytes = xlog_grant_space_left(log, &log->l_reserve_head); 1110 1162 xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes); 1111 1163 spin_unlock(&log->l_reserve_head.lock); 1112 1164 } ··· 1219 1271 1220 1272 return error; 1221 1273 } 1222 - 1223 - /* 1224 - * We may be holding the log iclog lock upon entering this routine. 1225 - */ 1226 - xfs_lsn_t 1227 - xlog_assign_tail_lsn_locked( 1228 - struct xfs_mount *mp) 1229 - { 1230 - struct xlog *log = mp->m_log; 1231 - struct xfs_log_item *lip; 1232 - xfs_lsn_t tail_lsn; 1233 - 1234 - assert_spin_locked(&mp->m_ail->ail_lock); 1235 - 1236 - /* 1237 - * To make sure we always have a valid LSN for the log tail we keep 1238 - * track of the last LSN which was committed in log->l_last_sync_lsn, 1239 - * and use that when the AIL was empty. 1240 - */ 1241 - lip = xfs_ail_min(mp->m_ail); 1242 - if (lip) 1243 - tail_lsn = lip->li_lsn; 1244 - else 1245 - tail_lsn = atomic64_read(&log->l_last_sync_lsn); 1246 - trace_xfs_log_assign_tail_lsn(log, tail_lsn); 1247 - atomic64_set(&log->l_tail_lsn, tail_lsn); 1248 - return tail_lsn; 1249 - } 1250 - 1251 - xfs_lsn_t 1252 - xlog_assign_tail_lsn( 1253 - struct xfs_mount *mp) 1254 - { 1255 - xfs_lsn_t tail_lsn; 1256 - 1257 - spin_lock(&mp->m_ail->ail_lock); 1258 - tail_lsn = xlog_assign_tail_lsn_locked(mp); 1259 - spin_unlock(&mp->m_ail->ail_lock); 1260 - 1261 - return tail_lsn; 1262 - } 1263 - 1264 - /* 1265 - * Return the space in the log between the tail and the head. 
The head 1266 - * is passed in the cycle/bytes formal parms. In the special case where 1267 - * the reserve head has wrapped passed the tail, this calculation is no 1268 - * longer valid. In this case, just return 0 which means there is no space 1269 - * in the log. This works for all places where this function is called 1270 - * with the reserve head. Of course, if the write head were to ever 1271 - * wrap the tail, we should blow up. Rather than catch this case here, 1272 - * we depend on other ASSERTions in other parts of the code. XXXmiken 1273 - * 1274 - * If reservation head is behind the tail, we have a problem. Warn about it, 1275 - * but then treat it as if the log is empty. 1276 - * 1277 - * If the log is shut down, the head and tail may be invalid or out of whack, so 1278 - * shortcut invalidity asserts in this case so that we don't trigger them 1279 - * falsely. 1280 - */ 1281 - STATIC int 1282 - xlog_space_left( 1283 - struct xlog *log, 1284 - atomic64_t *head) 1285 - { 1286 - int tail_bytes; 1287 - int tail_cycle; 1288 - int head_cycle; 1289 - int head_bytes; 1290 - 1291 - xlog_crack_grant_head(head, &head_cycle, &head_bytes); 1292 - xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_bytes); 1293 - tail_bytes = BBTOB(tail_bytes); 1294 - if (tail_cycle == head_cycle && head_bytes >= tail_bytes) 1295 - return log->l_logsize - (head_bytes - tail_bytes); 1296 - if (tail_cycle + 1 < head_cycle) 1297 - return 0; 1298 - 1299 - /* Ignore potential inconsistency when shutdown. */ 1300 - if (xlog_is_shutdown(log)) 1301 - return log->l_logsize; 1302 - 1303 - if (tail_cycle < head_cycle) { 1304 - ASSERT(tail_cycle == (head_cycle - 1)); 1305 - return tail_bytes - head_bytes; 1306 - } 1307 - 1308 - /* 1309 - * The reservation head is behind the tail. In this case we just want to 1310 - * return the size of the log as the amount of space left. 
1311 - */ 1312 - xfs_alert(log->l_mp, "xlog_space_left: head behind tail"); 1313 - xfs_alert(log->l_mp, " tail_cycle = %d, tail_bytes = %d", 1314 - tail_cycle, tail_bytes); 1315 - xfs_alert(log->l_mp, " GH cycle = %d, GH bytes = %d", 1316 - head_cycle, head_bytes); 1317 - ASSERT(0); 1318 - return log->l_logsize; 1319 - } 1320 - 1321 1274 1322 1275 static void 1323 1276 xlog_ioend_work( ··· 1392 1543 log->l_prev_block = -1; 1393 1544 /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ 1394 1545 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); 1395 - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); 1396 1546 log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ 1397 1547 1398 1548 if (xfs_has_logv2(mp) && mp->m_sb.sb_logsunit > 1) ··· 1514 1666 out: 1515 1667 return ERR_PTR(error); 1516 1668 } /* xlog_alloc_log */ 1517 - 1518 - /* 1519 - * Compute the LSN that we'd need to push the log tail towards in order to have 1520 - * (a) enough on-disk log space to log the number of bytes specified, (b) at 1521 - * least 25% of the log space free, and (c) at least 256 blocks free. If the 1522 - * log free space already meets all three thresholds, this function returns 1523 - * NULLCOMMITLSN. 1524 - */ 1525 - xfs_lsn_t 1526 - xlog_grant_push_threshold( 1527 - struct xlog *log, 1528 - int need_bytes) 1529 - { 1530 - xfs_lsn_t threshold_lsn = 0; 1531 - xfs_lsn_t last_sync_lsn; 1532 - int free_blocks; 1533 - int free_bytes; 1534 - int threshold_block; 1535 - int threshold_cycle; 1536 - int free_threshold; 1537 - 1538 - ASSERT(BTOBB(need_bytes) < log->l_logBBsize); 1539 - 1540 - free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); 1541 - free_blocks = BTOBBT(free_bytes); 1542 - 1543 - /* 1544 - * Set the threshold for the minimum number of free blocks in the 1545 - * log to the maximum of what the caller needs, one quarter of the 1546 - * log, and 256 blocks. 
1547 - */ 1548 - free_threshold = BTOBB(need_bytes); 1549 - free_threshold = max(free_threshold, (log->l_logBBsize >> 2)); 1550 - free_threshold = max(free_threshold, 256); 1551 - if (free_blocks >= free_threshold) 1552 - return NULLCOMMITLSN; 1553 - 1554 - xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, 1555 - &threshold_block); 1556 - threshold_block += free_threshold; 1557 - if (threshold_block >= log->l_logBBsize) { 1558 - threshold_block -= log->l_logBBsize; 1559 - threshold_cycle += 1; 1560 - } 1561 - threshold_lsn = xlog_assign_lsn(threshold_cycle, 1562 - threshold_block); 1563 - /* 1564 - * Don't pass in an lsn greater than the lsn of the last 1565 - * log record known to be on disk. Use a snapshot of the last sync lsn 1566 - * so that it doesn't change between the compare and the set. 1567 - */ 1568 - last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); 1569 - if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) 1570 - threshold_lsn = last_sync_lsn; 1571 - 1572 - return threshold_lsn; 1573 - } 1574 - 1575 - /* 1576 - * Push the tail of the log if we need to do so to maintain the free log space 1577 - * thresholds set out by xlog_grant_push_threshold. We may need to adopt a 1578 - * policy which pushes on an lsn which is further along in the log once we 1579 - * reach the high water mark. In this manner, we would be creating a low water 1580 - * mark. 1581 - */ 1582 - STATIC void 1583 - xlog_grant_push_ail( 1584 - struct xlog *log, 1585 - int need_bytes) 1586 - { 1587 - xfs_lsn_t threshold_lsn; 1588 - 1589 - threshold_lsn = xlog_grant_push_threshold(log, need_bytes); 1590 - if (threshold_lsn == NULLCOMMITLSN || xlog_is_shutdown(log)) 1591 - return; 1592 - 1593 - /* 1594 - * Get the transaction layer to kick the dirty buffers out to 1595 - * disk asynchronously. No point in trying to do this if 1596 - * the filesystem is shutting down. 
1597 - */ 1598 - xfs_ail_push(log->l_ailp, threshold_lsn); 1599 - } 1600 1669 1601 1670 /* 1602 1671 * Stamp cycle number in every block ··· 1813 2048 if (ticket) { 1814 2049 ticket->t_curr_res -= roundoff; 1815 2050 } else { 1816 - xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); 1817 - xlog_grant_add_space(log, &log->l_write_head.grant, roundoff); 2051 + xlog_grant_add_space(&log->l_reserve_head, roundoff); 2052 + xlog_grant_add_space(&log->l_write_head, roundoff); 1818 2053 } 1819 2054 1820 2055 /* put cycle number in every block */ ··· 2440 2675 } 2441 2676 2442 2677 /* 2443 - * Completion of a iclog IO does not imply that a transaction has completed, as 2444 - * transactions can be large enough to span many iclogs. We cannot change the 2445 - * tail of the log half way through a transaction as this may be the only 2446 - * transaction in the log and moving the tail to point to the middle of it 2447 - * will prevent recovery from finding the start of the transaction. Hence we 2448 - * should only update the last_sync_lsn if this iclog contains transaction 2449 - * completion callbacks on it. 2450 - * 2451 - * We have to do this before we drop the icloglock to ensure we are the only one 2452 - * that can update it. 2453 - * 2454 - * If we are moving the last_sync_lsn forwards, we also need to ensure we kick 2455 - * the reservation grant head pushing. This is due to the fact that the push 2456 - * target is bound by the current last_sync_lsn value. Hence if we have a large 2457 - * amount of log space bound up in this committing transaction then the 2458 - * last_sync_lsn value may be the limiting factor preventing tail pushing from 2459 - * freeing space in the log. Hence once we've updated the last_sync_lsn we 2460 - * should push the AIL to ensure the push target (and hence the grant head) is 2461 - * no longer bound by the old log head location and can move forwards and make 2462 - * progress again. 
2463 - */ 2464 - static void 2465 - xlog_state_set_callback( 2466 - struct xlog *log, 2467 - struct xlog_in_core *iclog, 2468 - xfs_lsn_t header_lsn) 2469 - { 2470 - trace_xlog_iclog_callback(iclog, _RET_IP_); 2471 - iclog->ic_state = XLOG_STATE_CALLBACK; 2472 - 2473 - ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), 2474 - header_lsn) <= 0); 2475 - 2476 - if (list_empty_careful(&iclog->ic_callbacks)) 2477 - return; 2478 - 2479 - atomic64_set(&log->l_last_sync_lsn, header_lsn); 2480 - xlog_grant_push_ail(log, 0); 2481 - } 2482 - 2483 - /* 2484 2678 * Return true if we need to stop processing, false to continue to the next 2485 2679 * iclog. The caller will need to run callbacks if the iclog is returned in the 2486 2680 * XLOG_STATE_CALLBACK state. ··· 2470 2746 lowest_lsn = xlog_get_lowest_lsn(log); 2471 2747 if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0) 2472 2748 return false; 2473 - xlog_state_set_callback(log, iclog, header_lsn); 2749 + /* 2750 + * If there are no callbacks on this iclog, we can mark it clean 2751 + * immediately and return. Otherwise we need to run the 2752 + * callbacks. 
2753 + */ 2754 + if (list_empty(&iclog->ic_callbacks)) { 2755 + xlog_state_clean_iclog(log, iclog); 2756 + return false; 2757 + } 2758 + trace_xlog_iclog_callback(iclog, _RET_IP_); 2759 + iclog->ic_state = XLOG_STATE_CALLBACK; 2474 2760 return false; 2475 2761 default: 2476 2762 /* ··· 2734 3000 if (ticket->t_cnt > 0) 2735 3001 ticket->t_cnt--; 2736 3002 2737 - xlog_grant_sub_space(log, &log->l_reserve_head.grant, 2738 - ticket->t_curr_res); 2739 - xlog_grant_sub_space(log, &log->l_write_head.grant, 2740 - ticket->t_curr_res); 3003 + xlog_grant_sub_space(&log->l_reserve_head, ticket->t_curr_res); 3004 + xlog_grant_sub_space(&log->l_write_head, ticket->t_curr_res); 2741 3005 ticket->t_curr_res = ticket->t_unit_res; 2742 3006 2743 3007 trace_xfs_log_ticket_regrant_sub(log, ticket); 2744 3008 2745 3009 /* just return if we still have some of the pre-reserved space */ 2746 3010 if (!ticket->t_cnt) { 2747 - xlog_grant_add_space(log, &log->l_reserve_head.grant, 2748 - ticket->t_unit_res); 3011 + xlog_grant_add_space(&log->l_reserve_head, ticket->t_unit_res); 2749 3012 trace_xfs_log_ticket_regrant_exit(log, ticket); 2750 3013 2751 3014 ticket->t_curr_res = ticket->t_unit_res; ··· 2789 3058 bytes += ticket->t_unit_res*ticket->t_cnt; 2790 3059 } 2791 3060 2792 - xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes); 2793 - xlog_grant_sub_space(log, &log->l_write_head.grant, bytes); 3061 + xlog_grant_sub_space(&log->l_reserve_head, bytes); 3062 + xlog_grant_sub_space(&log->l_write_head, bytes); 2794 3063 2795 3064 trace_xfs_log_ticket_ungrant_exit(log, ticket); 2796 3065 ··· 3263 3532 } 3264 3533 3265 3534 #if defined(DEBUG) 3266 - /* 3267 - * Check to make sure the grant write head didn't just over lap the tail. If 3268 - * the cycles are the same, we can't be overlapping. Otherwise, make sure that 3269 - * the cycles differ by exactly one and check the byte count. 3270 - * 3271 - * This check is run unlocked, so can give false positives. 
Rather than assert 3272 - * on failures, use a warn-once flag and a panic tag to allow the admin to 3273 - * determine if they want to panic the machine when such an error occurs. For 3274 - * debug kernels this will have the same effect as using an assert but, unlinke 3275 - * an assert, it can be turned off at runtime. 3276 - */ 3277 - STATIC void 3278 - xlog_verify_grant_tail( 3279 - struct xlog *log) 3535 + static void 3536 + xlog_verify_dump_tail( 3537 + struct xlog *log, 3538 + struct xlog_in_core *iclog) 3280 3539 { 3281 - int tail_cycle, tail_blocks; 3282 - int cycle, space; 3283 - 3284 - xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space); 3285 - xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); 3286 - if (tail_cycle != cycle) { 3287 - if (cycle - 1 != tail_cycle && 3288 - !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) { 3289 - xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, 3290 - "%s: cycle - 1 != tail_cycle", __func__); 3291 - } 3292 - 3293 - if (space > BBTOB(tail_blocks) && 3294 - !test_and_set_bit(XLOG_TAIL_WARN, &log->l_opstate)) { 3295 - xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, 3296 - "%s: space > BBTOB(tail_blocks)", __func__); 3297 - } 3298 - } 3540 + xfs_alert(log->l_mp, 3541 + "ran out of log space tail 0x%llx/0x%llx, head lsn 0x%llx, head 0x%x/0x%x, prev head 0x%x/0x%x", 3542 + iclog ? be64_to_cpu(iclog->ic_header.h_tail_lsn) : -1, 3543 + atomic64_read(&log->l_tail_lsn), 3544 + log->l_ailp->ail_head_lsn, 3545 + log->l_curr_cycle, log->l_curr_block, 3546 + log->l_prev_cycle, log->l_prev_block); 3547 + xfs_alert(log->l_mp, 3548 + "write grant 0x%llx, reserve grant 0x%llx, tail_space 0x%llx, size 0x%x, iclog flags 0x%x", 3549 + atomic64_read(&log->l_write_head.grant), 3550 + atomic64_read(&log->l_reserve_head.grant), 3551 + log->l_tail_space, log->l_logsize, 3552 + iclog ? iclog->ic_flags : -1); 3299 3553 } 3300 3554 3301 - /* check if it will fit */ 3555 + /* Check if the new iclog will fit in the log. 
*/ 3302 3556 STATIC void 3303 3557 xlog_verify_tail_lsn( 3304 3558 struct xlog *log, ··· 3292 3576 xfs_lsn_t tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn); 3293 3577 int blocks; 3294 3578 3295 - if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) { 3296 - blocks = 3297 - log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn)); 3298 - if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize)) 3299 - xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); 3300 - } else { 3301 - ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle); 3579 + if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) { 3580 + blocks = log->l_logBBsize - 3581 + (log->l_prev_block - BLOCK_LSN(tail_lsn)); 3582 + if (blocks < BTOBB(iclog->ic_offset) + 3583 + BTOBB(log->l_iclog_hsize)) { 3584 + xfs_emerg(log->l_mp, 3585 + "%s: ran out of log space", __func__); 3586 + xlog_verify_dump_tail(log, iclog); 3587 + } 3588 + return; 3589 + } 3302 3590 3303 - if (BLOCK_LSN(tail_lsn) == log->l_prev_block) 3591 + if (CYCLE_LSN(tail_lsn) + 1 != log->l_prev_cycle) { 3592 + xfs_emerg(log->l_mp, "%s: head has wrapped tail.", __func__); 3593 + xlog_verify_dump_tail(log, iclog); 3594 + return; 3595 + } 3596 + if (BLOCK_LSN(tail_lsn) == log->l_prev_block) { 3304 3597 xfs_emerg(log->l_mp, "%s: tail wrapped", __func__); 3598 + xlog_verify_dump_tail(log, iclog); 3599 + return; 3600 + } 3305 3601 3306 3602 blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block; 3307 - if (blocks < BTOBB(iclog->ic_offset) + 1) 3308 - xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); 3309 - } 3603 + if (blocks < BTOBB(iclog->ic_offset) + 1) { 3604 + xfs_emerg(log->l_mp, "%s: ran out of iclog space", __func__); 3605 + xlog_verify_dump_tail(log, iclog); 3606 + } 3310 3607 } 3311 3608 3312 3609 /*

-1

fs/xfs/xfs_log.h

··· 156 156 void xfs_log_clean(struct xfs_mount *mp); 157 157 bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); 158 158 159 - xfs_lsn_t xlog_grant_push_threshold(struct xlog *log, int need_bytes); 160 159 bool xlog_force_shutdown(struct xlog *log, uint32_t shutdown_flags); 161 160 162 161 int xfs_attr_use_log_assist(struct xfs_mount *mp);

+175 -2

fs/xfs/xfs_log_cil.c

··· 694 694 } 695 695 } 696 696 697 + static inline void 698 + xlog_cil_ail_insert_batch( 699 + struct xfs_ail *ailp, 700 + struct xfs_ail_cursor *cur, 701 + struct xfs_log_item **log_items, 702 + int nr_items, 703 + xfs_lsn_t commit_lsn) 704 + { 705 + int i; 706 + 707 + spin_lock(&ailp->ail_lock); 708 + /* xfs_trans_ail_update_bulk drops ailp->ail_lock */ 709 + xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); 710 + 711 + for (i = 0; i < nr_items; i++) { 712 + struct xfs_log_item *lip = log_items[i]; 713 + 714 + if (lip->li_ops->iop_unpin) 715 + lip->li_ops->iop_unpin(lip, 0); 716 + } 717 + } 718 + 719 + /* 720 + * Take the checkpoint's log vector chain of items and insert the attached log 721 + * items into the AIL. This uses bulk insertion techniques to minimise AIL lock 722 + * traffic. 723 + * 724 + * The AIL tracks log items via the start record LSN of the checkpoint, 725 + * not the commit record LSN. This is because we can pipeline multiple 726 + * checkpoints, and so the start record of checkpoint N+1 can be 727 + * written before the commit record of checkpoint N. i.e: 728 + * 729 + * start N commit N 730 + * +-------------+------------+----------------+ 731 + * start N+1 commit N+1 732 + * 733 + * The tail of the log cannot be moved to the LSN of commit N when all 734 + * the items of that checkpoint are written back, because then the 735 + * start record for N+1 is no longer in the active portion of the log 736 + * and recovery will fail/corrupt the filesystem. 737 + * 738 + * Hence when all the log items in checkpoint N are written back, the 739 + * tail of the log must now only move as far forwards as the start LSN 740 + * of checkpoint N+1. 741 + * 742 + * If we are called with the aborted flag set, it is because a log write during 743 + * a CIL checkpoint commit has failed. 
In this case, all the items in the 744 + * checkpoint have already gone through iop_committed and iop_committing, which 745 + * means that checkpoint commit abort handling is treated exactly the same as an 746 + * iclog write error even though we haven't started any IO yet. Hence in this 747 + * case all we need to do is iop_committed processing, followed by an 748 + * iop_unpin(aborted) call. 749 + * 750 + * The AIL cursor is used to optimise the insert process. If commit_lsn is not 751 + * at the end of the AIL, the insert cursor avoids the need to walk the AIL to 752 + * find the insertion point on every xlog_cil_ail_insert_batch() call. This 753 + * saves a lot of needless list walking and is a net win, even though it 754 + * slightly increases the amount of AIL lock traffic to set it up and tear it 755 + * down. 756 + */ 757 + static void 758 + xlog_cil_ail_insert( 759 + struct xfs_cil_ctx *ctx, 760 + bool aborted) 761 + { 762 + #define LOG_ITEM_BATCH_SIZE 32 763 + struct xfs_ail *ailp = ctx->cil->xc_log->l_ailp; 764 + struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; 765 + struct xfs_log_vec *lv; 766 + struct xfs_ail_cursor cur; 767 + xfs_lsn_t old_head; 768 + int i = 0; 769 + 770 + /* 771 + * Update the AIL head LSN with the commit record LSN of this 772 + * checkpoint. As iclogs are always completed in order, this should 773 + * always be the same (as iclogs can contain multiple commit records) or 774 + * higher LSN than the current head. We do this before insertion of the 775 + * items so that log space checks during insertion will reflect the 776 + * space that this checkpoint has already consumed. We call 777 + * xfs_ail_update_finish() so that tail space and space-based wakeups 778 + * will be recalculated appropriately. 
779 + */ 780 + ASSERT(XFS_LSN_CMP(ctx->commit_lsn, ailp->ail_head_lsn) >= 0 || 781 + aborted); 782 + spin_lock(&ailp->ail_lock); 783 + xfs_trans_ail_cursor_last(ailp, &cur, ctx->start_lsn); 784 + old_head = ailp->ail_head_lsn; 785 + ailp->ail_head_lsn = ctx->commit_lsn; 786 + /* xfs_ail_update_finish() drops the ail_lock */ 787 + xfs_ail_update_finish(ailp, NULLCOMMITLSN); 788 + 789 + /* 790 + * We move the AIL head forwards to account for the space used in the 791 + * log before we remove that space from the grant heads. This prevents a 792 + * transient condition where reservation space appears to become 793 + * available on return, only for it to disappear again immediately as 794 + * the AIL head update accounts in the log tail space. 795 + */ 796 + smp_wmb(); /* paired with smp_rmb in xlog_grant_space_left */ 797 + xlog_grant_return_space(ailp->ail_log, old_head, ailp->ail_head_lsn); 798 + 799 + /* unpin all the log items */ 800 + list_for_each_entry(lv, &ctx->lv_chain, lv_list) { 801 + struct xfs_log_item *lip = lv->lv_item; 802 + xfs_lsn_t item_lsn; 803 + 804 + if (aborted) 805 + set_bit(XFS_LI_ABORTED, &lip->li_flags); 806 + 807 + if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { 808 + lip->li_ops->iop_release(lip); 809 + continue; 810 + } 811 + 812 + if (lip->li_ops->iop_committed) 813 + item_lsn = lip->li_ops->iop_committed(lip, 814 + ctx->start_lsn); 815 + else 816 + item_lsn = ctx->start_lsn; 817 + 818 + /* item_lsn of -1 means the item needs no further processing */ 819 + if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) 820 + continue; 821 + 822 + /* 823 + * if we are aborting the operation, no point in inserting the 824 + * object into the AIL as we are in a shutdown situation. 
825 + */ 826 + if (aborted) { 827 + ASSERT(xlog_is_shutdown(ailp->ail_log)); 828 + if (lip->li_ops->iop_unpin) 829 + lip->li_ops->iop_unpin(lip, 1); 830 + continue; 831 + } 832 + 833 + if (item_lsn != ctx->start_lsn) { 834 + 835 + /* 836 + * Not a bulk update option due to unusual item_lsn. 837 + * Push into AIL immediately, rechecking the lsn once 838 + * we have the ail lock. Then unpin the item. This does 839 + * not affect the AIL cursor the bulk insert path is 840 + * using. 841 + */ 842 + spin_lock(&ailp->ail_lock); 843 + if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) 844 + xfs_trans_ail_update(ailp, lip, item_lsn); 845 + else 846 + spin_unlock(&ailp->ail_lock); 847 + if (lip->li_ops->iop_unpin) 848 + lip->li_ops->iop_unpin(lip, 0); 849 + continue; 850 + } 851 + 852 + /* Item is a candidate for bulk AIL insert. */ 853 + log_items[i++] = lv->lv_item; 854 + if (i >= LOG_ITEM_BATCH_SIZE) { 855 + xlog_cil_ail_insert_batch(ailp, &cur, log_items, 856 + LOG_ITEM_BATCH_SIZE, ctx->start_lsn); 857 + i = 0; 858 + } 859 + } 860 + 861 + /* make sure we insert the remainder! */ 862 + if (i) 863 + xlog_cil_ail_insert_batch(ailp, &cur, log_items, i, 864 + ctx->start_lsn); 865 + 866 + spin_lock(&ailp->ail_lock); 867 + xfs_trans_ail_cursor_done(&cur); 868 + spin_unlock(&ailp->ail_lock); 869 + } 870 + 697 871 static void 698 872 xlog_cil_free_logvec( 699 873 struct list_head *lv_chain) ··· 907 733 spin_unlock(&ctx->cil->xc_push_lock); 908 734 } 909 735 910 - xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain, 911 - ctx->start_lsn, abort); 736 + xlog_cil_ail_insert(ctx, abort); 912 737 913 738 xfs_extent_busy_sort(&ctx->busy_extents.extent_list); 914 739 xfs_extent_busy_clear(mp, &ctx->busy_extents.extent_list,

+25 -36

fs/xfs/xfs_log_priv.h

··· 431 431 int l_prev_block; /* previous logical log block */ 432 432 433 433 /* 434 - * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and 435 - * read without needing to hold specific locks. To avoid operations 436 - * contending with other hot objects, place each of them on a separate 437 - * cacheline. 434 + * l_tail_lsn is atomic so it can be set and read without needing to 435 + * hold specific locks. To avoid operations contending with other hot 436 + * objects, it is placed on a separate cacheline. 438 437 */ 439 - /* lsn of last LR on disk */ 440 - atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp; 441 438 /* lsn of 1st LR with unflushed * buffers */ 442 439 atomic64_t l_tail_lsn ____cacheline_aligned_in_smp; 443 440 444 441 struct xlog_grant_head l_reserve_head; 445 442 struct xlog_grant_head l_write_head; 443 + uint64_t l_tail_space; 446 444 447 445 struct xfs_kobj l_kobj; 448 446 ··· 544 546 } 545 547 546 548 /* 547 - * When we crack the grant head, we sample it first so that the value will not 548 - * change while we are cracking it into the component values. This means we 549 - * will always get consistent component values to work from. 
550 - */ 551 - static inline void 552 - xlog_crack_grant_head_val(int64_t val, int *cycle, int *space) 553 - { 554 - *cycle = val >> 32; 555 - *space = val & 0xffffffff; 556 - } 557 - 558 - static inline void 559 - xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space) 560 - { 561 - xlog_crack_grant_head_val(atomic64_read(head), cycle, space); 562 - } 563 - 564 - static inline int64_t 565 - xlog_assign_grant_head_val(int cycle, int space) 566 - { 567 - return ((int64_t)cycle << 32) | space; 568 - } 569 - 570 - static inline void 571 - xlog_assign_grant_head(atomic64_t *head, int cycle, int space) 572 - { 573 - atomic64_set(head, xlog_assign_grant_head_val(cycle, space)); 574 - } 575 - 576 - /* 577 549 * Committed Item List interfaces 578 550 */ 579 551 int xlog_cil_init(struct xlog *log); ··· 590 622 591 623 int xlog_wait_on_iclog(struct xlog_in_core *iclog) 592 624 __releases(iclog->ic_log->l_icloglock); 625 + 626 + /* Calculate the distance between two LSNs in bytes */ 627 + static inline uint64_t 628 + xlog_lsn_sub( 629 + struct xlog *log, 630 + xfs_lsn_t high, 631 + xfs_lsn_t low) 632 + { 633 + uint32_t hi_cycle = CYCLE_LSN(high); 634 + uint32_t hi_block = BLOCK_LSN(high); 635 + uint32_t lo_cycle = CYCLE_LSN(low); 636 + uint32_t lo_block = BLOCK_LSN(low); 637 + 638 + if (hi_cycle == lo_cycle) 639 + return BBTOB(hi_block - lo_block); 640 + ASSERT((hi_cycle == lo_cycle + 1) || xlog_is_shutdown(log)); 641 + return (uint64_t)log->l_logsize - BBTOB(lo_block - hi_block); 642 + } 643 + 644 + void xlog_grant_return_space(struct xlog *log, xfs_lsn_t old_head, 645 + xfs_lsn_t new_head); 593 646 594 647 /* 595 648 * The LSN is valid so long as it is behind the current LSN. If it isn't, this

+13 -15

fs/xfs/xfs_log_recover.c

··· 1177 1177 */ 1178 1178 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1179 1179 log->l_curr_cycle, after_umount_blk); 1180 - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1181 - log->l_curr_cycle, after_umount_blk); 1180 + log->l_ailp->ail_head_lsn = 1181 + atomic64_read(&log->l_tail_lsn); 1182 1182 *tail_blk = after_umount_blk; 1183 1183 1184 1184 *clean = true; ··· 1212 1212 if (bump_cycle) 1213 1213 log->l_curr_cycle++; 1214 1214 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 1215 - atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 1216 - xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, 1217 - BBTOB(log->l_curr_block)); 1218 - xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, 1219 - BBTOB(log->l_curr_block)); 1215 + log->l_ailp->ail_head_lsn = be64_to_cpu(rhead->h_lsn); 1220 1216 } 1221 1217 1222 1218 /* ··· 2485 2489 2486 2490 ohead = (struct xlog_op_header *)dp; 2487 2491 dp += sizeof(*ohead); 2488 - ASSERT(dp <= end); 2492 + if (dp > end) { 2493 + xfs_warn(log->l_mp, "%s: op header overrun", __func__); 2494 + return -EFSCORRUPTED; 2495 + } 2489 2496 2490 2497 /* errors will abort recovery */ 2491 2498 error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, ··· 3362 3363 3363 3364 /* 3364 3365 * We now update the tail_lsn since much of the recovery has completed 3365 - * and there may be space available to use. If there were no extent 3366 - * or iunlinks, we can free up the entire log and set the tail_lsn to 3367 - * be the last_sync_lsn. This was set in xlog_find_tail to be the 3368 - * lsn of the last known good LR on disk. If there are extent frees 3369 - * or iunlinks they will have some entries in the AIL; so we look at 3370 - * the AIL to determine how to set the tail_lsn. 3366 + * and there may be space available to use. If there were no extent or 3367 + * iunlinks, we can free up the entire log. 
This was set in 3368 + * xlog_find_tail to be the lsn of the last known good LR on disk. If 3369 + * there are extent frees or iunlinks they will have some entries in the 3370 + * AIL; so we look at the AIL to determine how to set the tail_lsn. 3371 3371 */ 3372 - xlog_assign_tail_lsn(mp); 3372 + xfs_ail_assign_tail_lsn(log->l_ailp); 3373 3373 3374 3374 /* 3375 3375 * Now that we've finished replaying all buffer and inode updates,

+5 -2

fs/xfs/xfs_qm.c

··· 793 793 return error; 794 794 795 795 if (need_alloc) { 796 + struct xfs_icreate_args args = { 797 + .mode = S_IFREG, 798 + .flags = XFS_ICREATE_UNLINKABLE, 799 + }; 796 800 xfs_ino_t ino; 797 801 798 802 error = xfs_dialloc(&tp, 0, S_IFREG, &ino); 799 803 if (!error) 800 - error = xfs_init_new_inode(&nop_mnt_idmap, tp, NULL, ino, 801 - S_IFREG, 1, 0, 0, false, ipp); 804 + error = xfs_icreate(tp, ino, &args, ipp); 802 805 if (error) { 803 806 xfs_trans_cancel(tp); 804 807 return error;

-1

fs/xfs/xfs_qm_bhv.c

··· 11 11 #include "xfs_trans_resv.h" 12 12 #include "xfs_mount.h" 13 13 #include "xfs_quota.h" 14 - #include "xfs_mount.h" 15 14 #include "xfs_inode.h" 16 15 #include "xfs_trans.h" 17 16 #include "xfs_qm.h"

+56 -54

fs/xfs/xfs_refcount_item.c

··· 21 21 #include "xfs_log_priv.h" 22 22 #include "xfs_log_recover.h" 23 23 #include "xfs_ag.h" 24 + #include "xfs_btree.h" 25 + #include "xfs_trace.h" 24 26 25 27 struct kmem_cache *xfs_cui_cache; 26 28 struct kmem_cache *xfs_cud_cache; ··· 229 227 .iop_intent = xfs_cud_item_intent, 230 228 }; 231 229 230 + static inline struct xfs_refcount_intent *ci_entry(const struct list_head *e) 231 + { 232 + return list_entry(e, struct xfs_refcount_intent, ri_list); 233 + } 234 + 232 235 /* Sort refcount intents by AG. */ 233 236 static int 234 237 xfs_refcount_update_diff_items( ··· 241 234 const struct list_head *a, 242 235 const struct list_head *b) 243 236 { 244 - struct xfs_refcount_intent *ra; 245 - struct xfs_refcount_intent *rb; 246 - 247 - ra = container_of(a, struct xfs_refcount_intent, ri_list); 248 - rb = container_of(b, struct xfs_refcount_intent, ri_list); 237 + struct xfs_refcount_intent *ra = ci_entry(a); 238 + struct xfs_refcount_intent *rb = ci_entry(b); 249 239 250 240 return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno; 251 - } 252 - 253 - /* Set the phys extent flags for this reverse mapping. */ 254 - static void 255 - xfs_trans_set_refcount_flags( 256 - struct xfs_phys_extent *pmap, 257 - enum xfs_refcount_intent_type type) 258 - { 259 - pmap->pe_flags = 0; 260 - switch (type) { 261 - case XFS_REFCOUNT_INCREASE: 262 - case XFS_REFCOUNT_DECREASE: 263 - case XFS_REFCOUNT_ALLOC_COW: 264 - case XFS_REFCOUNT_FREE_COW: 265 - pmap->pe_flags |= type; 266 - break; 267 - default: 268 - ASSERT(0); 269 - } 270 241 } 271 242 272 243 /* Log refcount updates in the intent item. 
*/ ··· 267 282 pmap = &cuip->cui_format.cui_extents[next_extent]; 268 283 pmap->pe_startblock = ri->ri_startblock; 269 284 pmap->pe_len = ri->ri_blockcount; 270 - xfs_trans_set_refcount_flags(pmap, ri->ri_type); 285 + 286 + pmap->pe_flags = 0; 287 + switch (ri->ri_type) { 288 + case XFS_REFCOUNT_INCREASE: 289 + case XFS_REFCOUNT_DECREASE: 290 + case XFS_REFCOUNT_ALLOC_COW: 291 + case XFS_REFCOUNT_FREE_COW: 292 + pmap->pe_flags |= ri->ri_type; 293 + break; 294 + default: 295 + ASSERT(0); 296 + } 271 297 } 272 298 273 299 static struct xfs_log_item * ··· 320 324 return &cudp->cud_item; 321 325 } 322 326 323 - /* Take a passive ref to the AG containing the space we're refcounting. */ 327 + /* Add this deferred CUI to the transaction. */ 324 328 void 325 - xfs_refcount_update_get_group( 326 - struct xfs_mount *mp, 329 + xfs_refcount_defer_add( 330 + struct xfs_trans *tp, 327 331 struct xfs_refcount_intent *ri) 328 332 { 329 - xfs_agnumber_t agno; 333 + struct xfs_mount *mp = tp->t_mountp; 330 334 331 - agno = XFS_FSB_TO_AGNO(mp, ri->ri_startblock); 332 - ri->ri_pag = xfs_perag_intent_get(mp, agno); 335 + trace_xfs_refcount_defer(mp, ri); 336 + 337 + ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_startblock); 338 + xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type); 333 339 } 334 340 335 - /* Release a passive AG ref after finishing refcounting work. */ 336 - static inline void 337 - xfs_refcount_update_put_group( 338 - struct xfs_refcount_intent *ri) 341 + /* Cancel a deferred refcount update. */ 342 + STATIC void 343 + xfs_refcount_update_cancel_item( 344 + struct list_head *item) 339 345 { 346 + struct xfs_refcount_intent *ri = ci_entry(item); 347 + 340 348 xfs_perag_intent_put(ri->ri_pag); 349 + kmem_cache_free(xfs_refcount_intent_cache, ri); 341 350 } 342 351 343 352 /* Process a deferred refcount update. 
*/ ··· 353 352 struct list_head *item, 354 353 struct xfs_btree_cur **state) 355 354 { 356 - struct xfs_refcount_intent *ri; 355 + struct xfs_refcount_intent *ri = ci_entry(item); 357 356 int error; 358 - 359 - ri = container_of(item, struct xfs_refcount_intent, ri_list); 360 357 361 358 /* Did we run out of reservation? Requeue what we didn't finish. */ 362 359 error = xfs_refcount_finish_one(tp, ri, state); ··· 364 365 return -EAGAIN; 365 366 } 366 367 367 - xfs_refcount_update_put_group(ri); 368 - kmem_cache_free(xfs_refcount_intent_cache, ri); 368 + xfs_refcount_update_cancel_item(item); 369 369 return error; 370 + } 371 + 372 + /* Clean up after calling xfs_refcount_finish_one. */ 373 + STATIC void 374 + xfs_refcount_finish_one_cleanup( 375 + struct xfs_trans *tp, 376 + struct xfs_btree_cur *rcur, 377 + int error) 378 + { 379 + struct xfs_buf *agbp; 380 + 381 + if (rcur == NULL) 382 + return; 383 + agbp = rcur->bc_ag.agbp; 384 + xfs_btree_del_cursor(rcur, error); 385 + if (error) 386 + xfs_trans_brelse(tp, agbp); 370 387 } 371 388 372 389 /* Abort all pending CUIs. */ ··· 391 376 struct xfs_log_item *intent) 392 377 { 393 378 xfs_cui_release(CUI_ITEM(intent)); 394 - } 395 - 396 - /* Cancel a deferred refcount update. */ 397 - STATIC void 398 - xfs_refcount_update_cancel_item( 399 - struct list_head *item) 400 - { 401 - struct xfs_refcount_intent *ri; 402 - 403 - ri = container_of(item, struct xfs_refcount_intent, ri_list); 404 - 405 - xfs_refcount_update_put_group(ri); 406 - kmem_cache_free(xfs_refcount_intent_cache, ri); 407 379 } 408 380 409 381 /* Is this recovered CUI ok? */ ··· 431 429 ri->ri_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; 432 430 ri->ri_startblock = pmap->pe_startblock; 433 431 ri->ri_blockcount = pmap->pe_len; 434 - xfs_refcount_update_get_group(mp, ri); 432 + ri->ri_pag = xfs_perag_intent_get(mp, pmap->pe_startblock); 435 433 436 434 xfs_defer_add_item(dfp, &ri->ri_list); 437 435 }

+5

fs/xfs/xfs_refcount_item.h

··· 71 71 extern struct kmem_cache *xfs_cui_cache; 72 72 extern struct kmem_cache *xfs_cud_cache; 73 73 74 + struct xfs_refcount_intent; 75 + 76 + void xfs_refcount_defer_add(struct xfs_trans *tp, 77 + struct xfs_refcount_intent *ri); 78 + 74 79 #endif /* __XFS_REFCOUNT_ITEM_H__ */

+1 -1

fs/xfs/xfs_reflink.c

··· 603 603 604 604 error = xfs_free_extent_later(*tpp, del.br_startblock, 605 605 del.br_blockcount, NULL, 606 - XFS_AG_RESV_NONE, false); 606 + XFS_AG_RESV_NONE, 0); 607 607 if (error) 608 608 break; 609 609

-10

fs/xfs/xfs_reflink.h

··· 6 6 #ifndef __XFS_REFLINK_H 7 7 #define __XFS_REFLINK_H 1 8 8 9 - static inline bool xfs_is_always_cow_inode(struct xfs_inode *ip) 10 - { 11 - return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount); 12 - } 13 - 14 - static inline bool xfs_is_cow_inode(struct xfs_inode *ip) 15 - { 16 - return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); 17 - } 18 - 19 9 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, 20 10 struct xfs_bmbt_irec *irec, bool *shared); 21 11 int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,

+80 -81

fs/xfs/xfs_rmap_item.c

··· 21 21 #include "xfs_log_priv.h" 22 22 #include "xfs_log_recover.h" 23 23 #include "xfs_ag.h" 24 + #include "xfs_btree.h" 25 + #include "xfs_trace.h" 24 26 25 27 struct kmem_cache *xfs_rui_cache; 26 28 struct kmem_cache *xfs_rud_cache; ··· 228 226 .iop_intent = xfs_rud_item_intent, 229 227 }; 230 228 231 - /* Set the map extent flags for this reverse mapping. */ 232 - static void 233 - xfs_trans_set_rmap_flags( 234 - struct xfs_map_extent *map, 235 - enum xfs_rmap_intent_type type, 236 - int whichfork, 237 - xfs_exntst_t state) 229 + static inline struct xfs_rmap_intent *ri_entry(const struct list_head *e) 238 230 { 231 + return list_entry(e, struct xfs_rmap_intent, ri_list); 232 + } 233 + 234 + /* Sort rmap intents by AG. */ 235 + static int 236 + xfs_rmap_update_diff_items( 237 + void *priv, 238 + const struct list_head *a, 239 + const struct list_head *b) 240 + { 241 + struct xfs_rmap_intent *ra = ri_entry(a); 242 + struct xfs_rmap_intent *rb = ri_entry(b); 243 + 244 + return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno; 245 + } 246 + 247 + /* Log rmap updates in the intent item. */ 248 + STATIC void 249 + xfs_rmap_update_log_item( 250 + struct xfs_trans *tp, 251 + struct xfs_rui_log_item *ruip, 252 + struct xfs_rmap_intent *ri) 253 + { 254 + uint next_extent; 255 + struct xfs_map_extent *map; 256 + 257 + /* 258 + * atomic_inc_return gives us the value after the increment; 259 + * we want to use it as an array index so we need to subtract 1 from 260 + * it. 
261 + */ 262 + next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; 263 + ASSERT(next_extent < ruip->rui_format.rui_nextents); 264 + map = &ruip->rui_format.rui_extents[next_extent]; 265 + map->me_owner = ri->ri_owner; 266 + map->me_startblock = ri->ri_bmap.br_startblock; 267 + map->me_startoff = ri->ri_bmap.br_startoff; 268 + map->me_len = ri->ri_bmap.br_blockcount; 269 + 239 270 map->me_flags = 0; 240 - if (state == XFS_EXT_UNWRITTEN) 271 + if (ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN) 241 272 map->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; 242 - if (whichfork == XFS_ATTR_FORK) 273 + if (ri->ri_whichfork == XFS_ATTR_FORK) 243 274 map->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; 244 - switch (type) { 275 + switch (ri->ri_type) { 245 276 case XFS_RMAP_MAP: 246 277 map->me_flags |= XFS_RMAP_EXTENT_MAP; 247 278 break; ··· 302 267 default: 303 268 ASSERT(0); 304 269 } 305 - } 306 - 307 - /* Sort rmap intents by AG. */ 308 - static int 309 - xfs_rmap_update_diff_items( 310 - void *priv, 311 - const struct list_head *a, 312 - const struct list_head *b) 313 - { 314 - struct xfs_rmap_intent *ra; 315 - struct xfs_rmap_intent *rb; 316 - 317 - ra = container_of(a, struct xfs_rmap_intent, ri_list); 318 - rb = container_of(b, struct xfs_rmap_intent, ri_list); 319 - 320 - return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno; 321 - } 322 - 323 - /* Log rmap updates in the intent item. */ 324 - STATIC void 325 - xfs_rmap_update_log_item( 326 - struct xfs_trans *tp, 327 - struct xfs_rui_log_item *ruip, 328 - struct xfs_rmap_intent *ri) 329 - { 330 - uint next_extent; 331 - struct xfs_map_extent *map; 332 - 333 - /* 334 - * atomic_inc_return gives us the value after the increment; 335 - * we want to use it as an array index so we need to subtract 1 from 336 - * it. 
337 - */ 338 - next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; 339 - ASSERT(next_extent < ruip->rui_format.rui_nextents); 340 - map = &ruip->rui_format.rui_extents[next_extent]; 341 - map->me_owner = ri->ri_owner; 342 - map->me_startblock = ri->ri_bmap.br_startblock; 343 - map->me_startoff = ri->ri_bmap.br_startoff; 344 - map->me_len = ri->ri_bmap.br_blockcount; 345 - xfs_trans_set_rmap_flags(map, ri->ri_type, ri->ri_whichfork, 346 - ri->ri_bmap.br_state); 347 270 } 348 271 349 272 static struct xfs_log_item * ··· 343 350 return &rudp->rud_item; 344 351 } 345 352 346 - /* Take a passive ref to the AG containing the space we're rmapping. */ 353 + /* Add this deferred RUI to the transaction. */ 347 354 void 348 - xfs_rmap_update_get_group( 349 - struct xfs_mount *mp, 355 + xfs_rmap_defer_add( 356 + struct xfs_trans *tp, 350 357 struct xfs_rmap_intent *ri) 351 358 { 352 - xfs_agnumber_t agno; 359 + struct xfs_mount *mp = tp->t_mountp; 353 360 354 - agno = XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock); 355 - ri->ri_pag = xfs_perag_intent_get(mp, agno); 361 + trace_xfs_rmap_defer(mp, ri); 362 + 363 + ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_bmap.br_startblock); 364 + xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type); 356 365 } 357 366 358 - /* Release a passive AG ref after finishing rmapping work. */ 359 - static inline void 360 - xfs_rmap_update_put_group( 361 - struct xfs_rmap_intent *ri) 367 + /* Cancel a deferred rmap update. */ 368 + STATIC void 369 + xfs_rmap_update_cancel_item( 370 + struct list_head *item) 362 371 { 372 + struct xfs_rmap_intent *ri = ri_entry(item); 373 + 363 374 xfs_perag_intent_put(ri->ri_pag); 375 + kmem_cache_free(xfs_rmap_intent_cache, ri); 364 376 } 365 377 366 378 /* Process a deferred rmap update. 
*/ ··· 376 378 struct list_head *item, 377 379 struct xfs_btree_cur **state) 378 380 { 379 - struct xfs_rmap_intent *ri; 381 + struct xfs_rmap_intent *ri = ri_entry(item); 380 382 int error; 381 - 382 - ri = container_of(item, struct xfs_rmap_intent, ri_list); 383 383 384 384 error = xfs_rmap_finish_one(tp, ri, state); 385 385 386 - xfs_rmap_update_put_group(ri); 387 - kmem_cache_free(xfs_rmap_intent_cache, ri); 386 + xfs_rmap_update_cancel_item(item); 388 387 return error; 388 + } 389 + 390 + /* Clean up after calling xfs_rmap_finish_one. */ 391 + STATIC void 392 + xfs_rmap_finish_one_cleanup( 393 + struct xfs_trans *tp, 394 + struct xfs_btree_cur *rcur, 395 + int error) 396 + { 397 + struct xfs_buf *agbp = NULL; 398 + 399 + if (rcur == NULL) 400 + return; 401 + agbp = rcur->bc_ag.agbp; 402 + xfs_btree_del_cursor(rcur, error); 403 + if (error && agbp) 404 + xfs_trans_brelse(tp, agbp); 389 405 } 390 406 391 407 /* Abort all pending RUIs. */ ··· 408 396 struct xfs_log_item *intent) 409 397 { 410 398 xfs_rui_release(RUI_ITEM(intent)); 411 - } 412 - 413 - /* Cancel a deferred rmap update. */ 414 - STATIC void 415 - xfs_rmap_update_cancel_item( 416 - struct list_head *item) 417 - { 418 - struct xfs_rmap_intent *ri; 419 - 420 - ri = container_of(item, struct xfs_rmap_intent, ri_list); 421 - 422 - xfs_rmap_update_put_group(ri); 423 - kmem_cache_free(xfs_rmap_intent_cache, ri); 424 399 } 425 400 426 401 /* Is this recovered RUI ok? */ ··· 494 495 ri->ri_bmap.br_blockcount = map->me_len; 495 496 ri->ri_bmap.br_state = (map->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ? 496 497 XFS_EXT_UNWRITTEN : XFS_EXT_NORM; 497 - xfs_rmap_update_get_group(mp, ri); 498 + ri->ri_pag = xfs_perag_intent_get(mp, map->me_startblock); 498 499 499 500 xfs_defer_add_item(dfp, &ri->ri_list); 500 501 }

+4

fs/xfs/xfs_rmap_item.h

··· 71 71 extern struct kmem_cache *xfs_rui_cache; 72 72 extern struct kmem_cache *xfs_rud_cache; 73 73 74 + struct xfs_rmap_intent; 75 + 76 + void xfs_rmap_defer_add(struct xfs_trans *tp, struct xfs_rmap_intent *ri); 77 + 74 78 #endif /* __XFS_RMAP_ITEM_H__ */

+2 -1

fs/xfs/xfs_rtalloc.c

··· 12 12 #include "xfs_bit.h" 13 13 #include "xfs_mount.h" 14 14 #include "xfs_inode.h" 15 + #include "xfs_alloc.h" 15 16 #include "xfs_bmap.h" 16 17 #include "xfs_bmap_btree.h" 17 18 #include "xfs_bmap_util.h" ··· 1383 1382 start = 0; 1384 1383 } else if (xfs_bmap_adjacent(ap)) { 1385 1384 start = xfs_rtb_to_rtx(mp, ap->blkno); 1386 - } else if (ap->eof && ap->offset == 0) { 1385 + } else if (ap->datatype & XFS_ALLOC_INITIAL_USER_DATA) { 1387 1386 /* 1388 1387 * If it's an allocation to an empty file at offset 0, pick an 1389 1388 * extent that will space things out in the rt area.

+28 -42

fs/xfs/xfs_symlink.c

··· 90 90 struct xfs_inode **ipp) 91 91 { 92 92 struct xfs_mount *mp = dp->i_mount; 93 + struct xfs_icreate_args args = { 94 + .idmap = idmap, 95 + .pip = dp, 96 + .mode = S_IFLNK | (mode & ~S_IFMT), 97 + }; 98 + struct xfs_dir_update du = { 99 + .dp = dp, 100 + .name = link_name, 101 + }; 93 102 struct xfs_trans *tp = NULL; 94 - struct xfs_inode *ip = NULL; 95 103 int error = 0; 96 104 int pathlen; 97 105 bool unlock_dp_on_error = false; 98 106 xfs_filblks_t fs_blocks; 99 - prid_t prid; 100 - struct xfs_dquot *udqp = NULL; 101 - struct xfs_dquot *gdqp = NULL; 102 - struct xfs_dquot *pdqp = NULL; 107 + struct xfs_dquot *udqp; 108 + struct xfs_dquot *gdqp; 109 + struct xfs_dquot *pdqp; 103 110 uint resblks; 104 111 xfs_ino_t ino; 105 - struct xfs_parent_args *ppargs; 106 112 107 113 *ipp = NULL; 108 114 ··· 125 119 return -ENAMETOOLONG; 126 120 ASSERT(pathlen > 0); 127 121 128 - prid = xfs_get_initial_prid(dp); 129 - 130 - /* 131 - * Make sure that we have allocated dquot(s) on disk. 132 - */ 133 - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), 134 - mapped_fsgid(idmap, &init_user_ns), prid, 135 - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 136 - &udqp, &gdqp, &pdqp); 122 + /* Make sure that we have allocated dquot(s) on disk. 
*/ 123 + error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp); 137 124 if (error) 138 125 return error; 139 126 ··· 142 143 fs_blocks = xfs_symlink_blocks(mp, pathlen); 143 144 resblks = xfs_symlink_space_res(mp, link_name->len, fs_blocks); 144 145 145 - error = xfs_parent_start(mp, &ppargs); 146 + error = xfs_parent_start(mp, &du.ppargs); 146 147 if (error) 147 148 goto out_release_dquots; 148 149 ··· 167 168 */ 168 169 error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino); 169 170 if (!error) 170 - error = xfs_init_new_inode(idmap, tp, dp, ino, 171 - S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, 172 - xfs_has_parent(mp), &ip); 171 + error = xfs_icreate(tp, ino, &args, &du.ip); 173 172 if (error) 174 173 goto out_trans_cancel; 175 174 ··· 183 186 /* 184 187 * Also attach the dquot(s) to it, if applicable. 185 188 */ 186 - xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 189 + xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp); 187 190 188 191 resblks -= XFS_IALLOC_SPACE_RES(mp); 189 - error = xfs_symlink_write_target(tp, ip, ip->i_ino, target_path, 192 + error = xfs_symlink_write_target(tp, du.ip, du.ip->i_ino, target_path, 190 193 pathlen, fs_blocks, resblks); 191 194 if (error) 192 195 goto out_trans_cancel; 193 196 resblks -= fs_blocks; 194 - i_size_write(VFS_I(ip), ip->i_disk_size); 197 + i_size_write(VFS_I(du.ip), du.ip->i_disk_size); 195 198 196 199 /* 197 200 * Create the directory entry for the symlink. 198 201 */ 199 - error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, resblks); 202 + error = xfs_dir_create_child(tp, resblks, &du); 200 203 if (error) 201 204 goto out_trans_cancel; 202 - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 203 - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 204 - 205 - /* Add parent pointer for the new symlink. 
*/ 206 - if (ppargs) { 207 - error = xfs_parent_addname(tp, ppargs, dp, link_name, ip); 208 - if (error) 209 - goto out_trans_cancel; 210 - } 211 - 212 - xfs_dir_update_hook(dp, ip, 1, link_name); 213 205 214 206 /* 215 207 * If this is a synchronous mount, make sure that the ··· 216 230 xfs_qm_dqrele(gdqp); 217 231 xfs_qm_dqrele(pdqp); 218 232 219 - *ipp = ip; 220 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 233 + *ipp = du.ip; 234 + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); 221 235 xfs_iunlock(dp, XFS_ILOCK_EXCL); 222 - xfs_parent_finish(mp, ppargs); 236 + xfs_parent_finish(mp, du.ppargs); 223 237 return 0; 224 238 225 239 out_trans_cancel: ··· 230 244 * setup of the inode and release the inode. This prevents recursive 231 245 * transactions and deadlocks from xfs_inactive. 232 246 */ 233 - if (ip) { 234 - xfs_iunlock(ip, XFS_ILOCK_EXCL); 235 - xfs_finish_inode_setup(ip); 236 - xfs_irele(ip); 247 + if (du.ip) { 248 + xfs_iunlock(du.ip, XFS_ILOCK_EXCL); 249 + xfs_finish_inode_setup(du.ip); 250 + xfs_irele(du.ip); 237 251 } 238 252 out_parent: 239 - xfs_parent_finish(mp, ppargs); 253 + xfs_parent_finish(mp, du.ppargs); 240 254 out_release_dquots: 241 255 xfs_qm_dqrele(udqp); 242 256 xfs_qm_dqrele(gdqp);

+10 -19

fs/xfs/xfs_sysfs.c

··· 432 432 XFS_SYSFS_ATTR_RO(log_tail_lsn); 433 433 434 434 STATIC ssize_t 435 - reserve_grant_head_show( 435 + reserve_grant_head_bytes_show( 436 436 struct kobject *kobject, 437 437 char *buf) 438 - 439 438 { 440 - int cycle; 441 - int bytes; 442 - struct xlog *log = to_xlog(kobject); 443 - 444 - xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); 445 - return sysfs_emit(buf, "%d:%d\n", cycle, bytes); 439 + return sysfs_emit(buf, "%lld\n", 440 + atomic64_read(&to_xlog(kobject)->l_reserve_head.grant)); 446 441 } 447 - XFS_SYSFS_ATTR_RO(reserve_grant_head); 442 + XFS_SYSFS_ATTR_RO(reserve_grant_head_bytes); 448 443 449 444 STATIC ssize_t 450 - write_grant_head_show( 445 + write_grant_head_bytes_show( 451 446 struct kobject *kobject, 452 447 char *buf) 453 448 { 454 - int cycle; 455 - int bytes; 456 - struct xlog *log = to_xlog(kobject); 457 - 458 - xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); 459 - return sysfs_emit(buf, "%d:%d\n", cycle, bytes); 449 + return sysfs_emit(buf, "%lld\n", 450 + atomic64_read(&to_xlog(kobject)->l_write_head.grant)); 460 451 } 461 - XFS_SYSFS_ATTR_RO(write_grant_head); 452 + XFS_SYSFS_ATTR_RO(write_grant_head_bytes); 462 453 463 454 static struct attribute *xfs_log_attrs[] = { 464 455 ATTR_LIST(log_head_lsn), 465 456 ATTR_LIST(log_tail_lsn), 466 - ATTR_LIST(reserve_grant_head), 467 - ATTR_LIST(write_grant_head), 457 + ATTR_LIST(reserve_grant_head_bytes), 458 + ATTR_LIST(write_grant_head_bytes), 468 459 NULL, 469 460 }; 470 461 ATTRIBUTE_GROUPS(xfs_log);

+3 -1

fs/xfs/xfs_trace.c

··· 22 22 #include "xfs_trans.h" 23 23 #include "xfs_log.h" 24 24 #include "xfs_log_priv.h" 25 + #include "xfs_trans_priv.h" 25 26 #include "xfs_buf_item.h" 26 27 #include "xfs_quota.h" 27 28 #include "xfs_dquot_item.h" ··· 39 38 #include "xfs_iomap.h" 40 39 #include "xfs_buf_mem.h" 41 40 #include "xfs_btree_mem.h" 42 - #include "xfs_bmap.h" 43 41 #include "xfs_exchmaps.h" 44 42 #include "xfs_exchrange.h" 45 43 #include "xfs_parent.h" 44 + #include "xfs_rmap.h" 45 + #include "xfs_refcount.h" 46 46 47 47 /* 48 48 * We include this last to have the helpers above available for the trace

+311 -220

fs/xfs/xfs_trace.h

··· 90 90 struct xfs_getparents; 91 91 struct xfs_parent_irec; 92 92 struct xfs_attrlist_cursor_kern; 93 + struct xfs_extent_free_item; 94 + struct xfs_rmap_intent; 95 + struct xfs_refcount_intent; 93 96 94 97 #define XFS_ATTR_FILTER_FLAGS \ 95 98 { XFS_ATTR_ROOT, "ROOT" }, \ ··· 1230 1227 TP_ARGS(log, tic), 1231 1228 TP_STRUCT__entry( 1232 1229 __field(dev_t, dev) 1230 + __field(unsigned long, tic) 1233 1231 __field(char, ocnt) 1234 1232 __field(char, cnt) 1235 1233 __field(int, curr_res) ··· 1238 1234 __field(unsigned int, flags) 1239 1235 __field(int, reserveq) 1240 1236 __field(int, writeq) 1241 - __field(int, grant_reserve_cycle) 1242 - __field(int, grant_reserve_bytes) 1243 - __field(int, grant_write_cycle) 1244 - __field(int, grant_write_bytes) 1237 + __field(uint64_t, grant_reserve_bytes) 1238 + __field(uint64_t, grant_write_bytes) 1239 + __field(uint64_t, tail_space) 1245 1240 __field(int, curr_cycle) 1246 1241 __field(int, curr_block) 1247 1242 __field(xfs_lsn_t, tail_lsn) 1248 1243 ), 1249 1244 TP_fast_assign( 1250 1245 __entry->dev = log->l_mp->m_super->s_dev; 1246 + __entry->tic = (unsigned long)tic; 1251 1247 __entry->ocnt = tic->t_ocnt; 1252 1248 __entry->cnt = tic->t_cnt; 1253 1249 __entry->curr_res = tic->t_curr_res; ··· 1255 1251 __entry->flags = tic->t_flags; 1256 1252 __entry->reserveq = list_empty(&log->l_reserve_head.waiters); 1257 1253 __entry->writeq = list_empty(&log->l_write_head.waiters); 1258 - xlog_crack_grant_head(&log->l_reserve_head.grant, 1259 - &__entry->grant_reserve_cycle, 1260 - &__entry->grant_reserve_bytes); 1261 - xlog_crack_grant_head(&log->l_write_head.grant, 1262 - &__entry->grant_write_cycle, 1263 - &__entry->grant_write_bytes); 1254 + __entry->tail_space = READ_ONCE(log->l_tail_space); 1255 + __entry->grant_reserve_bytes = __entry->tail_space + 1256 + atomic64_read(&log->l_reserve_head.grant); 1257 + __entry->grant_write_bytes = __entry->tail_space + 1258 + atomic64_read(&log->l_write_head.grant); 1264 1259 
__entry->curr_cycle = log->l_curr_cycle; 1265 1260 __entry->curr_block = log->l_curr_block; 1266 1261 __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); 1267 1262 ), 1268 - TP_printk("dev %d:%d t_ocnt %u t_cnt %u t_curr_res %u " 1269 - "t_unit_res %u t_flags %s reserveq %s " 1270 - "writeq %s grant_reserve_cycle %d " 1271 - "grant_reserve_bytes %d grant_write_cycle %d " 1272 - "grant_write_bytes %d curr_cycle %d curr_block %d " 1263 + TP_printk("dev %d:%d tic 0x%lx t_ocnt %u t_cnt %u t_curr_res %u " 1264 + "t_unit_res %u t_flags %s reserveq %s writeq %s " 1265 + "tail space %llu grant_reserve_bytes %llu " 1266 + "grant_write_bytes %llu curr_cycle %d curr_block %d " 1273 1267 "tail_cycle %d tail_block %d", 1274 1268 MAJOR(__entry->dev), MINOR(__entry->dev), 1269 + __entry->tic, 1275 1270 __entry->ocnt, 1276 1271 __entry->cnt, 1277 1272 __entry->curr_res, ··· 1278 1275 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), 1279 1276 __entry->reserveq ? "empty" : "active", 1280 1277 __entry->writeq ? 
"empty" : "active", 1281 - __entry->grant_reserve_cycle, 1278 + __entry->tail_space, 1282 1279 __entry->grant_reserve_bytes, 1283 - __entry->grant_write_cycle, 1284 1280 __entry->grant_write_bytes, 1285 1281 __entry->curr_cycle, 1286 1282 __entry->curr_block, ··· 1307 1305 DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub); 1308 1306 DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit); 1309 1307 DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait); 1308 + DEFINE_LOGGRANT_EVENT(xfs_log_cil_return); 1310 1309 1311 1310 DECLARE_EVENT_CLASS(xfs_log_item_class, 1312 1311 TP_PROTO(struct xfs_log_item *lip), ··· 1407 1404 __field(dev_t, dev) 1408 1405 __field(xfs_lsn_t, new_lsn) 1409 1406 __field(xfs_lsn_t, old_lsn) 1410 - __field(xfs_lsn_t, last_sync_lsn) 1407 + __field(xfs_lsn_t, head_lsn) 1411 1408 ), 1412 1409 TP_fast_assign( 1413 1410 __entry->dev = log->l_mp->m_super->s_dev; 1414 1411 __entry->new_lsn = new_lsn; 1415 1412 __entry->old_lsn = atomic64_read(&log->l_tail_lsn); 1416 - __entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); 1413 + __entry->head_lsn = log->l_ailp->ail_head_lsn; 1417 1414 ), 1418 - TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d", 1415 + TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, head lsn %d/%d", 1419 1416 MAJOR(__entry->dev), MINOR(__entry->dev), 1420 1417 CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), 1421 1418 CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), 1422 - CYCLE_LSN(__entry->last_sync_lsn), BLOCK_LSN(__entry->last_sync_lsn)) 1419 + CYCLE_LSN(__entry->head_lsn), BLOCK_LSN(__entry->head_lsn)) 1423 1420 ) 1424 1421 1425 1422 DECLARE_EVENT_CLASS(xfs_file_class, ··· 2463 2460 DEFINE_DISCARD_EVENT(xfs_discard_exclude); 2464 2461 DEFINE_DISCARD_EVENT(xfs_discard_busy); 2465 2462 2463 + DECLARE_EVENT_CLASS(xfs_rtdiscard_class, 2464 + TP_PROTO(struct xfs_mount *mp, 2465 + xfs_rtblock_t rtbno, xfs_rtblock_t len), 2466 + TP_ARGS(mp, rtbno, len), 2467 + TP_STRUCT__entry( 2468 + __field(dev_t, 
dev) 2469 + __field(xfs_rtblock_t, rtbno) 2470 + __field(xfs_rtblock_t, len) 2471 + ), 2472 + TP_fast_assign( 2473 + __entry->dev = mp->m_rtdev_targp->bt_dev; 2474 + __entry->rtbno = rtbno; 2475 + __entry->len = len; 2476 + ), 2477 + TP_printk("dev %d:%d rtbno 0x%llx rtbcount 0x%llx", 2478 + MAJOR(__entry->dev), MINOR(__entry->dev), 2479 + __entry->rtbno, 2480 + __entry->len) 2481 + ) 2482 + 2483 + #define DEFINE_RTDISCARD_EVENT(name) \ 2484 + DEFINE_EVENT(xfs_rtdiscard_class, name, \ 2485 + TP_PROTO(struct xfs_mount *mp, \ 2486 + xfs_rtblock_t rtbno, xfs_rtblock_t len), \ 2487 + TP_ARGS(mp, rtbno, len)) 2488 + DEFINE_RTDISCARD_EVENT(xfs_discard_rtextent); 2489 + DEFINE_RTDISCARD_EVENT(xfs_discard_rttoosmall); 2490 + DEFINE_RTDISCARD_EVENT(xfs_discard_rtrelax); 2491 + 2466 2492 DECLARE_EVENT_CLASS(xfs_btree_cur_class, 2467 2493 TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp), 2468 2494 TP_ARGS(cur, level, bp), ··· 2713 2681 DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_unpause); 2714 2682 2715 2683 DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class, 2716 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 2717 - int type, xfs_agblock_t agbno, xfs_extlen_t len), 2718 - TP_ARGS(mp, agno, type, agbno, len), 2684 + TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free), 2685 + TP_ARGS(mp, free), 2719 2686 TP_STRUCT__entry( 2720 2687 __field(dev_t, dev) 2721 2688 __field(xfs_agnumber_t, agno) 2722 - __field(int, type) 2723 2689 __field(xfs_agblock_t, agbno) 2724 2690 __field(xfs_extlen_t, len) 2691 + __field(unsigned int, flags) 2725 2692 ), 2726 2693 TP_fast_assign( 2727 2694 __entry->dev = mp->m_super->s_dev; 2728 - __entry->agno = agno; 2729 - __entry->type = type; 2730 - __entry->agbno = agbno; 2731 - __entry->len = len; 2695 + __entry->agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); 2696 + __entry->agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); 2697 + __entry->len = free->xefi_blockcount; 2698 + __entry->flags = 
free->xefi_flags; 2732 2699 ), 2733 - TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x fsbcount 0x%x", 2700 + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x flags 0x%x", 2734 2701 MAJOR(__entry->dev), MINOR(__entry->dev), 2735 - __entry->type, 2736 2702 __entry->agno, 2737 2703 __entry->agbno, 2738 - __entry->len) 2704 + __entry->len, 2705 + __entry->flags) 2739 2706 ); 2740 2707 #define DEFINE_FREE_EXTENT_DEFERRED_EVENT(name) \ 2741 2708 DEFINE_EVENT(xfs_free_extent_deferred_class, name, \ 2742 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 2743 - int type, \ 2744 - xfs_agblock_t bno, \ 2745 - xfs_extlen_t len), \ 2746 - TP_ARGS(mp, agno, type, bno, len)) 2747 - DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_bmap_free_defer); 2748 - DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_bmap_free_deferred); 2709 + TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free), \ 2710 + TP_ARGS(mp, free)) 2749 2711 DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_defer); 2750 2712 DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_deferred); 2713 + DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer); 2714 + DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred); 2751 2715 2752 2716 DECLARE_EVENT_CLASS(xfs_defer_pending_item_class, 2753 2717 TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, ··· 2788 2760 2789 2761 /* rmap tracepoints */ 2790 2762 DECLARE_EVENT_CLASS(xfs_rmap_class, 2791 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 2763 + TP_PROTO(struct xfs_btree_cur *cur, 2792 2764 xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, 2793 2765 const struct xfs_owner_info *oinfo), 2794 - TP_ARGS(mp, agno, agbno, len, unwritten, oinfo), 2766 + TP_ARGS(cur, agbno, len, unwritten, oinfo), 2795 2767 TP_STRUCT__entry( 2796 2768 __field(dev_t, dev) 2797 2769 __field(xfs_agnumber_t, agno) ··· 2802 2774 __field(unsigned long, flags) 2803 2775 ), 2804 2776 TP_fast_assign( 2805 - __entry->dev = mp->m_super->s_dev; 2806 - __entry->agno = agno; 2777 + 
__entry->dev = cur->bc_mp->m_super->s_dev; 2778 + __entry->agno = cur->bc_ag.pag->pag_agno; 2807 2779 __entry->agbno = agbno; 2808 2780 __entry->len = len; 2809 2781 __entry->owner = oinfo->oi_owner; ··· 2823 2795 ); 2824 2796 #define DEFINE_RMAP_EVENT(name) \ 2825 2797 DEFINE_EVENT(xfs_rmap_class, name, \ 2826 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 2798 + TP_PROTO(struct xfs_btree_cur *cur, \ 2827 2799 xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \ 2828 2800 const struct xfs_owner_info *oinfo), \ 2829 - TP_ARGS(mp, agno, agbno, len, unwritten, oinfo)) 2801 + TP_ARGS(cur, agbno, len, unwritten, oinfo)) 2830 2802 2831 - /* simple AG-based error/%ip tracepoint class */ 2832 - DECLARE_EVENT_CLASS(xfs_ag_error_class, 2833 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, 2803 + /* btree cursor error/%ip tracepoint class */ 2804 + DECLARE_EVENT_CLASS(xfs_btree_error_class, 2805 + TP_PROTO(struct xfs_btree_cur *cur, int error, 2834 2806 unsigned long caller_ip), 2835 - TP_ARGS(mp, agno, error, caller_ip), 2807 + TP_ARGS(cur, error, caller_ip), 2836 2808 TP_STRUCT__entry( 2837 2809 __field(dev_t, dev) 2838 2810 __field(xfs_agnumber_t, agno) 2811 + __field(xfs_ino_t, ino) 2839 2812 __field(int, error) 2840 2813 __field(unsigned long, caller_ip) 2841 2814 ), 2842 2815 TP_fast_assign( 2843 - __entry->dev = mp->m_super->s_dev; 2844 - __entry->agno = agno; 2816 + __entry->dev = cur->bc_mp->m_super->s_dev; 2817 + switch (cur->bc_ops->type) { 2818 + case XFS_BTREE_TYPE_INODE: 2819 + __entry->agno = 0; 2820 + __entry->ino = cur->bc_ino.ip->i_ino; 2821 + break; 2822 + case XFS_BTREE_TYPE_AG: 2823 + __entry->agno = cur->bc_ag.pag->pag_agno; 2824 + __entry->ino = 0; 2825 + break; 2826 + case XFS_BTREE_TYPE_MEM: 2827 + __entry->agno = 0; 2828 + __entry->ino = 0; 2829 + break; 2830 + } 2845 2831 __entry->error = error; 2846 2832 __entry->caller_ip = caller_ip; 2847 2833 ), 2848 - TP_printk("dev %d:%d agno 0x%x error %d caller %pS", 2834 + 
TP_printk("dev %d:%d agno 0x%x ino 0x%llx error %d caller %pS", 2849 2835 MAJOR(__entry->dev), MINOR(__entry->dev), 2850 2836 __entry->agno, 2837 + __entry->ino, 2851 2838 __entry->error, 2852 2839 (char *)__entry->caller_ip) 2853 2840 ); 2854 2841 2855 - #define DEFINE_AG_ERROR_EVENT(name) \ 2856 - DEFINE_EVENT(xfs_ag_error_class, name, \ 2857 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \ 2842 + #define DEFINE_BTREE_ERROR_EVENT(name) \ 2843 + DEFINE_EVENT(xfs_btree_error_class, name, \ 2844 + TP_PROTO(struct xfs_btree_cur *cur, int error, \ 2858 2845 unsigned long caller_ip), \ 2859 - TP_ARGS(mp, agno, error, caller_ip)) 2846 + TP_ARGS(cur, error, caller_ip)) 2860 2847 2861 2848 DEFINE_RMAP_EVENT(xfs_rmap_unmap); 2862 2849 DEFINE_RMAP_EVENT(xfs_rmap_unmap_done); 2863 - DEFINE_AG_ERROR_EVENT(xfs_rmap_unmap_error); 2850 + DEFINE_BTREE_ERROR_EVENT(xfs_rmap_unmap_error); 2864 2851 DEFINE_RMAP_EVENT(xfs_rmap_map); 2865 2852 DEFINE_RMAP_EVENT(xfs_rmap_map_done); 2866 - DEFINE_AG_ERROR_EVENT(xfs_rmap_map_error); 2853 + DEFINE_BTREE_ERROR_EVENT(xfs_rmap_map_error); 2867 2854 DEFINE_RMAP_EVENT(xfs_rmap_convert); 2868 2855 DEFINE_RMAP_EVENT(xfs_rmap_convert_done); 2869 - DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_error); 2870 - DEFINE_AG_ERROR_EVENT(xfs_rmap_convert_state); 2856 + DEFINE_BTREE_ERROR_EVENT(xfs_rmap_convert_error); 2857 + 2858 + TRACE_EVENT(xfs_rmap_convert_state, 2859 + TP_PROTO(struct xfs_btree_cur *cur, int state, 2860 + unsigned long caller_ip), 2861 + TP_ARGS(cur, state, caller_ip), 2862 + TP_STRUCT__entry( 2863 + __field(dev_t, dev) 2864 + __field(xfs_agnumber_t, agno) 2865 + __field(xfs_ino_t, ino) 2866 + __field(int, state) 2867 + __field(unsigned long, caller_ip) 2868 + ), 2869 + TP_fast_assign( 2870 + __entry->dev = cur->bc_mp->m_super->s_dev; 2871 + switch (cur->bc_ops->type) { 2872 + case XFS_BTREE_TYPE_INODE: 2873 + __entry->agno = 0; 2874 + __entry->ino = cur->bc_ino.ip->i_ino; 2875 + break; 2876 + case XFS_BTREE_TYPE_AG: 2877 + 
__entry->agno = cur->bc_ag.pag->pag_agno; 2878 + __entry->ino = 0; 2879 + break; 2880 + case XFS_BTREE_TYPE_MEM: 2881 + __entry->agno = 0; 2882 + __entry->ino = 0; 2883 + break; 2884 + } 2885 + __entry->state = state; 2886 + __entry->caller_ip = caller_ip; 2887 + ), 2888 + TP_printk("dev %d:%d agno 0x%x ino 0x%llx state %d caller %pS", 2889 + MAJOR(__entry->dev), MINOR(__entry->dev), 2890 + __entry->agno, 2891 + __entry->ino, 2892 + __entry->state, 2893 + (char *)__entry->caller_ip) 2894 + ); 2871 2895 2872 2896 DECLARE_EVENT_CLASS(xfs_rmapbt_class, 2873 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 2897 + TP_PROTO(struct xfs_btree_cur *cur, 2874 2898 xfs_agblock_t agbno, xfs_extlen_t len, 2875 2899 uint64_t owner, uint64_t offset, unsigned int flags), 2876 - TP_ARGS(mp, agno, agbno, len, owner, offset, flags), 2900 + TP_ARGS(cur, agbno, len, owner, offset, flags), 2877 2901 TP_STRUCT__entry( 2878 2902 __field(dev_t, dev) 2879 2903 __field(xfs_agnumber_t, agno) ··· 2936 2856 __field(unsigned int, flags) 2937 2857 ), 2938 2858 TP_fast_assign( 2939 - __entry->dev = mp->m_super->s_dev; 2940 - __entry->agno = agno; 2859 + __entry->dev = cur->bc_mp->m_super->s_dev; 2860 + __entry->agno = cur->bc_ag.pag->pag_agno; 2941 2861 __entry->agbno = agbno; 2942 2862 __entry->len = len; 2943 2863 __entry->owner = owner; ··· 2955 2875 ); 2956 2876 #define DEFINE_RMAPBT_EVENT(name) \ 2957 2877 DEFINE_EVENT(xfs_rmapbt_class, name, \ 2958 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 2878 + TP_PROTO(struct xfs_btree_cur *cur, \ 2959 2879 xfs_agblock_t agbno, xfs_extlen_t len, \ 2960 2880 uint64_t owner, uint64_t offset, unsigned int flags), \ 2961 - TP_ARGS(mp, agno, agbno, len, owner, offset, flags)) 2881 + TP_ARGS(cur, agbno, len, owner, offset, flags)) 2882 + 2883 + TRACE_DEFINE_ENUM(XFS_RMAP_MAP); 2884 + TRACE_DEFINE_ENUM(XFS_RMAP_MAP_SHARED); 2885 + TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP); 2886 + TRACE_DEFINE_ENUM(XFS_RMAP_UNMAP_SHARED); 2887 + 
TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT); 2888 + TRACE_DEFINE_ENUM(XFS_RMAP_CONVERT_SHARED); 2889 + TRACE_DEFINE_ENUM(XFS_RMAP_ALLOC); 2890 + TRACE_DEFINE_ENUM(XFS_RMAP_FREE); 2962 2891 2963 2892 DECLARE_EVENT_CLASS(xfs_rmap_deferred_class, 2964 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 2965 - int op, 2966 - xfs_agblock_t agbno, 2967 - xfs_ino_t ino, 2968 - int whichfork, 2969 - xfs_fileoff_t offset, 2970 - xfs_filblks_t len, 2971 - xfs_exntst_t state), 2972 - TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state), 2893 + TP_PROTO(struct xfs_mount *mp, struct xfs_rmap_intent *ri), 2894 + TP_ARGS(mp, ri), 2973 2895 TP_STRUCT__entry( 2974 2896 __field(dev_t, dev) 2897 + __field(unsigned long long, owner) 2975 2898 __field(xfs_agnumber_t, agno) 2976 - __field(xfs_ino_t, ino) 2977 2899 __field(xfs_agblock_t, agbno) 2978 2900 __field(int, whichfork) 2979 2901 __field(xfs_fileoff_t, l_loff) ··· 2985 2903 ), 2986 2904 TP_fast_assign( 2987 2905 __entry->dev = mp->m_super->s_dev; 2988 - __entry->agno = agno; 2989 - __entry->ino = ino; 2990 - __entry->agbno = agbno; 2991 - __entry->whichfork = whichfork; 2992 - __entry->l_loff = offset; 2993 - __entry->l_len = len; 2994 - __entry->l_state = state; 2995 - __entry->op = op; 2906 + __entry->agno = XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock); 2907 + __entry->agbno = XFS_FSB_TO_AGBNO(mp, 2908 + ri->ri_bmap.br_startblock); 2909 + __entry->owner = ri->ri_owner; 2910 + __entry->whichfork = ri->ri_whichfork; 2911 + __entry->l_loff = ri->ri_bmap.br_startoff; 2912 + __entry->l_len = ri->ri_bmap.br_blockcount; 2913 + __entry->l_state = ri->ri_bmap.br_state; 2914 + __entry->op = ri->ri_type; 2996 2915 ), 2997 - TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d", 2916 + TP_printk("dev %d:%d op %s agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d", 2998 2917 MAJOR(__entry->dev), MINOR(__entry->dev), 2999 - __entry->op, 2918 + 
__print_symbolic(__entry->op, XFS_RMAP_INTENT_STRINGS), 3000 2919 __entry->agno, 3001 2920 __entry->agbno, 3002 - __entry->ino, 2921 + __entry->owner, 3003 2922 __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), 3004 2923 __entry->l_loff, 3005 2924 __entry->l_len, ··· 3008 2925 ); 3009 2926 #define DEFINE_RMAP_DEFERRED_EVENT(name) \ 3010 2927 DEFINE_EVENT(xfs_rmap_deferred_class, name, \ 3011 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 3012 - int op, \ 3013 - xfs_agblock_t agbno, \ 3014 - xfs_ino_t ino, \ 3015 - int whichfork, \ 3016 - xfs_fileoff_t offset, \ 3017 - xfs_filblks_t len, \ 3018 - xfs_exntst_t state), \ 3019 - TP_ARGS(mp, agno, op, agbno, ino, whichfork, offset, len, state)) 2928 + TP_PROTO(struct xfs_mount *mp, struct xfs_rmap_intent *ri), \ 2929 + TP_ARGS(mp, ri)) 3020 2930 DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_defer); 3021 2931 DEFINE_RMAP_DEFERRED_EVENT(xfs_rmap_deferred); 3022 2932 3023 2933 DEFINE_RMAPBT_EVENT(xfs_rmap_update); 3024 2934 DEFINE_RMAPBT_EVENT(xfs_rmap_insert); 3025 2935 DEFINE_RMAPBT_EVENT(xfs_rmap_delete); 3026 - DEFINE_AG_ERROR_EVENT(xfs_rmap_insert_error); 3027 - DEFINE_AG_ERROR_EVENT(xfs_rmap_delete_error); 3028 - DEFINE_AG_ERROR_EVENT(xfs_rmap_update_error); 2936 + DEFINE_BTREE_ERROR_EVENT(xfs_rmap_insert_error); 2937 + DEFINE_BTREE_ERROR_EVENT(xfs_rmap_delete_error); 2938 + DEFINE_BTREE_ERROR_EVENT(xfs_rmap_update_error); 3029 2939 3030 2940 DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_candidate); 3031 2941 DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_query); ··· 3144 3068 DEFINE_AG_RESV_EVENT(xfs_ag_resv_critical); 3145 3069 DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed); 3146 3070 3071 + /* simple AG-based error/%ip tracepoint class */ 3072 + DECLARE_EVENT_CLASS(xfs_ag_error_class, 3073 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, 3074 + unsigned long caller_ip), 3075 + TP_ARGS(mp, agno, error, caller_ip), 3076 + TP_STRUCT__entry( 3077 + __field(dev_t, dev) 3078 + 
__field(xfs_agnumber_t, agno) 3079 + __field(int, error) 3080 + __field(unsigned long, caller_ip) 3081 + ), 3082 + TP_fast_assign( 3083 + __entry->dev = mp->m_super->s_dev; 3084 + __entry->agno = agno; 3085 + __entry->error = error; 3086 + __entry->caller_ip = caller_ip; 3087 + ), 3088 + TP_printk("dev %d:%d agno 0x%x error %d caller %pS", 3089 + MAJOR(__entry->dev), MINOR(__entry->dev), 3090 + __entry->agno, 3091 + __entry->error, 3092 + (char *)__entry->caller_ip) 3093 + ); 3094 + 3095 + #define DEFINE_AG_ERROR_EVENT(name) \ 3096 + DEFINE_EVENT(xfs_ag_error_class, name, \ 3097 + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \ 3098 + unsigned long caller_ip), \ 3099 + TP_ARGS(mp, agno, error, caller_ip)) 3147 3100 DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error); 3148 3101 3149 3102 /* refcount tracepoint classes */ 3150 3103 3151 - /* reuse the discard trace class for agbno/aglen-based traces */ 3152 - #define DEFINE_AG_EXTENT_EVENT(name) DEFINE_DISCARD_EVENT(name) 3104 + DECLARE_EVENT_CLASS(xfs_refcount_class, 3105 + TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, 3106 + xfs_extlen_t len), 3107 + TP_ARGS(cur, agbno, len), 3108 + TP_STRUCT__entry( 3109 + __field(dev_t, dev) 3110 + __field(xfs_agnumber_t, agno) 3111 + __field(xfs_agblock_t, agbno) 3112 + __field(xfs_extlen_t, len) 3113 + ), 3114 + TP_fast_assign( 3115 + __entry->dev = cur->bc_mp->m_super->s_dev; 3116 + __entry->agno = cur->bc_ag.pag->pag_agno; 3117 + __entry->agbno = agbno; 3118 + __entry->len = len; 3119 + ), 3120 + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x", 3121 + MAJOR(__entry->dev), MINOR(__entry->dev), 3122 + __entry->agno, 3123 + __entry->agbno, 3124 + __entry->len) 3125 + ); 3126 + #define DEFINE_REFCOUNT_EVENT(name) \ 3127 + DEFINE_EVENT(xfs_refcount_class, name, \ 3128 + TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, \ 3129 + xfs_extlen_t len), \ 3130 + TP_ARGS(cur, agbno, len)) 3153 3131 3154 - /* ag btree lookup tracepoint class */ 3155 
3132 TRACE_DEFINE_ENUM(XFS_LOOKUP_EQi); 3156 3133 TRACE_DEFINE_ENUM(XFS_LOOKUP_LEi); 3157 3134 TRACE_DEFINE_ENUM(XFS_LOOKUP_GEi); 3158 - DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class, 3159 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3160 - xfs_agblock_t agbno, xfs_lookup_t dir), 3161 - TP_ARGS(mp, agno, agbno, dir), 3135 + TRACE_EVENT(xfs_refcount_lookup, 3136 + TP_PROTO(struct xfs_btree_cur *cur, xfs_agblock_t agbno, 3137 + xfs_lookup_t dir), 3138 + TP_ARGS(cur, agbno, dir), 3162 3139 TP_STRUCT__entry( 3163 3140 __field(dev_t, dev) 3164 3141 __field(xfs_agnumber_t, agno) ··· 3219 3090 __field(xfs_lookup_t, dir) 3220 3091 ), 3221 3092 TP_fast_assign( 3222 - __entry->dev = mp->m_super->s_dev; 3223 - __entry->agno = agno; 3093 + __entry->dev = cur->bc_mp->m_super->s_dev; 3094 + __entry->agno = cur->bc_ag.pag->pag_agno; 3224 3095 __entry->agbno = agbno; 3225 3096 __entry->dir = dir; 3226 3097 ), ··· 3232 3103 __entry->dir) 3233 3104 ) 3234 3105 3235 - #define DEFINE_AG_BTREE_LOOKUP_EVENT(name) \ 3236 - DEFINE_EVENT(xfs_ag_btree_lookup_class, name, \ 3237 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 3238 - xfs_agblock_t agbno, xfs_lookup_t dir), \ 3239 - TP_ARGS(mp, agno, agbno, dir)) 3240 - 3241 3106 /* single-rcext tracepoint class */ 3242 3107 DECLARE_EVENT_CLASS(xfs_refcount_extent_class, 3243 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3244 - struct xfs_refcount_irec *irec), 3245 - TP_ARGS(mp, agno, irec), 3108 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec), 3109 + TP_ARGS(cur, irec), 3246 3110 TP_STRUCT__entry( 3247 3111 __field(dev_t, dev) 3248 3112 __field(xfs_agnumber_t, agno) ··· 3245 3123 __field(xfs_nlink_t, refcount) 3246 3124 ), 3247 3125 TP_fast_assign( 3248 - __entry->dev = mp->m_super->s_dev; 3249 - __entry->agno = agno; 3126 + __entry->dev = cur->bc_mp->m_super->s_dev; 3127 + __entry->agno = cur->bc_ag.pag->pag_agno; 3250 3128 __entry->domain = irec->rc_domain; 3251 3129 __entry->startblock = 
irec->rc_startblock; 3252 3130 __entry->blockcount = irec->rc_blockcount; ··· 3263 3141 3264 3142 #define DEFINE_REFCOUNT_EXTENT_EVENT(name) \ 3265 3143 DEFINE_EVENT(xfs_refcount_extent_class, name, \ 3266 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 3267 - struct xfs_refcount_irec *irec), \ 3268 - TP_ARGS(mp, agno, irec)) 3144 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec), \ 3145 + TP_ARGS(cur, irec)) 3269 3146 3270 3147 /* single-rcext and an agbno tracepoint class */ 3271 3148 DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, 3272 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3273 - struct xfs_refcount_irec *irec, xfs_agblock_t agbno), 3274 - TP_ARGS(mp, agno, irec, agbno), 3149 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, 3150 + xfs_agblock_t agbno), 3151 + TP_ARGS(cur, irec, agbno), 3275 3152 TP_STRUCT__entry( 3276 3153 __field(dev_t, dev) 3277 3154 __field(xfs_agnumber_t, agno) ··· 3281 3160 __field(xfs_agblock_t, agbno) 3282 3161 ), 3283 3162 TP_fast_assign( 3284 - __entry->dev = mp->m_super->s_dev; 3285 - __entry->agno = agno; 3163 + __entry->dev = cur->bc_mp->m_super->s_dev; 3164 + __entry->agno = cur->bc_ag.pag->pag_agno; 3286 3165 __entry->domain = irec->rc_domain; 3287 3166 __entry->startblock = irec->rc_startblock; 3288 3167 __entry->blockcount = irec->rc_blockcount; ··· 3301 3180 3302 3181 #define DEFINE_REFCOUNT_EXTENT_AT_EVENT(name) \ 3303 3182 DEFINE_EVENT(xfs_refcount_extent_at_class, name, \ 3304 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 3305 - struct xfs_refcount_irec *irec, xfs_agblock_t agbno), \ 3306 - TP_ARGS(mp, agno, irec, agbno)) 3183 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, \ 3184 + xfs_agblock_t agbno), \ 3185 + TP_ARGS(cur, irec, agbno)) 3307 3186 3308 3187 /* double-rcext tracepoint class */ 3309 3188 DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, 3310 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3311 - struct 
xfs_refcount_irec *i1, struct xfs_refcount_irec *i2), 3312 - TP_ARGS(mp, agno, i1, i2), 3189 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, 3190 + struct xfs_refcount_irec *i2), 3191 + TP_ARGS(cur, i1, i2), 3313 3192 TP_STRUCT__entry( 3314 3193 __field(dev_t, dev) 3315 3194 __field(xfs_agnumber_t, agno) ··· 3323 3202 __field(xfs_nlink_t, i2_refcount) 3324 3203 ), 3325 3204 TP_fast_assign( 3326 - __entry->dev = mp->m_super->s_dev; 3327 - __entry->agno = agno; 3205 + __entry->dev = cur->bc_mp->m_super->s_dev; 3206 + __entry->agno = cur->bc_ag.pag->pag_agno; 3328 3207 __entry->i1_domain = i1->rc_domain; 3329 3208 __entry->i1_startblock = i1->rc_startblock; 3330 3209 __entry->i1_blockcount = i1->rc_blockcount; ··· 3350 3229 3351 3230 #define DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(name) \ 3352 3231 DEFINE_EVENT(xfs_refcount_double_extent_class, name, \ 3353 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 3354 - struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2), \ 3355 - TP_ARGS(mp, agno, i1, i2)) 3232 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \ 3233 + struct xfs_refcount_irec *i2), \ 3234 + TP_ARGS(cur, i1, i2)) 3356 3235 3357 3236 /* double-rcext and an agbno tracepoint class */ 3358 3237 DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, 3359 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3360 - struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, 3361 - xfs_agblock_t agbno), 3362 - TP_ARGS(mp, agno, i1, i2, agbno), 3238 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, 3239 + struct xfs_refcount_irec *i2, xfs_agblock_t agbno), 3240 + TP_ARGS(cur, i1, i2, agbno), 3363 3241 TP_STRUCT__entry( 3364 3242 __field(dev_t, dev) 3365 3243 __field(xfs_agnumber_t, agno) ··· 3373 3253 __field(xfs_agblock_t, agbno) 3374 3254 ), 3375 3255 TP_fast_assign( 3376 - __entry->dev = mp->m_super->s_dev; 3377 - __entry->agno = agno; 3256 + __entry->dev = cur->bc_mp->m_super->s_dev; 3257 + 
__entry->agno = cur->bc_ag.pag->pag_agno; 3378 3258 __entry->i1_domain = i1->rc_domain; 3379 3259 __entry->i1_startblock = i1->rc_startblock; 3380 3260 __entry->i1_blockcount = i1->rc_blockcount; ··· 3402 3282 3403 3283 #define DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(name) \ 3404 3284 DEFINE_EVENT(xfs_refcount_double_extent_at_class, name, \ 3405 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 3406 - struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \ 3407 - xfs_agblock_t agbno), \ 3408 - TP_ARGS(mp, agno, i1, i2, agbno)) 3285 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \ 3286 + struct xfs_refcount_irec *i2, xfs_agblock_t agbno), \ 3287 + TP_ARGS(cur, i1, i2, agbno)) 3409 3288 3410 3289 /* triple-rcext tracepoint class */ 3411 3290 DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, 3412 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3413 - struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, 3414 - struct xfs_refcount_irec *i3), 3415 - TP_ARGS(mp, agno, i1, i2, i3), 3291 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, 3292 + struct xfs_refcount_irec *i2, struct xfs_refcount_irec *i3), 3293 + TP_ARGS(cur, i1, i2, i3), 3416 3294 TP_STRUCT__entry( 3417 3295 __field(dev_t, dev) 3418 3296 __field(xfs_agnumber_t, agno) ··· 3428 3310 __field(xfs_nlink_t, i3_refcount) 3429 3311 ), 3430 3312 TP_fast_assign( 3431 - __entry->dev = mp->m_super->s_dev; 3432 - __entry->agno = agno; 3313 + __entry->dev = cur->bc_mp->m_super->s_dev; 3314 + __entry->agno = cur->bc_ag.pag->pag_agno; 3433 3315 __entry->i1_domain = i1->rc_domain; 3434 3316 __entry->i1_startblock = i1->rc_startblock; 3435 3317 __entry->i1_blockcount = i1->rc_blockcount; ··· 3464 3346 3465 3347 #define DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(name) \ 3466 3348 DEFINE_EVENT(xfs_refcount_triple_extent_class, name, \ 3467 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 3468 - struct xfs_refcount_irec *i1, struct xfs_refcount_irec *i2, \ 3469 - 
struct xfs_refcount_irec *i3), \ 3470 - TP_ARGS(mp, agno, i1, i2, i3)) 3349 + TP_PROTO(struct xfs_btree_cur *cur, struct xfs_refcount_irec *i1, \ 3350 + struct xfs_refcount_irec *i2, struct xfs_refcount_irec *i3), \ 3351 + TP_ARGS(cur, i1, i2, i3)) 3471 3352 3472 3353 /* refcount btree tracepoints */ 3473 - DEFINE_AG_BTREE_LOOKUP_EVENT(xfs_refcount_lookup); 3474 3354 DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_get); 3475 3355 DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_update); 3476 3356 DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_insert); 3477 3357 DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_delete); 3478 - DEFINE_AG_ERROR_EVENT(xfs_refcount_insert_error); 3479 - DEFINE_AG_ERROR_EVENT(xfs_refcount_delete_error); 3480 - DEFINE_AG_ERROR_EVENT(xfs_refcount_update_error); 3358 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_insert_error); 3359 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_delete_error); 3360 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_update_error); 3481 3361 3482 3362 /* refcount adjustment tracepoints */ 3483 - DEFINE_AG_EXTENT_EVENT(xfs_refcount_increase); 3484 - DEFINE_AG_EXTENT_EVENT(xfs_refcount_decrease); 3485 - DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_increase); 3486 - DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_decrease); 3363 + DEFINE_REFCOUNT_EVENT(xfs_refcount_increase); 3364 + DEFINE_REFCOUNT_EVENT(xfs_refcount_decrease); 3365 + DEFINE_REFCOUNT_EVENT(xfs_refcount_cow_increase); 3366 + DEFINE_REFCOUNT_EVENT(xfs_refcount_cow_decrease); 3487 3367 DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents); 3488 3368 DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent); 3489 - DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_recover_extent); 3490 3369 DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent); 3491 3370 DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent); 3492 3371 DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent); 3493 3372 DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent); 3494 3373 
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent); 3495 - DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_error); 3496 - DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_cow_error); 3497 - DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_center_extents_error); 3498 - DEFINE_AG_ERROR_EVENT(xfs_refcount_modify_extent_error); 3499 - DEFINE_AG_ERROR_EVENT(xfs_refcount_split_extent_error); 3500 - DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_left_extent_error); 3501 - DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_right_extent_error); 3502 - DEFINE_AG_ERROR_EVENT(xfs_refcount_find_left_extent_error); 3503 - DEFINE_AG_ERROR_EVENT(xfs_refcount_find_right_extent_error); 3374 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_adjust_error); 3375 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_adjust_cow_error); 3376 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_center_extents_error); 3377 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_modify_extent_error); 3378 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_split_extent_error); 3379 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_left_extent_error); 3380 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_merge_right_extent_error); 3381 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_left_extent_error); 3382 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_right_extent_error); 3504 3383 3505 3384 /* reflink helpers */ 3506 - DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared); 3507 - DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared_result); 3508 - DEFINE_AG_ERROR_EVENT(xfs_refcount_find_shared_error); 3385 + DEFINE_REFCOUNT_EVENT(xfs_refcount_find_shared); 3386 + DEFINE_REFCOUNT_EVENT(xfs_refcount_find_shared_result); 3387 + DEFINE_BTREE_ERROR_EVENT(xfs_refcount_find_shared_error); 3388 + 3389 + TRACE_DEFINE_ENUM(XFS_REFCOUNT_INCREASE); 3390 + TRACE_DEFINE_ENUM(XFS_REFCOUNT_DECREASE); 3391 + TRACE_DEFINE_ENUM(XFS_REFCOUNT_ALLOC_COW); 3392 + TRACE_DEFINE_ENUM(XFS_REFCOUNT_FREE_COW); 3509 3393 3510 3394 DECLARE_EVENT_CLASS(xfs_refcount_deferred_class, 3511 - TP_PROTO(struct xfs_mount *mp, 
xfs_agnumber_t agno, 3512 - int type, xfs_agblock_t agbno, xfs_extlen_t len), 3513 - TP_ARGS(mp, agno, type, agbno, len), 3395 + TP_PROTO(struct xfs_mount *mp, struct xfs_refcount_intent *refc), 3396 + TP_ARGS(mp, refc), 3514 3397 TP_STRUCT__entry( 3515 3398 __field(dev_t, dev) 3516 3399 __field(xfs_agnumber_t, agno) 3517 - __field(int, type) 3400 + __field(int, op) 3518 3401 __field(xfs_agblock_t, agbno) 3519 3402 __field(xfs_extlen_t, len) 3520 3403 ), 3521 3404 TP_fast_assign( 3522 3405 __entry->dev = mp->m_super->s_dev; 3523 - __entry->agno = agno; 3524 - __entry->type = type; 3525 - __entry->agbno = agbno; 3526 - __entry->len = len; 3406 + __entry->agno = XFS_FSB_TO_AGNO(mp, refc->ri_startblock); 3407 + __entry->op = refc->ri_type; 3408 + __entry->agbno = XFS_FSB_TO_AGBNO(mp, refc->ri_startblock); 3409 + __entry->len = refc->ri_blockcount; 3527 3410 ), 3528 - TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x fsbcount 0x%x", 3411 + TP_printk("dev %d:%d op %s agno 0x%x agbno 0x%x fsbcount 0x%x", 3529 3412 MAJOR(__entry->dev), MINOR(__entry->dev), 3530 - __entry->type, 3413 + __print_symbolic(__entry->op, XFS_REFCOUNT_INTENT_STRINGS), 3531 3414 __entry->agno, 3532 3415 __entry->agbno, 3533 3416 __entry->len) 3534 3417 ); 3535 3418 #define DEFINE_REFCOUNT_DEFERRED_EVENT(name) \ 3536 3419 DEFINE_EVENT(xfs_refcount_deferred_class, name, \ 3537 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ 3538 - int type, \ 3539 - xfs_agblock_t bno, \ 3540 - xfs_extlen_t len), \ 3541 - TP_ARGS(mp, agno, type, bno, len)) 3420 + TP_PROTO(struct xfs_mount *mp, struct xfs_refcount_intent *refc), \ 3421 + TP_ARGS(mp, refc)) 3542 3422 DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_defer); 3543 3423 DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred); 3544 - 3545 - TRACE_EVENT(xfs_refcount_finish_one_leftover, 3546 - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 3547 - int type, xfs_agblock_t agbno, xfs_extlen_t len), 3548 - TP_ARGS(mp, agno, type, agbno, len), 3549 - 
TP_STRUCT__entry( 3550 - __field(dev_t, dev) 3551 - __field(xfs_agnumber_t, agno) 3552 - __field(int, type) 3553 - __field(xfs_agblock_t, agbno) 3554 - __field(xfs_extlen_t, len) 3555 - ), 3556 - TP_fast_assign( 3557 - __entry->dev = mp->m_super->s_dev; 3558 - __entry->agno = agno; 3559 - __entry->type = type; 3560 - __entry->agbno = agbno; 3561 - __entry->len = len; 3562 - ), 3563 - TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x", 3564 - MAJOR(__entry->dev), MINOR(__entry->dev), 3565 - __entry->type, 3566 - __entry->agno, 3567 - __entry->agbno, 3568 - __entry->len) 3569 - ); 3424 + DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_finish_one_leftover); 3570 3425 3571 3426 /* simple inode-based error/%ip tracepoint class */ 3572 3427 DECLARE_EVENT_CLASS(xfs_inode_error_class,

-129

fs/xfs/xfs_trans.c

··· 725 725 } 726 726 } 727 727 728 - static inline void 729 - xfs_log_item_batch_insert( 730 - struct xfs_ail *ailp, 731 - struct xfs_ail_cursor *cur, 732 - struct xfs_log_item **log_items, 733 - int nr_items, 734 - xfs_lsn_t commit_lsn) 735 - { 736 - int i; 737 - 738 - spin_lock(&ailp->ail_lock); 739 - /* xfs_trans_ail_update_bulk drops ailp->ail_lock */ 740 - xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); 741 - 742 - for (i = 0; i < nr_items; i++) { 743 - struct xfs_log_item *lip = log_items[i]; 744 - 745 - if (lip->li_ops->iop_unpin) 746 - lip->li_ops->iop_unpin(lip, 0); 747 - } 748 - } 749 - 750 - /* 751 - * Bulk operation version of xfs_trans_committed that takes a log vector of 752 - * items to insert into the AIL. This uses bulk AIL insertion techniques to 753 - * minimise lock traffic. 754 - * 755 - * If we are called with the aborted flag set, it is because a log write during 756 - * a CIL checkpoint commit has failed. In this case, all the items in the 757 - * checkpoint have already gone through iop_committed and iop_committing, which 758 - * means that checkpoint commit abort handling is treated exactly the same 759 - * as an iclog write error even though we haven't started any IO yet. Hence in 760 - * this case all we need to do is iop_committed processing, followed by an 761 - * iop_unpin(aborted) call. 762 - * 763 - * The AIL cursor is used to optimise the insert process. If commit_lsn is not 764 - * at the end of the AIL, the insert cursor avoids the need to walk 765 - * the AIL to find the insertion point on every xfs_log_item_batch_insert() 766 - * call. This saves a lot of needless list walking and is a net win, even 767 - * though it slightly increases that amount of AIL lock traffic to set it up 768 - * and tear it down. 
769 - */ 770 - void 771 - xfs_trans_committed_bulk( 772 - struct xfs_ail *ailp, 773 - struct list_head *lv_chain, 774 - xfs_lsn_t commit_lsn, 775 - bool aborted) 776 - { 777 - #define LOG_ITEM_BATCH_SIZE 32 778 - struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; 779 - struct xfs_log_vec *lv; 780 - struct xfs_ail_cursor cur; 781 - int i = 0; 782 - 783 - spin_lock(&ailp->ail_lock); 784 - xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); 785 - spin_unlock(&ailp->ail_lock); 786 - 787 - /* unpin all the log items */ 788 - list_for_each_entry(lv, lv_chain, lv_list) { 789 - struct xfs_log_item *lip = lv->lv_item; 790 - xfs_lsn_t item_lsn; 791 - 792 - if (aborted) 793 - set_bit(XFS_LI_ABORTED, &lip->li_flags); 794 - 795 - if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { 796 - lip->li_ops->iop_release(lip); 797 - continue; 798 - } 799 - 800 - if (lip->li_ops->iop_committed) 801 - item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); 802 - else 803 - item_lsn = commit_lsn; 804 - 805 - /* item_lsn of -1 means the item needs no further processing */ 806 - if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) 807 - continue; 808 - 809 - /* 810 - * if we are aborting the operation, no point in inserting the 811 - * object into the AIL as we are in a shutdown situation. 812 - */ 813 - if (aborted) { 814 - ASSERT(xlog_is_shutdown(ailp->ail_log)); 815 - if (lip->li_ops->iop_unpin) 816 - lip->li_ops->iop_unpin(lip, 1); 817 - continue; 818 - } 819 - 820 - if (item_lsn != commit_lsn) { 821 - 822 - /* 823 - * Not a bulk update option due to unusual item_lsn. 824 - * Push into AIL immediately, rechecking the lsn once 825 - * we have the ail lock. Then unpin the item. This does 826 - * not affect the AIL cursor the bulk insert path is 827 - * using. 
828 - */ 829 - spin_lock(&ailp->ail_lock); 830 - if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) 831 - xfs_trans_ail_update(ailp, lip, item_lsn); 832 - else 833 - spin_unlock(&ailp->ail_lock); 834 - if (lip->li_ops->iop_unpin) 835 - lip->li_ops->iop_unpin(lip, 0); 836 - continue; 837 - } 838 - 839 - /* Item is a candidate for bulk AIL insert. */ 840 - log_items[i++] = lv->lv_item; 841 - if (i >= LOG_ITEM_BATCH_SIZE) { 842 - xfs_log_item_batch_insert(ailp, &cur, log_items, 843 - LOG_ITEM_BATCH_SIZE, commit_lsn); 844 - i = 0; 845 - } 846 - } 847 - 848 - /* make sure we insert the remainder! */ 849 - if (i) 850 - xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); 851 - 852 - spin_lock(&ailp->ail_lock); 853 - xfs_trans_ail_cursor_done(&cur); 854 - spin_unlock(&ailp->ail_lock); 855 - } 856 - 857 728 /* 858 729 * Sort transaction items prior to running precommit operations. This will 859 730 * attempt to order the items such that they will always be locked in the same

+3 -2

fs/xfs/xfs_trans.h

··· 58 58 #define XFS_LI_FAILED 2 59 59 #define XFS_LI_DIRTY 3 60 60 #define XFS_LI_WHITEOUT 4 61 + #define XFS_LI_FLUSHING 5 61 62 62 63 #define XFS_LI_FLAGS \ 63 64 { (1u << XFS_LI_IN_AIL), "IN_AIL" }, \ 64 65 { (1u << XFS_LI_ABORTED), "ABORTED" }, \ 65 66 { (1u << XFS_LI_FAILED), "FAILED" }, \ 66 67 { (1u << XFS_LI_DIRTY), "DIRTY" }, \ 67 - { (1u << XFS_LI_WHITEOUT), "WHITEOUT" } 68 + { (1u << XFS_LI_WHITEOUT), "WHITEOUT" }, \ 69 + { (1u << XFS_LI_FLUSHING), "FLUSHING" } 68 70 69 71 struct xfs_item_ops { 70 72 unsigned flags; ··· 226 224 bool xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *); 227 225 void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); 228 226 void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 229 - void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); 230 227 void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); 231 228 void xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint, 232 229 uint);

+132 -112

fs/xfs/xfs_trans_ail.c

··· 135 135 } 136 136 137 137 /* 138 - * Return the maximum lsn held in the AIL, or zero if the AIL is empty. 139 - */ 140 - static xfs_lsn_t 141 - xfs_ail_max_lsn( 142 - struct xfs_ail *ailp) 143 - { 144 - xfs_lsn_t lsn = 0; 145 - struct xfs_log_item *lip; 146 - 147 - spin_lock(&ailp->ail_lock); 148 - lip = xfs_ail_max(ailp); 149 - if (lip) 150 - lsn = lip->li_lsn; 151 - spin_unlock(&ailp->ail_lock); 152 - 153 - return lsn; 154 - } 155 - 156 - /* 157 138 * The cursor keeps track of where our current traversal is up to by tracking 158 139 * the next item in the list for us. However, for this to be safe, removing an 159 140 * object from the AIL needs to invalidate any cursor that points to it. hence ··· 395 414 return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); 396 415 } 397 416 417 + /* 418 + * Compute the LSN that we'd need to push the log tail towards in order to have 419 + * at least 25% of the log space free. If the log free space already meets this 420 + * threshold, this function returns the lowest LSN in the AIL to slowly keep 421 + * writeback ticking over and the tail of the log moving forward. 422 + */ 423 + static xfs_lsn_t 424 + xfs_ail_calc_push_target( 425 + struct xfs_ail *ailp) 426 + { 427 + struct xlog *log = ailp->ail_log; 428 + struct xfs_log_item *lip; 429 + xfs_lsn_t target_lsn; 430 + xfs_lsn_t max_lsn; 431 + xfs_lsn_t min_lsn; 432 + int32_t free_bytes; 433 + uint32_t target_block; 434 + uint32_t target_cycle; 435 + 436 + lockdep_assert_held(&ailp->ail_lock); 437 + 438 + lip = xfs_ail_max(ailp); 439 + if (!lip) 440 + return NULLCOMMITLSN; 441 + 442 + max_lsn = lip->li_lsn; 443 + min_lsn = __xfs_ail_min_lsn(ailp); 444 + 445 + /* 446 + * If we are supposed to push all the items in the AIL, we want to push 447 + * to the current head. We then clear the push flag so that we don't 448 + * keep pushing newly queued items beyond where the push all command was 449 + * run. 
If the push waiter wants to empty the ail, it should queue 450 + * itself on the ail_empty wait queue. 451 + */ 452 + if (test_and_clear_bit(XFS_AIL_OPSTATE_PUSH_ALL, &ailp->ail_opstate)) 453 + return max_lsn; 454 + 455 + /* If someone wants the AIL empty, keep pushing everything we have. */ 456 + if (waitqueue_active(&ailp->ail_empty)) 457 + return max_lsn; 458 + 459 + /* 460 + * Background pushing - attempt to keep 25% of the log free and if we 461 + * have that much free retain the existing target. 462 + */ 463 + free_bytes = log->l_logsize - xlog_lsn_sub(log, max_lsn, min_lsn); 464 + if (free_bytes >= log->l_logsize >> 2) 465 + return ailp->ail_target; 466 + 467 + target_cycle = CYCLE_LSN(min_lsn); 468 + target_block = BLOCK_LSN(min_lsn) + (log->l_logBBsize >> 2); 469 + if (target_block >= log->l_logBBsize) { 470 + target_block -= log->l_logBBsize; 471 + target_cycle += 1; 472 + } 473 + target_lsn = xlog_assign_lsn(target_cycle, target_block); 474 + 475 + /* Cap the target to the highest LSN known to be in the AIL. */ 476 + if (XFS_LSN_CMP(target_lsn, max_lsn) > 0) 477 + return max_lsn; 478 + 479 + /* If the existing target is higher than the new target, keep it. */ 480 + if (XFS_LSN_CMP(ailp->ail_target, target_lsn) >= 0) 481 + return ailp->ail_target; 482 + return target_lsn; 483 + } 484 + 398 485 static long 399 486 xfsaild_push( 400 487 struct xfs_ail *ailp) ··· 471 422 struct xfs_ail_cursor cur; 472 423 struct xfs_log_item *lip; 473 424 xfs_lsn_t lsn; 474 - xfs_lsn_t target = NULLCOMMITLSN; 475 425 long tout; 476 426 int stuck = 0; 477 427 int flushing = 0; ··· 495 447 } 496 448 497 449 spin_lock(&ailp->ail_lock); 498 - 499 - /* 500 - * If we have a sync push waiter, we always have to push till the AIL is 501 - * empty. Update the target to point to the end of the AIL so that 502 - * capture updates that occur after the sync push waiter has gone to 503 - * sleep. 
504 - */ 505 - if (waitqueue_active(&ailp->ail_empty)) { 506 - lip = xfs_ail_max(ailp); 507 - if (lip) 508 - target = lip->li_lsn; 509 - } else { 510 - /* barrier matches the ail_target update in xfs_ail_push() */ 511 - smp_rmb(); 512 - target = ailp->ail_target; 513 - ailp->ail_target_prev = target; 514 - } 450 + WRITE_ONCE(ailp->ail_target, xfs_ail_calc_push_target(ailp)); 451 + if (ailp->ail_target == NULLCOMMITLSN) 452 + goto out_done; 515 453 516 454 /* we're done if the AIL is empty or our push has reached the end */ 517 455 lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn); 518 456 if (!lip) 519 - goto out_done; 457 + goto out_done_cursor; 520 458 521 459 XFS_STATS_INC(mp, xs_push_ail); 522 460 523 - ASSERT(target != NULLCOMMITLSN); 461 + ASSERT(ailp->ail_target != NULLCOMMITLSN); 524 462 525 463 lsn = lip->li_lsn; 526 - while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { 464 + while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) { 527 465 int lock_result; 466 + 467 + if (test_bit(XFS_LI_FLUSHING, &lip->li_flags)) 468 + goto next_item; 528 469 529 470 /* 530 471 * Note that iop_push may unlock and reacquire the AIL lock. 
We ··· 584 547 if (stuck > 100) 585 548 break; 586 549 550 + next_item: 587 551 lip = xfs_trans_ail_cursor_next(ailp, &cur); 588 552 if (lip == NULL) 553 + break; 554 + if (lip->li_lsn != lsn && count > 1000) 589 555 break; 590 556 lsn = lip->li_lsn; 591 557 } 592 558 593 - out_done: 559 + out_done_cursor: 594 560 xfs_trans_ail_cursor_done(&cur); 561 + out_done: 595 562 spin_unlock(&ailp->ail_lock); 596 563 597 564 if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list)) 598 565 ailp->ail_log_flush++; 599 566 600 - if (!count || XFS_LSN_CMP(lsn, target) >= 0) { 567 + if (!count || XFS_LSN_CMP(lsn, ailp->ail_target) >= 0) { 601 568 /* 602 569 * We reached the target or the AIL is empty, so wait a bit 603 570 * longer for I/O to complete and remove pushed items from the ··· 626 585 /* 627 586 * Assume we have more work to do in a short while. 628 587 */ 629 - tout = 10; 588 + tout = 0; 630 589 } 631 590 632 591 return tout; ··· 644 603 set_freezable(); 645 604 646 605 while (1) { 647 - if (tout && tout <= 20) 606 + if (tout) 648 607 set_current_state(TASK_KILLABLE|TASK_FREEZABLE); 649 608 else 650 609 set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); ··· 680 639 break; 681 640 } 682 641 642 + /* Idle if the AIL is empty. */ 683 643 spin_lock(&ailp->ail_lock); 684 - 685 - /* 686 - * Idle if the AIL is empty and we are not racing with a target 687 - * update. We check the AIL after we set the task to a sleep 688 - * state to guarantee that we either catch an ail_target update 689 - * or that a wake_up resets the state to TASK_RUNNING. 690 - * Otherwise, we run the risk of sleeping indefinitely. 691 - * 692 - * The barrier matches the ail_target update in xfs_ail_push(). 
693 - */ 694 - smp_rmb(); 695 - if (!xfs_ail_min(ailp) && 696 - ailp->ail_target == ailp->ail_target_prev && 697 - list_empty(&ailp->ail_buf_list)) { 644 + if (!xfs_ail_min(ailp) && list_empty(&ailp->ail_buf_list)) { 698 645 spin_unlock(&ailp->ail_lock); 699 646 schedule(); 700 647 tout = 0; ··· 702 673 703 674 memalloc_noreclaim_restore(noreclaim_flag); 704 675 return 0; 705 - } 706 - 707 - /* 708 - * This routine is called to move the tail of the AIL forward. It does this by 709 - * trying to flush items in the AIL whose lsns are below the given 710 - * threshold_lsn. 711 - * 712 - * The push is run asynchronously in a workqueue, which means the caller needs 713 - * to handle waiting on the async flush for space to become available. 714 - * We don't want to interrupt any push that is in progress, hence we only queue 715 - * work if we set the pushing bit appropriately. 716 - * 717 - * We do this unlocked - we only need to know whether there is anything in the 718 - * AIL at the time we are called. We don't need to access the contents of 719 - * any of the objects, so the lock is not needed. 720 - */ 721 - void 722 - xfs_ail_push( 723 - struct xfs_ail *ailp, 724 - xfs_lsn_t threshold_lsn) 725 - { 726 - struct xfs_log_item *lip; 727 - 728 - lip = xfs_ail_min(ailp); 729 - if (!lip || xlog_is_shutdown(ailp->ail_log) || 730 - XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0) 731 - return; 732 - 733 - /* 734 - * Ensure that the new target is noticed in push code before it clears 735 - * the XFS_AIL_PUSHING_BIT. 
736 - */ 737 - smp_wmb(); 738 - xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn); 739 - smp_wmb(); 740 - 741 - wake_up_process(ailp->ail_task); 742 - } 743 - 744 - /* 745 - * Push out all items in the AIL immediately 746 - */ 747 - void 748 - xfs_ail_push_all( 749 - struct xfs_ail *ailp) 750 - { 751 - xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp); 752 - 753 - if (threshold_lsn) 754 - xfs_ail_push(ailp, threshold_lsn); 755 676 } 756 677 757 678 /* ··· 727 748 } 728 749 729 750 void 751 + __xfs_ail_assign_tail_lsn( 752 + struct xfs_ail *ailp) 753 + { 754 + struct xlog *log = ailp->ail_log; 755 + xfs_lsn_t tail_lsn; 756 + 757 + assert_spin_locked(&ailp->ail_lock); 758 + 759 + if (xlog_is_shutdown(log)) 760 + return; 761 + 762 + tail_lsn = __xfs_ail_min_lsn(ailp); 763 + if (!tail_lsn) 764 + tail_lsn = ailp->ail_head_lsn; 765 + 766 + WRITE_ONCE(log->l_tail_space, 767 + xlog_lsn_sub(log, ailp->ail_head_lsn, tail_lsn)); 768 + trace_xfs_log_assign_tail_lsn(log, tail_lsn); 769 + atomic64_set(&log->l_tail_lsn, tail_lsn); 770 + } 771 + 772 + /* 773 + * Callers should pass the original tail lsn so that we can detect if the tail 774 + * has moved as a result of the operation that was performed. If the caller 775 + * needs to force a tail space update, it should pass NULLCOMMITLSN to bypass 776 + * the "did the tail LSN change?" checks. If the caller wants to avoid a tail 777 + * update (e.g. it knows the tail did not change) it should pass an @old_lsn of 778 + * 0. 779 + */ 780 + void 730 781 xfs_ail_update_finish( 731 782 struct xfs_ail *ailp, 732 783 xfs_lsn_t old_lsn) __releases(ailp->ail_lock) 733 784 { 734 785 struct xlog *log = ailp->ail_log; 735 786 736 - /* if the tail lsn hasn't changed, don't do updates or wakeups. */ 787 + /* If the tail lsn hasn't changed, don't do updates or wakeups. 
*/ 737 788 if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) { 738 789 spin_unlock(&ailp->ail_lock); 739 790 return; 740 791 } 741 792 742 - if (!xlog_is_shutdown(log)) 743 - xlog_assign_tail_lsn_locked(log->l_mp); 744 - 793 + __xfs_ail_assign_tail_lsn(ailp); 745 794 if (list_empty(&ailp->ail_head)) 746 795 wake_up_all(&ailp->ail_empty); 747 796 spin_unlock(&ailp->ail_lock); ··· 835 828 836 829 if (!list_empty(&tmp)) 837 830 xfs_ail_splice(ailp, cur, &tmp, lsn); 831 + 832 + /* 833 + * If this is the first insert, wake up the push daemon so it can 834 + * actively scan for items to push. We also need to do a log tail 835 + * LSN update to ensure that it is correctly tracked by the log, so 836 + * set the tail_lsn to NULLCOMMITLSN so that xfs_ail_update_finish() 837 + * will see that the tail lsn has changed and will update the tail 838 + * appropriately. 839 + */ 840 + if (!mlip) { 841 + wake_up_process(ailp->ail_task); 842 + tail_lsn = NULLCOMMITLSN; 843 + } 838 844 839 845 xfs_ail_update_finish(ailp, tail_lsn); 840 846 }

+35 -9

fs/xfs/xfs_trans_priv.h

··· 19 19 void xfs_trans_del_item(struct xfs_log_item *); 20 20 void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 21 21 22 - void xfs_trans_committed_bulk(struct xfs_ail *ailp, 23 - struct list_head *lv_chain, 24 - xfs_lsn_t commit_lsn, bool aborted); 25 22 /* 26 23 * AIL traversal cursor. 27 24 * ··· 52 55 struct xlog *ail_log; 53 56 struct task_struct *ail_task; 54 57 struct list_head ail_head; 55 - xfs_lsn_t ail_target; 56 - xfs_lsn_t ail_target_prev; 57 58 struct list_head ail_cursors; 58 59 spinlock_t ail_lock; 59 60 xfs_lsn_t ail_last_pushed_lsn; 61 + xfs_lsn_t ail_head_lsn; 60 62 int ail_log_flush; 63 + unsigned long ail_opstate; 61 64 struct list_head ail_buf_list; 62 65 wait_queue_head_t ail_empty; 66 + xfs_lsn_t ail_target; 63 67 }; 68 + 69 + /* Push all items out of the AIL immediately. */ 70 + #define XFS_AIL_OPSTATE_PUSH_ALL 0u 64 71 65 72 /* 66 73 * From xfs_trans_ail.c ··· 102 101 __releases(ailp->ail_lock); 103 102 void xfs_trans_ail_delete(struct xfs_log_item *lip, int shutdown_type); 104 103 105 - void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); 106 - void xfs_ail_push_all(struct xfs_ail *); 107 - void xfs_ail_push_all_sync(struct xfs_ail *); 108 - struct xfs_log_item *xfs_ail_min(struct xfs_ail *ailp); 104 + static inline void xfs_ail_push(struct xfs_ail *ailp) 105 + { 106 + wake_up_process(ailp->ail_task); 107 + } 108 + 109 + static inline void xfs_ail_push_all(struct xfs_ail *ailp) 110 + { 111 + if (!test_and_set_bit(XFS_AIL_OPSTATE_PUSH_ALL, &ailp->ail_opstate)) 112 + xfs_ail_push(ailp); 113 + } 114 + 115 + static inline xfs_lsn_t xfs_ail_get_push_target(struct xfs_ail *ailp) 116 + { 117 + return READ_ONCE(ailp->ail_target); 118 + } 119 + 120 + void xfs_ail_push_all_sync(struct xfs_ail *ailp); 109 121 xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); 110 122 111 123 struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, ··· 130 116 struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, 131 117 struct 
xfs_ail_cursor *cur); 132 118 void xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur); 119 + 120 + void __xfs_ail_assign_tail_lsn(struct xfs_ail *ailp); 121 + 122 + static inline void 123 + xfs_ail_assign_tail_lsn( 124 + struct xfs_ail *ailp) 125 + { 126 + 127 + spin_lock(&ailp->ail_lock); 128 + __xfs_ail_assign_tail_lsn(ailp); 129 + spin_unlock(&ailp->ail_lock); 130 + } 133 131 134 132 #if BITS_PER_LONG != 64 135 133 static inline void

Configure Feed

Configure Feed