Merge tag 'xfs-6.6-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Pull xfs fixes from Chandan Babu:

- Fix an integer overflow bug when processing an fsmap call

- Fix crash due to CPU hot remove event racing with filesystem mount
operation

- During read-only mount, XFS does not allow the contents of the log to
be recovered when there are one or more unrecognized rocompat features
in the primary superblock, since the log might have intent items
which the kernel does not know how to process

- During recovery of log intent items, XFS now reserves log space
sufficient for one cycle of a permanent transaction to execute.
Otherwise, this could lead to livelocks due to non-availability of
log space

- On an fs which has an ondisk unlinked inode list, trying to delete a
file or allocate an O_TMPFILE file can cause the fs to shut down
if the first inode in the ondisk inode list is not present in the
inode cache. The bug is solved by explicitly loading the first inode
in the ondisk unlinked inode list into the inode cache if it is not
already cached

A similar problem arises when the uncached inode is present in the
middle of the ondisk unlinked inode list. This second bug is
triggered when executing operations like quotacheck and bulkstat. In
this case, XFS now reads in the entire ondisk unlinked inode list

- Enable LARP mode only on recent v5 filesystems

- Fix an out-of-bounds memory access in scrub

- Fix a performance bug when locating the tail of the log while
mounting a filesystem

* tag 'xfs-6.6-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: use roundup_pow_of_two instead of ffs during xlog_find_tail
xfs: only call xchk_stats_merge after validating scrub inputs
xfs: require a relatively recent V5 filesystem for LARP mode
xfs: make inode unlinked bucket recovery work with quotacheck
xfs: load uncached unlinked inodes into memory on demand
xfs: reserve less log space when recovering log intent items
xfs: fix log recovery when unknown rocompat bits are set
xfs: reload entire unlinked bucket lists
xfs: allow inode inactivation during a ro mount log recovery
xfs: use i_prev_unlinked to distinguish inodes that are not on the unlinked list
xfs: remove CPU hotplug infrastructure
xfs: remove the all-mounts list
xfs: use per-mount cpumask to track nonempty percpu inodegc lists
xfs: fix an agbno overflow in __xfs_getfsmap_datadev
xfs: fix per-cpu CIL structure aggregation racing with dying cpus
xfs: fix select in config XFS_ONLINE_SCRUB_STATS

Linus Torvalds 2 years ago 3abc79dc 8018e02a

+441 -241

28 changed files

expand all

xfs

Kconfig

libxfs

xfs_log_recover.h

xfs_sb.c

scrub

scrub.c

stats.c

xfs_attr_inactive.c

xfs_attr_item.c

xfs_bmap_item.c

xfs_export.c

xfs_extfree_item.c

xfs_fsmap.c

xfs_icache.c

xfs_icache.h

xfs_inode.c

xfs_inode.h

xfs_itable.c

xfs_log.c

xfs_log_cil.c

xfs_log_priv.h

xfs_log_recover.c

xfs_mount.h

xfs_qm.c

xfs_refcount_item.c

xfs_rmap_item.c

xfs_super.c

xfs_trace.h

xfs_xattr.c

include

linux

cpuhotplug.h

+1 -1

fs/xfs/Kconfig

··· 147 147 bool "XFS online metadata check usage data collection" 148 148 default y 149 149 depends on XFS_ONLINE_SCRUB 150 - select FS_DEBUG 150 + select XFS_DEBUG 151 151 help 152 152 If you say Y here, the kernel will gather usage data about 153 153 the online metadata check subsystem. This includes the number

+22

fs/xfs/libxfs/xfs_log_recover.h

··· 131 131 #define xlog_check_buf_cancel_table(log) do { } while (0) 132 132 #endif 133 133 134 + /* 135 + * Transform a regular reservation into one suitable for recovery of a log 136 + * intent item. 137 + * 138 + * Intent recovery only runs a single step of the transaction chain and defers 139 + * the rest to a separate transaction. Therefore, we reduce logcount to 1 here 140 + * to avoid livelocks if the log grant space is nearly exhausted due to the 141 + * recovered intent pinning the tail. Keep the same logflags to avoid tripping 142 + * asserts elsewhere. Struct copies abound below. 143 + */ 144 + static inline struct xfs_trans_res 145 + xlog_recover_resv(const struct xfs_trans_res *r) 146 + { 147 + struct xfs_trans_res ret = { 148 + .tr_logres = r->tr_logres, 149 + .tr_logcount = 1, 150 + .tr_logflags = r->tr_logflags, 151 + }; 152 + 153 + return ret; 154 + } 155 + 134 156 #endif /* __XFS_LOG_RECOVER_H__ */

+2 -1

fs/xfs/libxfs/xfs_sb.c

··· 266 266 return -EFSCORRUPTED; 267 267 } 268 268 269 - if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { 269 + if (!xfs_is_readonly(mp) && 270 + xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { 270 271 xfs_alert(mp, 271 272 "Corruption detected in superblock read-only compatible features (0x%x)!", 272 273 (sbp->sb_features_ro_compat &

+2 -2

fs/xfs/scrub/scrub.c

··· 588 588 out_teardown: 589 589 error = xchk_teardown(sc, error); 590 590 out_sc: 591 + if (error != -ENOENT) 592 + xchk_stats_merge(mp, sm, &run); 591 593 kfree(sc); 592 594 out: 593 595 trace_xchk_done(XFS_I(file_inode(file)), sm, error); ··· 597 595 sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 598 596 error = 0; 599 597 } 600 - if (error != -ENOENT) 601 - xchk_stats_merge(mp, sm, &run); 602 598 return error; 603 599 need_drain: 604 600 error = xchk_teardown(sc, 0);

+4 -1

fs/xfs/scrub/stats.c

··· 185 185 { 186 186 struct xchk_scrub_stats *css; 187 187 188 - ASSERT(sm->sm_type < XFS_SCRUB_TYPE_NR); 188 + if (sm->sm_type >= XFS_SCRUB_TYPE_NR) { 189 + ASSERT(sm->sm_type < XFS_SCRUB_TYPE_NR); 190 + return; 191 + } 189 192 190 193 css = &cs->cs_stats[sm->sm_type]; 191 194 spin_lock(&css->css_lock);

-1

fs/xfs/xfs_attr_inactive.c

··· 333 333 int error = 0; 334 334 335 335 mp = dp->i_mount; 336 - ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); 337 336 338 337 xfs_ilock(dp, lock_mode); 339 338 if (!xfs_inode_has_attr_fork(dp))

+4 -3

fs/xfs/xfs_attr_item.c

··· 547 547 struct xfs_inode *ip; 548 548 struct xfs_da_args *args; 549 549 struct xfs_trans *tp; 550 - struct xfs_trans_res tres; 550 + struct xfs_trans_res resv; 551 551 struct xfs_attri_log_format *attrp; 552 552 struct xfs_attri_log_nameval *nv = attrip->attri_nameval; 553 553 int error; ··· 618 618 goto out; 619 619 } 620 620 621 - xfs_init_attr_trans(args, &tres, &total); 622 - error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp); 621 + xfs_init_attr_trans(args, &resv, &total); 622 + resv = xlog_recover_resv(&resv); 623 + error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp); 623 624 if (error) 624 625 goto out; 625 626

+3 -1

fs/xfs/xfs_bmap_item.c

··· 490 490 struct list_head *capture_list) 491 491 { 492 492 struct xfs_bmap_intent fake = { }; 493 + struct xfs_trans_res resv; 493 494 struct xfs_bui_log_item *buip = BUI_ITEM(lip); 494 495 struct xfs_trans *tp; 495 496 struct xfs_inode *ip = NULL; ··· 516 515 return error; 517 516 518 517 /* Allocate transaction and do the work. */ 519 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 518 + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); 519 + error = xfs_trans_alloc(mp, &resv, 520 520 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); 521 521 if (error) 522 522 goto err_rele;

fs/xfs/xfs_export.c

··· 146 146 return ERR_PTR(error); 147 147 } 148 148 149 + error = xfs_inode_reload_unlinked(ip); 150 + if (error) { 151 + xfs_irele(ip); 152 + return ERR_PTR(error); 153 + } 154 + 149 155 if (VFS_I(ip)->i_generation != generation) { 150 156 xfs_irele(ip); 151 157 return ERR_PTR(-ESTALE);

+3 -1

fs/xfs/xfs_extfree_item.c

··· 660 660 struct xfs_log_item *lip, 661 661 struct list_head *capture_list) 662 662 { 663 + struct xfs_trans_res resv; 663 664 struct xfs_efi_log_item *efip = EFI_ITEM(lip); 664 665 struct xfs_mount *mp = lip->li_log->l_mp; 665 666 struct xfs_efd_log_item *efdp; ··· 684 683 } 685 684 } 686 685 687 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 686 + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); 687 + error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp); 688 688 if (error) 689 689 return error; 690 690 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);

+18 -7

fs/xfs/xfs_fsmap.c

··· 565 565 } 566 566 #endif /* CONFIG_XFS_RT */ 567 567 568 + static inline bool 569 + rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) 570 + { 571 + if (!xfs_has_reflink(mp)) 572 + return true; 573 + if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner)) 574 + return true; 575 + if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | 576 + XFS_RMAP_UNWRITTEN)) 577 + return true; 578 + return false; 579 + } 580 + 568 581 /* Execute a getfsmap query against the regular data device. */ 569 582 STATIC int 570 583 __xfs_getfsmap_datadev( ··· 611 598 * low to the fsmap low key and max out the high key to the end 612 599 * of the AG. 613 600 */ 614 - info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); 615 601 info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); 616 602 error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); 617 603 if (error) ··· 620 608 621 609 /* Adjust the low key if we are continuing from where we left off. */ 622 610 if (info->low.rm_blockcount == 0) { 623 - /* empty */ 624 - } else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) || 625 - (info->low.rm_flags & (XFS_RMAP_ATTR_FORK | 626 - XFS_RMAP_BMBT_BLOCK | 627 - XFS_RMAP_UNWRITTEN))) { 628 - info->low.rm_startblock += info->low.rm_blockcount; 611 + /* No previous record from which to continue */ 612 + } else if (rmap_not_shareable(mp, &info->low)) { 613 + /* Last record seen was an unshareable extent */ 629 614 info->low.rm_owner = 0; 630 615 info->low.rm_offset = 0; 631 616 ··· 630 621 if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs) 631 622 return 0; 632 623 } else { 624 + /* Last record seen was a shareable file data extent */ 633 625 info->low.rm_offset += info->low.rm_blockcount; 634 626 } 627 + info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); 635 628 636 629 info->high.rm_startblock = -1U; 637 630 info->high.rm_owner = ULLONG_MAX;

+29 -51

fs/xfs/xfs_icache.c

··· 113 113 INIT_LIST_HEAD(&ip->i_ioend_list); 114 114 spin_lock_init(&ip->i_ioend_lock); 115 115 ip->i_next_unlinked = NULLAGINO; 116 - ip->i_prev_unlinked = NULLAGINO; 116 + ip->i_prev_unlinked = 0; 117 117 118 118 return ip; 119 119 } ··· 443 443 int cpu; 444 444 bool ret = false; 445 445 446 - for_each_online_cpu(cpu) { 446 + for_each_cpu(cpu, &mp->m_inodegc_cpumask) { 447 447 gc = per_cpu_ptr(mp->m_inodegc, cpu); 448 448 if (!llist_empty(&gc->list)) { 449 449 mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); ··· 463 463 int error = 0; 464 464 465 465 flush_workqueue(mp->m_inodegc_wq); 466 - for_each_online_cpu(cpu) { 466 + for_each_cpu(cpu, &mp->m_inodegc_cpumask) { 467 467 struct xfs_inodegc *gc; 468 468 469 469 gc = per_cpu_ptr(mp->m_inodegc, cpu); ··· 1845 1845 struct xfs_inodegc, work); 1846 1846 struct llist_node *node = llist_del_all(&gc->list); 1847 1847 struct xfs_inode *ip, *n; 1848 + struct xfs_mount *mp = gc->mp; 1848 1849 unsigned int nofs_flag; 1849 1850 1850 - ASSERT(gc->cpu == smp_processor_id()); 1851 + /* 1852 + * Clear the cpu mask bit and ensure that we have seen the latest 1853 + * update of the gc structure associated with this CPU. This matches 1854 + * with the release semantics used when setting the cpumask bit in 1855 + * xfs_inodegc_queue. 
1856 + */ 1857 + cpumask_clear_cpu(gc->cpu, &mp->m_inodegc_cpumask); 1858 + smp_mb__after_atomic(); 1851 1859 1852 1860 WRITE_ONCE(gc->items, 0); 1853 1861 ··· 1870 1862 nofs_flag = memalloc_nofs_save(); 1871 1863 1872 1864 ip = llist_entry(node, struct xfs_inode, i_gclist); 1873 - trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); 1865 + trace_xfs_inodegc_worker(mp, READ_ONCE(gc->shrinker_hits)); 1874 1866 1875 1867 WRITE_ONCE(gc->shrinker_hits, 0); 1876 1868 llist_for_each_entry_safe(ip, n, node, i_gclist) { ··· 2065 2057 struct xfs_inodegc *gc; 2066 2058 int items; 2067 2059 unsigned int shrinker_hits; 2060 + unsigned int cpu_nr; 2068 2061 unsigned long queue_delay = 1; 2069 2062 2070 2063 trace_xfs_inode_set_need_inactive(ip); ··· 2073 2064 ip->i_flags |= XFS_NEED_INACTIVE; 2074 2065 spin_unlock(&ip->i_flags_lock); 2075 2066 2076 - gc = get_cpu_ptr(mp->m_inodegc); 2067 + cpu_nr = get_cpu(); 2068 + gc = this_cpu_ptr(mp->m_inodegc); 2077 2069 llist_add(&ip->i_gclist, &gc->list); 2078 2070 items = READ_ONCE(gc->items); 2079 2071 WRITE_ONCE(gc->items, items + 1); 2080 2072 shrinker_hits = READ_ONCE(gc->shrinker_hits); 2081 2073 2082 2074 /* 2075 + * Ensure the list add is always seen by anyone who finds the cpumask 2076 + * bit set. This effectively gives the cpumask bit set operation 2077 + * release ordering semantics. 2078 + */ 2079 + smp_mb__before_atomic(); 2080 + if (!cpumask_test_cpu(cpu_nr, &mp->m_inodegc_cpumask)) 2081 + cpumask_test_and_set_cpu(cpu_nr, &mp->m_inodegc_cpumask); 2082 + 2083 + /* 2083 2084 * We queue the work while holding the current CPU so that the work 2084 2085 * is scheduled to run on this CPU. 
2085 2086 */ 2086 2087 if (!xfs_is_inodegc_enabled(mp)) { 2087 - put_cpu_ptr(gc); 2088 + put_cpu(); 2088 2089 return; 2089 2090 } 2090 2091 ··· 2104 2085 trace_xfs_inodegc_queue(mp, __return_address); 2105 2086 mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, 2106 2087 queue_delay); 2107 - put_cpu_ptr(gc); 2088 + put_cpu(); 2108 2089 2109 2090 if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { 2110 2091 trace_xfs_inodegc_throttle(mp, __return_address); 2111 2092 flush_delayed_work(&gc->work); 2112 2093 } 2113 - } 2114 - 2115 - /* 2116 - * Fold the dead CPU inodegc queue into the current CPUs queue. 2117 - */ 2118 - void 2119 - xfs_inodegc_cpu_dead( 2120 - struct xfs_mount *mp, 2121 - unsigned int dead_cpu) 2122 - { 2123 - struct xfs_inodegc *dead_gc, *gc; 2124 - struct llist_node *first, *last; 2125 - unsigned int count = 0; 2126 - 2127 - dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu); 2128 - cancel_delayed_work_sync(&dead_gc->work); 2129 - 2130 - if (llist_empty(&dead_gc->list)) 2131 - return; 2132 - 2133 - first = dead_gc->list.first; 2134 - last = first; 2135 - while (last->next) { 2136 - last = last->next; 2137 - count++; 2138 - } 2139 - dead_gc->list.first = NULL; 2140 - dead_gc->items = 0; 2141 - 2142 - /* Add pending work to current CPU */ 2143 - gc = get_cpu_ptr(mp->m_inodegc); 2144 - llist_add_batch(first, last, &gc->list); 2145 - count += READ_ONCE(gc->items); 2146 - WRITE_ONCE(gc->items, count); 2147 - 2148 - if (xfs_is_inodegc_enabled(mp)) { 2149 - trace_xfs_inodegc_queue(mp, __return_address); 2150 - mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, 2151 - 0); 2152 - } 2153 - put_cpu_ptr(gc); 2154 2094 } 2155 2095 2156 2096 /* ··· 2173 2195 if (!xfs_is_inodegc_enabled(mp)) 2174 2196 return 0; 2175 2197 2176 - for_each_online_cpu(cpu) { 2198 + for_each_cpu(cpu, &mp->m_inodegc_cpumask) { 2177 2199 gc = per_cpu_ptr(mp->m_inodegc, cpu); 2178 2200 if (!llist_empty(&gc->list)) 2179 2201 return 
XFS_INODEGC_SHRINKER_COUNT; ··· 2198 2220 2199 2221 trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address); 2200 2222 2201 - for_each_online_cpu(cpu) { 2223 + for_each_cpu(cpu, &mp->m_inodegc_cpumask) { 2202 2224 gc = per_cpu_ptr(mp->m_inodegc, cpu); 2203 2225 if (!llist_empty(&gc->list)) { 2204 2226 unsigned int h = READ_ONCE(gc->shrinker_hits);

-1

fs/xfs/xfs_icache.h

··· 79 79 int xfs_inodegc_flush(struct xfs_mount *mp); 80 80 void xfs_inodegc_stop(struct xfs_mount *mp); 81 81 void xfs_inodegc_start(struct xfs_mount *mp); 82 - void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu); 83 82 int xfs_inodegc_register_shrinker(struct xfs_mount *mp); 84 83 85 84 #endif

+196 -13

fs/xfs/xfs_inode.c

··· 1642 1642 if (VFS_I(ip)->i_mode == 0) 1643 1643 return false; 1644 1644 1645 - /* If this is a read-only mount, don't do this (would generate I/O) */ 1646 - if (xfs_is_readonly(mp)) 1645 + /* 1646 + * If this is a read-only mount, don't do this (would generate I/O) 1647 + * unless we're in log recovery and cleaning the iunlinked list. 1648 + */ 1649 + if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) 1647 1650 return false; 1648 1651 1649 1652 /* If the log isn't running, push inodes straight to reclaim. */ ··· 1706 1703 mp = ip->i_mount; 1707 1704 ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); 1708 1705 1709 - /* If this is a read-only mount, don't do this (would generate I/O) */ 1710 - if (xfs_is_readonly(mp)) 1706 + /* 1707 + * If this is a read-only mount, don't do this (would generate I/O) 1708 + * unless we're in log recovery and cleaning the iunlinked list. 1709 + */ 1710 + if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) 1711 1711 goto out; 1712 1712 1713 1713 /* Metadata inodes require explicit resource cleanup. */ ··· 1742 1736 ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0)) 1743 1737 truncate = 1; 1744 1738 1745 - error = xfs_qm_dqattach(ip); 1746 - if (error) 1747 - goto out; 1739 + if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) { 1740 + xfs_qm_dqdetach(ip); 1741 + } else { 1742 + error = xfs_qm_dqattach(ip); 1743 + if (error) 1744 + goto out; 1745 + } 1748 1746 1749 1747 if (S_ISLNK(VFS_I(ip)->i_mode)) 1750 1748 error = xfs_inactive_symlink(ip); ··· 1832 1822 1833 1823 rcu_read_lock(); 1834 1824 ip = radix_tree_lookup(&pag->pag_ici_root, agino); 1825 + if (!ip) { 1826 + /* Caller can handle inode not being in memory. */ 1827 + rcu_read_unlock(); 1828 + return NULL; 1829 + } 1835 1830 1836 1831 /* 1837 - * Inode not in memory or in RCU freeing limbo should not happen. 1838 - * Warn about this and let the caller handle the failure. 1832 + * Inode in RCU freeing limbo should not happen. 
Warn about this and 1833 + * let the caller handle the failure. 1839 1834 */ 1840 - if (WARN_ON_ONCE(!ip || !ip->i_ino)) { 1835 + if (WARN_ON_ONCE(!ip->i_ino)) { 1841 1836 rcu_read_unlock(); 1842 1837 return NULL; 1843 1838 } ··· 1851 1836 return ip; 1852 1837 } 1853 1838 1854 - /* Update the prev pointer of the next agino. */ 1839 + /* 1840 + * Update the prev pointer of the next agino. Returns -ENOLINK if the inode 1841 + * is not in cache. 1842 + */ 1855 1843 static int 1856 1844 xfs_iunlink_update_backref( 1857 1845 struct xfs_perag *pag, ··· 1869 1851 1870 1852 ip = xfs_iunlink_lookup(pag, next_agino); 1871 1853 if (!ip) 1872 - return -EFSCORRUPTED; 1854 + return -ENOLINK; 1855 + 1873 1856 ip->i_prev_unlinked = prev_agino; 1874 1857 return 0; 1875 1858 } ··· 1914 1895 return 0; 1915 1896 } 1916 1897 1898 + /* 1899 + * Load the inode @next_agino into the cache and set its prev_unlinked pointer 1900 + * to @prev_agino. Caller must hold the AGI to synchronize with other changes 1901 + * to the unlinked list. 1902 + */ 1903 + STATIC int 1904 + xfs_iunlink_reload_next( 1905 + struct xfs_trans *tp, 1906 + struct xfs_buf *agibp, 1907 + xfs_agino_t prev_agino, 1908 + xfs_agino_t next_agino) 1909 + { 1910 + struct xfs_perag *pag = agibp->b_pag; 1911 + struct xfs_mount *mp = pag->pag_mount; 1912 + struct xfs_inode *next_ip = NULL; 1913 + xfs_ino_t ino; 1914 + int error; 1915 + 1916 + ASSERT(next_agino != NULLAGINO); 1917 + 1918 + #ifdef DEBUG 1919 + rcu_read_lock(); 1920 + next_ip = radix_tree_lookup(&pag->pag_ici_root, next_agino); 1921 + ASSERT(next_ip == NULL); 1922 + rcu_read_unlock(); 1923 + #endif 1924 + 1925 + xfs_info_ratelimited(mp, 1926 + "Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating recovery.", 1927 + next_agino, pag->pag_agno); 1928 + 1929 + /* 1930 + * Use an untrusted lookup just to be cautious in case the AGI has been 1931 + * corrupted and now points at a free inode. 
That shouldn't happen, 1932 + * but we'd rather shut down now since we're already running in a weird 1933 + * situation. 1934 + */ 1935 + ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino); 1936 + error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &next_ip); 1937 + if (error) 1938 + return error; 1939 + 1940 + /* If this is not an unlinked inode, something is very wrong. */ 1941 + if (VFS_I(next_ip)->i_nlink != 0) { 1942 + error = -EFSCORRUPTED; 1943 + goto rele; 1944 + } 1945 + 1946 + next_ip->i_prev_unlinked = prev_agino; 1947 + trace_xfs_iunlink_reload_next(next_ip); 1948 + rele: 1949 + ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE)); 1950 + if (xfs_is_quotacheck_running(mp) && next_ip) 1951 + xfs_iflags_set(next_ip, XFS_IQUOTAUNCHECKED); 1952 + xfs_irele(next_ip); 1953 + return error; 1954 + } 1955 + 1917 1956 static int 1918 1957 xfs_iunlink_insert_inode( 1919 1958 struct xfs_trans *tp, ··· 2003 1926 * inode. 2004 1927 */ 2005 1928 error = xfs_iunlink_update_backref(pag, agino, next_agino); 1929 + if (error == -ENOLINK) 1930 + error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino); 2006 1931 if (error) 2007 1932 return error; 2008 1933 ··· 2020 1941 } 2021 1942 2022 1943 /* Point the head of the list to point to this inode. 
*/ 1944 + ip->i_prev_unlinked = NULLAGINO; 2023 1945 return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); 2024 1946 } 2025 1947 ··· 2100 2020 */ 2101 2021 error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, 2102 2022 ip->i_next_unlinked); 2023 + if (error == -ENOLINK) 2024 + error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, 2025 + ip->i_next_unlinked); 2103 2026 if (error) 2104 2027 return error; 2105 2028 ··· 2123 2040 } 2124 2041 2125 2042 ip->i_next_unlinked = NULLAGINO; 2126 - ip->i_prev_unlinked = NULLAGINO; 2043 + ip->i_prev_unlinked = 0; 2127 2044 return error; 2128 2045 } 2129 2046 ··· 3611 3528 inode_unlock(VFS_I(ip2)); 3612 3529 if (ip1 != ip2) 3613 3530 inode_unlock(VFS_I(ip1)); 3531 + } 3532 + 3533 + /* 3534 + * Reload the incore inode list for this inode. Caller should ensure that 3535 + * the link count cannot change, either by taking ILOCK_SHARED or otherwise 3536 + * preventing other threads from executing. 3537 + */ 3538 + int 3539 + xfs_inode_reload_unlinked_bucket( 3540 + struct xfs_trans *tp, 3541 + struct xfs_inode *ip) 3542 + { 3543 + struct xfs_mount *mp = tp->t_mountp; 3544 + struct xfs_buf *agibp; 3545 + struct xfs_agi *agi; 3546 + struct xfs_perag *pag; 3547 + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 3548 + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 3549 + xfs_agino_t prev_agino, next_agino; 3550 + unsigned int bucket; 3551 + bool foundit = false; 3552 + int error; 3553 + 3554 + /* Grab the first inode in the list */ 3555 + pag = xfs_perag_get(mp, agno); 3556 + error = xfs_ialloc_read_agi(pag, tp, &agibp); 3557 + xfs_perag_put(pag); 3558 + if (error) 3559 + return error; 3560 + 3561 + bucket = agino % XFS_AGI_UNLINKED_BUCKETS; 3562 + agi = agibp->b_addr; 3563 + 3564 + trace_xfs_inode_reload_unlinked_bucket(ip); 3565 + 3566 + xfs_info_ratelimited(mp, 3567 + "Found unrecovered unlinked inode 0x%x in AG 0x%x. 
Initiating list recovery.", 3568 + agino, agno); 3569 + 3570 + prev_agino = NULLAGINO; 3571 + next_agino = be32_to_cpu(agi->agi_unlinked[bucket]); 3572 + while (next_agino != NULLAGINO) { 3573 + struct xfs_inode *next_ip = NULL; 3574 + 3575 + if (next_agino == agino) { 3576 + /* Found this inode, set its backlink. */ 3577 + next_ip = ip; 3578 + next_ip->i_prev_unlinked = prev_agino; 3579 + foundit = true; 3580 + } 3581 + if (!next_ip) { 3582 + /* Inode already in memory. */ 3583 + next_ip = xfs_iunlink_lookup(pag, next_agino); 3584 + } 3585 + if (!next_ip) { 3586 + /* Inode not in memory, reload. */ 3587 + error = xfs_iunlink_reload_next(tp, agibp, prev_agino, 3588 + next_agino); 3589 + if (error) 3590 + break; 3591 + 3592 + next_ip = xfs_iunlink_lookup(pag, next_agino); 3593 + } 3594 + if (!next_ip) { 3595 + /* No incore inode at all? We reloaded it... */ 3596 + ASSERT(next_ip != NULL); 3597 + error = -EFSCORRUPTED; 3598 + break; 3599 + } 3600 + 3601 + prev_agino = next_agino; 3602 + next_agino = next_ip->i_next_unlinked; 3603 + } 3604 + 3605 + xfs_trans_brelse(tp, agibp); 3606 + /* Should have found this inode somewhere in the iunlinked bucket. */ 3607 + if (!error && !foundit) 3608 + error = -EFSCORRUPTED; 3609 + return error; 3610 + } 3611 + 3612 + /* Decide if this inode is missing its unlinked list and reload it. */ 3613 + int 3614 + xfs_inode_reload_unlinked( 3615 + struct xfs_inode *ip) 3616 + { 3617 + struct xfs_trans *tp; 3618 + int error; 3619 + 3620 + error = xfs_trans_alloc_empty(ip->i_mount, &tp); 3621 + if (error) 3622 + return error; 3623 + 3624 + xfs_ilock(ip, XFS_ILOCK_SHARED); 3625 + if (xfs_inode_unlinked_incomplete(ip)) 3626 + error = xfs_inode_reload_unlinked_bucket(tp, ip); 3627 + xfs_iunlock(ip, XFS_ILOCK_SHARED); 3628 + xfs_trans_cancel(tp); 3629 + 3630 + return error; 3614 3631 }

+32 -2

fs/xfs/xfs_inode.h

··· 68 68 uint64_t i_diflags2; /* XFS_DIFLAG2_... */ 69 69 struct timespec64 i_crtime; /* time created */ 70 70 71 - /* unlinked list pointers */ 71 + /* 72 + * Unlinked list pointers. These point to the next and previous inodes 73 + * in the AGI unlinked bucket list, respectively. These fields can 74 + * only be updated with the AGI locked. 75 + * 76 + * i_next_unlinked caches di_next_unlinked. 77 + */ 72 78 xfs_agino_t i_next_unlinked; 79 + 80 + /* 81 + * If the inode is not on an unlinked list, this field is zero. If the 82 + * inode is the first element in an unlinked list, this field is 83 + * NULLAGINO. Otherwise, i_prev_unlinked points to the previous inode 84 + * in the unlinked list. 85 + */ 73 86 xfs_agino_t i_prev_unlinked; 74 87 75 88 /* VFS inode */ ··· 93 80 struct work_struct i_ioend_work; 94 81 struct list_head i_ioend_list; 95 82 } xfs_inode_t; 83 + 84 + static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip) 85 + { 86 + return ip->i_prev_unlinked != 0; 87 + } 96 88 97 89 static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip) 98 90 { ··· 344 326 */ 345 327 #define XFS_INACTIVATING (1 << 13) 346 328 329 + /* Quotacheck is running but inode has not been added to quota counts. */ 330 + #define XFS_IQUOTAUNCHECKED (1 << 14) 331 + 347 332 /* All inode state flags related to inode reclaim. */ 348 333 #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ 349 334 XFS_IRECLAIM | \ ··· 361 340 #define XFS_IRECLAIM_RESET_FLAGS \ 362 341 (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ 363 342 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ 364 - XFS_INACTIVATING) 343 + XFS_INACTIVATING | XFS_IQUOTAUNCHECKED) 365 344 366 345 /* 367 346 * Flags for inode locking. 
··· 595 574 596 575 int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); 597 576 void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); 577 + 578 + static inline bool 579 + xfs_inode_unlinked_incomplete( 580 + struct xfs_inode *ip) 581 + { 582 + return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip); 583 + } 584 + int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip); 585 + int xfs_inode_reload_unlinked(struct xfs_inode *ip); 598 586 599 587 #endif /* __XFS_INODE_H__ */

fs/xfs/xfs_itable.c

··· 80 80 if (error) 81 81 goto out; 82 82 83 + if (xfs_inode_unlinked_incomplete(ip)) { 84 + error = xfs_inode_reload_unlinked_bucket(tp, ip); 85 + if (error) { 86 + xfs_iunlock(ip, XFS_ILOCK_SHARED); 87 + xfs_irele(ip); 88 + return error; 89 + } 90 + } 91 + 83 92 ASSERT(ip != NULL); 84 93 ASSERT(ip->i_imap.im_blkno != 0); 85 94 inode = VFS_I(ip);

-17

fs/xfs/xfs_log.c

··· 715 715 * just worked. 716 716 */ 717 717 if (!xfs_has_norecovery(mp)) { 718 - /* 719 - * log recovery ignores readonly state and so we need to clear 720 - * mount-based read only state so it can write to disk. 721 - */ 722 - bool readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, 723 - &mp->m_opstate); 724 718 error = xlog_recover(log); 725 - if (readonly) 726 - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 727 719 if (error) { 728 720 xfs_warn(mp, "log mount/recovery failed: error %d", 729 721 error); ··· 764 772 struct xfs_mount *mp) 765 773 { 766 774 struct xlog *log = mp->m_log; 767 - bool readonly; 768 775 int error = 0; 769 776 770 777 if (xfs_has_norecovery(mp)) { 771 778 ASSERT(xfs_is_readonly(mp)); 772 779 return 0; 773 780 } 774 - 775 - /* 776 - * log recovery ignores readonly state and so we need to clear 777 - * mount-based read only state so it can write to disk. 778 - */ 779 - readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 780 781 781 782 /* 782 783 * During the second phase of log recovery, we need iget and ··· 820 835 xfs_buftarg_drain(mp->m_ddev_targp); 821 836 822 837 clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); 823 - if (readonly) 824 - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 825 838 826 839 /* Make sure the log is dead if we're returning failure. */ 827 840 ASSERT(!error || xlog_is_shutdown(log));

+16 -36

fs/xfs/xfs_log_cil.c

··· 124 124 struct xlog_cil_pcp *cilpcp; 125 125 int cpu; 126 126 127 - for_each_online_cpu(cpu) { 127 + for_each_cpu(cpu, &ctx->cil_pcpmask) { 128 128 cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); 129 129 130 130 ctx->ticket->t_curr_res += cilpcp->space_reserved; ··· 165 165 if (!test_and_clear_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags)) 166 166 return; 167 167 168 - for_each_online_cpu(cpu) { 168 + /* 169 + * We can race with other cpus setting cil_pcpmask. However, we've 170 + * atomically cleared PCP_SPACE which forces other threads to add to 171 + * the global space used count. cil_pcpmask is a superset of cilpcp 172 + * structures that could have a nonzero space_used. 173 + */ 174 + for_each_cpu(cpu, &ctx->cil_pcpmask) { 169 175 int old, prev; 170 176 171 177 cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); ··· 560 554 int iovhdr_res = 0, split_res = 0, ctx_res = 0; 561 555 int space_used; 562 556 int order; 557 + unsigned int cpu_nr; 563 558 struct xlog_cil_pcp *cilpcp; 564 559 565 560 ASSERT(tp); ··· 584 577 * can't be scheduled away between split sample/update operations that 585 578 * are done without outside locking to serialise them. 586 579 */ 587 - cilpcp = get_cpu_ptr(cil->xc_pcp); 580 + cpu_nr = get_cpu(); 581 + cilpcp = this_cpu_ptr(cil->xc_pcp); 582 + 583 + /* Tell the future push that there was work added by this CPU. */ 584 + if (!cpumask_test_cpu(cpu_nr, &ctx->cil_pcpmask)) 585 + cpumask_test_and_set_cpu(cpu_nr, &ctx->cil_pcpmask); 588 586 589 587 /* 590 588 * We need to take the CIL checkpoint unit reservation on the first ··· 675 663 continue; 676 664 list_add_tail(&lip->li_cil, &cilpcp->log_items); 677 665 } 678 - put_cpu_ptr(cilpcp); 666 + put_cpu(); 679 667 680 668 /* 681 669 * If we've overrun the reservation, dump the tx details before we move ··· 1800 1788 out_shutdown: 1801 1789 spin_unlock(&cil->xc_push_lock); 1802 1790 return 0; 1803 - } 1804 - 1805 - /* 1806 - * Move dead percpu state to the relevant CIL context structures. 
1807 - * 1808 - * We have to lock the CIL context here to ensure that nothing is modifying 1809 - * the percpu state, either addition or removal. Both of these are done under 1810 - * the CIL context lock, so grabbing that exclusively here will ensure we can 1811 - * safely drain the cilpcp for the CPU that is dying. 1812 - */ 1813 - void 1814 - xlog_cil_pcp_dead( 1815 - struct xlog *log, 1816 - unsigned int cpu) 1817 - { 1818 - struct xfs_cil *cil = log->l_cilp; 1819 - struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); 1820 - struct xfs_cil_ctx *ctx; 1821 - 1822 - down_write(&cil->xc_ctx_lock); 1823 - ctx = cil->xc_ctx; 1824 - if (ctx->ticket) 1825 - ctx->ticket->t_curr_res += cilpcp->space_reserved; 1826 - cilpcp->space_reserved = 0; 1827 - 1828 - if (!list_empty(&cilpcp->log_items)) 1829 - list_splice_init(&cilpcp->log_items, &ctx->log_items); 1830 - if (!list_empty(&cilpcp->busy_extents)) 1831 - list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents); 1832 - atomic_add(cilpcp->space_used, &ctx->space_used); 1833 - cilpcp->space_used = 0; 1834 - up_write(&cil->xc_ctx_lock); 1835 1791 } 1836 1792 1837 1793 /*

+6 -8

fs/xfs/xfs_log_priv.h

··· 231 231 struct work_struct discard_endio_work; 232 232 struct work_struct push_work; 233 233 atomic_t order_id; 234 + 235 + /* 236 + * CPUs that could have added items to the percpu CIL data. Access is 237 + * coordinated with xc_ctx_lock. 238 + */ 239 + struct cpumask cil_pcpmask; 234 240 }; 235 241 236 242 /* ··· 284 278 wait_queue_head_t xc_push_wait; /* background push throttle */ 285 279 286 280 void __percpu *xc_pcp; /* percpu CIL structures */ 287 - #ifdef CONFIG_HOTPLUG_CPU 288 - struct list_head xc_pcp_list; 289 - #endif 290 281 } ____cacheline_aligned_in_smp; 291 282 292 283 /* xc_flags bit values */ ··· 707 704 708 705 return p; 709 706 } 710 - 711 - /* 712 - * CIL CPU dead notifier 713 - */ 714 - void xlog_cil_pcp_dead(struct xlog *log, unsigned int cpu); 715 707 716 708 #endif /* __XFS_LOG_PRIV_H__ */

+2 -2

fs/xfs/xfs_log_recover.c

··· 329 329 * try a smaller size. We need to be able to read at least 330 330 * a log sector, or we're out of luck. 331 331 */ 332 - bufblks = 1 << ffs(nbblks); 332 + bufblks = roundup_pow_of_two(nbblks); 333 333 while (bufblks > log->l_logBBsize) 334 334 bufblks >>= 1; 335 335 while (!(buffer = xlog_alloc_buffer(log, bufblks))) { ··· 1528 1528 * a smaller size. We need to be able to write at least a 1529 1529 * log sector, or we're out of luck. 1530 1530 */ 1531 - bufblks = 1 << ffs(blocks); 1531 + bufblks = roundup_pow_of_two(blocks); 1532 1532 while (bufblks > log->l_logBBsize) 1533 1533 bufblks >>= 1; 1534 1534 while (!(buffer = xlog_alloc_buffer(log, bufblks))) {

+13 -4

fs/xfs/xfs_mount.h

··· 60 60 * Per-cpu deferred inode inactivation GC lists. 61 61 */ 62 62 struct xfs_inodegc { 63 + struct xfs_mount *mp; 63 64 struct llist_head list; 64 65 struct delayed_work work; 65 66 int error; ··· 68 67 /* approximate count of inodes in the list */ 69 68 unsigned int items; 70 69 unsigned int shrinker_hits; 71 - #if defined(DEBUG) || defined(XFS_WARN) 72 70 unsigned int cpu; 73 - #endif 74 71 }; 75 72 76 73 /* ··· 97 98 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ 98 99 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ 99 100 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ 100 - struct list_head m_mount_list; /* global mount list */ 101 101 void __percpu *m_inodegc; /* percpu inodegc structures */ 102 102 103 103 /* ··· 247 249 unsigned int *m_errortag; 248 250 struct xfs_kobj m_errortag_kobj; 249 251 #endif 252 + 253 + /* cpus that have inodes queued for inactivation */ 254 + struct cpumask m_inodegc_cpumask; 250 255 } xfs_mount_t; 251 256 252 257 #define M_IGEO(mp) (&(mp)->m_ino_geo) ··· 405 404 #define XFS_OPSTATE_WARNED_SHRINK 8 406 405 /* Kernel has logged a warning about logged xattr updates being used. 
*/ 407 406 #define XFS_OPSTATE_WARNED_LARP 9 407 + /* Mount time quotacheck is running */ 408 + #define XFS_OPSTATE_QUOTACHECK_RUNNING 10 408 409 409 410 #define __XFS_IS_OPSTATE(name, NAME) \ 410 411 static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ ··· 429 426 __XFS_IS_OPSTATE(readonly, READONLY) 430 427 __XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED) 431 428 __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED) 429 + #ifdef CONFIG_XFS_QUOTA 430 + __XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING) 431 + #else 432 + # define xfs_is_quotacheck_running(mp) (false) 433 + #endif 432 434 433 435 static inline bool 434 436 xfs_should_warn(struct xfs_mount *mp, long nr) ··· 451 443 { (1UL << XFS_OPSTATE_BLOCKGC_ENABLED), "blockgc" }, \ 452 444 { (1UL << XFS_OPSTATE_WARNED_SCRUB), "wscrub" }, \ 453 445 { (1UL << XFS_OPSTATE_WARNED_SHRINK), "wshrink" }, \ 454 - { (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" } 446 + { (1UL << XFS_OPSTATE_WARNED_LARP), "wlarp" }, \ 447 + { (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING), "quotacheck" } 455 448 456 449 /* 457 450 * Max and min values for mount-option defined I/O

fs/xfs/xfs_qm.c

··· 1160 1160 if (error) 1161 1161 return error; 1162 1162 1163 + error = xfs_inode_reload_unlinked(ip); 1164 + if (error) 1165 + goto error0; 1166 + 1163 1167 ASSERT(ip->i_delayed_blks == 0); 1164 1168 1165 1169 if (XFS_IS_REALTIME_INODE(ip)) { ··· 1177 1173 } 1178 1174 1179 1175 nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks; 1176 + xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED); 1180 1177 1181 1178 /* 1182 1179 * Add the (disk blocks and inode) resources occupied by this ··· 1324 1319 flags |= XFS_PQUOTA_CHKD; 1325 1320 } 1326 1321 1322 + xfs_set_quotacheck_running(mp); 1327 1323 error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, 1328 1324 NULL); 1325 + xfs_clear_quotacheck_running(mp); 1329 1326 1330 1327 /* 1331 1328 * On error, the inode walk may have partially populated the dquot

+4 -2

fs/xfs/xfs_refcount_item.c

··· 477 477 struct xfs_log_item *lip, 478 478 struct list_head *capture_list) 479 479 { 480 + struct xfs_trans_res resv; 480 481 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 481 482 struct xfs_cud_log_item *cudp; 482 483 struct xfs_trans *tp; ··· 515 514 * doesn't fit. We need to reserve enough blocks to handle a 516 515 * full btree split on either end of the refcount range. 517 516 */ 518 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 519 - mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); 517 + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); 518 + error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, 519 + XFS_TRANS_RESERVE, &tp); 520 520 if (error) 521 521 return error; 522 522

+4 -2

fs/xfs/xfs_rmap_item.c

··· 507 507 struct xfs_log_item *lip, 508 508 struct list_head *capture_list) 509 509 { 510 + struct xfs_trans_res resv; 510 511 struct xfs_rui_log_item *ruip = RUI_ITEM(lip); 511 512 struct xfs_rud_log_item *rudp; 512 513 struct xfs_trans *tp; ··· 531 530 } 532 531 } 533 532 534 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 535 - mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp); 533 + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); 534 + error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0, 535 + XFS_TRANS_RESERVE, &tp); 536 536 if (error) 537 537 return error; 538 538 rudp = xfs_trans_get_rud(tp, ruip);

+2 -84

fs/xfs/xfs_super.c

··· 56 56 static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ 57 57 #endif 58 58 59 - #ifdef CONFIG_HOTPLUG_CPU 60 - static LIST_HEAD(xfs_mount_list); 61 - static DEFINE_SPINLOCK(xfs_mount_list_lock); 62 - 63 - static inline void xfs_mount_list_add(struct xfs_mount *mp) 64 - { 65 - spin_lock(&xfs_mount_list_lock); 66 - list_add(&mp->m_mount_list, &xfs_mount_list); 67 - spin_unlock(&xfs_mount_list_lock); 68 - } 69 - 70 - static inline void xfs_mount_list_del(struct xfs_mount *mp) 71 - { 72 - spin_lock(&xfs_mount_list_lock); 73 - list_del(&mp->m_mount_list); 74 - spin_unlock(&xfs_mount_list_lock); 75 - } 76 - #else /* !CONFIG_HOTPLUG_CPU */ 77 - static inline void xfs_mount_list_add(struct xfs_mount *mp) {} 78 - static inline void xfs_mount_list_del(struct xfs_mount *mp) {} 79 - #endif 80 - 81 59 enum xfs_dax_mode { 82 60 XFS_DAX_INODE = 0, 83 61 XFS_DAX_ALWAYS = 1, ··· 1113 1135 1114 1136 for_each_possible_cpu(cpu) { 1115 1137 gc = per_cpu_ptr(mp->m_inodegc, cpu); 1116 - #if defined(DEBUG) || defined(XFS_WARN) 1117 1138 gc->cpu = cpu; 1118 - #endif 1139 + gc->mp = mp; 1119 1140 init_llist_head(&gc->list); 1120 1141 gc->items = 0; 1121 1142 gc->error = 0; ··· 1145 1168 xfs_freesb(mp); 1146 1169 xchk_mount_stats_free(mp); 1147 1170 free_percpu(mp->m_stats.xs_stats); 1148 - xfs_mount_list_del(mp); 1149 1171 xfs_inodegc_free_percpu(mp); 1150 1172 xfs_destroy_percpu_counters(mp); 1151 1173 xfs_destroy_mount_workqueues(mp); ··· 1553 1577 if (error) 1554 1578 goto out_destroy_counters; 1555 1579 1556 - /* 1557 - * All percpu data structures requiring cleanup when a cpu goes offline 1558 - * must be allocated before adding this @mp to the cpu-dead handler's 1559 - * mount list. 
1560 - */ 1561 - xfs_mount_list_add(mp); 1562 - 1563 1580 /* Allocate stats memory before we do operations that might use it */ 1564 1581 mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); 1565 1582 if (!mp->m_stats.xs_stats) { ··· 1750 1781 out_free_stats: 1751 1782 free_percpu(mp->m_stats.xs_stats); 1752 1783 out_destroy_inodegc: 1753 - xfs_mount_list_del(mp); 1754 1784 xfs_inodegc_free_percpu(mp); 1755 1785 out_destroy_counters: 1756 1786 xfs_destroy_percpu_counters(mp); ··· 2294 2326 destroy_workqueue(xfs_alloc_wq); 2295 2327 } 2296 2328 2297 - #ifdef CONFIG_HOTPLUG_CPU 2298 - static int 2299 - xfs_cpu_dead( 2300 - unsigned int cpu) 2301 - { 2302 - struct xfs_mount *mp, *n; 2303 - 2304 - spin_lock(&xfs_mount_list_lock); 2305 - list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { 2306 - spin_unlock(&xfs_mount_list_lock); 2307 - xfs_inodegc_cpu_dead(mp, cpu); 2308 - xlog_cil_pcp_dead(mp->m_log, cpu); 2309 - spin_lock(&xfs_mount_list_lock); 2310 - } 2311 - spin_unlock(&xfs_mount_list_lock); 2312 - return 0; 2313 - } 2314 - 2315 - static int __init 2316 - xfs_cpu_hotplug_init(void) 2317 - { 2318 - int error; 2319 - 2320 - error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL, 2321 - xfs_cpu_dead); 2322 - if (error < 0) 2323 - xfs_alert(NULL, 2324 - "Failed to initialise CPU hotplug, error %d. 
XFS is non-functional.", 2325 - error); 2326 - return error; 2327 - } 2328 - 2329 - static void 2330 - xfs_cpu_hotplug_destroy(void) 2331 - { 2332 - cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD); 2333 - } 2334 - 2335 - #else /* !CONFIG_HOTPLUG_CPU */ 2336 - static inline int xfs_cpu_hotplug_init(void) { return 0; } 2337 - static inline void xfs_cpu_hotplug_destroy(void) {} 2338 - #endif 2339 - 2340 2329 STATIC int __init 2341 2330 init_xfs_fs(void) 2342 2331 { ··· 2310 2385 2311 2386 xfs_dir_startup(); 2312 2387 2313 - error = xfs_cpu_hotplug_init(); 2314 - if (error) 2315 - goto out; 2316 - 2317 2388 error = xfs_init_caches(); 2318 2389 if (error) 2319 - goto out_destroy_hp; 2390 + goto out; 2320 2391 2321 2392 error = xfs_init_workqueues(); 2322 2393 if (error) ··· 2396 2475 xfs_destroy_workqueues(); 2397 2476 out_destroy_caches: 2398 2477 xfs_destroy_caches(); 2399 - out_destroy_hp: 2400 - xfs_cpu_hotplug_destroy(); 2401 2478 out: 2402 2479 return error; 2403 2480 } ··· 2419 2500 xfs_destroy_workqueues(); 2420 2501 xfs_destroy_caches(); 2421 2502 xfs_uuid_table_free(); 2422 - xfs_cpu_hotplug_destroy(); 2423 2503 } 2424 2504 2425 2505 module_init(init_xfs_fs);

+45

fs/xfs/xfs_trace.h

··· 3824 3824 __entry->new_ptr) 3825 3825 ); 3826 3826 3827 + TRACE_EVENT(xfs_iunlink_reload_next, 3828 + TP_PROTO(struct xfs_inode *ip), 3829 + TP_ARGS(ip), 3830 + TP_STRUCT__entry( 3831 + __field(dev_t, dev) 3832 + __field(xfs_agnumber_t, agno) 3833 + __field(xfs_agino_t, agino) 3834 + __field(xfs_agino_t, prev_agino) 3835 + __field(xfs_agino_t, next_agino) 3836 + ), 3837 + TP_fast_assign( 3838 + __entry->dev = ip->i_mount->m_super->s_dev; 3839 + __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); 3840 + __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); 3841 + __entry->prev_agino = ip->i_prev_unlinked; 3842 + __entry->next_agino = ip->i_next_unlinked; 3843 + ), 3844 + TP_printk("dev %d:%d agno 0x%x agino 0x%x prev_unlinked 0x%x next_unlinked 0x%x", 3845 + MAJOR(__entry->dev), MINOR(__entry->dev), 3846 + __entry->agno, 3847 + __entry->agino, 3848 + __entry->prev_agino, 3849 + __entry->next_agino) 3850 + ); 3851 + 3852 + TRACE_EVENT(xfs_inode_reload_unlinked_bucket, 3853 + TP_PROTO(struct xfs_inode *ip), 3854 + TP_ARGS(ip), 3855 + TP_STRUCT__entry( 3856 + __field(dev_t, dev) 3857 + __field(xfs_agnumber_t, agno) 3858 + __field(xfs_agino_t, agino) 3859 + ), 3860 + TP_fast_assign( 3861 + __entry->dev = ip->i_mount->m_super->s_dev; 3862 + __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); 3863 + __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); 3864 + ), 3865 + TP_printk("dev %d:%d agno 0x%x agino 0x%x bucket %u", 3866 + MAJOR(__entry->dev), MINOR(__entry->dev), 3867 + __entry->agno, 3868 + __entry->agino, 3869 + __entry->agino % XFS_AGI_UNLINKED_BUCKETS) 3870 + ); 3871 + 3827 3872 DECLARE_EVENT_CLASS(xfs_ag_inode_class, 3828 3873 TP_PROTO(struct xfs_inode *ip), 3829 3874 TP_ARGS(ip),

+11

fs/xfs/xfs_xattr.c

··· 46 46 if (xfs_sb_version_haslogxattrs(&mp->m_sb)) 47 47 return 0; 48 48 49 + /* 50 + * Check if the filesystem featureset is new enough to set this log 51 + * incompat feature bit. Strictly speaking, the minimum requirement is 52 + * a V5 filesystem for the superblock field, but we'll require rmap 53 + * or reflink to avoid having to deal with really old kernels. 54 + */ 55 + if (!xfs_has_reflink(mp) && !xfs_has_rmapbt(mp)) { 56 + error = -EOPNOTSUPP; 57 + goto drop_incompat; 58 + } 59 + 49 60 /* Enable log-assisted xattrs. */ 50 61 error = xfs_add_incompat_log_feature(mp, 51 62 XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);

-1

include/linux/cpuhotplug.h

··· 90 90 CPUHP_FS_BUFF_DEAD, 91 91 CPUHP_PRINTK_DEAD, 92 92 CPUHP_MM_MEMCQ_DEAD, 93 - CPUHP_XFS_DEAD, 94 93 CPUHP_PERCPU_CNT_DEAD, 95 94 CPUHP_RADIX_DEAD, 96 95 CPUHP_PAGE_ALLOC,

Configure Feed

Configure Feed