Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-6.4-rc1-fixes' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs bug fixes from Dave Chinner:
"Largely minor bug fixes and cleanups, the most important of which are
probably the fixes for regressions in the extent allocation code:

- fixes for inode garbage collection shutdown racing with work queue
updates

- ensure inodegc workers run on the CPU they are supposed to

- disable counter scrubbing until we can exclusively freeze the
filesystem from the kernel

- regression fixes for new allocation related bugs

- a couple of minor cleanups"

* tag 'xfs-6.4-rc1-fixes' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: fix xfs_inodegc_stop racing with mod_delayed_work
xfs: disable reaping in fscounters scrub
xfs: check that per-cpu inodegc workers actually run on that cpu
xfs: explicitly specify cpu when forcing inodegc delayed work to run immediately
xfs: fix negative array access in xfs_getbmap
xfs: don't allocate into the data fork for an unshare request
xfs: flush dirty data and drain directios before scrubbing cow fork
xfs: set bnobt/cntbt numrecs correctly when formatting new AGs
xfs: don't unconditionally null args->pag in xfs_bmap_btalloc_at_eof

+65 -63
+9 -10
fs/xfs/libxfs/xfs_ag.c
··· 495 495 ASSERT(start >= mp->m_ag_prealloc_blocks); 496 496 if (start != mp->m_ag_prealloc_blocks) { 497 497 /* 498 - * Modify first record to pad stripe align of log 498 + * Modify first record to pad stripe align of log and 499 + * bump the record count. 499 500 */ 500 501 arec->ar_blockcount = cpu_to_be32(start - 501 502 mp->m_ag_prealloc_blocks); 503 + be16_add_cpu(&block->bb_numrecs, 1); 502 504 nrec = arec + 1; 503 505 504 506 /* ··· 511 509 be32_to_cpu(arec->ar_startblock) + 512 510 be32_to_cpu(arec->ar_blockcount)); 513 511 arec = nrec; 514 - be16_add_cpu(&block->bb_numrecs, 1); 515 512 } 516 513 /* 517 514 * Change record start to after the internal log ··· 519 518 } 520 519 521 520 /* 522 - * Calculate the record block count and check for the case where 523 - * the log might have consumed all available space in the AG. If 524 - * so, reset the record count to 0 to avoid exposure of an invalid 525 - * record start block. 521 + * Calculate the block count of this record; if it is nonzero, 522 + * increment the record count. 526 523 */ 527 524 arec->ar_blockcount = cpu_to_be32(id->agsize - 528 525 be32_to_cpu(arec->ar_startblock)); 529 - if (!arec->ar_blockcount) 530 - block->bb_numrecs = 0; 526 + if (arec->ar_blockcount) 527 + be16_add_cpu(&block->bb_numrecs, 1); 531 528 } 532 529 533 530 /* ··· 537 538 struct xfs_buf *bp, 538 539 struct aghdr_init_data *id) 539 540 { 540 - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); 541 + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno); 541 542 xfs_freesp_init_recs(mp, bp, id); 542 543 } 543 544 ··· 547 548 struct xfs_buf *bp, 548 549 struct aghdr_init_data *id) 549 550 { 550 - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); 551 + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno); 551 552 xfs_freesp_init_recs(mp, bp, id); 552 553 } 553 554
+3 -2
fs/xfs/libxfs/xfs_bmap.c
··· 3494 3494 if (!caller_pag) 3495 3495 args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno)); 3496 3496 error = xfs_alloc_vextent_exact_bno(args, ap->blkno); 3497 - if (!caller_pag) 3497 + if (!caller_pag) { 3498 3498 xfs_perag_put(args->pag); 3499 + args->pag = NULL; 3500 + } 3499 3501 if (error) 3500 3502 return error; 3501 3503 ··· 3507 3505 * Exact allocation failed. Reset to try an aligned allocation 3508 3506 * according to the original allocation specification. 3509 3507 */ 3510 - args->pag = NULL; 3511 3508 args->alignment = stripe_align; 3512 3509 args->minlen = nextminlen; 3513 3510 args->minalignslop = 0;
+2 -2
fs/xfs/scrub/bmap.c
··· 42 42 xfs_ilock(sc->ip, XFS_IOLOCK_EXCL); 43 43 44 44 /* 45 - * We don't want any ephemeral data fork updates sitting around 45 + * We don't want any ephemeral data/cow fork updates sitting around 46 46 * while we inspect block mappings, so wait for directio to finish 47 47 * and flush dirty data if we have delalloc reservations. 48 48 */ 49 49 if (S_ISREG(VFS_I(sc->ip)->i_mode) && 50 - sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) { 50 + sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) { 51 51 struct address_space *mapping = VFS_I(sc->ip)->i_mapping; 52 52 53 53 sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
-26
fs/xfs/scrub/common.c
··· 1164 1164 return 0; 1165 1165 } 1166 1166 1167 - /* Pause background reaping of resources. */ 1168 - void 1169 - xchk_stop_reaping( 1170 - struct xfs_scrub *sc) 1171 - { 1172 - sc->flags |= XCHK_REAPING_DISABLED; 1173 - xfs_blockgc_stop(sc->mp); 1174 - xfs_inodegc_stop(sc->mp); 1175 - } 1176 - 1177 - /* Restart background reaping of resources. */ 1178 - void 1179 - xchk_start_reaping( 1180 - struct xfs_scrub *sc) 1181 - { 1182 - /* 1183 - * Readonly filesystems do not perform inactivation or speculative 1184 - * preallocation, so there's no need to restart the workers. 1185 - */ 1186 - if (!xfs_is_readonly(sc->mp)) { 1187 - xfs_inodegc_start(sc->mp); 1188 - xfs_blockgc_start(sc->mp); 1189 - } 1190 - sc->flags &= ~XCHK_REAPING_DISABLED; 1191 - } 1192 - 1193 1167 /* 1194 1168 * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub 1195 1169 * operation. Callers must not hold any locks that intersect with the CPU
-2
fs/xfs/scrub/common.h
··· 156 156 } 157 157 158 158 int xchk_metadata_inode_forks(struct xfs_scrub *sc); 159 - void xchk_stop_reaping(struct xfs_scrub *sc); 160 - void xchk_start_reaping(struct xfs_scrub *sc); 161 159 162 160 /* 163 161 * Setting up a hook to wait for intents to drain is costly -- we have to take
+6 -7
fs/xfs/scrub/fscounters.c
··· 150 150 if (error) 151 151 return error; 152 152 153 - /* 154 - * Pause background reclaim while we're scrubbing to reduce the 155 - * likelihood of background perturbations to the counters throwing off 156 - * our calculations. 157 - */ 158 - xchk_stop_reaping(sc); 159 - 160 153 return xchk_trans_alloc(sc, 0); 161 154 } 162 155 ··· 445 452 /* See if frextents is obviously wrong. */ 446 453 if (frextents > mp->m_sb.sb_rextents) 447 454 xchk_set_corrupt(sc); 455 + 456 + /* 457 + * XXX: We can't quiesce percpu counter updates, so exit early. 458 + * This can be re-enabled when we gain exclusive freeze functionality. 459 + */ 460 + return 0; 448 461 449 462 /* 450 463 * If ifree exceeds icount by more than the minimum variance then
-2
fs/xfs/scrub/scrub.c
··· 186 186 } 187 187 if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 188 188 mnt_drop_write_file(sc->file); 189 - if (sc->flags & XCHK_REAPING_DISABLED) 190 - xchk_start_reaping(sc); 191 189 if (sc->buf) { 192 190 if (sc->buf_cleanup) 193 191 sc->buf_cleanup(sc->buf);
-1
fs/xfs/scrub/scrub.h
··· 106 106 107 107 /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */ 108 108 #define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */ 109 - #define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */ 110 109 #define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */ 111 110 #define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */ 112 111 #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
-1
fs/xfs/scrub/trace.h
··· 98 98 99 99 #define XFS_SCRUB_STATE_STRINGS \ 100 100 { XCHK_TRY_HARDER, "try_harder" }, \ 101 - { XCHK_REAPING_DISABLED, "reaping_disabled" }, \ 102 101 { XCHK_FSGATES_DRAIN, "fsgates_drain" }, \ 103 102 { XCHK_NEED_DRAIN, "need_drain" }, \ 104 103 { XREP_ALREADY_FIXED, "already_fixed" }
+3 -1
fs/xfs/xfs_bmap_util.c
··· 558 558 if (!xfs_iext_next_extent(ifp, &icur, &got)) { 559 559 xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); 560 560 561 - out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST; 561 + if (bmv->bmv_entries > 0) 562 + out[bmv->bmv_entries - 1].bmv_oflags |= 563 + BMV_OF_LAST; 562 564 563 565 if (whichfork != XFS_ATTR_FORK && bno < end && 564 566 !xfs_getbmap_full(bmv)) {
+33 -7
fs/xfs/xfs_icache.c
··· 435 435 } 436 436 437 437 /* Make all pending inactivation work start immediately. */ 438 - static void 438 + static bool 439 439 xfs_inodegc_queue_all( 440 440 struct xfs_mount *mp) 441 441 { 442 442 struct xfs_inodegc *gc; 443 443 int cpu; 444 + bool ret = false; 444 445 445 446 for_each_online_cpu(cpu) { 446 447 gc = per_cpu_ptr(mp->m_inodegc, cpu); 447 - if (!llist_empty(&gc->list)) 448 + if (!llist_empty(&gc->list)) { 448 449 mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); 450 + ret = true; 451 + } 449 452 } 453 + 454 + return ret; 450 455 } 451 456 452 457 /* ··· 1861 1856 struct xfs_inode *ip, *n; 1862 1857 unsigned int nofs_flag; 1863 1858 1859 + ASSERT(gc->cpu == smp_processor_id()); 1860 + 1864 1861 WRITE_ONCE(gc->items, 0); 1865 1862 1866 1863 if (!node) ··· 1916 1909 1917 1910 /* 1918 1911 * Flush all the pending work and then disable the inode inactivation background 1919 - * workers and wait for them to stop. 1912 + * workers and wait for them to stop. Caller must hold sb->s_umount to 1913 + * coordinate changes in the inodegc_enabled state. 1920 1914 */ 1921 1915 void 1922 1916 xfs_inodegc_stop( 1923 1917 struct xfs_mount *mp) 1924 1918 { 1919 + bool rerun; 1920 + 1925 1921 if (!xfs_clear_inodegc_enabled(mp)) 1926 1922 return; 1927 1923 1924 + /* 1925 + * Drain all pending inodegc work, including inodes that could be 1926 + * queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan 1927 + * threads that sample the inodegc state just prior to us clearing it. 1928 + * The inodegc flag state prevents new threads from queuing more 1929 + * inodes, so we queue pending work items and flush the workqueue until 1930 + * all inodegc lists are empty. IOWs, we cannot use drain_workqueue 1931 + * here because it does not allow other unserialized mechanisms to 1932 + * reschedule inodegc work while this draining is in progress. 
1933 + */ 1928 1934 xfs_inodegc_queue_all(mp); 1929 - drain_workqueue(mp->m_inodegc_wq); 1935 + do { 1936 + flush_workqueue(mp->m_inodegc_wq); 1937 + rerun = xfs_inodegc_queue_all(mp); 1938 + } while (rerun); 1930 1939 1931 1940 trace_xfs_inodegc_stop(mp, __return_address); 1932 1941 } 1933 1942 1934 1943 /* 1935 1944 * Enable the inode inactivation background workers and schedule deferred inode 1936 - * inactivation work if there is any. 1945 + * inactivation work if there is any. Caller must hold sb->s_umount to 1946 + * coordinate changes in the inodegc_enabled state. 1937 1947 */ 1938 1948 void 1939 1949 xfs_inodegc_start( ··· 2093 2069 queue_delay = 0; 2094 2070 2095 2071 trace_xfs_inodegc_queue(mp, __return_address); 2096 - mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay); 2072 + mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, 2073 + queue_delay); 2097 2074 put_cpu_ptr(gc); 2098 2075 2099 2076 if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { ··· 2138 2113 2139 2114 if (xfs_is_inodegc_enabled(mp)) { 2140 2115 trace_xfs_inodegc_queue(mp, __return_address); 2141 - mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0); 2116 + mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, 2117 + 0); 2142 2118 } 2143 2119 put_cpu_ptr(gc); 2144 2120 }
+3 -2
fs/xfs/xfs_iomap.c
··· 1006 1006 if (eof) 1007 1007 imap.br_startoff = end_fsb; /* fake hole until the end */ 1008 1008 1009 - /* We never need to allocate blocks for zeroing a hole. */ 1010 - if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { 1009 + /* We never need to allocate blocks for zeroing or unsharing a hole. */ 1010 + if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) && 1011 + imap.br_startoff > offset_fsb) { 1011 1012 xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); 1012 1013 goto out_unlock; 1013 1014 }
+3
fs/xfs/xfs_mount.h
··· 66 66 /* approximate count of inodes in the list */ 67 67 unsigned int items; 68 68 unsigned int shrinker_hits; 69 + #if defined(DEBUG) || defined(XFS_WARN) 70 + unsigned int cpu; 71 + #endif 69 72 }; 70 73 71 74 /*
+3
fs/xfs/xfs_super.c
··· 1095 1095 1096 1096 for_each_possible_cpu(cpu) { 1097 1097 gc = per_cpu_ptr(mp->m_inodegc, cpu); 1098 + #if defined(DEBUG) || defined(XFS_WARN) 1099 + gc->cpu = cpu; 1100 + #endif 1098 1101 init_llist_head(&gc->list); 1099 1102 gc->items = 0; 1100 1103 INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);