Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:
"Just a single bug fix (and documentation for the issue)"

* tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: document another racy GC case in xfs_zoned_map_extent
xfs: prevent gc from picking the same zone twice

+41
+6
fs/xfs/libxfs/xfs_rtgroup.h
···
50 50 uint8_t *rtg_rsum_cache;
51 51 struct xfs_open_zone *rtg_open_zone;
52 52 };
53 +
54 + /*
55 + * Count of outstanding GC operations for zoned XFS. Any RTG with a
56 + * non-zero rtg_gccount will not be picked as new GC victim.
57 + */
58 + atomic_t rtg_gccount;
53 59 };
54 60
55 61 /*
+8
fs/xfs/xfs_zone_alloc.c
···
246 246 * If a data write raced with this GC write, keep the existing data in
247 247 * the data fork, mark our newly written GC extent as reclaimable, then
248 248 * move on to the next extent.
249 + *
250 + * Note that this can also happen when racing with operations that do
251 + * not actually invalidate the data, but just move it to a different
252 + * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the
253 + * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the
254 + * data was just moved around, GC fails to free the zone, but the zone
255 + * becomes a GC candidate again as soon as all previous GC I/O has
256 + * finished and these blocks will be moved out eventually.
249 257 */
250 258 if (old_startblock != NULLFSBLOCK &&
251 259 old_startblock != data.br_startblock)
+27
fs/xfs/xfs_zone_gc.c
···
114 114 /* Open Zone being written to */
115 115 struct xfs_open_zone *oz;
116 116
117 + struct xfs_rtgroup *victim_rtg;
118 +
117 119 /* Bio used for reads and writes, including the bvec used by it */
118 120 struct bio_vec bv;
119 121 struct bio bio; /* must be last */
···
266 264 iter->rec_count = 0;
267 265 iter->rec_idx = 0;
268 266 iter->victim_rtg = victim_rtg;
267 + atomic_inc(&victim_rtg->rtg_gccount);
269 268 }
270 269
271 270 /*
···
365 362
366 363 return 0;
367 364 done:
365 + atomic_dec(&iter->victim_rtg->rtg_gccount);
368 366 xfs_rtgroup_rele(iter->victim_rtg);
369 367 iter->victim_rtg = NULL;
370 368 return 0;
···
454 450
455 451 if (!rtg)
456 452 continue;
453 +
454 + /*
455 + * If the zone is already undergoing GC, don't pick it again.
456 + *
457 + * This prevents us from picking one of the zones for which we
458 + * already submitted GC I/O, but for which the remapping hasn't
459 + * concluded yet. This won't cause data corruption, but
460 + * increases write amplification and slows down GC, so this is
461 + * a bad thing.
462 + */
463 + if (atomic_read(&rtg->rtg_gccount)) {
464 + xfs_rtgroup_rele(rtg);
465 + continue;
466 + }
457 467
458 468 /* skip zones that are just waiting for a reset */
459 469 if (rtg_rmap(rtg)->i_used_blocks == 0 ||
···
706 688 chunk->scratch = &data->scratch[data->scratch_idx];
707 689 chunk->data = data;
708 690 chunk->oz = oz;
691 + chunk->victim_rtg = iter->victim_rtg;
692 + atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
693 + atomic_inc(&chunk->victim_rtg->rtg_gccount);
709 694
710 695 bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
711 696 bio->bi_end_io = xfs_zone_gc_end_io;
···
731 710 xfs_zone_gc_free_chunk(
732 711 struct xfs_gc_bio *chunk)
733 712 {
713 + atomic_dec(&chunk->victim_rtg->rtg_gccount);
714 + xfs_rtgroup_rele(chunk->victim_rtg);
734 715 list_del(&chunk->entry);
735 716 xfs_open_zone_put(chunk->oz);
736 717 xfs_irele(chunk->ip);
···
792 769 split_chunk->new_daddr = chunk->new_daddr;
793 770 split_chunk->oz = chunk->oz;
794 771 atomic_inc(&chunk->oz->oz_ref);
772 +
773 + split_chunk->victim_rtg = chunk->victim_rtg;
774 + atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
775 + atomic_inc(&chunk->victim_rtg->rtg_gccount);
795 776
796 777 chunk->offset += split_len;
797 778 chunk->len -= split_len;