Merge branch 'xfs-7.1-merge' into for-next

+4 -1

fs/xfs/libxfs/xfs_fs.h

··· 995 995 __u32 rg_sick; /* o: sick things in ag */ 996 996 __u32 rg_checked; /* o: checked metadata in ag */ 997 997 __u32 rg_flags; /* i/o: flags for this ag */ 998 - __u32 rg_reserved[27]; /* o: zero */ 998 + __u32 rg_writepointer; /* o: write pointer block offset for zoned */ 999 + __u32 rg_reserved[26]; /* o: zero */ 999 1000 }; 1000 1001 #define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */ 1001 1002 #define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */ 1002 1003 #define XFS_RTGROUP_GEOM_SICK_SUMMARY (1U << 2) /* rtsummary */ 1003 1004 #define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */ 1004 1005 #define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */ 1006 + 1007 + #define XFS_RTGROUP_GEOM_WRITEPOINTER (1U << 0) /* write pointer */ 1005 1008 1006 1009 /* Health monitor event domains */ 1007 1010

+69 -26

fs/xfs/xfs_file.c

··· 560 560 flags, ac); 561 561 } 562 562 563 + /* 564 + * We need to lock the test/set EOF update as we can be racing with 565 + * other IO completions here to update the EOF. Failing to serialise 566 + * here can result in EOF moving backwards and Bad Things Happen when 567 + * that occurs. 568 + * 569 + * As IO completion only ever extends EOF, we can do an unlocked check 570 + * here to avoid taking the spinlock. If we land within the current EOF, 571 + * then we do not need to do an extending update at all, and we don't 572 + * need to take the lock to check this. If we race with an update moving 573 + * EOF, then we'll either still be beyond EOF and need to take the lock, 574 + * or we'll be within EOF and we don't need to take it at all. 575 + */ 576 + static int 577 + xfs_dio_endio_set_isize( 578 + struct inode *inode, 579 + loff_t offset, 580 + ssize_t size) 581 + { 582 + struct xfs_inode *ip = XFS_I(inode); 583 + 584 + if (offset + size <= i_size_read(inode)) 585 + return 0; 586 + 587 + spin_lock(&ip->i_flags_lock); 588 + if (offset + size <= i_size_read(inode)) { 589 + spin_unlock(&ip->i_flags_lock); 590 + return 0; 591 + } 592 + 593 + i_size_write(inode, offset + size); 594 + spin_unlock(&ip->i_flags_lock); 595 + 596 + return xfs_setfilesize(ip, offset, size); 597 + } 598 + 599 + static int 600 + xfs_zoned_dio_write_end_io( 601 + struct kiocb *iocb, 602 + ssize_t size, 603 + int error, 604 + unsigned flags) 605 + { 606 + struct inode *inode = file_inode(iocb->ki_filp); 607 + struct xfs_inode *ip = XFS_I(inode); 608 + unsigned int nofs_flag; 609 + 610 + ASSERT(!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW))); 611 + 612 + trace_xfs_end_io_direct_write(ip, iocb->ki_pos, size); 613 + 614 + if (xfs_is_shutdown(ip->i_mount)) 615 + return -EIO; 616 + 617 + if (error || !size) 618 + return error; 619 + 620 + XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); 621 + 622 + nofs_flag = memalloc_nofs_save(); 623 + error = xfs_dio_endio_set_isize(inode, iocb->ki_pos, size); 624 + memalloc_nofs_restore(nofs_flag); 625 + 626 + return error; 627 + } 628 + 563 629 static int 564 630 xfs_dio_write_end_io( 565 631 struct kiocb *iocb, ··· 638 572 loff_t offset = iocb->ki_pos; 639 573 unsigned int nofs_flag; 640 574 641 - ASSERT(!xfs_is_zoned_inode(ip) || 642 - !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW))); 575 + ASSERT(!xfs_is_zoned_inode(ip)); 643 576 644 577 trace_xfs_end_io_direct_write(ip, offset, size); 645 578 ··· 688 623 * with the on-disk inode size being outside the in-core inode size. We 689 624 * have no other method of updating EOF for AIO, so always do it here 690 625 * if necessary. 691 - * 692 - * We need to lock the test/set EOF update as we can be racing with 693 - * other IO completions here to update the EOF. Failing to serialise 694 - * here can result in EOF moving backwards and Bad Things Happen when 695 - * that occurs. 696 - * 697 - * As IO completion only ever extends EOF, we can do an unlocked check 698 - * here to avoid taking the spinlock. If we land within the current EOF, 699 - * then we do not need to do an extending update at all, and we don't 700 - * need to take the lock to check this. If we race with an update moving 701 - * EOF, then we'll either still be beyond EOF and need to take the lock, 702 - * or we'll be within EOF and we don't need to take it at all. 703 626 */ 704 - if (offset + size <= i_size_read(inode)) 705 - goto out; 706 - 707 - spin_lock(&ip->i_flags_lock); 708 - if (offset + size > i_size_read(inode)) { 709 - i_size_write(inode, offset + size); 710 - spin_unlock(&ip->i_flags_lock); 711 - error = xfs_setfilesize(ip, offset, size); 712 - } else { 713 - spin_unlock(&ip->i_flags_lock); 714 - } 627 + error = xfs_dio_endio_set_isize(inode, offset, size); 715 628 716 629 out: 717 630 memalloc_nofs_restore(nofs_flag); ··· 731 688 static const struct iomap_dio_ops xfs_dio_zoned_write_ops = { 732 689 .bio_set = &iomap_ioend_bioset, 733 690 .submit_io = xfs_dio_zoned_submit_io, 734 - .end_io = xfs_dio_write_end_io, 691 + .end_io = xfs_zoned_dio_write_end_io, 735 692 }; 736 693 737 694 /*

+19

fs/xfs/xfs_ioctl.c

··· 37 37 #include "xfs_ioctl.h" 38 38 #include "xfs_xattr.h" 39 39 #include "xfs_rtbitmap.h" 40 + #include "xfs_rtrmap_btree.h" 40 41 #include "xfs_file.h" 41 42 #include "xfs_exchrange.h" 42 43 #include "xfs_handle.h" 43 44 #include "xfs_rtgroup.h" 44 45 #include "xfs_healthmon.h" 45 46 #include "xfs_verify_media.h" 47 + #include "xfs_zone_priv.h" 48 + #include "xfs_zone_alloc.h" 46 49 47 50 #include <linux/mount.h> 48 51 #include <linux/fileattr.h> ··· 416 413 { 417 414 struct xfs_rtgroup *rtg; 418 415 struct xfs_rtgroup_geometry rgeo; 416 + xfs_rgblock_t highest_rgbno; 419 417 int error; 420 418 421 419 if (copy_from_user(&rgeo, arg, sizeof(rgeo))) ··· 436 432 xfs_rtgroup_put(rtg); 437 433 if (error) 438 434 return error; 435 + 436 + if (xfs_has_zoned(mp)) { 437 + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); 438 + if (rtg->rtg_open_zone) { 439 + rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated; 440 + } else { 441 + highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); 442 + if (highest_rgbno == NULLRGBLOCK) 443 + rgeo.rg_writepointer = 0; 444 + else 445 + rgeo.rg_writepointer = highest_rgbno + 1; 446 + } 447 + xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); 448 + rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER; 449 + } 439 450 440 451 if (copy_to_user(arg, &rgeo, sizeof(rgeo))) 441 452 return -EFAULT;

+21 -23

fs/xfs/xfs_zone_alloc.c

··· 189 189 xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used); 190 190 } 191 191 192 - static void 193 - xfs_zone_record_blocks( 194 - struct xfs_trans *tp, 192 + static inline void 193 + xfs_zone_inc_written( 195 194 struct xfs_open_zone *oz, 196 - xfs_fsblock_t fsbno, 197 195 xfs_filblks_t len) 198 196 { 199 - struct xfs_mount *mp = tp->t_mountp; 200 - struct xfs_rtgroup *rtg = oz->oz_rtg; 201 - struct xfs_inode *rmapip = rtg_rmap(rtg); 197 + xfs_assert_ilocked(rtg_rmap(oz->oz_rtg), XFS_ILOCK_EXCL); 202 198 203 - trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(mp, fsbno), len); 204 - 205 - xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); 206 - xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); 207 - rmapip->i_used_blocks += len; 208 - ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); 209 199 oz->oz_written += len; 210 - if (oz->oz_written == rtg_blocks(rtg)) 200 + if (oz->oz_written == rtg_blocks(oz->oz_rtg)) 211 201 xfs_open_zone_mark_full(oz); 212 - xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); 213 202 } 214 203 215 204 /* ··· 216 227 trace_xfs_zone_skip_blocks(oz, 0, len); 217 228 218 229 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); 219 - oz->oz_written += len; 220 - if (oz->oz_written == rtg_blocks(rtg)) 221 - xfs_open_zone_mark_full(oz); 230 + xfs_zone_inc_written(oz, len); 222 231 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); 223 232 224 233 xfs_add_frextents(rtg_mount(rtg), len); ··· 231 244 xfs_fsblock_t old_startblock) 232 245 { 233 246 struct xfs_bmbt_irec data; 247 + struct xfs_rtgroup *rtg = oz->oz_rtg; 248 + struct xfs_inode *rmapip = rtg_rmap(rtg); 234 249 int nmaps = 1; 235 250 int error; 236 251 ··· 291 302 } 292 303 } 293 304 294 - xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount); 305 + trace_xfs_zone_record_blocks(oz, 306 + xfs_rtb_to_rgbno(tp->t_mountp, new->br_startblock), 307 + new->br_blockcount); 308 + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); 309 + xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); 310 + rmapip->i_used_blocks += new->br_blockcount; 311 + ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); 312 + xfs_zone_inc_written(oz, new->br_blockcount); 313 + xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); 295 314 296 315 /* Map the new blocks into the data fork. */ 297 316 xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new); ··· 678 681 if (oz) 679 682 goto out_unlock; 680 683 681 - if (pack_tight) 684 + if (pack_tight) { 682 685 oz = xfs_select_open_zone_mru(zi, write_hint); 683 - if (oz) 684 - goto out_unlock; 686 + if (oz) 687 + goto out_unlock; 688 + } 685 689 686 690 /* 687 691 * See if we can open a new zone and use that so that data for different ··· 693 695 goto out_unlock; 694 696 695 697 /* 696 - * Try to find an zone that is an ok match to colocate data with. 698 + * Try to find a zone that is an ok match to colocate data with. 697 699 */ 698 700 oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK); 699 701 if (oz)

+18 -6

fs/xfs/xfs_zone_gc.c

··· 170 170 s64 available, free, threshold; 171 171 s32 remainder; 172 172 173 + /* If we have no reclaimable blocks, running GC is useless. */ 173 174 if (!xfs_zoned_have_reclaimable(mp->m_zone_info)) 174 175 return false; 175 176 177 + /* 178 + * In order to avoid file fragmentation as much as possible, we should 179 + * make sure that we can open enough zones. So trigger GC if the number 180 + * of blocks immediately available for writes is lower than the total 181 + * number of blocks from all possible open zones. 182 + */ 176 183 available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE); 177 - 178 184 if (available < 179 185 xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES)) 180 186 return true; 181 187 182 - free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); 188 + /* 189 + * For cases where the user wants to be more aggressive with GC, 190 + * the sysfs attribute zonegc_low_space may be set to a non zero value, 191 + * to indicate that GC should try to maintain at least zonegc_low_space 192 + * percent of the free space to be directly available for writing. Check 193 + * this here. 194 + */ 195 + if (!mp->m_zonegc_low_space) 196 + return false; 183 197 198 + free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); 184 199 threshold = div_s64_rem(free, 100, &remainder); 185 200 threshold = threshold * mp->m_zonegc_low_space + 186 201 remainder * div_s64(mp->m_zonegc_low_space, 100); 187 202 188 - if (available < threshold) 189 - return true; 190 - 191 - return false; 203 + return available < threshold; 192 204 } 193 205 194 206 static struct xfs_zone_gc_data *

+6 -1

fs/xfs/xfs_zone_info.c

··· 90 90 seq_printf(m, "\tRT GC required: %d\n", 91 91 xfs_zoned_need_gc(mp)); 92 92 93 + seq_printf(m, "\ttotal number of zones: %u\n", 94 + mp->m_sb.sb_rgcount); 93 95 seq_printf(m, "\tfree zones: %d\n", atomic_read(&zi->zi_nr_free_zones)); 94 - seq_puts(m, "\topen zones:\n"); 96 + 95 97 spin_lock(&zi->zi_open_zones_lock); 98 + seq_printf(m, "\tnumber of open zones: %u / %u\n", 99 + zi->zi_nr_open_zones, mp->m_max_open_zones); 100 + seq_puts(m, "\topen zones:\n"); 96 101 list_for_each_entry(oz, &zi->zi_open_zones, oz_entry) 97 102 xfs_show_open_zone(m, oz); 98 103 if (zi->zi_open_gc_zone) {

Configure Feed

Configure Feed