···995995 __u32 rg_sick; /* o: sick things in ag */996996 __u32 rg_checked; /* o: checked metadata in ag */997997 __u32 rg_flags; /* i/o: flags for this ag */998998- __u32 rg_reserved[27]; /* o: zero */998998+ __u32 rg_writepointer; /* o: write pointer block offset for zoned */999999+ __u32 rg_reserved[26]; /* o: zero */9991000};10001001#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */10011002#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */10021003#define XFS_RTGROUP_GEOM_SICK_SUMMARY (1U << 2) /* rtsummary */10031004#define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */10041005#define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */10061006+10071007+#define XFS_RTGROUP_GEOM_WRITEPOINTER (1U << 0) /* write pointer */1005100810061009/* Health monitor event domains */10071010
+69-26
fs/xfs/xfs_file.c
···560560 flags, ac);561561}562562563563+/*564564+ * We need to lock the test/set EOF update as we can be racing with565565+ * other IO completions here to update the EOF. Failing to serialise566566+ * here can result in EOF moving backwards and Bad Things Happen when567567+ * that occurs.568568+ *569569+ * As IO completion only ever extends EOF, we can do an unlocked check570570+ * here to avoid taking the spinlock. If we land within the current EOF,571571+ * then we do not need to do an extending update at all, and we don't572572+ * need to take the lock to check this. If we race with an update moving573573+ * EOF, then we'll either still be beyond EOF and need to take the lock,574574+ * or we'll be within EOF and we don't need to take it at all.575575+ */576576+static int577577+xfs_dio_endio_set_isize(578578+ struct inode *inode,579579+ loff_t offset,580580+ ssize_t size)581581+{582582+ struct xfs_inode *ip = XFS_I(inode);583583+584584+ if (offset + size <= i_size_read(inode))585585+ return 0;586586+587587+ spin_lock(&ip->i_flags_lock);588588+ if (offset + size <= i_size_read(inode)) {589589+ spin_unlock(&ip->i_flags_lock);590590+ return 0;591591+ }592592+593593+ i_size_write(inode, offset + size);594594+ spin_unlock(&ip->i_flags_lock);595595+596596+ return xfs_setfilesize(ip, offset, size);597597+}598598+599599+static int600600+xfs_zoned_dio_write_end_io(601601+ struct kiocb *iocb,602602+ ssize_t size,603603+ int error,604604+ unsigned flags)605605+{606606+ struct inode *inode = file_inode(iocb->ki_filp);607607+ struct xfs_inode *ip = XFS_I(inode);608608+ unsigned int nofs_flag;609609+610610+ ASSERT(!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));611611+612612+ trace_xfs_end_io_direct_write(ip, iocb->ki_pos, size);613613+614614+ if (xfs_is_shutdown(ip->i_mount))615615+ return -EIO;616616+617617+ if (error || !size)618618+ return error;619619+620620+ XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);621621+622622+ nofs_flag = memalloc_nofs_save();623623+ error = xfs_dio_endio_set_isize(inode, iocb->ki_pos, size);624624+ memalloc_nofs_restore(nofs_flag);625625+626626+ return error;627627+}628628+563629static int564630xfs_dio_write_end_io(565631 struct kiocb *iocb,···638572 loff_t offset = iocb->ki_pos;639573 unsigned int nofs_flag;640574641641- ASSERT(!xfs_is_zoned_inode(ip) ||642642- !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));575575+ ASSERT(!xfs_is_zoned_inode(ip));643576644577 trace_xfs_end_io_direct_write(ip, offset, size);645578···688623 * with the on-disk inode size being outside the in-core inode size. We689624 * have no other method of updating EOF for AIO, so always do it here690625 * if necessary.691691- *692692- * We need to lock the test/set EOF update as we can be racing with693693- * other IO completions here to update the EOF. Failing to serialise694694- * here can result in EOF moving backwards and Bad Things Happen when695695- * that occurs.696696- *697697- * As IO completion only ever extends EOF, we can do an unlocked check698698- * here to avoid taking the spinlock. If we land within the current EOF,699699- * then we do not need to do an extending update at all, and we don't700700- * need to take the lock to check this. If we race with an update moving701701- * EOF, then we'll either still be beyond EOF and need to take the lock,702702- * or we'll be within EOF and we don't need to take it at all.703626 */704704- if (offset + size <= i_size_read(inode))705705- goto out;706706-707707- spin_lock(&ip->i_flags_lock);708708- if (offset + size > i_size_read(inode)) {709709- i_size_write(inode, offset + size);710710- spin_unlock(&ip->i_flags_lock);711711- error = xfs_setfilesize(ip, offset, size);712712- } else {713713- spin_unlock(&ip->i_flags_lock);714714- }627627+ error = xfs_dio_endio_set_isize(inode, offset, size);715628716629out:717630 memalloc_nofs_restore(nofs_flag);···731688static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {732689 .bio_set = &iomap_ioend_bioset,733690 .submit_io = xfs_dio_zoned_submit_io,734734- .end_io = xfs_dio_write_end_io,691691+ .end_io = xfs_zoned_dio_write_end_io,735692};736693737694/*
···189189 xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used);190190}191191192192-static void193193-xfs_zone_record_blocks(194194- struct xfs_trans *tp,192192+static inline void193193+xfs_zone_inc_written(195194 struct xfs_open_zone *oz,196196- xfs_fsblock_t fsbno,197195 xfs_filblks_t len)198196{199199- struct xfs_mount *mp = tp->t_mountp;200200- struct xfs_rtgroup *rtg = oz->oz_rtg;201201- struct xfs_inode *rmapip = rtg_rmap(rtg);197197+ xfs_assert_ilocked(rtg_rmap(oz->oz_rtg), XFS_ILOCK_EXCL);202198203203- trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(mp, fsbno), len);204204-205205- xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);206206- xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);207207- rmapip->i_used_blocks += len;208208- ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));209199 oz->oz_written += len;210210- if (oz->oz_written == rtg_blocks(rtg))200200+ if (oz->oz_written == rtg_blocks(oz->oz_rtg))211201 xfs_open_zone_mark_full(oz);212212- xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);213202}214203215204/*···216227 trace_xfs_zone_skip_blocks(oz, 0, len);217228218229 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);219219- oz->oz_written += len;220220- if (oz->oz_written == rtg_blocks(rtg))221221- xfs_open_zone_mark_full(oz);230230+ xfs_zone_inc_written(oz, len);222231 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);223232224233 xfs_add_frextents(rtg_mount(rtg), len);···231244 xfs_fsblock_t old_startblock)232245{233246 struct xfs_bmbt_irec data;247247+ struct xfs_rtgroup *rtg = oz->oz_rtg;248248+ struct xfs_inode *rmapip = rtg_rmap(rtg);234249 int nmaps = 1;235250 int error;236251···291302 }292303 }293304294294- xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount);305305+ trace_xfs_zone_record_blocks(oz,306306+ xfs_rtb_to_rgbno(tp->t_mountp, new->br_startblock),307307+ new->br_blockcount);308308+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);309309+ xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);310310+ rmapip->i_used_blocks += new->br_blockcount;311311+ ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));312312+ xfs_zone_inc_written(oz, new->br_blockcount);313313+ xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);295314296315 /* Map the new blocks into the data fork. */297316 xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new);···678681 if (oz)679682 goto out_unlock;680683681681- if (pack_tight)684684+ if (pack_tight) {682685 oz = xfs_select_open_zone_mru(zi, write_hint);683683- if (oz)684684- goto out_unlock;686686+ if (oz)687687+ goto out_unlock;688688+ }685689686690 /*687691 * See if we can open a new zone and use that so that data for different···693695 goto out_unlock;694696695697 /*696696- * Try to find an zone that is an ok match to colocate data with.698698+ * Try to find a zone that is an ok match to colocate data with.697699 */698700 oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK);699701 if (oz)
+18-6
fs/xfs/xfs_zone_gc.c
···170170 s64 available, free, threshold;171171 s32 remainder;172172173173+ /* If we have no reclaimable blocks, running GC is useless. */173174 if (!xfs_zoned_have_reclaimable(mp->m_zone_info))174175 return false;175176177177+ /*178178+ * In order to avoid file fragmentation as much as possible, we should179179+ * make sure that we can open enough zones. So trigger GC if the number180180+ * of blocks immediately available for writes is lower than the total181181+ * number of blocks from all possible open zones.182182+ */176183 available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE);177177-178184 if (available <179185 xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES))180186 return true;181187182182- free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);188188+ /*189189+ * For cases where the user wants to be more aggressive with GC,190190+ * the sysfs attribute zonegc_low_space may be set to a non zero value,191191+ * to indicate that GC should try to maintain at least zonegc_low_space192192+ * percent of the free space to be directly available for writing. Check193193+ * this here.194194+ */195195+ if (!mp->m_zonegc_low_space)196196+ return false;183197198198+ free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);184199 threshold = div_s64_rem(free, 100, &remainder);185200 threshold = threshold * mp->m_zonegc_low_space +186201 remainder * div_s64(mp->m_zonegc_low_space, 100);187202188188- if (available < threshold)189189- return true;190190-191191- return false;203203+ return available < threshold;192204}193205194206static struct xfs_zone_gc_data *
+6-1
fs/xfs/xfs_zone_info.c
···9090 seq_printf(m, "\tRT GC required: %d\n",9191 xfs_zoned_need_gc(mp));92929393+ seq_printf(m, "\ttotal number of zones: %u\n",9494+ mp->m_sb.sb_rgcount);9395 seq_printf(m, "\tfree zones: %d\n", atomic_read(&zi->zi_nr_free_zones));9494- seq_puts(m, "\topen zones:\n");9696+9597 spin_lock(&zi->zi_open_zones_lock);9898+ seq_printf(m, "\tnumber of open zones: %u / %u\n",9999+ zi->zi_nr_open_zones, mp->m_max_open_zones);100100+ seq_puts(m, "\topen zones:\n");96101 list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)97102 xfs_show_open_zone(m, oz);98103 if (zi->zi_open_gc_zone) {