Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
"Fix a few bugs that could lead to corrupt files, fsck complaints, and
filesystem crashes:

- Minor documentation fixes

- Fix a file corruption due to read racing with an insert range
operation.

- Fix log reservation overflows when allocating large rt extents

- Fix a buffer log item flags check

- Don't allow administrators to mount with sunit= options that will
cause later xfs_repair complaints about the root directory being
suspicious because the fs geometry appears inconsistent

- Fix a non-static helper that should have been static"

* tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: Make the symbol 'xfs_rtalloc_log_count' static
xfs: don't commit sunit/swidth updates to disk if that would cause repair failures
xfs: split the sunit parameter update into two parts
xfs: refactor agfl length computation function
libxfs: resync with the userspace libxfs
xfs: use bitops interface for buf log item AIL flag check
xfs: fix log reservation overflows when allocating large rt extents
xfs: stabilize insert range start boundary to avoid COW writeback race
xfs: fix Sphinx documentation warning

+347 -110
+1 -1
Documentation/admin-guide/xfs.rst
··· 253 253 pool. 254 254 255 255 fs.xfs.speculative_prealloc_lifetime 256 - (Units: seconds Min: 1 Default: 300 Max: 86400) 256 + (Units: seconds Min: 1 Default: 300 Max: 86400) 257 257 The interval at which the background scanning for inodes 258 258 with unused speculative preallocation runs. The scan 259 259 removes unused preallocation from clean inodes and releases
+13 -5
fs/xfs/libxfs/xfs_alloc.c
··· 2248 2248 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 2249 2249 } 2250 2250 2251 + /* 2252 + * Compute the minimum length of the AGFL in the given AG. If @pag is NULL, 2253 + * return the largest possible minimum length. 2254 + */ 2251 2255 unsigned int 2252 2256 xfs_alloc_min_freelist( 2253 2257 struct xfs_mount *mp, 2254 2258 struct xfs_perag *pag) 2255 2259 { 2260 + /* AG btrees have at least 1 level. */ 2261 + static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1}; 2262 + const uint8_t *levels = pag ? pag->pagf_levels : fake_levels; 2256 2263 unsigned int min_free; 2257 2264 2265 + ASSERT(mp->m_ag_maxlevels > 0); 2266 + 2258 2267 /* space needed by-bno freespace btree */ 2259 - min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, 2268 + min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, 2260 2269 mp->m_ag_maxlevels); 2261 2270 /* space needed by-size freespace btree */ 2262 - min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, 2271 + min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, 2263 2272 mp->m_ag_maxlevels); 2264 2273 /* space needed reverse mapping used space btree */ 2265 2274 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 2266 - min_free += min_t(unsigned int, 2267 - pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, 2268 - mp->m_rmap_maxlevels); 2275 + min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, 2276 + mp->m_rmap_maxlevels); 2269 2277 2270 2278 return min_free; 2271 2279 }
+2 -3
fs/xfs/libxfs/xfs_bmap.c
··· 4561 4561 struct xfs_mount *mp = ip->i_mount; 4562 4562 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 4563 4563 struct xfs_bmalloca bma = { NULL }; 4564 - u16 flags = 0; 4564 + uint16_t flags = 0; 4565 4565 struct xfs_trans *tp; 4566 4566 int error; 4567 4567 ··· 5972 5972 goto del_cursor; 5973 5973 } 5974 5974 5975 - if (XFS_IS_CORRUPT(mp, 5976 - stop_fsb >= got.br_startoff + got.br_blockcount)) { 5975 + if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) { 5977 5976 error = -EFSCORRUPTED; 5978 5977 goto del_cursor; 5979 5978 }
+21
fs/xfs/libxfs/xfs_dir2.c
··· 724 724 /* There shouldn't be any slashes or nulls here */ 725 725 return !memchr(name, '/', length) && !memchr(name, 0, length); 726 726 } 727 + 728 + xfs_dahash_t 729 + xfs_dir2_hashname( 730 + struct xfs_mount *mp, 731 + struct xfs_name *name) 732 + { 733 + if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb))) 734 + return xfs_ascii_ci_hashname(name); 735 + return xfs_da_hashname(name->name, name->len); 736 + } 737 + 738 + enum xfs_dacmp 739 + xfs_dir2_compname( 740 + struct xfs_da_args *args, 741 + const unsigned char *name, 742 + int len) 743 + { 744 + if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb))) 745 + return xfs_ascii_ci_compname(args, name, len); 746 + return xfs_da_compname(args, name, len); 747 + }
+9 -20
fs/xfs/libxfs/xfs_dir2_priv.h
··· 175 175 extern int xfs_dir2_sf_removename(struct xfs_da_args *args); 176 176 extern int xfs_dir2_sf_replace(struct xfs_da_args *args); 177 177 extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); 178 + int xfs_dir2_sf_entsize(struct xfs_mount *mp, 179 + struct xfs_dir2_sf_hdr *hdr, int len); 180 + void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr, 181 + struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino); 182 + void xfs_dir2_sf_put_ftype(struct xfs_mount *mp, 183 + struct xfs_dir2_sf_entry *sfep, uint8_t ftype); 178 184 179 185 /* xfs_dir2_readdir.c */ 180 186 extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, ··· 200 194 return round_up(len, XFS_DIR2_DATA_ALIGN); 201 195 } 202 196 203 - static inline xfs_dahash_t 204 - xfs_dir2_hashname( 205 - struct xfs_mount *mp, 206 - struct xfs_name *name) 207 - { 208 - if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb))) 209 - return xfs_ascii_ci_hashname(name); 210 - return xfs_da_hashname(name->name, name->len); 211 - } 212 - 213 - static inline enum xfs_dacmp 214 - xfs_dir2_compname( 215 - struct xfs_da_args *args, 216 - const unsigned char *name, 217 - int len) 218 - { 219 - if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb))) 220 - return xfs_ascii_ci_compname(args, name, len); 221 - return xfs_da_compname(args, name, len); 222 - } 197 + xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name); 198 + enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args, 199 + const unsigned char *name, int len); 223 200 224 201 #endif /* __XFS_DIR2_PRIV_H__ */
+3 -3
fs/xfs/libxfs/xfs_dir2_sf.c
··· 37 37 static void xfs_dir2_sf_toino4(xfs_da_args_t *args); 38 38 static void xfs_dir2_sf_toino8(xfs_da_args_t *args); 39 39 40 - static int 40 + int 41 41 xfs_dir2_sf_entsize( 42 42 struct xfs_mount *mp, 43 43 struct xfs_dir2_sf_hdr *hdr, ··· 84 84 return get_unaligned_be64(from) & XFS_MAXINUMBER; 85 85 } 86 86 87 - static void 87 + void 88 88 xfs_dir2_sf_put_ino( 89 89 struct xfs_mount *mp, 90 90 struct xfs_dir2_sf_hdr *hdr, ··· 145 145 return XFS_DIR3_FT_UNKNOWN; 146 146 } 147 147 148 - static void 148 + void 149 149 xfs_dir2_sf_put_ftype( 150 150 struct xfs_mount *mp, 151 151 struct xfs_dir2_sf_entry *sfep,
+64
fs/xfs/libxfs/xfs_ialloc.c
··· 2909 2909 else 2910 2910 igeo->ialloc_align = 0; 2911 2911 } 2912 + 2913 + /* Compute the location of the root directory inode that is laid out by mkfs. */ 2914 + xfs_ino_t 2915 + xfs_ialloc_calc_rootino( 2916 + struct xfs_mount *mp, 2917 + int sunit) 2918 + { 2919 + struct xfs_ino_geometry *igeo = M_IGEO(mp); 2920 + xfs_agblock_t first_bno; 2921 + 2922 + /* 2923 + * Pre-calculate the geometry of AG 0. We know what it looks like 2924 + * because libxfs knows how to create allocation groups now. 2925 + * 2926 + * first_bno is the first block in which mkfs could possibly have 2927 + * allocated the root directory inode, once we factor in the metadata 2928 + * that mkfs formats before it. Namely, the four AG headers... 2929 + */ 2930 + first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize); 2931 + 2932 + /* ...the two free space btree roots... */ 2933 + first_bno += 2; 2934 + 2935 + /* ...the inode btree root... */ 2936 + first_bno += 1; 2937 + 2938 + /* ...the initial AGFL... */ 2939 + first_bno += xfs_alloc_min_freelist(mp, NULL); 2940 + 2941 + /* ...the free inode btree root... */ 2942 + if (xfs_sb_version_hasfinobt(&mp->m_sb)) 2943 + first_bno++; 2944 + 2945 + /* ...the reverse mapping btree root... */ 2946 + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 2947 + first_bno++; 2948 + 2949 + /* ...the reference count btree... */ 2950 + if (xfs_sb_version_hasreflink(&mp->m_sb)) 2951 + first_bno++; 2952 + 2953 + /* 2954 + * ...and the log, if it is allocated in the first allocation group. 2955 + * 2956 + * This can happen with filesystems that only have a single 2957 + * allocation group, or very odd geometries created by old mkfs 2958 + * versions on very small filesystems. 2959 + */ 2960 + if (mp->m_sb.sb_logstart && 2961 + XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) 2962 + first_bno += mp->m_sb.sb_logblocks; 2963 + 2964 + /* 2965 + * Now round first_bno up to whatever allocation alignment is given 2966 + * by the filesystem or was passed in. 
2967 + */ 2968 + if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0) 2969 + first_bno = roundup(first_bno, sunit); 2970 + else if (xfs_sb_version_hasalign(&mp->m_sb) && 2971 + mp->m_sb.sb_inoalignmt > 1) 2972 + first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt); 2973 + 2974 + return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno)); 2975 + }
+1
fs/xfs/libxfs/xfs_ialloc.h
··· 152 152 153 153 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); 154 154 void xfs_ialloc_setup_geometry(struct xfs_mount *mp); 155 + xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); 155 156 156 157 #endif /* __XFS_IALLOC_H__ */
+77 -19
fs/xfs/libxfs/xfs_trans_resv.c
··· 197 197 } 198 198 199 199 /* 200 + * Per-extent log reservation for the btree changes involved in freeing or 201 + * allocating a realtime extent. We have to be able to log as many rtbitmap 202 + * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents, 203 + * as well as the realtime summary block. 204 + */ 205 + static unsigned int 206 + xfs_rtalloc_log_count( 207 + struct xfs_mount *mp, 208 + unsigned int num_ops) 209 + { 210 + unsigned int blksz = XFS_FSB_TO_B(mp, 1); 211 + unsigned int rtbmp_bytes; 212 + 213 + rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY; 214 + return (howmany(rtbmp_bytes, blksz) + 1) * num_ops; 215 + } 216 + 217 + /* 200 218 * Various log reservation values. 201 219 * 202 220 * These are based on the size of the file system block because that is what ··· 236 218 237 219 /* 238 220 * In a write transaction we can allocate a maximum of 2 239 - * extents. This gives: 221 + * extents. This gives (t1): 240 222 * the inode getting the new extents: inode size 241 223 * the inode's bmap btree: max depth * block size 242 224 * the agfs of the ags from which the extents are allocated: 2 * sector 243 225 * the superblock free block counter: sector size 244 226 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size 245 - * And the bmap_finish transaction can free bmap blocks in a join: 227 + * Or, if we're writing to a realtime file (t2): 228 + * the inode getting the new extents: inode size 229 + * the inode's bmap btree: max depth * block size 230 + * the agfs of the ags from which the extents are allocated: 2 * sector 231 + * the superblock free block counter: sector size 232 + * the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes 233 + * the realtime summary: 1 block 234 + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size 235 + * And the bmap_finish transaction can free bmap blocks in a join (t3): 246 236 * the agfs of the ags containing the blocks: 2 * sector size 247 
237 * the agfls of the ags containing the blocks: 2 * sector size 248 238 * the super block free block counter: sector size ··· 260 234 xfs_calc_write_reservation( 261 235 struct xfs_mount *mp) 262 236 { 263 - return XFS_DQUOT_LOGRES(mp) + 264 - max((xfs_calc_inode_res(mp, 1) + 237 + unsigned int t1, t2, t3; 238 + unsigned int blksz = XFS_FSB_TO_B(mp, 1); 239 + 240 + t1 = xfs_calc_inode_res(mp, 1) + 241 + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) + 242 + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 243 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); 244 + 245 + if (xfs_sb_version_hasrealtime(&mp->m_sb)) { 246 + t2 = xfs_calc_inode_res(mp, 1) + 265 247 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 266 - XFS_FSB_TO_B(mp, 1)) + 248 + blksz) + 267 249 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + 268 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), 269 - XFS_FSB_TO_B(mp, 1))), 270 - (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + 271 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), 272 - XFS_FSB_TO_B(mp, 1)))); 250 + xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) + 251 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz); 252 + } else { 253 + t2 = 0; 254 + } 255 + 256 + t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + 257 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); 258 + 259 + return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); 273 260 } 274 261 275 262 /* 276 - * In truncating a file we free up to two extents at once. We can modify: 263 + * In truncating a file we free up to two extents at once. 
We can modify (t1): 277 264 * the inode being truncated: inode size 278 265 * the inode's bmap btree: (max depth + 1) * block size 279 - * And the bmap_finish transaction can free the blocks and bmap blocks: 266 + * And the bmap_finish transaction can free the blocks and bmap blocks (t2): 280 267 * the agf for each of the ags: 4 * sector size 281 268 * the agfl for each of the ags: 4 * sector size 282 269 * the super block to reflect the freed blocks: sector size 283 270 * worst case split in allocation btrees per extent assuming 4 extents: 284 271 * 4 exts * 2 trees * (2 * max depth - 1) * block size 272 + * Or, if it's a realtime file (t3): 273 + * the agf for each of the ags: 2 * sector size 274 + * the agfl for each of the ags: 2 * sector size 275 + * the super block to reflect the freed blocks: sector size 276 + * the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes 277 + * the realtime summary: 2 exts * 1 block 278 + * worst case split in allocation btrees per extent assuming 2 extents: 279 + * 2 exts * 2 trees * (2 * max depth - 1) * block size 285 280 */ 286 281 STATIC uint 287 282 xfs_calc_itruncate_reservation( 288 283 struct xfs_mount *mp) 289 284 { 290 - return XFS_DQUOT_LOGRES(mp) + 291 - max((xfs_calc_inode_res(mp, 1) + 292 - xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, 293 - XFS_FSB_TO_B(mp, 1))), 294 - (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 295 - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), 296 - XFS_FSB_TO_B(mp, 1)))); 285 + unsigned int t1, t2, t3; 286 + unsigned int blksz = XFS_FSB_TO_B(mp, 1); 287 + 288 + t1 = xfs_calc_inode_res(mp, 1) + 289 + xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz); 290 + 291 + t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + 292 + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz); 293 + 294 + if (xfs_sb_version_hasrealtime(&mp->m_sb)) { 295 + t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + 296 + xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) + 297 + 
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz); 298 + } else { 299 + t3 = 0; 300 + } 301 + 302 + return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); 297 303 } 298 304 299 305 /*
+12
fs/xfs/xfs_bmap_util.c
··· 992 992 struct xfs_inode *ip, 993 993 loff_t offset) 994 994 { 995 + struct xfs_mount *mp = ip->i_mount; 995 996 int error; 996 997 997 998 /* ··· 1004 1003 if (error) 1005 1004 return error; 1006 1005 } 1006 + 1007 + /* 1008 + * Shift operations must stabilize the start block offset boundary along 1009 + * with the full range of the operation. If we don't, a COW writeback 1010 + * completion could race with an insert, front merge with the start 1011 + * extent (after split) during the shift and corrupt the file. Start 1012 + * with the block just prior to the start to stabilize the boundary. 1013 + */ 1014 + offset = round_down(offset, 1 << mp->m_sb.sb_blocklog); 1015 + if (offset) 1016 + offset -= (1 << mp->m_sb.sb_blocklog); 1007 1017 1008 1018 /* 1009 1019 * Writeback and invalidate cache for the remainder of the file as we're
+1 -1
fs/xfs/xfs_buf_item.c
··· 956 956 struct xfs_buf_log_item *bip = bp->b_log_item; 957 957 958 958 trace_xfs_buf_item_relse(bp, _RET_IP_); 959 - ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); 959 + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); 960 960 961 961 bp->b_log_item = NULL; 962 962 if (list_empty(&bp->b_li_list))
+122 -58
fs/xfs/xfs_mount.c
··· 31 31 #include "xfs_reflink.h" 32 32 #include "xfs_extent_busy.h" 33 33 #include "xfs_health.h" 34 - 34 + #include "xfs_trace.h" 35 35 36 36 static DEFINE_MUTEX(xfs_uuid_table_mutex); 37 37 static int xfs_uuid_table_size; ··· 360 360 } 361 361 362 362 /* 363 - * Update alignment values based on mount options and sb values 363 + * If the sunit/swidth change would move the precomputed root inode value, we 364 + * must reject the ondisk change because repair will stumble over that. 365 + * However, we allow the mount to proceed because we never rejected this 366 + * combination before. Returns true to update the sb, false otherwise. 367 + */ 368 + static inline int 369 + xfs_check_new_dalign( 370 + struct xfs_mount *mp, 371 + int new_dalign, 372 + bool *update_sb) 373 + { 374 + struct xfs_sb *sbp = &mp->m_sb; 375 + xfs_ino_t calc_ino; 376 + 377 + calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign); 378 + trace_xfs_check_new_dalign(mp, new_dalign, calc_ino); 379 + 380 + if (sbp->sb_rootino == calc_ino) { 381 + *update_sb = true; 382 + return 0; 383 + } 384 + 385 + xfs_warn(mp, 386 + "Cannot change stripe alignment; would require moving root inode."); 387 + 388 + /* 389 + * XXX: Next time we add a new incompat feature, this should start 390 + * returning -EINVAL to fail the mount. Until then, spit out a warning 391 + * that we're ignoring the administrator's instructions. 392 + */ 393 + xfs_warn(mp, "Skipping superblock stripe alignment update."); 394 + *update_sb = false; 395 + return 0; 396 + } 397 + 398 + /* 399 + * If we were provided with new sunit/swidth values as mount options, make sure 400 + * that they pass basic alignment and superblock feature checks, and convert 401 + * them into the same units (FSB) that everything else expects. This step 402 + * /must/ be done before computing the inode geometry. 
364 403 */ 365 404 STATIC int 366 - xfs_update_alignment(xfs_mount_t *mp) 405 + xfs_validate_new_dalign( 406 + struct xfs_mount *mp) 367 407 { 368 - xfs_sb_t *sbp = &(mp->m_sb); 408 + if (mp->m_dalign == 0) 409 + return 0; 410 + 411 + /* 412 + * If stripe unit and stripe width are not multiples 413 + * of the fs blocksize turn off alignment. 414 + */ 415 + if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 416 + (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 417 + xfs_warn(mp, 418 + "alignment check failed: sunit/swidth vs. blocksize(%d)", 419 + mp->m_sb.sb_blocksize); 420 + return -EINVAL; 421 + } else { 422 + /* 423 + * Convert the stripe unit and width to FSBs. 424 + */ 425 + mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); 426 + if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) { 427 + xfs_warn(mp, 428 + "alignment check failed: sunit/swidth vs. agsize(%d)", 429 + mp->m_sb.sb_agblocks); 430 + return -EINVAL; 431 + } else if (mp->m_dalign) { 432 + mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 433 + } else { 434 + xfs_warn(mp, 435 + "alignment check failed: sunit(%d) less than bsize(%d)", 436 + mp->m_dalign, mp->m_sb.sb_blocksize); 437 + return -EINVAL; 438 + } 439 + } 440 + 441 + if (!xfs_sb_version_hasdalign(&mp->m_sb)) { 442 + xfs_warn(mp, 443 + "cannot change alignment: superblock does not support data alignment"); 444 + return -EINVAL; 445 + } 446 + 447 + return 0; 448 + } 449 + 450 + /* Update alignment values based on mount options and sb values. */ 451 + STATIC int 452 + xfs_update_alignment( 453 + struct xfs_mount *mp) 454 + { 455 + struct xfs_sb *sbp = &mp->m_sb; 369 456 370 457 if (mp->m_dalign) { 371 - /* 372 - * If stripe unit and stripe width are not multiples 373 - * of the fs blocksize turn off alignment. 374 - */ 375 - if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 376 - (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 377 - xfs_warn(mp, 378 - "alignment check failed: sunit/swidth vs. 
blocksize(%d)", 379 - sbp->sb_blocksize); 380 - return -EINVAL; 381 - } else { 382 - /* 383 - * Convert the stripe unit and width to FSBs. 384 - */ 385 - mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); 386 - if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { 387 - xfs_warn(mp, 388 - "alignment check failed: sunit/swidth vs. agsize(%d)", 389 - sbp->sb_agblocks); 390 - return -EINVAL; 391 - } else if (mp->m_dalign) { 392 - mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 393 - } else { 394 - xfs_warn(mp, 395 - "alignment check failed: sunit(%d) less than bsize(%d)", 396 - mp->m_dalign, sbp->sb_blocksize); 397 - return -EINVAL; 398 - } 399 - } 458 + bool update_sb; 459 + int error; 400 460 401 - /* 402 - * Update superblock with new values 403 - * and log changes 404 - */ 405 - if (xfs_sb_version_hasdalign(sbp)) { 406 - if (sbp->sb_unit != mp->m_dalign) { 407 - sbp->sb_unit = mp->m_dalign; 408 - mp->m_update_sb = true; 409 - } 410 - if (sbp->sb_width != mp->m_swidth) { 411 - sbp->sb_width = mp->m_swidth; 412 - mp->m_update_sb = true; 413 - } 414 - } else { 415 - xfs_warn(mp, 416 - "cannot change alignment: superblock does not support data alignment"); 417 - return -EINVAL; 418 - } 461 + if (sbp->sb_unit == mp->m_dalign && 462 + sbp->sb_width == mp->m_swidth) 463 + return 0; 464 + 465 + error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb); 466 + if (error || !update_sb) 467 + return error; 468 + 469 + sbp->sb_unit = mp->m_dalign; 470 + sbp->sb_width = mp->m_swidth; 471 + mp->m_update_sb = true; 419 472 } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && 420 473 xfs_sb_version_hasdalign(&mp->m_sb)) { 421 - mp->m_dalign = sbp->sb_unit; 422 - mp->m_swidth = sbp->sb_width; 474 + mp->m_dalign = sbp->sb_unit; 475 + mp->m_swidth = sbp->sb_width; 423 476 } 424 477 425 478 return 0; ··· 701 648 } 702 649 703 650 /* 704 - * Check if sb_agblocks is aligned at stripe boundary 705 - * If sb_agblocks is NOT aligned turn off m_dalign since 706 - * 
allocator alignment is within an ag, therefore ag has 707 - * to be aligned at stripe boundary. 651 + * If we were given new sunit/swidth options, do some basic validation 652 + * checks and convert the incore dalign and swidth values to the 653 + * same units (FSB) that everything else uses. This /must/ happen 654 + * before computing the inode geometry. 708 655 */ 709 - error = xfs_update_alignment(mp); 656 + error = xfs_validate_new_dalign(mp); 710 657 if (error) 711 658 goto out; 712 659 ··· 716 663 xfs_ialloc_setup_geometry(mp); 717 664 xfs_rmapbt_compute_maxlevels(mp); 718 665 xfs_refcountbt_compute_maxlevels(mp); 666 + 667 + /* 668 + * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks 669 + * is NOT aligned turn off m_dalign since allocator alignment is within 670 + * an ag, therefore ag has to be aligned at stripe boundary. Note that 671 + * we must compute the free space and rmap btree geometry before doing 672 + * this. 673 + */ 674 + error = xfs_update_alignment(mp); 675 + if (error) 676 + goto out; 719 677 720 678 /* enable fail_at_unmount as default */ 721 679 mp->m_fail_unmount = true;
+21
fs/xfs/xfs_trace.h
··· 3573 3573 DEFINE_KMEM_EVENT(kmem_realloc); 3574 3574 DEFINE_KMEM_EVENT(kmem_zone_alloc); 3575 3575 3576 + TRACE_EVENT(xfs_check_new_dalign, 3577 + TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino), 3578 + TP_ARGS(mp, new_dalign, calc_rootino), 3579 + TP_STRUCT__entry( 3580 + __field(dev_t, dev) 3581 + __field(int, new_dalign) 3582 + __field(xfs_ino_t, sb_rootino) 3583 + __field(xfs_ino_t, calc_rootino) 3584 + ), 3585 + TP_fast_assign( 3586 + __entry->dev = mp->m_super->s_dev; 3587 + __entry->new_dalign = new_dalign; 3588 + __entry->sb_rootino = mp->m_sb.sb_rootino; 3589 + __entry->calc_rootino = calc_rootino; 3590 + ), 3591 + TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu", 3592 + MAJOR(__entry->dev), MINOR(__entry->dev), 3593 + __entry->new_dalign, __entry->sb_rootino, 3594 + __entry->calc_rootino) 3595 + ) 3596 + 3576 3597 #endif /* _TRACE_XFS_H */ 3577 3598 3578 3599 #undef TRACE_INCLUDE_PATH