Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-5.14-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
"A few fixes for issues in the new online shrink code, additional
corrections for my recent bug-hunt w.r.t. extent size hints on
realtime, and improved input checking of the GROWFSRT ioctl.

IOW, the usual 'I somehow got bored during the merge window and
resumed auditing the farther reaches of xfs':

- Fix shrink eligibility checking when sparse inode clusters are enabled

- Reset '..' directory entries when unlinking directories to prevent
verifier errors if the fs is shrunk later

- Don't report unusable extent size hints to userspace via FSGETXATTR

- Don't warn when extent size hints are unusable because the sysadmin
configured them that way

- Fix insufficient parameter validation in GROWFSRT ioctl

- Fix integer overflow when adding rt volumes to filesystem"

* tag 'xfs-5.14-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: detect misaligned rtinherit directory extent size hints
xfs: fix an integer overflow error in xfs_growfs_rt
xfs: improve FSGROWFSRT precondition checking
xfs: don't expose misaligned extszinherit hints to userspace
xfs: correct the narrative around misaligned rtinherit/extszinherit dirs
xfs: reset child dir '..' entry when unlinking child
xfs: check for sparse inode clusters that cross new EOAG when shrinking

+174 -37
+8
fs/xfs/libxfs/xfs_ag.c
··· 804 804 args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta); 805 805 806 806 /* 807 + * Make sure that the last inode cluster cannot overlap with the new 808 + * end of the AG, even if it's sparse. 809 + */ 810 + error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta); 811 + if (error) 812 + return error; 813 + 814 + /* 807 815 * Disable perag reservations so it doesn't cause the allocation request 808 816 * to fail. We'll reestablish reservation before we return. 809 817 */
+55
fs/xfs/libxfs/xfs_ialloc.c
··· 2928 2928 2929 2929 return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno)); 2930 2930 } 2931 + 2932 + /* 2933 + * Ensure there are not sparse inode clusters that cross the new EOAG. 2934 + * 2935 + * This is a no-op for non-spinode filesystems since clusters are always fully 2936 + * allocated and checking the bnobt suffices. However, a spinode filesystem 2937 + * could have a record where the upper inodes are free blocks. If those blocks 2938 + * were removed from the filesystem, the inode record would extend beyond EOAG, 2939 + * which will be flagged as corruption. 2940 + */ 2941 + int 2942 + xfs_ialloc_check_shrink( 2943 + struct xfs_trans *tp, 2944 + xfs_agnumber_t agno, 2945 + struct xfs_buf *agibp, 2946 + xfs_agblock_t new_length) 2947 + { 2948 + struct xfs_inobt_rec_incore rec; 2949 + struct xfs_btree_cur *cur; 2950 + struct xfs_mount *mp = tp->t_mountp; 2951 + struct xfs_perag *pag; 2952 + xfs_agino_t agino = XFS_AGB_TO_AGINO(mp, new_length); 2953 + int has; 2954 + int error; 2955 + 2956 + if (!xfs_sb_version_hassparseinodes(&mp->m_sb)) 2957 + return 0; 2958 + 2959 + pag = xfs_perag_get(mp, agno); 2960 + cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO); 2961 + 2962 + /* Look up the inobt record that would correspond to the new EOFS. */ 2963 + error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has); 2964 + if (error || !has) 2965 + goto out; 2966 + 2967 + error = xfs_inobt_get_rec(cur, &rec, &has); 2968 + if (error) 2969 + goto out; 2970 + 2971 + if (!has) { 2972 + error = -EFSCORRUPTED; 2973 + goto out; 2974 + } 2975 + 2976 + /* If the record covers inodes that would be beyond EOFS, bail out. */ 2977 + if (rec.ir_startino + XFS_INODES_PER_CHUNK > agino) { 2978 + error = -ENOSPC; 2979 + goto out; 2980 + } 2981 + out: 2982 + xfs_btree_del_cursor(cur, error); 2983 + xfs_perag_put(pag); 2984 + return error; 2985 + }
+3
fs/xfs/libxfs/xfs_ialloc.h
··· 122 122 void xfs_ialloc_setup_geometry(struct xfs_mount *mp); 123 123 xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); 124 124 125 + int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno, 126 + struct xfs_buf *agibp, xfs_agblock_t new_length); 127 + 125 128 #endif /* __XFS_IALLOC_H__ */
+16 -12
fs/xfs/libxfs/xfs_inode_buf.c
··· 592 592 /* 593 593 * This comment describes a historic gap in this verifier function. 594 594 * 595 - * On older kernels, the extent size hint verifier doesn't check that 596 - * the extent size hint is an integer multiple of the realtime extent 597 - * size on a directory with both RTINHERIT and EXTSZINHERIT flags set. 598 - * The verifier has always enforced the alignment rule for regular 599 - * files with the REALTIME flag set. 595 + * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this 596 + * function has never checked that the extent size hint is an integer 597 + * multiple of the realtime extent size. Since we allow users to set 598 + * this combination on non-rt filesystems /and/ to change the rt 599 + * extent size when adding a rt device to a filesystem, the net effect 600 + * is that users can configure a filesystem anticipating one rt 601 + * geometry and change their minds later. Directories do not use the 602 + * extent size hint, so this is harmless for them. 600 603 * 601 604 * If a directory with a misaligned extent size hint is allowed to 602 605 * propagate that hint into a new regular realtime file, the result 603 606 * is that the inode cluster buffer verifier will trigger a corruption 604 - * shutdown the next time it is run. 607 + * shutdown the next time it is run, because the verifier has always 608 + * enforced the alignment rule for regular files. 605 609 * 606 - * Unfortunately, there could be filesystems with these misconfigured 607 - * directories in the wild, so we cannot add a check to this verifier 608 - * at this time because that will result a new source of directory 609 - * corruption errors when reading an existing filesystem. Instead, we 610 - * permit the misconfiguration to pass through the verifiers so that 611 - * callers of this function can correct and mitigate externally. 
610 + * Because we allow administrators to set a new rt extent size when 611 + * adding a rt section, we cannot add a check to this verifier because 612 + * that will result a new source of directory corruption errors when 613 + * reading an existing filesystem. Instead, we rely on callers to 614 + * decide when alignment checks are appropriate, and fix things up as 615 + * needed. 612 616 */ 613 617 614 618 if (rt_flag)
+4 -6
fs/xfs/libxfs/xfs_trans_inode.c
··· 143 143 } 144 144 145 145 /* 146 - * Inode verifiers on older kernels don't check that the extent size 147 - * hint is an integer multiple of the rt extent size on a directory 148 - * with both rtinherit and extszinherit flags set. If we're logging a 149 - * directory that is misconfigured in this way, clear the hint. 146 + * Inode verifiers do not check that the extent size hint is an integer 147 + * multiple of the rt extent size on a directory with both rtinherit 148 + * and extszinherit flags set. If we're logging a directory that is 149 + * misconfigured in this way, clear the hint. 150 150 */ 151 151 if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 152 152 (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && 153 153 (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { 154 - xfs_info_once(ip->i_mount, 155 - "Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino); 156 154 ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | 157 155 XFS_DIFLAG_EXTSZINHERIT); 158 156 ip->i_extsize = 0;
+16 -2
fs/xfs/scrub/inode.c
··· 73 73 uint16_t flags) 74 74 { 75 75 xfs_failaddr_t fa; 76 + uint32_t value = be32_to_cpu(dip->di_extsize); 76 77 77 - fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), 78 - mode, flags); 78 + fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags); 79 79 if (fa) 80 80 xchk_ino_set_corrupt(sc, ino); 81 + 82 + /* 83 + * XFS allows a sysadmin to change the rt extent size when adding a rt 84 + * section to a filesystem after formatting. If there are any 85 + * directories with extszinherit and rtinherit set, the hint could 86 + * become misaligned with the new rextsize. The verifier doesn't check 87 + * this, because we allow rtinherit directories even without an rt 88 + * device. Flag this as an administrative warning since we will clean 89 + * this up eventually. 90 + */ 91 + if ((flags & XFS_DIFLAG_RTINHERIT) && 92 + (flags & XFS_DIFLAG_EXTSZINHERIT) && 93 + value % sc->mp->m_sb.sb_rextsize > 0) 94 + xchk_ino_set_warning(sc, ino); 81 95 } 82 96 83 97 /*
+13
fs/xfs/xfs_inode.c
··· 2763 2763 error = xfs_droplink(tp, ip); 2764 2764 if (error) 2765 2765 goto out_trans_cancel; 2766 + 2767 + /* 2768 + * Point the unlinked child directory's ".." entry to the root 2769 + * directory to eliminate back-references to inodes that may 2770 + * get freed before the child directory is closed. If the fs 2771 + * gets shrunk, this can lead to dirent inode validation errors. 2772 + */ 2773 + if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) { 2774 + error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, 2775 + tp->t_mountp->m_sb.sb_rootino, 0); 2776 + if (error) 2777 + return error; 2778 + } 2766 2779 } else { 2767 2780 /* 2768 2781 * When removing a non-directory we need to log the parent
+22 -5
fs/xfs/xfs_ioctl.c
··· 1065 1065 1066 1066 fileattr_fill_xflags(fa, xfs_ip2xflags(ip)); 1067 1067 1068 - fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); 1068 + if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) { 1069 + fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); 1070 + } else if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) { 1071 + /* 1072 + * Don't let a misaligned extent size hint on a directory 1073 + * escape to userspace if it won't pass the setattr checks 1074 + * later. 1075 + */ 1076 + if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 1077 + ip->i_extsize % mp->m_sb.sb_rextsize > 0) { 1078 + fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | 1079 + FS_XFLAG_EXTSZINHERIT); 1080 + fa->fsx_extsize = 0; 1081 + } else { 1082 + fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); 1083 + } 1084 + } 1085 + 1069 1086 if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) 1070 1087 fa->fsx_cowextsize = XFS_FSB_TO_B(mp, ip->i_cowextsize); 1071 1088 fa->fsx_projid = ip->i_projid; ··· 1309 1292 new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); 1310 1293 1311 1294 /* 1312 - * Inode verifiers on older kernels don't check that the extent size 1313 - * hint is an integer multiple of the rt extent size on a directory 1314 - * with both rtinherit and extszinherit flags set. Don't let sysadmins 1315 - * misconfigure directories. 1295 + * Inode verifiers do not check that the extent size hint is an integer 1296 + * multiple of the rt extent size on a directory with both rtinherit 1297 + * and extszinherit flags set. Don't let sysadmins misconfigure 1298 + * directories. 1316 1299 */ 1317 1300 if ((new_diflags & XFS_DIFLAG_RTINHERIT) && 1318 1301 (new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {
+37 -12
fs/xfs/xfs_rtalloc.c
··· 923 923 uint8_t *rsum_cache; /* old summary cache */ 924 924 925 925 sbp = &mp->m_sb; 926 - /* 927 - * Initial error checking. 928 - */ 926 + 929 927 if (!capable(CAP_SYS_ADMIN)) 930 928 return -EPERM; 931 - if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || 932 - (nrblocks = in->newblocks) <= sbp->sb_rblocks || 933 - (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) 929 + 930 + /* Needs to have been mounted with an rt device. */ 931 + if (!XFS_IS_REALTIME_MOUNT(mp)) 934 932 return -EINVAL; 935 - if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks))) 933 + /* 934 + * Mount should fail if the rt bitmap/summary files don't load, but 935 + * we'll check anyway. 936 + */ 937 + if (!mp->m_rbmip || !mp->m_rsumip) 938 + return -EINVAL; 939 + 940 + /* Shrink not supported. */ 941 + if (in->newblocks <= sbp->sb_rblocks) 942 + return -EINVAL; 943 + 944 + /* Can only change rt extent size when adding rt volume. */ 945 + if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize) 946 + return -EINVAL; 947 + 948 + /* Range check the extent size. */ 949 + if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE || 950 + XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE) 951 + return -EINVAL; 952 + 953 + /* Unsupported realtime features. */ 954 + if (xfs_sb_version_hasrmapbt(&mp->m_sb) || 955 + xfs_sb_version_hasreflink(&mp->m_sb)) 956 + return -EOPNOTSUPP; 957 + 958 + nrblocks = in->newblocks; 959 + error = xfs_sb_validate_fsb_count(sbp, nrblocks); 960 + if (error) 936 961 return error; 937 962 /* 938 963 * Read in the last block of the device, make sure it exists. 
··· 1021 996 ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); 1022 997 bmbno < nrbmblocks; 1023 998 bmbno++) { 1024 - xfs_trans_t *tp; 999 + struct xfs_trans *tp; 1000 + xfs_rfsblock_t nrblocks_step; 1025 1001 1026 1002 *nmp = *mp; 1027 1003 nsbp = &nmp->m_sb; ··· 1031 1005 */ 1032 1006 nsbp->sb_rextsize = in->extsize; 1033 1007 nsbp->sb_rbmblocks = bmbno + 1; 1034 - nsbp->sb_rblocks = 1035 - XFS_RTMIN(nrblocks, 1036 - nsbp->sb_rbmblocks * NBBY * 1037 - nsbp->sb_blocksize * nsbp->sb_rextsize); 1008 + nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize * 1009 + nsbp->sb_rextsize; 1010 + nsbp->sb_rblocks = min(nrblocks, nrblocks_step); 1038 1011 nsbp->sb_rextents = nsbp->sb_rblocks; 1039 1012 do_div(nsbp->sb_rextents, nsbp->sb_rextsize); 1040 1013 ASSERT(nsbp->sb_rextents != 0);