Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs

Pull xfs fix from Ben Myers:
"Fix for regression in commit cca9f93a52d2 ("xfs: don't do IO when
creating an new inode"), recovery causing filesystem corruption after
a crash"

* tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs:
xfs: di_flushiter considered harmful

+36 -11
+3
fs/xfs/xfs_dinode.h
··· 39 39 * There is a very similar struct icdinode in xfs_inode which matches the 40 40 * layout of the first 96 bytes of this structure, but is kept in native 41 41 * format instead of big endian. 42 + * 43 + * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed 44 + * padding field for v3 inodes. 42 45 */ 43 46 typedef struct xfs_dinode { 44 47 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
+22 -9
fs/xfs/xfs_inode.c
··· 896 896 to->di_projid_lo = cpu_to_be16(from->di_projid_lo); 897 897 to->di_projid_hi = cpu_to_be16(from->di_projid_hi); 898 898 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 899 - to->di_flushiter = cpu_to_be16(from->di_flushiter); 900 899 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 901 900 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); 902 901 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); ··· 923 924 to->di_lsn = cpu_to_be64(from->di_lsn); 924 925 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); 925 926 uuid_copy(&to->di_uuid, &from->di_uuid); 927 + to->di_flushiter = 0; 928 + } else { 929 + to->di_flushiter = cpu_to_be16(from->di_flushiter); 926 930 } 927 931 } 928 932 ··· 1031 1029 /* 1032 1030 * Read the disk inode attributes into the in-core inode structure. 1033 1031 * 1034 - * If we are initialising a new inode and we are not utilising the 1035 - * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core 1036 - * with a random generation number. If we are keeping inodes around, we need to 1037 - * read the inode cluster to get the existing generation number off disk. 1032 + * For version 5 superblocks, if we are initialising a new inode and we are not 1033 + * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new 1034 + * inode core with a random generation number. If we are keeping inodes around, 1035 + * we need to read the inode cluster to get the existing generation number off 1036 + * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode 1037 + * format) then log recovery is dependent on the di_flushiter field being 1038 + * initialised from the current on-disk value and hence we must also read the 1039 + * inode off disk. 1038 1040 */ 1039 1041 int 1040 1042 xfs_iread( ··· 1060 1054 1061 1055 /* shortcut IO on inode allocation if possible */ 1062 1056 if ((iget_flags & XFS_IGET_CREATE) && 1057 + xfs_sb_version_hascrc(&mp->m_sb) && 1063 1058 !(mp->m_flags & XFS_MOUNT_IKEEP)) { 1064 1059 /* initialise the on-disk inode core */ 1065 1060 memset(&ip->i_d, 0, sizeof(ip->i_d)); ··· 2889 2882 __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 2890 2883 goto corrupt_out; 2891 2884 } 2885 + 2892 2886 /* 2893 - * bump the flush iteration count, used to detect flushes which 2894 - * postdate a log record during recovery. This is redundant as we now 2895 - * log every change and hence this can't happen. Still, it doesn't hurt. 2887 + * Inode item log recovery for v1/v2 inodes are dependent on the 2888 + * di_flushiter count for correct sequencing. We bump the flush 2889 + * iteration count so we can detect flushes which postdate a log record 2890 + * during recovery. This is redundant as we now log every change and 2891 + * hence this can't happen but we need to still do it to ensure 2892 + * backwards compatibility with old kernels that predate logging all 2893 + * inode changes. 2896 2894 */ 2897 - ip->i_d.di_flushiter++; 2895 + if (ip->i_d.di_version < 3) 2896 + ip->i_d.di_flushiter++; 2898 2897 2899 2898 /* 2900 2899 * Copy the dirty parts of the inode into the on-disk
+11 -2
fs/xfs/xfs_log_recover.c
··· 2592 2592 goto error; 2593 2593 } 2594 2594 2595 - /* Skip replay when the on disk inode is newer than the log one */ 2596 - if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { 2595 + /* 2596 + * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes 2597 + * are transactional and if ordering is necessary we can determine that 2598 + * more accurately by the LSN field in the V3 inode core. Don't trust 2599 + * the inode versions we might be changing them here - use the 2600 + * superblock flag to determine whether we need to look at di_flushiter 2601 + * to skip replay when the on disk inode is newer than the log one 2602 + */ 2603 + if (!xfs_sb_version_hascrc(&mp->m_sb) && 2604 + dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { 2597 2605 /* 2598 2606 * Deal with the wrap case, DI_MAX_FLUSH is less 2599 2607 * than smaller numbers ··· 2616 2608 goto error; 2617 2609 } 2618 2610 } 2611 + 2619 2612 /* Take the opportunity to reset the flush iteration count */ 2620 2613 dicp->di_flushiter = 0; 2621 2614