Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull Ext4 updates from Theodore Ts'o:
"The major new feature added in this update is Darrick J Wong's
metadata checksum feature, which adds crc32 checksums to ext4's
metadata fields.

There is also the usual set of cleanups and bug fixes."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (44 commits)
ext4: hole-punch use truncate_pagecache_range
jbd2: use kmem_cache_zalloc wrapper instead of flag
ext4: remove mb_groups before tearing down the buddy_cache
ext4: add ext4_mb_unload_buddy in the error path
ext4: don't trash state flags in EXT4_IOC_SETFLAGS
ext4: let getattr report the right blocks in delalloc+bigalloc
ext4: add missing save_error_info() to ext4_error()
ext4: add debugging trigger for ext4_error()
ext4: protect group inode free counting with group lock
ext4: use consistent ssize_t type in ext4_file_write()
ext4: fix format flag in ext4_ext_binsearch_idx()
ext4: cleanup in ext4_discard_allocated_blocks()
ext4: return ENOMEM when mounts fail due to lack of memory
ext4: remove redundundant "(char *) bh->b_data" casts
ext4: disallow hard-linked directory in ext4_lookup
ext4: fix potential integer overflow in alloc_flex_gd()
ext4: remove needs_recovery in ext4_mb_init()
ext4: force ro mount if ext4_setup_super() fails
ext4: fix potential NULL dereference in ext4_free_inodes_counts()
ext4/jbd2: add metadata checksumming to the list of supported features
...

+1783 -196
+2
fs/ext4/Kconfig
··· 2 2 tristate "The Extended 4 (ext4) filesystem" 3 3 select JBD2 4 4 select CRC16 5 + select CRYPTO 6 + select CRYPTO_CRC32C 5 7 help 6 8 This is the next generation of the ext3 filesystem. 7 9
+33 -8
fs/ext4/balloc.c
··· 168 168 169 169 /* If checksum is bad mark all blocks used to prevent allocation 170 170 * essentially implementing a per-group read-only flag. */ 171 - if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 171 + if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 172 172 ext4_error(sb, "Checksum bad for group %u", block_group); 173 173 ext4_free_group_clusters_set(sb, gdp, 0); 174 174 ext4_free_inodes_set(sb, gdp, 0); 175 175 ext4_itable_unused_set(sb, gdp, 0); 176 176 memset(bh->b_data, 0xff, sb->s_blocksize); 177 + ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, 178 + EXT4_BLOCKS_PER_GROUP(sb) / 8); 177 179 return; 178 180 } 179 181 memset(bh->b_data, 0, sb->s_blocksize); ··· 212 210 */ 213 211 ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), 214 212 sb->s_blocksize * 8, bh->b_data); 213 + ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, 214 + EXT4_BLOCKS_PER_GROUP(sb) / 8); 215 + ext4_group_desc_csum_set(sb, block_group, gdp); 215 216 } 216 217 217 218 /* Return the number of free blocks in a block group. It is used when ··· 281 276 } 282 277 283 278 static int ext4_valid_block_bitmap(struct super_block *sb, 284 - struct ext4_group_desc *desc, 285 - unsigned int block_group, 286 - struct buffer_head *bh) 279 + struct ext4_group_desc *desc, 280 + unsigned int block_group, 281 + struct buffer_head *bh) 287 282 { 288 283 ext4_grpblk_t offset; 289 284 ext4_grpblk_t next_zero_bit; ··· 330 325 block_group, bitmap_blk); 331 326 return 0; 332 327 } 328 + 329 + void ext4_validate_block_bitmap(struct super_block *sb, 330 + struct ext4_group_desc *desc, 331 + unsigned int block_group, 332 + struct buffer_head *bh) 333 + { 334 + if (buffer_verified(bh)) 335 + return; 336 + 337 + ext4_lock_group(sb, block_group); 338 + if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && 339 + ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, 340 + EXT4_BLOCKS_PER_GROUP(sb) / 8)) 341 + set_buffer_verified(bh); 342 + ext4_unlock_group(sb, block_group); 343 + } 344 + 333 345 /** 334 346 * ext4_read_block_bitmap() 335 347 * @sb: super block ··· 377 355 } 378 356 379 357 if (bitmap_uptodate(bh)) 380 - return bh; 358 + goto verify; 381 359 382 360 lock_buffer(bh); 383 361 if (bitmap_uptodate(bh)) { 384 362 unlock_buffer(bh); 385 - return bh; 363 + goto verify; 386 364 } 387 365 ext4_lock_group(sb, block_group); 388 366 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ··· 401 379 */ 402 380 set_bitmap_uptodate(bh); 403 381 unlock_buffer(bh); 404 - return bh; 382 + goto verify; 405 383 } 406 384 /* 407 385 * submit the buffer_head for reading ··· 411 389 bh->b_end_io = ext4_end_bitmap_read; 412 390 get_bh(bh); 413 391 submit_bh(READ, bh); 392 + return bh; 393 + verify: 394 + ext4_validate_block_bitmap(sb, desc, block_group, bh); 414 395 return bh; 415 396 } 416 397 ··· 437 412 } 438 413 clear_buffer_new(bh); 439 414 /* Panic or remount fs read-only if block bitmap is invalid */ 440 - ext4_valid_block_bitmap(sb, desc, block_group, bh); 415 + ext4_validate_block_bitmap(sb, desc, block_group, bh); 441 416 return 0; 442 417 } 443 418
+83
fs/ext4/bitmap.c
··· 29 29 30 30 #endif /* EXT4FS_DEBUG */ 31 31 32 + int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, 33 + struct ext4_group_desc *gdp, 34 + struct buffer_head *bh, int sz) 35 + { 36 + __u32 hi; 37 + __u32 provided, calculated; 38 + struct ext4_sb_info *sbi = EXT4_SB(sb); 39 + 40 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 41 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 42 + return 1; 43 + 44 + provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); 45 + calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 46 + if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) { 47 + hi = le16_to_cpu(gdp->bg_inode_bitmap_csum_hi); 48 + provided |= (hi << 16); 49 + } else 50 + calculated &= 0xFFFF; 51 + 52 + return provided == calculated; 53 + } 54 + 55 + void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, 56 + struct ext4_group_desc *gdp, 57 + struct buffer_head *bh, int sz) 58 + { 59 + __u32 csum; 60 + struct ext4_sb_info *sbi = EXT4_SB(sb); 61 + 62 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 63 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 64 + return; 65 + 66 + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 67 + gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); 68 + if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) 69 + gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16); 70 + } 71 + 72 + int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, 73 + struct ext4_group_desc *gdp, 74 + struct buffer_head *bh, int sz) 75 + { 76 + __u32 hi; 77 + __u32 provided, calculated; 78 + struct ext4_sb_info *sbi = EXT4_SB(sb); 79 + 80 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 81 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 82 + return 1; 83 + 84 + provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); 85 + calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 86 + if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) { 87 + hi = le16_to_cpu(gdp->bg_block_bitmap_csum_hi); 88 + provided |= (hi << 16); 89 + } else 90 + calculated &= 0xFFFF; 91 + 92 + if (provided == calculated) 93 + return 1; 94 + 95 + ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group); 96 + return 0; 97 + } 98 + 99 + void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, 100 + struct ext4_group_desc *gdp, 101 + struct buffer_head *bh, int sz) 102 + { 103 + __u32 csum; 104 + struct ext4_sb_info *sbi = EXT4_SB(sb); 105 + 106 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 107 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 108 + return; 109 + 110 + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 111 + gdp->bg_block_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); 112 + if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) 113 + gdp->bg_block_bitmap_csum_hi = cpu_to_le16(csum >> 16); 114 + }
+12
fs/ext4/dir.c
··· 179 179 continue; 180 180 } 181 181 182 + /* Check the checksum */ 183 + if (!buffer_verified(bh) && 184 + !ext4_dirent_csum_verify(inode, 185 + (struct ext4_dir_entry *)bh->b_data)) { 186 + EXT4_ERROR_FILE(filp, 0, "directory fails checksum " 187 + "at offset %llu", 188 + (unsigned long long)filp->f_pos); 189 + filp->f_pos += sb->s_blocksize - offset; 190 + continue; 191 + } 192 + set_buffer_verified(bh); 193 + 182 194 revalidate: 183 195 /* If the dir block has changed since the last call to 184 196 * readdir(2), then we might be pointing to an invalid
+117 -13
fs/ext4/ext4.h
··· 29 29 #include <linux/wait.h> 30 30 #include <linux/blockgroup_lock.h> 31 31 #include <linux/percpu_counter.h> 32 + #include <crypto/hash.h> 32 33 #ifdef __KERNEL__ 33 34 #include <linux/compat.h> 34 35 #endif ··· 299 298 __le16 bg_free_inodes_count_lo;/* Free inodes count */ 300 299 __le16 bg_used_dirs_count_lo; /* Directories count */ 301 300 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ 302 - __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ 301 + __le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */ 302 + __le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */ 303 + __le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */ 303 304 __le16 bg_itable_unused_lo; /* Unused inodes count */ 304 305 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ 305 306 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ ··· 311 308 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ 312 309 __le16 bg_used_dirs_count_hi; /* Directories count MSB */ 313 310 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ 314 - __u32 bg_reserved2[3]; 311 + __le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */ 312 + __le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */ 313 + __le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */ 314 + __u32 bg_reserved; 315 315 }; 316 + 317 + #define EXT4_BG_INODE_BITMAP_CSUM_HI_END \ 318 + (offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \ 319 + sizeof(__le16)) 320 + #define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END \ 321 + (offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \ 322 + sizeof(__le16)) 316 323 317 324 /* 318 325 * Structure of a flex block group info ··· 663 650 __le16 l_i_file_acl_high; 664 651 __le16 l_i_uid_high; /* these 2 fields */ 665 652 __le16 l_i_gid_high; /* were reserved2[0] */ 666 - __u32 l_i_reserved2; 653 + __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */ 654 + __le16 l_i_reserved; 667 655 } linux2; 668 656 struct { 669 657 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ ··· 680 666 } masix2; 681 667 } osd2; /* OS dependent 2 */ 682 668 __le16 i_extra_isize; 683 - __le16 i_pad1; 669 + __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ 684 670 __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ 685 671 __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ 686 672 __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ ··· 782 768 #define i_gid_low i_gid 783 769 #define i_uid_high osd2.linux2.l_i_uid_high 784 770 #define i_gid_high osd2.linux2.l_i_gid_high 785 - #define i_reserved2 osd2.linux2.l_i_reserved2 771 + #define i_checksum_lo osd2.linux2.l_i_checksum_lo 786 772 787 773 #elif defined(__GNU__) 788 774 ··· 922 908 */ 923 909 tid_t i_sync_tid; 924 910 tid_t i_datasync_tid; 911 + 912 + /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ 913 + __u32 i_csum_seed; 925 914 }; 926 915 927 916 /* ··· 1018 1001 #define EXT4_ERRORS_PANIC 3 /* Panic */ 1019 1002 #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE 1020 1003 1004 + /* Metadata checksum algorithm codes */ 1005 + #define EXT4_CRC32C_CHKSUM 1 1006 + 1021 1007 /* 1022 1008 * Structure of the super block 1023 1009 */ ··· 1107 1087 __le64 s_mmp_block; /* Block for multi-mount protection */ 1108 1088 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ 1109 1089 __u8 s_log_groups_per_flex; /* FLEX_BG group size */ 1110 - __u8 s_reserved_char_pad; 1090 + __u8 s_checksum_type; /* metadata checksum algorithm used */ 1111 1091 __le16 s_reserved_pad; 1112 1092 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ 1113 1093 __le32 s_snapshot_inum; /* Inode number of active snapshot */ ··· 1133 1113 __le32 s_usr_quota_inum; /* inode for tracking user quota */ 1134 1114 __le32 s_grp_quota_inum; /* inode for tracking group quota */ 1135 1115 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ 1136 - __le32 s_reserved[109]; /* Padding to the end of the block */ 1116 + __le32 s_reserved[108]; /* Padding to the end of the block */ 1117 + __le32 s_checksum; /* crc32c(superblock) */ 1137 1118 }; 1138 1119 1139 1120 #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) ··· 1197 1176 struct proc_dir_entry *s_proc; 1198 1177 struct kobject s_kobj; 1199 1178 struct completion s_kobj_unregister; 1179 + struct super_block *s_sb; 1200 1180 1201 1181 /* Journaling */ 1202 1182 struct journal_s *s_journal; ··· 1288 1266 1289 1267 /* record the last minlen when FITRIM is called. */ 1290 1268 atomic_t s_last_trim_minblks; 1269 + 1270 + /* Reference to checksum algorithm driver via cryptoapi */ 1271 + struct crypto_shash *s_chksum_driver; 1272 + 1273 + /* Precomputed FS UUID checksum for seeding other checksums */ 1274 + __u32 s_csum_seed; 1291 1275 }; 1292 1276 1293 1277 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) ··· 1442 1414 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1443 1415 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1444 1416 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 1417 + /* 1418 + * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When 1419 + * METADATA_CSUM is set, group descriptor checksums use the same algorithm as 1420 + * all other data structures' checksums. However, the METADATA_CSUM and 1421 + * GDT_CSUM bits are mutually exclusive. 1422 + */ 1445 1423 #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 1446 1424 1447 1425 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 ··· 1495 1461 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ 1496 1462 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ 1497 1463 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ 1498 - EXT4_FEATURE_RO_COMPAT_BIGALLOC) 1464 + EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ 1465 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) 1499 1466 1500 1467 /* 1501 1468 * Default values for user and/or group using reserved blocks ··· 1562 1527 }; 1563 1528 1564 1529 /* 1530 + * This is a bogus directory entry at the end of each leaf block that 1531 + * records checksums. 1532 + */ 1533 + struct ext4_dir_entry_tail { 1534 + __le32 det_reserved_zero1; /* Pretend to be unused */ 1535 + __le16 det_rec_len; /* 12 */ 1536 + __u8 det_reserved_zero2; /* Zero name length */ 1537 + __u8 det_reserved_ft; /* 0xDE, fake file type */ 1538 + __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ 1539 + }; 1540 + 1541 + /* 1565 1542 * Ext4 directory file types. Only the low 3 bits are used. The 1566 1543 * other bits are reserved for now. 1567 1544 */ ··· 1587 1540 #define EXT4_FT_SYMLINK 7 1588 1541 1589 1542 #define EXT4_FT_MAX 8 1543 + 1544 + #define EXT4_FT_DIR_CSUM 0xDE 1590 1545 1591 1546 /* 1592 1547 * EXT4_DIR_PAD defines the directory entries boundaries ··· 1657 1608 #define DX_HASH_LEGACY_UNSIGNED 3 1658 1609 #define DX_HASH_HALF_MD4_UNSIGNED 4 1659 1610 #define DX_HASH_TEA_UNSIGNED 5 1611 + 1612 + static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, 1613 + const void *address, unsigned int length) 1614 + { 1615 + struct { 1616 + struct shash_desc shash; 1617 + char ctx[crypto_shash_descsize(sbi->s_chksum_driver)]; 1618 + } desc; 1619 + int err; 1620 + 1621 + desc.shash.tfm = sbi->s_chksum_driver; 1622 + desc.shash.flags = 0; 1623 + *(u32 *)desc.ctx = crc; 1624 + 1625 + err = crypto_shash_update(&desc.shash, address, length); 1626 + BUG_ON(err); 1627 + 1628 + return *(u32 *)desc.ctx; 1629 + } 1660 1630 1661 1631 #ifdef __KERNEL__ 1662 1632 ··· 1809 1741 __le16 mmp_check_interval; 1810 1742 1811 1743 __le16 mmp_pad1; 1812 - __le32 mmp_pad2[227]; 1744 + __le32 mmp_pad2[226]; 1745 + __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ 1813 1746 }; 1814 1747 1815 1748 /* arguments passed to the mmp thread */ ··· 1853 1784 1854 1785 /* bitmap.c */ 1855 1786 extern unsigned int ext4_count_free(struct buffer_head *, unsigned); 1787 + void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, 1788 + struct ext4_group_desc *gdp, 1789 + struct buffer_head *bh, int sz); 1790 + int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, 1791 + struct ext4_group_desc *gdp, 1792 + struct buffer_head *bh, int sz); 1793 + void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, 1794 + struct ext4_group_desc *gdp, 1795 + struct buffer_head *bh, int sz); 1796 + int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, 1797 + struct ext4_group_desc *gdp, 1798 + struct buffer_head *bh, int sz); 1856 1799 1857 1800 /* balloc.c */ 1801 + extern void ext4_validate_block_bitmap(struct super_block *sb, 1802 + struct ext4_group_desc *desc, 1803 + unsigned int block_group, 1804 + struct buffer_head *bh); 1858 1805 extern unsigned int ext4_block_group(struct super_block *sb, 1859 1806 ext4_fsblk_t blocknr); 1860 1807 extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, ··· 1949 1864 /* mballoc.c */ 1950 1865 extern long ext4_mb_stats; 1951 1866 extern long ext4_mb_max_to_scan; 1952 - extern int ext4_mb_init(struct super_block *, int); 1867 + extern int ext4_mb_init(struct super_block *); 1953 1868 extern int ext4_mb_release(struct super_block *); 1954 1869 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, 1955 1870 struct ext4_allocation_request *, int *); ··· 2021 1936 extern int ext4_ext_migrate(struct inode *); 2022 1937 2023 1938 /* namei.c */ 1939 + extern int ext4_dirent_csum_verify(struct inode *inode, 1940 + struct ext4_dir_entry *dirent); 2024 1941 extern int ext4_orphan_add(handle_t *, struct inode *); 2025 1942 extern int ext4_orphan_del(handle_t *, struct inode *); 2026 1943 extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, ··· 2037 1950 extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); 2038 1951 2039 1952 /* super.c */ 1953 + extern int ext4_superblock_csum_verify(struct super_block *sb, 1954 + struct ext4_super_block *es); 1955 + extern void ext4_superblock_csum_set(struct super_block *sb, 1956 + struct ext4_super_block *es); 2040 1957 extern void *ext4_kvmalloc(size_t size, gfp_t flags); 2041 1958 extern void *ext4_kvzalloc(size_t size, gfp_t flags); 2042 1959 extern void ext4_kvfree(void *ptr); ··· 2116 2025 struct ext4_group_desc *bg, __u32 count); 2117 2026 extern void ext4_itable_unused_set(struct super_block *sb, 2118 2027 struct ext4_group_desc *bg, __u32 count); 2119 - extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, 2120 - struct ext4_group_desc *gdp); 2121 - extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, 2028 + extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group, 2122 2029 struct ext4_group_desc *gdp); 2030 + extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group, 2031 + struct ext4_group_desc *gdp); 2032 + 2033 + static inline int ext4_has_group_desc_csum(struct super_block *sb) 2034 + { 2035 + return EXT4_HAS_RO_COMPAT_FEATURE(sb, 2036 + EXT4_FEATURE_RO_COMPAT_GDT_CSUM | 2037 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); 2038 + } 2123 2039 2124 2040 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 2125 2041 { ··· 2323 2225 2324 2226 static inline void ext4_mark_super_dirty(struct super_block *sb) 2325 2227 { 2228 + struct ext4_super_block *es = EXT4_SB(sb)->s_es; 2229 + 2230 + ext4_superblock_csum_set(sb, es); 2326 2231 if (EXT4_SB(sb)->s_journal == NULL) 2327 2232 sb->s_dirt =1; 2328 2233 } ··· 2415 2314 2416 2315 /* mmp.c */ 2417 2316 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); 2317 + extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); 2318 + extern int ext4_mmp_csum_verify(struct super_block *sb, 2319 + struct mmp_struct *mmp); 2418 2320 2419 2321 /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ 2420 2322 enum ext4_state_bits {
+24
fs/ext4/ext4_extents.h
··· 63 63 * ext4_inode has i_block array (60 bytes total). 64 64 * The first 12 bytes store ext4_extent_header; 65 65 * the remainder stores an array of ext4_extent. 66 + * For non-inode extent blocks, ext4_extent_tail 67 + * follows the array. 66 68 */ 69 + 70 + /* 71 + * This is the extent tail on-disk structure. 72 + * All other extent structures are 12 bytes long. It turns out that 73 + * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which 74 + * covers all valid ext4 block sizes. Therefore, this tail structure can be 75 + * crammed into the end of the block without having to rebalance the tree. 76 + */ 77 + struct ext4_extent_tail { 78 + __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ 79 + }; 67 80 68 81 /* 69 82 * This is the extent on-disk structure. ··· 113 100 }; 114 101 115 102 #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) 103 + 104 + #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ 105 + (sizeof(struct ext4_extent_header) + \ 106 + (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max))) 107 + 108 + static inline struct ext4_extent_tail * 109 + find_ext4_extent_tail(struct ext4_extent_header *eh) 110 + { 111 + return (struct ext4_extent_tail *)(((void *)eh) + 112 + EXT4_EXTENT_TAIL_OFFSET(eh)); 113 + } 116 114 117 115 /* 118 116 * Array of ext4_ext_path contains path to some extent.
+8 -1
fs/ext4/ext4_jbd2.c
··· 138 138 } 139 139 140 140 int __ext4_handle_dirty_super(const char *where, unsigned int line, 141 - handle_t *handle, struct super_block *sb) 141 + handle_t *handle, struct super_block *sb, 142 + int now) 142 143 { 143 144 struct buffer_head *bh = EXT4_SB(sb)->s_sbh; 144 145 int err = 0; 145 146 146 147 if (ext4_handle_valid(handle)) { 148 + ext4_superblock_csum_set(sb, 149 + (struct ext4_super_block *)bh->b_data); 147 150 err = jbd2_journal_dirty_metadata(handle, bh); 148 151 if (err) 149 152 ext4_journal_abort_handle(where, line, __func__, 150 153 bh, handle, err); 154 + } else if (now) { 155 + ext4_superblock_csum_set(sb, 156 + (struct ext4_super_block *)bh->b_data); 157 + mark_buffer_dirty(bh); 151 158 } else 152 159 sb->s_dirt = 1; 153 160 return err;
+5 -2
fs/ext4/ext4_jbd2.h
··· 213 213 struct buffer_head *bh); 214 214 215 215 int __ext4_handle_dirty_super(const char *where, unsigned int line, 216 - handle_t *handle, struct super_block *sb); 216 + handle_t *handle, struct super_block *sb, 217 + int now); 217 218 218 219 #define ext4_journal_get_write_access(handle, bh) \ 219 220 __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) ··· 226 225 #define ext4_handle_dirty_metadata(handle, inode, bh) \ 227 226 __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ 228 227 (bh)) 228 + #define ext4_handle_dirty_super_now(handle, sb) \ 229 + __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 1) 229 230 #define ext4_handle_dirty_super(handle, sb) \ 230 - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) 231 + __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 0) 231 232 232 233 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); 233 234 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
+79 -12
fs/ext4/extents.c
··· 52 52 #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ 53 53 #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ 54 54 55 + static __le32 ext4_extent_block_csum(struct inode *inode, 56 + struct ext4_extent_header *eh) 57 + { 58 + struct ext4_inode_info *ei = EXT4_I(inode); 59 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 60 + __u32 csum; 61 + 62 + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh, 63 + EXT4_EXTENT_TAIL_OFFSET(eh)); 64 + return cpu_to_le32(csum); 65 + } 66 + 67 + static int ext4_extent_block_csum_verify(struct inode *inode, 68 + struct ext4_extent_header *eh) 69 + { 70 + struct ext4_extent_tail *et; 71 + 72 + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 73 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 74 + return 1; 75 + 76 + et = find_ext4_extent_tail(eh); 77 + if (et->et_checksum != ext4_extent_block_csum(inode, eh)) 78 + return 0; 79 + return 1; 80 + } 81 + 82 + static void ext4_extent_block_csum_set(struct inode *inode, 83 + struct ext4_extent_header *eh) 84 + { 85 + struct ext4_extent_tail *et; 86 + 87 + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 88 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 89 + return; 90 + 91 + et = find_ext4_extent_tail(eh); 92 + et->et_checksum = ext4_extent_block_csum(inode, eh); 93 + } 94 + 55 95 static int ext4_split_extent(handle_t *handle, 56 96 struct inode *inode, 57 97 struct ext4_ext_path *path, ··· 157 117 { 158 118 int err; 159 119 if (path->p_bh) { 120 + ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); 160 121 /* path points to block */ 161 122 err = __ext4_handle_dirty_metadata(where, line, handle, 162 123 inode, path->p_bh); ··· 432 391 error_msg = "invalid extent entries"; 433 392 goto corrupted; 434 393 } 394 + /* Verify checksum on non-root extent tree nodes */ 395 + if (ext_depth(inode) != depth && 396 + !ext4_extent_block_csum_verify(inode, eh)) { 397 + error_msg = "extent tree corrupted"; 398 + goto corrupted; 399 + } 435 400 return 0; 436 401 437 402 corrupted: ··· 458 411 { 459 412 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); 460 413 } 414 + 415 + static int __ext4_ext_check_block(const char *function, unsigned int line, 416 + struct inode *inode, 417 + struct ext4_extent_header *eh, 418 + int depth, 419 + struct buffer_head *bh) 420 + { 421 + int ret; 422 + 423 + if (buffer_verified(bh)) 424 + return 0; 425 + ret = ext4_ext_check(inode, eh, depth); 426 + if (ret) 427 + return ret; 428 + set_buffer_verified(bh); 429 + return ret; 430 + } 431 + 432 + #define ext4_ext_check_block(inode, eh, depth, bh) \ 433 + __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh) 461 434 462 435 #ifdef EXT_DEBUG 463 436 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) ··· 603 536 } 604 537 605 538 path->p_idx = l - 1; 606 - ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), 539 + ext_debug(" -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), 607 540 ext4_idx_pblock(path->p_idx)); 608 541 609 542 #ifdef CHECK_BINSEARCH ··· 735 668 i = depth; 736 669 /* walk through the tree */ 737 670 while (i) { 738 - int need_to_validate = 0; 739 - 740 671 ext_debug("depth %d: num %d, max %d\n", 741 672 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 742 673 ··· 753 688 put_bh(bh); 754 689 goto err; 755 690 } 756 - /* validate the extent entries */ 757 - need_to_validate = 1; 758 691 } 759 692 eh = ext_block_hdr(bh); 760 693 ppos++; ··· 766 703 path[ppos].p_hdr = eh; 767 704 i--; 768 705 769 - if (need_to_validate && ext4_ext_check(inode, eh, i)) 706 + if (ext4_ext_check_block(inode, eh, i, bh)) 770 707 goto err; 771 708 } 772 709 ··· 977 914 le16_add_cpu(&neh->eh_entries, m); 978 915 } 979 916 917 + ext4_extent_block_csum_set(inode, neh); 980 918 set_buffer_uptodate(bh); 981 919 unlock_buffer(bh); 982 920 ··· 1056 992 sizeof(struct ext4_extent_idx) * m); 1057 993 le16_add_cpu(&neh->eh_entries, m); 1058 994 } 995 + ext4_extent_block_csum_set(inode, neh); 1059 996 set_buffer_uptodate(bh); 1060 997 unlock_buffer(bh); 1061 998 ··· 1154 1089 else 1155 1090 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); 1156 1091 neh->eh_magic = EXT4_EXT_MAGIC; 1092 + ext4_extent_block_csum_set(inode, neh); 1157 1093 set_buffer_uptodate(bh); 1158 1094 unlock_buffer(bh); 1159 1095 ··· 1410 1344 return -EIO; 1411 1345 eh = ext_block_hdr(bh); 1412 1346 /* subtract from p_depth to get proper eh_depth */ 1413 - if (ext4_ext_check(inode, eh, path->p_depth - depth)) { 1347 + if (ext4_ext_check_block(inode, eh, 1348 + path->p_depth - depth, bh)) { 1414 1349 put_bh(bh); 1415 1350 return -EIO; 1416 1351 } ··· 1424 1357 if (bh == NULL) 1425 1358 return -EIO; 1426 1359 eh = ext_block_hdr(bh); 1427 - if (ext4_ext_check(inode, eh, path->p_depth - depth)) { 1360 + if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) { 1428 1361 put_bh(bh); 1429 1362 return -EIO; 1430 1363 } ··· 2711 2644 err = -EIO; 2712 2645 break; 2713 2646 } 2714 - if (ext4_ext_check(inode, ext_block_hdr(bh), 2715 - depth - i - 1)) { 2647 + if (ext4_ext_check_block(inode, ext_block_hdr(bh), 2648 + depth - i - 1, bh)) { 2716 2649 err = -EIO; 2717 2650 break; 2718 2651 } ··· 4789 4722 4790 4723 /* Now release the pages */ 4791 4724 if (last_page_offset > first_page_offset) { 4792 - truncate_inode_pages_range(mapping, first_page_offset, 4793 - last_page_offset-1); 4725 + truncate_pagecache_range(inode, first_page_offset, 4726 + last_page_offset - 1); 4794 4727 } 4795 4728 4796 4729 /* finish any pending end_io work */
+1 -1
fs/ext4/file.c
··· 95 95 { 96 96 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 97 97 int unaligned_aio = 0; 98 - int ret; 98 + ssize_t ret; 99 99 100 100 /* 101 101 * If we have encountered a bitmap-format file, the size limit
+62 -19
fs/ext4/ialloc.c
··· 70 70 ext4_group_t block_group, 71 71 struct ext4_group_desc *gdp) 72 72 { 73 - struct ext4_sb_info *sbi = EXT4_SB(sb); 74 - 75 73 J_ASSERT_BH(bh, buffer_locked(bh)); 76 74 77 75 /* If checksum is bad mark all blocks and inodes use to prevent 78 76 * allocation, essentially implementing a per-group read-only flag. */ 79 - if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 77 + if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 80 78 ext4_error(sb, "Checksum bad for group %u", block_group); 81 79 ext4_free_group_clusters_set(sb, gdp, 0); 82 80 ext4_free_inodes_set(sb, gdp, 0); 83 81 ext4_itable_unused_set(sb, gdp, 0); 84 82 memset(bh->b_data, 0xff, sb->s_blocksize); 83 + ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, 84 + EXT4_INODES_PER_GROUP(sb) / 8); 85 85 return 0; 86 86 } 87 87 88 88 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); 89 89 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 90 90 bh->b_data); 91 + ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, 92 + EXT4_INODES_PER_GROUP(sb) / 8); 93 + ext4_group_desc_csum_set(sb, block_group, gdp); 91 94 92 95 return EXT4_INODES_PER_GROUP(sb); 93 96 } ··· 131 128 return NULL; 132 129 } 133 130 if (bitmap_uptodate(bh)) 134 - return bh; 131 + goto verify; 135 132 136 133 lock_buffer(bh); 137 134 if (bitmap_uptodate(bh)) { 138 135 unlock_buffer(bh); 139 - return bh; 136 + goto verify; 140 137 } 141 138 142 139 ext4_lock_group(sb, block_group); ··· 144 141 ext4_init_inode_bitmap(sb, bh, block_group, desc); 145 142 set_bitmap_uptodate(bh); 146 143 set_buffer_uptodate(bh); 144 + set_buffer_verified(bh); 147 145 ext4_unlock_group(sb, block_group); 148 146 unlock_buffer(bh); 149 147 return bh; ··· 158 154 */ 159 155 set_bitmap_uptodate(bh); 160 156 unlock_buffer(bh); 161 - return bh; 157 + goto verify; 162 158 } 163 159 /* 164 160 * submit the buffer_head for reading ··· 175 171 block_group, bitmap_blk); 176 172 return NULL; 177 173 } 174 + 175 + verify: 176 + ext4_lock_group(sb, block_group); 177 + if (!buffer_verified(bh) && 178 + !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, 179 + EXT4_INODES_PER_GROUP(sb) / 8)) { 180 + ext4_unlock_group(sb, block_group); 181 + put_bh(bh); 182 + ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " 183 + "inode_bitmap = %llu", block_group, bitmap_blk); 184 + return NULL; 185 + } 186 + ext4_unlock_group(sb, block_group); 187 + set_buffer_verified(bh); 178 188 return bh; 179 189 } 180 190 ··· 294 276 ext4_used_dirs_set(sb, gdp, count); 295 277 percpu_counter_dec(&sbi->s_dirs_counter); 296 278 } 297 - gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 279 + ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, 280 + EXT4_INODES_PER_GROUP(sb) / 8); 281 + ext4_group_desc_csum_set(sb, block_group, gdp); 298 282 ext4_unlock_group(sb, block_group); 299 283 300 284 percpu_counter_inc(&sbi->s_freeinodes_counter); ··· 508 488 for (i = 0; i < ngroups; i++) { 509 489 grp = (parent_group + i) % ngroups; 510 490 desc = ext4_get_group_desc(sb, grp, NULL); 511 - grp_free = ext4_free_inodes_count(sb, desc); 512 - if (desc && grp_free && grp_free >= avefreei) { 513 - *group = grp; 514 - return 0; 491 + if (desc) { 492 + grp_free = ext4_free_inodes_count(sb, desc); 493 + if (grp_free && grp_free >= avefreei) { 494 + *group = grp; 495 + return 0; 496 + } 515 497 } 516 498 } 517 499 ··· 731 709 732 710 got: 733 711 /* We may have to initialize the block bitmap if it isn't already */ 734 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && 712 + if (ext4_has_group_desc_csum(sb) && 735 713 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 736 714 struct buffer_head *block_bitmap_bh; 737 715 ··· 753 731 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 754 732 ext4_free_group_clusters_set(sb, gdp, 755 733 ext4_free_clusters_after_init(sb, group, gdp)); 756 - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, 757 - gdp); 734 + ext4_block_bitmap_csum_set(sb, group, gdp, 735 + block_bitmap_bh, 736 + EXT4_BLOCKS_PER_GROUP(sb) / 737 + 8); 738 + ext4_group_desc_csum_set(sb, group, gdp); 758 739 } 759 740 ext4_unlock_group(sb, group); 760 741 ··· 776 751 goto fail; 777 752 778 753 /* Update the relevant bg descriptor fields */ 779 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 754 + if (ext4_has_group_desc_csum(sb)) { 780 755 int free; 781 756 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 782 757 ··· 797 772 ext4_itable_unused_set(sb, gdp, 798 773 (EXT4_INODES_PER_GROUP(sb) - ino)); 799 774 up_read(&grp->alloc_sem); 775 + } else { 776 + ext4_lock_group(sb, group); 800 777 } 778 + 801 779 ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); 802 780 if (S_ISDIR(mode)) { 803 781 ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); ··· 810 782 atomic_inc(&sbi->s_flex_groups[f].used_dirs); 811 783 } 812 784 } 813 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 814 - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 815 - ext4_unlock_group(sb, group); 785 + if (ext4_has_group_desc_csum(sb)) { 786 + ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh, 787 + EXT4_INODES_PER_GROUP(sb) / 8); 788 + ext4_group_desc_csum_set(sb, group, gdp); 816 789 } 790 + ext4_unlock_group(sb, group); 817 791 818 792 BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); 819 793 err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); ··· 879 849 spin_lock(&sbi->s_next_gen_lock); 880 850 inode->i_generation = sbi->s_next_generation++; 881 851 spin_unlock(&sbi->s_next_gen_lock); 852 + 853 + /* Precompute checksum seed for inode metadata */ 854 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 855 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 856 + __u32 csum; 857 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 858 + __le32 inum = cpu_to_le32(inode->i_ino); 859 + __le32 gen = cpu_to_le32(inode->i_generation); 860 + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, 861 + sizeof(inum)); 862 + ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, 863 + sizeof(gen)); 864 + } 882 865 883 866 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 884 867 ext4_set_inode_state(inode, EXT4_STATE_NEW); ··· 1183 1140 skip_zeroout: 1184 1141 ext4_lock_group(sb, group); 1185 1142 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); 1186 - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 1143 + ext4_group_desc_csum_set(sb, group, gdp); 1187 1144 ext4_unlock_group(sb, group); 1188 1145 1189 1146 BUFFER_TRACE(group_desc_bh,
+107 -12
fs/ext4/inode.c
··· 47 47 48 48 #define MPAGE_DA_EXTENT_TAIL 0x01 49 49 50 + static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, 51 + struct ext4_inode_info *ei) 52 + { 53 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 54 + __u16 csum_lo; 55 + __u16 csum_hi = 0; 56 + __u32 csum; 57 + 58 + csum_lo = raw->i_checksum_lo; 59 + raw->i_checksum_lo = 0; 60 + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 61 + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { 62 + csum_hi = raw->i_checksum_hi; 63 + raw->i_checksum_hi = 0; 64 + } 65 + 66 + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, 67 + EXT4_INODE_SIZE(inode->i_sb)); 68 + 69 + raw->i_checksum_lo = csum_lo; 70 + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 71 + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) 72 + raw->i_checksum_hi = csum_hi; 73 + 74 + return csum; 75 + } 76 + 77 + static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, 78 + struct ext4_inode_info *ei) 79 + { 80 + __u32 provided, calculated; 81 + 82 + if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 83 + cpu_to_le32(EXT4_OS_LINUX) || 84 + !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 85 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 86 + return 1; 87 + 88 + provided = le16_to_cpu(raw->i_checksum_lo); 89 + calculated = ext4_inode_csum(inode, raw, ei); 90 + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 91 + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) 92 + provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16; 93 + else 94 + calculated &= 0xFFFF; 95 + 96 + return provided == calculated; 97 + } 98 + 99 + static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, 100 + struct ext4_inode_info *ei) 101 + { 102 + __u32 csum; 103 + 104 + if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 105 + cpu_to_le32(EXT4_OS_LINUX) || 106 + !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 107 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 108 + return; 109 + 110 + csum = ext4_inode_csum(inode, raw, ei); 111 + raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF); 112 + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 113 + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) 114 + raw->i_checksum_hi = cpu_to_le16(csum >> 16); 115 + } 116 + 50 117 static inline int ext4_begin_ordered_truncate(struct inode *inode, 51 118 loff_t new_size) 52 119 { ··· 3584 3517 b = table; 3585 3518 end = b + EXT4_SB(sb)->s_inode_readahead_blks; 3586 3519 num = EXT4_INODES_PER_GROUP(sb); 3587 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3588 - EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 3520 + if (ext4_has_group_desc_csum(sb)) 3589 3521 num -= ext4_itable_unused_count(sb, gdp); 3590 3522 table += num / inodes_per_block; 3591 3523 if (end > table) ··· 3712 3646 if (ret < 0) 3713 3647 goto bad_inode; 3714 3648 raw_inode = ext4_raw_inode(&iloc); 3649 + 3650 + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 3651 + ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 3652 + if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 3653 + EXT4_INODE_SIZE(inode->i_sb)) { 3654 + EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", 3655 + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, 3656 + EXT4_INODE_SIZE(inode->i_sb)); 3657 + ret = -EIO; 3658 + goto bad_inode; 3659 + } 3660 + } else 3661 + ei->i_extra_isize = 0; 3662 + 3663 + /* Precompute checksum seed for inode metadata */ 3664 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3665 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3666 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3667 + __u32 csum; 3668 + __le32 inum = cpu_to_le32(inode->i_ino); 3669 + __le32 gen = raw_inode->i_generation; 3670 + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, 3671 + sizeof(inum)); 3672 + ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, 3673 + sizeof(gen)); 3674 + } 3675 + 3676 + if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { 3677 + EXT4_ERROR_INODE(inode, "checksum invalid"); 3678 + ret = -EIO; 3679 + goto bad_inode; 3680 + } 3681 + 3715 3682 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 3716 3683 i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 3717 3684 i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); ··· 3824 3725 } 3825 3726 3826 3727 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 3827 - ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 3828 - if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 3829 - EXT4_INODE_SIZE(inode->i_sb)) { 3830 - ret = -EIO; 3831 - goto bad_inode; 3832 - } 3833 3728 if (ei->i_extra_isize == 0) { 3834 3729 /* The extra space is currently unused. Use it. */ 3835 3730 ei->i_extra_isize = sizeof(struct ext4_inode) - ··· 3835 3742 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 3836 3743 ext4_set_inode_state(inode, EXT4_STATE_XATTR); 3837 3744 } 3838 - } else 3839 - ei->i_extra_isize = 0; 3745 + } 3840 3746 3841 3747 EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); 3842 3748 EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); ··· 4034 3942 EXT4_SET_RO_COMPAT_FEATURE(sb, 4035 3943 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 4036 3944 ext4_handle_sync(handle); 4037 - err = ext4_handle_dirty_super(handle, sb); 3945 + err = ext4_handle_dirty_super_now(handle, sb); 4038 3946 } 4039 3947 } 4040 3948 raw_inode->i_generation = cpu_to_le32(inode->i_generation); ··· 4060 3968 cpu_to_le32(inode->i_version >> 32); 4061 3969 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 4062 3970 } 3971 + 3972 + ext4_inode_csum_set(inode, raw_inode, ei); 4063 3973 4064 3974 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4065 3975 rc = ext4_handle_dirty_metadata(handle, NULL, bh); ··· 4307 4213 * will return the blocks that include the delayed allocation 4308 4214 * blocks for this file. 4309 4215 */ 4310 - delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; 4216 + delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), 4217 + EXT4_I(inode)->i_reserved_data_blocks); 4311 4218 4312 4219 stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; 4313 4220 return 0;
+16 -3
fs/ext4/ioctl.c
··· 38 38 handle_t *handle = NULL; 39 39 int err, migrate = 0; 40 40 struct ext4_iloc iloc; 41 - unsigned int oldflags; 41 + unsigned int oldflags, mask, i; 42 42 unsigned int jflag; 43 43 44 44 if (!inode_owner_or_capable(inode)) ··· 115 115 if (err) 116 116 goto flags_err; 117 117 118 - flags = flags & EXT4_FL_USER_MODIFIABLE; 119 - flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; 118 + for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { 119 + if (!(mask & EXT4_FL_USER_MODIFIABLE)) 120 + continue; 121 + if (mask & flags) 122 + ext4_set_inode_flag(inode, i); 123 + else 124 + ext4_clear_inode_flag(inode, i); 125 + } 120 126 ei->i_flags = flags; 121 127 122 128 ext4_set_inode_flags(inode); ··· 157 151 158 152 if (!inode_owner_or_capable(inode)) 159 153 return -EPERM; 154 + 155 + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 156 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 157 + ext4_warning(sb, "Setting inode version is not " 158 + "supported with metadata_csum enabled."); 159 + return -ENOTTY; 160 + } 160 161 161 162 err = mnt_want_write_file(filp); 162 163 if (err)
+17 -13
fs/ext4/mballoc.c
··· 788 788 int first_block; 789 789 struct super_block *sb; 790 790 struct buffer_head *bhs; 791 - struct buffer_head **bh; 791 + struct buffer_head **bh = NULL; 792 792 struct inode *inode; 793 793 char *data; 794 794 char *bitmap; ··· 2375 2375 return 0; 2376 2376 } 2377 2377 2378 - int ext4_mb_init(struct super_block *sb, int needs_recovery) 2378 + int ext4_mb_init(struct super_block *sb) 2379 2379 { 2380 2380 struct ext4_sb_info *sbi = EXT4_SB(sb); 2381 2381 unsigned i, j; ··· 2517 2517 struct ext4_sb_info *sbi = EXT4_SB(sb); 2518 2518 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); 2519 2519 2520 + if (sbi->s_proc) 2521 + remove_proc_entry("mb_groups", sbi->s_proc); 2522 + 2520 2523 if (sbi->s_group_info) { 2521 2524 for (i = 0; i < ngroups; i++) { 2522 2525 grinfo = ext4_get_group_info(sb, i); ··· 2567 2564 } 2568 2565 2569 2566 free_percpu(sbi->s_locality_groups); 2570 - if (sbi->s_proc) 2571 - remove_proc_entry("mb_groups", sbi->s_proc); 2572 2567 2573 2568 return 0; 2574 2569 } ··· 2798 2797 } 2799 2798 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; 2800 2799 ext4_free_group_clusters_set(sb, gdp, len); 2801 - gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2800 + ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh, 2801 + EXT4_BLOCKS_PER_GROUP(sb) / 8); 2802 + ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp); 2802 2803 2803 2804 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 2804 2805 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); ··· 3074 3071 static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) 3075 3072 { 3076 3073 struct ext4_prealloc_space *pa = ac->ac_pa; 3077 - int len; 3078 3074 3079 - if (pa && pa->pa_type == MB_INODE_PA) { 3080 - len = ac->ac_b_ex.fe_len; 3081 - pa->pa_free += len; 3082 - } 3083 - 3075 + if (pa && pa->pa_type == MB_INODE_PA) 3076 + pa->pa_free += ac->ac_b_ex.fe_len; 3084 3077 } 3085 3078 3086 3079 /* ··· 4635 4636 */ 4636 4637 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); 4637 4638 if (!new_entry) { 4639 + ext4_mb_unload_buddy(&e4b); 4638 4640 err = -ENOMEM; 4639 4641 goto error_return; 4640 4642 } ··· 4659 4659 4660 4660 ret = ext4_free_group_clusters(sb, gdp) + count_clusters; 4661 4661 ext4_free_group_clusters_set(sb, gdp, ret); 4662 - gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4662 + ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, 4663 + EXT4_BLOCKS_PER_GROUP(sb) / 8); 4664 + ext4_group_desc_csum_set(sb, block_group, gdp); 4663 4665 ext4_unlock_group(sb, block_group); 4664 4666 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); 4665 4667 ··· 4805 4803 mb_free_blocks(NULL, &e4b, bit, count); 4806 4804 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); 4807 4805 ext4_free_group_clusters_set(sb, desc, blk_free_count); 4808 - desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 4806 + ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh, 4807 + EXT4_BLOCKS_PER_GROUP(sb) / 8); 4808 + ext4_group_desc_csum_set(sb, block_group, desc); 4809 4809 ext4_unlock_group(sb, block_group); 4810 4810 percpu_counter_add(&sbi->s_freeclusters_counter, 4811 4811 EXT4_B2C(sbi, blocks_freed));
+39 -5
fs/ext4/mmp.c
··· 6 6 7 7 #include "ext4.h" 8 8 9 + /* Checksumming functions */ 10 + static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) 11 + { 12 + struct ext4_sb_info *sbi = EXT4_SB(sb); 13 + int offset = offsetof(struct mmp_struct, mmp_checksum); 14 + __u32 csum; 15 + 16 + csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset); 17 + 18 + return cpu_to_le32(csum); 19 + } 20 + 21 + int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) 22 + { 23 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 24 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 25 + return 1; 26 + 27 + return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); 28 + } 29 + 30 + void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) 31 + { 32 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 33 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 34 + return; 35 + 36 + mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); 37 + } 38 + 9 39 /* 10 40 * Write the MMP block using WRITE_SYNC to try to get the block on-disk 11 41 * faster. 12 42 */ 13 - static int write_mmp_block(struct buffer_head *bh) 43 + static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) 14 44 { 45 + struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); 46 + 47 + ext4_mmp_csum_set(sb, mmp); 15 48 mark_buffer_dirty(bh); 16 49 lock_buffer(bh); 17 50 bh->b_end_io = end_buffer_write_sync; ··· 92 59 } 93 60 94 61 mmp = (struct mmp_struct *)((*bh)->b_data); 95 - if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) 62 + if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC || 63 + !ext4_mmp_csum_verify(sb, mmp)) 96 64 return -EINVAL; 97 65 98 66 return 0; ··· 154 120 mmp->mmp_time = cpu_to_le64(get_seconds()); 155 121 last_update_time = jiffies; 156 122 157 - retval = write_mmp_block(bh); 123 + retval = write_mmp_block(sb, bh); 158 124 /* 159 125 * Don't spew too many error messages. Print one every 160 126 * (s_mmp_update_interval * 60) seconds. ··· 234 200 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); 235 201 mmp->mmp_time = cpu_to_le64(get_seconds()); 236 202 237 - retval = write_mmp_block(bh); 203 + retval = write_mmp_block(sb, bh); 238 204 239 205 failed: 240 206 kfree(data); ··· 333 299 seq = mmp_new_seq(); 334 300 mmp->mmp_seq = cpu_to_le32(seq); 335 301 336 - retval = write_mmp_block(bh); 302 + retval = write_mmp_block(sb, bh); 337 303 if (retval) 338 304 goto failed; 339 305
+419 -26
fs/ext4/namei.c
··· 145 145 u16 size; 146 146 }; 147 147 148 + /* 149 + * This goes at the end of each htree block. 150 + */ 151 + struct dx_tail { 152 + u32 dt_reserved; 153 + __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */ 154 + }; 155 + 148 156 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); 149 157 static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); 150 158 static inline unsigned dx_get_hash(struct dx_entry *entry); ··· 187 179 int *err); 188 180 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, 189 181 struct inode *inode); 182 + 183 + /* checksumming functions */ 184 + #define EXT4_DIRENT_TAIL(block, blocksize) \ 185 + ((struct ext4_dir_entry_tail *)(((void *)(block)) + \ 186 + ((blocksize) - \ 187 + sizeof(struct ext4_dir_entry_tail)))) 188 + 189 + static void initialize_dirent_tail(struct ext4_dir_entry_tail *t, 190 + unsigned int blocksize) 191 + { 192 + memset(t, 0, sizeof(struct ext4_dir_entry_tail)); 193 + t->det_rec_len = ext4_rec_len_to_disk( 194 + sizeof(struct ext4_dir_entry_tail), blocksize); 195 + t->det_reserved_ft = EXT4_FT_DIR_CSUM; 196 + } 197 + 198 + /* Walk through a dirent block to find a checksum "dirent" at the tail */ 199 + static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, 200 + struct ext4_dir_entry *de) 201 + { 202 + struct ext4_dir_entry_tail *t; 203 + 204 + #ifdef PARANOID 205 + struct ext4_dir_entry *d, *top; 206 + 207 + d = de; 208 + top = (struct ext4_dir_entry *)(((void *)de) + 209 + (EXT4_BLOCK_SIZE(inode->i_sb) - 210 + sizeof(struct ext4_dir_entry_tail))); 211 + while (d < top && d->rec_len) 212 + d = (struct ext4_dir_entry *)(((void *)d) + 213 + le16_to_cpu(d->rec_len)); 214 + 215 + if (d != top) 216 + return NULL; 217 + 218 + t = (struct ext4_dir_entry_tail *)d; 219 + #else 220 + t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb)); 221 + #endif 222 + 223 + if (t->det_reserved_zero1 || 224 + le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) || 225 + t->det_reserved_zero2 || 226 + t->det_reserved_ft != EXT4_FT_DIR_CSUM) 227 + return NULL; 228 + 229 + return t; 230 + } 231 + 232 + static __le32 ext4_dirent_csum(struct inode *inode, 233 + struct ext4_dir_entry *dirent, int size) 234 + { 235 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 236 + struct ext4_inode_info *ei = EXT4_I(inode); 237 + __u32 csum; 238 + 239 + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); 240 + return cpu_to_le32(csum); 241 + } 242 + 243 + int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) 244 + { 245 + struct ext4_dir_entry_tail *t; 246 + 247 + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 248 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 249 + return 1; 250 + 251 + t = get_dirent_tail(inode, dirent); 252 + if (!t) { 253 + EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " 254 + "leaf for checksum. Please run e2fsck -D."); 255 + return 0; 256 + } 257 + 258 + if (t->det_checksum != ext4_dirent_csum(inode, dirent, 259 + (void *)t - (void *)dirent)) 260 + return 0; 261 + 262 + return 1; 263 + } 264 + 265 + static void ext4_dirent_csum_set(struct inode *inode, 266 + struct ext4_dir_entry *dirent) 267 + { 268 + struct ext4_dir_entry_tail *t; 269 + 270 + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 271 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 272 + return; 273 + 274 + t = get_dirent_tail(inode, dirent); 275 + if (!t) { 276 + EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " 277 + "leaf for checksum. Please run e2fsck -D."); 278 + return; 279 + } 280 + 281 + t->det_checksum = ext4_dirent_csum(inode, dirent, 282 + (void *)t - (void *)dirent); 283 + } 284 + 285 + static inline int ext4_handle_dirty_dirent_node(handle_t *handle, 286 + struct inode *inode, 287 + struct buffer_head *bh) 288 + { 289 + ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); 290 + return ext4_handle_dirty_metadata(handle, inode, bh); 291 + } 292 + 293 + static struct dx_countlimit *get_dx_countlimit(struct inode *inode, 294 + struct ext4_dir_entry *dirent, 295 + int *offset) 296 + { 297 + struct ext4_dir_entry *dp; 298 + struct dx_root_info *root; 299 + int count_offset; 300 + 301 + if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb)) 302 + count_offset = 8; 303 + else if (le16_to_cpu(dirent->rec_len) == 12) { 304 + dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); 305 + if (le16_to_cpu(dp->rec_len) != 306 + EXT4_BLOCK_SIZE(inode->i_sb) - 12) 307 + return NULL; 308 + root = (struct dx_root_info *)(((void *)dp + 12)); 309 + if (root->reserved_zero || 310 + root->info_length != sizeof(struct dx_root_info)) 311 + return NULL; 312 + count_offset = 32; 313 + } else 314 + return NULL; 315 + 316 + if (offset) 317 + *offset = count_offset; 318 + return (struct dx_countlimit *)(((void *)dirent) + count_offset); 319 + } 320 + 321 + static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, 322 + int count_offset, int count, struct dx_tail *t) 323 + { 324 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 325 + struct ext4_inode_info *ei = EXT4_I(inode); 326 + __u32 csum, old_csum; 327 + int size; 328 + 329 + size = count_offset + (count * sizeof(struct dx_entry)); 330 + old_csum = t->dt_checksum; 331 + t->dt_checksum = 0; 332 + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); 333 + csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail)); 334 + t->dt_checksum = old_csum; 335 + 336 + return cpu_to_le32(csum); 337 + } 338 + 339 + static int ext4_dx_csum_verify(struct inode *inode, 340 + struct ext4_dir_entry *dirent) 341 + { 342 + struct dx_countlimit *c; 343 + struct dx_tail *t; 344 + int count_offset, limit, count; 345 + 346 + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 347 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 348 + return 1; 349 + 350 + c = get_dx_countlimit(inode, dirent, &count_offset); 351 + if (!c) { 352 + EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); 353 + return 1; 354 + } 355 + limit = le16_to_cpu(c->limit); 356 + count = le16_to_cpu(c->count); 357 + if (count_offset + (limit * sizeof(struct dx_entry)) > 358 + EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { 359 + EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " 360 + "tree checksum found. Run e2fsck -D."); 361 + return 1; 362 + } 363 + t = (struct dx_tail *)(((struct dx_entry *)c) + limit); 364 + 365 + if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset, 366 + count, t)) 367 + return 0; 368 + return 1; 369 + } 370 + 371 + static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) 372 + { 373 + struct dx_countlimit *c; 374 + struct dx_tail *t; 375 + int count_offset, limit, count; 376 + 377 + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 378 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 379 + return; 380 + 381 + c = get_dx_countlimit(inode, dirent, &count_offset); 382 + if (!c) { 383 + EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); 384 + return; 385 + } 386 + limit = le16_to_cpu(c->limit); 387 + count = le16_to_cpu(c->count); 388 + if (count_offset + (limit * sizeof(struct dx_entry)) > 389 + EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { 390 + EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " 391 + "tree checksum. Run e2fsck -D."); 392 + return; 393 + } 394 + t = (struct dx_tail *)(((struct dx_entry *)c) + limit); 395 + 396 + t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t); 397 + } 398 + 399 + static inline int ext4_handle_dirty_dx_node(handle_t *handle, 400 + struct inode *inode, 401 + struct buffer_head *bh) 402 + { 403 + ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); 404 + return ext4_handle_dirty_metadata(handle, inode, bh); 405 + } 190 406 191 407 /* 192 408 * p is at least 6 bytes before the end of page ··· 471 239 { 472 240 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - 473 241 EXT4_DIR_REC_LEN(2) - infosize; 242 + 243 + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 244 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 245 + entry_space -= sizeof(struct dx_tail); 474 246 return entry_space / sizeof(struct dx_entry); 475 247 } 476 248 477 249 static inline unsigned dx_node_limit(struct inode *dir) 478 250 { 479 251 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); 252 + 253 + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 254 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 255 + entry_space -= sizeof(struct dx_tail); 480 256 return entry_space / sizeof(struct dx_entry); 481 257 } 482 258 ··· 630 390 goto fail; 631 391 } 632 392 393 + if (!buffer_verified(bh) && 394 + !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) { 395 + ext4_warning(dir->i_sb, "Root failed checksum"); 396 + brelse(bh); 397 + *err = ERR_BAD_DX_DIR; 398 + goto fail; 399 + } 400 + set_buffer_verified(bh); 401 + 633 402 entries = (struct dx_entry *) (((char *)&root->info) + 634 403 root->info.info_length); 635 404 ··· 699 450 if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) 700 451 goto fail2; 701 452 at = entries = ((struct dx_node *) bh->b_data)->entries; 453 + 454 + if (!buffer_verified(bh) && 455 + !ext4_dx_csum_verify(dir, 456 + (struct ext4_dir_entry *)bh->b_data)) { 457 + ext4_warning(dir->i_sb, "Node failed checksum"); 458 + brelse(bh); 459 + *err = ERR_BAD_DX_DIR; 460 + goto fail; 461 + } 462 + set_buffer_verified(bh); 463 + 702 464 if (dx_get_limit(entries) != dx_node_limit (dir)) { 703 465 ext4_warning(dir->i_sb, 704 466 "dx entry: limit != node limit"); ··· 809 549 if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 810 550 0, &err))) 811 551 return err; /* Failure */ 552 + 553 + if (!buffer_verified(bh) && 554 + !ext4_dx_csum_verify(dir, 555 + (struct ext4_dir_entry *)bh->b_data)) { 556 + ext4_warning(dir->i_sb, "Node failed checksum"); 557 + return -EIO; 558 + } 559 + set_buffer_verified(bh); 560 + 812 561 p++; 813 562 brelse(p->bh); 814 563 p->bh = bh; ··· 845 576 (unsigned long)block)); 846 577 if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) 847 578 return err; 579 + 580 + if (!buffer_verified(bh) && 581 + !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) 582 + return -EIO; 583 + set_buffer_verified(bh); 848 584 849 585 de = (struct ext4_dir_entry_2 *) bh->b_data; 850 586 top = (struct ext4_dir_entry_2 *) ((char *) de + ··· 1210 936 brelse(bh); 1211 937 goto next; 1212 938 } 939 + if (!buffer_verified(bh) && 940 + !ext4_dirent_csum_verify(dir, 941 + (struct ext4_dir_entry *)bh->b_data)) { 942 + EXT4_ERROR_INODE(dir, "checksumming directory " 943 + "block %lu", (unsigned long)block); 944 + brelse(bh); 945 + goto next; 946 + } 947 + set_buffer_verified(bh); 1213 948 i = search_dirblock(bh, dir, d_name, 1214 949 block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); 1215 950 if (i == 1) { ··· 1270 987 if (!(bh = ext4_bread(NULL, dir, block, 0, err))) 1271 988 goto errout; 1272 989 990 + if (!buffer_verified(bh) && 991 + !ext4_dirent_csum_verify(dir, 992 + (struct ext4_dir_entry *)bh->b_data)) { 993 + EXT4_ERROR_INODE(dir, "checksumming directory " 994 + "block %lu", (unsigned long)block); 995 + brelse(bh); 996 + *err = -EIO; 997 + goto errout; 998 + } 999 + set_buffer_verified(bh); 1273 1000 retval = search_dirblock(bh, dir, d_name, 1274 1001 block << EXT4_BLOCK_SIZE_BITS(sb), 1275 1002 res_dir); ··· 1328 1035 brelse(bh); 1329 1036 if (!ext4_valid_inum(dir->i_sb, ino)) { 1330 1037 EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); 1038 + return ERR_PTR(-EIO); 1039 + } 1040 + if (unlikely(ino == dir->i_ino)) { 1041 + EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir", 1042 + dentry->d_name.len, 1043 + dentry->d_name.name); 1331 1044 return ERR_PTR(-EIO); 1332 1045 } 1333 1046 inode = ext4_iget(dir->i_sb, ino); ··· 1455 1156 char *data1 = (*bh)->b_data, *data2; 1456 1157 unsigned split, move, size; 1457 1158 struct ext4_dir_entry_2 *de = NULL, *de2; 1159 + struct ext4_dir_entry_tail *t; 1160 + int csum_size = 0; 1458 1161 int err = 0, i; 1162 + 1163 + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 1164 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1165 + csum_size = sizeof(struct ext4_dir_entry_tail); 1459 1166 1460 1167 bh2 = ext4_append (handle, dir, &newblock, &err); 1461 1168 if (!(bh2)) { ··· 1509 1204 /* Fancy dance to stay within two buffers */ 1510 1205 de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); 1511 1206 de = dx_pack_dirents(data1, blocksize); 1512 - de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, 1207 + de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - 1208 + (char *) de, 1513 1209 blocksize); 1514 - de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2, 1210 + de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - 1211 + (char *) de2, 1515 1212 blocksize); 1213 + if (csum_size) { 1214 + t = EXT4_DIRENT_TAIL(data2, blocksize); 1215 + initialize_dirent_tail(t, blocksize); 1216 + 1217 + t = EXT4_DIRENT_TAIL(data1, blocksize); 1218 + initialize_dirent_tail(t, blocksize); 1219 + } 1220 + 1516 1221 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); 1517 1222 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); 1518 1223 ··· 1533 1218 de = de2; 1534 1219 } 1535 1220 dx_insert_block(frame, hash2 + continued, newblock); 1536 - err = ext4_handle_dirty_metadata(handle, dir, bh2); 1221 + err = ext4_handle_dirty_dirent_node(handle, dir, bh2); 1537 1222 if (err) 1538 1223 goto journal_error; 1539 - err = ext4_handle_dirty_metadata(handle, dir, frame->bh); 1224 + err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); 1540 1225 if (err) 1541 1226 goto journal_error; 1542 1227 brelse(bh2); ··· 1573 1258 unsigned short reclen; 1574 1259 int nlen, rlen, err; 1575 1260 char *top; 1261 + int csum_size = 0; 1262 + 1263 + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1264 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1265 + csum_size = sizeof(struct ext4_dir_entry_tail); 1576 1266 1577 1267 reclen = EXT4_DIR_REC_LEN(namelen); 1578 1268 if (!de) { 1579 1269 de = (struct ext4_dir_entry_2 *)bh->b_data; 1580 - top = bh->b_data + blocksize - reclen; 1270 + top = bh->b_data + (blocksize - csum_size) - reclen; 1581 1271 while ((char *) de <= top) { 1582 1272 if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) 1583 1273 return -EIO; ··· 1615 1295 de = de1; 1616 1296 } 1617 1297 de->file_type = EXT4_FT_UNKNOWN; 1618 - if (inode) { 1619 - de->inode = cpu_to_le32(inode->i_ino); 1620 - ext4_set_de_type(dir->i_sb, de, inode->i_mode); 1621 - } else 1622 - de->inode = 0; 1298 + de->inode = cpu_to_le32(inode->i_ino); 1299 + ext4_set_de_type(dir->i_sb, de, inode->i_mode); 1623 1300 de->name_len = namelen; 1624 1301 memcpy(de->name, name, namelen); 1625 1302 /* ··· 1635 1318 dir->i_version++; 1636 1319 ext4_mark_inode_dirty(handle, dir); 1637 1320 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 1638 - err = ext4_handle_dirty_metadata(handle, dir, bh); 1321 + err = ext4_handle_dirty_dirent_node(handle, dir, bh); 1639 1322 if (err) 1640 1323 ext4_std_error(dir->i_sb, err); 1641 1324 return 0; ··· 1656 1339 struct dx_frame frames[2], *frame; 1657 1340 struct dx_entry *entries; 1658 1341 struct ext4_dir_entry_2 *de, *de2; 1342 + struct ext4_dir_entry_tail *t; 1659 1343 char *data1, *top; 1660 1344 unsigned len; 1661 1345 int retval; ··· 1664 1346 struct dx_hash_info hinfo; 1665 1347 ext4_lblk_t block; 1666 1348 struct fake_dirent *fde; 1349 + int csum_size = 0; 1350 + 1351 + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1352 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1353 + csum_size = sizeof(struct ext4_dir_entry_tail); 1667 1354 1668 1355 blocksize = dir->i_sb->s_blocksize; 1669 1356 dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); ··· 1689 1366 brelse(bh); 1690 1367 return -EIO; 1691 1368 } 1692 - len = ((char *) root) + blocksize - (char *) de; 1369 + len = ((char *) root) + (blocksize - csum_size) - (char *) de; 1693 1370 1694 1371 /* Allocate new block for the 0th block's dirents */ 1695 1372 bh2 = ext4_append(handle, dir, &block, &retval); ··· 1705 1382 top = data1 + len; 1706 1383 while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) 1707 1384 de = de2; 1708 - de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, 1385 + de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - 1386 + (char *) de, 1709 1387 blocksize); 1388 + 1389 + if (csum_size) { 1390 + t = EXT4_DIRENT_TAIL(data1, blocksize); 1391 + initialize_dirent_tail(t, blocksize); 1392 + } 1393 + 1710 1394 /* Initialize the root; the dot dirents already exist */ 1711 1395 de = (struct ext4_dir_entry_2 *) (&root->dotdot); 1712 1396 de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2), ··· 1738 1408 frame->bh = bh; 1739 1409 bh = bh2; 1740 1410 1741 - ext4_handle_dirty_metadata(handle, dir, frame->bh); 1742 - ext4_handle_dirty_metadata(handle, dir, bh); 1411 + ext4_handle_dirty_dx_node(handle, dir, frame->bh); 1412 + ext4_handle_dirty_dirent_node(handle, dir, bh); 1743 1413 1744 1414 de = do_split(handle,dir, &bh, frame, &hinfo, &retval); 1745 1415 if (!de) { ··· 1775 1445 struct inode *dir = dentry->d_parent->d_inode; 1776 1446 struct buffer_head *bh; 1777 1447 struct ext4_dir_entry_2 *de; 1448 + struct ext4_dir_entry_tail *t; 1778 1449 struct super_block *sb; 1779 1450 int retval; 1780 1451 int dx_fallback=0; 1781 1452 unsigned blocksize; 1782 1453 ext4_lblk_t block, blocks; 1454 + int csum_size = 0; 1455 + 1456 + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1457 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1458 + csum_size = sizeof(struct ext4_dir_entry_tail); 1783 1459 1784 1460 sb = dir->i_sb; 1785 1461 blocksize = sb->s_blocksize; ··· 1804 1468 bh = ext4_bread(handle, dir, block, 0, &retval); 1805 1469 if(!bh) 1806 1470 return retval; 1471 + if (!buffer_verified(bh) && 1472 + !ext4_dirent_csum_verify(dir, 1473 + (struct ext4_dir_entry *)bh->b_data)) 1474 + return -EIO; 1475 + set_buffer_verified(bh); 1807 1476 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1808 1477 if (retval != -ENOSPC) { 1809 1478 brelse(bh); ··· 1825 1484 return retval; 1826 1485 de = (struct ext4_dir_entry_2 *) bh->b_data; 1827 1486 de->inode = 0; 1828 - de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); 1487 + de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); 1488 + 1489 + if (csum_size) { 1490 + t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); 1491 + initialize_dirent_tail(t, blocksize); 1492 + } 1493 + 1829 1494 retval = add_dirent_to_buf(handle, dentry, inode, de, bh); 1830 1495 brelse(bh); 1831 1496 if (retval == 0) ··· 1862 1515 1863 1516 if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) 1864 1517 goto cleanup; 1518 + 1519 + if (!buffer_verified(bh) && 1520 + !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) 1521 + goto journal_error; 1522 + set_buffer_verified(bh); 1865 1523 1866 1524 BUFFER_TRACE(bh, "get_write_access"); 1867 1525 err = ext4_journal_get_write_access(handle, bh); ··· 1935 1583 dxtrace(dx_show_index("node", frames[1].entries)); 1936 1584 dxtrace(dx_show_index("node", 1937 1585 ((struct dx_node *) bh2->b_data)->entries)); 1938 - err = ext4_handle_dirty_metadata(handle, dir, bh2); 1586 + err = ext4_handle_dirty_dx_node(handle, dir, bh2); 1939 1587 if (err) 1940 1588 goto journal_error; 1941 1589 brelse (bh2); ··· 1961 1609 if (err) 1962 1610 goto journal_error; 1963 1611 } 1964 - err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh); 1612 + err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh); 1965 1613 if (err) { 1966 1614 ext4_std_error(inode->i_sb, err); 1967 1615 goto cleanup; ··· 1993 1641 { 1994 1642 struct ext4_dir_entry_2 *de, *pde; 1995 1643 unsigned int blocksize = dir->i_sb->s_blocksize; 1644 + int csum_size = 0; 1996 1645 int i, err; 1646 + 1647 + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 1648 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1649 + csum_size = sizeof(struct ext4_dir_entry_tail); 1997 1650 1998 1651 i = 0; 1999 1652 pde = NULL; 2000 1653 de = (struct ext4_dir_entry_2 *) bh->b_data; 2001 - while (i < bh->b_size) { 1654 + while (i < bh->b_size - csum_size) { 2002 1655 if (ext4_check_dir_entry(dir, NULL, de, bh, i)) 2003 1656 return -EIO; 2004 1657 if (de == de_del) { ··· 2024 1667 de->inode = 0; 2025 1668 dir->i_version++; 2026 1669 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 2027 - err = ext4_handle_dirty_metadata(handle, dir, bh); 1670 + err = ext4_handle_dirty_dirent_node(handle, dir, bh); 2028 1671 if (unlikely(err)) { 2029 1672 ext4_std_error(dir->i_sb, err); 2030 1673 return err; ··· 2166 1809 struct inode *inode; 2167 1810 struct buffer_head *dir_block = NULL; 2168 1811 struct ext4_dir_entry_2 *de; 1812 + struct ext4_dir_entry_tail *t; 2169 1813 unsigned int blocksize = dir->i_sb->s_blocksize; 1814 + int csum_size = 0; 2170 1815 int err, retries = 0; 1816 + 1817 + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 1818 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1819 + csum_size = sizeof(struct ext4_dir_entry_tail); 2171 1820 2172 1821 if (EXT4_DIR_LINK_MAX(dir)) 2173 1822 return -EMLINK; ··· 2215 1852 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 2216 1853 de = ext4_next_entry(de, blocksize); 2217 1854 de->inode = cpu_to_le32(dir->i_ino); 2218 - de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1), 1855 + de->rec_len = ext4_rec_len_to_disk(blocksize - 1856 + (csum_size + EXT4_DIR_REC_LEN(1)), 2219 1857 blocksize); 2220 1858 de->name_len = 2; 2221 1859 strcpy(de->name, ".."); 2222 1860 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 2223 1861 set_nlink(inode, 2); 1862 + 1863 + if (csum_size) { 1864 + t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize); 1865 + initialize_dirent_tail(t, blocksize); 1866 + } 1867 + 2224 1868 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); 2225 - err = ext4_handle_dirty_metadata(handle, inode, dir_block); 1869 + err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); 2226 1870 if (err) 2227 1871 goto out_clear_inode; 1872 + set_buffer_verified(dir_block); 2228 1873 err = ext4_mark_inode_dirty(handle, inode); 2229 1874 if (!err) 2230 1875 err = ext4_add_entry(handle, dentry, inode); ··· 2282 1911 inode->i_ino); 2283 1912 return 1; 2284 1913 } 1914 + if (!buffer_verified(bh) && 1915 + !ext4_dirent_csum_verify(inode, 1916 + (struct ext4_dir_entry *)bh->b_data)) { 1917 + EXT4_ERROR_INODE(inode, "checksum error reading directory " 1918 + "lblock 0"); 1919 + return -EIO; 1920 + } 1921 + set_buffer_verified(bh); 2285 1922 de = (struct ext4_dir_entry_2 *) bh->b_data; 2286 1923 de1 = ext4_next_entry(de, sb->s_blocksize); 2287 1924 if (le32_to_cpu(de->inode) != inode->i_ino || ··· 2321 1942 offset += sb->s_blocksize; 2322 1943 continue; 2323 1944 } 1945 + if (!buffer_verified(bh) && 1946 + !ext4_dirent_csum_verify(inode, 1947 + (struct ext4_dir_entry *)bh->b_data)) { 1948 + EXT4_ERROR_INODE(inode, "checksum error " 1949 + "reading directory lblock 0"); 1950 + return -EIO; 1951 + } 1952 + set_buffer_verified(bh); 2324 1953 de = (struct ext4_dir_entry_2 *) bh->b_data; 2325 1954 } 2326 1955 if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) { ··· 2397 2010 /* Insert this inode at the head of the on-disk orphan list... */ 2398 2011 NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); 2399 2012 EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); 2400 - err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 2013 + err = ext4_handle_dirty_super_now(handle, sb); 2401 2014 rc = ext4_mark_iloc_dirty(handle, inode, &iloc); 2402 2015 if (!err) 2403 2016 err = rc; ··· 2470 2083 if (err) 2471 2084 goto out_brelse; 2472 2085 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); 2473 - err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); 2086 + err = ext4_handle_dirty_super_now(handle, inode->i_sb); 2474 2087 } else { 2475 2088 struct ext4_iloc iloc2; 2476 2089 struct inode *i_prev = ··· 2829 2442 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); 2830 2443 if (!dir_bh) 2831 2444 goto end_rename; 2445 + if (!buffer_verified(dir_bh) && 2446 + !ext4_dirent_csum_verify(old_inode, 2447 + (struct ext4_dir_entry *)dir_bh->b_data)) 2448 + goto end_rename; 2449 + set_buffer_verified(dir_bh); 2832 2450 if (le32_to_cpu(PARENT_INO(dir_bh->b_data, 2833 2451 old_dir->i_sb->s_blocksize)) != old_dir->i_ino) 2834 2452 goto end_rename; ··· 2864 2472 ext4_current_time(new_dir); 2865 2473 ext4_mark_inode_dirty(handle, new_dir); 2866 2474 BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); 2867 - retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh); 2475 + retval = ext4_handle_dirty_dirent_node(handle, new_dir, new_bh); 2868 2476 if (unlikely(retval)) { 2869 2477 ext4_std_error(new_dir->i_sb, retval); 2870 2478 goto end_rename; ··· 2918 2526 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = 2919 2527 cpu_to_le32(new_dir->i_ino); 2920 2528 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2921 - retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh); 2529 + retval = ext4_handle_dirty_dirent_node(handle, old_inode, 2530 + dir_bh); 2922 2531 if (retval) { 2923 2532 ext4_std_error(old_dir->i_sb, retval); 2924 2533 goto end_rename;
+63 -8
fs/ext4/resize.c
··· 161 161 if (flex_gd == NULL) 162 162 goto out3; 163 163 164 + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) 165 + goto out2; 164 166 flex_gd->count = flexbg_size; 165 167 166 168 flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * ··· 798 796 ext4_kvfree(o_group_desc); 799 797 800 798 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 801 - err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 799 + err = ext4_handle_dirty_super_now(handle, sb); 802 800 if (err) 803 801 ext4_std_error(sb, err); 804 802 ··· 970 968 goto exit_err; 971 969 } 972 970 971 + ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); 972 + 973 973 while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { 974 974 struct buffer_head *bh; 975 975 ··· 1071 1067 return err; 1072 1068 } 1073 1069 1070 + static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) 1071 + { 1072 + struct buffer_head *bh = sb_getblk(sb, block); 1073 + if (!bh) 1074 + return NULL; 1075 + 1076 + if (bitmap_uptodate(bh)) 1077 + return bh; 1078 + 1079 + lock_buffer(bh); 1080 + if (bh_submit_read(bh) < 0) { 1081 + unlock_buffer(bh); 1082 + brelse(bh); 1083 + return NULL; 1084 + } 1085 + unlock_buffer(bh); 1086 + 1087 + return bh; 1088 + } 1089 + 1090 + static int ext4_set_bitmap_checksums(struct super_block *sb, 1091 + ext4_group_t group, 1092 + struct ext4_group_desc *gdp, 1093 + struct ext4_new_group_data *group_data) 1094 + { 1095 + struct buffer_head *bh; 1096 + 1097 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 1098 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1099 + return 0; 1100 + 1101 + bh = ext4_get_bitmap(sb, group_data->inode_bitmap); 1102 + if (!bh) 1103 + return -EIO; 1104 + ext4_inode_bitmap_csum_set(sb, group, gdp, bh, 1105 + EXT4_INODES_PER_GROUP(sb) / 8); 1106 + brelse(bh); 1107 + 1108 + bh = ext4_get_bitmap(sb, group_data->block_bitmap); 1109 + if (!bh) 1110 + return -EIO; 1111 + ext4_block_bitmap_csum_set(sb, group, gdp, bh, 1112 + EXT4_BLOCKS_PER_GROUP(sb) / 8); 1113 + brelse(bh); 1114 + 1115 + return 0; 1116 + } 1117 + 1074 1118 /* 1075 1119 * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg 1076 1120 */ ··· 1145 1093 */ 1146 1094 gdb_bh = sbi->s_group_desc[gdb_num]; 1147 1095 /* Update group descriptor block for new group */ 1148 - gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + 1096 + gdp = (struct ext4_group_desc *)(gdb_bh->b_data + 1149 1097 gdb_off * EXT4_DESC_SIZE(sb)); 1150 1098 1151 1099 memset(gdp, 0, EXT4_DESC_SIZE(sb)); 1152 1100 ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); 1153 1101 ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); 1102 + err = ext4_set_bitmap_checksums(sb, group, gdp, group_data); 1103 + if (err) { 1104 + ext4_std_error(sb, err); 1105 + break; 1106 + } 1107 + 1154 1108 ext4_inode_table_set(sb, gdp, group_data->inode_table); 1155 1109 ext4_free_group_clusters_set(sb, gdp, 1156 1110 EXT4_B2C(sbi, group_data->free_blocks_count)); 1157 1111 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 1158 1112 gdp->bg_flags = cpu_to_le16(*bg_flags); 1159 - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 1113 + ext4_group_desc_csum_set(sb, group, gdp); 1160 1114 1161 1115 err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); 1162 1116 if (unlikely(err)) { ··· 1401 1343 (1 + ext4_bg_num_gdb(sb, group + i) + 1402 1344 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 1403 1345 group_data[i].free_blocks_count = blocks_per_group - overhead; 1404 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 1405 - EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 1346 + if (ext4_has_group_desc_csum(sb)) 1406 1347 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | 1407 1348 EXT4_BG_INODE_UNINIT; 1408 1349 else 1409 1350 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; 1410 1351 } 1411 1352 1412 - if (last_group == n_group && 1413 - EXT4_HAS_RO_COMPAT_FEATURE(sb, 1414 - EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 1353 + if (last_group == n_group && ext4_has_group_desc_csum(sb)) 1415 1354 /* We need to initialize block bitmap of last group. */ 1416 1355 flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; 1417 1356
+209 -42
fs/ext4/super.c
··· 112 112 #define IS_EXT3_SB(sb) (0) 113 113 #endif 114 114 115 + static int ext4_verify_csum_type(struct super_block *sb, 116 + struct ext4_super_block *es) 117 + { 118 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 119 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 120 + return 1; 121 + 122 + return es->s_checksum_type == EXT4_CRC32C_CHKSUM; 123 + } 124 + 125 + static __le32 ext4_superblock_csum(struct super_block *sb, 126 + struct ext4_super_block *es) 127 + { 128 + struct ext4_sb_info *sbi = EXT4_SB(sb); 129 + int offset = offsetof(struct ext4_super_block, s_checksum); 130 + __u32 csum; 131 + 132 + csum = ext4_chksum(sbi, ~0, (char *)es, offset); 133 + 134 + return cpu_to_le32(csum); 135 + } 136 + 137 + int ext4_superblock_csum_verify(struct super_block *sb, 138 + struct ext4_super_block *es) 139 + { 140 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 141 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 142 + return 1; 143 + 144 + return es->s_checksum == ext4_superblock_csum(sb, es); 145 + } 146 + 147 + void ext4_superblock_csum_set(struct super_block *sb, 148 + struct ext4_super_block *es) 149 + { 150 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 151 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 152 + return; 153 + 154 + es->s_checksum = ext4_superblock_csum(sb, es); 155 + } 156 + 115 157 void *ext4_kvmalloc(size_t size, gfp_t flags) 116 158 { 117 159 void *ret; ··· 539 497 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", 540 498 sb->s_id, function, line, current->comm, &vaf); 541 499 va_end(args); 500 + save_error_info(sb, function, line); 542 501 543 502 ext4_handle_error(sb); 544 503 } ··· 948 905 unlock_super(sb); 949 906 kobject_put(&sbi->s_kobj); 950 907 wait_for_completion(&sbi->s_kobj_unregister); 908 + if (sbi->s_chksum_driver) 909 + crypto_free_shash(sbi->s_chksum_driver); 951 910 kfree(sbi->s_blockgroup_lock); 952 911 kfree(sbi); 953 912 } ··· 1967 1922 return 0; 1968 1923 } 1969 1924 1970 - __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1971 - struct ext4_group_desc *gdp) 1925 + static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, 1926 + struct ext4_group_desc *gdp) 1972 1927 { 1928 + int offset; 1973 1929 __u16 crc = 0; 1930 + __le32 le_group = cpu_to_le32(block_group); 1974 1931 1975 - if (sbi->s_es->s_feature_ro_compat & 1976 - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { 1977 - int offset = offsetof(struct ext4_group_desc, bg_checksum); 1978 - __le32 le_group = cpu_to_le32(block_group); 1932 + if ((sbi->s_es->s_feature_ro_compat & 1933 + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { 1934 + /* Use new metadata_csum algorithm */ 1935 + __u16 old_csum; 1936 + __u32 csum32; 1979 1937 1980 - crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1981 - crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1982 - crc = crc16(crc, (__u8 *)gdp, offset); 1983 - offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1984 - /* for checksum of struct ext4_group_desc do the rest...*/ 1985 - if ((sbi->s_es->s_feature_incompat & 1986 - cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1987 - offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1988 - crc = crc16(crc, (__u8 *)gdp + offset, 1989 - le16_to_cpu(sbi->s_es->s_desc_size) - 1990 - offset); 1938 + old_csum = gdp->bg_checksum; 1939 + gdp->bg_checksum = 0; 1940 + csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, 1941 + sizeof(le_group)); 1942 + csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, 1943 + sbi->s_desc_size); 1944 + gdp->bg_checksum = old_csum; 1945 + 1946 + crc = csum32 & 0xFFFF; 1947 + goto out; 1991 1948 } 1992 1949 1950 + /* old crc16 code */ 1951 + offset = offsetof(struct ext4_group_desc, bg_checksum); 1952 + 1953 + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 1954 + crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); 1955 + crc = crc16(crc, (__u8 *)gdp, offset); 1956 + offset += sizeof(gdp->bg_checksum); /* skip checksum */ 1957 + /* for checksum of struct ext4_group_desc do the rest...*/ 1958 + if ((sbi->s_es->s_feature_incompat & 1959 + cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && 1960 + offset < le16_to_cpu(sbi->s_es->s_desc_size)) 1961 + crc = crc16(crc, (__u8 *)gdp + offset, 1962 + le16_to_cpu(sbi->s_es->s_desc_size) - 1963 + offset); 1964 + 1965 + out: 1993 1966 return cpu_to_le16(crc); 1994 1967 } 1995 1968 1996 - int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, 1969 + int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, 1997 1970 struct ext4_group_desc *gdp) 1998 1971 { 1999 - if ((sbi->s_es->s_feature_ro_compat & 2000 - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && 2001 - (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) 1972 + if (ext4_has_group_desc_csum(sb) && 1973 + (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), 1974 + block_group, gdp))) 2002 1975 return 0; 2003 1976 2004 1977 return 1; 1978 + } 1979 + 1980 + void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, 1981 + struct ext4_group_desc *gdp) 1982 + { 1983 + if (!ext4_has_group_desc_csum(sb)) 1984 + return; 1985 + gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); 2005 1986 } 2006 1987 2007 1988 /* Called at mount-time, super-block is locked */ ··· 2084 2013 return 0; 2085 2014 } 2086 2015 ext4_lock_group(sb, i); 2087 - if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 2016 + if (!ext4_group_desc_csum_verify(sb, i, gdp)) { 2088 2017 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " 2089 2018 "Checksum for group %u failed (%u!=%u)", 2090 2019 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, ··· 2488 2417 return count; 2489 2418 } 2490 2419 2420 + static ssize_t trigger_test_error(struct ext4_attr *a, 2421 + struct ext4_sb_info *sbi, 2422 + const char *buf, size_t count) 2423 + { 2424 + int len = count; 2425 + 2426 + if (!capable(CAP_SYS_ADMIN)) 2427 + return -EPERM; 2428 + 2429 + if (len && buf[len-1] == '\n') 2430 + len--; 2431 + 2432 + if (len) 2433 + ext4_error(sbi->s_sb, "%.*s", len, buf); 2434 + return count; 2435 + } 2436 + 2491 2437 #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 2492 2438 static struct ext4_attr ext4_attr_##_name = { \ 2493 2439 .attr = {.name = __stringify(_name), .mode = _mode }, \ ··· 2535 2447 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2536 2448 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2537 2449 EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2450 + EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); 2538 2451 2539 2452 static struct attribute *ext4_attrs[] = { 2540 2453 ATTR_LIST(delayed_allocation_blocks), ··· 2550 2461 ATTR_LIST(mb_stream_req), 2551 2462 ATTR_LIST(mb_group_prealloc), 2552 2463 ATTR_LIST(max_writeback_mb_bump), 2464 + ATTR_LIST(trigger_fs_error), 2553 2465 NULL, 2554 2466 }; 2555 2467 ··· 3047 2957 kthread_stop(ext4_lazyinit_task); 3048 2958 } 3049 2959 2960 + static int set_journal_csum_feature_set(struct super_block *sb) 2961 + { 2962 + int ret = 1; 2963 + int compat, incompat; 2964 + struct ext4_sb_info *sbi = EXT4_SB(sb); 2965 + 2966 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 2967 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 2968 + /* journal checksum v2 */ 2969 + compat = 0; 2970 + incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; 2971 + } else { 2972 + /* journal checksum v1 */ 2973 + compat = JBD2_FEATURE_COMPAT_CHECKSUM; 2974 + incompat = 0; 2975 + } 2976 + 2977 + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 2978 + ret = jbd2_journal_set_features(sbi->s_journal, 2979 + compat, 0, 2980 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | 2981 + incompat); 2982 + } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 2983 + ret = jbd2_journal_set_features(sbi->s_journal, 2984 + compat, 0, 2985 + incompat); 2986 + jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2987 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2988 + } else { 2989 + jbd2_journal_clear_features(sbi->s_journal, 2990 + JBD2_FEATURE_COMPAT_CHECKSUM, 0, 2991 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | 2992 + JBD2_FEATURE_INCOMPAT_CSUM_V2); 2993 + } 2994 + 2995 + return ret; 2996 + } 2997 + 3050 2998 static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3051 2999 { 3052 3000 char *orig_data = kstrdup(data, GFP_KERNEL); ··· 3121 2993 goto out_free_orig; 3122 2994 } 3123 2995 sb->s_fs_info = sbi; 2996 + sbi->s_sb = sb; 3124 2997 sbi->s_mount_opt = 0; 3125 2998 sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID); 3126 2999 sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID); ··· 3161 3032 * Note: s_es must be initialized as soon as possible because 3162 3033 * some ext4 macro-instructions depend on its value 3163 3034 */ 3164 - es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3035 + es = (struct ext4_super_block *) (bh->b_data + offset); 3165 3036 sbi->s_es = es; 3166 3037 sb->s_magic = le16_to_cpu(es->s_magic); 3167 3038 if (sb->s_magic != EXT4_SUPER_MAGIC) 3168 3039 goto cantfind_ext4; 3169 3040 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); 3041 + 3042 + /* Warn if metadata_csum and gdt_csum are both set. */ 3043 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3044 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && 3045 + EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) 3046 + ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are " 3047 + "redundant flags; please run fsck."); 3048 + 3049 + /* Check for a known checksum algorithm */ 3050 + if (!ext4_verify_csum_type(sb, es)) { 3051 + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 3052 + "unknown checksum algorithm."); 3053 + silent = 1; 3054 + goto cantfind_ext4; 3055 + } 3056 + 3057 + /* Load the checksum driver */ 3058 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3059 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3060 + sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 3061 + if (IS_ERR(sbi->s_chksum_driver)) { 3062 + ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); 3063 + ret = PTR_ERR(sbi->s_chksum_driver); 3064 + sbi->s_chksum_driver = NULL; 3065 + goto failed_mount; 3066 + } 3067 + } 3068 + 3069 + /* Check superblock checksum */ 3070 + if (!ext4_superblock_csum_verify(sb, es)) { 3071 + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " 3072 + "invalid superblock checksum. Run e2fsck?"); 3073 + silent = 1; 3074 + goto cantfind_ext4; 3075 + } 3076 + 3077 + /* Precompute checksum seed for all metadata */ 3078 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3079 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 3080 + sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3081 + sizeof(es->s_uuid)); 3170 3082 3171 3083 /* Set defaults before we parse the mount options */ 3172 3084 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); ··· 3370 3200 "Can't read superblock on 2nd try"); 3371 3201 goto failed_mount; 3372 3202 } 3373 - es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 3203 + es = (struct ext4_super_block *)(bh->b_data + offset); 3374 3204 sbi->s_es = es; 3375 3205 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 3376 3206 ext4_msg(sb, KERN_ERR, ··· 3562 3392 GFP_KERNEL); 3563 3393 if (sbi->s_group_desc == NULL) { 3564 3394 ext4_msg(sb, KERN_ERR, "not enough memory"); 3395 + ret = -ENOMEM; 3565 3396 goto failed_mount; 3566 3397 } 3567 3398 ··· 3620 3449 } 3621 3450 if (err) { 3622 3451 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3452 + ret = err; 3623 3453 goto failed_mount3; 3624 3454 } 3625 3455 ··· 3678 3506 goto no_journal; 3679 3507 } 3680 3508 3681 - if (ext4_blocks_count(es) > 0xffffffffULL && 3509 + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && 3682 3510 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, 3683 3511 JBD2_FEATURE_INCOMPAT_64BIT)) { 3684 3512 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); 3685 3513 goto failed_mount_wq; 3686 3514 } 3687 3515 3688 - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 3689 - jbd2_journal_set_features(sbi->s_journal, 3690 - JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3691 - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3692 - } else if (test_opt(sb, JOURNAL_CHECKSUM)) { 3693 - jbd2_journal_set_features(sbi->s_journal, 3694 - JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 3695 - jbd2_journal_clear_features(sbi->s_journal, 0, 0, 3696 - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3697 - } else { 3698 - jbd2_journal_clear_features(sbi->s_journal, 3699 - JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3700 - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3516 + if (!set_journal_csum_feature_set(sb)) { 3517 + ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " 3518 + "feature set"); 3519 + goto failed_mount_wq; 3701 3520 } 3702 3521 3703 3522 /* We have now updated the journal if required, so we can ··· 3769 3606 goto failed_mount4; 3770 3607 } 3771 3608 3772 - ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 3609 + if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) 3610 + sb->s_flags |= MS_RDONLY; 3773 3611 3774 3612 /* determine the minimum size of new large inodes, if present */ 3775 3613 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { ··· 3805 3641 } 3806 3642 3807 3643 ext4_ext_init(sb); 3808 - err = ext4_mb_init(sb, needs_recovery); 3644 + err = ext4_mb_init(sb); 3809 3645 if (err) { 3810 3646 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", 3811 3647 err); ··· 3888 3724 brelse(sbi->s_group_desc[i]); 3889 3725 ext4_kvfree(sbi->s_group_desc); 3890 3726 failed_mount: 3727 + if (sbi->s_chksum_driver) 3728 + crypto_free_shash(sbi->s_chksum_driver); 3891 3729 if (sbi->s_proc) { 3892 3730 remove_proc_entry("options", sbi->s_proc); 3893 3731 remove_proc_entry(sb->s_id, ext4_proc_root); ··· 4013 3847 goto out_bdev; 4014 3848 } 4015 3849 4016 - es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); 3850 + es = (struct ext4_super_block *) (bh->b_data + offset); 4017 3851 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || 4018 3852 !(le32_to_cpu(es->s_feature_incompat) & 4019 3853 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { ··· 4205 4039 &EXT4_SB(sb)->s_freeinodes_counter)); 4206 4040 sb->s_dirt = 0; 4207 4041 BUFFER_TRACE(sbh, "marking dirty"); 4042 + ext4_superblock_csum_set(sb, es); 4208 4043 mark_buffer_dirty(sbh); 4209 4044 if (sync) { 4210 4045 error = sync_dirty_buffer(sbh); ··· 4500 4333 struct ext4_group_desc *gdp = 4501 4334 ext4_get_group_desc(sb, g, NULL); 4502 4335 4503 - if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { 4336 + if (!ext4_group_desc_csum_verify(sb, g, gdp)) { 4504 4337 ext4_msg(sb, KERN_ERR, 4505 4338 "ext4_remount: Checksum for group %u failed (%u!=%u)", 4506 4339 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
+77 -15
fs/ext4/xattr.c
··· 122 122 NULL 123 123 }; 124 124 125 + static __le32 ext4_xattr_block_csum(struct inode *inode, 126 + sector_t block_nr, 127 + struct ext4_xattr_header *hdr) 128 + { 129 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 130 + struct ext4_inode_info *ei = EXT4_I(inode); 131 + __u32 csum, old; 132 + 133 + old = hdr->h_checksum; 134 + hdr->h_checksum = 0; 135 + if (le32_to_cpu(hdr->h_refcount) != 1) { 136 + block_nr = cpu_to_le64(block_nr); 137 + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr, 138 + sizeof(block_nr)); 139 + } else 140 + csum = ei->i_csum_seed; 141 + csum = ext4_chksum(sbi, csum, (__u8 *)hdr, 142 + EXT4_BLOCK_SIZE(inode->i_sb)); 143 + hdr->h_checksum = old; 144 + return cpu_to_le32(csum); 145 + } 146 + 147 + static int ext4_xattr_block_csum_verify(struct inode *inode, 148 + sector_t block_nr, 149 + struct ext4_xattr_header *hdr) 150 + { 151 + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 152 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && 153 + (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) 154 + return 0; 155 + return 1; 156 + } 157 + 158 + static void ext4_xattr_block_csum_set(struct inode *inode, 159 + sector_t block_nr, 160 + struct ext4_xattr_header *hdr) 161 + { 162 + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 163 + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 164 + return; 165 + 166 + hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); 167 + } 168 + 169 + static inline int ext4_handle_dirty_xattr_block(handle_t *handle, 170 + struct inode *inode, 171 + struct buffer_head *bh) 172 + { 173 + ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh)); 174 + return ext4_handle_dirty_metadata(handle, inode, bh); 175 + } 176 + 125 177 static inline const struct xattr_handler * 126 178 ext4_xattr_handler(int name_index) 127 179 { ··· 208 156 } 209 157 210 158 static inline int 211 - ext4_xattr_check_block(struct buffer_head *bh) 159 + ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) 212 160 { 161 + int error; 162 + 163 + if (buffer_verified(bh)) 164 + return 0; 165 + 213 166 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 214 167 BHDR(bh)->h_blocks != cpu_to_le32(1)) 215 168 return -EIO; 216 - return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 169 + if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) 170 + return -EIO; 171 + error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 172 + if (!error) 173 + set_buffer_verified(bh); 174 + return error; 217 175 } 218 176 219 177 static inline int ··· 286 224 goto cleanup; 287 225 ea_bdebug(bh, "b_count=%d, refcount=%d", 288 226 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 289 - if (ext4_xattr_check_block(bh)) { 227 + if (ext4_xattr_check_block(inode, bh)) { 290 228 bad_block: 291 229 EXT4_ERROR_INODE(inode, "bad block %llu", 292 230 EXT4_I(inode)->i_file_acl); ··· 431 369 goto cleanup; 432 370 ea_bdebug(bh, "b_count=%d, refcount=%d", 433 371 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 434 - if (ext4_xattr_check_block(bh)) { 372 + if (ext4_xattr_check_block(inode, bh)) { 435 373 EXT4_ERROR_INODE(inode, "bad block %llu", 436 374 EXT4_I(inode)->i_file_acl); 437 375 error = -EIO; ··· 554 492 if (ce) 555 493 mb_cache_entry_release(ce); 556 494 unlock_buffer(bh); 557 - error = ext4_handle_dirty_metadata(handle, inode, bh); 495 + error = ext4_handle_dirty_xattr_block(handle, inode, bh); 558 496 if (IS_SYNC(inode)) 559 497 ext4_handle_sync(handle); 560 498 dquot_free_block(inode, 1); ··· 724 662 ea_bdebug(bs->bh, "b_count=%d, refcount=%d", 725 663 atomic_read(&(bs->bh->b_count)), 726 664 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 727 - if (ext4_xattr_check_block(bs->bh)) { 665 + if (ext4_xattr_check_block(inode, bs->bh)) { 728 666 EXT4_ERROR_INODE(inode, "bad block %llu", 729 667 EXT4_I(inode)->i_file_acl); 730 668 error = -EIO; ··· 787 725 if (error == -EIO) 788 726 goto bad_block; 789 727 if (!error) 790 - error = ext4_handle_dirty_metadata(handle, 791 - inode, 792 - bs->bh); 728 + error = ext4_handle_dirty_xattr_block(handle, 729 + inode, 730 + bs->bh); 793 731 if (error) 794 732 goto cleanup; 795 733 goto inserted; ··· 858 796 ea_bdebug(new_bh, "reusing; refcount now=%d", 859 797 le32_to_cpu(BHDR(new_bh)->h_refcount)); 860 798 unlock_buffer(new_bh); 861 - error = ext4_handle_dirty_metadata(handle, 862 - inode, 863 - new_bh); 799 + error = ext4_handle_dirty_xattr_block(handle, 800 + inode, 801 + new_bh); 864 802 if (error) 865 803 goto cleanup_dquot; 866 804 } ··· 917 855 set_buffer_uptodate(new_bh); 918 856 unlock_buffer(new_bh); 919 857 ext4_xattr_cache_insert(new_bh); 920 - error = ext4_handle_dirty_metadata(handle, 921 - inode, new_bh); 858 + error = ext4_handle_dirty_xattr_block(handle, 859 + inode, new_bh); 922 860 if (error) 923 861 goto cleanup; 924 862 } ··· 1255 1193 error = -EIO; 1256 1194 if (!bh) 1257 1195 goto cleanup; 1258 - if (ext4_xattr_check_block(bh)) { 1196 + if (ext4_xattr_check_block(inode, bh)) { 1259 1197 EXT4_ERROR_INODE(inode, "bad block %llu", 1260 1198 EXT4_I(inode)->i_file_acl); 1261 1199 error = -EIO;
+3 -1
fs/ext4/xattr.h
··· 27 27 __le32 h_refcount; /* reference count */ 28 28 __le32 h_blocks; /* number of disk blocks used */ 29 29 __le32 h_hash; /* hash value of all attributes */ 30 - __u32 h_reserved[4]; /* zero right now */ 30 + __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ 31 + /* id = inum if refcount=1, blknum otherwise */ 32 + __u32 h_reserved[3]; /* zero right now */ 31 33 }; 32 34 33 35 struct ext4_xattr_ibody_header {
+2
fs/jbd2/Kconfig
··· 1 1 config JBD2 2 2 tristate 3 3 select CRC32 4 + select CRYPTO 5 + select CRYPTO_CRC32C 4 6 help 5 7 This is a generic journaling layer for block devices that support 6 8 both 32-bit and 64-bit block numbers. It is currently used by
+67 -3
fs/jbd2/commit.c
··· 85 85 __brelse(bh); 86 86 } 87 87 88 + static void jbd2_commit_block_csum_set(journal_t *j, 89 + struct journal_head *descriptor) 90 + { 91 + struct commit_header *h; 92 + __u32 csum; 93 + 94 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 95 + return; 96 + 97 + h = (struct commit_header *)(jh2bh(descriptor)->b_data); 98 + h->h_chksum_type = 0; 99 + h->h_chksum_size = 0; 100 + h->h_chksum[0] = 0; 101 + csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, 102 + j->j_blocksize); 103 + h->h_chksum[0] = cpu_to_be32(csum); 104 + } 105 + 88 106 /* 89 107 * Done it all: now submit the commit record. We should have 90 108 * cleaned up our previous buffers by now, so if we are in abort ··· 146 128 tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; 147 129 tmp->h_chksum[0] = cpu_to_be32(crc32_sum); 148 130 } 131 + jbd2_commit_block_csum_set(journal, descriptor); 149 132 150 133 JBUFFER_TRACE(descriptor, "submit commit block"); 151 134 lock_buffer(bh); ··· 320 301 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); 321 302 } 322 303 304 + static void jbd2_descr_block_csum_set(journal_t *j, 305 + struct journal_head *descriptor) 306 + { 307 + struct jbd2_journal_block_tail *tail; 308 + __u32 csum; 309 + 310 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 311 + return; 312 + 313 + tail = (struct jbd2_journal_block_tail *) 314 + (jh2bh(descriptor)->b_data + j->j_blocksize - 315 + sizeof(struct jbd2_journal_block_tail)); 316 + tail->t_checksum = 0; 317 + csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, 318 + j->j_blocksize); 319 + tail->t_checksum = cpu_to_be32(csum); 320 + } 321 + 322 + static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, 323 + struct buffer_head *bh, __u32 sequence) 324 + { 325 + struct page *page = bh->b_page; 326 + __u8 *addr; 327 + __u32 csum; 328 + 329 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 330 + return; 331 + 332 + sequence = cpu_to_be32(sequence); 333 + addr = kmap_atomic(page, KM_USER0); 334 + csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, 335 + sizeof(sequence)); 336 + csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), 337 + bh->b_size); 338 + kunmap_atomic(addr, KM_USER0); 339 + 340 + tag->t_checksum = cpu_to_be32(csum); 341 + } 323 342 /* 324 343 * jbd2_journal_commit_transaction 325 344 * ··· 391 334 unsigned long first_block; 392 335 tid_t first_tid; 393 336 int update_tail; 337 + int csum_size = 0; 338 + 339 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 340 + csum_size = sizeof(struct jbd2_journal_block_tail); 394 341 395 342 /* 396 343 * First job: lock down the current transaction and wait for ··· 688 627 689 628 tag = (journal_block_tag_t *) tagp; 690 629 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); 691 - tag->t_flags = cpu_to_be32(tag_flag); 630 + tag->t_flags = cpu_to_be16(tag_flag); 631 + jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), 632 + commit_transaction->t_tid); 692 633 tagp += tag_bytes; 693 634 space_left -= tag_bytes; 694 635 ··· 706 643 707 644 if (bufs == journal->j_wbufsize || 708 645 commit_transaction->t_buffers == NULL || 709 - space_left < tag_bytes + 16) { 646 + space_left < tag_bytes + 16 + csum_size) { 710 647 711 648 jbd_debug(4, "JBD2: Submit %d IOs\n", bufs); 712 649 ··· 714 651 submitting the IOs. "tag" still points to 715 652 the last tag we set up. */ 716 653 717 - tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG); 654 + tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); 718 655 656 + jbd2_descr_block_csum_set(journal, descriptor); 719 657 start_journal_io: 720 658 for (i = 0; i < bufs; i++) { 721 659 struct buffer_head *bh = wbuf[i];
+130 -2
fs/jbd2/journal.c
··· 97 97 static void __journal_abort_soft (journal_t *journal, int errno); 98 98 static int jbd2_journal_create_slab(size_t slab_size); 99 99 100 + /* Checksumming functions */ 101 + int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 102 + { 103 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 104 + return 1; 105 + 106 + return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 107 + } 108 + 109 + static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) 110 + { 111 + __u32 csum, old_csum; 112 + 113 + old_csum = sb->s_checksum; 114 + sb->s_checksum = 0; 115 + csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t)); 116 + sb->s_checksum = old_csum; 117 + 118 + return cpu_to_be32(csum); 119 + } 120 + 121 + int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 122 + { 123 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 124 + return 1; 125 + 126 + return sb->s_checksum == jbd2_superblock_csum(j, sb); 127 + } 128 + 129 + void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 130 + { 131 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 132 + return; 133 + 134 + sb->s_checksum = jbd2_superblock_csum(j, sb); 135 + } 136 + 100 137 /* 101 138 * Helper function used to manage commit timeouts 102 139 */ ··· 1385 1348 jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", 1386 1349 journal->j_errno); 1387 1350 sb->s_errno = cpu_to_be32(journal->j_errno); 1351 + jbd2_superblock_csum_set(journal, sb); 1388 1352 read_unlock(&journal->j_state_lock); 1389 1353 1390 1354 jbd2_write_superblock(journal, WRITE_SYNC); ··· 1413 1375 goto out; 1414 1376 } 1415 1377 } 1378 + 1379 + if (buffer_verified(bh)) 1380 + return 0; 1416 1381 1417 1382 sb = journal->j_superblock; 1418 1383 ··· 1453 1412 be32_to_cpu(sb->s_first)); 1454 1413 goto out; 1455 1414 } 1415 + 1416 + if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && 1417 + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1418 + /* Can't have checksum v1 and v2 on at the same time! */ 1419 + printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " 1420 + "at the same time!\n"); 1421 + goto out; 1422 + } 1423 + 1424 + if (!jbd2_verify_csum_type(journal, sb)) { 1425 + printk(KERN_ERR "JBD: Unknown checksum type\n"); 1426 + goto out; 1427 + } 1428 + 1429 + /* Load the checksum driver */ 1430 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1431 + journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1432 + if (IS_ERR(journal->j_chksum_driver)) { 1433 + printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); 1434 + err = PTR_ERR(journal->j_chksum_driver); 1435 + journal->j_chksum_driver = NULL; 1436 + goto out; 1437 + } 1438 + } 1439 + 1440 + /* Check superblock checksum */ 1441 + if (!jbd2_superblock_csum_verify(journal, sb)) { 1442 + printk(KERN_ERR "JBD: journal checksum error\n"); 1443 + goto out; 1444 + } 1445 + 1446 + /* Precompute checksum seed for all metadata */ 1447 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1448 + journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1449 + sizeof(sb->s_uuid)); 1450 + 1451 + set_buffer_verified(bh); 1456 1452 1457 1453 return 0; 1458 1454 ··· 1642 1564 iput(journal->j_inode); 1643 1565 if (journal->j_revoke) 1644 1566 jbd2_journal_destroy_revoke(journal); 1567 + if (journal->j_chksum_driver) 1568 + crypto_free_shash(journal->j_chksum_driver); 1645 1569 kfree(journal->j_wbuf); 1646 1570 kfree(journal); 1647 1571 ··· 1733 1653 int jbd2_journal_set_features (journal_t *journal, unsigned long compat, 1734 1654 unsigned long ro, unsigned long incompat) 1735 1655 { 1656 + #define INCOMPAT_FEATURE_ON(f) \ 1657 + ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f))) 1658 + #define COMPAT_FEATURE_ON(f) \ 1659 + ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f))) 1736 1660 journal_superblock_t *sb; 1737 1661 1738 1662 if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) ··· 1745 1661 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 1746 1662 return 0; 1747 1663 1664 + /* Asking for checksumming v2 and v1? Only give them v2. */ 1665 + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && 1666 + compat & JBD2_FEATURE_COMPAT_CHECKSUM) 1667 + compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; 1668 + 1748 1669 jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", 1749 1670 compat, ro, incompat); 1750 1671 1751 1672 sb = journal->j_superblock; 1673 + 1674 + /* If enabling v2 checksums, update superblock */ 1675 + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1676 + sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 1677 + sb->s_feature_compat &= 1678 + ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); 1679 + 1680 + /* Load the checksum driver */ 1681 + if (journal->j_chksum_driver == NULL) { 1682 + journal->j_chksum_driver = crypto_alloc_shash("crc32c", 1683 + 0, 0); 1684 + if (IS_ERR(journal->j_chksum_driver)) { 1685 + printk(KERN_ERR "JBD: Cannot load crc32c " 1686 + "driver.\n"); 1687 + journal->j_chksum_driver = NULL; 1688 + return 0; 1689 + } 1690 + } 1691 + 1692 + /* Precompute checksum seed for all metadata */ 1693 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, 1694 + JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1695 + journal->j_csum_seed = jbd2_chksum(journal, ~0, 1696 + sb->s_uuid, 1697 + sizeof(sb->s_uuid)); 1698 + } 1699 + 1700 + /* If enabling v1 checksums, downgrade superblock */ 1701 + if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) 1702 + sb->s_feature_incompat &= 1703 + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); 1752 1704 1753 1705 sb->s_feature_compat |= cpu_to_be32(compat); 1754 1706 sb->s_feature_ro_compat |= cpu_to_be32(ro); 1755 1707 sb->s_feature_incompat |= cpu_to_be32(incompat); 1756 1708 1757 1709 return 1; 1710 + #undef COMPAT_FEATURE_ON 1711 + #undef INCOMPAT_FEATURE_ON 1758 1712 } 1759 1713 1760 1714 /* ··· 2097 1975 */ 2098 1976 size_t journal_tag_bytes(journal_t *journal) 2099 1977 { 1978 + journal_block_tag_t tag; 1979 + size_t x = 0; 1980 + 1981 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1982 + x += sizeof(tag.t_checksum); 1983 + 2100 1984 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 2101 - return JBD2_TAG_SIZE64; 1985 + return x + JBD2_TAG_SIZE64; 2102 1986 else 2103 - return JBD2_TAG_SIZE32; 1987 + return x + JBD2_TAG_SIZE32; 2104 1988 } 2105 1989 2106 1990 /*
+122 -4
fs/jbd2/recovery.c
··· 174 174 return 0; 175 175 } 176 176 177 + static int jbd2_descr_block_csum_verify(journal_t *j, 178 + void *buf) 179 + { 180 + struct jbd2_journal_block_tail *tail; 181 + __u32 provided, calculated; 182 + 183 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 184 + return 1; 185 + 186 + tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - 187 + sizeof(struct jbd2_journal_block_tail)); 188 + provided = tail->t_checksum; 189 + tail->t_checksum = 0; 190 + calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 191 + tail->t_checksum = provided; 192 + 193 + provided = be32_to_cpu(provided); 194 + return provided == calculated; 195 + } 177 196 178 197 /* 179 198 * Count the number of in-use tags in a journal descriptor block. ··· 205 186 int nr = 0, size = journal->j_blocksize; 206 187 int tag_bytes = journal_tag_bytes(journal); 207 188 189 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 190 + size -= sizeof(struct jbd2_journal_block_tail); 191 + 208 192 tagp = &bh->b_data[sizeof(journal_header_t)]; 209 193 210 194 while ((tagp - bh->b_data + tag_bytes) <= size) { ··· 215 193 216 194 nr++; 217 195 tagp += tag_bytes; 218 - if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) 196 + if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 219 197 tagp += 16; 220 198 221 - if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG)) 199 + if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 222 200 break; 223 201 } 224 202 ··· 375 353 return 0; 376 354 } 377 355 356 + static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 357 + { 358 + struct commit_header *h; 359 + __u32 provided, calculated; 360 + 361 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 362 + return 1; 363 + 364 + h = buf; 365 + provided = h->h_chksum[0]; 366 + h->h_chksum[0] = 0; 367 + calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 368 + h->h_chksum[0] = provided; 369 + 370 + provided = be32_to_cpu(provided); 371 + return provided == calculated; 372 + } 373 + 374 + static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 375 + void *buf, __u32 sequence) 376 + { 377 + __u32 provided, calculated; 378 + 379 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 380 + return 1; 381 + 382 + sequence = cpu_to_be32(sequence); 383 + calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, 384 + sizeof(sequence)); 385 + calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize); 386 + provided = be32_to_cpu(tag->t_checksum); 387 + 388 + return provided == cpu_to_be32(calculated); 389 + } 390 + 378 391 static int do_one_pass(journal_t *journal, 379 392 struct recovery_info *info, enum passtype pass) 380 393 { ··· 423 366 int blocktype; 424 367 int tag_bytes = journal_tag_bytes(journal); 425 368 __u32 crc32_sum = ~0; /* Transactional Checksums */ 369 + int descr_csum_size = 0; 426 370 427 371 /* 428 372 * First thing is to establish what we expect to find in the log ··· 509 451 510 452 switch(blocktype) { 511 453 case JBD2_DESCRIPTOR_BLOCK: 454 + /* Verify checksum first */ 455 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, 456 + JBD2_FEATURE_INCOMPAT_CSUM_V2)) 457 + descr_csum_size = 458 + sizeof(struct jbd2_journal_block_tail); 459 + if (descr_csum_size > 0 && 460 + !jbd2_descr_block_csum_verify(journal, 461 + bh->b_data)) { 462 + err = -EIO; 463 + goto failed; 464 + } 465 + 512 466 /* If it is a valid descriptor block, replay it 513 467 * in pass REPLAY; if journal_checksums enabled, then 514 468 * calculate checksums in PASS_SCAN, otherwise, ··· 551 481 552 482 tagp = &bh->b_data[sizeof(journal_header_t)]; 553 483 while ((tagp - bh->b_data + tag_bytes) 554 - <= journal->j_blocksize) { 484 + <= journal->j_blocksize - descr_csum_size) { 555 485 unsigned long io_block; 556 486 557 487 tag = (journal_block_tag_t *) tagp; 558 - flags = be32_to_cpu(tag->t_flags); 488 + flags = be16_to_cpu(tag->t_flags); 559 489 560 490 io_block = next_log_block++; 561 491 wrap(journal, next_log_block); ··· 584 514 brelse(obh); 585 515 ++info->nr_revoke_hits; 586 516 goto skip_write; 517 + } 518 + 519 + /* Look for block corruption */ 520 + if (!jbd2_block_tag_csum_verify( 521 + journal, tag, obh->b_data, 522 + be32_to_cpu(tmp->h_sequence))) { 523 + brelse(obh); 524 + success = -EIO; 525 + printk(KERN_ERR "JBD: Invalid " 526 + "checksum recovering " 527 + "block %llu in log\n", 528 + blocknr); 529 + continue; 587 530 } 588 531 589 532 /* Find a buffer for the new ··· 733 650 } 734 651 crc32_sum = ~0; 735 652 } 653 + if (pass == PASS_SCAN && 654 + !jbd2_commit_block_csum_verify(journal, 655 + bh->b_data)) { 656 + info->end_transaction = next_commit_ID; 657 + 658 + if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 659 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 660 + journal->j_failed_commit = 661 + next_commit_ID; 662 + brelse(bh); 663 + break; 664 + } 665 + } 736 666 brelse(bh); 737 667 next_commit_ID++; 738 668 continue; ··· 802 706 return err; 803 707 } 804 708 709 + static int jbd2_revoke_block_csum_verify(journal_t *j, 710 + void *buf) 711 + { 712 + struct jbd2_journal_revoke_tail *tail; 713 + __u32 provided, calculated; 714 + 715 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 716 + return 1; 717 + 718 + tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - 719 + sizeof(struct jbd2_journal_revoke_tail)); 720 + provided = tail->r_checksum; 721 + tail->r_checksum = 0; 722 + calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 723 + tail->r_checksum = provided; 724 + 725 + provided = be32_to_cpu(provided); 726 + return provided == calculated; 727 + } 805 728 806 729 /* Scan a revoke record, marking all blocks mentioned as revoked. */ 807 730 ··· 834 719 header = (jbd2_journal_revoke_header_t *) bh->b_data; 835 720 offset = sizeof(jbd2_journal_revoke_header_t); 836 721 max = be32_to_cpu(header->r_count); 722 + 723 + if (!jbd2_revoke_block_csum_verify(journal, header)) 724 + return -EINVAL; 837 725 838 726 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 839 727 record_len = 8;
+26 -1
fs/jbd2/revoke.c
··· 578 578 struct jbd2_revoke_record_s *record, 579 579 int write_op) 580 580 { 581 + int csum_size = 0; 581 582 struct journal_head *descriptor; 582 583 int offset; 583 584 journal_header_t *header; ··· 593 592 descriptor = *descriptorp; 594 593 offset = *offsetp; 595 594 595 + /* Do we need to leave space at the end for a checksum? */ 596 + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 597 + csum_size = sizeof(struct jbd2_journal_revoke_tail); 598 + 596 599 /* Make sure we have a descriptor with space left for the record */ 597 600 if (descriptor) { 598 - if (offset == journal->j_blocksize) { 601 + if (offset >= journal->j_blocksize - csum_size) { 599 602 flush_descriptor(journal, descriptor, offset, write_op); 600 603 descriptor = NULL; 601 604 } ··· 636 631 *offsetp = offset; 637 632 } 638 633 634 + static void jbd2_revoke_csum_set(journal_t *j, 635 + struct journal_head *descriptor) 636 + { 637 + struct jbd2_journal_revoke_tail *tail; 638 + __u32 csum; 639 + 640 + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 641 + return; 642 + 643 + tail = (struct jbd2_journal_revoke_tail *) 644 + (jh2bh(descriptor)->b_data + j->j_blocksize - 645 + sizeof(struct jbd2_journal_revoke_tail)); 646 + tail->r_checksum = 0; 647 + csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, 648 + j->j_blocksize); 649 + tail->r_checksum = cpu_to_be32(csum); 650 + } 651 + 639 652 /* 640 653 * Flush a revoke descriptor out to the journal. If we are aborting, 641 654 * this is a noop; otherwise we are generating a buffer which needs to ··· 675 652 676 653 header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; 677 654 header->r_count = cpu_to_be32(offset); 655 + jbd2_revoke_csum_set(journal, descriptor); 656 + 678 657 set_buffer_jwrite(bh); 679 658 BUFFER_TRACE(bh, "write"); 680 659 set_buffer_dirty(bh);
+2 -2
fs/jbd2/transaction.c
··· 162 162 163 163 alloc_transaction: 164 164 if (!journal->j_running_transaction) { 165 - new_transaction = kmem_cache_alloc(transaction_cache, 166 - gfp_mask | __GFP_ZERO); 165 + new_transaction = kmem_cache_zalloc(transaction_cache, 166 + gfp_mask); 167 167 if (!new_transaction) { 168 168 /* 169 169 * If __GFP_FS is not present, then we may be
+56 -3
include/linux/jbd2.h
··· 31 31 #include <linux/mutex.h> 32 32 #include <linux/timer.h> 33 33 #include <linux/slab.h> 34 + #include <crypto/hash.h> 34 35 #endif 35 36 36 37 #define journal_oom_retry 1 ··· 148 147 #define JBD2_CRC32_CHKSUM 1 149 148 #define JBD2_MD5_CHKSUM 2 150 149 #define JBD2_SHA1_CHKSUM 3 150 + #define JBD2_CRC32C_CHKSUM 4 151 151 152 152 #define JBD2_CRC32_CHKSUM_SIZE 4 153 153 154 154 #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) 155 155 /* 156 156 * Commit block header for storing transactional checksums: 157 + * 158 + * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum* 159 + * fields are used to store a checksum of the descriptor and data blocks. 160 + * 161 + * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum 162 + * field is used to store crc32c(uuid+commit_block). Each journal metadata 163 + * block gets its own checksum, and data block checksums are stored in 164 + * journal_block_tag (in the descriptor). The other h_chksum* fields are 165 + * not used. 166 + * 167 + * Checksum v1 and v2 are mutually exclusive features. 157 168 */ 158 169 struct commit_header { 159 170 __be32 h_magic; ··· 188 175 typedef struct journal_block_tag_s 189 176 { 190 177 __be32 t_blocknr; /* The on-disk block number */ 191 - __be32 t_flags; /* See below */ 178 + __be16 t_checksum; /* truncated crc32c(uuid+seq+block) */ 179 + __be16 t_flags; /* See below */ 192 180 __be32 t_blocknr_high; /* most-significant high 32bits. */ 193 181 } journal_block_tag_t; 194 182 195 183 #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) 196 184 #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) 185 + 186 + /* Tail of descriptor block, for checksumming */ 187 + struct jbd2_journal_block_tail { 188 + __be32 t_checksum; /* crc32c(uuid+descr_block) */ 189 + }; 197 190 198 191 /* 199 192 * The revoke descriptor: used on disk to describe a series of blocks to ··· 211 192 __be32 r_count; /* Count of bytes used in the block */ 212 193 } jbd2_journal_revoke_header_t; 213 194 195 + /* Tail of revoke block, for checksumming */ 196 + struct jbd2_journal_revoke_tail { 197 + __be32 r_checksum; /* crc32c(uuid+revoke_block) */ 198 + }; 214 199 215 200 /* Definitions for the journal tag flags word: */ 216 201 #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ ··· 264 241 __be32 s_max_trans_data; /* Limit of data blocks per trans. */ 265 242 266 243 /* 0x0050 */ 267 - __u32 s_padding[44]; 244 + __u8 s_checksum_type; /* checksum type */ 245 + __u8 s_padding2[3]; 246 + __u32 s_padding[42]; 247 + __be32 s_checksum; /* crc32c(superblock) */ 268 248 269 249 /* 0x0100 */ 270 250 __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ ··· 289 263 #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 290 264 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 291 265 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 266 + #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 292 267 293 268 /* Features known to this kernel version: */ 294 269 #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM 295 270 #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 296 271 #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ 297 272 JBD2_FEATURE_INCOMPAT_64BIT | \ 298 - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) 273 + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ 274 + JBD2_FEATURE_INCOMPAT_CSUM_V2) 299 275 300 276 #ifdef __KERNEL__ 301 277 ··· 967 939 * superblock pointer here 968 940 */ 969 941 void *j_private; 942 + 943 + /* Reference to checksum algorithm driver via cryptoapi */ 944 + struct crypto_shash *j_chksum_driver; 945 + 946 + /* Precomputed journal UUID checksum for seeding other checksums */ 947 + __u32 j_csum_seed; 970 948 }; 971 949 972 950 /* ··· 1301 1267 #define BJ_Types 7 1302 1268 1303 1269 extern int jbd_blocks_per_page(struct inode *inode); 1270 + 1271 + static inline u32 jbd2_chksum(journal_t *journal, u32 crc, 1272 + const void *address, unsigned int length) 1273 + { 1274 + struct { 1275 + struct shash_desc shash; 1276 + char ctx[crypto_shash_descsize(journal->j_chksum_driver)]; 1277 + } desc; 1278 + int err; 1279 + 1280 + desc.shash.tfm = journal->j_chksum_driver; 1281 + desc.shash.flags = 0; 1282 + *(u32 *)desc.ctx = crc; 1283 + 1284 + err = crypto_shash_update(&desc.shash, address, length); 1285 + BUG_ON(err); 1286 + 1287 + return *(u32 *)desc.ctx; 1288 + } 1304 1289 1305 1290 #ifdef __KERNEL__ 1306 1291
+2
include/linux/jbd_common.h
··· 12 12 BH_State, /* Pins most journal_head state */ 13 13 BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ 14 14 BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ 15 + BH_Verified, /* Metadata block has been verified ok */ 15 16 BH_JBDPrivateStart, /* First bit available for private use by FS */ 16 17 }; 17 18 ··· 25 24 BUFFER_FNS(RevokeValid, revokevalid) 26 25 TAS_BUFFER_FNS(RevokeValid, revokevalid) 27 26 BUFFER_FNS(Freed, freed) 27 + BUFFER_FNS(Verified, verified) 28 28 29 29 static inline struct buffer_head *jh2bh(struct journal_head *jh) 30 30 {