diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-01 10:12:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-01 10:12:15 -0700 |
commit | 4edebed86690eb8db9af3ab85baf4a34e73266cc (patch) | |
tree | 8ab144b08f490f239fa62be52470860c9311664d | |
parent | 51eab603f5c86dd1eae4c525df3e7f7eeab401d6 (diff) | |
parent | 5e44f8c374dc4f8eadf61cd18b2c0d46bc87c1b7 (diff) | |
download | blackbird-op-linux-4edebed86690eb8db9af3ab85baf4a34e73266cc.tar.gz blackbird-op-linux-4edebed86690eb8db9af3ab85baf4a34e73266cc.zip |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull Ext4 updates from Theodore Ts'o:
"The major new feature added in this update is Darrick J Wong's
metadata checksum feature, which adds crc32 checksums to ext4's
metadata fields.
There is also the usual set of cleanups and bug fixes."
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (44 commits)
ext4: hole-punch use truncate_pagecache_range
jbd2: use kmem_cache_zalloc wrapper instead of flag
ext4: remove mb_groups before tearing down the buddy_cache
ext4: add ext4_mb_unload_buddy in the error path
ext4: don't trash state flags in EXT4_IOC_SETFLAGS
ext4: let getattr report the right blocks in delalloc+bigalloc
ext4: add missing save_error_info() to ext4_error()
ext4: add debugging trigger for ext4_error()
ext4: protect group inode free counting with group lock
ext4: use consistent ssize_t type in ext4_file_write()
ext4: fix format flag in ext4_ext_binsearch_idx()
ext4: cleanup in ext4_discard_allocated_blocks()
ext4: return ENOMEM when mounts fail due to lack of memory
ext4: remove redundundant "(char *) bh->b_data" casts
ext4: disallow hard-linked directory in ext4_lookup
ext4: fix potential integer overflow in alloc_flex_gd()
ext4: remove needs_recovery in ext4_mb_init()
ext4: force ro mount if ext4_setup_super() fails
ext4: fix potential NULL dereference in ext4_free_inodes_counts()
ext4/jbd2: add metadata checksumming to the list of supported features
...
-rw-r--r-- | fs/ext4/Kconfig | 2 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 41 | ||||
-rw-r--r-- | fs/ext4/bitmap.c | 83 | ||||
-rw-r--r-- | fs/ext4/dir.c | 12 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 130 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 24 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.c | 9 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 7 | ||||
-rw-r--r-- | fs/ext4/extents.c | 91 | ||||
-rw-r--r-- | fs/ext4/file.c | 2 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 81 | ||||
-rw-r--r-- | fs/ext4/inode.c | 119 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 19 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 30 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 44 | ||||
-rw-r--r-- | fs/ext4/namei.c | 445 | ||||
-rw-r--r-- | fs/ext4/resize.c | 71 | ||||
-rw-r--r-- | fs/ext4/super.c | 253 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 92 | ||||
-rw-r--r-- | fs/ext4/xattr.h | 4 | ||||
-rw-r--r-- | fs/jbd2/Kconfig | 2 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 70 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 132 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 126 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 27 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 4 | ||||
-rw-r--r-- | include/linux/jbd2.h | 59 | ||||
-rw-r--r-- | include/linux/jbd_common.h | 2 |
28 files changed, 1784 insertions, 197 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9ed1bb1f319f..c22f17021b6e 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -2,6 +2,8 @@ config EXT4_FS tristate "The Extended 4 (ext4) filesystem" select JBD2 select CRC16 + select CRYPTO + select CRYPTO_CRC32C help This is the next generation of the ext3 filesystem. diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index c45c41129a35..99b6324290db 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -168,12 +168,14 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ - if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { ext4_error(sb, "Checksum bad for group %u", block_group); ext4_free_group_clusters_set(sb, gdp, 0); ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); + ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, + EXT4_BLOCKS_PER_GROUP(sb) / 8); return; } memset(bh->b_data, 0, sb->s_blocksize); @@ -210,6 +212,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), sb->s_blocksize * 8, bh->b_data); + ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, + EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_group_desc_csum_set(sb, block_group, gdp); } /* Return the number of free blocks in a block group. It is used when @@ -276,9 +281,9 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } static int ext4_valid_block_bitmap(struct super_block *sb, - struct ext4_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh) + struct ext4_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) { ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; @@ -325,6 +330,23 @@ err_out: block_group, bitmap_blk); return 0; } + +void ext4_validate_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) +{ + if (buffer_verified(bh)) + return; + + ext4_lock_group(sb, block_group); + if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && + ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, + EXT4_BLOCKS_PER_GROUP(sb) / 8)) + set_buffer_verified(bh); + ext4_unlock_group(sb, block_group); +} + /** * ext4_read_block_bitmap() * @sb: super block @@ -355,12 +377,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } if (bitmap_uptodate(bh)) - return bh; + goto verify; lock_buffer(bh); if (bitmap_uptodate(bh)) { unlock_buffer(bh); - return bh; + goto verify; } ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { @@ -379,7 +401,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) */ set_bitmap_uptodate(bh); unlock_buffer(bh); - return bh; + goto verify; } /* * submit the buffer_head for reading @@ -390,6 +412,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) get_bh(bh); submit_bh(READ, bh); return bh; +verify: + ext4_validate_block_bitmap(sb, desc, block_group, bh); + return bh; } /* Returns 0 on success, 1 on error */ @@ -412,7 +437,7 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, } clear_buffer_new(bh); /* Panic or remount fs read-only if block bitmap is invalid */ - ext4_valid_block_bitmap(sb, desc, block_group, bh); + ext4_validate_block_bitmap(sb, desc, block_group, bh); return 0; } diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index fa3af81ac565..b319721da26a 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -29,3 +29,86 @@ unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars) #endif /* EXT4FS_DEBUG */ +int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, + struct ext4_group_desc *gdp, + struct buffer_head *bh, int sz) +{ + __u32 hi; + __u32 provided, calculated; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); + calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); + if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) { + hi = le16_to_cpu(gdp->bg_inode_bitmap_csum_hi); + provided |= (hi << 16); + } else + calculated &= 0xFFFF; + + return provided == calculated; +} + +void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, + struct ext4_group_desc *gdp, + struct buffer_head *bh, int sz) +{ + __u32 csum; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); + gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); + if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) + gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16); +} + +int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, + struct ext4_group_desc *gdp, + struct buffer_head *bh, int sz) +{ + __u32 hi; + __u32 provided, calculated; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); + calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); + if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) { + hi = le16_to_cpu(gdp->bg_block_bitmap_csum_hi); + provided |= (hi << 16); + } else + calculated &= 0xFFFF; + + if (provided == calculated) + return 1; + + ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group); + return 0; +} + +void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, + struct ext4_group_desc *gdp, + struct buffer_head *bh, int sz) +{ + __u32 csum; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); + gdp->bg_block_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); + if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) + gdp->bg_block_bitmap_csum_hi = cpu_to_le16(csum >> 16); +} diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index b86786202643..aa39e600d159 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -179,6 +179,18 @@ static int ext4_readdir(struct file *filp, continue; } + /* Check the checksum */ + if (!buffer_verified(bh) && + !ext4_dirent_csum_verify(inode, + (struct ext4_dir_entry *)bh->b_data)) { + EXT4_ERROR_FILE(filp, 0, "directory fails checksum " + "at offset %llu", + (unsigned long long)filp->f_pos); + filp->f_pos += sb->s_blocksize - offset; + continue; + } + set_buffer_verified(bh); + revalidate: /* If the dir block has changed since the last call to * readdir(2), then we might be pointing to an invalid diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c21b1de51afb..cfc4e01b3c83 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -29,6 +29,7 @@ #include <linux/wait.h> #include <linux/blockgroup_lock.h> #include <linux/percpu_counter.h> +#include <crypto/hash.h> #ifdef __KERNEL__ #include <linux/compat.h> #endif @@ -298,7 +299,9 @@ struct ext4_group_desc __le16 bg_free_inodes_count_lo;/* Free inodes count */ __le16 bg_used_dirs_count_lo; /* Directories count */ __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ - __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ + __le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */ + __le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */ + __le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */ __le16 bg_itable_unused_lo; /* Unused inodes count */ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ @@ -308,9 +311,19 @@ struct ext4_group_desc __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ __le16 bg_used_dirs_count_hi; /* Directories count MSB */ __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ - __u32 bg_reserved2[3]; + __le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */ + __le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */ + __le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */ + __u32 bg_reserved; }; +#define EXT4_BG_INODE_BITMAP_CSUM_HI_END \ + (offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \ + sizeof(__le16)) +#define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END \ + (offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \ + sizeof(__le16)) + /* * Structure of a flex block group info */ @@ -650,7 +663,8 @@ struct ext4_inode { __le16 l_i_file_acl_high; __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ - __u32 l_i_reserved2; + __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */ + __le16 l_i_reserved; } linux2; struct { __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ @@ -666,7 +680,7 @@ struct ext4_inode { } masix2; } osd2; /* OS dependent 2 */ __le16 i_extra_isize; - __le16 i_pad1; + __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ @@ -768,7 +782,7 @@ do { \ #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high -#define i_reserved2 osd2.linux2.l_i_reserved2 +#define i_checksum_lo osd2.linux2.l_i_checksum_lo #elif defined(__GNU__) @@ -908,6 +922,9 @@ struct ext4_inode_info { */ tid_t i_sync_tid; tid_t i_datasync_tid; + + /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ + __u32 i_csum_seed; }; /* @@ -1001,6 +1018,9 @@ extern void ext4_set_bits(void *bm, int cur, int len); #define EXT4_ERRORS_PANIC 3 /* Panic */ #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE +/* Metadata checksum algorithm codes */ +#define EXT4_CRC32C_CHKSUM 1 + /* * Structure of the super block */ @@ -1087,7 +1107,7 @@ struct ext4_super_block { __le64 s_mmp_block; /* Block for multi-mount protection */ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ __u8 s_log_groups_per_flex; /* FLEX_BG group size */ - __u8 s_reserved_char_pad; + __u8 s_checksum_type; /* metadata checksum algorithm used */ __le16 s_reserved_pad; __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ __le32 s_snapshot_inum; /* Inode number of active snapshot */ @@ -1113,7 +1133,8 @@ struct ext4_super_block { __le32 s_usr_quota_inum; /* inode for tracking user quota */ __le32 s_grp_quota_inum; /* inode for tracking group quota */ __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ - __le32 s_reserved[109]; /* Padding to the end of the block */ + __le32 s_reserved[108]; /* Padding to the end of the block */ + __le32 s_checksum; /* crc32c(superblock) */ }; #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) @@ -1176,6 +1197,7 @@ struct ext4_sb_info { struct proc_dir_entry *s_proc; struct kobject s_kobj; struct completion s_kobj_unregister; + struct super_block *s_sb; /* Journaling */ struct journal_s *s_journal; @@ -1266,6 +1288,12 @@ struct ext4_sb_info { /* record the last minlen when FITRIM is called. */ atomic_t s_last_trim_minblks; + + /* Reference to checksum algorithm driver via cryptoapi */ + struct crypto_shash *s_chksum_driver; + + /* Precomputed FS UUID checksum for seeding other checksums */ + __u32 s_csum_seed; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1414,6 +1442,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 +/* + * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When + * METADATA_CSUM is set, group descriptor checksums use the same algorithm as + * all other data structures' checksums. However, the METADATA_CSUM and + * GDT_CSUM bits are mutually exclusive. + */ #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 @@ -1461,7 +1495,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ - EXT4_FEATURE_RO_COMPAT_BIGALLOC) + EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) /* * Default values for user and/or group using reserved blocks @@ -1527,6 +1562,18 @@ struct ext4_dir_entry_2 { }; /* + * This is a bogus directory entry at the end of each leaf block that + * records checksums. + */ +struct ext4_dir_entry_tail { + __le32 det_reserved_zero1; /* Pretend to be unused */ + __le16 det_rec_len; /* 12 */ + __u8 det_reserved_zero2; /* Zero name length */ + __u8 det_reserved_ft; /* 0xDE, fake file type */ + __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ +}; + +/* * Ext4 directory file types. Only the low 3 bits are used. The * other bits are reserved for now. */ @@ -1541,6 +1588,8 @@ struct ext4_dir_entry_2 { #define EXT4_FT_MAX 8 +#define EXT4_FT_DIR_CSUM 0xDE + /* * EXT4_DIR_PAD defines the directory entries boundaries * @@ -1609,6 +1658,25 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) #define DX_HASH_HALF_MD4_UNSIGNED 4 #define DX_HASH_TEA_UNSIGNED 5 +static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, + const void *address, unsigned int length) +{ + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(sbi->s_chksum_driver)]; + } desc; + int err; + + desc.shash.tfm = sbi->s_chksum_driver; + desc.shash.flags = 0; + *(u32 *)desc.ctx = crc; + + err = crypto_shash_update(&desc.shash, address, length); + BUG_ON(err); + + return *(u32 *)desc.ctx; +} + #ifdef __KERNEL__ /* hash info structure used by the directory hash */ @@ -1741,7 +1809,8 @@ struct mmp_struct { __le16 mmp_check_interval; __le16 mmp_pad1; - __le32 mmp_pad2[227]; + __le32 mmp_pad2[226]; + __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ }; /* arguments passed to the mmp thread */ @@ -1784,8 +1853,24 @@ struct mmpd_data { /* bitmap.c */ extern unsigned int ext4_count_free(struct buffer_head *, unsigned); +void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, + struct ext4_group_desc *gdp, + struct buffer_head *bh, int sz); +int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, + struct ext4_group_desc *gdp, + struct buffer_head *bh, int sz); +void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, + struct ext4_group_desc *gdp, + struct buffer_head *bh, int sz); +int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, + struct ext4_group_desc *gdp, + struct buffer_head *bh, int sz); /* balloc.c */ +extern void ext4_validate_block_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh); extern unsigned int ext4_block_group(struct super_block *sb, ext4_fsblk_t blocknr); extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, @@ -1864,7 +1949,7 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); /* mballoc.c */ extern long ext4_mb_stats; extern long ext4_mb_max_to_scan; -extern int ext4_mb_init(struct super_block *, int); +extern int ext4_mb_init(struct super_block *); extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); @@ -1936,6 +2021,8 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); extern int ext4_ext_migrate(struct inode *); /* namei.c */ +extern int ext4_dirent_csum_verify(struct inode *inode, + struct ext4_dir_entry *dirent); extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, @@ -1950,6 +2037,10 @@ extern int ext4_group_extend(struct super_block *sb, extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ +extern int ext4_superblock_csum_verify(struct super_block *sb, + struct ext4_super_block *es); +extern void ext4_superblock_csum_set(struct super_block *sb, + struct ext4_super_block *es); extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvzalloc(size_t size, gfp_t flags); extern void ext4_kvfree(void *ptr); @@ -2025,10 +2116,17 @@ extern void ext4_used_dirs_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count); extern void ext4_itable_unused_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count); -extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, - struct ext4_group_desc *gdp); -extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, +extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group, struct ext4_group_desc *gdp); +extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group, + struct ext4_group_desc *gdp); + +static inline int ext4_has_group_desc_csum(struct super_block *sb) +{ + return EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM | + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); +} static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { @@ -2225,6 +2323,9 @@ static inline void ext4_unlock_group(struct super_block *sb, static inline void ext4_mark_super_dirty(struct super_block *sb) { + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + ext4_superblock_csum_set(sb, es); if (EXT4_SB(sb)->s_journal == NULL) sb->s_dirt =1; } @@ -2314,6 +2415,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, /* mmp.c */ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); +extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); +extern int ext4_mmp_csum_verify(struct super_block *sb, + struct mmp_struct *mmp); /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ enum ext4_state_bits { diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 0f58b86e3a02..cb1b2c919963 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -63,9 +63,22 @@ * ext4_inode has i_block array (60 bytes total). * The first 12 bytes store ext4_extent_header; * the remainder stores an array of ext4_extent. + * For non-inode extent blocks, ext4_extent_tail + * follows the array. */ /* + * This is the extent tail on-disk structure. + * All other extent structures are 12 bytes long. It turns out that + * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which + * covers all valid ext4 block sizes. Therefore, this tail structure can be + * crammed into the end of the block without having to rebalance the tree. + */ +struct ext4_extent_tail { + __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ +}; + +/* * This is the extent on-disk structure. * It's used at the bottom of the tree. */ @@ -101,6 +114,17 @@ struct ext4_extent_header { #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) +#define EXT4_EXTENT_TAIL_OFFSET(hdr) \ + (sizeof(struct ext4_extent_header) + \ + (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max))) + +static inline struct ext4_extent_tail * +find_ext4_extent_tail(struct ext4_extent_header *eh) +{ + return (struct ext4_extent_tail *)(((void *)eh) + + EXT4_EXTENT_TAIL_OFFSET(eh)); +} + /* * Array of ext4_ext_path contains path to some extent. * Creation/lookup routines use it for traversal/splitting/etc. diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index aca179017582..90f7c2e84db1 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -138,16 +138,23 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, } int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb) + handle_t *handle, struct super_block *sb, + int now) { struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; if (ext4_handle_valid(handle)) { + ext4_superblock_csum_set(sb, + (struct ext4_super_block *)bh->b_data); err = jbd2_journal_dirty_metadata(handle, bh); if (err) ext4_journal_abort_handle(where, line, __func__, bh, handle, err); + } else if (now) { + ext4_superblock_csum_set(sb, + (struct ext4_super_block *)bh->b_data); + mark_buffer_dirty(bh); } else sb->s_dirt = 1; return err; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 83b20fcf9400..f440e8f1841f 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -213,7 +213,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, struct buffer_head *bh); int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb); + handle_t *handle, struct super_block *sb, + int now); #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) @@ -225,8 +226,10 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ (bh)) +#define ext4_handle_dirty_super_now(handle, sb) \ + __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 1) #define ext4_handle_dirty_super(handle, sb) \ - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) + __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 0) handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index abcdeab67f52..91341ec6e06a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -52,6 +52,46 @@ #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ +static __le32 ext4_extent_block_csum(struct inode *inode, + struct ext4_extent_header *eh) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + __u32 csum; + + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh, + EXT4_EXTENT_TAIL_OFFSET(eh)); + return cpu_to_le32(csum); +} + +static int ext4_extent_block_csum_verify(struct inode *inode, + struct ext4_extent_header *eh) +{ + struct ext4_extent_tail *et; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + et = find_ext4_extent_tail(eh); + if (et->et_checksum != ext4_extent_block_csum(inode, eh)) + return 0; + return 1; +} + +static void ext4_extent_block_csum_set(struct inode *inode, + struct ext4_extent_header *eh) +{ + struct ext4_extent_tail *et; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + et = find_ext4_extent_tail(eh); + et->et_checksum = ext4_extent_block_csum(inode, eh); +} + static int ext4_split_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, @@ -117,6 +157,7 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, { int err; if (path->p_bh) { + ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); @@ -391,6 +432,12 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid extent entries"; goto corrupted; } + /* Verify checksum on non-root extent tree nodes */ + if (ext_depth(inode) != depth && + !ext4_extent_block_csum_verify(inode, eh)) { + error_msg = "extent tree corrupted"; + goto corrupted; + } return 0; corrupted: @@ -412,6 +459,26 @@ int ext4_ext_check_inode(struct inode *inode) return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); } +static int __ext4_ext_check_block(const char *function, unsigned int line, + struct inode *inode, + struct ext4_extent_header *eh, + int depth, + struct buffer_head *bh) +{ + int ret; + + if (buffer_verified(bh)) + return 0; + ret = ext4_ext_check(inode, eh, depth); + if (ret) + return ret; + set_buffer_verified(bh); + return ret; +} + +#define ext4_ext_check_block(inode, eh, depth, bh) \ + __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh) + #ifdef EXT_DEBUG static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) { @@ -536,7 +603,7 @@ ext4_ext_binsearch_idx(struct inode *inode, } path->p_idx = l - 1; - ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), + ext_debug(" -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), ext4_idx_pblock(path->p_idx)); #ifdef CHECK_BINSEARCH @@ -668,8 +735,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, i = depth; /* walk through the tree */ while (i) { - int need_to_validate = 0; - ext_debug("depth %d: num %d, max %d\n", ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); @@ -688,8 +753,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, put_bh(bh); goto err; } - /* validate the extent entries */ - need_to_validate = 1; } eh = ext_block_hdr(bh); ppos++; @@ -703,7 +766,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_hdr = eh; i--; - if (need_to_validate && ext4_ext_check(inode, eh, i)) + if (ext4_ext_check_block(inode, eh, i, bh)) goto err; } @@ -914,6 +977,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, le16_add_cpu(&neh->eh_entries, m); } + ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -992,6 +1056,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, sizeof(struct ext4_extent_idx) * m); le16_add_cpu(&neh->eh_entries, m); } + ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1089,6 +1154,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, else neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; + ext4_extent_block_csum_set(inode, neh); set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1344,7 +1410,8 @@ got_index: return -EIO; eh = ext_block_hdr(bh); /* subtract from p_depth to get proper eh_depth */ - if (ext4_ext_check(inode, eh, path->p_depth - depth)) { + if (ext4_ext_check_block(inode, eh, + path->p_depth - depth, bh)) { put_bh(bh); return -EIO; } @@ -1357,7 +1424,7 @@ got_index: if (bh == NULL) return -EIO; eh = ext_block_hdr(bh); - if (ext4_ext_check(inode, eh, path->p_depth - depth)) { + if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) { put_bh(bh); return -EIO; } @@ -2644,8 +2711,8 @@ cont: err = -EIO; break; } - if (ext4_ext_check(inode, ext_block_hdr(bh), - depth - i - 1)) { + if (ext4_ext_check_block(inode, ext_block_hdr(bh), + depth - i - 1, bh)) { err = -EIO; break; } @@ -4722,8 +4789,8 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) /* Now release the pages */ if (last_page_offset > first_page_offset) { - truncate_inode_pages_range(mapping, first_page_offset, - last_page_offset-1); + truncate_pagecache_range(inode, first_page_offset, + last_page_offset - 1); } /* finish any pending end_io work */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index cb70f1812a70..8c7642a00054 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -95,7 +95,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; int unaligned_aio = 0; - int ret; + ssize_t ret; /* * If we have encountered a bitmap-format file, the size limit diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 9f9acac6c43f..d48e8b14928c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -70,24 +70,27 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, ext4_group_t block_group, struct ext4_group_desc *gdp) { - struct ext4_sb_info *sbi = EXT4_SB(sb); - J_ASSERT_BH(bh, buffer_locked(bh)); /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ - if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { ext4_error(sb, "Checksum bad for group %u", block_group); ext4_free_group_clusters_set(sb, gdp, 0); ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); memset(bh->b_data, 0xff, sb->s_blocksize); + ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, + EXT4_INODES_PER_GROUP(sb) / 8); return 0; } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); + ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, + EXT4_INODES_PER_GROUP(sb) / 8); + ext4_group_desc_csum_set(sb, block_group, gdp); return EXT4_INODES_PER_GROUP(sb); } @@ -128,12 +131,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) return NULL; } if (bitmap_uptodate(bh)) - return bh; + goto verify; lock_buffer(bh); if (bitmap_uptodate(bh)) { unlock_buffer(bh); - return bh; + goto verify; } ext4_lock_group(sb, block_group); @@ -141,6 +144,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_init_inode_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); + set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); return bh; @@ -154,7 +158,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) */ set_bitmap_uptodate(bh); unlock_buffer(bh); - return bh; + goto verify; } /* * submit the buffer_head for reading @@ -171,6 +175,20 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) block_group, bitmap_blk); return NULL; } + +verify: + ext4_lock_group(sb, block_group); + if (!buffer_verified(bh) && + !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, + EXT4_INODES_PER_GROUP(sb) / 8)) { + ext4_unlock_group(sb, block_group); + put_bh(bh); + ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " + "inode_bitmap = %llu", block_group, bitmap_blk); + return NULL; + } + ext4_unlock_group(sb, block_group); + set_buffer_verified(bh); return bh; } @@ -276,7 +294,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ext4_used_dirs_set(sb, gdp, count); percpu_counter_dec(&sbi->s_dirs_counter); } - gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); + ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, + EXT4_INODES_PER_GROUP(sb) / 8); + ext4_group_desc_csum_set(sb, block_group, gdp); ext4_unlock_group(sb, block_group); percpu_counter_inc(&sbi->s_freeinodes_counter); @@ -488,10 +508,12 @@ fallback_retry: for (i = 0; i < ngroups; i++) { grp = (parent_group + i) % ngroups; desc = ext4_get_group_desc(sb, grp, NULL); - grp_free = ext4_free_inodes_count(sb, desc); - if (desc && grp_free && grp_free >= avefreei) { - *group = grp; - return 0; + if (desc) { + grp_free = ext4_free_inodes_count(sb, desc); + if (grp_free && grp_free >= avefreei) { + *group = grp; + return 0; + } } } @@ -709,7 +731,7 @@ repeat_in_this_group: got: /* We may have to initialize the block bitmap if it isn't already */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && + if (ext4_has_group_desc_csum(sb) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { struct buffer_head *block_bitmap_bh; @@ -731,8 +753,11 @@ got: gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, - gdp); + ext4_block_bitmap_csum_set(sb, group, gdp, + block_bitmap_bh, + EXT4_BLOCKS_PER_GROUP(sb) / + 8); + ext4_group_desc_csum_set(sb, group, gdp); } ext4_unlock_group(sb, group); @@ -751,7 +776,7 @@ got: goto fail; /* Update the relevant bg descriptor fields */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + if (ext4_has_group_desc_csum(sb)) { int free; struct ext4_group_info *grp = ext4_get_group_info(sb, group); @@ -772,7 +797,10 @@ got: ext4_itable_unused_set(sb, gdp, (EXT4_INODES_PER_GROUP(sb) - ino)); up_read(&grp->alloc_sem); + } else { + ext4_lock_group(sb, group); } + ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); if (S_ISDIR(mode)) { ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); @@ -782,10 +810,12 @@ got: atomic_inc(&sbi->s_flex_groups[f].used_dirs); } } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); - ext4_unlock_group(sb, group); + if (ext4_has_group_desc_csum(sb)) { + ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh, + EXT4_INODES_PER_GROUP(sb) / 8); + ext4_group_desc_csum_set(sb, group, gdp); } + ext4_unlock_group(sb, group); BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); @@ -850,6 +880,19 @@ got: inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); + /* Precompute checksum seed for inode metadata */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + __u32 csum; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + __le32 inum = cpu_to_le32(inode->i_ino); + __le32 gen = cpu_to_le32(inode->i_generation); + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, + sizeof(inum)); + ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, + sizeof(gen)); + } + ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ ext4_set_inode_state(inode, EXT4_STATE_NEW); @@ -1140,7 +1183,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, skip_zeroout: ext4_lock_group(sb, group); gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); + ext4_group_desc_csum_set(sb, group, gdp); ext4_unlock_group(sb, group); BUFFER_TRACE(group_desc_bh, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 07eaf565fdcb..02bc8cbe7281 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -47,6 +47,73 @@ #define MPAGE_DA_EXTENT_TAIL 0x01 +static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, + struct ext4_inode_info *ei) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + __u16 csum_lo; + __u16 csum_hi = 0; + __u32 csum; + + csum_lo = raw->i_checksum_lo; + raw->i_checksum_lo = 0; + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { + csum_hi = raw->i_checksum_hi; + raw->i_checksum_hi = 0; + } + + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, + EXT4_INODE_SIZE(inode->i_sb)); + + raw->i_checksum_lo = csum_lo; + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) + raw->i_checksum_hi = csum_hi; + + return csum; +} + +static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, + struct ext4_inode_info *ei) +{ + __u32 provided, calculated; + + if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != + cpu_to_le32(EXT4_OS_LINUX) || + !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + provided = le16_to_cpu(raw->i_checksum_lo); + calculated = ext4_inode_csum(inode, raw, ei); + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) + provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16; + else + calculated &= 0xFFFF; + + return provided == calculated; +} + +static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, + struct ext4_inode_info *ei) +{ + __u32 csum; + + if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != + cpu_to_le32(EXT4_OS_LINUX) || + !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + csum = ext4_inode_csum(inode, raw, ei); + raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF); + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) + raw->i_checksum_hi = cpu_to_le16(csum >> 16); +} + static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { @@ -3517,8 +3584,7 @@ make_io: b = table; end = b + EXT4_SB(sb)->s_inode_readahead_blks; num = EXT4_INODES_PER_GROUP(sb); - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + if (ext4_has_group_desc_csum(sb)) num -= ext4_itable_unused_count(sb, gdp); table += num / inodes_per_block; if (end > table) @@ -3646,6 +3712,39 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (ret < 0) goto bad_inode; raw_inode = ext4_raw_inode(&iloc); + + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { + ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); + if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > + EXT4_INODE_SIZE(inode->i_sb)) { + EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, + EXT4_INODE_SIZE(inode->i_sb)); + ret = -EIO; + goto bad_inode; + } + } else + ei->i_extra_isize = 0; + + /* Precompute checksum seed for inode metadata */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + __u32 csum; + __le32 inum = cpu_to_le32(inode->i_ino); + __le32 gen = raw_inode->i_generation; + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, + sizeof(inum)); + ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, + sizeof(gen)); + } + + if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { + EXT4_ERROR_INODE(inode, "checksum invalid"); + ret = -EIO; + goto bad_inode; + } + inode->i_mode = le16_to_cpu(raw_inode->i_mode); i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); @@ -3725,12 +3824,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { - ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); - if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT4_INODE_SIZE(inode->i_sb)) { - ret = -EIO; - goto bad_inode; - } if (ei->i_extra_isize == 0) { /* The extra space is currently unused. Use it. */ ei->i_extra_isize = sizeof(struct ext4_inode) - @@ -3742,8 +3835,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) ext4_set_inode_state(inode, EXT4_STATE_XATTR); } - } else - ei->i_extra_isize = 0; + } EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); @@ -3942,7 +4034,7 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_SET_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE); ext4_handle_sync(handle); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_super_now(handle, sb); } } raw_inode->i_generation = cpu_to_le32(inode->i_generation); @@ -3969,6 +4061,8 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); } + ext4_inode_csum_set(inode, raw_inode, ei); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); rc = ext4_handle_dirty_metadata(handle, NULL, bh); if (!err) @@ -4213,7 +4307,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * will return the blocks that include the delayed allocation * blocks for this file. */ - delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; + delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), + EXT4_I(inode)->i_reserved_data_blocks); stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; return 0; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 6eee25591b81..8ad112ae0ade 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) handle_t *handle = NULL; int err, migrate = 0; struct ext4_iloc iloc; - unsigned int oldflags; + unsigned int oldflags, mask, i; unsigned int jflag; if (!inode_owner_or_capable(inode)) @@ -115,8 +115,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err) goto flags_err; - flags = flags & EXT4_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; + for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { + if (!(mask & EXT4_FL_USER_MODIFIABLE)) + continue; + if (mask & flags) + ext4_set_inode_flag(inode, i); + else + ext4_clear_inode_flag(inode, i); + } ei->i_flags = flags; ext4_set_inode_flags(inode); @@ -152,6 +158,13 @@ flags_out: if (!inode_owner_or_capable(inode)) return -EPERM; + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + ext4_warning(sb, "Setting inode version is not " + "supported with metadata_csum enabled."); + return -ENOTTY; + } + err = mnt_want_write_file(filp); if (err) return err; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 99ab428bcfa0..1cd6994fc446 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -788,7 +788,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) int first_block; struct super_block *sb; struct buffer_head *bhs; - struct buffer_head **bh; + struct buffer_head **bh = NULL; struct inode *inode; char *data; char *bitmap; @@ -2375,7 +2375,7 @@ static int ext4_groupinfo_create_slab(size_t size) return 0; } -int ext4_mb_init(struct super_block *sb, int needs_recovery) +int ext4_mb_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned i, j; @@ -2517,6 +2517,9 @@ int ext4_mb_release(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); + if (sbi->s_proc) + remove_proc_entry("mb_groups", sbi->s_proc); + if (sbi->s_group_info) { for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); @@ -2564,8 +2567,6 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); - if (sbi->s_proc) - remove_proc_entry("mb_groups", sbi->s_proc); return 0; } @@ -2797,7 +2798,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; ext4_free_group_clusters_set(sb, gdp, len); - gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); + ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh, + EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); @@ -3071,13 +3074,9 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) { struct ext4_prealloc_space *pa = ac->ac_pa; - int len; - - if (pa && pa->pa_type == MB_INODE_PA) { - len = ac->ac_b_ex.fe_len; - pa->pa_free += len; - } + if (pa && pa->pa_type == MB_INODE_PA) + pa->pa_free += ac->ac_b_ex.fe_len; } /* @@ -4636,6 +4635,7 @@ do_more: */ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); if (!new_entry) { + ext4_mb_unload_buddy(&e4b); err = -ENOMEM; goto error_return; } @@ -4659,7 +4659,9 @@ do_more: ret = ext4_free_group_clusters(sb, gdp) + count_clusters; ext4_free_group_clusters_set(sb, gdp, ret); - gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); + ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, + EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_group_desc_csum_set(sb, block_group, gdp); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); @@ -4803,7 +4805,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, mb_free_blocks(NULL, &e4b, bit, count); blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); ext4_free_group_clusters_set(sb, desc, blk_free_count); - desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); + ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh, + EXT4_BLOCKS_PER_GROUP(sb) / 8); + ext4_group_desc_csum_set(sb, block_group, desc); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, EXT4_B2C(sbi, blocks_freed)); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index ed6548d89165..f99a1311e847 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -6,12 +6,45 @@ #include "ext4.h" +/* Checksumming functions */ +static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + int offset = offsetof(struct mmp_struct, mmp_checksum); + __u32 csum; + + csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset); + + return cpu_to_le32(csum); +} + +int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) +{ + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); +} + +void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) +{ + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); +} + /* * Write the MMP block using WRITE_SYNC to try to get the block on-disk * faster. */ -static int write_mmp_block(struct buffer_head *bh) +static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) { + struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); + + ext4_mmp_csum_set(sb, mmp); mark_buffer_dirty(bh); lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; @@ -59,7 +92,8 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, } mmp = (struct mmp_struct *)((*bh)->b_data); - if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) + if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC || + !ext4_mmp_csum_verify(sb, mmp)) return -EINVAL; return 0; @@ -120,7 +154,7 @@ static int kmmpd(void *data) mmp->mmp_time = cpu_to_le64(get_seconds()); last_update_time = jiffies; - retval = write_mmp_block(bh); + retval = write_mmp_block(sb, bh); /* * Don't spew too many error messages. Print one every * (s_mmp_update_interval * 60) seconds. @@ -200,7 +234,7 @@ static int kmmpd(void *data) mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); mmp->mmp_time = cpu_to_le64(get_seconds()); - retval = write_mmp_block(bh); + retval = write_mmp_block(sb, bh); failed: kfree(data); @@ -299,7 +333,7 @@ skip: seq = mmp_new_seq(); mmp->mmp_seq = cpu_to_le32(seq); - retval = write_mmp_block(bh); + retval = write_mmp_block(sb, bh); if (retval) goto failed; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e2a3f4b0ff78..5845cd97bf8b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -145,6 +145,14 @@ struct dx_map_entry u16 size; }; +/* + * This goes at the end of each htree block. + */ +struct dx_tail { + u32 dt_reserved; + __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */ +}; + static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); static inline unsigned dx_get_hash(struct dx_entry *entry); @@ -180,6 +188,230 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); +/* checksumming functions */ +#define EXT4_DIRENT_TAIL(block, blocksize) \ + ((struct ext4_dir_entry_tail *)(((void *)(block)) + \ + ((blocksize) - \ + sizeof(struct ext4_dir_entry_tail)))) + +static void initialize_dirent_tail(struct ext4_dir_entry_tail *t, + unsigned int blocksize) +{ + memset(t, 0, sizeof(struct ext4_dir_entry_tail)); + t->det_rec_len = ext4_rec_len_to_disk( + sizeof(struct ext4_dir_entry_tail), blocksize); + t->det_reserved_ft = EXT4_FT_DIR_CSUM; +} + +/* Walk through a dirent block to find a checksum "dirent" at the tail */ +static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, + struct ext4_dir_entry *de) +{ + struct ext4_dir_entry_tail *t; + +#ifdef PARANOID + struct ext4_dir_entry *d, *top; + + d = de; + top = (struct ext4_dir_entry *)(((void *)de) + + (EXT4_BLOCK_SIZE(inode->i_sb) - + sizeof(struct ext4_dir_entry_tail))); + while (d < top && d->rec_len) + d = (struct ext4_dir_entry *)(((void *)d) + + le16_to_cpu(d->rec_len)); + + if (d != top) + return NULL; + + t = (struct ext4_dir_entry_tail *)d; +#else + t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb)); +#endif + + if (t->det_reserved_zero1 || + le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) || + t->det_reserved_zero2 || + t->det_reserved_ft != EXT4_FT_DIR_CSUM) + return NULL; + + return t; +} + +static __le32 ext4_dirent_csum(struct inode *inode, + struct ext4_dir_entry *dirent, int size) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); + __u32 csum; + + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); + return cpu_to_le32(csum); +} + +int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) +{ + struct ext4_dir_entry_tail *t; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + t = get_dirent_tail(inode, dirent); + if (!t) { + EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " + "leaf for checksum. Please run e2fsck -D."); + return 0; + } + + if (t->det_checksum != ext4_dirent_csum(inode, dirent, + (void *)t - (void *)dirent)) + return 0; + + return 1; +} + +static void ext4_dirent_csum_set(struct inode *inode, + struct ext4_dir_entry *dirent) +{ + struct ext4_dir_entry_tail *t; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + t = get_dirent_tail(inode, dirent); + if (!t) { + EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " + "leaf for checksum. Please run e2fsck -D."); + return; + } + + t->det_checksum = ext4_dirent_csum(inode, dirent, + (void *)t - (void *)dirent); +} + +static inline int ext4_handle_dirty_dirent_node(handle_t *handle, + struct inode *inode, + struct buffer_head *bh) +{ + ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); + return ext4_handle_dirty_metadata(handle, inode, bh); +} + +static struct dx_countlimit *get_dx_countlimit(struct inode *inode, + struct ext4_dir_entry *dirent, + int *offset) +{ + struct ext4_dir_entry *dp; + struct dx_root_info *root; + int count_offset; + + if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb)) + count_offset = 8; + else if (le16_to_cpu(dirent->rec_len) == 12) { + dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); + if (le16_to_cpu(dp->rec_len) != + EXT4_BLOCK_SIZE(inode->i_sb) - 12) + return NULL; + root = (struct dx_root_info *)(((void *)dp + 12)); + if (root->reserved_zero || + root->info_length != sizeof(struct dx_root_info)) + return NULL; + count_offset = 32; + } else + return NULL; + + if (offset) + *offset = count_offset; + return (struct dx_countlimit *)(((void *)dirent) + count_offset); +} + +static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, + int count_offset, int count, struct dx_tail *t) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); + __u32 csum, old_csum; + int size; + + size = count_offset + (count * sizeof(struct dx_entry)); + old_csum = t->dt_checksum; + t->dt_checksum = 0; + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); + csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail)); + t->dt_checksum = old_csum; + + return cpu_to_le32(csum); +} + +static int ext4_dx_csum_verify(struct inode *inode, + struct ext4_dir_entry *dirent) +{ + struct dx_countlimit *c; + struct dx_tail *t; + int count_offset, limit, count; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + c = get_dx_countlimit(inode, dirent, &count_offset); + if (!c) { + EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); + return 1; + } + limit = le16_to_cpu(c->limit); + count = le16_to_cpu(c->count); + if (count_offset + (limit * sizeof(struct dx_entry)) > + EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { + EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " + "tree checksum found. Run e2fsck -D."); + return 1; + } + t = (struct dx_tail *)(((struct dx_entry *)c) + limit); + + if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset, + count, t)) + return 0; + return 1; +} + +static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) +{ + struct dx_countlimit *c; + struct dx_tail *t; + int count_offset, limit, count; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + c = get_dx_countlimit(inode, dirent, &count_offset); + if (!c) { + EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); + return; + } + limit = le16_to_cpu(c->limit); + count = le16_to_cpu(c->count); + if (count_offset + (limit * sizeof(struct dx_entry)) > + EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { + EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " + "tree checksum. Run e2fsck -D."); + return; + } + t = (struct dx_tail *)(((struct dx_entry *)c) + limit); + + t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t); +} + +static inline int ext4_handle_dirty_dx_node(handle_t *handle, + struct inode *inode, + struct buffer_head *bh) +{ + ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); + return ext4_handle_dirty_metadata(handle, inode, bh); +} + /* * p is at least 6 bytes before the end of page */ @@ -239,12 +471,20 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - EXT4_DIR_REC_LEN(2) - infosize; + + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit(struct inode *dir) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); + + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } @@ -390,6 +630,15 @@ dx_probe(const struct qstr *d_name, struct inode *dir, goto fail; } + if (!buffer_verified(bh) && + !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) { + ext4_warning(dir->i_sb, "Root failed checksum"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + set_buffer_verified(bh); + entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); @@ -450,6 +699,17 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; + + if (!buffer_verified(bh) && + !ext4_dx_csum_verify(dir, + (struct ext4_dir_entry *)bh->b_data)) { + ext4_warning(dir->i_sb, "Node failed checksum"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + set_buffer_verified(bh); + if (dx_get_limit(entries) != dx_node_limit (dir)) { ext4_warning(dir->i_sb, "dx entry: limit != node limit"); @@ -549,6 +809,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 0, &err))) return err; /* Failure */ + + if (!buffer_verified(bh) && + !ext4_dx_csum_verify(dir, + (struct ext4_dir_entry *)bh->b_data)) { + ext4_warning(dir->i_sb, "Node failed checksum"); + return -EIO; + } + set_buffer_verified(bh); + p++; brelse(p->bh); p->bh = bh; @@ -577,6 +846,11 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) return err; + if (!buffer_verified(bh) && + !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) + return -EIO; + set_buffer_verified(bh); + de = (struct ext4_dir_entry_2 *) bh->b_data; top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - @@ -936,6 +1210,15 @@ restart: brelse(bh); goto next; } + if (!buffer_verified(bh) && + !ext4_dirent_csum_verify(dir, + (struct ext4_dir_entry *)bh->b_data)) { + EXT4_ERROR_INODE(dir, "checksumming directory " + "block %lu", (unsigned long)block); + brelse(bh); + goto next; + } + set_buffer_verified(bh); i = search_dirblock(bh, dir, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { @@ -987,6 +1270,16 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q if (!(bh = ext4_bread(NULL, dir, block, 0, err))) goto errout; + if (!buffer_verified(bh) && + !ext4_dirent_csum_verify(dir, + (struct ext4_dir_entry *)bh->b_data)) { + EXT4_ERROR_INODE(dir, "checksumming directory " + "block %lu", (unsigned long)block); + brelse(bh); + *err = -EIO; + goto errout; + } + set_buffer_verified(bh); retval = search_dirblock(bh, dir, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); @@ -1037,6 +1330,12 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); return ERR_PTR(-EIO); } + if (unlikely(ino == dir->i_ino)) { + EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir", + dentry->d_name.len, + dentry->d_name.name); + return ERR_PTR(-EIO); + } inode = ext4_iget(dir->i_sb, ino); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, @@ -1156,8 +1455,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, char *data1 = (*bh)->b_data, *data2; unsigned split, move, size; struct ext4_dir_entry_2 *de = NULL, *de2; + struct ext4_dir_entry_tail *t; + int csum_size = 0; int err = 0, i; + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + csum_size = sizeof(struct ext4_dir_entry_tail); + bh2 = ext4_append (handle, dir, &newblock, &err); if (!(bh2)) { brelse(*bh); @@ -1204,10 +1509,20 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); de = dx_pack_dirents(data1, blocksize); - de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, + de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - + (char *) de, blocksize); - de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2, + de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - + (char *) de2, blocksize); + if (csum_size) { + t = EXT4_DIRENT_TAIL(data2, blocksize); + initialize_dirent_tail(t, blocksize); + + t = EXT4_DIRENT_TAIL(data1, blocksize); + initialize_dirent_tail(t, blocksize); + } + dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); @@ -1218,10 +1533,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_handle_dirty_metadata(handle, dir, bh2); + err = ext4_handle_dirty_dirent_node(handle, dir, bh2); if (err) goto journal_error; - err = ext4_handle_dirty_metadata(handle, dir, frame->bh); + err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (err) goto journal_error; brelse(bh2); @@ -1258,11 +1573,16 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, unsigned short reclen; int nlen, rlen, err; char *top; + int csum_size = 0; + + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + csum_size = sizeof(struct ext4_dir_entry_tail); reclen = EXT4_DIR_REC_LEN(namelen); if (!de) { de = (struct ext4_dir_entry_2 *)bh->b_data; - top = bh->b_data + blocksize - reclen; + top = bh->b_data + (blocksize - csum_size) - reclen; while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) return -EIO; @@ -1295,11 +1615,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, de = de1; } de->file_type = EXT4_FT_UNKNOWN; - if (inode) { - de->inode = cpu_to_le32(inode->i_ino); - ext4_set_de_type(dir->i_sb, de, inode->i_mode); - } else - de->inode = 0; + de->inode = cpu_to_le32(inode->i_ino); + ext4_set_de_type(dir->i_sb, de, inode->i_mode); de->name_len = namelen; memcpy(de->name, name, namelen); /* @@ -1318,7 +1635,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, dir->i_version++; ext4_mark_inode_dirty(handle, dir); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, dir, bh); + err = ext4_handle_dirty_dirent_node(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); return 0; @@ -1339,6 +1656,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct dx_frame frames[2], *frame; struct dx_entry *entries; struct ext4_dir_entry_2 *de, *de2; + struct ext4_dir_entry_tail *t; char *data1, *top; unsigned len; int retval; @@ -1346,6 +1664,11 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct dx_hash_info hinfo; ext4_lblk_t block; struct fake_dirent *fde; + int csum_size = 0; + + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + csum_size = sizeof(struct ext4_dir_entry_tail); blocksize = dir->i_sb->s_blocksize; dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); @@ -1366,7 +1689,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, brelse(bh); return -EIO; } - len = ((char *) root) + blocksize - (char *) de; + len = ((char *) root) + (blocksize - csum_size) - (char *) de; /* Allocate new block for the 0th block's dirents */ bh2 = ext4_append(handle, dir, &block, &retval); @@ -1382,8 +1705,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, top = data1 + len; while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) de = de2; - de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, + de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - + (char *) de, blocksize); + + if (csum_size) { + t = EXT4_DIRENT_TAIL(data1, blocksize); + initialize_dirent_tail(t, blocksize); + } + /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2), @@ -1408,8 +1738,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, frame->bh = bh; bh = bh2; - ext4_handle_dirty_metadata(handle, dir, frame->bh); - ext4_handle_dirty_metadata(handle, dir, bh); + ext4_handle_dirty_dx_node(handle, dir, frame->bh); + ext4_handle_dirty_dirent_node(handle, dir, bh); de = do_split(handle,dir, &bh, frame, &hinfo, &retval); if (!de) { @@ -1445,11 +1775,17 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct inode *dir = dentry->d_parent->d_inode; struct buffer_head *bh; struct ext4_dir_entry_2 *de; + struct ext4_dir_entry_tail *t; struct super_block *sb; int retval; int dx_fallback=0; unsigned blocksize; ext4_lblk_t block, blocks; + int csum_size = 0; + + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; blocksize = sb->s_blocksize; @@ -1468,6 +1804,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, bh = ext4_bread(handle, dir, block, 0, &retval); if(!bh) return retval; + if (!buffer_verified(bh) && + !ext4_dirent_csum_verify(dir, + (struct ext4_dir_entry *)bh->b_data)) + return -EIO; + set_buffer_verified(bh); retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); if (retval != -ENOSPC) { brelse(bh); @@ -1484,7 +1825,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return retval; de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; - de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); + de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); + + if (csum_size) { + t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); + initialize_dirent_tail(t, blocksize); + } + retval = add_dirent_to_buf(handle, dentry, inode, de, bh); brelse(bh); if (retval == 0) @@ -1516,6 +1863,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) goto cleanup; + if (!buffer_verified(bh) && + !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) + goto journal_error; + set_buffer_verified(bh); + BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) @@ -1583,7 +1935,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, dxtrace(dx_show_index("node", frames[1].entries)); dxtrace(dx_show_index("node", ((struct dx_node *) bh2->b_data)->entries)); - err = ext4_handle_dirty_metadata(handle, dir, bh2); + err = ext4_handle_dirty_dx_node(handle, dir, bh2); if (err) goto journal_error; brelse (bh2); @@ -1609,7 +1961,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (err) goto journal_error; } - err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh); + err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh); if (err) { ext4_std_error(inode->i_sb, err); goto cleanup; @@ -1641,12 +1993,17 @@ static int ext4_delete_entry(handle_t *handle, { struct ext4_dir_entry_2 *de, *pde; unsigned int blocksize = dir->i_sb->s_blocksize; + int csum_size = 0; int i, err; + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + csum_size = sizeof(struct ext4_dir_entry_tail); + i = 0; pde = NULL; de = (struct ext4_dir_entry_2 *) bh->b_data; - while (i < bh->b_size) { + while (i < bh->b_size - csum_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, i)) return -EIO; if (de == de_del) { @@ -1667,7 +2024,7 @@ static int ext4_delete_entry(handle_t *handle, de->inode = 0; dir->i_version++; BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, dir, bh); + err = ext4_handle_dirty_dirent_node(handle, dir, bh); if (unlikely(err)) { ext4_std_error(dir->i_sb, err); return err; @@ -1809,9 +2166,15 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; struct buffer_head *dir_block = NULL; struct ext4_dir_entry_2 *de; + struct ext4_dir_entry_tail *t; unsigned int blocksize = dir->i_sb->s_blocksize; + int csum_size = 0; int err, retries = 0; + if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + csum_size = sizeof(struct ext4_dir_entry_tail); + if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; @@ -1852,16 +2215,24 @@ retry: ext4_set_de_type(dir->i_sb, de, S_IFDIR); de = ext4_next_entry(de, blocksize); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1), + de->rec_len = ext4_rec_len_to_disk(blocksize - + (csum_size + EXT4_DIR_REC_LEN(1)), blocksize); de->name_len = 2; strcpy(de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); set_nlink(inode, 2); + + if (csum_size) { + t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize); + initialize_dirent_tail(t, blocksize); + } + BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, dir_block); + err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); if (err) goto out_clear_inode; + set_buffer_verified(dir_block); err = ext4_mark_inode_dirty(handle, inode); if (!err) err = ext4_add_entry(handle, dentry, inode); @@ -1911,6 +2282,14 @@ static int empty_dir(struct inode *inode) inode->i_ino); return 1; } + if (!buffer_verified(bh) && + !ext4_dirent_csum_verify(inode, + (struct ext4_dir_entry *)bh->b_data)) { + EXT4_ERROR_INODE(inode, "checksum error reading directory " + "lblock 0"); + return -EIO; + } + set_buffer_verified(bh); de = (struct ext4_dir_entry_2 *) bh->b_data; de1 = ext4_next_entry(de, sb->s_blocksize); if (le32_to_cpu(de->inode) != inode->i_ino || @@ -1942,6 +2321,14 @@ static int empty_dir(struct inode *inode) offset += sb->s_blocksize; continue; } + if (!buffer_verified(bh) && + !ext4_dirent_csum_verify(inode, + (struct ext4_dir_entry *)bh->b_data)) { + EXT4_ERROR_INODE(inode, "checksum error " + "reading directory lblock 0"); + return -EIO; + } + set_buffer_verified(bh); de = (struct ext4_dir_entry_2 *) bh->b_data; } if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) { @@ -2010,7 +2397,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* Insert this inode at the head of the on-disk orphan list... */ NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); + err = ext4_handle_dirty_super_now(handle, sb); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -2083,7 +2470,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (err) goto out_brelse; sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); + err = ext4_handle_dirty_super_now(handle, inode->i_sb); } else { struct ext4_iloc iloc2; struct inode *i_prev = @@ -2442,6 +2829,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; + if (!buffer_verified(dir_bh) && + !ext4_dirent_csum_verify(old_inode, + (struct ext4_dir_entry *)dir_bh->b_data)) + goto end_rename; + set_buffer_verified(dir_bh); if (le32_to_cpu(PARENT_INO(dir_bh->b_data, old_dir->i_sb->s_blocksize)) != old_dir->i_ino) goto end_rename; @@ -2472,7 +2864,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ext4_current_time(new_dir); ext4_mark_inode_dirty(handle, new_dir); BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); - retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh); + retval = ext4_handle_dirty_dirent_node(handle, new_dir, new_bh); if (unlikely(retval)) { ext4_std_error(new_dir->i_sb, retval); goto end_rename; @@ -2526,7 +2918,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = cpu_to_le32(new_dir->i_ino); BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); - retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh); + retval = ext4_handle_dirty_dirent_node(handle, old_inode, + dir_bh); if (retval) { ext4_std_error(old_dir->i_sb, retval); goto end_rename; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 59fa0be27251..7ea6cbb44121 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -161,6 +161,8 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) + goto out2; flex_gd->count = flexbg_size; flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * @@ -796,7 +798,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, ext4_kvfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); - err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); + err = ext4_handle_dirty_super_now(handle, sb); if (err) ext4_std_error(sb, err); @@ -968,6 +970,8 @@ static void update_backups(struct super_block *sb, goto exit_err; } + ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); + while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { struct buffer_head *bh; @@ -1067,6 +1071,54 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, return err; } +static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) +{ + struct buffer_head *bh = sb_getblk(sb, block); + if (!bh) + return NULL; + + if (bitmap_uptodate(bh)) + return bh; + + lock_buffer(bh); + if (bh_submit_read(bh) < 0) { + unlock_buffer(bh); + brelse(bh); + return NULL; + } + unlock_buffer(bh); + + return bh; +} + +static int ext4_set_bitmap_checksums(struct super_block *sb, + ext4_group_t group, + struct ext4_group_desc *gdp, + struct ext4_new_group_data *group_data) +{ + struct buffer_head *bh; + + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 0; + + bh = ext4_get_bitmap(sb, group_data->inode_bitmap); + if (!bh) + return -EIO; + ext4_inode_bitmap_csum_set(sb, group, gdp, bh, + EXT4_INODES_PER_GROUP(sb) / 8); + brelse(bh); + + bh = ext4_get_bitmap(sb, group_data->block_bitmap); + if (!bh) + return -EIO; + ext4_block_bitmap_csum_set(sb, group, gdp, bh, + EXT4_BLOCKS_PER_GROUP(sb) / 8); + brelse(bh); + + return 0; +} + /* * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg */ @@ -1093,18 +1145,24 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, */ gdb_bh = sbi->s_group_desc[gdb_num]; /* Update group descriptor block for new group */ - gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + + gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); memset(gdp, 0, EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); + err = ext4_set_bitmap_checksums(sb, group, gdp, group_data); + if (err) { + ext4_std_error(sb, err); + break; + } + ext4_inode_table_set(sb, gdp, group_data->inode_table); ext4_free_group_clusters_set(sb, gdp, EXT4_B2C(sbi, group_data->free_blocks_count)); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); gdp->bg_flags = cpu_to_le16(*bg_flags); - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); + ext4_group_desc_csum_set(sb, group, gdp); err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { @@ -1343,17 +1401,14 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, (1 + ext4_bg_num_gdb(sb, group + i) + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; group_data[i].free_blocks_count = blocks_per_group - overhead; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + if (ext4_has_group_desc_csum(sb)) flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | EXT4_BG_INODE_UNINIT; else flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; } - if (last_group == n_group && - EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + if (last_group == n_group && ext4_has_group_desc_csum(sb)) /* We need to initialize block bitmap of last group. */ flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 35b5954489ee..eb7aa3e4ef05 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -112,6 +112,48 @@ static struct file_system_type ext3_fs_type = { #define IS_EXT3_SB(sb) (0) #endif +static int ext4_verify_csum_type(struct super_block *sb, + struct ext4_super_block *es) +{ + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + return es->s_checksum_type == EXT4_CRC32C_CHKSUM; +} + +static __le32 ext4_superblock_csum(struct super_block *sb, + struct ext4_super_block *es) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + int offset = offsetof(struct ext4_super_block, s_checksum); + __u32 csum; + + csum = ext4_chksum(sbi, ~0, (char *)es, offset); + + return cpu_to_le32(csum); +} + +int ext4_superblock_csum_verify(struct super_block *sb, + struct ext4_super_block *es) +{ + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return 1; + + return es->s_checksum == ext4_superblock_csum(sb, es); +} + +void ext4_superblock_csum_set(struct super_block *sb, + struct ext4_super_block *es) +{ + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + es->s_checksum = ext4_superblock_csum(sb, es); +} + void *ext4_kvmalloc(size_t size, gfp_t flags) { void *ret; @@ -497,6 +539,7 @@ void __ext4_error(struct super_block *sb, const char *function, printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", sb->s_id, function, line, current->comm, &vaf); va_end(args); + save_error_info(sb, function, line); ext4_handle_error(sb); } @@ -905,6 +948,8 @@ static void ext4_put_super(struct super_block *sb) unlock_super(sb); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); + if (sbi->s_chksum_driver) + crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->s_blockgroup_lock); kfree(sbi); } @@ -1922,43 +1967,69 @@ failed: return 0; } -__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, - struct ext4_group_desc *gdp) +static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, + struct ext4_group_desc *gdp) { + int offset; __u16 crc = 0; + __le32 le_group = cpu_to_le32(block_group); - if (sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { - int offset = offsetof(struct ext4_group_desc, bg_checksum); - __le32 le_group = cpu_to_le32(block_group); - - crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); - crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); - crc = crc16(crc, (__u8 *)gdp, offset); - offset += sizeof(gdp->bg_checksum); /* skip checksum */ - /* for checksum of struct ext4_group_desc do the rest...*/ - if ((sbi->s_es->s_feature_incompat & - cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && - offset < le16_to_cpu(sbi->s_es->s_desc_size)) - crc = crc16(crc, (__u8 *)gdp + offset, - le16_to_cpu(sbi->s_es->s_desc_size) - - offset); + if ((sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { + /* Use new metadata_csum algorithm */ + __u16 old_csum; + __u32 csum32; + + old_csum = gdp->bg_checksum; + gdp->bg_checksum = 0; + csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, + sizeof(le_group)); + csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, + sbi->s_desc_size); + gdp->bg_checksum = old_csum; + + crc = csum32 & 0xFFFF; + goto out; } + /* old crc16 code */ + offset = offsetof(struct ext4_group_desc, bg_checksum); + + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); + crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); + crc = crc16(crc, (__u8 *)gdp, offset); + offset += sizeof(gdp->bg_checksum); /* skip checksum */ + /* for checksum of struct ext4_group_desc do the rest...*/ + if ((sbi->s_es->s_feature_incompat & + cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && + offset < le16_to_cpu(sbi->s_es->s_desc_size)) + crc = crc16(crc, (__u8 *)gdp + offset, + le16_to_cpu(sbi->s_es->s_desc_size) - + offset); + +out: return cpu_to_le16(crc); } -int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, +int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, struct ext4_group_desc *gdp) { - if ((sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && - (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), + block_group, gdp))) return 0; return 1; } +void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, + struct ext4_group_desc *gdp) +{ + if (!ext4_has_group_desc_csum(sb)) + return; + gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); +} + /* Called at mount-time, super-block is locked */ static int ext4_check_descriptors(struct super_block *sb, ext4_group_t *first_not_zeroed) @@ -2013,7 +2084,7 @@ static int ext4_check_descriptors(struct super_block *sb, return 0; } ext4_lock_group(sb, i); - if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { + if (!ext4_group_desc_csum_verify(sb, i, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Checksum for group %u failed (%u!=%u)", i, le16_to_cpu(ext4_group_desc_csum(sbi, i, @@ -2417,6 +2488,23 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, return count; } +static ssize_t trigger_test_error(struct ext4_attr *a, + struct ext4_sb_info *sbi, + const char *buf, size_t count) +{ + int len = count; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (len && buf[len-1] == '\n') + len--; + + if (len) + ext4_error(sbi->s_sb, "%.*s", len, buf); + return count; +} + #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ @@ -2447,6 +2535,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); +EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), @@ -2461,6 +2550,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(mb_stream_req), ATTR_LIST(mb_group_prealloc), ATTR_LIST(max_writeback_mb_bump), + ATTR_LIST(trigger_fs_error), NULL, }; @@ -2957,6 +3047,44 @@ static void ext4_destroy_lazyinit_thread(void) kthread_stop(ext4_lazyinit_task); } +static int set_journal_csum_feature_set(struct super_block *sb) +{ + int ret = 1; + int compat, incompat; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + /* journal checksum v2 */ + compat = 0; + incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; + } else { + /* journal checksum v1 */ + compat = JBD2_FEATURE_COMPAT_CHECKSUM; + incompat = 0; + } + + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ret = jbd2_journal_set_features(sbi->s_journal, + compat, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | + incompat); + } else if (test_opt(sb, JOURNAL_CHECKSUM)) { + ret = jbd2_journal_set_features(sbi->s_journal, + compat, 0, + incompat); + jbd2_journal_clear_features(sbi->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); + } else { + jbd2_journal_clear_features(sbi->s_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | + JBD2_FEATURE_INCOMPAT_CSUM_V2); + } + + return ret; +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) { char *orig_data = kstrdup(data, GFP_KERNEL); @@ -2993,6 +3121,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto out_free_orig; } sb->s_fs_info = sbi; + sbi->s_sb = sb; sbi->s_mount_opt = 0; sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID); sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID); @@ -3032,13 +3161,54 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * Note: s_es must be initialized as soon as possible because * some ext4 macro-instructions depend on its value */ - es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); + es = (struct ext4_super_block *) (bh->b_data + offset); sbi->s_es = es; sb->s_magic = le16_to_cpu(es->s_magic); if (sb->s_magic != EXT4_SUPER_MAGIC) goto cantfind_ext4; sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); + /* Warn if metadata_csum and gdt_csum are both set. */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are " + "redundant flags; please run fsck."); + + /* Check for a known checksum algorithm */ + if (!ext4_verify_csum_type(sb, es)) { + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " + "unknown checksum algorithm."); + silent = 1; + goto cantfind_ext4; + } + + /* Load the checksum driver */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(sbi->s_chksum_driver)) { + ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); + ret = PTR_ERR(sbi->s_chksum_driver); + sbi->s_chksum_driver = NULL; + goto failed_mount; + } + } + + /* Check superblock checksum */ + if (!ext4_superblock_csum_verify(sb, es)) { + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " + "invalid superblock checksum. Run e2fsck?"); + silent = 1; + goto cantfind_ext4; + } + + /* Precompute checksum seed for all metadata */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, + sizeof(es->s_uuid)); + /* Set defaults before we parse the mount options */ def_mount_opts = le32_to_cpu(es->s_default_mount_opts); set_opt(sb, INIT_INODE_TABLE); @@ -3200,7 +3370,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "Can't read superblock on 2nd try"); goto failed_mount; } - es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); + es = (struct ext4_super_block *)(bh->b_data + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { ext4_msg(sb, KERN_ERR, @@ -3392,6 +3562,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) GFP_KERNEL); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); + ret = -ENOMEM; goto failed_mount; } @@ -3449,6 +3620,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); + ret = err; goto failed_mount3; } @@ -3506,26 +3678,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto no_journal; } - if (ext4_blocks_count(es) > 0xffffffffULL && + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); goto failed_mount_wq; } - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { - jbd2_journal_set_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } else if (test_opt(sb, JOURNAL_CHECKSUM)) { - jbd2_journal_set_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); - jbd2_journal_clear_features(sbi->s_journal, 0, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); - } else { - jbd2_journal_clear_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); + if (!set_journal_csum_feature_set(sb)) { + ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " + "feature set"); + goto failed_mount_wq; } /* We have now updated the journal if required, so we can @@ -3606,7 +3769,8 @@ no_journal: goto failed_mount4; } - ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); + if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) + sb->s_flags |= MS_RDONLY; /* determine the minimum size of new large inodes, if present */ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { @@ -3641,7 +3805,7 @@ no_journal: } ext4_ext_init(sb); - err = ext4_mb_init(sb, needs_recovery); + err = ext4_mb_init(sb); if (err) { ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", err); @@ -3724,6 +3888,8 @@ failed_mount2: brelse(sbi->s_group_desc[i]); ext4_kvfree(sbi->s_group_desc); failed_mount: + if (sbi->s_chksum_driver) + crypto_free_shash(sbi->s_chksum_driver); if (sbi->s_proc) { remove_proc_entry("options", sbi->s_proc); remove_proc_entry(sb->s_id, ext4_proc_root); @@ -3847,7 +4013,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, goto out_bdev; } - es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); + es = (struct ext4_super_block *) (bh->b_data + offset); if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || !(le32_to_cpu(es->s_feature_incompat) & EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { @@ -4039,6 +4205,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) &EXT4_SB(sb)->s_freeinodes_counter)); sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); + ext4_superblock_csum_set(sb, es); mark_buffer_dirty(sbh); if (sync) { error = sync_dirty_buffer(sbh); @@ -4333,7 +4500,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) struct ext4_group_desc *gdp = ext4_get_group_desc(sb, g, NULL); - if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { + if (!ext4_group_desc_csum_verify(sb, g, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_remount: Checksum for group %u failed (%u!=%u)", g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e88748e55c0f..e56c9ed7d6e3 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -122,6 +122,58 @@ const struct xattr_handler *ext4_xattr_handlers[] = { NULL }; +static __le32 ext4_xattr_block_csum(struct inode *inode, + sector_t block_nr, + struct ext4_xattr_header *hdr) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); + __u32 csum, old; + + old = hdr->h_checksum; + hdr->h_checksum = 0; + if (le32_to_cpu(hdr->h_refcount) != 1) { + block_nr = cpu_to_le64(block_nr); + csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr, + sizeof(block_nr)); + } else + csum = ei->i_csum_seed; + csum = ext4_chksum(sbi, csum, (__u8 *)hdr, + EXT4_BLOCK_SIZE(inode->i_sb)); + hdr->h_checksum = old; + return cpu_to_le32(csum); +} + +static int ext4_xattr_block_csum_verify(struct inode *inode, + sector_t block_nr, + struct ext4_xattr_header *hdr) +{ + if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) + return 0; + return 1; +} + +static void ext4_xattr_block_csum_set(struct inode *inode, + sector_t block_nr, + struct ext4_xattr_header *hdr) +{ + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + return; + + hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); +} + +static inline int ext4_handle_dirty_xattr_block(handle_t *handle, + struct inode *inode, + struct buffer_head *bh) +{ + ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh)); + return ext4_handle_dirty_metadata(handle, inode, bh); +} + static inline const struct xattr_handler * ext4_xattr_handler(int name_index) { @@ -156,12 +208,22 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) } static inline int -ext4_xattr_check_block(struct buffer_head *bh) +ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) { + int error; + + if (buffer_verified(bh)) + return 0; + if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) return -EIO; - return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); + if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) + return -EIO; + error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); + if (!error) + set_buffer_verified(bh); + return error; } static inline int @@ -224,7 +286,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext4_xattr_check_block(bh)) { + if (ext4_xattr_check_block(inode, bh)) { bad_block: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); @@ -369,7 +431,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext4_xattr_check_block(bh)) { + if (ext4_xattr_check_block(inode, bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EIO; @@ -492,7 +554,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, if (ce) mb_cache_entry_release(ce); unlock_buffer(bh); - error = ext4_handle_dirty_metadata(handle, inode, bh); + error = ext4_handle_dirty_xattr_block(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); dquot_free_block(inode, 1); @@ -662,7 +724,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, ea_bdebug(bs->bh, "b_count=%d, refcount=%d", atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); - if (ext4_xattr_check_block(bs->bh)) { + if (ext4_xattr_check_block(inode, bs->bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EIO; @@ -725,9 +787,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (error == -EIO) goto bad_block; if (!error) - error = ext4_handle_dirty_metadata(handle, - inode, - bs->bh); + error = ext4_handle_dirty_xattr_block(handle, + inode, + bs->bh); if (error) goto cleanup; goto inserted; @@ -796,9 +858,9 @@ inserted: ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); unlock_buffer(new_bh); - error = ext4_handle_dirty_metadata(handle, - inode, - new_bh); + error = ext4_handle_dirty_xattr_block(handle, + inode, + new_bh); if (error) goto cleanup_dquot; } @@ -855,8 +917,8 @@ getblk_failed: set_buffer_uptodate(new_bh); unlock_buffer(new_bh); ext4_xattr_cache_insert(new_bh); - error = ext4_handle_dirty_metadata(handle, - inode, new_bh); + error = ext4_handle_dirty_xattr_block(handle, + inode, new_bh); if (error) goto cleanup; } @@ -1193,7 +1255,7 @@ retry: error = -EIO; if (!bh) goto cleanup; - if (ext4_xattr_check_block(bh)) { + if (ext4_xattr_check_block(inode, bh)) { EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EIO; diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 25b7387ff183..91f31ca7d9af 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -27,7 +27,9 @@ struct ext4_xattr_header { __le32 h_refcount; /* reference count */ __le32 h_blocks; /* number of disk blocks used */ __le32 h_hash; /* hash value of all attributes */ - __u32 h_reserved[4]; /* zero right now */ + __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ + /* id = inum if refcount=1, blknum otherwise */ + __u32 h_reserved[3]; /* zero right now */ }; struct ext4_xattr_ibody_header { diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index f32f346f4b0a..69a48c2944da 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig @@ -1,6 +1,8 @@ config JBD2 tristate select CRC32 + select CRYPTO + select CRYPTO_CRC32C help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. It is currently used by diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 840f70f50792..216f4299f65e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -85,6 +85,24 @@ nope: __brelse(bh); } +static void jbd2_commit_block_csum_set(journal_t *j, + struct journal_head *descriptor) +{ + struct commit_header *h; + __u32 csum; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return; + + h = (struct commit_header *)(jh2bh(descriptor)->b_data); + h->h_chksum_type = 0; + h->h_chksum_size = 0; + h->h_chksum[0] = 0; + csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, + j->j_blocksize); + h->h_chksum[0] = cpu_to_be32(csum); +} + /* * Done it all: now submit the commit record. We should have * cleaned up our previous buffers by now, so if we are in abort @@ -128,6 +146,7 @@ static int journal_submit_commit_record(journal_t *journal, tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; tmp->h_chksum[0] = cpu_to_be32(crc32_sum); } + jbd2_commit_block_csum_set(journal, descriptor); JBUFFER_TRACE(descriptor, "submit commit block"); lock_buffer(bh); @@ -301,6 +320,44 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } +static void jbd2_descr_block_csum_set(journal_t *j, + struct journal_head *descriptor) +{ + struct jbd2_journal_block_tail *tail; + __u32 csum; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return; + + tail = (struct jbd2_journal_block_tail *) + (jh2bh(descriptor)->b_data + j->j_blocksize - + sizeof(struct jbd2_journal_block_tail)); + tail->t_checksum = 0; + csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, + j->j_blocksize); + tail->t_checksum = cpu_to_be32(csum); +} + +static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, + struct buffer_head *bh, __u32 sequence) +{ + struct page *page = bh->b_page; + __u8 *addr; + __u32 csum; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return; + + sequence = cpu_to_be32(sequence); + addr = kmap_atomic(page, KM_USER0); + csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, + sizeof(sequence)); + csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), + bh->b_size); + kunmap_atomic(addr, KM_USER0); + + tag->t_checksum = cpu_to_be32(csum); +} /* * jbd2_journal_commit_transaction * @@ -334,6 +391,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) unsigned long first_block; tid_t first_tid; int update_tail; + int csum_size = 0; + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + csum_size = sizeof(struct jbd2_journal_block_tail); /* * First job: lock down the current transaction and wait for @@ -627,7 +688,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag = (journal_block_tag_t *) tagp; write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); - tag->t_flags = cpu_to_be32(tag_flag); + tag->t_flags = cpu_to_be16(tag_flag); + jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), + commit_transaction->t_tid); tagp += tag_bytes; space_left -= tag_bytes; @@ -643,7 +706,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (bufs == journal->j_wbufsize || commit_transaction->t_buffers == NULL || - space_left < tag_bytes + 16) { + space_left < tag_bytes + 16 + csum_size) { jbd_debug(4, "JBD2: Submit %d IOs\n", bufs); @@ -651,8 +714,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) submitting the IOs. "tag" still points to the last tag we set up. */ - tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG); + tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); + jbd2_descr_block_csum_set(journal, descriptor); start_journal_io: for (i = 0; i < bufs; i++) { struct buffer_head *bh = wbuf[i]; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 1afb701622b0..e9a3c4c85594 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -97,6 +97,43 @@ EXPORT_SYMBOL(jbd2_inode_cache); static void __journal_abort_soft (journal_t *journal, int errno); static int jbd2_journal_create_slab(size_t slab_size); +/* Checksumming functions */ +int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) +{ + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return 1; + + return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; +} + +static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) +{ + __u32 csum, old_csum; + + old_csum = sb->s_checksum; + sb->s_checksum = 0; + csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t)); + sb->s_checksum = old_csum; + + return cpu_to_be32(csum); +} + +int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) +{ + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return 1; + + return sb->s_checksum == jbd2_superblock_csum(j, sb); +} + +void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) +{ + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return; + + sb->s_checksum = jbd2_superblock_csum(j, sb); +} + /* * Helper function used to manage commit timeouts */ @@ -1348,6 +1385,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal) jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", journal->j_errno); sb->s_errno = cpu_to_be32(journal->j_errno); + jbd2_superblock_csum_set(journal, sb); read_unlock(&journal->j_state_lock); jbd2_write_superblock(journal, WRITE_SYNC); @@ -1376,6 +1414,9 @@ static int journal_get_superblock(journal_t *journal) } } + if (buffer_verified(bh)) + return 0; + sb = journal->j_superblock; err = -EINVAL; @@ -1413,6 +1454,43 @@ static int journal_get_superblock(journal_t *journal) goto out; } + if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + /* Can't have checksum v1 and v2 on at the same time! */ + printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " + "at the same time!\n"); + goto out; + } + + if (!jbd2_verify_csum_type(journal, sb)) { + printk(KERN_ERR "JBD: Unknown checksum type\n"); + goto out; + } + + /* Load the checksum driver */ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(journal->j_chksum_driver)) { + printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); + err = PTR_ERR(journal->j_chksum_driver); + journal->j_chksum_driver = NULL; + goto out; + } + } + + /* Check superblock checksum */ + if (!jbd2_superblock_csum_verify(journal, sb)) { + printk(KERN_ERR "JBD: journal checksum error\n"); + goto out; + } + + /* Precompute checksum seed for all metadata */ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, + sizeof(sb->s_uuid)); + + set_buffer_verified(bh); + return 0; out: @@ -1564,6 +1642,8 @@ int jbd2_journal_destroy(journal_t *journal) iput(journal->j_inode); if (journal->j_revoke) jbd2_journal_destroy_revoke(journal); + if (journal->j_chksum_driver) + crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_wbuf); kfree(journal); @@ -1653,6 +1733,10 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com int jbd2_journal_set_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { +#define INCOMPAT_FEATURE_ON(f) \ + ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f))) +#define COMPAT_FEATURE_ON(f) \ + ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f))) journal_superblock_t *sb; if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) @@ -1661,16 +1745,54 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) return 0; + /* Asking for checksumming v2 and v1? Only give them v2. */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && + compat & JBD2_FEATURE_COMPAT_CHECKSUM) + compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; + jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", compat, ro, incompat); sb = journal->j_superblock; + /* If enabling v2 checksums, update superblock */ + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + sb->s_checksum_type = JBD2_CRC32C_CHKSUM; + sb->s_feature_compat &= + ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); + + /* Load the checksum driver */ + if (journal->j_chksum_driver == NULL) { + journal->j_chksum_driver = crypto_alloc_shash("crc32c", + 0, 0); + if (IS_ERR(journal->j_chksum_driver)) { + printk(KERN_ERR "JBD: Cannot load crc32c " + "driver.\n"); + journal->j_chksum_driver = NULL; + return 0; + } + } + + /* Precompute checksum seed for all metadata */ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, + JBD2_FEATURE_INCOMPAT_CSUM_V2)) + journal->j_csum_seed = jbd2_chksum(journal, ~0, + sb->s_uuid, + sizeof(sb->s_uuid)); + } + + /* If enabling v1 checksums, downgrade superblock */ + if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) + sb->s_feature_incompat &= + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); + sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_ro_compat |= cpu_to_be32(ro); sb->s_feature_incompat |= cpu_to_be32(incompat); return 1; +#undef COMPAT_FEATURE_ON +#undef INCOMPAT_FEATURE_ON } /* @@ -1975,10 +2097,16 @@ int jbd2_journal_blocks_per_page(struct inode *inode) */ size_t journal_tag_bytes(journal_t *journal) { + journal_block_tag_t tag; + size_t x = 0; + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + x += sizeof(tag.t_checksum); + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return JBD2_TAG_SIZE64; + return x + JBD2_TAG_SIZE64; else - return JBD2_TAG_SIZE32; + return x + JBD2_TAG_SIZE32; } /* diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index c1a03354a22f..0131e4362534 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -174,6 +174,25 @@ static int jread(struct buffer_head **bhp, journal_t *journal, return 0; } +static int jbd2_descr_block_csum_verify(journal_t *j, + void *buf) +{ + struct jbd2_journal_block_tail *tail; + __u32 provided, calculated; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return 1; + + tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - + sizeof(struct jbd2_journal_block_tail)); + provided = tail->t_checksum; + tail->t_checksum = 0; + calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); + tail->t_checksum = provided; + + provided = be32_to_cpu(provided); + return provided == calculated; +} /* * Count the number of in-use tags in a journal descriptor block. @@ -186,6 +205,9 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) int nr = 0, size = journal->j_blocksize; int tag_bytes = journal_tag_bytes(journal); + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + size -= sizeof(struct jbd2_journal_block_tail); + tagp = &bh->b_data[sizeof(journal_header_t)]; while ((tagp - bh->b_data + tag_bytes) <= size) { @@ -193,10 +215,10 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) nr++; tagp += tag_bytes; - if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) + if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) tagp += 16; - if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG)) + if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) break; } @@ -353,6 +375,41 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, return 0; } +static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) +{ + struct commit_header *h; + __u32 provided, calculated; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return 1; + + h = buf; + provided = h->h_chksum[0]; + h->h_chksum[0] = 0; + calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); + h->h_chksum[0] = provided; + + provided = be32_to_cpu(provided); + return provided == calculated; +} + +static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, + void *buf, __u32 sequence) +{ + __u32 provided, calculated; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return 1; + + sequence = cpu_to_be32(sequence); + calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, + sizeof(sequence)); + calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize); + provided = be32_to_cpu(tag->t_checksum); + + return provided == cpu_to_be32(calculated); +} + static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass) { @@ -366,6 +423,7 @@ static int do_one_pass(journal_t *journal, int blocktype; int tag_bytes = journal_tag_bytes(journal); __u32 crc32_sum = ~0; /* Transactional Checksums */ + int descr_csum_size = 0; /* * First thing is to establish what we expect to find in the log @@ -451,6 +509,18 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JBD2_DESCRIPTOR_BLOCK: + /* Verify checksum first */ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, + JBD2_FEATURE_INCOMPAT_CSUM_V2)) + descr_csum_size = + sizeof(struct jbd2_journal_block_tail); + if (descr_csum_size > 0 && + !jbd2_descr_block_csum_verify(journal, + bh->b_data)) { + err = -EIO; + goto failed; + } + /* If it is a valid descriptor block, replay it * in pass REPLAY; if journal_checksums enabled, then * calculate checksums in PASS_SCAN, otherwise, @@ -481,11 +551,11 @@ static int do_one_pass(journal_t *journal, tagp = &bh->b_data[sizeof(journal_header_t)]; while ((tagp - bh->b_data + tag_bytes) - <= journal->j_blocksize) { + <= journal->j_blocksize - descr_csum_size) { unsigned long io_block; tag = (journal_block_tag_t *) tagp; - flags = be32_to_cpu(tag->t_flags); + flags = be16_to_cpu(tag->t_flags); io_block = next_log_block++; wrap(journal, next_log_block); @@ -516,6 +586,19 @@ static int do_one_pass(journal_t *journal, goto skip_write; } + /* Look for block corruption */ + if (!jbd2_block_tag_csum_verify( + journal, tag, obh->b_data, + be32_to_cpu(tmp->h_sequence))) { + brelse(obh); + success = -EIO; + printk(KERN_ERR "JBD: Invalid " + "checksum recovering " + "block %llu in log\n", + blocknr); + continue; + } + /* Find a buffer for the new * data being restored */ nbh = __getblk(journal->j_fs_dev, @@ -650,6 +733,19 @@ static int do_one_pass(journal_t *journal, } crc32_sum = ~0; } + if (pass == PASS_SCAN && + !jbd2_commit_block_csum_verify(journal, + bh->b_data)) { + info->end_transaction = next_commit_ID; + + if (!JBD2_HAS_INCOMPAT_FEATURE(journal, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + journal->j_failed_commit = + next_commit_ID; + brelse(bh); + break; + } + } brelse(bh); next_commit_ID++; continue; @@ -706,6 +802,25 @@ static int do_one_pass(journal_t *journal, return err; } +static int jbd2_revoke_block_csum_verify(journal_t *j, + void *buf) +{ + struct jbd2_journal_revoke_tail *tail; + __u32 provided, calculated; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return 1; + + tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - + sizeof(struct jbd2_journal_revoke_tail)); + provided = tail->r_checksum; + tail->r_checksum = 0; + calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); + tail->r_checksum = provided; + + provided = be32_to_cpu(provided); + return provided == calculated; +} /* Scan a revoke record, marking all blocks mentioned as revoked. */ @@ -720,6 +835,9 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, offset = sizeof(jbd2_journal_revoke_header_t); max = be32_to_cpu(header->r_count); + if (!jbd2_revoke_block_csum_verify(journal, header)) + return -EINVAL; + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) record_len = 8; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 6973705d6a3d..f30b80b4ce8b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -578,6 +578,7 @@ static void write_one_revoke_record(journal_t *journal, struct jbd2_revoke_record_s *record, int write_op) { + int csum_size = 0; struct journal_head *descriptor; int offset; journal_header_t *header; @@ -592,9 +593,13 @@ static void write_one_revoke_record(journal_t *journal, descriptor = *descriptorp; offset = *offsetp; + /* Do we need to leave space at the end for a checksum? */ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + csum_size = sizeof(struct jbd2_journal_revoke_tail); + /* Make sure we have a descriptor with space left for the record */ if (descriptor) { - if (offset == journal->j_blocksize) { + if (offset >= journal->j_blocksize - csum_size) { flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } @@ -631,6 +636,24 @@ static void write_one_revoke_record(journal_t *journal, *offsetp = offset; } +static void jbd2_revoke_csum_set(journal_t *j, + struct journal_head *descriptor) +{ + struct jbd2_journal_revoke_tail *tail; + __u32 csum; + + if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + return; + + tail = (struct jbd2_journal_revoke_tail *) + (jh2bh(descriptor)->b_data + j->j_blocksize - + sizeof(struct jbd2_journal_revoke_tail)); + tail->r_checksum = 0; + csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, + j->j_blocksize); + tail->r_checksum = cpu_to_be32(csum); +} + /* * Flush a revoke descriptor out to the journal. If we are aborting, * this is a noop; otherwise we are generating a buffer which needs to @@ -652,6 +675,8 @@ static void flush_descriptor(journal_t *journal, header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; header->r_count = cpu_to_be32(offset); + jbd2_revoke_csum_set(journal, descriptor); + set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index ddcd3549c6c2..fb1ab9533b67 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -162,8 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle, alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = kmem_cache_alloc(transaction_cache, - gfp_mask | __GFP_ZERO); + new_transaction = kmem_cache_zalloc(transaction_cache, + gfp_mask); if (!new_transaction) { /* * If __GFP_FS is not present, then we may be diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 912c30a8ddb1..f334c7fab967 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -31,6 +31,7 @@ #include <linux/mutex.h> #include <linux/timer.h> #include <linux/slab.h> +#include <crypto/hash.h> #endif #define journal_oom_retry 1 @@ -147,12 +148,24 @@ typedef struct journal_header_s #define JBD2_CRC32_CHKSUM 1 #define JBD2_MD5_CHKSUM 2 #define JBD2_SHA1_CHKSUM 3 +#define JBD2_CRC32C_CHKSUM 4 #define JBD2_CRC32_CHKSUM_SIZE 4 #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) /* * Commit block header for storing transactional checksums: + * + * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum* + * fields are used to store a checksum of the descriptor and data blocks. + * + * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum + * field is used to store crc32c(uuid+commit_block). Each journal metadata + * block gets its own checksum, and data block checksums are stored in + * journal_block_tag (in the descriptor). The other h_chksum* fields are + * not used. + * + * Checksum v1 and v2 are mutually exclusive features. */ struct commit_header { __be32 h_magic; @@ -175,13 +188,19 @@ struct commit_header { typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ - __be32 t_flags; /* See below */ + __be16 t_checksum; /* truncated crc32c(uuid+seq+block) */ + __be16 t_flags; /* See below */ __be32 t_blocknr_high; /* most-significant high 32bits. */ } journal_block_tag_t; #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) +/* Tail of descriptor block, for checksumming */ +struct jbd2_journal_block_tail { + __be32 t_checksum; /* crc32c(uuid+descr_block) */ +}; + /* * The revoke descriptor: used on disk to describe a series of blocks to * be revoked from the log @@ -192,6 +211,10 @@ typedef struct jbd2_journal_revoke_header_s __be32 r_count; /* Count of bytes used in the block */ } jbd2_journal_revoke_header_t; +/* Tail of revoke block, for checksumming */ +struct jbd2_journal_revoke_tail { + __be32 r_checksum; /* crc32c(uuid+revoke_block) */ +}; /* Definitions for the journal tag flags word: */ #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ @@ -241,7 +264,10 @@ typedef struct journal_superblock_s __be32 s_max_trans_data; /* Limit of data blocks per trans. */ /* 0x0050 */ - __u32 s_padding[44]; + __u8 s_checksum_type; /* checksum type */ + __u8 s_padding2[3]; + __u32 s_padding[42]; + __be32 s_checksum; /* crc32c(superblock) */ /* 0x0100 */ __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ @@ -263,13 +289,15 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 +#define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ JBD2_FEATURE_INCOMPAT_64BIT | \ - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ + JBD2_FEATURE_INCOMPAT_CSUM_V2) #ifdef __KERNEL__ @@ -939,6 +967,12 @@ struct journal_s * superblock pointer here */ void *j_private; + + /* Reference to checksum algorithm driver via cryptoapi */ + struct crypto_shash *j_chksum_driver; + + /* Precomputed journal UUID checksum for seeding other checksums */ + __u32 j_csum_seed; }; /* @@ -1268,6 +1302,25 @@ static inline int jbd_space_needed(journal_t *journal) extern int jbd_blocks_per_page(struct inode *inode); +static inline u32 jbd2_chksum(journal_t *journal, u32 crc, + const void *address, unsigned int length) +{ + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(journal->j_chksum_driver)]; + } desc; + int err; + + desc.shash.tfm = journal->j_chksum_driver; + desc.shash.flags = 0; + *(u32 *)desc.ctx = crc; + + err = crypto_shash_update(&desc.shash, address, length); + BUG_ON(err); + + return *(u32 *)desc.ctx; +} + #ifdef __KERNEL__ #define buffer_trace_init(bh) do {} while (0) diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h index 6230f8556a4e..6133679bc4c0 100644 --- a/include/linux/jbd_common.h +++ b/include/linux/jbd_common.h @@ -12,6 +12,7 @@ enum jbd_state_bits { BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ + BH_Verified, /* Metadata block has been verified ok */ BH_JBDPrivateStart, /* First bit available for private use by FS */ }; @@ -24,6 +25,7 @@ TAS_BUFFER_FNS(Revoked, revoked) BUFFER_FNS(RevokeValid, revokevalid) TAS_BUFFER_FNS(RevokeValid, revokevalid) BUFFER_FNS(Freed, freed) +BUFFER_FNS(Verified, verified) static inline struct buffer_head *jh2bh(struct journal_head *jh) { |