diff options
Diffstat (limited to 'fs')
42 files changed, 864 insertions, 508 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index d8062745716a..e31f3691b151 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -140,6 +140,7 @@ config EXT4DEV_FS tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" depends on EXPERIMENTAL select JBD2 + select CRC16 help Ext4dev is a predecessor filesystem of the next generation extended fs ext4, based on ext3 filesystem code. It will be @@ -710,18 +710,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) /* * Now we are all set to call the retry method in async - * context. By setting this thread's io_wait context - * to point to the wait queue entry inside the currently - * running iocb for the duration of the retry, we ensure - * that async notification wakeups are queued by the - * operation instead of blocking waits, and when notified, - * cause the iocb to be kicked for continuation (through - * the aio_wake_function callback). + * context. */ - BUG_ON(current->io_wait != NULL); - current->io_wait = &iocb->ki_wait; ret = retry(iocb); - current->io_wait = NULL; if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { BUG_ON(!list_empty(&iocb->ki_wait.task_list)); @@ -1508,10 +1499,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) * Simply triggers a retry of the operation via kick_iocb. * * This callback is specified in the wait queue entry in - * a kiocb (current->io_wait points to this wait queue - * entry when an aio operation executes; it is used - * instead of a synchronous wait when an i/o blocking - * condition is encountered during aio). + * a kiocb. * * Note: * This routine is executed with the wait queue lock held. diff --git a/fs/attr.c b/fs/attr.c index ae58bd3f875f..966b73e25f82 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -103,12 +103,11 @@ EXPORT_SYMBOL(inode_setattr); int notify_change(struct dentry * dentry, struct iattr * attr) { struct inode *inode = dentry->d_inode; - mode_t mode; + mode_t mode = inode->i_mode; int error; struct timespec now; unsigned int ia_valid = attr->ia_valid; - mode = inode->i_mode; now = current_fs_time(inode->i_sb); attr->ia_ctime = now; @@ -125,18 +124,25 @@ int notify_change(struct dentry * dentry, struct iattr * attr) if (error) return error; } + + /* + * We now pass ATTR_KILL_S*ID to the lower level setattr function so + * that the function has the ability to reinterpret a mode change + * that's due to these bits. This adds an implicit restriction that + * no function will ever call notify_change with both ATTR_MODE and + * ATTR_KILL_S*ID set. + */ + if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) && + (ia_valid & ATTR_MODE)) + BUG(); + if (ia_valid & ATTR_KILL_SUID) { - attr->ia_valid &= ~ATTR_KILL_SUID; if (mode & S_ISUID) { - if (!(ia_valid & ATTR_MODE)) { - ia_valid = attr->ia_valid |= ATTR_MODE; - attr->ia_mode = inode->i_mode; - } - attr->ia_mode &= ~S_ISUID; + ia_valid = attr->ia_valid |= ATTR_MODE; + attr->ia_mode = (inode->i_mode & ~S_ISUID); } } if (ia_valid & ATTR_KILL_SGID) { - attr->ia_valid &= ~ ATTR_KILL_SGID; if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { if (!(ia_valid & ATTR_MODE)) { ia_valid = attr->ia_valid |= ATTR_MODE; @@ -145,7 +151,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr) attr->ia_mode &= ~S_ISGID; } } - if (!attr->ia_valid) + if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) return 0; if (ia_valid & ATTR_SIZE) diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index 19a9cafb5ddf..be46805972f0 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -182,7 +182,7 @@ int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_toke { struct autofs_wait_queue *wq, **wql; - for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) { + for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) { if ( wq->wait_queue_token == wait_queue_token ) break; } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 0d041a9cb348..1fe28e4754c2 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -376,7 +376,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok struct autofs_wait_queue *wq, **wql; mutex_lock(&sbi->wq_mutex); - for (wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next) { + for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) { if (wq->wait_queue_token == wait_queue_token) break; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index dd4167762a8e..279f3c5e0ce3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1538,6 +1538,11 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } time_buf.Attributes = 0; + + /* skip mode change if it's just for clearing setuid/setgid */ + if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) + attrs->ia_valid &= ~ATTR_MODE; + if (attrs->ia_valid & ATTR_MODE) { cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode)); mode = attrs->ia_mode; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6dacd39bf048..a4284ccac1f9 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -3001,7 +3001,7 @@ static int __init init_sys32_ioctl(void) int i; for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) { - if (ioctl_start[i].next != 0) { + if (ioctl_start[i].next) { printk("ioctl translation %d bad\n",i); return -1; } diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 5c817bd08389..350680fd7da7 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -148,7 +148,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i { struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; struct page *pages[BLKS_PER_BUF]; - unsigned i, blocknr, buffer, unread; + unsigned i, blocknr, buffer; unsigned long devsize; char *data; @@ -175,7 +175,6 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT; /* Ok, read in BLKS_PER_BUF pages completely first. */ - unread = 0; for (i = 0; i < BLKS_PER_BUF; i++) { struct page *page = NULL; @@ -362,7 +361,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (offset & 3) return -EINVAL; - buf = kmalloc(256, GFP_KERNEL); + buf = kmalloc(CRAMFS_MAXPATHLEN, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -376,7 +375,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int namelen, error; mutex_lock(&read_mutex); - de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+256); + de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN); name = (char *)(de+1); /* @@ -426,7 +425,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s char *name; int namelen, retval; - de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+256); + de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN); name = (char *)(de+1); /* Try to take advantage of sorted directories */ diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5701f816faf4..0b1ab016fa2e 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -914,6 +914,14 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) if (rc < 0) goto out; } + + /* + * mode change is for clearing setuid/setgid bits. Allow lower fs + * to interpret this in its own way. + */ + if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) + ia->ia_valid &= ~ATTR_MODE; + rc = notify_change(lower_dentry, ia); out: fsstack_copy_attr_all(inode, lower_inode, NULL); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 77b9953624f4..de6189291954 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -463,7 +463,7 @@ static void ep_free(struct eventpoll *ep) * holding "epmutex" we can be sure that no file cleanup code will hit * us during this operation. So we can avoid the lock on "ep->lock". */ - while ((rbp = rb_first(&ep->rbr)) != 0) { + while ((rbp = rb_first(&ep->rbr)) != NULL) { epi = rb_entry(rbp, struct epitem, rbn); ep_remove(ep, epi); } diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index dd1fd3c0fc05..a588e23841d4 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -47,7 +47,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) struct inode *inode = dentry->d_inode; int ret = 0; - J_ASSERT(ext3_journal_current_handle() == 0); + J_ASSERT(ext3_journal_current_handle() == NULL); /* * data=writeback: diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2f2b6864db10..3dec003b773e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1028,7 +1028,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, } if (buffer_new(&dummy)) { J_ASSERT(create != 0); - J_ASSERT(handle != 0); + J_ASSERT(handle != NULL); /* * Now that we do not always journal data, we should diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 771f7ada15d9..44de1453c301 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -245,10 +245,10 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(gdb); goto exit_bh; } - lock_buffer(bh); - memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); + lock_buffer(gdb); + memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); set_buffer_uptodate(gdb); - unlock_buffer(bh); + unlock_buffer(gdb); ext3_journal_dirty_metadata(handle, gdb); ext3_set_bit(bit, bh->b_data); brelse(gdb); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 141573de7a9a..81868c0bc40e 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1620,7 +1620,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) } brelse (bh); - sb_set_blocksize(sb, blocksize); + if (!sb_set_blocksize(sb, blocksize)) { + printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n", + blocksize); + goto out_fail; + } logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; bh = sb_bread(sb, logic_sb_block); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index b74bf4368441..e906b65448e2 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -20,6 +20,7 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> +#include "group.h" /* * balloc.c contains the blocks allocation and deallocation routines */ @@ -42,6 +43,94 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, } +/* Initializes an uninitialized block bitmap if given, and returns the + * number of blocks free in the group. */ +unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, + int block_group, struct ext4_group_desc *gdp) +{ + unsigned long start; + int bit, bit_max; + unsigned free_blocks, group_blocks; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (bh) { + J_ASSERT_BH(bh, buffer_locked(bh)); + + /* If checksum is bad mark all blocks used to prevent allocation + * essentially implementing a per-group read-only flag. */ + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + ext4_error(sb, __FUNCTION__, + "Checksum bad for group %u\n", block_group); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + gdp->bg_itable_unused = 0; + memset(bh->b_data, 0xff, sb->s_blocksize); + return 0; + } + memset(bh->b_data, 0, sb->s_blocksize); + } + + /* Check for superblock and gdt backups in this group */ + bit_max = ext4_bg_has_super(sb, block_group); + + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || + block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * + sbi->s_desc_per_block) { + if (bit_max) { + bit_max += ext4_bg_num_gdb(sb, block_group); + bit_max += + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + } + } else { /* For META_BG_BLOCK_GROUPS */ + int group_rel = (block_group - + le32_to_cpu(sbi->s_es->s_first_meta_bg)) % + EXT4_DESC_PER_BLOCK(sb); + if (group_rel == 0 || group_rel == 1 || + (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1)) + bit_max += 1; + } + + if (block_group == sbi->s_groups_count - 1) { + /* + * Even though mke2fs always initialize first and last group + * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need + * to make sure we calculate the right free blocks + */ + group_blocks = ext4_blocks_count(sbi->s_es) - + le32_to_cpu(sbi->s_es->s_first_data_block) - + (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); + } else { + group_blocks = EXT4_BLOCKS_PER_GROUP(sb); + } + + free_blocks = group_blocks - bit_max; + + if (bh) { + for (bit = 0; bit < bit_max; bit++) + ext4_set_bit(bit, bh->b_data); + + start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(sbi->s_es->s_first_data_block); + + /* Set bits for block and inode bitmaps, and inode table */ + ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); + ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); + for (bit = (ext4_inode_table(sb, gdp) - start), + bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) + ext4_set_bit(bit, bh->b_data); + + /* + * Also if the number of blocks within the group is + * less than the blocksize * 8 ( which is the size + * of bitmap ), set rest of the block bitmap to 1 + */ + mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); + } + + return free_blocks - sbi->s_itb_per_group - 2; +} + + /* * The free blocks are managed by bitmaps. A file system contains several * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap @@ -119,7 +208,7 @@ block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map) * * Return buffer_head on success or NULL in case of failure. */ -static struct buffer_head * +struct buffer_head * read_block_bitmap(struct super_block *sb, unsigned int block_group) { int i; @@ -127,11 +216,24 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) struct buffer_head * bh = NULL; ext4_fsblk_t bitmap_blk; - desc = ext4_get_group_desc (sb, block_group, NULL); + desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) return NULL; bitmap_blk = ext4_block_bitmap(sb, desc); - bh = sb_bread(sb, bitmap_blk); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + bh = sb_getblk(sb, bitmap_blk); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_init_block_bitmap(sb, bh, block_group, + desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, bitmap_blk); + } if (!bh) ext4_error (sb, __FUNCTION__, "Cannot read block bitmap - " @@ -627,6 +729,7 @@ do_more: desc->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + group_freed); + desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_add(&sbi->s_freeblocks_counter, count); @@ -1685,8 +1788,11 @@ allocated: ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); spin_unlock(sb_bgl_lock(sbi, group_no)); percpu_counter_sub(&sbi->s_freeblocks_counter, num); diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 0fb1e62b20d0..f612bef98315 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -47,9 +47,7 @@ const struct file_operations ext4_dir_operations = { .compat_ioctl = ext4_compat_ioctl, #endif .fsync = ext4_sync_file, /* BKL held */ -#ifdef CONFIG_EXT4_INDEX .release = ext4_release_dir, -#endif }; @@ -107,7 +105,6 @@ static int ext4_readdir(struct file * filp, sb = inode->i_sb; -#ifdef CONFIG_EXT4_INDEX if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_COMPAT_DIR_INDEX) && ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || @@ -123,7 +120,6 @@ static int ext4_readdir(struct file * filp, */ EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; } -#endif stored = 0; offset = filp->f_pos & (sb->s_blocksize - 1); @@ -232,7 +228,6 @@ out: return ret; } -#ifdef CONFIG_EXT4_INDEX /* * These functions convert from the major/minor hash to an f_pos * value. @@ -518,5 +513,3 @@ static int ext4_release_dir (struct inode * inode, struct file * filp) return 0; } - -#endif diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 78beb096f57d..85287742f2ae 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -33,7 +33,7 @@ #include <linux/fs.h> #include <linux/time.h> #include <linux/ext4_jbd2.h> -#include <linux/jbd.h> +#include <linux/jbd2.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/quotaops.h> @@ -52,7 +52,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) { ext4_fsblk_t block; - block = le32_to_cpu(ex->ee_start); + block = le32_to_cpu(ex->ee_start_lo); block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; return block; } @@ -65,7 +65,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) { ext4_fsblk_t block; - block = le32_to_cpu(ix->ei_leaf); + block = le32_to_cpu(ix->ei_leaf_lo); block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; return block; } @@ -77,7 +77,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) */ static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) { - ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); + ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } @@ -88,7 +88,7 @@ static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) */ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) { - ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); + ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } @@ -1409,8 +1409,7 @@ has_space: eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); nearex = path[depth].p_ext; nearex->ee_block = newext->ee_block; - nearex->ee_start = newext->ee_start; - nearex->ee_start_hi = newext->ee_start_hi; + ext4_ext_store_pblock(nearex, ext_pblock(newext)); nearex->ee_len = newext->ee_len; merge: @@ -2177,7 +2176,6 @@ int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, } /* ex2: iblock to iblock + maxblocks-1 : initialised */ ex2->ee_block = cpu_to_le32(iblock); - ex2->ee_start = cpu_to_le32(newblock); ext4_ext_store_pblock(ex2, newblock); ex2->ee_len = cpu_to_le16(allocated); if (ex2 != ex) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 2a167d7131fa..8d50879d1c2c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -47,7 +47,7 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) struct inode *inode = dentry->d_inode; int ret = 0; - J_ASSERT(ext4_journal_current_handle() == 0); + J_ASSERT(ext4_journal_current_handle() == NULL); /* * data=writeback: diff --git a/fs/ext4/group.h b/fs/ext4/group.h new file mode 100644 index 000000000000..1577910bb58b --- /dev/null +++ b/fs/ext4/group.h @@ -0,0 +1,27 @@ +/* + * linux/fs/ext4/group.h + * + * Copyright (C) 2007 Cluster File Systems, Inc + * + * Author: Andreas Dilger <adilger@clusterfs.com> + */ + +#ifndef _LINUX_EXT4_GROUP_H +#define _LINUX_EXT4_GROUP_H + +extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +struct buffer_head *read_block_bitmap(struct super_block *sb, + unsigned int block_group); +extern unsigned ext4_init_block_bitmap(struct super_block *sb, + struct buffer_head *bh, int group, + struct ext4_group_desc *desc); +#define ext4_free_blocks_after_init(sb, group, desc) \ + ext4_init_block_bitmap(sb, NULL, group, desc) +extern unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, int group, + struct ext4_group_desc *desc); +extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); +#endif /* _LINUX_EXT4_GROUP_H */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index d0c7793d9393..c61f37fd3f05 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -28,6 +28,7 @@ #include "xattr.h" #include "acl.h" +#include "group.h" /* * ialloc.c contains the inodes allocation and deallocation routines @@ -43,6 +44,52 @@ * the free blocks count in the block. */ +/* + * To avoid calling the atomic setbit hundreds or thousands of times, we only + * need to use it within a single byte (to ensure we get endianness right). + * We can use memset for the rest of the bitmap as there are no other users. + */ +void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) +{ + int i; + + if (start_bit >= end_bit) + return; + + ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) + ext4_set_bit(i, bitmap); + if (i < end_bit) + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); +} + +/* Initializes an uninitialized inode bitmap */ +unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, int block_group, + struct ext4_group_desc *gdp) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + J_ASSERT_BH(bh, buffer_locked(bh)); + + /* If checksum is bad mark all blocks and inodes use to prevent + * allocation, essentially implementing a per-group read-only flag. */ + if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { + ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n", + block_group); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + gdp->bg_itable_unused = 0; + memset(bh->b_data, 0xff, sb->s_blocksize); + return 0; + } + + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), + bh->b_data); + + return EXT4_INODES_PER_GROUP(sb); +} /* * Read the inode allocation bitmap for a given block_group, reading @@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group) desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) goto error_out; - - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext4_init_inode_bitmap(sb, bh, block_group, + desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + } if (!bh) ext4_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " @@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) if (is_directory) gdp->bg_used_dirs_count = cpu_to_le16( le16_to_cpu(gdp->bg_used_dirs_count) - 1); + gdp->bg_checksum = ext4_group_desc_csum(sbi, + block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_inc(&sbi->s_freeinodes_counter); if (is_directory) @@ -435,7 +496,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) struct ext4_sb_info *sbi; int err = 0; struct inode *ret; - int i; + int i, free = 0; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) @@ -517,11 +578,13 @@ repeat_in_this_group: goto out; got: - ino += group * EXT4_INODES_PER_GROUP(sb) + 1; - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error (sb, "ext4_new_inode", - "reserved inode or inode > inodes count - " - "block_group = %d, inode=%lu", group, ino); + ino++; + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || + ino > EXT4_INODES_PER_GROUP(sb)) { + ext4_error(sb, __FUNCTION__, + "reserved inode or inode > inodes count - " + "block_group = %d, inode=%lu", group, + ino + group * EXT4_INODES_PER_GROUP(sb)); err = -EIO; goto fail; } @@ -529,13 +592,78 @@ got: BUFFER_TRACE(bh2, "get_write_access"); err = ext4_journal_get_write_access(handle, bh2); if (err) goto fail; + + /* We may have to initialize the block bitmap if it isn't already */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && + gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + struct buffer_head *block_bh = read_block_bitmap(sb, group); + + BUFFER_TRACE(block_bh, "get block bitmap access"); + err = ext4_journal_get_write_access(handle, block_bh); + if (err) { + brelse(block_bh); + goto fail; + } + + free = 0; + spin_lock(sb_bgl_lock(sbi, group)); + /* recheck and clear flag under lock if we still need to */ + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + free = ext4_free_blocks_after_init(sb, group, gdp); + gdp->bg_free_blocks_count = cpu_to_le16(free); + } + spin_unlock(sb_bgl_lock(sbi, group)); + + /* Don't need to dirty bitmap block if we didn't change it */ + if (free) { + BUFFER_TRACE(block_bh, "dirty block bitmap"); + err = ext4_journal_dirty_metadata(handle, block_bh); + } + + brelse(block_bh); + if (err) + goto fail; + } + spin_lock(sb_bgl_lock(sbi, group)); + /* If we didn't allocate from within the initialized part of the inode + * table then we need to initialize up to this inode. */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + + /* When marking the block group with + * ~EXT4_BG_INODE_UNINIT we don't want to depend + * on the value of bg_itable_unsed even though + * mke2fs could have initialized the same for us. + * Instead we calculated the value below + */ + + free = 0; + } else { + free = EXT4_INODES_PER_GROUP(sb) - + le16_to_cpu(gdp->bg_itable_unused); + } + + /* + * Check the relative inode number against the last used + * relative inode number in this group. if it is greater + * we need to update the bg_itable_unused count + * + */ + if (ino > free) + gdp->bg_itable_unused = + cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); + } + gdp->bg_free_inodes_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); if (S_ISDIR(mode)) { gdp->bg_used_dirs_count = cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); } + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); err = ext4_journal_dirty_metadata(handle, bh2); @@ -557,7 +685,7 @@ got: inode->i_gid = current->fsgid; inode->i_mode = mode; - inode->i_ino = ino; + inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = @@ -573,11 +701,6 @@ got: /* dirsync only applies to directories */ if (!S_ISDIR(mode)) ei->i_flags &= ~EXT4_DIRSYNC_FL; -#ifdef EXT4_FRAGMENTS - ei->i_faddr = 0; - ei->i_frag_no = 0; - ei->i_frag_size = 0; -#endif ei->i_file_acl = 0; ei->i_dir_acl = 0; ei->i_dtime = 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0df2b1e06d0b..5489703d9573 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1027,7 +1027,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, } if (buffer_new(&dummy)) { J_ASSERT(create != 0); - J_ASSERT(handle != 0); + J_ASSERT(handle != NULL); /* * Now that we do not always journal data, we should @@ -2711,11 +2711,6 @@ void ext4_read_inode(struct inode * inode) } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); -#ifdef EXT4_FRAGMENTS - ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); - ei->i_frag_no = raw_inode->i_frag; - ei->i_frag_size = raw_inode->i_fsize; -#endif ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_HURD)) @@ -2860,11 +2855,6 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags); -#ifdef EXT4_FRAGMENTS - raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); - raw_inode->i_frag = ei->i_frag_no; - raw_inode->i_fsize = ei->i_frag_size; -#endif if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_HURD)) raw_inode->i_file_acl_high = @@ -3243,12 +3233,14 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) iloc, handle); if (ret) { EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; - if (mnt_count != sbi->s_es->s_mnt_count) { + if (mnt_count != + le16_to_cpu(sbi->s_es->s_mnt_count)) { ext4_warning(inode->i_sb, __FUNCTION__, "Unable to expand inode %lu. Delete" " some EAs or run e2fsck.", inode->i_ino); - mnt_count = sbi->s_es->s_mnt_count; + mnt_count = + le16_to_cpu(sbi->s_es->s_mnt_count); } } } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5fdb862e71c4..94ee6f315dc1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -144,7 +144,6 @@ struct dx_map_entry u16 size; }; -#ifdef CONFIG_EXT4_INDEX static inline unsigned dx_get_block (struct dx_entry *entry); static void dx_set_block (struct dx_entry *entry, unsigned value); static inline unsigned dx_get_hash (struct dx_entry *entry); @@ -766,8 +765,6 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) dx_set_block(new, block); dx_set_count(entries, count + 1); } -#endif - static void ext4_update_dx_flag(struct inode *inode) { @@ -869,7 +866,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, name = dentry->d_name.name; if (namelen > EXT4_NAME_LEN) return NULL; -#ifdef CONFIG_EXT4_INDEX if (is_dx(dir)) { bh = ext4_dx_find_entry(dentry, res_dir, &err); /* @@ -881,7 +877,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, return bh; dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); } -#endif nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); start = EXT4_I(dir)->i_dir_start_lookup; if (start >= nblocks) @@ -957,7 +952,6 @@ cleanup_and_exit: return ret; } -#ifdef CONFIG_EXT4_INDEX static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, struct ext4_dir_entry_2 **res_dir, int *err) { @@ -1025,7 +1019,6 @@ errout: dx_release (frames); return NULL; } -#endif static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { @@ -1121,7 +1114,6 @@ static inline void ext4_set_de_type(struct super_block *sb, de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; } -#ifdef CONFIG_EXT4_INDEX /* * Move count entries from end of map between two memory locations. * Returns pointer to last entry moved. @@ -1266,8 +1258,6 @@ errout: *error = err; return NULL; } -#endif - /* * Add a new entry into a directory (leaf) block. If de is non-NULL, @@ -1364,7 +1354,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, return 0; } -#ifdef CONFIG_EXT4_INDEX /* * This converts a one block unindexed directory to a 3 block indexed * directory, and adds the dentry to the indexed directory. @@ -1443,7 +1432,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, return add_dirent_to_buf(handle, dentry, inode, de, bh); } -#endif /* * ext4_add_entry() @@ -1464,9 +1452,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, struct ext4_dir_entry_2 *de; struct super_block * sb; int retval; -#ifdef CONFIG_EXT4_INDEX int dx_fallback=0; -#endif unsigned blocksize; u32 block, blocks; @@ -1474,7 +1460,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, blocksize = sb->s_blocksize; if (!dentry->d_name.len) return -EINVAL; -#ifdef CONFIG_EXT4_INDEX if (is_dx(dir)) { retval = ext4_dx_add_entry(handle, dentry, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) @@ -1483,7 +1468,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, dx_fallback++; ext4_mark_inode_dirty(handle, dir); } -#endif blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0, offset = 0; block < blocks; block++) { bh = ext4_bread(handle, dir, block, 0, &retval); @@ -1493,11 +1477,9 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, if (retval != -ENOSPC) return retval; -#ifdef CONFIG_EXT4_INDEX if (blocks == 1 && !dx_fallback && EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) return make_indexed_dir(handle, dentry, inode, bh); -#endif brelse(bh); } bh = ext4_append(handle, dir, &block, &retval); @@ -1509,7 +1491,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry, return add_dirent_to_buf(handle, dentry, inode, de, bh); } -#ifdef CONFIG_EXT4_INDEX /* * Returns 0 for success, or a negative error value */ @@ -1644,7 +1625,6 @@ cleanup: dx_release(frames); return err; } -#endif /* * ext4_delete_entry deletes a directory entry by merging it with the diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 472fc0d3e1c0..bd8a52bb3999 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -16,6 +16,7 @@ #include <linux/errno.h> #include <linux/slab.h> +#include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -140,22 +141,29 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, } /* - * To avoid calling the atomic setbit hundreds or thousands of times, we only - * need to use it within a single byte (to ensure we get endianness right). - * We can use memset for the rest of the bitmap as there are no other users. + * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA. + * If that fails, restart the transaction & regain write access for the + * buffer head which is used for block_bitmap modifications. */ -static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) +static int extend_or_restart_transaction(handle_t *handle, int thresh, + struct buffer_head *bh) { - int i; + int err; + + if (handle->h_buffer_credits >= thresh) + return 0; - if (start_bit >= end_bit) - return; + err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA); + if (err < 0) + return err; + if (err) { + if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) + return err; + if ((err = ext4_journal_get_write_access(handle, bh))) + return err; + } - ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); - for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) - ext4_set_bit(i, bitmap); - if (i < end_bit) - memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); + return 0; } /* @@ -180,8 +188,9 @@ static int setup_new_group_blocks(struct super_block *sb, int i; int err = 0, err2; - handle = ext4_journal_start_sb(sb, reserved_gdb + gdblocks + - 2 + sbi->s_itb_per_group); + /* This transaction may be extended/restarted along the way */ + handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); + if (IS_ERR(handle)) return PTR_ERR(handle); @@ -208,6 +217,9 @@ static int setup_new_group_blocks(struct super_block *sb, ext4_debug("update backup group %#04lx (+%d)\n", block, bit); + if ((err = extend_or_restart_transaction(handle, 1, bh))) + goto exit_bh; + gdb = sb_getblk(sb, block); if (!gdb) { err = -EIO; @@ -217,10 +229,10 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(gdb); goto exit_bh; } - lock_buffer(bh); - memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); + lock_buffer(gdb); + memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); set_buffer_uptodate(gdb); - unlock_buffer(bh); + unlock_buffer(gdb); ext4_journal_dirty_metadata(handle, gdb); ext4_set_bit(bit, bh->b_data); brelse(gdb); @@ -233,6 +245,9 @@ static int setup_new_group_blocks(struct super_block *sb, ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit); + if ((err = extend_or_restart_transaction(handle, 1, bh))) + goto exit_bh; + if (IS_ERR(gdb = bclean(handle, sb, block))) { err = PTR_ERR(bh); goto exit_bh; @@ -254,6 +269,10 @@ static int setup_new_group_blocks(struct super_block *sb, struct buffer_head *it; ext4_debug("clear inode block %#04lx (+%d)\n", block, bit); + + if ((err = extend_or_restart_transaction(handle, 1, bh))) + goto exit_bh; + if (IS_ERR(it = bclean(handle, sb, block))) { err = PTR_ERR(it); goto exit_bh; @@ -262,6 +281,10 @@ static int setup_new_group_blocks(struct super_block *sb, brelse(it); ext4_set_bit(bit, bh->b_data); } + + if ((err = extend_or_restart_transaction(handle, 2, bh))) + goto exit_bh; + mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), bh->b_data); ext4_journal_dirty_metadata(handle, bh); @@ -289,7 +312,6 @@ exit_journal: return err; } - /* * Iterate through the groups which hold BACKUP superblock/GDT copies in an * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before @@ -842,6 +864,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); + gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); /* * Make the new blocks and inodes valid next. We do this before diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4c8d31c61454..b11e9e2bcd01 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -37,12 +37,14 @@ #include <linux/quotaops.h> #include <linux/seq_file.h> #include <linux/log2.h> +#include <linux/crc16.h> #include <asm/uaccess.h> #include "xattr.h" #include "acl.h" #include "namei.h" +#include "group.h" static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); @@ -68,31 +70,31 @@ static void ext4_write_super_lockfs(struct super_block *sb); ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { - return le32_to_cpu(bg->bg_block_bitmap) | + return le32_to_cpu(bg->bg_block_bitmap_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); } ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { - return le32_to_cpu(bg->bg_inode_bitmap) | + return le32_to_cpu(bg->bg_inode_bitmap_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); } ext4_fsblk_t ext4_inode_table(struct super_block *sb, struct ext4_group_desc *bg) { - return le32_to_cpu(bg->bg_inode_table) | + return le32_to_cpu(bg->bg_inode_table_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); } void ext4_block_bitmap_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk) { - bg->bg_block_bitmap = cpu_to_le32((u32)blk); + bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); } @@ -100,7 +102,7 @@ void ext4_block_bitmap_set(struct super_block *sb, void ext4_inode_bitmap_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk) { - bg->bg_inode_bitmap = cpu_to_le32((u32)blk); + bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); } @@ -108,7 +110,7 @@ void ext4_inode_bitmap_set(struct super_block *sb, void ext4_inode_table_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk) { - bg->bg_inode_table = cpu_to_le32((u32)blk); + bg->bg_inode_table_lo = cpu_to_le32((u32)blk); if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); } @@ -1037,7 +1039,7 @@ static int parse_options (char *options, struct super_block *sb, if (option < 0) return 0; if (option == 0) - option = JBD_DEFAULT_MAX_COMMIT_AGE; + option = JBD2_DEFAULT_MAX_COMMIT_AGE; sbi->s_commit_interval = HZ * option; break; case Opt_data_journal: @@ -1308,6 +1310,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, return res; } +__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, + struct ext4_group_desc *gdp) +{ + __u16 crc = 0; + + if (sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + int offset = offsetof(struct ext4_group_desc, bg_checksum); + __le32 le_group = cpu_to_le32(block_group); + + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); + crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); + crc = crc16(crc, (__u8 *)gdp, offset); + offset += sizeof(gdp->bg_checksum); /* skip checksum */ + /* for checksum of struct ext4_group_desc do the rest...*/ + if ((sbi->s_es->s_feature_incompat & + cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && + offset < le16_to_cpu(sbi->s_es->s_desc_size)) + crc = crc16(crc, (__u8 *)gdp + offset, + le16_to_cpu(sbi->s_es->s_desc_size) - + offset); + } + + return cpu_to_le16(crc); +} + +int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, + struct ext4_group_desc *gdp) +{ + if ((sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && + (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) + return 0; + + return 1; +} + /* Called at mount-time, super-block is locked */ static int ext4_check_descriptors (struct super_block * sb) { @@ -1319,13 +1358,17 @@ static int ext4_check_descriptors (struct super_block * sb) ext4_fsblk_t inode_table; struct ext4_group_desc * gdp = NULL; int desc_block = 0; + int flexbg_flag = 0; int i; + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) + flexbg_flag = 1; + ext4_debug ("Checking group descriptors"); for (i = 0; i < sbi->s_groups_count; i++) { - if (i == sbi->s_groups_count - 1) + if (i == sbi->s_groups_count - 1 || flexbg_flag) last_block = ext4_blocks_count(sbi->s_es) - 1; else last_block = first_block + @@ -1362,7 +1405,16 @@ static int ext4_check_descriptors (struct super_block * sb) i, inode_table); return 0; } - first_block += EXT4_BLOCKS_PER_GROUP(sb); + if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { + ext4_error(sb, __FUNCTION__, + "Checksum for group %d failed (%u!=%u)\n", i, + le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), + le16_to_cpu(gdp->bg_checksum)); + return 0; + } + if (!flexbg_flag) + first_block += EXT4_BLOCKS_PER_GROUP(sb); gdp = (struct ext4_group_desc *) ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); } @@ -1726,14 +1778,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); } - sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); - if (blocksize != sbi->s_frag_size) { - printk(KERN_ERR - "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n", - sbi->s_frag_size, blocksize); - goto failed_mount; - } sbi->s_desc_size = le16_to_cpu(es->s_desc_size); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || @@ -1747,7 +1791,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } else sbi->s_desc_size = EXT4_MIN_DESC_SIZE; sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); if (EXT4_INODE_SIZE(sb) == 0) goto cantfind_ext4; @@ -1771,12 +1814,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group); goto failed_mount; } - if (sbi->s_frags_per_group > blocksize * 8) { - printk (KERN_ERR - "EXT4-fs: #fragments per group too big: %lu\n", - sbi->s_frags_per_group); - goto failed_mount; - } if (sbi->s_inodes_per_group > blocksize * 8) { printk (KERN_ERR "EXT4-fs: #inodes per group too big: %lu\n", @@ -2630,7 +2667,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) if (test_opt(sb, MINIX_DF)) { sbi->s_overhead_last = 0; - } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { + } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { unsigned long ngroups = sbi->s_groups_count, i; ext4_fsblk_t overhead = 0; smp_rmb(); @@ -2665,14 +2702,14 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) overhead += ngroups * (2 + sbi->s_itb_per_group); sbi->s_overhead_last = overhead; smp_wmb(); - sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); + sbi->s_blocks_last = ext4_blocks_count(es); } buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); - es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); + ext4_free_blocks_count_set(es, buf->f_bfree); buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) buf->f_bavail = 0; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b10d68fffb55..86387302c2a9 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -750,12 +750,11 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } } else { /* Allocate a buffer where we construct the new block. */ - s->base = kmalloc(sb->s_blocksize, GFP_KERNEL); + s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); /* assert(header == s->base) */ error = -ENOMEM; if (s->base == NULL) goto cleanup; - memset(s->base, 0, sb->s_blocksize); header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); header(s->base)->h_blocks = cpu_to_le32(1); header(s->base)->h_refcount = cpu_to_le32(1); @@ -1121,7 +1120,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int total_ino, total_blk; void *base, *start, *end; int extra_isize = 0, error = 0, tried_min_extra_isize = 0; - int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize; + int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); down_write(&EXT4_I(inode)->xattr_sem); retry: @@ -1293,7 +1292,7 @@ retry: i.name = b_entry_name; i.value = buffer; - i.value_len = cpu_to_le32(size); + i.value_len = size; error = ext4_xattr_block_find(inode, &i, bs); if (error) goto cleanup; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d1acab931330..3763757f9fe7 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -63,13 +63,21 @@ static u64 time_to_jiffies(unsigned long sec, unsigned long nsec) * Set dentry and possibly attribute timeouts from the lookup/mk* * replies */ -static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) +static void fuse_change_entry_timeout(struct dentry *entry, + struct fuse_entry_out *o) { fuse_dentry_settime(entry, time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); - if (entry->d_inode) - get_fuse_inode(entry->d_inode)->i_time = - time_to_jiffies(o->attr_valid, o->attr_valid_nsec); +} + +static u64 attr_timeout(struct fuse_attr_out *o) +{ + return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); +} + +static u64 entry_attr_timeout(struct fuse_entry_out *o) +{ + return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); } /* @@ -108,13 +116,19 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, struct dentry *entry, struct fuse_entry_out *outarg) { + struct fuse_conn *fc = get_fuse_conn(dir); + + memset(outarg, 0, sizeof(struct fuse_entry_out)); req->in.h.opcode = FUSE_LOOKUP; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; req->out.numargs = 1; - req->out.args[0].size = sizeof(struct fuse_entry_out); + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + else + req->out.args[0].size = sizeof(struct fuse_entry_out); req->out.args[0].value = outarg; } @@ -140,6 +154,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) struct fuse_req *req; struct fuse_req *forget_req; struct dentry *parent; + u64 attr_version; /* For negative dentries, always do a fresh lookup */ if (!inode) @@ -156,6 +171,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) return 0; } + spin_lock(&fc->lock); + attr_version = fc->attr_version; + spin_unlock(&fc->lock); + parent = dget_parent(entry); fuse_lookup_init(req, parent->d_inode, entry, &outarg); request_send(fc, req); @@ -180,8 +199,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) return 0; - fuse_change_attributes(inode, &outarg.attr); - fuse_change_timeout(entry, &outarg); + fuse_change_attributes(inode, &outarg.attr, + entry_attr_timeout(&outarg), + attr_version); + fuse_change_entry_timeout(entry, &outarg); } return 1; } @@ -228,6 +249,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; struct fuse_req *forget_req; + u64 attr_version; if (entry->d_name.len > FUSE_NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -242,6 +264,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, return ERR_PTR(PTR_ERR(forget_req)); } + spin_lock(&fc->lock); + attr_version = fc->attr_version; + spin_unlock(&fc->lock); + fuse_lookup_init(req, dir, entry, &outarg); request_send(fc, req); err = req->out.h.error; @@ -253,7 +279,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, err = -EIO; if (!err && outarg.nodeid) { inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr); + &outarg.attr, entry_attr_timeout(&outarg), + attr_version); if (!inode) { fuse_send_forget(fc, forget_req, outarg.nodeid, 1); return ERR_PTR(-ENOMEM); @@ -276,7 +303,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, entry->d_op = &fuse_dentry_operations; if (!err) - fuse_change_timeout(entry, &outarg); + fuse_change_entry_timeout(entry, &outarg); else fuse_invalidate_entry_cache(entry); return NULL; @@ -335,6 +362,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); + memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; req->in.h.opcode = FUSE_CREATE; @@ -345,7 +373,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; req->out.numargs = 2; - req->out.args[0].size = sizeof(outentry); + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + else + req->out.args[0].size = sizeof(outentry); req->out.args[0].value = &outentry; req->out.args[1].size = sizeof(outopen); req->out.args[1].value = &outopen; @@ -363,7 +394,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_put_request(fc, req); inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, - &outentry.attr); + &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); ff->fh = outopen.fh; @@ -373,7 +404,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, } fuse_put_request(fc, forget_req); d_instantiate(entry, inode); - fuse_change_timeout(entry, &outentry); + fuse_change_entry_timeout(entry, &outentry); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { ff->fh = outopen.fh; @@ -410,9 +441,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, return PTR_ERR(forget_req); } + memset(&outarg, 0, sizeof(outarg)); req->in.h.nodeid = get_node_id(dir); req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; + else + req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; request_send(fc, req); err = req->out.h.error; @@ -428,7 +463,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, goto out_put_forget_req; inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, - &outarg.attr); + &outarg.attr, entry_attr_timeout(&outarg), 0); if (!inode) { fuse_send_forget(fc, forget_req, outarg.nodeid, 1); return -ENOMEM; @@ -451,7 +486,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, } else d_instantiate(entry, inode); - fuse_change_timeout(entry, &outarg); + fuse_change_entry_timeout(entry, &outarg); fuse_invalidate_attr(dir); return 0; @@ -663,52 +698,84 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, return err; } -static int fuse_do_getattr(struct inode *inode) +static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, + struct kstat *stat) +{ + stat->dev = inode->i_sb->s_dev; + stat->ino = attr->ino; + stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); + stat->nlink = attr->nlink; + stat->uid = attr->uid; + stat->gid = attr->gid; + stat->rdev = inode->i_rdev; + stat->atime.tv_sec = attr->atime; + stat->atime.tv_nsec = attr->atimensec; + stat->mtime.tv_sec = attr->mtime; + stat->mtime.tv_nsec = attr->mtimensec; + stat->ctime.tv_sec = attr->ctime; + stat->ctime.tv_nsec = attr->ctimensec; + stat->size = attr->size; + stat->blocks = attr->blocks; + stat->blksize = (1 << inode->i_blkbits); +} + +static int fuse_do_getattr(struct inode *inode, struct kstat *stat, + struct file *file) { int err; - struct fuse_attr_out arg; + struct fuse_getattr_in inarg; + struct fuse_attr_out outarg; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_req(fc); + struct fuse_req *req; + u64 attr_version; + + req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); + spin_lock(&fc->lock); + attr_version = fc->attr_version; + spin_unlock(&fc->lock); + + memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); + /* Directories have separate file-handle space */ + if (file && S_ISREG(inode->i_mode)) { + struct fuse_file *ff = file->private_data; + + inarg.getattr_flags |= FUSE_GETATTR_FH; + inarg.fh = ff->fh; + } req->in.h.opcode = FUSE_GETATTR; req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; req->out.numargs = 1; - req->out.args[0].size = sizeof(arg); - req->out.args[0].value = &arg; + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + else + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (!err) { - if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) { + if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; } else { - struct fuse_inode *fi = get_fuse_inode(inode); - fuse_change_attributes(inode, &arg.attr); - fi->i_time = time_to_jiffies(arg.attr_valid, - arg.attr_valid_nsec); + fuse_change_attributes(inode, &outarg.attr, + attr_timeout(&outarg), + attr_version); + if (stat) + fuse_fillattr(inode, &outarg.attr, stat); } } return err; } /* - * Check if attributes are still valid, and if not send a GETATTR - * request to refresh them. - */ -static int fuse_refresh_attributes(struct inode *inode) -{ - struct fuse_inode *fi = get_fuse_inode(inode); - - if (fi->i_time < get_jiffies_64()) - return fuse_do_getattr(inode); - else - return 0; -} - -/* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities over the requester process. This * means, that the filesystem daemon is able to record the exact @@ -721,7 +788,7 @@ static int fuse_refresh_attributes(struct inode *inode) * for which the owner of the mount has ptrace privilege. This * excludes processes started by other users, suid or sgid processes. */ -static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) +int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) { if (fc->flags & FUSE_ALLOW_OTHER) return 1; @@ -795,11 +862,14 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) */ if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) || ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { - err = fuse_refresh_attributes(inode); - if (err) - return err; + struct fuse_inode *fi = get_fuse_inode(inode); + if (fi->i_time < get_jiffies_64()) { + err = fuse_do_getattr(inode, NULL, NULL); + if (err) + return err; - refreshed = true; + refreshed = true; + } } if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { @@ -809,7 +879,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) attributes. This is also needed, because the root node will at first have no permissions */ if (err == -EACCES && !refreshed) { - err = fuse_do_getattr(inode); + err = fuse_do_getattr(inode, NULL, NULL); if (!err) err = generic_permission(inode, mask, NULL); } @@ -825,7 +895,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) if (refreshed) return -EACCES; - err = fuse_do_getattr(inode); + err = fuse_do_getattr(inode, NULL, NULL); if (!err && !(inode->i_mode & S_IXUGO)) return -EACCES; } @@ -962,6 +1032,20 @@ static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) return file ? fuse_fsync_common(file, de, datasync, 1) : 0; } +static bool update_mtime(unsigned ivalid) +{ + /* Always update if mtime is explicitly set */ + if (ivalid & ATTR_MTIME_SET) + return true; + + /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ + if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) + return false; + + /* In all other cases update */ + return true; +} + static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) { unsigned ivalid = iattr->ia_valid; @@ -974,16 +1058,19 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; if (ivalid & ATTR_SIZE) arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; - /* You can only _set_ these together (they may change by themselves) */ - if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { - arg->valid |= FATTR_ATIME | FATTR_MTIME; + if (ivalid & ATTR_ATIME) { + arg->valid |= FATTR_ATIME; arg->atime = iattr->ia_atime.tv_sec; - arg->mtime = iattr->ia_mtime.tv_sec; + arg->atimensec = iattr->ia_atime.tv_nsec; + if (!(ivalid & ATTR_ATIME_SET)) + arg->valid |= FATTR_ATIME_NOW; } - if (ivalid & ATTR_FILE) { - struct fuse_file *ff = iattr->ia_file->private_data; - arg->valid |= FATTR_FH; - arg->fh = ff->fh; + if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { + arg->valid |= FATTR_MTIME; + arg->mtime = iattr->ia_mtime.tv_sec; + arg->mtimensec = iattr->ia_mtime.tv_nsec; + if (!(ivalid & ATTR_MTIME_SET)) + arg->valid |= FATTR_MTIME_NOW; } } @@ -995,22 +1082,28 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ -static int fuse_setattr(struct dentry *entry, struct iattr *attr) +static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, + struct file *file) { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; int err; + if (!fuse_allow_task(fc, current)) + return -EACCES; + if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { err = inode_change_ok(inode, attr); if (err) return err; } + if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) + return 0; + if (attr->ia_valid & ATTR_SIZE) { unsigned long limit; if (IS_SWAPFILE(inode)) @@ -1027,14 +1120,28 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(attr, &inarg); + if (file) { + struct fuse_file *ff = file->private_data; + inarg.valid |= FATTR_FH; + inarg.fh = ff->fh; + } + if (attr->ia_valid & ATTR_SIZE) { + /* For mandatory locking in truncate */ + inarg.valid |= FATTR_LOCKOWNER; + inarg.lock_owner = fuse_lock_owner_id(fc, current->files); + } req->in.h.opcode = FUSE_SETATTR; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + else + req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; request_send(fc, req); err = req->out.h.error; @@ -1050,11 +1157,18 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) return -EIO; } - fuse_change_attributes(inode, &outarg.attr); - fi->i_time = time_to_jiffies(outarg.attr_valid, outarg.attr_valid_nsec); + fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0); return 0; } +static int fuse_setattr(struct dentry *entry, struct iattr *attr) +{ + if (attr->ia_valid & ATTR_FILE) + return fuse_do_setattr(entry, attr, attr->ia_file); + else + return fuse_do_setattr(entry, attr, NULL); +} + static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, struct kstat *stat) { @@ -1066,8 +1180,10 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, if (!fuse_allow_task(fc, current)) return -EACCES; - err = fuse_refresh_attributes(inode); - if (!err) { + if (fi->i_time < get_jiffies_64()) + err = fuse_do_getattr(inode, stat, NULL); + else { + err = 0; generic_fillattr(inode, stat); stat->mode = fi->orig_i_mode; } @@ -1172,6 +1288,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (!fuse_allow_task(fc, current)) + return -EACCES; + if (fc->no_listxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c4b98c03a46e..0fcdba9d47c0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -28,7 +28,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); - inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); + if (!fc->atomic_o_trunc) + inarg.flags &= ~O_TRUNC; req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; @@ -54,6 +56,7 @@ struct fuse_file *fuse_file_alloc(void) kfree(ff); ff = NULL; } + INIT_LIST_HEAD(&ff->write_entry); atomic_set(&ff->count, 0); } return ff; @@ -148,12 +151,18 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) { struct fuse_file *ff = file->private_data; if (ff) { + struct fuse_conn *fc = get_fuse_conn(inode); + fuse_release_fill(ff, get_node_id(inode), file->f_flags, isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); /* Hold vfsmount and dentry until release is finished */ ff->reserved_req->vfsmount = mntget(file->f_path.mnt); ff->reserved_req->dentry = dget(file->f_path.dentry); + + spin_lock(&fc->lock); + list_del(&ff->write_entry); + spin_unlock(&fc->lock); /* * Normally this will send the RELEASE request, * however if some asynchronous READ or WRITE requests @@ -180,7 +189,7 @@ static int fuse_release(struct inode *inode, struct file *file) * Scramble the ID space with XTEA, so that the value of the files_struct * pointer is not exposed to userspace. */ -static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) +u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) { u32 *k = fc->scramble_key; u64 v = (unsigned long) id; @@ -299,11 +308,19 @@ void fuse_read_fill(struct fuse_req *req, struct fuse_file *ff, } static size_t fuse_send_read(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count) + struct inode *inode, loff_t pos, size_t count, + fl_owner_t owner) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; + fuse_read_fill(req, ff, inode, pos, count, FUSE_READ); + if (owner != NULL) { + struct fuse_read_in *inarg = &req->misc.read_in; + + inarg->read_flags |= FUSE_READ_LOCKOWNER; + inarg->lock_owner = fuse_lock_owner_id(fc, owner); + } request_send(fc, req); return req->out.args[0].size; } @@ -327,7 +344,8 @@ static int fuse_readpage(struct file *file, struct page *page) req->out.page_zeroing = 1; req->num_pages = 1; req->pages[0] = page; - fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE); + fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE, + NULL); err = req->out.h.error; fuse_put_request(fc, req); if (!err) @@ -434,30 +452,47 @@ out: return err; } -static size_t fuse_send_write(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count) +static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, + struct inode *inode, loff_t pos, size_t count, + int writepage) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_file *ff = file->private_data; - struct fuse_write_in inarg; - struct fuse_write_out outarg; + struct fuse_write_in *inarg = &req->misc.write.in; + struct fuse_write_out *outarg = &req->misc.write.out; - memset(&inarg, 0, sizeof(struct fuse_write_in)); - inarg.fh = ff->fh; - inarg.offset = pos; - inarg.size = count; + memset(inarg, 0, sizeof(struct fuse_write_in)); + inarg->fh = ff->fh; + inarg->offset = pos; + inarg->size = count; + inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; req->in.h.opcode = FUSE_WRITE; req->in.h.nodeid = get_node_id(inode); req->in.argpages = 1; req->in.numargs = 2; - req->in.args[0].size = sizeof(struct fuse_write_in); - req->in.args[0].value = &inarg; + if (fc->minor < 9) + req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; + else + req->in.args[0].size = sizeof(struct fuse_write_in); + req->in.args[0].value = inarg; req->in.args[1].size = count; req->out.numargs = 1; req->out.args[0].size = sizeof(struct fuse_write_out); - req->out.args[0].value = &outarg; + req->out.args[0].value = outarg; +} + +static size_t fuse_send_write(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, size_t count, + fl_owner_t owner) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + fuse_write_fill(req, file->private_data, inode, pos, count, 0); + if (owner != NULL) { + struct fuse_write_in *inarg = &req->misc.write.in; + inarg->write_flags |= FUSE_WRITE_LOCKOWNER; + inarg->lock_owner = fuse_lock_owner_id(fc, owner); + } request_send(fc, req); - return outarg.size; + return req->misc.write.out.size; } static int fuse_write_begin(struct file *file, struct address_space *mapping, @@ -478,6 +513,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, int err; size_t nres; struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); unsigned offset = pos & (PAGE_CACHE_SIZE - 1); struct fuse_req *req; @@ -491,7 +527,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, req->num_pages = 1; req->pages[0] = page; req->page_offset = offset; - nres = fuse_send_write(req, file, inode, pos, count); + nres = fuse_send_write(req, file, inode, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); if (!err && !nres) @@ -499,6 +535,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, if (!err) { pos += nres; spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; if (pos > inode->i_size) i_size_write(inode, pos); spin_unlock(&fc->lock); @@ -591,9 +628,11 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; nbytes = min(count, nbytes); if (write) - nres = fuse_send_write(req, file, inode, pos, nbytes); + nres = fuse_send_write(req, file, inode, pos, nbytes, + current->files); else - nres = fuse_send_read(req, file, inode, pos, nbytes); + nres = fuse_send_read(req, file, inode, pos, nbytes, + current->files); fuse_release_user_pages(req, !write); if (req->out.h.error) { if (!res) @@ -695,7 +734,8 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, } static void fuse_lk_fill(struct fuse_req *req, struct file *file, - const struct file_lock *fl, int opcode, pid_t pid) + const struct file_lock *fl, int opcode, pid_t pid, + int flock) { struct inode *inode = file->f_path.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); @@ -708,6 +748,8 @@ static void fuse_lk_fill(struct fuse_req *req, struct file *file, arg->lk.end = fl->fl_end; arg->lk.type = fl->fl_type; arg->lk.pid = pid; + if (flock) + arg->lk_flags |= FUSE_LK_FLOCK; req->in.h.opcode = opcode; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; @@ -727,7 +769,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) if (IS_ERR(req)) return PTR_ERR(req); - fuse_lk_fill(req, file, fl, FUSE_GETLK, 0); + fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0); req->out.numargs = 1; req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; @@ -740,7 +782,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) return err; } -static int fuse_setlk(struct file *file, struct file_lock *fl) +static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) { struct inode *inode = file->f_path.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); @@ -757,7 +799,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl) if (IS_ERR(req)) return PTR_ERR(req); - fuse_lk_fill(req, file, fl, opcode, pid); + fuse_lk_fill(req, file, fl, opcode, pid, flock); request_send(fc, req); err = req->out.h.error; /* locking is restartable */ @@ -783,8 +825,25 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) if (fc->no_lock) err = posix_lock_file_wait(file, fl); else - err = fuse_setlk(file, fl); + err = fuse_setlk(file, fl, 0); + } + return err; +} + +static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + int err; + + if (fc->no_lock) { + err = flock_lock_file_wait(file, fl); + } else { + /* emulate flock with POSIX locks */ + fl->fl_owner = (fl_owner_t) file; + err = fuse_setlk(file, fl, 1); } + return err; } @@ -836,6 +895,7 @@ static const struct file_operations fuse_file_operations = { .release = fuse_release, .fsync = fuse_fsync, .lock = fuse_file_lock, + .flock = fuse_file_flock, .splice_read = generic_file_splice_read, }; @@ -848,6 +908,7 @@ static const struct file_operations fuse_direct_io_file_operations = { .release = fuse_release, .fsync = fuse_fsync, .lock = fuse_file_lock, + .flock = fuse_file_flock, /* no mmap and splice_read */ }; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1764506fdd11..6c5461de1a5f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -67,6 +67,12 @@ struct fuse_inode { /** The sticky bit in inode->i_mode may have been removed, so preserve the original mode */ mode_t orig_i_mode; + + /** Version of last attribute change */ + u64 attr_version; + + /** Files usable in writepage. Protected by fc->lock */ + struct list_head write_files; }; /** FUSE specific file data */ @@ -79,6 +85,9 @@ struct fuse_file { /** Refcount */ atomic_t count; + + /** Entry on inode's write_files list */ + struct list_head write_entry; }; /** One input argument of a request */ @@ -210,6 +219,10 @@ struct fuse_req { struct fuse_init_in init_in; struct fuse_init_out init_out; struct fuse_read_in read_in; + struct { + struct fuse_write_in in; + struct fuse_write_out out; + } write; struct fuse_lk_in lk_in; } misc; @@ -317,6 +330,9 @@ struct fuse_conn { /** Do readpages asynchronously? Only set in INIT */ unsigned async_read : 1; + /** Do not send separate SETATTR request before open(O_TRUNC) */ + unsigned atomic_o_trunc : 1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -387,6 +403,9 @@ struct fuse_conn { /** Reserved request for the DESTROY message */ struct fuse_req *destroy_req; + + /** Version counter for attribute changes */ + u64 attr_version; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -416,7 +435,8 @@ extern const struct file_operations fuse_dev_operations; * Get a filled in inode */ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, - int generation, struct fuse_attr *attr); + int generation, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version); /** * Send FORGET command @@ -477,7 +497,8 @@ void fuse_init_symlink(struct inode *inode); /** * Change attributes of an inode */ -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr); +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version); /** * Initialize the client device @@ -565,3 +586,10 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc); * Is file type valid? */ int fuse_valid_type(int m); + +/** + * Is task allowed to perform filesystem operation? + */ +int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); + +u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fd0735715c14..9a68d6970845 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -56,6 +56,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->i_time = 0; fi->nodeid = 0; fi->nlookup = 0; + INIT_LIST_HEAD(&fi->write_files); fi->forget_req = fuse_request_alloc(); if (!fi->forget_req) { kmem_cache_free(fuse_inode_cachep, inode); @@ -68,6 +69,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) static void fuse_destroy_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); + BUG_ON(!list_empty(&fi->write_files)); if (fi->forget_req) fuse_request_free(fi->forget_req); kmem_cache_free(fuse_inode_cachep, inode); @@ -117,12 +119,22 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset) unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); } -void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) + +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); loff_t oldsize; + spin_lock(&fc->lock); + if (attr_version != 0 && fi->attr_version > attr_version) { + spin_unlock(&fc->lock); + return; + } + fi->attr_version = ++fc->attr_version; + fi->i_time = attr_valid; + inode->i_ino = attr->ino; inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); inode->i_nlink = attr->nlink; @@ -136,6 +148,11 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) inode->i_ctime.tv_sec = attr->ctime; inode->i_ctime.tv_nsec = attr->ctimensec; + if (attr->blksize != 0) + inode->i_blkbits = ilog2(attr->blksize); + else + inode->i_blkbits = inode->i_sb->s_blocksize_bits; + /* * Don't set the sticky bit in i_mode, unless we want the VFS * to check permissions. This prevents failures due to the @@ -145,7 +162,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) inode->i_mode &= ~S_ISVTX; - spin_lock(&fc->lock); oldsize = inode->i_size; i_size_write(inode, attr->size); spin_unlock(&fc->lock); @@ -194,7 +210,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp) } struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, - int generation, struct fuse_attr *attr) + int generation, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version) { struct inode *inode; struct fuse_inode *fi; @@ -222,7 +239,8 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, spin_lock(&fc->lock); fi->nlookup ++; spin_unlock(&fc->lock); - fuse_change_attributes(inode, attr); + fuse_change_attributes(inode, attr, attr_valid, attr_version); + return inode; } @@ -287,6 +305,11 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) struct fuse_statfs_out outarg; int err; + if (!fuse_allow_task(fc, current)) { + buf->f_type = FUSE_SUPER_MAGIC; + return 0; + } + req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -452,6 +475,7 @@ static struct fuse_conn *new_conn(void) } fc->reqctr = 0; fc->blocked = 1; + fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } out: @@ -483,7 +507,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode) attr.mode = mode; attr.ino = FUSE_ROOT_ID; attr.nlink = 1; - return fuse_iget(sb, 1, 0, &attr); + return fuse_iget(sb, 1, 0, &attr, 0, 0); } static const struct super_operations fuse_super_operations = { @@ -514,6 +538,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_read = 1; if (!(arg->flags & FUSE_POSIX_LOCKS)) fc->no_lock = 1; + if (arg->flags & FUSE_ATOMIC_O_TRUNC) + fc->atomic_o_trunc = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -536,7 +562,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; - arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS; + arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_FILE_OPS | + FUSE_ATOMIC_O_TRUNC; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index a003d50edcdb..a263d82761df 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -375,7 +375,7 @@ void journal_commit_transaction(journal_t *journal) struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } @@ -792,14 +792,14 @@ restart_loop: * Otherwise, we can just throw away the frozen data now. */ if (jh->b_committed_data) { - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; if (jh->b_frozen_data) { jh->b_committed_data = jh->b_frozen_data; jh->b_frozen_data = NULL; } } else if (jh->b_frozen_data) { - jbd_slab_free(jh->b_frozen_data, bh->b_size); + jbd_free(jh->b_frozen_data, bh->b_size); jh->b_frozen_data = NULL; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index a6be78c05dce..5d9fec0b7ebd 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -83,7 +83,6 @@ EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); -static int journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -218,7 +217,7 @@ static int journal_start_thread(journal_t *journal) if (IS_ERR(t)) return PTR_ERR(t); - wait_event(journal->j_wait_done_commit, journal->j_task != 0); + wait_event(journal->j_wait_done_commit, journal->j_task != NULL); return 0; } @@ -230,7 +229,8 @@ static void journal_kill_thread(journal_t *journal) while (journal->j_task) { wake_up(&journal->j_wait_commit); spin_unlock(&journal->j_state_lock); - wait_event(journal->j_wait_done_commit, journal->j_task == 0); + wait_event(journal->j_wait_done_commit, + journal->j_task == NULL); spin_lock(&journal->j_state_lock); } spin_unlock(&journal->j_state_lock); @@ -334,10 +334,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); + tmp = jbd_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - jbd_slab_free(tmp, bh_in->b_size); + jbd_free(tmp, bh_in->b_size); goto repeat; } @@ -654,7 +654,7 @@ static journal_t * journal_init_common (void) journal_t *journal; int err; - journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL); + journal = kmalloc(sizeof(*journal), GFP_KERNEL); if (!journal) goto fail; memset(journal, 0, sizeof(*journal)); @@ -1095,13 +1095,6 @@ int journal_load(journal_t *journal) } } - /* - * Create a slab for this blocksize - */ - err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); - if (err) - return err; - /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (journal_recover(journal)) @@ -1615,86 +1608,6 @@ int journal_blocks_per_page(struct inode *inode) } /* - * Simple support for retrying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. - */ -void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) -{ - return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); -} - -/* - * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed - * and allocate frozen and commit buffers from these slabs. - * - * Reason for doing this is to avoid, SLAB_DEBUG - since it could - * cause bh to cross page boundary. - */ - -#define JBD_MAX_SLABS 5 -#define JBD_SLAB_INDEX(size) (size >> 11) - -static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; -static const char *jbd_slab_names[JBD_MAX_SLABS] = { - "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" -}; - -static void journal_destroy_jbd_slabs(void) -{ - int i; - - for (i = 0; i < JBD_MAX_SLABS; i++) { - if (jbd_slab[i]) - kmem_cache_destroy(jbd_slab[i]); - jbd_slab[i] = NULL; - } -} - -static int journal_create_jbd_slab(size_t slab_size) -{ - int i = JBD_SLAB_INDEX(slab_size); - - BUG_ON(i >= JBD_MAX_SLABS); - - /* - * Check if we already have a slab created for this size - */ - if (jbd_slab[i]) - return 0; - - /* - * Create a slab and force alignment to be same as slabsize - - * this will make sure that allocations won't cross the page - * boundary. - */ - jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], - slab_size, slab_size, 0, NULL); - if (!jbd_slab[i]) { - printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); - return -ENOMEM; - } - return 0; -} - -void * jbd_slab_alloc(size_t size, gfp_t flags) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); -} - -void jbd_slab_free(void *ptr, size_t size) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - kmem_cache_free(jbd_slab[idx], ptr); -} - -/* * Journal_head storage management */ static struct kmem_cache *journal_head_cache; @@ -1739,14 +1652,14 @@ static struct journal_head *journal_alloc_journal_head(void) atomic_inc(&nr_journal_heads); #endif ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); - if (ret == 0) { + if (ret == NULL) { jbd_debug(1, "out of memory for journal_head\n"); if (time_after(jiffies, last_warning + 5*HZ)) { printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", __FUNCTION__); last_warning = jiffies; } - while (ret == 0) { + while (ret == NULL) { yield(); ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); } @@ -1881,13 +1794,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - jbd_slab_free(jh->b_frozen_data, bh->b_size); + jbd_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -2042,7 +1955,6 @@ static void journal_destroy_caches(void) journal_destroy_revoke_caches(); journal_destroy_journal_head_cache(); journal_destroy_handle_cache(); - journal_destroy_jbd_slabs(); } static int __init journal_init(void) diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 8df5bac0b7a5..9841b1e5af03 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -96,8 +96,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle) alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = jbd_kmalloc(sizeof(*new_transaction), - GFP_NOFS); + new_transaction = kmalloc(sizeof(*new_transaction), + GFP_NOFS|__GFP_NOFAIL); if (!new_transaction) { ret = -ENOMEM; goto out; @@ -675,7 +675,7 @@ repeat: JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); frozen_buffer = - jbd_slab_alloc(jh2bh(jh)->b_size, + jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG @@ -735,7 +735,7 @@ done: out: if (unlikely(frozen_buffer)) /* It's usually NULL */ - jbd_slab_free(frozen_buffer, bh->b_size); + jbd_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error; @@ -888,7 +888,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) repeat: if (!jh->b_committed_data) { - committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -915,7 +915,7 @@ repeat: out: journal_put_journal_head(jh); if (unlikely(committed_data)) - jbd_slab_free(committed_data, bh->b_size); + jbd_free(committed_data, bh->b_size); return err; } @@ -1172,7 +1172,7 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) } /* That test should have eliminated the following case: */ - J_ASSERT_JH(jh, jh->b_frozen_data == 0); + J_ASSERT_JH(jh, jh->b_frozen_data == NULL); JBUFFER_TRACE(jh, "file as BJ_Metadata"); spin_lock(&journal->j_list_lock); @@ -1522,7 +1522,7 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); if (jh->b_jlist != BJ_None) - J_ASSERT_JH(jh, transaction != 0); + J_ASSERT_JH(jh, transaction != NULL); switch (jh->b_jlist) { case BJ_None: @@ -1591,11 +1591,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) if (buffer_locked(bh) || buffer_dirty(bh)) goto out; - if (jh->b_next_transaction != 0) + if (jh->b_next_transaction != NULL) goto out; spin_lock(&journal->j_list_lock); - if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { + if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { /* A written-back ordered data buffer */ JBUFFER_TRACE(jh, "release data"); @@ -1603,7 +1603,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) journal_remove_journal_head(bh); __brelse(bh); } - } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { + } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { /* written-back checkpointed metadata buffer */ if (jh->b_jlist == BJ_None) { JBUFFER_TRACE(jh, "remove from checkpoint list"); @@ -1963,7 +1963,7 @@ void __journal_file_buffer(struct journal_head *jh, J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); J_ASSERT_JH(jh, jh->b_transaction == transaction || - jh->b_transaction == 0); + jh->b_transaction == NULL); if (jh->b_transaction && jh->b_jlist == jlist) return; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index c0f59d1b13dc..6986f334c643 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -278,7 +278,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); - if (tag_bytes > JBD_TAG_SIZE32) + if (tag_bytes > JBD2_TAG_SIZE32) tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } @@ -384,7 +384,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - jbd2_slab_free(jh->b_committed_data, bh->b_size); + jbd2_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } @@ -475,7 +475,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) spin_unlock(&journal->j_list_lock); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); jbd2_journal_write_revoke_records(journal, commit_transaction); @@ -533,7 +533,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) descriptor = jbd2_journal_get_descriptor_buffer(journal); if (!descriptor) { - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, -EIO); continue; } @@ -566,7 +566,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) and repeat this loop: we'll fall into the refile-on-abort condition above. */ if (err) { - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); continue; } @@ -757,7 +757,7 @@ wait_for_iobuf: err = -EIO; if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); /* End of a transaction! Finally, we can do checkpoint processing: any buffers committed as a result of this @@ -801,14 +801,14 @@ restart_loop: * Otherwise, we can just throw away the frozen data now. */ if (jh->b_committed_data) { - jbd2_slab_free(jh->b_committed_data, bh->b_size); + jbd2_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; if (jh->b_frozen_data) { jh->b_committed_data = jh->b_frozen_data; jh->b_frozen_data = NULL; } } else if (jh->b_frozen_data) { - jbd2_slab_free(jh->b_frozen_data, bh->b_size); + jbd2_free(jh->b_frozen_data, bh->b_size); jh->b_frozen_data = NULL; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f37324aee817..6ddc5531587c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -84,7 +84,6 @@ EXPORT_SYMBOL(jbd2_journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); -static int jbd2_journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -335,10 +334,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS); + tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - jbd2_slab_free(tmp, bh_in->b_size); + jbd2_free(tmp, bh_in->b_size); goto repeat; } @@ -655,10 +654,9 @@ static journal_t * journal_init_common (void) journal_t *journal; int err; - journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL); + journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); if (!journal) goto fail; - memset(journal, 0, sizeof(*journal)); init_waitqueue_head(&journal->j_wait_transaction_locked); init_waitqueue_head(&journal->j_wait_logspace); @@ -672,7 +670,7 @@ static journal_t * journal_init_common (void) spin_lock_init(&journal->j_list_lock); spin_lock_init(&journal->j_state_lock); - journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE); + journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); /* The journal is marked for error until we succeed with recovery! */ journal->j_flags = JBD2_ABORT; @@ -1096,13 +1094,6 @@ int jbd2_journal_load(journal_t *journal) } } - /* - * Create a slab for this blocksize - */ - err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); - if (err) - return err; - /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (jbd2_journal_recover(journal)) @@ -1621,89 +1612,9 @@ int jbd2_journal_blocks_per_page(struct inode *inode) size_t journal_tag_bytes(journal_t *journal) { if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return JBD_TAG_SIZE64; + return JBD2_TAG_SIZE64; else - return JBD_TAG_SIZE32; -} - -/* - * Simple support for retrying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. - */ -void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry) -{ - return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); -} - -/* - * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed - * and allocate frozen and commit buffers from these slabs. - * - * Reason for doing this is to avoid, SLAB_DEBUG - since it could - * cause bh to cross page boundary. - */ - -#define JBD_MAX_SLABS 5 -#define JBD_SLAB_INDEX(size) (size >> 11) - -static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; -static const char *jbd_slab_names[JBD_MAX_SLABS] = { - "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k" -}; - -static void jbd2_journal_destroy_jbd_slabs(void) -{ - int i; - - for (i = 0; i < JBD_MAX_SLABS; i++) { - if (jbd_slab[i]) - kmem_cache_destroy(jbd_slab[i]); - jbd_slab[i] = NULL; - } -} - -static int jbd2_journal_create_jbd_slab(size_t slab_size) -{ - int i = JBD_SLAB_INDEX(slab_size); - - BUG_ON(i >= JBD_MAX_SLABS); - - /* - * Check if we already have a slab created for this size - */ - if (jbd_slab[i]) - return 0; - - /* - * Create a slab and force alignment to be same as slabsize - - * this will make sure that allocations won't cross the page - * boundary. - */ - jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], - slab_size, slab_size, 0, NULL); - if (!jbd_slab[i]) { - printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); - return -ENOMEM; - } - return 0; -} - -void * jbd2_slab_alloc(size_t size, gfp_t flags) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); -} - -void jbd2_slab_free(void *ptr, size_t size) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - kmem_cache_free(jbd_slab[idx], ptr); + return JBD2_TAG_SIZE32; } /* @@ -1770,7 +1681,7 @@ static void journal_free_journal_head(struct journal_head *jh) { #ifdef CONFIG_JBD2_DEBUG atomic_dec(&nr_journal_heads); - memset(jh, JBD_POISON_FREE, sizeof(*jh)); + memset(jh, JBD2_POISON_FREE, sizeof(*jh)); #endif kmem_cache_free(jbd2_journal_head_cache, jh); } @@ -1893,13 +1804,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - jbd2_slab_free(jh->b_frozen_data, bh->b_size); + jbd2_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - jbd2_slab_free(jh->b_committed_data, bh->b_size); + jbd2_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -1953,16 +1864,14 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) /* * debugfs tunables */ -#if defined(CONFIG_JBD2_DEBUG) -u8 jbd2_journal_enable_debug; +#ifdef CONFIG_JBD2_DEBUG +u8 jbd2_journal_enable_debug __read_mostly; EXPORT_SYMBOL(jbd2_journal_enable_debug); -#endif - -#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS) #define JBD2_DEBUG_NAME "jbd2-debug" -struct dentry *jbd2_debugfs_dir, *jbd2_debug; +static struct dentry *jbd2_debugfs_dir; +static struct dentry *jbd2_debug; static void __init jbd2_create_debugfs_entry(void) { @@ -1975,24 +1884,18 @@ static void __init jbd2_create_debugfs_entry(void) static void __exit jbd2_remove_debugfs_entry(void) { - if (jbd2_debug) - debugfs_remove(jbd2_debug); - if (jbd2_debugfs_dir) - debugfs_remove(jbd2_debugfs_dir); + debugfs_remove(jbd2_debug); + debugfs_remove(jbd2_debugfs_dir); } #else static void __init jbd2_create_debugfs_entry(void) { - do { - } while (0); } static void __exit jbd2_remove_debugfs_entry(void) { - do { - } while (0); } #endif @@ -2040,7 +1943,6 @@ static void jbd2_journal_destroy_caches(void) jbd2_journal_destroy_revoke_caches(); jbd2_journal_destroy_jbd2_journal_head_cache(); jbd2_journal_destroy_handle_cache(); - jbd2_journal_destroy_jbd_slabs(); } static int __init journal_init(void) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index b50be8a044eb..d0ce627539ef 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -311,7 +311,7 @@ int jbd2_journal_skip_recovery(journal_t *journal) static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) { unsigned long long block = be32_to_cpu(tag->t_blocknr); - if (tag_bytes > JBD_TAG_SIZE32) + if (tag_bytes > JBD2_TAG_SIZE32) block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; return block; } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 01d88975e0c5..3595fd432d5b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -352,7 +352,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, if (bh) BUFFER_TRACE(bh, "found on hash"); } -#ifdef JBD_EXPENSIVE_CHECKING +#ifdef JBD2_EXPENSIVE_CHECKING else { struct buffer_head *bh2; @@ -453,7 +453,7 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) } } -#ifdef JBD_EXPENSIVE_CHECKING +#ifdef JBD2_EXPENSIVE_CHECKING /* There better not be one left behind by now! */ record = find_revoke_record(journal, bh->b_blocknr); J_ASSERT_JH(jh, record == NULL); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 7946ff43fc40..b1fcf2b3dca3 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle) alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = jbd_kmalloc(sizeof(*new_transaction), - GFP_NOFS); + new_transaction = kzalloc(sizeof(*new_transaction), + GFP_NOFS|__GFP_NOFAIL); if (!new_transaction) { ret = -ENOMEM; goto out; } - memset(new_transaction, 0, sizeof(*new_transaction)); } jbd_debug(3, "New handle %p going live.\n", handle); @@ -236,7 +235,7 @@ out: /* Allocate a new handle. This should probably be in a slab... */ static handle_t *new_handle(int nblocks) { - handle_t *handle = jbd_alloc_handle(GFP_NOFS); + handle_t *handle = jbd2_alloc_handle(GFP_NOFS); if (!handle) return NULL; memset(handle, 0, sizeof(*handle)); @@ -282,7 +281,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) err = start_this_handle(journal, handle); if (err < 0) { - jbd_free_handle(handle); + jbd2_free_handle(handle); current->journal_info = NULL; handle = ERR_PTR(err); } @@ -668,7 +667,7 @@ repeat: JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); frozen_buffer = - jbd2_slab_alloc(jh2bh(jh)->b_size, + jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG @@ -728,7 +727,7 @@ done: out: if (unlikely(frozen_buffer)) /* It's usually NULL */ - jbd2_slab_free(frozen_buffer, bh->b_size); + jbd2_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error; @@ -881,7 +880,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) repeat: if (!jh->b_committed_data) { - committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -908,7 +907,7 @@ repeat: out: jbd2_journal_put_journal_head(jh); if (unlikely(committed_data)) - jbd2_slab_free(committed_data, bh->b_size); + jbd2_free(committed_data, bh->b_size); return err; } @@ -1411,7 +1410,7 @@ int jbd2_journal_stop(handle_t *handle) spin_unlock(&journal->j_state_lock); } - jbd_free_handle(handle); + jbd2_free_handle(handle); return err; } diff --git a/fs/namei.c b/fs/namei.c index 464eeccb675b..1e5c71669164 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1659,8 +1659,10 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) error = locks_verify_locked(inode); if (!error) { DQUOT_INIT(inode); - - error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL); + + error = do_truncate(dentry, 0, + ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, + NULL); } put_write_access(inode); if (error) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6c22453d77ae..6d2f2a3eccf8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -357,6 +357,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) nfs_inc_stats(inode, NFSIOS_VFSSETATTR); + /* skip mode change if it's just for clearing setuid/setgid */ + if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) + attr->ia_valid &= ~ATTR_MODE; + if (attr->ia_valid & ATTR_SIZE) { if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) attr->ia_valid &= ~ATTR_SIZE; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 819545d21670..46934c97f8f7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -364,14 +364,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; imode = iap->ia_mode |= (imode & ~S_IALLUGO); + /* if changing uid/gid revoke setuid/setgid in mode */ + if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) { + iap->ia_valid |= ATTR_KILL_PRIV; + iap->ia_mode &= ~S_ISUID; + } + if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) + iap->ia_mode &= ~S_ISGID; + } else { + /* + * Revoke setuid/setgid bit on chown/chgrp + */ + if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) + iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV; + if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) + iap->ia_valid |= ATTR_KILL_SGID; } - /* Revoke setuid/setgid bit on chown/chgrp */ - if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) - iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV; - if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) - iap->ia_valid |= ATTR_KILL_SGID; - /* Change the attributes. */ iap->ia_valid |= ATTR_CTIME; diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index e7905816c4ca..64965e1c21c4 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -111,7 +111,7 @@ utf8_wctomb(__u8 *s, wchar_t wc, int maxlen) int c, nc; const struct utf8_table *t; - if (s == 0) + if (!s) return 0; l = wc; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9ea12004fa57..0804289d355d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3061,7 +3061,11 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; int error; - unsigned int ia_valid = attr->ia_valid; + unsigned int ia_valid; + + /* must be turned off for recursive notify_change calls */ + ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); + reiserfs_write_lock(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { /* version 2 items will be caught by the s_maxbytes check |