Diffstat (limited to 'fs')
-rw-r--r--  fs/Kconfig             |   1
-rw-r--r--  fs/aio.c               |  16
-rw-r--r--  fs/attr.c              |  26
-rw-r--r--  fs/autofs/waitq.c      |   2
-rw-r--r--  fs/autofs4/waitq.c     |   2
-rw-r--r--  fs/cifs/inode.c        |   5
-rw-r--r--  fs/compat_ioctl.c      |   2
-rw-r--r--  fs/cramfs/inode.c      |   9
-rw-r--r--  fs/ecryptfs/inode.c    |   8
-rw-r--r--  fs/eventpoll.c         |   2
-rw-r--r--  fs/ext3/fsync.c        |   2
-rw-r--r--  fs/ext3/inode.c        |   2
-rw-r--r--  fs/ext3/resize.c       |   6
-rw-r--r--  fs/ext3/super.c        |   6
-rw-r--r--  fs/ext4/balloc.c       | 112
-rw-r--r--  fs/ext4/dir.c          |   7
-rw-r--r--  fs/ext4/extents.c      |  14
-rw-r--r--  fs/ext4/fsync.c        |   2
-rw-r--r--  fs/ext4/group.h        |  27
-rw-r--r--  fs/ext4/ialloc.c       | 151
-rw-r--r--  fs/ext4/inode.c        |  18
-rw-r--r--  fs/ext4/namei.c        |  20
-rw-r--r--  fs/ext4/resize.c       |  59
-rw-r--r--  fs/ext4/super.c        |  97
-rw-r--r--  fs/ext4/xattr.c        |   7
-rw-r--r--  fs/fuse/dir.c          | 241
-rw-r--r--  fs/fuse/file.c         | 111
-rw-r--r--  fs/fuse/fuse_i.h       |  32
-rw-r--r--  fs/fuse/inode.c        |  39
-rw-r--r--  fs/jbd/commit.c        |   6
-rw-r--r--  fs/jbd/journal.c       | 108
-rw-r--r--  fs/jbd/transaction.c   |  24
-rw-r--r--  fs/jbd2/commit.c       |  16
-rw-r--r--  fs/jbd2/journal.c      | 128
-rw-r--r--  fs/jbd2/recovery.c     |   2
-rw-r--r--  fs/jbd2/revoke.c       |   4
-rw-r--r--  fs/jbd2/transaction.c  |  19
-rw-r--r--  fs/namei.c             |   6
-rw-r--r--  fs/nfs/inode.c         |   4
-rw-r--r--  fs/nfsd/vfs.c          |  21
-rw-r--r--  fs/nls/nls_base.c      |   2
-rw-r--r--  fs/reiserfs/inode.c    |   6
42 files changed, 864 insertions(+), 508 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index d8062745716a..e31f3691b151 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -140,6 +140,7 @@ config EXT4DEV_FS
tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
depends on EXPERIMENTAL
select JBD2
+ select CRC16
help
Ext4dev is a predecessor filesystem of the next generation
extended fs ext4, based on ext3 filesystem code. It will be
diff --git a/fs/aio.c b/fs/aio.c
index d02f43b50a3d..f12db415c0f6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -710,18 +710,9 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
/*
* Now we are all set to call the retry method in async
- * context. By setting this thread's io_wait context
- * to point to the wait queue entry inside the currently
- * running iocb for the duration of the retry, we ensure
- * that async notification wakeups are queued by the
- * operation instead of blocking waits, and when notified,
- * cause the iocb to be kicked for continuation (through
- * the aio_wake_function callback).
+ * context.
*/
- BUG_ON(current->io_wait != NULL);
- current->io_wait = &iocb->ki_wait;
ret = retry(iocb);
- current->io_wait = NULL;
if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
BUG_ON(!list_empty(&iocb->ki_wait.task_list));
@@ -1508,10 +1499,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
* Simply triggers a retry of the operation via kick_iocb.
*
* This callback is specified in the wait queue entry in
- * a kiocb (current->io_wait points to this wait queue
- * entry when an aio operation executes; it is used
- * instead of a synchronous wait when an i/o blocking
- * condition is encountered during aio).
+ * a kiocb.
*
* Note:
* This routine is executed with the wait queue lock held.
diff --git a/fs/attr.c b/fs/attr.c
index ae58bd3f875f..966b73e25f82 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -103,12 +103,11 @@ EXPORT_SYMBOL(inode_setattr);
int notify_change(struct dentry * dentry, struct iattr * attr)
{
struct inode *inode = dentry->d_inode;
- mode_t mode;
+ mode_t mode = inode->i_mode;
int error;
struct timespec now;
unsigned int ia_valid = attr->ia_valid;
- mode = inode->i_mode;
now = current_fs_time(inode->i_sb);
attr->ia_ctime = now;
@@ -125,18 +124,25 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
if (error)
return error;
}
+
+ /*
+ * We now pass ATTR_KILL_S*ID to the lower level setattr function so
+ * that the function has the ability to reinterpret a mode change
+ * that's due to these bits. This adds an implicit restriction that
+ * no function will ever call notify_change with both ATTR_MODE and
+ * ATTR_KILL_S*ID set.
+ */
+ if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) &&
+ (ia_valid & ATTR_MODE))
+ BUG();
+
if (ia_valid & ATTR_KILL_SUID) {
- attr->ia_valid &= ~ATTR_KILL_SUID;
if (mode & S_ISUID) {
- if (!(ia_valid & ATTR_MODE)) {
- ia_valid = attr->ia_valid |= ATTR_MODE;
- attr->ia_mode = inode->i_mode;
- }
- attr->ia_mode &= ~S_ISUID;
+ ia_valid = attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode = (inode->i_mode & ~S_ISUID);
}
}
if (ia_valid & ATTR_KILL_SGID) {
- attr->ia_valid &= ~ ATTR_KILL_SGID;
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
if (!(ia_valid & ATTR_MODE)) {
ia_valid = attr->ia_valid |= ATTR_MODE;
@@ -145,7 +151,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
attr->ia_mode &= ~S_ISGID;
}
}
- if (!attr->ia_valid)
+ if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
return 0;
if (ia_valid & ATTR_SIZE)
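
The new contract above is that ATTR_KILL_SUID/ATTR_KILL_SGID now reach the filesystem's setattr method instead of being consumed by notify_change(). A minimal sketch of what a lower filesystem can do with them, mirroring the cifs and ecryptfs hunks below (examplefs_setattr is hypothetical):

    /* Hypothetical setattr honoring the new convention: a filesystem
     * whose server already clears setuid/setgid on write can drop the
     * mode change that notify_change() synthesized from the kill bits. */
    static int examplefs_setattr(struct dentry *dentry, struct iattr *ia)
    {
            if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
                    ia->ia_valid &= ~ATTR_MODE;

            return inode_setattr(dentry->d_inode, ia);
    }
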
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 19a9cafb5ddf..be46805972f0 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -182,7 +182,7 @@ int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_toke
{
struct autofs_wait_queue *wq, **wql;
- for ( wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next ) {
+ for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
if ( wq->wait_queue_token == wait_queue_token )
break;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 0d041a9cb348..1fe28e4754c2 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -376,7 +376,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
struct autofs_wait_queue *wq, **wql;
mutex_lock(&sbi->wq_mutex);
- for (wql = &sbi->queues ; (wq = *wql) != 0 ; wql = &wq->next) {
+ for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
if (wq->wait_queue_token == wait_queue_token)
break;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index dd4167762a8e..279f3c5e0ce3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1538,6 +1538,11 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
}
time_buf.Attributes = 0;
+
+ /* skip mode change if it's just for clearing setuid/setgid */
+ if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+ attrs->ia_valid &= ~ATTR_MODE;
+
if (attrs->ia_valid & ATTR_MODE) {
cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode));
mode = attrs->ia_mode;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6dacd39bf048..a4284ccac1f9 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -3001,7 +3001,7 @@ static int __init init_sys32_ioctl(void)
int i;
for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) {
- if (ioctl_start[i].next != 0) {
+ if (ioctl_start[i].next) {
printk("ioctl translation %d bad\n",i);
return -1;
}
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 5c817bd08389..350680fd7da7 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -148,7 +148,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
{
struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
struct page *pages[BLKS_PER_BUF];
- unsigned i, blocknr, buffer, unread;
+ unsigned i, blocknr, buffer;
unsigned long devsize;
char *data;
@@ -175,7 +175,6 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT;
/* Ok, read in BLKS_PER_BUF pages completely first. */
- unread = 0;
for (i = 0; i < BLKS_PER_BUF; i++) {
struct page *page = NULL;
@@ -362,7 +361,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (offset & 3)
return -EINVAL;
- buf = kmalloc(256, GFP_KERNEL);
+ buf = kmalloc(CRAMFS_MAXPATHLEN, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -376,7 +375,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
int namelen, error;
mutex_lock(&read_mutex);
- de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+256);
+ de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN);
name = (char *)(de+1);
/*
@@ -426,7 +425,7 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s
char *name;
int namelen, retval;
- de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+256);
+ de = cramfs_read(dir->i_sb, OFFSET(dir) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN);
name = (char *)(de+1);
/* Try to take advantage of sorted directories */
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5701f816faf4..0b1ab016fa2e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -914,6 +914,14 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
if (rc < 0)
goto out;
}
+
+ /*
+ * The mode change is for clearing setuid/setgid bits. Allow the
+ * lower fs to interpret this in its own way.
+ */
+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+ ia->ia_valid &= ~ATTR_MODE;
+
rc = notify_change(lower_dentry, ia);
out:
fsstack_copy_attr_all(inode, lower_inode, NULL);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 77b9953624f4..de6189291954 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -463,7 +463,7 @@ static void ep_free(struct eventpoll *ep)
* holding "epmutex" we can be sure that no file cleanup code will hit
* us during this operation. So we can avoid the lock on "ep->lock".
*/
- while ((rbp = rb_first(&ep->rbr)) != 0) {
+ while ((rbp = rb_first(&ep->rbr)) != NULL) {
epi = rb_entry(rbp, struct epitem, rbn);
ep_remove(ep, epi);
}
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index dd1fd3c0fc05..a588e23841d4 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -47,7 +47,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
struct inode *inode = dentry->d_inode;
int ret = 0;
- J_ASSERT(ext3_journal_current_handle() == 0);
+ J_ASSERT(ext3_journal_current_handle() == NULL);
/*
* data=writeback:
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2f2b6864db10..3dec003b773e 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1028,7 +1028,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
}
if (buffer_new(&dummy)) {
J_ASSERT(create != 0);
- J_ASSERT(handle != 0);
+ J_ASSERT(handle != NULL);
/*
* Now that we do not always journal data, we should
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 771f7ada15d9..44de1453c301 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -245,10 +245,10 @@ static int setup_new_group_blocks(struct super_block *sb,
brelse(gdb);
goto exit_bh;
}
- lock_buffer(bh);
- memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
+ lock_buffer(gdb);
+ memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
- unlock_buffer(bh);
+ unlock_buffer(gdb);
ext3_journal_dirty_metadata(handle, gdb);
ext3_set_bit(bit, bh->b_data);
brelse(gdb);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 141573de7a9a..81868c0bc40e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1620,7 +1620,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
}
brelse (bh);
- sb_set_blocksize(sb, blocksize);
+ if (!sb_set_blocksize(sb, blocksize)) {
+ printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n",
+ blocksize);
+ goto out_fail;
+ }
logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
bh = sb_bread(sb, logic_sb_block);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b74bf4368441..e906b65448e2 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -20,6 +20,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
+#include "group.h"
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -42,6 +43,94 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
}
+/* Initializes an uninitialized block bitmap if given, and returns the
+ * number of blocks free in the group. */
+unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+ int block_group, struct ext4_group_desc *gdp)
+{
+ unsigned long start;
+ int bit, bit_max;
+ unsigned free_blocks, group_blocks;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (bh) {
+ J_ASSERT_BH(bh, buffer_locked(bh));
+
+ /* If the checksum is bad, mark all blocks used to prevent allocation,
+ * essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+ ext4_error(sb, __FUNCTION__,
+ "Checksum bad for group %u\n", block_group);
+ gdp->bg_free_blocks_count = 0;
+ gdp->bg_free_inodes_count = 0;
+ gdp->bg_itable_unused = 0;
+ memset(bh->b_data, 0xff, sb->s_blocksize);
+ return 0;
+ }
+ memset(bh->b_data, 0, sb->s_blocksize);
+ }
+
+ /* Check for superblock and gdt backups in this group */
+ bit_max = ext4_bg_has_super(sb, block_group);
+
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+ sbi->s_desc_per_block) {
+ if (bit_max) {
+ bit_max += ext4_bg_num_gdb(sb, block_group);
+ bit_max +=
+ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+ }
+ } else { /* For META_BG_BLOCK_GROUPS */
+ int group_rel = (block_group -
+ le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
+ EXT4_DESC_PER_BLOCK(sb);
+ if (group_rel == 0 || group_rel == 1 ||
+ (group_rel == EXT4_DESC_PER_BLOCK(sb) - 1))
+ bit_max += 1;
+ }
+
+ if (block_group == sbi->s_groups_count - 1) {
+ /*
+ * Even though mke2fs always initializes the first and last group,
+ * some other tool may have enabled EXT4_BG_BLOCK_UNINIT, so we need
+ * to make sure we calculate the right number of free blocks
+ */
+ group_blocks = ext4_blocks_count(sbi->s_es) -
+ le32_to_cpu(sbi->s_es->s_first_data_block) -
+ (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1));
+ } else {
+ group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
+ }
+
+ free_blocks = group_blocks - bit_max;
+
+ if (bh) {
+ for (bit = 0; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+
+ start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(sbi->s_es->s_first_data_block);
+
+ /* Set bits for block and inode bitmaps, and inode table */
+ ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
+ ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
+ for (bit = (ext4_inode_table(sb, gdp) - start),
+ bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
+ ext4_set_bit(bit, bh->b_data);
+
+ /*
+ * Also, if the number of blocks within the group is less than
+ * blocksize * 8 (which is the size of the bitmap), set the rest
+ * of the block bitmap to 1
+ */
+ mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
+ }
+
+ return free_blocks - sbi->s_itb_per_group - 2;
+}
+
+
/*
* The free blocks are managed by bitmaps. A file system contains several
* blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
@@ -119,7 +208,7 @@ block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map)
*
* Return buffer_head on success or NULL in case of failure.
*/
-static struct buffer_head *
+struct buffer_head *
read_block_bitmap(struct super_block *sb, unsigned int block_group)
{
int i;
@@ -127,11 +216,24 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
struct buffer_head * bh = NULL;
ext4_fsblk_t bitmap_blk;
- desc = ext4_get_group_desc (sb, block_group, NULL);
+ desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
bitmap_blk = ext4_block_bitmap(sb, desc);
- bh = sb_bread(sb, bitmap_blk);
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ bh = sb_getblk(sb, bitmap_blk);
+ if (!buffer_uptodate(bh)) {
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ ext4_init_block_bitmap(sb, bh, block_group,
+ desc);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ }
+ } else {
+ bh = sb_bread(sb, bitmap_blk);
+ }
if (!bh)
ext4_error (sb, __FUNCTION__,
"Cannot read block bitmap - "
@@ -627,6 +729,7 @@ do_more:
desc->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
group_freed);
+ desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1685,8 +1788,11 @@ allocated:
ret_block, goal_hits, goal_attempts);
spin_lock(sb_bgl_lock(sbi, group_no));
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_sub(&sbi->s_freeblocks_counter, num);
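
read_block_bitmap() above uses a check/lock/recheck idiom so an EXT4_BG_BLOCK_UNINIT group's bitmap is constructed in memory on first use instead of being read from disk. A minimal sketch of the idiom, assuming a hypothetical init_contents() filler:

    bh = sb_getblk(sb, blk);              /* get buffer, no disk read */
    if (!buffer_uptodate(bh)) {           /* cheap unlocked check */
            lock_buffer(bh);
            if (!buffer_uptodate(bh)) {   /* recheck: we may have raced */
                    init_contents(bh);    /* hypothetical, fills b_data */
                    set_buffer_uptodate(bh);
            }
            unlock_buffer(bh);
    }

Only the first caller pays for the initialization; concurrent callers either see the uptodate flag immediately or block in lock_buffer() until the filler is done.
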
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0fb1e62b20d0..f612bef98315 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -47,9 +47,7 @@ const struct file_operations ext4_dir_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file, /* BKL held */
-#ifdef CONFIG_EXT4_INDEX
.release = ext4_release_dir,
-#endif
};
@@ -107,7 +105,6 @@ static int ext4_readdir(struct file * filp,
sb = inode->i_sb;
-#ifdef CONFIG_EXT4_INDEX
if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
EXT4_FEATURE_COMPAT_DIR_INDEX) &&
((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
@@ -123,7 +120,6 @@ static int ext4_readdir(struct file * filp,
*/
EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
}
-#endif
stored = 0;
offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -232,7 +228,6 @@ out:
return ret;
}
-#ifdef CONFIG_EXT4_INDEX
/*
* These functions convert from the major/minor hash to an f_pos
* value.
@@ -518,5 +513,3 @@ static int ext4_release_dir (struct inode * inode, struct file * filp)
return 0;
}
-
-#endif
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 78beb096f57d..85287742f2ae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -33,7 +33,7 @@
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/ext4_jbd2.h>
-#include <linux/jbd.h>
+#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
@@ -52,7 +52,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;
- block = le32_to_cpu(ex->ee_start);
+ block = le32_to_cpu(ex->ee_start_lo);
block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
return block;
}
@@ -65,7 +65,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
{
ext4_fsblk_t block;
- block = le32_to_cpu(ix->ei_leaf);
+ block = le32_to_cpu(ix->ei_leaf_lo);
block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
return block;
}
@@ -77,7 +77,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
*/
static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
{
- ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+ ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}
@@ -88,7 +88,7 @@ static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
*/
static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
{
- ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+ ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}
@@ -1409,8 +1409,7 @@ has_space:
eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
nearex = path[depth].p_ext;
nearex->ee_block = newext->ee_block;
- nearex->ee_start = newext->ee_start;
- nearex->ee_start_hi = newext->ee_start_hi;
+ ext4_ext_store_pblock(nearex, ext_pblock(newext));
nearex->ee_len = newext->ee_len;
merge:
@@ -2177,7 +2176,6 @@ int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
}
/* ex2: iblock to iblock + maxblocks-1 : initialised */
ex2->ee_block = cpu_to_le32(iblock);
- ex2->ee_start = cpu_to_le32(newblock);
ext4_ext_store_pblock(ex2, newblock);
ex2->ee_len = cpu_to_le16(allocated);
if (ex2 != ex)
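
The renamed ee_start_lo/ee_start_hi (and ei_leaf_lo/ei_leaf_hi) fields split a 48-bit physical block number into 32 low bits and 16 high bits. The double shift in the accessors is, presumably, to stay well-defined when ext4_fsblk_t is only 32 bits wide, since a single shift by the full type width is undefined in C:

    /* Reassemble a 48-bit block number from its on-disk halves.
     * (hi << 31) << 1 equals hi << 32 on a 64-bit ext4_fsblk_t, and
     * yields 0 rather than undefined behaviour on a 32-bit type. */
    ext4_fsblk_t block = le32_to_cpu(ex->ee_start_lo);
    block |= ((ext4_fsblk_t)le16_to_cpu(ex->ee_start_hi) << 31) << 1;
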
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2a167d7131fa..8d50879d1c2c 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -47,7 +47,7 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
struct inode *inode = dentry->d_inode;
int ret = 0;
- J_ASSERT(ext4_journal_current_handle() == 0);
+ J_ASSERT(ext4_journal_current_handle() == NULL);
/*
* data=writeback:
diff --git a/fs/ext4/group.h b/fs/ext4/group.h
new file mode 100644
index 000000000000..1577910bb58b
--- /dev/null
+++ b/fs/ext4/group.h
@@ -0,0 +1,27 @@
+/*
+ * linux/fs/ext4/group.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ */
+
+#ifndef _LINUX_EXT4_GROUP_H
+#define _LINUX_EXT4_GROUP_H
+
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+ struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+ struct ext4_group_desc *gdp);
+struct buffer_head *read_block_bitmap(struct super_block *sb,
+ unsigned int block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int group,
+ struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc) \
+ ext4_init_block_bitmap(sb, NULL, group, desc)
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int group,
+ struct ext4_group_desc *desc);
+extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+#endif /* _LINUX_EXT4_GROUP_H */
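
The ext4_free_blocks_after_init() macro relies on ext4_init_block_bitmap() skipping every bitmap write when bh is NULL, so it can be used purely as a free-block calculator, as ialloc.c does below:

    /* Count the free blocks an uninitialized group will have, without
     * touching any bitmap buffer (bh == NULL skips the set_bit calls). */
    free = ext4_free_blocks_after_init(sb, group, gdp);
    gdp->bg_free_blocks_count = cpu_to_le16(free);
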
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index d0c7793d9393..c61f37fd3f05 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -28,6 +28,7 @@
#include "xattr.h"
#include "acl.h"
+#include "group.h"
/*
* ialloc.c contains the inodes allocation and deallocation routines
@@ -43,6 +44,52 @@
* the free blocks count in the block.
*/
+/*
+ * To avoid calling the atomic setbit hundreds or thousands of times, we only
+ * need to use it within a single byte (to ensure we get endianness right).
+ * We can use memset for the rest of the bitmap as there are no other users.
+ */
+void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+{
+ int i;
+
+ if (start_bit >= end_bit)
+ return;
+
+ ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
+ for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
+ ext4_set_bit(i, bitmap);
+ if (i < end_bit)
+ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
+}
+
+/* Initializes an uninitialized inode bitmap */
+unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh, int block_group,
+ struct ext4_group_desc *gdp)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ J_ASSERT_BH(bh, buffer_locked(bh));
+
+ /* If the checksum is bad, mark all blocks and inodes used to prevent
+ * allocation, essentially implementing a per-group read-only flag. */
+ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
+ ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
+ block_group);
+ gdp->bg_free_blocks_count = 0;
+ gdp->bg_free_inodes_count = 0;
+ gdp->bg_itable_unused = 0;
+ memset(bh->b_data, 0xff, sb->s_blocksize);
+ return 0;
+ }
+
+ memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+ mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
+ bh->b_data);
+
+ return EXT4_INODES_PER_GROUP(sb);
+}
/*
* Read the inode allocation bitmap for a given block_group, reading
@@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
goto error_out;
-
- bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
+ if (!buffer_uptodate(bh)) {
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ ext4_init_inode_bitmap(sb, bh, block_group,
+ desc);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
+ }
+ } else {
+ bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
+ }
if (!bh)
ext4_error(sb, "read_inode_bitmap",
"Cannot read inode bitmap - "
@@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
if (is_directory)
gdp->bg_used_dirs_count = cpu_to_le16(
le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi,
+ block_group, gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_inc(&sbi->s_freeinodes_counter);
if (is_directory)
@@ -435,7 +496,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
struct ext4_sb_info *sbi;
int err = 0;
struct inode *ret;
- int i;
+ int i, free = 0;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
@@ -517,11 +578,13 @@ repeat_in_this_group:
goto out;
got:
- ino += group * EXT4_INODES_PER_GROUP(sb) + 1;
- if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
- ext4_error (sb, "ext4_new_inode",
- "reserved inode or inode > inodes count - "
- "block_group = %d, inode=%lu", group, ino);
+ ino++;
+ if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+ ino > EXT4_INODES_PER_GROUP(sb)) {
+ ext4_error(sb, __FUNCTION__,
+ "reserved inode or inode > inodes count - "
+ "block_group = %d, inode=%lu", group,
+ ino + group * EXT4_INODES_PER_GROUP(sb));
err = -EIO;
goto fail;
}
@@ -529,13 +592,78 @@ got:
BUFFER_TRACE(bh2, "get_write_access");
err = ext4_journal_get_write_access(handle, bh2);
if (err) goto fail;
+
+ /* We may have to initialize the block bitmap if it isn't already */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
+ gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ struct buffer_head *block_bh = read_block_bitmap(sb, group);
+
+ BUFFER_TRACE(block_bh, "get block bitmap access");
+ err = ext4_journal_get_write_access(handle, block_bh);
+ if (err) {
+ brelse(block_bh);
+ goto fail;
+ }
+
+ free = 0;
+ spin_lock(sb_bgl_lock(sbi, group));
+ /* recheck and clear flag under lock if we still need to */
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+ free = ext4_free_blocks_after_init(sb, group, gdp);
+ gdp->bg_free_blocks_count = cpu_to_le16(free);
+ }
+ spin_unlock(sb_bgl_lock(sbi, group));
+
+ /* Don't need to dirty bitmap block if we didn't change it */
+ if (free) {
+ BUFFER_TRACE(block_bh, "dirty block bitmap");
+ err = ext4_journal_dirty_metadata(handle, block_bh);
+ }
+
+ brelse(block_bh);
+ if (err)
+ goto fail;
+ }
+
spin_lock(sb_bgl_lock(sbi, group));
+ /* If we didn't allocate from within the initialized part of the inode
+ * table then we need to initialize up to this inode. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+
+ /* When clearing EXT4_BG_INODE_UNINIT on the block
+ * group we don't want to depend on the value of
+ * bg_itable_unused, even though mke2fs could have
+ * initialized it for us. Instead we calculate the
+ * value below.
+ */
+
+ free = 0;
+ } else {
+ free = EXT4_INODES_PER_GROUP(sb) -
+ le16_to_cpu(gdp->bg_itable_unused);
+ }
+
+ /*
+ * Check the relative inode number against the last used
+ * relative inode number in this group. If it is greater,
+ * we need to update the bg_itable_unused count.
+ */
+ if (ino > free)
+ gdp->bg_itable_unused =
+ cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+ }
+
gdp->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
if (S_ISDIR(mode)) {
gdp->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
}
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, bh2);
@@ -557,7 +685,7 @@ got:
inode->i_gid = current->fsgid;
inode->i_mode = mode;
- inode->i_ino = ino;
+ inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
@@ -573,11 +701,6 @@ got:
/* dirsync only applies to directories */
if (!S_ISDIR(mode))
ei->i_flags &= ~EXT4_DIRSYNC_FL;
-#ifdef EXT4_FRAGMENTS
- ei->i_faddr = 0;
- ei->i_frag_no = 0;
- ei->i_frag_size = 0;
-#endif
ei->i_file_acl = 0;
ei->i_dir_acl = 0;
ei->i_dtime = 0;
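
The arithmetic in mark_bitmap_end() above is easiest to see with concrete numbers. For a 4k-block filesystem, marking bits 100..32767 used works out as follows (a worked example, not code from the patch):

    /* mark_bitmap_end(100, 32768, bitmap):
     *   (100 + 7) & ~7UL == 104, so ext4_set_bit() runs for bits
     *   100..103 -- only the partial byte needs the atomic helper.
     *   Then memset(bitmap + (104 >> 3), 0xff, (32768 - 104) >> 3),
     *   i.e. memset(bitmap + 13, 0xff, 4083), covers bits 104..32767
     *   in one call, since nothing else is using the bitmap yet. */
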
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0df2b1e06d0b..5489703d9573 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1027,7 +1027,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
}
if (buffer_new(&dummy)) {
J_ASSERT(create != 0);
- J_ASSERT(handle != 0);
+ J_ASSERT(handle != NULL);
/*
* Now that we do not always journal data, we should
@@ -2711,11 +2711,6 @@ void ext4_read_inode(struct inode * inode)
}
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-#ifdef EXT4_FRAGMENTS
- ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
- ei->i_frag_no = raw_inode->i_frag;
- ei->i_frag_size = raw_inode->i_fsize;
-#endif
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD))
@@ -2860,11 +2855,6 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags);
-#ifdef EXT4_FRAGMENTS
- raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
- raw_inode->i_frag = ei->i_frag_no;
- raw_inode->i_fsize = ei->i_frag_size;
-#endif
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD))
raw_inode->i_file_acl_high =
@@ -3243,12 +3233,14 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
iloc, handle);
if (ret) {
EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
- if (mnt_count != sbi->s_es->s_mnt_count) {
+ if (mnt_count !=
+ le16_to_cpu(sbi->s_es->s_mnt_count)) {
ext4_warning(inode->i_sb, __FUNCTION__,
"Unable to expand inode %lu. Delete"
" some EAs or run e2fsck.",
inode->i_ino);
- mnt_count = sbi->s_es->s_mnt_count;
+ mnt_count =
+ le16_to_cpu(sbi->s_es->s_mnt_count);
}
}
}
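
The s_mnt_count fix above is a classic on-disk endianness bug: every multi-byte field in struct ext4_super_block is little-endian, so comparing one against a host-order value only happens to work on little-endian CPUs. A small illustration:

    /* s_mnt_count == 258 is stored on disk as bytes 02 01.  A raw
     * 16-bit load on a big-endian CPU sees 0x0201 == 513, so the
     * unconverted comparison could never match the converted value. */
    __le16 raw = sbi->s_es->s_mnt_count;   /* on-disk representation */
    u16 host = le16_to_cpu(raw);           /* 258 on any architecture */
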
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5fdb862e71c4..94ee6f315dc1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -144,7 +144,6 @@ struct dx_map_entry
u16 size;
};
-#ifdef CONFIG_EXT4_INDEX
static inline unsigned dx_get_block (struct dx_entry *entry);
static void dx_set_block (struct dx_entry *entry, unsigned value);
static inline unsigned dx_get_hash (struct dx_entry *entry);
@@ -766,8 +765,6 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
dx_set_block(new, block);
dx_set_count(entries, count + 1);
}
-#endif
-
static void ext4_update_dx_flag(struct inode *inode)
{
@@ -869,7 +866,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
name = dentry->d_name.name;
if (namelen > EXT4_NAME_LEN)
return NULL;
-#ifdef CONFIG_EXT4_INDEX
if (is_dx(dir)) {
bh = ext4_dx_find_entry(dentry, res_dir, &err);
/*
@@ -881,7 +877,6 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
return bh;
dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
}
-#endif
nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
start = EXT4_I(dir)->i_dir_start_lookup;
if (start >= nblocks)
@@ -957,7 +952,6 @@ cleanup_and_exit:
return ret;
}
-#ifdef CONFIG_EXT4_INDEX
static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
struct ext4_dir_entry_2 **res_dir, int *err)
{
@@ -1025,7 +1019,6 @@ errout:
dx_release (frames);
return NULL;
}
-#endif
static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
@@ -1121,7 +1114,6 @@ static inline void ext4_set_de_type(struct super_block *sb,
de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
}
-#ifdef CONFIG_EXT4_INDEX
/*
* Move count entries from end of map between two memory locations.
* Returns pointer to last entry moved.
@@ -1266,8 +1258,6 @@ errout:
*error = err;
return NULL;
}
-#endif
-
/*
* Add a new entry into a directory (leaf) block. If de is non-NULL,
@@ -1364,7 +1354,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
return 0;
}
-#ifdef CONFIG_EXT4_INDEX
/*
* This converts a one block unindexed directory to a 3 block indexed
* directory, and adds the dentry to the indexed directory.
@@ -1443,7 +1432,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
return add_dirent_to_buf(handle, dentry, inode, de, bh);
}
-#endif
/*
* ext4_add_entry()
@@ -1464,9 +1452,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
struct ext4_dir_entry_2 *de;
struct super_block * sb;
int retval;
-#ifdef CONFIG_EXT4_INDEX
int dx_fallback=0;
-#endif
unsigned blocksize;
u32 block, blocks;
@@ -1474,7 +1460,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
blocksize = sb->s_blocksize;
if (!dentry->d_name.len)
return -EINVAL;
-#ifdef CONFIG_EXT4_INDEX
if (is_dx(dir)) {
retval = ext4_dx_add_entry(handle, dentry, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
@@ -1483,7 +1468,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
dx_fallback++;
ext4_mark_inode_dirty(handle, dir);
}
-#endif
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0, offset = 0; block < blocks; block++) {
bh = ext4_bread(handle, dir, block, 0, &retval);
@@ -1493,11 +1477,9 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
if (retval != -ENOSPC)
return retval;
-#ifdef CONFIG_EXT4_INDEX
if (blocks == 1 && !dx_fallback &&
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
return make_indexed_dir(handle, dentry, inode, bh);
-#endif
brelse(bh);
}
bh = ext4_append(handle, dir, &block, &retval);
@@ -1509,7 +1491,6 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
return add_dirent_to_buf(handle, dentry, inode, de, bh);
}
-#ifdef CONFIG_EXT4_INDEX
/*
* Returns 0 for success, or a negative error value
*/
@@ -1644,7 +1625,6 @@ cleanup:
dx_release(frames);
return err;
}
-#endif
/*
* ext4_delete_entry deletes a directory entry by merging it with the
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 472fc0d3e1c0..bd8a52bb3999 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
+#include "group.h"
#define outside(b, first, last) ((b) < (first) || (b) >= (last))
#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@@ -140,22 +141,29 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
}
/*
- * To avoid calling the atomic setbit hundreds or thousands of times, we only
- * need to use it within a single byte (to ensure we get endianness right).
- * We can use memset for the rest of the bitmap as there are no other users.
+ * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
+ * If that fails, restart the transaction & regain write access for the
+ * buffer head which is used for block_bitmap modifications.
*/
-static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+static int extend_or_restart_transaction(handle_t *handle, int thresh,
+ struct buffer_head *bh)
{
- int i;
+ int err;
+
+ if (handle->h_buffer_credits >= thresh)
+ return 0;
- if (start_bit >= end_bit)
- return;
+ err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
+ if (err < 0)
+ return err;
+ if (err) {
+ if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
+ return err;
+ if ((err = ext4_journal_get_write_access(handle, bh)))
+ return err;
+ }
- ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
- for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
- ext4_set_bit(i, bitmap);
- if (i < end_bit)
- memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
+ return 0;
}
/*
@@ -180,8 +188,9 @@ static int setup_new_group_blocks(struct super_block *sb,
int i;
int err = 0, err2;
- handle = ext4_journal_start_sb(sb, reserved_gdb + gdblocks +
- 2 + sbi->s_itb_per_group);
+ /* This transaction may be extended/restarted along the way */
+ handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
+
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -208,6 +217,9 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_debug("update backup group %#04lx (+%d)\n", block, bit);
+ if ((err = extend_or_restart_transaction(handle, 1, bh)))
+ goto exit_bh;
+
gdb = sb_getblk(sb, block);
if (!gdb) {
err = -EIO;
@@ -217,10 +229,10 @@ static int setup_new_group_blocks(struct super_block *sb,
brelse(gdb);
goto exit_bh;
}
- lock_buffer(bh);
- memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
+ lock_buffer(gdb);
+ memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
- unlock_buffer(bh);
+ unlock_buffer(gdb);
ext4_journal_dirty_metadata(handle, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
@@ -233,6 +245,9 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit);
+ if ((err = extend_or_restart_transaction(handle, 1, bh)))
+ goto exit_bh;
+
if (IS_ERR(gdb = bclean(handle, sb, block))) {
err = PTR_ERR(bh);
goto exit_bh;
@@ -254,6 +269,10 @@ static int setup_new_group_blocks(struct super_block *sb,
struct buffer_head *it;
ext4_debug("clear inode block %#04lx (+%d)\n", block, bit);
+
+ if ((err = extend_or_restart_transaction(handle, 1, bh)))
+ goto exit_bh;
+
if (IS_ERR(it = bclean(handle, sb, block))) {
err = PTR_ERR(it);
goto exit_bh;
@@ -262,6 +281,10 @@ static int setup_new_group_blocks(struct super_block *sb,
brelse(it);
ext4_set_bit(bit, bh->b_data);
}
+
+ if ((err = extend_or_restart_transaction(handle, 2, bh)))
+ goto exit_bh;
+
mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
bh->b_data);
ext4_journal_dirty_metadata(handle, bh);
@@ -289,7 +312,6 @@ exit_journal:
return err;
}
-
/*
* Iterate through the groups which hold BACKUP superblock/GDT copies in an
* ext4 filesystem. The counters should be initialized to 1, 5, and 7 before
@@ -842,6 +864,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
/*
* Make the new blocks and inodes valid next. We do this before
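
extend_or_restart_transaction() above wraps the usual jbd2 credit dance; the return convention it depends on is worth spelling out (a sketch of the semantics, not new code):

    /* ext4_journal_extend() returns 0 if the handle was extended,
     * > 0 if the journal cannot extend it (caller must restart the
     * transaction), and < 0 on error.  After a restart, write access
     * to any buffer dirtied under the old handle must be re-acquired. */
    err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
    if (err > 0) {
            err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
            if (!err)
                    err = ext4_journal_get_write_access(handle, bh);
    }
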
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4c8d31c61454..b11e9e2bcd01 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -37,12 +37,14 @@
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/log2.h>
+#include <linux/crc16.h>
#include <asm/uaccess.h>
#include "xattr.h"
#include "acl.h"
#include "namei.h"
+#include "group.h"
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@@ -68,31 +70,31 @@ static void ext4_write_super_lockfs(struct super_block *sb);
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg)
{
- return le32_to_cpu(bg->bg_block_bitmap) |
+ return le32_to_cpu(bg->bg_block_bitmap_lo) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
+ (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
}
ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
struct ext4_group_desc *bg)
{
- return le32_to_cpu(bg->bg_inode_bitmap) |
+ return le32_to_cpu(bg->bg_inode_bitmap_lo) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+ (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
}
ext4_fsblk_t ext4_inode_table(struct super_block *sb,
struct ext4_group_desc *bg)
{
- return le32_to_cpu(bg->bg_inode_table) |
+ return le32_to_cpu(bg->bg_inode_table_lo) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
+ (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
}
void ext4_block_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
- bg->bg_block_bitmap = cpu_to_le32((u32)blk);
+ bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}
@@ -100,7 +102,7 @@ void ext4_block_bitmap_set(struct super_block *sb,
void ext4_inode_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
- bg->bg_inode_bitmap = cpu_to_le32((u32)blk);
+ bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}
@@ -108,7 +110,7 @@ void ext4_inode_bitmap_set(struct super_block *sb,
void ext4_inode_table_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
- bg->bg_inode_table = cpu_to_le32((u32)blk);
+ bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}
@@ -1037,7 +1039,7 @@ static int parse_options (char *options, struct super_block *sb,
if (option < 0)
return 0;
if (option == 0)
- option = JBD_DEFAULT_MAX_COMMIT_AGE;
+ option = JBD2_DEFAULT_MAX_COMMIT_AGE;
sbi->s_commit_interval = HZ * option;
break;
case Opt_data_journal:
@@ -1308,6 +1310,43 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
return res;
}
+__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+ struct ext4_group_desc *gdp)
+{
+ __u16 crc = 0;
+
+ if (sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ int offset = offsetof(struct ext4_group_desc, bg_checksum);
+ __le32 le_group = cpu_to_le32(block_group);
+
+ crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
+ crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
+ crc = crc16(crc, (__u8 *)gdp, offset);
+ offset += sizeof(gdp->bg_checksum); /* skip checksum */
+ /* for 64-bit descriptors, checksum the rest of the struct too */
+ if ((sbi->s_es->s_feature_incompat &
+ cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
+ offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ crc = crc16(crc, (__u8 *)gdp + offset,
+ le16_to_cpu(sbi->s_es->s_desc_size) -
+ offset);
+ }
+
+ return cpu_to_le16(crc);
+}
+
+int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
+ struct ext4_group_desc *gdp)
+{
+ if ((sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
+ (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
+ return 0;
+
+ return 1;
+}
+
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors (struct super_block * sb)
{
@@ -1319,13 +1358,17 @@ static int ext4_check_descriptors (struct super_block * sb)
ext4_fsblk_t inode_table;
struct ext4_group_desc * gdp = NULL;
int desc_block = 0;
+ int flexbg_flag = 0;
int i;
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+ flexbg_flag = 1;
+
ext4_debug ("Checking group descriptors");
for (i = 0; i < sbi->s_groups_count; i++)
{
- if (i == sbi->s_groups_count - 1)
+ if (i == sbi->s_groups_count - 1 || flexbg_flag)
last_block = ext4_blocks_count(sbi->s_es) - 1;
else
last_block = first_block +
@@ -1362,7 +1405,16 @@ static int ext4_check_descriptors (struct super_block * sb)
i, inode_table);
return 0;
}
- first_block += EXT4_BLOCKS_PER_GROUP(sb);
+ if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
+ ext4_error(sb, __FUNCTION__,
+ "Checksum for group %d failed (%u!=%u)\n", i,
+ le16_to_cpu(ext4_group_desc_csum(sbi, i,
+ gdp)),
+ le16_to_cpu(gdp->bg_checksum));
+ return 0;
+ }
+ if (!flexbg_flag)
+ first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
}
@@ -1726,14 +1778,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
}
- sbi->s_frag_size = EXT4_MIN_FRAG_SIZE <<
- le32_to_cpu(es->s_log_frag_size);
- if (blocksize != sbi->s_frag_size) {
- printk(KERN_ERR
- "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n",
- sbi->s_frag_size, blocksize);
- goto failed_mount;
- }
sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
@@ -1747,7 +1791,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
} else
sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
- sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
if (EXT4_INODE_SIZE(sb) == 0)
goto cantfind_ext4;
@@ -1771,12 +1814,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_blocks_per_group);
goto failed_mount;
}
- if (sbi->s_frags_per_group > blocksize * 8) {
- printk (KERN_ERR
- "EXT4-fs: #fragments per group too big: %lu\n",
- sbi->s_frags_per_group);
- goto failed_mount;
- }
if (sbi->s_inodes_per_group > blocksize * 8) {
printk (KERN_ERR
"EXT4-fs: #inodes per group too big: %lu\n",
@@ -2630,7 +2667,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
if (test_opt(sb, MINIX_DF)) {
sbi->s_overhead_last = 0;
- } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
+ } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
unsigned long ngroups = sbi->s_groups_count, i;
ext4_fsblk_t overhead = 0;
smp_rmb();
@@ -2665,14 +2702,14 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
overhead += ngroups * (2 + sbi->s_itb_per_group);
sbi->s_overhead_last = overhead;
smp_wmb();
- sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
+ sbi->s_blocks_last = ext4_blocks_count(es);
}
buf->f_type = EXT4_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
- es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
+ ext4_free_blocks_count_set(es, buf->f_bfree);
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
if (buf->f_bfree < ext4_r_blocks_count(es))
buf->f_bavail = 0;
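
For reference, the group descriptor checksum introduced above covers, in order: the filesystem UUID, the group number as a little-endian 32-bit value, the descriptor up to (but excluding) bg_checksum, and, for 64-bit descriptors, everything after the checksum field. Seeded with ~0, it reduces to:

    crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
    crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
    crc = crc16(crc, (__u8 *)gdp,
                offsetof(struct ext4_group_desc, bg_checksum));
    /* 64-bit descriptors: continue past bg_checksum up to s_desc_size */
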
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b10d68fffb55..86387302c2a9 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -750,12 +750,11 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
} else {
/* Allocate a buffer where we construct the new block. */
- s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+ s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
/* assert(header == s->base) */
error = -ENOMEM;
if (s->base == NULL)
goto cleanup;
- memset(s->base, 0, sb->s_blocksize);
header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
header(s->base)->h_blocks = cpu_to_le32(1);
header(s->base)->h_refcount = cpu_to_le32(1);
@@ -1121,7 +1120,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
int total_ino, total_blk;
void *base, *start, *end;
int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
- int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize;
+ int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
down_write(&EXT4_I(inode)->xattr_sem);
retry:
@@ -1293,7 +1292,7 @@ retry:
i.name = b_entry_name;
i.value = buffer;
- i.value_len = cpu_to_le32(size);
+ i.value_len = size;
error = ext4_xattr_block_find(inode, &i, bs);
if (error)
goto cleanup;
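
The kzalloc() conversion above is behavior-preserving: kzalloc(n, flags) is kmalloc(n, flags) plus zeroing of the returned memory, which is why the explicit memset could be dropped in the same hunk:

    s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
    /* equivalent to kmalloc() followed by memset(s->base, 0, size) */
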
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d1acab931330..3763757f9fe7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -63,13 +63,21 @@ static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
* Set dentry and possibly attribute timeouts from the lookup/mk*
* replies
*/
-static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
+static void fuse_change_entry_timeout(struct dentry *entry,
+ struct fuse_entry_out *o)
{
fuse_dentry_settime(entry,
time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
- if (entry->d_inode)
- get_fuse_inode(entry->d_inode)->i_time =
- time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+}
+
+static u64 attr_timeout(struct fuse_attr_out *o)
+{
+ return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+}
+
+static u64 entry_attr_timeout(struct fuse_entry_out *o)
+{
+ return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
}
/*
@@ -108,13 +116,19 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
struct dentry *entry,
struct fuse_entry_out *outarg)
{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+
+ memset(outarg, 0, sizeof(struct fuse_entry_out));
req->in.h.opcode = FUSE_LOOKUP;
req->in.h.nodeid = get_node_id(dir);
req->in.numargs = 1;
req->in.args[0].size = entry->d_name.len + 1;
req->in.args[0].value = entry->d_name.name;
req->out.numargs = 1;
- req->out.args[0].size = sizeof(struct fuse_entry_out);
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(struct fuse_entry_out);
req->out.args[0].value = outarg;
}
@@ -140,6 +154,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
struct fuse_req *req;
struct fuse_req *forget_req;
struct dentry *parent;
+ u64 attr_version;
/* For negative dentries, always do a fresh lookup */
if (!inode)
@@ -156,6 +171,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
return 0;
}
+ spin_lock(&fc->lock);
+ attr_version = fc->attr_version;
+ spin_unlock(&fc->lock);
+
parent = dget_parent(entry);
fuse_lookup_init(req, parent->d_inode, entry, &outarg);
request_send(fc, req);
@@ -180,8 +199,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
return 0;
- fuse_change_attributes(inode, &outarg.attr);
- fuse_change_timeout(entry, &outarg);
+ fuse_change_attributes(inode, &outarg.attr,
+ entry_attr_timeout(&outarg),
+ attr_version);
+ fuse_change_entry_timeout(entry, &outarg);
}
return 1;
}
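
The attr_version snapshot taken under fc->lock before sending the request is the key to the race fix in this series. A sketch of the protocol, assuming fuse_change_attributes() compares versions internally:

    spin_lock(&fc->lock);
    attr_version = fc->attr_version;   /* snapshot before the request */
    spin_unlock(&fc->lock);

    /* ... send LOOKUP/GETATTR to the userspace daemon ... */

    /* If a local change bumped fc->attr_version meanwhile, the reply
     * is stale and must not overwrite newer known attributes. */
    fuse_change_attributes(inode, &outarg.attr,
                           entry_attr_timeout(&outarg), attr_version);
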
@@ -228,6 +249,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_req *req;
struct fuse_req *forget_req;
+ u64 attr_version;
if (entry->d_name.len > FUSE_NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
@@ -242,6 +264,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
return ERR_PTR(PTR_ERR(forget_req));
}
+ spin_lock(&fc->lock);
+ attr_version = fc->attr_version;
+ spin_unlock(&fc->lock);
+
fuse_lookup_init(req, dir, entry, &outarg);
request_send(fc, req);
err = req->out.h.error;
@@ -253,7 +279,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
err = -EIO;
if (!err && outarg.nodeid) {
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr);
+ &outarg.attr, entry_attr_timeout(&outarg),
+ attr_version);
if (!inode) {
fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
return ERR_PTR(-ENOMEM);
@@ -276,7 +303,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
entry->d_op = &fuse_dentry_operations;
if (!err)
- fuse_change_timeout(entry, &outarg);
+ fuse_change_entry_timeout(entry, &outarg);
else
fuse_invalidate_entry_cache(entry);
return NULL;
@@ -335,6 +362,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
flags &= ~O_NOCTTY;
memset(&inarg, 0, sizeof(inarg));
+ memset(&outentry, 0, sizeof(outentry));
inarg.flags = flags;
inarg.mode = mode;
req->in.h.opcode = FUSE_CREATE;
@@ -345,7 +373,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
req->in.args[1].size = entry->d_name.len + 1;
req->in.args[1].value = entry->d_name.name;
req->out.numargs = 2;
- req->out.args[0].size = sizeof(outentry);
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(outentry);
req->out.args[0].value = &outentry;
req->out.args[1].size = sizeof(outopen);
req->out.args[1].value = &outopen;
@@ -363,7 +394,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
fuse_put_request(fc, req);
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
- &outentry.attr);
+ &outentry.attr, entry_attr_timeout(&outentry), 0);
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
ff->fh = outopen.fh;
@@ -373,7 +404,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
}
fuse_put_request(fc, forget_req);
d_instantiate(entry, inode);
- fuse_change_timeout(entry, &outentry);
+ fuse_change_entry_timeout(entry, &outentry);
file = lookup_instantiate_filp(nd, entry, generic_file_open);
if (IS_ERR(file)) {
ff->fh = outopen.fh;
@@ -410,9 +441,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
return PTR_ERR(forget_req);
}
+ memset(&outarg, 0, sizeof(outarg));
req->in.h.nodeid = get_node_id(dir);
req->out.numargs = 1;
- req->out.args[0].size = sizeof(outarg);
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(outarg);
req->out.args[0].value = &outarg;
request_send(fc, req);
err = req->out.h.error;
@@ -428,7 +463,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
goto out_put_forget_req;
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr);
+ &outarg.attr, entry_attr_timeout(&outarg), 0);
if (!inode) {
fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
return -ENOMEM;
@@ -451,7 +486,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
} else
d_instantiate(entry, inode);
- fuse_change_timeout(entry, &outarg);
+ fuse_change_entry_timeout(entry, &outarg);
fuse_invalidate_attr(dir);
return 0;
@@ -663,52 +698,84 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
return err;
}
-static int fuse_do_getattr(struct inode *inode)
+static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
+ struct kstat *stat)
+{
+ stat->dev = inode->i_sb->s_dev;
+ stat->ino = attr->ino;
+ stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
+ stat->nlink = attr->nlink;
+ stat->uid = attr->uid;
+ stat->gid = attr->gid;
+ stat->rdev = inode->i_rdev;
+ stat->atime.tv_sec = attr->atime;
+ stat->atime.tv_nsec = attr->atimensec;
+ stat->mtime.tv_sec = attr->mtime;
+ stat->mtime.tv_nsec = attr->mtimensec;
+ stat->ctime.tv_sec = attr->ctime;
+ stat->ctime.tv_nsec = attr->ctimensec;
+ stat->size = attr->size;
+ stat->blocks = attr->blocks;
+ stat->blksize = (1 << inode->i_blkbits);
+}
+
+static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
+ struct file *file)
{
int err;
- struct fuse_attr_out arg;
+ struct fuse_getattr_in inarg;
+ struct fuse_attr_out outarg;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req(fc);
+ struct fuse_req *req;
+ u64 attr_version;
+
+ req = fuse_get_req(fc);
if (IS_ERR(req))
return PTR_ERR(req);
+ spin_lock(&fc->lock);
+ attr_version = fc->attr_version;
+ spin_unlock(&fc->lock);
+
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
+ /* Directories have separate file-handle space */
+ if (file && S_ISREG(inode->i_mode)) {
+ struct fuse_file *ff = file->private_data;
+
+ inarg.getattr_flags |= FUSE_GETATTR_FH;
+ inarg.fh = ff->fh;
+ }
req->in.h.opcode = FUSE_GETATTR;
req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
req->out.numargs = 1;
- req->out.args[0].size = sizeof(arg);
- req->out.args[0].value = &arg;
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err) {
- if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) {
+ if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
err = -EIO;
} else {
- struct fuse_inode *fi = get_fuse_inode(inode);
- fuse_change_attributes(inode, &arg.attr);
- fi->i_time = time_to_jiffies(arg.attr_valid,
- arg.attr_valid_nsec);
+ fuse_change_attributes(inode, &outarg.attr,
+ attr_timeout(&outarg),
+ attr_version);
+ if (stat)
+ fuse_fillattr(inode, &outarg.attr, stat);
}
}
return err;
}
/*
- * Check if attributes are still valid, and if not send a GETATTR
- * request to refresh them.
- */
-static int fuse_refresh_attributes(struct inode *inode)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- if (fi->i_time < get_jiffies_64())
- return fuse_do_getattr(inode);
- else
- return 0;
-}
-
-/*
* Calling into a user-controlled filesystem gives the filesystem
* daemon ptrace-like capabilities over the requester process. This
* means, that the filesystem daemon is able to record the exact
@@ -721,7 +788,7 @@ static int fuse_refresh_attributes(struct inode *inode)
* for which the owner of the mount has ptrace privilege. This
* excludes processes started by other users, suid or sgid processes.
*/
-static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
{
if (fc->flags & FUSE_ALLOW_OTHER)
return 1;
@@ -795,11 +862,14 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
*/
if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
- err = fuse_refresh_attributes(inode);
- if (err)
- return err;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ if (fi->i_time < get_jiffies_64()) {
+ err = fuse_do_getattr(inode, NULL, NULL);
+ if (err)
+ return err;
- refreshed = true;
+ refreshed = true;
+ }
}
if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
@@ -809,7 +879,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
attributes. This is also needed, because the root
node will at first have no permissions */
if (err == -EACCES && !refreshed) {
- err = fuse_do_getattr(inode);
+ err = fuse_do_getattr(inode, NULL, NULL);
if (!err)
err = generic_permission(inode, mask, NULL);
}
@@ -825,7 +895,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
if (refreshed)
return -EACCES;
- err = fuse_do_getattr(inode);
+ err = fuse_do_getattr(inode, NULL, NULL);
if (!err && !(inode->i_mode & S_IXUGO))
return -EACCES;
}
@@ -962,6 +1032,20 @@ static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
}
+static bool update_mtime(unsigned ivalid)
+{
+ /* Always update if mtime is explicitly set */
+ if (ivalid & ATTR_MTIME_SET)
+ return true;
+
+ /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
+ if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
+ return false;
+
+ /* In all other cases update */
+ return true;
+}
+
static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
{
unsigned ivalid = iattr->ia_valid;
@@ -974,16 +1058,19 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid;
if (ivalid & ATTR_SIZE)
arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
- /* You can only _set_ these together (they may change by themselves) */
- if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) {
- arg->valid |= FATTR_ATIME | FATTR_MTIME;
+ if (ivalid & ATTR_ATIME) {
+ arg->valid |= FATTR_ATIME;
arg->atime = iattr->ia_atime.tv_sec;
- arg->mtime = iattr->ia_mtime.tv_sec;
+ arg->atimensec = iattr->ia_atime.tv_nsec;
+ if (!(ivalid & ATTR_ATIME_SET))
+ arg->valid |= FATTR_ATIME_NOW;
}
- if (ivalid & ATTR_FILE) {
- struct fuse_file *ff = iattr->ia_file->private_data;
- arg->valid |= FATTR_FH;
- arg->fh = ff->fh;
+ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
+ arg->valid |= FATTR_MTIME;
+ arg->mtime = iattr->ia_mtime.tv_sec;
+ arg->mtimensec = iattr->ia_mtime.tv_nsec;
+ if (!(ivalid & ATTR_MTIME_SET))
+ arg->valid |= FATTR_MTIME_NOW;
}
}
@@ -995,22 +1082,28 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
* vmtruncate() doesn't allow for this case, so do the rlimit checking
* and the actual truncation by hand.
*/
-static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
+ struct file *file)
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_req *req;
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
int err;
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
err = inode_change_ok(inode, attr);
if (err)
return err;
}
+ if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
+ return 0;
+
if (attr->ia_valid & ATTR_SIZE) {
unsigned long limit;
if (IS_SWAPFILE(inode))
@@ -1027,14 +1120,28 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
iattr_to_fattr(attr, &inarg);
+ if (file) {
+ struct fuse_file *ff = file->private_data;
+ inarg.valid |= FATTR_FH;
+ inarg.fh = ff->fh;
+ }
+ if (attr->ia_valid & ATTR_SIZE) {
+ /* For mandatory locking in truncate */
+ inarg.valid |= FATTR_LOCKOWNER;
+ inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
+ }
req->in.h.opcode = FUSE_SETATTR;
req->in.h.nodeid = get_node_id(inode);
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
req->out.numargs = 1;
- req->out.args[0].size = sizeof(outarg);
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(outarg);
req->out.args[0].value = &outarg;
request_send(fc, req);
err = req->out.h.error;
@@ -1050,11 +1157,18 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
return -EIO;
}
- fuse_change_attributes(inode, &outarg.attr);
- fi->i_time = time_to_jiffies(outarg.attr_valid, outarg.attr_valid_nsec);
+ fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
return 0;
}
+static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+{
+ if (attr->ia_valid & ATTR_FILE)
+ return fuse_do_setattr(entry, attr, attr->ia_file);
+ else
+ return fuse_do_setattr(entry, attr, NULL);
+}
+
static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
struct kstat *stat)
{
@@ -1066,8 +1180,10 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
if (!fuse_allow_task(fc, current))
return -EACCES;
- err = fuse_refresh_attributes(inode);
- if (!err) {
+ if (fi->i_time < get_jiffies_64())
+ err = fuse_do_getattr(inode, stat, NULL);
+ else {
+ err = 0;
generic_fillattr(inode, stat);
stat->mode = fi->orig_i_mode;
}
@@ -1172,6 +1288,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
struct fuse_getxattr_out outarg;
ssize_t ret;
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
if (fc->no_listxattr)
return -EOPNOTSUPP;
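
The attr_version handshake in fuse_do_getattr() above is a small optimistic-concurrency pattern: snapshot a connection-wide version counter before sending GETATTR, and let fuse_change_attributes() discard the reply if a locally initiated change (which bumps the counter) raced past it. A minimal user-space sketch of the same idea, with hypothetical names and a pthread mutex standing in for fc->lock:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t global_version = 1;   /* fc->attr_version analogue */
    static uint64_t inode_version;        /* fi->attr_version analogue */
    static long cached_size;              /* the attribute being protected */

    /* Apply the reply to a GETATTR-like request; 'snapshot' was taken
     * before the request went out.  Returns 0 if the reply was stale. */
    static int apply_reply(uint64_t snapshot, long new_size)
    {
            pthread_mutex_lock(&lock);
            if (inode_version > snapshot) {      /* a local change won the race */
                    pthread_mutex_unlock(&lock);
                    return 0;
            }
            inode_version = ++global_version;
            cached_size = new_size;
            pthread_mutex_unlock(&lock);
            return 1;
    }

    /* A local size-extending write, as in fuse_buffered_write(). */
    static void local_write(long new_size)
    {
            pthread_mutex_lock(&lock);
            inode_version = ++global_version;
            if (new_size > cached_size)
                    cached_size = new_size;
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            uint64_t snap = global_version;  /* snapshot, then "send" GETATTR */
            local_write(100);                /* a write races with the request */
            printf("reply applied: %d, size: %ld\n",
                   apply_reply(snap, 50), cached_size);  /* stale reply dropped */
            return 0;
    }

The stale reply (size 50) is thrown away and the locally observed size 100 survives, which is exactly why fuse_do_getattr() reads fc->attr_version under the lock before request_send().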
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c4b98c03a46e..0fcdba9d47c0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -28,7 +28,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
- inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+ inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
+ if (!fc->atomic_o_trunc)
+ inarg.flags &= ~O_TRUNC;
req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
req->in.h.nodeid = get_node_id(inode);
req->in.numargs = 1;
@@ -54,6 +56,7 @@ struct fuse_file *fuse_file_alloc(void)
kfree(ff);
ff = NULL;
}
+ INIT_LIST_HEAD(&ff->write_entry);
atomic_set(&ff->count, 0);
}
return ff;
@@ -148,12 +151,18 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
{
struct fuse_file *ff = file->private_data;
if (ff) {
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
fuse_release_fill(ff, get_node_id(inode), file->f_flags,
isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
/* Hold vfsmount and dentry until release is finished */
ff->reserved_req->vfsmount = mntget(file->f_path.mnt);
ff->reserved_req->dentry = dget(file->f_path.dentry);
+
+ spin_lock(&fc->lock);
+ list_del(&ff->write_entry);
+ spin_unlock(&fc->lock);
/*
* Normally this will send the RELEASE request,
* however if some asynchronous READ or WRITE requests
@@ -180,7 +189,7 @@ static int fuse_release(struct inode *inode, struct file *file)
* Scramble the ID space with XTEA, so that the value of the files_struct
* pointer is not exposed to userspace.
*/
-static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
+u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
{
u32 *k = fc->scramble_key;
u64 v = (unsigned long) id;
@@ -299,11 +308,19 @@ void fuse_read_fill(struct fuse_req *req, struct fuse_file *ff,
}
static size_t fuse_send_read(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count)
+ struct inode *inode, loff_t pos, size_t count,
+ fl_owner_t owner)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
+
fuse_read_fill(req, ff, inode, pos, count, FUSE_READ);
+ if (owner != NULL) {
+ struct fuse_read_in *inarg = &req->misc.read_in;
+
+ inarg->read_flags |= FUSE_READ_LOCKOWNER;
+ inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+ }
request_send(fc, req);
return req->out.args[0].size;
}
@@ -327,7 +344,8 @@ static int fuse_readpage(struct file *file, struct page *page)
req->out.page_zeroing = 1;
req->num_pages = 1;
req->pages[0] = page;
- fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE);
+ fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE,
+ NULL);
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err)
@@ -434,30 +452,47 @@ out:
return err;
}
-static size_t fuse_send_write(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count)
+static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
+ struct inode *inode, loff_t pos, size_t count,
+ int writepage)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_file *ff = file->private_data;
- struct fuse_write_in inarg;
- struct fuse_write_out outarg;
+ struct fuse_write_in *inarg = &req->misc.write.in;
+ struct fuse_write_out *outarg = &req->misc.write.out;
- memset(&inarg, 0, sizeof(struct fuse_write_in));
- inarg.fh = ff->fh;
- inarg.offset = pos;
- inarg.size = count;
+ memset(inarg, 0, sizeof(struct fuse_write_in));
+ inarg->fh = ff->fh;
+ inarg->offset = pos;
+ inarg->size = count;
+ inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
req->in.h.opcode = FUSE_WRITE;
req->in.h.nodeid = get_node_id(inode);
req->in.argpages = 1;
req->in.numargs = 2;
- req->in.args[0].size = sizeof(struct fuse_write_in);
- req->in.args[0].value = &inarg;
+ if (fc->minor < 9)
+ req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
+ else
+ req->in.args[0].size = sizeof(struct fuse_write_in);
+ req->in.args[0].value = inarg;
req->in.args[1].size = count;
req->out.numargs = 1;
req->out.args[0].size = sizeof(struct fuse_write_out);
- req->out.args[0].value = &outarg;
+ req->out.args[0].value = outarg;
+}
+
+static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos, size_t count,
+ fl_owner_t owner)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ fuse_write_fill(req, file->private_data, inode, pos, count, 0);
+ if (owner != NULL) {
+ struct fuse_write_in *inarg = &req->misc.write.in;
+ inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
+ inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+ }
request_send(fc, req);
- return outarg.size;
+ return req->misc.write.out.size;
}
static int fuse_write_begin(struct file *file, struct address_space *mapping,
@@ -478,6 +513,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
int err;
size_t nres;
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
struct fuse_req *req;
@@ -491,7 +527,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
req->num_pages = 1;
req->pages[0] = page;
req->page_offset = offset;
- nres = fuse_send_write(req, file, inode, pos, count);
+ nres = fuse_send_write(req, file, inode, pos, count, NULL);
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err && !nres)
@@ -499,6 +535,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
if (!err) {
pos += nres;
spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
if (pos > inode->i_size)
i_size_write(inode, pos);
spin_unlock(&fc->lock);
@@ -591,9 +628,11 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
nbytes = min(count, nbytes);
if (write)
- nres = fuse_send_write(req, file, inode, pos, nbytes);
+ nres = fuse_send_write(req, file, inode, pos, nbytes,
+ current->files);
else
- nres = fuse_send_read(req, file, inode, pos, nbytes);
+ nres = fuse_send_read(req, file, inode, pos, nbytes,
+ current->files);
fuse_release_user_pages(req, !write);
if (req->out.h.error) {
if (!res)
@@ -695,7 +734,8 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
}
static void fuse_lk_fill(struct fuse_req *req, struct file *file,
- const struct file_lock *fl, int opcode, pid_t pid)
+ const struct file_lock *fl, int opcode, pid_t pid,
+ int flock)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -708,6 +748,8 @@ static void fuse_lk_fill(struct fuse_req *req, struct file *file,
arg->lk.end = fl->fl_end;
arg->lk.type = fl->fl_type;
arg->lk.pid = pid;
+ if (flock)
+ arg->lk_flags |= FUSE_LK_FLOCK;
req->in.h.opcode = opcode;
req->in.h.nodeid = get_node_id(inode);
req->in.numargs = 1;
@@ -727,7 +769,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
if (IS_ERR(req))
return PTR_ERR(req);
- fuse_lk_fill(req, file, fl, FUSE_GETLK, 0);
+ fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
req->out.numargs = 1;
req->out.args[0].size = sizeof(outarg);
req->out.args[0].value = &outarg;
@@ -740,7 +782,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
return err;
}
-static int fuse_setlk(struct file *file, struct file_lock *fl)
+static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -757,7 +799,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl)
if (IS_ERR(req))
return PTR_ERR(req);
- fuse_lk_fill(req, file, fl, opcode, pid);
+ fuse_lk_fill(req, file, fl, opcode, pid, flock);
request_send(fc, req);
err = req->out.h.error;
/* locking is restartable */
@@ -783,8 +825,25 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
if (fc->no_lock)
err = posix_lock_file_wait(file, fl);
else
- err = fuse_setlk(file, fl);
+ err = fuse_setlk(file, fl, 0);
+ }
+ return err;
+}
+
+static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ if (fc->no_lock) {
+ err = flock_lock_file_wait(file, fl);
+ } else {
+ /* emulate flock with POSIX locks */
+ fl->fl_owner = (fl_owner_t) file;
+ err = fuse_setlk(file, fl, 1);
}
+
return err;
}
@@ -836,6 +895,7 @@ static const struct file_operations fuse_file_operations = {
.release = fuse_release,
.fsync = fuse_fsync,
.lock = fuse_file_lock,
+ .flock = fuse_file_flock,
.splice_read = generic_file_splice_read,
};
@@ -848,6 +908,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
.release = fuse_release,
.fsync = fuse_fsync,
.lock = fuse_file_lock,
+ .flock = fuse_file_flock,
/* no mmap and splice_read */
};
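
fuse_lock_owner_id(), made non-static above so the truncate and direct-I/O paths can use it, scrambles the files_struct pointer with XTEA before the value crosses into user space. For reference, a standalone sketch of the standard published XTEA block encipher (the kernel's key handling and round usage may differ in detail):

    #include <stdint.h>
    #include <stdio.h>

    /* Standard XTEA: 64-bit block v[2], 128-bit key k[4]. */
    static void xtea_encipher(unsigned rounds, uint32_t v[2], const uint32_t k[4])
    {
            uint32_t v0 = v[0], v1 = v[1], sum = 0;
            const uint32_t delta = 0x9E3779B9;

            while (rounds-- > 0) {
                    v0 += (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + k[sum & 3]);
                    sum += delta;
                    v1 += (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + k[(sum >> 11) & 3]);
            }
            v[0] = v0;
            v[1] = v1;
    }

    int main(void)
    {
            uint32_t key[4] = { 0xdeadbeef, 0x01234567, 0x89abcdef, 0x42424242 };
            uint32_t block[2] = { 0x12345678, 0x0 };  /* e.g. a pointer value */

            xtea_encipher(32, block, key);
            printf("scrambled: %08x%08x\n", block[0], block[1]);
            return 0;
    }

The point of the scrambling is one-way obfuscation, not authentication: lock owners must be stable identifiers, but kernel pointer values should not leak to the filesystem daemon.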
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1764506fdd11..6c5461de1a5f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -67,6 +67,12 @@ struct fuse_inode {
/** The sticky bit in inode->i_mode may have been removed, so
preserve the original mode */
mode_t orig_i_mode;
+
+ /** Version of last attribute change */
+ u64 attr_version;
+
+ /** Files usable in writepage. Protected by fc->lock */
+ struct list_head write_files;
};
/** FUSE specific file data */
@@ -79,6 +85,9 @@ struct fuse_file {
/** Refcount */
atomic_t count;
+
+ /** Entry on inode's write_files list */
+ struct list_head write_entry;
};
/** One input argument of a request */
@@ -210,6 +219,10 @@ struct fuse_req {
struct fuse_init_in init_in;
struct fuse_init_out init_out;
struct fuse_read_in read_in;
+ struct {
+ struct fuse_write_in in;
+ struct fuse_write_out out;
+ } write;
struct fuse_lk_in lk_in;
} misc;
@@ -317,6 +330,9 @@ struct fuse_conn {
/** Do readpages asynchronously? Only set in INIT */
unsigned async_read : 1;
+ /** Do not send separate SETATTR request before open(O_TRUNC) */
+ unsigned atomic_o_trunc : 1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -387,6 +403,9 @@ struct fuse_conn {
/** Reserved request for the DESTROY message */
struct fuse_req *destroy_req;
+
+ /** Version counter for attribute changes */
+ u64 attr_version;
};
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -416,7 +435,8 @@ extern const struct file_operations fuse_dev_operations;
* Get a filled in inode
*/
struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
- int generation, struct fuse_attr *attr);
+ int generation, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version);
/**
* Send FORGET command
@@ -477,7 +497,8 @@ void fuse_init_symlink(struct inode *inode);
/**
* Change attributes of an inode
*/
-void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version);
/**
* Initialize the client device
@@ -565,3 +586,10 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
* Is file type valid?
*/
int fuse_valid_type(int m);
+
+/**
+ * Is task allowed to perform filesystem operation?
+ */
+int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task);
+
+u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd0735715c14..9a68d6970845 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -56,6 +56,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->i_time = 0;
fi->nodeid = 0;
fi->nlookup = 0;
+ INIT_LIST_HEAD(&fi->write_files);
fi->forget_req = fuse_request_alloc();
if (!fi->forget_req) {
kmem_cache_free(fuse_inode_cachep, inode);
@@ -68,6 +69,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
static void fuse_destroy_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ BUG_ON(!list_empty(&fi->write_files));
if (fi->forget_req)
fuse_request_free(fi->forget_req);
kmem_cache_free(fuse_inode_cachep, inode);
@@ -117,12 +119,22 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
}
-void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
+
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
loff_t oldsize;
+ spin_lock(&fc->lock);
+ if (attr_version != 0 && fi->attr_version > attr_version) {
+ spin_unlock(&fc->lock);
+ return;
+ }
+ fi->attr_version = ++fc->attr_version;
+ fi->i_time = attr_valid;
+
inode->i_ino = attr->ino;
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
inode->i_nlink = attr->nlink;
@@ -136,6 +148,11 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
inode->i_ctime.tv_sec = attr->ctime;
inode->i_ctime.tv_nsec = attr->ctimensec;
+ if (attr->blksize != 0)
+ inode->i_blkbits = ilog2(attr->blksize);
+ else
+ inode->i_blkbits = inode->i_sb->s_blocksize_bits;
+
/*
* Don't set the sticky bit in i_mode, unless we want the VFS
* to check permissions. This prevents failures due to the
@@ -145,7 +162,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
inode->i_mode &= ~S_ISVTX;
- spin_lock(&fc->lock);
oldsize = inode->i_size;
i_size_write(inode, attr->size);
spin_unlock(&fc->lock);
@@ -194,7 +210,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
}
struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
- int generation, struct fuse_attr *attr)
+ int generation, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version)
{
struct inode *inode;
struct fuse_inode *fi;
@@ -222,7 +239,8 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
spin_lock(&fc->lock);
fi->nlookup ++;
spin_unlock(&fc->lock);
- fuse_change_attributes(inode, attr);
+ fuse_change_attributes(inode, attr, attr_valid, attr_version);
+
return inode;
}
@@ -287,6 +305,11 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
struct fuse_statfs_out outarg;
int err;
+ if (!fuse_allow_task(fc, current)) {
+ buf->f_type = FUSE_SUPER_MAGIC;
+ return 0;
+ }
+
req = fuse_get_req(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -452,6 +475,7 @@ static struct fuse_conn *new_conn(void)
}
fc->reqctr = 0;
fc->blocked = 1;
+ fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
}
out:
@@ -483,7 +507,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
attr.mode = mode;
attr.ino = FUSE_ROOT_ID;
attr.nlink = 1;
- return fuse_iget(sb, 1, 0, &attr);
+ return fuse_iget(sb, 1, 0, &attr, 0, 0);
}
static const struct super_operations fuse_super_operations = {
@@ -514,6 +538,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->async_read = 1;
if (!(arg->flags & FUSE_POSIX_LOCKS))
fc->no_lock = 1;
+ if (arg->flags & FUSE_ATOMIC_O_TRUNC)
+ fc->atomic_o_trunc = 1;
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
@@ -536,7 +562,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->major = FUSE_KERNEL_VERSION;
arg->minor = FUSE_KERNEL_MINOR_VERSION;
arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
- arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS;
+ arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_FILE_OPS |
+ FUSE_ATOMIC_O_TRUNC;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
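
The INIT exchange above is a capability intersection: the kernel advertises what it can do (now including FUSE_ATOMIC_O_TRUNC), and a feature is honoured only if the daemon echoes the flag back. A hedged sketch of that negotiation; the flag values are taken from this era's include/linux/fuse.h and should be treated as assumptions:

    #include <stdio.h>

    /* Capability bits, believed to match include/linux/fuse.h around
     * protocol 7.9 (assumption -- verify against the header you build with). */
    #define FUSE_ASYNC_READ     (1 << 0)
    #define FUSE_POSIX_LOCKS    (1 << 1)
    #define FUSE_FILE_OPS       (1 << 2)
    #define FUSE_ATOMIC_O_TRUNC (1 << 3)

    /* Each side keeps only the bits the other side also set. */
    static unsigned negotiate(unsigned offered, unsigned supported)
    {
            return offered & supported;
    }

    int main(void)
    {
            unsigned kernel = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS |
                              FUSE_FILE_OPS | FUSE_ATOMIC_O_TRUNC;
            unsigned daemon = FUSE_ASYNC_READ | FUSE_ATOMIC_O_TRUNC;
            unsigned agreed = negotiate(kernel, daemon);

            if (agreed & FUSE_ATOMIC_O_TRUNC)
                    printf("open(O_TRUNC) passed through, no separate SETATTR\n");
            if (!(agreed & FUSE_POSIX_LOCKS))
                    printf("locking falls back to local posix_lock_file_wait()\n");
            return 0;
    }

This is why process_init_reply() only sets fc->atomic_o_trunc when the flag comes back in arg->flags, and why fuse_send_open() keeps masking out O_TRUNC on connections that never negotiated it.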
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a003d50edcdb..a263d82761df 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -375,7 +375,7 @@ void journal_commit_transaction(journal_t *journal)
struct buffer_head *bh = jh2bh(jh);
jbd_lock_bh_state(bh);
- jbd_slab_free(jh->b_committed_data, bh->b_size);
+ jbd_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
jbd_unlock_bh_state(bh);
}
@@ -792,14 +792,14 @@ restart_loop:
* Otherwise, we can just throw away the frozen data now.
*/
if (jh->b_committed_data) {
- jbd_slab_free(jh->b_committed_data, bh->b_size);
+ jbd_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
if (jh->b_frozen_data) {
jh->b_committed_data = jh->b_frozen_data;
jh->b_frozen_data = NULL;
}
} else if (jh->b_frozen_data) {
- jbd_slab_free(jh->b_frozen_data, bh->b_size);
+ jbd_free(jh->b_frozen_data, bh->b_size);
jh->b_frozen_data = NULL;
}
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a6be78c05dce..5d9fec0b7ebd 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -83,7 +83,6 @@ EXPORT_SYMBOL(journal_force_commit);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
-static int journal_create_jbd_slab(size_t slab_size);
/*
* Helper function used to manage commit timeouts
@@ -218,7 +217,7 @@ static int journal_start_thread(journal_t *journal)
if (IS_ERR(t))
return PTR_ERR(t);
- wait_event(journal->j_wait_done_commit, journal->j_task != 0);
+ wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
return 0;
}
@@ -230,7 +229,8 @@ static void journal_kill_thread(journal_t *journal)
while (journal->j_task) {
wake_up(&journal->j_wait_commit);
spin_unlock(&journal->j_state_lock);
- wait_event(journal->j_wait_done_commit, journal->j_task == 0);
+ wait_event(journal->j_wait_done_commit,
+ journal->j_task == NULL);
spin_lock(&journal->j_state_lock);
}
spin_unlock(&journal->j_state_lock);
@@ -334,10 +334,10 @@ repeat:
char *tmp;
jbd_unlock_bh_state(bh_in);
- tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
+ tmp = jbd_alloc(bh_in->b_size, GFP_NOFS);
jbd_lock_bh_state(bh_in);
if (jh_in->b_frozen_data) {
- jbd_slab_free(tmp, bh_in->b_size);
+ jbd_free(tmp, bh_in->b_size);
goto repeat;
}
@@ -654,7 +654,7 @@ static journal_t * journal_init_common (void)
journal_t *journal;
int err;
- journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+ journal = kmalloc(sizeof(*journal), GFP_KERNEL);
if (!journal)
goto fail;
memset(journal, 0, sizeof(*journal));
@@ -1095,13 +1095,6 @@ int journal_load(journal_t *journal)
}
}
- /*
- * Create a slab for this blocksize
- */
- err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
- if (err)
- return err;
-
/* Let the recovery code check whether it needs to recover any
* data from the journal. */
if (journal_recover(journal))
@@ -1615,86 +1608,6 @@ int journal_blocks_per_page(struct inode *inode)
}
/*
- * Simple support for retrying memory allocations. Introduced to help to
- * debug different VM deadlock avoidance strategies.
- */
-void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
-{
- return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
-}
-
-/*
- * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
- * and allocate frozen and commit buffers from these slabs.
- *
- * Reason for doing this is to avoid, SLAB_DEBUG - since it could
- * cause bh to cross page boundary.
- */
-
-#define JBD_MAX_SLABS 5
-#define JBD_SLAB_INDEX(size) (size >> 11)
-
-static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
-static const char *jbd_slab_names[JBD_MAX_SLABS] = {
- "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
-};
-
-static void journal_destroy_jbd_slabs(void)
-{
- int i;
-
- for (i = 0; i < JBD_MAX_SLABS; i++) {
- if (jbd_slab[i])
- kmem_cache_destroy(jbd_slab[i]);
- jbd_slab[i] = NULL;
- }
-}
-
-static int journal_create_jbd_slab(size_t slab_size)
-{
- int i = JBD_SLAB_INDEX(slab_size);
-
- BUG_ON(i >= JBD_MAX_SLABS);
-
- /*
- * Check if we already have a slab created for this size
- */
- if (jbd_slab[i])
- return 0;
-
- /*
- * Create a slab and force alignment to be same as slabsize -
- * this will make sure that allocations won't cross the page
- * boundary.
- */
- jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
- slab_size, slab_size, 0, NULL);
- if (!jbd_slab[i]) {
- printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
- return -ENOMEM;
- }
- return 0;
-}
-
-void * jbd_slab_alloc(size_t size, gfp_t flags)
-{
- int idx;
-
- idx = JBD_SLAB_INDEX(size);
- BUG_ON(jbd_slab[idx] == NULL);
- return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
-}
-
-void jbd_slab_free(void *ptr, size_t size)
-{
- int idx;
-
- idx = JBD_SLAB_INDEX(size);
- BUG_ON(jbd_slab[idx] == NULL);
- kmem_cache_free(jbd_slab[idx], ptr);
-}
-
-/*
* Journal_head storage management
*/
static struct kmem_cache *journal_head_cache;
@@ -1739,14 +1652,14 @@ static struct journal_head *journal_alloc_journal_head(void)
atomic_inc(&nr_journal_heads);
#endif
ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
- if (ret == 0) {
+ if (ret == NULL) {
jbd_debug(1, "out of memory for journal_head\n");
if (time_after(jiffies, last_warning + 5*HZ)) {
printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
__FUNCTION__);
last_warning = jiffies;
}
- while (ret == 0) {
+ while (ret == NULL) {
yield();
ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
}
@@ -1881,13 +1794,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
__FUNCTION__);
- jbd_slab_free(jh->b_frozen_data, bh->b_size);
+ jbd_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
__FUNCTION__);
- jbd_slab_free(jh->b_committed_data, bh->b_size);
+ jbd_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
@@ -2042,7 +1955,6 @@ static void journal_destroy_caches(void)
journal_destroy_revoke_caches();
journal_destroy_journal_head_cache();
journal_destroy_handle_cache();
- journal_destroy_jbd_slabs();
}
static int __init journal_init(void)
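
The per-blocksize slab machinery deleted above existed only so frozen and committed-data buffers could never cross a page boundary under slab debugging. Its replacement achieves the same property by allocating whole pages; the helpers added to include/linux/jbd.h by this series are believed to look roughly like the following sketch (kernel context, requires <linux/gfp.h> and <asm/page.h> for __get_free_pages() and get_order()):

    /* Page-based replacements for jbd_slab_alloc()/jbd_slab_free().
     * Whole-page allocations cannot straddle a page boundary, which is
     * the property the dedicated slabs were enforcing. */
    static inline void *jbd_alloc(size_t size, gfp_t flags)
    {
            return (void *)__get_free_pages(flags, get_order(size));
    }

    static inline void jbd_free(void *ptr, size_t size)
    {
            free_pages((unsigned long)ptr, get_order(size));
    }

With the slabs gone, journal_load() no longer needs to create a cache per blocksize, which is why the journal_create_jbd_slab() call site disappears above.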
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 8df5bac0b7a5..9841b1e5af03 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -96,8 +96,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = jbd_kmalloc(sizeof(*new_transaction),
- GFP_NOFS);
+ new_transaction = kmalloc(sizeof(*new_transaction),
+ GFP_NOFS|__GFP_NOFAIL);
if (!new_transaction) {
ret = -ENOMEM;
goto out;
@@ -675,7 +675,7 @@ repeat:
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
frozen_buffer =
- jbd_slab_alloc(jh2bh(jh)->b_size,
+ jbd_alloc(jh2bh(jh)->b_size,
GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_EMERG
@@ -735,7 +735,7 @@ done:
out:
if (unlikely(frozen_buffer)) /* It's usually NULL */
- jbd_slab_free(frozen_buffer, bh->b_size);
+ jbd_free(frozen_buffer, bh->b_size);
JBUFFER_TRACE(jh, "exit");
return error;
@@ -888,7 +888,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
repeat:
if (!jh->b_committed_data) {
- committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+ committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
printk(KERN_EMERG "%s: No memory for committed data\n",
__FUNCTION__);
@@ -915,7 +915,7 @@ repeat:
out:
journal_put_journal_head(jh);
if (unlikely(committed_data))
- jbd_slab_free(committed_data, bh->b_size);
+ jbd_free(committed_data, bh->b_size);
return err;
}
@@ -1172,7 +1172,7 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
}
/* That test should have eliminated the following case: */
- J_ASSERT_JH(jh, jh->b_frozen_data == 0);
+ J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
JBUFFER_TRACE(jh, "file as BJ_Metadata");
spin_lock(&journal->j_list_lock);
@@ -1522,7 +1522,7 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
if (jh->b_jlist != BJ_None)
- J_ASSERT_JH(jh, transaction != 0);
+ J_ASSERT_JH(jh, transaction != NULL);
switch (jh->b_jlist) {
case BJ_None:
@@ -1591,11 +1591,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
if (buffer_locked(bh) || buffer_dirty(bh))
goto out;
- if (jh->b_next_transaction != 0)
+ if (jh->b_next_transaction != NULL)
goto out;
spin_lock(&journal->j_list_lock);
- if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
+ if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
/* A written-back ordered data buffer */
JBUFFER_TRACE(jh, "release data");
@@ -1603,7 +1603,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
journal_remove_journal_head(bh);
__brelse(bh);
}
- } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
+ } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1963,7 +1963,7 @@ void __journal_file_buffer(struct journal_head *jh,
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
- jh->b_transaction == 0);
+ jh->b_transaction == NULL);
if (jh->b_transaction && jh->b_jlist == jlist)
return;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c0f59d1b13dc..6986f334c643 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -278,7 +278,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
unsigned long long block)
{
tag->t_blocknr = cpu_to_be32(block & (u32)~0);
- if (tag_bytes > JBD_TAG_SIZE32)
+ if (tag_bytes > JBD2_TAG_SIZE32)
tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
@@ -384,7 +384,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
struct buffer_head *bh = jh2bh(jh);
jbd_lock_bh_state(bh);
- jbd2_slab_free(jh->b_committed_data, bh->b_size);
+ jbd2_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
jbd_unlock_bh_state(bh);
}
@@ -475,7 +475,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
spin_unlock(&journal->j_list_lock);
if (err)
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);
jbd2_journal_write_revoke_records(journal, commit_transaction);
@@ -533,7 +533,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
descriptor = jbd2_journal_get_descriptor_buffer(journal);
if (!descriptor) {
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, -EIO);
continue;
}
@@ -566,7 +566,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
and repeat this loop: we'll fall into the
refile-on-abort condition above. */
if (err) {
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);
continue;
}
@@ -757,7 +757,7 @@ wait_for_iobuf:
err = -EIO;
if (err)
- __jbd2_journal_abort_hard(journal);
+ jbd2_journal_abort(journal, err);
/* End of a transaction! Finally, we can do checkpoint
processing: any buffers committed as a result of this
@@ -801,14 +801,14 @@ restart_loop:
* Otherwise, we can just throw away the frozen data now.
*/
if (jh->b_committed_data) {
- jbd2_slab_free(jh->b_committed_data, bh->b_size);
+ jbd2_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
if (jh->b_frozen_data) {
jh->b_committed_data = jh->b_frozen_data;
jh->b_frozen_data = NULL;
}
} else if (jh->b_frozen_data) {
- jbd2_slab_free(jh->b_frozen_data, bh->b_size);
+ jbd2_free(jh->b_frozen_data, bh->b_size);
jh->b_frozen_data = NULL;
}
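
The JBD_TAG_SIZE32 to JBD2_TAG_SIZE32 renames above pass through write_tag_block(), whose (block >> 31) >> 1 idiom deserves a note: it extracts the high half of a block number without ever shifting by 32, which would be undefined behaviour if the value were only 32 bits wide. A standalone round-trip sketch (the cpu_to_be32()/be32_to_cpu() endianness conversions are omitted for brevity):

    #include <stdint.h>
    #include <stdio.h>

    /* The 64-bit tag split used by JBD2: low 32 bits in t_blocknr,
     * high bits in t_blocknr_high, mirroring write_tag_block() and
     * read_tag_block() above. */
    struct tag { uint32_t lo, hi; };

    static void split(struct tag *t, unsigned long long block)
    {
            t->lo = (uint32_t)(block & 0xffffffffu);
            t->hi = (uint32_t)((block >> 31) >> 1);  /* safe high-half shift */
    }

    static unsigned long long join(const struct tag *t)
    {
            return (unsigned long long)t->hi << 32 | t->lo;
    }

    int main(void)
    {
            struct tag t;

            split(&t, 0x123456789abcdefULL);
            printf("%llx\n", join(&t));   /* prints 123456789abcdef */
            return 0;
    }

The high word is written only when the journal carries JBD2_FEATURE_INCOMPAT_64BIT, which is what journal_tag_bytes() above selects between JBD2_TAG_SIZE32 and JBD2_TAG_SIZE64.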
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f37324aee817..6ddc5531587c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -84,7 +84,6 @@ EXPORT_SYMBOL(jbd2_journal_force_commit);
static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
-static int jbd2_journal_create_jbd_slab(size_t slab_size);
/*
* Helper function used to manage commit timeouts
@@ -335,10 +334,10 @@ repeat:
char *tmp;
jbd_unlock_bh_state(bh_in);
- tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS);
+ tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
jbd_lock_bh_state(bh_in);
if (jh_in->b_frozen_data) {
- jbd2_slab_free(tmp, bh_in->b_size);
+ jbd2_free(tmp, bh_in->b_size);
goto repeat;
}
@@ -655,10 +654,9 @@ static journal_t * journal_init_common (void)
journal_t *journal;
int err;
- journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+ journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
if (!journal)
goto fail;
- memset(journal, 0, sizeof(*journal));
init_waitqueue_head(&journal->j_wait_transaction_locked);
init_waitqueue_head(&journal->j_wait_logspace);
@@ -672,7 +670,7 @@ static journal_t * journal_init_common (void)
spin_lock_init(&journal->j_list_lock);
spin_lock_init(&journal->j_state_lock);
- journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
+ journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
/* The journal is marked for error until we succeed with recovery! */
journal->j_flags = JBD2_ABORT;
@@ -1096,13 +1094,6 @@ int jbd2_journal_load(journal_t *journal)
}
}
- /*
- * Create a slab for this blocksize
- */
- err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
- if (err)
- return err;
-
/* Let the recovery code check whether it needs to recover any
* data from the journal. */
if (jbd2_journal_recover(journal))
@@ -1621,89 +1612,9 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
size_t journal_tag_bytes(journal_t *journal)
{
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
- return JBD_TAG_SIZE64;
+ return JBD2_TAG_SIZE64;
else
- return JBD_TAG_SIZE32;
-}
-
-/*
- * Simple support for retrying memory allocations. Introduced to help to
- * debug different VM deadlock avoidance strategies.
- */
-void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
-{
- return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
-}
-
-/*
- * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
- * and allocate frozen and commit buffers from these slabs.
- *
- * Reason for doing this is to avoid, SLAB_DEBUG - since it could
- * cause bh to cross page boundary.
- */
-
-#define JBD_MAX_SLABS 5
-#define JBD_SLAB_INDEX(size) (size >> 11)
-
-static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
-static const char *jbd_slab_names[JBD_MAX_SLABS] = {
- "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
-};
-
-static void jbd2_journal_destroy_jbd_slabs(void)
-{
- int i;
-
- for (i = 0; i < JBD_MAX_SLABS; i++) {
- if (jbd_slab[i])
- kmem_cache_destroy(jbd_slab[i]);
- jbd_slab[i] = NULL;
- }
-}
-
-static int jbd2_journal_create_jbd_slab(size_t slab_size)
-{
- int i = JBD_SLAB_INDEX(slab_size);
-
- BUG_ON(i >= JBD_MAX_SLABS);
-
- /*
- * Check if we already have a slab created for this size
- */
- if (jbd_slab[i])
- return 0;
-
- /*
- * Create a slab and force alignment to be same as slabsize -
- * this will make sure that allocations won't cross the page
- * boundary.
- */
- jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
- slab_size, slab_size, 0, NULL);
- if (!jbd_slab[i]) {
- printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
- return -ENOMEM;
- }
- return 0;
-}
-
-void * jbd2_slab_alloc(size_t size, gfp_t flags)
-{
- int idx;
-
- idx = JBD_SLAB_INDEX(size);
- BUG_ON(jbd_slab[idx] == NULL);
- return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
-}
-
-void jbd2_slab_free(void *ptr, size_t size)
-{
- int idx;
-
- idx = JBD_SLAB_INDEX(size);
- BUG_ON(jbd_slab[idx] == NULL);
- kmem_cache_free(jbd_slab[idx], ptr);
+ return JBD2_TAG_SIZE32;
}
/*
@@ -1770,7 +1681,7 @@ static void journal_free_journal_head(struct journal_head *jh)
{
#ifdef CONFIG_JBD2_DEBUG
atomic_dec(&nr_journal_heads);
- memset(jh, JBD_POISON_FREE, sizeof(*jh));
+ memset(jh, JBD2_POISON_FREE, sizeof(*jh));
#endif
kmem_cache_free(jbd2_journal_head_cache, jh);
}
@@ -1893,13 +1804,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
__FUNCTION__);
- jbd2_slab_free(jh->b_frozen_data, bh->b_size);
+ jbd2_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
__FUNCTION__);
- jbd2_slab_free(jh->b_committed_data, bh->b_size);
+ jbd2_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
@@ -1953,16 +1864,14 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
/*
* debugfs tunables
*/
-#if defined(CONFIG_JBD2_DEBUG)
-u8 jbd2_journal_enable_debug;
+#ifdef CONFIG_JBD2_DEBUG
+u8 jbd2_journal_enable_debug __read_mostly;
EXPORT_SYMBOL(jbd2_journal_enable_debug);
-#endif
-
-#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)
#define JBD2_DEBUG_NAME "jbd2-debug"
-struct dentry *jbd2_debugfs_dir, *jbd2_debug;
+static struct dentry *jbd2_debugfs_dir;
+static struct dentry *jbd2_debug;
static void __init jbd2_create_debugfs_entry(void)
{
@@ -1975,24 +1884,18 @@ static void __init jbd2_create_debugfs_entry(void)
static void __exit jbd2_remove_debugfs_entry(void)
{
- if (jbd2_debug)
- debugfs_remove(jbd2_debug);
- if (jbd2_debugfs_dir)
- debugfs_remove(jbd2_debugfs_dir);
+ debugfs_remove(jbd2_debug);
+ debugfs_remove(jbd2_debugfs_dir);
}
#else
static void __init jbd2_create_debugfs_entry(void)
{
- do {
- } while (0);
}
static void __exit jbd2_remove_debugfs_entry(void)
{
- do {
- } while (0);
}
#endif
@@ -2040,7 +1943,6 @@ static void jbd2_journal_destroy_caches(void)
jbd2_journal_destroy_revoke_caches();
jbd2_journal_destroy_jbd2_journal_head_cache();
jbd2_journal_destroy_handle_cache();
- jbd2_journal_destroy_jbd_slabs();
}
static int __init journal_init(void)
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index b50be8a044eb..d0ce627539ef 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -311,7 +311,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
{
unsigned long long block = be32_to_cpu(tag->t_blocknr);
- if (tag_bytes > JBD_TAG_SIZE32)
+ if (tag_bytes > JBD2_TAG_SIZE32)
block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
return block;
}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 01d88975e0c5..3595fd432d5b 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -352,7 +352,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
if (bh)
BUFFER_TRACE(bh, "found on hash");
}
-#ifdef JBD_EXPENSIVE_CHECKING
+#ifdef JBD2_EXPENSIVE_CHECKING
else {
struct buffer_head *bh2;
@@ -453,7 +453,7 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
}
}
-#ifdef JBD_EXPENSIVE_CHECKING
+#ifdef JBD2_EXPENSIVE_CHECKING
/* There better not be one left behind by now! */
record = find_revoke_record(journal, bh->b_blocknr);
J_ASSERT_JH(jh, record == NULL);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7946ff43fc40..b1fcf2b3dca3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = jbd_kmalloc(sizeof(*new_transaction),
- GFP_NOFS);
+ new_transaction = kzalloc(sizeof(*new_transaction),
+ GFP_NOFS|__GFP_NOFAIL);
if (!new_transaction) {
ret = -ENOMEM;
goto out;
}
- memset(new_transaction, 0, sizeof(*new_transaction));
}
jbd_debug(3, "New handle %p going live.\n", handle);
@@ -236,7 +235,7 @@ out:
/* Allocate a new handle. This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
- handle_t *handle = jbd_alloc_handle(GFP_NOFS);
+ handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
if (!handle)
return NULL;
memset(handle, 0, sizeof(*handle));
@@ -282,7 +281,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
err = start_this_handle(journal, handle);
if (err < 0) {
- jbd_free_handle(handle);
+ jbd2_free_handle(handle);
current->journal_info = NULL;
handle = ERR_PTR(err);
}
@@ -668,7 +667,7 @@ repeat:
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
frozen_buffer =
- jbd2_slab_alloc(jh2bh(jh)->b_size,
+ jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_EMERG
@@ -728,7 +727,7 @@ done:
out:
if (unlikely(frozen_buffer)) /* It's usually NULL */
- jbd2_slab_free(frozen_buffer, bh->b_size);
+ jbd2_free(frozen_buffer, bh->b_size);
JBUFFER_TRACE(jh, "exit");
return error;
@@ -881,7 +880,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
repeat:
if (!jh->b_committed_data) {
- committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
+ committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
printk(KERN_EMERG "%s: No memory for committed data\n",
__FUNCTION__);
@@ -908,7 +907,7 @@ repeat:
out:
jbd2_journal_put_journal_head(jh);
if (unlikely(committed_data))
- jbd2_slab_free(committed_data, bh->b_size);
+ jbd2_free(committed_data, bh->b_size);
return err;
}
@@ -1411,7 +1410,7 @@ int jbd2_journal_stop(handle_t *handle)
spin_unlock(&journal->j_state_lock);
}
- jbd_free_handle(handle);
+ jbd2_free_handle(handle);
return err;
}
diff --git a/fs/namei.c b/fs/namei.c
index 464eeccb675b..1e5c71669164 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1659,8 +1659,10 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
error = locks_verify_locked(inode);
if (!error) {
DQUOT_INIT(inode);
-
- error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
+
+ error = do_truncate(dentry, 0,
+ ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
+ NULL);
}
put_write_access(inode);
if (error)
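
The new ATTR_OPEN bit passed to do_truncate() here lets a filesystem distinguish the implicit truncate of open(O_TRUNC) from an explicit truncate(2); fuse's update_mtime() helper earlier in this patch keys off exactly that. A self-contained sketch of the decision, with flag values believed to match this era's include/linux/fs.h (an assumption; verify against your tree):

    #include <stdbool.h>
    #include <stdio.h>

    /* iattr validity bits (assumed values from include/linux/fs.h). */
    #define ATTR_SIZE      (1 << 3)
    #define ATTR_MTIME     (1 << 5)
    #define ATTR_MTIME_SET (1 << 8)
    #define ATTR_FILE      (1 << 13)
    #define ATTR_OPEN      (1 << 15)

    /* Mirrors the update_mtime() helper added to fs/fuse/dir.c above. */
    static bool update_mtime(unsigned ivalid)
    {
            if (ivalid & ATTR_MTIME_SET)
                    return true;                    /* explicit utimes()      */
            if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
                    return false;                   /* O_TRUNC or ftruncate() */
            return true;
    }

    int main(void)
    {
            printf("open(O_TRUNC): %d\n",
                   update_mtime(ATTR_SIZE | ATTR_OPEN | ATTR_MTIME));  /* 0 */
            printf("truncate(2):   %d\n",
                   update_mtime(ATTR_SIZE | ATTR_MTIME));              /* 1 */
            return 0;
    }

In the O_TRUNC and ftruncate() cases the daemon sees the size change itself and can stamp mtime server-side, so the kernel suppresses FATTR_MTIME rather than pushing a possibly stale client timestamp.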
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6c22453d77ae..6d2f2a3eccf8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -357,6 +357,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+ /* skip mode change if it's just for clearing setuid/setgid */
+ if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+ attr->ia_valid &= ~ATTR_MODE;
+
if (attr->ia_valid & ATTR_SIZE) {
if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
attr->ia_valid &= ~ATTR_SIZE;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 819545d21670..46934c97f8f7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -364,14 +364,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (iap->ia_valid & ATTR_MODE) {
iap->ia_mode &= S_IALLUGO;
imode = iap->ia_mode |= (imode & ~S_IALLUGO);
+ /* if changing uid/gid revoke setuid/setgid in mode */
+ if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) {
+ iap->ia_valid |= ATTR_KILL_PRIV;
+ iap->ia_mode &= ~S_ISUID;
+ }
+ if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
+ iap->ia_mode &= ~S_ISGID;
+ } else {
+ /*
+ * Revoke setuid/setgid bit on chown/chgrp
+ */
+ if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
+ iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV;
+ if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
+ iap->ia_valid |= ATTR_KILL_SGID;
}
- /* Revoke setuid/setgid bit on chown/chgrp */
- if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
- iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV;
- if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
- iap->ia_valid |= ATTR_KILL_SGID;
-
/* Change the attributes. */
iap->ia_valid |= ATTR_CTIME;
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index e7905816c4ca..64965e1c21c4 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -111,7 +111,7 @@ utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
int c, nc;
const struct utf8_table *t;
- if (s == 0)
+ if (!s)
return 0;
l = wc;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9ea12004fa57..0804289d355d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3061,7 +3061,11 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
int error;
- unsigned int ia_valid = attr->ia_valid;
+ unsigned int ia_valid;
+
+ /* must be turned off for recursive notify_change calls */
+ ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
+
reiserfs_write_lock(inode->i_sb);
if (attr->ia_valid & ATTR_SIZE) {
/* version 2 items will be caught by the s_maxbytes check