summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_elf.c20
-rw-r--r--fs/fat/dir.c13
-rw-r--r--fs/fat/fatent.c3
-rw-r--r--fs/gfs2/incore.h8
-rw-r--r--fs/gfs2/ops_fstype.c495
-rw-r--r--fs/gfs2/super.c333
-rw-r--r--fs/gfs2/super.h3
-rw-r--r--fs/inode.c3
-rw-r--r--fs/io_uring.c243
-rw-r--r--fs/iomap/direct-io.c24
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jbd2/transaction.c12
-rw-r--r--fs/ocfs2/alloc.c20
-rw-r--r--fs/ocfs2/aops.c13
-rw-r--r--fs/ocfs2/blockcheck.c26
-rw-r--r--fs/ocfs2/cluster/heartbeat.c103
-rw-r--r--fs/ocfs2/dir.c3
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h1
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c55
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h16
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c7
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c23
-rw-r--r--fs/ocfs2/dlmglue.c27
-rw-r--r--fs/ocfs2/extent_map.c3
-rw-r--r--fs/ocfs2/file.c13
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/journal.h42
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/ocfs2.h3
-rw-r--r--fs/ocfs2/super.c10
-rw-r--r--fs/open.c8
-rw-r--r--fs/proc/meminfo.c8
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/xfs/xfs_file.c14
34 files changed, 780 insertions, 784 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d4e11b2e04f6..cec3b4146440 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -670,26 +670,6 @@ out:
* libraries. There is no binary dependent code anywhere else.
*/
-#ifndef STACK_RND_MASK
-#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
-#endif
-
-static unsigned long randomize_stack_top(unsigned long stack_top)
-{
- unsigned long random_variable = 0;
-
- if (current->flags & PF_RANDOMIZE) {
- random_variable = get_random_long();
- random_variable &= STACK_RND_MASK;
- random_variable <<= PAGE_SHIFT;
- }
-#ifdef CONFIG_STACK_GROWSUP
- return PAGE_ALIGN(stack_top) + random_variable;
-#else
- return PAGE_ALIGN(stack_top) - random_variable;
-#endif
-}
-
static int load_elf_binary(struct linux_binprm *bprm)
{
struct file *interpreter = NULL; /* to shut gcc up */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 1bda2ab6745b..814ad2c2ba80 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1100,8 +1100,11 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
err = -ENOMEM;
goto error;
}
+ /* Avoid race with userspace read via bdev */
+ lock_buffer(bhs[n]);
memset(bhs[n]->b_data, 0, sb->s_blocksize);
set_buffer_uptodate(bhs[n]);
+ unlock_buffer(bhs[n]);
mark_buffer_dirty_inode(bhs[n], dir);
n++;
@@ -1158,6 +1161,8 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
de = (struct msdos_dir_entry *)bhs[0]->b_data;
+ /* Avoid race with userspace read via bdev */
+ lock_buffer(bhs[0]);
/* filling the new directory slots ("." and ".." entries) */
memcpy(de[0].name, MSDOS_DOT, MSDOS_NAME);
memcpy(de[1].name, MSDOS_DOTDOT, MSDOS_NAME);
@@ -1180,6 +1185,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
de[0].size = de[1].size = 0;
memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
set_buffer_uptodate(bhs[0]);
+ unlock_buffer(bhs[0]);
mark_buffer_dirty_inode(bhs[0], dir);
err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
@@ -1237,11 +1243,14 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
/* fill the directory entry */
copy = min(size, sb->s_blocksize);
+ /* Avoid race with userspace read via bdev */
+ lock_buffer(bhs[n]);
memcpy(bhs[n]->b_data, slots, copy);
- slots += copy;
- size -= copy;
set_buffer_uptodate(bhs[n]);
+ unlock_buffer(bhs[n]);
mark_buffer_dirty_inode(bhs[n], dir);
+ slots += copy;
+ size -= copy;
if (!size)
break;
n++;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 265983635f2b..3647c65a0f48 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -388,8 +388,11 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
err = -ENOMEM;
goto error;
}
+ /* Avoid race with userspace read via bdev */
+ lock_buffer(c_bh);
memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
set_buffer_uptodate(c_bh);
+ unlock_buffer(c_bh);
mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
if (sb->s_flags & SB_SYNCHRONOUS)
err = sync_dirty_buffer(c_bh);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6b450065b9d5..5f89c515f5bb 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -584,10 +584,10 @@ struct gfs2_args {
unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */
unsigned int ar_loccookie:1; /* use location based readdir
cookies */
- int ar_commit; /* Commit interval */
- int ar_statfs_quantum; /* The fast statfs interval */
- int ar_quota_quantum; /* The quota interval */
- int ar_statfs_percent; /* The % change to force sync */
+ s32 ar_commit; /* Commit interval */
+ s32 ar_statfs_quantum; /* The fast statfs interval */
+ s32 ar_quota_quantum; /* The quota interval */
+ s32 ar_statfs_percent; /* The % change to force sync */
};
struct gfs2_tune {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index f3fd5cd9d43f..681b44682b0d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -21,6 +21,7 @@
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
+#include <linux/fs_parser.h>
#include "gfs2.h"
#include "incore.h"
@@ -1031,16 +1032,17 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
}
/**
- * fill_super - Read in superblock
+ * gfs2_fill_super - Read in superblock
* @sb: The VFS superblock
- * @data: Mount options
+ * @fc: The filesystem context; the parsed mount options are in fc->fs_private
* @silent: Don't complain if it's not a GFS2 filesystem
*
- * Returns: errno
+ * Returns: -errno
*/
-
-static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent)
+static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct gfs2_args *args = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
struct gfs2_sbd *sdp;
struct gfs2_holder mount_gh;
int error;
@@ -1205,161 +1207,411 @@ fail_debug:
return error;
}
-static int set_gfs2_super(struct super_block *s, void *data)
+/**
+ * gfs2_get_tree - Get the GFS2 superblock and root directory
+ * @fc: The filesystem context
+ *
+ * Returns: 0 or -errno on error
+ */
+static int gfs2_get_tree(struct fs_context *fc)
{
- s->s_bdev = data;
- s->s_dev = s->s_bdev->bd_dev;
- s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+ struct gfs2_args *args = fc->fs_private;
+ struct gfs2_sbd *sdp;
+ int error;
+
+ error = get_tree_bdev(fc, gfs2_fill_super);
+ if (error)
+ return error;
+
+ sdp = fc->root->d_sb->s_fs_info;
+ dput(fc->root);
+ if (args->ar_meta)
+ fc->root = dget(sdp->sd_master_dir);
+ else
+ fc->root = dget(sdp->sd_root_dir);
return 0;
}
-static int test_gfs2_super(struct super_block *s, void *ptr)
+static void gfs2_fc_free(struct fs_context *fc)
{
- struct block_device *bdev = ptr;
- return (bdev == s->s_bdev);
+ struct gfs2_args *args = fc->fs_private;
+
+ kfree(args);
}
-/**
- * gfs2_mount - Get the GFS2 superblock
- * @fs_type: The GFS2 filesystem type
- * @flags: Mount flags
- * @dev_name: The name of the device
- * @data: The mount arguments
- *
- * Q. Why not use get_sb_bdev() ?
- * A. We need to select one of two root directories to mount, independent
- * of whether this is the initial, or subsequent, mount of this sb
- *
- * Returns: 0 or -ve on error
- */
+enum gfs2_param { /* option ids: bound to key names in gfs2_param_specs[], switched on in gfs2_parse_param() */
+ Opt_lockproto,
+ Opt_locktable,
+ Opt_hostdata,
+ Opt_spectator, /* selected by both "spectator" and "norecovery" */
+ Opt_ignore_local_fs, /* accepted but ignored (backwards compat) */
+ Opt_localflocks,
+ Opt_localcaching, /* accepted but ignored (backwards compat) */
+ Opt_debug,
+ Opt_upgrade, /* accepted but ignored (backwards compat) */
+ Opt_acl,
+ Opt_quota, /* flag or enum; see the special-case entry in gfs2_param_specs[] */
+ Opt_suiddir,
+ Opt_data,
+ Opt_meta,
+ Opt_discard,
+ Opt_commit,
+ Opt_errors,
+ Opt_statfs_quantum,
+ Opt_statfs_percent,
+ Opt_quota_quantum,
+ Opt_barrier,
+ Opt_rgrplvb,
+ Opt_loccookie,
+};
-static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
-{
- struct block_device *bdev;
- struct super_block *s;
- fmode_t mode = FMODE_READ | FMODE_EXCL;
- int error;
- struct gfs2_args args;
- struct gfs2_sbd *sdp;
+enum opt_quota { /* enum indices for "quota=<value>"; translated through opt_quota_values[] */
+ Opt_quota_unset = 0, /* kept at 0: gfs2_parse_param() treats only values > 0 as an enum index */
+ Opt_quota_off,
+ Opt_quota_account,
+ Opt_quota_on,
+};
+
+static const unsigned int opt_quota_values[] = { /* indexed by enum opt_quota, yields the GFS2_QUOTA_* value stored in ar_quota */
+ [Opt_quota_off] = GFS2_QUOTA_OFF,
+ [Opt_quota_account] = GFS2_QUOTA_ACCOUNT,
+ [Opt_quota_on] = GFS2_QUOTA_ON,
+};
- if (!(flags & SB_RDONLY))
- mode |= FMODE_WRITE;
+enum opt_data { /* values equal GFS2_DATA_* so the parse result is stored in ar_data unchanged */
+ Opt_data_writeback = GFS2_DATA_WRITEBACK,
+ Opt_data_ordered = GFS2_DATA_ORDERED,
+};
- bdev = blkdev_get_by_path(dev_name, mode, fs_type);
- if (IS_ERR(bdev))
- return ERR_CAST(bdev);
+enum opt_errors { /* values equal GFS2_ERRORS_* so the parse result is stored in ar_errors unchanged */
+ Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
+ Opt_errors_panic = GFS2_ERRORS_PANIC,
+};
- /*
- * once the super is inserted into the list by sget, s_umount
- * will protect the lockfs code from trying to start a snapshot
- * while we are mounting
- */
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- error = -EBUSY;
- goto error_bdev;
- }
- s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- error = PTR_ERR(s);
- if (IS_ERR(s))
- goto error_bdev;
-
- if (s->s_root) {
- /*
- * s_umount nests inside bd_mutex during
- * __invalidate_device(). blkdev_put() acquires
- * bd_mutex and can't be called under s_umount. Drop
- * s_umount temporarily. This is safe as we're
- * holding an active reference.
- */
- up_write(&s->s_umount);
- blkdev_put(bdev, mode);
- down_write(&s->s_umount);
- } else {
- /* s_mode must be set before deactivate_locked_super calls */
- s->s_mode = mode;
- }
+static const struct fs_parameter_spec gfs2_param_specs[] = { /* mount option key -> value type and enum gfs2_param id */
+ fsparam_string ("lockproto", Opt_lockproto),
+ fsparam_string ("locktable", Opt_locktable),
+ fsparam_string ("hostdata", Opt_hostdata),
+ fsparam_flag ("spectator", Opt_spectator),
+ fsparam_flag ("norecovery", Opt_spectator), /* alias: same id as "spectator" */
+ fsparam_flag ("ignore_local_fs", Opt_ignore_local_fs),
+ fsparam_flag ("localflocks", Opt_localflocks),
+ fsparam_flag ("localcaching", Opt_localcaching),
+ fsparam_flag_no("debug", Opt_debug), /* flag_no entries stand in for the old separate "xxx"/"noxxx" tokens */
+ fsparam_flag ("upgrade", Opt_upgrade),
+ fsparam_flag_no("acl", Opt_acl),
+ fsparam_flag_no("suiddir", Opt_suiddir),
+ fsparam_enum ("data", Opt_data), /* legal values listed in gfs2_param_enums[] */
+ fsparam_flag ("meta", Opt_meta),
+ fsparam_flag_no("discard", Opt_discard),
+ fsparam_s32 ("commit", Opt_commit), /* numeric ranges are validated in gfs2_parse_param() */
+ fsparam_enum ("errors", Opt_errors),
+ fsparam_s32 ("statfs_quantum", Opt_statfs_quantum),
+ fsparam_s32 ("statfs_percent", Opt_statfs_percent),
+ fsparam_s32 ("quota_quantum", Opt_quota_quantum),
+ fsparam_flag_no("barrier", Opt_barrier),
+ fsparam_flag_no("rgrplvb", Opt_rgrplvb),
+ fsparam_flag_no("loccookie", Opt_loccookie),
+ /* quota can be a flag or an enum so it gets special treatment */
+ __fsparam(fs_param_is_enum, "quota", Opt_quota, fs_param_neg_with_no|fs_param_v_optional),
+ {} /* terminating empty entry */
+};
- memset(&args, 0, sizeof(args));
- args.ar_quota = GFS2_QUOTA_DEFAULT;
- args.ar_data = GFS2_DATA_DEFAULT;
- args.ar_commit = 30;
- args.ar_statfs_quantum = 30;
- args.ar_quota_quantum = 60;
- args.ar_errors = GFS2_ERRORS_DEFAULT;
+static const struct fs_parameter_enum gfs2_param_enums[] = { /* { option id, accepted string, resulting value } for the enum-typed options */
+ { Opt_quota, "off", Opt_quota_off },
+ { Opt_quota, "account", Opt_quota_account },
+ { Opt_quota, "on", Opt_quota_on },
+ { Opt_data, "writeback", Opt_data_writeback },
+ { Opt_data, "ordered", Opt_data_ordered },
+ { Opt_errors, "withdraw", Opt_errors_withdraw },
+ { Opt_errors, "panic", Opt_errors_panic },
+ {} /* terminating empty entry */
+};
- error = gfs2_mount_args(&args, data);
- if (error) {
- pr_warn("can't parse mount arguments\n");
- goto error_super;
+const struct fs_parameter_description gfs2_fs_parameters = { /* passed to fs_parse() and published via gfs2_fs_type.parameters */
+ .name = "gfs2",
+ .specs = gfs2_param_specs,
+ .enums = gfs2_param_enums,
+};
+
+/* Parse a single mount parameter into fc->fs_private (struct gfs2_args); returns 0, or a negative error from fs_parse() or for a rejected value */
+static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct gfs2_args *args = fc->fs_private;
+ struct fs_parse_result result;
+ int o;
+
+ o = fs_parse(fc, &gfs2_fs_parameters, param, &result);
+ if (o < 0)
+ return o;
+
+ switch (o) {
+ case Opt_lockproto:
+ strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_locktable:
+ strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_hostdata:
+ strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN);
+ break;
+ case Opt_spectator:
+ args->ar_spectator = 1;
+ break;
+ case Opt_ignore_local_fs:
+ /* Retained for backwards compat only */
+ break;
+ case Opt_localflocks:
+ args->ar_localflocks = 1;
+ break;
+ case Opt_localcaching:
+ /* Retained for backwards compat only */
+ break;
+ case Opt_debug:
+ if (result.boolean && args->ar_errors == GFS2_ERRORS_PANIC)
+ return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+ args->ar_debug = result.boolean;
+ break;
+ case Opt_upgrade:
+ /* Retained for backwards compat only */
+ break;
+ case Opt_acl:
+ args->ar_posix_acl = result.boolean;
+ break;
+ case Opt_quota:
+ /* The quota option can be a flag or an enum. A non-zero int_32
+ result means that we have an enum index. Otherwise we have
+ to rely on the 'negated' flag to tell us whether 'quota' or
+ 'noquota' was specified. */
+ if (result.negated)
+ args->ar_quota = GFS2_QUOTA_OFF;
+ else if (result.int_32 > 0)
+ args->ar_quota = opt_quota_values[result.int_32];
+ else
+ args->ar_quota = GFS2_QUOTA_ON;
+ break;
+ case Opt_suiddir:
+ args->ar_suiddir = result.boolean;
+ break;
+ case Opt_data:
+ /* The uint_32 result maps directly to GFS2_DATA_* */
+ args->ar_data = result.uint_32;
+ break;
+ case Opt_meta:
+ args->ar_meta = 1;
+ break;
+ case Opt_discard:
+ args->ar_discard = result.boolean;
+ break;
+ case Opt_commit:
+ if (result.int_32 <= 0)
+ return invalf(fc, "gfs2: commit mount option requires a positive numeric argument");
+ args->ar_commit = result.int_32;
+ break;
+ case Opt_statfs_quantum:
+ if (result.int_32 < 0)
+ return invalf(fc, "gfs2: statfs_quantum mount option requires a non-negative numeric argument");
+ args->ar_statfs_quantum = result.int_32;
+ break;
+ case Opt_quota_quantum:
+ if (result.int_32 <= 0)
+ return invalf(fc, "gfs2: quota_quantum mount option requires a positive numeric argument");
+ args->ar_quota_quantum = result.int_32;
+ break;
+ case Opt_statfs_percent:
+ if (result.int_32 < 0 || result.int_32 > 100)
+ return invalf(fc, "gfs2: statfs_percent mount option requires a numeric argument between 0 and 100");
+ args->ar_statfs_percent = result.int_32;
+ break;
+ case Opt_errors:
+ if (args->ar_debug && result.uint_32 == GFS2_ERRORS_PANIC)
+ return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+ args->ar_errors = result.uint_32;
+ break;
+ case Opt_barrier:
+ args->ar_nobarrier = result.negated; /* field has inverted sense: "barrier" must clear it, "nobarrier" must set it */
+ break;
+ case Opt_rgrplvb:
+ args->ar_rgrplvb = result.boolean;
+ break;
+ case Opt_loccookie:
+ args->ar_loccookie = result.boolean;
+ break;
+ default:
+ return invalf(fc, "gfs2: invalid mount option: %s", param->key);
}
+ return 0;
+}
- if (s->s_root) {
- error = -EBUSY;
- if ((flags ^ s->s_flags) & SB_RDONLY)
- goto error_super;
- } else {
- snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
- sb_set_blocksize(s, block_size(bdev));
- error = fill_super(s, &args, flags & SB_SILENT ? 1 : 0);
- if (error)
- goto error_super;
- s->s_flags |= SB_ACTIVE;
- bdev->bd_super = s;
+static int gfs2_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct gfs2_args *oldargs = &sdp->sd_args;
+ struct gfs2_args *newargs = fc->fs_private;
+ struct gfs2_tune *gt = &sdp->sd_tune;
+ int error = 0;
+
+ sync_filesystem(sb);
+
+ spin_lock(&gt->gt_spin);
+ oldargs->ar_commit = gt->gt_logd_secs;
+ oldargs->ar_quota_quantum = gt->gt_quota_quantum;
+ if (gt->gt_statfs_slow)
+ oldargs->ar_statfs_quantum = 0;
+ else
+ oldargs->ar_statfs_quantum = gt->gt_statfs_quantum;
+ spin_unlock(&gt->gt_spin);
+
+ if (strcmp(newargs->ar_lockproto, oldargs->ar_lockproto)) {
+ errorf(fc, "gfs2: reconfiguration of locking protocol not allowed");
+ return -EINVAL;
+ }
+ if (strcmp(newargs->ar_locktable, oldargs->ar_locktable)) {
+ errorf(fc, "gfs2: reconfiguration of lock table not allowed");
+ return -EINVAL;
+ }
+ if (strcmp(newargs->ar_hostdata, oldargs->ar_hostdata)) {
+ errorf(fc, "gfs2: reconfiguration of host data not allowed");
+ return -EINVAL;
+ }
+ if (newargs->ar_spectator != oldargs->ar_spectator) {
+ errorf(fc, "gfs2: reconfiguration of spectator mode not allowed");
+ return -EINVAL;
+ }
+ if (newargs->ar_localflocks != oldargs->ar_localflocks) {
+ errorf(fc, "gfs2: reconfiguration of localflocks not allowed");
+ return -EINVAL;
+ }
+ if (newargs->ar_meta != oldargs->ar_meta) {
+ errorf(fc, "gfs2: switching between gfs2 and gfs2meta not allowed");
+ return -EINVAL;
+ }
+ if (oldargs->ar_spectator)
+ fc->sb_flags |= SB_RDONLY;
+
+ if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
+ error = gfs2_make_fs_ro(sdp);
+ if (error)
+ errorf(fc, "gfs2: unable to remount read-only");
+ } else {
+ error = gfs2_make_fs_rw(sdp);
+ if (error)
+ errorf(fc, "gfs2: unable to remount read-write");
+ }
}
+ sdp->sd_args = *newargs;
- sdp = s->s_fs_info;
- if (args.ar_meta)
- return dget(sdp->sd_master_dir);
+ if (sdp->sd_args.ar_posix_acl)
+ sb->s_flags |= SB_POSIXACL;
+ else
+ sb->s_flags &= ~SB_POSIXACL;
+ if (sdp->sd_args.ar_nobarrier)
+ set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
else
- return dget(sdp->sd_root_dir);
-
-error_super:
- deactivate_locked_super(s);
- return ERR_PTR(error);
-error_bdev:
- blkdev_put(bdev, mode);
- return ERR_PTR(error);
+ clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+ spin_lock(&gt->gt_spin);
+ gt->gt_logd_secs = newargs->ar_commit;
+ gt->gt_quota_quantum = newargs->ar_quota_quantum;
+ if (newargs->ar_statfs_quantum) {
+ gt->gt_statfs_slow = 0;
+ gt->gt_statfs_quantum = newargs->ar_statfs_quantum;
+ }
+ else {
+ gt->gt_statfs_slow = 1;
+ gt->gt_statfs_quantum = 30;
+ }
+ spin_unlock(&gt->gt_spin);
+
+ gfs2_online_uevent(sdp);
+ return error;
+}
+
+static const struct fs_context_operations gfs2_context_ops = { /* installed on fc->ops by gfs2_init_fs_context() */
+ .free = gfs2_fc_free,
+ .parse_param = gfs2_parse_param,
+ .get_tree = gfs2_get_tree,
+ .reconfigure = gfs2_reconfigure,
+};
+
+/*
+ * Set up the filesystem mount context.
+ *
+ * Allocates the struct gfs2_args that the option parser fills in and hangs
+ * it off fc->fs_private (released by gfs2_fc_free()).
+ *
+ * Returns: 0 on success, -ENOMEM if the allocation fails
+ */
+static int gfs2_init_fs_context(struct fs_context *fc)
+{
+ struct gfs2_args *args;
+
+ args = kzalloc(sizeof(*args), GFP_KERNEL);
+ if (args == NULL)
+ return -ENOMEM;
+
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ /*
+ * Remount: start from the options currently in force, as the
+ * old gfs2_remount_fs() did. Starting from mount-time defaults
+ * would make gfs2_reconfigure() compare against values the
+ * user never asked for and silently reset unmentioned options.
+ */
+ struct gfs2_sbd *sdp = fc->root->d_sb->s_fs_info;
+
+ *args = sdp->sd_args;
+ } else {
+ args->ar_quota = GFS2_QUOTA_DEFAULT;
+ args->ar_data = GFS2_DATA_DEFAULT;
+ args->ar_commit = 30;
+ args->ar_statfs_quantum = 30;
+ args->ar_quota_quantum = 60;
+ args->ar_errors = GFS2_ERRORS_DEFAULT;
+ }
+
+ fc->fs_private = args;
+ fc->ops = &gfs2_context_ops;
+ return 0;
}
-static int set_meta_super(struct super_block *s, void *ptr)
+static int set_meta_super(struct super_block *s, struct fs_context *fc)
{
return -EINVAL;
}
-static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int test_meta_super(struct super_block *s, struct fs_context *fc)
+{
+ return s->s_bdev == fc->sget_key; /* sget_fc() test callback: nonzero when s is on the block device stored in the key */
+}
+
+static int gfs2_meta_get_tree(struct fs_context *fc)
{
struct super_block *s;
struct gfs2_sbd *sdp;
struct path path;
int error;
- if (!dev_name || !*dev_name)
- return ERR_PTR(-EINVAL);
+ if (!fc->source || !*fc->source)
+ return -EINVAL;
- error = kern_path(dev_name, LOOKUP_FOLLOW, &path);
+ error = kern_path(fc->source, LOOKUP_FOLLOW, &path);
if (error) {
pr_warn("path_lookup on %s returned error %d\n",
- dev_name, error);
- return ERR_PTR(error);
+ fc->source, error);
+ return error;
}
- s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
- path.dentry->d_sb->s_bdev);
+ fc->fs_type = &gfs2_fs_type;
+ fc->sget_key = path.dentry->d_sb->s_bdev;
+ s = sget_fc(fc, test_meta_super, set_meta_super);
path_put(&path);
if (IS_ERR(s)) {
pr_warn("gfs2 mount does not exist\n");
- return ERR_CAST(s);
+ return PTR_ERR(s);
}
- if ((flags ^ s->s_flags) & SB_RDONLY) {
+ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
deactivate_locked_super(s);
- return ERR_PTR(-EBUSY);
+ return -EBUSY;
}
sdp = s->s_fs_info;
- return dget(sdp->sd_master_dir);
+ fc->root = dget(sdp->sd_master_dir);
+ return 0;
+}
+
+static const struct fs_context_operations gfs2_meta_context_ops = { /* installed on fc->ops by gfs2_meta_init_fs_context() */
+ .free = gfs2_fc_free, /* fc->fs_private was allocated by gfs2_init_fs_context(); without this hook it leaks */
+ .get_tree = gfs2_meta_get_tree,
+};
+
+static int gfs2_meta_init_fs_context(struct fs_context *fc) /* gfs2meta: same setup as gfs2, then switch to the meta ops */
+{
+ int err;
+
+ err = gfs2_init_fs_context(fc);
+ if (!err)
+ fc->ops = &gfs2_meta_context_ops; /* replaces the ops gfs2_init_fs_context() installed */
+
+ return err;
}
static void gfs2_kill_sb(struct super_block *sb)
@@ -1383,7 +1635,8 @@ static void gfs2_kill_sb(struct super_block *sb)
struct file_system_type gfs2_fs_type = {
.name = "gfs2",
.fs_flags = FS_REQUIRES_DEV,
- .mount = gfs2_mount,
+ .init_fs_context = gfs2_init_fs_context,
+ .parameters = &gfs2_fs_parameters,
.kill_sb = gfs2_kill_sb,
.owner = THIS_MODULE,
};
@@ -1392,7 +1645,7 @@ MODULE_ALIAS_FS("gfs2");
struct file_system_type gfs2meta_fs_type = {
.name = "gfs2meta",
.fs_flags = FS_REQUIRES_DEV,
- .mount = gfs2_mount_meta,
+ .init_fs_context = gfs2_meta_init_fs_context,
.owner = THIS_MODULE,
};
MODULE_ALIAS_FS("gfs2meta");
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 644c70ae09f7..5fa1eec4fb4f 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -44,258 +44,6 @@
#include "xattr.h"
#include "lops.h"
-#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
-
-enum {
- Opt_lockproto,
- Opt_locktable,
- Opt_hostdata,
- Opt_spectator,
- Opt_ignore_local_fs,
- Opt_localflocks,
- Opt_localcaching,
- Opt_debug,
- Opt_nodebug,
- Opt_upgrade,
- Opt_acl,
- Opt_noacl,
- Opt_quota_off,
- Opt_quota_account,
- Opt_quota_on,
- Opt_quota,
- Opt_noquota,
- Opt_suiddir,
- Opt_nosuiddir,
- Opt_data_writeback,
- Opt_data_ordered,
- Opt_meta,
- Opt_discard,
- Opt_nodiscard,
- Opt_commit,
- Opt_err_withdraw,
- Opt_err_panic,
- Opt_statfs_quantum,
- Opt_statfs_percent,
- Opt_quota_quantum,
- Opt_barrier,
- Opt_nobarrier,
- Opt_rgrplvb,
- Opt_norgrplvb,
- Opt_loccookie,
- Opt_noloccookie,
- Opt_error,
-};
-
-static const match_table_t tokens = {
- {Opt_lockproto, "lockproto=%s"},
- {Opt_locktable, "locktable=%s"},
- {Opt_hostdata, "hostdata=%s"},
- {Opt_spectator, "spectator"},
- {Opt_spectator, "norecovery"},
- {Opt_ignore_local_fs, "ignore_local_fs"},
- {Opt_localflocks, "localflocks"},
- {Opt_localcaching, "localcaching"},
- {Opt_debug, "debug"},
- {Opt_nodebug, "nodebug"},
- {Opt_upgrade, "upgrade"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_quota_off, "quota=off"},
- {Opt_quota_account, "quota=account"},
- {Opt_quota_on, "quota=on"},
- {Opt_quota, "quota"},
- {Opt_noquota, "noquota"},
- {Opt_suiddir, "suiddir"},
- {Opt_nosuiddir, "nosuiddir"},
- {Opt_data_writeback, "data=writeback"},
- {Opt_data_ordered, "data=ordered"},
- {Opt_meta, "meta"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_commit, "commit=%d"},
- {Opt_err_withdraw, "errors=withdraw"},
- {Opt_err_panic, "errors=panic"},
- {Opt_statfs_quantum, "statfs_quantum=%d"},
- {Opt_statfs_percent, "statfs_percent=%d"},
- {Opt_quota_quantum, "quota_quantum=%d"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_rgrplvb, "rgrplvb"},
- {Opt_norgrplvb, "norgrplvb"},
- {Opt_loccookie, "loccookie"},
- {Opt_noloccookie, "noloccookie"},
- {Opt_error, NULL}
-};
-
-/**
- * gfs2_mount_args - Parse mount options
- * @args: The structure into which the parsed options will be written
- * @options: The options to parse
- *
- * Return: errno
- */
-
-int gfs2_mount_args(struct gfs2_args *args, char *options)
-{
- char *o;
- int token;
- substring_t tmp[MAX_OPT_ARGS];
- int rv;
-
- /* Split the options into tokens with the "," character and
- process them */
-
- while (1) {
- o = strsep(&options, ",");
- if (o == NULL)
- break;
- if (*o == '\0')
- continue;
-
- token = match_token(o, tokens, tmp);
- switch (token) {
- case Opt_lockproto:
- match_strlcpy(args->ar_lockproto, &tmp[0],
- GFS2_LOCKNAME_LEN);
- break;
- case Opt_locktable:
- match_strlcpy(args->ar_locktable, &tmp[0],
- GFS2_LOCKNAME_LEN);
- break;
- case Opt_hostdata:
- match_strlcpy(args->ar_hostdata, &tmp[0],
- GFS2_LOCKNAME_LEN);
- break;
- case Opt_spectator:
- args->ar_spectator = 1;
- break;
- case Opt_ignore_local_fs:
- /* Retained for backwards compat only */
- break;
- case Opt_localflocks:
- args->ar_localflocks = 1;
- break;
- case Opt_localcaching:
- /* Retained for backwards compat only */
- break;
- case Opt_debug:
- if (args->ar_errors == GFS2_ERRORS_PANIC) {
- pr_warn("-o debug and -o errors=panic are mutually exclusive\n");
- return -EINVAL;
- }
- args->ar_debug = 1;
- break;
- case Opt_nodebug:
- args->ar_debug = 0;
- break;
- case Opt_upgrade:
- /* Retained for backwards compat only */
- break;
- case Opt_acl:
- args->ar_posix_acl = 1;
- break;
- case Opt_noacl:
- args->ar_posix_acl = 0;
- break;
- case Opt_quota_off:
- case Opt_noquota:
- args->ar_quota = GFS2_QUOTA_OFF;
- break;
- case Opt_quota_account:
- args->ar_quota = GFS2_QUOTA_ACCOUNT;
- break;
- case Opt_quota_on:
- case Opt_quota:
- args->ar_quota = GFS2_QUOTA_ON;
- break;
- case Opt_suiddir:
- args->ar_suiddir = 1;
- break;
- case Opt_nosuiddir:
- args->ar_suiddir = 0;
- break;
- case Opt_data_writeback:
- args->ar_data = GFS2_DATA_WRITEBACK;
- break;
- case Opt_data_ordered:
- args->ar_data = GFS2_DATA_ORDERED;
- break;
- case Opt_meta:
- args->ar_meta = 1;
- break;
- case Opt_discard:
- args->ar_discard = 1;
- break;
- case Opt_nodiscard:
- args->ar_discard = 0;
- break;
- case Opt_commit:
- rv = match_int(&tmp[0], &args->ar_commit);
- if (rv || args->ar_commit <= 0) {
- pr_warn("commit mount option requires a positive numeric argument\n");
- return rv ? rv : -EINVAL;
- }
- break;
- case Opt_statfs_quantum:
- rv = match_int(&tmp[0], &args->ar_statfs_quantum);
- if (rv || args->ar_statfs_quantum < 0) {
- pr_warn("statfs_quantum mount option requires a non-negative numeric argument\n");
- return rv ? rv : -EINVAL;
- }
- break;
- case Opt_quota_quantum:
- rv = match_int(&tmp[0], &args->ar_quota_quantum);
- if (rv || args->ar_quota_quantum <= 0) {
- pr_warn("quota_quantum mount option requires a positive numeric argument\n");
- return rv ? rv : -EINVAL;
- }
- break;
- case Opt_statfs_percent:
- rv = match_int(&tmp[0], &args->ar_statfs_percent);
- if (rv || args->ar_statfs_percent < 0 ||
- args->ar_statfs_percent > 100) {
- pr_warn("statfs_percent mount option requires a numeric argument between 0 and 100\n");
- return rv ? rv : -EINVAL;
- }
- break;
- case Opt_err_withdraw:
- args->ar_errors = GFS2_ERRORS_WITHDRAW;
- break;
- case Opt_err_panic:
- if (args->ar_debug) {
- pr_warn("-o debug and -o errors=panic are mutually exclusive\n");
- return -EINVAL;
- }
- args->ar_errors = GFS2_ERRORS_PANIC;
- break;
- case Opt_barrier:
- args->ar_nobarrier = 0;
- break;
- case Opt_nobarrier:
- args->ar_nobarrier = 1;
- break;
- case Opt_rgrplvb:
- args->ar_rgrplvb = 1;
- break;
- case Opt_norgrplvb:
- args->ar_rgrplvb = 0;
- break;
- case Opt_loccookie:
- args->ar_loccookie = 1;
- break;
- case Opt_noloccookie:
- args->ar_loccookie = 0;
- break;
- case Opt_error:
- default:
- pr_warn("invalid mount option: %s\n", o);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
/**
* gfs2_jindex_free - Clear all the journal index information
* @sdp: The GFS2 superblock
@@ -847,7 +595,7 @@ out:
* Returns: errno
*/
-static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
struct gfs2_holder freeze_gh;
int error;
@@ -1227,84 +975,6 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
}
/**
- * gfs2_remount_fs - called when the FS is remounted
- * @sb: the filesystem
- * @flags: the remount flags
- * @data: extra data passed in (not used right now)
- *
- * Returns: errno
- */
-
-static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
-{
- struct gfs2_sbd *sdp = sb->s_fs_info;
- struct gfs2_args args = sdp->sd_args; /* Default to current settings */
- struct gfs2_tune *gt = &sdp->sd_tune;
- int error;
-
- sync_filesystem(sb);
-
- spin_lock(&gt->gt_spin);
- args.ar_commit = gt->gt_logd_secs;
- args.ar_quota_quantum = gt->gt_quota_quantum;
- if (gt->gt_statfs_slow)
- args.ar_statfs_quantum = 0;
- else
- args.ar_statfs_quantum = gt->gt_statfs_quantum;
- spin_unlock(&gt->gt_spin);
- error = gfs2_mount_args(&args, data);
- if (error)
- return error;
-
- /* Not allowed to change locking details */
- if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
- strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
- strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
- return -EINVAL;
-
- /* Some flags must not be changed */
- if (args_neq(&args, &sdp->sd_args, spectator) ||
- args_neq(&args, &sdp->sd_args, localflocks) ||
- args_neq(&args, &sdp->sd_args, meta))
- return -EINVAL;
-
- if (sdp->sd_args.ar_spectator)
- *flags |= SB_RDONLY;
-
- if ((sb->s_flags ^ *flags) & SB_RDONLY) {
- if (*flags & SB_RDONLY)
- error = gfs2_make_fs_ro(sdp);
- else
- error = gfs2_make_fs_rw(sdp);
- }
-
- sdp->sd_args = args;
- if (sdp->sd_args.ar_posix_acl)
- sb->s_flags |= SB_POSIXACL;
- else
- sb->s_flags &= ~SB_POSIXACL;
- if (sdp->sd_args.ar_nobarrier)
- set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
- else
- clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
- spin_lock(&gt->gt_spin);
- gt->gt_logd_secs = args.ar_commit;
- gt->gt_quota_quantum = args.ar_quota_quantum;
- if (args.ar_statfs_quantum) {
- gt->gt_statfs_slow = 0;
- gt->gt_statfs_quantum = args.ar_statfs_quantum;
- }
- else {
- gt->gt_statfs_slow = 1;
- gt->gt_statfs_quantum = 30;
- }
- spin_unlock(&gt->gt_spin);
-
- gfs2_online_uevent(sdp);
- return error;
-}
-
-/**
* gfs2_drop_inode - Drop an inode (test for remote unlink)
* @inode: The inode to drop
*
@@ -1748,7 +1418,6 @@ const struct super_operations gfs2_super_ops = {
.freeze_super = gfs2_freeze,
.thaw_super = gfs2_unfreeze,
.statfs = gfs2_statfs,
- .remount_fs = gfs2_remount_fs,
.drop_inode = gfs2_drop_inode,
.show_options = gfs2_show_options,
};
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 9d49eaadb9d9..b8bf811a1305 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -24,8 +24,6 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
-extern int gfs2_mount_args(struct gfs2_args *args, char *data);
-
extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
@@ -33,6 +31,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+extern int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
diff --git a/fs/inode.c b/fs/inode.c
index 64bf28cf05cd..fef457a42882 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -181,6 +181,9 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->flags = 0;
mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ atomic_set(&mapping->nr_thps, 0);
+#endif
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
mapping->writeback_index = 0;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0dadbdbead0f..dd094b387cab 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -200,6 +200,7 @@ struct io_ring_ctx {
struct io_uring_sqe *sq_sqes;
struct list_head defer_list;
+ struct list_head timeout_list;
} ____cacheline_aligned_in_smp;
/* IO offload */
@@ -216,6 +217,7 @@ struct io_ring_ctx {
struct wait_queue_head cq_wait;
struct fasync_struct *cq_fasync;
struct eventfd_ctx *cq_ev_fd;
+ atomic_t cq_timeouts;
} ____cacheline_aligned_in_smp;
struct io_rings *rings;
@@ -283,6 +285,11 @@ struct io_poll_iocb {
struct wait_queue_entry wait;
};
+struct io_timeout { /* per-request timeout state; a member of the union at the top of struct io_kiocb */
+ struct file *file; /* must stay first: every iocb union member starts with the file pointer */
+ struct hrtimer timer;
+};
+
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@@ -294,6 +301,7 @@ struct io_kiocb {
struct file *file;
struct kiocb rw;
struct io_poll_iocb poll;
+ struct io_timeout timeout;
};
struct sqe_submit submit;
@@ -313,6 +321,7 @@ struct io_kiocb {
#define REQ_F_LINK_DONE 128 /* linked sqes done */
#define REQ_F_FAIL_LINK 256 /* fail rest of links */
#define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */
+#define REQ_F_TIMEOUT 1024 /* timeout request */
u64 user_data;
u32 result;
u32 sequence;
@@ -344,6 +353,8 @@ struct io_submit_state {
};
static void io_sq_wq_submit_work(struct work_struct *work);
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+ long res);
static void __io_free_req(struct io_kiocb *req);
static struct kmem_cache *req_cachep;
@@ -400,26 +411,30 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->poll_list);
INIT_LIST_HEAD(&ctx->cancel_list);
INIT_LIST_HEAD(&ctx->defer_list);
+ INIT_LIST_HEAD(&ctx->timeout_list);
return ctx;
}
static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
- if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
+ /* timeout requests always honor sequence */
+ if (!(req->flags & REQ_F_TIMEOUT) &&
+ (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
return false;
return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
}
-static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx,
+ struct list_head *list)
{
struct io_kiocb *req;
- if (list_empty(&ctx->defer_list))
+ if (list_empty(list))
return NULL;
- req = list_first_entry(&ctx->defer_list, struct io_kiocb, list);
+ req = list_first_entry(list, struct io_kiocb, list);
if (!io_sequence_defer(ctx, req)) {
list_del_init(&req->list);
return req;
@@ -428,6 +443,16 @@ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
return NULL;
}
+static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+{
+ return __io_get_deferred_req(ctx, &ctx->defer_list);
+}
+
+static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
+{
+ return __io_get_deferred_req(ctx, &ctx->timeout_list);
+}
+
static void __io_commit_cqring(struct io_ring_ctx *ctx)
{
struct io_rings *rings = ctx->rings;
@@ -446,25 +471,50 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
static inline void io_queue_async_work(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
- int rw;
+ int rw = 0;
- switch (req->submit.sqe->opcode) {
- case IORING_OP_WRITEV:
- case IORING_OP_WRITE_FIXED:
- rw = !(req->rw.ki_flags & IOCB_DIRECT);
- break;
- default:
- rw = 0;
- break;
+ if (req->submit.sqe) {
+ switch (req->submit.sqe->opcode) {
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ rw = !(req->rw.ki_flags & IOCB_DIRECT);
+ break;
+ }
}
queue_work(ctx->sqo_wq[rw], &req->work);
}
+static void io_kill_timeout(struct io_kiocb *req)
+{
+ int ret;
+
+ ret = hrtimer_try_to_cancel(&req->timeout.timer);
+ if (ret != -1) {
+ atomic_inc(&req->ctx->cq_timeouts);
+ list_del(&req->list);
+ io_cqring_fill_event(req->ctx, req->user_data, 0);
+ __io_free_req(req);
+ }
+}
+
+static void io_kill_timeouts(struct io_ring_ctx *ctx)
+{
+ struct io_kiocb *req, *tmp;
+
+ spin_lock_irq(&ctx->completion_lock);
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+ io_kill_timeout(req);
+ spin_unlock_irq(&ctx->completion_lock);
+}
+
static void io_commit_cqring(struct io_ring_ctx *ctx)
{
struct io_kiocb *req;
+ while ((req = io_get_timeout_req(ctx)) != NULL)
+ io_kill_timeout(req);
+
__io_commit_cqring(ctx);
while ((req = io_get_deferred_req(ctx)) != NULL) {
@@ -1248,6 +1298,51 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
}
}
+/*
+ * For files that don't have ->read_iter() and ->write_iter(), handle them
+ * by looping over ->read() or ->write() manually.
+ */
+static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
+ struct iov_iter *iter)
+{
+ ssize_t ret = 0;
+
+ /*
+ * Don't support polled IO through this interface, and we can't
+ * support non-blocking either. For the latter, this just causes
+ * the kiocb to be handled from an async context.
+ */
+ if (kiocb->ki_flags & IOCB_HIPRI)
+ return -EOPNOTSUPP;
+ if (kiocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+
+ while (iov_iter_count(iter)) {
+ struct iovec iovec = iov_iter_iovec(iter);
+ ssize_t nr;
+
+ if (rw == READ) {
+ nr = file->f_op->read(file, iovec.iov_base,
+ iovec.iov_len, &kiocb->ki_pos);
+ } else {
+ nr = file->f_op->write(file, iovec.iov_base,
+ iovec.iov_len, &kiocb->ki_pos);
+ }
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != iovec.iov_len)
+ break;
+ iov_iter_advance(iter, nr);
+ }
+
+ return ret;
+}
+
static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
bool force_nonblock)
{
@@ -1265,8 +1360,6 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
if (unlikely(!(file->f_mode & FMODE_READ)))
return -EBADF;
- if (unlikely(!file->f_op->read_iter))
- return -EINVAL;
ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
if (ret < 0)
@@ -1281,7 +1374,11 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
if (!ret) {
ssize_t ret2;
- ret2 = call_read_iter(file, kiocb, &iter);
+ if (file->f_op->read_iter)
+ ret2 = call_read_iter(file, kiocb, &iter);
+ else
+ ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+
/*
* In case of a short read, punt to async. This can happen
* if we have data partially cached. Alternatively we can
@@ -1326,8 +1423,6 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
file = kiocb->ki_filp;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
return -EBADF;
- if (unlikely(!file->f_op->write_iter))
- return -EINVAL;
ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
if (ret < 0)
@@ -1365,7 +1460,10 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
}
kiocb->ki_flags |= IOCB_WRITE;
- ret2 = call_write_iter(file, kiocb, &iter);
+ if (file->f_op->write_iter)
+ ret2 = call_write_iter(file, kiocb, &iter);
+ else
+ ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) {
io_rw_done(kiocb, ret2);
} else {
@@ -1714,6 +1812,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (!poll->file)
return -EBADF;
+ req->submit.sqe = NULL;
INIT_WORK(&req->work, io_poll_complete_work);
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
@@ -1765,6 +1864,81 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ipt.error;
}
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+ struct io_ring_ctx *ctx;
+ struct io_kiocb *req;
+ unsigned long flags;
+
+ req = container_of(timer, struct io_kiocb, timeout.timer);
+ ctx = req->ctx;
+ atomic_inc(&ctx->cq_timeouts);
+
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ list_del(&req->list);
+
+ io_cqring_fill_event(ctx, req->user_data, -ETIME);
+ io_commit_cqring(ctx);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+ io_cqring_ev_posted(ctx);
+
+ io_put_req(req);
+ return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ unsigned count, req_dist, tail_index;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct list_head *entry;
+ struct timespec ts;
+
+ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
+ sqe->len != 1)
+ return -EINVAL;
+ if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr,
+ sizeof(ts)))
+ return -EFAULT;
+
+ /*
+ * sqe->off holds how many events need to occur for this
+ * timeout event to be satisfied.
+ */
+ count = READ_ONCE(sqe->off);
+ if (!count)
+ count = 1;
+
+ req->sequence = ctx->cached_sq_head + count - 1;
+ req->flags |= REQ_F_TIMEOUT;
+
+ /*
+ * Insertion sort, ensuring the first entry in the list is always
+ * the one we need first.
+ */
+ tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
+ req_dist = req->sequence - tail_index;
+ spin_lock_irq(&ctx->completion_lock);
+ list_for_each_prev(entry, &ctx->timeout_list) {
+ struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+ unsigned dist;
+
+ dist = nxt->sequence - tail_index;
+ if (req_dist >= dist)
+ break;
+ }
+ list_add(&req->list, entry);
+ spin_unlock_irq(&ctx->completion_lock);
+
+ hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ req->timeout.timer.function = io_timeout_fn;
+ hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
+ HRTIMER_MODE_REL);
+ return 0;
+}
+
static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -1842,6 +2016,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_RECVMSG:
ret = io_recvmsg(req, s->sqe, force_nonblock);
break;
+ case IORING_OP_TIMEOUT:
+ ret = io_timeout(req, s->sqe);
+ break;
default:
ret = -EINVAL;
break;
@@ -2098,13 +2275,11 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
struct io_uring_sqe *sqe_copy;
- sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
+ sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
if (sqe_copy) {
struct async_list *list;
- memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
s->sqe = sqe_copy;
-
memcpy(&req->submit, s, sizeof(*s));
list = io_async_list_from_sqe(ctx, s->sqe);
if (!io_add_to_prev_work(list, req)) {
@@ -2359,18 +2534,22 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
io_queue_link_head(ctx, link, &link->submit, shadow_req,
true);
link = NULL;
+ shadow_req = NULL;
}
prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
if (!shadow_req) {
shadow_req = io_get_req(ctx, NULL);
+ if (unlikely(!shadow_req))
+ goto out;
shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
refcount_dec(&shadow_req->refs);
}
shadow_req->sequence = sqes[i].sequence;
}
+out:
if (unlikely(mm_fault)) {
io_cqring_add_event(ctx, sqes[i].sqe->user_data,
-EFAULT);
@@ -2436,7 +2615,7 @@ static int io_sq_thread(void *data)
* to sleep.
*/
if (inflight || !time_after(jiffies, timeout)) {
- cpu_relax();
+ cond_resched();
continue;
}
@@ -2545,18 +2724,22 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
io_queue_link_head(ctx, link, &link->submit, shadow_req,
force_nonblock);
link = NULL;
+ shadow_req = NULL;
}
prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
if (!shadow_req) {
shadow_req = io_get_req(ctx, NULL);
+ if (unlikely(!shadow_req))
+ goto out;
shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
refcount_dec(&shadow_req->refs);
}
shadow_req->sequence = s.sequence;
}
+out:
s.has_user = true;
s.needs_lock = false;
s.needs_fixed_file = false;
@@ -2593,6 +2776,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz)
{
struct io_rings *rings = ctx->rings;
+ unsigned nr_timeouts;
int ret;
if (io_cqring_events(rings) >= min_events)
@@ -2611,7 +2795,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
}
- ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
+ nr_timeouts = atomic_read(&ctx->cq_timeouts);
+ /*
+ * Return if we have enough events, or if a timeout occurred since
+ * we started waiting. For timeouts, we always want to return to
+ * userspace.
+ */
+ ret = wait_event_interruptible(ctx->wait,
+ io_cqring_events(rings) >= min_events ||
+ atomic_read(&ctx->cq_timeouts) != nr_timeouts);
restore_saved_sigmask_unless(ret == -ERESTARTSYS);
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -3282,6 +3474,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
percpu_ref_kill(&ctx->refs);
mutex_unlock(&ctx->uring_lock);
+ io_kill_timeouts(ctx);
io_poll_remove_all(ctx);
io_iopoll_reap_events(ctx);
wait_for_completion(&ctx->ctx_done);
@@ -3319,7 +3512,7 @@ static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
}
page = virt_to_head_page(ptr);
- if (sz > (PAGE_SIZE << compound_order(page)))
+ if (sz > page_size(page))
return -EINVAL;
pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 10517cea9682..1fc28c2da279 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -24,7 +24,7 @@
struct iomap_dio {
struct kiocb *iocb;
- iomap_dio_end_io_t *end_io;
+ const struct iomap_dio_ops *dops;
loff_t i_size;
loff_t size;
atomic_t ref;
@@ -72,18 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
+ const struct iomap_dio_ops *dops = dio->dops;
struct kiocb *iocb = dio->iocb;
struct inode *inode = file_inode(iocb->ki_filp);
loff_t offset = iocb->ki_pos;
- ssize_t ret;
+ ssize_t ret = dio->error;
- if (dio->end_io) {
- ret = dio->end_io(iocb,
- dio->error ? dio->error : dio->size,
- dio->flags);
- } else {
- ret = dio->error;
- }
+ if (dops && dops->end_io)
+ ret = dops->end_io(iocb, dio->size, ret, dio->flags);
if (likely(!ret)) {
ret = dio->size;
@@ -101,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
*
- * And this page cache invalidation has to be after dio->end_io(), as
- * some filesystems convert unwritten extents to real allocations in
- * end_io() when necessary, otherwise a racing buffer read would cache
+ * And this page cache invalidation has to be after ->end_io(), as some
+ * filesystems convert unwritten extents to real allocations in
+ * ->end_io() when necessary, otherwise a racing buffer read would cache
* zeros from unwritten extents.
*/
if (!dio->error &&
@@ -396,7 +392,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
*/
ssize_t
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
- const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
+ const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
@@ -421,7 +417,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
atomic_set(&dio->ref, 1);
dio->size = 0;
dio->i_size = i_size_read(inode);
- dio->end_io = end_io;
+ dio->dops = dops;
dio->error = 0;
dio->flags = 0;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 953990eb70a9..1c58859aa592 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -89,8 +89,6 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
EXPORT_SYMBOL(jbd2_journal_invalidatepage);
EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
-EXPORT_SYMBOL(jbd2_journal_inode_add_write);
-EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index afc06daee5bb..bee8498d7792 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2622,18 +2622,6 @@ done:
return 0;
}
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
-{
- return jbd2_journal_file_inode(handle, jinode,
- JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
-}
-
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
-{
- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
- LLONG_MAX);
-}
-
int jbd2_journal_inode_ranged_write(handle_t *handle,
struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
{
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0c335b51043d..f9baefc76cf9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5993,6 +5993,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
struct buffer_head *data_alloc_bh = NULL;
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
+ struct ocfs2_journal *journal = osb->journal;
BUG_ON(inode_trylock(tl_inode));
@@ -6013,6 +6014,20 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
goto out;
}
+ /* Appending truncate log(TA) and flushing truncate log(TF) are
+ * two separate transactions. They can be both committed but not
+ * checkpointed. If crash occurs then, both transactions will be
+ * replayed with several clusters already released to global bitmap.
+ * Then truncate log will be replayed resulting in cluster double free.
+ */
+ jbd2_journal_lock_updates(journal->j_journal);
+ status = jbd2_journal_flush(journal->j_journal);
+ jbd2_journal_unlock_updates(journal->j_journal);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
data_alloc_inode = ocfs2_get_system_file_inode(osb,
GLOBAL_BITMAP_SYSTEM_INODE,
OCFS2_INVALID_SLOT);
@@ -6792,6 +6807,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
struct page *page, int zero, u64 *phys)
{
int ret, partial = 0;
+ loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+ loff_t length = to - from;
ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
if (ret)
@@ -6811,7 +6828,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
if (ret < 0)
mlog_errno(ret);
else if (ocfs2_should_order_data(inode)) {
- ret = ocfs2_jbd2_file_inode(handle, inode);
+ ret = ocfs2_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
if (ret < 0)
mlog_errno(ret);
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a4c905d6b575..8de1c9d644f6 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -942,7 +942,8 @@ static void ocfs2_write_failure(struct inode *inode,
if (tmppage && page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode))
- ocfs2_jbd2_file_inode(wc->w_handle, inode);
+ ocfs2_jbd2_inode_add_write(wc->w_handle, inode,
+ user_pos, user_len);
block_commit_write(tmppage, from, to);
}
@@ -2023,8 +2024,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (page_has_buffers(tmppage)) {
- if (handle && ocfs2_should_order_data(inode))
- ocfs2_jbd2_file_inode(handle, inode);
+ if (handle && ocfs2_should_order_data(inode)) {
+ loff_t start_byte =
+ ((loff_t)tmppage->index << PAGE_SHIFT) +
+ from;
+ loff_t length = to - from;
+ ocfs2_jbd2_inode_add_write(handle, inode,
+ start_byte, length);
+ }
block_commit_write(tmppage, from, to);
}
}
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index 429e6a8359a5..eaf042feaf5e 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -231,14 +231,6 @@ static int blockcheck_u64_get(void *data, u64 *val)
}
DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
-static struct dentry *blockcheck_debugfs_create(const char *name,
- struct dentry *parent,
- u64 *value)
-{
- return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value,
- &blockcheck_fops);
-}
-
static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
{
if (stats) {
@@ -250,16 +242,20 @@ static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
struct dentry *parent)
{
- stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("blockcheck", parent);
+ stats->b_debug_dir = dir;
+
+ debugfs_create_file("blocks_checked", S_IFREG | S_IRUSR, dir,
+ &stats->b_check_count, &blockcheck_fops);
- blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir,
- &stats->b_check_count);
+ debugfs_create_file("checksums_failed", S_IFREG | S_IRUSR, dir,
+ &stats->b_failure_count, &blockcheck_fops);
- blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir,
- &stats->b_failure_count);
+ debugfs_create_file("ecc_recoveries", S_IFREG | S_IRUSR, dir,
+ &stats->b_recover_count, &blockcheck_fops);
- blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir,
- &stats->b_recover_count);
}
#else
static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f1b613327ac8..a368350d4c27 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -225,10 +225,6 @@ struct o2hb_region {
unsigned int hr_region_num;
struct dentry *hr_debug_dir;
- struct dentry *hr_debug_livenodes;
- struct dentry *hr_debug_regnum;
- struct dentry *hr_debug_elapsed_time;
- struct dentry *hr_debug_pinned;
struct o2hb_debug_buf *hr_db_livenodes;
struct o2hb_debug_buf *hr_db_regnum;
struct o2hb_debug_buf *hr_db_elapsed_time;
@@ -1394,21 +1390,20 @@ void o2hb_exit(void)
kfree(o2hb_db_failedregions);
}
-static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
- struct o2hb_debug_buf **db, int db_len,
- int type, int size, int len, void *data)
+static void o2hb_debug_create(const char *name, struct dentry *dir,
+ struct o2hb_debug_buf **db, int db_len, int type,
+ int size, int len, void *data)
{
*db = kmalloc(db_len, GFP_KERNEL);
if (!*db)
- return NULL;
+ return;
(*db)->db_type = type;
(*db)->db_size = size;
(*db)->db_len = len;
(*db)->db_data = data;
- return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db,
- &o2hb_debug_fops);
+ debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, &o2hb_debug_fops);
}
static void o2hb_debug_init(void)
@@ -1525,11 +1520,7 @@ static void o2hb_region_release(struct config_item *item)
kfree(reg->hr_slots);
- debugfs_remove(reg->hr_debug_livenodes);
- debugfs_remove(reg->hr_debug_regnum);
- debugfs_remove(reg->hr_debug_elapsed_time);
- debugfs_remove(reg->hr_debug_pinned);
- debugfs_remove(reg->hr_debug_dir);
+ debugfs_remove_recursive(reg->hr_debug_dir);
kfree(reg->hr_db_livenodes);
kfree(reg->hr_db_regnum);
kfree(reg->hr_db_elapsed_time);
@@ -1988,69 +1979,33 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
: NULL;
}
-static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
+static void o2hb_debug_region_init(struct o2hb_region *reg,
+ struct dentry *parent)
{
- int ret = -ENOMEM;
+ struct dentry *dir;
- reg->hr_debug_dir =
- debugfs_create_dir(config_item_name(&reg->hr_item), dir);
- if (!reg->hr_debug_dir) {
- mlog_errno(ret);
- goto bail;
- }
+ dir = debugfs_create_dir(config_item_name(&reg->hr_item), parent);
+ reg->hr_debug_dir = dir;
- reg->hr_debug_livenodes =
- o2hb_debug_create(O2HB_DEBUG_LIVENODES,
- reg->hr_debug_dir,
- &(reg->hr_db_livenodes),
- sizeof(*(reg->hr_db_livenodes)),
- O2HB_DB_TYPE_REGION_LIVENODES,
- sizeof(reg->hr_live_node_bitmap),
- O2NM_MAX_NODES, reg);
- if (!reg->hr_debug_livenodes) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_LIVENODES, dir, &(reg->hr_db_livenodes),
+ sizeof(*(reg->hr_db_livenodes)),
+ O2HB_DB_TYPE_REGION_LIVENODES,
+ sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES,
+ reg);
- reg->hr_debug_regnum =
- o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER,
- reg->hr_debug_dir,
- &(reg->hr_db_regnum),
- sizeof(*(reg->hr_db_regnum)),
- O2HB_DB_TYPE_REGION_NUMBER,
- 0, O2NM_MAX_NODES, reg);
- if (!reg->hr_debug_regnum) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, dir, &(reg->hr_db_regnum),
+ sizeof(*(reg->hr_db_regnum)),
+ O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg);
- reg->hr_debug_elapsed_time =
- o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME,
- reg->hr_debug_dir,
- &(reg->hr_db_elapsed_time),
- sizeof(*(reg->hr_db_elapsed_time)),
- O2HB_DB_TYPE_REGION_ELAPSED_TIME,
- 0, 0, reg);
- if (!reg->hr_debug_elapsed_time) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, dir,
+ &(reg->hr_db_elapsed_time),
+ sizeof(*(reg->hr_db_elapsed_time)),
+ O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg);
- reg->hr_debug_pinned =
- o2hb_debug_create(O2HB_DEBUG_REGION_PINNED,
- reg->hr_debug_dir,
- &(reg->hr_db_pinned),
- sizeof(*(reg->hr_db_pinned)),
- O2HB_DB_TYPE_REGION_PINNED,
- 0, 0, reg);
- if (!reg->hr_debug_pinned) {
- mlog_errno(ret);
- goto bail;
- }
+ o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, dir, &(reg->hr_db_pinned),
+ sizeof(*(reg->hr_db_pinned)),
+ O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg);
- ret = 0;
-bail:
- return ret;
}
static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
@@ -2106,11 +2061,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
if (ret)
goto unregister_handler;
- ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
- if (ret) {
- config_item_put(&reg->hr_item);
- goto unregister_handler;
- }
+ o2hb_debug_region_init(reg, o2hb_debug_dir);
return &reg->hr_item;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 784426dee56c..bdef72c0f099 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3636,7 +3636,7 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
int i, j, num_used;
u32 major_hash;
struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
- struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list;
+ struct ocfs2_dx_entry_list *orig_list, *tmp_list;
struct ocfs2_dx_entry *dx_entry;
tmp_list = &tmp_dx_leaf->dl_list;
@@ -3645,7 +3645,6 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
orig_list = &orig_dx_leaf->dl_list;
new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
- new_list = &new_dx_leaf->dl_list;
num_used = le16_to_cpu(orig_list->de_num_used);
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 69a429b625cc..aaf24548b02a 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -142,7 +142,6 @@ struct dlm_ctxt
atomic_t res_tot_count;
atomic_t res_cur_count;
- struct dlm_debug_ctxt *dlm_debug_ctxt;
struct dentry *dlm_debugfs_subroot;
/* NOTE: Next three are protected by dlm_domain_lock */
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index a4b58ba99927..4d0b452012b2 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -853,67 +853,34 @@ static const struct file_operations debug_state_fops = {
/* files in subroot */
void dlm_debug_init(struct dlm_ctxt *dlm)
{
- struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
-
/* for dumping dlm_ctxt */
- dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE,
- S_IFREG|S_IRUSR,
- dlm->dlm_debugfs_subroot,
- dlm, &debug_state_fops);
+ debugfs_create_file(DLM_DEBUGFS_DLM_STATE, S_IFREG|S_IRUSR,
+ dlm->dlm_debugfs_subroot, dlm, &debug_state_fops);
/* for dumping lockres */
- dc->debug_lockres_dentry =
- debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE,
- S_IFREG|S_IRUSR,
- dlm->dlm_debugfs_subroot,
- dlm, &debug_lockres_fops);
+ debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, S_IFREG|S_IRUSR,
+ dlm->dlm_debugfs_subroot, dlm, &debug_lockres_fops);
/* for dumping mles */
- dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
- S_IFREG|S_IRUSR,
- dlm->dlm_debugfs_subroot,
- dlm, &debug_mle_fops);
+ debugfs_create_file(DLM_DEBUGFS_MLE_STATE, S_IFREG|S_IRUSR,
+ dlm->dlm_debugfs_subroot, dlm, &debug_mle_fops);
/* for dumping lockres on the purge list */
- dc->debug_purgelist_dentry =
- debugfs_create_file(DLM_DEBUGFS_PURGE_LIST,
- S_IFREG|S_IRUSR,
- dlm->dlm_debugfs_subroot,
- dlm, &debug_purgelist_fops);
-}
-
-void dlm_debug_shutdown(struct dlm_ctxt *dlm)
-{
- struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
-
- if (dc) {
- debugfs_remove(dc->debug_purgelist_dentry);
- debugfs_remove(dc->debug_mle_dentry);
- debugfs_remove(dc->debug_lockres_dentry);
- debugfs_remove(dc->debug_state_dentry);
- kfree(dc);
- dc = NULL;
- }
+ debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, S_IFREG|S_IRUSR,
+ dlm->dlm_debugfs_subroot, dlm,
+ &debug_purgelist_fops);
}
/* subroot - domain dir */
-int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
{
- dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
- GFP_KERNEL);
- if (!dlm->dlm_debug_ctxt) {
- mlog_errno(-ENOMEM);
- return -ENOMEM;
- }
-
dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
dlm_debugfs_root);
- return 0;
}
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
- debugfs_remove(dlm->dlm_debugfs_subroot);
+ debugfs_remove_recursive(dlm->dlm_debugfs_subroot);
}
/* debugfs root */
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index 7d0c7c9013ce..f8fd8680a4b6 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -14,13 +14,6 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle);
#ifdef CONFIG_DEBUG_FS
-struct dlm_debug_ctxt {
- struct dentry *debug_state_dentry;
- struct dentry *debug_lockres_dentry;
- struct dentry *debug_mle_dentry;
- struct dentry *debug_purgelist_dentry;
-};
-
struct debug_lockres {
int dl_len;
char *dl_buf;
@@ -29,9 +22,8 @@ struct debug_lockres {
};
void dlm_debug_init(struct dlm_ctxt *dlm);
-void dlm_debug_shutdown(struct dlm_ctxt *dlm);
-int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
+void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);
void dlm_create_debugfs_root(void);
@@ -42,12 +34,8 @@ void dlm_destroy_debugfs_root(void);
static inline void dlm_debug_init(struct dlm_ctxt *dlm)
{
}
-static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
-{
-}
-static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+static inline void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
{
- return 0;
}
static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7338b5d4647c..ee6f459f9770 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -387,7 +387,6 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm)
static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
{
dlm_unregister_domain_handlers(dlm);
- dlm_debug_shutdown(dlm);
dlm_complete_thread(dlm);
dlm_complete_recovery_thread(dlm);
dlm_destroy_dlm_worker(dlm);
@@ -1938,7 +1937,6 @@ bail:
if (status) {
dlm_unregister_domain_handlers(dlm);
- dlm_debug_shutdown(dlm);
dlm_complete_thread(dlm);
dlm_complete_recovery_thread(dlm);
dlm_destroy_dlm_worker(dlm);
@@ -1992,9 +1990,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
dlm->key = key;
dlm->node_num = o2nm_this_node();
- ret = dlm_create_debugfs_subroot(dlm);
- if (ret < 0)
- goto leave;
+ dlm_create_debugfs_subroot(dlm);
spin_lock_init(&dlm->spinlock);
spin_lock_init(&dlm->master_lock);
@@ -2056,6 +2052,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
mlog(0, "context init: refcount %u\n",
kref_read(&dlm->dlm_refs));
+ ret = 0;
leave:
if (ret < 0 && dlm) {
if (dlm->master_hash)
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index e78657742bd8..3883633e82eb 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -90,7 +90,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
enum dlm_status status;
int actions = 0;
int in_use;
- u8 owner;
+ u8 owner;
+ int recovery_wait = 0;
mlog(0, "master_node = %d, valblk = %d\n", master_node,
flags & LKM_VALBLK);
@@ -193,9 +194,12 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
}
if (flags & LKM_CANCEL)
lock->cancel_pending = 0;
- else
- lock->unlock_pending = 0;
-
+ else {
+ if (!lock->unlock_pending)
+ recovery_wait = 1;
+ else
+ lock->unlock_pending = 0;
+ }
}
/* get an extra ref on lock. if we are just switching
@@ -229,6 +233,17 @@ leave:
spin_unlock(&res->spinlock);
wake_up(&res->wq);
+ if (recovery_wait) {
+ spin_lock(&res->spinlock);
+ /* Unlock request will directly succeed after owner dies,
+ * and the lock is already removed from grant list. We have to
+ * wait for RECOVERING done or we miss the chance to purge it
+ * since the removement is much faster than RECOVERING proc.
+ */
+ __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_RECOVERING);
+ spin_unlock(&res->spinlock);
+ }
+
/* let the caller's final dlm_lock_put handle the actual kfree */
if (actions & DLM_UNLOCK_FREE_LOCK) {
/* this should always be coupled with list removal */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 14207234fa3d..6e774c5ea13b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2508,9 +2508,7 @@ bail:
ocfs2_inode_unlock(inode, ex);
}
- if (local_bh)
- brelse(local_bh);
-
+ brelse(local_bh);
return status;
}
@@ -2593,8 +2591,7 @@ int ocfs2_inode_lock_atime(struct inode *inode,
*level = 1;
if (ocfs2_should_update_atime(inode, vfsmnt))
ocfs2_update_inode_atime(inode, bh);
- if (bh)
- brelse(bh);
+ brelse(bh);
} else
*level = 0;
@@ -3012,8 +3009,6 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
kref_init(&dlm_debug->d_refcnt);
INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
- dlm_debug->d_locking_state = NULL;
- dlm_debug->d_locking_filter = NULL;
dlm_debug->d_filter_secs = 0;
out:
return dlm_debug;
@@ -3282,27 +3277,19 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
{
struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
- dlm_debug->d_locking_state = debugfs_create_file("locking_state",
- S_IFREG|S_IRUSR,
- osb->osb_debug_root,
- osb,
- &ocfs2_dlm_debug_fops);
+ debugfs_create_file("locking_state", S_IFREG|S_IRUSR,
+ osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops);
- dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter",
- 0600,
- osb->osb_debug_root,
- &dlm_debug->d_filter_secs);
+ debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
+ &dlm_debug->d_filter_secs);
}
static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
{
struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
- if (dlm_debug) {
- debugfs_remove(dlm_debug->d_locking_state);
- debugfs_remove(dlm_debug->d_locking_filter);
+ if (dlm_debug)
ocfs2_put_dlm_debug(dlm_debug);
- }
}
int ocfs2_dlm_init(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index e66a249fe07c..e3e2d1b2af51 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -590,8 +590,7 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
*extent_flags = rec->e_flags;
}
out:
- if (eb_bh)
- brelse(eb_bh);
+ brelse(eb_bh);
return ret;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4435df3e5adb..2e982db3e1ae 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -706,7 +706,9 @@ leave:
* Thus, we need to explicitly order the zeroed pages.
*/
static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
- struct buffer_head *di_bh)
+ struct buffer_head *di_bh,
+ loff_t start_byte,
+ loff_t length)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
handle_t *handle = NULL;
@@ -722,7 +724,7 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
goto out;
}
- ret = ocfs2_jbd2_file_inode(handle, inode);
+ ret = ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -761,7 +763,9 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
BUG_ON(abs_from & (inode->i_blkbits - 1));
- handle = ocfs2_zero_start_ordered_transaction(inode, di_bh);
+ handle = ocfs2_zero_start_ordered_transaction(inode, di_bh,
+ abs_from,
+ abs_to - abs_from);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -2126,7 +2130,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry);
struct buffer_head *di_bh = NULL;
- loff_t end;
/*
* We start with a read level meta lock and only jump to an ex
@@ -2190,8 +2193,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
}
}
- end = pos + count;
-
ret = ocfs2_check_range_for_refcount(inode, pos, count);
if (ret == 1) {
ocfs2_inode_unlock(inode, meta_level);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7ad9d6590818..7c9dfd50c1c1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -534,7 +534,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
*/
mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
!!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
- "Inode %llu: system file state is ambigous\n",
+ "Inode %llu: system file state is ambiguous\n",
(unsigned long long)args->fi_blkno);
if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index c0fe6ed08ab1..3103ba7f97a2 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,7 +144,6 @@ static inline void ocfs2_ci_set_new(struct ocfs2_super *osb,
void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
-void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
void ocfs2_complete_recovery(struct work_struct *work);
void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
@@ -232,8 +231,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
* ocfs2_journal_access_*() unless you intend to
* manage the checksum by hand.
* ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
- * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before
- * the current handle commits.
+ * ocfs2_jbd2_inode_add_write - Mark an inode with range so that its data goes
+ * out before the current handle commits.
*/
/* You must always start_trans with a number of buffs > 0, but it's
@@ -441,7 +440,7 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir,
* previous dirblock update in the free list */
static inline int ocfs2_link_credits(struct super_block *sb)
{
- return 2*OCFS2_INODE_UPDATE_CREDITS + 4 +
+ return 2 * OCFS2_INODE_UPDATE_CREDITS + 4 +
ocfs2_quota_trans_credits(sb);
}
@@ -575,37 +574,12 @@ static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
return ocfs2_extent_recs_per_gd(sb);
}
-static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
- unsigned int clusters_to_del,
- struct ocfs2_dinode *fe,
- struct ocfs2_extent_list *last_el)
+static inline int ocfs2_jbd2_inode_add_write(handle_t *handle, struct inode *inode,
+ loff_t start_byte, loff_t length)
{
- /* for dinode + all headers in this pass + update to next leaf */
- u16 next_free = le16_to_cpu(last_el->l_next_free_rec);
- u16 tree_depth = le16_to_cpu(fe->id2.i_list.l_tree_depth);
- int credits = 1 + tree_depth + 1;
- int i;
-
- i = next_free - 1;
- BUG_ON(i < 0);
-
- /* We may be deleting metadata blocks, so metadata alloc dinode +
- one desc. block for each possible delete. */
- if (tree_depth && next_free == 1 &&
- ocfs2_rec_clusters(last_el, &last_el->l_recs[i]) == clusters_to_del)
- credits += 1 + tree_depth;
-
- /* update to the truncate log. */
- credits += OCFS2_TRUNCATE_LOG_UPDATE;
-
- credits += ocfs2_quota_trans_credits(sb);
-
- return credits;
-}
-
-static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
-{
- return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode);
+ return jbd2_journal_inode_ranged_write(handle,
+ &OCFS2_I(inode)->ip_jinode,
+ start_byte, length);
}
static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 6f8e1c4fdb9c..8ea51cf27b97 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2486,7 +2486,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
struct inode *inode = NULL;
struct inode *orphan_dir = NULL;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
- struct ocfs2_dinode *di = NULL;
handle_t *handle = NULL;
char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
struct buffer_head *parent_di_bh = NULL;
@@ -2552,7 +2551,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
goto leave;
}
- di = (struct ocfs2_dinode *)new_di_bh->b_data;
status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
&orphan_insert, orphan_dir, false);
if (status < 0) {
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index fddbbd60f434..9150cfa4df7d 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -223,8 +223,6 @@ struct ocfs2_orphan_scan {
struct ocfs2_dlm_debug {
struct kref d_refcnt;
- struct dentry *d_locking_state;
- struct dentry *d_locking_filter;
u32 d_filter_secs;
struct list_head d_lockres_tracking;
};
@@ -401,7 +399,6 @@ struct ocfs2_super
struct ocfs2_dlm_debug *osb_dlm_debug;
struct dentry *osb_debug_root;
- struct dentry *osb_ctxt;
wait_queue_head_t recovery_event;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8b2f39506648..c81e86c62380 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1080,10 +1080,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
ocfs2_debugfs_root);
- osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR,
- osb->osb_debug_root,
- osb,
- &ocfs2_osb_debug_fops);
+ debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root,
+ osb, &ocfs2_osb_debug_fops);
if (ocfs2_meta_ecc(osb))
ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
@@ -1861,8 +1859,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
kset_unregister(osb->osb_dev_kset);
- debugfs_remove(osb->osb_ctxt);
-
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
@@ -1918,7 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
ocfs2_dlm_shutdown(osb, hangup_needed);
ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
- debugfs_remove(osb->osb_debug_root);
+ debugfs_remove_recursive(osb->osb_debug_root);
if (hangup_needed)
ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str));
diff --git a/fs/open.c b/fs/open.c
index a59abe3c669a..c60cd22cc052 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -818,6 +818,14 @@ static int do_dentry_open(struct file *f,
if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
return -EINVAL;
}
+
+ /*
+ * XXX: Huge page cache doesn't support writing yet. Drop all page
+ * cache for this file before processing writes.
+ */
+ if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
+ truncate_pagecache(inode, 0);
+
return 0;
cleanup_all:
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 465ea0153b2a..ac9247371871 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -8,7 +8,6 @@
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
#include <linux/percpu.h>
-#include <linux/quicklist.h>
#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/vmstat.h>
@@ -106,9 +105,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_zone_page_state(NR_KERNEL_STACK_KB));
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));
-#ifdef CONFIG_QUICKLIST
- show_val_kb(m, "Quicklists: ", quicklist_total_size());
-#endif
show_val_kb(m, "NFS_Unstable: ",
global_node_page_state(NR_UNSTABLE_NFS));
@@ -136,6 +132,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
show_val_kb(m, "ShmemPmdMapped: ",
global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+ show_val_kb(m, "FileHugePages: ",
+ global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+ show_val_kb(m, "FilePmdMapped: ",
+ global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
#endif
#ifdef CONFIG_CMA
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index bf43d1d60059..9442631fd4af 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -417,6 +417,7 @@ struct mem_size_stats {
unsigned long lazyfree;
unsigned long anonymous_thp;
unsigned long shmem_thp;
+ unsigned long file_thp;
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
@@ -461,7 +462,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
static void smaps_account(struct mem_size_stats *mss, struct page *page,
bool compound, bool young, bool dirty, bool locked)
{
- int i, nr = compound ? 1 << compound_order(page) : 1;
+ int i, nr = compound ? compound_nr(page) : 1;
unsigned long size = nr * PAGE_SIZE;
/*
@@ -588,7 +589,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
else if (is_zone_device_page(page))
/* pass */;
else
- VM_BUG_ON_PAGE(1, page);
+ mss->file_thp += HPAGE_PMD_SIZE;
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
@@ -809,6 +810,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+ SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
mss->private_hugetlb >> 10, 7);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d952d5962e93..1ffb179f35d2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -370,21 +370,23 @@ static int
xfs_dio_write_end_io(
struct kiocb *iocb,
ssize_t size,
+ int error,
unsigned flags)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
unsigned int nofs_flag;
- int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if (size <= 0)
- return size;
+ if (error)
+ return error;
+ if (!size)
+ return 0;
/*
* Capture amount written on completion as we can't reliably account
@@ -441,6 +443,10 @@ out:
return error;
}
+static const struct iomap_dio_ops xfs_dio_write_ops = {
+ .end_io = xfs_dio_write_end_io,
+};
+
/*
* xfs_file_dio_aio_write - handle direct IO writes
*
@@ -541,7 +547,7 @@ xfs_file_dio_aio_write(
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
- ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
+ ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
/*
* If unaligned, this is the only IO in-flight. If it has not yet
OpenPOWER on IntegriCloud