Diffstat (limited to 'fs')
-rw-r--r-- fs/adfs/inode.c | 11
-rw-r--r-- fs/afs/internal.h | 3
-rw-r--r-- fs/afs/write.c | 2
-rw-r--r-- fs/autofs/autofs_i.h | 13
-rw-r--r-- fs/autofs/expire.c | 143
-rw-r--r-- fs/autofs/inode.c | 1
-rw-r--r-- fs/autofs/root.c | 33
-rw-r--r-- fs/btrfs/ctree.h | 5
-rw-r--r-- fs/btrfs/ioctl.c | 11
-rw-r--r-- fs/cifs/cifsfs.h | 2
-rw-r--r-- fs/cifs/sess.c | 6
-rw-r--r-- fs/cifs/smb2misc.c | 7
-rw-r--r-- fs/cifs/smb2ops.c | 4
-rw-r--r-- fs/cifs/smb2pdu.c | 4
-rw-r--r-- fs/cifs/smb2pdu.h | 12
-rw-r--r-- fs/dax.c | 138
-rw-r--r-- fs/devpts/inode.c | 47
-rw-r--r-- fs/eventpoll.c | 118
-rw-r--r-- fs/exec.c | 1
-rw-r--r-- fs/f2fs/checkpoint.c | 159
-rw-r--r-- fs/f2fs/data.c | 182
-rw-r--r-- fs/f2fs/debug.c | 3
-rw-r--r-- fs/f2fs/dir.c | 3
-rw-r--r-- fs/f2fs/f2fs.h | 191
-rw-r--r-- fs/f2fs/file.c | 216
-rw-r--r-- fs/f2fs/gc.c | 163
-rw-r--r-- fs/f2fs/inline.c | 36
-rw-r--r-- fs/f2fs/inode.c | 154
-rw-r--r-- fs/f2fs/namei.c | 6
-rw-r--r-- fs/f2fs/node.c | 380
-rw-r--r-- fs/f2fs/node.h | 9
-rw-r--r-- fs/f2fs/recovery.c | 31
-rw-r--r-- fs/f2fs/segment.c | 399
-rw-r--r-- fs/f2fs/segment.h | 16
-rw-r--r-- fs/f2fs/super.c | 162
-rw-r--r-- fs/f2fs/sysfs.c | 44
-rw-r--r-- fs/f2fs/xattr.c | 18
-rw-r--r-- fs/fat/cache.c | 19
-rw-r--r-- fs/fat/dir.c | 2
-rw-r--r-- fs/fat/fat.h | 12
-rw-r--r-- fs/fat/fatent.c | 108
-rw-r--r-- fs/fat/file.c | 33
-rw-r--r-- fs/fat/inode.c | 20
-rw-r--r-- fs/fat/misc.c | 13
-rw-r--r-- fs/fat/namei_msdos.c | 17
-rw-r--r-- fs/fat/namei_vfat.c | 20
-rw-r--r-- fs/fcntl.c | 74
-rw-r--r-- fs/file_table.c | 69
-rw-r--r-- fs/fuse/dev.c | 69
-rw-r--r-- fs/fuse/dir.c | 10
-rw-r--r-- fs/fuse/file.c | 4
-rw-r--r-- fs/fuse/fuse_i.h | 5
-rw-r--r-- fs/fuse/inode.c | 45
-rw-r--r-- fs/hfs/brec.c | 7
-rw-r--r-- fs/hfsplus/Kconfig | 15
-rw-r--r-- fs/hfsplus/Makefile | 2
-rw-r--r-- fs/hfsplus/acl.h | 28
-rw-r--r-- fs/hfsplus/brec.c | 7
-rw-r--r-- fs/hfsplus/dir.c | 13
-rw-r--r-- fs/hfsplus/extents.c | 18
-rw-r--r-- fs/hfsplus/hfsplus_fs.h | 1
-rw-r--r-- fs/hfsplus/inode.c | 11
-rw-r--r-- fs/hfsplus/posix_acl.c | 144
-rw-r--r-- fs/hfsplus/super.c | 8
-rw-r--r-- fs/hfsplus/unicode.c | 62
-rw-r--r-- fs/hfsplus/xattr.c | 6
-rw-r--r-- fs/hfsplus/xattr.h | 3
-rw-r--r-- fs/hfsplus/xattr_security.c | 13
-rw-r--r-- fs/hpfs/namei.c | 2
-rw-r--r-- fs/hugetlbfs/inode.c | 2
-rw-r--r-- fs/inode.c | 46
-rw-r--r-- fs/internal.h | 11
-rw-r--r-- fs/ioctl.c | 1
-rw-r--r-- fs/iomap.c | 2
-rw-r--r-- fs/isofs/inode.c | 7
-rw-r--r-- fs/lockd/clntlock.c | 2
-rw-r--r-- fs/lockd/clntproc.c | 2
-rw-r--r-- fs/lockd/svclock.c | 16
-rw-r--r-- fs/lockd/svcsubs.c | 4
-rw-r--r-- fs/locks.c | 22
-rw-r--r-- fs/namei.c | 55
-rw-r--r-- fs/namespace.c | 119
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c | 1
-rw-r--r-- fs/nfs/blocklayout/dev.c | 2
-rw-r--r-- fs/nfs/callback.h | 12
-rw-r--r-- fs/nfs/callback_proc.c | 97
-rw-r--r-- fs/nfs/callback_xdr.c | 91
-rw-r--r-- fs/nfs/client.c | 1
-rw-r--r-- fs/nfs/dir.c | 32
-rw-r--r-- fs/nfs/direct.c | 2
-rw-r--r-- fs/nfs/file.c | 4
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c | 31
-rw-r--r-- fs/nfs/nfs3acl.c | 2
-rw-r--r-- fs/nfs/nfs42proc.c | 209
-rw-r--r-- fs/nfs/nfs42xdr.c | 98
-rw-r--r-- fs/nfs/nfs4_fs.h | 8
-rw-r--r-- fs/nfs/nfs4client.c | 21
-rw-r--r-- fs/nfs/nfs4file.c | 10
-rw-r--r-- fs/nfs/nfs4idmap.c | 4
-rw-r--r-- fs/nfs/nfs4proc.c | 156
-rw-r--r-- fs/nfs/nfs4state.c | 40
-rw-r--r-- fs/nfs/nfs4xdr.c | 1
-rw-r--r-- fs/nfs/pagelist.c | 1
-rw-r--r-- fs/nfs/pnfs.c | 123
-rw-r--r-- fs/nfs/pnfs.h | 7
-rw-r--r-- fs/nfs/pnfs_nfs.c | 16
-rw-r--r-- fs/nfs/super.c | 4
-rw-r--r-- fs/nfs/write.c | 2
-rw-r--r-- fs/nfsd/netns.h | 1
-rw-r--r-- fs/nfsd/nfs3proc.c | 5
-rw-r--r-- fs/nfsd/nfs4callback.c | 30
-rw-r--r-- fs/nfsd/nfs4layouts.c | 11
-rw-r--r-- fs/nfsd/nfs4proc.c | 41
-rw-r--r-- fs/nfsd/nfs4state.c | 70
-rw-r--r-- fs/nfsd/nfs4xdr.c | 43
-rw-r--r-- fs/nfsd/nfsctl.c | 7
-rw-r--r-- fs/nfsd/nfsd.h | 1
-rw-r--r-- fs/nfsd/nfsfh.c | 6
-rw-r--r-- fs/nfsd/nfsproc.c | 5
-rw-r--r-- fs/nfsd/state.h | 2
-rw-r--r-- fs/nilfs2/file.c | 2
-rw-r--r-- fs/nilfs2/super.c | 2
-rw-r--r-- fs/notify/dnotify/dnotify.c | 3
-rw-r--r-- fs/notify/fanotify/fanotify.c | 1
-rw-r--r-- fs/notify/mark.c | 6
-rw-r--r-- fs/ocfs2/file.c | 17
-rw-r--r-- fs/open.c | 44
-rw-r--r-- fs/overlayfs/Kconfig | 19
-rw-r--r-- fs/overlayfs/Makefile | 4
-rw-r--r-- fs/overlayfs/copy_up.c | 190
-rw-r--r-- fs/overlayfs/dir.c | 109
-rw-r--r-- fs/overlayfs/export.c | 3
-rw-r--r-- fs/overlayfs/file.c | 511
-rw-r--r-- fs/overlayfs/inode.c | 175
-rw-r--r-- fs/overlayfs/namei.c | 195
-rw-r--r-- fs/overlayfs/overlayfs.h | 47
-rw-r--r-- fs/overlayfs/ovl_entry.h | 6
-rw-r--r-- fs/overlayfs/readdir.c | 19
-rw-r--r-- fs/overlayfs/super.c | 103
-rw-r--r-- fs/overlayfs/util.c | 252
-rw-r--r-- fs/proc/Kconfig | 1
-rw-r--r-- fs/proc/base.c | 50
-rw-r--r-- fs/proc/generic.c | 4
-rw-r--r-- fs/proc/inode.c | 6
-rw-r--r-- fs/proc/internal.h | 25
-rw-r--r-- fs/proc/kcore.c | 535
-rw-r--r-- fs/proc/meminfo.c | 2
-rw-r--r-- fs/proc/stat.c | 2
-rw-r--r-- fs/proc/task_mmu.c | 319
-rw-r--r-- fs/proc/task_nommu.c | 39
-rw-r--r-- fs/proc/uptime.c | 4
-rw-r--r-- fs/proc/vmcore.c | 4
-rw-r--r-- fs/quota/quota.c | 14
-rw-r--r-- fs/read_write.c | 94
-rw-r--r-- fs/reiserfs/item_ops.c | 16
-rw-r--r-- fs/reiserfs/journal.c | 22
-rw-r--r-- fs/reiserfs/procfs.c | 11
-rw-r--r-- fs/reiserfs/reiserfs.h | 4
-rw-r--r-- fs/reiserfs/xattr.c | 4
-rw-r--r-- fs/super.c | 67
-rw-r--r-- fs/sysv/inode.c | 6
-rw-r--r-- fs/ubifs/Kconfig | 15
-rw-r--r-- fs/ubifs/Makefile | 3
-rw-r--r-- fs/ubifs/budget.c | 68
-rw-r--r-- fs/ubifs/commit.c | 8
-rw-r--r-- fs/ubifs/crypto.c | 4
-rw-r--r-- fs/ubifs/debug.c | 42
-rw-r--r-- fs/ubifs/debug.h | 14
-rw-r--r-- fs/ubifs/dir.c | 42
-rw-r--r-- fs/ubifs/file.c | 62
-rw-r--r-- fs/ubifs/find.c | 59
-rw-r--r-- fs/ubifs/gc.c | 66
-rw-r--r-- fs/ubifs/io.c | 97
-rw-r--r-- fs/ubifs/journal.c | 72
-rw-r--r-- fs/ubifs/key.h | 14
-rw-r--r-- fs/ubifs/log.c | 12
-rw-r--r-- fs/ubifs/lprops.c | 94
-rw-r--r-- fs/ubifs/lpt.c | 112
-rw-r--r-- fs/ubifs/lpt_commit.c | 50
-rw-r--r-- fs/ubifs/master.c | 2
-rw-r--r-- fs/ubifs/misc.c | 11
-rw-r--r-- fs/ubifs/misc.h | 16
-rw-r--r-- fs/ubifs/orphan.c | 26
-rw-r--r-- fs/ubifs/recovery.c | 14
-rw-r--r-- fs/ubifs/replay.c | 13
-rw-r--r-- fs/ubifs/sb.c | 18
-rw-r--r-- fs/ubifs/scan.c | 2
-rw-r--r-- fs/ubifs/shrinker.c | 12
-rw-r--r-- fs/ubifs/super.c | 76
-rw-r--r-- fs/ubifs/tnc.c | 111
-rw-r--r-- fs/ubifs/tnc_commit.c | 28
-rw-r--r-- fs/ubifs/tnc_misc.c | 40
-rw-r--r-- fs/ubifs/ubifs.h | 39
-rw-r--r-- fs/ubifs/xattr.c | 40
-rw-r--r-- fs/udf/super.c | 93
-rw-r--r-- fs/userfaultfd.c | 12
-rw-r--r-- fs/xattr.c | 11
-rw-r--r-- fs/xfs/scrub/repair.c | 46
-rw-r--r-- fs/xfs/xfs_file.c | 29
-rw-r--r-- fs/xfs/xfs_mount.c | 1
200 files changed, 6245 insertions(+), 3245 deletions(-)
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index e91028d4340a..66621e96f9af 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -167,7 +167,7 @@ adfs_mode2atts(struct super_block *sb, struct inode *inode)
* of time to convert from RISC OS epoch to Unix epoch.
*/
static void
-adfs_adfs2unix_time(struct timespec *tv, struct inode *inode)
+adfs_adfs2unix_time(struct timespec64 *tv, struct inode *inode)
{
unsigned int high, low;
/* 01 Jan 1970 00:00:00 (Unix epoch) as nanoseconds since
@@ -195,11 +195,11 @@ adfs_adfs2unix_time(struct timespec *tv, struct inode *inode)
/* convert from RISC OS to Unix epoch */
nsec -= nsec_unix_epoch_diff_risc_os_epoch;
- *tv = ns_to_timespec(nsec);
+ *tv = ns_to_timespec64(nsec);
return;
cur_time:
- *tv = timespec64_to_timespec(current_time(inode));
+ *tv = current_time(inode);
return;
too_early:
@@ -242,7 +242,6 @@ adfs_unix2adfs_time(struct inode *inode, unsigned int secs)
struct inode *
adfs_iget(struct super_block *sb, struct object_info *obj)
{
- struct timespec ts;
struct inode *inode;
inode = new_inode(sb);
@@ -271,9 +270,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000);
inode->i_mode = adfs_atts2mode(sb, inode);
- ts = timespec64_to_timespec(inode->i_mtime);
- adfs_adfs2unix_time(&ts, inode);
- inode->i_mtime = timespec_to_timespec64(ts);
+ adfs_adfs2unix_time(&inode->i_mtime, inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
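
[The hunks above switch adfs from the 32-bit struct timespec to struct timespec64, dropping the lossy round-trip through timespec64_to_timespec(). A minimal sketch of the conversion helper now used directly; the function name is illustrative, not from the patch:

    #include <linux/fs.h>
    #include <linux/time64.h>

    /* Illustrative only: store a nanosecond count (already rebased to the
     * Unix epoch) without truncating through the 32-bit struct timespec. */
    static void example_set_mtime(struct inode *inode, s64 nsec)
    {
            inode->i_mtime = ns_to_timespec64(nsec);
    }
]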
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 9778df135717..871a228d7f37 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -21,6 +21,7 @@
#include <linux/fscache.h>
#include <linux/backing-dev.h>
#include <linux/uuid.h>
+#include <linux/mm_types.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -1076,7 +1077,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *);
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
-extern int afs_page_mkwrite(struct vm_fault *);
+extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
extern void afs_prune_wb_keys(struct afs_vnode *);
extern int afs_launder_page(struct page *);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 8b39e6ebb40b..19c04caf3c01 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -753,7 +753,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
* notification that a previously read-only page is about to become writable
* - if it returns an error, the caller will deliver a bus error signal
*/
-int afs_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
{
struct file *file = vmf->vma->vm_file;
struct inode *inode = file_inode(file);
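
[afs_page_mkwrite() now returns the typed vm_fault_t rather than a bare int, matching the tree-wide fault-code conversion. A hedged skeleton of a ->page_mkwrite handler using the typed codes; a sketch, not the afs implementation:

    #include <linux/fs.h>
    #include <linux/mm.h>
    #include <linux/pagemap.h>

    static vm_fault_t example_page_mkwrite(struct vm_fault *vmf)
    {
            struct page *page = vmf->page;

            lock_page(page);
            if (page->mapping != file_inode(vmf->vma->vm_file)->i_mapping) {
                    unlock_page(page);
                    return VM_FAULT_NOPAGE;  /* page truncated under us */
            }
            /* ... dirty the page / set up writeback state ... */
            return VM_FAULT_LOCKED;          /* typed code, not a bare int */
    }
]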
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 9400a9f6318a..9f9cadbfbd7a 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -18,6 +18,7 @@
#include <linux/string.h>
#include <linux/wait.h>
#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/uaccess.h>
@@ -26,6 +27,7 @@
#include <linux/list.h>
#include <linux/completion.h>
#include <linux/file.h>
+#include <linux/magic.h>
/* This is the range of ioctl() numbers we claim as ours */
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
@@ -124,7 +126,8 @@ struct autofs_sb_info {
static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
{
- return (struct autofs_sb_info *)(sb->s_fs_info);
+ return sb->s_magic != AUTOFS_SUPER_MAGIC ?
+ NULL : (struct autofs_sb_info *)(sb->s_fs_info);
}
static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
@@ -151,15 +154,9 @@ int autofs_expire_run(struct super_block *, struct vfsmount *,
struct autofs_sb_info *,
struct autofs_packet_expire __user *);
int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int when);
+ struct autofs_sb_info *sbi, unsigned int how);
int autofs_expire_multi(struct super_block *, struct vfsmount *,
struct autofs_sb_info *, int __user *);
-struct dentry *autofs_expire_direct(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int how);
-struct dentry *autofs_expire_indirect(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int how);
/* Device node initialization */
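
[With the magic-number check above, autofs_sbi() returns NULL for a superblock that is not an autofs mount, so code handed an arbitrary fd can bail out instead of treating a foreign s_fs_info as its own. A sketch of the caller-side pattern this enables; the function name is hypothetical:

    /* Hypothetical caller; not part of the patch. */
    static int example_check_sb(struct super_block *sb)
    {
            struct autofs_sb_info *sbi = autofs_sbi(sb);

            if (!sbi)               /* not an autofs superblock */
                    return -EINVAL;
            /* ... safe to dereference sbi ... */
            return 0;
    }
]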
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index b332d3f6e730..d441244b79df 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -10,11 +10,9 @@
#include "autofs_i.h"
-static unsigned long now;
-
/* Check if a dentry can be expired */
static inline int autofs_can_expire(struct dentry *dentry,
- unsigned long timeout, int do_now)
+ unsigned long timeout, unsigned int how)
{
struct autofs_info *ino = autofs_dentry_ino(dentry);
@@ -22,16 +20,17 @@ static inline int autofs_can_expire(struct dentry *dentry,
if (ino == NULL)
return 0;
- if (!do_now) {
+ if (!(how & AUTOFS_EXP_IMMEDIATE)) {
/* Too young to die */
- if (!timeout || time_after(ino->last_used + timeout, now))
+ if (!timeout || time_after(ino->last_used + timeout, jiffies))
return 0;
}
return 1;
}
/* Check a mount point for busyness */
-static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
+static int autofs_mount_busy(struct vfsmount *mnt,
+ struct dentry *dentry, unsigned int how)
{
struct dentry *top = dentry;
struct path path = {.mnt = mnt, .dentry = dentry};
@@ -52,6 +51,12 @@ static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
goto done;
}
+ /* Not a submount, has a forced expire been requested */
+ if (how & AUTOFS_EXP_FORCED) {
+ status = 0;
+ goto done;
+ }
+
/* Update the expiry counter if fs is busy */
if (!may_umount_tree(path.mnt)) {
struct autofs_info *ino;
@@ -187,10 +192,14 @@ again:
static int autofs_direct_busy(struct vfsmount *mnt,
struct dentry *top,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
pr_debug("top %p %pd\n", top, top);
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return 0;
+
/* If it's busy update the expiry counters */
if (!may_umount_tree(mnt)) {
struct autofs_info *ino;
@@ -202,7 +211,7 @@ static int autofs_direct_busy(struct vfsmount *mnt,
}
/* Timeout of a direct mount is determined by its top dentry */
- if (!autofs_can_expire(top, timeout, do_now))
+ if (!autofs_can_expire(top, timeout, how))
return 1;
return 0;
@@ -215,7 +224,7 @@ static int autofs_direct_busy(struct vfsmount *mnt,
static int autofs_tree_busy(struct vfsmount *mnt,
struct dentry *top,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
struct autofs_info *top_ino = autofs_dentry_ino(top);
struct dentry *p;
@@ -237,7 +246,7 @@ static int autofs_tree_busy(struct vfsmount *mnt,
* If the fs is busy update the expiry counter.
*/
if (d_mountpoint(p)) {
- if (autofs_mount_busy(mnt, p)) {
+ if (autofs_mount_busy(mnt, p, how)) {
top_ino->last_used = jiffies;
dput(p);
return 1;
@@ -260,8 +269,12 @@ static int autofs_tree_busy(struct vfsmount *mnt,
}
}
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return 0;
+
/* Timeout of a tree mount is ultimately determined by its top dentry */
- if (!autofs_can_expire(top, timeout, do_now))
+ if (!autofs_can_expire(top, timeout, how))
return 1;
return 0;
@@ -270,7 +283,7 @@ static int autofs_tree_busy(struct vfsmount *mnt,
static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
struct dentry *parent,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
struct dentry *p;
@@ -282,11 +295,17 @@ static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
if (d_mountpoint(p)) {
/* Can we umount this guy */
- if (autofs_mount_busy(mnt, p))
+ if (autofs_mount_busy(mnt, p, how))
continue;
+ /* This isn't a submount so if a forced expire
+ * has been requested, user space handles busy
+ * mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return p;
+
/* Can we expire this guy */
- if (autofs_can_expire(p, timeout, do_now))
+ if (autofs_can_expire(p, timeout, how))
return p;
}
}
@@ -294,23 +313,21 @@ static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
}
/* Check if we can expire a direct mount (possibly a tree) */
-struct dentry *autofs_expire_direct(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- int how)
+static struct dentry *autofs_expire_direct(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ unsigned int how)
{
- unsigned long timeout;
struct dentry *root = dget(sb->s_root);
- int do_now = how & AUTOFS_EXP_IMMEDIATE;
struct autofs_info *ino;
+ unsigned long timeout;
if (!root)
return NULL;
- now = jiffies;
timeout = sbi->exp_timeout;
- if (!autofs_direct_busy(mnt, root, timeout, do_now)) {
+ if (!autofs_direct_busy(mnt, root, timeout, how)) {
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(root);
/* No point expiring a pending mount */
@@ -321,7 +338,7 @@ struct dentry *autofs_expire_direct(struct super_block *sb,
ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
- if (!autofs_direct_busy(mnt, root, timeout, do_now)) {
+ if (!autofs_direct_busy(mnt, root, timeout, how)) {
spin_lock(&sbi->fs_lock);
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
@@ -346,10 +363,8 @@ out:
static struct dentry *should_expire(struct dentry *dentry,
struct vfsmount *mnt,
unsigned long timeout,
- int how)
+ unsigned int how)
{
- int do_now = how & AUTOFS_EXP_IMMEDIATE;
- int exp_leaves = how & AUTOFS_EXP_LEAVES;
struct autofs_info *ino = autofs_dentry_ino(dentry);
unsigned int ino_count;
@@ -367,22 +382,33 @@ static struct dentry *should_expire(struct dentry *dentry,
pr_debug("checking mountpoint %p %pd\n", dentry, dentry);
/* Can we umount this guy */
- if (autofs_mount_busy(mnt, dentry))
+ if (autofs_mount_busy(mnt, dentry, how))
return NULL;
+ /* This isn't a submount so if a forced expire
+ * has been requested, user space handles busy
+ * mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return dentry;
+
/* Can we expire this guy */
- if (autofs_can_expire(dentry, timeout, do_now))
+ if (autofs_can_expire(dentry, timeout, how))
return dentry;
return NULL;
}
if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
pr_debug("checking symlink %p %pd\n", dentry, dentry);
+
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return dentry;
+
/*
* A symlink can't be "busy" in the usual sense so
* just check last used for expire timeout.
*/
- if (autofs_can_expire(dentry, timeout, do_now))
+ if (autofs_can_expire(dentry, timeout, how))
return dentry;
return NULL;
}
@@ -391,27 +417,33 @@ static struct dentry *should_expire(struct dentry *dentry,
return NULL;
/* Case 2: tree mount, expire iff entire tree is not busy */
- if (!exp_leaves) {
- /* Path walk currently on this dentry? */
- ino_count = atomic_read(&ino->count) + 1;
- if (d_count(dentry) > ino_count)
- return NULL;
+ if (!(how & AUTOFS_EXP_LEAVES)) {
+ /* Not a forced expire? */
+ if (!(how & AUTOFS_EXP_FORCED)) {
+ /* ref-walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 1;
+ if (d_count(dentry) > ino_count)
+ return NULL;
+ }
- if (!autofs_tree_busy(mnt, dentry, timeout, do_now))
+ if (!autofs_tree_busy(mnt, dentry, timeout, how))
return dentry;
/*
* Case 3: pseudo direct mount, expire individual leaves
* (autofs-4.1).
*/
} else {
- /* Path walk currently on this dentry? */
struct dentry *expired;
- ino_count = atomic_read(&ino->count) + 1;
- if (d_count(dentry) > ino_count)
- return NULL;
+ /* Not a forced expire? */
+ if (!(how & AUTOFS_EXP_FORCED)) {
+ /* ref-walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 1;
+ if (d_count(dentry) > ino_count)
+ return NULL;
+ }
- expired = autofs_check_leaves(mnt, dentry, timeout, do_now);
+ expired = autofs_check_leaves(mnt, dentry, timeout, how);
if (expired) {
if (expired == dentry)
dput(dentry);
@@ -427,10 +459,10 @@ static struct dentry *should_expire(struct dentry *dentry,
* - it is unused by any user process
* - it has been unused for exp_timeout time
*/
-struct dentry *autofs_expire_indirect(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- int how)
+static struct dentry *autofs_expire_indirect(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ unsigned int how)
{
unsigned long timeout;
struct dentry *root = sb->s_root;
@@ -442,13 +474,10 @@ struct dentry *autofs_expire_indirect(struct super_block *sb,
if (!root)
return NULL;
- now = jiffies;
timeout = sbi->exp_timeout;
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
- int flags = how;
-
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(dentry);
if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
@@ -457,7 +486,7 @@ struct dentry *autofs_expire_indirect(struct super_block *sb,
}
spin_unlock(&sbi->fs_lock);
- expired = should_expire(dentry, mnt, timeout, flags);
+ expired = should_expire(dentry, mnt, timeout, how);
if (!expired)
continue;
@@ -470,7 +499,7 @@ struct dentry *autofs_expire_indirect(struct super_block *sb,
/* Make sure a reference is not taken on found if
* things have changed.
*/
- flags &= ~AUTOFS_EXP_LEAVES;
+ how &= ~AUTOFS_EXP_LEAVES;
found = should_expire(expired, mnt, timeout, how);
if (!found || found != expired)
/* Something has changed, continue */
@@ -575,7 +604,7 @@ int autofs_expire_run(struct super_block *sb,
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(dentry);
/* avoid rapid-fire expire attempts if expiry fails */
- ino->last_used = now;
+ ino->last_used = jiffies;
ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -584,15 +613,15 @@ int autofs_expire_run(struct super_block *sb,
}
int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int when)
+ struct autofs_sb_info *sbi, unsigned int how)
{
struct dentry *dentry;
int ret = -EAGAIN;
if (autofs_type_trigger(sbi->type))
- dentry = autofs_expire_direct(sb, mnt, sbi, when);
+ dentry = autofs_expire_direct(sb, mnt, sbi, how);
else
- dentry = autofs_expire_indirect(sb, mnt, sbi, when);
+ dentry = autofs_expire_indirect(sb, mnt, sbi, how);
if (dentry) {
struct autofs_info *ino = autofs_dentry_ino(dentry);
@@ -605,7 +634,7 @@ int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
/* avoid rapid-fire expire attempts if expiry fails */
- ino->last_used = now;
+ ino->last_used = jiffies;
ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -622,10 +651,10 @@ int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
int autofs_expire_multi(struct super_block *sb, struct vfsmount *mnt,
struct autofs_sb_info *sbi, int __user *arg)
{
- int do_now = 0;
+ unsigned int how = 0;
- if (arg && get_user(do_now, arg))
+ if (arg && get_user(how, arg))
return -EFAULT;
- return autofs_do_expire_multi(sb, mnt, sbi, do_now);
+ return autofs_do_expire_multi(sb, mnt, sbi, how);
}
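
[autofs_expire_multi() copies the expire control word straight from user space, so the AUTOFS_EXP_* bits are user-visible ABI. A minimal userspace sketch, assuming a <linux/auto_fs4.h> recent enough to define AUTOFS_EXP_FORCED; error handling elided:

    #include <sys/ioctl.h>
    #include <linux/auto_fs4.h>

    /* Request an immediate expire; with AUTOFS_EXP_FORCED the daemon,
     * not the kernel, is responsible for dealing with busy mounts.
     * `fd` is an open descriptor on the autofs mount point. */
    int example_request_expire(int fd)
    {
            int how = AUTOFS_EXP_IMMEDIATE | AUTOFS_EXP_FORCED;

            return ioctl(fd, AUTOFS_IOC_EXPIRE_MULTI, &how);
    }
]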
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index b51980fc274e..846c052569dd 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -10,7 +10,6 @@
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/parser.h>
-#include <linux/magic.h>
#include "autofs_i.h"
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index a3d414150578..782e57b911ab 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -559,6 +559,13 @@ static int autofs_dir_symlink(struct inode *dir,
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
BUG_ON(!ino);
autofs_clean_ino(ino);
@@ -612,9 +619,15 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
struct autofs_info *ino = autofs_dentry_ino(dentry);
struct autofs_info *p_ino;
- /* This allows root to remove symlinks */
- if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (!autofs_oz_mode(sbi))
+ return -EACCES;
+
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs_dentry_ino(dentry->d_parent);
@@ -697,6 +710,13 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
spin_lock(&sbi->lookup_lock);
if (!simple_empty(dentry)) {
spin_unlock(&sbi->lookup_lock);
@@ -735,6 +755,13 @@ static int autofs_dir_mkdir(struct inode *dir,
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
pr_debug("dentry %p, creating %pd\n", dentry, dentry);
BUG_ON(!ino);
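
[The same two-step guard (reject non-oz-mode callers, then refuse modification while the mount is catatonic) is now open-coded in the symlink, unlink, rmdir and mkdir methods above. Purely as illustration, a hypothetical helper that would consolidate the pair:

    /* Hypothetical consolidation; not in the patch. */
    static int example_may_modify(struct autofs_sb_info *sbi)
    {
            if (!autofs_oz_mode(sbi))
                    return -EACCES;
            if (sbi->catatonic)     /* state must survive daemon restarts */
                    return -EACCES;
            return 0;
    }
]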
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 318be7864072..53af9f5253f4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3217,8 +3217,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
-ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
- struct file *dst_file, u64 dst_loff);
+int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
+ struct file *dst_file, loff_t dst_loff,
+ u64 olen);
/* file.c */
int __init btrfs_auto_defrag_init(void);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d3a5d2a41e5f..63600dc2ac4c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3592,13 +3592,13 @@ out_unlock:
return ret;
}
-ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
- struct file *dst_file, u64 dst_loff)
+int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
+ struct file *dst_file, loff_t dst_loff,
+ u64 olen)
{
struct inode *src = file_inode(src_file);
struct inode *dst = file_inode(dst_file);
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
- ssize_t res;
if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
/*
@@ -3609,10 +3609,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
return -EINVAL;
}
- res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
- if (res)
- return res;
- return olen;
+ return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
}
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
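
[btrfs_dedupe_file_range() now returns 0 on success rather than the deduped byte count; per-range results reach user space through the FIDEDUPERANGE ioctl. A hedged userspace sketch of that interface, using the structures from <linux/fs.h>:

    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    /* Dedupe `len` bytes at offset 0 of src_fd against offset 0 of dst_fd;
     * per-range status comes back in info[]. */
    int example_dedupe(int src_fd, int dst_fd, __u64 len)
    {
            struct file_dedupe_range *arg;
            int ret;

            arg = calloc(1, sizeof(*arg) + sizeof(struct file_dedupe_range_info));
            if (!arg)
                    return -1;
            arg->src_offset = 0;
            arg->src_length = len;
            arg->dest_count = 1;
            arg->info[0].dest_fd = dst_fd;
            arg->info[0].dest_offset = 0;

            ret = ioctl(src_fd, FIDEDUPERANGE, arg);
            if (ret == 0 && arg->info[0].status != FILE_DEDUPE_RANGE_SAME)
                    ret = -1;       /* range differed or per-range error */
            free(arg);
            return ret;
    }
]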
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f3a78efc3109..f047e87871a1 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -148,5 +148,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.12"
+#define CIFS_VERSION "2.13"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 8b0502cd39af..aa23c00367ec 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -398,6 +398,12 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
goto setup_ntlmv2_ret;
}
*pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+ if (!*pbuffer) {
+ rc = -ENOMEM;
+ cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
+ *buflen = 0;
+ goto setup_ntlmv2_ret;
+ }
sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer;
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 303d4592ebe7..db0453660ff6 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -238,6 +238,13 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
return 0;
/*
+ * Some windows servers (win2016) will pad also the final
+ * PDU in a compound to 8 bytes.
+ */
+ if (((clc_len + 7) & ~7) == len)
+ return 0;
+
+ /*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
* SMB2/SMB3 frame length (header + smb2 response specific data)
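
[(clc_len + 7) & ~7 rounds the computed length up to the next multiple of 8, so a response whose RFC1001 length equals the 8-byte-padded size is accepted. The idiom in isolation, with a worked value:

    /* Round up to an 8-byte boundary; e.g. 73 -> 80, 80 -> 80. */
    static inline unsigned int round_up_8(unsigned int len)
    {
            return (len + 7) & ~7u;
    }
]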
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 541258447c4c..247a98e6c856 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1582,7 +1582,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
struct smb_rqst rqst[3];
int resp_buftype[3];
struct kvec rsp_iov[3];
- struct kvec open_iov[5]; /* 4 + potential padding. */
+ struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
struct kvec qi_iov[1];
struct kvec close_iov[1];
struct cifs_ses *ses = tcon->ses;
@@ -1603,7 +1603,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
memset(&open_iov, 0, sizeof(open_iov));
rqst[0].rq_iov = open_iov;
- rqst[0].rq_nvec = 4;
+ rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2f1938011395..5740aa809be6 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2256,7 +2256,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
struct TCP_Server_Info *server;
struct cifs_tcon *tcon = oparms->tcon;
struct cifs_ses *ses = tcon->ses;
- struct kvec iov[5]; /* make sure at least one for each open context */
+ struct kvec iov[SMB2_CREATE_IOV_SIZE];
struct kvec rsp_iov = {NULL, 0};
int resp_buftype;
int rc = 0;
@@ -2274,7 +2274,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
memset(&rqst, 0, sizeof(struct smb_rqst));
memset(&iov, 0, sizeof(iov));
rqst.rq_iov = iov;
- rqst.rq_nvec = 5;
+ rqst.rq_nvec = SMB2_CREATE_IOV_SIZE;
rc = SMB2_open_init(tcon, &rqst, oplock, oparms, path);
if (rc)
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index a2eeae9e0432..8fb7887f2b3d 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -614,6 +614,18 @@ struct smb2_tree_disconnect_rsp {
#define SMB2_CREATE_TAG_POSIX 0x93AD25509CB411E7B42383DE968BCD7C
+/*
+ * Maximum number of iovs we need for an open/create request.
+ * [0] : struct smb2_create_req
+ * [1] : path
+ * [2] : lease context
+ * [3] : durable context
+ * [4] : posix context
+ * [5] : time warp context
+ * [6] : compound padding
+ */
+#define SMB2_CREATE_IOV_SIZE 7
+
struct smb2_create_req {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 57 */
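
[Callers size their create-request kvec arrays with the new constant instead of a magic "5", so adding another create context only touches this one definition. The intended usage, mirroring the smb2ops.c and smb2pdu.c hunks above (a sketch):

    struct kvec iov[SMB2_CREATE_IOV_SIZE];  /* one slot per possible context */
    struct smb_rqst rqst = { .rq_iov = iov, .rq_nvec = SMB2_CREATE_IOV_SIZE };
]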
diff --git a/fs/dax.c b/fs/dax.c
index 897b51e41d8f..f32d7125ad0f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
*
* Must be called with the i_pages lock held.
*/
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
- pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+ pgoff_t index, void ***slotp, bool (*wait_fn)(void))
{
void *entry, **slot;
struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
ewait.wait.func = wake_exceptional_entry_func;
for (;;) {
+ bool revalidate;
+
entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
&slot);
if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
xa_unlock_irq(&mapping->i_pages);
- schedule();
+ revalidate = wait_fn();
finish_wait(wq, &ewait.wait);
xa_lock_irq(&mapping->i_pages);
+ if (revalidate)
+ return ERR_PTR(-EAGAIN);
}
}
-static void dax_unlock_mapping_entry(struct address_space *mapping,
- pgoff_t index)
+static bool entry_wait(void)
+{
+ schedule();
+ /*
+ * Never return an ERR_PTR() from
+ * __get_unlocked_mapping_entry(), just keep looping.
+ */
+ return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+ pgoff_t index, void ***slotp)
+{
+ return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
{
void *entry, **slot;
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
static void put_locked_mapping_entry(struct address_space *mapping,
pgoff_t index)
{
- dax_unlock_mapping_entry(mapping, index);
+ unlock_mapping_entry(mapping, index);
}
/*
@@ -319,18 +338,27 @@ static unsigned long dax_radix_end_pfn(void *entry)
for (pfn = dax_radix_pfn(entry); \
pfn < dax_radix_end_pfn(entry); pfn++)
-static void dax_associate_entry(void *entry, struct address_space *mapping)
+/*
+ * TODO: for reflink+dax we need a way to associate a single page with
+ * multiple address_space instances at different linear_page_index()
+ * offsets.
+ */
+static void dax_associate_entry(void *entry, struct address_space *mapping,
+ struct vm_area_struct *vma, unsigned long address)
{
- unsigned long pfn;
+ unsigned long size = dax_entry_size(entry), pfn, index;
+ int i = 0;
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
return;
+ index = linear_page_index(vma, address & ~(size - 1));
for_each_mapped_pfn(entry, pfn) {
struct page *page = pfn_to_page(pfn);
WARN_ON_ONCE(page->mapping);
page->mapping = mapping;
+ page->index = index + i++;
}
}
@@ -348,6 +376,7 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
WARN_ON_ONCE(page->mapping && page->mapping != mapping);
page->mapping = NULL;
+ page->index = 0;
}
}
@@ -364,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
return NULL;
}
+static bool entry_wait_revalidate(void)
+{
+ rcu_read_unlock();
+ schedule();
+ rcu_read_lock();
+
+ /*
+ * Tell __get_unlocked_mapping_entry() to take a break, we need
+ * to revalidate page->mapping after dropping locks
+ */
+ return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+ pgoff_t index;
+ struct inode *inode;
+ bool did_lock = false;
+ void *entry = NULL, **slot;
+ struct address_space *mapping;
+
+ rcu_read_lock();
+ for (;;) {
+ mapping = READ_ONCE(page->mapping);
+
+ if (!dax_mapping(mapping))
+ break;
+
+ /*
+ * In the device-dax case there's no need to lock, a
+ * struct dev_pagemap pin is sufficient to keep the
+ * inode alive, and we assume we have dev_pagemap pin
+ * otherwise we would not have a valid pfn_to_page()
+ * translation.
+ */
+ inode = mapping->host;
+ if (S_ISCHR(inode->i_mode)) {
+ did_lock = true;
+ break;
+ }
+
+ xa_lock_irq(&mapping->i_pages);
+ if (mapping != page->mapping) {
+ xa_unlock_irq(&mapping->i_pages);
+ continue;
+ }
+ index = page->index;
+
+ entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+ entry_wait_revalidate);
+ if (!entry) {
+ xa_unlock_irq(&mapping->i_pages);
+ break;
+ } else if (IS_ERR(entry)) {
+ WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+ continue;
+ }
+ lock_slot(mapping, slot);
+ did_lock = true;
+ xa_unlock_irq(&mapping->i_pages);
+ break;
+ }
+ rcu_read_unlock();
+
+ return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+
+ if (S_ISCHR(inode->i_mode))
+ return;
+
+ unlock_mapping_entry(mapping, page->index);
+}
+
/*
* Find radix tree entry at given index. If it points to an exceptional entry,
* return it with the radix tree entry locked. If the radix tree doesn't
@@ -655,7 +762,6 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
{
void *vto, *kaddr;
pgoff_t pgoff;
- pfn_t pfn;
long rc;
int id;
@@ -664,7 +770,7 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
return rc;
id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
+ rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -709,7 +815,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
new_entry = dax_radix_locked_entry(pfn, flags);
if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
dax_disassociate_entry(entry, mapping, false);
- dax_associate_entry(new_entry, mapping);
+ dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
}
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
@@ -975,7 +1081,6 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
{
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
- void *kaddr;
int id, rc;
long length;
@@ -984,7 +1089,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
return rc;
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
- &kaddr, pfnp);
+ NULL, pfnp);
if (length < 0) {
rc = length;
goto out;
@@ -1060,15 +1165,13 @@ int __dax_zero_page_range(struct block_device *bdev,
pgoff_t pgoff;
long rc, id;
void *kaddr;
- pfn_t pfn;
rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
if (rc)
return rc;
id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr,
- &pfn);
+ rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -1124,7 +1227,6 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
ssize_t map_len;
pgoff_t pgoff;
void *kaddr;
- pfn_t pfn;
if (fatal_signal_pending(current)) {
ret = -EINTR;
@@ -1136,7 +1238,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
break;
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
- &kaddr, &pfn);
+ &kaddr, NULL);
if (map_len < 0) {
ret = map_len;
break;
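
[Several dax.c call sites now pass NULL for the kaddr or pfn output they do not need, instead of declaring throwaway locals. The calling convention, sketched; the wrapper name is illustrative:

    #include <linux/dax.h>

    /* dax_direct_access() fills only the out-parameters the caller passes;
     * as of this series either kaddr or pfn may be NULL. */
    static long example_map_len(struct dax_device *dax_dev, pgoff_t pgoff,
                                long nr_pages, void **kaddr)
    {
            return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, NULL);
    }
]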
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index e072e955ce33..c53814539070 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -46,7 +46,7 @@ static int pty_limit = NR_UNIX98_PTY_DEFAULT;
static int pty_reserve = NR_UNIX98_PTY_RESERVE;
static int pty_limit_min;
static int pty_limit_max = INT_MAX;
-static int pty_count;
+static atomic_t pty_count = ATOMIC_INIT(0);
static struct ctl_table pty_table[] = {
{
@@ -93,8 +93,6 @@ static struct ctl_table pty_root_table[] = {
{}
};
-static DEFINE_MUTEX(allocated_ptys_lock);
-
struct pts_mount_opts {
int setuid;
int setgid;
@@ -533,44 +531,25 @@ static struct file_system_type devpts_fs_type = {
int devpts_new_index(struct pts_fs_info *fsi)
{
- int index;
- int ida_ret;
-
-retry:
- if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
- return -ENOMEM;
-
- mutex_lock(&allocated_ptys_lock);
- if (pty_count >= (pty_limit -
- (fsi->mount_opts.reserve ? 0 : pty_reserve))) {
- mutex_unlock(&allocated_ptys_lock);
- return -ENOSPC;
- }
+ int index = -ENOSPC;
- ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
- if (ida_ret < 0) {
- mutex_unlock(&allocated_ptys_lock);
- if (ida_ret == -EAGAIN)
- goto retry;
- return -EIO;
- }
+ if (atomic_inc_return(&pty_count) >= (pty_limit -
+ (fsi->mount_opts.reserve ? 0 : pty_reserve)))
+ goto out;
- if (index >= fsi->mount_opts.max) {
- ida_remove(&fsi->allocated_ptys, index);
- mutex_unlock(&allocated_ptys_lock);
- return -ENOSPC;
- }
- pty_count++;
- mutex_unlock(&allocated_ptys_lock);
+ index = ida_alloc_max(&fsi->allocated_ptys, fsi->mount_opts.max - 1,
+ GFP_KERNEL);
+
+out:
+ if (index < 0)
+ atomic_dec(&pty_count);
return index;
}
void devpts_kill_index(struct pts_fs_info *fsi, int idx)
{
- mutex_lock(&allocated_ptys_lock);
- ida_remove(&fsi->allocated_ptys, idx);
- pty_count--;
- mutex_unlock(&allocated_ptys_lock);
+ ida_free(&fsi->allocated_ptys, idx);
+ atomic_dec(&pty_count);
}
/**
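
[devpts moves from the ida_pre_get()/ida_get_new()/ida_remove() sequence under a private mutex to the self-locking ida_alloc_max()/ida_free() API, with the global pty count kept in an atomic. The new API in isolation, as a sketch with illustrative names:

    #include <linux/idr.h>

    static DEFINE_IDA(example_ida);

    /* Allocate the lowest free id in [0, max]; no preloading or external
     * locking is required. Returns -ENOSPC when the range is exhausted. */
    static int example_get_id(int max)
    {
            return ida_alloc_max(&example_ida, max, GFP_KERNEL);
    }

    static void example_put_id(int id)
    {
            ida_free(&example_ida, id);
    }
]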
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 67db22fe99c5..42bbe6824b4b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -50,10 +50,10 @@
*
* 1) epmutex (mutex)
* 2) ep->mtx (mutex)
- * 3) ep->lock (spinlock)
+ * 3) ep->wq.lock (spinlock)
*
* The acquire order is the one listed above, from 1 to 3.
- * We need a spinlock (ep->lock) because we manipulate objects
+ * We need a spinlock (ep->wq.lock) because we manipulate objects
* from inside the poll callback, that might be triggered from
* a wake_up() that in turn might be called from IRQ context.
* So we can't sleep inside the poll callback and hence we need
@@ -85,7 +85,7 @@
* of epoll file descriptors, we use the current recursion depth as
* the lockdep subkey.
* It is possible to drop the "ep->mtx" and to use the global
- * mutex "epmutex" (together with "ep->lock") to have it working,
+ * mutex "epmutex" (together with "ep->wq.lock") to have it working,
* but having "ep->mtx" will make the interface more scalable.
* Events that require holding "epmutex" are very rare, while for
* normal operations the epoll private "ep->mtx" will guarantee
@@ -182,11 +182,10 @@ struct epitem {
* This structure is stored inside the "private_data" member of the file
* structure and represents the main data structure for the eventpoll
* interface.
+ *
+ * Access to it is protected by the lock inside wq.
*/
struct eventpoll {
- /* Protect the access to this structure */
- spinlock_t lock;
-
/*
* This mutex is used to ensure that files are not removed
* while epoll is using them. This is held during the event
@@ -210,7 +209,7 @@ struct eventpoll {
/*
* This is a single linked list that chains all the "struct epitem" that
* happened while transferring ready events to userspace w/out
- * holding ->lock.
+ * holding ->wq.lock.
*/
struct epitem *ovflist;
@@ -337,9 +336,9 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
}
/* Tells us if the item is currently linked */
-static inline int ep_is_linked(struct list_head *p)
+static inline int ep_is_linked(struct epitem *epi)
{
- return !list_empty(p);
+ return !list_empty(&epi->rdllink);
}
static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
@@ -392,7 +391,6 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
return ep_events_available(ep) || busy_loop_timeout(start_time);
}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
/*
* Busy poll if globally on and supporting sockets found && no events,
@@ -402,20 +400,16 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
*/
static void ep_busy_loop(struct eventpoll *ep, int nonblock)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_id = READ_ONCE(ep->napi_id);
if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep);
-#endif
}
static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
if (ep->napi_id)
ep->napi_id = 0;
-#endif
}
/*
@@ -423,7 +417,6 @@ static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
*/
static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
struct eventpoll *ep;
unsigned int napi_id;
struct socket *sock;
@@ -453,9 +446,24 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
/* record NAPI ID for use in next busy poll */
ep->napi_id = napi_id;
-#endif
}
+#else
+
+static inline void ep_busy_loop(struct eventpoll *ep, int nonblock)
+{
+}
+
+static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
+{
+}
+
+static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
+{
+}
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
/**
* ep_call_nested - Perform a bound (possibly) nested call, by checking
* that the recursion limit is not exceeded, and that
@@ -668,10 +676,11 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
{
__poll_t res;
int pwake = 0;
- unsigned long flags;
struct epitem *epi, *nepi;
LIST_HEAD(txlist);
+ lockdep_assert_irqs_enabled();
+
/*
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
@@ -688,17 +697,17 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* because we want the "sproc" callback to be able to do it
* in a lockless way.
*/
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
list_splice_init(&ep->rdllist, &txlist);
ep->ovflist = NULL;
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
/*
* Now call the callback function.
*/
res = (*sproc)(ep, &txlist, priv);
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
@@ -712,7 +721,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* queued into ->ovflist but the "txlist" might already
* contain them, and the list_splice() below takes care of them.
*/
- if (!ep_is_linked(&epi->rdllink)) {
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
}
@@ -740,7 +749,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
if (!ep_locked)
mutex_unlock(&ep->mtx);
@@ -764,16 +773,12 @@ static void epi_rcu_free(struct rcu_head *head)
*/
static int ep_remove(struct eventpoll *ep, struct epitem *epi)
{
- unsigned long flags;
struct file *file = epi->ffd.file;
+ lockdep_assert_irqs_enabled();
+
/*
- * Removes poll wait queue hooks. We _have_ to do this without holding
- * the "ep->lock" otherwise a deadlock might occur. This because of the
- * sequence of the lock acquisition. Here we do "ep->lock" then the wait
- * queue head lock when unregistering the wait queue. The wakeup callback
- * will run by holding the wait queue head lock and will call our callback
- * that will try to get "ep->lock".
+ * Removes poll wait queue hooks.
*/
ep_unregister_pollwait(ep, epi);
@@ -784,10 +789,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
rb_erase_cached(&epi->rbn, &ep->rbr);
- spin_lock_irqsave(&ep->lock, flags);
- if (ep_is_linked(&epi->rdllink))
+ spin_lock_irq(&ep->wq.lock);
+ if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -837,7 +842,7 @@ static void ep_free(struct eventpoll *ep)
* Walks through the whole tree by freeing each "struct epitem". At this
* point we are sure no poll callbacks will be lingering around, and also by
* holding "epmutex" we can be sure that no file cleanup code will hit
- * us during this operation. So we can avoid the lock on "ep->lock".
+ * us during this operation. So we can avoid the lock on "ep->wq.lock".
* We do not need to lock ep->mtx, either, we only do it to prevent
* a lockdep warning.
*/
@@ -1017,7 +1022,6 @@ static int ep_alloc(struct eventpoll **pep)
if (unlikely(!ep))
goto free_uid;
- spin_lock_init(&ep->lock);
mutex_init(&ep->mtx);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
@@ -1122,7 +1126,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
__poll_t pollflags = key_to_poll(key);
int ewake = 0;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irqsave(&ep->wq.lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1167,7 +1171,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
}
/* If this file is already in the ready list we exit soon */
- if (!ep_is_linked(&epi->rdllink)) {
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake_rcu(epi);
}
@@ -1199,7 +1203,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
pwake++;
out_unlock:
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irqrestore(&ep->wq.lock, flags);
/* We have to call this outside the lock */
if (pwake)
@@ -1417,11 +1421,12 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
{
int error, pwake = 0;
__poll_t revents;
- unsigned long flags;
long user_watches;
struct epitem *epi;
struct ep_pqueue epq;
+ lockdep_assert_irqs_enabled();
+
user_watches = atomic_long_read(&ep->user->epoll_watches);
if (unlikely(user_watches >= max_user_watches))
return -ENOSPC;
@@ -1484,13 +1489,13 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
goto error_remove_epi;
/* We have to drop the new item inside our item list to keep track of it */
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
/* If the file is already "ready" we drop it inside the ready list */
- if (revents && !ep_is_linked(&epi->rdllink)) {
+ if (revents && !ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1501,7 +1506,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
atomic_long_inc(&ep->user->epoll_watches);
@@ -1527,10 +1532,10 @@ error_unregister:
* list, since that is used/cleaned only inside a section bound by "mtx".
* And ep_insert() is called with "mtx" held.
*/
- spin_lock_irqsave(&ep->lock, flags);
- if (ep_is_linked(&epi->rdllink))
+ spin_lock_irq(&ep->wq.lock);
+ if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
wakeup_source_unregister(ep_wakeup_source(epi));
@@ -1550,6 +1555,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
int pwake = 0;
poll_table pt;
+ lockdep_assert_irqs_enabled();
+
init_poll_funcptr(&pt, NULL);
/*
@@ -1572,9 +1579,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* 1) Flush epi changes above to other CPUs. This ensures
* we do not miss events from ep_poll_callback if an
* event occurs immediately after we call f_op->poll().
- * We need this because we did not take ep->lock while
+ * We need this because we did not take ep->wq.lock while
* changing epi above (but ep_poll_callback does take
- * ep->lock).
+ * ep->wq.lock).
*
* 2) We also need to ensure we do not miss _past_ events
* when calling f_op->poll(). This barrier also
@@ -1593,8 +1600,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- spin_lock_irq(&ep->lock);
- if (!ep_is_linked(&epi->rdllink)) {
+ spin_lock_irq(&ep->wq.lock);
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1604,7 +1611,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- spin_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->wq.lock);
}
/* We have to call this outside the lock */
@@ -1739,11 +1746,12 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int maxevents, long timeout)
{
int res = 0, eavail, timed_out = 0;
- unsigned long flags;
u64 slack = 0;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
+ lockdep_assert_irqs_enabled();
+
if (timeout > 0) {
struct timespec64 end_time = ep_set_mstimeout(timeout);
@@ -1756,7 +1764,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
* caller specified a non blocking operation.
*/
timed_out = 1;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
goto check_events;
}
@@ -1765,7 +1773,7 @@ fetch_events:
if (!ep_events_available(ep))
ep_busy_loop(ep, timed_out);
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
if (!ep_events_available(ep)) {
/*
@@ -1807,11 +1815,11 @@ fetch_events:
break;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
timed_out = 1;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
}
__remove_wait_queue(&ep->wq, &wait);
@@ -1821,7 +1829,7 @@ check_events:
/* Is it worth to try to dig for events ? */
eavail = ep_events_available(ep);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
/*
* Try to transfer events to user space. In case we get 0 events and
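
[Rather than wrapping every call-site body in #ifdef CONFIG_NET_RX_BUSY_POLL, the hunks above compile empty inline stubs when the option is off, keeping the callers unconditional. The idiom in miniature; config and function names are illustrative:

    #ifdef CONFIG_EXAMPLE_FEATURE
    static void example_hook(int arg)
    {
            /* real work when the feature is built in */
    }
    #else
    static inline void example_hook(int arg) { }    /* no #ifdef at call sites */
    #endif
]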
diff --git a/fs/exec.c b/fs/exec.c
index bdd0eacefdf5..1ebf6e5a521d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1145,6 +1145,7 @@ static int de_thread(struct task_struct *tsk)
*/
tsk->pid = leader->pid;
change_pid(tsk, PIDTYPE_PID, task_pid(leader));
+ transfer_pid(leader, tsk, PIDTYPE_TGID);
transfer_pid(leader, tsk, PIDTYPE_PGID);
transfer_pid(leader, tsk, PIDTYPE_SID);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 9f1c96caebda..e8b6b89bddb8 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -28,6 +28,7 @@ struct kmem_cache *f2fs_inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
+ f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
if (!end_io)
f2fs_flush_merged_writes(sbi);
@@ -70,6 +71,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.encrypted_page = NULL,
.is_meta = is_meta,
};
+ int err;
if (unlikely(!is_meta))
fio.op_flags &= ~REQ_META;
@@ -84,9 +86,10 @@ repeat:
fio.page = page;
- if (f2fs_submit_page_bio(&fio)) {
+ err = f2fs_submit_page_bio(&fio);
+ if (err) {
f2fs_put_page(page, 1);
- goto repeat;
+ return ERR_PTR(err);
}
lock_page(page);
@@ -95,14 +98,9 @@ repeat:
goto repeat;
}
- /*
- * if there is any IO error when accessing device, make our filesystem
- * readonly and make sure do not write checkpoint with non-uptodate
- * meta page.
- */
if (unlikely(!PageUptodate(page))) {
- memset(page_address(page), 0, PAGE_SIZE);
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
}
out:
return page;
@@ -113,13 +111,32 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, true);
}
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct page *page;
+ int count = 0;
+
+retry:
+ page = __get_meta_page(sbi, index, true);
+ if (IS_ERR(page)) {
+ if (PTR_ERR(page) == -EIO &&
+ ++count <= DEFAULT_RETRY_IO_COUNT)
+ goto retry;
+
+ f2fs_stop_checkpoint(sbi, false);
+ f2fs_bug_on(sbi, 1);
+ }
+
+ return page;
+}
+
/* for POR only */
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
return __get_meta_page(sbi, index, false);
}
-bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
switch (type) {
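
[f2fs_get_meta_page_nofail() above replaces the old unconditional re-read loop with a bounded retry: -EIO is retried up to DEFAULT_RETRY_IO_COUNT times before the filesystem is stopped. The generic shape of the pattern, as a sketch with illustrative names:

    #include <linux/err.h>

    #define EXAMPLE_RETRY_LIMIT 8

    static void *example_read_with_retry(void *(*read_fn)(void *), void *arg)
    {
            void *p;
            int tries = 0;

            do {
                    p = read_fn(arg);
            } while (IS_ERR(p) && PTR_ERR(p) == -EIO &&
                     ++tries <= EXAMPLE_RETRY_LIMIT);
            return p;       /* caller decides how to fail hard */
    }
]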
@@ -140,8 +157,20 @@ bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
return false;
break;
case META_POR:
+ case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi)))
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ if (type == DATA_GENERIC) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "access invalid blkaddr:%u", blkaddr);
+ WARN_ON(1);
+ }
+ return false;
+ }
+ break;
+ case META_GENERIC:
+ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+ blkaddr >= MAIN_BLKADDR(sbi)))
return false;
break;
default:
@@ -176,7 +205,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkno, type))
+ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
@@ -242,11 +271,8 @@ static int __f2fs_write_meta_page(struct page *page,
trace_f2fs_writepage(page, META);
- if (unlikely(f2fs_cp_error(sbi))) {
- dec_page_count(sbi, F2FS_DIRTY_META);
- unlock_page(page);
- return 0;
- }
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
@@ -529,13 +555,12 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
spin_lock(&im->ino_lock);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
-#endif
+
if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
@@ -572,12 +597,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
struct node_info ni;
- int err = f2fs_acquire_orphan_inode(sbi);
-
- if (err)
- goto err_out;
-
- __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
+ int err;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
@@ -600,14 +620,15 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* truncate all the data during iput */
iput(inode);
- f2fs_get_node_info(sbi, ino, &ni);
+ err = f2fs_get_node_info(sbi, ino, &ni);
+ if (err)
+ goto err_out;
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
err = -EIO;
goto err_out;
}
- __remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
err_out:
@@ -639,7 +660,10 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= SB_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif
@@ -649,9 +673,15 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
for (i = 0; i < orphan_blocks; i++) {
- struct page *page = f2fs_get_meta_page(sbi, start_blk + i);
+ struct page *page;
struct f2fs_orphan_block *orphan_blk;
+ page = f2fs_get_meta_page(sbi, start_blk + i);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
@@ -742,10 +772,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
__u32 crc = 0;
*cp_page = f2fs_get_meta_page(sbi, cp_addr);
+ if (IS_ERR(*cp_page))
+ return PTR_ERR(*cp_page);
+
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset > (blk_size - sizeof(__le32))) {
+ f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
@@ -753,6 +787,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
+ f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
}
@@ -772,14 +807,22 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_1, version);
if (err)
- goto invalid_cp1;
+ return NULL;
+
+ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+ sbi->blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
+ goto invalid_cp;
+ }
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
- goto invalid_cp2;
+ goto invalid_cp;
cur_version = *version;
if (cur_version == pre_version) {
@@ -787,9 +830,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
f2fs_put_page(cp_page_2, 1);
return cp_page_1;
}
-invalid_cp2:
f2fs_put_page(cp_page_2, 1);
-invalid_cp1:
+invalid_cp:
f2fs_put_page(cp_page_1, 1);
return NULL;
}
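
Collapsing invalid_cp1/invalid_cp2 into a single invalid_cp label works because f2fs_put_page() is a no-op for a page that was never obtained, so every failure path can release both references unconditionally. The same shape, sketched with malloc/free (free(NULL) is likewise a no-op):

#include <stdlib.h>

static int load_pair(void)
{
	char *a = NULL, *b = NULL;
	int err = -1;

	a = malloc(16);
	if (!a)
		goto out;
	b = malloc(16);
	if (!b)
		goto out;	/* one label; both releases are safe */
	err = 0;
out:
	free(b);
	free(a);
	return err;
}

int main(void) { return load_pair() ? 1 : 0; }
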
@@ -838,15 +880,15 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
- /* Sanity checking of checkpoint */
- if (f2fs_sanity_check_ckpt(sbi))
- goto free_fail_no_cp;
-
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
+ /* Sanity checking of checkpoint */
+ if (f2fs_sanity_check_ckpt(sbi))
+ goto free_fail_no_cp;
+
if (cp_blks <= 1)
goto done;
@@ -859,6 +901,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
+ if (IS_ERR(cur_page))
+ goto free_fail_no_cp;
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
@@ -980,12 +1024,10 @@ retry:
iput(inode);
/* We need to give the cpu to other writers. */
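(see the sticky-error sketch after the do_checkpoint() hunks below)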
- if (ino == cur_ino) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ if (ino == cur_ino)
cond_resched();
- } else {
+ else
ino = cur_ino;
- }
} else {
/*
* We should submit bio, since it exists several
@@ -1119,7 +1161,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
}
-static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
DEFINE_WAIT(wait);
@@ -1129,6 +1171,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
+ if (unlikely(f2fs_cp_error(sbi)))
+ break;
+
io_schedule_timeout(5*HZ);
}
finish_wait(&sbi->cp_wait, &wait);
@@ -1202,8 +1247,12 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
/* writeout cp pack 2 page */
err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
- f2fs_bug_on(sbi, err);
+ if (unlikely(err && f2fs_cp_error(sbi))) {
+ f2fs_put_page(page, 1);
+ return;
+ }
+ f2fs_bug_on(sbi, err);
f2fs_put_page(page, 0);
/* submit checkpoint (with barrier if NOBARRIER is not set) */
@@ -1229,7 +1278,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
while (get_pages(sbi, F2FS_DIRTY_META)) {
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ break;
}
/*
@@ -1309,7 +1358,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_sync_meta_pages(sbi, META, LONG_MAX,
FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ break;
}
}
@@ -1348,10 +1397,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
/* wait for previous submitted meta pages writeback */
- wait_on_all_pages_writeback(sbi);
-
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_wait_on_all_pages_writeback(sbi);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1360,12 +1406,19 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
- wait_on_all_pages_writeback(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
+
+ /*
+ * invalidate intermediate pages borrowed from the meta inode's
+ * page cache, which are used for migrating an encrypted inode's blocks.
+ */
+ if (f2fs_sb_has_encrypt(sbi->sb))
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
f2fs_release_ino_entry(sbi, false);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_reset_fsync_node_info(sbi);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
@@ -1381,7 +1434,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
- return 0;
+ return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}
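
Taken together, the do_checkpoint() hunks stop bailing out mid-way on a checkpoint error: the loops break instead of returning, the teardown (releasing ino entries, clearing flags) always runs, and the error is reported exactly once at the single return. A compressed illustration of that "sticky flag, one exit" shape, with cp_error as a plain global standing in for f2fs_cp_error():

#include <stdbool.h>

static bool cp_error;	/* stand-in for f2fs_cp_error(sbi) */

static int do_checkpoint_sketch(void)
{
	int i;

	for (i = 0; i < 8; i++) {
		if (cp_error)
			break;	/* stop writing, but fall through to teardown */
		/* ... sync one batch of meta pages ... */
	}
	/* teardown that must always run: release entries, clear flags, ... */
	return cp_error ? -5 /* -EIO */ : 0;
}

int main(void)
{
	return do_checkpoint_sketch();
}
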
/*
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b7c9b58acf3e..382c1ef9a9e4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -126,12 +126,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
static void f2fs_read_end_io(struct bio *bio)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
bio->bi_status = BLK_STS_IOERR;
}
-#endif
if (f2fs_bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
@@ -177,6 +175,8 @@ static void f2fs_write_end_io(struct bio *bio)
page->index != nid_of_node(page));
dec_page_count(sbi, type);
+ if (f2fs_in_warm_node_list(sbi, page))
+ f2fs_del_fsync_node_entry(sbi, page);
clear_cold_data(page);
end_page_writeback(page);
}
@@ -264,7 +264,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
if (type != DATA && type != NODE)
goto submit_io;
- if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
+ if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
@@ -441,7 +441,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct page *page = fio->encrypted_page ?
fio->encrypted_page : fio->page;
- verify_block_addr(fio, fio->new_blkaddr);
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFAULT;
+
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -485,7 +488,7 @@ next:
spin_unlock(&io->io_lock);
}
- if (is_valid_blkaddr(fio->old_blkaddr))
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
verify_block_addr(fio, fio->old_blkaddr);
verify_block_addr(fio, fio->new_blkaddr);
@@ -534,19 +537,22 @@ out:
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages)
+ unsigned nr_pages, unsigned op_flag)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ return ERR_PTR(-EFAULT);
+
bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio)
return ERR_PTR(-ENOMEM);
f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
@@ -571,7 +577,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
- struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
+ struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -869,6 +875,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
+ block_t old_blkaddr;
pgoff_t fofs;
blkcnt_t count = 1;
int err;
@@ -876,6 +883,10 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
+
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
if (dn->data_blkaddr == NEW_ADDR)
@@ -885,11 +896,13 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
return err;
alloc:
- f2fs_get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
-
- f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
+ old_blkaddr = dn->data_blkaddr;
+ f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
&sum, seg_type, NULL, false);
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
f2fs_set_data_blkaddr(dn);
/* update i_size */
@@ -1045,7 +1058,13 @@ next_dnode:
next_block:
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
- if (!is_valid_blkaddr(blkaddr)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+ err = -EFAULT;
+ goto sync_out;
+ }
+
+ if (!is_valid_data_blkaddr(sbi, blkaddr)) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1282,7 +1301,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
offset = offsetof(struct f2fs_inode, i_addr) +
@@ -1309,7 +1332,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- f2fs_get_node_info(sbi, xnid, &ni);
+ err = f2fs_get_node_info(sbi, xnid, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
len = inode->i_sb->s_blocksize;
@@ -1425,11 +1452,11 @@ out:
* Note that the aops->readpages() function is ONLY used for read-ahead. If
* this function ever deviates from doing just read-ahead, it should either
* use ->readpage() or do the necessary surgery to decouple ->readpages()
- * readom read-ahead.
+ * from read-ahead.
*/
static int f2fs_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
- unsigned nr_pages)
+ unsigned nr_pages, bool is_readahead)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
@@ -1500,6 +1527,10 @@ got_it:
SetPageUptodate(page);
goto confused;
}
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC))
+ goto set_error_page;
} else {
zero_user_segment(page, 0, PAGE_SIZE);
if (!PageUptodate(page))
@@ -1519,7 +1550,8 @@ submit_and_realloc:
bio = NULL;
}
if (bio == NULL) {
- bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0);
if (IS_ERR(bio)) {
bio = NULL;
goto set_error_page;
@@ -1563,7 +1595,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
+ ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
return ret;
}
@@ -1580,12 +1612,13 @@ static int f2fs_read_data_pages(struct file *file,
if (f2fs_has_inline_data(inode))
return 0;
- return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
+ return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}
static int encrypt_one_page(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
+ struct page *mpage;
gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
@@ -1597,17 +1630,25 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
PAGE_SIZE, 0, fio->page->index, gfp_flags);
- if (!IS_ERR(fio->encrypted_page))
- return 0;
+ if (IS_ERR(fio->encrypted_page)) {
+ /* flush pending IOs and wait for a while in the ENOMEM case */
+ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
+ f2fs_flush_merged_writes(fio->sbi);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ gfp_flags |= __GFP_NOFAIL;
+ goto retry_encrypt;
+ }
+ return PTR_ERR(fio->encrypted_page);
+ }
- /* flush pending IOs and wait for a while in the ENOMEM case */
- if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
- f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- gfp_flags |= __GFP_NOFAIL;
- goto retry_encrypt;
+ mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
+ if (mpage) {
+ if (PageUptodate(mpage))
+ memcpy(page_address(mpage),
+ page_address(fio->encrypted_page), PAGE_SIZE);
+ f2fs_put_page(mpage, 1);
}
- return PTR_ERR(fio->encrypted_page);
+ return 0;
}
static inline bool check_inplace_update_policy(struct inode *inode,
@@ -1691,6 +1732,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
struct extent_info ei = {0,0,0};
+ struct node_info ni;
bool ipu_force = false;
int err = 0;
@@ -1699,11 +1741,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
- if (is_valid_blkaddr(fio->old_blkaddr)) {
- ipu_force = true;
- fio->need_lock = LOCK_DONE;
- goto got_it;
- }
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC))
+ return -EFAULT;
+
+ ipu_force = true;
+ fio->need_lock = LOCK_DONE;
+ goto got_it;
}
/* Deadlock due to between page->lock and f2fs_lock_op */
@@ -1722,11 +1766,17 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
goto out_writepage;
}
got_it:
+ if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
+ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC)) {
+ err = -EFAULT;
+ goto out_writepage;
+ }
/*
* If the current allocation needs SSR,
* in-place writes are preferable for updated data.
*/
- if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) &&
+ if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
@@ -1751,6 +1801,12 @@ got_it:
fio->need_lock = LOCK_REQ;
}
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+ if (err)
+ goto out_writepage;
+
+ fio->version = ni.version;
+
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
@@ -2079,6 +2135,18 @@ continue_unlock:
return ret;
}
+static inline bool __should_serialize_io(struct inode *inode,
+ struct writeback_control *wbc)
+{
+ if (!S_ISREG(inode->i_mode))
+ return false;
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ return true;
+ if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
+ return true;
+ return false;
+}
+
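
The new helper gates a per-superblock writepages mutex: regular-file writeback is serialized either when it is background writeback (not WB_SYNC_ALL) or when the inode carries at least min_seq_blocks dirty pages, so large sequential streams do not interleave on disk. A pthread analog of the conditional-lock shape used by __f2fs_write_data_pages() below:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t writepages = PTHREAD_MUTEX_INITIALIZER;

static int write_cache_pages_sketch(bool serialize)
{
	bool locked = false;

	if (serialize) {		/* cf. __should_serialize_io() */
		pthread_mutex_lock(&writepages);
		locked = true;
	}
	/* ... submit the pages ... */
	if (locked)
		pthread_mutex_unlock(&writepages);
	return 0;
}

int main(void) { return write_cache_pages_sketch(true); }
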
static int __f2fs_write_data_pages(struct address_space *mapping,
struct writeback_control *wbc,
enum iostat_type io_type)
@@ -2087,6 +2155,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct blk_plug plug;
int ret;
+ bool locked = false;
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
@@ -2117,10 +2186,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
else if (atomic_read(&sbi->wb_sync_req[DATA]))
goto skip_write;
+ if (__should_serialize_io(inode, wbc)) {
+ mutex_lock(&sbi->writepages);
+ locked = true;
+ }
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc, io_type);
blk_finish_plug(&plug);
+ if (locked)
+ mutex_unlock(&sbi->writepages);
+
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_dec(&sbi->wb_sync_req[DATA]);
/*
@@ -2153,10 +2230,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
loff_t i_size = i_size_read(inode);
if (to > i_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -2251,8 +2332,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
trace_f2fs_write_begin(inode, pos, len, flags);
- if (f2fs_is_atomic_file(inode) &&
- !f2fs_available_free_memory(sbi, INMEM_PAGES)) {
+ if ((f2fs_is_atomic_file(inode) &&
+ !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
+ is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
err = -ENOMEM;
drop_atomic = true;
goto fail;
@@ -2376,14 +2458,20 @@ unlock_out:
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
loff_t offset)
{
- unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
-
- if (offset & blocksize_mask)
- return -EINVAL;
-
- if (iov_iter_alignment(iter) & blocksize_mask)
- return -EINVAL;
-
+ unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned blkbits = i_blkbits;
+ unsigned blocksize_mask = (1 << blkbits) - 1;
+ unsigned long align = offset | iov_iter_alignment(iter);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+
+ if (align & blocksize_mask) {
+ if (bdev)
+ blkbits = blksize_bits(bdev_logical_block_size(bdev));
+ blocksize_mask = (1 << blkbits) - 1;
+ if (align & blocksize_mask)
+ return -EINVAL;
+ return 1;
+ }
return 0;
}
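
check_direct_IO() now distinguishes three outcomes: fully fs-block aligned (0, go direct), aligned only to the device's logical block (1, which the caller below maps to a buffered fallback), or not even that (-EINVAL). The same tri-state in standalone form; the function name and the 4K/512 split in the test are illustrative:

#include <errno.h>
#include <stdint.h>

/* 0: direct I/O ok; 1: fall back to buffered I/O; -EINVAL: reject */
static int check_dio_alignment(uint64_t offset, uint64_t iov_align,
			       unsigned fs_blkbits, unsigned dev_blkbits)
{
	uint64_t align = offset | iov_align;

	if (align & ((1ull << fs_blkbits) - 1)) {
		if (align & ((1ull << dev_blkbits) - 1))
			return -EINVAL;
		return 1;	/* aligned to the device block only */
	}
	return 0;
}

int main(void)
{
	/* 512-byte aligned but not 4K-aligned: buffered fallback */
	return check_dio_alignment(512, 0, 12, 9) == 1 ? 0 : 1;
}
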
@@ -2401,7 +2489,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
err = check_direct_IO(inode, iter, offset);
if (err)
- return err;
+ return err < 0 ? err : 0;
if (f2fs_force_buffered_io(inode, rw))
return 0;
@@ -2495,6 +2583,10 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
+ /* don't retain the PG_checked flag that was set during GC */
+ if (is_cold_data(page))
+ clear_cold_data(page);
+
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
f2fs_register_inmem_page(inode, page);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 2d65e77ae5cf..214a968962a1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -215,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
- si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+ si->base_mem += NM_I(sbi)->nat_blocks *
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 7f955c4e86a4..ecc3a4e2be96 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -517,12 +517,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
}
start:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
-#endif
+
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6799c3fc44e3..abf925664d9c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,7 +41,6 @@
} while (0)
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
enum {
FAULT_KMALLOC,
FAULT_KVMALLOC,
@@ -56,16 +55,20 @@ enum {
FAULT_TRUNCATE,
FAULT_IO,
FAULT_CHECKPOINT,
+ FAULT_DISCARD,
FAULT_MAX,
};
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
+
struct f2fs_fault_info {
atomic_t inject_ops;
unsigned int inject_rate;
unsigned int inject_type;
};
-extern char *fault_name[FAULT_MAX];
+extern char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
@@ -178,7 +181,6 @@ enum {
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
-#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
@@ -194,7 +196,7 @@ struct cp_control {
};
/*
- * For CP/NAT/SIT/SSA readahead
+ * indicate meta/data type
*/
enum {
META_CP,
@@ -202,6 +204,8 @@ enum {
META_SIT,
META_SSA,
META_POR,
+ DATA_GENERIC,
+ META_GENERIC,
};
/* for the list of ino */
@@ -226,6 +230,12 @@ struct inode_entry {
struct inode *inode; /* vfs inode pointer */
};
+struct fsync_node_entry {
+ struct list_head list; /* list head */
+ struct page *page; /* warm node page pointer */
+ unsigned int seq_id; /* sequence id */
+};
+
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
@@ -242,9 +252,10 @@ struct discard_entry {
(MAX_PLIST_NUM - 1) : (blk_num - 1))
enum {
- D_PREP,
- D_SUBMIT,
- D_DONE,
+ D_PREP, /* initial */
+ D_PARTIAL, /* partially submitted */
+ D_SUBMIT, /* all submitted */
+ D_DONE, /* finished */
};
struct discard_info {
@@ -269,7 +280,10 @@ struct discard_cmd {
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
+ unsigned char issuing; /* issuing discard */
int error; /* bio error */
+ spinlock_t lock; /* for state/bio_ref updating */
+ unsigned short bio_ref; /* bio reference count */
};
enum {
@@ -289,6 +303,7 @@ struct discard_policy {
unsigned int io_aware_gran; /* minimum granularity of discards that ignore I/O awareness */
bool io_aware; /* issue discard in idle time */
bool sync; /* submit discard with REQ_SYNC flag */
+ bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
};
@@ -305,10 +320,12 @@ struct discard_cmd_control {
unsigned int max_discards; /* max. discards to be issued */
unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
+ unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
atomic_t issing_discard; /* # of issing discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root root; /* root of discard rb-tree */
+ bool rbtree_check; /* config for consistency check */
};
/* for the list of fsync inodes, used only during recovery */
@@ -508,13 +525,12 @@ enum {
*/
};
+#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
-/* vector size for gang look-up from extent cache that consists of radix tree */
-#define EXT_TREE_VEC_SIZE 64
-
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
@@ -600,6 +616,8 @@ enum {
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40 /* reserved */
+#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
+
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
@@ -669,8 +687,8 @@ struct f2fs_inode_info {
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
int i_inline_xattr_size; /* inline xattr size */
- struct timespec i_crtime; /* inode creation time */
- struct timespec i_disk_time[4]; /* inode disk times */
+ struct timespec64 i_crtime; /* inode creation time */
+ struct timespec64 i_disk_time[4];/* inode disk times */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -698,22 +716,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
}
static inline bool __is_discard_mergeable(struct discard_info *back,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
return (back->lstart + back->len == front->lstart) &&
- (back->len + front->len < DEF_MAX_DISCARD_LEN);
+ (back->len + front->len <= max_len);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
- struct discard_info *back)
+ struct discard_info *back, unsigned int max_len)
{
- return __is_discard_mergeable(back, cur);
+ return __is_discard_mergeable(back, cur, max_len);
}
static inline bool __is_discard_front_mergeable(struct discard_info *cur,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
- return __is_discard_mergeable(cur, front);
+ return __is_discard_mergeable(cur, front, max_len);
}
static inline bool __is_extent_mergeable(struct extent_info *back,
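
With DEF_MAX_DISCARD_LEN removed above, the merge predicates take their cap from the caller, presumably bounded by the maximum discard length the device accepts. The predicate itself stays a two-part test, adjacency plus bounded combined length; a standalone rendering:

#include <stdbool.h>
#include <stdint.h>

struct dinfo { uint64_t lstart, len; };

static bool discard_mergeable(const struct dinfo *back,
			      const struct dinfo *front, uint64_t max_len)
{
	return back->lstart + back->len == front->lstart &&
	       back->len + front->len <= max_len;
}

int main(void)
{
	struct dinfo a = { 0, 8 }, b = { 8, 8 };

	return discard_mergeable(&a, &b, 16) ? 0 : 1;
}
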
@@ -768,6 +786,7 @@ struct f2fs_nm_info {
struct radix_tree_root nat_set_root;/* root of the nat set cache */
struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
+ spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
unsigned int nat_blocks; /* # of nat blocks */
@@ -894,6 +913,7 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
+ unsigned int min_seq_blocks; /* threshold for sequential blocks */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
@@ -1015,6 +1035,7 @@ struct f2fs_io_info {
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
+ unsigned char version; /* version of the node */
};
#define is_read_io(rw) ((rw) == READ)
@@ -1066,6 +1087,7 @@ enum {
SBI_POR_DOING, /* recovery is doing or not */
SBI_NEED_SB_WRITE, /* need to recover superblock */
SBI_NEED_CP, /* need to checkpoint */
+ SBI_IS_SHUTDOWN, /* shutdown by ioctl */
};
enum {
@@ -1112,6 +1134,7 @@ struct f2fs_sb_info {
struct rw_semaphore sb_lock; /* lock for raw super block */
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
+ struct mutex writepages; /* mutex for writepages() */
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1148,6 +1171,11 @@ struct f2fs_sb_info {
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
+ spinlock_t fsync_node_lock; /* for node entry lock */
+ struct list_head fsync_node_list; /* node list head */
+ unsigned int fsync_seg_id; /* sequence id */
+ unsigned int fsync_node_num; /* number of node entries */
+
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
@@ -1215,6 +1243,7 @@ struct f2fs_sb_info {
unsigned int gc_mode; /* current GC state */
/* for skip statistics */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
+ unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
@@ -1279,7 +1308,7 @@ struct f2fs_sb_info {
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
printk("%sF2FS-fs : inject %s in %s of %pF\n", \
- KERN_INFO, fault_name[type], \
+ KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
@@ -1298,6 +1327,12 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
}
return false;
}
+#else
+#define f2fs_show_injection_info(type) do { } while (0)
+static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
+{
+ return false;
+}
#endif
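
Defining a no-op f2fs_show_injection_info() and a constant-false time_to_inject() when fault injection is compiled out is what lets every #ifdef block elsewhere in this patch disappear: the compiler folds the dead branches away. The pattern in miniature, with FAULT_INJECTION as a stand-in for CONFIG_F2FS_FAULT_INJECTION:

#include <stdbool.h>
#include <stdio.h>

#ifdef FAULT_INJECTION
static bool time_to_inject(int type) { return type == 0; }	/* real policy */
#else
static inline bool time_to_inject(int type) { (void)type; return false; }
#endif

int main(void)
{
	/* call site needs no #ifdef; the branch folds to nothing when off */
	if (time_to_inject(0))
		puts("injected fault");
	return 0;
}
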
/* For write statistics. Suppose sector size is 512 bytes,
@@ -1326,7 +1361,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
struct request_list *rl = &q->root_rl;
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
- return 0;
+ return false;
return f2fs_time_over(sbi, REQ_TIME);
}
@@ -1650,13 +1685,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (ret)
return ret;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
release = *count;
goto enospc;
}
-#endif
+
/*
* let's increase this prior to the actual block count change in order
* for f2fs_sync_file to avoid data races when deciding checkpoint.
@@ -1680,18 +1714,20 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
sbi->total_valid_block_count -= diff;
if (!*count) {
spin_unlock(&sbi->stat_lock);
- percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
goto enospc;
}
}
spin_unlock(&sbi->stat_lock);
- if (unlikely(release))
+ if (unlikely(release)) {
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
+ }
f2fs_i_blocks_write(inode, *count, true, true);
return 0;
enospc:
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
return -ENOSPC;
}
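
The accounting fix above makes every error exit of inc_valid_block_count() subtract from alloc_valid_block_count exactly what the entry path added, including the partially-granted case. The invariant, reduced to a counter sketch:

#include <errno.h>
#include <stdint.h>

static int64_t allocated;	/* cf. alloc_valid_block_count */

static int reserve_blocks(int64_t want, int64_t avail)
{
	allocated += want;	/* optimistic bump, as the kernel code does */
	if (allocated > avail) {
		allocated -= want;	/* every error path undoes the bump */
		return -ENOSPC;
	}
	return 0;
}

int main(void)
{
	return (reserve_blocks(4, 8) == 0 &&
		reserve_blocks(8, 8) == -ENOSPC &&
		allocated == 4) ? 0 : 1;
}
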
@@ -1863,12 +1899,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
return ret;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
goto enospc;
}
-#endif
spin_lock(&sbi->stat_lock);
@@ -1953,17 +1987,23 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi)
static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
pgoff_t index, bool for_write)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- struct page *page = find_lock_page(mapping, index);
+ struct page *page;
- if (page)
- return page;
+ if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) {
+ if (!for_write)
+ page = find_get_page_flags(mapping, index,
+ FGP_LOCK | FGP_ACCESSED);
+ else
+ page = find_lock_page(mapping, index);
+ if (page)
+ return page;
- if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
- f2fs_show_injection_info(FAULT_PAGE_ALLOC);
- return NULL;
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
+ f2fs_show_injection_info(FAULT_PAGE_ALLOC);
+ return NULL;
+ }
}
-#endif
+
if (!for_write)
return grab_cache_page(mapping, index);
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
@@ -1973,12 +2013,11 @@ static inline struct page *f2fs_pagecache_get_page(
struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp_mask)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
f2fs_show_injection_info(FAULT_PAGE_GET);
return NULL;
}
-#endif
+
return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
}
@@ -2043,12 +2082,11 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
return bio;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
f2fs_show_injection_info(FAULT_ALLOC_BIO);
return NULL;
}
-#endif
+
return bio_alloc(GFP_KERNEL, npages);
}
@@ -2518,7 +2556,6 @@ static inline void clear_file(struct inode *inode, int type)
static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
{
- struct timespec ts;
bool ret;
if (dsync) {
@@ -2534,16 +2571,13 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
i_size_read(inode) & ~PAGE_MASK)
return false;
- ts = timespec64_to_timespec(inode->i_atime);
- if (!timespec_equal(F2FS_I(inode)->i_disk_time, &ts))
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
return false;
- ts = timespec64_to_timespec(inode->i_ctime);
- if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &ts))
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
return false;
- ts = timespec64_to_timespec(inode->i_mtime);
- if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &ts))
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
return false;
- if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3,
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3,
&F2FS_I(inode)->i_crtime))
return false;
@@ -2587,12 +2621,11 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
-#endif
+
return kmalloc(size, flags);
}
@@ -2605,12 +2638,11 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KVMALLOC)) {
f2fs_show_injection_info(FAULT_KVMALLOC);
return NULL;
}
-#endif
+
return kvmalloc(size, flags);
}
@@ -2669,13 +2701,39 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->iostat_lock);
}
-static inline bool is_valid_blkaddr(block_t blkaddr)
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
+ (!is_read_io(fio->op) || fio->is_meta))
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
+ f2fs_bug_on(sbi, 1);
+ }
+}
+
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
{
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
return false;
return true;
}
+static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ if (!__is_valid_data_blkaddr(blkaddr))
+ return false;
+ verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
+ return true;
+}
+
/*
* file.c
*/
@@ -2790,16 +2848,21 @@ struct node_info;
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
-void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni);
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
int f2fs_truncate_xattr_node(struct inode *inode);
-int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id);
int f2fs_remove_inode_page(struct inode *inode);
struct page *f2fs_new_inode_page(struct inode *inode);
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
@@ -2808,11 +2871,12 @@ struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
struct page *f2fs_get_node_page_ra(struct page *parent, int start);
void f2fs_move_node_page(struct page *node_page, int gc_type);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic);
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id);
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type);
-void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
@@ -2820,7 +2884,7 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
void f2fs_recover_inline_xattr(struct inode *inode, struct page *page);
int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
-void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
@@ -2898,9 +2962,10 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
-bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
- block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
@@ -2924,6 +2989,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
void f2fs_update_dirty_page(struct inode *inode, struct page *page);
void f2fs_remove_dirty_inode(struct inode *inode);
int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
int __init f2fs_create_checkpoint_caches(void);
@@ -3362,7 +3428,7 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
#else
- return 0;
+ return false;
#endif
}
@@ -3373,4 +3439,11 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
F2FS_I_SB(inode)->s_ndevs);
}
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type);
+#else
+#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
+#endif
+
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6880c6f78d58..5474aaa274b9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -213,6 +213,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
+ unsigned int seq_id = 0;
if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
@@ -275,7 +276,7 @@ go_write:
}
sync_nodes:
atomic_inc(&sbi->wb_sync_req[NODE]);
- ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic);
+ ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
atomic_dec(&sbi->wb_sync_req[NODE]);
if (ret)
goto out;
@@ -301,7 +302,7 @@ sync_nodes:
* given fsync mark.
*/
if (!atomic) {
- ret = f2fs_wait_on_node_pages_writeback(sbi, ino);
+ ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
if (ret)
goto out;
}
@@ -350,13 +351,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
return pgofs;
}
-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
- int whence)
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
+ pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- is_valid_blkaddr(blkaddr))
+ is_valid_data_blkaddr(sbi, blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -420,7 +421,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ blkaddr, DATA_GENERIC)) {
+ f2fs_put_dnode(&dn);
+ goto fail;
+ }
+
+ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
@@ -513,6 +522,11 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->data_blkaddr = NULL_ADDR;
f2fs_set_data_blkaddr(dn);
+
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ continue;
+
f2fs_invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
@@ -654,12 +668,11 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
-#endif
+
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -782,22 +795,26 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size <= i_size_read(inode)) {
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
+ bool to_smaller = (attr->ia_size <= i_size_read(inode));
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_setsize(inode, attr->ia_size);
+
+ if (to_smaller)
err = f2fs_truncate(inode);
- up_write(&F2FS_I(inode)->i_mmap_sem);
- if (err)
- return err;
- } else {
- /*
- * do not trim all blocks after i_size if target size is
- * larger than i_size.
- */
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ /*
+ * do not trim all blocks after i_size if target size is
+ * larger than i_size.
+ */
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+
+ if (err)
+ return err;
+ if (!to_smaller) {
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -944,14 +961,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
f2fs_lock_op(sbi);
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -1054,7 +1076,12 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
if (ret)
return ret;
- f2fs_get_node_info(sbi, dn.nid, &ni);
+ ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (ret) {
+ f2fs_put_dnode(&dn);
+ return ret;
+ }
+
ilen = min((pgoff_t)
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
dn.ofs_in_node, len - i);
@@ -1161,25 +1188,33 @@ roll_back:
return ret;
}
-static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
+static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ pgoff_t start = offset >> PAGE_SHIFT;
+ pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
- f2fs_drop_extent_tree(inode);
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+ f2fs_lock_op(sbi);
+ f2fs_drop_extent_tree(inode);
+ truncate_pagecache(inode, offset);
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
+
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
- pgoff_t pg_start, pg_end;
loff_t new_size;
int ret;
@@ -1194,25 +1229,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- pg_start = offset >> PAGE_SHIFT;
- pg_end = (offset + len) >> PAGE_SHIFT;
-
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
- down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out_unlock;
-
- truncate_pagecache(inode, offset);
+ return ret;
- ret = f2fs_do_collapse(inode, pg_start, pg_end);
+ ret = f2fs_do_collapse(inode, offset, len);
if (ret)
- goto out_unlock;
+ return ret;
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
@@ -1220,11 +1247,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
truncate_pagecache(inode, new_size);
ret = f2fs_truncate_blocks(inode, new_size, true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out_unlock:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1290,12 +1315,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
- down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
- goto out_sem;
-
- truncate_pagecache_range(inode, offset, offset + len - 1);
+ return ret;
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1307,7 +1329,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size, offset + len);
} else {
@@ -1315,7 +1337,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
@@ -1326,12 +1348,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
unsigned int end_offset;
pgoff_t end;
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_pagecache_range(inode,
+ (loff_t)index << PAGE_SHIFT,
+ ((loff_t)pg_end << PAGE_SHIFT) - 1);
+
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1340,7 +1371,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
+
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1368,9 +1402,6 @@ out:
else
f2fs_i_size_write(inode, new_size);
}
-out_sem:
- up_write(&F2FS_I(inode)->i_mmap_sem);
-
return ret;
}
@@ -1399,26 +1430,27 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (ret)
- goto out;
+ return ret;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out;
-
- truncate_pagecache(inode, offset);
+ return ret;
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+ truncate_pagecache(inode, offset);
+
while (!ret && idx > pg_start) {
nr = idx - pg_start;
if (nr > delta)
@@ -1432,16 +1464,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1597,7 +1630,7 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int flags = fi->i_flags;
- if (file_is_encrypt(inode))
+ if (f2fs_encrypted_inode(inode))
flags |= F2FS_ENCRYPT_FL;
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
flags |= F2FS_INLINE_DATA_FL;
@@ -1688,15 +1721,18 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
inode_lock(inode);
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
+ ret = -EINVAL;
goto out;
+ }
ret = f2fs_convert_inline_inode(inode);
if (ret)
goto out;
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+
if (!get_dirty_pages(inode))
goto skip_flush;
@@ -1704,18 +1740,20 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
- if (ret)
+ if (ret) {
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
+ }
skip_flush:
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1733,9 +1771,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (ret)
return ret;
- inode_lock(inode);
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ inode_lock(inode);
if (f2fs_is_volatile_file(inode)) {
ret = -EINVAL;
@@ -1761,7 +1799,6 @@ err_out:
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
ret = -EINVAL;
}
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1853,6 +1890,8 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -1866,7 +1905,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct super_block *sb = sbi->sb;
__u32 in;
- int ret;
+ int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1889,6 +1928,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
}
if (sb) {
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev, sb);
}
break;
@@ -1898,13 +1938,16 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
default:
ret = -EINVAL;
@@ -2107,7 +2150,7 @@ out:
return ret;
}
-static int f2fs_ioc_f2fs_write_checkpoint(struct file *filp, unsigned long arg)
+static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -2351,15 +2394,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
inode_lock(src);
- down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!inode_trylock(dst))
goto out;
- if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) {
- inode_unlock(dst);
- goto out;
- }
}
ret = -EINVAL;
@@ -2404,6 +2442,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
goto out_unlock;
f2fs_balance_fs(sbi, true);
+
+ down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ if (src != dst) {
+ ret = -EBUSY;
+ if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
+ goto out_src;
+ }
+
f2fs_lock_op(sbi);
ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
pos_out >> F2FS_BLKSIZE_BITS,
@@ -2416,13 +2462,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_i_size_write(dst, dst_osize);
}
f2fs_unlock_op(sbi);
-out_unlock:
- if (src != dst) {
+
+ if (src != dst)
up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
+out_src:
+ up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+out_unlock:
+ if (src != dst)
inode_unlock(dst);
- }
out:
- up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
inode_unlock(src);
return ret;
}
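
Moving the i_gc_rwsem acquisition below f2fs_balance_fs() also keeps the lock ordering simple: src is locked unconditionally, dst only by trylock, so two concurrent moves in opposite directions back off with -EBUSY instead of deadlocking. A pthread rendering of that trylock back-off shape:

#include <pthread.h>
#include <stdbool.h>

/* lock src for write; trylock dst so an opposite-direction move
 * fails with "busy" instead of deadlocking */
static bool lock_move_pair(pthread_rwlock_t *src, pthread_rwlock_t *dst)
{
	pthread_rwlock_wrlock(src);
	if (src != dst && pthread_rwlock_trywrlock(dst) != 0) {
		pthread_rwlock_unlock(src);
		return false;	/* caller returns -EBUSY */
	}
	return true;
}

int main(void)
{
	pthread_rwlock_t a = PTHREAD_RWLOCK_INITIALIZER;
	pthread_rwlock_t b = PTHREAD_RWLOCK_INITIALIZER;

	return lock_move_pair(&a, &b) ? 0 : 1;
}
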
@@ -2782,7 +2830,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
- F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = 1;
+ f2fs_i_gc_failures_write(inode, 0);
goto done;
}
@@ -2888,7 +2936,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
return f2fs_ioc_gc_range(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
- return f2fs_ioc_f2fs_write_checkpoint(filp, arg);
+ return f2fs_ioc_write_checkpoint(filp, arg);
case F2FS_IOC_DEFRAGMENT:
return f2fs_ioc_defragment(filp, arg);
case F2FS_IOC_MOVE_RANGE:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 9093be6e7a7d..5c8d00422237 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -53,12 +53,10 @@ static int gc_thread_func(void *data)
continue;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
if (!sb_start_write_trylock(sbi->sb))
continue;
@@ -517,7 +515,11 @@ next_step:
continue;
}
- f2fs_get_node_info(sbi, nid, &ni);
+ if (f2fs_get_node_info(sbi, nid, &ni)) {
+ f2fs_put_page(node_page, 1);
+ continue;
+ }
+
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
@@ -576,7 +578,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(node_page))
return false;
- f2fs_get_node_info(sbi, nid, dni);
+ if (f2fs_get_node_info(sbi, nid, dni)) {
+ f2fs_put_page(node_page, 1);
+ return false;
+ }
if (sum->version != dni->version) {
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -594,6 +599,72 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
+static int ra_data_block(struct inode *inode, pgoff_t index)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
+ struct dnode_of_data dn;
+ struct page *page;
+ struct extent_info ei = {0, 0, 0};
+ struct f2fs_io_info fio = {
+ .sbi = sbi,
+ .ino = inode->i_ino,
+ .type = DATA,
+ .temp = COLD,
+ .op = REQ_OP_READ,
+ .op_flags = 0,
+ .encrypted_page = NULL,
+ .in_list = false,
+ .retry = false,
+ };
+ int err;
+
+ page = f2fs_grab_cache_page(mapping, index, true);
+ if (!page)
+ return -ENOMEM;
+
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ goto got_it;
+ }
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err)
+ goto put_page;
+ f2fs_put_dnode(&dn);
+
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC))) {
+ err = -EFAULT;
+ goto put_page;
+ }
+got_it:
+ /* read page */
+ fio.page = page;
+ fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
+
+ fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
+ dn.data_blkaddr,
+ FGP_LOCK | FGP_CREAT, GFP_NOFS);
+ if (!fio.encrypted_page) {
+ err = -ENOMEM;
+ goto put_page;
+ }
+
+ err = f2fs_submit_page_bio(&fio);
+ if (err)
+ goto put_encrypted_page;
+ f2fs_put_page(fio.encrypted_page, 0);
+ f2fs_put_page(page, 1);
+ return 0;
+put_encrypted_page:
+ f2fs_put_page(fio.encrypted_page, 1);
+put_page:
+ f2fs_put_page(page, 1);
+ return err;
+}
+
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
@@ -615,7 +686,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
- struct page *page;
+ struct page *page, *mpage;
block_t newaddr;
int err;
bool lfs_mode = test_opt(fio.sbi, LFS);
@@ -655,7 +726,10 @@ static void move_data_block(struct inode *inode, block_t bidx,
*/
f2fs_wait_on_page_writeback(page, DATA, true);
- f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ if (err)
+ goto put_out;
+
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
@@ -675,6 +749,23 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto recover_block;
}
+ mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
+ if (mpage) {
+ bool updated = false;
+
+ if (PageUptodate(mpage)) {
+ memcpy(page_address(fio.encrypted_page),
+ page_address(mpage), PAGE_SIZE);
+ updated = true;
+ }
+ f2fs_put_page(mpage, 1);
+ invalidate_mapping_pages(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, fio.old_blkaddr);
+ if (updated)
+ goto write_page;
+ }
+
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_page_out;
@@ -691,6 +782,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto put_page_out;
}
+write_page:
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
@@ -865,22 +957,30 @@ next_step:
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
- /* if inode uses special I/O path, let's go phase 3 */
- if (f2fs_post_read_required(inode)) {
- add_gc_inode(gc_list, inode);
- continue;
- }
-
if (!down_write_trylock(
&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
iput(inode);
+ sbi->skipped_gc_rwsem++;
+ continue;
+ }
+
+ start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
+ ofs_in_node;
+
+ if (f2fs_post_read_required(inode)) {
+ int err = ra_data_block(inode, start_bidx);
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (err) {
+ iput(inode);
+ continue;
+ }
+ add_gc_inode(gc_list, inode);
continue;
}
- start_bidx = f2fs_start_bidx_of_node(nofs, inode);
data_page = f2fs_get_read_data_page(inode,
- start_bidx + ofs_in_node, REQ_RAHEAD,
- true);
+ start_bidx, REQ_RAHEAD, true);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
@@ -903,6 +1003,7 @@ next_step:
continue;
if (!down_write_trylock(
&fi->i_gc_rwsem[WRITE])) {
+ sbi->skipped_gc_rwsem++;
up_write(&fi->i_gc_rwsem[READ]);
continue;
}
@@ -986,7 +1087,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
goto next;
sum = page_address(sum_page);
- f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+ if (type != GET_SUM_TYPE((&sum->footer))) {
+ f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
+ "type [%d, %d] in SSA and SIT",
+ segno, type, GET_SUM_TYPE((&sum->footer)));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ goto next;
+ }
/*
* this is to avoid deadlock:
@@ -1034,6 +1141,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
+ unsigned long long first_skipped;
unsigned int skipped_round = 0, round = 0;
trace_f2fs_gc_begin(sbi->sb, sync, background,
@@ -1046,6 +1154,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
+ sbi->skipped_gc_rwsem = 0;
+ first_skipped = last_skipped;
gc_more:
if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
ret = -EINVAL;
@@ -1087,7 +1197,8 @@ gc_more:
total_freed += seg_freed;
if (gc_type == FG_GC) {
- if (sbi->skipped_atomic_files[FG_GC] > last_skipped)
+ if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
+ sbi->skipped_gc_rwsem)
skipped_round++;
last_skipped = sbi->skipped_atomic_files[FG_GC];
round++;
@@ -1096,15 +1207,23 @@ gc_more:
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
- if (!sync) {
- if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
- if (skipped_round > MAX_SKIP_ATOMIC_COUNT &&
- skipped_round * 2 >= round)
- f2fs_drop_inmem_pages_all(sbi, true);
+ if (sync)
+ goto stop;
+
+ if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
+ if (skipped_round <= MAX_SKIP_GC_COUNT ||
+ skipped_round * 2 < round) {
segno = NULL_SEGNO;
goto gc_more;
}
+ if (first_skipped < last_skipped &&
+ (last_skipped - first_skipped) >
+ sbi->skipped_gc_rwsem) {
+ f2fs_drop_inmem_pages_all(sbi, true);
+ segno = NULL_SEGNO;
+ goto gc_more;
+ }
if (gc_type == FG_GC)
ret = f2fs_write_checkpoint(sbi, &cpc);
}
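
The reworked tail of f2fs_gc() above amounts to a three-way decision: retry while skipped rounds stay rare relative to total rounds; drop the in-memory atomic pages only when the counters show atomic files, rather than i_gc_rwsem contention, caused the skips; otherwise fall back to a checkpoint. A compilable restatement of just that predicate (MAX_SKIP_GC_COUNT is assumed to be 16 for this sketch; the real constant lives in f2fs.h):

    #include <stdio.h>

    #define MAX_SKIP_GC_COUNT 16    /* assumed value for the sketch */

    enum gc_action { GC_RETRY, GC_DROP_INMEM_AND_RETRY, GC_CHECKPOINT };

    static enum gc_action next_action(unsigned int skipped_round,
                                      unsigned int round,
                                      unsigned long long first_skipped,
                                      unsigned long long last_skipped,
                                      unsigned long long skipped_gc_rwsem)
    {
        if (skipped_round <= MAX_SKIP_GC_COUNT || skipped_round * 2 < round)
            return GC_RETRY;        /* not stuck yet: pick a new victim */
        if (first_skipped < last_skipped &&
            last_skipped - first_skipped > skipped_gc_rwsem)
            return GC_DROP_INMEM_AND_RETRY;    /* atomic files dominate */
        return GC_CHECKPOINT;       /* reclaim prefree segments instead */
    }

    int main(void)
    {
        printf("%d\n", next_action(20, 30, 0, 100, 5));
        printf("%d\n", next_action(2, 30, 0, 100, 5));
        return 0;
    }
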
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 043830be5662..115dc219344b 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -121,6 +121,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
+ struct node_info ni;
int dirty, err;
if (!f2fs_exist_data(dn->inode))
@@ -130,6 +131,24 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
+ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
+ if (err) {
+ f2fs_put_dnode(dn);
+ return err;
+ }
+
+ fio.version = ni.version;
+
+ if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(dn);
+ set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
+ f2fs_msg(fio.sbi->sb, KERN_WARNING,
+ "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
+ "run fsck to fix.",
+ __func__, dn->inode->i_ino, dn->data_blkaddr);
+ return -EINVAL;
+ }
+
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
f2fs_do_read_inline_data(page, dn->inode_page);
@@ -363,6 +382,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
if (err)
goto out;
+ if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(&dn);
+ set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
+ f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
+ "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
+ "run fsck to fix.",
+ __func__, dir->i_ino, dn.data_blkaddr);
+ err = -EINVAL;
+ goto out;
+ }
+
f2fs_wait_on_page_writeback(page, DATA, true);
dentry_blk = page_address(page);
@@ -477,6 +507,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
return 0;
recover:
lock_page(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
@@ -668,7 +699,10 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
- f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ if (err)
+ goto out;
+
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
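
Both inline.c hunks above add the same guard: a slot just reserved for inline-data conversion must still carry the reserved NEW_ADDR marker, and anything else means the on-disk inode is corrupt. A stand-alone sketch of the check, with NEW_ADDR and the message mimicked rather than taken from kernel headers:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NEW_ADDR 0xFFFFFFFEu    /* "allocated but not yet written" marker */

    static int check_reserved_slot(uint32_t blkaddr, unsigned long ino)
    {
        if (blkaddr != NEW_ADDR) {
            fprintf(stderr,
                "corrupted inline inode ino=%lx, i_addr[0]:0x%x\n",
                ino, blkaddr);
            return -EINVAL;    /* caller sets SBI_NEED_FSCK and aborts */
        }
        return 0;
    }

    int main(void)
    {
        printf("%d\n", check_reserved_slot(NEW_ADDR, 3));
        printf("%d\n", check_reserved_slot(0x1234, 3));
        return 0;
    }
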
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index f121c864f4c0..959df2249875 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -68,13 +68,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
-static bool __written_first_block(struct f2fs_inode *ri)
+static int __written_first_block(struct f2fs_sb_info *sbi,
+ struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
- if (is_valid_blkaddr(addr))
- return true;
- return false;
+ if (!__is_valid_data_blkaddr(addr))
+ return 1;
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
+ return -EFAULT;
+ return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -121,7 +124,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page
if (!f2fs_sb_has_inode_chksum(sbi->sb))
return false;
- if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
+ if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
@@ -159,8 +162,15 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
struct f2fs_inode *ri;
__u32 provided, calculated;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)))
+ return true;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (!f2fs_enable_inode_chksum(sbi, page))
+#else
if (!f2fs_enable_inode_chksum(sbi, page) ||
PageDirty(page) || PageWriteback(page))
+#endif
return true;
ri = &F2FS_NODE(page)->i;
@@ -185,9 +195,31 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
-static bool sanity_check_inode(struct inode *inode)
+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned long long iblocks;
+
+ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ if (!iblocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
+ return false;
+ }
+
+ if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode footer i_ino=%lx, ino,nid: "
+ "[%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
+ return false;
+ }
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
&& !f2fs_has_extra_attr(inode)) {
@@ -197,6 +229,64 @@ static bool sanity_check_inode(struct inode *inode)
__func__, inode->i_ino);
return false;
}
+
+ if (f2fs_has_extra_attr(inode) &&
+ !f2fs_sb_has_extra_attr(sbi->sb)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) is with extra_attr, "
+ "but extra_attr feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
+ fi->i_extra_isize % sizeof(__le32)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
+ "max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ return false;
+ }
+
+ if (F2FS_I(inode)->extent_tree) {
+ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
+
+ if (ei->len &&
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) extent info [%u, %u, %u] "
+ "is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
+ return false;
+ }
+ }
+
+ if (f2fs_has_inline_data(inode) &&
+ (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx, mode=%u) should not have "
+ "inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
+ if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx, mode=%u) should not have "
+ "inline_dentry, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
return true;
}
@@ -207,6 +297,7 @@ static int do_read_inode(struct inode *inode)
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
+ int err;
/* Check if ino is within scope */
if (f2fs_check_nid_range(sbi, inode->i_ino))
@@ -268,6 +359,11 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
+ if (!sanity_check_inode(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ return -EINVAL;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -275,8 +371,15 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
- if (__written_first_block(ri))
- set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ if (S_ISREG(inode->i_mode)) {
+ err = __written_first_block(sbi, ri);
+ if (err < 0) {
+ f2fs_put_page(node_page, 1);
+ return err;
+ }
+ if (!err)
+ set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ }
if (!f2fs_need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
@@ -297,9 +400,9 @@ static int do_read_inode(struct inode *inode)
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
}
- F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
- F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
- F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
+ F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
+ F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
+ F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
f2fs_put_page(node_page, 1);
@@ -330,10 +433,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
ret = do_read_inode(inode);
if (ret)
goto bad_inode;
- if (!sanity_check_inode(inode)) {
- ret = -EINVAL;
- goto bad_inode;
- }
make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
@@ -470,10 +569,14 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (inode->i_nlink == 0)
clear_inline_node(node_page);
- F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
- F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
- F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
+ F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
+ F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
+ F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
+#endif
}
void f2fs_update_inode_page(struct inode *inode)
@@ -558,12 +661,11 @@ retry:
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
-#endif
+
if (!err) {
f2fs_lock_op(sbi);
err = f2fs_remove_inode_page(inode);
@@ -626,6 +728,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
+ int err;
/*
* clear nlink of inode in order to release resource of inode
@@ -648,10 +751,16 @@ void f2fs_handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encountering a checkpoint
* followed by a sudden power-off.
*/
- f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "May loss orphan inode, run fsck to fix.");
+ goto out;
+ }
if (ni.blk_addr != NULL_ADDR) {
- int err = f2fs_acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -664,6 +773,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
set_inode_flag(inode, FI_FREE_NID);
}
+out:
f2fs_unlock_op(sbi);
/* iput will drop the inode object */
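
sanity_check_inode() now runs against the raw node page before the in-memory inode is trusted, rejecting zero i_blocks, a footer whose ino and nid disagree, and an out-of-range or misaligned i_extra_isize, among other checks. A condensed user-space sketch of those three tests, with simplified stand-ins for the f2fs structures (MAX_EXTRA_ISIZE is an assumed bound):

    #include <stdbool.h>
    #include <stdio.h>

    struct raw_inode {
        unsigned long long i_blocks;    /* block count incl. the inode */
        unsigned int footer_ino;        /* ino recorded in the node footer */
        unsigned int footer_nid;        /* nid recorded in the node footer */
        int extra_isize;                /* bytes of extra attribute area */
    };

    #define MAX_EXTRA_ISIZE 36    /* stand-in for F2FS_TOTAL_EXTRA_ATTR_SIZE */

    static bool inode_is_sane(const struct raw_inode *ri)
    {
        if (!ri->i_blocks)                     /* a live inode owns blocks */
            return false;
        if (ri->footer_ino != ri->footer_nid)  /* footer must self-refer */
            return false;
        if (ri->extra_isize > MAX_EXTRA_ISIZE || ri->extra_isize % 4)
            return false;                      /* must fit and be aligned */
        return true;
    }

    int main(void)
    {
        struct raw_inode ok = { 8, 5, 5, 32 };
        struct raw_inode bad = { 0, 5, 6, 33 };

        printf("%d %d\n", inode_is_sane(&ok), inode_is_sane(&bad));
        return 0;
    }
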
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 231b7f3ea7d3..1f67e389169f 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -51,7 +51,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
inode->i_ino = ino;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
- F2FS_I(inode)->i_crtime = timespec64_to_timespec(inode->i_mtime);
+ F2FS_I(inode)->i_crtime = inode->i_mtime;
inode->i_generation = sbi->s_next_generation++;
if (S_ISDIR(inode->i_mode))
@@ -246,7 +246,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
return -EINVAL;
if (hot) {
- strncpy(extlist[count], name, strlen(name));
+ memcpy(extlist[count], name, strlen(name));
sbi->raw_super->hot_ext_count = hot_count + 1;
} else {
char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
@@ -254,7 +254,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
memcpy(buf, &extlist[cold_count],
F2FS_EXTENSION_LEN * hot_count);
memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
- strncpy(extlist[cold_count], name, strlen(name));
+ memcpy(extlist[cold_count], name, strlen(name));
memcpy(&extlist[cold_count + 1], buf,
F2FS_EXTENSION_LEN * hot_count);
sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
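
The strncpy()-to-memcpy() swap above is behaviour-preserving: with a bound of strlen(name), strncpy() copies exactly that many bytes and never appends a NUL, which is what memcpy() does too, minus gcc's -Wstringop-truncation warning; the destination slots are fixed-size and pre-zeroed. A small demonstration (F2FS_EXTENSION_LEN is 8 in current headers, assumed here):

    #include <stdio.h>
    #include <string.h>

    #define F2FS_EXTENSION_LEN 8    /* fixed-size, pre-zeroed slot */

    int main(void)
    {
        char a[F2FS_EXTENSION_LEN] = {0};
        char b[F2FS_EXTENSION_LEN] = {0};
        const char *name = "mp4";

        strncpy(a, name, strlen(name));    /* old: copies 3 bytes, no NUL */
        memcpy(b, name, strlen(name));     /* new: identical 3 bytes */

        printf("%d\n", memcmp(a, b, sizeof(a)));    /* prints 0 */
        return 0;
    }
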
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 10643b11bd59..dd2e45a661aa 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -28,6 +28,7 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
+static struct kmem_cache *fsync_node_entry_slab;
/*
* Check whether the given nid is within node id range.
@@ -112,25 +113,22 @@ static void clear_node_page_dirty(struct page *page)
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- pgoff_t index = current_nat_addr(sbi, nid);
- return f2fs_get_meta_page(sbi, index);
+ return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct page *src_page;
struct page *dst_page;
- pgoff_t src_off;
pgoff_t dst_off;
void *src_addr;
void *dst_addr;
struct f2fs_nm_info *nm_i = NM_I(sbi);
- src_off = current_nat_addr(sbi, nid);
- dst_off = next_nat_addr(sbi, src_off);
+ dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
/* get current nat block page with lock */
- src_page = f2fs_get_meta_page(sbi, src_off);
+ src_page = get_current_nat_page(sbi, nid);
dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));
@@ -176,14 +174,30 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
if (raw_ne)
node_info_from_raw_nat(&ne->ni, raw_ne);
+
+ spin_lock(&nm_i->nat_list_lock);
list_add_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
nm_i->nat_cnt++;
return ne;
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
- return radix_tree_lookup(&nm_i->nat_root, n);
+ struct nat_entry *ne;
+
+ ne = radix_tree_lookup(&nm_i->nat_root, n);
+
+ /* for a recently accessed nat entry, move it to the tail of the lru list */
+ if (ne && !get_nat_flag(ne, IS_DIRTY)) {
+ spin_lock(&nm_i->nat_list_lock);
+ if (!list_empty(&ne->list))
+ list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+ }
+
+ return ne;
}
static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
@@ -194,7 +208,6 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
- list_del(&e->list);
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
nm_i->nat_cnt--;
__free_nat_entry(e);
@@ -245,16 +258,21 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
nm_i->dirty_nat_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
+ spin_lock(&nm_i->nat_list_lock);
if (new_ne)
list_del_init(&ne->list);
else
list_move_tail(&ne->list, &head->entry_list);
+ spin_unlock(&nm_i->nat_list_lock);
}
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry_set *set, struct nat_entry *ne)
{
+ spin_lock(&nm_i->nat_list_lock);
list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
set_nat_flag(ne, IS_DIRTY, false);
set->entry_cnt--;
nm_i->dirty_nat_cnt--;
@@ -267,6 +285,72 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
start, nr);
}
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
+{
+ return NODE_MAPPING(sbi) == page->mapping &&
+ IS_DNODE(page) && is_cold_node(page);
+}
+
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ spin_lock_init(&sbi->fsync_node_lock);
+ INIT_LIST_HEAD(&sbi->fsync_node_list);
+ sbi->fsync_seg_id = 0;
+ sbi->fsync_node_num = 0;
+}
+
+static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
+ struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+ unsigned int seq_id;
+
+ fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
+
+ get_page(page);
+ fn->page = page;
+ INIT_LIST_HEAD(&fn->list);
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_add_tail(&fn->list, &sbi->fsync_node_list);
+ fn->seq_id = sbi->fsync_seg_id++;
+ seq_id = fn->seq_id;
+ sbi->fsync_node_num++;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+
+ return seq_id;
+}
+
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_for_each_entry(fn, &sbi->fsync_node_list, list) {
+ if (fn->page == page) {
+ list_del(&fn->list);
+ sbi->fsync_node_num--;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ kmem_cache_free(fsync_node_entry_slab, fn);
+ put_page(page);
+ return;
+ }
+ }
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ f2fs_bug_on(sbi, 1);
+}
+
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ sbi->fsync_seg_id = 0;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+}
+
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -371,7 +455,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) &&
+ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -382,7 +466,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (!is_valid_blkaddr(new_blkaddr))
+ if (!is_valid_data_blkaddr(sbi, new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -405,13 +489,25 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0;
- while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
+ spin_lock(&nm_i->nat_list_lock);
+ while (nr_shrink) {
struct nat_entry *ne;
+
+ if (list_empty(&nm_i->nat_entries))
+ break;
+
ne = list_first_entry(&nm_i->nat_entries,
struct nat_entry, list);
+ list_del(&ne->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
+
+ spin_lock(&nm_i->nat_list_lock);
}
+ spin_unlock(&nm_i->nat_list_lock);
+
up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -419,7 +515,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
/*
* This function returns 0 on success, or a negative errno on failure
*/
-void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -443,7 +539,7 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
up_read(&nm_i->nat_tree_lock);
- return;
+ return 0;
}
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
@@ -466,6 +562,9 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
up_read(&nm_i->nat_tree_lock);
page = f2fs_get_meta_page(sbi, index);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
nat_blk = (struct f2fs_nat_block *)page_address(page);
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
@@ -473,6 +572,7 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
cache:
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
+ return 0;
}
/*
@@ -722,12 +822,15 @@ release_out:
return err;
}
-static void truncate_node(struct dnode_of_data *dn)
+static int truncate_node(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
+ int err;
- f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
/* Deallocate node address */
f2fs_invalidate_blocks(sbi, ni.blk_addr);
@@ -750,11 +853,14 @@ static void truncate_node(struct dnode_of_data *dn)
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
+
+ return 0;
}
static int truncate_dnode(struct dnode_of_data *dn)
{
struct page *page;
+ int err;
if (dn->nid == 0)
return 1;
@@ -770,7 +876,10 @@ static int truncate_dnode(struct dnode_of_data *dn)
dn->node_page = page;
dn->ofs_in_node = 0;
f2fs_truncate_data_blocks(dn);
- truncate_node(dn);
+ err = truncate_node(dn);
+ if (err)
+ return err;
+
return 1;
}
@@ -835,7 +944,9 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
if (!ofs) {
/* remove current indirect node */
dn->node_page = page;
- truncate_node(dn);
+ ret = truncate_node(dn);
+ if (ret)
+ goto out_err;
freed++;
} else {
f2fs_put_page(page, 1);
@@ -893,7 +1004,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
- truncate_node(dn);
+ err = truncate_node(dn);
+ if (err)
+ goto fail;
} else {
f2fs_put_page(pages[idx], 1);
}
@@ -1014,6 +1127,7 @@ int f2fs_truncate_xattr_node(struct inode *inode)
nid_t nid = F2FS_I(inode)->i_xattr_nid;
struct dnode_of_data dn;
struct page *npage;
+ int err;
if (!nid)
return 0;
@@ -1022,10 +1136,15 @@ int f2fs_truncate_xattr_node(struct inode *inode)
if (IS_ERR(npage))
return PTR_ERR(npage);
+ set_new_dnode(&dn, inode, NULL, npage, nid);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_page(npage, 1);
+ return err;
+ }
+
f2fs_i_xnid_write(inode, 0);
- set_new_dnode(&dn, inode, NULL, npage, nid);
- truncate_node(&dn);
return 0;
}
@@ -1055,11 +1174,19 @@ int f2fs_remove_inode_page(struct inode *inode)
f2fs_truncate_data_blocks_range(&dn, 1);
/* 0 is possible, after f2fs_new_inode() has failed */
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ f2fs_put_dnode(&dn);
+ return -EIO;
+ }
f2fs_bug_on(F2FS_I_SB(inode),
inode->i_blocks != 0 && inode->i_blocks != 8);
/* will put inode & node pages */
- truncate_node(&dn);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
return 0;
}
@@ -1092,7 +1219,11 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ if (err) {
+ dec_valid_node_count(sbi, dn->inode, !ofs);
+ goto fail;
+ }
f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
new_ni.nid = dn->nid;
@@ -1140,13 +1271,21 @@ static int read_node_page(struct page *page, int op_flags)
.page = page,
.encrypted_page = NULL,
};
+ int err;
- if (PageUptodate(page))
+ if (PageUptodate(page)) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
+#endif
return LOCKED_PAGE;
+ }
- f2fs_get_node_info(sbi, page->index, &ni);
+ err = f2fs_get_node_info(sbi, page->index, &ni);
+ if (err)
+ return err;
- if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ if (unlikely(ni.blk_addr == NULL_ADDR) ||
+ is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
ClearPageUptodate(page);
return -ENOENT;
}
@@ -1348,7 +1487,7 @@ continue_unlock:
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct writeback_control *wbc, bool do_balance,
- enum iostat_type io_type)
+ enum iostat_type io_type, unsigned int *seq_id)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
@@ -1365,6 +1504,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
.io_type = io_type,
.io_wbc = wbc,
};
+ unsigned int seq;
trace_f2fs_writepage(page, NODE);
@@ -1374,10 +1514,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ IS_DNODE(page) && is_cold_node(page))
+ goto redirty_out;
+
/* get old block addr of this node page */
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
+ if (f2fs_get_node_info(sbi, nid, &ni))
+ goto redirty_out;
+
if (wbc->for_reclaim) {
if (!down_read_trylock(&sbi->node_write))
goto redirty_out;
@@ -1385,8 +1532,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
down_read(&sbi->node_write);
}
- f2fs_get_node_info(sbi, nid, &ni);
-
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
@@ -1396,11 +1541,22 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
return 0;
}
+ if (__is_valid_data_blkaddr(ni.blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
+ goto redirty_out;
+
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
set_page_writeback(page);
ClearPageError(page);
+
+ if (f2fs_in_warm_node_list(sbi, page)) {
+ seq = f2fs_add_fsync_node_entry(sbi, page);
+ if (seq_id)
+ *seq_id = seq;
+ }
+
fio.old_blkaddr = ni.blk_addr;
f2fs_do_write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
@@ -1448,7 +1604,7 @@ void f2fs_move_node_page(struct page *node_page, int gc_type)
goto out_page;
if (__write_node_page(node_page, false, NULL,
- &wbc, false, FS_GC_NODE_IO))
+ &wbc, false, FS_GC_NODE_IO, NULL))
unlock_page(node_page);
goto release_page;
} else {
@@ -1465,11 +1621,13 @@ release_page:
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
+ return __write_node_page(page, false, NULL, wbc, false,
+ FS_NODE_IO, NULL);
}
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic)
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id)
{
pgoff_t index;
pgoff_t last_idx = ULONG_MAX;
@@ -1550,7 +1708,7 @@ continue_unlock:
ret = __write_node_page(page, atomic &&
page == last_page,
&submitted, wbc, true,
- FS_NODE_IO);
+ FS_NODE_IO, seq_id);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
@@ -1633,7 +1791,9 @@ next_step:
!is_cold_node(page)))
continue;
lock_node:
- if (!trylock_page(page))
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ lock_page(page);
+ else if (!trylock_page(page))
continue;
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
@@ -1665,7 +1825,7 @@ continue_unlock:
set_dentry_mark(page, 0);
ret = __write_node_page(page, false, &submitted,
- wbc, do_balance, io_type);
+ wbc, do_balance, io_type, NULL);
if (ret)
unlock_page(page);
else if (submitted)
@@ -1684,10 +1844,12 @@ continue_unlock:
}
if (step < 2) {
+ if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
+ goto out;
step++;
goto next_step;
}
-
+out:
if (nwritten)
f2fs_submit_merged_write(sbi, NODE);
@@ -1696,35 +1858,46 @@ continue_unlock:
return ret;
}
-int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id)
{
- pgoff_t index = 0;
- struct pagevec pvec;
+ struct fsync_node_entry *fn;
+ struct page *page;
+ struct list_head *head = &sbi->fsync_node_list;
+ unsigned long flags;
+ unsigned int cur_seq_id = 0;
int ret2, ret = 0;
- int nr_pages;
- pagevec_init(&pvec);
+ while (seq_id && cur_seq_id < seq_id) {
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ break;
+ }
+ fn = list_first_entry(head, struct fsync_node_entry, list);
+ if (fn->seq_id > seq_id) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ break;
+ }
+ cur_seq_id = fn->seq_id;
+ page = fn->page;
+ get_page(page);
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
- while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_WRITEBACK))) {
- int i;
+ f2fs_wait_on_page_writeback(page, NODE, true);
+ if (TestClearPageError(page))
+ ret = -EIO;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
+ put_page(page);
- if (ino && ino_of_node(page) == ino) {
- f2fs_wait_on_page_writeback(page, NODE, true);
- if (TestClearPageError(page))
- ret = -EIO;
- }
- }
- pagevec_release(&pvec);
- cond_resched();
+ if (ret)
+ break;
}
ret2 = filemap_check_errors(NODE_MAPPING(sbi));
if (!ret)
ret = ret2;
+
return ret;
}
@@ -1774,6 +1947,10 @@ static int f2fs_set_node_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (IS_INODE(page))
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
@@ -1968,7 +2145,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-static void scan_nat_page(struct f2fs_sb_info *sbi,
+static int scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1986,7 +2163,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
- f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
+
+ if (blk_addr == NEW_ADDR)
+ return -EINVAL;
+
if (blk_addr == NULL_ADDR) {
add_free_nid(sbi, start_nid, true, true);
} else {
@@ -1995,6 +2175,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
+
+ return 0;
}
static void scan_curseg_cache(struct f2fs_sb_info *sbi)
@@ -2050,11 +2232,11 @@ out:
up_read(&nm_i->nat_tree_lock);
}
-static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
+static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- int i = 0;
+ int i = 0, ret;
nid_t nid = nm_i->next_scan_nid;
if (unlikely(nid >= nm_i->max_nid))
@@ -2062,17 +2244,17 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
- return;
+ return 0;
if (!mount) {
/* try to find free nids in free_nid_bitmap */
scan_free_nid_bits(sbi);
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
}
/* readahead nat pages to be scanned */
@@ -2086,8 +2268,16 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
nm_i->nat_block_bitmap)) {
struct page *page = get_current_nat_page(sbi, nid);
- scan_nat_page(sbi, page, nid);
+ ret = scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
+
+ if (ret) {
+ up_read(&nm_i->nat_tree_lock);
+ f2fs_bug_on(sbi, !mount);
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "NAT is corrupt, run fsck to fix it");
+ return -EINVAL;
+ }
}
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -2108,13 +2298,19 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
+
+ return 0;
}
-void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
+ int ret;
+
mutex_lock(&NM_I(sbi)->build_lock);
- __f2fs_build_free_nids(sbi, sync, mount);
+ ret = __f2fs_build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
+
+ return ret;
}
/*
@@ -2127,12 +2323,11 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
retry:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
f2fs_show_injection_info(FAULT_ALLOC_NID);
return false;
}
-#endif
+
spin_lock(&nm_i->nid_list_lock);
if (unlikely(nm_i->available_nids == 0)) {
@@ -2277,12 +2472,16 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
struct dnode_of_data dn;
struct node_info ni;
struct page *xpage;
+ int err;
if (!prev_xnid)
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
- f2fs_get_node_info(sbi, prev_xnid, &ni);
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+ if (err)
+ return err;
+
f2fs_invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, inode, false);
set_node_addr(sbi, &ni, NULL_ADDR, false);
@@ -2317,8 +2516,11 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
+ int err;
- f2fs_get_node_info(sbi, ino, &old_ni);
+ err = f2fs_get_node_info(sbi, ino, &old_ni);
+ if (err)
+ return err;
if (unlikely(old_ni.blk_addr != NULL_ADDR))
return -EINVAL;
@@ -2372,7 +2574,7 @@ retry:
return 0;
}
-void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
@@ -2394,6 +2596,9 @@ void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
for (idx = addr; idx < addr + nrpages; idx++) {
struct page *page = f2fs_get_tmp_page(sbi, idx);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
@@ -2405,6 +2610,7 @@ void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
invalidate_mapping_pages(META_MAPPING(sbi), addr,
addr + nrpages);
}
+ return 0;
}
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -2582,6 +2788,13 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
nid_t set_idx = 0;
LIST_HEAD(sets);
+ /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+ if (enabled_nat_bits(sbi, cpc)) {
+ down_write(&nm_i->nat_tree_lock);
+ remove_nats_in_journal(sbi);
+ up_write(&nm_i->nat_tree_lock);
+ }
+
if (!nm_i->dirty_nat_cnt)
return;
@@ -2634,7 +2847,13 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
- struct page *page = f2fs_get_meta_page(sbi, nat_bits_addr++);
+ struct page *page;
+
+ page = f2fs_get_meta_page(sbi, nat_bits_addr++);
+ if (IS_ERR(page)) {
+ disable_nat_bits(sbi, true);
+ return PTR_ERR(page);
+ }
memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
page_address(page), F2FS_BLKSIZE);
@@ -2718,6 +2937,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
+ spin_lock_init(&nm_i->nat_list_lock);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->nid_list_lock);
@@ -2762,8 +2982,8 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
for (i = 0; i < nm_i->nat_blocks; i++) {
nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
- NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
- if (!nm_i->free_nid_bitmap)
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
+ if (!nm_i->free_nid_bitmap[i])
return -ENOMEM;
}
@@ -2801,8 +3021,7 @@ int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
/* load free nid status from nat_bits table */
load_free_nid_bitmap(sbi);
- f2fs_build_free_nids(sbi, true, true);
- return 0;
+ return f2fs_build_free_nids(sbi, true, true);
}
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
@@ -2837,8 +3056,13 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
- for (idx = 0; idx < found; idx++)
+ for (idx = 0; idx < found; idx++) {
+ spin_lock(&nm_i->nat_list_lock);
+ list_del(&natvec[idx]->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, natvec[idx]);
+ }
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
@@ -2893,8 +3117,15 @@ int __init f2fs_create_node_manager_caches(void)
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
goto destroy_free_nid;
+
+ fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
+ sizeof(struct fsync_node_entry));
+ if (!fsync_node_entry_slab)
+ goto destroy_nat_entry_set;
return 0;
+destroy_nat_entry_set:
+ kmem_cache_destroy(nat_entry_set_slab);
destroy_free_nid:
kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
@@ -2905,6 +3136,7 @@ fail:
void f2fs_destroy_node_manager_caches(void)
{
+ kmem_cache_destroy(fsync_node_entry_slab);
kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
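
The fsync_node_entry machinery added above replaces the old scan of every writeback-tagged node page: each written fsync node gets a monotonically increasing seq_id under fsync_node_lock, and the waiter drains list entries only up to the id it was handed. A single-threaded sketch of that bookkeeping (the printf stands in for f2fs_wait_on_page_writeback(); the kernel holds a spinlock and a page reference around each step):

    #include <stdio.h>

    struct entry {
        unsigned int seq_id;
        struct entry *next;
    };

    static struct entry *head;
    static struct entry **tailp = &head;
    static unsigned int next_seq;

    static unsigned int add_entry(struct entry *e)
    {
        e->seq_id = next_seq++;    /* under fsync_node_lock in the kernel */
        e->next = NULL;
        *tailp = e;
        tailp = &e->next;
        return e->seq_id;
    }

    static void wait_up_to(unsigned int seq_id)
    {
        while (head && head->seq_id <= seq_id) {
            struct entry *e = head;

            head = e->next;
            if (!head)
                tailp = &head;
            printf("waited on seq %u\n", e->seq_id);
        }
    }

    int main(void)
    {
        struct entry a, b, c;
        unsigned int target;

        add_entry(&a);
        target = add_entry(&b);
        add_entry(&c);
        wait_up_to(target);    /* drains a and b, leaves c queued */
        return 0;
    }
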
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index b95e49e4a928..0f4db7a61254 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -135,6 +135,11 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
+static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
+{
+ return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
+}
+
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
@@ -444,6 +449,10 @@ static inline void set_mark(struct page *page, int mark, int type)
else
flag &= ~(0x1 << type);
rn->footer.flag = cpu_to_le32(flag);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
}
#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
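
excess_dirty_nodes() introduces a watermark of eight segments' worth of dirty node pages; the segment.c hunks below use it alongside excess_dirty_nats() to decide when background balancing should force a checkpoint. The predicate, restated with the default 2 MiB segment of 4 KiB blocks assumed:

    #include <stdbool.h>
    #include <stdio.h>

    #define BLOCKS_PER_SEG 512    /* default f2fs segment: 2 MiB / 4 KiB */

    static bool excess_dirty_nodes(unsigned long dirty_node_pages)
    {
        return dirty_node_pages >= BLOCKS_PER_SEG * 8;
    }

    int main(void)
    {
        printf("%d %d\n", excess_dirty_nodes(4095), excess_dirty_nodes(4096));
        return 0;
    }
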
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 38f25f0b193a..95511ed11a22 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -241,8 +241,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
- unsigned int free_blocks = sbi->user_block_count -
- valid_user_blocks(sbi);
+ unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
+ valid_user_blocks(sbi);
int err = 0;
/* get node pages in the current segment */
@@ -252,10 +252,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
while (1) {
struct fsync_inode_entry *entry;
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page))
break;
@@ -471,7 +475,10 @@ retry_dn:
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
- f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err)
+ goto err;
+
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
@@ -507,14 +514,13 @@ retry_dn:
}
/* dest is valid block, try to recover from src to dest */
- if (f2fs_is_valid_meta_blkaddr(sbi, dest, META_POR)) {
+ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = f2fs_reserve_new_block(&dn);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- while (err)
+ while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
err = f2fs_reserve_new_block(&dn);
-#endif
/* We should not get -ENOSPC */
f2fs_bug_on(sbi, err);
if (err)
@@ -568,12 +574,16 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
f2fs_ra_meta_pages_cond(sbi, blkaddr);
page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
@@ -628,7 +638,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "recover fsync data on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
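
The recovery.c hunk above swaps an #ifdef block for the IS_ENABLED() idiom: the config test becomes a compile-time constant inside ordinary C, so both branches always parse (catching bitrot) while the dead one is optimized away. A self-contained imitation with a simplified stand-in for the kconfig macro:

    #include <stdio.h>

    #define CONFIG_FAULT_INJECTION 1    /* flip to 0 to compile the loop out */
    #define IS_ENABLED(x) (x)           /* the real kconfig macro is subtler */

    static int reserve_new_block(void)
    {
        static int failures = 3;    /* injected: fail the first attempts */
        return failures-- > 0 ? -11 /* -EAGAIN */ : 0;
    }

    int main(void)
    {
        int err = reserve_new_block();

        /* both branches always parse; dead code is simply removed */
        while (err && IS_ENABLED(CONFIG_FAULT_INJECTION))
            err = reserve_new_block();

        printf("err = %d\n", err);
        return 0;
    }
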
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9efce174c51a..30779aaa9dba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -250,7 +250,13 @@ retry:
err = -EAGAIN;
goto next;
}
- f2fs_get_node_info(sbi, dn.nid, &ni);
+
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
+
if (cur->old_addr == NEW_ADDR) {
f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
f2fs_update_data_blkaddr(&dn, NEW_ADDR);
@@ -439,8 +445,10 @@ int f2fs_commit_inmem_pages(struct inode *inode)
int err;
f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
+ down_write(&fi->i_gc_rwsem[WRITE]);
+
+ f2fs_lock_op(sbi);
set_inode_flag(inode, FI_ATOMIC_COMMIT);
mutex_lock(&fi->inmem_lock);
@@ -455,6 +463,8 @@ int f2fs_commit_inmem_pages(struct inode *inode)
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
f2fs_unlock_op(sbi);
+ up_write(&fi->i_gc_rwsem[WRITE]);
+
return err;
}
@@ -464,12 +474,10 @@ int f2fs_commit_inmem_pages(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
/* balance_fs_bg is able to be pending */
if (need && excess_cached_nats(sbi))
@@ -503,7 +511,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi) && !excess_dirty_nats(sbi))
+ if (!is_idle(sbi) &&
+ (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
/* checkpoint is the only way to shrink partial cached entries */
@@ -511,6 +520,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
+ excess_dirty_nodes(sbi) ||
f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
@@ -831,9 +841,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
+ dc->issuing = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
+ spin_lock_init(&dc->lock);
+ dc->bio_ref = 0;
atomic_inc(&dcc->discard_cmd_cnt);
dcc->undiscard_blks += len;
@@ -860,7 +873,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_dec(&dcc->issing_discard);
+ atomic_sub(dc->issuing, &dcc->issing_discard);
list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root);
@@ -875,9 +888,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ unsigned long flags;
trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->bio_ref) {
+ spin_unlock_irqrestore(&dc->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
+
f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP)
@@ -893,10 +914,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
+ unsigned long flags;
dc->error = blk_status_to_errno(bio->bi_status);
- dc->state = D_DONE;
- complete_all(&dc->wait);
+
+ spin_lock_irqsave(&dc->lock, flags);
+ dc->bio_ref--;
+ if (!dc->bio_ref && dc->state == D_SUBMIT) {
+ dc->state = D_DONE;
+ complete_all(&dc->wait);
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
bio_put(bio);
}
@@ -934,6 +962,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
+ dpolicy->ordered = false;
dpolicy->granularity = granularity;
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
@@ -945,6 +974,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = true;
dpolicy->sync = false;
+ dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -962,48 +992,115 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
}
}
-
+static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t lstart,
+ block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
-static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
+static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
- struct discard_cmd *dc)
+ struct discard_cmd *dc,
+ unsigned int *issued)
{
+ struct block_device *bdev = dc->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
- struct bio *bio = NULL;
int flag = dpolicy->sync ? REQ_SYNC : 0;
+ block_t lstart, start, len, total_len;
+ int err = 0;
if (dc->state != D_PREP)
- return;
+ return 0;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
- return;
+ return 0;
- trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);
+ trace_f2fs_issue_discard(bdev, dc->start, dc->len);
- dc->error = __blkdev_issue_discard(dc->bdev,
- SECTOR_FROM_BLOCK(dc->start),
- SECTOR_FROM_BLOCK(dc->len),
- GFP_NOFS, 0, &bio);
- if (!dc->error) {
- /* should keep before submission to avoid D_DONE right away */
- dc->state = D_SUBMIT;
- atomic_inc(&dcc->issued_discard);
- atomic_inc(&dcc->issing_discard);
- if (bio) {
- bio->bi_private = dc;
- bio->bi_end_io = f2fs_submit_discard_endio;
- bio->bi_opf |= flag;
- submit_bio(bio);
- list_move_tail(&dc->list, wait_list);
- __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
-
- f2fs_update_iostat(sbi, FS_DISCARD, 1);
+ lstart = dc->lstart;
+ start = dc->start;
+ len = dc->len;
+ total_len = len;
+
+ dc->len = 0;
+
+ while (total_len && *issued < dpolicy->max_requests && !err) {
+ struct bio *bio = NULL;
+ unsigned long flags;
+ bool last = true;
+
+ if (len > max_discard_blocks) {
+ len = max_discard_blocks;
+ last = false;
}
- } else {
- __remove_discard_cmd(sbi, dc);
+
+ (*issued)++;
+ if (*issued == dpolicy->max_requests)
+ last = true;
+
+ dc->len += len;
+
+ if (time_to_inject(sbi, FAULT_DISCARD)) {
+ f2fs_show_injection_info(FAULT_DISCARD);
+ err = -EIO;
+ goto submit;
+ }
+ err = __blkdev_issue_discard(bdev,
+ SECTOR_FROM_BLOCK(start),
+ SECTOR_FROM_BLOCK(len),
+ GFP_NOFS, 0, &bio);
+submit:
+ if (err) {
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->state == D_PARTIAL)
+ dc->state = D_SUBMIT;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ break;
+ }
+
+ f2fs_bug_on(sbi, !bio);
+
+ /*
+ * the state must be set before submission to avoid
+ * seeing D_DONE right away
+ */
+ spin_lock_irqsave(&dc->lock, flags);
+ if (last)
+ dc->state = D_SUBMIT;
+ else
+ dc->state = D_PARTIAL;
+ dc->bio_ref++;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ atomic_inc(&dcc->issing_discard);
+ dc->issuing++;
+ list_move_tail(&dc->list, wait_list);
+
+ /* sanity check on discard range */
+ __check_sit_bitmap(sbi, start, start + len);
+
+ bio->bi_private = dc;
+ bio->bi_end_io = f2fs_submit_discard_endio;
+ bio->bi_opf |= flag;
+ submit_bio(bio);
+
+ atomic_inc(&dcc->issued_discard);
+
+ f2fs_update_iostat(sbi, FS_DISCARD, 1);
+
+ lstart += len;
+ start += len;
+ total_len -= len;
+ len = total_len;
}
+
+ if (!err && len)
+ __update_discard_tree_range(sbi, bdev, lstart, start, len);
+ return err;
}
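
__submit_discard_cmd() above now splits one logical discard into bios of at most max_discard_blocks each, counting them in dc->bio_ref; the endio handler completes the command only once the last bio returns and the state has reached D_SUBMIT. A single-threaded sketch of that split-and-refcount shape (the kernel does this bookkeeping under dc->lock, with D_PARTIAL/D_SUBMIT states):

    #include <stdio.h>

    #define MAX_DISCARD_BLOCKS 128    /* stand-in for the queue limit */

    struct cmd {
        unsigned int bio_ref;    /* in-flight split bios */
        int submitted;           /* set once the last piece is issued */
    };

    /* endio side: only the final completion marks the command done */
    static void endio(struct cmd *dc)
    {
        if (--dc->bio_ref == 0 && dc->submitted)
            printf("command complete\n");
    }

    static void submit(struct cmd *dc, unsigned int len)
    {
        while (len) {
            unsigned int piece = len > MAX_DISCARD_BLOCKS ?
                                     MAX_DISCARD_BLOCKS : len;

            len -= piece;
            if (!len)
                dc->submitted = 1;    /* last piece: like D_SUBMIT */
            dc->bio_ref++;
            printf("issue %u blocks\n", piece);
        }
    }

    int main(void)
    {
        struct cmd dc = { 0, 0 };

        submit(&dc, 300);    /* splits into 128 + 128 + 44 */
        endio(&dc);
        endio(&dc);
        endio(&dc);          /* third completion finishes the command */
        return 0;
    }
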
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
@@ -1084,10 +1181,11 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
block_t end = lstart + len;
- mutex_lock(&dcc->cmd_lock);
-
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, lstart,
(struct rb_entry **)&prev_dc,
@@ -1127,7 +1225,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (prev_dc && prev_dc->state == D_PREP &&
prev_dc->bdev == bdev &&
- __is_discard_back_mergeable(&di, &prev_dc->di)) {
+ __is_discard_back_mergeable(&di, &prev_dc->di,
+ max_discard_blocks)) {
prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc);
@@ -1138,7 +1237,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (next_dc && next_dc->state == D_PREP &&
next_dc->bdev == bdev &&
- __is_discard_front_mergeable(&di, &next_dc->di)) {
+ __is_discard_front_mergeable(&di, &next_dc->di,
+ max_discard_blocks)) {
next_dc->di.lstart = di.lstart;
next_dc->di.len += di.len;
next_dc->di.start = di.start;
@@ -1161,8 +1261,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
node = rb_next(&prev_dc->rb_node);
next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
}
-
- mutex_unlock(&dcc->cmd_lock);
}
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
@@ -1177,10 +1275,72 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
blkstart -= FDEV(devi).start_blk;
}
+ mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
+ mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
return 0;
}
+static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+ struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct discard_cmd *dc;
+ struct blk_plug plug;
+ unsigned int pos = dcc->next_pos;
+ unsigned int issued = 0;
+ bool io_interrupted = false;
+
+ mutex_lock(&dcc->cmd_lock);
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
+ NULL, pos,
+ (struct rb_entry **)&prev_dc,
+ (struct rb_entry **)&next_dc,
+ &insert_p, &insert_parent, true);
+ if (!dc)
+ dc = next_dc;
+
+ blk_start_plug(&plug);
+
+ while (dc) {
+ struct rb_node *node;
+ int err = 0;
+
+ if (dc->state != D_PREP)
+ goto next;
+
+ if (dpolicy->io_aware && !is_idle(sbi)) {
+ io_interrupted = true;
+ break;
+ }
+
+ dcc->next_pos = dc->lstart + dc->len;
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
+ break;
+next:
+ node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+ dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+ }
+
+ blk_finish_plug(&plug);
+
+ if (!dc)
+ dcc->next_pos = 0;
+
+ mutex_unlock(&dcc->cmd_lock);
+
+ if (!issued && io_interrupted)
+ issued = -1;
+
+ return issued;
+}
+
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
@@ -1188,19 +1348,24 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int i, iter = 0, issued = 0;
+ int i, issued = 0;
bool io_interrupted = false;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (i + 1 < dpolicy->granularity)
break;
+
+ if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+ return __issue_discard_cmd_orderly(sbi, dpolicy);
+
pend_list = &dcc->pend_list[i];
mutex_lock(&dcc->cmd_lock);
if (list_empty(pend_list))
goto next;
- f2fs_bug_on(sbi,
- !f2fs_check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -1208,20 +1373,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
!is_idle(sbi)) {
io_interrupted = true;
- goto skip;
+ break;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
- issued++;
-skip:
- if (++iter >= dpolicy->max_requests)
+ __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
break;
}
blk_finish_plug(&plug);
next:
mutex_unlock(&dcc->cmd_lock);
- if (iter >= dpolicy->max_requests)
+ if (issued >= dpolicy->max_requests || io_interrupted)
break;
}
@@ -1319,21 +1483,22 @@ next:
return trimmed;
}
-static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
struct discard_policy dp;
+ unsigned int discard_blks;
- if (dpolicy) {
- __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
- return;
- }
+ if (dpolicy)
+ return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
/* wait all */
__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+
+ return discard_blks;
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1386,6 +1551,8 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
/* just to make sure there are no pending discard commands */
__wait_all_discard_cmd(sbi, NULL);
+
+ f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
return dropped;
}
@@ -1643,21 +1810,30 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
+ bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
mutex_lock(&dirty_i->seglist_lock);
while (1) {
int i;
+
+ if (need_align && end != -1)
+ end--;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
break;
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
- for (i = start; i < end; i++)
- clear_bit(i, prefree_map);
+ if (need_align) {
+ start = rounddown(start, sbi->segs_per_sec);
+ end = roundup(end, sbi->segs_per_sec);
+ }
- dirty_i->nr_dirty[PRE] -= end - start;
+ for (i = start; i < end; i++) {
+ if (test_and_clear_bit(i, prefree_map))
+ dirty_i->nr_dirty[PRE]--;
+ }
if (!test_opt(sbi, DISCARD))
continue;
@@ -1751,7 +1927,9 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
+ dcc->next_pos = 0;
dcc->root = RB_ROOT;
+ dcc->rbtree_check = false;
init_waitqueue_head(&dcc->discard_wait_queue);
SM_I(sbi)->dcc_info = dcc;
@@ -1901,6 +2079,8 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
if (addr == NEW_ADDR)
return;
+ invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
+
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
@@ -1919,7 +2099,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
struct seg_entry *se;
bool is_cp = false;
- if (!is_valid_blkaddr(blkaddr))
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -1983,7 +2163,7 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
*/
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return f2fs_get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2366,7 +2546,7 @@ bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
return has_candidate;
}
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
unsigned int start, unsigned int end)
{
@@ -2376,12 +2556,15 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct blk_plug plug;
int issued;
+ unsigned int trimmed = 0;
next:
issued = 0;
mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
@@ -2395,6 +2578,7 @@ next:
while (dc && dc->lstart <= end) {
struct rb_node *node;
+ int err = 0;
if (dc->len < dpolicy->granularity)
goto skip;
@@ -2404,19 +2588,24 @@ next:
goto skip;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
- if (++issued >= dpolicy->max_requests) {
+ if (issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len;
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
- __wait_all_discard_cmd(sbi, NULL);
+ trimmed += __wait_all_discard_cmd(sbi, NULL);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto next;
}
skip:
node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
if (fatal_signal_pending(current))
@@ -2425,6 +2614,8 @@ skip:
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
+
+ return trimmed;
}
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
@@ -2437,12 +2628,13 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
+ bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
- if (end <= MAIN_BLKADDR(sbi))
- return -EINVAL;
+ if (end < MAIN_BLKADDR(sbi))
+ goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -2454,6 +2646,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
+ if (need_align) {
+ start_segno = rounddown(start_segno, sbi->segs_per_sec);
+ end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
+ }
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
@@ -2469,24 +2665,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
if (err)
goto out;
- start_block = START_BLOCK(sbi, start_segno);
- end_block = START_BLOCK(sbi, end_segno + 1);
-
- __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
- __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
-
/*
* We filed discard candidates, but we don't need to wait for all of
* them, since they'll be issued during idle time along with the runtime
* discard option. The user configuration is expected to rely on either
* runtime discard or periodic fstrim, not both.
*/
- if (!test_opt(sbi, DISCARD)) {
- trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+ if (test_opt(sbi, DISCARD))
+ goto out;
+
+ start_block = START_BLOCK(sbi, start_segno);
+ end_block = START_BLOCK(sbi, end_segno + 1);
+
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+ trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
+ start_block, end_block);
+
+ trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
- range->len = F2FS_BLK_TO_BYTES(trimmed);
- }
out:
+ if (!err)
+ range->len = F2FS_BLK_TO_BYTES(trimmed);
return err;
}
@@ -2639,8 +2838,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
- is_inode_flag_set(inode, FI_ATOMIC_FILE) ||
- is_inode_flag_set(inode, FI_VOLATILE_FILE))
+ f2fs_is_atomic_file(inode) ||
+ f2fs_is_volatile_file(inode))
return CURSEG_HOT_DATA;
return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
} else {
@@ -2781,6 +2980,9 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
reallocate:
f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio, true);
+ if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(fio->sbi),
+ fio->old_blkaddr, fio->old_blkaddr);
/* writeout dirty page into bdev */
f2fs_submit_page_write(fio);
@@ -2836,11 +3038,9 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
{
struct f2fs_sb_info *sbi = fio->sbi;
struct f2fs_summary sum;
- struct node_info ni;
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
- f2fs_get_node_info(sbi, dn->nid, &ni);
- set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
@@ -2937,8 +3137,11 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!recover_curseg || recover_newaddr)
update_sit_entry(sbi, new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
update_sit_entry(sbi, old_blkaddr, -1);
+ }
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
@@ -2992,7 +3195,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct page *cpage;
- if (!is_valid_blkaddr(blkaddr))
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
@@ -3002,7 +3205,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
}
}
-static void read_compacted_summaries(struct f2fs_sb_info *sbi)
+static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *seg_i;
@@ -3014,6 +3217,8 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
start = start_sum_block(sbi);
page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
/* Step 1: restore nat cache */
@@ -3054,11 +3259,14 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
page = NULL;
page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
offset = 0;
}
}
f2fs_put_page(page, 1);
+ return 0;
}
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
@@ -3070,6 +3278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
unsigned short blk_off;
unsigned int segno = 0;
block_t blk_addr = 0;
+ int err = 0;
/* get segment number and block addr */
if (IS_DATASEG(type)) {
@@ -3093,6 +3302,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
}
new = f2fs_get_meta_page(sbi, blk_addr);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
sum = (struct f2fs_summary_block *)page_address(new);
if (IS_NODESEG(type)) {
@@ -3104,7 +3315,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
- f2fs_restore_node_summary(sbi, segno, sum);
+ err = f2fs_restore_node_summary(sbi, segno, sum);
+ if (err)
+ goto out;
}
}
@@ -3124,8 +3337,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
curseg->alloc_type = ckpt->alloc_type[type];
curseg->next_blkoff = blk_off;
mutex_unlock(&curseg->curseg_mutex);
+out:
f2fs_put_page(new, 1);
- return 0;
+ return err;
}
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
@@ -3143,7 +3357,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
META_CP, true);
/* restore for compacted data summary */
- read_compacted_summaries(sbi);
+ err = read_compacted_summaries(sbi);
+ if (err)
+ return err;
type = CURSEG_HOT_NODE;
}
@@ -3274,7 +3490,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -3923,6 +4139,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+ sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index f18fc82fbe99..b3d9e317ff0c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -85,7 +85,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- ((!is_valid_blkaddr(blk_addr)) ? \
+ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -215,7 +215,7 @@ struct segment_allocation {
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
-#define MAX_SKIP_ATOMIC_COUNT 16
+#define MAX_SKIP_GC_COUNT 16
struct inmem_pages {
struct list_head list;
@@ -448,6 +448,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
+ if (IS_CURSEC(sbi, secno))
+ goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
@@ -455,6 +457,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_sections++;
}
}
+skip_free:
spin_unlock(&free_i->segmap_lock);
}
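/*
 * Annotation (hedged, not part of the patch): a section that still hosts
 * one of the active cursegs must not be accounted as a free section even
 * when the last of its segment bits clears, hence the skip_free bailout
 * past the free_sections++ bookkeeping.
 */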
@@ -645,13 +648,10 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
struct f2fs_sb_info *sbi = fio->sbi;
- if (PAGE_TYPE_OF_BIO(fio->type) == META &&
- (!is_read_io(fio->op) || fio->is_meta))
- BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
- blk_addr >= MAIN_BLKADDR(sbi));
+ if (__is_meta_io(fio))
+ verify_blkaddr(sbi, blk_addr, META_GENERIC);
else
- BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
- blk_addr >= MAX_BLKADDR(sbi));
+ verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 17bcff789c08..896b885f504e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -41,7 +41,7 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
-char *fault_name[FAULT_MAX] = {
+char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
@@ -55,20 +55,24 @@ char *fault_name[FAULT_MAX] = {
[FAULT_TRUNCATE] = "truncate fail",
[FAULT_IO] = "IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
};
-static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
- unsigned int rate)
+void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type)
{
struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (rate) {
atomic_set(&ffi->inject_ops, 0);
ffi->inject_rate = rate;
- ffi->inject_type = (1 << FAULT_MAX) - 1;
- } else {
- memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
+
+ if (type)
+ ffi->inject_type = type;
+
+ if (!rate && !type)
+ memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
#endif
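/*
 * Hedged sketch (annotation, not part of the patch): with the split
 * above, fault_injection=%u sets the rate for every fault type, while
 * fault_type=%u installs a bitmask selecting which types may fire. A
 * standalone analogue of the resulting check, names hypothetical:
 */
static int should_inject(unsigned int inject_type, int type,
			 unsigned int *inject_ops, unsigned int inject_rate)
{
	if (!inject_rate || !(inject_type & (1u << type)))
		return 0;
	if (++*inject_ops < inject_rate)
		return 0;
	*inject_ops = 0;	/* reset the counter and fire once */
	return 1;
}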
@@ -113,6 +117,7 @@ enum {
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
+ Opt_fault_type,
Opt_lazytime,
Opt_nolazytime,
Opt_quota,
@@ -170,6 +175,7 @@ static match_table_t f2fs_tokens = {
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
+ {Opt_fault_type, "fault_type=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_quota, "quota"},
@@ -347,12 +353,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
"QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
- return -1;
- }
return 0;
}
#endif
@@ -606,7 +606,18 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, arg);
+ f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
+ set_opt(sbi, FAULT_INJECTION);
+#else
+ f2fs_msg(sb, KERN_INFO,
+ "FAULT_INJECTION was not selected");
+#endif
+ break;
+ case Opt_fault_type:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ f2fs_build_fault_attr(sbi, 0, arg);
set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
@@ -775,6 +786,19 @@ static int parse_options(struct super_block *sb, char *options)
#ifdef CONFIG_QUOTA
if (f2fs_check_quota_options(sbi))
return -EINVAL;
+#else
+ if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Filesystem with quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return -EINVAL;
+ }
+ if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be "
+ "mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
@@ -1030,6 +1054,10 @@ static void f2fs_put_super(struct super_block *sb)
/* in the cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
+
+ f2fs_bug_on(sbi, sbi->fsync_node_num);
+
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -1118,7 +1146,7 @@ static int f2fs_statfs_project(struct super_block *sb,
dquot = dqget(sb, qid);
if (IS_ERR(dquot))
return PTR_ERR(dquot);
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
limit = (dquot->dq_dqb.dqb_bsoftlimit ?
dquot->dq_dqb.dqb_bsoftlimit :
@@ -1141,7 +1169,7 @@ static int f2fs_statfs_project(struct super_block *sb,
(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
}
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
dqput(dquot);
return 0;
}
@@ -1310,9 +1338,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (F2FS_IO_SIZE_BITS(sbi))
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (test_opt(sbi, FAULT_INJECTION))
+ if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
F2FS_OPTION(sbi).fault_info.inject_rate);
+ seq_printf(seq, ",fault_type=%u",
+ F2FS_OPTION(sbi).fault_info.inject_type);
+ }
#endif
#ifdef CONFIG_QUOTA
if (test_opt(sbi, QUOTA))
@@ -1343,6 +1374,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
seq_printf(seq, ",fsync_mode=%s", "strict");
+ else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
+ seq_printf(seq, ",fsync_mode=%s", "nobarrier");
return 0;
}
@@ -1355,7 +1388,8 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
F2FS_OPTION(sbi).test_dummy_encryption = false;
- sbi->readdir_ra = 1;
+ F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
+ F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
@@ -1365,12 +1399,12 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- if (f2fs_sb_has_blkzoned(sbi->sb)) {
- set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
set_opt(sbi, DISCARD);
- } else {
+ if (f2fs_sb_has_blkzoned(sbi->sb))
+ set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
- }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -1379,9 +1413,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, POSIX_ACL);
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, 0);
-#endif
+ f2fs_build_fault_attr(sbi, 0, 0);
}
#ifdef CONFIG_QUOTA
@@ -2229,9 +2261,9 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
- if (secs_per_zone > total_sections) {
+ if (secs_per_zone > total_sections || !secs_per_zone) {
f2fs_msg(sb, KERN_INFO,
- "Wrong secs_per_zone (%u > %u)",
+ "Wrong secs_per_zone / total_sections (%u, %u)",
secs_per_zone, total_sections);
return 1;
}
@@ -2282,12 +2314,20 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned int ovp_segments, reserved_segments;
unsigned int main_segs, blocks_per_seg;
+ unsigned int sit_segs, nat_segs;
+ unsigned int sit_bitmap_size, nat_bitmap_size;
+ unsigned int log_blocks_per_seg;
+ unsigned int segment_count_main;
+ unsigned int cp_pack_start_sum, cp_payload;
+ block_t user_block_count;
int i;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
- fsmeta += le32_to_cpu(raw_super->segment_count_sit);
- fsmeta += le32_to_cpu(raw_super->segment_count_nat);
+ sit_segs = le32_to_cpu(raw_super->segment_count_sit);
+ fsmeta += sit_segs;
+ nat_segs = le32_to_cpu(raw_super->segment_count_nat);
+ fsmeta += nat_segs;
fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
@@ -2304,6 +2344,16 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ user_block_count = le64_to_cpu(ckpt->user_block_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ if (!user_block_count || user_block_count >=
+ segment_count_main << log_blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong user_block_count: %u", user_block_count);
+ return 1;
+ }
+
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -2318,6 +2368,28 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
+ nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
+
+ if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
+ nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong bitmap size: sit: %u, nat:%u",
+ sit_bitmap_size, nat_bitmap_size);
+ return 1;
+ }
+
+ cp_pack_start_sum = __start_sum_addr(sbi);
+ cp_payload = __cp_payload(sbi);
+ if (cp_pack_start_sum < cp_payload + 1 ||
+ cp_pack_start_sum > blocks_per_seg - 1 -
+ NR_CURSEG_TYPE) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
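+ /*
+ * Annotation (hedged, not part of the patch): the bounds follow from
+ * the checkpoint pack layout within one segment -- the cp block plus
+ * cp_payload extra blocks precede the summaries, and NR_CURSEG_TYPE
+ * summary blocks plus the trailing cp block must still fit behind
+ * cp_pack_start_sum, giving
+ * cp_payload + 1 <= cp_pack_start_sum <= blocks_per_seg - 1 - NR_CURSEG_TYPE.
+ */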
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
@@ -2651,6 +2723,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}
+
+ sbi->readdir_ra = 1;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -2700,9 +2774,6 @@ try_onemore:
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
- F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
- F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
-
/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sb))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
@@ -2771,6 +2842,7 @@ try_onemore:
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
+ mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -2865,6 +2937,8 @@ try_onemore:
f2fs_init_ino_entry_info(sbi);
+ f2fs_init_fsync_node_info(sbi);
+
/* setup f2fs internal modules */
err = f2fs_build_segment_manager(sbi);
if (err) {
@@ -2912,10 +2986,11 @@ try_onemore:
err = PTR_ERR(root);
goto free_stats;
}
- if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
+ !root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
- goto free_node_inode;
+ goto free_stats;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -2929,10 +3004,7 @@ try_onemore:
goto free_root_inode;
#ifdef CONFIG_QUOTA
- /*
- * Turn on quotas which were not enabled for read-only mounts if
- * filesystem has quota feature, so that they are updated correctly.
- */
+ /* Enable quota usage during mount */
if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err) {
@@ -3090,9 +3162,19 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
static void kill_f2fs_super(struct super_block *sb)
{
if (sb->s_root) {
- set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
- f2fs_stop_gc_thread(F2FS_SB(sb));
- f2fs_stop_discard_thread(F2FS_SB(sb));
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ set_sbi_flag(sbi, SBI_IS_CLOSE);
+ f2fs_stop_gc_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
+
+ if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ struct cp_control cpc = {
+ .reason = CP_UMOUNT,
+ };
+ f2fs_write_checkpoint(sbi, &cpc);
+ }
}
kill_block_super(sb);
}
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2e7e611deaef..81c0e5337443 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/compiler.h>
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
@@ -252,6 +253,7 @@ out:
if (t >= 1) {
sbi->gc_mode = GC_URGENT;
if (sbi->gc_thread) {
+ sbi->gc_thread->gc_wake = 1;
wake_up_interruptible_all(
&sbi->gc_thread->gc_wait_queue_head);
wake_up_discard_thread(sbi, true);
@@ -286,8 +288,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") ||
a->struct_type == GC_THREAD);
- if (gc_entry)
- down_read(&sbi->sb->s_umount);
+ if (gc_entry) {
+ if (!down_read_trylock(&sbi->sb->s_umount))
+ return -EAGAIN;
+ }
ret = __sbi_store(a, sbi, buf, count);
if (gc_entry)
up_read(&sbi->sb->s_umount);
@@ -393,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
@@ -445,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
+ ATTR_LIST(min_seq_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
@@ -516,7 +522,8 @@ static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
-static int segment_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -543,7 +550,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int segment_bits_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -567,7 +575,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int iostat_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -609,6 +618,28 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
+static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
+ void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ int i;
+
+ seq_puts(seq, "format: victim_secmap bitmaps\n");
+
+ for (i = 0; i < MAIN_SECS(sbi); i++) {
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-10d", i);
+ seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0);
+ if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1))
+ seq_putc(seq, '\n');
+ else
+ seq_putc(seq, ' ');
+ }
+ return 0;
+}
+
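/*
 * Annotation (hedged example, not part of the patch): the new
 * /proc/fs/f2fs/<dev>/victim_bits file prints ten sections per row,
 * each row prefixed with its left-aligned starting section number,
 * e.g. (values illustrative):
 *
 *   format: victim_secmap bitmaps
 *   0         0 0 0 1 0 0 0 0 0 0
 *   10        0 0 0 0 1 0 0 0 0 0
 */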
int __init f2fs_init_sysfs(void)
{
int ret;
@@ -658,6 +689,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
segment_bits_seq_show, sb);
proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
iostat_info_seq_show, sb);
+ proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
+ victim_bits_seq_show, sb);
}
return 0;
}
@@ -668,6 +701,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
remove_proc_entry("iostat_info", sbi->s_proc);
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
+ remove_proc_entry("victim_bits", sbi->s_proc);
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
}
kobject_del(&sbi->s_kobj);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 708271871f94..77a010e625f5 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -37,9 +37,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -62,9 +59,6 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -100,12 +94,22 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
const char *name, const void *value,
size_t size, int flags)
{
+ unsigned char old_advise = F2FS_I(inode)->i_advise;
+ unsigned char new_advise;
+
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
- F2FS_I(inode)->i_advise |= *(char *)value;
+ new_advise = *(char *)value;
+ if (new_advise & ~FADVISE_MODIFIABLE_BITS)
+ return -EINVAL;
+
+ new_advise = new_advise & FADVISE_MODIFIABLE_BITS;
+ new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS;
+
+ F2FS_I(inode)->i_advise = new_advise;
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index e9bed49df6b7..78d501c1fb65 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -225,7 +225,8 @@ static inline void cache_init(struct fat_cache_id *cid, int fclus, int dclus)
int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
{
struct super_block *sb = inode->i_sb;
- const int limit = sb->s_maxbytes >> MSDOS_SB(sb)->cluster_bits;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ const int limit = sb->s_maxbytes >> sbi->cluster_bits;
struct fat_entry fatent;
struct fat_cache_id cid;
int nr;
@@ -234,6 +235,12 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
*fclus = 0;
*dclus = MSDOS_I(inode)->i_start;
+ if (!fat_valid_entry(sbi, *dclus)) {
+ fat_fs_error_ratelimit(sb,
+ "%s: invalid start cluster (i_pos %lld, start %08x)",
+ __func__, MSDOS_I(inode)->i_pos, *dclus);
+ return -EIO;
+ }
if (cluster == 0)
return 0;
@@ -250,9 +257,8 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
/* prevent the infinite loop of cluster chain */
if (*fclus > limit) {
fat_fs_error_ratelimit(sb,
- "%s: detected the cluster chain loop"
- " (i_pos %lld)", __func__,
- MSDOS_I(inode)->i_pos);
+ "%s: detected the cluster chain loop (i_pos %lld)",
+ __func__, MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
}
@@ -262,9 +268,8 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
goto out;
else if (nr == FAT_ENT_FREE) {
fat_fs_error_ratelimit(sb,
- "%s: invalid cluster chain (i_pos %lld)",
- __func__,
- MSDOS_I(inode)->i_pos);
+ "%s: invalid cluster chain (i_pos %lld)",
+ __func__, MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
} else if (nr == FAT_ENT_EOF) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 8e100c3bf72c..7f5f3699fc6c 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1130,7 +1130,7 @@ error:
return err;
}
-int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
+int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
{
struct super_block *sb = dir->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 8fc1093da47d..9d7d2d5da28b 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -304,7 +304,7 @@ extern int fat_scan_logstart(struct inode *dir, int i_logstart,
struct fat_slot_info *sinfo);
extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
struct msdos_dir_entry **de);
-extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
+extern int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts);
extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
struct fat_slot_info *sinfo);
extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo);
@@ -348,6 +348,11 @@ static inline void fatent_brelse(struct fat_entry *fatent)
fatent->fat_inode = NULL;
}
+static inline bool fat_valid_entry(struct msdos_sb_info *sbi, int entry)
+{
+ return FAT_START_ENT <= entry && entry < sbi->max_cluster;
+}
+
extern void fat_ent_access_init(struct super_block *sb);
extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent,
int entry);
@@ -357,6 +362,7 @@ extern int fat_alloc_clusters(struct inode *inode, int *cluster,
int nr_cluster);
extern int fat_free_clusters(struct inode *inode, int cluster);
extern int fat_count_free_clusters(struct super_block *sb);
+extern int fat_trim_fs(struct inode *inode, struct fstrim_range *range);
/* fat/file.c */
extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
@@ -406,9 +412,9 @@ void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...);
} while (0)
extern int fat_clusters_flush(struct super_block *sb);
extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
-extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
+extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 __time, __le16 __date, u8 time_cs);
-extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
+extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 *time, __le16 *date, u8 *time_cs);
extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index bac10de678cc..defc2168de91 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -4,6 +4,7 @@
*/
#include <linux/blkdev.h>
+#include <linux/sched/signal.h>
#include "fat.h"
struct fatent_operations {
@@ -23,7 +24,7 @@ static void fat12_ent_blocknr(struct super_block *sb, int entry,
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int bytes = entry + (entry >> 1);
- WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry);
+ WARN_ON(!fat_valid_entry(sbi, entry));
*offset = bytes & (sb->s_blocksize - 1);
*blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits);
}
@@ -33,7 +34,7 @@ static void fat_ent_blocknr(struct super_block *sb, int entry,
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int bytes = (entry << sbi->fatent_shift);
- WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry);
+ WARN_ON(!fat_valid_entry(sbi, entry));
*offset = bytes & (sb->s_blocksize - 1);
*blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits);
}
@@ -353,7 +354,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)
int err, offset;
sector_t blocknr;
- if (entry < FAT_START_ENT || sbi->max_cluster <= entry) {
+ if (!fat_valid_entry(sbi, entry)) {
fatent_brelse(fatent);
fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);
return -EIO;
@@ -690,3 +691,104 @@ out:
unlock_fat(sbi);
return err;
}
+
+static int fat_trim_clusters(struct super_block *sb, u32 clus, u32 nr_clus)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ return sb_issue_discard(sb, fat_clus_to_blknr(sbi, clus),
+ nr_clus * sbi->sec_per_clus, GFP_NOFS, 0);
+}
+
+int fat_trim_fs(struct inode *inode, struct fstrim_range *range)
+{
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ const struct fatent_operations *ops = sbi->fatent_ops;
+ struct fat_entry fatent;
+ u64 ent_start, ent_end, minlen, trimmed = 0;
+ u32 free = 0;
+ unsigned long reada_blocks, reada_mask, cur_block = 0;
+ int err = 0;
+
+ /*
+ * FAT data is organized as clusters, so trim at the granularity of clusters.
+ *
+ * fstrim_range is in bytes; convert the values to cluster indices.
+ * Treat sectors before the data region as all used, so they are not trimmed.
+ */
+ ent_start = max_t(u64, range->start>>sbi->cluster_bits, FAT_START_ENT);
+ ent_end = ent_start + (range->len >> sbi->cluster_bits) - 1;
+ minlen = range->minlen >> sbi->cluster_bits;
+
+ if (ent_start >= sbi->max_cluster || range->len < sbi->cluster_size)
+ return -EINVAL;
+ if (ent_end >= sbi->max_cluster)
+ ent_end = sbi->max_cluster - 1;
+
+ reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
+ reada_mask = reada_blocks - 1;
+
+ fatent_init(&fatent);
+ lock_fat(sbi);
+ fatent_set_entry(&fatent, ent_start);
+ while (fatent.entry <= ent_end) {
+ /* readahead of fat blocks */
+ if ((cur_block & reada_mask) == 0) {
+ unsigned long rest = sbi->fat_length - cur_block;
+ fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
+ }
+ cur_block++;
+
+ err = fat_ent_read_block(sb, &fatent);
+ if (err)
+ goto error;
+ do {
+ if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
+ free++;
+ } else if (free) {
+ if (free >= minlen) {
+ u32 clus = fatent.entry - free;
+
+ err = fat_trim_clusters(sb, clus, free);
+ if (err && err != -EOPNOTSUPP)
+ goto error;
+ if (!err)
+ trimmed += free;
+ err = 0;
+ }
+ free = 0;
+ }
+ } while (fat_ent_next(sbi, &fatent) && fatent.entry <= ent_end);
+
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto error;
+ }
+
+ if (need_resched()) {
+ fatent_brelse(&fatent);
+ unlock_fat(sbi);
+ cond_resched();
+ lock_fat(sbi);
+ }
+ }
+ /* handle the case where the tail entries are all free */
+ if (free && free >= minlen) {
+ u32 clus = fatent.entry - free;
+
+ err = fat_trim_clusters(sb, clus, free);
+ if (err && err != -EOPNOTSUPP)
+ goto error;
+ if (!err)
+ trimmed += free;
+ err = 0;
+ }
+
+error:
+ fatent_brelse(&fatent);
+ unlock_fat(sbi);
+
+ range->len = trimmed << sbi->cluster_bits;
+
+ return err;
+}
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 4724cc9ad650..4f3d72fb1e60 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -121,6 +121,37 @@ static int fat_ioctl_get_volume_id(struct inode *inode, u32 __user *user_attr)
return put_user(sbi->vol_id, user_attr);
}
+static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
+{
+ struct super_block *sb = inode->i_sb;
+ struct fstrim_range __user *user_range;
+ struct fstrim_range range;
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
+ user_range = (struct fstrim_range __user *)arg;
+ if (copy_from_user(&range, user_range, sizeof(range)))
+ return -EFAULT;
+
+ range.minlen = max_t(unsigned int, range.minlen,
+ q->limits.discard_granularity);
+
+ err = fat_trim_fs(inode, &range);
+ if (err < 0)
+ return err;
+
+ if (copy_to_user(user_range, &range, sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+}
+
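+/*
+ * Hedged usage sketch (userspace, not part of the patch): exercising the
+ * new FITRIM handler on a mounted FAT volume. FITRIM and struct
+ * fstrim_range are the generic definitions from <linux/fs.h>; the mount
+ * point path is an assumption.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+
+int main(void)
+{
+	struct fstrim_range range = {
+		.start  = 0,
+		.len    = UINT64_MAX,	/* whole filesystem */
+		.minlen = 0,	/* raised to discard_granularity by the kernel */
+	};
+	int fd = open("/mnt/fat", O_RDONLY);
+
+	if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
+		perror("FITRIM");
+		return 1;
+	}
+	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
+	return 0;
+}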
long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -133,6 +164,8 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return fat_ioctl_set_attributes(filp, user_attr);
case FAT_IOCTL_GET_VOLUME_ID:
return fat_ioctl_get_volume_id(inode, user_attr);
+ case FITRIM:
+ return fat_ioctl_fitrim(inode, arg);
default:
return -ENOTTY; /* Inappropriate ioctl for device */
}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bfd589ea74c0..d6b81e31f9f5 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -508,7 +508,6 @@ static int fat_validate_dir(struct inode *dir)
/* doesn't deal with root inode */
int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
{
- struct timespec ts;
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int error;
@@ -559,14 +558,11 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
- fat_time_fat2unix(sbi, &ts, de->time, de->date, 0);
- inode->i_mtime = timespec_to_timespec64(ts);
+ fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
if (sbi->options.isvfat) {
- fat_time_fat2unix(sbi, &ts, de->ctime,
+ fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
de->cdate, de->ctime_cs);
- inode->i_ctime = timespec_to_timespec64(ts);
- fat_time_fat2unix(sbi, &ts, 0, de->adate, 0);
- inode->i_atime = timespec_to_timespec64(ts);
+ fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
} else
inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -843,7 +839,6 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
static int __fat_write_inode(struct inode *inode, int wait)
{
- struct timespec ts;
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
@@ -881,16 +876,13 @@ retry:
raw_entry->size = cpu_to_le32(inode->i_size);
raw_entry->attr = fat_make_attrs(inode);
fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart);
- ts = timespec64_to_timespec(inode->i_mtime);
- fat_time_unix2fat(sbi, &ts, &raw_entry->time,
+ fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
&raw_entry->date, NULL);
if (sbi->options.isvfat) {
__le16 atime;
- ts = timespec64_to_timespec(inode->i_ctime);
- fat_time_unix2fat(sbi, &ts, &raw_entry->ctime,
+ fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
&raw_entry->cdate, &raw_entry->ctime_cs);
- ts = timespec64_to_timespec(inode->i_atime);
- fat_time_unix2fat(sbi, &ts, &atime,
+ fat_time_unix2fat(sbi, &inode->i_atime, &atime,
&raw_entry->adate, NULL);
}
spin_unlock(&sbi->inode_hash_lock);
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index f9bdc1e01c98..573836dcaefc 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -180,17 +180,18 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100)
/* Linear day numbers of the respective 1sts in non-leap years. */
-static time_t days_in_year[] = {
+static long days_in_year[] = {
/* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */
0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
};
/* Convert a FAT time/date pair to a UNIX date (seconds since 1970-01-01). */
-void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
+void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 __time, __le16 __date, u8 time_cs)
{
u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date);
- time_t second, day, leap_day, month, year;
+ time64_t second;
+ long day, leap_day, month, year;
year = date >> 9;
month = max(1, (date >> 5) & 0xf);
@@ -205,7 +206,7 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
second = (time & 0x1f) << 1;
second += ((time >> 5) & 0x3f) * SECS_PER_MIN;
second += (time >> 11) * SECS_PER_HOUR;
- second += (year * 365 + leap_day
+ second += (time64_t)(year * 365 + leap_day
+ days_in_year[month] + day
+ DAYS_DELTA) * SECS_PER_DAY;
@@ -224,11 +225,11 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
}
/* Convert linear UNIX date to a FAT time/date pair. */
-void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
+void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 *time, __le16 *date, u8 *time_cs)
{
struct tm tm;
- time_to_tm(ts->tv_sec,
+ time64_to_tm(ts->tv_sec,
(sbi->options.tz_set ? sbi->options.time_offset :
-sys_tz.tz_minuteswest) * SECS_PER_MIN, &tm);
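/*
 * Annotation (hedged sketch, not part of the patch): the arithmetic above
 * unpacks the on-disk 16-bit FAT time/date fields. A standalone decoder
 * showing the same bit layout, names hypothetical:
 */
struct fat_tm { int year, mon, day, hour, min, sec; };

static struct fat_tm fat_unpack(unsigned short time, unsigned short date)
{
	struct fat_tm t = {
		.year = 1980 + (date >> 9),	/* 7 bits: years since 1980 */
		.mon  = (date >> 5) & 0xf,	/* 4 bits: 1..12 */
		.day  = date & 0x1f,		/* 5 bits: 1..31 */
		.hour = time >> 11,		/* 5 bits: 0..23 */
		.min  = (time >> 5) & 0x3f,	/* 6 bits: 0..59 */
		.sec  = (time & 0x1f) << 1,	/* 5 bits, 2 s resolution */
	};
	return t;
}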
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 16a832c37d66..efb8c40c9d27 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -225,7 +225,7 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
/***** Creates a directory entry (name is already formatted). */
static int msdos_add_entry(struct inode *dir, const unsigned char *name,
int is_dir, int is_hid, int cluster,
- struct timespec *ts, struct fat_slot_info *sinfo)
+ struct timespec64 *ts, struct fat_slot_info *sinfo)
{
struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
struct msdos_dir_entry de;
@@ -250,7 +250,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
if (err)
return err;
- dir->i_ctime = dir->i_mtime = timespec_to_timespec64(*ts);
+ dir->i_ctime = dir->i_mtime = *ts;
if (IS_DIRSYNC(dir))
(void)fat_sync_inode(dir);
else
@@ -267,7 +267,6 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct inode *inode = NULL;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
unsigned char msdos_name[MSDOS_NAME];
int err, is_hid;
@@ -286,8 +285,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &t, &sinfo);
+ err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &ts, &sinfo);
if (err)
goto out;
inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
@@ -347,7 +345,6 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
unsigned char msdos_name[MSDOS_NAME];
struct timespec64 ts;
- struct timespec t;
int err, is_hid, cluster;
mutex_lock(&MSDOS_SB(sb)->s_lock);
@@ -365,13 +362,12 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
}
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- cluster = fat_alloc_new_dir(dir, &t);
+ cluster = fat_alloc_new_dir(dir, &ts);
if (cluster < 0) {
err = cluster;
goto out;
}
- err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &t, &sinfo);
+ err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &ts, &sinfo);
if (err)
goto out_free;
inc_nlink(dir);
@@ -503,9 +499,8 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
new_i_pos = MSDOS_I(new_inode)->i_pos;
fat_detach(new_inode);
} else {
- struct timespec t = timespec64_to_timespec(ts);
err = msdos_add_entry(new_dir, new_name, is_dir, is_hid, 0,
- &t, &sinfo);
+ &ts, &sinfo);
if (err)
goto out;
new_i_pos = sinfo.i_pos;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 9a5469120caa..82cd1e69cbdf 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -577,7 +577,7 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
static int vfat_build_slots(struct inode *dir, const unsigned char *name,
int len, int is_dir, int cluster,
- struct timespec *ts,
+ struct timespec64 *ts,
struct msdos_dir_slot *slots, int *nr_slots)
{
struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
@@ -653,7 +653,7 @@ out_free:
}
static int vfat_add_entry(struct inode *dir, const struct qstr *qname,
- int is_dir, int cluster, struct timespec *ts,
+ int is_dir, int cluster, struct timespec64 *ts,
struct fat_slot_info *sinfo)
{
struct msdos_dir_slot *slots;
@@ -678,7 +678,7 @@ static int vfat_add_entry(struct inode *dir, const struct qstr *qname,
goto cleanup;
/* update timestamp */
- dir->i_ctime = dir->i_mtime = dir->i_atime = timespec_to_timespec64(*ts);
+ dir->i_ctime = dir->i_mtime = dir->i_atime = *ts;
if (IS_DIRSYNC(dir))
(void)fat_sync_inode(dir);
else
@@ -762,14 +762,12 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct inode *inode;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &t, &sinfo);
+ err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo);
if (err)
goto out;
inode_inc_iversion(dir);
@@ -853,19 +851,17 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
int err, cluster;
mutex_lock(&MSDOS_SB(sb)->s_lock);
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- cluster = fat_alloc_new_dir(dir, &t);
+ cluster = fat_alloc_new_dir(dir, &ts);
if (cluster < 0) {
err = cluster;
goto out;
}
- err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &t, &sinfo);
+ err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo);
if (err)
goto out_free;
inode_inc_iversion(dir);
@@ -904,7 +900,6 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode, *new_inode;
struct fat_slot_info old_sinfo, sinfo;
struct timespec64 ts;
- struct timespec t;
loff_t new_i_pos;
int err, is_dir, update_dotdot, corrupt = 0;
struct super_block *sb = old_dir->i_sb;
@@ -939,9 +934,8 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
new_i_pos = MSDOS_I(new_inode)->i_pos;
fat_detach(new_inode);
} else {
- t = timespec64_to_timespec(ts);
err = vfat_add_entry(new_dir, &new_dentry->d_name, is_dir, 0,
- &t, &sinfo);
+ &ts, &sinfo);
if (err)
goto out;
new_i_pos = sinfo.i_pos;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 12273b6ea56d..4137d96534a6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -116,7 +116,7 @@ int f_setown(struct file *filp, unsigned long arg, int force)
struct pid *pid = NULL;
int who = arg, ret = 0;
- type = PIDTYPE_PID;
+ type = PIDTYPE_TGID;
if (who < 0) {
/* avoid overflow below */
if (who == INT_MIN)
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(f_setown);
void f_delown(struct file *filp)
{
- f_modown(filp, NULL, PIDTYPE_PID, 1);
+ f_modown(filp, NULL, PIDTYPE_TGID, 1);
}
pid_t f_getown(struct file *filp)
@@ -171,11 +171,11 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
switch (owner.type) {
case F_OWNER_TID:
- type = PIDTYPE_MAX;
+ type = PIDTYPE_PID;
break;
case F_OWNER_PID:
- type = PIDTYPE_PID;
+ type = PIDTYPE_TGID;
break;
case F_OWNER_PGRP:
@@ -206,11 +206,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
read_lock(&filp->f_owner.lock);
owner.pid = pid_vnr(filp->f_owner.pid);
switch (filp->f_owner.pid_type) {
- case PIDTYPE_MAX:
+ case PIDTYPE_PID:
owner.type = F_OWNER_TID;
break;
- case PIDTYPE_PID:
+ case PIDTYPE_TGID:
owner.type = F_OWNER_PID;
break;
@@ -723,7 +723,7 @@ static inline int sigio_perm(struct task_struct *p,
static void send_sigio_to_task(struct task_struct *p,
struct fown_struct *fown,
- int fd, int reason, int group)
+ int fd, int reason, enum pid_type type)
{
/*
* F_SETSIG can change ->signum lockless in parallel, make
@@ -767,11 +767,11 @@ static void send_sigio_to_task(struct task_struct *p,
else
si.si_band = mangle_poll(band_table[reason - POLL_IN]);
si.si_fd = fd;
- if (!do_send_sig_info(signum, &si, p, group))
+ if (!do_send_sig_info(signum, &si, p, type))
break;
/* fall-through: fall back on the old plain SIGIO signal */
case 0:
- do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
+ do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
}
}
@@ -780,34 +780,36 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
struct task_struct *p;
enum pid_type type;
struct pid *pid;
- int group = 1;
read_lock(&fown->lock);
type = fown->pid_type;
- if (type == PIDTYPE_MAX) {
- group = 0;
- type = PIDTYPE_PID;
- }
-
pid = fown->pid;
if (!pid)
goto out_unlock_fown;
-
- read_lock(&tasklist_lock);
- do_each_pid_task(pid, type, p) {
- send_sigio_to_task(p, fown, fd, band, group);
- } while_each_pid_task(pid, type, p);
- read_unlock(&tasklist_lock);
+
+ if (type <= PIDTYPE_TGID) {
+ rcu_read_lock();
+ p = pid_task(pid, PIDTYPE_PID);
+ if (p)
+ send_sigio_to_task(p, fown, fd, band, type);
+ rcu_read_unlock();
+ } else {
+ read_lock(&tasklist_lock);
+ do_each_pid_task(pid, type, p) {
+ send_sigio_to_task(p, fown, fd, band, type);
+ } while_each_pid_task(pid, type, p);
+ read_unlock(&tasklist_lock);
+ }
out_unlock_fown:
read_unlock(&fown->lock);
}
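/*
 * Annotation (hedged, not part of the patch): after the PIDTYPE rework,
 * PIDTYPE_PID and PIDTYPE_TGID both name a single task, so a pid_task()
 * lookup under RCU is enough; only PIDTYPE_PGID/PIDTYPE_SID fan out to
 * several tasks and still need the do_each_pid_task() walk under
 * tasklist_lock.
 */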
static void send_sigurg_to_task(struct task_struct *p,
- struct fown_struct *fown, int group)
+ struct fown_struct *fown, enum pid_type type)
{
if (sigio_perm(p, fown, SIGURG))
- do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
+ do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
}
int send_sigurg(struct fown_struct *fown)
@@ -815,28 +817,30 @@ int send_sigurg(struct fown_struct *fown)
struct task_struct *p;
enum pid_type type;
struct pid *pid;
- int group = 1;
int ret = 0;
read_lock(&fown->lock);
type = fown->pid_type;
- if (type == PIDTYPE_MAX) {
- group = 0;
- type = PIDTYPE_PID;
- }
-
pid = fown->pid;
if (!pid)
goto out_unlock_fown;
ret = 1;
-
- read_lock(&tasklist_lock);
- do_each_pid_task(pid, type, p) {
- send_sigurg_to_task(p, fown, group);
- } while_each_pid_task(pid, type, p);
- read_unlock(&tasklist_lock);
+
+ if (type <= PIDTYPE_TGID) {
+ rcu_read_lock();
+ p = pid_task(pid, PIDTYPE_PID);
+ if (p)
+ send_sigurg_to_task(p, fown, type);
+ rcu_read_unlock();
+ } else {
+ read_lock(&tasklist_lock);
+ do_each_pid_task(pid, type, p) {
+ send_sigurg_to_task(p, fown, type);
+ } while_each_pid_task(pid, type, p);
+ read_unlock(&tasklist_lock);
+ }
out_unlock_fown:
read_unlock(&fown->lock);
return ret;
diff --git a/fs/file_table.c b/fs/file_table.c
index d6eccd04d703..e49af4caf15d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head)
static inline void file_free(struct file *f)
{
security_file_free(f);
- percpu_counter_dec(&nr_files);
+ if (!(f->f_mode & FMODE_NOACCOUNT))
+ percpu_counter_dec(&nr_files);
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}
@@ -91,6 +92,34 @@ int proc_nr_files(struct ctl_table *table, int write,
}
#endif
+static struct file *__alloc_file(int flags, const struct cred *cred)
+{
+ struct file *f;
+ int error;
+
+ f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+ if (unlikely(!f))
+ return ERR_PTR(-ENOMEM);
+
+ f->f_cred = get_cred(cred);
+ error = security_file_alloc(f);
+ if (unlikely(error)) {
+ file_free_rcu(&f->f_u.fu_rcuhead);
+ return ERR_PTR(error);
+ }
+
+ atomic_long_set(&f->f_count, 1);
+ rwlock_init(&f->f_owner.lock);
+ spin_lock_init(&f->f_lock);
+ mutex_init(&f->f_pos_lock);
+ eventpoll_init_file(f);
+ f->f_flags = flags;
+ f->f_mode = OPEN_FMODE(flags);
+ /* f->f_version: 0 */
+
+ return f;
+}
+
/* Find an unused file structure and return a pointer to it.
* Returns an error pointer if some error happened, e.g. we went over the
* file structures limit, ran out of memory or the operation is not permitted.
@@ -105,7 +134,6 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
{
static long old_max;
struct file *f;
- int error;
/*
* Privileged users can go above max_files
@@ -119,26 +147,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
goto over;
}
- f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
- if (unlikely(!f))
- return ERR_PTR(-ENOMEM);
-
- f->f_cred = get_cred(cred);
- error = security_file_alloc(f);
- if (unlikely(error)) {
- file_free_rcu(&f->f_u.fu_rcuhead);
- return ERR_PTR(error);
- }
+ f = __alloc_file(flags, cred);
+ if (!IS_ERR(f))
+ percpu_counter_inc(&nr_files);
- atomic_long_set(&f->f_count, 1);
- rwlock_init(&f->f_owner.lock);
- spin_lock_init(&f->f_lock);
- mutex_init(&f->f_pos_lock);
- eventpoll_init_file(f);
- f->f_flags = flags;
- f->f_mode = OPEN_FMODE(flags);
- /* f->f_version: 0 */
- percpu_counter_inc(&nr_files);
return f;
over:
@@ -150,6 +162,21 @@ over:
return ERR_PTR(-ENFILE);
}
+/*
+ * Variant of alloc_empty_file() that doesn't check or modify nr_files.
+ *
+ * Should not be used unless there's a very good reason to do so.
+ */
+struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
+{
+ struct file *f = __alloc_file(flags, cred);
+
+ if (!IS_ERR(f))
+ f->f_mode |= FMODE_NOACCOUNT;
+
+ return f;
+}
+
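/*
 * Annotation (hedged, not part of the patch): FMODE_NOACCOUNT marks the
 * file so that file_free() above skips the percpu_counter_dec(), keeping
 * nr_files balanced for files that were never counted at allocation.
 */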
/**
* alloc_file - allocate and initialize a 'struct file'
*
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c6b88fa85e2e..11ea2c4a38ab 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -127,6 +127,16 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
return !fc->initialized || (for_background && fc->blocked);
}
+static void fuse_drop_waiting(struct fuse_conn *fc)
+{
+ if (fc->connected) {
+ atomic_dec(&fc->num_waiting);
+ } else if (atomic_dec_and_test(&fc->num_waiting)) {
+ /* wake up aborters */
+ wake_up_all(&fc->blocked_waitq);
+ }
+}
+
static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
bool for_background)
{
@@ -175,7 +185,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
return req;
out:
- atomic_dec(&fc->num_waiting);
+ fuse_drop_waiting(fc);
return ERR_PTR(err);
}
@@ -285,7 +295,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
if (test_bit(FR_WAITING, &req->flags)) {
__clear_bit(FR_WAITING, &req->flags);
- atomic_dec(&fc->num_waiting);
+ fuse_drop_waiting(fc);
}
if (req->stolen_file)
@@ -371,7 +381,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
struct fuse_iqueue *fiq = &fc->iq;
if (test_and_set_bit(FR_FINISHED, &req->flags))
- return;
+ goto put_request;
spin_lock(&fiq->waitq.lock);
list_del_init(&req->intr_entry);
@@ -400,6 +410,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
wake_up(&req->waitq);
if (req->end)
req->end(fc, req);
+put_request:
fuse_put_request(fc, req);
}
@@ -1362,8 +1373,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!fud)
return -EPERM;
- bufs = kmalloc_array(pipe->buffers, sizeof(struct pipe_buffer),
- GFP_KERNEL);
+ bufs = kvmalloc_array(pipe->buffers, sizeof(struct pipe_buffer),
+ GFP_KERNEL);
if (!bufs)
return -ENOMEM;
@@ -1396,7 +1407,7 @@ out:
for (; page_nr < cs.nr_segs; page_nr++)
put_page(bufs[page_nr].page);
- kfree(bufs);
+ kvfree(bufs);
return ret;
}
@@ -1944,12 +1955,15 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
if (!fud)
return -EPERM;
- bufs = kmalloc_array(pipe->buffers, sizeof(struct pipe_buffer),
- GFP_KERNEL);
- if (!bufs)
+ pipe_lock(pipe);
+
+ bufs = kvmalloc_array(pipe->nrbufs, sizeof(struct pipe_buffer),
+ GFP_KERNEL);
+ if (!bufs) {
+ pipe_unlock(pipe);
return -ENOMEM;
+ }
- pipe_lock(pipe);
nbuf = 0;
rem = 0;
for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
@@ -2003,7 +2017,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe_buf_release(pipe, &bufs[idx]);
out:
- kfree(bufs);
+ kvfree(bufs);
return ret;
}
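Both splice paths size the bufs array by the pipe's buffer count, which userspace can raise via fcntl(F_SETPIPE_SZ), so a plain kmalloc_array() can fail on large pipes; kvmalloc_array() falls back to a vmalloc'd buffer and kvfree() releases either kind. A rough userspace analogue of that try-cheap-then-fall-back idiom; the header tag is this sketch's assumption (the kernel distinguishes the two cases by inspecting the address instead):

#include <stdlib.h>
#include <sys/mman.h>

struct vhdr { size_t size; int mapped; };

static void *kv_alloc(size_t size)
{
	struct vhdr *h = malloc(sizeof(*h) + size);

	if (h) {
		h->mapped = 0;          /* cheap path succeeded */
	} else {
		h = mmap(NULL, sizeof(*h) + size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (h == MAP_FAILED)
			return NULL;
		h->mapped = 1;          /* virtually-mapped fallback */
	}
	h->size = size;
	return h + 1;
}

static void kv_free(void *p)
{
	struct vhdr *h;

	if (!p)
		return;
	h = (struct vhdr *)p - 1;
	if (h->mapped)
		munmap(h, sizeof(*h) + h->size);
	else
		free(h);
}

int main(void)
{
	void *p = kv_alloc(1 << 20);

	kv_free(p);                     /* one free routine for both paths */
	return 0;
}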
@@ -2087,8 +2101,7 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
if (fc->connected) {
struct fuse_dev *fud;
struct fuse_req *req, *next;
- LIST_HEAD(to_end1);
- LIST_HEAD(to_end2);
+ LIST_HEAD(to_end);
fc->connected = 0;
fc->blocked = 0;
@@ -2105,11 +2118,12 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
set_bit(FR_ABORTED, &req->flags);
if (!test_bit(FR_LOCKED, &req->flags)) {
set_bit(FR_PRIVATE, &req->flags);
- list_move(&req->list, &to_end1);
+ __fuse_get_request(req);
+ list_move(&req->list, &to_end);
}
spin_unlock(&req->waitq.lock);
}
- list_splice_init(&fpq->processing, &to_end2);
+ list_splice_tail_init(&fpq->processing, &to_end);
spin_unlock(&fpq->lock);
}
fc->max_background = UINT_MAX;
@@ -2117,9 +2131,9 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
spin_lock(&fiq->waitq.lock);
fiq->connected = 0;
- list_splice_init(&fiq->pending, &to_end2);
- list_for_each_entry(req, &to_end2, list)
+ list_for_each_entry(req, &fiq->pending, list)
clear_bit(FR_PENDING, &req->flags);
+ list_splice_tail_init(&fiq->pending, &to_end);
while (forget_pending(fiq))
kfree(dequeue_forget(fiq, 1, NULL));
wake_up_all_locked(&fiq->waitq);
@@ -2129,19 +2143,18 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
wake_up_all(&fc->blocked_waitq);
spin_unlock(&fc->lock);
- while (!list_empty(&to_end1)) {
- req = list_first_entry(&to_end1, struct fuse_req, list);
- __fuse_get_request(req);
- list_del_init(&req->list);
- request_end(fc, req);
- }
- end_requests(fc, &to_end2);
+ end_requests(fc, &to_end);
} else {
spin_unlock(&fc->lock);
}
}
EXPORT_SYMBOL_GPL(fuse_abort_conn);
+void fuse_wait_aborted(struct fuse_conn *fc)
+{
+ wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
+}
+
int fuse_dev_release(struct inode *inode, struct file *file)
{
struct fuse_dev *fud = fuse_get_dev(file);
@@ -2149,9 +2162,15 @@ int fuse_dev_release(struct inode *inode, struct file *file)
if (fud) {
struct fuse_conn *fc = fud->fc;
struct fuse_pqueue *fpq = &fud->pq;
+ LIST_HEAD(to_end);
+ spin_lock(&fpq->lock);
WARN_ON(!list_empty(&fpq->io));
- end_requests(fc, &fpq->processing);
+ list_splice_init(&fpq->processing, &to_end);
+ spin_unlock(&fpq->lock);
+
+ end_requests(fc, &to_end);
+
/* Are we the last open device? */
if (atomic_dec_and_test(&fc->dev_count)) {
WARN_ON(fc->iq.fasync != NULL);
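The abort path now has a precise notion of "drained": every request that bumped num_waiting drops it through fuse_drop_waiting(), and once the connection is dead the final drop wakes fuse_wait_aborted(). A compact userspace model of that handshake, with a pthread condvar standing in for the kernel waitqueue (single-threaded here just to show the ordering):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static int num_waiting;
static bool connected = true;

static void drop_waiting(void)
{
	pthread_mutex_lock(&lock);
	num_waiting--;
	/* only the final drop after an abort must wake the waiter */
	if (!connected && num_waiting == 0)
		pthread_cond_broadcast(&drained);
	pthread_mutex_unlock(&lock);
}

static void wait_aborted(void)
{
	pthread_mutex_lock(&lock);
	while (num_waiting != 0)
		pthread_cond_wait(&drained, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	connected = false;              /* the connection has been aborted */
	num_waiting = 1;                /* one request still in flight */
	drop_waiting();                 /* final drop signals the condvar */
	wait_aborted();                 /* returns immediately: drained */
	return 0;
}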
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d80aab0d5982..0979609d6eba 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -355,11 +355,12 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
struct inode *inode;
struct dentry *newent;
bool outarg_valid = true;
+ bool locked;
- fuse_lock_inode(dir);
+ locked = fuse_lock_inode(dir);
err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
&outarg, &inode);
- fuse_unlock_inode(dir);
+ fuse_unlock_inode(dir, locked);
if (err == -ENOENT) {
outarg_valid = false;
err = 0;
@@ -1347,6 +1348,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
u64 attr_version = 0;
+ bool locked;
if (is_bad_inode(inode))
return -EIO;
@@ -1374,9 +1376,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIR);
}
- fuse_lock_inode(inode);
+ locked = fuse_lock_inode(inode);
fuse_request_send(fc, req);
- fuse_unlock_inode(inode);
+ fuse_unlock_inode(inode, locked);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a201fb0ac64f..32d0b883e74f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
@@ -866,6 +867,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
}
if (WARN_ON(req->num_pages >= req->max_pages)) {
+ unlock_page(page);
fuse_put_request(fc, req);
return -EIO;
}
@@ -2048,7 +2050,7 @@ static void fuse_vma_close(struct vm_area_struct *vma)
* - sync(2)
* - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
*/
-static int fuse_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct inode *inode = file_inode(vmf->vma->vm_file);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5256ad333b05..f78e9614bb5f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -862,6 +862,7 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
/* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc, bool is_abort);
+void fuse_wait_aborted(struct fuse_conn *fc);
/**
* Invalidate inode attributes
@@ -974,8 +975,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
void fuse_set_initialized(struct fuse_conn *fc);
-void fuse_unlock_inode(struct inode *inode);
-void fuse_lock_inode(struct inode *inode);
+void fuse_unlock_inode(struct inode *inode, bool locked);
+bool fuse_lock_inode(struct inode *inode);
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
size_t size, int flags);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index a24df8861b40..db9e60b7eb69 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -208,7 +208,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
struct fuse_inode *fi = get_fuse_inode(inode);
bool is_wb = fc->writeback_cache;
loff_t oldsize;
- struct timespec old_mtime;
+ struct timespec64 old_mtime;
spin_lock(&fc->lock);
if ((attr_version != 0 && fi->attr_version > attr_version) ||
@@ -217,7 +217,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
return;
}
- old_mtime = timespec64_to_timespec(inode->i_mtime);
+ old_mtime = inode->i_mtime;
fuse_change_attributes_common(inode, attr, attr_valid);
oldsize = inode->i_size;
@@ -237,7 +237,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
truncate_pagecache(inode, attr->size);
inval = true;
} else if (fc->auto_inval_data) {
- struct timespec new_mtime = {
+ struct timespec64 new_mtime = {
.tv_sec = attr->mtime,
.tv_nsec = attr->mtimensec,
};
@@ -246,7 +246,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
* Auto inval mode also checks and invalidates if mtime
* has changed.
*/
- if (!timespec_equal(&old_mtime, &new_mtime))
+ if (!timespec64_equal(&old_mtime, &new_mtime))
inval = true;
}
@@ -357,15 +357,21 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
return 0;
}
-void fuse_lock_inode(struct inode *inode)
+bool fuse_lock_inode(struct inode *inode)
{
- if (!get_fuse_conn(inode)->parallel_dirops)
+ bool locked = false;
+
+ if (!get_fuse_conn(inode)->parallel_dirops) {
mutex_lock(&get_fuse_inode(inode)->mutex);
+ locked = true;
+ }
+
+ return locked;
}
-void fuse_unlock_inode(struct inode *inode)
+void fuse_unlock_inode(struct inode *inode, bool locked)
{
- if (!get_fuse_conn(inode)->parallel_dirops)
+ if (locked)
mutex_unlock(&get_fuse_inode(inode)->mutex);
}
@@ -391,9 +397,6 @@ static void fuse_put_super(struct super_block *sb)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
- fuse_send_destroy(fc);
-
- fuse_abort_conn(fc, false);
mutex_lock(&fuse_mutex);
list_del(&fc->entry);
fuse_ctl_remove_conn(fc);
@@ -1210,16 +1213,25 @@ static struct dentry *fuse_mount(struct file_system_type *fs_type,
return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
}
-static void fuse_kill_sb_anon(struct super_block *sb)
+static void fuse_sb_destroy(struct super_block *sb)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
if (fc) {
+ fuse_send_destroy(fc);
+
+ fuse_abort_conn(fc, false);
+ fuse_wait_aborted(fc);
+
down_write(&fc->killsb);
fc->sb = NULL;
up_write(&fc->killsb);
}
+}
+static void fuse_kill_sb_anon(struct super_block *sb)
+{
+ fuse_sb_destroy(sb);
kill_anon_super(sb);
}
@@ -1242,14 +1254,7 @@ static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
static void fuse_kill_sb_blk(struct super_block *sb)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
-
- if (fc) {
- down_write(&fc->killsb);
- fc->sb = NULL;
- up_write(&fc->killsb);
- }
-
+ fuse_sb_destroy(sb);
kill_block_super(sb);
}
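The fuse_lock_inode() change fixes a classic hazard: the lock side keyed off a connection flag that may change between lock and unlock (parallel_dirops is set when the connection initializes), so the unlock side must key off what actually happened rather than re-reading the flag. A userspace sketch of the token-returning idiom:

#include <pthread.h>
#include <stdbool.h>

struct dir_state {
	pthread_mutex_t mutex;
	bool parallel_dirops;           /* may flip after we locked */
};

static bool dir_lock(struct dir_state *d)
{
	if (!d->parallel_dirops) {
		pthread_mutex_lock(&d->mutex);
		return true;            /* tell the caller what we did */
	}
	return false;
}

static void dir_unlock(struct dir_state *d, bool locked)
{
	/* key off the token, not a re-read of the flag */
	if (locked)
		pthread_mutex_unlock(&d->mutex);
}

int main(void)
{
	struct dir_state d = { PTHREAD_MUTEX_INITIALIZER, false };
	bool locked = dir_lock(&d);

	d.parallel_dirops = true;       /* flag changes under us ... */
	dir_unlock(&d, locked);         /* ... but the pair still balances */
	return 0;
}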
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index ad04a5741016..9a8772465a90 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -75,9 +75,10 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len)
if (!fd->bnode) {
if (!tree->root)
hfs_btree_inc_height(tree);
- fd->bnode = hfs_bnode_find(tree, tree->leaf_head);
- if (IS_ERR(fd->bnode))
- return PTR_ERR(fd->bnode);
+ node = hfs_bnode_find(tree, tree->leaf_head);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+ fd->bnode = node;
fd->record = -1;
}
new_node = NULL;
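The bug fixed here: on failure hfs_bnode_find() returns an encoded error pointer, and writing that straight into fd->bnode leaves a poisoned pointer behind for later code that only tests fd->bnode against NULL. Landing the result in a local first keeps the cached field valid. For reference, a self-contained rendition of the ERR_PTR encoding involved (the kernel's real macros live in <linux/err.h>):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static void *find_node(int fail)
{
	static int node = 42;

	return fail ? ERR_PTR(-ENOMEM) : (void *)&node;
}

int main(void)
{
	void *cached = NULL;            /* stands in for fd->bnode */
	void *node = find_node(1);

	if (IS_ERR(node))
		printf("lookup failed (%ld), cache still %p\n",
		       PTR_ERR(node), cached);
	else
		cached = node;          /* publish only on success */
	return 0;
}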
diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig
index 7cc8b4acf66a..a63371815aab 100644
--- a/fs/hfsplus/Kconfig
+++ b/fs/hfsplus/Kconfig
@@ -11,18 +11,3 @@ config HFSPLUS_FS
MacOS 8. It includes all Mac specific filesystem data such as
data forks and creator codes, but it also has several UNIX
style features such as file ownership and permissions.
-
-config HFSPLUS_FS_POSIX_ACL
- bool "HFS+ POSIX Access Control Lists"
- depends on HFSPLUS_FS
- select FS_POSIX_ACL
- help
- POSIX Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
-
- It needs to understand that POSIX ACLs are treated only under
- Linux. POSIX ACLs doesn't mean something under Mac OS X.
- Mac OS X beginning with version 10.4 ("Tiger") support NFSv4 ACLs,
- which are part of the NFSv4 standard.
-
- If you don't know what Access Control Lists are, say N
diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile
index f6a56542f8d7..9ed20e64b983 100644
--- a/fs/hfsplus/Makefile
+++ b/fs/hfsplus/Makefile
@@ -8,5 +8,3 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o
hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \
bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \
attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o
-
-hfsplus-$(CONFIG_HFSPLUS_FS_POSIX_ACL) += posix_acl.o
diff --git a/fs/hfsplus/acl.h b/fs/hfsplus/acl.h
deleted file mode 100644
index 488c2b75cf41..000000000000
--- a/fs/hfsplus/acl.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/fs/hfsplus/acl.h
- *
- * Vyacheslav Dubeyko <slava@dubeyko.com>
- *
- * Handler for Posix Access Control Lists (ACLs) support.
- */
-
-#include <linux/posix_acl_xattr.h>
-
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
-
-/* posix_acl.c */
-struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type);
-int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
- int type);
-extern int hfsplus_init_posix_acl(struct inode *, struct inode *);
-
-#else /* CONFIG_HFSPLUS_FS_POSIX_ACL */
-#define hfsplus_get_posix_acl NULL
-#define hfsplus_set_posix_acl NULL
-
-static inline int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
-{
- return 0;
-}
-#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 808f4d8c859c..ed8eacb34452 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -73,9 +73,10 @@ int hfs_brec_insert(struct hfs_find_data *fd, void *entry, int entry_len)
if (!fd->bnode) {
if (!tree->root)
hfs_btree_inc_height(tree);
- fd->bnode = hfs_bnode_find(tree, tree->leaf_head);
- if (IS_ERR(fd->bnode))
- return PTR_ERR(fd->bnode);
+ node = hfs_bnode_find(tree, tree->leaf_head);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+ fd->bnode = node;
fd->record = -1;
}
new_node = NULL;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index b5254378f011..f37662675c3a 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -18,7 +18,6 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
#include "xattr.h"
-#include "acl.h"
static inline void hfsplus_instantiate(struct dentry *dentry,
struct inode *inode, u32 cnid)
@@ -78,13 +77,13 @@ again:
cpu_to_be32(HFSP_HARDLINK_TYPE) &&
entry.file.user_info.fdCreator ==
cpu_to_be32(HFSP_HFSPLUS_CREATOR) &&
+ HFSPLUS_SB(sb)->hidden_dir &&
(entry.file.create_date ==
HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->
create_date ||
entry.file.create_date ==
HFSPLUS_I(d_inode(sb->s_root))->
- create_date) &&
- HFSPLUS_SB(sb)->hidden_dir) {
+ create_date)) {
struct qstr str;
char name[32];
@@ -455,7 +454,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
if (res)
goto out_err;
- res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
+ res = hfsplus_init_security(inode, dir, &dentry->d_name);
if (res == -EOPNOTSUPP)
res = 0; /* Operation is not supported. */
else if (res) {
@@ -496,7 +495,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
if (res)
goto failed_mknod;
- res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
+ res = hfsplus_init_security(inode, dir, &dentry->d_name);
if (res == -EOPNOTSUPP)
res = 0; /* Operation is not supported. */
else if (res) {
@@ -567,10 +566,6 @@ const struct inode_operations hfsplus_dir_inode_operations = {
.mknod = hfsplus_mknod,
.rename = hfsplus_rename,
.listxattr = hfsplus_listxattr,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- .get_acl = hfsplus_get_posix_acl,
- .set_acl = hfsplus_set_posix_acl,
-#endif
};
const struct file_operations hfsplus_dir_operations = {
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index e8770935ce6d..8e0f59767694 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -336,6 +336,9 @@ static int hfsplus_free_extents(struct super_block *sb,
int i;
int err = 0;
+ /* Mapping the allocation file may lock the extent tree */
+ WARN_ON(mutex_is_locked(&HFSPLUS_SB(sb)->ext_tree->tree_lock));
+
hfsplus_dump_extent(extent);
for (i = 0; i < 8; extent++, i++) {
count = be32_to_cpu(extent->block_count);
@@ -415,11 +418,13 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid,
if (res)
break;
start = be32_to_cpu(fd.key->ext.start_block);
- hfsplus_free_extents(sb, ext_entry,
- total_blocks - start,
- total_blocks);
hfs_brec_remove(&fd);
+
+ mutex_unlock(&fd.tree->tree_lock);
+ hfsplus_free_extents(sb, ext_entry, total_blocks - start,
+ total_blocks);
total_blocks = start;
+ mutex_lock(&fd.tree->tree_lock);
} while (total_blocks > blocks);
hfs_find_exit(&fd);
@@ -576,15 +581,20 @@ void hfsplus_file_truncate(struct inode *inode)
}
while (1) {
if (alloc_cnt == hip->first_blocks) {
+ mutex_unlock(&fd.tree->tree_lock);
hfsplus_free_extents(sb, hip->first_extents,
alloc_cnt, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->first_extents);
hip->first_blocks = blk_cnt;
+ mutex_lock(&fd.tree->tree_lock);
break;
}
res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
if (res)
break;
+ hfs_brec_remove(&fd);
+
+ mutex_unlock(&fd.tree->tree_lock);
start = hip->cached_start;
hfsplus_free_extents(sb, hip->cached_extents,
alloc_cnt - start, alloc_cnt - blk_cnt);
@@ -596,7 +606,7 @@ void hfsplus_file_truncate(struct inode *inode)
alloc_cnt = start;
hip->cached_start = hip->cached_blocks = 0;
hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
- hfs_brec_remove(&fd);
+ mutex_lock(&fd.tree->tree_lock);
}
hfs_find_exit(&fd);
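As the new WARN_ON in hfsplus_free_extents() documents, freeing extents may need to map the allocation file, whose own extents live in the same extent tree; calling it with tree_lock held can therefore deadlock. The fix finishes the btree update (hfs_brec_remove()) first, then drops the lock around the free and retakes it. The shape of that unlock/call/relock dance, modeled in userspace:

#include <pthread.h>

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

static void free_extents(void)
{
	/* in the kernel analogue this may itself need tree_lock */
}

static void truncate_step(void)
{
	pthread_mutex_lock(&tree_lock);
	/* ... update the btree record while still protected ... */

	pthread_mutex_unlock(&tree_lock);
	free_extents();                 /* never entered with the lock held */
	pthread_mutex_lock(&tree_lock);

	/* ... next loop iteration revalidates cached state ... */
	pthread_mutex_unlock(&tree_lock);
}

int main(void)
{
	truncate_step();
	return 0;
}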
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d9255abafb81..8e039435958a 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -31,7 +31,6 @@
#define DBG_EXTENT 0x00000020
#define DBG_BITMAP 0x00000040
#define DBG_ATTR_MOD 0x00000080
-#define DBG_ACL_MOD 0x00000100
#if 0
#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index c824f702feec..8e9427a42b81 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -21,7 +21,6 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
#include "xattr.h"
-#include "acl.h"
static int hfsplus_readpage(struct file *file, struct page *page)
{
@@ -267,12 +266,6 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
setattr_copy(inode, attr);
mark_inode_dirty(inode);
- if (attr->ia_valid & ATTR_MODE) {
- error = posix_acl_chmod(inode, inode->i_mode);
- if (unlikely(error))
- return error;
- }
-
return 0;
}
@@ -336,10 +329,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
static const struct inode_operations hfsplus_file_inode_operations = {
.setattr = hfsplus_setattr,
.listxattr = hfsplus_listxattr,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- .get_acl = hfsplus_get_posix_acl,
- .set_acl = hfsplus_set_posix_acl,
-#endif
};
static const struct file_operations hfsplus_file_operations = {
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
deleted file mode 100644
index 066114dcc3a2..000000000000
--- a/fs/hfsplus/posix_acl.c
+++ /dev/null
@@ -1,144 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/fs/hfsplus/posix_acl.c
- *
- * Vyacheslav Dubeyko <slava@dubeyko.com>
- *
- * Handler for Posix Access Control Lists (ACLs) support.
- */
-
-#include "hfsplus_fs.h"
-#include "xattr.h"
-#include "acl.h"
-
-struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
-{
- struct posix_acl *acl;
- char *xattr_name;
- char *value = NULL;
- ssize_t size;
-
- hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino);
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
- break;
- case ACL_TYPE_DEFAULT:
- xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
- break;
- default:
- return ERR_PTR(-EINVAL);
- }
-
- size = __hfsplus_getxattr(inode, xattr_name, NULL, 0);
-
- if (size > 0) {
- value = (char *)hfsplus_alloc_attr_entry();
- if (unlikely(!value))
- return ERR_PTR(-ENOMEM);
- size = __hfsplus_getxattr(inode, xattr_name, value, size);
- }
-
- if (size > 0)
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- else if (size == -ENODATA)
- acl = NULL;
- else
- acl = ERR_PTR(size);
-
- hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
-
- return acl;
-}
-
-static int __hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
- int type)
-{
- int err;
- char *xattr_name;
- size_t size = 0;
- char *value = NULL;
-
- hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino);
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
- break;
-
- case ACL_TYPE_DEFAULT:
- xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EACCES : 0;
- break;
-
- default:
- return -EINVAL;
- }
-
- if (acl) {
- size = posix_acl_xattr_size(acl->a_count);
- if (unlikely(size > HFSPLUS_MAX_INLINE_DATA_SIZE))
- return -ENOMEM;
- value = (char *)hfsplus_alloc_attr_entry();
- if (unlikely(!value))
- return -ENOMEM;
- err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
- if (unlikely(err < 0))
- goto end_set_acl;
- }
-
- err = __hfsplus_setxattr(inode, xattr_name, value, size, 0);
-
-end_set_acl:
- hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
-
- if (!err)
- set_cached_acl(inode, type, acl);
-
- return err;
-}
-
-int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type)
-{
- int err;
-
- if (type == ACL_TYPE_ACCESS && acl) {
- err = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- if (err)
- return err;
- }
- return __hfsplus_set_posix_acl(inode, acl, type);
-}
-
-int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
-{
- int err = 0;
- struct posix_acl *default_acl, *acl;
-
- hfs_dbg(ACL_MOD,
- "[%s]: ino %lu, dir->ino %lu\n",
- __func__, inode->i_ino, dir->i_ino);
-
- if (S_ISLNK(inode->i_mode))
- return 0;
-
- err = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
- if (err)
- return err;
-
- if (default_acl) {
- err = __hfsplus_set_posix_acl(inode, default_acl,
- ACL_TYPE_DEFAULT);
- posix_acl_release(default_acl);
- }
-
- if (acl) {
- if (!err)
- err = __hfsplus_set_posix_acl(inode, acl,
- ACL_TYPE_ACCESS);
- posix_acl_release(acl);
- }
- return err;
-}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index a6c0f54c48c3..eb4535eba95d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -524,8 +524,10 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
- if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
+ if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) {
+ err = -EINVAL;
goto out_put_root;
+ }
inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
@@ -562,8 +564,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
goto out_put_hidden_dir;
}
- err = hfsplus_init_inode_security(sbi->hidden_dir,
- root, &str);
+ err = hfsplus_init_security(sbi->hidden_dir,
+ root, &str);
if (err == -EOPNOTSUPP)
err = 0; /* Operation is not supported. */
else if (err) {
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index dfa90c21948f..c8d1b2be7854 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -272,8 +272,8 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
return size;
}
-/* Decomposes a single unicode character. */
-static inline u16 *decompose_unichar(wchar_t uc, int *size)
+/* Decomposes a non-Hangul unicode character. */
+static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
{
int off;
@@ -296,6 +296,51 @@ static inline u16 *decompose_unichar(wchar_t uc, int *size)
return hfsplus_decompose_table + (off / 4);
}
+/*
+ * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
+ * precomposed Hangul, otherwise return the length of the decomposition.
+ *
+ * This function was adapted from sample code from the Unicode Standard
+ * Annex #15: Unicode Normalization Forms, version 3.2.0.
+ *
+ * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
+ * under the Terms of Use in http://www.unicode.org/copyright.html.
+ */
+static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
+{
+ int index;
+ int l, v, t;
+
+ index = uc - Hangul_SBase;
+ if (index < 0 || index >= Hangul_SCount)
+ return 0;
+
+ l = Hangul_LBase + index / Hangul_NCount;
+ v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
+ t = Hangul_TBase + index % Hangul_TCount;
+
+ result[0] = l;
+ result[1] = v;
+ if (t != Hangul_TBase) {
+ result[2] = t;
+ return 3;
+ }
+ return 2;
+}
+
+/* Decomposes a single unicode character. */
+static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
+{
+ u16 *result;
+
+ /* Hangul is handled separately */
+ result = hangul_buffer;
+ *size = hfsplus_try_decompose_hangul(uc, result);
+ if (*size == 0)
+ result = hfsplus_decompose_nonhangul(uc, size);
+ return result;
+}
+
int hfsplus_asc2uni(struct super_block *sb,
struct hfsplus_unistr *ustr, int max_unistr_len,
const char *astr, int len)
@@ -303,13 +348,14 @@ int hfsplus_asc2uni(struct super_block *sb,
int size, dsize, decompose;
u16 *dstr, outlen = 0;
wchar_t c;
+ u16 dhangul[3];
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
while (outlen < max_unistr_len && len > 0) {
size = asc2unichar(sb, astr, len, &c);
if (decompose)
- dstr = decompose_unichar(c, &dsize);
+ dstr = decompose_unichar(c, &dsize, dhangul);
else
dstr = NULL;
if (dstr) {
@@ -344,6 +390,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
unsigned long hash;
wchar_t c;
u16 c2;
+ u16 dhangul[3];
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -357,7 +404,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
len -= size;
if (decompose)
- dstr = decompose_unichar(c, &dsize);
+ dstr = decompose_unichar(c, &dsize, dhangul);
else
dstr = NULL;
if (dstr) {
@@ -396,6 +443,7 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
const char *astr1, *astr2;
u16 c1, c2;
wchar_t c;
+ u16 dhangul_1[3], dhangul_2[3];
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -413,7 +461,8 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
len1 -= size;
if (decompose)
- dstr1 = decompose_unichar(c, &dsize1);
+ dstr1 = decompose_unichar(c, &dsize1,
+ dhangul_1);
if (!decompose || !dstr1) {
c1 = c;
dstr1 = &c1;
@@ -427,7 +476,8 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
len2 -= size;
if (decompose)
- dstr2 = decompose_unichar(c, &dsize2);
+ dstr2 = decompose_unichar(c, &dsize2,
+ dhangul_2);
if (!decompose || !dstr2) {
c2 = c;
dstr2 = &c2;
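Why the separate Hangul path: the decompose table covers only non-Hangul codepoints, while the 11,172 precomposed Hangul syllables decompose arithmetically per UAX #15, which is also why every caller must now supply a small buffer (the result is computed, not pointed into a table). A worked instance of that arithmetic, with the standard constant values spelled out (the patch's Hangul_* names are assumed to carry these values):

#include <stdio.h>

#define SBase  0xAC00
#define LBase  0x1100
#define VBase  0x1161
#define TBase  0x11A7
#define TCount 28
#define NCount 588              /* VCount(21) * TCount(28) */
#define SCount 11172            /* LCount(19) * NCount */

int main(void)
{
	int uc = 0xD55C;        /* U+D55C HANGUL SYLLABLE HAN */
	int index = uc - SBase;
	unsigned l, v, t;

	if (index < 0 || index >= SCount)
		return 1;       /* not precomposed Hangul */

	l = LBase + index / NCount;
	v = VBase + (index % NCount) / TCount;
	t = TBase + index % TCount;

	printf("U+%04X U+%04X", l, v);
	if (t != TBase)         /* trailing consonant present */
		printf(" U+%04X", t);
	printf("\n");           /* prints U+1112 U+1161 U+11AB */
	return 0;
}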
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index e538b758c448..d5403b4004c9 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -8,10 +8,8 @@
*/
#include "hfsplus_fs.h"
-#include <linux/posix_acl_xattr.h>
#include <linux/nls.h>
#include "xattr.h"
-#include "acl.h"
static int hfsplus_removexattr(struct inode *inode, const char *name);
@@ -19,10 +17,6 @@ const struct xattr_handler *hfsplus_xattr_handlers[] = {
&hfsplus_xattr_osx_handler,
&hfsplus_xattr_user_handler,
&hfsplus_xattr_trusted_handler,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
&hfsplus_xattr_security_handler,
NULL
};
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
index a4e611d69710..d14e362b3eba 100644
--- a/fs/hfsplus/xattr.h
+++ b/fs/hfsplus/xattr.h
@@ -38,7 +38,4 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
int hfsplus_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr);
-int hfsplus_init_inode_security(struct inode *inode, struct inode *dir,
- const struct qstr *qstr);
-
#endif
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index f5550b006e88..cfbe6a3bfb1e 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -12,7 +12,6 @@
#include "hfsplus_fs.h"
#include "xattr.h"
-#include "acl.h"
static int hfsplus_security_getxattr(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
@@ -72,18 +71,6 @@ int hfsplus_init_security(struct inode *inode, struct inode *dir,
&hfsplus_initxattrs, NULL);
}
-int hfsplus_init_inode_security(struct inode *inode,
- struct inode *dir,
- const struct qstr *qstr)
-{
- int err;
-
- err = hfsplus_init_posix_acl(inode, dir);
- if (!err)
- err = hfsplus_init_security(inode, dir, qstr);
- return err;
-}
-
const struct xattr_handler hfsplus_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.get = hfsplus_security_getxattr,
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 082b7c76dd0c..1aee39160ac5 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -565,7 +565,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
err = -EFSERROR;
goto end1;
}
- err = r == 2 ? -ENOSPC : r == 1 ? -EFSERROR : 0;
+ err = -ENOSPC;
goto end1;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 346a146c7617..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -410,7 +410,6 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
- memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
vma_init(&pseudo_vma, current->mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pagevec_init(&pvec);
@@ -595,7 +594,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* allocation routines. If NUMA is configured, use page index
* as input to create an allocation policy.
*/
- memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
vma_init(&pseudo_vma, mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pseudo_vma.vm_file = file;
diff --git a/fs/inode.c b/fs/inode.c
index a06de4454232..42f6d25f32a5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1596,49 +1596,16 @@ sector_t bmap(struct inode *inode, sector_t block)
EXPORT_SYMBOL(bmap);
/*
- * Update times in overlayed inode from underlying real inode
- */
-static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
- bool rcu)
-{
- struct dentry *upperdentry;
-
- /*
- * Nothing to do if in rcu or if non-overlayfs
- */
- if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL)))
- return;
-
- upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
-
- /*
- * If file is on lower then we can't update atime, so no worries about
- * stale mtime/ctime.
- */
- if (upperdentry) {
- struct inode *realinode = d_inode(upperdentry);
-
- if ((!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
- !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) {
- inode->i_mtime = realinode->i_mtime;
- inode->i_ctime = realinode->i_ctime;
- }
- }
-}
-
-/*
* With relative atime, only update atime if the previous atime is
* earlier than either the ctime or mtime or if at least a day has
* passed since the last atime update.
*/
-static int relatime_need_update(const struct path *path, struct inode *inode,
- struct timespec now, bool rcu)
+static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+ struct timespec now)
{
- if (!(path->mnt->mnt_flags & MNT_RELATIME))
+ if (!(mnt->mnt_flags & MNT_RELATIME))
return 1;
-
- update_ovl_inode_times(path->dentry, inode, rcu);
/*
* Is mtime younger than atime? If yes, update atime:
*/
@@ -1709,8 +1676,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags)
* This function automatically handles read only file systems and media,
* as well as the "noatime" flag and inode specific "noatime" markers.
*/
-bool __atime_needs_update(const struct path *path, struct inode *inode,
- bool rcu)
+bool atime_needs_update(const struct path *path, struct inode *inode)
{
struct vfsmount *mnt = path->mnt;
struct timespec64 now;
@@ -1736,7 +1702,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode,
now = current_time(inode);
- if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu))
+ if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
return false;
if (timespec64_equal(&inode->i_atime, &now))
@@ -1751,7 +1717,7 @@ void touch_atime(const struct path *path)
struct inode *inode = d_inode(path->dentry);
struct timespec64 now;
- if (!__atime_needs_update(path, inode, false))
+ if (!atime_needs_update(path, inode))
return;
if (!sb_start_write_trylock(inode->i_sb))
diff --git a/fs/internal.h b/fs/internal.h
index 50a28fc71300..d410186bc369 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -82,10 +82,8 @@ extern void __init mnt_init(void);
extern int __mnt_want_write(struct vfsmount *);
extern int __mnt_want_write_file(struct file *);
-extern int mnt_want_write_file_path(struct file *);
extern void __mnt_drop_write(struct vfsmount *);
extern void __mnt_drop_write_file(struct file *);
-extern void mnt_drop_write_file_path(struct file *);
/*
* fs_struct.c
@@ -96,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
* file_table.c
*/
extern struct file *alloc_empty_file(int, const struct cred *);
+extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
/*
* super.c
@@ -136,13 +135,6 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
extern void inode_add_lru(struct inode *inode);
extern int dentry_needs_remove_privs(struct dentry *dentry);
-extern bool __atime_needs_update(const struct path *, struct inode *, bool);
-static inline bool atime_needs_update_rcu(const struct path *path,
- struct inode *inode)
-{
- return __atime_needs_update(path, inode, true);
-}
-
/*
* fs-writeback.c
*/
@@ -185,7 +177,6 @@ extern const struct dentry_operations ns_dentry_operations;
*/
extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
unsigned long arg);
-extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/*
* iomap support:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index b445b13fc59b..3212c29235ce 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -49,6 +49,7 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
out:
return error;
}
+EXPORT_SYMBOL(vfs_ioctl);
static int ioctl_fibmap(struct file *filp, int __user *p)
{
diff --git a/fs/iomap.c b/fs/iomap.c
index 009071e73bc0..74762b1ec233 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -290,7 +290,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
sector_t sector;
if (iomap->type == IOMAP_INLINE) {
- WARN_ON_ONCE(poff);
+ WARN_ON_ONCE(pos);
iomap_read_inline_data(inode, page, iomap);
return PAGE_SIZE;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index ec3fba7d492f..488a9e7f8f66 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -24,6 +24,7 @@
#include <linux/mpage.h>
#include <linux/user_namespace.h>
#include <linux/seq_file.h>
+#include <linux/blkdev.h>
#include "isofs.h"
#include "zisofs.h"
@@ -653,6 +654,12 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
/*
* What if bugger tells us to go beyond page size?
*/
+ if (bdev_logical_block_size(s->s_bdev) > 2048) {
+ printk(KERN_WARNING
+ "ISOFS: unsupported/invalid hardware sector size %d\n",
+ bdev_logical_block_size(s->s_bdev));
+ goto out_freesbi;
+ }
opt.blocksize = sb_min_blocksize(s, opt.blocksize);
sbi->s_high_sierra = 0; /* default is iso9660 */
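ISO9660 block sizes top out at 2048 bytes, so a device with larger logical sectors cannot host a mountable image, and without the guard sb_min_blocksize() would fail in a less obvious way. From userspace the same device property is visible via the BLKSSZGET ioctl; a small probe, assuming a block device path as argv[1]:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	int fd, ssz;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;
	if (ioctl(fd, BLKSSZGET, &ssz) == 0 && ssz > 2048)
		fprintf(stderr, "unsupported sector size %d\n", ssz);
	close(fd);
	return 0;
}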
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 96c1d14c18f1..c2a128678e6e 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -187,7 +187,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
continue;
if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
continue;
- if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0)
+ if (nfs_compare_fh(NFS_FH(locks_inode(fl_blocked->fl_file)), fh) != 0)
continue;
/* Alright, we found a lock. Set the return status
* and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a2c0dfc6fdc0..d20b92f271c2 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -128,7 +128,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
char *nodename = req->a_host->h_rpcclnt->cl_nodename;
nlmclnt_next_cookie(&argp->cookie);
- memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
+ memcpy(&lock->fh, NFS_FH(locks_inode(fl->fl_file)), sizeof(struct nfs_fh));
lock->caller = nodename;
lock->oh.data = req->a_owner;
lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 3701bccab478..74330daeab71 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -405,8 +405,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
__be32 ret;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
- file_inode(file->f_file)->i_sb->s_id,
- file_inode(file->f_file)->i_ino,
+ locks_inode(file->f_file)->i_sb->s_id,
+ locks_inode(file->f_file)->i_ino,
lock->fl.fl_type, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end,
@@ -511,8 +511,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
__be32 ret;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
- file_inode(file->f_file)->i_sb->s_id,
- file_inode(file->f_file)->i_ino,
+ locks_inode(file->f_file)->i_sb->s_id,
+ locks_inode(file->f_file)->i_ino,
lock->fl.fl_type,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -566,8 +566,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
int error;
dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
- file_inode(file->f_file)->i_sb->s_id,
- file_inode(file->f_file)->i_ino,
+ locks_inode(file->f_file)->i_sb->s_id,
+ locks_inode(file->f_file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
@@ -595,8 +595,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
int status = 0;
dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
- file_inode(file->f_file)->i_sb->s_id,
- file_inode(file->f_file)->i_ino,
+ locks_inode(file->f_file)->i_sb->s_id,
+ locks_inode(file->f_file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 4ec3d6e03e76..899360ba3b84 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -44,7 +44,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
{
- struct inode *inode = file_inode(file->f_file);
+ struct inode *inode = locks_inode(file->f_file);
dprintk("lockd: %s %s/%ld\n",
msg, inode->i_sb->s_id, inode->i_ino);
@@ -414,7 +414,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
{
struct super_block *sb = datap;
- return sb == file_inode(file->f_file)->i_sb;
+ return sb == locks_inode(file->f_file)->i_sb;
}
/**
diff --git a/fs/locks.c b/fs/locks.c
index bc047a7edc47..2ecb4db8c840 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,11 +139,6 @@
#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
#define IS_REMOTELCK(fl) (fl->fl_pid <= 0)
-static inline bool is_remote_lock(struct file *filp)
-{
- return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK));
-}
-
static bool lease_breaking(struct file_lock *fl)
{
return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@ -542,7 +537,7 @@ lease_setup(struct file_lock *fl, void **priv)
if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
*priv = NULL;
- __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+ __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);
}
static const struct lock_manager_operations lease_manager_ops = {
@@ -1651,8 +1646,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
if (flags & FL_LAYOUT)
return 0;
- if ((arg == F_RDLCK) &&
- (atomic_read(&d_real_inode(dentry)->i_writecount) > 0))
+ if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
return -EAGAIN;
if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
@@ -1873,7 +1867,7 @@ EXPORT_SYMBOL(generic_setlease);
int
vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
{
- if (filp->f_op->setlease && is_remote_lock(filp))
+ if (filp->f_op->setlease)
return filp->f_op->setlease(filp, arg, lease, priv);
else
return generic_setlease(filp, arg, lease, priv);
@@ -2020,7 +2014,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
if (error)
goto out_free;
- if (f.file->f_op->flock && is_remote_lock(f.file))
+ if (f.file->f_op->flock)
error = f.file->f_op->flock(f.file,
(can_sleep) ? F_SETLKW : F_SETLK,
lock);
@@ -2046,7 +2040,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
- if (filp->f_op->lock && is_remote_lock(filp))
+ if (filp->f_op->lock)
return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl);
return 0;
@@ -2196,7 +2190,7 @@ out:
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
- if (filp->f_op->lock && is_remote_lock(filp))
+ if (filp->f_op->lock)
return filp->f_op->lock(filp, cmd, fl);
else
return posix_lock_file(filp, fl, conf);
@@ -2518,7 +2512,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
if (list_empty(&flctx->flc_flock))
return;
- if (filp->f_op->flock && is_remote_lock(filp))
+ if (filp->f_op->flock)
filp->f_op->flock(filp, F_SETLKW, &fl);
else
flock_lock_inode(inode, &fl);
@@ -2605,7 +2599,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
*/
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
- if (filp->f_op->lock && is_remote_lock(filp))
+ if (filp->f_op->lock)
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
}
diff --git a/fs/namei.c b/fs/namei.c
index 3cd396277cd3..0cab6494978c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -887,6 +887,8 @@ static inline void put_link(struct nameidata *nd)
int sysctl_protected_symlinks __read_mostly = 0;
int sysctl_protected_hardlinks __read_mostly = 0;
+int sysctl_protected_fifos __read_mostly;
+int sysctl_protected_regular __read_mostly;
/**
* may_follow_link - Check symlink following for unsafe situations
@@ -1003,6 +1005,45 @@ static int may_linkat(struct path *link)
return -EPERM;
}
+/**
+ * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
+ * should be allowed, or not, on files that already
+ * exist.
+ * @dir: the sticky parent directory
+ * @inode: the inode of the file to open
+ *
+ * Block an O_CREAT open of a FIFO (or a regular file) when:
+ * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
+ * - the file already exists
+ * - we are in a sticky directory
+ * - we don't own the file
+ * - the owner of the directory doesn't own the file
+ * - the directory is world writable
+ * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
+ * the directory doesn't have to be world writable: being group writable will
+ * be enough.
+ *
+ * Returns 0 if the open is allowed, -ve on error.
+ */
+static int may_create_in_sticky(struct dentry * const dir,
+ struct inode * const inode)
+{
+ if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
+ (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
+ likely(!(dir->d_inode->i_mode & S_ISVTX)) ||
+ uid_eq(inode->i_uid, dir->d_inode->i_uid) ||
+ uid_eq(current_fsuid(), inode->i_uid))
+ return 0;
+
+ if (likely(dir->d_inode->i_mode & 0002) ||
+ (dir->d_inode->i_mode & 0020 &&
+ ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
+ (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
+ return -EACCES;
+ }
+ return 0;
+}
+
static __always_inline
const char *get_link(struct nameidata *nd)
{
@@ -1015,7 +1056,7 @@ const char *get_link(struct nameidata *nd)
if (!(nd->flags & LOOKUP_RCU)) {
touch_atime(&last->link);
cond_resched();
- } else if (atime_needs_update_rcu(&last->link, inode)) {
+ } else if (atime_needs_update(&last->link, inode)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
touch_atime(&last->link);
@@ -3348,9 +3389,15 @@ finish_open:
if (error)
return error;
audit_inode(nd->name, nd->path.dentry, 0);
- error = -EISDIR;
- if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
- goto out;
+ if (open_flag & O_CREAT) {
+ error = -EISDIR;
+ if (d_is_dir(nd->path.dentry))
+ goto out;
+ error = may_create_in_sticky(dir,
+ d_backing_inode(nd->path.dentry));
+ if (unlikely(error))
+ goto out;
+ }
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
goto out;
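To make the may_create_in_sticky() policy concrete: with fs.protected_fifos=1, an O_CREAT open of someone else's FIFO in a world-writable sticky directory (a 01777 /tmp-style directory) is refused with EACCES. A userspace rendition of the check, trimmed to the FIFO case (the regular-file branch is elided, and plain integers stand in for kuid_t):

#include <stdio.h>
#include <sys/stat.h>

static int protected_fifos = 1;         /* sysctl fs.protected_fifos */

static int may_create_in_sticky(mode_t dir_mode, unsigned dir_uid,
				mode_t inode_mode, unsigned inode_uid,
				unsigned fsuid)
{
	if (!S_ISFIFO(inode_mode))      /* regular-file branch elided */
		return 0;
	if (!protected_fifos ||
	    !(dir_mode & S_ISVTX) ||    /* not a sticky directory */
	    inode_uid == dir_uid ||     /* dir owner owns the FIFO */
	    fsuid == inode_uid)         /* we own the FIFO */
		return 0;

	if ((dir_mode & 0002) ||        /* world-writable, or ... */
	    ((dir_mode & 0020) && protected_fifos >= 2))
		return -13;             /* -EACCES */
	return 0;
}

int main(void)
{
	/* 01777 dir owned by root; FIFO owned by uid 1001; we are 1000 */
	printf("%d\n", may_create_in_sticky(S_IFDIR | 01777, 0,
					    S_IFIFO | 0666, 1001, 1000));
	return 0;
}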
diff --git a/fs/namespace.c b/fs/namespace.c
index bd2f4c68506a..99186556f8d3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -61,9 +61,6 @@ __setup("mphash_entries=", set_mphash_entries);
static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
-static DEFINE_SPINLOCK(mnt_id_lock);
-static int mnt_id_start = 0;
-static int mnt_group_start = 1;
static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
@@ -101,50 +98,30 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
static int mnt_alloc_id(struct mount *mnt)
{
- int res;
+ int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
-retry:
- ida_pre_get(&mnt_id_ida, GFP_KERNEL);
- spin_lock(&mnt_id_lock);
- res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
- if (!res)
- mnt_id_start = mnt->mnt_id + 1;
- spin_unlock(&mnt_id_lock);
- if (res == -EAGAIN)
- goto retry;
-
- return res;
+ if (res < 0)
+ return res;
+ mnt->mnt_id = res;
+ return 0;
}
static void mnt_free_id(struct mount *mnt)
{
- int id = mnt->mnt_id;
- spin_lock(&mnt_id_lock);
- ida_remove(&mnt_id_ida, id);
- if (mnt_id_start > id)
- mnt_id_start = id;
- spin_unlock(&mnt_id_lock);
+ ida_free(&mnt_id_ida, mnt->mnt_id);
}
/*
* Allocate a new peer group ID
- *
- * mnt_group_ida is protected by namespace_sem
*/
static int mnt_alloc_group_id(struct mount *mnt)
{
- int res;
+ int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);
- if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
- return -ENOMEM;
-
- res = ida_get_new_above(&mnt_group_ida,
- mnt_group_start,
- &mnt->mnt_group_id);
- if (!res)
- mnt_group_start = mnt->mnt_group_id + 1;
-
- return res;
+ if (res < 0)
+ return res;
+ mnt->mnt_group_id = res;
+ return 0;
}
/*
@@ -152,10 +129,7 @@ static int mnt_alloc_group_id(struct mount *mnt)
*/
void mnt_release_group_id(struct mount *mnt)
{
- int id = mnt->mnt_group_id;
- ida_remove(&mnt_group_ida, id);
- if (mnt_group_start > id)
- mnt_group_start = id;
+ ida_free(&mnt_group_ida, mnt->mnt_group_id);
mnt->mnt_group_id = 0;
}
@@ -431,74 +405,20 @@ int __mnt_want_write_file(struct file *file)
}
/**
- * mnt_want_write_file_path - get write access to a file's mount
- * @file: the file who's mount on which to take a write
- *
- * This is like mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
- *
- * Called by the vfs for cases when we have an open file at hand, but will do an
- * inode operation on it (important distinction for files opened on overlayfs,
- * since the file operations will come from the real underlying file, while
- * inode operations come from the overlay).
- */
-int mnt_want_write_file_path(struct file *file)
-{
- int ret;
-
- sb_start_write(file->f_path.mnt->mnt_sb);
- ret = __mnt_want_write_file(file);
- if (ret)
- sb_end_write(file->f_path.mnt->mnt_sb);
- return ret;
-}
-
-static inline int may_write_real(struct file *file)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct dentry *upperdentry;
-
- /* Writable file? */
- if (file->f_mode & FMODE_WRITER)
- return 0;
-
- /* Not overlayfs? */
- if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
- return 0;
-
- /* File refers to upper, writable layer? */
- upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
- if (upperdentry &&
- (file_inode(file) == d_inode(upperdentry) ||
- file_inode(file) == d_inode(dentry)))
- return 0;
-
- /* Lower layer: can't write to real file, sorry... */
- return -EPERM;
-}
-
-/**
* mnt_want_write_file - get write access to a file's mount
* @file: the file whose mount to take a write on
*
* This is like mnt_want_write, but it takes a file and can
* do some optimisations if the file is open for write already
- *
- * Mostly called by filesystems from their ioctl operation before performing
- * modification. On overlayfs this needs to check if the file is on a read-only
- * lower layer and deny access in that case.
*/
int mnt_want_write_file(struct file *file)
{
int ret;
- ret = may_write_real(file);
- if (!ret) {
- sb_start_write(file_inode(file)->i_sb);
- ret = __mnt_want_write_file(file);
- if (ret)
- sb_end_write(file_inode(file)->i_sb);
- }
+ sb_start_write(file_inode(file)->i_sb);
+ ret = __mnt_want_write_file(file);
+ if (ret)
+ sb_end_write(file_inode(file)->i_sb);
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);
@@ -538,14 +458,9 @@ void __mnt_drop_write_file(struct file *file)
__mnt_drop_write(file->f_path.mnt);
}
-void mnt_drop_write_file_path(struct file *file)
-{
- mnt_drop_write(file->f_path.mnt);
-}
-
void mnt_drop_write_file(struct file *file)
{
- __mnt_drop_write(file->f_path.mnt);
+ __mnt_drop_write_file(file);
sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);
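The ida conversion above removes three pieces of hand-rolled state at once: the external spinlock, the -EAGAIN retry loop around ida_pre_get(), and the mnt_id_start/mnt_group_start hints; ida_alloc() locks internally and always hands back the lowest free ID, which is what the hints were approximating. The contract, modeled in userspace with a one-word bitmap (IDs 0..63 only, purely illustrative):

#include <errno.h>
#include <stdio.h>

static unsigned long long bitmap;       /* one word: IDs 0..63 */

static int id_alloc(void)
{
	int id;

	for (id = 0; id < 64; id++) {
		if (!(bitmap & (1ULL << id))) {
			bitmap |= 1ULL << id;
			return id;      /* lowest free ID, as ida_alloc() */
		}
	}
	return -ENOSPC;
}

static void id_free(int id)
{
	bitmap &= ~(1ULL << id);        /* freed IDs become reusable at once */
}

int main(void)
{
	int a = id_alloc(), b = id_alloc();

	id_free(a);
	printf("%d %d %d\n", a, b, id_alloc());     /* 0 1 0 */
	return 0;
}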
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7cb5c38c19e4..06cb0c1d9aee 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -753,6 +753,7 @@ out:
case -ENODEV:
/* Our extent block devices are unavailable */
set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
+ /* Fall through */
case 0:
return lseg;
default:
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index a7efd83779d2..dec5880ac6de 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -204,7 +204,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
chunk = div_u64(offset, dev->chunk_size);
div_u64_rem(chunk, dev->nr_children, &chunk_idx);
- if (chunk_idx > dev->nr_children) {
+ if (chunk_idx >= dev->nr_children) {
dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
__func__, chunk_idx, offset, dev->chunk_size);
/* error, should not happen */
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index a20a0bce40a4..8f34daf85f70 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -184,6 +184,18 @@ struct cb_notify_lock_args {
extern __be32 nfs4_callback_notify_lock(void *argp, void *resp,
struct cb_process_state *cps);
#endif /* CONFIG_NFS_V4_1 */
+#ifdef CONFIG_NFS_V4_2
+struct cb_offloadargs {
+ struct nfs_fh coa_fh;
+ nfs4_stateid coa_stateid;
+ uint32_t error;
+ uint64_t wr_count;
+ struct nfs_writeverf wr_writeverf;
+};
+
+extern __be32 nfs4_callback_offload(void *args, void *dummy,
+ struct cb_process_state *cps);
+#endif /* CONFIG_NFS_V4_2 */
extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
extern __be32 nfs4_callback_getattr(void *argp, void *resp,
struct cb_process_state *cps);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 64c214fb9da6..fa515d5ea5ba 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -215,9 +215,9 @@ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo,
{
u32 oldseq, newseq;
- /* Is the stateid still not initialised? */
+ /* Is the stateid not initialised? */
if (!pnfs_layout_is_valid(lo))
- return NFS4ERR_DELAY;
+ return NFS4ERR_NOMATCHING_LAYOUT;
/* Mismatched stateid? */
if (!nfs4_stateid_match_other(&lo->plh_stateid, new))
@@ -273,7 +273,6 @@ static u32 initiate_file_draining(struct nfs_client *clp,
rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid);
if (rv != NFS_OK)
goto unlock;
- pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
/*
* Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)
@@ -283,19 +282,23 @@ static u32 initiate_file_draining(struct nfs_client *clp,
goto unlock;
}
- if (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
+ pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+ switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
&args->cbl_range,
be32_to_cpu(args->cbl_stateid.seqid))) {
+ case 0:
+ case -EBUSY:
+ /* There are layout segments that need to be returned */
rv = NFS4_OK;
- goto unlock;
- }
-
- /* Embrace your forgetfulness! */
- rv = NFS4ERR_NOMATCHING_LAYOUT;
+ break;
+ case -ENOENT:
+ /* Embrace your forgetfulness! */
+ rv = NFS4ERR_NOMATCHING_LAYOUT;
- if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
- NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
- &args->cbl_range);
+ if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
+ NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
+ &args->cbl_range);
+ }
}
unlock:
spin_unlock(&ino->i_lock);
@@ -328,8 +331,6 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
static u32 do_callback_layoutrecall(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
- write_seqcount_begin(&clp->cl_callback_count);
- write_seqcount_end(&clp->cl_callback_count);
if (args->cbl_recall_type == RETURN_FILE)
return initiate_file_draining(clp, args);
return initiate_bulk_draining(clp, args);
@@ -441,11 +442,14 @@ validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
* a match. If the slot is in use and the sequence numbers match, the
* client is still waiting for a response to the original request.
*/
-static bool referring_call_exists(struct nfs_client *clp,
+static int referring_call_exists(struct nfs_client *clp,
uint32_t nrclists,
- struct referring_call_list *rclists)
+ struct referring_call_list *rclists,
+ spinlock_t *lock)
+ __releases(lock)
+ __acquires(lock)
{
- bool status = false;
+ int status = 0;
int i, j;
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
@@ -468,8 +472,10 @@ static bool referring_call_exists(struct nfs_client *clp,
for (j = 0; j < rclist->rcl_nrefcalls; j++) {
ref = &rclist->rcl_refcalls[j];
+ spin_unlock(lock);
status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
ref->rc_sequenceid, HZ >> 1) < 0;
+ spin_lock(lock);
if (status)
goto out;
}
@@ -546,7 +552,8 @@ __be32 nfs4_callback_sequence(void *argp, void *resp,
* related callback was received before the response to the original
* call.
*/
- if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
+ if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists,
+ &tbl->slot_tbl_lock) < 0) {
status = htonl(NFS4ERR_DELAY);
goto out_unlock;
}
@@ -660,3 +667,57 @@ __be32 nfs4_callback_notify_lock(void *argp, void *resp,
return htonl(NFS4_OK);
}
#endif /* CONFIG_NFS_V4_1 */
+#ifdef CONFIG_NFS_V4_2
+static void nfs4_copy_cb_args(struct nfs4_copy_state *cp_state,
+ struct cb_offloadargs *args)
+{
+ cp_state->count = args->wr_count;
+ cp_state->error = args->error;
+ if (!args->error) {
+ cp_state->verf.committed = args->wr_writeverf.committed;
+ memcpy(&cp_state->verf.verifier.data[0],
+ &args->wr_writeverf.verifier.data[0],
+ NFS4_VERIFIER_SIZE);
+ }
+}
+
+__be32 nfs4_callback_offload(void *data, void *dummy,
+ struct cb_process_state *cps)
+{
+ struct cb_offloadargs *args = data;
+ struct nfs_server *server;
+ struct nfs4_copy_state *copy;
+ bool found = false;
+
+ spin_lock(&cps->clp->cl_lock);
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &cps->clp->cl_superblocks,
+ client_link) {
+ list_for_each_entry(copy, &server->ss_copies, copies) {
+ if (memcmp(args->coa_stateid.other,
+ copy->stateid.other,
+ sizeof(args->coa_stateid.other)))
+ continue;
+ nfs4_copy_cb_args(copy, args);
+ complete(&copy->completion);
+ found = true;
+ goto out;
+ }
+ }
+out:
+ rcu_read_unlock();
+ if (!found) {
+ copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+ if (!copy) {
+ spin_unlock(&cps->clp->cl_lock);
+ return htonl(NFS4ERR_SERVERFAULT);
+ }
+ memcpy(&copy->stateid, &args->coa_stateid, NFS4_STATEID_SIZE);
+ nfs4_copy_cb_args(copy, args);
+ list_add_tail(&copy->copies, &cps->clp->pending_cb_stateids);
+ }
+ spin_unlock(&cps->clp->cl_lock);
+
+ return 0;
+}
+#endif /* CONFIG_NFS_V4_2 */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a813979b5be0..a87a56273407 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -38,6 +38,9 @@
#define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_NOTIFY_LOCK_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#endif /* CONFIG_NFS_V4_1 */
+#ifdef CONFIG_NFS_V4_2
+#define CB_OP_OFFLOAD_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
+#endif /* CONFIG_NFS_V4_2 */
#define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -527,7 +530,72 @@ static __be32 decode_notify_lock_args(struct svc_rqst *rqstp,
}
#endif /* CONFIG_NFS_V4_1 */
+#ifdef CONFIG_NFS_V4_2
+static __be32 decode_write_response(struct xdr_stream *xdr,
+ struct cb_offloadargs *args)
+{
+ __be32 *p;
+
+ /* skip the always zero field */
+ p = read_buf(xdr, 4);
+ if (unlikely(!p))
+ goto out;
+ p++;
+
+ /* decode count, stable_how, verifier */
+ p = xdr_inline_decode(xdr, 8 + 4);
+ if (unlikely(!p))
+ goto out;
+ p = xdr_decode_hyper(p, &args->wr_count);
+ args->wr_writeverf.committed = be32_to_cpup(p);
+ p = xdr_inline_decode(xdr, NFS4_VERIFIER_SIZE);
+ if (likely(p)) {
+ memcpy(&args->wr_writeverf.verifier.data[0], p,
+ NFS4_VERIFIER_SIZE);
+ return 0;
+ }
+out:
+ return htonl(NFS4ERR_RESOURCE);
+}
+
+static __be32 decode_offload_args(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct cb_offloadargs *args = data;
+ __be32 *p;
+ __be32 status;
+
+ /* decode fh */
+ status = decode_fh(xdr, &args->coa_fh);
+ if (unlikely(status != 0))
+ return status;
+ /* decode stateid */
+ status = decode_stateid(xdr, &args->coa_stateid);
+ if (unlikely(status != 0))
+ return status;
+
+ /* decode status */
+ p = read_buf(xdr, 4);
+ if (unlikely(!p))
+ goto out;
+ args->error = ntohl(*p++);
+ if (!args->error) {
+ status = decode_write_response(xdr, args);
+ if (unlikely(status != 0))
+ return status;
+ } else {
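+ /* on error the server still reports how many bytes were copied */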
+ p = xdr_inline_decode(xdr, 8);
+ if (unlikely(!p))
+ goto out;
+ p = xdr_decode_hyper(p, &args->wr_count);
+ }
+ return 0;
+out:
+ return htonl(NFS4ERR_RESOURCE);
+}
+#endif /* CONFIG_NFS_V4_2 */
static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
if (unlikely(xdr_stream_encode_opaque(xdr, str, len) < 0))
@@ -773,7 +841,10 @@ preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op)
if (status != htonl(NFS4ERR_OP_ILLEGAL))
return status;
- if (op_nr == OP_CB_OFFLOAD)
+ if (op_nr == OP_CB_OFFLOAD) {
+ *op = &callback_ops[op_nr];
+ return htonl(NFS_OK);
+ } else
return htonl(NFS4ERR_NOTSUPP);
return htonl(NFS4ERR_OP_ILLEGAL);
}
@@ -883,16 +954,21 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
if (hdr_arg.minorversion == 0) {
cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
- if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
+ if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) {
+ if (cps.clp)
+ nfs_put_client(cps.clp);
goto out_invalidcred;
+ }
}
cps.minorversion = hdr_arg.minorversion;
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
- if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
+ if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) {
+ if (cps.clp)
+ nfs_put_client(cps.clp);
return rpc_system_err;
-
+ }
while (status == 0 && nops != hdr_arg.nops) {
status = process_op(nops, rqstp, &xdr_in,
rqstp->rq_argp, &xdr_out, rqstp->rq_resp,
@@ -969,6 +1045,13 @@ static struct callback_op callback_ops[] = {
.res_maxsize = CB_OP_NOTIFY_LOCK_RES_MAXSZ,
},
#endif /* CONFIG_NFS_V4_1 */
+#ifdef CONFIG_NFS_V4_2
+ [OP_CB_OFFLOAD] = {
+ .process_op = nfs4_callback_offload,
+ .decode_args = decode_offload_args,
+ .res_maxsize = CB_OP_OFFLOAD_RES_MAXSZ,
+ },
+#endif /* CONFIG_NFS_V4_2 */
};
/*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 377a61654a88..96d5f8135eb9 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -886,6 +886,7 @@ struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->delegations);
INIT_LIST_HEAD(&server->layouts);
INIT_LIST_HEAD(&server->state_owners_lru);
+ INIT_LIST_HEAD(&server->ss_copies);
atomic_set(&server->active, 0);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d7f158c3efc8..8bfaa658b2c1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -904,23 +904,29 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
filp, offset, whence);
- inode_lock(inode);
switch (whence) {
- case 1:
- offset += filp->f_pos;
- case 0:
- if (offset >= 0)
- break;
- default:
- offset = -EINVAL;
- goto out;
+ default:
+ return -EINVAL;
+ case SEEK_SET:
+ if (offset < 0)
+ return -EINVAL;
+ inode_lock(inode);
+ break;
+ case SEEK_CUR:
+ if (offset == 0)
+ return filp->f_pos;
+ inode_lock(inode);
+ offset += filp->f_pos;
+ if (offset < 0) {
+ inode_unlock(inode);
+ return -EINVAL;
+ }
}
if (offset != filp->f_pos) {
filp->f_pos = offset;
dir_ctx->dir_cookie = 0;
dir_ctx->duped = 0;
}
-out:
inode_unlock(inode);
return offset;
}
@@ -1032,7 +1038,7 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
if (flags & LOOKUP_REVAL)
goto out_force;
out:
- return (inode->i_nlink == 0) ? -ENOENT : 0;
+ return (inode->i_nlink == 0) ? -ESTALE : 0;
out_force:
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -2499,7 +2505,9 @@ static int nfs_execute_ok(struct inode *inode, int mask)
struct nfs_server *server = NFS_SERVER(inode);
int ret = 0;
- if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) {
+ if (S_ISDIR(inode->i_mode))
+ return 0;
+ if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
ret = __nfs_revalidate_inode(server, inode);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 621c517b325c..aa12c3063bae 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -758,7 +758,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
static void nfs_direct_write_complete(struct nfs_direct_req *dreq)
{
- schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
+ queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */
}
static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 81cca49a8375..29553fdba8af 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -532,13 +532,13 @@ const struct address_space_operations nfs_file_aops = {
* writable, implying that someone is about to modify the page through a
* shared-writable mapping
*/
-static int nfs_vm_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
unsigned pagelen;
- int ret = VM_FAULT_NOPAGE;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 8f003792ccde..cae43333ef16 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -812,7 +812,6 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req,
bool strict_iomode)
{
-retry_strict:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
@@ -825,16 +824,6 @@ retry_strict:
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
}
-
- /* If we don't have checking, do get a IOMODE_RW
- * segment, and the server wants to avoid READs
- * there, then retry!
- */
- if (pgio->pg_lseg && !strict_iomode &&
- ff_layout_avoid_read_on_rw(pgio->pg_lseg)) {
- strict_iomode = true;
- goto retry_strict;
- }
}
static void
@@ -849,14 +838,16 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
retry:
pnfs_generic_pg_check_layout(pgio);
/* Use full layout for now */
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
ff_layout_pg_get_read(pgio, req, false);
- else if (ff_layout_avoid_read_on_rw(pgio->pg_lseg))
+ if (!pgio->pg_lseg)
+ goto out_nolseg;
+ }
+ if (ff_layout_avoid_read_on_rw(pgio->pg_lseg)) {
ff_layout_pg_get_read(pgio, req, true);
-
- /* If no lseg, fall back to read through mds */
- if (pgio->pg_lseg == NULL)
- goto out_mds;
+ if (!pgio->pg_lseg)
+ goto out_nolseg;
+ }
ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
if (!ds) {
@@ -878,6 +869,9 @@ retry:
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
return;
+out_nolseg:
+ if (pgio->pg_error < 0)
+ return;
out_mds:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -1323,6 +1317,7 @@ static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count,
hdr->res.count);
+ set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
}
static int ff_layout_read_prepare_common(struct rpc_task *task,
@@ -1507,6 +1502,7 @@ static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count, hdr->res.count,
hdr->res.verf->committed);
+ set_bit(NFS_LSEG_LAYOUTRETURN, &hdr->lseg->pls_flags);
}
static int ff_layout_write_prepare_common(struct rpc_task *task,
@@ -1615,6 +1611,7 @@ static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
nfs4_ff_layout_stat_io_end_write(task,
FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
count, count, NFS_FILE_SYNC);
+ set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags);
}
static void ff_layout_commit_prepare_common(struct rpc_task *task,
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 7173a4ee862c..9fce18548f7e 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -108,6 +108,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
case -EPROTONOSUPPORT:
dprintk("NFS_V3_ACL extension not supported; disabling\n");
server->caps &= ~NFS_CAP_ACLS;
+ /* fall through */
case -ENOTSUPP:
status = -EOPNOTSUPP;
default:
@@ -229,6 +230,7 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
dprintk("NFS_V3_ACL SETACL RPC not supported"
"(will not retry)\n");
server->caps &= ~NFS_CAP_ACLS;
+ /* fall through */
case -ENOTSUPP:
status = -EOPNOTSUPP;
}
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 5f59b6f65a42..ac5b784a1de0 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -17,6 +17,7 @@
#include "internal.h"
#define NFSDBG_FACILITY NFSDBG_PROC
+static int nfs42_do_offload_cancel_async(struct file *dst, nfs4_stateid *std);
static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
struct nfs_lock_context *lock, loff_t offset, loff_t len)
@@ -130,6 +131,91 @@ out_unlock:
return err;
}
+static int handle_async_copy(struct nfs42_copy_res *res,
+ struct nfs_server *server,
+ struct file *src,
+ struct file *dst,
+ nfs4_stateid *src_stateid)
+{
+ struct nfs4_copy_state *copy;
+ int status = NFS4_OK;
+ bool found_pending = false;
+ struct nfs_open_context *ctx = nfs_file_open_context(dst);
+
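+ /* The CB_OFFLOAD callback may have raced in ahead of this COPY reply */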
+ spin_lock(&server->nfs_client->cl_lock);
+ list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids,
+ copies) {
+ if (memcmp(&res->write_res.stateid, &copy->stateid,
+ NFS4_STATEID_SIZE))
+ continue;
+ found_pending = true;
+ list_del(&copy->copies);
+ break;
+ }
+ if (found_pending) {
+ spin_unlock(&server->nfs_client->cl_lock);
+ goto out;
+ }
+
+ copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+ if (!copy) {
+ spin_unlock(&server->nfs_client->cl_lock);
+ return -ENOMEM;
+ }
+ memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
+ init_completion(&copy->completion);
+ copy->parent_state = ctx->state;
+
+ list_add_tail(&copy->copies, &server->ss_copies);
+ spin_unlock(&server->nfs_client->cl_lock);
+
+ status = wait_for_completion_interruptible(&copy->completion);
+ spin_lock(&server->nfs_client->cl_lock);
+ list_del_init(&copy->copies);
+ spin_unlock(&server->nfs_client->cl_lock);
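+ /* Cancel the offload if we were interrupted or the server restarted */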
+ if (status == -ERESTARTSYS) {
+ goto out_cancel;
+ } else if (copy->flags) {
+ status = -EAGAIN;
+ goto out_cancel;
+ }
+out:
+ res->write_res.count = copy->count;
+ memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
+ status = -copy->error;
+
+ kfree(copy);
+ return status;
+out_cancel:
+ nfs42_do_offload_cancel_async(dst, &copy->stateid);
+ kfree(copy);
+ return status;
+}
+
+static int process_copy_commit(struct file *dst, loff_t pos_dst,
+ struct nfs42_copy_res *res)
+{
+ struct nfs_commitres cres;
+ int status = -ENOMEM;
+
+ cres.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+ if (!cres.verf)
+ goto out;
+
+ status = nfs4_proc_commit(dst, pos_dst, res->write_res.count, &cres);
+ if (status)
+ goto out_free;
+ if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+ &cres.verf->verifier)) {
+ dprintk("commit verf differs from copy verf\n");
+ status = -EAGAIN;
+ }
+out_free:
+ kfree(cres.verf);
+out:
+ return status;
+}
+
static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_lock_context *src_lock,
struct file *dst,
@@ -168,9 +254,16 @@ static ssize_t _nfs42_proc_copy(struct file *src,
if (status)
return status;
- res->commit_res.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
- if (!res->commit_res.verf)
- return -ENOMEM;
+ res->commit_res.verf = NULL;
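+ /* A COMMIT is encoded in the compound only for synchronous copies */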
+ if (args->sync) {
+ res->commit_res.verf =
+ kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+ if (!res->commit_res.verf)
+ return -ENOMEM;
+ }
+ set_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+ &dst_lock->open_context->state->flags);
+
status = nfs4_call_sync(server->client, server, &msg,
&args->seq_args, &res->seq_res, 0);
if (status == -ENOTSUPP)
@@ -178,18 +271,34 @@ static ssize_t _nfs42_proc_copy(struct file *src,
if (status)
goto out;
- if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+ if (args->sync &&
+ nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
&res->commit_res.verf->verifier)) {
status = -EAGAIN;
goto out;
}
+ if (!res->synchronous) {
+ status = handle_async_copy(res, server, src, dst,
+ &args->src_stateid);
+ if (status)
+ return status;
+ }
+
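+ /* Commit the copied data if the server has not written it to stable storage */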
+ if ((!res->synchronous || !args->sync) &&
+ res->write_res.verifier.committed != NFS_FILE_SYNC) {
+ status = process_copy_commit(dst, pos_dst, res);
+ if (status)
+ return status;
+ }
+
truncate_pagecache_range(dst_inode, pos_dst,
pos_dst + res->write_res.count);
status = res->write_res.count;
out:
- kfree(res->commit_res.verf);
+ if (args->sync)
+ kfree(res->commit_res.verf);
return status;
}
@@ -206,6 +315,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
.dst_fh = NFS_FH(file_inode(dst)),
.dst_pos = pos_dst,
.count = count,
+ .sync = false,
};
struct nfs42_copy_res res;
struct nfs4_exception src_exception = {
@@ -247,7 +357,11 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
if (err == -ENOTSUPP) {
err = -EOPNOTSUPP;
break;
- } if (err == -EAGAIN) {
+ } else if (err == -EAGAIN) {
+ dst_exception.retry = 1;
+ continue;
+ } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) {
+ args.sync = true;
dst_exception.retry = 1;
continue;
}
@@ -264,6 +378,89 @@ out_put_src_lock:
return err;
}
+struct nfs42_offloadcancel_data {
+ struct nfs_server *seq_server;
+ struct nfs42_offload_status_args args;
+ struct nfs42_offload_status_res res;
+};
+
+static void nfs42_offload_cancel_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_offloadcancel_data *data = calldata;
+
+ nfs4_setup_sequence(data->seq_server->nfs_client,
+ &data->args.osa_seq_args,
+ &data->res.osr_seq_res, task);
+}
+
+static void nfs42_offload_cancel_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs42_offloadcancel_data *data = calldata;
+
+ nfs41_sequence_done(task, &data->res.osr_seq_res);
+ if (task->tk_status &&
+ nfs4_async_handle_error(task, data->seq_server, NULL,
+ NULL) == -EAGAIN)
+ rpc_restart_call_prepare(task);
+}
+
+static void nfs42_free_offloadcancel_data(void *data)
+{
+ kfree(data);
+}
+
+static const struct rpc_call_ops nfs42_offload_cancel_ops = {
+ .rpc_call_prepare = nfs42_offload_cancel_prepare,
+ .rpc_call_done = nfs42_offload_cancel_done,
+ .rpc_release = nfs42_free_offloadcancel_data,
+};
+
+static int nfs42_do_offload_cancel_async(struct file *dst,
+ nfs4_stateid *stateid)
+{
+ struct nfs_server *dst_server = NFS_SERVER(file_inode(dst));
+ struct nfs42_offloadcancel_data *data = NULL;
+ struct nfs_open_context *ctx = nfs_file_open_context(dst);
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OFFLOAD_CANCEL],
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = dst_server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs42_offload_cancel_ops,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC,
+ };
+ int status;
+
+ if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL))
+ return -EOPNOTSUPP;
+
+ data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_NOFS);
+ if (data == NULL)
+ return -ENOMEM;
+
+ data->seq_server = dst_server;
+ data->args.osa_src_fh = NFS_FH(file_inode(dst));
+ memcpy(&data->args.osa_stateid, stateid,
+ sizeof(data->args.osa_stateid));
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ task_setup_data.callback_data = data;
+ nfs4_init_sequence(&data->args.osa_seq_args, &data->res.osr_seq_res,
+ 1, 0);
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = rpc_wait_for_completion_task(task);
+ if (status == -ENOTSUPP)
+ dst_server->caps &= ~NFS_CAP_OFFLOAD_CANCEL;
+ rpc_put_task(task);
+ return status;
+}
+
static loff_t _nfs42_proc_llseek(struct file *filep,
struct nfs_lock_context *lock, loff_t offset, int whence)
{
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 5966e1e7b1f5..69f72ed2bf87 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -26,6 +26,9 @@
NFS42_WRITE_RES_SIZE + \
1 /* cr_consecutive */ + \
1 /* cr_synchronous */)
+#define encode_offload_cancel_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_offload_cancel_maxsz (op_decode_hdr_maxsz)
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
@@ -75,6 +78,12 @@
decode_putfh_maxsz + \
decode_copy_maxsz + \
decode_commit_maxsz)
+#define NFS4_enc_offload_cancel_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_offload_cancel_maxsz)
+#define NFS4_dec_offload_cancel_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_offload_cancel_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_deallocate_maxsz + \
@@ -141,10 +150,18 @@ static void encode_copy(struct xdr_stream *xdr,
encode_uint64(xdr, args->count);
encode_uint32(xdr, 1); /* consecutive = true */
- encode_uint32(xdr, 1); /* synchronous = true */
+ encode_uint32(xdr, args->sync);
encode_uint32(xdr, 0); /* src server list */
}
+static void encode_offload_cancel(struct xdr_stream *xdr,
+ const struct nfs42_offload_status_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_OFFLOAD_CANCEL, decode_offload_cancel_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->osa_stateid);
+}
+
static void encode_deallocate(struct xdr_stream *xdr,
const struct nfs42_falloc_args *args,
struct compound_hdr *hdr)
@@ -256,7 +273,27 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
encode_savefh(xdr, &hdr);
encode_putfh(xdr, args->dst_fh, &hdr);
encode_copy(xdr, args, &hdr);
- encode_copy_commit(xdr, args, &hdr);
+ if (args->sync)
+ encode_copy_commit(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode OFFLOAD_CANCEL request
+ */
+static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_offload_status_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->osa_seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->osa_seq_args, &hdr);
+ encode_putfh(xdr, args->osa_src_fh, &hdr);
+ encode_offload_cancel(xdr, args, &hdr);
encode_nops(&hdr);
}
@@ -353,21 +390,23 @@ static int decode_write_response(struct xdr_stream *xdr,
struct nfs42_write_res *res)
{
__be32 *p;
+ int status, count;
- p = xdr_inline_decode(xdr, 4 + 8 + 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
-
- /*
- * We never use asynchronous mode, so warn if a server returns
- * a stateid.
- */
- if (unlikely(*p != 0)) {
- pr_err_once("%s: server has set unrequested "
- "asynchronous mode\n", __func__);
+ count = be32_to_cpup(p);
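+ /* wr_callback_id<1>: at most one stateid, present only for an async copy */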
+ if (count > 1)
return -EREMOTEIO;
+ else if (count == 1) {
+ status = decode_opaque_fixed(xdr, &res->stateid,
+ NFS4_STATEID_SIZE);
+ if (unlikely(status))
+ goto out_overflow;
}
- p++;
+ p = xdr_inline_decode(xdr, 8 + 4);
+ if (unlikely(!p))
+ goto out_overflow;
p = xdr_decode_hyper(p, &res->count);
res->verifier.committed = be32_to_cpup(p);
return decode_verifier(xdr, &res->verifier.verifier);
@@ -413,6 +452,12 @@ static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
return decode_copy_requirements(xdr, res);
}
+static int decode_offload_cancel(struct xdr_stream *xdr,
+ struct nfs42_offload_status_res *res)
+{
+ return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
+}
+
static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_DEALLOCATE);
@@ -507,7 +552,34 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
status = decode_copy(xdr, res);
if (status)
goto out;
- status = decode_commit(xdr, &res->commit_res);
+ if (res->commit_res.verf)
+ status = decode_commit(xdr, &res->commit_res);
+out:
+ return status;
+}
+
+/*
+ * Decode OFFLOAD_CANCEL response
+ */
+static int nfs4_xdr_dec_offload_cancel(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_offload_status_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->osr_seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_offload_cancel(xdr, res);
+
out:
return status;
}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 51beb6e38c90..3a6904173214 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -163,6 +163,9 @@ enum {
NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */
NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */
+#ifdef CONFIG_NFS_V4_2
+ NFS_CLNT_DST_SSC_COPY_STATE, /* dst server open state on client */
+#endif /* CONFIG_NFS_V4_2 */
};
struct nfs4_state {
@@ -273,6 +276,9 @@ int nfs4_replace_transport(struct nfs_server *server,
/* nfs4proc.c */
extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
+extern int nfs4_async_handle_error(struct rpc_task *task,
+ struct nfs_server *server,
+ struct nfs4_state *state, long *timeout);
extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int);
@@ -505,7 +511,7 @@ extern int nfs4_sequence_done(struct rpc_task *task,
struct nfs4_sequence_res *res);
extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp);
-
+extern int nfs4_proc_commit(struct file *dst, __u64 offset, __u32 count, struct nfs_commitres *res);
extern const nfs4_stateid zero_stateid;
extern const nfs4_stateid invalid_stateid;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 979631411a0e..146e30862234 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -156,9 +156,23 @@ nfs4_shutdown_ds_clients(struct nfs_client *clp)
}
}
+static void
+nfs4_cleanup_callback(struct nfs_client *clp)
+{
+ struct nfs4_copy_state *cp_state;
+
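+ /* Drop any CB_OFFLOAD results that were never claimed by a COPY reply */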
+ while (!list_empty(&clp->pending_cb_stateids)) {
+ cp_state = list_entry(clp->pending_cb_stateids.next,
+ struct nfs4_copy_state, copies);
+ list_del(&cp_state->copies);
+ kfree(cp_state);
+ }
+}
+
void nfs41_shutdown_client(struct nfs_client *clp)
{
if (nfs4_has_session(clp)) {
+ nfs4_cleanup_callback(clp);
nfs4_shutdown_ds_clients(clp);
nfs4_destroy_session(clp->cl_session);
nfs4_destroy_clientid(clp);
@@ -202,6 +216,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
#if IS_ENABLED(CONFIG_NFS_V4_1)
init_waitqueue_head(&clp->cl_lock_waitq);
#endif
+ INIT_LIST_HEAD(&clp->pending_cb_stateids);
return clp;
error:
@@ -1127,7 +1142,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
nfs_server_copy_userdata(server, parent_server);
/* Get a client representation */
-#ifdef CONFIG_SUNRPC_XPRT_RDMA
+#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
rpc_set_port(data->addr, NFS_RDMA_PORT);
error = nfs4_set_client(server, data->hostname,
data->addr,
@@ -1139,7 +1154,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
parent_client->cl_net);
if (!error)
goto init_server;
-#endif /* CONFIG_SUNRPC_XPRT_RDMA */
+#endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
rpc_set_port(data->addr, NFS_PORT);
error = nfs4_set_client(server, data->hostname,
@@ -1153,7 +1168,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
-#ifdef CONFIG_SUNRPC_XPRT_RDMA
+#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
init_server:
#endif
error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 6b3b372b59b9..4288a6ecaf75 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,10 +133,15 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t count, unsigned int flags)
{
+ ssize_t ret;
+
if (file_inode(file_in) == file_inode(file_out))
return -EINVAL;
-
- return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+retry:
+ ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+ if (ret == -EAGAIN)
+ goto retry;
+ return ret;
}
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
@@ -149,6 +154,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
ret = nfs42_proc_llseek(filep, offset, whence);
if (ret != -ENOTSUPP)
return ret;
+ /* Fall through */
default:
return nfs_file_llseek(filep, offset, whence);
}
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index b6f9d84ba19b..3f23b6840547 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -506,6 +506,7 @@ static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
switch (token) {
case Opt_find_uid:
im->im_type = IDMAP_TYPE_USER;
+ /* Fall through */
case Opt_find_gid:
im->im_conv = IDMAP_CONV_NAMETOID;
ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
@@ -513,9 +514,12 @@ static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
case Opt_find_user:
im->im_type = IDMAP_TYPE_USER;
+ /* Fall through */
case Opt_find_group:
im->im_conv = IDMAP_CONV_IDTONAME;
ret = match_int(&substr, &im->im_id);
+ if (ret)
+ goto out;
break;
default:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b790976d3913..34830f6457ea 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -449,6 +449,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
stateid);
goto wait_on_recovery;
}
+ /* Fall through */
case -NFS4ERR_OPENMODE:
if (inode) {
int err;
@@ -501,8 +502,10 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
ret = -EBUSY;
break;
}
+ /* Fall through */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
+ /* Fall through */
case -NFS4ERR_GRACE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
@@ -581,12 +584,19 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
ret = -EIO;
return ret;
out_retry:
- if (ret == 0)
+ if (ret == 0) {
exception->retry = 1;
+ /*
+ * For NFS4ERR_MOVED, the client transport will need to
+ * be recomputed after migration recovery has completed.
+ */
+ if (errorcode == -NFS4ERR_MOVED)
+ rpc_task_release_transport(task);
+ }
return ret;
}
-static int
+int
nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server,
struct nfs4_state *state, long *timeout)
{
@@ -1071,15 +1081,30 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
return nfs4_call_sync_sequence(clnt, server, msg, args, res);
}
-static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
- unsigned long timestamp)
+static void
+nfs4_inc_nlink_locked(struct inode *inode)
+{
+ NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER;
+ inc_nlink(inode);
+}
+
+static void
+nfs4_dec_nlink_locked(struct inode *inode)
+{
+ NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER;
+ drop_nlink(inode);
+}
+
+static void
+update_changeattr_locked(struct inode *dir, struct nfs4_change_info *cinfo,
+ unsigned long timestamp, unsigned long cache_validity)
{
struct nfs_inode *nfsi = NFS_I(dir);
- spin_lock(&dir->i_lock);
nfsi->cache_validity |= NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_MTIME
- | NFS_INO_INVALID_DATA;
+ | NFS_INO_INVALID_DATA
+ | cache_validity;
if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) {
nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
nfsi->attrtimeo_timestamp = jiffies;
@@ -1092,7 +1117,16 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
inode_set_iversion_raw(dir, cinfo->after);
nfsi->read_cache_jiffies = timestamp;
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
+ nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
nfs_fscache_invalidate(dir);
+}
+
+static void
+update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
+ unsigned long timestamp, unsigned long cache_validity)
+{
+ spin_lock(&dir->i_lock);
+ update_changeattr_locked(dir, cinfo, timestamp, cache_validity);
spin_unlock(&dir->i_lock);
}
@@ -1354,6 +1388,7 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode,
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
break;
+ /* Fall through */
default:
return 0;
}
@@ -1773,6 +1808,10 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
data->o_res.delegation_type,
&data->o_res.delegation,
data->o_res.pagemod_limit);
+
+ if (data->o_res.do_recall)
+ nfs_async_inode_return_delegation(state->inode,
+ &data->o_res.delegation);
}
/*
@@ -2119,6 +2158,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
if (err)
break;
+ /* Fall through */
case FMODE_READ:
err = nfs4_open_recover_helper(opendata, FMODE_READ);
}
@@ -2248,6 +2288,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
+ /* Fall through */
case NFS4_OPEN_CLAIM_FH:
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
}
@@ -2481,7 +2522,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data,
if (data->file_created ||
inode_peek_iversion_raw(dir) != o_res->cinfo.after)
update_changeattr(dir, &o_res->cinfo,
- o_res->f_attr->time_start);
+ o_res->f_attr->time_start, 0);
}
if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
server->caps &= ~NFS_CAP_POSIX_LOCK;
@@ -2843,6 +2884,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
nfs_save_change_attribute(d_inode(opendata->dir)));
}
+ /* Parse layoutget results before we check for access */
+ pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
+
ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
if (ret != 0)
goto out;
@@ -2851,8 +2895,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
nfs_inode_attach_open_context(ctx);
if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
nfs4_schedule_stateid_recovery(server, state);
- else
- pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
}
out:
@@ -3220,7 +3262,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
calldata->res.lr_res = NULL;
break;
case -NFS4ERR_OLD_STATEID:
- if (nfs4_refresh_layout_stateid(&calldata->arg.lr_args->stateid,
+ if (nfs4_layoutreturn_refresh_stateid(&calldata->arg.lr_args->stateid,
+ &calldata->arg.lr_args->range,
calldata->inode))
goto lr_restart;
/* Fallthrough */
@@ -4236,7 +4279,8 @@ out:
return status;
}
-static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name)
+static int
+_nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
{
struct nfs_server *server = NFS_SERVER(dir);
struct nfs_removeargs args = {
@@ -4255,8 +4299,14 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name)
int status;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
- if (status == 0)
- update_changeattr(dir, &res.cinfo, timestamp);
+ if (status == 0) {
+ spin_lock(&dir->i_lock);
+ update_changeattr_locked(dir, &res.cinfo, timestamp, 0);
+ /* Removing a directory decrements nlink in the parent */
+ if (ftype == NF4DIR && dir->i_nlink > 2)
+ nfs4_dec_nlink_locked(dir);
+ spin_unlock(&dir->i_lock);
+ }
return status;
}
@@ -4273,7 +4323,7 @@ static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
nfs4_inode_make_writeable(inode);
}
do {
- err = _nfs4_proc_remove(dir, &dentry->d_name);
+ err = _nfs4_proc_remove(dir, &dentry->d_name, NF4REG);
trace_nfs4_remove(dir, &dentry->d_name, err);
err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
@@ -4287,7 +4337,7 @@ static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name)
int err;
do {
- err = _nfs4_proc_remove(dir, name);
+ err = _nfs4_proc_remove(dir, name, NF4DIR);
trace_nfs4_remove(dir, name, err);
err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
@@ -4331,7 +4381,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
&data->timeout) == -EAGAIN)
return 0;
if (task->tk_status == 0)
- update_changeattr(dir, &res->cinfo, res->dir_attr->time_start);
+ update_changeattr(dir, &res->cinfo,
+ res->dir_attr->time_start, 0);
return 1;
}
@@ -4373,9 +4424,18 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
return 0;
if (task->tk_status == 0) {
- update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start);
- if (new_dir != old_dir)
- update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start);
+ if (new_dir != old_dir) {
+ /* Note: If we moved a directory, nlink will change */
+ update_changeattr(old_dir, &res->old_cinfo,
+ res->old_fattr->time_start,
+ NFS_INO_INVALID_OTHER);
+ update_changeattr(new_dir, &res->new_cinfo,
+ res->new_fattr->time_start,
+ NFS_INO_INVALID_OTHER);
+ } else
+ update_changeattr(old_dir, &res->old_cinfo,
+ res->old_fattr->time_start,
+ 0);
}
return 1;
}
@@ -4416,7 +4476,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
- update_changeattr(dir, &res.cinfo, res.fattr->time_start);
+ update_changeattr(dir, &res.cinfo, res.fattr->time_start, 0);
status = nfs_post_op_update_inode(inode, res.fattr);
if (!status)
nfs_setsecurity(inode, res.fattr, res.label);
@@ -4491,8 +4551,13 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
- update_changeattr(dir, &data->res.dir_cinfo,
- data->res.fattr->time_start);
+ spin_lock(&dir->i_lock);
+ update_changeattr_locked(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start, 0);
+ /* Creating a directory bumps nlink in the parent */
+ if (data->arg.ftype == NF4DIR)
+ nfs4_inc_nlink_locked(dir);
+ spin_unlock(&dir->i_lock);
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
}
return status;
@@ -5073,6 +5138,40 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
}
+static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args,
+ struct nfs_commitres *res)
+{
+ struct inode *dst_inode = file_inode(dst);
+ struct nfs_server *server = NFS_SERVER(dst_inode);
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
+ .rpc_argp = args,
+ .rpc_resp = res,
+ };
+
+ args->fh = NFS_FH(dst_inode);
+ return nfs4_call_sync(server->client, server, &msg,
+ &args->seq_args, &res->seq_res, 1);
+}
+
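+/* Synchronous COMMIT, used by the COPY offload path to commit copied data */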
+int nfs4_proc_commit(struct file *dst, __u64 offset, __u32 count, struct nfs_commitres *res)
+{
+ struct nfs_commitargs args = {
+ .offset = offset,
+ .count = count,
+ };
+ struct nfs_server *dst_server = NFS_SERVER(file_inode(dst));
+ struct nfs4_exception exception = { };
+ int status;
+
+ do {
+ status = _nfs4_proc_commit(dst, &args, res);
+ status = nfs4_handle_exception(dst_server, status, &exception);
+ } while (exception.retry);
+
+ return status;
+}
+
struct nfs4_renewdata {
struct nfs_client *client;
unsigned long timestamp;
@@ -5902,7 +6001,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
data->res.lr_res = NULL;
break;
case -NFS4ERR_OLD_STATEID:
- if (nfs4_refresh_layout_stateid(&data->args.lr_args->stateid,
+ if (nfs4_layoutreturn_refresh_stateid(&data->args.lr_args->stateid,
+ &data->args.lr_args->range,
data->inode))
goto lr_restart;
/* Fallthrough */
@@ -6209,11 +6309,13 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
break;
+ /* Fall through */
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_EXPIRED:
nfs4_free_revoked_stateid(calldata->server,
&calldata->arg.stateid,
task->tk_msg.rpc_cred);
+ /* Fall through */
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
@@ -7727,7 +7829,7 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp,
}
out:
clp->cl_sp4_flags = flags;
- return 0;
+ return ret;
}
struct nfs41_exchange_id_data {
@@ -8168,7 +8270,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args,
args->bc_attrs.max_resp_sz = max_bc_payload;
args->bc_attrs.max_resp_sz_cached = 0;
args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
- args->bc_attrs.max_reqs = min_t(unsigned short, max_session_cb_slots, 1);
+ args->bc_attrs.max_reqs = max_t(unsigned short, max_session_cb_slots, 1);
dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
@@ -8851,7 +8953,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
server = NFS_SERVER(lrp->args.inode);
switch (task->tk_status) {
case -NFS4ERR_OLD_STATEID:
- if (nfs4_refresh_layout_stateid(&lrp->args.stateid,
+ if (nfs4_layoutreturn_refresh_stateid(&lrp->args.stateid,
+ &lrp->args.range,
lrp->args.inode))
goto out_restart;
/* Fallthrough */
@@ -9554,6 +9657,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_LGOPEN
| NFS_CAP_ALLOCATE
| NFS_CAP_COPY
+ | NFS_CAP_OFFLOAD_CANCEL
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2bf2eaa08ca7..3df0eb52da1c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -274,7 +274,7 @@ static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
static int nfs4_begin_drain_session(struct nfs_client *clp)
{
struct nfs4_session *ses = clp->cl_session;
- int ret = 0;
+ int ret;
if (clp->cl_slot_tbl)
return nfs4_drain_slot_tbl(clp->cl_slot_tbl);
@@ -1525,6 +1525,7 @@ restart:
default:
pr_err("NFS: %s: unhandled error %d\n",
__func__, status);
+ /* Fall through */
case -ENOMEM:
case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD:
@@ -1588,6 +1589,22 @@ restart:
}
clear_bit(NFS_STATE_RECLAIM_NOGRACE,
&state->flags);
+#ifdef CONFIG_NFS_V4_2
+ if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) {
+ struct nfs4_copy_state *copy;
+
+ spin_lock(&sp->so_server->nfs_client->cl_lock);
+ list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
+ if (memcmp(&state->stateid.other, &copy->parent_state->stateid.other, NFS4_STATEID_SIZE))
+ continue;
+ copy->flags = 1;
+ complete(&copy->completion);
+ printk("AGLO: server rebooted waking up the copy\n");
+ break;
+ }
+ spin_unlock(&sp->so_server->nfs_client->cl_lock);
+ }
+#endif /* CONFIG_NFS_V4_2 */
nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
goto restart;
@@ -1597,6 +1614,7 @@ restart:
default:
printk(KERN_ERR "NFS: %s: unhandled error %d\n",
__func__, status);
+ /* Fall through */
case -ENOENT:
case -ENOMEM:
case -EACCES:
@@ -1608,6 +1626,7 @@ restart:
break;
case -EAGAIN:
ssleep(1);
+ /* Fall through */
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
@@ -1939,7 +1958,9 @@ static int nfs4_establish_lease(struct nfs_client *clp)
clp->cl_mvops->reboot_recovery_ops;
int status;
- nfs4_begin_drain_session(clp);
+ status = nfs4_begin_drain_session(clp);
+ if (status != 0)
+ return status;
cred = nfs4_get_clid_cred(clp);
if (cred == NULL)
return -ENOENT;
@@ -2027,7 +2048,9 @@ static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
goto out;
}
- nfs4_begin_drain_session(clp);
+ status = nfs4_begin_drain_session(clp);
+ if (status != 0)
+ return status;
status = nfs4_replace_transport(server, locations);
if (status != 0) {
@@ -2190,9 +2213,11 @@ again:
case -ETIMEDOUT:
if (clnt->cl_softrtry)
break;
+ /* Fall through */
case -NFS4ERR_DELAY:
case -EAGAIN:
ssleep(1);
+ /* Fall through */
case -NFS4ERR_STALE_CLIENTID:
dprintk("NFS: %s after status %d, retrying\n",
__func__, status);
@@ -2204,6 +2229,7 @@ again:
}
if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
break;
+ /* Fall through */
case -NFS4ERR_CLID_INUSE:
case -NFS4ERR_WRONGSEC:
/* No point in retrying if we already used RPC_AUTH_UNIX */
@@ -2374,7 +2400,9 @@ static int nfs4_reset_session(struct nfs_client *clp)
if (!nfs4_has_session(clp))
return 0;
- nfs4_begin_drain_session(clp);
+ status = nfs4_begin_drain_session(clp);
+ if (status != 0)
+ return status;
cred = nfs4_get_clid_cred(clp);
status = nfs4_proc_destroy_session(clp->cl_session, cred);
switch (status) {
@@ -2417,7 +2445,9 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
if (!nfs4_has_session(clp))
return 0;
- nfs4_begin_drain_session(clp);
+ ret = nfs4_begin_drain_session(clp);
+ if (ret != 0)
+ return ret;
cred = nfs4_get_clid_cred(clp);
ret = nfs4_proc_bind_conn_to_session(clp, cred);
if (cred)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index cd41d2577a04..b7bde12d8cd5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7789,6 +7789,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
PROC42(CLONE, enc_clone, dec_clone),
PROC42(COPY, enc_copy, dec_copy),
+ PROC42(OFFLOAD_CANCEL, enc_offload_cancel, dec_offload_cancel),
PROC(LOOKUPP, enc_lookupp, dec_lookupp),
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 67d19cd92e44..bb5476a6d264 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -561,6 +561,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
case FLUSH_COND_STABLE:
if (nfs_reqs_to_commit(cinfo))
break;
+ /* fall through */
default:
hdr->args.stable = NFS_FILE_SYNC;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bcc3addec3c5..e8f232de484f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,18 +361,32 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
/*
* Update the seqid of a layout stateid
*/
-bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode)
+bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+ struct pnfs_layout_range *dst_range,
+ struct inode *inode)
{
struct pnfs_layout_hdr *lo;
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
bool ret = false;
+ LIST_HEAD(head);
+ int err;
spin_lock(&inode->i_lock);
lo = NFS_I(inode)->layout;
if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
- dst->seqid = lo->plh_stateid.seqid;
- ret = true;
+ err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
+ if (err != -EBUSY) {
+ dst->seqid = lo->plh_stateid.seqid;
+ *dst_range = range;
+ ret = true;
+ }
}
spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&head);
return ret;
}
@@ -1018,7 +1032,6 @@ pnfs_alloc_init_layoutget_args(struct inode *ino,
nfs4_stateid_copy(&lgp->args.stateid, stateid);
lgp->gfp_flags = gfp_flags;
lgp->cred = get_rpccred(ctx->cred);
- lgp->callback_count = raw_seqcount_begin(&server->nfs_client->cl_callback_count);
return lgp;
}
@@ -1160,12 +1173,21 @@ static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
struct pnfs_layout_segment *s;
+ enum pnfs_iomode iomode;
+ u32 seq;
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
return false;
- /* Defer layoutreturn until all lsegs are done */
+ seq = lo->plh_return_seq;
+ iomode = lo->plh_return_iomode;
+
+ /* Defer layoutreturn until all recalled lsegs are done */
list_for_each_entry(s, &lo->plh_segs, pls_list) {
+ if (seq && pnfs_seqid_is_newer(s->pls_seq, seq))
+ continue;
+ if (iomode != IOMODE_ANY && s->pls_range.iomode != iomode)
+ continue;
if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
return false;
}
@@ -1609,7 +1631,7 @@ pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
(range->iomode != ls_range->iomode &&
strict_iomode) ||
!pnfs_lseg_range_intersecting(ls_range, range))
- return 0;
+ return false;
/* range1 covers only the first byte in the range */
range1 = *range;
@@ -1631,7 +1653,6 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
- !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
pnfs_lseg_range_match(&lseg->pls_range, range,
strict_iomode)) {
ret = pnfs_get_lseg(lseg);
@@ -1731,6 +1752,17 @@ static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
TASK_UNINTERRUPTIBLE);
}
+static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
+{
+ atomic_inc(&lo->plh_outstanding);
+}
+
+static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
+{
+ if (atomic_dec_and_test(&lo->plh_outstanding))
+ wake_up_var(&lo->plh_outstanding);
+}
+
static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
{
unsigned long *bitlock = &lo->plh_flags;
@@ -1791,12 +1823,6 @@ pnfs_update_layout(struct inode *ino,
goto out;
}
- if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
- trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
- PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
- goto out;
- }
-
if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_MDSTHRESH);
@@ -1830,6 +1856,21 @@ lookup_again:
goto out_unlock;
}
+ /*
+ * If the layout segment list is empty, but there are outstanding
+ * layoutget calls, then they might be subject to a layoutrecall.
+ */
+ if (list_empty(&lo->plh_segs) &&
+ atomic_read(&lo->plh_outstanding) != 0) {
+ spin_unlock(&ino->i_lock);
+ if (wait_var_event_killable(&lo->plh_outstanding,
+ atomic_read(&lo->plh_outstanding) == 0
+ || !list_empty(&lo->plh_segs)))
+ goto out_put_layout_hdr;
+ pnfs_put_layout_hdr(lo);
+ goto lookup_again;
+ }
+
lseg = pnfs_find_lseg(lo, &arg, strict_iomode);
if (lseg) {
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
@@ -1903,7 +1944,7 @@ lookup_again:
PNFS_UPDATE_LAYOUT_BLOCKED);
goto out_unlock;
}
- atomic_inc(&lo->plh_outstanding);
+ nfs_layoutget_begin(lo);
spin_unlock(&ino->i_lock);
_add_to_server_list(lo, server);
@@ -1920,14 +1961,14 @@ lookup_again:
if (!lgp) {
trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
PNFS_UPDATE_LAYOUT_NOMEM);
- atomic_dec(&lo->plh_outstanding);
+ nfs_layoutget_end(lo);
goto out_put_layout_hdr;
}
lseg = nfs4_proc_layoutget(lgp, &timeout);
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
- atomic_dec(&lo->plh_outstanding);
+ nfs_layoutget_end(lo);
if (IS_ERR(lseg)) {
switch(PTR_ERR(lseg)) {
case -EBUSY:
@@ -1935,15 +1976,6 @@ lookup_again:
lseg = NULL;
break;
case -ERECALLCONFLICT:
- /* Huh? We hold no layouts, how is there a recall? */
- if (first) {
- lseg = NULL;
- break;
- }
- /* Destroy the existing layout and start over */
- if (time_after(jiffies, giveup))
- pnfs_destroy_layout(NFS_I(ino));
- /* Fallthrough */
case -EAGAIN:
break;
default:
@@ -2022,7 +2054,7 @@ _pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx)
goto out_unlock;
if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags))
goto out_unlock;
- atomic_inc(&lo->plh_outstanding);
+ nfs_layoutget_begin(lo);
spin_unlock(&ino->i_lock);
_add_to_server_list(lo, NFS_SERVER(ino));
return lo;
@@ -2146,9 +2178,6 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
} else
lo = NFS_I(lgp->args.inode)->layout;
- if (read_seqcount_retry(&srv->nfs_client->cl_callback_count,
- lgp->callback_count))
- return;
lseg = pnfs_layout_process(lgp);
if (!IS_ERR(lseg)) {
iomode = lgp->args.range.iomode;
@@ -2163,8 +2192,8 @@ void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
struct inode *inode = lgp->args.inode;
if (inode) {
struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
- atomic_dec(&lo->plh_outstanding);
pnfs_clear_first_layoutget(lo);
+ nfs_layoutget_end(lo);
}
pnfs_layoutget_free(lgp);
}
@@ -2238,15 +2267,31 @@ out_forget:
return ERR_PTR(-EAGAIN);
}
+static int
+mark_lseg_invalid_or_return(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ if (!mark_lseg_invalid(lseg, tmp_list))
+ return 0;
+ pnfs_cache_lseg_for_layoutreturn(lseg->pls_layout, lseg);
+ return 1;
+}
+
/**
* pnfs_mark_matching_lsegs_return - Free or return matching layout segments
* @lo: pointer to layout header
* @tmp_list: list header to be used with pnfs_free_lseg_list()
* @return_range: describe layout segment ranges to be returned
+ * @seq: stateid seqid to match
*
* This function is mainly intended for use by layoutrecall. It attempts
* to free the layout segment immediately, or else to mark it for return
* as soon as its reference count drops to zero.
+ *
+ * Returns
+ * - 0: a layoutreturn needs to be scheduled.
+ * - -EBUSY: there are layout segments that are still in use.
+ * - -ENOENT: there are no layout segments that need to be returned.
*/
int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
@@ -2259,9 +2304,6 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin lo %p\n", __func__, lo);
- if (list_empty(&lo->plh_segs))
- return 0;
-
assert_spin_locked(&lo->plh_inode->i_lock);
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
@@ -2271,16 +2313,23 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
lseg, lseg->pls_range.iomode,
lseg->pls_range.offset,
lseg->pls_range.length);
- if (mark_lseg_invalid(lseg, tmp_list))
+ if (mark_lseg_invalid_or_return(lseg, tmp_list))
continue;
remaining++;
set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
}
- if (remaining)
+ if (remaining) {
pnfs_set_plh_return_info(lo, return_range->iomode, seq);
+ return -EBUSY;
+ }
- return remaining;
+ if (!list_empty(&lo->plh_return_segs)) {
+ pnfs_set_plh_return_info(lo, return_range->iomode, seq);
+ return 0;
+ }
+
+ return -ENOENT;
}
void pnfs_error_mark_layout_for_return(struct inode *inode,
@@ -2305,7 +2354,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
* segments at hand when sending layoutreturn. See pnfs_put_lseg()
* for how it works.
*/
- if (!pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0)) {
+ if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0) != -EBUSY) {
nfs4_stateid stateid;
enum pnfs_iomode iomode;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 3fe81424337d..ece367ebde69 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -259,7 +259,9 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
bool is_recall);
int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
bool is_recall);
-bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode);
+bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+ struct pnfs_layout_range *dst_range,
+ struct inode *inode);
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
@@ -780,7 +782,8 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void)
{
}
-static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst,
+static inline bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
+ struct pnfs_layout_range *dst_range,
struct inode *inode)
{
return false;
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 32ba2d471853..d5e4d3cd8c7f 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -61,7 +61,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
/* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty, it will need to put the lseg reference.
- * Note this must be called holding i_lock
+ * Note this must be called holding nfsi->commit_mutex
*/
void
pnfs_generic_clear_request_commit(struct nfs_page *req,
@@ -149,9 +149,7 @@ restart:
if (list_empty(&b->written)) {
freeme = b->wlseg;
b->wlseg = NULL;
- spin_unlock(&cinfo->inode->i_lock);
pnfs_put_lseg(freeme);
- spin_lock(&cinfo->inode->i_lock);
goto restart;
}
}
@@ -167,7 +165,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
LIST_HEAD(pages);
int i;
- spin_lock(&cinfo->inode->i_lock);
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
for (i = idx; i < fl_cinfo->nbuckets; i++) {
bucket = &fl_cinfo->buckets[i];
if (list_empty(&bucket->committing))
@@ -177,12 +175,12 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx)
list_for_each(pos, &bucket->committing)
cinfo->ds->ncommitting--;
list_splice_init(&bucket->committing, &pages);
- spin_unlock(&cinfo->inode->i_lock);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
nfs_retry_commit(&pages, freeme, cinfo, i);
pnfs_put_lseg(freeme);
- spin_lock(&cinfo->inode->i_lock);
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
}
- spin_unlock(&cinfo->inode->i_lock);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
}
static unsigned int
@@ -222,13 +220,13 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
struct list_head *pos;
bucket = &cinfo->ds->buckets[data->ds_commit_index];
- spin_lock(&cinfo->inode->i_lock);
+ mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
list_for_each(pos, &bucket->committing)
cinfo->ds->ncommitting--;
list_splice_init(&bucket->committing, pages);
data->lseg = bucket->clseg;
bucket->clseg = NULL;
- spin_unlock(&cinfo->inode->i_lock);
+ mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5e470e233c83..ac4b2f005778 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -884,7 +884,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
#endif
seq_printf(m, "\n");
- rpc_print_iostats(m, nfss->client);
+ rpc_clnt_show_stats(m, nfss->client);
return 0;
}
@@ -2899,7 +2899,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
if (!val)
return -EINVAL;
ret = kstrtoul(val, 0, &num);
- if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
+ if (ret || num > NFS_CALLBACK_MAXPORTNR)
return -EINVAL;
*((unsigned int *)kp->arg) = num;
return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a057b4f45a46..586726a590d8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1406,6 +1406,8 @@ static void nfs_async_write_error(struct list_head *head)
static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
{
nfs_async_write_error(&hdr->pages);
+ filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset,
+ hdr->args.offset + hdr->args.count - 1);
}
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 36358d435cb0..426f55005697 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -102,6 +102,7 @@ struct nfsd_net {
time_t nfsd4_lease;
time_t nfsd4_grace;
+ bool somebody_reclaimed;
bool nfsd_net_up;
bool lockd_up;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 6259a4b8579f..9eb8086ea841 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -202,7 +202,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+ nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+ &argp->first, cnt);
if (!nvecs)
RETURN_STATUS(nfserr_io);
nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
@@ -289,6 +290,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
RETURN_STATUS(nfserr_nametoolong);
argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+ page_address(rqstp->rq_arg.pages[0]),
argp->tlen);
if (IS_ERR(argp->tname))
RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
@@ -302,6 +304,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
fh_init(&resp->fh, NFS3_FHSIZE);
nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
argp->tname, &resp->fh);
+ kfree(argp->tname);
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 1f04d2a70d25..601bf33c26a0 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -746,30 +746,17 @@ static int max_cb_time(struct net *net)
return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
}
-static struct rpc_cred *callback_cred;
-
-int set_callback_cred(void)
-{
- if (callback_cred)
- return 0;
- callback_cred = rpc_lookup_machine_cred("nfs");
- if (!callback_cred)
- return -ENOMEM;
- return 0;
-}
-
-void cleanup_callback_cred(void)
-{
- if (callback_cred) {
- put_rpccred(callback_cred);
- callback_cred = NULL;
- }
-}
-
static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
- return get_rpccred(callback_cred);
+ char *principal = clp->cl_cred.cr_targ_princ ?
+ clp->cl_cred.cr_targ_princ : "nfs";
+ struct rpc_cred *cred;
+
+ cred = rpc_lookup_machine_cred(principal);
+ if (!IS_ERR(cred))
+ get_rpccred(cred);
+ return cred;
} else {
struct rpc_auth *auth = client->cl_auth;
struct auth_cred acred = {};
@@ -980,6 +967,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
break;
case -ESERVERFAULT:
++session->se_cb_seq_nr;
+ /* Fall through */
case 1:
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 228faf00a594..2b36aa037ce0 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -133,27 +133,20 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
if (!(exp->ex_flags & NFSEXP_PNFS))
return;
- /*
- * If flex file is configured, use it by default. Otherwise
- * check if the file system supports exporting a block-like layout.
- * If the block device supports reservations prefer the SCSI layout,
- * otherwise advertise the block layout.
- */
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES;
#endif
#ifdef CONFIG_NFSD_BLOCKLAYOUT
- /* overwrite flex file layout selection if needed */
if (sb->s_export_op->get_uuid &&
sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks)
exp->ex_layout_types |= 1 << LAYOUT_BLOCK_VOLUME;
#endif
#ifdef CONFIG_NFSD_SCSILAYOUT
- /* overwrite block layout selection if needed */
if (sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks &&
- sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
+ sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops &&
+ blk_queue_scsi_passthrough(sb->s_bdev->bd_disk->queue))
exp->ex_layout_types |= 1 << LAYOUT_SCSI;
#endif
}
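With the priority comments dropped, the function reads as a plain capability bitmask: every layout type the export can actually serve gets its bit set, and the client picks among them. The new blk_queue_scsi_passthrough() test keeps SCSI layouts from being advertised on request queues that cannot accept SCSI commands. A reduced sketch of the accumulation (can_export_block() and scsi_pr_capable() are hypothetical stand-ins for the export-op and block-device checks above):

    unsigned int types = 0;

    #ifdef CONFIG_NFSD_FLEXFILELAYOUT
            types |= 1 << LAYOUT_FLEX_FILES;
    #endif
            if (can_export_block(sb))               /* hypothetical */
                    types |= 1 << LAYOUT_BLOCK_VOLUME;
            if (scsi_pr_capable(sb))                /* hypothetical */
                    types |= 1 << LAYOUT_SCSI;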
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5d99e8810b85..b7bc6e1a85ac 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -354,6 +354,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct svc_fh *resfh = NULL;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool reclaim = false;
dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
(int)open->op_fname.len, open->op_fname.data,
@@ -424,6 +425,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ reclaim = true;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, cstate, open);
@@ -452,6 +454,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
WARN(status && open->op_created,
"nfsd4_process_open2 failed to open newly-created file! status=%u\n",
be32_to_cpu(status));
+ if (reclaim && !status)
+ nn->somebody_reclaimed = true;
out:
if (resfh && resfh != &cstate->current_fh) {
fh_dup2(&cstate->current_fh, resfh);
@@ -982,24 +986,6 @@ out:
return status;
}
-static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
-{
- int i = 1;
- int buflen = write->wr_buflen;
-
- vec[0].iov_base = write->wr_head.iov_base;
- vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
- buflen -= vec[0].iov_len;
-
- while (buflen) {
- vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
- vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
- buflen -= vec[i].iov_len;
- i++;
- }
- return i;
-}
-
static __be32
nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -1027,7 +1013,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
write->wr_how_written = write->wr_stable_how;
gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
- nvecs = fill_in_write_vector(rqstp->rq_vec, write);
+ nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
+ &write->wr_head, write->wr_buflen);
+ if (!nvecs)
+ return nfserr_io;
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
@@ -1599,7 +1588,7 @@ static const char *nfsd4_op_name(unsigned opnum);
*/
static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
{
- struct nfsd4_op *op = &args->ops[0];
+ struct nfsd4_op *first_op = &args->ops[0];
/* These ordering requirements don't apply to NFSv4.0: */
if (args->minorversion == 0)
@@ -1607,12 +1596,17 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
/* This is weird, but OK, not our problem: */
if (args->opcnt == 0)
return nfs_ok;
- if (op->status == nfserr_op_illegal)
+ if (first_op->status == nfserr_op_illegal)
return nfs_ok;
- if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
+ if (!(nfsd4_ops[first_op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
return nfserr_op_not_in_session;
- if (op->opnum == OP_SEQUENCE)
+ if (first_op->opnum == OP_SEQUENCE)
return nfs_ok;
+ /*
+ * So first_op is something allowed outside a session, like
+ * EXCHANGE_ID; but then it has to be the only op in the
+ * compound:
+ */
if (args->opcnt != 1)
return nfserr_not_only_op;
return nfs_ok;
@@ -1726,6 +1720,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (status) {
op = &args->ops[0];
op->status = status;
+ resp->opcnt = 1;
goto encode_op;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 857141446d6b..b0ca0efd2875 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1979,8 +1979,10 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL);
target->cr_raw_principal = kstrdup(source->cr_raw_principal,
GFP_KERNEL);
- if ((source->cr_principal && ! target->cr_principal) ||
- (source->cr_raw_principal && ! target->cr_raw_principal))
+ target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL);
+ if ((source->cr_principal && !target->cr_principal) ||
+ (source->cr_raw_principal && !target->cr_raw_principal) ||
+ (source->cr_targ_princ && !target->cr_targ_princ))
return -ENOMEM;
target->cr_flavor = source->cr_flavor;
@@ -2057,6 +2059,7 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
return false;
+ /* XXX: check that the cr_targ_princ fields match? */
if (cr1->cr_principal == cr2->cr_principal)
return true;
if (!cr1->cr_principal || !cr2->cr_principal)
@@ -2956,18 +2959,18 @@ out_no_session:
return status;
}
-static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
+static bool nfsd4_compound_in_session(struct nfsd4_compound_state *cstate, struct nfs4_sessionid *sid)
{
- if (!session)
+ if (!cstate->session)
return false;
- return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
+ return !memcmp(sid, &cstate->session->se_sessionid, sizeof(*sid));
}
__be32
nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
- struct nfsd4_destroy_session *sessionid = &u->destroy_session;
+ struct nfs4_sessionid *sessionid = &u->destroy_session.sessionid;
struct nfsd4_session *ses;
__be32 status;
int ref_held_by_me = 0;
@@ -2975,14 +2978,14 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
status = nfserr_not_only_op;
- if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
+ if (nfsd4_compound_in_session(cstate, sessionid)) {
if (!nfsd4_last_compound_op(r))
goto out;
ref_held_by_me++;
}
- dump_sessionid(__func__, &sessionid->sessionid);
+ dump_sessionid(__func__, sessionid);
spin_lock(&nn->client_lock);
- ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status);
+ ses = find_in_sessionid_hashtbl(sessionid, net, &status);
if (!ses)
goto out_client_lock;
status = nfserr_wrong_cred;
@@ -3945,9 +3948,9 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
/*
* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
- * callback (and since the lease code is serialized by the kernel
- * lock) we know the server hasn't removed the lease yet, we know
- * it's safe to take a reference.
+ * callback (and since the lease code is serialized by the
+ * i_lock) we know the server hasn't removed the lease yet, and
+ * we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
nfsd4_run_cb(&dp->dl_recall);
@@ -4693,6 +4696,28 @@ nfsd4_end_grace(struct nfsd_net *nn)
*/
}
+/*
+ * If we've waited a lease period but there are still clients trying to
+ * reclaim, wait a little longer to give them a chance to finish.
+ */
+static bool clients_still_reclaiming(struct nfsd_net *nn)
+{
+ unsigned long now = get_seconds();
+ unsigned long double_grace_period_end = nn->boot_time +
+ 2 * nn->nfsd4_lease;
+
+ if (!nn->somebody_reclaimed)
+ return false;
+ nn->somebody_reclaimed = false;
+ /*
+ * If we've given them *two* lease times to reclaim, and they're
+ * still not done, give up:
+ */
+ if (time_after(now, double_grace_period_end))
+ return false;
+ return true;
+}
+
static time_t
nfs4_laundromat(struct nfsd_net *nn)
{
@@ -4706,6 +4731,11 @@ nfs4_laundromat(struct nfsd_net *nn)
time_t t, new_timeo = nn->nfsd4_lease;
dprintk("NFSD: laundromat service - starting\n");
+
+ if (clients_still_reclaiming(nn)) {
+ new_timeo = 0;
+ goto out;
+ }
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);
spin_lock(&nn->client_lock);
@@ -4803,7 +4833,7 @@ nfs4_laundromat(struct nfsd_net *nn)
posix_unblock_lock(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
-
+out:
new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
return new_timeo;
}
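A worked instance of the cut-off, using the 45-second default lease set in the nfsctl.c hunk later in this series:

    /* boot_time = T, nfsd4_lease = 45:
     *   double_grace_period_end = T + 90
     * While clients keep reclaiming and now <= T + 90, the laundromat
     * returns new_timeo = 0 (clamped to NFSD_LAUNDROMAT_MINTIMEOUT) and
     * skips nfsd4_end_grace(); once time_after(now, T + 90) is true, the
     * extension stops even if somebody_reclaimed is still being set.
     */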
@@ -6053,6 +6083,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case 0: /* success! */
nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
status = 0;
+ if (lock->lk_reclaim)
+ nn->somebody_reclaimed = true;
break;
case FILE_LOCK_DEFERRED:
nbl = NULL;
@@ -6293,7 +6325,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
return status;
}
- inode = file_inode(filp);
+ inode = locks_inode(filp);
flctx = inode->i_flctx;
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -7199,14 +7231,10 @@ nfs4_state_start(void)
{
int ret;
- ret = set_callback_cred();
- if (ret)
- return ret;
-
laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
if (laundry_wq == NULL) {
ret = -ENOMEM;
- goto out_cleanup_cred;
+ goto out;
}
ret = nfsd4_create_callback_queue();
if (ret)
@@ -7217,8 +7245,7 @@ nfs4_state_start(void)
out_free_laundry:
destroy_workqueue(laundry_wq);
-out_cleanup_cred:
- cleanup_callback_cred();
+out:
return ret;
}
@@ -7255,7 +7282,6 @@ nfs4_state_shutdown(void)
{
destroy_workqueue(laundry_wq);
nfsd4_destroy_callback_queue();
- cleanup_callback_cred();
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index a96843c59fc1..418fa9c78186 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1390,10 +1390,8 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
p += XDR_QUADLEN(dummy);
}
- /* ssp_window and ssp_num_gss_handles */
+ /* ignore ssp_window and ssp_num_gss_handles: */
READ_BUF(8);
- dummy = be32_to_cpup(p++);
- dummy = be32_to_cpup(p++);
break;
default:
goto xdr_error;
@@ -2006,6 +2004,31 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
return p;
}
+/*
+ * ctime (in NFSv4, time_metadata) is not writeable, and the client
+ * doesn't really care what resolution could theoretically be stored by
+ * the filesystem.
+ *
+ * The client cares how close together changes can be while still
+ * guaranteeing ctime changes. For most filesystems (which have
+ * timestamps with nanosecond fields) that is limited by the resolution
+ * of the time returned from current_time() (which I'm assuming to be
+ * 1/HZ).
+ */
+static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
+{
+ struct timespec ts;
+ u32 ns;
+
+ ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran);
+ ts = ns_to_timespec(ns);
+
+ p = xdr_encode_hyper(p, ts.tv_sec);
+ *p++ = cpu_to_be32(ts.tv_nsec);
+
+ return p;
+}
+
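A worked instance of the comment's arithmetic, assuming HZ == 100 and a filesystem with nanosecond timestamps (s_time_gran == 1):

    /* ns = max(NSEC_PER_SEC / HZ, s_time_gran)
     *    = max(1000000000 / 100, 1) = 10000000
     * ns_to_timespec(10000000) = { .tv_sec = 0, .tv_nsec = 10000000 }
     * -> time_delta is encoded as 10 ms, the effective ctime granularity,
     *    instead of the old hard-coded 1 second.
     */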
static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
{
*p++ = cpu_to_be32(c->atomic);
@@ -2797,9 +2820,7 @@ out_acl:
p = xdr_reserve_space(xdr, 12);
if (!p)
goto out_resource;
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(1);
- *p++ = cpu_to_be32(0);
+ p = encode_time_delta(p, d_inode(dentry));
}
if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
p = xdr_reserve_space(xdr, 12);
@@ -2868,6 +2889,16 @@ out_acl:
goto out;
}
+ if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ goto out_resource;
+ if (IS_I_VERSION(d_inode(dentry)))
+ *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR);
+ else
+ *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
+ }
+
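In effect, filesystems that maintain a persistent i_version advertise a monotonically increasing change attribute, while everything else declares it ctime-derived, letting clients calibrate their cache-validation expectations. An illustration (the ext4 mount-option detail is an assumption, not something this hunk states):

    /* IS_I_VERSION true  (e.g. ext4 with -o iversion)
     *         -> NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR
     * IS_I_VERSION false -> NFS4_CHANGE_TYPE_IS_TIME_METADATA
     */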
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
status = nfsd4_encode_security_label(xdr, rqstp, context,
contextlen);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d107b4426f7e..7fb9f7c667b1 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -73,7 +73,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size);
#endif
-static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+static ssize_t (*const write_op[])(struct file *, char *, size_t) = {
[NFSD_Fh] = write_filehandle,
[NFSD_FO_UnlockIP] = write_unlock_ip,
[NFSD_FO_UnlockFS] = write_unlock_fs,
@@ -1237,8 +1237,9 @@ static __net_init int nfsd_init_net(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
- nn->nfsd4_lease = 90; /* default lease time */
- nn->nfsd4_grace = 90;
+ nn->nfsd4_lease = 45; /* default lease time */
+ nn->nfsd4_grace = 45;
+ nn->somebody_reclaimed = false;
nn->clverifier_counter = prandom_u32();
nn->clientid_counter = prandom_u32();
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 3fce905d0365..066899929863 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -360,6 +360,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+ FATTR4_WORD2_CHANGE_ATTR_TYPE | \
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index a008e7634181..b319080288c3 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -451,7 +451,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
switch (fsid_type) {
case FSID_DEV:
if (!old_valid_dev(exp_sb(exp)->s_dev))
- return 0;
+ return false;
/* FALL THROUGH */
case FSID_MAJOR_MINOR:
case FSID_ENCODE_DEV:
@@ -461,13 +461,13 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
case FSID_UUID8:
case FSID_UUID16:
if (!is_root_export(exp))
- return 0;
+ return false;
/* fall through */
case FSID_UUID4_INUM:
case FSID_UUID16_INUM:
return exp->ex_uuid != NULL;
}
- return 1;
+ return true;
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index f107f9fa8e15..0d20fd161225 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -218,7 +218,8 @@ nfsd_proc_write(struct svc_rqst *rqstp)
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+ nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+ &argp->first, cnt);
if (!nvecs)
return nfserr_io;
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
@@ -453,6 +454,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
return nfserr_nametoolong;
argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+ page_address(rqstp->rq_arg.pages[0]),
argp->tlen);
if (IS_ERR(argp->tname))
return nfserrno(PTR_ERR(argp->tname));
@@ -465,6 +467,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
argp->tname, &newfh);
+ kfree(argp->tname);
fh_put(&argp->ffh);
fh_put(&newfh);
return nfserr;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index f3772ea8ba0d..0b15dac7e609 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -617,8 +617,6 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
struct nfsd_net *nn);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
-extern int set_callback_cred(void);
-extern void cleanup_callback_cred(void);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index c5fa3dee72fc..7da0fac71dc2 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -51,7 +51,7 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return err;
}
-static int nilfs_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6ffeca84d7c3..1b9067cf4511 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -834,7 +834,7 @@ static int nilfs_setup_super(struct super_block *sb, int is_mount)
sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1);
- sbp[0]->s_mtime = cpu_to_le64(get_seconds());
+ sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds());
skip_mount_setup:
sbp[0]->s_state =
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index a6365e6bc047..58d77dc696eb 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -19,6 +19,7 @@
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/dnotify.h>
#include <linux/init.h>
#include <linux/spinlock.h>
@@ -353,7 +354,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
goto out;
}
- __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+ __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);
error = attach_dn(dn, dn_mark, id, fd, filp, mask);
/* !error means that we attached the dn to the dn_mark, so don't free it */
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index eb4e75175cfb..94b52157bf8d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -8,6 +8,7 @@
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/sched/user.h>
+#include <linux/sched/signal.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/audit.h>
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 05506d60131c..59cdb27826de 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -132,13 +132,13 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
struct fsnotify_mark *mark;
assert_spin_locked(&conn->lock);
+ /* We can get detached connector here when inode is getting unlinked. */
+ if (!fsnotify_valid_obj_type(conn->type))
+ return;
hlist_for_each_entry(mark, &conn->list, obj_list) {
if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
new_mask |= mark->mask;
}
- if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
- return;
-
*fsnotify_conn_mask_p(conn) = new_mask;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 255f758af03a..9fa35cb6f6e0 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2537,19 +2537,14 @@ static int ocfs2_file_clone_range(struct file *file_in,
len, false);
}
-static ssize_t ocfs2_file_dedupe_range(struct file *src_file,
- u64 loff,
- u64 len,
- struct file *dst_file,
- u64 dst_loff)
+static int ocfs2_file_dedupe_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len)
{
- int error;
-
- error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+ return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
len, true);
- if (error)
- return error;
- return len;
}
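The prototype change follows the VFS-wide rework this series sits on: ->dedupe_file_range() now matches the clone signature and returns 0 or -errno, with the VFS core, rather than each filesystem, reporting the number of bytes deduplicated. Side by side, per this hunk:

    /* old: ssize_t (*dedupe_file_range)(struct file *src, u64 loff, u64 len,
     *                                   struct file *dst, u64 dst_loff);
     * new: int     (*dedupe_file_range)(struct file *file_in,  loff_t pos_in,
     *                                   struct file *file_out, loff_t pos_out,
     *                                   u64 len);
     */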
const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/open.c b/fs/open.c
index d98e19239bb7..0285ce7dbd51 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -68,7 +68,6 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
long vfs_truncate(const struct path *path, loff_t length)
{
struct inode *inode;
- struct dentry *upperdentry;
long error;
inode = path->dentry->d_inode;
@@ -91,17 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length)
if (IS_APPEND(inode))
goto mnt_drop_write_and_out;
- /*
- * If this is an overlayfs then do as if opening the file so we get
- * write access on the upper inode, not on the overlay inode. For
- * non-overlay filesystems d_real() is an identity function.
- */
- upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0);
- error = PTR_ERR(upperdentry);
- if (IS_ERR(upperdentry))
- goto mnt_drop_write_and_out;
-
- error = get_write_access(upperdentry->d_inode);
+ error = get_write_access(inode);
if (error)
goto mnt_drop_write_and_out;
@@ -120,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length)
error = do_truncate(path->dentry, length, 0, NULL);
put_write_and_out:
- put_write_access(upperdentry->d_inode);
+ put_write_access(inode);
mnt_drop_write_and_out:
mnt_drop_write(path->mnt);
out:
@@ -707,12 +696,12 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
if (!f.file)
goto out;
- error = mnt_want_write_file_path(f.file);
+ error = mnt_want_write_file(f.file);
if (error)
goto out_fput;
audit_file(f.file);
error = chown_common(&f.file->f_path, user, group);
- mnt_drop_write_file_path(f.file);
+ mnt_drop_write_file(f.file);
out_fput:
fdput(f);
out:
@@ -887,13 +876,8 @@ EXPORT_SYMBOL(file_path);
*/
int vfs_open(const struct path *path, struct file *file)
{
- struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
-
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
file->f_path = *path;
- return do_dentry_open(file, d_backing_inode(dentry), NULL);
+ return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
}
struct file *dentry_open(const struct path *path, int flags,
@@ -919,6 +903,24 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
+struct file *open_with_fake_path(const struct path *path, int flags,
+ struct inode *inode, const struct cred *cred)
+{
+ struct file *f = alloc_empty_file_noaccount(flags, cred);
+ if (!IS_ERR(f)) {
+ int error;
+
+ f->f_path = *path;
+ error = do_dentry_open(f, inode, NULL);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+ }
+ return f;
+}
+EXPORT_SYMBOL(open_with_fake_path);
+
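The new helper opens one inode while f_path keeps pointing at another, so /proc/<pid>/fd and audit output show the path the caller actually opened; alloc_empty_file_noaccount() additionally keeps these internal files out of the global open-file accounting. The overlayfs file.c added later in this section is the intended user; a trimmed sketch of that call:

    /* Open the real (upper or lower) inode, keeping the overlay path. */
    realfile = open_with_fake_path(&file->f_path,
                                   file->f_flags | O_NOATIME,
                                   realinode, current_cred());
    if (IS_ERR(realfile))
            return PTR_ERR(realfile);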
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
int lookup_flags = 0;
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 9384164253ac..2ef91be2a04e 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -64,6 +64,7 @@ config OVERLAY_FS_NFS_EXPORT
bool "Overlayfs: turn on NFS export feature by default"
depends on OVERLAY_FS
depends on OVERLAY_FS_INDEX
+ depends on !OVERLAY_FS_METACOPY
help
If this config option is enabled then overlay filesystems will use
the index directory to decode overlay NFS file handles by default.
@@ -103,3 +104,21 @@ config OVERLAY_FS_XINO_AUTO
For more information, see Documentation/filesystems/overlayfs.txt
If unsure, say N.
+
+config OVERLAY_FS_METACOPY
+ bool "Overlayfs: turn on metadata only copy up feature by default"
+ depends on OVERLAY_FS
+ select OVERLAY_FS_REDIRECT_DIR
+ help
+ If this config option is enabled then overlay filesystems will
+ copy up only metadata where appropriate, and data copy-up will
+ happen when a file is opened for writing. It is still
+ possible to turn off this feature globally with the "metacopy=off"
+ module option or on a filesystem instance basis with the
+ "metacopy=off" mount option.
+
+ Note that this feature is not backward compatible. That is,
+ mounting an overlay which has metacopy-only inodes on a kernel
+ that doesn't support this feature will have unexpected results.
+
+ If unsure, say N.
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 30802347a020..46e1ff8ac056 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -4,5 +4,5 @@
obj-$(CONFIG_OVERLAY_FS) += overlay.o
-overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \
- export.o
+overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
+ copy_up.o export.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index ddaddb4ce4c3..296037afecdb 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -25,35 +25,20 @@
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
-static bool __read_mostly ovl_check_copy_up;
-module_param_named(check_copy_up, ovl_check_copy_up, bool,
- S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(ovl_check_copy_up,
- "Warn on copy-up when causing process also has a R/O fd open");
-
-static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
+static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
{
- const struct dentry *dentry = data;
-
- if (file_inode(f) == d_inode(dentry))
- pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
- f, fd, current->pid, current->comm);
+ pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
return 0;
}
-/*
- * Check the fds open by this process and warn if something like the following
- * scenario is about to occur:
- *
- * fd1 = open("foo", O_RDONLY);
- * fd2 = open("foo", O_RDWR);
- */
-static void ovl_do_check_copy_up(struct dentry *dentry)
+static int ovl_ccup_get(char *buf, const struct kernel_param *param)
{
- if (ovl_check_copy_up)
- iterate_fd(current->files, 0, ovl_check_fd, dentry);
+ return sprintf(buf, "N\n");
}
+module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
+MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing");
+
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
@@ -195,6 +180,16 @@ out_fput:
return error;
}
+static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
+{
+ struct iattr attr = {
+ .ia_valid = ATTR_SIZE,
+ .ia_size = stat->size,
+ };
+
+ return notify_change(upperdentry, &attr, NULL);
+}
+
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
@@ -403,6 +398,7 @@ struct ovl_copy_up_ctx {
bool tmpfile;
bool origin;
bool indexed;
+ bool metacopy;
};
static int ovl_link_up(struct ovl_copy_up_ctx *c)
@@ -505,28 +501,10 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
{
int err;
- if (S_ISREG(c->stat.mode)) {
- struct path upperpath;
-
- ovl_path_upper(c->dentry, &upperpath);
- BUG_ON(upperpath.dentry != NULL);
- upperpath.dentry = temp;
-
- err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
- if (err)
- return err;
- }
-
err = ovl_copy_xattr(c->lowerpath.dentry, temp);
if (err)
return err;
- inode_lock(temp->d_inode);
- err = ovl_set_attr(temp, &c->stat);
- inode_unlock(temp->d_inode);
- if (err)
- return err;
-
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
@@ -540,7 +518,34 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
return err;
}
- return 0;
+ if (S_ISREG(c->stat.mode) && !c->metacopy) {
+ struct path upperpath, datapath;
+
+ ovl_path_upper(c->dentry, &upperpath);
+ BUG_ON(upperpath.dentry != NULL);
+ upperpath.dentry = temp;
+
+ ovl_path_lowerdata(c->dentry, &datapath);
+ err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ if (err)
+ return err;
+ }
+
+ if (c->metacopy) {
+ err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
+ NULL, 0, -EOPNOTSUPP);
+ if (err)
+ return err;
+ }
+
+ inode_lock(temp->d_inode);
+ if (c->metacopy)
+ err = ovl_set_size(temp, &c->stat);
+ if (!err)
+ err = ovl_set_attr(temp, &c->stat);
+ inode_unlock(temp->d_inode);
+
+ return err;
}
static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
@@ -575,6 +580,8 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
if (err)
goto out;
+ if (!c->metacopy)
+ ovl_set_upperdata(d_inode(c->dentry));
inode = d_inode(c->dentry);
ovl_inode_update(inode, newdentry);
if (S_ISDIR(inode->i_mode))
@@ -677,6 +684,49 @@ out:
return err;
}
+static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
+ int flags)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+ if (!ofs->config.metacopy)
+ return false;
+
+ if (!S_ISREG(mode))
+ return false;
+
+ if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
+ return false;
+
+ return true;
+}
+
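The three tests reduce to a small decision table when metacopy is enabled; a sketch of the outcomes (copy-up is only reached at all for operations that need an upper inode):

    /* flags == 0 (chmod, chown, setxattr, ...)   -> metadata-only copy-up
     * OPEN_FMODE(flags) & FMODE_WRITE or O_TRUNC -> full data copy-up
     * !S_ISREG(mode) or metacopy=off             -> full copy-up path
     */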
+/* Copy up the data of an inode that was previously copied up metadata-only. */
+static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
+{
+ struct path upperpath, datapath;
+ int err;
+
+ ovl_path_upper(c->dentry, &upperpath);
+ if (WARN_ON(upperpath.dentry == NULL))
+ return -EIO;
+
+ ovl_path_lowerdata(c->dentry, &datapath);
+ if (WARN_ON(datapath.dentry == NULL))
+ return -EIO;
+
+ err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
+ if (err)
+ return err;
+
+ err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
+ if (err)
+ return err;
+
+ ovl_set_upperdata(d_inode(c->dentry));
+ return err;
+}
+
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
int flags)
{
@@ -698,6 +748,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (err)
return err;
+ ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
+
if (parent) {
ovl_path_upper(parent, &parentpath);
ctx.destdir = parentpath.dentry;
@@ -719,9 +771,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (IS_ERR(ctx.link))
return PTR_ERR(ctx.link);
}
- ovl_do_check_copy_up(ctx.lowerpath.dentry);
- err = ovl_copy_up_start(dentry);
+ err = ovl_copy_up_start(dentry, flags);
/* err < 0: interrupted, err > 0: raced with another copy-up */
if (unlikely(err)) {
if (err > 0)
@@ -731,6 +782,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
err = ovl_do_copy_up(&ctx);
if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
err = ovl_link_up(&ctx);
+ if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
+ err = ovl_copy_up_meta_inode_data(&ctx);
ovl_copy_up_end(dentry);
}
do_delayed_call(&done);
@@ -756,21 +809,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
struct dentry *next;
struct dentry *parent = NULL;
- /*
- * Check if copy-up has happened as well as for upper alias (in
- * case of hard links) is there.
- *
- * Both checks are lockless:
- * - false negatives: will recheck under oi->lock
- * - false positives:
- * + ovl_dentry_upper() uses memory barriers to ensure the
- * upper dentry is up-to-date
- * + ovl_dentry_has_upper_alias() relies on locking of
- * upper parent i_rwsem to prevent reordering copy-up
- * with rename.
- */
- if (ovl_dentry_upper(dentry) &&
- (ovl_dentry_has_upper_alias(dentry) || disconnected))
+ if (ovl_already_copied_up(dentry, flags))
break;
next = dget(dentry);
@@ -795,6 +834,41 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
return err;
}
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
+{
+ /* Copy up of disconnected dentry does not set upper alias */
+ if (ovl_already_copied_up(dentry, flags))
+ return false;
+
+ if (special_file(d_inode(dentry)->i_mode))
+ return false;
+
+ if (!ovl_open_flags_need_copy_up(flags))
+ return false;
+
+ return true;
+}
+
+int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
+{
+ int err = 0;
+
+ if (ovl_open_need_copy_up(dentry, file_flags)) {
+ err = ovl_want_write(dentry);
+ if (!err) {
+ err = ovl_copy_up_flags(dentry, file_flags);
+ ovl_drop_write(dentry);
+ }
+ }
+
+ return err;
+}
+
+int ovl_copy_up_with_data(struct dentry *dentry)
+{
+ return ovl_copy_up_flags(dentry, O_WRONLY);
+}
+
int ovl_copy_up(struct dentry *dentry)
{
return ovl_copy_up_flags(dentry, 0);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index f480b1a2cd2e..276914ae3c60 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -24,6 +24,8 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
MODULE_PARM_DESC(ovl_redirect_max,
"Maximum length of absolute redirect xattr value");
+static int ovl_set_redirect(struct dentry *dentry, bool samedir);
+
int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
int err;
@@ -242,7 +244,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
.newinode = inode,
};
- ovl_dentry_version_inc(dentry->d_parent, false);
+ ovl_dir_modified(dentry->d_parent, false);
ovl_dentry_set_upper_alias(dentry);
if (!hardlink) {
/*
@@ -601,6 +603,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
if (!inode)
goto out_drop_write;
+ spin_lock(&inode->i_lock);
+ inode->i_state |= I_CREATING;
+ spin_unlock(&inode->i_lock);
+
inode_init_owner(inode, dentry->d_parent->d_inode, mode);
attr.mode = inode->i_mode;
@@ -657,6 +663,12 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
goto out_drop_write;
+ if (ovl_is_metacopy_dentry(old)) {
+ err = ovl_set_redirect(old, false);
+ if (err)
+ goto out_drop_write;
+ }
+
err = ovl_nlink_start(old, &locked);
if (err)
goto out_drop_write;
@@ -722,7 +734,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
if (err)
goto out_d_drop;
- ovl_dentry_version_inc(dentry->d_parent, true);
+ ovl_dir_modified(dentry->d_parent, true);
out_d_drop:
d_drop(dentry);
out_dput_upper:
@@ -767,7 +779,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
err = vfs_rmdir(dir, upper);
else
err = vfs_unlink(dir, upper, NULL);
- ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry));
+ ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
/*
* Keeping this dentry hashed would mean having to release
@@ -797,6 +809,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
int err;
bool locked = false;
const struct cred *old_cred;
+ struct dentry *upperdentry;
bool lower_positive = ovl_lower_positive(dentry);
LIST_HEAD(list);
@@ -832,6 +845,17 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
drop_nlink(dentry->d_inode);
}
ovl_nlink_end(dentry, locked);
+
+ /*
+ * Copy ctime
+ *
+ * Note: we fail to update ctime if there was no copy-up, only a
+ * whiteout
+ */
+ upperdentry = ovl_dentry_upper(dentry);
+ if (upperdentry)
+ ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
+
out_drop_write:
ovl_drop_write(dentry);
out:
@@ -862,13 +886,13 @@ static bool ovl_can_move(struct dentry *dentry)
!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
}
-static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
+static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
{
char *buf, *ret;
struct dentry *d, *tmp;
int buflen = ovl_redirect_max + 1;
- if (samedir) {
+ if (!abs_redirect) {
ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
GFP_KERNEL);
goto out;
@@ -922,15 +946,43 @@ out:
return ret ? ret : ERR_PTR(-ENOMEM);
}
+static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
+{
+ struct dentry *lowerdentry;
+
+ if (!samedir)
+ return true;
+
+ if (d_is_dir(dentry))
+ return false;
+
+ /*
+ * For non-dir hardlinked files, we need absolute redirects
+ * in general as two upper hardlinks could be in different
+ * dirs. We could put a relative redirect now and convert
+ * it to absolute redirect later. But when nlink > 1 and
+ * indexing is on, that means relative redirect needs to be
+ * converted to absolute during copy up of another lower
+ * hardlink as well.
+ *
+ * So without optimizing too much, just check whether the lower
+ * is a hard link. If it is, use an absolute redirect.
+ */
+ lowerdentry = ovl_dentry_lower(dentry);
+ return (d_inode(lowerdentry)->i_nlink > 1);
+}
+
static int ovl_set_redirect(struct dentry *dentry, bool samedir)
{
int err;
const char *redirect = ovl_dentry_get_redirect(dentry);
+ bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
- if (redirect && (samedir || redirect[0] == '/'))
+ if (redirect && (!absolute_redirect || redirect[0] == '/'))
return 0;
- redirect = ovl_get_redirect(dentry, samedir);
+ redirect = ovl_get_redirect(dentry, absolute_redirect);
if (IS_ERR(redirect))
return PTR_ERR(redirect);
@@ -1106,22 +1158,20 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
goto out_dput;
err = 0;
- if (is_dir) {
- if (ovl_type_merge_or_lower(old))
- err = ovl_set_redirect(old, samedir);
- else if (!old_opaque && ovl_type_merge(new->d_parent))
- err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
- if (err)
- goto out_dput;
- }
- if (!overwrite && new_is_dir) {
- if (ovl_type_merge_or_lower(new))
- err = ovl_set_redirect(new, samedir);
- else if (!new_opaque && ovl_type_merge(old->d_parent))
- err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
- if (err)
- goto out_dput;
- }
+ if (ovl_type_merge_or_lower(old))
+ err = ovl_set_redirect(old, samedir);
+ else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
+ err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
+ if (err)
+ goto out_dput;
+
+ if (!overwrite && ovl_type_merge_or_lower(new))
+ err = ovl_set_redirect(new, samedir);
+ else if (!overwrite && new_is_dir && !new_opaque &&
+ ovl_type_merge(old->d_parent))
+ err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
+ if (err)
+ goto out_dput;
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
new_upperdir->d_inode, newdentry, flags);
@@ -1138,10 +1188,15 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
drop_nlink(d_inode(new));
}
- ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) ||
- (!overwrite && ovl_type_origin(new)));
- ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) ||
- (d_inode(new) && ovl_type_origin(new)));
+ ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
+ (!overwrite && ovl_type_origin(new)));
+ ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
+ (d_inode(new) && ovl_type_origin(new)));
+
+ /* copy ctime: */
+ ovl_copyattr(d_inode(olddentry), d_inode(old));
+ if (d_inode(new) && ovl_dentry_upper(new))
+ ovl_copyattr(d_inode(newdentry), d_inode(new));
out_dput:
dput(newdentry);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 9941ece61a14..8fa37cd7818a 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -317,6 +317,9 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
return ERR_CAST(inode);
}
+ if (upper)
+ ovl_set_flag(OVL_UPPERDATA, inode);
+
dentry = d_find_any_alias(inode);
if (!dentry) {
dentry = d_alloc_anon(inode->i_sb);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
new file mode 100644
index 000000000000..32e9282893c9
--- /dev/null
+++ b/fs/overlayfs/file.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/xattr.h>
+#include <linux/uio.h>
+#include "overlayfs.h"
+
+static char ovl_whatisit(struct inode *inode, struct inode *realinode)
+{
+ if (realinode != ovl_inode_upper(inode))
+ return 'l';
+ if (ovl_has_upperdata(inode))
+ return 'u';
+ else
+ return 'm';
+}
+
+static struct file *ovl_open_realfile(const struct file *file,
+ struct inode *realinode)
+{
+ struct inode *inode = file_inode(file);
+ struct file *realfile;
+ const struct cred *old_cred;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
+ realinode, current_cred());
+ revert_creds(old_cred);
+
+ pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
+ file, file, ovl_whatisit(inode, realinode), file->f_flags,
+ realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
+
+ return realfile;
+}
+
+#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
+
+static int ovl_change_flags(struct file *file, unsigned int flags)
+{
+ struct inode *inode = file_inode(file);
+ int err;
+
+ /* No atime modification on underlying */
+ flags |= O_NOATIME;
+
+ /* If some flag changed that cannot be changed then something's amiss */
+ if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
+ return -EIO;
+
+ flags &= OVL_SETFL_MASK;
+
+ if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
+ return -EPERM;
+
+ if (flags & O_DIRECT) {
+ if (!file->f_mapping->a_ops ||
+ !file->f_mapping->a_ops->direct_IO)
+ return -EINVAL;
+ }
+
+ if (file->f_op->check_flags) {
+ err = file->f_op->check_flags(flags);
+ if (err)
+ return err;
+ }
+
+ spin_lock(&file->f_lock);
+ file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
+ spin_unlock(&file->f_lock);
+
+ return 0;
+}
+
+static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
+ bool allow_meta)
+{
+ struct inode *inode = file_inode(file);
+ struct inode *realinode;
+
+ real->flags = 0;
+ real->file = file->private_data;
+
+ if (allow_meta)
+ realinode = ovl_inode_real(inode);
+ else
+ realinode = ovl_inode_realdata(inode);
+
+ /* Has it been copied up since we opened it? */
+ if (unlikely(file_inode(real->file) != realinode)) {
+ real->flags = FDPUT_FPUT;
+ real->file = ovl_open_realfile(file, realinode);
+
+ return PTR_ERR_OR_ZERO(real->file);
+ }
+
+ /* Did the flags change since open? */
+ if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
+ return ovl_change_flags(real->file, file->f_flags);
+
+ return 0;
+}
+
+static int ovl_real_fdget(const struct file *file, struct fd *real)
+{
+ return ovl_real_fdget_meta(file, real, false);
+}
+
+static int ovl_open(struct inode *inode, struct file *file)
+{
+ struct dentry *dentry = file_dentry(file);
+ struct file *realfile;
+ int err;
+
+ err = ovl_open_maybe_copy_up(dentry, file->f_flags);
+ if (err)
+ return err;
+
+ /* No longer need these flags, so don't pass them on to underlying fs */
+ file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+
+ realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
+ if (IS_ERR(realfile))
+ return PTR_ERR(realfile);
+
+ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
+ file->f_mapping = realfile->f_mapping;
+
+ file->private_data = realfile;
+
+ return 0;
+}
+
+static int ovl_release(struct inode *inode, struct file *file)
+{
+ fput(file->private_data);
+
+ return 0;
+}
+
+static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *realinode = ovl_inode_real(file_inode(file));
+
+ return generic_file_llseek_size(file, offset, whence,
+ realinode->i_sb->s_maxbytes,
+ i_size_read(realinode));
+}
+
+static void ovl_file_accessed(struct file *file)
+{
+ struct inode *inode, *upperinode;
+
+ if (file->f_flags & O_NOATIME)
+ return;
+
+ inode = file_inode(file);
+ upperinode = ovl_inode_upper(inode);
+
+ if (!upperinode)
+ return;
+
+ if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
+ !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
+ inode->i_mtime = upperinode->i_mtime;
+ inode->i_ctime = upperinode->i_ctime;
+ }
+
+ touch_atime(&file->f_path);
+}
+
+static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
+{
+ int ifl = iocb->ki_flags;
+ rwf_t flags = 0;
+
+ if (ifl & IOCB_NOWAIT)
+ flags |= RWF_NOWAIT;
+ if (ifl & IOCB_HIPRI)
+ flags |= RWF_HIPRI;
+ if (ifl & IOCB_DSYNC)
+ flags |= RWF_DSYNC;
+ if (ifl & IOCB_SYNC)
+ flags |= RWF_SYNC;
+
+ return flags;
+}
+
+static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct fd real;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ revert_creds(old_cred);
+
+ ovl_file_accessed(file);
+
+ fdput(real);
+
+ return ret;
+}
+
+static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct fd real;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ inode_lock(inode);
+ /* Update mode */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ ret = file_remove_privs(file);
+ if (ret)
+ goto out_unlock;
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ goto out_unlock;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ revert_creds(old_cred);
+
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+
+ fdput(real);
+
+out_unlock:
+ inode_unlock(inode);
+
+ return ret;
+}
+
+static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ int ret;
+
+ ret = ovl_real_fdget_meta(file, &real, !datasync);
+ if (ret)
+ return ret;
+
+ /* Don't sync the lower file for fear of receiving an EROFS error */
+ if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_fsync_range(real.file, start, end, datasync);
+ revert_creds(old_cred);
+ }
+
+ fdput(real);
+
+ return ret;
+}
+
+static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct file *realfile = file->private_data;
+ const struct cred *old_cred;
+ int ret;
+
+ if (!realfile->f_op->mmap)
+ return -ENODEV;
+
+ if (WARN_ON(file != vma->vm_file))
+ return -EIO;
+
+ vma->vm_file = get_file(realfile);
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = call_mmap(vma->vm_file, vma);
+ revert_creds(old_cred);
+
+ if (ret) {
+ /* Drop reference count from new vm_file value */
+ fput(realfile);
+ } else {
+ /* Drop reference count from previous vm_file value */
+ fput(file);
+ }
+
+ ovl_file_accessed(file);
+
+ return ret;
+}
+
+static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ struct fd real;
+ const struct cred *old_cred;
+ int ret;
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_fallocate(real.file, mode, offset, len);
+ revert_creds(old_cred);
+
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+
+ fdput(real);
+
+ return ret;
+}
+
+static long ovl_real_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fd real;
+ const struct cred *old_cred;
+ long ret;
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ return ret;
+
+ old_cred = ovl_override_creds(file_inode(file)->i_sb);
+ ret = vfs_ioctl(real.file, cmd, arg);
+ revert_creds(old_cred);
+
+ fdput(real);
+
+ return ret;
+}
+
+static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ long ret;
+ struct inode *inode = file_inode(file);
+
+ switch (cmd) {
+ case FS_IOC_GETFLAGS:
+ ret = ovl_real_ioctl(file, cmd, arg);
+ break;
+
+ case FS_IOC_SETFLAGS:
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ ret = ovl_copy_up_with_data(file_dentry(file));
+ if (!ret) {
+ ret = ovl_real_ioctl(file, cmd, arg);
+
+ inode_lock(inode);
+ ovl_copyflags(ovl_inode_real(inode), inode);
+ inode_unlock(inode);
+ }
+
+ mnt_drop_write_file(file);
+ break;
+
+ default:
+ ret = -ENOTTY;
+ }
+
+ return ret;
+}
+
+static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ switch (cmd) {
+ case FS_IOC32_GETFLAGS:
+ cmd = FS_IOC_GETFLAGS;
+ break;
+
+ case FS_IOC32_SETFLAGS:
+ cmd = FS_IOC_SETFLAGS;
+ break;
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return ovl_ioctl(file, cmd, arg);
+}
+
+enum ovl_copyop {
+ OVL_COPY,
+ OVL_CLONE,
+ OVL_DEDUPE,
+};
+
+static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ u64 len, unsigned int flags, enum ovl_copyop op)
+{
+ struct inode *inode_out = file_inode(file_out);
+ struct fd real_in, real_out;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ ret = ovl_real_fdget(file_out, &real_out);
+ if (ret)
+ return ret;
+
+ ret = ovl_real_fdget(file_in, &real_in);
+ if (ret) {
+ fdput(real_out);
+ return ret;
+ }
+
+ old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
+ switch (op) {
+ case OVL_COPY:
+ ret = vfs_copy_file_range(real_in.file, pos_in,
+ real_out.file, pos_out, len, flags);
+ break;
+
+ case OVL_CLONE:
+ ret = vfs_clone_file_range(real_in.file, pos_in,
+ real_out.file, pos_out, len);
+ break;
+
+ case OVL_DEDUPE:
+ ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
+ real_out.file, pos_out, len);
+ break;
+ }
+ revert_creds(old_cred);
+
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode_out), inode_out);
+
+ fdput(real_in);
+ fdput(real_out);
+
+ return ret;
+}
+
+static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
+ OVL_COPY);
+}
+
+static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len)
+{
+ return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
+ OVL_CLONE);
+}
+
+static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, u64 len)
+{
+ /*
+ * Don't copy up because of a dedupe request; this wouldn't make sense
+ * most of the time (data would be duplicated instead of deduplicated).
+ */
+ if (!ovl_inode_upper(file_inode(file_in)) ||
+ !ovl_inode_upper(file_inode(file_out)))
+ return -EPERM;
+
+ return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
+ OVL_DEDUPE);
+}
+
+const struct file_operations ovl_file_operations = {
+ .open = ovl_open,
+ .release = ovl_release,
+ .llseek = ovl_llseek,
+ .read_iter = ovl_read_iter,
+ .write_iter = ovl_write_iter,
+ .fsync = ovl_fsync,
+ .mmap = ovl_mmap,
+ .fallocate = ovl_fallocate,
+ .unlocked_ioctl = ovl_ioctl,
+ .compat_ioctl = ovl_compat_ioctl,
+
+ .copy_file_range = ovl_copy_file_range,
+ .clone_file_range = ovl_clone_file_range,
+ .dedupe_file_range = ovl_dedupe_file_range,
+};
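Nearly every operation above follows one stacked-file skeleton; spelled out once (steps, not new code):

    /* 1. ovl_real_fdget(file, &real)  - fetch the real file, reopening it
     *    if the inode was copied up (or its flags changed) since open
     * 2. ovl_override_creds(sb)       - act with the mounter's credentials
     * 3. vfs_*() on real.file         - delegate the actual I/O
     * 4. revert_creds(old_cred)
     * 5. ovl_copyattr(ovl_inode_real(inode), inode) - refresh size/times
     *    on the overlay inode where the op can change them
     * 6. fdput(real)                  - drops the reference if we reopened
     */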
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ed16a898caeb..e0bb217c01e2 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -19,18 +19,10 @@
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
int err;
+ bool full_copy_up = false;
struct dentry *upperdentry;
const struct cred *old_cred;
- /*
- * Check for permissions before trying to copy-up. This is redundant
- * since it will be rechecked later by ->setattr() on upper dentry. But
- * without this, copy-up can be triggered by just about anybody.
- *
- * We don't initialize inode->size, which just means that
- * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
- * check for a swapfile (which this won't be anyway).
- */
err = setattr_prepare(dentry, attr);
if (err)
return err;
@@ -39,10 +31,33 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
goto out;
- err = ovl_copy_up(dentry);
+ if (attr->ia_valid & ATTR_SIZE) {
+ struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+
+ err = -ETXTBSY;
+ if (atomic_read(&realinode->i_writecount) < 0)
+ goto out_drop_write;
+
+ /* Truncate should trigger data copy up as well */
+ full_copy_up = true;
+ }
+
+ if (!full_copy_up)
+ err = ovl_copy_up(dentry);
+ else
+ err = ovl_copy_up_with_data(dentry);
if (!err) {
+ struct inode *winode = NULL;
+
upperdentry = ovl_dentry_upper(dentry);
+ if (attr->ia_valid & ATTR_SIZE) {
+ winode = d_inode(upperdentry);
+ err = get_write_access(winode);
+ if (err)
+ goto out_drop_write;
+ }
+
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
attr->ia_valid &= ~ATTR_MODE;
@@ -53,7 +68,11 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (!err)
ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
inode_unlock(upperdentry->d_inode);
+
+ if (winode)
+ put_write_access(winode);
}
+out_drop_write:
ovl_drop_write(dentry);
out:
return err;
@@ -133,6 +152,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
bool samefs = ovl_same_sb(dentry->d_sb);
struct ovl_layer *lower_layer = NULL;
int err;
+ bool metacopy_blocks = false;
+
+ metacopy_blocks = ovl_is_metacopy_dentry(dentry);
type = ovl_path_real(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
@@ -154,7 +176,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
lower_layer = ovl_layer_lower(dentry);
} else if (OVL_TYPE_ORIGIN(type)) {
struct kstat lowerstat;
- u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
+ u32 lowermask = STATX_INO | STATX_BLOCKS |
+ (!is_dir ? STATX_NLINK : 0);
ovl_path_lower(dentry, &realpath);
err = vfs_getattr(&realpath, &lowerstat,
@@ -183,6 +206,35 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
stat->ino = lowerstat.ino;
lower_layer = ovl_layer_lower(dentry);
}
+
+ /*
+ * If we are querying a metacopy dentry and the lower
+ * dentry is the data dentry, then use the blocks we
+ * queried just now. We don't have to do an additional
+ * vfs_getattr(). If the lower itself is metacopy, an
+ * additional vfs_getattr() is unavoidable.
+ */
+ if (metacopy_blocks &&
+ realpath.dentry == ovl_dentry_lowerdata(dentry)) {
+ stat->blocks = lowerstat.blocks;
+ metacopy_blocks = false;
+ }
+ }
+
+ if (metacopy_blocks) {
+ /*
+ * If the lower is not the same as lowerdata, or there was
+ * no origin on the upper, we can end up here.
+ */
+ struct kstat lowerdatastat;
+ u32 lowermask = STATX_BLOCKS;
+
+ ovl_path_lowerdata(dentry, &realpath);
+ err = vfs_getattr(&realpath, &lowerdatastat,
+ lowermask, flags);
+ if (err)
+ goto out;
+ stat->blocks = lowerdatastat.blocks;
}
}
@@ -304,6 +356,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
}
revert_creds(old_cred);
+ /* copy c/mtime */
+ ovl_copyattr(d_inode(realdentry), inode);
+
out_drop_write:
ovl_drop_write(dentry);
out:
@@ -384,38 +439,6 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
return acl;
}
-static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
-{
- /* Copy up of disconnected dentry does not set upper alias */
- if (ovl_dentry_upper(dentry) &&
- (ovl_dentry_has_upper_alias(dentry) ||
- (dentry->d_flags & DCACHE_DISCONNECTED)))
- return false;
-
- if (special_file(d_inode(dentry)->i_mode))
- return false;
-
- if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
- return false;
-
- return true;
-}
-
-int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
-{
- int err = 0;
-
- if (ovl_open_need_copy_up(dentry, file_flags)) {
- err = ovl_want_write(dentry);
- if (!err) {
- err = ovl_copy_up_flags(dentry, file_flags);
- ovl_drop_write(dentry);
- }
- }
-
- return err;
-}
-
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
{
if (flags & S_ATIME) {
@@ -433,6 +456,23 @@ int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
return 0;
}
+static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{
+ int err;
+ struct inode *realinode = ovl_inode_real(inode);
+ const struct cred *old_cred;
+
+ if (!realinode->i_op->fiemap)
+ return -EOPNOTSUPP;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
+ revert_creds(old_cred);
+
+ return err;
+}
+
static const struct inode_operations ovl_file_inode_operations = {
.setattr = ovl_setattr,
.permission = ovl_permission,
@@ -440,6 +480,7 @@ static const struct inode_operations ovl_file_inode_operations = {
.listxattr = ovl_listxattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
+ .fiemap = ovl_fiemap,
};
static const struct inode_operations ovl_symlink_inode_operations = {
@@ -450,6 +491,15 @@ static const struct inode_operations ovl_symlink_inode_operations = {
.update_time = ovl_update_time,
};
+static const struct inode_operations ovl_special_inode_operations = {
+ .setattr = ovl_setattr,
+ .permission = ovl_permission,
+ .getattr = ovl_getattr,
+ .listxattr = ovl_listxattr,
+ .get_acl = ovl_get_acl,
+ .update_time = ovl_update_time,
+};
+
/*
* It is possible to stack an overlayfs instance on top of another
* overlayfs instance as a lower layer. We need to annotate the
@@ -520,6 +570,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
switch (mode & S_IFMT) {
case S_IFREG:
inode->i_op = &ovl_file_inode_operations;
+ inode->i_fop = &ovl_file_operations;
break;
case S_IFDIR:
@@ -532,7 +583,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
break;
default:
- inode->i_op = &ovl_file_inode_operations;
+ inode->i_op = &ovl_special_inode_operations;
init_special_inode(inode, mode, rdev);
break;
}
@@ -769,8 +820,9 @@ struct inode *ovl_get_inode(struct super_block *sb,
bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
oip->index);
int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
- bool is_dir;
+ bool is_dir, metacopy = false;
unsigned long ino = 0;
+ int err = -ENOMEM;
if (!realinode)
realinode = d_inode(lowerdentry);
@@ -787,7 +839,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
inode = ovl_iget5(sb, oip->newinode, key);
if (!inode)
- goto out_nomem;
+ goto out_err;
if (!(inode->i_state & I_NEW)) {
/*
* Verify that the underlying files stored in the inode
@@ -796,11 +848,12 @@ struct inode *ovl_get_inode(struct super_block *sb,
if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
true)) {
iput(inode);
- inode = ERR_PTR(-ESTALE);
- goto out;
+ err = -ESTALE;
+ goto out_err;
}
dput(upperdentry);
+ kfree(oip->redirect);
goto out;
}
@@ -812,11 +865,13 @@ struct inode *ovl_get_inode(struct super_block *sb,
} else {
/* Lower hardlink that will be broken on copy up */
inode = new_inode(sb);
- if (!inode)
- goto out_nomem;
+ if (!inode) {
+ err = -ENOMEM;
+ goto out_err;
+ }
}
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
- ovl_inode_init(inode, upperdentry, lowerdentry);
+ ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);
if (upperdentry && ovl_is_impuredir(upperdentry))
ovl_set_flag(OVL_IMPURE, inode);
@@ -824,6 +879,20 @@ struct inode *ovl_get_inode(struct super_block *sb,
if (oip->index)
ovl_set_flag(OVL_INDEX, inode);
+ if (upperdentry) {
+ err = ovl_check_metacopy_xattr(upperdentry);
+ if (err < 0)
+ goto out_err;
+ metacopy = err;
+ if (!metacopy)
+ ovl_set_flag(OVL_UPPERDATA, inode);
+ }
+
+ OVL_I(inode)->redirect = oip->redirect;
+
+ if (bylower)
+ ovl_set_flag(OVL_CONST_INO, inode);
+
/* Check for non-merge dir that may have whiteouts */
if (is_dir) {
if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
@@ -837,7 +906,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
out:
return inode;
-out_nomem:
- inode = ERR_PTR(-ENOMEM);
+out_err:
+ inode = ERR_PTR(err);
goto out;
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c993dd8db739..f28711846dd6 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -24,38 +24,20 @@ struct ovl_lookup_data {
bool stop;
bool last;
char *redirect;
+ bool metacopy;
};
static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
size_t prelen, const char *post)
{
int res;
- char *s, *next, *buf = NULL;
+ char *buf;
- res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
- if (res < 0) {
- if (res == -ENODATA || res == -EOPNOTSUPP)
- return 0;
- goto fail;
- }
- buf = kzalloc(prelen + res + strlen(post) + 1, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
+ buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
+ if (IS_ERR_OR_NULL(buf))
+ return PTR_ERR(buf);
- if (res == 0)
- goto invalid;
-
- res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
- if (res < 0)
- goto fail;
- if (res == 0)
- goto invalid;
if (buf[0] == '/') {
- for (s = buf; *s++ == '/'; s = next) {
- next = strchrnul(s, '/');
- if (s == next)
- goto invalid;
- }
/*
* One of the ancestor path elements in an absolute path
* lookup in ovl_lookup_layer() could have been opaque and
@@ -66,9 +48,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
*/
d->stop = false;
} else {
- if (strchr(buf, '/') != NULL)
- goto invalid;
-
+ res = strlen(buf) + 1;
memmove(buf + prelen, buf, res);
memcpy(buf, d->name.name, prelen);
}
@@ -80,16 +60,6 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
d->name.len = strlen(d->redirect);
return 0;
-
-err_free:
- kfree(buf);
- return 0;
-fail:
- pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
- goto err_free;
-invalid:
- pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
- goto err_free;
}
static int ovl_acceptable(void *ctx, struct dentry *dentry)
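
ovl_check_redirect() now delegates fetching and validating the xattr to ovl_get_redirect_xattr() (added in util.c below); only the splicing of a relative name with the parent prefix remains here. The stored forms can be inspected from user space; a minimal sketch reading the xattr off an upper-layer path (hypothetical path, trusted.* requires CAP_SYS_ADMIN):

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/xattr.h>

	int main(void)
	{
		char buf[256];
		ssize_t n = getxattr("/upper/dir", "trusted.overlay.redirect",
				     buf, sizeof(buf) - 1);

		if (n < 0) {
			perror("getxattr");	/* ENODATA: no redirect stored */
			return 1;
		}
		buf[n] = '\0';
		/* "name" (no '/') is relative to the parent directory;
		 * "/a/b" is absolute from the layer root, with every
		 * component non-empty. */
		printf("redirect: %s\n", buf);
		return 0;
	}
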
@@ -252,28 +222,39 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
d->stop = d->opaque = true;
goto put_and_out;
}
- if (!d_can_lookup(this)) {
+ /*
+ * This dentry should be a regular file if the previous layer
+ * lookup found a metacopy dentry.
+ */
+ if (last_element && d->metacopy && !d_is_reg(this)) {
d->stop = true;
- if (d->is_dir)
- goto put_and_out;
-
- /*
- * NB: handle failure to lookup non-last element when non-dir
- * redirects become possible
- */
- WARN_ON(!last_element);
- goto out;
+ goto put_and_out;
}
- if (last_element)
- d->is_dir = true;
- if (d->last)
- goto out;
+ if (!d_can_lookup(this)) {
+ if (d->is_dir || !last_element) {
+ d->stop = true;
+ goto put_and_out;
+ }
+ err = ovl_check_metacopy_xattr(this);
+ if (err < 0)
+ goto out_err;
- if (ovl_is_opaquedir(this)) {
- d->stop = true;
+ d->metacopy = err;
+ d->stop = !d->metacopy;
+ if (!d->metacopy || d->last)
+ goto out;
+ } else {
if (last_element)
- d->opaque = true;
- goto out;
+ d->is_dir = true;
+ if (d->last)
+ goto out;
+
+ if (ovl_is_opaquedir(this)) {
+ d->stop = true;
+ if (last_element)
+ d->opaque = true;
+ goto out;
+ }
}
err = ovl_check_redirect(this, d, prelen, post);
if (err)
@@ -823,7 +804,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
- struct ovl_path *stack = NULL;
+ struct ovl_path *stack = NULL, *origin_path = NULL;
struct dentry *upperdir, *upperdentry = NULL;
struct dentry *origin = NULL;
struct dentry *index = NULL;
@@ -834,6 +815,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
struct dentry *this;
unsigned int i;
int err;
+ bool metacopy = false;
struct ovl_lookup_data d = {
.name = dentry->d_name,
.is_dir = false,
@@ -841,6 +823,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
.stop = false,
.last = ofs->config.redirect_follow ? false : !poe->numlower,
.redirect = NULL,
+ .metacopy = false,
};
if (dentry->d_name.len > ofs->namelen)
@@ -859,7 +842,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
if (upperdentry && !d.is_dir) {
- BUG_ON(!d.stop || d.redirect);
+ unsigned int origin_ctr = 0;
+
/*
* Lookup copy up origin by decoding origin file handle.
* We may get a disconnected dentry, which is fine,
@@ -870,9 +854,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* number - it's the same as if we held a reference
* to a dentry in lower layer that was moved under us.
*/
- err = ovl_check_origin(ofs, upperdentry, &stack, &ctr);
+ err = ovl_check_origin(ofs, upperdentry, &origin_path,
+ &origin_ctr);
if (err)
goto out_put_upper;
+
+ if (d.metacopy)
+ metacopy = true;
}
if (d.redirect) {
@@ -913,7 +901,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* If no origin fh is stored in upper of a merge dir, store fh
* of lower dir and set upper parent "impure".
*/
- if (upperdentry && !ctr && !ofs->noxattr) {
+ if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
err = ovl_fix_origin(dentry, this, upperdentry);
if (err) {
dput(this);
@@ -925,18 +913,35 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* When "verify_lower" feature is enabled, do not merge with a
* lower dir that does not match a stored origin xattr. In any
* case, only verified origin is used for index lookup.
+ *
+ * For non-dir dentry, if index=on, then ensure origin
+ * matches the dentry found using path based lookup,
+ * otherwise error out.
*/
- if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) {
+ if (upperdentry && !ctr &&
+ ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
+ (!d.is_dir && ofs->config.index && origin_path))) {
err = ovl_verify_origin(upperdentry, this, false);
if (err) {
dput(this);
- break;
+ if (d.is_dir)
+ break;
+ goto out_put;
}
-
- /* Bless lower dir as verified origin */
origin = this;
}
+ if (d.metacopy)
+ metacopy = true;
+ /*
+ * Do not store intermediate metacopy dentries in the chain,
+ * except the topmost lower metacopy dentry.
+ */
+ if (d.metacopy && ctr) {
+ dput(this);
+ continue;
+ }
+
stack[ctr].dentry = this;
stack[ctr].layer = lower.layer;
ctr++;
@@ -968,13 +973,48 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
}
+ if (metacopy) {
+ /*
+ * Found a metacopy dentry but did not find the corresponding
+ * data dentry.
+ */
+ if (d.metacopy) {
+ err = -EIO;
+ goto out_put;
+ }
+
+ err = -EPERM;
+ if (!ofs->config.metacopy) {
+ pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
+ dentry);
+ goto out_put;
+ }
+ } else if (!d.is_dir && upperdentry && !ctr && origin_path) {
+ if (WARN_ON(stack != NULL)) {
+ err = -EIO;
+ goto out_put;
+ }
+ stack = origin_path;
+ ctr = 1;
+ origin_path = NULL;
+ }
+
/*
* Lookup index by lower inode and verify it matches upper inode.
* We only trust dir index if we verified that lower dir matches
* origin, otherwise dir index entries may be inconsistent and we
- * ignore them. Always lookup index of non-dir and non-upper.
+ * ignore them.
+ *
+ * For a non-dir upper metacopy dentry, we already set "origin" if we
+ * verified that lower matched upper origin. If upper origin was
+ * not present (because the lower layer did not support fh
+ * encode/decode), or indexing is not enabled, do not set "origin" and
+ * skip looking up the index. This case should be handled the same way
+ * as a non-dir upper without ORIGIN.
+ *
+ * Always look up the index of a non-dir, non-metacopy, non-upper dentry.
*/
- if (ctr && (!upperdentry || !d.is_dir))
+ if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
origin = stack[0].dentry;
if (origin && ovl_indexdir(dentry->d_sb) &&
@@ -1000,8 +1040,15 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (upperdentry)
ovl_dentry_set_upper_alias(dentry);
- else if (index)
+ else if (index) {
upperdentry = dget(index);
+ upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
+ if (IS_ERR(upperredirect)) {
+ err = PTR_ERR(upperredirect);
+ upperredirect = NULL;
+ goto out_free_oe;
+ }
+ }
if (upperdentry || ctr) {
struct ovl_inode_params oip = {
@@ -1009,22 +1056,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
.lowerpath = stack,
.index = index,
.numlower = ctr,
+ .redirect = upperredirect,
+ .lowerdata = (ctr > 1 && !d.is_dir) ?
+ stack[ctr - 1].dentry : NULL,
};
inode = ovl_get_inode(dentry->d_sb, &oip);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_free_oe;
-
- /*
- * NB: handle redirected hard links when non-dir redirects
- * become possible
- */
- WARN_ON(OVL_I(inode)->redirect);
- OVL_I(inode)->redirect = upperredirect;
}
revert_creds(old_cred);
+ if (origin_path) {
+ dput(origin_path->dentry);
+ kfree(origin_path);
+ }
dput(index);
kfree(stack);
kfree(d.redirect);
@@ -1039,6 +1086,10 @@ out_put:
dput(stack[i].dentry);
kfree(stack);
out_put_upper:
+ if (origin_path) {
+ dput(origin_path->dentry);
+ kfree(origin_path);
+ }
dput(upperdentry);
kfree(upperredirect);
out:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 7538b9b56237..f61839e1054c 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/uuid.h>
+#include <linux/fs.h>
#include "ovl_entry.h"
enum ovl_path_type {
@@ -28,6 +29,7 @@ enum ovl_path_type {
#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
+#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy"
enum ovl_inode_flag {
/* Pure upper dir that may contain non pure upper entries */
@@ -35,6 +37,9 @@ enum ovl_inode_flag {
/* Non-merge dir that may contain whiteout entries */
OVL_WHITEOUTS,
OVL_INDEX,
+ OVL_UPPERDATA,
+ /* Inode number will remain constant over copy up. */
+ OVL_CONST_INO,
};
enum ovl_entry_flag {
@@ -190,6 +195,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
return ret;
}
+static inline bool ovl_open_flags_need_copy_up(int flags)
+{
+ if (!flags)
+ return false;
+
+ return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
+}
+
/* util.c */
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
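
ovl_open_flags_need_copy_up() is the single predicate for "does this open intent require the file's data, not just its metadata". A user-space sketch of the same decision (OPEN_FMODE(flags) & FMODE_WRITE corresponds to an access mode other than O_RDONLY):

	#include <fcntl.h>
	#include <stdio.h>

	static int open_flags_need_copy_up(int flags)
	{
		if (!flags)	/* no open intent at all */
			return 0;
		return (flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC);
	}

	int main(void)
	{
		printf("O_RDONLY         -> %d\n", open_flags_need_copy_up(O_RDONLY));
		printf("O_WRONLY         -> %d\n", open_flags_need_copy_up(O_WRONLY));
		printf("O_RDONLY|O_TRUNC -> %d\n",
		       open_flags_need_copy_up(O_RDONLY | O_TRUNC));
		return 0;
	}
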
@@ -206,15 +219,19 @@ bool ovl_dentry_weird(struct dentry *dentry);
enum ovl_path_type ovl_path_type(struct dentry *dentry);
void ovl_path_upper(struct dentry *dentry, struct path *path);
void ovl_path_lower(struct dentry *dentry, struct path *path);
+void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
+struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
struct dentry *ovl_i_dentry_upper(struct inode *inode);
struct inode *ovl_inode_upper(struct inode *inode);
struct inode *ovl_inode_lower(struct inode *inode);
+struct inode *ovl_inode_lowerdata(struct inode *inode);
struct inode *ovl_inode_real(struct inode *inode);
+struct inode *ovl_inode_realdata(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
@@ -225,18 +242,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
bool ovl_dentry_has_upper_alias(struct dentry *dentry);
void ovl_dentry_set_upper_alias(struct dentry *dentry);
+bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
+bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags);
+bool ovl_has_upperdata(struct inode *inode);
+void ovl_set_upperdata(struct inode *inode);
bool ovl_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
- struct dentry *lowerdentry);
+ struct dentry *lowerdentry, struct dentry *lowerdata);
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
-void ovl_dentry_version_inc(struct dentry *dentry, bool impurity);
+void ovl_dir_modified(struct dentry *dentry, bool impurity);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
-int ovl_copy_up_start(struct dentry *dentry);
+int ovl_copy_up_start(struct dentry *dentry, int flags);
void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_already_copied_up(struct dentry *dentry, int flags);
bool ovl_check_origin_xattr(struct dentry *dentry);
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
@@ -252,6 +274,9 @@ bool ovl_need_index(struct dentry *dentry);
int ovl_nlink_start(struct dentry *dentry, bool *locked);
void ovl_nlink_end(struct dentry *dentry, bool locked);
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
+int ovl_check_metacopy_xattr(struct dentry *dentry);
+bool ovl_is_metacopy_dentry(struct dentry *dentry);
+char *ovl_get_redirect_xattr(struct dentry *dentry, int padding);
static inline bool ovl_is_impuredir(struct dentry *dentry)
{
@@ -324,7 +349,6 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
struct posix_acl *ovl_get_acl(struct inode *inode, int type);
-int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
bool ovl_is_private_xattr(const char *name);
@@ -334,6 +358,8 @@ struct ovl_inode_params {
struct ovl_path *lowerpath;
struct dentry *index;
unsigned int numlower;
+ char *redirect;
+ struct dentry *lowerdata;
};
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
@@ -348,6 +374,14 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
to->i_atime = from->i_atime;
to->i_mtime = from->i_mtime;
to->i_ctime = from->i_ctime;
+ i_size_write(to, i_size_read(from));
+}
+
+static inline void ovl_copyflags(struct inode *from, struct inode *to)
+{
+ unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
+
+ inode_set_flags(to, from->i_flags & mask, mask);
}
/* dir.c */
@@ -368,9 +402,14 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
int ovl_cleanup(struct inode *dir, struct dentry *dentry);
struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
+/* file.c */
+extern const struct file_operations ovl_file_operations;
+
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
+int ovl_copy_up_with_data(struct dentry *dentry);
int ovl_copy_up_flags(struct dentry *dentry, int flags);
+int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 41655a7d6894..ec237035333a 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -19,6 +19,7 @@ struct ovl_config {
bool index;
bool nfs_export;
int xino;
+ bool metacopy;
};
struct ovl_sb {
@@ -88,7 +89,10 @@ static inline struct ovl_entry *OVL_E(struct dentry *dentry)
}
struct ovl_inode {
- struct ovl_dir_cache *cache;
+ union {
+ struct ovl_dir_cache *cache; /* directory */
+ struct inode *lowerdata; /* regular file */
+ };
const char *redirect;
u64 version;
unsigned long flags;
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index ef1fe42ff7bb..cc8303a806b4 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -668,6 +668,21 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name,
return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
}
+static bool ovl_is_impure_dir(struct file *file)
+{
+ struct ovl_dir_file *od = file->private_data;
+ struct inode *dir = d_inode(file->f_path.dentry);
+
+ /*
+ * Only upper dir can be impure, but if we are in the middle of
+ * iterating a lower real dir, dir could be copied up and marked
+ * impure. We only want the impure cache if we started iterating
+ * a real upper dir to begin with.
+ */
+ return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
+}
+
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
{
int err;
@@ -696,7 +711,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
rdt.parent_ino = stat.ino;
}
- if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) {
+ if (ovl_is_impure_dir(file)) {
rdt.cache = ovl_cache_get_impure(&file->f_path);
if (IS_ERR(rdt.cache))
return PTR_ERR(rdt.cache);
@@ -727,7 +742,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
*/
if (ovl_xino_bits(dentry->d_sb) ||
(ovl_same_sb(dentry->d_sb) &&
- (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
+ (ovl_is_impure_dir(file) ||
OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
return ovl_iterate_real(file, ctx);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 704b37311467..2e0fc93c2c06 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -64,6 +64,11 @@ static void ovl_entry_stack_free(struct ovl_entry *oe)
dput(oe->lowerstack[i].dentry);
}
+static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
+module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
+MODULE_PARM_DESC(metacopy,
+ "Default to on or off for the metadata only copy up feature");
+
static void ovl_dentry_release(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
@@ -74,31 +79,14 @@ static void ovl_dentry_release(struct dentry *dentry)
}
}
-static int ovl_check_append_only(struct inode *inode, int flag)
-{
- /*
- * This test was moot in vfs may_open() because overlay inode does
- * not have the S_APPEND flag, so re-check on real upper inode
- */
- if (IS_APPEND(inode)) {
- if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
- return -EPERM;
- if (flag & O_TRUNC)
- return -EPERM;
- }
-
- return 0;
-}
-
static struct dentry *ovl_d_real(struct dentry *dentry,
- const struct inode *inode,
- unsigned int open_flags, unsigned int flags)
+ const struct inode *inode)
{
struct dentry *real;
- int err;
- if (flags & D_REAL_UPPER)
- return ovl_dentry_upper(dentry);
+ /* It's an overlay file */
+ if (inode && d_inode(dentry) == inode)
+ return dentry;
if (!d_is_reg(dentry)) {
if (!inode || inode == d_inode(dentry))
@@ -106,28 +94,19 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
goto bug;
}
- if (open_flags) {
- err = ovl_open_maybe_copy_up(dentry, open_flags);
- if (err)
- return ERR_PTR(err);
- }
-
real = ovl_dentry_upper(dentry);
- if (real && (!inode || inode == d_inode(real))) {
- if (!inode) {
- err = ovl_check_append_only(d_inode(real), open_flags);
- if (err)
- return ERR_PTR(err);
- }
+ if (real && (inode == d_inode(real)))
+ return real;
+
+ if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
return real;
- }
- real = ovl_dentry_lower(dentry);
+ real = ovl_dentry_lowerdata(dentry);
if (!real)
goto bug;
/* Handle recursion */
- real = d_real(real, inode, open_flags, 0);
+ real = d_real(real, inode);
if (!inode || inode == d_inode(real))
return real;
@@ -205,6 +184,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
oi->flags = 0;
oi->__upperdentry = NULL;
oi->lower = NULL;
+ oi->lowerdata = NULL;
mutex_init(&oi->lock);
return &oi->vfs_inode;
@@ -223,8 +203,11 @@ static void ovl_destroy_inode(struct inode *inode)
dput(oi->__upperdentry);
iput(oi->lower);
+ if (S_ISDIR(inode->i_mode))
+ ovl_dir_cache_free(inode);
+ else
+ iput(oi->lowerdata);
kfree(oi->redirect);
- ovl_dir_cache_free(inode);
mutex_destroy(&oi->lock);
call_rcu(&inode->i_rcu, ovl_i_callback);
@@ -376,6 +359,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
"on" : "off");
if (ofs->config.xino != ovl_xino_def())
seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
+ if (ofs->config.metacopy != ovl_metacopy_def)
+ seq_printf(m, ",metacopy=%s",
+ ofs->config.metacopy ? "on" : "off");
return 0;
}
@@ -413,6 +399,8 @@ enum {
OPT_XINO_ON,
OPT_XINO_OFF,
OPT_XINO_AUTO,
+ OPT_METACOPY_ON,
+ OPT_METACOPY_OFF,
OPT_ERR,
};
@@ -429,6 +417,8 @@ static const match_table_t ovl_tokens = {
{OPT_XINO_ON, "xino=on"},
{OPT_XINO_OFF, "xino=off"},
{OPT_XINO_AUTO, "xino=auto"},
+ {OPT_METACOPY_ON, "metacopy=on"},
+ {OPT_METACOPY_OFF, "metacopy=off"},
{OPT_ERR, NULL}
};
@@ -481,6 +471,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
char *p;
+ int err;
config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
if (!config->redirect_mode)
@@ -555,6 +546,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->xino = OVL_XINO_AUTO;
break;
+ case OPT_METACOPY_ON:
+ config->metacopy = true;
+ break;
+
+ case OPT_METACOPY_OFF:
+ config->metacopy = false;
+ break;
+
default:
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
@@ -569,7 +568,20 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->workdir = NULL;
}
- return ovl_parse_redirect_mode(config, config->redirect_mode);
+ err = ovl_parse_redirect_mode(config, config->redirect_mode);
+ if (err)
+ return err;
+
+ /* metacopy feature with upper requires redirect_dir=on */
+ if (config->upperdir && config->metacopy && !config->redirect_dir) {
+ pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n");
+ config->metacopy = false;
+ } else if (config->metacopy && !config->redirect_follow) {
+ pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n");
+ config->metacopy = false;
+ }
+
+ return 0;
}
#define OVL_WORKDIR_NAME "work"
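
With the validation above, metacopy=on is only honored together with redirect_dir=on (or redirect_dir=follow on a mount without an upper layer). A minimal mount(2) sketch with placeholder paths:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		if (mount("overlay", "/merged", "overlay", 0,
			  "lowerdir=/lower,upperdir=/upper,workdir=/work,"
			  "redirect_dir=on,metacopy=on")) {
			perror("mount");
			return 1;
		}
		return 0;
	}
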
@@ -1042,7 +1054,8 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
if (err) {
ofs->noxattr = true;
ofs->config.index = false;
- pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
+ ofs->config.metacopy = false;
+ pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
err = 0;
} else {
vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
@@ -1064,7 +1077,6 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
-
out:
mnt_drop_write(mnt);
return err;
@@ -1375,6 +1387,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ofs->config.index = ovl_index_def;
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
+ ofs->config.metacopy = ovl_metacopy_def;
err = ovl_parse_opt((char *) data, &ofs->config);
if (err)
goto out_err;
@@ -1445,6 +1458,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
}
}
+ if (ofs->config.metacopy && ofs->config.nfs_export) {
+ pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
+ ofs->config.nfs_export = false;
+ }
+
if (ofs->config.nfs_export)
sb->s_export_op = &ovl_export_operations;
@@ -1455,7 +1473,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &ovl_super_operations;
sb->s_xattr = ovl_xattr_handlers;
sb->s_fs_info = ofs;
- sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
+ sb->s_flags |= SB_POSIXACL;
err = -ENOMEM;
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
@@ -1474,8 +1492,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
/* Root is always merge -> can have whiteouts */
ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
+ ovl_set_upperdata(d_inode(root_dentry));
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
- ovl_dentry_lower(root_dentry));
+ ovl_dentry_lower(root_dentry), NULL);
sb->s_root = root_dentry;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 6f1078028c66..8cfb62cc8672 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -133,8 +133,10 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
* Non-dir dentry can hold lower dentry of its copy up origin.
*/
if (oe->numlower) {
- type |= __OVL_PATH_ORIGIN;
- if (d_is_dir(dentry))
+ if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
+ type |= __OVL_PATH_ORIGIN;
+ if (d_is_dir(dentry) ||
+ !ovl_has_upperdata(d_inode(dentry)))
type |= __OVL_PATH_MERGE;
}
} else {
@@ -164,6 +166,18 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
}
}
+void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ if (oe->numlower) {
+ path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt;
+ path->dentry = oe->lowerstack[oe->numlower - 1].dentry;
+ } else {
+ *path = (struct path) { };
+ }
+}
+
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
{
enum ovl_path_type type = ovl_path_type(dentry);
@@ -195,6 +209,19 @@ struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
return oe->numlower ? oe->lowerstack[0].layer : NULL;
}
+/*
+ * ovl_dentry_lower() could return either a data dentry or a metacopy
+ * dentry, depending on what is stored in lowerstack[0]. At times we need
+ * to find the lower dentry that has data (and not a metacopy dentry).
+ * This helper returns the lower data dentry.
+ */
+struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL;
+}
+
struct dentry *ovl_dentry_real(struct dentry *dentry)
{
return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
@@ -222,6 +249,26 @@ struct inode *ovl_inode_real(struct inode *inode)
return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
}
+/* Return the inode which contains lower data. Does not return a metacopy inode */
+struct inode *ovl_inode_lowerdata(struct inode *inode)
+{
+ if (WARN_ON(!S_ISREG(inode->i_mode)))
+ return NULL;
+
+ return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode);
+}
+
+/* Return the real inode which contains data. Does not return a metacopy inode */
+struct inode *ovl_inode_realdata(struct inode *inode)
+{
+ struct inode *upperinode;
+
+ upperinode = ovl_inode_upper(inode);
+ if (upperinode && ovl_has_upperdata(inode))
+ return upperinode;
+
+ return ovl_inode_lowerdata(inode);
+}
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
{
@@ -279,6 +326,62 @@ void ovl_dentry_set_upper_alias(struct dentry *dentry)
ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
}
+static bool ovl_should_check_upperdata(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return false;
+
+ if (!ovl_inode_lower(inode))
+ return false;
+
+ return true;
+}
+
+bool ovl_has_upperdata(struct inode *inode)
+{
+ if (!ovl_should_check_upperdata(inode))
+ return true;
+
+ if (!ovl_test_flag(OVL_UPPERDATA, inode))
+ return false;
+ /*
+ * Pairs with smp_wmb() in ovl_set_upperdata(). The main user of
+ * ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
+ * that if the setting of OVL_UPPERDATA is visible, then the effects
+ * of writes before it are visible too.
+ */
+ smp_rmb();
+ return true;
+}
+
+void ovl_set_upperdata(struct inode *inode)
+{
+ /*
+ * Pairs with smp_rmb() in ovl_has_upperdata(). Make sure that
+ * if the OVL_UPPERDATA flag is visible, then the effects of write
+ * operations before it are visible as well.
+ */
+ smp_wmb();
+ ovl_set_flag(OVL_UPPERDATA, inode);
+}
+
+/* Caller should hold ovl_inode->lock */
+bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
+{
+ if (!ovl_open_flags_need_copy_up(flags))
+ return false;
+
+ return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
+}
+
+bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
+{
+ if (!ovl_open_flags_need_copy_up(flags))
+ return false;
+
+ return !ovl_has_upperdata(d_inode(dentry));
+}
+
bool ovl_redirect_dir(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
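
The OVL_UPPERDATA publication above is a classic release/acquire pairing: the copy-up writer makes the data visible before setting the flag, and any reader that observes the flag is guaranteed to observe the data. An illustrative user-space analogue in C11 atomics (not kernel code):

	#include <stdatomic.h>
	#include <stdbool.h>

	static int payload;		/* stands in for copied-up file data */
	static atomic_bool upperdata;	/* stands in for OVL_UPPERDATA */

	static void set_upperdata(int value)
	{
		payload = value;			/* data written first */
		atomic_store_explicit(&upperdata, true,
				      memory_order_release);	/* smp_wmb() + set */
	}

	static bool has_upperdata(int *out)
	{
		if (!atomic_load_explicit(&upperdata,
					  memory_order_acquire))	/* test + smp_rmb() */
			return false;
		*out = payload;	/* guaranteed to see the writer's value */
		return true;
	}
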
@@ -300,7 +403,7 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
}
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
- struct dentry *lowerdentry)
+ struct dentry *lowerdentry, struct dentry *lowerdata)
{
struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
@@ -308,8 +411,11 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
OVL_I(inode)->__upperdentry = upperdentry;
if (lowerdentry)
OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
+ if (lowerdata)
+ OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata));
ovl_copyattr(realinode, inode);
+ ovl_copyflags(realinode, inode);
if (!inode->i_ino)
inode->i_ino = realinode->i_ino;
}
@@ -333,7 +439,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
}
}
-void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
+static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
{
struct inode *inode = d_inode(dentry);
@@ -348,6 +454,14 @@ void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
OVL_I(inode)->version++;
}
+void ovl_dir_modified(struct dentry *dentry, bool impurity)
+{
+ /* Copy mtime/ctime */
+ ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
+
+ ovl_dentry_version_inc(dentry, impurity);
+}
+
u64 ovl_dentry_version_get(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
@@ -368,13 +482,51 @@ struct file *ovl_path_open(struct path *path, int flags)
return dentry_open(path, flags | O_NOATIME, current_cred());
}
-int ovl_copy_up_start(struct dentry *dentry)
+/* Caller should hold ovl_inode->lock */
+static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags)
+{
+ bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
+
+ if (ovl_dentry_upper(dentry) &&
+ (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
+ !ovl_dentry_needs_data_copy_up_locked(dentry, flags))
+ return true;
+
+ return false;
+}
+
+bool ovl_already_copied_up(struct dentry *dentry, int flags)
+{
+ bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
+
+ /*
+ * Check whether copy-up has happened, and whether the upper
+ * alias (needed for hard links) is in place.
+ *
+ * Both checks are lockless:
+ * - false negatives: will recheck under oi->lock
+ * - false positives:
+ * + ovl_dentry_upper() uses memory barriers to ensure the
+ * upper dentry is up-to-date
+ * + ovl_dentry_has_upper_alias() relies on locking of
+ * upper parent i_rwsem to prevent reordering copy-up
+ * with rename.
+ */
+ if (ovl_dentry_upper(dentry) &&
+ (ovl_dentry_has_upper_alias(dentry) || disconnected) &&
+ !ovl_dentry_needs_data_copy_up(dentry, flags))
+ return true;
+
+ return false;
+}
+
+int ovl_copy_up_start(struct dentry *dentry, int flags)
{
struct ovl_inode *oi = OVL_I(d_inode(dentry));
int err;
err = mutex_lock_interruptible(&oi->lock);
- if (!err && ovl_dentry_has_upper_alias(dentry)) {
+ if (!err && ovl_already_copied_up_locked(dentry, flags)) {
err = 1; /* Already copied up */
mutex_unlock(&oi->lock);
}
@@ -675,3 +827,91 @@ err:
pr_err("overlayfs: failed to lock workdir+upperdir\n");
return -EIO;
}
+
+/* Returns < 0 on error, 0 if no metacopy xattr, 1 if metacopy xattr found */
+int ovl_check_metacopy_xattr(struct dentry *dentry)
+{
+ int res;
+
+ /* Only regular files can have metacopy xattr */
+ if (!S_ISREG(d_inode(dentry)->i_mode))
+ return 0;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
+ if (res < 0) {
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ return 0;
+ goto out;
+ }
+
+ return 1;
+out:
+ pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
+ return res;
+}
+
+bool ovl_is_metacopy_dentry(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ if (!d_is_reg(dentry))
+ return false;
+
+ if (ovl_dentry_upper(dentry)) {
+ if (!ovl_has_upperdata(d_inode(dentry)))
+ return true;
+ return false;
+ }
+
+ return (oe->numlower > 1);
+}
+
+char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
+{
+ int res;
+ char *s, *next, *buf = NULL;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
+ if (res < 0) {
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ return NULL;
+ goto fail;
+ }
+
+ buf = kzalloc(res + padding + 1, GFP_KERNEL);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ if (res == 0)
+ goto invalid;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
+ if (res < 0)
+ goto fail;
+ if (res == 0)
+ goto invalid;
+
+ if (buf[0] == '/') {
+ for (s = buf; *s++ == '/'; s = next) {
+ next = strchrnul(s, '/');
+ if (s == next)
+ goto invalid;
+ }
+ } else {
+ if (strchr(buf, '/') != NULL)
+ goto invalid;
+ }
+
+ return buf;
+
+err_free:
+ kfree(buf);
+ return ERR_PTR(res);
+fail:
+ pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
+ goto err_free;
+invalid:
+ pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
+ res = -EINVAL;
+ goto err_free;
+}
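
The metacopy marker checked above is an ordinary trusted xattr on the upper inode, so it can also be probed from user space. A minimal sketch (hypothetical path, requires CAP_SYS_ADMIN):

	#include <errno.h>
	#include <stdio.h>
	#include <sys/xattr.h>

	int main(void)
	{
		/* Size-only query: n >= 0 means the xattr exists. */
		ssize_t n = getxattr("/upper/file", "trusted.overlay.metacopy",
				     NULL, 0);

		if (n >= 0)
			printf("metacopy marker present\n");
		else if (errno == ENODATA)
			printf("full (data) copy-up\n");
		else
			perror("getxattr");
		return 0;
	}
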
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 0eaeb41453f5..817c02b13b1d 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -31,6 +31,7 @@ config PROC_FS
config PROC_KCORE
bool "/proc/kcore support" if !ARM
depends on PROC_FS && MMU
+ select CRASH_CORE
help
Provides a virtual ELF core file of the live kernel. This can
be read with gdb and other ELF tools. No modifications can be
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aaffc0c30216..ccf86f16d9f0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -463,7 +463,7 @@ static int lstats_show_proc(struct seq_file *m, void *v)
if (!task)
return -ESRCH;
seq_puts(m, "Latency Top version : v0.1\n");
- for (i = 0; i < 32; i++) {
+ for (i = 0; i < LT_SAVECOUNT; i++) {
struct latency_record *lr = &task->latency_record[i];
if (lr->backtrace[0]) {
int q;
@@ -1366,10 +1366,8 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
if (!task)
return -ESRCH;
len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
- len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
put_task_struct(task);
-
- return len;
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
}
static const struct file_operations proc_fail_nth_operations = {
@@ -2519,47 +2517,47 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
struct inode * inode = file_inode(file);
+ struct task_struct *task;
void *page;
- ssize_t length;
- struct task_struct *task = get_proc_task(inode);
-
- length = -ESRCH;
- if (!task)
- goto out_no_task;
+ int rv;
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
+ if (!task) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
/* A task may only write its own attributes. */
- length = -EACCES;
- if (current != task)
- goto out;
+ if (current != task) {
+ rcu_read_unlock();
+ return -EACCES;
+ }
+ rcu_read_unlock();
if (count > PAGE_SIZE)
count = PAGE_SIZE;
/* No partial writes. */
- length = -EINVAL;
if (*ppos != 0)
- goto out;
+ return -EINVAL;
page = memdup_user(buf, count);
if (IS_ERR(page)) {
- length = PTR_ERR(page);
+ rv = PTR_ERR(page);
goto out;
}
/* Guard against adverse ptrace interaction */
- length = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
- if (length < 0)
+ rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
+ if (rv < 0)
goto out_free;
- length = security_setprocattr(file->f_path.dentry->d_name.name,
- page, count);
+ rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count);
mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
kfree(page);
out:
- put_task_struct(task);
-out_no_task:
- return length;
+ return rv;
}
static const struct file_operations proc_pid_attr_operations = {
@@ -3309,12 +3307,12 @@ static const struct pid_entry tid_base_stuff[] = {
REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
- REG("maps", S_IRUGO, proc_tid_maps_operations),
+ REG("maps", S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_PROC_CHILDREN
REG("children", S_IRUGO, proc_tid_children_operations),
#endif
#ifdef CONFIG_NUMA
- REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
+ REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
#endif
REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
LNK("cwd", proc_cwd_link),
@@ -3324,7 +3322,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
- REG("smaps", S_IRUGO, proc_tid_smaps_operations),
+ REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index bb1c1625b158..8ae109429a88 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -286,9 +286,9 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
if (!dir_emit_dots(file, ctx))
return 0;
+ i = ctx->pos - 2;
read_lock(&proc_subdir_lock);
de = pde_subdir_first(de);
- i = ctx->pos - 2;
for (;;) {
if (!de) {
read_unlock(&proc_subdir_lock);
@@ -309,8 +309,8 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
pde_put(de);
return 0;
}
- read_lock(&proc_subdir_lock);
ctx->pos++;
+ read_lock(&proc_subdir_lock);
next = pde_subdir_next(de);
pde_put(de);
de = next;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 85ffbd27f288..fc5306a31a1d 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -105,8 +105,10 @@ void __init proc_init_kmemcache(void)
kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
SLAB_ACCOUNT|SLAB_PANIC, NULL);
proc_dir_entry_cache = kmem_cache_create_usercopy(
- "proc_dir_entry", SIZEOF_PDE_SLOT, 0, SLAB_PANIC,
- OFFSETOF_PDE_NAME, SIZEOF_PDE_INLINE_NAME, NULL);
+ "proc_dir_entry", SIZEOF_PDE, 0, SLAB_PANIC,
+ offsetof(struct proc_dir_entry, inline_name),
+ SIZEOF_PDE_INLINE_NAME, NULL);
+ BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE);
}
static int proc_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index da3dbfa09e79..5185d7f6a51e 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -65,16 +65,13 @@ struct proc_dir_entry {
char inline_name[];
} __randomize_layout;
-#define OFFSETOF_PDE_NAME offsetof(struct proc_dir_entry, inline_name)
-#define SIZEOF_PDE_SLOT \
- (OFFSETOF_PDE_NAME + 34 <= 64 ? 64 : \
- OFFSETOF_PDE_NAME + 34 <= 128 ? 128 : \
- OFFSETOF_PDE_NAME + 34 <= 192 ? 192 : \
- OFFSETOF_PDE_NAME + 34 <= 256 ? 256 : \
- OFFSETOF_PDE_NAME + 34 <= 512 ? 512 : \
- 0)
-
-#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE_SLOT - OFFSETOF_PDE_NAME)
+#define SIZEOF_PDE ( \
+ sizeof(struct proc_dir_entry) < 128 ? 128 : \
+ sizeof(struct proc_dir_entry) < 192 ? 192 : \
+ sizeof(struct proc_dir_entry) < 256 ? 256 : \
+ sizeof(struct proc_dir_entry) < 512 ? 512 : \
+ 0)
+#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry))
extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
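
SIZEOF_PDE rounds the full struct up to the next slab bucket, and SIZEOF_PDE_INLINE_NAME hands the slack to the flexible inline_name array, so no bytes of the slab object are wasted. The arithmetic, demonstrated on a stand-in struct (illustrative only):

	#include <stddef.h>
	#include <stdio.h>

	struct pde_like {
		void *data;
		char inline_name[];	/* flexible tail, as in proc_dir_entry */
	};

	#define SLOT(sz) ((sz) < 128 ? 128 : (sz) < 192 ? 192 : \
			  (sz) < 256 ? 256 : (sz) < 512 ? 512 : 0)

	int main(void)
	{
		size_t slot = SLOT(sizeof(struct pde_like));

		printf("struct=%zu slot=%zu inline_name capacity=%zu\n",
		       sizeof(struct pde_like), slot,
		       slot - sizeof(struct pde_like));
		return 0;
	}
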
@@ -116,12 +113,12 @@ static inline void *__PDE_DATA(const struct inode *inode)
return PDE(inode)->data;
}
-static inline struct pid *proc_pid(struct inode *inode)
+static inline struct pid *proc_pid(const struct inode *inode)
{
return PROC_I(inode)->pid;
}
-static inline struct task_struct *get_proc_task(struct inode *inode)
+static inline struct task_struct *get_proc_task(const struct inode *inode)
{
return get_pid_task(proc_pid(inode), PIDTYPE_PID);
}
@@ -285,7 +282,6 @@ struct proc_maps_private {
struct inode *inode;
struct task_struct *task;
struct mm_struct *mm;
- struct mem_size_stats *rollup;
#ifdef CONFIG_MMU
struct vm_area_struct *tail_vma;
#endif
@@ -297,12 +293,9 @@ struct proc_maps_private {
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode);
extern const struct file_operations proc_pid_maps_operations;
-extern const struct file_operations proc_tid_maps_operations;
extern const struct file_operations proc_pid_numa_maps_operations;
-extern const struct file_operations proc_tid_numa_maps_operations;
extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
-extern const struct file_operations proc_tid_smaps_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e64ecb9f2720..ad72261ee3fe 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -10,6 +10,7 @@
* Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
*/
+#include <linux/crash_core.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
@@ -49,32 +50,23 @@ static struct proc_dir_entry *proc_root_kcore;
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif
-/* An ELF note in memory */
-struct memelfnote
-{
- const char *name;
- int type;
- unsigned int datasz;
- void *data;
-};
-
static LIST_HEAD(kclist_head);
-static DEFINE_RWLOCK(kclist_lock);
+static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;
-void
-kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
+/* This doesn't grab kclist_lock, so it should only be used at init time. */
+void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
+ int type)
{
new->addr = (unsigned long)addr;
new->size = size;
new->type = type;
- write_lock(&kclist_lock);
list_add_tail(&new->list, &kclist_head);
- write_unlock(&kclist_lock);
}
-static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
+static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
+ size_t *data_offset)
{
size_t try, size;
struct kcore_list *m;
@@ -88,53 +80,19 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
size = try;
*nphdr = *nphdr + 1;
}
- *elf_buflen = sizeof(struct elfhdr) +
- (*nphdr + 2)*sizeof(struct elf_phdr) +
- 3 * ((sizeof(struct elf_note)) +
- roundup(sizeof(CORE_STR), 4)) +
- roundup(sizeof(struct elf_prstatus), 4) +
- roundup(sizeof(struct elf_prpsinfo), 4) +
- roundup(arch_task_struct_size, 4);
- *elf_buflen = PAGE_ALIGN(*elf_buflen);
- return size + *elf_buflen;
-}
-
-static void free_kclist_ents(struct list_head *head)
-{
- struct kcore_list *tmp, *pos;
- list_for_each_entry_safe(pos, tmp, head, list) {
- list_del(&pos->list);
- kfree(pos);
- }
+ *phdrs_len = *nphdr * sizeof(struct elf_phdr);
+ *notes_len = (4 * sizeof(struct elf_note) +
+ 3 * ALIGN(sizeof(CORE_STR), 4) +
+ VMCOREINFO_NOTE_NAME_BYTES +
+ ALIGN(sizeof(struct elf_prstatus), 4) +
+ ALIGN(sizeof(struct elf_prpsinfo), 4) +
+ ALIGN(arch_task_struct_size, 4) +
+ ALIGN(vmcoreinfo_size, 4));
+ *data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
+ *notes_len);
+ return *data_offset + size;
}
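
get_kcore_size() now fixes the file layout as ELF header, then program headers, then notes, padded up to a page boundary before the memory contents begin; read_kcore() below reproduces exactly this arithmetic when serving each region. A sketch of the offset computation with illustrative sizes:

	#include <stdio.h>

	#define PAGE_ALIGN(x) (((x) + 4095UL) & ~4095UL)

	int main(void)
	{
		unsigned long ehdr  = 64;	/* sizeof(Elf64_Ehdr) */
		unsigned long phdrs = 10 * 56;	/* nphdr * sizeof(Elf64_Phdr) */
		unsigned long notes = 2048;	/* illustrative notes_len */

		printf("data_offset = %lu\n", PAGE_ALIGN(ehdr + phdrs + notes));
		return 0;
	}
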
-/*
- * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
- */
-static void __kcore_update_ram(struct list_head *list)
-{
- int nphdr;
- size_t size;
- struct kcore_list *tmp, *pos;
- LIST_HEAD(garbage);
-
- write_lock(&kclist_lock);
- if (kcore_need_update) {
- list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
- if (pos->type == KCORE_RAM
- || pos->type == KCORE_VMEMMAP)
- list_move(&pos->list, &garbage);
- }
- list_splice_tail(list, &kclist_head);
- } else
- list_splice(list, &garbage);
- kcore_need_update = 0;
- proc_root_kcore->size = get_kcore_size(&nphdr, &size);
- write_unlock(&kclist_lock);
-
- free_kclist_ents(&garbage);
-}
-
#ifdef CONFIG_HIGHMEM
/*
@@ -142,11 +100,9 @@ static void __kcore_update_ram(struct list_head *list)
* because memory hole is not as big as !HIGHMEM case.
* (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
*/
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *head)
{
- LIST_HEAD(head);
struct kcore_list *ent;
- int ret = 0;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
@@ -154,9 +110,8 @@ static int kcore_update_ram(void)
ent->addr = (unsigned long)__va(0);
ent->size = max_low_pfn << PAGE_SHIFT;
ent->type = KCORE_RAM;
- list_add(&ent->list, &head);
- __kcore_update_ram(&head);
- return ret;
+ list_add(&ent->list, head);
+ return 0;
}
#else /* !CONFIG_HIGHMEM */
@@ -255,11 +210,10 @@ free_out:
return 1;
}
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *list)
{
int nid, ret;
unsigned long end_pfn;
- LIST_HEAD(head);
/* Not initialized... update now */
/* find out "max pfn" */
@@ -271,258 +225,258 @@ static int kcore_update_ram(void)
end_pfn = node_end;
}
/* scan 0 to max_pfn */
- ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
- if (ret) {
- free_kclist_ents(&head);
+ ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
+ if (ret)
return -ENOMEM;
- }
- __kcore_update_ram(&head);
- return ret;
+ return 0;
}
#endif /* CONFIG_HIGHMEM */
-/*****************************************************************************/
-/*
- * determine size of ELF note
- */
-static int notesize(struct memelfnote *en)
-{
- int sz;
-
- sz = sizeof(struct elf_note);
- sz += roundup((strlen(en->name) + 1), 4);
- sz += roundup(en->datasz, 4);
-
- return sz;
-} /* end notesize() */
-
-/*****************************************************************************/
-/*
- * store a note in the header buffer
- */
-static char *storenote(struct memelfnote *men, char *bufp)
+static int kcore_update_ram(void)
{
- struct elf_note en;
-
-#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
-
- en.n_namesz = strlen(men->name) + 1;
- en.n_descsz = men->datasz;
- en.n_type = men->type;
-
- DUMP_WRITE(&en, sizeof(en));
- DUMP_WRITE(men->name, en.n_namesz);
-
- /* XXX - cast from long long to long to avoid need for libgcc.a */
- bufp = (char*) roundup((unsigned long)bufp,4);
- DUMP_WRITE(men->data, men->datasz);
- bufp = (char*) roundup((unsigned long)bufp,4);
-
-#undef DUMP_WRITE
-
- return bufp;
-} /* end storenote() */
+ LIST_HEAD(list);
+ LIST_HEAD(garbage);
+ int nphdr;
+ size_t phdrs_len, notes_len, data_offset;
+ struct kcore_list *tmp, *pos;
+ int ret = 0;
-/*
- * store an ELF coredump header in the supplied buffer
- * nphdr is the number of elf_phdr to insert
- */
-static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
-{
- struct elf_prstatus prstatus; /* NT_PRSTATUS */
- struct elf_prpsinfo prpsinfo; /* NT_PRPSINFO */
- struct elf_phdr *nhdr, *phdr;
- struct elfhdr *elf;
- struct memelfnote notes[3];
- off_t offset = 0;
- struct kcore_list *m;
+ down_write(&kclist_lock);
+ if (!xchg(&kcore_need_update, 0))
+ goto out;
- /* setup ELF header */
- elf = (struct elfhdr *) bufp;
- bufp += sizeof(struct elfhdr);
- offset += sizeof(struct elfhdr);
- memcpy(elf->e_ident, ELFMAG, SELFMAG);
- elf->e_ident[EI_CLASS] = ELF_CLASS;
- elf->e_ident[EI_DATA] = ELF_DATA;
- elf->e_ident[EI_VERSION]= EV_CURRENT;
- elf->e_ident[EI_OSABI] = ELF_OSABI;
- memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
- elf->e_type = ET_CORE;
- elf->e_machine = ELF_ARCH;
- elf->e_version = EV_CURRENT;
- elf->e_entry = 0;
- elf->e_phoff = sizeof(struct elfhdr);
- elf->e_shoff = 0;
- elf->e_flags = ELF_CORE_EFLAGS;
- elf->e_ehsize = sizeof(struct elfhdr);
- elf->e_phentsize= sizeof(struct elf_phdr);
- elf->e_phnum = nphdr;
- elf->e_shentsize= 0;
- elf->e_shnum = 0;
- elf->e_shstrndx = 0;
-
- /* setup ELF PT_NOTE program header */
- nhdr = (struct elf_phdr *) bufp;
- bufp += sizeof(struct elf_phdr);
- offset += sizeof(struct elf_phdr);
- nhdr->p_type = PT_NOTE;
- nhdr->p_offset = 0;
- nhdr->p_vaddr = 0;
- nhdr->p_paddr = 0;
- nhdr->p_filesz = 0;
- nhdr->p_memsz = 0;
- nhdr->p_flags = 0;
- nhdr->p_align = 0;
-
- /* setup ELF PT_LOAD program header for every area */
- list_for_each_entry(m, &kclist_head, list) {
- phdr = (struct elf_phdr *) bufp;
- bufp += sizeof(struct elf_phdr);
- offset += sizeof(struct elf_phdr);
-
- phdr->p_type = PT_LOAD;
- phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
- phdr->p_vaddr = (size_t)m->addr;
- if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
- phdr->p_paddr = __pa(m->addr);
- else
- phdr->p_paddr = (elf_addr_t)-1;
- phdr->p_filesz = phdr->p_memsz = m->size;
- phdr->p_align = PAGE_SIZE;
+ ret = kcore_ram_list(&list);
+ if (ret) {
+ /* Couldn't get the RAM list, try again next time. */
+ WRITE_ONCE(kcore_need_update, 1);
+ list_splice_tail(&list, &garbage);
+ goto out;
}
- /*
- * Set up the notes in similar form to SVR4 core dumps made
- * with info from their /proc.
- */
- nhdr->p_offset = offset;
-
- /* set up the process status */
- notes[0].name = CORE_STR;
- notes[0].type = NT_PRSTATUS;
- notes[0].datasz = sizeof(struct elf_prstatus);
- notes[0].data = &prstatus;
-
- memset(&prstatus, 0, sizeof(struct elf_prstatus));
-
- nhdr->p_filesz = notesize(&notes[0]);
- bufp = storenote(&notes[0], bufp);
-
- /* set up the process info */
- notes[1].name = CORE_STR;
- notes[1].type = NT_PRPSINFO;
- notes[1].datasz = sizeof(struct elf_prpsinfo);
- notes[1].data = &prpsinfo;
-
- memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
- prpsinfo.pr_state = 0;
- prpsinfo.pr_sname = 'R';
- prpsinfo.pr_zomb = 0;
-
- strcpy(prpsinfo.pr_fname, "vmlinux");
- strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));
-
- nhdr->p_filesz += notesize(&notes[1]);
- bufp = storenote(&notes[1], bufp);
+ list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
+ if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
+ list_move(&pos->list, &garbage);
+ }
+ list_splice_tail(&list, &kclist_head);
- /* set up the task structure */
- notes[2].name = CORE_STR;
- notes[2].type = NT_TASKSTRUCT;
- notes[2].datasz = arch_task_struct_size;
- notes[2].data = current;
+ proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
+ &data_offset);
- nhdr->p_filesz += notesize(&notes[2]);
- bufp = storenote(&notes[2], bufp);
+out:
+ up_write(&kclist_lock);
+ list_for_each_entry_safe(pos, tmp, &garbage, list) {
+ list_del(&pos->list);
+ kfree(pos);
+ }
+ return ret;
+}
-} /* end elf_kcore_store_hdr() */
+static void append_kcore_note(char *notes, size_t *i, const char *name,
+ unsigned int type, const void *desc,
+ size_t descsz)
+{
+ struct elf_note *note = (struct elf_note *)&notes[*i];
+
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = descsz;
+ note->n_type = type;
+ *i += sizeof(*note);
+ memcpy(&notes[*i], name, note->n_namesz);
+ *i = ALIGN(*i + note->n_namesz, 4);
+ memcpy(&notes[*i], desc, descsz);
+ *i = ALIGN(*i + descsz, 4);
+}
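
append_kcore_note() emits the standard ELF note layout: a fixed header followed by the name and the descriptor, each padded to 4-byte alignment. A user-space sketch of the size bookkeeping (the 336-byte descriptor is only an illustrative elf_prstatus size):

	#include <elf.h>
	#include <stdio.h>
	#include <string.h>

	#define NOTE_ALIGN(x) (((x) + 3) & ~3UL)

	static size_t note_size(const char *name, size_t descsz)
	{
		return sizeof(Elf64_Nhdr) + NOTE_ALIGN(strlen(name) + 1) +
		       NOTE_ALIGN(descsz);
	}

	int main(void)
	{
		printf("CORE note, 336-byte desc: %zu bytes\n",
		       note_size("CORE", 336));
		return 0;
	}
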
-/*****************************************************************************/
-/*
- * read from the ELF header and then kernel memory
- */
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
char *buf = file->private_data;
- ssize_t acc = 0;
- size_t size, tsz;
- size_t elf_buflen;
+ size_t phdrs_offset, notes_offset, data_offset;
+ size_t phdrs_len, notes_len;
+ struct kcore_list *m;
+ size_t tsz;
int nphdr;
unsigned long start;
+ size_t orig_buflen = buflen;
+ int ret = 0;
- read_lock(&kclist_lock);
- size = get_kcore_size(&nphdr, &elf_buflen);
+ down_read(&kclist_lock);
+
+ get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
+ phdrs_offset = sizeof(struct elfhdr);
+ notes_offset = phdrs_offset + phdrs_len;
+
+ /* ELF file header. */
+ if (buflen && *fpos < sizeof(struct elfhdr)) {
+ struct elfhdr ehdr = {
+ .e_ident = {
+ [EI_MAG0] = ELFMAG0,
+ [EI_MAG1] = ELFMAG1,
+ [EI_MAG2] = ELFMAG2,
+ [EI_MAG3] = ELFMAG3,
+ [EI_CLASS] = ELF_CLASS,
+ [EI_DATA] = ELF_DATA,
+ [EI_VERSION] = EV_CURRENT,
+ [EI_OSABI] = ELF_OSABI,
+ },
+ .e_type = ET_CORE,
+ .e_machine = ELF_ARCH,
+ .e_version = EV_CURRENT,
+ .e_phoff = sizeof(struct elfhdr),
+ .e_flags = ELF_CORE_EFLAGS,
+ .e_ehsize = sizeof(struct elfhdr),
+ .e_phentsize = sizeof(struct elf_phdr),
+ .e_phnum = nphdr,
+ };
+
+ tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
+ if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
- if (buflen == 0 || *fpos >= size) {
- read_unlock(&kclist_lock);
- return 0;
+ buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
}
- /* trim buflen to not go beyond EOF */
- if (buflen > size - *fpos)
- buflen = size - *fpos;
-
- /* construct an ELF core header if we'll need some of it */
- if (*fpos < elf_buflen) {
- char * elf_buf;
-
- tsz = elf_buflen - *fpos;
- if (buflen < tsz)
- tsz = buflen;
- elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
- if (!elf_buf) {
- read_unlock(&kclist_lock);
- return -ENOMEM;
+ /* ELF program headers. */
+ if (buflen && *fpos < phdrs_offset + phdrs_len) {
+ struct elf_phdr *phdrs, *phdr;
+
+ phdrs = kzalloc(phdrs_len, GFP_KERNEL);
+ if (!phdrs) {
+ ret = -ENOMEM;
+ goto out;
}
- elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
- read_unlock(&kclist_lock);
- if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
- kfree(elf_buf);
- return -EFAULT;
+
+ phdrs[0].p_type = PT_NOTE;
+ phdrs[0].p_offset = notes_offset;
+ phdrs[0].p_filesz = notes_len;
+
+ phdr = &phdrs[1];
+ list_for_each_entry(m, &kclist_head, list) {
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
+ if (m->type == KCORE_REMAP)
+ phdr->p_vaddr = (size_t)m->vaddr;
+ else
+ phdr->p_vaddr = (size_t)m->addr;
+ if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
+ phdr->p_paddr = __pa(m->addr);
+ else if (m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa_symbol(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
+ phdr->p_filesz = phdr->p_memsz = m->size;
+ phdr->p_align = PAGE_SIZE;
+ phdr++;
}
- kfree(elf_buf);
+
+ tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
+ if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
+ tsz)) {
+ kfree(phdrs);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(phdrs);
+
+ buffer += tsz;
buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
- acc += tsz;
+ }
+
+ /* ELF note segment. */
+ if (buflen && *fpos < notes_offset + notes_len) {
+ struct elf_prstatus prstatus = {};
+ struct elf_prpsinfo prpsinfo = {
+ .pr_sname = 'R',
+ .pr_fname = "vmlinux",
+ };
+ char *notes;
+ size_t i = 0;
+
+ strlcpy(prpsinfo.pr_psargs, saved_command_line,
+ sizeof(prpsinfo.pr_psargs));
+
+ notes = kzalloc(notes_len, GFP_KERNEL);
+ if (!notes) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
+ sizeof(prpsinfo));
+ append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
+ arch_task_struct_size);
+ /*
+ * vmcoreinfo_size is mostly constant after init time, but it
+ * can be changed by crash_save_vmcoreinfo(). Racing here with a
+ * panic on another CPU before the machine goes down is insanely
+ * unlikely, but it's better to not leave potential buffer
+ * overflows lying around, regardless.
+ */
+ append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
+ vmcoreinfo_data,
+ min(vmcoreinfo_size, notes_len - i));
+
+ tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
+ if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
+ kfree(notes);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(notes);
- /* leave now if filled buffer already */
- if (buflen == 0)
- return acc;
- } else
- read_unlock(&kclist_lock);
+ buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
+ }
/*
 * Check to see if our file offset matches any of
 * the addresses in the elf_phdr on our list.
*/
- start = kc_offset_to_vaddr(*fpos - elf_buflen);
+ start = kc_offset_to_vaddr(*fpos - data_offset);
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
tsz = buflen;
-
- while (buflen) {
- struct kcore_list *m;
- read_lock(&kclist_lock);
- list_for_each_entry(m, &kclist_head, list) {
- if (start >= m->addr && start < (m->addr+m->size))
- break;
+ m = NULL;
+ while (buflen) {
+ /*
+ * If this is the first iteration or the address is not within
+ * the previous entry, search for a matching entry.
+ */
+ if (!m || start < m->addr || start >= m->addr + m->size) {
+ list_for_each_entry(m, &kclist_head, list) {
+ if (start >= m->addr &&
+ start < m->addr + m->size)
+ break;
+ }
}
- read_unlock(&kclist_lock);
if (&m->list == &kclist_head) {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else if (m->type == KCORE_VMALLOC) {
vread(buf, (char *)start, tsz);
/* we have to zero-fill the user buffer even if nothing was read */
- if (copy_to_user(buffer, buf, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, buf, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else if (m->type == KCORE_USER) {
/* User page is handled prior to normal kernel page: */
- if (copy_to_user(buffer, (char *)start, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, (char *)start, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else {
if (kern_addr_valid(start)) {
/*
@@ -530,29 +484,37 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
* hardened user copy kernel text checks.
*/
if (probe_kernel_read(buf, (void *) start, tsz)) {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else {
- if (copy_to_user(buffer, buf, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, buf, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
} else {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
}
buflen -= tsz;
*fpos += tsz;
buffer += tsz;
- acc += tsz;
start += tsz;
tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
}
- return acc;
+out:
+ up_read(&kclist_lock);
+ if (ret)
+ return ret;
+ return orig_buflen - buflen;
}
-
static int open_kcore(struct inode *inode, struct file *filp)
{
if (!capable(CAP_SYS_RAWIO))
@@ -592,9 +554,8 @@ static int __meminit kcore_callback(struct notifier_block *self,
switch (action) {
case MEM_ONLINE:
case MEM_OFFLINE:
- write_lock(&kclist_lock);
kcore_need_update = 1;
- write_unlock(&kclist_lock);
+ break;
}
return NOTIFY_OK;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 2fb04846ed11..edda898714eb 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -7,6 +7,7 @@
#include <linux/mman.h>
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
+#include <linux/percpu.h>
#include <linux/quicklist.h>
#include <linux/seq_file.h>
#include <linux/swap.h>
@@ -121,6 +122,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
(unsigned long)VMALLOC_TOTAL >> 10);
show_val_kb(m, "VmallocUsed: ", 0ul);
show_val_kb(m, "VmallocChunk: ", 0ul);
+ show_val_kb(m, "Percpu: ", pcpu_nr_pages());
#ifdef CONFIG_MEMORY_FAILURE
seq_printf(m, "HardwareCorrupted: %5lu kB\n",
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 59749dfaef67..535eda7857cf 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -183,7 +183,7 @@ static int show_stat(struct seq_file *p, void *v)
static int stat_open(struct inode *inode, struct file *file)
{
- size_t size = 1024 + 128 * num_online_cpus();
+ unsigned int size = 1024 + 128 * num_online_cpus();
/* minimum size to display an interrupt count : 2 bytes */
size += 2 * nr_irqs;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dfd73a4616ce..5ea1d64cb0b4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,7 +247,6 @@ static int proc_map_release(struct inode *inode, struct file *file)
if (priv->mm)
mmdrop(priv->mm);
- kfree(priv->rollup);
return seq_release_private(inode, file);
}
@@ -294,7 +293,7 @@ static void show_vma_header_prefix(struct seq_file *m,
}
static void
-show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
+show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
@@ -357,35 +356,18 @@ done:
seq_putc(m, '\n');
}
-static int show_map(struct seq_file *m, void *v, int is_pid)
+static int show_map(struct seq_file *m, void *v)
{
- show_map_vma(m, v, is_pid);
+ show_map_vma(m, v);
m_cache_vma(m, v);
return 0;
}
-static int show_pid_map(struct seq_file *m, void *v)
-{
- return show_map(m, v, 1);
-}
-
-static int show_tid_map(struct seq_file *m, void *v)
-{
- return show_map(m, v, 0);
-}
-
static const struct seq_operations proc_pid_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_map
-};
-
-static const struct seq_operations proc_tid_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_map
+ .show = show_map
};
static int pid_maps_open(struct inode *inode, struct file *file)
@@ -393,11 +375,6 @@ static int pid_maps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_maps_op);
}
-static int tid_maps_open(struct inode *inode, struct file *file)
-{
- return do_maps_open(inode, file, &proc_tid_maps_op);
-}
-
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
@@ -405,13 +382,6 @@ const struct file_operations proc_pid_maps_operations = {
.release = proc_map_release,
};
-const struct file_operations proc_tid_maps_operations = {
- .open = tid_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
-};
-
/*
* Proportional Set Size(PSS): my share of RSS.
*
@@ -433,7 +403,6 @@ const struct file_operations proc_tid_maps_operations = {
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
- bool first;
unsigned long resident;
unsigned long shared_clean;
unsigned long shared_dirty;
@@ -447,7 +416,6 @@ struct mem_size_stats {
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
- unsigned long first_vma_start;
u64 pss;
u64 pss_locked;
u64 swap_pss;
@@ -731,14 +699,9 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
}
#endif /* HUGETLB_PAGE */
-#define SEQ_PUT_DEC(str, val) \
- seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
-static int show_smap(struct seq_file *m, void *v, int is_pid)
+static void smap_gather_stats(struct vm_area_struct *vma,
+ struct mem_size_stats *mss)
{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
- struct mem_size_stats mss_stack;
- struct mem_size_stats *mss;
struct mm_walk smaps_walk = {
.pmd_entry = smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
@@ -746,23 +709,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
#endif
.mm = vma->vm_mm,
};
- int ret = 0;
- bool rollup_mode;
- bool last_vma;
-
- if (priv->rollup) {
- rollup_mode = true;
- mss = priv->rollup;
- if (mss->first) {
- mss->first_vma_start = vma->vm_start;
- mss->first = false;
- }
- last_vma = !m_next_vma(priv, vma);
- } else {
- rollup_mode = false;
- memset(&mss_stack, 0, sizeof(mss_stack));
- mss = &mss_stack;
- }
smaps_walk.private = mss;
@@ -794,79 +740,116 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
walk_page_vma(vma, &smaps_walk);
if (vma->vm_flags & VM_LOCKED)
mss->pss_locked += mss->pss;
+}
- if (!rollup_mode) {
- show_map_vma(m, vma, is_pid);
- } else if (last_vma) {
- show_vma_header_prefix(
- m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
- seq_pad(m, ' ');
- seq_puts(m, "[rollup]\n");
- } else {
- ret = SEQ_SKIP;
- }
-
- if (!rollup_mode) {
- SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
- SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
- SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
- seq_puts(m, " kB\n");
- }
+#define SEQ_PUT_DEC(str, val) \
+ seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
- if (!rollup_mode || last_vma) {
- SEQ_PUT_DEC("Rss: ", mss->resident);
- SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
- SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
- SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
- SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
- SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
- SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
- SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
- SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
- SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
- SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
- SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
- seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
- mss->private_hugetlb >> 10, 7);
- SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
- SEQ_PUT_DEC(" kB\nSwapPss: ",
- mss->swap_pss >> PSS_SHIFT);
- SEQ_PUT_DEC(" kB\nLocked: ",
- mss->pss_locked >> PSS_SHIFT);
- seq_puts(m, " kB\n");
- }
- if (!rollup_mode) {
- if (arch_pkeys_enabled())
- seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
- show_smap_vma_flags(m, vma);
- }
- m_cache_vma(m, vma);
- return ret;
+/* Show the contents common for smaps and smaps_rollup */
+static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss)
+{
+ SEQ_PUT_DEC("Rss: ", mss->resident);
+ SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
+ SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
+ SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
+ SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
+ SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
+ SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
+ SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
+ SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
+ SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+ SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+ seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
+ mss->private_hugetlb >> 10, 7);
+ SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
+ SEQ_PUT_DEC(" kB\nSwapPss: ",
+ mss->swap_pss >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nLocked: ",
+ mss->pss_locked >> PSS_SHIFT);
+ seq_puts(m, " kB\n");
}
-#undef SEQ_PUT_DEC
-static int show_pid_smap(struct seq_file *m, void *v)
+static int show_smap(struct seq_file *m, void *v)
{
- return show_smap(m, v, 1);
+ struct vm_area_struct *vma = v;
+ struct mem_size_stats mss;
+
+ memset(&mss, 0, sizeof(mss));
+
+ smap_gather_stats(vma, &mss);
+
+ show_map_vma(m, vma);
+
+ SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
+ SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+ SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
+ seq_puts(m, " kB\n");
+
+ __show_smap(m, &mss);
+
+ if (arch_pkeys_enabled())
+ seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
+ show_smap_vma_flags(m, vma);
+
+ m_cache_vma(m, vma);
+
+ return 0;
}
-static int show_tid_smap(struct seq_file *m, void *v)
+static int show_smaps_rollup(struct seq_file *m, void *v)
{
- return show_smap(m, v, 0);
+ struct proc_maps_private *priv = m->private;
+ struct mem_size_stats mss;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long last_vma_end = 0;
+ int ret = 0;
+
+ priv->task = get_proc_task(priv->inode);
+ if (!priv->task)
+ return -ESRCH;
+
+ mm = priv->mm;
+ if (!mm || !mmget_not_zero(mm)) {
+ ret = -ESRCH;
+ goto out_put_task;
+ }
+
+ memset(&mss, 0, sizeof(mss));
+
+ down_read(&mm->mmap_sem);
+ hold_task_mempolicy(priv);
+
+ for (vma = priv->mm->mmap; vma; vma = vma->vm_next) {
+ smap_gather_stats(vma, &mss);
+ last_vma_end = vma->vm_end;
+ }
+
+ show_vma_header_prefix(m, priv->mm->mmap->vm_start,
+ last_vma_end, 0, 0, 0, 0);
+ seq_pad(m, ' ');
+ seq_puts(m, "[rollup]\n");
+
+ __show_smap(m, &mss);
+
+ release_task_mempolicy(priv);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+
+out_put_task:
+ put_task_struct(priv->task);
+ priv->task = NULL;
+
+ return ret;
}
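
show_smaps_rollup() gathers every VMA into one mem_size_stats and prints a single record whose header spans the first VMA's start to the last VMA's end. A hedged userspace consumer of that record; the field name matches __show_smap() above:

#include <stdio.h>

int main(void)
{
	char line[256];
	unsigned long kb;
	FILE *f = fopen("/proc/self/smaps_rollup", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (sscanf(line, "Pss: %lu kB", &kb) == 1)
			printf("aggregate PSS: %lu kB\n", kb);
	fclose(f);
	return 0;
}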
+#undef SEQ_PUT_DEC
static const struct seq_operations proc_pid_smaps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_smap
-};
-
-static const struct seq_operations proc_tid_smaps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_smap
+ .show = show_smap
};
static int pid_smaps_open(struct inode *inode, struct file *file)
@@ -874,28 +857,45 @@ static int pid_smaps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_smaps_op);
}
-static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
+static int smaps_rollup_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
+ int ret;
struct proc_maps_private *priv;
- int ret = do_maps_open(inode, file, &proc_pid_smaps_op);
-
- if (ret < 0)
- return ret;
- seq = file->private_data;
- priv = seq->private;
- priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
- if (!priv->rollup) {
- proc_map_release(inode, file);
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT);
+ if (!priv)
return -ENOMEM;
+
+ ret = single_open(file, show_smaps_rollup, priv);
+ if (ret)
+ goto out_free;
+
+ priv->inode = inode;
+ priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
+ if (IS_ERR(priv->mm)) {
+ ret = PTR_ERR(priv->mm);
+
+ single_release(inode, file);
+ goto out_free;
}
- priv->rollup->first = true;
+
return 0;
+
+out_free:
+ kfree(priv);
+ return ret;
}
-static int tid_smaps_open(struct inode *inode, struct file *file)
+static int smaps_rollup_release(struct inode *inode, struct file *file)
{
- return do_maps_open(inode, file, &proc_tid_smaps_op);
+ struct seq_file *seq = file->private_data;
+ struct proc_maps_private *priv = seq->private;
+
+ if (priv->mm)
+ mmdrop(priv->mm);
+
+ kfree(priv);
+ return single_release(inode, file);
}
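
smaps_rollup now owns its private state through single_open()/single_release() rather than riding the maps seq_ops, and the mm reference from proc_mem_open() is dropped with mmdrop() on release. A stripped-down sketch of the open/release pairing only, with hypothetical demo_* names:

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

struct demo_priv { int dummy; };

static int demo_show(struct seq_file *m, void *v)
{
	struct demo_priv *priv = m->private;	/* stashed by single_open() */

	seq_printf(m, "dummy=%d\n", priv->dummy);
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	struct demo_priv *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	int ret;

	if (!priv)
		return -ENOMEM;
	ret = single_open(file, demo_show, priv);
	if (ret)
		kfree(priv);	/* open failed, nothing else frees it */
	return ret;
}

static int demo_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;

	kfree(seq->private);
	return single_release(inode, file);
}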
const struct file_operations proc_pid_smaps_operations = {
@@ -906,17 +906,10 @@ const struct file_operations proc_pid_smaps_operations = {
};
const struct file_operations proc_pid_smaps_rollup_operations = {
- .open = pid_smaps_rollup_open,
+ .open = smaps_rollup_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = proc_map_release,
-};
-
-const struct file_operations proc_tid_smaps_operations = {
- .open = tid_smaps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
+ .release = smaps_rollup_release,
};
enum clear_refs_types {
@@ -1728,7 +1721,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
/*
* Display pages allocated per node and memory policy via /proc.
*/
-static int show_numa_map(struct seq_file *m, void *v, int is_pid)
+static int show_numa_map(struct seq_file *m, void *v)
{
struct numa_maps_private *numa_priv = m->private;
struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
@@ -1812,45 +1805,17 @@ out:
return 0;
}
-static int show_pid_numa_map(struct seq_file *m, void *v)
-{
- return show_numa_map(m, v, 1);
-}
-
-static int show_tid_numa_map(struct seq_file *m, void *v)
-{
- return show_numa_map(m, v, 0);
-}
-
static const struct seq_operations proc_pid_numa_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_numa_map,
+ .show = show_numa_map,
};
-static const struct seq_operations proc_tid_numa_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_numa_map,
-};
-
-static int numa_maps_open(struct inode *inode, struct file *file,
- const struct seq_operations *ops)
-{
- return proc_maps_open(inode, file, ops,
- sizeof(struct numa_maps_private));
-}
-
static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
- return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
-}
-
-static int tid_numa_maps_open(struct inode *inode, struct file *file)
-{
- return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
+ return proc_maps_open(inode, file, &proc_pid_numa_maps_op,
+ sizeof(struct numa_maps_private));
}
const struct file_operations proc_pid_numa_maps_operations = {
@@ -1860,10 +1825,4 @@ const struct file_operations proc_pid_numa_maps_operations = {
.release = proc_map_release,
};
-const struct file_operations proc_tid_numa_maps_operations = {
- .open = tid_numa_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
-};
#endif /* CONFIG_NUMA */
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 5b62f57bd9bc..0b63d68dedb2 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -142,8 +142,7 @@ static int is_stack(struct vm_area_struct *vma)
/*
* display a single VMA to a sequenced file
*/
-static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
- int is_pid)
+static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long ino = 0;
@@ -189,22 +188,11 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
/*
* display mapping lines for a particular process's /proc/pid/maps
*/
-static int show_map(struct seq_file *m, void *_p, int is_pid)
+static int show_map(struct seq_file *m, void *_p)
{
struct rb_node *p = _p;
- return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb),
- is_pid);
-}
-
-static int show_pid_map(struct seq_file *m, void *_p)
-{
- return show_map(m, _p, 1);
-}
-
-static int show_tid_map(struct seq_file *m, void *_p)
-{
- return show_map(m, _p, 0);
+ return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
}
static void *m_start(struct seq_file *m, loff_t *pos)
@@ -260,14 +248,7 @@ static const struct seq_operations proc_pid_maps_ops = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_map
-};
-
-static const struct seq_operations proc_tid_maps_ops = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_map
+ .show = show_map
};
static int maps_open(struct inode *inode, struct file *file,
@@ -308,11 +289,6 @@ static int pid_maps_open(struct inode *inode, struct file *file)
return maps_open(inode, file, &proc_pid_maps_ops);
}
-static int tid_maps_open(struct inode *inode, struct file *file)
-{
- return maps_open(inode, file, &proc_tid_maps_ops);
-}
-
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
@@ -320,10 +296,3 @@ const struct file_operations proc_pid_maps_operations = {
.release = map_release,
};
-const struct file_operations proc_tid_maps_operations = {
- .open = tid_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = map_release,
-};
-
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 3f723cb478af..a4c2791ab70b 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -9,7 +9,7 @@
static int uptime_proc_show(struct seq_file *m, void *v)
{
- struct timespec uptime;
+ struct timespec64 uptime;
struct timespec64 idle;
u64 nsec;
u32 rem;
@@ -19,7 +19,7 @@ static int uptime_proc_show(struct seq_file *m, void *v)
for_each_possible_cpu(i)
nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
- get_monotonic_boottime(&uptime);
+ ktime_get_boottime_ts64(&uptime);
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index cfb6674331fd..cbde728f8ac6 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -225,6 +225,7 @@ out_unlock:
return ret;
}
+#ifdef CONFIG_MMU
static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
u64 start, size_t size)
{
@@ -259,6 +260,7 @@ out_unlock:
mutex_unlock(&vmcoredd_mutex);
return ret;
}
+#endif /* CONFIG_MMU */
#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
/* Read from the ELF header and then the crash dump. On error, negative value is
@@ -379,7 +381,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
* On s390 the fault handler is used for memory regions that can't be mapped
* directly with remap_pfn_range().
*/
-static int mmap_vmcore_fault(struct vm_fault *vmf)
+static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
{
#ifdef CONFIG_S390
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 860bfbe7a07a..f0cbf58ad4da 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,7 @@
#include <linux/quotaops.h>
#include <linux/types.h>
#include <linux/writeback.h>
+#include <linux/nospec.h>
static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
qid_t id)
@@ -120,8 +121,6 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
struct if_dqinfo uinfo;
int ret;
- /* This checks whether qc_state has enough entries... */
- BUILD_BUG_ON(MAXQUOTAS > XQM_MAXQUOTAS);
if (!sb->s_qcop->get_state)
return -ENOSYS;
ret = sb->s_qcop->get_state(sb, &state);
@@ -354,10 +353,10 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
* GETXSTATE quotactl has space for just one set of time limits so
* report them for the first enabled quota type
*/
- for (type = 0; type < XQM_MAXQUOTAS; type++)
+ for (type = 0; type < MAXQUOTAS; type++)
if (state.s_state[type].flags & QCI_ACCT_ENABLED)
break;
- BUG_ON(type == XQM_MAXQUOTAS);
+ BUG_ON(type == MAXQUOTAS);
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -427,10 +426,10 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
* GETXSTATV quotactl has space for just one set of time limits so
* report them for the first enabled quota type
*/
- for (type = 0; type < XQM_MAXQUOTAS; type++)
+ for (type = 0; type < MAXQUOTAS; type++)
if (state.s_state[type].flags & QCI_ACCT_ENABLED)
break;
- BUG_ON(type == XQM_MAXQUOTAS);
+ BUG_ON(type == MAXQUOTAS);
fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
@@ -701,8 +700,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
{
int ret;
- if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS))
+ if (type >= MAXQUOTAS)
return -EINVAL;
+ type = array_index_nospec(type, MAXQUOTAS);
/*
* Quota not supported on this fs? Check this before s_quota_types
* since they needn't be set if quota is not supported at all.
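	 */

The bounds check alone does not stop a mispredicted branch from dereferencing an out-of-range type during speculation; array_index_nospec() clamps the index through a data dependency, closing the Spectre-v1 gadget. The same pattern in isolation, with a hypothetical table lookup:

#include <linux/errno.h>
#include <linux/nospec.h>

static int demo_lookup(const int *table, int nr, int idx)
{
	if (idx >= nr)
		return -EINVAL;
	/* clamp idx so a mispredicted bounds check can't index OOB */
	idx = array_index_nospec(idx, nr);
	return table[idx];
}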
diff --git a/fs/read_write.c b/fs/read_write.c
index 153f8f690490..39b4a21dd933 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1964,6 +1964,44 @@ out_error:
}
EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
+int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+ struct file *dst_file, loff_t dst_pos, u64 len)
+{
+ s64 ret;
+
+ ret = mnt_want_write_file(dst_file);
+ if (ret)
+ return ret;
+
+ ret = clone_verify_area(dst_file, dst_pos, len, true);
+ if (ret < 0)
+ goto out_drop_write;
+
+ ret = -EINVAL;
+ if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
+ goto out_drop_write;
+
+ ret = -EXDEV;
+ if (src_file->f_path.mnt != dst_file->f_path.mnt)
+ goto out_drop_write;
+
+ ret = -EISDIR;
+ if (S_ISDIR(file_inode(dst_file)->i_mode))
+ goto out_drop_write;
+
+ ret = -EINVAL;
+ if (!dst_file->f_op->dedupe_file_range)
+ goto out_drop_write;
+
+ ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
+ dst_file, dst_pos, len);
+out_drop_write:
+ mnt_drop_write_file(dst_file);
+
+ return ret;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range_one);
+
int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
{
struct file_dedupe_range_info *info;
@@ -1972,11 +2010,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
u64 len;
int i;
int ret;
- bool is_admin = capable(CAP_SYS_ADMIN);
u16 count = same->dest_count;
- struct file *dst_file;
- loff_t dst_off;
- ssize_t deduped;
+ int deduped;
if (!(file->f_mode & FMODE_READ))
return -EINVAL;
@@ -2003,6 +2038,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
if (off + len > i_size_read(src))
return -EINVAL;
+ /* Arbitrary 1G limit on a single dedupe request, can be raised. */
+ len = min_t(u64, len, 1 << 30);
+
/* pre-format output fields to sane values */
for (i = 0; i < count; i++) {
same->info[i].bytes_deduped = 0ULL;
@@ -2010,54 +2048,28 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
}
for (i = 0, info = same->info; i < count; i++, info++) {
- struct inode *dst;
struct fd dst_fd = fdget(info->dest_fd);
+ struct file *dst_file = dst_fd.file;
- dst_file = dst_fd.file;
if (!dst_file) {
info->status = -EBADF;
goto next_loop;
}
- dst = file_inode(dst_file);
-
- ret = mnt_want_write_file(dst_file);
- if (ret) {
- info->status = ret;
- goto next_fdput;
- }
-
- dst_off = info->dest_offset;
- ret = clone_verify_area(dst_file, dst_off, len, true);
- if (ret < 0) {
- info->status = ret;
- goto next_file;
- }
- ret = 0;
if (info->reserved) {
info->status = -EINVAL;
- } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
- info->status = -EINVAL;
- } else if (file->f_path.mnt != dst_file->f_path.mnt) {
- info->status = -EXDEV;
- } else if (S_ISDIR(dst->i_mode)) {
- info->status = -EISDIR;
- } else if (dst_file->f_op->dedupe_file_range == NULL) {
- info->status = -EINVAL;
- } else {
- deduped = dst_file->f_op->dedupe_file_range(file, off,
- len, dst_file,
- info->dest_offset);
- if (deduped == -EBADE)
- info->status = FILE_DEDUPE_RANGE_DIFFERS;
- else if (deduped < 0)
- info->status = deduped;
- else
- info->bytes_deduped += deduped;
+ goto next_fdput;
}
-next_file:
- mnt_drop_write_file(dst_file);
+ deduped = vfs_dedupe_file_range_one(file, off, dst_file,
+ info->dest_offset, len);
+ if (deduped == -EBADE)
+ info->status = FILE_DEDUPE_RANGE_DIFFERS;
+ else if (deduped < 0)
+ info->status = deduped;
+ else
+ info->bytes_deduped = len;
+
next_fdput:
fdput(dst_fd);
next_loop:
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index e3c558d1b78c..3a5a752d96c7 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -33,30 +33,22 @@ static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize)
return 0;
}
-static char *print_time(time_t t)
-{
- static char timebuf[256];
-
- sprintf(timebuf, "%ld", t);
- return timebuf;
-}
-
static void sd_print_item(struct item_head *ih, char *item)
{
printk("\tmode | size | nlinks | first direct | mtime\n");
if (stat_data_v1(ih)) {
struct stat_data_v1 *sd = (struct stat_data_v1 *)item;
- printk("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd),
+ printk("\t0%-6o | %6u | %2u | %d | %u\n", sd_v1_mode(sd),
sd_v1_size(sd), sd_v1_nlink(sd),
sd_v1_first_direct_byte(sd),
- print_time(sd_v1_mtime(sd)));
+ sd_v1_mtime(sd));
} else {
struct stat_data *sd = (struct stat_data *)item;
- printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
+ printk("\t0%-6o | %6llu | %2u | %d | %u\n", sd_v2_mode(sd),
(unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
- sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
+ sd_v2_rdev(sd), sd_v2_mtime(sd));
}
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 52eb5d293a34..8a76f9d14bc6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2381,7 +2381,7 @@ static int journal_read(struct super_block *sb)
struct reiserfs_journal_desc *desc;
unsigned int oldest_trans_id = 0;
unsigned int oldest_invalid_trans_id = 0;
- time_t start;
+ time64_t start;
unsigned long oldest_start = 0;
unsigned long cur_dblock = 0;
unsigned long newest_mount_id = 9;
@@ -2395,7 +2395,7 @@ static int journal_read(struct super_block *sb)
cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
reiserfs_info(sb, "checking transaction log (%pg)\n",
journal->j_dev_bd);
- start = get_seconds();
+ start = ktime_get_seconds();
/*
* step 1, read in the journal header block. Check the transaction
@@ -2556,7 +2556,7 @@ start_log_replay:
if (replay_count > 0) {
reiserfs_info(sb,
"replayed %d transactions in %lu seconds\n",
- replay_count, get_seconds() - start);
+ replay_count, ktime_get_seconds() - start);
}
/* needed to satisfy the locking in _update_journal_header_block */
reiserfs_write_lock(sb);
@@ -2914,7 +2914,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
int new_alloc)
{
struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
- time_t now = get_seconds();
+ time64_t now = ktime_get_seconds();
/* cannot restart while nested */
BUG_ON(!th->t_trans_id);
if (th->t_refcount > 1)
@@ -3023,7 +3023,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
struct super_block *sb, unsigned long nblocks,
int join)
{
- time_t now = get_seconds();
+ time64_t now = ktime_get_seconds();
unsigned int old_trans_id;
struct reiserfs_journal *journal = SB_JOURNAL(sb);
struct reiserfs_transaction_handle myth;
@@ -3056,7 +3056,7 @@ relock:
PROC_INFO_INC(sb, journal.journal_relock_writers);
goto relock;
}
- now = get_seconds();
+ now = ktime_get_seconds();
/*
* if there is no room in the journal OR
@@ -3119,7 +3119,7 @@ relock:
}
/* we are the first writer, set trans_id */
if (journal->j_trans_start_time == 0) {
- journal->j_trans_start_time = get_seconds();
+ journal->j_trans_start_time = ktime_get_seconds();
}
atomic_inc(&journal->j_wcount);
journal->j_len_alloc += nblocks;
@@ -3559,11 +3559,11 @@ static void flush_async_commits(struct work_struct *work)
*/
void reiserfs_flush_old_commits(struct super_block *sb)
{
- time_t now;
+ time64_t now;
struct reiserfs_transaction_handle th;
struct reiserfs_journal *journal = SB_JOURNAL(sb);
- now = get_seconds();
+ now = ktime_get_seconds();
/*
* safety check so we don't flush while we are replaying the log during
* mount
@@ -3613,7 +3613,7 @@ void reiserfs_flush_old_commits(struct super_block *sb)
static int check_journal_end(struct reiserfs_transaction_handle *th, int flags)
{
- time_t now;
+ time64_t now;
int flush = flags & FLUSH_ALL;
int commit_now = flags & COMMIT_NOW;
int wait_on_commit = flags & WAIT;
@@ -3694,7 +3694,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, int flags)
}
/* deal with old transactions where we are the last writers */
- now = get_seconds();
+ now = ktime_get_seconds();
if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
commit_now = 1;
journal->j_next_async_flush = 1;
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index e39b3910d24d..f2cf3441fdfc 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -297,6 +297,13 @@ static int show_oidmap(struct seq_file *m, void *unused)
return 0;
}
+static time64_t ktime_mono_to_real_seconds(time64_t mono)
+{
+ ktime_t kt = ktime_set(mono, NSEC_PER_SEC/2);
+
+ return ktime_divns(ktime_mono_to_real(kt), NSEC_PER_SEC);
+}
+
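
j_trans_start_time now counts on the monotonic clock, so for display the helper above shifts it onto the realtime timeline, biased by half a second so the division rounds to nearest. A hedged userspace analogue using the two POSIX clocks (sub-second parts ignored):

#include <time.h>

static time_t mono_to_real_seconds(time_t mono)
{
	struct timespec rt, mt;

	clock_gettime(CLOCK_REALTIME, &rt);
	clock_gettime(CLOCK_MONOTONIC, &mt);
	/* offset between the two clocks, applied to the stored stamp */
	return mono + (rt.tv_sec - mt.tv_sec);
}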
static int show_journal(struct seq_file *m, void *unused)
{
struct super_block *sb = m->private;
@@ -325,7 +332,7 @@ static int show_journal(struct seq_file *m, void *unused)
"j_bcount: \t%lu\n"
"j_first_unflushed_offset: \t%lu\n"
"j_last_flush_trans_id: \t%u\n"
- "j_trans_start_time: \t%li\n"
+ "j_trans_start_time: \t%lli\n"
"j_list_bitmap_index: \t%i\n"
"j_must_wait: \t%i\n"
"j_next_full_flush: \t%i\n"
@@ -366,7 +373,7 @@ static int show_journal(struct seq_file *m, void *unused)
JF(j_bcount),
JF(j_first_unflushed_offset),
JF(j_last_flush_trans_id),
- JF(j_trans_start_time),
+ ktime_mono_to_real_seconds(JF(j_trans_start_time)),
JF(j_list_bitmap_index),
JF(j_must_wait),
JF(j_next_full_flush),
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index ae4811fecc1f..e5ca9ed79e54 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -271,7 +271,7 @@ struct reiserfs_journal_list {
struct mutex j_commit_mutex;
unsigned int j_trans_id;
- time_t j_timestamp;
+ time64_t j_timestamp; /* write-only but useful for crash dump analysis */
struct reiserfs_list_bitmap *j_list_bitmap;
struct buffer_head *j_commit_bh; /* commit buffer head */
struct reiserfs_journal_cnode *j_realblock;
@@ -331,7 +331,7 @@ struct reiserfs_journal {
struct buffer_head *j_header_bh;
- time_t j_trans_start_time; /* time this transaction started */
+ time64_t j_trans_start_time; /* time this transaction started */
struct mutex j_mutex;
struct mutex j_flush_mutex;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ff94fad477e4..48cdfc81fe10 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -792,8 +792,10 @@ static int listxattr_filler(struct dir_context *ctx, const char *name,
return 0;
size = namelen + 1;
if (b->buf) {
- if (size > b->size)
+ if (b->pos + size > b->size) {
+ b->pos = -ERANGE;
return -ERANGE;
+ }
memcpy(b->buf + b->pos, name, namelen);
b->buf[b->pos + namelen] = 0;
}
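
The old check compared each name against the whole buffer size, so a listing could overrun once earlier names had consumed part of it; the fix accounts for b->pos. A sketch of the two-call listxattr(2) protocol this filler serves, size query first, then fill; list_names() is a hypothetical helper:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

static int list_names(const char *path)
{
	ssize_t len = listxattr(path, NULL, 0);	/* size query */
	char *buf, *p;

	if (len <= 0)
		return (int)len;
	buf = malloc(len);
	if (!buf)
		return -1;
	len = listxattr(path, buf, len);	/* fails with errno ERANGE if
						 * the list grew in between */
	for (p = buf; len > 0 && p < buf + len; p += strlen(p) + 1)
		puts(p);			/* names are NUL-separated */
	free(buf);
	return 0;
}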
diff --git a/fs/super.c b/fs/super.c
index 7429588d6b49..f3a8c008e164 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -981,58 +981,42 @@ void emergency_thaw_all(void)
}
}
-/*
- * Unnamed block devices are dummy devices used by virtual
- * filesystems which don't use real block-devices. -- jrs
- */
-
static DEFINE_IDA(unnamed_dev_ida);
-static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
-/* Many userspace utilities consider an FSID of 0 invalid.
- * Always return at least 1 from get_anon_bdev.
- */
-static int unnamed_dev_start = 1;
+/**
+ * get_anon_bdev - Allocate a block device for filesystems which don't have one.
+ * @p: Pointer to a dev_t.
+ *
+ * Filesystems which don't use real block devices can call this function
+ * to allocate a virtual block device.
+ *
+ * Context: Any context. Frequently called while holding sb_lock.
+ * Return: 0 on success, -EMFILE if there are no anonymous bdevs left
+ * or -ENOMEM if memory allocation failed.
+ */
int get_anon_bdev(dev_t *p)
{
int dev;
- int error;
- retry:
- if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
- return -ENOMEM;
- spin_lock(&unnamed_dev_lock);
- error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
- if (!error)
- unnamed_dev_start = dev + 1;
- spin_unlock(&unnamed_dev_lock);
- if (error == -EAGAIN)
- /* We raced and lost with another CPU. */
- goto retry;
- else if (error)
- return -EAGAIN;
-
- if (dev >= (1 << MINORBITS)) {
- spin_lock(&unnamed_dev_lock);
- ida_remove(&unnamed_dev_ida, dev);
- if (unnamed_dev_start > dev)
- unnamed_dev_start = dev;
- spin_unlock(&unnamed_dev_lock);
- return -EMFILE;
- }
- *p = MKDEV(0, dev & MINORMASK);
+ /*
+ * Many userspace utilities consider an FSID of 0 invalid.
+ * Always return at least 1 from get_anon_bdev.
+ */
+ dev = ida_alloc_range(&unnamed_dev_ida, 1, (1 << MINORBITS) - 1,
+ GFP_ATOMIC);
+ if (dev == -ENOSPC)
+ dev = -EMFILE;
+ if (dev < 0)
+ return dev;
+
+ *p = MKDEV(0, dev);
return 0;
}
EXPORT_SYMBOL(get_anon_bdev);
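
ida_alloc_range() folds away the ida_pre_get()/ida_get_new_above() retry loop and the hand-rolled spinlock, and the [1, (1 << MINORBITS) - 1] range enforces both the nonzero-FSID rule and the minor limit in one call. The same pairing in isolation, over a hypothetical id space:

#include <linux/gfp.h>
#include <linux/idr.h>

static DEFINE_IDA(demo_ida);

static int demo_get_id(void)
{
	/* -ENOSPC once [1, max] is exhausted; get_anon_bdev() above
	 * remaps that to -EMFILE */
	return ida_alloc_range(&demo_ida, 1, (1 << 20) - 1, GFP_KERNEL);
}

static void demo_put_id(int id)
{
	ida_free(&demo_ida, id);
}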
void free_anon_bdev(dev_t dev)
{
- int slot = MINOR(dev);
- spin_lock(&unnamed_dev_lock);
- ida_remove(&unnamed_dev_ida, slot);
- if (slot < unnamed_dev_start)
- unnamed_dev_start = slot;
- spin_unlock(&unnamed_dev_lock);
+ ida_free(&unnamed_dev_ida, MINOR(dev));
}
EXPORT_SYMBOL(free_anon_bdev);
@@ -1040,7 +1024,6 @@ int set_anon_super(struct super_block *s, void *data)
{
return get_anon_bdev(&s->s_dev);
}
-
EXPORT_SYMBOL(set_anon_super);
void kill_anon_super(struct super_block *sb)
@@ -1049,7 +1032,6 @@ void kill_anon_super(struct super_block *sb)
generic_shutdown_super(sb);
free_anon_bdev(dev);
}
-
EXPORT_SYMBOL(kill_anon_super);
void kill_litter_super(struct super_block *sb)
@@ -1058,7 +1040,6 @@ void kill_litter_super(struct super_block *sb)
d_genocide(sb->s_root);
kill_anon_super(sb);
}
-
EXPORT_SYMBOL(kill_litter_super);
static int ns_test_super(struct super_block *sb, void *data)
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index bec9f79adb25..499a20a5a010 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -35,7 +35,7 @@
static int sysv_sync_fs(struct super_block *sb, int wait)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
- unsigned long time = get_seconds(), old_time;
+ u32 time = (u32)ktime_get_real_seconds(), old_time;
mutex_lock(&sbi->s_lock);
@@ -46,8 +46,8 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
*/
old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
if (sbi->s_type == FSTYPE_SYSV4) {
- if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
- *sbi->s_sb_state = cpu_to_fs32(sbi, 0x7c269d38 - time);
+ if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38u - old_time))
+ *sbi->s_sb_state = cpu_to_fs32(sbi, 0x7c269d38u - time);
*sbi->s_sb_time = cpu_to_fs32(sbi, time);
mark_buffer_dirty(sbi->s_bh2);
}
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 83a961bf7280..bbc78549be4c 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -51,9 +51,20 @@ config UBIFS_ATIME_SUPPORT
If unsure, say 'N'
+config UBIFS_FS_XATTR
+ bool "UBIFS XATTR support"
+ depends on UBIFS_FS
+ default y
+ help
+ Saying Y here includes support for extended attributes (xattrs).
+ Xattrs are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page).
+
+ If unsure, say Y.
+
config UBIFS_FS_ENCRYPTION
bool "UBIFS Encryption"
- depends on UBIFS_FS && BLOCK
+ depends on UBIFS_FS && UBIFS_FS_XATTR && BLOCK
select FS_ENCRYPTION
default n
help
@@ -64,7 +75,7 @@ config UBIFS_FS_ENCRYPTION
config UBIFS_FS_SECURITY
bool "UBIFS Security Labels"
- depends on UBIFS_FS
+ depends on UBIFS_FS && UBIFS_FS_XATTR
default y
help
Security labels provide an access control facility to support Linux
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
index 9758f709c736..6197d7e539e4 100644
--- a/fs/ubifs/Makefile
+++ b/fs/ubifs/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_UBIFS_FS) += ubifs.o
ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o
ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
-ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o xattr.o debug.o
+ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o debug.o
ubifs-y += misc.o
ubifs-$(CONFIG_UBIFS_FS_ENCRYPTION) += crypto.o
+ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 11a11b32a2a9..7ef22baf9d15 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -439,16 +439,16 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
{
int err, idx_growth, data_growth, dd_growth, retried = 0;
- ubifs_assert(req->new_page <= 1);
- ubifs_assert(req->dirtied_page <= 1);
- ubifs_assert(req->new_dent <= 1);
- ubifs_assert(req->mod_dent <= 1);
- ubifs_assert(req->new_ino <= 1);
- ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
- ubifs_assert(req->dirtied_ino <= 4);
- ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
- ubifs_assert(!(req->new_ino_d & 7));
- ubifs_assert(!(req->dirtied_ino_d & 7));
+ ubifs_assert(c, req->new_page <= 1);
+ ubifs_assert(c, req->dirtied_page <= 1);
+ ubifs_assert(c, req->new_dent <= 1);
+ ubifs_assert(c, req->mod_dent <= 1);
+ ubifs_assert(c, req->new_ino <= 1);
+ ubifs_assert(c, req->new_ino_d <= UBIFS_MAX_INO_DATA);
+ ubifs_assert(c, req->dirtied_ino <= 4);
+ ubifs_assert(c, req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+ ubifs_assert(c, !(req->new_ino_d & 7));
+ ubifs_assert(c, !(req->dirtied_ino_d & 7));
data_growth = calc_data_growth(c, req);
dd_growth = calc_dd_growth(c, req);
@@ -458,9 +458,9 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
again:
spin_lock(&c->space_lock);
- ubifs_assert(c->bi.idx_growth >= 0);
- ubifs_assert(c->bi.data_growth >= 0);
- ubifs_assert(c->bi.dd_growth >= 0);
+ ubifs_assert(c, c->bi.idx_growth >= 0);
+ ubifs_assert(c, c->bi.data_growth >= 0);
+ ubifs_assert(c, c->bi.dd_growth >= 0);
if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
dbg_budg("no space");
@@ -526,20 +526,20 @@ again:
*/
void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
{
- ubifs_assert(req->new_page <= 1);
- ubifs_assert(req->dirtied_page <= 1);
- ubifs_assert(req->new_dent <= 1);
- ubifs_assert(req->mod_dent <= 1);
- ubifs_assert(req->new_ino <= 1);
- ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA);
- ubifs_assert(req->dirtied_ino <= 4);
- ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
- ubifs_assert(!(req->new_ino_d & 7));
- ubifs_assert(!(req->dirtied_ino_d & 7));
+ ubifs_assert(c, req->new_page <= 1);
+ ubifs_assert(c, req->dirtied_page <= 1);
+ ubifs_assert(c, req->new_dent <= 1);
+ ubifs_assert(c, req->mod_dent <= 1);
+ ubifs_assert(c, req->new_ino <= 1);
+ ubifs_assert(c, req->new_ino_d <= UBIFS_MAX_INO_DATA);
+ ubifs_assert(c, req->dirtied_ino <= 4);
+ ubifs_assert(c, req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
+ ubifs_assert(c, !(req->new_ino_d & 7));
+ ubifs_assert(c, !(req->dirtied_ino_d & 7));
if (!req->recalculate) {
- ubifs_assert(req->idx_growth >= 0);
- ubifs_assert(req->data_growth >= 0);
- ubifs_assert(req->dd_growth >= 0);
+ ubifs_assert(c, req->idx_growth >= 0);
+ ubifs_assert(c, req->data_growth >= 0);
+ ubifs_assert(c, req->dd_growth >= 0);
}
if (req->recalculate) {
@@ -561,13 +561,13 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
c->bi.dd_growth -= req->dd_growth;
c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
- ubifs_assert(c->bi.idx_growth >= 0);
- ubifs_assert(c->bi.data_growth >= 0);
- ubifs_assert(c->bi.dd_growth >= 0);
- ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
- ubifs_assert(!(c->bi.idx_growth & 7));
- ubifs_assert(!(c->bi.data_growth & 7));
- ubifs_assert(!(c->bi.dd_growth & 7));
+ ubifs_assert(c, c->bi.idx_growth >= 0);
+ ubifs_assert(c, c->bi.data_growth >= 0);
+ ubifs_assert(c, c->bi.dd_growth >= 0);
+ ubifs_assert(c, c->bi.min_idx_lebs < c->main_lebs);
+ ubifs_assert(c, !(c->bi.idx_growth & 7));
+ ubifs_assert(c, !(c->bi.data_growth & 7));
+ ubifs_assert(c, !(c->bi.dd_growth & 7));
spin_unlock(&c->space_lock);
}
@@ -680,7 +680,7 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
int rsvd_idx_lebs, lebs;
long long available, outstanding, free;
- ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+ ubifs_assert(c, c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
outstanding = c->bi.data_growth + c->bi.dd_growth;
available = ubifs_calc_available(c, c->bi.min_idx_lebs);
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 63f56619991d..591f2c7a48f0 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -91,9 +91,9 @@ static int nothing_to_commit(struct ubifs_info *c)
if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
return 0;
- ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
- ubifs_assert(c->dirty_pn_cnt == 0);
- ubifs_assert(c->dirty_nn_cnt == 0);
+ ubifs_assert(c, atomic_long_read(&c->dirty_zn_cnt) == 0);
+ ubifs_assert(c, c->dirty_pn_cnt == 0);
+ ubifs_assert(c, c->dirty_nn_cnt == 0);
return 1;
}
@@ -113,7 +113,7 @@ static int do_commit(struct ubifs_info *c)
struct ubifs_lp_stats lst;
dbg_cmt("start");
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (c->ro_error) {
err = -EROFS;
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 55c508fe8131..4aaedf2d7f44 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -32,7 +32,7 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
struct page *ret;
unsigned int pad_len = round_up(in_len, UBIFS_CIPHER_BLOCK_SIZE);
- ubifs_assert(pad_len <= *out_len);
+ ubifs_assert(c, pad_len <= *out_len);
dn->compr_size = cpu_to_le16(in_len);
/* pad to full block cipher length */
@@ -63,7 +63,7 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn,
return -EINVAL;
}
- ubifs_assert(dlen <= UBIFS_BLOCK_SIZE);
+ ubifs_assert(c, dlen <= UBIFS_BLOCK_SIZE);
err = fscrypt_decrypt_page(inode, virt_to_page(&dn->data), dlen,
offset_in_page(&dn->data), block);
if (err) {
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 7cd8a7b95299..564e330d05b1 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -134,7 +134,7 @@ const char *dbg_snprintf_key(const struct ubifs_info *c,
}
} else
len -= snprintf(p, len, "bad key format %d", c->key_fmt);
- ubifs_assert(len > 0);
+ ubifs_assert(c, len > 0);
return p;
}
@@ -276,7 +276,7 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
return;
pr_err("List of directory entries:\n");
- ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
+ ubifs_assert(c, !mutex_is_locked(&c->tnc_mutex));
lowest_dent_key(c, &key, inode->i_ino);
while (1) {
@@ -931,7 +931,7 @@ void ubifs_dump_tnc(struct ubifs_info *c)
pr_err("\n");
pr_err("(pid %d) start dumping TNC tree\n", current->pid);
- znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
+ znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
level = znode->level;
pr_err("== Level %d ==\n", level);
while (znode) {
@@ -940,7 +940,7 @@ void ubifs_dump_tnc(struct ubifs_info *c)
pr_err("== Level %d ==\n", level);
}
ubifs_dump_znode(c, znode);
- znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
+ znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
}
pr_err("(pid %d) finish dumping TNC tree\n", current->pid);
}
@@ -1183,7 +1183,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
union ubifs_key key;
char key_buf[DBG_KEY_BUF_LEN];
- ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key));
+ ubifs_assert(c, !keys_cmp(c, &zbr1->key, &zbr2->key));
dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
if (!dent1)
return -ENOMEM;
@@ -1479,7 +1479,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
if (!dbg_is_chk_index(c))
return 0;
- ubifs_assert(mutex_is_locked(&c->tnc_mutex));
+ ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
if (!c->zroot.znode)
return 0;
@@ -1505,7 +1505,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
}
prev = znode;
- znode = ubifs_tnc_postorder_next(znode);
+ znode = ubifs_tnc_postorder_next(c, znode);
if (!znode)
break;
@@ -2036,7 +2036,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
long long blk_offs;
struct ubifs_data_node *dn = node;
- ubifs_assert(zbr->len >= UBIFS_DATA_NODE_SZ);
+ ubifs_assert(c, zbr->len >= UBIFS_DATA_NODE_SZ);
/*
* Search the inode node this data node belongs to and insert
@@ -2066,7 +2066,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
struct ubifs_dent_node *dent = node;
struct fsck_inode *fscki1;
- ubifs_assert(zbr->len >= UBIFS_DENT_NODE_SZ);
+ ubifs_assert(c, zbr->len >= UBIFS_DENT_NODE_SZ);
err = ubifs_validate_entry(c, dent);
if (err)
@@ -2461,7 +2461,7 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
{
struct ubifs_debug_info *d = c->dbg;
- ubifs_assert(dbg_is_tst_rcvry(c));
+ ubifs_assert(c, dbg_is_tst_rcvry(c));
if (!d->pc_cnt) {
/* First call - decide delay to the power cut */
@@ -3081,6 +3081,28 @@ void dbg_debugfs_exit(void)
debugfs_remove_recursive(dfs_rootdir);
}
+void ubifs_assert_failed(struct ubifs_info *c, const char *expr,
+ const char *file, int line)
+{
+ ubifs_err(c, "UBIFS assert failed: %s, in %s:%u", expr, file, line);
+
+ switch (c->assert_action) {
+ case ASSACT_PANIC:
+ BUG();
+ break;
+
+ case ASSACT_RO:
+ ubifs_ro_mode(c, -EINVAL);
+ break;
+
+ case ASSACT_REPORT:
+ default:
+ dump_stack();
+ break;
+
+}
+
/**
* ubifs_debugging_init - initialize UBIFS debugging.
* @c: UBIFS file-system description object
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index e03d5179769a..64c6977c189b 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -148,19 +148,21 @@ struct ubifs_global_debug_info {
unsigned int tst_rcvry:1;
};
-#define ubifs_assert(expr) do { \
+void ubifs_assert_failed(struct ubifs_info *c, const char *expr,
+ const char *file, int line);
+
+#define ubifs_assert(c, expr) do { \
if (unlikely(!(expr))) { \
- pr_crit("UBIFS assert failed in %s at %u (pid %d)\n", \
- __func__, __LINE__, current->pid); \
- dump_stack(); \
+ ubifs_assert_failed((struct ubifs_info *)c, #expr, __FILE__, \
+ __LINE__); \
} \
} while (0)
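
Call sites stay one-liners; the macro stringifies the expression and defers policy to ubifs_assert_failed(), which acts on c->assert_action. A sketch of what a call site expands to; demo() is hypothetical:

static void demo(struct ubifs_info *c, int nlink)
{
	/* expands to:
	 *   if (unlikely(!(nlink >= 0)))
	 *           ubifs_assert_failed(c, "nlink >= 0", __FILE__, __LINE__);
	 */
	ubifs_assert(c, nlink >= 0);
}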
#define ubifs_assert_cmt_locked(c) do { \
if (unlikely(down_write_trylock(&(c)->commit_sem))) { \
up_write(&(c)->commit_sem); \
- pr_crit("commit lock is not locked!\n"); \
- ubifs_assert(0); \
+ ubifs_err(c, "commit lock is not locked!\n"); \
+ ubifs_assert(c, 0); \
} \
} while (0)
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 9da224d4f2da..5767b373a8ff 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -240,8 +240,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
}
if (nm.hash) {
- ubifs_assert(fname_len(&nm) == 0);
- ubifs_assert(fname_name(&nm) == NULL);
+ ubifs_assert(c, fname_len(&nm) == 0);
+ ubifs_assert(c, fname_name(&nm) == NULL);
dent_key_init_hash(c, &key, dir->i_ino, nm.hash);
err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash);
} else {
@@ -404,7 +404,7 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry,
if (whiteout) {
init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
- ubifs_assert(inode->i_op == &ubifs_file_inode_operations);
+ ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations);
}
err = ubifs_init_security(dir, inode, &dentry->d_name);
@@ -421,7 +421,7 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry,
} else {
d_tmpfile(dentry, inode);
}
- ubifs_assert(ui->dirty);
+ ubifs_assert(c, ui->dirty);
instantiated = 1;
mutex_unlock(&ui->ui_mutex);
@@ -556,7 +556,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
/* File positions 0 and 1 correspond to "." and ".." */
if (ctx->pos < 2) {
- ubifs_assert(!file->private_data);
+ ubifs_assert(c, !file->private_data);
if (!dir_emit_dots(file, ctx)) {
if (encrypted)
fscrypt_fname_free_buffer(&fstr);
@@ -597,7 +597,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
dbg_gen("ino %llu, new f_pos %#x",
(unsigned long long)le64_to_cpu(dent->inum),
key_hash_flash(c, &dent->key));
- ubifs_assert(le64_to_cpu(dent->ch.sqnum) >
+ ubifs_assert(c, le64_to_cpu(dent->ch.sqnum) >
ubifs_inode(dir)->creat_sqnum);
fname_len(&nm) = le16_to_cpu(dent->nlen);
@@ -716,8 +716,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- ubifs_assert(inode_is_locked(dir));
- ubifs_assert(inode_is_locked(inode));
+ ubifs_assert(c, inode_is_locked(dir));
+ ubifs_assert(c, inode_is_locked(inode));
err = fscrypt_prepare_link(old_dentry, dir, dentry);
if (err)
@@ -804,8 +804,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
sz_change = CALC_DENT_SIZE(fname_len(&nm));
- ubifs_assert(inode_is_locked(dir));
- ubifs_assert(inode_is_locked(inode));
+ ubifs_assert(c, inode_is_locked(dir));
+ ubifs_assert(c, inode_is_locked(inode));
err = dbg_check_synced_i_size(c, inode);
if (err)
goto out_fname;
@@ -896,8 +896,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
inode->i_ino, dir->i_ino);
- ubifs_assert(inode_is_locked(dir));
- ubifs_assert(inode_is_locked(inode));
+ ubifs_assert(c, inode_is_locked(dir));
+ ubifs_assert(c, inode_is_locked(inode));
err = ubifs_check_dir_empty(d_inode(dentry));
if (err)
return err;
@@ -1123,8 +1123,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
struct ubifs_inode *ui;
struct ubifs_inode *dir_ui = ubifs_inode(dir);
struct ubifs_info *c = dir->i_sb->s_fs_info;
- int err, len = strlen(symname);
- int sz_change = CALC_DENT_SIZE(len);
+ int err, sz_change, len = strlen(symname);
struct fscrypt_str disk_link;
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
.new_ino_d = ALIGN(len, 8),
@@ -1151,6 +1150,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
if (err)
goto out_budg;
+ sz_change = CALC_DENT_SIZE(fname_len(&nm));
+
inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
@@ -1294,7 +1295,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry, new_dir->i_ino, flags);
if (unlink)
- ubifs_assert(inode_is_locked(new_inode));
+ ubifs_assert(c, inode_is_locked(new_inode));
if (unlink && is_dir) {
err = ubifs_check_dir_empty(new_inode);
@@ -1348,7 +1349,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
whiteout_ui = ubifs_inode(whiteout);
whiteout_ui->data = dev;
whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0));
- ubifs_assert(!whiteout_ui->dirty);
+ ubifs_assert(c, !whiteout_ui->dirty);
}
lock_4_inodes(old_dir, new_dir, new_inode, whiteout);
@@ -1508,7 +1509,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
int err;
struct fscrypt_name fst_nm, snd_nm;
- ubifs_assert(fst_inode && snd_inode);
+ ubifs_assert(c, fst_inode && snd_inode);
err = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &fst_nm);
if (err)
@@ -1555,12 +1556,13 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
unsigned int flags)
{
int err;
+ struct ubifs_info *c = old_dir->i_sb->s_fs_info;
if (flags & ~(RENAME_NOREPLACE | RENAME_WHITEOUT | RENAME_EXCHANGE))
return -EINVAL;
- ubifs_assert(inode_is_locked(old_dir));
- ubifs_assert(inode_is_locked(new_dir));
+ ubifs_assert(c, inode_is_locked(old_dir));
+ ubifs_assert(c, inode_is_locked(new_dir));
err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
flags);
@@ -1647,7 +1649,9 @@ const struct inode_operations ubifs_dir_inode_operations = {
.rename = ubifs_rename,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
+#ifdef CONFIG_UBIFS_FS_XATTR
.listxattr = ubifs_listxattr,
+#endif
#ifdef CONFIG_UBIFS_ATIME_SUPPORT
.update_time = ubifs_update_time,
#endif
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index fd7eb6fe9090..1b78f2e09218 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -71,7 +71,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
return err;
}
- ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
+ ubifs_assert(c, le64_to_cpu(dn->ch.sqnum) >
ubifs_inode(inode)->creat_sqnum);
len = le32_to_cpu(dn->size);
if (len <= 0 || len > UBIFS_BLOCK_SIZE)
@@ -115,12 +115,13 @@ static int do_readpage(struct page *page)
unsigned int block, beyond;
struct ubifs_data_node *dn;
struct inode *inode = page->mapping->host;
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
loff_t i_size = i_size_read(inode);
dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
inode->i_ino, page->index, i_size, page->flags);
- ubifs_assert(!PageChecked(page));
- ubifs_assert(!PagePrivate(page));
+ ubifs_assert(c, !PageChecked(page));
+ ubifs_assert(c, !PagePrivate(page));
addr = kmap(page);
@@ -441,8 +442,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
int skipped_read = 0;
struct page *page;
- ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (unlikely(c->ro_error))
return -EROFS;
@@ -481,7 +482,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
err = allocate_budget(c, page, ui, appending);
if (unlikely(err)) {
- ubifs_assert(err == -ENOSPC);
+ ubifs_assert(c, err == -ENOSPC);
/*
* If we skipped reading the page because we were going to
* write all of it, then it is not up to date.
@@ -498,7 +499,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* everything and fall-back to slow-path.
*/
if (appending) {
- ubifs_assert(mutex_is_locked(&ui->ui_mutex));
+ ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
mutex_unlock(&ui->ui_mutex);
}
unlock_page(page);
@@ -595,7 +596,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
* '__set_page_dirty_nobuffers()'.
*/
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
- ubifs_assert(mutex_is_locked(&ui->ui_mutex));
+ ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
mutex_unlock(&ui->ui_mutex);
}
@@ -648,7 +649,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
dn = bu->buf + (bu->zbranch[nn].offs - offs);
- ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
+ ubifs_assert(c, le64_to_cpu(dn->ch.sqnum) >
ubifs_inode(inode)->creat_sqnum);
len = le32_to_cpu(dn->size);
@@ -767,8 +768,8 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
bu->zbranch[bu->cnt - 1].len -
bu->zbranch[0].offs;
- ubifs_assert(bu->buf_len > 0);
- ubifs_assert(bu->buf_len <= c->leb_size);
+ ubifs_assert(c, bu->buf_len > 0);
+ ubifs_assert(c, bu->buf_len <= c->leb_size);
bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
if (!bu->buf)
goto out_bu_off;
@@ -920,7 +921,7 @@ static int do_writepage(struct page *page, int len)
#ifdef UBIFS_DEBUG
struct ubifs_inode *ui = ubifs_inode(inode);
spin_lock(&ui->ui_lock);
- ubifs_assert(page->index <= ui->synced_i_size >> PAGE_SHIFT);
+ ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT);
spin_unlock(&ui->ui_lock);
#endif
@@ -949,7 +950,7 @@ static int do_writepage(struct page *page, int len)
ubifs_ro_mode(c, err);
}
- ubifs_assert(PagePrivate(page));
+ ubifs_assert(c, PagePrivate(page));
if (PageChecked(page))
release_new_page_budget(c);
else
@@ -1014,6 +1015,7 @@ static int do_writepage(struct page *page, int len)
static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
loff_t i_size = i_size_read(inode), synced_i_size;
pgoff_t end_index = i_size >> PAGE_SHIFT;
@@ -1022,7 +1024,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
dbg_gen("ino %lu, pg %lu, pg flags %#lx",
inode->i_ino, page->index, page->flags);
- ubifs_assert(PagePrivate(page));
+ ubifs_assert(c, PagePrivate(page));
/* Is the page fully outside @i_size? (truncate in progress) */
if (page->index > end_index || (page->index == end_index && !len)) {
@@ -1167,7 +1169,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
* 'ubifs_jnl_truncate()' will see an already
* truncated (and up to date) data node.
*/
- ubifs_assert(PagePrivate(page));
+ ubifs_assert(c, PagePrivate(page));
clear_page_dirty_for_io(page);
if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
@@ -1303,7 +1305,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset,
struct inode *inode = page->mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
- ubifs_assert(PagePrivate(page));
+ ubifs_assert(c, PagePrivate(page));
if (offset || length < PAGE_SIZE)
/* Partial page remains dirty */
return;
@@ -1365,11 +1367,10 @@ out:
* granularity, they are not updated. This is an optimization.
*/
static inline int mctime_update_needed(const struct inode *inode,
- const struct timespec *now)
+ const struct timespec64 *now)
{
- struct timespec64 now64 = timespec_to_timespec64(*now);
- if (!timespec64_equal(&inode->i_mtime, &now64) ||
- !timespec64_equal(&inode->i_ctime, &now64))
+ if (!timespec64_equal(&inode->i_mtime, now) ||
+ !timespec64_equal(&inode->i_ctime, now))
return 1;
return 0;
}
@@ -1425,7 +1426,7 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time,
*/
static int update_mctime(struct inode *inode)
{
- struct timespec now = timespec64_to_timespec(current_time(inode));
+ struct timespec64 now = current_time(inode);
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -1462,13 +1463,15 @@ static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from)
static int ubifs_set_page_dirty(struct page *page)
{
int ret;
+ struct inode *inode = page->mapping->host;
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
ret = __set_page_dirty_nobuffers(page);
/*
* An attempt to dirty a page without budgeting for it - should not
* happen.
*/
- ubifs_assert(ret == 0);
+ ubifs_assert(c, ret == 0);
return ret;
}
@@ -1497,14 +1500,17 @@ static int ubifs_migrate_page(struct address_space *mapping,
static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
+ struct inode *inode = page->mapping->host;
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
+
/*
* An attempt to release a dirty page without budgeting for it - should
* not happen.
*/
if (PageWriteback(page))
return 0;
- ubifs_assert(PagePrivate(page));
- ubifs_assert(0);
+ ubifs_assert(c, PagePrivate(page));
+ ubifs_assert(c, 0);
ClearPagePrivate(page);
ClearPageChecked(page);
return 1;
@@ -1519,13 +1525,13 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
struct page *page = vmf->page;
struct inode *inode = file_inode(vmf->vma->vm_file);
struct ubifs_info *c = inode->i_sb->s_fs_info;
- struct timespec now = timespec64_to_timespec(current_time(inode));
+ struct timespec64 now = current_time(inode);
struct ubifs_budget_req req = { .new_page = 1 };
int err, update_time;
dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
i_size_read(inode));
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (unlikely(c->ro_error))
return VM_FAULT_SIGBUS; /* -EROFS */
@@ -1654,7 +1660,9 @@ const struct address_space_operations ubifs_file_address_operations = {
const struct inode_operations ubifs_file_inode_operations = {
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
+#ifdef CONFIG_UBIFS_FS_XATTR
.listxattr = ubifs_listxattr,
+#endif
#ifdef CONFIG_UBIFS_ATIME_SUPPORT
.update_time = ubifs_update_time,
#endif
@@ -1664,7 +1672,9 @@ const struct inode_operations ubifs_symlink_inode_operations = {
.get_link = ubifs_get_link,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
+#ifdef CONFIG_UBIFS_FS_XATTR
.listxattr = ubifs_listxattr,
+#endif
#ifdef CONFIG_UBIFS_ATIME_SUPPORT
.update_time = ubifs_update_time,
#endif
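
Besides the assert conversion, file.c drops the 32-bit timespec round-trip: mctime_update_needed() now takes struct timespec64 directly, so update_mctime() and ubifs_vm_page_mkwrite() can pass the result of current_time() unconverted. The simplified check boils down to the following sketch, which mirrors the new hunks and adds nothing beyond them:

    /* No timespec_to_timespec64() conversion is needed any more: */
    static int example_mctime_check(struct inode *inode)
    {
            struct timespec64 now = current_time(inode);

            /* timespec64_equal() compares tv_sec/tv_nsec directly */
            if (!timespec64_equal(&inode->i_mtime, &now) ||
                !timespec64_equal(&inode->i_ctime, &now))
                    return 1;       /* mtime/ctime update needed */
            return 0;
    }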
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 9571616b5dda..f9646835b026 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -183,18 +183,18 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
&data);
if (err)
return ERR_PTR(err);
- ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
+ ubifs_assert(c, data.lnum >= c->main_first && data.lnum < c->leb_cnt);
c->lscan_lnum = data.lnum;
lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
if (IS_ERR(lprops))
return lprops;
- ubifs_assert(lprops->lnum == data.lnum);
- ubifs_assert(lprops->free + lprops->dirty >= min_space);
- ubifs_assert(lprops->dirty >= c->dead_wm ||
+ ubifs_assert(c, lprops->lnum == data.lnum);
+ ubifs_assert(c, lprops->free + lprops->dirty >= min_space);
+ ubifs_assert(c, lprops->dirty >= c->dead_wm ||
(pick_free &&
lprops->free + lprops->dirty == c->leb_size));
- ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
- ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, !(lprops->flags & LPROPS_TAKEN));
+ ubifs_assert(c, !exclude_index || !(lprops->flags & LPROPS_INDEX));
return lprops;
}
@@ -315,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
lp = idx_lp;
if (lp) {
- ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
+ ubifs_assert(c, lp->free + lp->dirty >= c->dead_wm);
goto found;
}
@@ -326,7 +326,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
err = PTR_ERR(lp);
goto out;
}
- ubifs_assert(lp->dirty >= c->dead_wm ||
+ ubifs_assert(c, lp->dirty >= c->dead_wm ||
(pick_free && lp->free + lp->dirty == c->leb_size));
found:
@@ -462,15 +462,15 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
&data);
if (err)
return ERR_PTR(err);
- ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
+ ubifs_assert(c, data.lnum >= c->main_first && data.lnum < c->leb_cnt);
c->lscan_lnum = data.lnum;
lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
if (IS_ERR(lprops))
return lprops;
- ubifs_assert(lprops->lnum == data.lnum);
- ubifs_assert(lprops->free >= min_space);
- ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
- ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, lprops->lnum == data.lnum);
+ ubifs_assert(c, lprops->free >= min_space);
+ ubifs_assert(c, !(lprops->flags & LPROPS_TAKEN));
+ ubifs_assert(c, !(lprops->flags & LPROPS_INDEX));
return lprops;
}
@@ -574,7 +574,7 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
}
dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs);
- ubifs_assert(*offs <= c->leb_size - min_space);
+ ubifs_assert(c, *offs <= c->leb_size - min_space);
return lnum;
out:
@@ -642,15 +642,15 @@ static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
&data);
if (err)
return ERR_PTR(err);
- ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
+ ubifs_assert(c, data.lnum >= c->main_first && data.lnum < c->leb_cnt);
c->lscan_lnum = data.lnum;
lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
if (IS_ERR(lprops))
return lprops;
- ubifs_assert(lprops->lnum == data.lnum);
- ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
- ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
- ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, lprops->lnum == data.lnum);
+ ubifs_assert(c, lprops->free + lprops->dirty == c->leb_size);
+ ubifs_assert(c, !(lprops->flags & LPROPS_TAKEN));
+ ubifs_assert(c, !(lprops->flags & LPROPS_INDEX));
return lprops;
}
@@ -690,7 +690,7 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
*/
if (c->in_a_category_cnt != c->main_lebs ||
c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
- ubifs_assert(c->freeable_cnt == 0);
+ ubifs_assert(c, c->freeable_cnt == 0);
lprops = scan_for_leb_for_idx(c);
if (IS_ERR(lprops)) {
err = PTR_ERR(lprops);
@@ -750,10 +750,7 @@ static int cmp_dirty_idx(const struct ubifs_lprops **a,
static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b,
int size)
{
- struct ubifs_lprops *t = *a;
-
- *a = *b;
- *b = t;
+ swap(*a, *b);
}
/**
@@ -870,15 +867,15 @@ static int find_dirty_idx_leb(struct ubifs_info *c)
if (err)
return err;
found:
- ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
+ ubifs_assert(c, data.lnum >= c->main_first && data.lnum < c->leb_cnt);
c->lscan_lnum = data.lnum;
lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
if (IS_ERR(lprops))
return PTR_ERR(lprops);
- ubifs_assert(lprops->lnum == data.lnum);
- ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz);
- ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
- ubifs_assert((lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, lprops->lnum == data.lnum);
+ ubifs_assert(c, lprops->free + lprops->dirty >= c->min_idx_node_sz);
+ ubifs_assert(c, !(lprops->flags & LPROPS_TAKEN));
+ ubifs_assert(c, (lprops->flags & LPROPS_INDEX));
dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x",
lprops->lnum, lprops->free, lprops->dirty, lprops->flags);
@@ -947,8 +944,8 @@ static int find_dirtiest_idx_leb(struct ubifs_info *c)
}
dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty,
lp->free, lp->flags);
- ubifs_assert(lp->flags & LPROPS_TAKEN);
- ubifs_assert(lp->flags & LPROPS_INDEX);
+ ubifs_assert(c, lp->flags & LPROPS_TAKEN);
+ ubifs_assert(c, lp->flags & LPROPS_INDEX);
return lnum;
}
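
Apart from the assert conversion, the swap_dirty_idx() hunk replaces a hand-rolled three-line pointer exchange with the kernel's generic swap() helper. For reference, the helper is roughly the following (its actual home is include/linux/kernel.h):

    /* Sketch of the generic helper the hunk relies on: */
    #define swap(a, b) \
            do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

so swap(*a, *b) exchanges the two ubifs_lprops pointers in place, with no behavioural change.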
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index a03a47cf880d..d2680e0b4a36 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -83,7 +83,7 @@ static int switch_gc_head(struct ubifs_info *c)
int err, gc_lnum = c->gc_lnum;
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
- ubifs_assert(gc_lnum != -1);
+ ubifs_assert(c, gc_lnum != -1);
dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)",
wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum,
c->leb_size - wbuf->offs - wbuf->used);
@@ -131,10 +131,10 @@ static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
sa = list_entry(a, struct ubifs_scan_node, list);
sb = list_entry(b, struct ubifs_scan_node, list);
- ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
- ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
- ubifs_assert(sa->type == UBIFS_DATA_NODE);
- ubifs_assert(sb->type == UBIFS_DATA_NODE);
+ ubifs_assert(c, key_type(c, &sa->key) == UBIFS_DATA_KEY);
+ ubifs_assert(c, key_type(c, &sb->key) == UBIFS_DATA_KEY);
+ ubifs_assert(c, sa->type == UBIFS_DATA_NODE);
+ ubifs_assert(c, sb->type == UBIFS_DATA_NODE);
inuma = key_inum(c, &sa->key);
inumb = key_inum(c, &sb->key);
@@ -175,9 +175,9 @@ static int nondata_nodes_cmp(void *priv, struct list_head *a,
sa = list_entry(a, struct ubifs_scan_node, list);
sb = list_entry(b, struct ubifs_scan_node, list);
- ubifs_assert(key_type(c, &sa->key) != UBIFS_DATA_KEY &&
+ ubifs_assert(c, key_type(c, &sa->key) != UBIFS_DATA_KEY &&
key_type(c, &sb->key) != UBIFS_DATA_KEY);
- ubifs_assert(sa->type != UBIFS_DATA_NODE &&
+ ubifs_assert(c, sa->type != UBIFS_DATA_NODE &&
sb->type != UBIFS_DATA_NODE);
/* Inodes go before directory entries */
@@ -189,13 +189,13 @@ static int nondata_nodes_cmp(void *priv, struct list_head *a,
if (sb->type == UBIFS_INO_NODE)
return 1;
- ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY ||
+ ubifs_assert(c, key_type(c, &sa->key) == UBIFS_DENT_KEY ||
key_type(c, &sa->key) == UBIFS_XENT_KEY);
- ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY ||
+ ubifs_assert(c, key_type(c, &sb->key) == UBIFS_DENT_KEY ||
key_type(c, &sb->key) == UBIFS_XENT_KEY);
- ubifs_assert(sa->type == UBIFS_DENT_NODE ||
+ ubifs_assert(c, sa->type == UBIFS_DENT_NODE ||
sa->type == UBIFS_XENT_NODE);
- ubifs_assert(sb->type == UBIFS_DENT_NODE ||
+ ubifs_assert(c, sb->type == UBIFS_DENT_NODE ||
sb->type == UBIFS_XENT_NODE);
inuma = key_inum(c, &sa->key);
@@ -250,7 +250,7 @@ static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
/* Separate data nodes and non-data nodes */
list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
- ubifs_assert(snod->type == UBIFS_INO_NODE ||
+ ubifs_assert(c, snod->type == UBIFS_INO_NODE ||
snod->type == UBIFS_DATA_NODE ||
snod->type == UBIFS_DENT_NODE ||
snod->type == UBIFS_XENT_NODE ||
@@ -266,7 +266,7 @@ static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
continue;
}
- ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY ||
+ ubifs_assert(c, key_type(c, &snod->key) == UBIFS_DATA_KEY ||
key_type(c, &snod->key) == UBIFS_INO_KEY ||
key_type(c, &snod->key) == UBIFS_DENT_KEY ||
key_type(c, &snod->key) == UBIFS_XENT_KEY);
@@ -469,21 +469,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
int err = 0, lnum = lp->lnum;
- ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
+ ubifs_assert(c, c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
c->need_recovery);
- ubifs_assert(c->gc_lnum != lnum);
- ubifs_assert(wbuf->lnum != lnum);
+ ubifs_assert(c, c->gc_lnum != lnum);
+ ubifs_assert(c, wbuf->lnum != lnum);
if (lp->free + lp->dirty == c->leb_size) {
/* Special case - a free LEB */
dbg_gc("LEB %d is free, return it", lp->lnum);
- ubifs_assert(!(lp->flags & LPROPS_INDEX));
+ ubifs_assert(c, !(lp->flags & LPROPS_INDEX));
if (lp->free != c->leb_size) {
/*
* Write buffers must be sync'd before unmapping
* freeable LEBs, because one of them may contain data
- * which obsoletes something in 'lp->pnum'.
+ * which obsoletes something in 'lp->lnum'.
*/
err = gc_sync_wbufs(c);
if (err)
@@ -513,7 +513,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
if (IS_ERR(sleb))
return PTR_ERR(sleb);
- ubifs_assert(!list_empty(&sleb->nodes));
+ ubifs_assert(c, !list_empty(&sleb->nodes));
snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
if (snod->type == UBIFS_IDX_NODE) {
@@ -525,7 +525,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
struct ubifs_idx_node *idx = snod->node;
int level = le16_to_cpu(idx->level);
- ubifs_assert(snod->type == UBIFS_IDX_NODE);
+ ubifs_assert(c, snod->type == UBIFS_IDX_NODE);
key_read(c, ubifs_idx_key(c, idx), &snod->key);
err = ubifs_dirty_idx_node(c, &snod->key, level, lnum,
snod->offs);
@@ -648,7 +648,7 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
ubifs_assert_cmt_locked(c);
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (ubifs_gc_should_commit(c))
return -EAGAIN;
@@ -661,7 +661,7 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
}
/* We expect the write-buffer to be empty on entry */
- ubifs_assert(!wbuf->used);
+ ubifs_assert(c, !wbuf->used);
for (i = 0; ; i++) {
int space_before, space_after;
@@ -752,7 +752,7 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
continue;
}
- ubifs_assert(ret == LEB_RETAINED);
+ ubifs_assert(c, ret == LEB_RETAINED);
space_after = c->leb_size - wbuf->offs - wbuf->used;
dbg_gc("LEB %d retained, freed %d bytes", lp.lnum,
space_after - space_before);
@@ -812,8 +812,8 @@ out_unlock:
return ret;
out:
- ubifs_assert(ret < 0);
- ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
+ ubifs_assert(c, ret < 0);
+ ubifs_assert(c, ret != -ENOSPC && ret != -EAGAIN);
ubifs_wbuf_sync_nolock(wbuf);
ubifs_ro_mode(c, ret);
mutex_unlock(&wbuf->io_mutex);
@@ -848,8 +848,8 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
lp = ubifs_fast_find_freeable(c);
if (!lp)
break;
- ubifs_assert(!(lp->flags & LPROPS_TAKEN));
- ubifs_assert(!(lp->flags & LPROPS_INDEX));
+ ubifs_assert(c, !(lp->flags & LPROPS_TAKEN));
+ ubifs_assert(c, !(lp->flags & LPROPS_INDEX));
err = ubifs_leb_unmap(c, lp->lnum);
if (err)
goto out;
@@ -858,8 +858,8 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
err = PTR_ERR(lp);
goto out;
}
- ubifs_assert(!(lp->flags & LPROPS_TAKEN));
- ubifs_assert(!(lp->flags & LPROPS_INDEX));
+ ubifs_assert(c, !(lp->flags & LPROPS_TAKEN));
+ ubifs_assert(c, !(lp->flags & LPROPS_INDEX));
}
/* Mark GC'd index LEBs OK to unmap after this commit finishes */
@@ -880,8 +880,8 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
err = -ENOMEM;
goto out;
}
- ubifs_assert(!(lp->flags & LPROPS_TAKEN));
- ubifs_assert(lp->flags & LPROPS_INDEX);
+ ubifs_assert(c, !(lp->flags & LPROPS_TAKEN));
+ ubifs_assert(c, lp->flags & LPROPS_INDEX);
/* Don't release the LEB until after the next commit */
flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
@@ -890,8 +890,8 @@ int ubifs_gc_start_commit(struct ubifs_info *c)
kfree(idx_gc);
goto out;
}
- ubifs_assert(lp->flags & LPROPS_TAKEN);
- ubifs_assert(!(lp->flags & LPROPS_INDEX));
+ ubifs_assert(c, lp->flags & LPROPS_TAKEN);
+ ubifs_assert(c, !(lp->flags & LPROPS_INDEX));
idx_gc->lnum = lp->lnum;
idx_gc->unmap = 1;
list_add(&idx_gc->list, &c->idx_gc);
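
data_nodes_cmp() and nondata_nodes_cmp() above are list_sort() comparators; the c pointer the new asserts use arrives through list_sort()'s opaque priv argument. A hedged sketch of the call-site pattern, assuming the list_sort() signature of this kernel generation (sort_nodes() in gc.c is the real caller):

    #include <linux/list_sort.h>

    /* priv is handed through to the comparator unchanged, so passing
     * c here is what makes "ubifs_assert(c, ...)" possible inside
     * data_nodes_cmp(). */
    static void example_sort_data_nodes(struct ubifs_info *c,
                                        struct list_head *nodes)
    {
            list_sort(c, nodes, data_nodes_cmp);
    }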
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index fe77e9625e84..099bec94b820 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -119,7 +119,7 @@ int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
{
int err;
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (c->ro_error)
return -EROFS;
if (!dbg_is_tst_rcvry(c))
@@ -139,7 +139,7 @@ int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len)
{
int err;
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (c->ro_error)
return -EROFS;
if (!dbg_is_tst_rcvry(c))
@@ -159,7 +159,7 @@ int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
{
int err;
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (c->ro_error)
return -EROFS;
if (!dbg_is_tst_rcvry(c))
@@ -178,7 +178,7 @@ int ubifs_leb_map(struct ubifs_info *c, int lnum)
{
int err;
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (c->ro_error)
return -EROFS;
if (!dbg_is_tst_rcvry(c))
@@ -241,8 +241,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
uint32_t crc, node_crc, magic;
const struct ubifs_ch *ch = buf;
- ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
- ubifs_assert(!(offs & 7) && offs < c->leb_size);
+ ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(c, !(offs & 7) && offs < c->leb_size);
magic = le32_to_cpu(ch->magic);
if (magic != UBIFS_NODE_MAGIC) {
@@ -319,7 +319,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
uint32_t crc;
- ubifs_assert(pad >= 0 && !(pad & 7));
+ ubifs_assert(c, pad >= 0 && !(pad & 7));
if (pad >= UBIFS_PAD_NODE_SZ) {
struct ubifs_ch *ch = buf;
@@ -382,7 +382,7 @@ void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
struct ubifs_ch *ch = node;
unsigned long long sqnum = next_sqnum(c);
- ubifs_assert(len >= UBIFS_CH_SZ);
+ ubifs_assert(c, len >= UBIFS_CH_SZ);
ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
ch->len = cpu_to_le32(len);
@@ -415,7 +415,7 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
struct ubifs_ch *ch = node;
unsigned long long sqnum = next_sqnum(c);
- ubifs_assert(len >= UBIFS_CH_SZ);
+ ubifs_assert(c, len >= UBIFS_CH_SZ);
ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
ch->len = cpu_to_le32(len);
@@ -448,9 +448,10 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
/**
* new_wbuf_timer - start new write-buffer timer.
+ * @c: UBIFS file-system description object
* @wbuf: write-buffer descriptor
*/
-static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
+static void new_wbuf_timer_nolock(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
{
ktime_t softlimit = ms_to_ktime(dirty_writeback_interval * 10);
unsigned long long delta = dirty_writeback_interval;
@@ -458,8 +459,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
/* centi to milli, milli to nano, then 10% */
delta *= 10ULL * NSEC_PER_MSEC / 10ULL;
- ubifs_assert(!hrtimer_active(&wbuf->timer));
- ubifs_assert(delta <= ULONG_MAX);
+ ubifs_assert(c, !hrtimer_active(&wbuf->timer));
+ ubifs_assert(c, delta <= ULONG_MAX);
if (wbuf->no_timer)
return;
@@ -508,14 +509,14 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
dbg_io("LEB %d:%d, %d bytes, jhead %s",
wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
- ubifs_assert(!(wbuf->avail & 7));
- ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
- ubifs_assert(wbuf->size >= c->min_io_size);
- ubifs_assert(wbuf->size <= c->max_write_size);
- ubifs_assert(wbuf->size % c->min_io_size == 0);
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !(wbuf->avail & 7));
+ ubifs_assert(c, wbuf->offs + wbuf->size <= c->leb_size);
+ ubifs_assert(c, wbuf->size >= c->min_io_size);
+ ubifs_assert(c, wbuf->size <= c->max_write_size);
+ ubifs_assert(c, wbuf->size % c->min_io_size == 0);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (c->leb_size - wbuf->offs >= c->max_write_size)
- ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
+ ubifs_assert(c, !((wbuf->offs + wbuf->size) % c->max_write_size));
if (c->ro_error)
return -EROFS;
@@ -576,11 +577,11 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs)
const struct ubifs_info *c = wbuf->c;
dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
- ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
- ubifs_assert(offs >= 0 && offs <= c->leb_size);
- ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
- ubifs_assert(lnum != wbuf->lnum);
- ubifs_assert(wbuf->used == 0);
+ ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt);
+ ubifs_assert(c, offs >= 0 && offs <= c->leb_size);
+ ubifs_assert(c, offs % c->min_io_size == 0 && !(offs & 7));
+ ubifs_assert(c, lnum != wbuf->lnum);
+ ubifs_assert(c, wbuf->used == 0);
spin_lock(&wbuf->lock);
wbuf->lnum = lnum;
@@ -610,7 +611,7 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
{
int err, i;
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (!c->need_wbuf_sync)
return 0;
c->need_wbuf_sync = 0;
@@ -686,18 +687,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
dbg_ntype(((struct ubifs_ch *)buf)->node_type),
dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
- ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
- ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
- ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
- ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
- ubifs_assert(wbuf->size >= c->min_io_size);
- ubifs_assert(wbuf->size <= c->max_write_size);
- ubifs_assert(wbuf->size % c->min_io_size == 0);
- ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
- ubifs_assert(!c->ro_media && !c->ro_mount);
- ubifs_assert(!c->space_fixup);
+ ubifs_assert(c, len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
+ ubifs_assert(c, wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
+ ubifs_assert(c, !(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
+ ubifs_assert(c, wbuf->avail > 0 && wbuf->avail <= wbuf->size);
+ ubifs_assert(c, wbuf->size >= c->min_io_size);
+ ubifs_assert(c, wbuf->size <= c->max_write_size);
+ ubifs_assert(c, wbuf->size % c->min_io_size == 0);
+ ubifs_assert(c, mutex_is_locked(&wbuf->io_mutex));
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->space_fixup);
if (c->leb_size - wbuf->offs >= c->max_write_size)
- ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
+ ubifs_assert(c, !((wbuf->offs + wbuf->size) % c->max_write_size));
if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
err = -ENOSPC;
@@ -834,7 +835,7 @@ exit:
}
if (wbuf->used)
- new_wbuf_timer_nolock(wbuf);
+ new_wbuf_timer_nolock(c, wbuf);
return 0;
@@ -869,10 +870,10 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
buf_len);
- ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
- ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
- ubifs_assert(!c->ro_media && !c->ro_mount);
- ubifs_assert(!c->space_fixup);
+ ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(c, offs % c->min_io_size == 0 && offs < c->leb_size);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->space_fixup);
if (c->ro_error)
return -EROFS;
@@ -909,9 +910,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
- ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
- ubifs_assert(!(offs & 7) && offs < c->leb_size);
- ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
+ ubifs_assert(c, wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(c, !(offs & 7) && offs < c->leb_size);
+ ubifs_assert(c, type >= 0 && type < UBIFS_NODE_TYPES_CNT);
spin_lock(&wbuf->lock);
overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
@@ -984,10 +985,10 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
struct ubifs_ch *ch = buf;
dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
- ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
- ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
- ubifs_assert(!(offs & 7) && offs < c->leb_size);
- ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
+ ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(c, len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
+ ubifs_assert(c, !(offs & 7) && offs < c->leb_size);
+ ubifs_assert(c, type >= 0 && type < UBIFS_NODE_TYPES_CNT);
err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
if (err && err != -EBADMSG)
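
new_wbuf_timer_nolock() gains the context argument purely for its two asserts; the timing math is unchanged. Since the "centi to milli, milli to nano, then 10%" comment compresses three conversions into one multiply, a worked example with the default dirty_writeback_interval of 500 centiseconds may help:

    /*
     * dirty_writeback_interval is in centiseconds (default 500 == 5 s):
     *
     *   softlimit = ms_to_ktime(500 * 10)              = 5 s
     *   delta     = 500 * (10ULL * NSEC_PER_MSEC / 10) = 500 ms
     *
     * so the write-buffer timer is (presumably via a range-timer start)
     * allowed to fire anywhere in [5 s, 5.5 s], giving the hrtimer core
     * slack to coalesce wakeups.
     */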
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 07b4956e0425..802565a17733 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -111,7 +111,7 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
* better to try to allocate space at the ends of eraseblocks. This is
* what the squeeze parameter does.
*/
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
squeeze = (jhead == BASEHD);
again:
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
@@ -215,7 +215,7 @@ out_unlock:
out_return:
/* An error occurred and the LEB has to be returned to lprops */
- ubifs_assert(err < 0);
+ ubifs_assert(c, err < 0);
err1 = ubifs_return_leb(c, lnum);
if (err1 && err == -EAGAIN)
/*
@@ -246,7 +246,7 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
{
struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
- ubifs_assert(jhead != GCHD);
+ ubifs_assert(c, jhead != GCHD);
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
@@ -278,7 +278,7 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
int err;
struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
- ubifs_assert(jhead != GCHD);
+ ubifs_assert(c, jhead != GCHD);
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
@@ -317,6 +317,7 @@ again:
down_read(&c->commit_sem);
err = reserve_space(c, jhead, len);
if (!err)
+ /* c->commit_sem will get released via finish_reservation(). */
return 0;
up_read(&c->commit_sem);
@@ -548,7 +549,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
struct ubifs_ino_node *ino;
union ubifs_key dent_key, ino_key;
- ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
+ ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
dlen = UBIFS_DENT_NODE_SZ + fname_len(nm) + 1;
ilen = UBIFS_INO_NODE_SZ;
@@ -664,6 +665,11 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
spin_lock(&ui->ui_lock);
ui->synced_i_size = ui->ui_size;
spin_unlock(&ui->ui_lock);
+ if (xent) {
+ spin_lock(&host_ui->ui_lock);
+ host_ui->synced_i_size = host_ui->ui_size;
+ spin_unlock(&host_ui->ui_lock);
+ }
mark_inode_clean(c, ui);
mark_inode_clean(c, host_ui);
return 0;
@@ -707,7 +713,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
dbg_jnlk(key, "ino %lu, blk %u, len %d, key ",
(unsigned long)key_inum(c, key), key_block(c, key), len);
- ubifs_assert(len <= UBIFS_BLOCK_SIZE);
+ ubifs_assert(c, len <= UBIFS_BLOCK_SIZE);
if (encrypted)
dlen += UBIFS_CIPHER_BLOCK_SIZE;
@@ -738,7 +744,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
out_len = compr_len = dlen - UBIFS_DATA_NODE_SZ;
ubifs_compress(c, buf, len, &data->data, &compr_len, &compr_type);
- ubifs_assert(compr_len <= UBIFS_BLOCK_SIZE);
+ ubifs_assert(c, compr_len <= UBIFS_BLOCK_SIZE);
if (encrypted) {
err = ubifs_encrypt(inode, data, compr_len, &out_len, key_block(c, key));
@@ -898,7 +904,7 @@ int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
int err;
struct ubifs_inode *ui = ubifs_inode(inode);
- ubifs_assert(inode->i_nlink == 0);
+ ubifs_assert(c, inode->i_nlink == 0);
if (ui->del_cmtno != c->cmt_no)
/* A commit happened for sure */
@@ -953,10 +959,10 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
int twoparents = (fst_dir != snd_dir);
void *p;
- ubifs_assert(ubifs_inode(fst_dir)->data_len == 0);
- ubifs_assert(ubifs_inode(snd_dir)->data_len == 0);
- ubifs_assert(mutex_is_locked(&ubifs_inode(fst_dir)->ui_mutex));
- ubifs_assert(mutex_is_locked(&ubifs_inode(snd_dir)->ui_mutex));
+ ubifs_assert(c, ubifs_inode(fst_dir)->data_len == 0);
+ ubifs_assert(c, ubifs_inode(snd_dir)->data_len == 0);
+ ubifs_assert(c, mutex_is_locked(&ubifs_inode(fst_dir)->ui_mutex));
+ ubifs_assert(c, mutex_is_locked(&ubifs_inode(snd_dir)->ui_mutex));
dlen1 = UBIFS_DENT_NODE_SZ + fname_len(snd_nm) + 1;
dlen2 = UBIFS_DENT_NODE_SZ + fname_len(fst_nm) + 1;
@@ -1096,16 +1102,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
int move = (old_dir != new_dir);
struct ubifs_inode *uninitialized_var(new_ui);
- ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
- ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
- ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
- ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex));
+ ubifs_assert(c, ubifs_inode(old_dir)->data_len == 0);
+ ubifs_assert(c, ubifs_inode(new_dir)->data_len == 0);
+ ubifs_assert(c, mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
+ ubifs_assert(c, mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex));
dlen1 = UBIFS_DENT_NODE_SZ + fname_len(new_nm) + 1;
dlen2 = UBIFS_DENT_NODE_SZ + fname_len(old_nm) + 1;
if (new_inode) {
new_ui = ubifs_inode(new_inode);
- ubifs_assert(mutex_is_locked(&new_ui->ui_mutex));
+ ubifs_assert(c, mutex_is_locked(&new_ui->ui_mutex));
ilen = UBIFS_INO_NODE_SZ;
if (!last_reference)
ilen += new_ui->data_len;
@@ -1282,8 +1288,7 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in
int *new_len)
{
void *buf;
- int err, compr_type;
- u32 dlen, out_len, old_dlen;
+ int err, dlen, compr_type, out_len, old_dlen;
out_len = le32_to_cpu(dn->size);
buf = kmalloc_array(out_len, WORST_COMPR_FACTOR, GFP_NOFS);
@@ -1319,7 +1324,7 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in
dn->compr_size = 0;
}
- ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
+ ubifs_assert(c, out_len <= UBIFS_BLOCK_SIZE);
dn->compr_type = cpu_to_le16(compr_type);
dn->size = cpu_to_le32(*new_len);
*new_len = UBIFS_DATA_NODE_SZ + out_len;
@@ -1358,9 +1363,9 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
dbg_jnl("ino %lu, size %lld -> %lld",
(unsigned long)inum, old_size, new_size);
- ubifs_assert(!ui->data_len);
- ubifs_assert(S_ISREG(inode->i_mode));
- ubifs_assert(mutex_is_locked(&ui->ui_mutex));
+ ubifs_assert(c, !ui->data_len);
+ ubifs_assert(c, S_ISREG(inode->i_mode));
+ ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
@@ -1388,7 +1393,16 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
else if (err)
goto out_free;
else {
- if (le32_to_cpu(dn->size) <= dlen)
+ int dn_len = le32_to_cpu(dn->size);
+
+ if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) {
+ ubifs_err(c, "bad data node (block %u, inode %lu)",
+ blk, inode->i_ino);
+ ubifs_dump_node(c, dn);
+ goto out_free;
+ }
+
+ if (dn_len <= dlen)
dlen = 0; /* Nothing to do */
else {
err = truncate_data_node(c, inode, blk, dn, &dlen);
@@ -1488,8 +1502,8 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
int sync = IS_DIRSYNC(host);
struct ubifs_inode *host_ui = ubifs_inode(host);
- ubifs_assert(inode->i_nlink == 0);
- ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
+ ubifs_assert(c, inode->i_nlink == 0);
+ ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
/*
* Since we are deleting the inode, we do not bother to attach any data
@@ -1598,9 +1612,9 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
int sync = IS_DIRSYNC(host);
dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino);
- ubifs_assert(host->i_nlink > 0);
- ubifs_assert(inode->i_nlink > 0);
- ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
+ ubifs_assert(c, host->i_nlink > 0);
+ ubifs_assert(c, inode->i_nlink > 0);
+ ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
len1 = UBIFS_INO_NODE_SZ + host_ui->data_len;
len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len;
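
Two functional changes ride along in journal.c: ubifs_jnl_update() now refreshes the host inode's synced_i_size when an extended attribute is created, and ubifs_jnl_truncate() validates the data-node length read back from the index before trusting it. A data node carries at most one UBIFS block, so any on-media size outside (0, UBIFS_BLOCK_SIZE] can only be corruption; a hypothetical stand-alone checker mirroring the new hunk:

    /* Hypothetical helper, not part of the diff: a data node payload
     * must be between 1 byte and one UBIFS block (4096 bytes). */
    static bool data_node_len_ok(const struct ubifs_data_node *dn)
    {
            int len = le32_to_cpu(dn->size);

            return len > 0 && len <= UBIFS_BLOCK_SIZE;
    }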
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index b1f7c0caa3ac..2feff6cbbb77 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -161,8 +161,8 @@ static inline void dent_key_init(const struct ubifs_info *c,
{
uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm));
- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
- ubifs_assert(!nm->hash && !nm->minor_hash);
+ ubifs_assert(c, !(hash & ~UBIFS_S_KEY_HASH_MASK));
+ ubifs_assert(c, !nm->hash && !nm->minor_hash);
key->u32[0] = inum;
key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS);
}
@@ -179,7 +179,7 @@ static inline void dent_key_init_hash(const struct ubifs_info *c,
union ubifs_key *key, ino_t inum,
uint32_t hash)
{
- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+ ubifs_assert(c, !(hash & ~UBIFS_S_KEY_HASH_MASK));
key->u32[0] = inum;
key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS);
}
@@ -198,7 +198,7 @@ static inline void dent_key_init_flash(const struct ubifs_info *c, void *k,
union ubifs_key *key = k;
uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm));
- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+ ubifs_assert(c, !(hash & ~UBIFS_S_KEY_HASH_MASK));
key->j32[0] = cpu_to_le32(inum);
key->j32[1] = cpu_to_le32(hash |
(UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS));
@@ -231,7 +231,7 @@ static inline void xent_key_init(const struct ubifs_info *c,
{
uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm));
- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+ ubifs_assert(c, !(hash & ~UBIFS_S_KEY_HASH_MASK));
key->u32[0] = inum;
key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
}
@@ -249,7 +249,7 @@ static inline void xent_key_init_flash(const struct ubifs_info *c, void *k,
union ubifs_key *key = k;
uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm));
- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
+ ubifs_assert(c, !(hash & ~UBIFS_S_KEY_HASH_MASK));
key->j32[0] = cpu_to_le32(inum);
key->j32[1] = cpu_to_le32(hash |
(UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS));
@@ -280,7 +280,7 @@ static inline void data_key_init(const struct ubifs_info *c,
union ubifs_key *key, ino_t inum,
unsigned int block)
{
- ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
+ ubifs_assert(c, !(block & ~UBIFS_S_KEY_BLOCK_MASK));
key->u32[0] = inum;
key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS);
}
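
Every key-init helper above asserts that the name hash (or block number) stays clear of the key-type bits before OR-ing the two together. Assuming the standard UBIFS simple key layout (a 32-bit inode number, then a 3-bit type over a 29-bit hash/block field), the construction the asserts protect is:

    /* Sketch of the second key word, simple key format (assumed):
     *
     *   bits 31..29  key type (UBIFS_INO/DATA/DENT/XENT_KEY)
     *   bits 28..0   name hash or block number
     *
     * A hash touching bits 29..31 would silently corrupt the type. */
    static void example_data_key(const struct ubifs_info *c,
                                 union ubifs_key *key, ino_t inum,
                                 unsigned int block)
    {
            ubifs_assert(c, !(block & ~UBIFS_S_KEY_BLOCK_MASK));
            key->u32[0] = inum;
            key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS);
    }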
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 7cffa120a750..86b0828f5499 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -132,7 +132,7 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
while (*p) {
parent = *p;
b = rb_entry(parent, struct ubifs_bud, rb);
- ubifs_assert(bud->lnum != b->lnum);
+ ubifs_assert(c, bud->lnum != b->lnum);
if (bud->lnum < b->lnum)
p = &(*p)->rb_left;
else
@@ -145,7 +145,7 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
jhead = &c->jheads[bud->jhead];
list_add_tail(&bud->list, &jhead->buds_list);
} else
- ubifs_assert(c->replaying && c->ro_mount);
+ ubifs_assert(c, c->replaying && c->ro_mount);
/*
* Note, although this is a new bud, we anyway account this space now,
@@ -189,7 +189,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
}
mutex_lock(&c->log_mutex);
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (c->ro_error) {
err = -EROFS;
goto out_unlock;
@@ -244,7 +244,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
- ubifs_assert(c->lhead_lnum != c->ltail_lnum);
+ ubifs_assert(c, c->lhead_lnum != c->ltail_lnum);
c->lhead_offs = 0;
}
@@ -301,7 +301,7 @@ static void remove_buds(struct ubifs_info *c)
{
struct rb_node *p;
- ubifs_assert(list_empty(&c->old_buds));
+ ubifs_assert(c, list_empty(&c->old_buds));
c->cmt_bud_bytes = 0;
spin_lock(&c->buds_lock);
p = rb_first(&c->buds);
@@ -409,7 +409,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
/* Switch to the next log LEB */
if (c->lhead_offs) {
c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
- ubifs_assert(c->lhead_lnum != c->ltail_lnum);
+ ubifs_assert(c, c->lhead_lnum != c->ltail_lnum);
c->lhead_offs = 0;
}
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index f5a46844340c..fa8d775c9753 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -187,9 +187,9 @@ static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops,
/* Compare to some other LEB on the bottom of heap */
/* Pick a position kind of randomly */
cpos = (((size_t)lprops >> 4) & b) + b;
- ubifs_assert(cpos >= b);
- ubifs_assert(cpos < LPT_HEAP_SZ);
- ubifs_assert(cpos < heap->cnt);
+ ubifs_assert(c, cpos >= b);
+ ubifs_assert(c, cpos < LPT_HEAP_SZ);
+ ubifs_assert(c, cpos < heap->cnt);
val1 = get_heap_comp_val(lprops, cat);
val2 = get_heap_comp_val(heap->arr[cpos], cat);
@@ -230,8 +230,8 @@ static void remove_from_lpt_heap(struct ubifs_info *c,
int hpos = lprops->hpos;
heap = &c->lpt_heap[cat - 1];
- ubifs_assert(hpos >= 0 && hpos < heap->cnt);
- ubifs_assert(heap->arr[hpos] == lprops);
+ ubifs_assert(c, hpos >= 0 && hpos < heap->cnt);
+ ubifs_assert(c, heap->arr[hpos] == lprops);
heap->cnt -= 1;
if (hpos < heap->cnt) {
heap->arr[hpos] = heap->arr[heap->cnt];
@@ -296,13 +296,13 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
list_add(&lprops->list, &c->frdi_idx_list);
break;
default:
- ubifs_assert(0);
+ ubifs_assert(c, 0);
}
lprops->flags &= ~LPROPS_CAT_MASK;
lprops->flags |= cat;
c->in_a_category_cnt += 1;
- ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
+ ubifs_assert(c, c->in_a_category_cnt <= c->main_lebs);
}
/**
@@ -324,20 +324,20 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
break;
case LPROPS_FREEABLE:
c->freeable_cnt -= 1;
- ubifs_assert(c->freeable_cnt >= 0);
+ ubifs_assert(c, c->freeable_cnt >= 0);
/* Fall through */
case LPROPS_UNCAT:
case LPROPS_EMPTY:
case LPROPS_FRDI_IDX:
- ubifs_assert(!list_empty(&lprops->list));
+ ubifs_assert(c, !list_empty(&lprops->list));
list_del(&lprops->list);
break;
default:
- ubifs_assert(0);
+ ubifs_assert(c, 0);
}
c->in_a_category_cnt -= 1;
- ubifs_assert(c->in_a_category_cnt >= 0);
+ ubifs_assert(c, c->in_a_category_cnt >= 0);
}
/**
@@ -369,7 +369,7 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
list_replace(&old_lprops->list, &new_lprops->list);
break;
default:
- ubifs_assert(0);
+ ubifs_assert(c, 0);
}
}
@@ -412,7 +412,7 @@ int ubifs_categorize_lprops(const struct ubifs_info *c,
return LPROPS_UNCAT;
if (lprops->free == c->leb_size) {
- ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, !(lprops->flags & LPROPS_INDEX));
return LPROPS_EMPTY;
}
@@ -478,7 +478,7 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
*/
int ubifs_calc_dark(const struct ubifs_info *c, int spc)
{
- ubifs_assert(!(spc & 7));
+ ubifs_assert(c, !(spc & 7));
if (spc < c->dark_wm)
return spc;
@@ -543,27 +543,27 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
dbg_lp("LEB %d, free %d, dirty %d, flags %d",
lprops->lnum, free, dirty, flags);
- ubifs_assert(mutex_is_locked(&c->lp_mutex));
- ubifs_assert(c->lst.empty_lebs >= 0 &&
+ ubifs_assert(c, mutex_is_locked(&c->lp_mutex));
+ ubifs_assert(c, c->lst.empty_lebs >= 0 &&
c->lst.empty_lebs <= c->main_lebs);
- ubifs_assert(c->freeable_cnt >= 0);
- ubifs_assert(c->freeable_cnt <= c->main_lebs);
- ubifs_assert(c->lst.taken_empty_lebs >= 0);
- ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs);
- ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7));
- ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7));
- ubifs_assert(!(c->lst.total_used & 7));
- ubifs_assert(free == LPROPS_NC || free >= 0);
- ubifs_assert(dirty == LPROPS_NC || dirty >= 0);
+ ubifs_assert(c, c->freeable_cnt >= 0);
+ ubifs_assert(c, c->freeable_cnt <= c->main_lebs);
+ ubifs_assert(c, c->lst.taken_empty_lebs >= 0);
+ ubifs_assert(c, c->lst.taken_empty_lebs <= c->lst.empty_lebs);
+ ubifs_assert(c, !(c->lst.total_free & 7) && !(c->lst.total_dirty & 7));
+ ubifs_assert(c, !(c->lst.total_dead & 7) && !(c->lst.total_dark & 7));
+ ubifs_assert(c, !(c->lst.total_used & 7));
+ ubifs_assert(c, free == LPROPS_NC || free >= 0);
+ ubifs_assert(c, dirty == LPROPS_NC || dirty >= 0);
if (!is_lprops_dirty(c, lprops)) {
lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum);
if (IS_ERR(lprops))
return lprops;
} else
- ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum));
+ ubifs_assert(c, lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum));
- ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
+ ubifs_assert(c, !(lprops->free & 7) && !(lprops->dirty & 7));
spin_lock(&c->space_lock);
if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
@@ -768,15 +768,15 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c)
struct ubifs_lprops *lprops;
struct ubifs_lpt_heap *heap;
- ubifs_assert(mutex_is_locked(&c->lp_mutex));
+ ubifs_assert(c, mutex_is_locked(&c->lp_mutex));
heap = &c->lpt_heap[LPROPS_FREE - 1];
if (heap->cnt == 0)
return NULL;
lprops = heap->arr[0];
- ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
- ubifs_assert(!(lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, !(lprops->flags & LPROPS_TAKEN));
+ ubifs_assert(c, !(lprops->flags & LPROPS_INDEX));
return lprops;
}
@@ -791,15 +791,15 @@ const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c)
{
struct ubifs_lprops *lprops;
- ubifs_assert(mutex_is_locked(&c->lp_mutex));
+ ubifs_assert(c, mutex_is_locked(&c->lp_mutex));
if (list_empty(&c->empty_list))
return NULL;
lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list);
- ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
- ubifs_assert(!(lprops->flags & LPROPS_INDEX));
- ubifs_assert(lprops->free == c->leb_size);
+ ubifs_assert(c, !(lprops->flags & LPROPS_TAKEN));
+ ubifs_assert(c, !(lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, lprops->free == c->leb_size);
return lprops;
}
@@ -814,16 +814,16 @@ const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c)
{
struct ubifs_lprops *lprops;
- ubifs_assert(mutex_is_locked(&c->lp_mutex));
+ ubifs_assert(c, mutex_is_locked(&c->lp_mutex));
if (list_empty(&c->freeable_list))
return NULL;
lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list);
- ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
- ubifs_assert(!(lprops->flags & LPROPS_INDEX));
- ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
- ubifs_assert(c->freeable_cnt > 0);
+ ubifs_assert(c, !(lprops->flags & LPROPS_TAKEN));
+ ubifs_assert(c, !(lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, lprops->free + lprops->dirty == c->leb_size);
+ ubifs_assert(c, c->freeable_cnt > 0);
return lprops;
}
@@ -838,15 +838,15 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c)
{
struct ubifs_lprops *lprops;
- ubifs_assert(mutex_is_locked(&c->lp_mutex));
+ ubifs_assert(c, mutex_is_locked(&c->lp_mutex));
if (list_empty(&c->frdi_idx_list))
return NULL;
lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list);
- ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
- ubifs_assert((lprops->flags & LPROPS_INDEX));
- ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
+ ubifs_assert(c, !(lprops->flags & LPROPS_TAKEN));
+ ubifs_assert(c, (lprops->flags & LPROPS_INDEX));
+ ubifs_assert(c, lprops->free + lprops->dirty == c->leb_size);
return lprops;
}
@@ -1089,10 +1089,6 @@ static int scan_check_cb(struct ubifs_info *c,
}
}
- buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
- if (!buf)
- return -ENOMEM;
-
/*
* After an unclean unmount, empty and freeable LEBs
* may contain garbage - do not scan them.
@@ -1111,6 +1107,10 @@ static int scan_check_cb(struct ubifs_info *c,
return LPT_SCAN_CONTINUE;
}
+ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) {
ret = PTR_ERR(sleb);
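
The scan_check_cb() hunk is more than an assert conversion: moving the __vmalloc() below the "do not scan" early return fixes a leak, since the old code allocated the c->leb_size buffer and then returned LPT_SCAN_CONTINUE for empty and freeable LEBs without ever freeing it. The general shape of the fix:

    /* Allocate only after every early-return path has been dealt with,
     * so no skip path has to remember to free.  should_skip() is a
     * hypothetical stand-in for the empty/freeable-LEB test above. */
    if (should_skip(lnum))
            return LPT_SCAN_CONTINUE;   /* nothing allocated yet */

    buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
    if (!buf)
            return -ENOMEM;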
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 8e99dad18880..31393370e334 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -225,21 +225,22 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
/**
* pack_bits - pack bit fields end-to-end.
+ * @c: UBIFS file-system description object
* @addr: address at which to pack (passed and next address returned)
* @pos: bit position at which to pack (passed and next position returned)
* @val: value to pack
* @nrbits: number of bits of value to pack (1-32)
*/
-static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits)
+static void pack_bits(const struct ubifs_info *c, uint8_t **addr, int *pos, uint32_t val, int nrbits)
{
uint8_t *p = *addr;
int b = *pos;
- ubifs_assert(nrbits > 0);
- ubifs_assert(nrbits <= 32);
- ubifs_assert(*pos >= 0);
- ubifs_assert(*pos < 8);
- ubifs_assert((val >> nrbits) == 0 || nrbits == 32);
+ ubifs_assert(c, nrbits > 0);
+ ubifs_assert(c, nrbits <= 32);
+ ubifs_assert(c, *pos >= 0);
+ ubifs_assert(c, *pos < 8);
+ ubifs_assert(c, (val >> nrbits) == 0 || nrbits == 32);
if (b) {
*p |= ((uint8_t)val) << b;
nrbits += b;
@@ -274,13 +275,14 @@ static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits)
/**
* ubifs_unpack_bits - unpack bit fields.
+ * @c: UBIFS file-system description object
* @addr: address at which to unpack (passed and next address returned)
* @pos: bit position at which to unpack (passed and next position returned)
* @nrbits: number of bits of value to unpack (1-32)
*
* This function returns the value unpacked.
*/
-uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
+uint32_t ubifs_unpack_bits(const struct ubifs_info *c, uint8_t **addr, int *pos, int nrbits)
{
const int k = 32 - nrbits;
uint8_t *p = *addr;
@@ -288,10 +290,10 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
uint32_t uninitialized_var(val);
const int bytes = (nrbits + b + 7) >> 3;
- ubifs_assert(nrbits > 0);
- ubifs_assert(nrbits <= 32);
- ubifs_assert(*pos >= 0);
- ubifs_assert(*pos < 8);
+ ubifs_assert(c, nrbits > 0);
+ ubifs_assert(c, nrbits <= 32);
+ ubifs_assert(c, *pos >= 0);
+ ubifs_assert(c, *pos < 8);
if (b) {
switch (bytes) {
case 2:
@@ -337,7 +339,7 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
p += nrbits >> 3;
*addr = p;
*pos = b;
- ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32);
+ ubifs_assert(c, (val >> nrbits) == 0 || nrbits - b == 32);
return val;
}
@@ -354,24 +356,24 @@ void ubifs_pack_pnode(struct ubifs_info *c, void *buf,
int i, pos = 0;
uint16_t crc;
- pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS);
+ pack_bits(c, &addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS);
if (c->big_lpt)
- pack_bits(&addr, &pos, pnode->num, c->pcnt_bits);
+ pack_bits(c, &addr, &pos, pnode->num, c->pcnt_bits);
for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
- pack_bits(&addr, &pos, pnode->lprops[i].free >> 3,
+ pack_bits(c, &addr, &pos, pnode->lprops[i].free >> 3,
c->space_bits);
- pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3,
+ pack_bits(c, &addr, &pos, pnode->lprops[i].dirty >> 3,
c->space_bits);
if (pnode->lprops[i].flags & LPROPS_INDEX)
- pack_bits(&addr, &pos, 1, 1);
+ pack_bits(c, &addr, &pos, 1, 1);
else
- pack_bits(&addr, &pos, 0, 1);
+ pack_bits(c, &addr, &pos, 0, 1);
}
crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
c->pnode_sz - UBIFS_LPT_CRC_BYTES);
addr = buf;
pos = 0;
- pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
+ pack_bits(c, &addr, &pos, crc, UBIFS_LPT_CRC_BITS);
}
/**
@@ -387,23 +389,23 @@ void ubifs_pack_nnode(struct ubifs_info *c, void *buf,
int i, pos = 0;
uint16_t crc;
- pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS);
+ pack_bits(c, &addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS);
if (c->big_lpt)
- pack_bits(&addr, &pos, nnode->num, c->pcnt_bits);
+ pack_bits(c, &addr, &pos, nnode->num, c->pcnt_bits);
for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
int lnum = nnode->nbranch[i].lnum;
if (lnum == 0)
lnum = c->lpt_last + 1;
- pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits);
- pack_bits(&addr, &pos, nnode->nbranch[i].offs,
+ pack_bits(c, &addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits);
+ pack_bits(c, &addr, &pos, nnode->nbranch[i].offs,
c->lpt_offs_bits);
}
crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
c->nnode_sz - UBIFS_LPT_CRC_BYTES);
addr = buf;
pos = 0;
- pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
+ pack_bits(c, &addr, &pos, crc, UBIFS_LPT_CRC_BITS);
}
/**
@@ -419,16 +421,16 @@ void ubifs_pack_ltab(struct ubifs_info *c, void *buf,
int i, pos = 0;
uint16_t crc;
- pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS);
+ pack_bits(c, &addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS);
for (i = 0; i < c->lpt_lebs; i++) {
- pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits);
- pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits);
+ pack_bits(c, &addr, &pos, ltab[i].free, c->lpt_spc_bits);
+ pack_bits(c, &addr, &pos, ltab[i].dirty, c->lpt_spc_bits);
}
crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
c->ltab_sz - UBIFS_LPT_CRC_BYTES);
addr = buf;
pos = 0;
- pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
+ pack_bits(c, &addr, &pos, crc, UBIFS_LPT_CRC_BITS);
}
/**
@@ -443,14 +445,14 @@ void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave)
int i, pos = 0;
uint16_t crc;
- pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS);
+ pack_bits(c, &addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS);
for (i = 0; i < c->lsave_cnt; i++)
- pack_bits(&addr, &pos, lsave[i], c->lnum_bits);
+ pack_bits(c, &addr, &pos, lsave[i], c->lnum_bits);
crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
c->lsave_sz - UBIFS_LPT_CRC_BYTES);
addr = buf;
pos = 0;
- pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
+ pack_bits(c, &addr, &pos, crc, UBIFS_LPT_CRC_BITS);
}
/**
@@ -465,7 +467,7 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty)
return;
dbg_lp("LEB %d add %d to %d",
lnum, dirty, c->ltab[lnum - c->lpt_first].dirty);
- ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
+ ubifs_assert(c, lnum >= c->lpt_first && lnum <= c->lpt_last);
c->ltab[lnum - c->lpt_first].dirty += dirty;
}
@@ -481,7 +483,7 @@ static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty)
dbg_lp("LEB %d free %d dirty %d to %d %d",
lnum, c->ltab[lnum - c->lpt_first].free,
c->ltab[lnum - c->lpt_first].dirty, free, dirty);
- ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
+ ubifs_assert(c, lnum >= c->lpt_first && lnum <= c->lpt_last);
c->ltab[lnum - c->lpt_first].free = free;
c->ltab[lnum - c->lpt_first].dirty = dirty;
}
@@ -639,7 +641,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
goto out;
}
- ubifs_assert(!c->ltab);
+ ubifs_assert(c, !c->ltab);
c->ltab = ltab; /* Needed by set_ltab */
/* Initialize LPT's own lprops */
@@ -918,7 +920,7 @@ static int check_lpt_crc(const struct ubifs_info *c, void *buf, int len)
uint8_t *addr = buf;
uint16_t crc, calc_crc;
- crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS);
+ crc = ubifs_unpack_bits(c, &addr, &pos, UBIFS_LPT_CRC_BITS);
calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
len - UBIFS_LPT_CRC_BYTES);
if (crc != calc_crc) {
@@ -944,7 +946,7 @@ static int check_lpt_type(const struct ubifs_info *c, uint8_t **addr,
{
int node_type;
- node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS);
+ node_type = ubifs_unpack_bits(c, addr, pos, UBIFS_LPT_TYPE_BITS);
if (node_type != type) {
ubifs_err(c, "invalid type (%d) in LPT node type %d",
node_type, type);
@@ -972,16 +974,16 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf,
if (err)
return err;
if (c->big_lpt)
- pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
+ pnode->num = ubifs_unpack_bits(c, &addr, &pos, c->pcnt_bits);
for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
struct ubifs_lprops * const lprops = &pnode->lprops[i];
- lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits);
+ lprops->free = ubifs_unpack_bits(c, &addr, &pos, c->space_bits);
lprops->free <<= 3;
- lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits);
+ lprops->dirty = ubifs_unpack_bits(c, &addr, &pos, c->space_bits);
lprops->dirty <<= 3;
- if (ubifs_unpack_bits(&addr, &pos, 1))
+ if (ubifs_unpack_bits(c, &addr, &pos, 1))
lprops->flags = LPROPS_INDEX;
else
lprops->flags = 0;
@@ -1009,16 +1011,16 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
if (err)
return err;
if (c->big_lpt)
- nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
+ nnode->num = ubifs_unpack_bits(c, &addr, &pos, c->pcnt_bits);
for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
int lnum;
- lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) +
+ lnum = ubifs_unpack_bits(c, &addr, &pos, c->lpt_lnum_bits) +
c->lpt_first;
if (lnum == c->lpt_last + 1)
lnum = 0;
nnode->nbranch[i].lnum = lnum;
- nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos,
+ nnode->nbranch[i].offs = ubifs_unpack_bits(c, &addr, &pos,
c->lpt_offs_bits);
}
err = check_lpt_crc(c, buf, c->nnode_sz);
@@ -1041,8 +1043,8 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf)
if (err)
return err;
for (i = 0; i < c->lpt_lebs; i++) {
- int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits);
- int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits);
+ int free = ubifs_unpack_bits(c, &addr, &pos, c->lpt_spc_bits);
+ int dirty = ubifs_unpack_bits(c, &addr, &pos, c->lpt_spc_bits);
if (free < 0 || free > c->leb_size || dirty < 0 ||
dirty > c->leb_size || free + dirty > c->leb_size)
@@ -1073,7 +1075,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf)
if (err)
return err;
for (i = 0; i < c->lsave_cnt; i++) {
- int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits);
+ int lnum = ubifs_unpack_bits(c, &addr, &pos, c->lnum_bits);
if (lnum < c->main_first || lnum >= c->leb_cnt)
return -EINVAL;
@@ -1515,7 +1517,7 @@ static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c,
branch->cnode->parent = n;
}
- ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags));
+ ubifs_assert(c, !test_bit(OBSOLETE_CNODE, &nnode->flags));
__set_bit(OBSOLETE_CNODE, &nnode->flags);
c->dirty_nn_cnt += 1;
@@ -1558,7 +1560,7 @@ static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c,
__clear_bit(COW_CNODE, &p->flags);
replace_cats(c, pnode, p);
- ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags));
+ ubifs_assert(c, !test_bit(OBSOLETE_CNODE, &pnode->flags));
__set_bit(OBSOLETE_CNODE, &pnode->flags);
c->dirty_pn_cnt += 1;
@@ -1613,7 +1615,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
pnode->lprops[iip].free, pnode->lprops[iip].dirty,
pnode->lprops[iip].flags);
- ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags));
+ ubifs_assert(c, test_bit(DIRTY_CNODE, &pnode->flags));
return &pnode->lprops[iip];
}
@@ -1889,9 +1891,9 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
lprops->flags = ubifs_categorize_lprops(c, lprops);
}
} else {
- ubifs_assert(branch->lnum >= c->lpt_first &&
+ ubifs_assert(c, branch->lnum >= c->lpt_first &&
branch->lnum <= c->lpt_last);
- ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
+ ubifs_assert(c, branch->offs >= 0 && branch->offs < c->leb_size);
err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
c->pnode_sz, 1);
if (err)
@@ -1935,8 +1937,8 @@ int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
start_lnum = c->main_first;
}
- ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt);
- ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt);
+ ubifs_assert(c, start_lnum >= c->main_first && start_lnum < c->leb_cnt);
+ ubifs_assert(c, end_lnum >= c->main_first && end_lnum < c->leb_cnt);
if (!c->nroot) {
err = ubifs_read_nnode(c, NULL, 0);
@@ -2055,7 +2057,7 @@ again:
iip = pnode->iip;
while (1) {
h -= 1;
- ubifs_assert(h >= 0);
+ ubifs_assert(c, h >= 0);
nnode = path[h].ptr.nnode;
if (iip + 1 < UBIFS_LPT_FANOUT)
break;
@@ -2234,7 +2236,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
return 0;
while (cnode) {
- ubifs_assert(row >= 0);
+ ubifs_assert(c, row >= 0);
nnode = cnode->parent;
if (cnode->level) {
/* cnode is a nnode */
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 78da65b2fb85..7ce30994bbba 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -34,13 +34,14 @@ static int dbg_populate_lsave(struct ubifs_info *c);
/**
* first_dirty_cnode - find first dirty cnode.
+ * @c: UBIFS file-system description object
* @nnode: nnode at which to start
*
* This function returns the first dirty cnode or %NULL if there is not one.
*/
-static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode)
+static struct ubifs_cnode *first_dirty_cnode(const struct ubifs_info *c, struct ubifs_nnode *nnode)
{
- ubifs_assert(nnode);
+ ubifs_assert(c, nnode);
while (1) {
int i, cont = 0;
@@ -64,16 +65,17 @@ static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode)
/**
* next_dirty_cnode - find next dirty cnode.
+ * @c: UBIFS file-system description object
* @cnode: cnode from which to begin searching
*
* This function returns the next dirty cnode or %NULL if there is not one.
*/
-static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode)
+static struct ubifs_cnode *next_dirty_cnode(const struct ubifs_info *c, struct ubifs_cnode *cnode)
{
struct ubifs_nnode *nnode;
int i;
- ubifs_assert(cnode);
+ ubifs_assert(c, cnode);
nnode = cnode->parent;
if (!nnode)
return NULL;
@@ -83,7 +85,7 @@ static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode)
if (cnode->level == 0)
return cnode; /* cnode is a pnode */
/* cnode is a nnode */
- return first_dirty_cnode((struct ubifs_nnode *)cnode);
+ return first_dirty_cnode(c, (struct ubifs_nnode *)cnode);
}
}
return (struct ubifs_cnode *)nnode;
@@ -106,15 +108,15 @@ static int get_cnodes_to_commit(struct ubifs_info *c)
if (!test_bit(DIRTY_CNODE, &c->nroot->flags))
return 0;
- c->lpt_cnext = first_dirty_cnode(c->nroot);
+ c->lpt_cnext = first_dirty_cnode(c, c->nroot);
cnode = c->lpt_cnext;
if (!cnode)
return 0;
cnt += 1;
while (1) {
- ubifs_assert(!test_bit(COW_CNODE, &cnode->flags));
+ ubifs_assert(c, !test_bit(COW_CNODE, &cnode->flags));
__set_bit(COW_CNODE, &cnode->flags);
- cnext = next_dirty_cnode(cnode);
+ cnext = next_dirty_cnode(c, cnode);
if (!cnext) {
cnode->cnext = c->lpt_cnext;
break;
@@ -125,7 +127,7 @@ static int get_cnodes_to_commit(struct ubifs_info *c)
}
dbg_cmt("committing %d cnodes", cnt);
dbg_lp("committing %d cnodes", cnt);
- ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt);
+ ubifs_assert(c, cnt == c->dirty_nn_cnt + c->dirty_pn_cnt);
return cnt;
}
@@ -141,7 +143,7 @@ static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty)
dbg_lp("LEB %d free %d dirty %d to %d +%d",
lnum, c->ltab[lnum - c->lpt_first].free,
c->ltab[lnum - c->lpt_first].dirty, free, dirty);
- ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
+ ubifs_assert(c, lnum >= c->lpt_first && lnum <= c->lpt_last);
c->ltab[lnum - c->lpt_first].free = free;
c->ltab[lnum - c->lpt_first].dirty += dirty;
}
@@ -237,7 +239,7 @@ static int layout_cnodes(struct ubifs_info *c)
if (err)
goto no_space;
offs = 0;
- ubifs_assert(lnum >= c->lpt_first &&
+ ubifs_assert(c, lnum >= c->lpt_first &&
lnum <= c->lpt_last);
/* Try to place lsave and ltab nicely */
if (!done_lsave) {
@@ -280,7 +282,7 @@ static int layout_cnodes(struct ubifs_info *c)
if (err)
goto no_space;
offs = 0;
- ubifs_assert(lnum >= c->lpt_first &&
+ ubifs_assert(c, lnum >= c->lpt_first &&
lnum <= c->lpt_last);
}
done_lsave = 1;
@@ -300,7 +302,7 @@ static int layout_cnodes(struct ubifs_info *c)
if (err)
goto no_space;
offs = 0;
- ubifs_assert(lnum >= c->lpt_first &&
+ ubifs_assert(c, lnum >= c->lpt_first &&
lnum <= c->lpt_last);
}
c->ltab_lnum = lnum;
@@ -423,7 +425,7 @@ static int write_cnodes(struct ubifs_info *c)
if (err)
goto no_space;
offs = from = 0;
- ubifs_assert(lnum >= c->lpt_first &&
+ ubifs_assert(c, lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
if (err)
@@ -480,7 +482,7 @@ static int write_cnodes(struct ubifs_info *c)
if (err)
goto no_space;
offs = from = 0;
- ubifs_assert(lnum >= c->lpt_first &&
+ ubifs_assert(c, lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
if (err)
@@ -506,7 +508,7 @@ static int write_cnodes(struct ubifs_info *c)
if (err)
goto no_space;
offs = from = 0;
- ubifs_assert(lnum >= c->lpt_first &&
+ ubifs_assert(c, lnum >= c->lpt_first &&
lnum <= c->lpt_last);
err = ubifs_leb_unmap(c, lnum);
if (err)
@@ -806,7 +808,7 @@ static void populate_lsave(struct ubifs_info *c)
struct ubifs_lpt_heap *heap;
int i, cnt = 0;
- ubifs_assert(c->big_lpt);
+ ubifs_assert(c, c->big_lpt);
if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) {
c->lpt_drty_flgs |= LSAVE_DIRTY;
ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
@@ -1095,8 +1097,8 @@ static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf,
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int pos = 0, node_type;
- node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
- *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
+ node_type = ubifs_unpack_bits(c, &addr, &pos, UBIFS_LPT_TYPE_BITS);
+ *node_num = ubifs_unpack_bits(c, &addr, &pos, c->pcnt_bits);
return node_type;
}
@@ -1116,7 +1118,7 @@ static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len)
if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8)
return 0;
- node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
+ node_type = ubifs_unpack_bits(c, &addr, &pos, UBIFS_LPT_TYPE_BITS);
if (node_type == UBIFS_LPT_NOT_A_NODE)
return 0;
node_len = get_lpt_node_len(c, node_type);
@@ -1124,7 +1126,7 @@ static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len)
return 0;
pos = 0;
addr = buf;
- crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS);
+ crc = ubifs_unpack_bits(c, &addr, &pos, UBIFS_LPT_CRC_BITS);
calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
node_len - UBIFS_LPT_CRC_BYTES);
if (crc != calc_crc)
@@ -1170,7 +1172,7 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum)
node_type = get_lpt_node_type(c, buf, &node_num);
node_len = get_lpt_node_len(c, node_type);
offs = c->leb_size - len;
- ubifs_assert(node_len != 0);
+ ubifs_assert(c, node_len != 0);
mutex_lock(&c->lp_mutex);
err = make_node_dirty(c, node_type, node_num, lnum, offs);
mutex_unlock(&c->lp_mutex);
@@ -1195,7 +1197,7 @@ static int lpt_gc(struct ubifs_info *c)
mutex_lock(&c->lp_mutex);
for (i = 0; i < c->lpt_lebs; i++) {
- ubifs_assert(!c->ltab[i].tgc);
+ ubifs_assert(c, !c->ltab[i].tgc);
if (i + c->lpt_first == c->nhead_lnum ||
c->ltab[i].free + c->ltab[i].dirty == c->leb_size)
continue;
@@ -1271,7 +1273,7 @@ int ubifs_lpt_start_commit(struct ubifs_info *c)
populate_lsave(c);
cnt = get_cnodes_to_commit(c);
- ubifs_assert(cnt != 0);
+ ubifs_assert(c, cnt != 0);
err = layout_cnodes(c);
if (err)
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index c6a5e39e2ba5..9df4a41bba52 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -360,7 +360,7 @@ int ubifs_write_master(struct ubifs_info *c)
{
int err, lnum, offs, len;
- ubifs_assert(!c->ro_media && !c->ro_mount);
+ ubifs_assert(c, !c->ro_media && !c->ro_mount);
if (c->ro_error)
return -EROFS;
diff --git a/fs/ubifs/misc.c b/fs/ubifs/misc.c
index 586fd5b578a7..cd23de0f211d 100644
--- a/fs/ubifs/misc.c
+++ b/fs/ubifs/misc.c
@@ -56,3 +56,14 @@ void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...)
va_end(args);
}
+
+static char *assert_names[] = {
+ [ASSACT_REPORT] = "report",
+ [ASSACT_RO] = "read-only",
+ [ASSACT_PANIC] = "panic",
+};
+
+const char *ubifs_assert_action_name(struct ubifs_info *c)
+{
+ return assert_names[c->assert_action];
+}
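ubifs_assert_action_name() maps c->assert_action back to its option string so the setting round-trips through /proc/mounts; the super.c hunk further down uses it exactly this way:

    seq_printf(s, ",assert=%s", ubifs_assert_action_name(c));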
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index caf83d68fb38..21d35d7dd975 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -105,25 +105,27 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
/**
* ubifs_compr_present - check if compressor was compiled in.
* @compr_type: compressor type to check
+ * @c: the UBIFS file-system description object
*
* This function returns %1 if the compressor of type @compr_type is present, and
* %0 if not.
*/
-static inline int ubifs_compr_present(int compr_type)
+static inline int ubifs_compr_present(struct ubifs_info *c, int compr_type)
{
- ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
+ ubifs_assert(c, compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
return !!ubifs_compressors[compr_type]->capi_name;
}
/**
* ubifs_compr_name - get compressor name string by its type.
* @compr_type: compressor type
+ * @c: the UBIFS file-system description object
*
* This function returns compressor type string.
*/
-static inline const char *ubifs_compr_name(int compr_type)
+static inline const char *ubifs_compr_name(struct ubifs_info *c, int compr_type)
{
- ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
+ ubifs_assert(c, compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
return ubifs_compressors[compr_type]->name;
}
@@ -262,8 +264,8 @@ static inline void ubifs_get_lprops(struct ubifs_info *c)
*/
static inline void ubifs_release_lprops(struct ubifs_info *c)
{
- ubifs_assert(mutex_is_locked(&c->lp_mutex));
- ubifs_assert(c->lst.empty_lebs >= 0 &&
+ ubifs_assert(c, mutex_is_locked(&c->lp_mutex));
+ ubifs_assert(c, c->lst.empty_lebs >= 0 &&
c->lst.empty_lebs <= c->main_lebs);
mutex_unlock(&c->lp_mutex);
}
@@ -285,4 +287,6 @@ static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
return lnum;
}
+const char *ubifs_assert_action_name(struct ubifs_info *c);
+
#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index caf2d123e9ee..8f70494efb0c 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -172,8 +172,8 @@ int ubifs_orphan_start_commit(struct ubifs_info *c)
spin_lock(&c->orphan_lock);
last = &c->orph_cnext;
list_for_each_entry(orphan, &c->orph_new, new_list) {
- ubifs_assert(orphan->new);
- ubifs_assert(!orphan->cmt);
+ ubifs_assert(c, orphan->new);
+ ubifs_assert(c, !orphan->cmt);
orphan->new = 0;
orphan->cmt = 1;
*last = orphan;
@@ -244,7 +244,7 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
int err = 0;
if (atomic) {
- ubifs_assert(c->ohead_offs == 0);
+ ubifs_assert(c, c->ohead_offs == 0);
ubifs_prepare_node(c, c->orph_buf, len, 1);
len = ALIGN(len, c->min_io_size);
err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len);
@@ -276,7 +276,7 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
struct ubifs_orph_node *orph;
int gap, err, len, cnt, i;
- ubifs_assert(c->cmt_orphans > 0);
+ ubifs_assert(c, c->cmt_orphans > 0);
gap = c->leb_size - c->ohead_offs;
if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) {
c->ohead_lnum += 1;
@@ -295,14 +295,14 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
if (cnt > c->cmt_orphans)
cnt = c->cmt_orphans;
len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64);
- ubifs_assert(c->orph_buf);
+ ubifs_assert(c, c->orph_buf);
orph = c->orph_buf;
orph->ch.node_type = UBIFS_ORPH_NODE;
spin_lock(&c->orphan_lock);
cnext = c->orph_cnext;
for (i = 0; i < cnt; i++) {
orphan = cnext;
- ubifs_assert(orphan->cmt);
+ ubifs_assert(c, orphan->cmt);
orph->inos[i] = cpu_to_le64(orphan->inum);
orphan->cmt = 0;
cnext = orphan->cnext;
@@ -316,9 +316,9 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
else
/* Mark the last node of the commit */
orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63));
- ubifs_assert(c->ohead_offs + len <= c->leb_size);
- ubifs_assert(c->ohead_lnum >= c->orph_first);
- ubifs_assert(c->ohead_lnum <= c->orph_last);
+ ubifs_assert(c, c->ohead_offs + len <= c->leb_size);
+ ubifs_assert(c, c->ohead_lnum >= c->orph_first);
+ ubifs_assert(c, c->ohead_lnum <= c->orph_last);
err = do_write_orph_node(c, len, atomic);
c->ohead_offs += ALIGN(len, c->min_io_size);
c->ohead_offs = ALIGN(c->ohead_offs, 8);
@@ -388,7 +388,7 @@ static int consolidate(struct ubifs_info *c)
cnt += 1;
}
*last = NULL;
- ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
+ ubifs_assert(c, cnt == c->tot_orphans - c->new_orphans);
c->cmt_orphans = cnt;
c->ohead_lnum = c->orph_first;
c->ohead_offs = 0;
@@ -415,7 +415,7 @@ static int commit_orphans(struct ubifs_info *c)
{
int avail, atomic = 0, err;
- ubifs_assert(c->cmt_orphans > 0);
+ ubifs_assert(c, c->cmt_orphans > 0);
avail = avail_orphs(c);
if (avail < c->cmt_orphans) {
/* Not enough space to write new orphans, so consolidate */
@@ -446,8 +446,8 @@ static void erase_deleted(struct ubifs_info *c)
while (dnext) {
orphan = dnext;
dnext = orphan->dnext;
- ubifs_assert(!orphan->new);
- ubifs_assert(orphan->del);
+ ubifs_assert(c, !orphan->new);
+ ubifs_assert(c, orphan->del);
rb_erase(&orphan->rb, &c->orph_tree);
list_del(&orphan->list);
c->tot_orphans -= 1;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 3af4472061cc..984e30e83c0b 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -444,7 +444,7 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
- ubifs_assert(!(*offs & 7));
+ ubifs_assert(c, !(*offs & 7));
empty_offs = ALIGN(*offs, c->min_io_size);
pad_len = empty_offs - *offs;
ubifs_pad(c, *buf, pad_len);
@@ -644,7 +644,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
if (IS_ERR(sleb))
return sleb;
- ubifs_assert(len >= 8);
+ ubifs_assert(c, len >= 8);
while (len >= 8) {
dbg_scan("look at LEB %d:%d (%d bytes left)",
lnum, offs, len);
@@ -966,7 +966,7 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
{
int err;
- ubifs_assert(!c->ro_mount || c->remounting_rw);
+ ubifs_assert(c, !c->ro_mount || c->remounting_rw);
dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
@@ -1187,8 +1187,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
return grab_empty_leb(c);
}
- ubifs_assert(!(lp.flags & LPROPS_INDEX));
- ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
+ ubifs_assert(c, !(lp.flags & LPROPS_INDEX));
+ ubifs_assert(c, lp.free + lp.dirty >= wbuf->offs);
/*
* We run the commit before garbage collection otherwise subsequent
@@ -1216,7 +1216,7 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
return err;
}
- ubifs_assert(err == LEB_RETAINED);
+ ubifs_assert(c, err == LEB_RETAINED);
if (err != LEB_RETAINED)
return -EINVAL;
@@ -1507,7 +1507,7 @@ int ubifs_recover_size(struct ubifs_info *c)
struct inode *inode;
struct ubifs_inode *ui;
- ubifs_assert(!e->inode);
+ ubifs_assert(c, !e->inode);
inode = ubifs_iget(c->vfs_sb, e->inum);
if (IS_ERR(inode))
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 85c2a43082b7..4844538eb926 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -273,6 +273,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
static int replay_entries_cmp(void *priv, struct list_head *a,
struct list_head *b)
{
+ struct ubifs_info *c = priv;
struct replay_entry *ra, *rb;
cond_resched();
@@ -281,7 +282,7 @@ static int replay_entries_cmp(void *priv, struct list_head *a,
ra = list_entry(a, struct replay_entry, list);
rb = list_entry(b, struct replay_entry, list);
- ubifs_assert(ra->sqnum != rb->sqnum);
+ ubifs_assert(c, ra->sqnum != rb->sqnum);
if (ra->sqnum > rb->sqnum)
return 1;
return -1;
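list_sort() hands its opaque priv argument through to the comparator, which is how replay_entries_cmp() now gets at c for the assertion. The matching call site is expected to pass c as priv, along these lines (a sketch; the call site is not shown in this excerpt):

    list_sort(c, &c->replay_list, &replay_entries_cmp);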
@@ -668,9 +669,9 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
goto out;
}
- ubifs_assert(ubifs_search_bud(c, lnum));
- ubifs_assert(sleb->endpt - offs >= used);
- ubifs_assert(sleb->endpt % c->min_io_size == 0);
+ ubifs_assert(c, ubifs_search_bud(c, lnum));
+ ubifs_assert(c, sleb->endpt - offs >= used);
+ ubifs_assert(c, sleb->endpt % c->min_io_size == 0);
b->dirty = sleb->endpt - offs - used;
b->free = c->leb_size - sleb->endpt;
@@ -706,7 +707,7 @@ static int replay_buds(struct ubifs_info *c)
if (err)
return err;
- ubifs_assert(b->sqnum > prev_sqnum);
+ ubifs_assert(c, b->sqnum > prev_sqnum);
prev_sqnum = b->sqnum;
}
@@ -1067,7 +1068,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
c->bi.uncommitted_idx *= c->max_idx_node_sz;
- ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
+ ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
c->lhead_lnum, c->lhead_offs, c->max_sqnum,
(unsigned long)c->highest_inum);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 8c25081a5109..bf17f58908ff 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -85,7 +85,7 @@ static int create_default_filesystem(struct ubifs_info *c)
long long tmp64, main_bytes;
__le64 tmp_le64;
__le32 tmp_le32;
- struct timespec ts;
+ struct timespec64 ts;
/* Some functions called from here depend on the @c->key_len filed */
c->key_len = UBIFS_SK_LEN;
@@ -301,8 +301,8 @@ static int create_default_filesystem(struct ubifs_info *c)
ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
ino->nlink = cpu_to_le32(2);
- ktime_get_real_ts(&ts);
- ts = timespec_trunc(ts, DEFAULT_TIME_GRAN);
+ ktime_get_real_ts64(&ts);
+ ts = timespec64_trunc(ts, DEFAULT_TIME_GRAN);
tmp_le64 = cpu_to_le64(ts.tv_sec);
ino->atime_sec = tmp_le64;
ino->ctime_sec = tmp_le64;
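struct timespec carries a 32-bit tv_sec on 32-bit architectures and overflows in 2038; timespec64 always uses a 64-bit time64_t, which also stores directly into the on-disk __le64 fields. The converted sequence, restated with comments (same calls as above):

    struct timespec64 ts;

    ktime_get_real_ts64(&ts);                     /* 64-bit seconds, y2038-safe */
    ts = timespec64_trunc(ts, DEFAULT_TIME_GRAN); /* round to on-disk granularity */
    ino->atime_sec = cpu_to_le64(ts.tv_sec);      /* tv_sec fits the __le64 field */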
@@ -563,7 +563,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
* due to the unavailability of time-travelling equipment.
*/
if (c->fmt_version > UBIFS_FORMAT_VERSION) {
- ubifs_assert(!c->ro_media || c->ro_mount);
+ ubifs_assert(c, !c->ro_media || c->ro_mount);
if (!c->ro_mount ||
c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
ubifs_err(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
@@ -705,9 +705,9 @@ static int fixup_leb(struct ubifs_info *c, int lnum, int len)
{
int err;
- ubifs_assert(len >= 0);
- ubifs_assert(len % c->min_io_size == 0);
- ubifs_assert(len < c->leb_size);
+ ubifs_assert(c, len >= 0);
+ ubifs_assert(c, len % c->min_io_size == 0);
+ ubifs_assert(c, len < c->leb_size);
if (len == 0) {
dbg_mnt("unmap empty LEB %d", lnum);
@@ -817,8 +817,8 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
int err;
struct ubifs_sb_node *sup;
- ubifs_assert(c->space_fixup);
- ubifs_assert(!c->ro_mount);
+ ubifs_assert(c, c->space_fixup);
+ ubifs_assert(c, !c->ro_mount);
ubifs_msg(c, "start fixing up free space");
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 16f03d9929e5..ea88926163f4 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -176,7 +176,7 @@ void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
int lnum, int offs)
{
dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);
- ubifs_assert(offs % c->min_io_size == 0);
+ ubifs_assert(c, offs % c->min_io_size == 0);
sleb->endpt = ALIGN(offs, c->min_io_size);
}
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 9a9fb94a41c6..5eb5958723d4 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -71,10 +71,10 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
{
int total_freed = 0;
struct ubifs_znode *znode, *zprev;
- int time = get_seconds();
+ time64_t time = ktime_get_seconds();
- ubifs_assert(mutex_is_locked(&c->umount_mutex));
- ubifs_assert(mutex_is_locked(&c->tnc_mutex));
+ ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
+ ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
return 0;
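get_seconds() reads the wall clock into an unsigned long (32 bits on 32-bit systems) and can jump when the clock is set; ktime_get_seconds() returns a monotonic time64_t, so znode aging never sees time move backwards. A sketch of the aging test this supports, assuming age is a threshold in seconds (the actual comparison sits outside this hunk):

    time64_t now = ktime_get_seconds();

    if (now - znode->time >= age)           /* monotonic, immune to clock steps */
            try_to_free(znode);             /* hypothetical eviction step */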
@@ -89,7 +89,7 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
* changed only when the 'c->tnc_mutex' is held.
*/
zprev = NULL;
- znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
+ znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
while (znode && total_freed < nr &&
atomic_long_read(&c->clean_zn_cnt) > 0) {
int freed;
@@ -125,7 +125,7 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
else
c->zroot.znode = NULL;
- freed = ubifs_destroy_tnc_subtree(znode);
+ freed = ubifs_destroy_tnc_subtree(c, znode);
atomic_long_sub(freed, &ubifs_clean_zn_cnt);
atomic_long_sub(freed, &c->clean_zn_cnt);
total_freed += freed;
@@ -136,7 +136,7 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
break;
zprev = znode;
- znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
+ znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
cond_resched();
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c5466c70d620..23e7042666a7 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -89,9 +89,9 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
if (ui->xattr && !S_ISREG(inode->i_mode))
return 5;
- if (!ubifs_compr_present(ui->compr_type)) {
+ if (!ubifs_compr_present(c, ui->compr_type)) {
ubifs_warn(c, "inode %lu uses '%s' compression, but it was not compiled in",
- inode->i_ino, ubifs_compr_name(ui->compr_type));
+ inode->i_ino, ubifs_compr_name(c, ui->compr_type));
}
err = dbg_check_dir(c, inode);
@@ -296,7 +296,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
- ubifs_assert(!ui->xattr);
+ ubifs_assert(c, !ui->xattr);
if (is_bad_inode(inode))
return 0;
@@ -349,7 +349,7 @@ static void ubifs_evict_inode(struct inode *inode)
goto out;
dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
- ubifs_assert(!atomic_read(&inode->i_count));
+ ubifs_assert(c, !atomic_read(&inode->i_count));
truncate_inode_pages_final(&inode->i_data);
@@ -384,9 +384,10 @@ done:
static void ubifs_dirty_inode(struct inode *inode, int flags)
{
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
- ubifs_assert(mutex_is_locked(&ui->ui_mutex));
+ ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
if (!ui->dirty) {
ui->dirty = 1;
dbg_gen("inode %lu", inode->i_ino);
@@ -416,7 +417,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = UBIFS_MAX_NLEN;
buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
- ubifs_assert(buf->f_bfree <= c->block_cnt);
+ ubifs_assert(c, buf->f_bfree <= c->block_cnt);
return 0;
}
@@ -441,9 +442,10 @@ static int ubifs_show_options(struct seq_file *s, struct dentry *root)
if (c->mount_opts.override_compr) {
seq_printf(s, ",compr=%s",
- ubifs_compr_name(c->mount_opts.compr_type));
+ ubifs_compr_name(c, c->mount_opts.compr_type));
}
+ seq_printf(s, ",assert=%s", ubifs_assert_action_name(c));
seq_printf(s, ",ubi=%d,vol=%d", c->vi.ubi_num, c->vi.vol_id);
return 0;
@@ -921,6 +923,7 @@ static int check_volume_empty(struct ubifs_info *c)
* Opt_chk_data_crc: check CRCs when reading data nodes
* Opt_no_chk_data_crc: do not check CRCs when reading data nodes
* Opt_override_compr: override default compressor
+ * Opt_assert: set ubifs_assert() action
* Opt_err: just end of array marker
*/
enum {
@@ -931,6 +934,7 @@ enum {
Opt_chk_data_crc,
Opt_no_chk_data_crc,
Opt_override_compr,
+ Opt_assert,
Opt_ignore,
Opt_err,
};
@@ -945,6 +949,7 @@ static const match_table_t tokens = {
{Opt_override_compr, "compr=%s"},
{Opt_ignore, "ubi=%s"},
{Opt_ignore, "vol=%s"},
+ {Opt_assert, "assert=%s"},
{Opt_err, NULL},
};
@@ -1045,6 +1050,26 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
c->default_compr = c->mount_opts.compr_type;
break;
}
+ case Opt_assert:
+ {
+ char *act = match_strdup(&args[0]);
+
+ if (!act)
+ return -ENOMEM;
+ if (!strcmp(act, "report"))
+ c->assert_action = ASSACT_REPORT;
+ else if (!strcmp(act, "read-only"))
+ c->assert_action = ASSACT_RO;
+ else if (!strcmp(act, "panic"))
+ c->assert_action = ASSACT_PANIC;
+ else {
+ ubifs_err(c, "unknown assert action \"%s\"", act);
+ kfree(act);
+ return -EINVAL;
+ }
+ kfree(act);
+ break;
+ }
case Opt_ignore:
break;
default:
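With the token wired up, the action becomes selectable at mount time, e.g. mount -t ubifs -o assert=panic ubi0:rootfs /mnt (the volume and mount point here are only illustrative); an unrecognized value fails the parse with -EINVAL. The default, set in alloc_ubifs_info() further down, is ASSACT_RO.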
@@ -1103,7 +1128,7 @@ static void destroy_journal(struct ubifs_info *c)
*/
static void bu_init(struct ubifs_info *c)
{
- ubifs_assert(c->bulk_read == 1);
+ ubifs_assert(c, c->bulk_read == 1);
if (c->bu.buf)
return; /* Already initialized */
@@ -1134,7 +1159,7 @@ again:
*/
static int check_free_space(struct ubifs_info *c)
{
- ubifs_assert(c->dark_wm > 0);
+ ubifs_assert(c, c->dark_wm > 0);
if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
ubifs_err(c, "insufficient free space to mount in R/W mode");
ubifs_dump_budg(c, &c->bi);
@@ -1234,9 +1259,9 @@ static int mount_ubifs(struct ubifs_info *c)
* Make sure the compressor which is set as default in the superblock
* or overridden by mount options is actually compiled in.
*/
- if (!ubifs_compr_present(c->default_compr)) {
+ if (!ubifs_compr_present(c, c->default_compr)) {
ubifs_err(c, "'compressor \"%s\" is not compiled in",
- ubifs_compr_name(c->default_compr));
+ ubifs_compr_name(c, c->default_compr));
err = -ENOTSUPP;
goto out_free;
}
@@ -1396,10 +1421,10 @@ static int mount_ubifs(struct ubifs_info *c)
* the journal head LEBs may also be accounted as
* "empty taken" if they are empty.
*/
- ubifs_assert(c->lst.taken_empty_lebs > 0);
+ ubifs_assert(c, c->lst.taken_empty_lebs > 0);
}
} else
- ubifs_assert(c->lst.taken_empty_lebs > 0);
+ ubifs_assert(c, c->lst.taken_empty_lebs > 0);
err = dbg_check_filesystem(c);
if (err)
@@ -1429,7 +1454,7 @@ static int mount_ubifs(struct ubifs_info *c)
UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid,
c->big_lpt ? ", big LPT model" : ", small LPT model");
- dbg_gen("default compressor: %s", ubifs_compr_name(c->default_compr));
+ dbg_gen("default compressor: %s", ubifs_compr_name(c, c->default_compr));
dbg_gen("data journal heads: %d",
c->jhead_cnt - NONDATA_JHEADS_CNT);
dbg_gen("log LEBs: %d (%d - %d)",
@@ -1610,7 +1635,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
goto out;
} else {
/* A readonly mount is not allowed to have orphans */
- ubifs_assert(c->tot_orphans == 0);
+ ubifs_assert(c, c->tot_orphans == 0);
err = ubifs_clear_orphans(c);
if (err)
goto out;
@@ -1727,8 +1752,8 @@ static void ubifs_remount_ro(struct ubifs_info *c)
{
int i, err;
- ubifs_assert(!c->need_recovery);
- ubifs_assert(!c->ro_mount);
+ ubifs_assert(c, !c->need_recovery);
+ ubifs_assert(c, !c->ro_mount);
mutex_lock(&c->umount_mutex);
if (c->bgt) {
@@ -1778,9 +1803,9 @@ static void ubifs_put_super(struct super_block *sb)
* to write them back because of I/O errors.
*/
if (!c->ro_error) {
- ubifs_assert(c->bi.idx_growth == 0);
- ubifs_assert(c->bi.dd_growth == 0);
- ubifs_assert(c->bi.data_growth == 0);
+ ubifs_assert(c, c->bi.idx_growth == 0);
+ ubifs_assert(c, c->bi.dd_growth == 0);
+ ubifs_assert(c, c->bi.data_growth == 0);
}
/*
@@ -1887,7 +1912,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
mutex_unlock(&c->bu_mutex);
}
- ubifs_assert(c->lst.taken_empty_lebs > 0);
+ ubifs_assert(c, c->lst.taken_empty_lebs > 0);
return 0;
}
@@ -2002,6 +2027,7 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
INIT_LIST_HEAD(&c->orph_list);
INIT_LIST_HEAD(&c->orph_new);
c->no_chk_data_crc = 1;
+ c->assert_action = ASSACT_RO;
c->highest_inum = UBIFS_FIRST_INO;
c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
@@ -2053,7 +2079,9 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
if (c->max_inode_sz > MAX_LFS_FILESIZE)
sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
sb->s_op = &ubifs_super_operations;
+#ifdef CONFIG_UBIFS_FS_XATTR
sb->s_xattr = ubifs_xattr_handlers;
+#endif
#ifdef CONFIG_UBIFS_FS_ENCRYPTION
sb->s_cop = &ubifs_crypt_operations;
#endif
@@ -2061,7 +2089,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
mutex_lock(&c->umount_mutex);
err = mount_ubifs(c);
if (err) {
- ubifs_assert(err < 0);
+ ubifs_assert(c, err < 0);
goto out_unlock;
}
@@ -2304,8 +2332,8 @@ late_initcall(ubifs_init);
static void __exit ubifs_exit(void)
{
- ubifs_assert(list_empty(&ubifs_infos));
- ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
+ WARN_ON(!list_empty(&ubifs_infos));
+ WARN_ON(atomic_long_read(&ubifs_clean_zn_cnt) != 0);
dbg_debugfs_exit();
ubifs_compressors_exit();
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 4a21e7f75e7a..bf416e512743 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -211,7 +211,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c,
__set_bit(DIRTY_ZNODE, &zn->flags);
__clear_bit(COW_ZNODE, &zn->flags);
- ubifs_assert(!ubifs_zn_obsolete(znode));
+ ubifs_assert(c, !ubifs_zn_obsolete(znode));
__set_bit(OBSOLETE_ZNODE, &znode->flags);
if (znode->level != 0) {
@@ -321,9 +321,9 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
void *lnc_node;
const struct ubifs_dent_node *dent = node;
- ubifs_assert(!zbr->leaf);
- ubifs_assert(zbr->len != 0);
- ubifs_assert(is_hash_key(c, &zbr->key));
+ ubifs_assert(c, !zbr->leaf);
+ ubifs_assert(c, zbr->len != 0);
+ ubifs_assert(c, is_hash_key(c, &zbr->key));
err = ubifs_validate_entry(c, dent);
if (err) {
@@ -355,8 +355,8 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
{
int err;
- ubifs_assert(!zbr->leaf);
- ubifs_assert(zbr->len != 0);
+ ubifs_assert(c, !zbr->leaf);
+ ubifs_assert(c, zbr->len != 0);
err = ubifs_validate_entry(c, node);
if (err) {
@@ -398,11 +398,11 @@ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
{
int err;
- ubifs_assert(is_hash_key(c, &zbr->key));
+ ubifs_assert(c, is_hash_key(c, &zbr->key));
if (zbr->leaf) {
/* Read from the leaf node cache */
- ubifs_assert(zbr->len != 0);
+ ubifs_assert(c, zbr->len != 0);
memcpy(node, zbr->leaf, zbr->len);
return 0;
}
@@ -721,7 +721,7 @@ static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
while (1) {
err = tnc_prev(c, zn, n);
if (err == -ENOENT) {
- ubifs_assert(*n == 0);
+ ubifs_assert(c, *n == 0);
*n = -1;
return 0;
}
@@ -761,12 +761,12 @@ static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
err = tnc_next(c, zn, n);
if (err) {
/* Should be impossible */
- ubifs_assert(0);
+ ubifs_assert(c, 0);
if (err == -ENOENT)
err = -EINVAL;
return err;
}
- ubifs_assert(*n == 0);
+ ubifs_assert(c, *n == 0);
*n = -1;
}
return 0;
@@ -778,7 +778,7 @@ static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
return 0;
if (err == NAME_MATCHES)
return 1;
- ubifs_assert(err == NAME_GREATER);
+ ubifs_assert(c, err == NAME_GREATER);
}
} else {
int nn = *n;
@@ -802,7 +802,7 @@ static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
*n = nn;
if (err == NAME_MATCHES)
return 1;
- ubifs_assert(err == NAME_LESS);
+ ubifs_assert(c, err == NAME_LESS);
}
}
}
@@ -843,7 +843,7 @@ static int fallible_matches_name(struct ubifs_info *c,
err = NOT_ON_MEDIA;
goto out_free;
}
- ubifs_assert(err == 1);
+ ubifs_assert(c, err == 1);
err = lnc_add_directly(c, zbr, dent);
if (err)
@@ -923,7 +923,7 @@ static int fallible_resolve_collision(struct ubifs_info *c,
while (1) {
err = tnc_prev(c, zn, n);
if (err == -ENOENT) {
- ubifs_assert(*n == 0);
+ ubifs_assert(c, *n == 0);
*n = -1;
break;
}
@@ -935,12 +935,12 @@ static int fallible_resolve_collision(struct ubifs_info *c,
err = tnc_next(c, zn, n);
if (err) {
/* Should be impossible */
- ubifs_assert(0);
+ ubifs_assert(c, 0);
if (err == -ENOENT)
err = -EINVAL;
return err;
}
- ubifs_assert(*n == 0);
+ ubifs_assert(c, *n == 0);
*n = -1;
}
break;
@@ -1100,8 +1100,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
struct ubifs_znode *zp;
int *path = c->bottom_up_buf, p = 0;
- ubifs_assert(c->zroot.znode);
- ubifs_assert(znode);
+ ubifs_assert(c, c->zroot.znode);
+ ubifs_assert(c, znode);
if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) {
kfree(c->bottom_up_buf);
c->bottom_up_buf = kmalloc_array(c->zroot.znode->level,
@@ -1120,7 +1120,7 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
if (!zp)
break;
n = znode->iip;
- ubifs_assert(p < c->zroot.znode->level);
+ ubifs_assert(c, p < c->zroot.znode->level);
path[p++] = n;
if (!zp->cnext && ubifs_zn_dirty(znode))
break;
@@ -1134,18 +1134,18 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
zp = znode->parent;
if (zp) {
- ubifs_assert(path[p - 1] >= 0);
- ubifs_assert(path[p - 1] < zp->child_cnt);
+ ubifs_assert(c, path[p - 1] >= 0);
+ ubifs_assert(c, path[p - 1] < zp->child_cnt);
zbr = &zp->zbranch[path[--p]];
znode = dirty_cow_znode(c, zbr);
} else {
- ubifs_assert(znode == c->zroot.znode);
+ ubifs_assert(c, znode == c->zroot.znode);
znode = dirty_cow_znode(c, &c->zroot);
}
if (IS_ERR(znode) || !p)
break;
- ubifs_assert(path[p - 1] >= 0);
- ubifs_assert(path[p - 1] < znode->child_cnt);
+ ubifs_assert(c, path[p - 1] >= 0);
+ ubifs_assert(c, path[p - 1] < znode->child_cnt);
znode = znode->zbranch[path[p - 1]].znode;
}
@@ -1179,10 +1179,10 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
{
int err, exact;
struct ubifs_znode *znode;
- unsigned long time = get_seconds();
+ time64_t time = ktime_get_seconds();
dbg_tnck(key, "search key ");
- ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
+ ubifs_assert(c, key_type(c, key) < UBIFS_INVALID_KEY);
znode = c->zroot.znode;
if (unlikely(!znode)) {
@@ -1315,7 +1315,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
{
int err, exact;
struct ubifs_znode *znode;
- unsigned long time = get_seconds();
+ time64_t time = ktime_get_seconds();
dbg_tnck(key, "search and dirty key ");
@@ -1658,9 +1658,9 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
int rlen, overlap;
dbg_io("LEB %d:%d, length %d", lnum, offs, len);
- ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
- ubifs_assert(!(offs & 7) && offs < c->leb_size);
- ubifs_assert(offs + len <= c->leb_size);
+ ubifs_assert(c, wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
+ ubifs_assert(c, !(offs & 7) && offs < c->leb_size);
+ ubifs_assert(c, offs + len <= c->leb_size);
spin_lock(&wbuf->lock);
overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
@@ -1824,7 +1824,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
goto out_unlock;
}
- ubifs_assert(n >= 0);
+ ubifs_assert(c, n >= 0);
err = resolve_collision(c, key, &znode, &n, nm);
dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
@@ -1922,7 +1922,7 @@ static int do_lookup_dh(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode *znode;
union ubifs_key start_key;
- ubifs_assert(is_hash_key(c, key));
+ ubifs_assert(c, is_hash_key(c, key));
lowest_dent_key(c, &start_key, key_inum(c, key));
@@ -1993,8 +1993,8 @@ static void correct_parent_keys(const struct ubifs_info *c,
{
union ubifs_key *key, *key1;
- ubifs_assert(znode->parent);
- ubifs_assert(znode->iip == 0);
+ ubifs_assert(c, znode->parent);
+ ubifs_assert(c, znode->iip == 0);
key = &znode->zbranch[0].key;
key1 = &znode->parent->zbranch[0].key;
@@ -2011,6 +2011,7 @@ static void correct_parent_keys(const struct ubifs_info *c,
/**
* insert_zbranch - insert a zbranch into a znode.
+ * @c: UBIFS file-system description object
* @znode: znode into which to insert
* @zbr: zbranch to insert
* @n: slot number to insert to
@@ -2020,12 +2021,12 @@ static void correct_parent_keys(const struct ubifs_info *c,
* zbranch has to be inserted into the @znode->zbranches[] array at the @n-th
* slot, zbranches starting from @n have to be moved right.
*/
-static void insert_zbranch(struct ubifs_znode *znode,
+static void insert_zbranch(struct ubifs_info *c, struct ubifs_znode *znode,
const struct ubifs_zbranch *zbr, int n)
{
int i;
- ubifs_assert(ubifs_zn_dirty(znode));
+ ubifs_assert(c, ubifs_zn_dirty(znode));
if (znode->level) {
for (i = znode->child_cnt; i > n; i--) {
@@ -2079,16 +2080,16 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
int i, keep, move, appending = 0;
union ubifs_key *key = &zbr->key, *key1;
- ubifs_assert(n >= 0 && n <= c->fanout);
+ ubifs_assert(c, n >= 0 && n <= c->fanout);
/* Implement naive insert for now */
again:
zp = znode->parent;
if (znode->child_cnt < c->fanout) {
- ubifs_assert(n != c->fanout);
+ ubifs_assert(c, n != c->fanout);
dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level);
- insert_zbranch(znode, zbr, n);
+ insert_zbranch(c, znode, zbr, n);
/* Ensure parent's key is correct */
if (n == 0 && zp && znode->iip == 0)
@@ -2197,7 +2198,7 @@ do_split:
/* Insert new key and branch */
dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level);
- insert_zbranch(zi, zbr, n);
+ insert_zbranch(c, zi, zbr, n);
/* Insert new znode (produced by splitting) into the parent */
if (zp) {
@@ -2495,8 +2496,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
int i, err;
/* Delete without merge for now */
- ubifs_assert(znode->level == 0);
- ubifs_assert(n >= 0 && n < c->fanout);
+ ubifs_assert(c, znode->level == 0);
+ ubifs_assert(c, n >= 0 && n < c->fanout);
dbg_tnck(&znode->zbranch[n].key, "deleting key ");
zbr = &znode->zbranch[n];
@@ -2522,8 +2523,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
*/
do {
- ubifs_assert(!ubifs_zn_obsolete(znode));
- ubifs_assert(ubifs_zn_dirty(znode));
+ ubifs_assert(c, !ubifs_zn_obsolete(znode));
+ ubifs_assert(c, ubifs_zn_dirty(znode));
zp = znode->parent;
n = znode->iip;
@@ -2545,7 +2546,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
/* Remove from znode, entry n - 1 */
znode->child_cnt -= 1;
- ubifs_assert(znode->level != 0);
+ ubifs_assert(c, znode->level != 0);
for (i = n; i < znode->child_cnt; i++) {
znode->zbranch[i] = znode->zbranch[i + 1];
if (znode->zbranch[i].znode)
@@ -2578,8 +2579,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
c->zroot.offs = zbr->offs;
c->zroot.len = zbr->len;
c->zroot.znode = znode;
- ubifs_assert(!ubifs_zn_obsolete(zp));
- ubifs_assert(ubifs_zn_dirty(zp));
+ ubifs_assert(c, !ubifs_zn_obsolete(zp));
+ ubifs_assert(c, ubifs_zn_dirty(zp));
atomic_long_dec(&c->dirty_zn_cnt);
if (zp->cnext) {
@@ -2944,7 +2945,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
union ubifs_key *dkey;
dbg_tnck(key, "key ");
- ubifs_assert(is_hash_key(c, key));
+ ubifs_assert(c, is_hash_key(c, key));
mutex_lock(&c->tnc_mutex);
err = ubifs_lookup_level0(c, key, &znode, &n);
@@ -3031,7 +3032,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
if (!c->cnext)
return;
- ubifs_assert(c->cmt_state == COMMIT_BROKEN);
+ ubifs_assert(c, c->cmt_state == COMMIT_BROKEN);
cnext = c->cnext;
do {
struct ubifs_znode *znode = cnext;
@@ -3053,8 +3054,8 @@ void ubifs_tnc_close(struct ubifs_info *c)
long n, freed;
n = atomic_long_read(&c->clean_zn_cnt);
- freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
- ubifs_assert(freed == n);
+ freed = ubifs_destroy_tnc_subtree(c, c->zroot.znode);
+ ubifs_assert(c, freed == n);
atomic_long_sub(n, &ubifs_clean_zn_cnt);
}
kfree(c->gap_lebs);
@@ -3167,7 +3168,7 @@ static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
struct ubifs_znode *znode, *zn;
int n, nn;
- ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
+ ubifs_assert(c, key_type(c, key) < UBIFS_INVALID_KEY);
/*
* The arguments have probably been read off flash, so don't assume
@@ -3206,7 +3207,7 @@ static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
if (IS_ERR(znode))
return znode;
ubifs_search_zbranch(c, znode, key, &n);
- ubifs_assert(n >= 0);
+ ubifs_assert(c, n >= 0);
}
if (znode->level == level + 1)
break;
@@ -3497,7 +3498,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
if (err < 0)
goto out_unlock;
- ubifs_assert(err == 0);
+ ubifs_assert(c, err == 0);
key = &znode->zbranch[n].key;
if (!key_in_range(c, key, &from_key, &to_key))
goto out_unlock;
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index a9df94ad46a3..dba87d09b989 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -87,8 +87,8 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
atomic_long_dec(&c->dirty_zn_cnt);
- ubifs_assert(ubifs_zn_dirty(znode));
- ubifs_assert(ubifs_zn_cow(znode));
+ ubifs_assert(c, ubifs_zn_dirty(znode));
+ ubifs_assert(c, ubifs_zn_cow(znode));
/*
* Note, unlike 'write_index()' we do not add memory barriers here
@@ -115,9 +115,9 @@ static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end,
{
int len, gap_remains, gap_pos, written, pad_len;
- ubifs_assert((gap_start & 7) == 0);
- ubifs_assert((gap_end & 7) == 0);
- ubifs_assert(gap_end >= gap_start);
+ ubifs_assert(c, (gap_start & 7) == 0);
+ ubifs_assert(c, (gap_end & 7) == 0);
+ ubifs_assert(c, gap_end >= gap_start);
gap_remains = gap_end - gap_start;
if (!gap_remains)
@@ -131,7 +131,7 @@ static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end,
const int alen = ALIGN(len, 8);
int err;
- ubifs_assert(alen <= gap_remains);
+ ubifs_assert(c, alen <= gap_remains);
err = make_idx_node(c, c->ileb_buf + gap_pos, znode,
lnum, gap_pos, len);
if (err)
@@ -259,7 +259,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
struct ubifs_idx_node *idx;
int in_use, level;
- ubifs_assert(snod->type == UBIFS_IDX_NODE);
+ ubifs_assert(c, snod->type == UBIFS_IDX_NODE);
idx = snod->node;
key_read(c, ubifs_idx_key(c, idx), &snod->key);
level = le16_to_cpu(idx->level);
@@ -373,7 +373,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
p = c->gap_lebs;
do {
- ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
+ ubifs_assert(c, p < c->gap_lebs + c->lst.idx_lebs);
written = layout_leb_in_gaps(c, p);
if (written < 0) {
err = written;
@@ -639,7 +639,7 @@ static int get_znodes_to_commit(struct ubifs_info *c)
}
cnt += 1;
while (1) {
- ubifs_assert(!ubifs_zn_cow(znode));
+ ubifs_assert(c, !ubifs_zn_cow(znode));
__set_bit(COW_ZNODE, &znode->flags);
znode->alt = 0;
cnext = find_next_dirty(znode);
@@ -652,7 +652,7 @@ static int get_znodes_to_commit(struct ubifs_info *c)
cnt += 1;
}
dbg_cmt("committing %d znodes", cnt);
- ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt));
+ ubifs_assert(c, cnt == atomic_long_read(&c->dirty_zn_cnt));
return cnt;
}
@@ -760,7 +760,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
err = layout_commit(c, no_space, cnt);
if (err)
goto out_free;
- ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
+ ubifs_assert(c, atomic_long_read(&c->dirty_zn_cnt) == 0);
err = free_unused_idx_lebs(c);
if (err)
goto out;
@@ -781,7 +781,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
* budgeting subsystem to assume the index is already committed,
* even though it is not.
*/
- ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+ ubifs_assert(c, c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
c->bi.old_idx_sz = c->calc_idx_sz;
c->bi.uncommitted_idx = 0;
c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
@@ -887,8 +887,8 @@ static int write_index(struct ubifs_info *c)
/* Grab some stuff from znode while we still can */
cnext = znode->cnext;
- ubifs_assert(ubifs_zn_dirty(znode));
- ubifs_assert(ubifs_zn_cow(znode));
+ ubifs_assert(c, ubifs_zn_dirty(znode));
+ ubifs_assert(c, ubifs_zn_cow(znode));
/*
* It is important that other threads should see %DIRTY_ZNODE
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index 93f5b7859e6f..d90ee01076a9 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -31,19 +31,21 @@
/**
* ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal.
+ * @c: UBIFS file-system description object
* @zr: root of the subtree to traverse
* @znode: previous znode
*
* This function implements levelorder TNC traversal. The LNC is ignored.
* Returns the next element or %NULL if @znode is already the last one.
*/
-struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
+struct ubifs_znode *ubifs_tnc_levelorder_next(const struct ubifs_info *c,
+ struct ubifs_znode *zr,
struct ubifs_znode *znode)
{
int level, iip, level_search = 0;
struct ubifs_znode *zn;
- ubifs_assert(zr);
+ ubifs_assert(c, zr);
if (unlikely(!znode))
return zr;
@@ -58,7 +60,7 @@ struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
iip = znode->iip;
while (1) {
- ubifs_assert(znode->level <= zr->level);
+ ubifs_assert(c, znode->level <= zr->level);
/*
* First walk up until there is a znode with next branch to
@@ -85,7 +87,7 @@ struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
level_search = 1;
iip = -1;
znode = ubifs_tnc_find_child(zr, 0);
- ubifs_assert(znode);
+ ubifs_assert(c, znode);
}
/* Switch to the next index */
@@ -111,7 +113,7 @@ struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
}
if (zn) {
- ubifs_assert(zn->level >= 0);
+ ubifs_assert(c, zn->level >= 0);
return zn;
}
}
@@ -140,7 +142,7 @@ int ubifs_search_zbranch(const struct ubifs_info *c,
int uninitialized_var(cmp);
const struct ubifs_zbranch *zbr = &znode->zbranch[0];
- ubifs_assert(end > beg);
+ ubifs_assert(c, end > beg);
while (end > beg) {
mid = (beg + end) >> 1;
@@ -158,13 +160,13 @@ int ubifs_search_zbranch(const struct ubifs_info *c,
*n = end - 1;
/* The insert point is after *n */
- ubifs_assert(*n >= -1 && *n < znode->child_cnt);
+ ubifs_assert(c, *n >= -1 && *n < znode->child_cnt);
if (*n == -1)
- ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0);
+ ubifs_assert(c, keys_cmp(c, key, &zbr[0].key) < 0);
else
- ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0);
+ ubifs_assert(c, keys_cmp(c, key, &zbr[*n].key) > 0);
if (*n + 1 < znode->child_cnt)
- ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0);
+ ubifs_assert(c, keys_cmp(c, key, &zbr[*n + 1].key) < 0);
return 0;
}
@@ -195,16 +197,18 @@ struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode)
/**
* ubifs_tnc_postorder_next - next TNC tree element in postorder traversal.
+ * @c: UBIFS file-system description object
* @znode: previous znode
*
* This function implements postorder TNC traversal. The LNC is ignored.
* Returns the next element or %NULL if @znode is already the last one.
*/
-struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode)
+struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c,
+ struct ubifs_znode *znode)
{
struct ubifs_znode *zn;
- ubifs_assert(znode);
+ ubifs_assert(c, znode);
if (unlikely(!znode->parent))
return NULL;
@@ -220,18 +224,20 @@ struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode)
/**
* ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree.
+ * @c: UBIFS file-system description object
* @znode: znode defining subtree to destroy
*
* This function destroys a subtree of the TNC tree. Returns the number of clean
* znodes in the subtree.
*/
-long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode)
+long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
+ struct ubifs_znode *znode)
{
struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode);
long clean_freed = 0;
int n;
- ubifs_assert(zn);
+ ubifs_assert(c, zn);
while (1) {
for (n = 0; n < zn->child_cnt; n++) {
if (!zn->zbranch[n].znode)
@@ -252,7 +258,7 @@ long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode)
return clean_freed;
}
- zn = ubifs_tnc_postorder_next(zn);
+ zn = ubifs_tnc_postorder_next(c, zn);
}
}
@@ -410,7 +416,7 @@ struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
int err;
struct ubifs_znode *znode;
- ubifs_assert(!zbr->znode);
+ ubifs_assert(c, !zbr->znode);
/*
* A slab cache is not presently used for znodes because the znode size
* depends on the fanout which is stored in the superblock.
@@ -435,7 +441,7 @@ struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
zbr->znode = znode;
znode->parent = parent;
- znode->time = get_seconds();
+ znode->time = ktime_get_seconds();
znode->iip = iip;
return znode;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 04bf84d71e7b..4368cde476b0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -258,6 +258,18 @@ enum {
LEB_RETAINED,
};
+/*
+ * Action taken upon a failed ubifs_assert().
+ * @ASSACT_REPORT: just report the failed assertion
+ * @ASSACT_RO: switch to read-only mode
+ * @ASSACT_PANIC: call BUG() and possibly panic the kernel
+ */
+enum {
+ ASSACT_REPORT = 0,
+ ASSACT_RO,
+ ASSACT_PANIC,
+};
+
/**
* struct ubifs_old_idx - index node obsoleted since last commit start.
* @rb: rb-tree node
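A hedged sketch of how a failure handler can dispatch on the ASSACT_* values added above (the actual handler is defined in a part of the patch outside this excerpt; ubifs_ro_mode() is UBIFS's existing helper for forcing read-only mode):

    void ubifs_assert_failed(struct ubifs_info *c, const char *expr,
                             const char *file, int line)
    {
            ubifs_err(c, "UBIFS assert failed: %s, in %s:%d", expr, file, line);

            switch (c->assert_action) {
            case ASSACT_PANIC:
                    BUG();                      /* oops, possibly panicking */
                    break;
            case ASSACT_RO:
                    ubifs_ro_mode(c, -EINVAL);  /* drop to read-only */
                    break;
            case ASSACT_REPORT:
            default:
                    break;                      /* the message above suffices */
            }
    }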
@@ -758,7 +770,7 @@ struct ubifs_znode {
struct ubifs_znode *parent;
struct ubifs_znode *cnext;
unsigned long flags;
- unsigned long time;
+ time64_t time;
int level;
int child_cnt;
int iip;
@@ -1015,6 +1027,7 @@ struct ubifs_debug_info;
* @bulk_read: enable bulk-reads
* @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
* @rw_incompat: the media is not R/W compatible
+ * @assert_action: action to take when a ubifs_assert() fails
*
* @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
* @calc_idx_sz
@@ -1256,6 +1269,7 @@ struct ubifs_info {
unsigned int bulk_read:1;
unsigned int default_compr:2;
unsigned int rw_incompat:1;
+ unsigned int assert_action:2;
struct mutex tnc_mutex;
struct ubifs_zbranch zroot;
@@ -1608,14 +1622,17 @@ int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu);
int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu);
/* tnc_misc.c */
-struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
+struct ubifs_znode *ubifs_tnc_levelorder_next(const struct ubifs_info *c,
+ struct ubifs_znode *zr,
struct ubifs_znode *znode);
int ubifs_search_zbranch(const struct ubifs_info *c,
const struct ubifs_znode *znode,
const union ubifs_key *key, int *n);
struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode);
-struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode);
-long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr);
+struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c,
+ struct ubifs_znode *znode);
+long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
+ struct ubifs_znode *zr);
struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
struct ubifs_zbranch *zbr,
struct ubifs_znode *parent, int iip);
@@ -1698,7 +1715,7 @@ struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip);
void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
-uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits);
+uint32_t ubifs_unpack_bits(const struct ubifs_info *c, uint8_t **addr, int *pos, int nrbits);
struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
/* Needed only in debugging code in lpt_commit.c */
int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
@@ -1755,7 +1772,13 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
size_t size, int flags, bool check_lock);
ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
size_t size);
+
+#ifdef CONFIG_UBIFS_FS_XATTR
void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum);
+#else
+static inline void ubifs_evict_xattr_inode(struct ubifs_info *c,
+ ino_t xattr_inum) { }
+#endif
#ifdef CONFIG_UBIFS_FS_SECURITY
extern int ubifs_init_security(struct inode *dentry, struct inode *inode,
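The #else stub above keeps call sites free of preprocessor guards: with CONFIG_UBIFS_FS_XATTR disabled, ubifs_evict_xattr_inode() compiles to an empty static inline, so a caller (outside this excerpt) stays a plain

    ubifs_evict_xattr_inode(c, xattr_inum);    /* no-op without xattr support */

with no #ifdef of its own, mirroring the conditional sb->s_xattr assignment added in super.c earlier in this patch.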
@@ -1812,14 +1835,16 @@ static inline int ubifs_encrypt(const struct inode *inode,
unsigned int in_len, unsigned int *out_len,
int block)
{
- ubifs_assert(0);
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
+ ubifs_assert(c, 0);
return -EOPNOTSUPP;
}
static inline int ubifs_decrypt(const struct inode *inode,
struct ubifs_data_node *dn,
unsigned int *out_len, int block)
{
- ubifs_assert(0);
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
+ ubifs_assert(c, 0);
return -EOPNOTSUPP;
}
#else
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 6f720fdf5020..61afdfee4b28 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -152,6 +152,12 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
ui->data_len = size;
mutex_lock(&host_ui->ui_mutex);
+
+ if (!host->i_nlink) {
+ err = -ENOENT;
+ goto out_noent;
+ }
+
host->i_ctime = current_time(host);
host_ui->xattr_cnt += 1;
host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm));
@@ -184,6 +190,7 @@ out_cancel:
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
host_ui->xattr_names -= fname_len(nm);
host_ui->flags &= ~UBIFS_CRYPT_FL;
+out_noent:
mutex_unlock(&host_ui->ui_mutex);
out_free:
make_bad_inode(inode);
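The new i_nlink check closes a race with concurrent unlink: once the host inode's last link is gone, creating an xattr would dirty and effectively resurrect an inode that is about to be evicted, so the operation now bails out with -ENOENT under ui_mutex before any state is touched, and the out_noent label unwinds only the lock. The same guard is repeated in change_xattr() and remove_xattr() further down.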
@@ -216,7 +223,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
struct ubifs_budget_req req = { .dirtied_ino = 2,
.dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) };
- ubifs_assert(ui->data_len == inode->i_size);
+ ubifs_assert(c, ui->data_len == inode->i_size);
err = ubifs_budget_space(c, &req);
if (err)
return err;
@@ -235,6 +242,12 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
mutex_unlock(&ui->ui_mutex);
mutex_lock(&host_ui->ui_mutex);
+
+ if (!host->i_nlink) {
+ err = -ENOENT;
+ goto out_noent;
+ }
+
host->i_ctime = current_time(host);
host_ui->xattr_size -= CALC_XATTR_BYTES(old_size);
host_ui->xattr_size += CALC_XATTR_BYTES(size);
@@ -256,6 +269,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
out_cancel:
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
host_ui->xattr_size += CALC_XATTR_BYTES(old_size);
+out_noent:
mutex_unlock(&host_ui->ui_mutex);
make_bad_inode(inode);
out_free:
@@ -291,7 +305,7 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
int err;
if (check_lock)
- ubifs_assert(inode_is_locked(host));
+ ubifs_assert(c, inode_is_locked(host));
if (size > UBIFS_MAX_INO_DATA)
return -ERANGE;
@@ -374,8 +388,8 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
}
ui = ubifs_inode(inode);
- ubifs_assert(inode->i_size == ui->data_len);
- ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len);
+ ubifs_assert(c, inode->i_size == ui->data_len);
+ ubifs_assert(c, ubifs_inode(host)->xattr_size > ui->data_len);
mutex_lock(&ui->ui_mutex);
if (buf) {
@@ -462,7 +476,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
return err;
}
- ubifs_assert(written <= size);
+ ubifs_assert(c, written <= size);
return written;
}
@@ -475,13 +489,19 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1,
.dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
- ubifs_assert(ui->data_len == inode->i_size);
+ ubifs_assert(c, ui->data_len == inode->i_size);
err = ubifs_budget_space(c, &req);
if (err)
return err;
mutex_lock(&host_ui->ui_mutex);
+
+ if (!host->i_nlink) {
+ err = -ENOENT;
+ goto out_noent;
+ }
+
host->i_ctime = current_time(host);
host_ui->xattr_cnt -= 1;
host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm));
@@ -501,6 +521,7 @@ out_cancel:
host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm));
host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
host_ui->xattr_names += fname_len(nm);
+out_noent:
mutex_unlock(&host_ui->ui_mutex);
ubifs_release_budget(c, &req);
make_bad_inode(inode);
@@ -538,7 +559,10 @@ static int ubifs_xattr_remove(struct inode *host, const char *name)
union ubifs_key key;
int err;
- ubifs_assert(inode_is_locked(host));
+ ubifs_assert(c, inode_is_locked(host));
+
+ if (!host->i_nlink)
+ return -ENOENT;
if (fname_len(&nm) > UBIFS_MAX_NLEN)
return -ENAMETOOLONG;
@@ -561,7 +585,7 @@ static int ubifs_xattr_remove(struct inode *host, const char *name)
goto out_free;
}
- ubifs_assert(inode->i_nlink == 1);
+ ubifs_assert(c, inode->i_nlink == 1);
clear_nlink(inode);
err = remove_xattr(c, host, inode, &nm);
if (err)
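
The same guard is added at three call sites above: create_xattr(), change_xattr() and remove_xattr() re-check the host inode's link count after taking host_ui->ui_mutex and fail with -ENOENT if the host has already been unlinked, so a racing xattr update can no longer dirty (and thereby write back) a dead inode. Condensed, the check-under-lock shape is:

	mutex_lock(&host_ui->ui_mutex);
	/*
	 * The host may have been unlinked between the unlocked lookup
	 * and this point; updating ctime and xattr accounting on a
	 * zero-nlink inode would write back a deleted inode.
	 */
	if (!host->i_nlink) {
		err = -ENOENT;
		goto out_noent;
	}
	host->i_ctime = current_time(host);
	/* ... adjust xattr_cnt/xattr_size, journal the change ... */
out_noent:
	mutex_unlock(&host_ui->ui_mutex);

ubifs_xattr_remove() additionally makes the same check early, before any budgeting; the authoritative check remains the one under the mutex.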
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3040dc2a32f6..6f515651a2c2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -764,9 +764,7 @@ static int udf_find_fileset(struct super_block *sb,
struct kernel_lb_addr *root)
{
struct buffer_head *bh = NULL;
- long lastblock;
uint16_t ident;
- struct udf_sb_info *sbi;
if (fileset->logicalBlockNum != 0xFFFFFFFF ||
fileset->partitionReferenceNum != 0xFFFF) {
@@ -779,69 +777,11 @@ static int udf_find_fileset(struct super_block *sb,
return 1;
}
- }
-
- sbi = UDF_SB(sb);
- if (!bh) {
- /* Search backwards through the partitions */
- struct kernel_lb_addr newfileset;
-
-/* --> cvg: FIXME - is it reasonable? */
- return 1;
-
- for (newfileset.partitionReferenceNum = sbi->s_partitions - 1;
- (newfileset.partitionReferenceNum != 0xFFFF &&
- fileset->logicalBlockNum == 0xFFFFFFFF &&
- fileset->partitionReferenceNum == 0xFFFF);
- newfileset.partitionReferenceNum--) {
- lastblock = sbi->s_partmaps
- [newfileset.partitionReferenceNum]
- .s_partition_len;
- newfileset.logicalBlockNum = 0;
-
- do {
- bh = udf_read_ptagged(sb, &newfileset, 0,
- &ident);
- if (!bh) {
- newfileset.logicalBlockNum++;
- continue;
- }
-
- switch (ident) {
- case TAG_IDENT_SBD:
- {
- struct spaceBitmapDesc *sp;
- sp = (struct spaceBitmapDesc *)
- bh->b_data;
- newfileset.logicalBlockNum += 1 +
- ((le32_to_cpu(sp->numOfBytes) +
- sizeof(struct spaceBitmapDesc)
- - 1) >> sb->s_blocksize_bits);
- brelse(bh);
- break;
- }
- case TAG_IDENT_FSD:
- *fileset = newfileset;
- break;
- default:
- newfileset.logicalBlockNum++;
- brelse(bh);
- bh = NULL;
- break;
- }
- } while (newfileset.logicalBlockNum < lastblock &&
- fileset->logicalBlockNum == 0xFFFFFFFF &&
- fileset->partitionReferenceNum == 0xFFFF);
- }
- }
-
- if ((fileset->logicalBlockNum != 0xFFFFFFFF ||
- fileset->partitionReferenceNum != 0xFFFF) && bh) {
udf_debug("Fileset at block=%u, partition=%u\n",
fileset->logicalBlockNum,
fileset->partitionReferenceNum);
- sbi->s_partition = fileset->partitionReferenceNum;
+ UDF_SB(sb)->s_partition = fileset->partitionReferenceNum;
udf_load_fileset(sb, bh, root);
brelse(bh);
return 0;
@@ -1570,10 +1510,16 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
*/
#define PART_DESC_ALLOC_STEP 32
+struct part_desc_seq_scan_data {
+ struct udf_vds_record rec;
+ u32 partnum;
+};
+
struct desc_seq_scan_data {
struct udf_vds_record vds[VDS_POS_LENGTH];
unsigned int size_part_descs;
- struct udf_vds_record *part_descs_loc;
+ unsigned int num_part_descs;
+ struct part_desc_seq_scan_data *part_descs_loc;
};
static struct udf_vds_record *handle_partition_descriptor(
@@ -1582,10 +1528,14 @@ static struct udf_vds_record *handle_partition_descriptor(
{
struct partitionDesc *desc = (struct partitionDesc *)bh->b_data;
int partnum;
+ int i;
partnum = le16_to_cpu(desc->partitionNumber);
- if (partnum >= data->size_part_descs) {
- struct udf_vds_record *new_loc;
+ for (i = 0; i < data->num_part_descs; i++)
+ if (partnum == data->part_descs_loc[i].partnum)
+ return &(data->part_descs_loc[i].rec);
+ if (data->num_part_descs >= data->size_part_descs) {
+ struct part_desc_seq_scan_data *new_loc;
unsigned int new_size = ALIGN(partnum, PART_DESC_ALLOC_STEP);
new_loc = kcalloc(new_size, sizeof(*new_loc), GFP_KERNEL);
@@ -1597,7 +1547,7 @@ static struct udf_vds_record *handle_partition_descriptor(
data->part_descs_loc = new_loc;
data->size_part_descs = new_size;
}
- return &(data->part_descs_loc[partnum]);
+ return &(data->part_descs_loc[data->num_part_descs++].rec);
}
@@ -1647,6 +1597,7 @@ static noinline int udf_process_sequence(
memset(data.vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH);
data.size_part_descs = PART_DESC_ALLOC_STEP;
+ data.num_part_descs = 0;
data.part_descs_loc = kcalloc(data.size_part_descs,
sizeof(*data.part_descs_loc),
GFP_KERNEL);
@@ -1658,7 +1609,6 @@ static noinline int udf_process_sequence(
* are in it.
*/
for (; (!done && block <= lastblock); block++) {
-
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
break;
@@ -1730,13 +1680,10 @@ static noinline int udf_process_sequence(
}
/* Now handle prevailing Partition Descriptors */
- for (i = 0; i < data.size_part_descs; i++) {
- if (data.part_descs_loc[i].block) {
- ret = udf_load_partdesc(sb,
- data.part_descs_loc[i].block);
- if (ret < 0)
- return ret;
- }
+ for (i = 0; i < data.num_part_descs; i++) {
+ ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block);
+ if (ret < 0)
+ return ret;
}
return 0;
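
The udf_process_sequence() rework above replaces an array indexed directly by the on-disk partition number with a compact list of (partnum, record) pairs: indexing by the raw partnum both wasted memory and let a corrupted descriptor with a huge partition number drive the allocation size, while the new code stores only the descriptors actually seen and looks them up by number. A simplified user-space sketch of the lookup-or-append idiom (types and growth step are illustrative):

	#include <stdlib.h>
	#include <string.h>

	struct pd_entry {
		unsigned int partnum;
		/* ... the descriptor record would live here ... */
	};

	struct pd_list {
		struct pd_entry *v;
		unsigned int n, cap;
	};

	/* Return the entry for partnum, appending a new one if none
	 * exists; NULL on allocation failure. */
	static struct pd_entry *pd_lookup_or_append(struct pd_list *l,
						    unsigned int partnum)
	{
		unsigned int i;

		/* Reuse the slot for a repeated partition number so the
		 * caller can apply prevailing-descriptor logic to it. */
		for (i = 0; i < l->n; i++)
			if (l->v[i].partnum == partnum)
				return &l->v[i];

		if (l->n >= l->cap) {	/* grow by a fixed step */
			unsigned int cap = l->cap + 32;
			struct pd_entry *nv = calloc(cap, sizeof(*nv));

			if (!nv)
				return NULL;
			memcpy(nv, l->v, l->n * sizeof(*nv));
			free(l->v);
			l->v = nv;
			l->cap = cap;
		}
		l->v[l->n].partnum = partnum;
		return &l->v[l->n++];
	}

The allocation is now bounded by the number of descriptors encountered, not by whatever partition number a damaged image happens to contain.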
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 15c265d450bf..bfa0ec69f924 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -340,17 +340,15 @@ out:
* fatal_signal_pending()s, and the mmap_sem must be released before
* returning it.
*/
-int handle_userfault(struct vm_fault *vmf, unsigned long reason)
+vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
{
struct mm_struct *mm = vmf->vma->vm_mm;
struct userfaultfd_ctx *ctx;
struct userfaultfd_wait_queue uwq;
- int ret;
+ vm_fault_t ret = VM_FAULT_SIGBUS;
bool must_wait, return_to_userland;
long blocking_state;
- ret = VM_FAULT_SIGBUS;
-
/*
* We don't do userfault handling for the final child pid update.
*
@@ -910,7 +908,7 @@ wakeup:
*/
spin_lock(&ctx->fault_pending_wqh.lock);
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
- __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range);
+ __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
spin_unlock(&ctx->fault_pending_wqh.lock);
/* Flush pending events that may still wait on event_wqh */
@@ -1066,7 +1064,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
* anyway.
*/
list_del(&uwq->wq.entry);
- __add_wait_queue(&ctx->fault_wqh, &uwq->wq);
+ add_wait_queue(&ctx->fault_wqh, &uwq->wq);
write_seqcount_end(&ctx->refile_seq);
@@ -1215,7 +1213,7 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx,
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
range);
if (waitqueue_active(&ctx->fault_wqh))
- __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, range);
+ __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
spin_unlock(&ctx->fault_pending_wqh.lock);
}
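
Two separate cleanups meet in the userfaultfd diff: handle_userfault() is converted to the vm_fault_t return type (with ret initialized at its declaration), and the fault_wqh waitqueue switches to locked add/wake primitives -- add_wait_queue() instead of __add_wait_queue(), __wake_up() instead of __wake_up_locked_key() -- so fault_wqh is consistently protected by its own internal lock rather than relying on the caller holding fault_pending_wqh.lock. Annotated, the wakeup path now reads:

	spin_lock(&ctx->fault_pending_wqh.lock);
	/* fault_pending_wqh is still manipulated under its own lock,
	 * so the locked variant remains correct here */
	__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
	/* __wake_up() takes fault_wqh.lock itself, nesting inside
	 * fault_pending_wqh.lock */
	__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
	spin_unlock(&ctx->fault_pending_wqh.lock);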
diff --git a/fs/xattr.c b/fs/xattr.c
index f9cb1db187b7..daa732550088 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -23,7 +23,6 @@
#include <linux/posix_acl_xattr.h>
#include <linux/uaccess.h>
-#include "internal.h"
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
@@ -501,10 +500,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
if (!f.file)
return error;
audit_file(f.file);
- error = mnt_want_write_file_path(f.file);
+ error = mnt_want_write_file(f.file);
if (!error) {
error = setxattr(f.file->f_path.dentry, name, value, size, flags);
- mnt_drop_write_file_path(f.file);
+ mnt_drop_write_file(f.file);
}
fdput(f);
return error;
@@ -539,7 +538,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
- posix_acl_fix_xattr_to_user(kvalue, size);
+ posix_acl_fix_xattr_to_user(kvalue, error);
if (size && copy_to_user(value, kvalue, error))
error = -EFAULT;
} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
@@ -733,10 +732,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
if (!f.file)
return error;
audit_file(f.file);
- error = mnt_want_write_file_path(f.file);
+ error = mnt_want_write_file(f.file);
if (!error) {
error = removexattr(f.file->f_path.dentry, name);
- mnt_drop_write_file_path(f.file);
+ mnt_drop_write_file(f.file);
}
fdput(f);
return error;
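
Two independent fixes in fs/xattr.c above: fsetxattr()/fremovexattr() return to plain mnt_want_write_file()/mnt_drop_write_file() (the _path variants go away along with the internal.h include), and getxattr() now passes the actual value length (error) rather than the caller-supplied buffer size to posix_acl_fix_xattr_to_user(). The distinction matters because userspace may pass a buffer far larger than the stored ACL; a sketch of the bug shape (is_posix_acl_name() is a hypothetical helper standing in for the strcmp pair above):

	/*
	 * Illustrative only. vfs_getxattr() returns the number of
	 * bytes actually written into kvalue (or a negative errno);
	 * `size` is merely the caller's buffer capacity.
	 */
	ssize_t n = vfs_getxattr(dentry, kname, kvalue, size);

	if (n > 0 && is_posix_acl_name(kname))
		/* Fix up exactly the n valid bytes; passing `size`
		 * would let the fixup walk past the end of the ACL
		 * data whenever the buffer is oversized. */
		posix_acl_fix_xattr_to_user(kvalue, n);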
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 17cf48564390..9f08dd9bf1d5 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -195,8 +195,8 @@ xrep_calc_ag_resblks(
struct xfs_scrub_metadata *sm = sc->sm;
struct xfs_perag *pag;
struct xfs_buf *bp;
- xfs_agino_t icount = 0;
- xfs_extlen_t aglen = 0;
+ xfs_agino_t icount = NULLAGINO;
+ xfs_extlen_t aglen = NULLAGBLOCK;
xfs_extlen_t usedlen;
xfs_extlen_t freelen;
xfs_extlen_t bnobt_sz;
@@ -208,20 +208,14 @@ xrep_calc_ag_resblks(
if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
return 0;
- /* Use in-core counters if possible. */
pag = xfs_perag_get(mp, sm->sm_agno);
- if (pag->pagi_init)
+ if (pag->pagi_init) {
+ /* Use in-core icount if possible. */
icount = pag->pagi_count;
-
- /*
- * Otherwise try to get the actual counters from disk; if not, make
- * some worst case assumptions.
- */
- if (icount == 0) {
+ } else {
+ /* Try to get the actual counters from disk. */
error = xfs_ialloc_read_agi(mp, NULL, sm->sm_agno, &bp);
- if (error) {
- icount = mp->m_sb.sb_agblocks / mp->m_sb.sb_inopblock;
- } else {
+ if (!error) {
icount = pag->pagi_count;
xfs_buf_relse(bp);
}
@@ -229,18 +223,32 @@ xrep_calc_ag_resblks(
/* Now grab the block counters from the AGF. */
error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp);
- if (error) {
- aglen = mp->m_sb.sb_agblocks;
- freelen = aglen;
- usedlen = aglen;
- } else {
+ if (!error) {
aglen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_length);
- freelen = pag->pagf_freeblks;
+ freelen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_freeblks);
usedlen = aglen - freelen;
xfs_buf_relse(bp);
}
xfs_perag_put(pag);
+ /* If the icount is impossible, make some worst-case assumptions. */
+ if (icount == NULLAGINO ||
+ !xfs_verify_agino(mp, sm->sm_agno, icount)) {
+ xfs_agino_t first, last;
+
+ xfs_agino_range(mp, sm->sm_agno, &first, &last);
+ icount = last - first + 1;
+ }
+
+ /* If the block counts are impossible, make worst-case assumptions. */
+ if (aglen == NULLAGBLOCK ||
+ aglen != xfs_ag_block_count(mp, sm->sm_agno) ||
+ freelen >= aglen) {
+ aglen = xfs_ag_block_count(mp, sm->sm_agno);
+ freelen = aglen;
+ usedlen = aglen;
+ }
+
trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
freelen, usedlen);
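
xrep_calc_ag_resblks() above stops treating 0 as "counter unknown" and instead uses explicit NULLAGINO/NULLAGBLOCK sentinels, then validates whatever it obtained (xfs_verify_agino(), xfs_ag_block_count()) before use, substituting worst-case values when a counter is missing or impossible. The general shape of the pattern, as a hedged user-space sketch with hypothetical helpers:

	#include <stdbool.h>

	#define COUNT_UNKNOWN	(~0u)

	/* All names here are illustrative stand-ins for the xfs
	 * calls in the hunk above. */
	static unsigned int safe_icount(bool have_in_core,
					unsigned int in_core,
					int (*read_disk)(unsigned int *out),
					unsigned int max_possible)
	{
		unsigned int icount = COUNT_UNKNOWN;

		if (have_in_core)
			icount = in_core;
		else if (read_disk(&icount) != 0)
			icount = COUNT_UNKNOWN;	/* keep the sentinel */

		/* Validate even a successful read: on-disk data may be
		 * corrupt -- this is repair code, after all. */
		if (icount == COUNT_UNKNOWN || icount > max_possible)
			icount = max_possible;	/* worst-case assumption */
		return icount;
	}

Using a sentinel instead of 0 also handles the legitimate case of an AG that really does contain zero inodes.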
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5eaef2c17293..61a5ad2600e8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -931,31 +931,16 @@ xfs_file_clone_range(
len, false);
}
-STATIC ssize_t
+STATIC int
xfs_file_dedupe_range(
- struct file *src_file,
- u64 loff,
- u64 len,
- struct file *dst_file,
- u64 dst_loff)
+ struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len)
{
- struct inode *srci = file_inode(src_file);
- u64 max_dedupe;
- int error;
-
- /*
- * Since we have to read all these pages in to compare them, cut
- * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
- * That means we won't do more than MAX_RW_COUNT IO per request.
- */
- max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
- if (len > max_dedupe)
- len = max_dedupe;
- error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+ return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
len, true);
- if (error)
- return error;
- return len;
}
STATIC int
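
xfs_file_dedupe_range() is adapted above to the reworked ->dedupe_file_range prototype -- (file_in, pos_in, file_out, pos_out, len), returning int rather than a byte count -- and drops its local request-size clamp, which presumably now lives in the common VFS dedupe path rather than being repeated per filesystem. For reference, the deleted clamp as a standalone kernel-context sketch:

	/*
	 * Cap a dedupe request at half of MAX_RW_COUNT, rounded down
	 * to a block boundary, so that comparing source and
	 * destination reads at most MAX_RW_COUNT bytes per request.
	 */
	static u64 clamp_dedupe_len(u64 len, unsigned int blocksize)
	{
		u64 max_dedupe = (MAX_RW_COUNT >> 1) &
				 ~((u64)blocksize - 1);

		return len > max_dedupe ? max_dedupe : len;
	}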
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 99db27d6ac8a..02d15098dbee 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -637,6 +637,7 @@ xfs_check_summary_counts(
*/
if (XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
(mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
+ !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
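
The one-line xfs_mount.c addition extends the post-clean-unmount sanity check: besides fdblocks exceeding dblocks and ifree exceeding icount, an inode count that fails xfs_verify_icount() now also flags the summary counters as bad (XFS_MOUNT_BAD_SUMMARY), forcing a recount instead of trusting the superblock. The combined predicate, as a self-contained sketch (icount_valid() stands in for xfs_verify_icount(); its bound here is an arbitrary placeholder):

	#include <stdbool.h>
	#include <stdint.h>

	struct sb_counts { uint64_t dblocks, fdblocks, icount, ifree; };

	static bool icount_valid(uint64_t icount)
	{
		return icount <= ((uint64_t)1 << 48);	/* placeholder bound */
	}

	static bool summary_counts_plausible(const struct sb_counts *sb)
	{
		return sb->fdblocks <= sb->dblocks &&	/* free blocks fit on disk   */
		       icount_valid(sb->icount) &&	/* inode count in sane range */
		       sb->ifree <= sb->icount;		/* free inodes <= total      */
	}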