summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/cache.c29
-rw-r--r--fs/afs/cache.c43
-rw-r--r--fs/aio.c27
-rw-r--r--fs/autofs4/autofs_i.h22
-rw-r--r--fs/autofs4/dev-ioctl.c44
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/binfmt_flat.c10
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/btrfs/check-integrity.c41
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/raid56.c8
-rw-r--r--fs/btrfs/scrub.c12
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/buffer.c35
-rw-r--r--fs/ceph/cache.c31
-rw-r--r--fs/cifs/cache.c31
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifsproto.h3
-rw-r--r--fs/cifs/cifssmb.c15
-rw-r--r--fs/cifs/connect.c3
-rw-r--r--fs/cifs/smb2ops.c204
-rw-r--r--fs/cifs/smb2pdu.c34
-rw-r--r--fs/cifs/smb2pdu.h10
-rw-r--r--fs/cifs/smb2proto.h6
-rw-r--r--fs/cifs/xattr.c2
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/crypto/bio.c2
-rw-r--r--fs/dax.c363
-rw-r--r--fs/direct-io.c8
-rw-r--r--fs/eventpoll.c30
-rw-r--r--fs/exec.c56
-rw-r--r--fs/exofs/ore.c2
-rw-r--r--fs/ext2/file.c25
-rw-r--r--fs/ext4/file.c48
-rw-r--r--fs/ext4/inode.c15
-rw-r--r--fs/ext4/page-io.c4
-rw-r--r--fs/ext4/readpage.c2
-rw-r--r--fs/ext4/super.c26
-rw-r--r--fs/f2fs/data.c10
-rw-r--r--fs/f2fs/segment.c2
-rw-r--r--fs/fat/namei_vfat.c35
-rw-r--r--fs/fscache/page.c5
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/hugetlbfs/inode.c41
-rw-r--r--fs/inode.c2
-rw-r--r--fs/iomap.c4
-rw-r--r--fs/isofs/inode.c11
-rw-r--r--fs/jfs/jfs_logmgr.c4
-rw-r--r--fs/jfs/jfs_metapage.c4
-rw-r--r--fs/kernfs/dir.c111
-rw-r--r--fs/kernfs/file.c10
-rw-r--r--fs/kernfs/inode.c9
-rw-r--r--fs/kernfs/kernfs-internal.h9
-rw-r--r--fs/kernfs/mount.c94
-rw-r--r--fs/kernfs/symlink.c6
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/namei.c15
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/fscache-index.c40
-rw-r--r--fs/nilfs2/page.c3
-rw-r--r--fs/nilfs2/segbuf.c2
-rw-r--r--fs/notify/dnotify/dnotify.c2
-rw-r--r--fs/ocfs2/acl.c2
-rw-r--r--fs/ocfs2/acl.h7
-rw-r--r--fs/ocfs2/alloc.c22
-rw-r--r--fs/ocfs2/alloc.h3
-rw-r--r--fs/ocfs2/cluster/heartbeat.c44
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/file.c7
-rw-r--r--fs/ocfs2/journal.c1
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/quota_global.c36
-rw-r--r--fs/ocfs2/quota_local.c17
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c1
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/generic.c34
-rw-r--r--fs/proc/internal.h5
-rw-r--r--fs/proc/meminfo.c10
-rw-r--r--fs/proc/proc_net.c2
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/task_mmu.c278
-rw-r--r--fs/proc/task_nommu.c5
-rw-r--r--fs/pstore/inode.c24
-rw-r--r--fs/quota/dquot.c504
-rw-r--r--fs/quota/quota_tree.c10
-rw-r--r--fs/quota/quota_v1.c11
-rw-r--r--fs/quota/quota_v2.c93
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/stat.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sync.c5
-rw-r--r--fs/ubifs/file.c5
-rw-r--r--fs/udf/inode.c13
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/udf/super.c12
-rw-r--r--fs/userfaultfd.c41
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_file.c2
109 files changed, 1566 insertions, 1305 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 103ca5e1267b..64c58eb26159 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -151,34 +151,6 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
return FSCACHE_CHECKAUX_OKAY;
}
-static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
-{
- struct v9fs_inode *v9inode = cookie_netfs_data;
- struct pagevec pvec;
- pgoff_t first;
- int loop, nr_pages;
-
- pagevec_init(&pvec, 0);
- first = 0;
-
- for (;;) {
- nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping,
- first,
- PAGEVEC_SIZE - pagevec_count(&pvec));
- if (!nr_pages)
- break;
-
- for (loop = 0; loop < nr_pages; loop++)
- ClearPageFsCache(pvec.pages[loop]);
-
- first = pvec.pages[nr_pages - 1]->index + 1;
-
- pvec.nr = nr_pages;
- pagevec_release(&pvec);
- cond_resched();
- }
-}
-
const struct fscache_cookie_def v9fs_cache_inode_index_def = {
.name = "9p.inode",
.type = FSCACHE_COOKIE_TYPE_DATAFILE,
@@ -186,7 +158,6 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = {
.get_attr = v9fs_cache_inode_get_attr,
.get_aux = v9fs_cache_inode_get_aux,
.check_aux = v9fs_cache_inode_check_aux,
- .now_uncached = v9fs_cache_inode_now_uncached,
};
void v9fs_cache_inode_get_cookie(struct inode *inode)
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index 577763c3d88b..1fe855191261 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -39,7 +39,6 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
const void *buffer,
uint16_t buflen);
-static void afs_vnode_cache_now_uncached(void *cookie_netfs_data);
struct fscache_netfs afs_cache_netfs = {
.name = "afs",
@@ -75,7 +74,6 @@ struct fscache_cookie_def afs_vnode_cache_index_def = {
.get_attr = afs_vnode_cache_get_attr,
.get_aux = afs_vnode_cache_get_aux,
.check_aux = afs_vnode_cache_check_aux,
- .now_uncached = afs_vnode_cache_now_uncached,
};
/*
@@ -359,44 +357,3 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
_leave(" = SUCCESS");
return FSCACHE_CHECKAUX_OKAY;
}
-
-/*
- * indication the cookie is no longer uncached
- * - this function is called when the backing store currently caching a cookie
- * is removed
- * - the netfs should use this to clean up any markers indicating cached pages
- * - this is mandatory for any object that may have data
- */
-static void afs_vnode_cache_now_uncached(void *cookie_netfs_data)
-{
- struct afs_vnode *vnode = cookie_netfs_data;
- struct pagevec pvec;
- pgoff_t first;
- int loop, nr_pages;
-
- _enter("{%x,%x,%Lx}",
- vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version);
-
- pagevec_init(&pvec, 0);
- first = 0;
-
- for (;;) {
- /* grab a bunch of pages to clean */
- nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping,
- first,
- PAGEVEC_SIZE - pagevec_count(&pvec));
- if (!nr_pages)
- break;
-
- for (loop = 0; loop < nr_pages; loop++)
- ClearPageFsCache(pvec.pages[loop]);
-
- first = pvec.pages[nr_pages - 1]->index + 1;
-
- pvec.nr = nr_pages;
- pagevec_release(&pvec);
- cond_resched();
- }
-
- _leave("");
-}
diff --git a/fs/aio.c b/fs/aio.c
index dcad3a66748c..b5d69f28d8b1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -373,6 +373,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
pgoff_t idx;
int rc;
+ /*
+ * We cannot support the _NO_COPY case here, because copy needs to
+ * happen under the ctx->completion_lock. That does not work with the
+ * migration workflow of MIGRATE_SYNC_NO_COPY.
+ */
+ if (mode == MIGRATE_SYNC_NO_COPY)
+ return -EINVAL;
+
rc = 0;
/* mapping->private_lock here protects against the kioctx teardown. */
@@ -441,10 +449,9 @@ static const struct address_space_operations aio_ctx_aops = {
#endif
};
-static int aio_setup_ring(struct kioctx *ctx)
+static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
{
struct aio_ring *ring;
- unsigned nr_events = ctx->max_reqs;
struct mm_struct *mm = current->mm;
unsigned long size, unused;
int nr_pages;
@@ -707,6 +714,12 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
int err = -ENOMEM;
/*
+ * Store the original nr_events -- what userspace passed to io_setup(),
+ * for counting against the global limit -- before it changes.
+ */
+ unsigned int max_reqs = nr_events;
+
+ /*
* We keep track of the number of available ringbuffer slots, to prevent
* overflow (reqs_available), and we also use percpu counters for this.
*
@@ -724,14 +737,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-EINVAL);
}
- if (!nr_events || (unsigned long)nr_events > (aio_max_nr * 2UL))
+ if (!nr_events || (unsigned long)max_reqs > aio_max_nr)
return ERR_PTR(-EAGAIN);
ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
if (!ctx)
return ERR_PTR(-ENOMEM);
- ctx->max_reqs = nr_events;
+ ctx->max_reqs = max_reqs;
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->completion_lock);
@@ -753,7 +766,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
if (!ctx->cpu)
goto err;
- err = aio_setup_ring(ctx);
+ err = aio_setup_ring(ctx, nr_events);
if (err < 0)
goto err;
@@ -764,8 +777,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
/* limit the number of system wide aios */
spin_lock(&aio_nr_lock);
- if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
- aio_nr + nr_events < aio_nr) {
+ if (aio_nr + ctx->max_reqs > aio_max_nr ||
+ aio_nr + ctx->max_reqs < aio_nr) {
spin_unlock(&aio_nr_lock);
err = -EAGAIN;
goto err_ctx;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index beef981aa54f..4737615f0eaa 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -11,10 +11,21 @@
#include <linux/auto_fs4.h>
#include <linux/auto_dev-ioctl.h>
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/completion.h>
+#include <asm/current.h>
/* This is the range of ioctl() numbers we claim as ours */
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
@@ -24,17 +35,6 @@
#define AUTOFS_DEV_IOCTL_IOC_COUNT \
(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD - AUTOFS_DEV_IOCTL_VERSION_CMD)
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <asm/current.h>
-#include <linux/uaccess.h>
-
#ifdef pr_fmt
#undef pr_fmt
#endif
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index dd9f1bebb5a3..b7c816f39404 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -93,17 +93,17 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
* at the end of the struct.
*/
static struct autofs_dev_ioctl *
- copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
+copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
{
struct autofs_dev_ioctl tmp, *res;
- if (copy_from_user(&tmp, in, sizeof(tmp)))
+ if (copy_from_user(&tmp, in, AUTOFS_DEV_IOCTL_SIZE))
return ERR_PTR(-EFAULT);
- if (tmp.size < sizeof(tmp))
+ if (tmp.size < AUTOFS_DEV_IOCTL_SIZE)
return ERR_PTR(-EINVAL);
- if (tmp.size > (PATH_MAX + sizeof(tmp)))
+ if (tmp.size > AUTOFS_DEV_IOCTL_SIZE + PATH_MAX)
return ERR_PTR(-ENAMETOOLONG);
res = memdup_user(in, tmp.size);
@@ -133,8 +133,8 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
goto out;
}
- if (param->size > sizeof(*param)) {
- err = invalid_str(param->path, param->size - sizeof(*param));
+ if (param->size > AUTOFS_DEV_IOCTL_SIZE) {
+ err = invalid_str(param->path, param->size - AUTOFS_DEV_IOCTL_SIZE);
if (err) {
pr_warn(
"path string terminator missing for cmd(0x%08x)\n",
@@ -258,11 +258,6 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
if (err)
goto out;
- /*
- * Find autofs super block that has the device number
- * corresponding to the autofs fs we want to open.
- */
-
filp = dentry_open(&path, O_RDONLY, current_cred());
path_put(&path);
if (IS_ERR(filp)) {
@@ -451,7 +446,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
dev_t devid;
int err = -ENOENT;
- if (param->size <= sizeof(*param)) {
+ if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
err = -EINVAL;
goto out;
}
@@ -539,7 +534,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
unsigned int devid, magic;
int err = -ENOENT;
- if (param->size <= sizeof(*param)) {
+ if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
err = -EINVAL;
goto out;
}
@@ -628,10 +623,6 @@ static int _autofs_dev_ioctl(unsigned int command,
ioctl_fn fn = NULL;
int err = 0;
- /* only root can play with this */
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST);
cmd = _IOC_NR(command);
@@ -640,6 +631,14 @@ static int _autofs_dev_ioctl(unsigned int command,
return -ENOTTY;
}
+ /* Only root can use ioctls other than AUTOFS_DEV_IOCTL_VERSION_CMD
+ * and AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD
+ */
+ if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD &&
+ cmd != AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD &&
+ !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
/* Copy the parameters into kernel space. */
param = copy_dev_ioctl(user);
if (IS_ERR(param))
@@ -706,7 +705,8 @@ out:
return err;
}
-static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
+static long autofs_dev_ioctl(struct file *file, unsigned int command,
+ unsigned long u)
{
int err;
@@ -715,9 +715,10 @@ static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
}
#ifdef CONFIG_COMPAT
-static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u)
+static long autofs_dev_ioctl_compat(struct file *file, unsigned int command,
+ unsigned long u)
{
- return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u));
+ return autofs_dev_ioctl(file, command, (unsigned long) compat_ptr(u));
}
#else
#define autofs_dev_ioctl_compat NULL
@@ -733,7 +734,8 @@ static const struct file_operations _dev_ioctl_fops = {
static struct miscdevice _autofs_dev_ioctl_misc = {
.minor = AUTOFS_MINOR,
.name = AUTOFS_DEVICE_NAME,
- .fops = &_dev_ioctl_fops
+ .fops = &_dev_ioctl_fops,
+ .mode = 0644,
};
MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6466153f2bf0..ec45d24875b1 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -252,7 +252,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
- NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
+ NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index cf93a4fad012..5aa9199dfb13 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -650,7 +650,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
NEW_AUX_ENT(AT_EUID, (elf_addr_t) from_kuid_munged(cred->user_ns, cred->euid));
NEW_AUX_ENT(AT_GID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->gid));
NEW_AUX_ENT(AT_EGID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->egid));
- NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
+ NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
NEW_AUX_ENT(AT_EXECFN, bprm->exec);
#ifdef ARCH_DLINFO
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a1e6860b6f46..ce6537c50ec1 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -192,13 +192,11 @@ static int decompress_exec(
memset(&strm, 0, sizeof(strm));
strm.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
- if (strm.workspace == NULL) {
- pr_debug("no memory for decompress workspace\n");
+ if (!strm.workspace)
return -ENOMEM;
- }
+
buf = kmalloc(LBUFSIZE, GFP_KERNEL);
- if (buf == NULL) {
- pr_debug("no memory for read buffer\n");
+ if (!buf) {
retval = -ENOMEM;
goto out_free;
}
@@ -890,7 +888,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
* as we're past the point of no return and are dealing with shared
* libraries.
*/
- bprm.cred_prepared = 1;
+ bprm.called_set_creds = 1;
res = prepare_binprm(&bprm);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9941dc8342df..bb715b2fcfb8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -223,7 +223,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
}
bio_init(&bio, vecs, nr_pages);
- bio.bi_bdev = bdev;
+ bio_set_dev(&bio, bdev);
bio.bi_iter.bi_sector = pos >> 9;
bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
@@ -362,7 +362,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
blk_start_plug(&plug);
for (;;) {
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> 9;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
@@ -1451,6 +1451,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
+ bdev->bd_partno = partno;
if (!partno) {
ret = -ENXIO;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 11d37c94ce05..fb07e3c22b9a 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -296,8 +296,7 @@ static void btrfsic_dev_state_hashtable_add(
struct btrfsic_dev_state *ds,
struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
-static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
- struct block_device *bdev,
+static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
struct btrfsic_dev_state_hashtable *h);
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
@@ -385,8 +384,7 @@ static int btrfsic_process_superblock_dev_mirror(
int superblock_mirror_num,
struct btrfsic_dev_state **selected_dev_state,
struct btrfs_super_block *selected_super);
-static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
- struct block_device *bdev);
+static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev);
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
u64 bytenr,
struct btrfsic_dev_state *dev_state,
@@ -626,17 +624,15 @@ static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
list_del(&ds->collision_resolving_node);
}
-static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
- struct block_device *bdev,
+static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
struct btrfsic_dev_state_hashtable *h)
{
const unsigned int hashval =
- (((unsigned int)((uintptr_t)bdev)) &
- (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
+ dev & (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1);
struct btrfsic_dev_state *ds;
list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
- if (ds->bdev == bdev)
+ if (ds->bdev->bd_dev == dev)
return ds;
}
@@ -668,7 +664,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
if (!device->bdev || !device->name)
continue;
- dev_state = btrfsic_dev_state_lookup(device->bdev);
+ dev_state = btrfsic_dev_state_lookup(device->bdev->bd_dev);
BUG_ON(NULL == dev_state);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
ret = btrfsic_process_superblock_dev_mirror(
@@ -1556,7 +1552,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
}
device = multi->stripes[0].dev;
- block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
+ block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev);
block_ctx_out->dev_bytenr = multi->stripes[0].physical;
block_ctx_out->start = bytenr;
block_ctx_out->len = len;
@@ -1639,7 +1635,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
unsigned int j;
bio = btrfs_io_bio_alloc(num_pages - i);
- bio->bi_bdev = block_ctx->dev->bdev;
+ bio_set_dev(bio, block_ctx->dev->bdev);
bio->bi_iter.bi_sector = dev_bytenr >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
@@ -2654,7 +2650,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
pr_info("btrfsic: error, kmalloc failed!\n");
return NULL;
}
- dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
+ dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev);
if (NULL == dev_state) {
pr_info("btrfsic: error, lookup dev_state failed!\n");
btrfsic_block_free(block);
@@ -2734,10 +2730,9 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
}
}
-static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
- struct block_device *bdev)
+static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev)
{
- return btrfsic_dev_state_hashtable_lookup(bdev,
+ return btrfsic_dev_state_hashtable_lookup(dev,
&btrfsic_dev_state_hashtable);
}
@@ -2751,7 +2746,7 @@ int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh)
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bh() might also be called before
* btrfsic_mount(), this might return NULL */
- dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
+ dev_state = btrfsic_dev_state_lookup(bh->b_bdev->bd_dev);
/* Only called to write the superblock (incl. FLUSH/FUA) */
if (NULL != dev_state &&
@@ -2808,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bio() is also called before
* btrfsic_mount(), this might return NULL */
- dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
+ dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
if (NULL != dev_state &&
(bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
unsigned int i = 0;
@@ -2824,10 +2819,10 @@ static void __btrfsic_submit_bio(struct bio *bio)
bio_is_patched = 0;
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
- pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
+ pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_disk=%p)\n",
bio_op(bio), bio->bi_opf, segs,
(unsigned long long)bio->bi_iter.bi_sector,
- dev_bytenr, bio->bi_bdev);
+ dev_bytenr, bio->bi_disk);
mapped_datav = kmalloc_array(segs,
sizeof(*mapped_datav), GFP_NOFS);
@@ -2856,8 +2851,8 @@ static void __btrfsic_submit_bio(struct bio *bio)
} else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
- pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
- bio_op(bio), bio->bi_opf, bio->bi_bdev);
+ pr_info("submit_bio(rw=%d,0x%x FLUSH, disk=%p)\n",
+ bio_op(bio), bio->bi_opf, bio->bi_disk);
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
if ((dev_state->state->print_mask &
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
@@ -2998,7 +2993,7 @@ void btrfsic_unmount(struct btrfs_fs_devices *fs_devices)
continue;
ds = btrfsic_dev_state_hashtable_lookup(
- device->bdev,
+ device->bdev->bd_dev,
&btrfsic_dev_state_hashtable);
if (NULL != ds) {
state = ds->state;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f45b61fe9a9a..4f428a48d513 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3499,7 +3499,7 @@ static void write_dev_flush(struct btrfs_device *device)
bio_reset(bio);
bio->bi_end_io = btrfs_end_empty_barrier;
- bio->bi_bdev = device->bdev;
+ bio_set_dev(bio, device->bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0aff9b278c19..42b12a85ab49 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2033,7 +2033,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
bio_put(bio);
return -EIO;
}
- bio->bi_bdev = dev->bdev;
+ bio_set_dev(bio, dev->bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
bio_add_page(bio, page, length, pg_offset);
@@ -2335,7 +2335,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
bio = btrfs_io_bio_alloc(1);
bio->bi_end_io = endio_func;
bio->bi_iter.bi_sector = failrec->logical >> 9;
- bio->bi_bdev = fs_info->fs_devices->latest_bdev;
+ bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
bio->bi_iter.bi_size = 0;
bio->bi_private = data;
@@ -2675,7 +2675,7 @@ struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
struct bio *bio;
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 2cf6ba40f7c4..24a62224b24b 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1090,7 +1090,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
*/
if (last_end == disk_start && stripe->dev->bdev &&
!last->bi_status &&
- last->bi_bdev == stripe->dev->bdev) {
+ last->bi_disk == stripe->dev->bdev->bd_disk &&
+ last->bi_partno == stripe->dev->bdev->bd_partno) {
ret = bio_add_page(last, page, PAGE_SIZE, 0);
if (ret == PAGE_SIZE)
return 0;
@@ -1100,7 +1101,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
/* put a new bio on the list */
bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
bio->bi_iter.bi_size = 0;
- bio->bi_bdev = stripe->dev->bdev;
+ bio_set_dev(bio, stripe->dev->bdev);
bio->bi_iter.bi_sector = disk_start >> 9;
bio_add_page(bio, page, PAGE_SIZE, 0);
@@ -1347,7 +1348,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
- bio->bi_bdev == stripe->dev->bdev) {
+ bio->bi_disk == stripe->dev->bdev->bd_disk &&
+ bio->bi_partno == stripe->dev->bdev->bd_partno) {
return i;
}
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6f1e4c984b94..b0b71e8e4c36 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1738,7 +1738,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
WARN_ON(!page->page);
bio = btrfs_io_bio_alloc(1);
- bio->bi_bdev = page->dev->bdev;
+ bio_set_dev(bio, page->dev->bdev);
bio_add_page(bio, page->page, PAGE_SIZE, 0);
if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
@@ -1826,7 +1826,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
}
bio = btrfs_io_bio_alloc(1);
- bio->bi_bdev = page_bad->dev->bdev;
+ bio_set_dev(bio, page_bad->dev->bdev);
bio->bi_iter.bi_sector = page_bad->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -1921,7 +1921,7 @@ again:
bio->bi_private = sbio;
bio->bi_end_io = scrub_wr_bio_end_io;
- bio->bi_bdev = sbio->dev->bdev;
+ bio_set_dev(bio, sbio->dev->bdev);
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
sbio->status = 0;
@@ -1964,7 +1964,7 @@ static void scrub_wr_submit(struct scrub_ctx *sctx)
sbio = sctx->wr_curr_bio;
sctx->wr_curr_bio = NULL;
- WARN_ON(!sbio->bio->bi_bdev);
+ WARN_ON(!sbio->bio->bi_disk);
scrub_pending_bio_inc(sctx);
/* process all writes in a single worker thread. Then the block layer
* orders the requests before sending them to the driver which
@@ -2321,7 +2321,7 @@ again:
bio->bi_private = sbio;
bio->bi_end_io = scrub_bio_end_io;
- bio->bi_bdev = sbio->dev->bdev;
+ bio_set_dev(bio, sbio->dev->bdev);
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
sbio->status = 0;
@@ -4627,7 +4627,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
bio = btrfs_io_bio_alloc(1);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
- bio->bi_bdev = dev->bdev;
+ bio_set_dev(bio, dev->bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
ret = bio_add_page(bio, page, PAGE_SIZE, 0);
if (ret != PAGE_SIZE) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bd679bc7a1a9..002aa318da67 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6188,7 +6188,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
rcu_read_unlock();
}
#endif
- bio->bi_bdev = dev->bdev;
+ bio_set_dev(bio, dev->bdev);
btrfs_bio_counter_inc_noblocked(fs_info);
diff --git a/fs/buffer.c b/fs/buffer.c
index 5715dac7821f..170df856bdb9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1627,20 +1627,17 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
struct pagevec pvec;
pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
pgoff_t end;
- int i;
+ int i, count;
struct buffer_head *bh;
struct buffer_head *head;
end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
pagevec_init(&pvec, 0);
- while (index <= end && pagevec_lookup(&pvec, bd_mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
+ while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
+ count = pagevec_count(&pvec);
+ for (i = 0; i < count; i++) {
struct page *page = pvec.pages[i];
- index = page->index;
- if (index > end)
- break;
if (!page_has_buffers(page))
continue;
/*
@@ -1670,7 +1667,9 @@ unlock_page:
}
pagevec_release(&pvec);
cond_resched();
- index++;
+ /* End of range already reached? */
+ if (index > end || !index)
+ break;
}
}
EXPORT_SYMBOL(clean_bdev_aliases);
@@ -3057,7 +3056,7 @@ void guard_bio_eod(int op, struct bio *bio)
struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned truncated_bytes;
- maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+ maxsector = get_capacity(bio->bi_disk);
if (!maxsector)
return;
@@ -3116,7 +3115,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
}
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
@@ -3549,10 +3548,10 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
pagevec_init(&pvec, 0);
do {
- unsigned want, nr_pages, i;
+ unsigned nr_pages, i;
- want = min_t(unsigned, end - index, PAGEVEC_SIZE);
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want);
+ nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
+ end - 1);
if (nr_pages == 0)
break;
@@ -3573,10 +3572,6 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
lastoff < page_offset(page))
goto check_range;
- /* Searching done if the page index is out of range. */
- if (page->index >= end)
- goto not_found;
-
lock_page(page);
if (likely(page->mapping == inode->i_mapping) &&
page_has_buffers(page)) {
@@ -3589,12 +3584,6 @@ page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
unlock_page(page);
lastoff = page_offset(page) + PAGE_SIZE;
}
-
- /* Searching done if fewer pages returned than wanted. */
- if (nr_pages < want)
- break;
-
- index = pvec.pages[i - 1]->index + 1;
pagevec_release(&pvec);
} while (index < end);
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 337f88673ed9..174d6e6569a8 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -194,36 +194,6 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
return FSCACHE_CHECKAUX_OKAY;
}
-static void ceph_fscache_inode_now_uncached(void* cookie_netfs_data)
-{
- struct ceph_inode_info* ci = cookie_netfs_data;
- struct pagevec pvec;
- pgoff_t first;
- int loop, nr_pages;
-
- pagevec_init(&pvec, 0);
- first = 0;
-
- dout("ceph inode 0x%p now uncached", ci);
-
- while (1) {
- nr_pages = pagevec_lookup(&pvec, ci->vfs_inode.i_mapping, first,
- PAGEVEC_SIZE - pagevec_count(&pvec));
-
- if (!nr_pages)
- break;
-
- for (loop = 0; loop < nr_pages; loop++)
- ClearPageFsCache(pvec.pages[loop]);
-
- first = pvec.pages[nr_pages - 1]->index + 1;
-
- pvec.nr = nr_pages;
- pagevec_release(&pvec);
- cond_resched();
- }
-}
-
static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
.name = "CEPH.inode",
.type = FSCACHE_COOKIE_TYPE_DATAFILE,
@@ -231,7 +201,6 @@ static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
.get_attr = ceph_fscache_inode_get_attr,
.get_aux = ceph_fscache_inode_get_aux,
.check_aux = ceph_fscache_inode_check_aux,
- .now_uncached = ceph_fscache_inode_now_uncached,
};
void ceph_fscache_register_inode_cookie(struct inode *inode)
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 6c665bf4a27c..2c14020e5e1d 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -292,36 +292,6 @@ fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
return FSCACHE_CHECKAUX_OKAY;
}
-static void cifs_fscache_inode_now_uncached(void *cookie_netfs_data)
-{
- struct cifsInodeInfo *cifsi = cookie_netfs_data;
- struct pagevec pvec;
- pgoff_t first;
- int loop, nr_pages;
-
- pagevec_init(&pvec, 0);
- first = 0;
-
- cifs_dbg(FYI, "%s: cifs inode 0x%p now uncached\n", __func__, cifsi);
-
- for (;;) {
- nr_pages = pagevec_lookup(&pvec,
- cifsi->vfs_inode.i_mapping, first,
- PAGEVEC_SIZE - pagevec_count(&pvec));
- if (!nr_pages)
- break;
-
- for (loop = 0; loop < nr_pages; loop++)
- ClearPageFsCache(pvec.pages[loop]);
-
- first = pvec.pages[nr_pages - 1]->index + 1;
-
- pvec.nr = nr_pages;
- pagevec_release(&pvec);
- cond_resched();
- }
-}
-
const struct fscache_cookie_def cifs_fscache_inode_object_def = {
.name = "CIFS.uniqueid",
.type = FSCACHE_COOKIE_TYPE_DATAFILE,
@@ -329,5 +299,4 @@ const struct fscache_cookie_def cifs_fscache_inode_object_def = {
.get_attr = cifs_fscache_inode_get_attr,
.get_aux = cifs_fscache_inode_get_aux,
.check_aux = cifs_fscache_inode_check_aux,
- .now_uncached = cifs_fscache_inode_now_uncached,
};
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 221693fe49ec..808486c29f0d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -421,7 +421,7 @@ struct smb_version_operations {
size_t, struct cifs_sb_info *);
int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *,
const char *, const void *, const __u16,
- const struct nls_table *, int);
+ const struct nls_table *, struct cifs_sb_info *);
struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *,
const char *, u32 *);
struct cifs_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 6eb3147132e3..4143c9dec463 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -484,7 +484,8 @@ extern ssize_t CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon,
extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
const char *fileName, const char *ea_name,
const void *ea_value, const __u16 ea_value_len,
- const struct nls_table *nls_codepage, int remap_special_chars);
+ const struct nls_table *nls_codepage,
+ struct cifs_sb_info *cifs_sb);
extern int CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon,
__u16 fid, struct cifs_ntsd **acl_inf, __u32 *buflen);
extern int CIFSSMBSetCIFSACL(const unsigned int, struct cifs_tcon *, __u16,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 118a63e7e221..35dc5bf01ee2 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -178,6 +178,18 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
* reconnect the same SMB session
*/
mutex_lock(&ses->session_mutex);
+
+ /*
+ * Recheck after acquire mutex. If another thread is negotiating
+ * and the server never sends an answer the socket will be closed
+ * and tcpStatus set to reconnect.
+ */
+ if (server->tcpStatus == CifsNeedReconnect) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&ses->session_mutex);
+ goto out;
+ }
+
rc = cifs_negotiate_protocol(0, ses);
if (rc == 0 && ses->need_reconnect)
rc = cifs_setup_session(0, ses, nls_codepage);
@@ -6264,7 +6276,7 @@ int
CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
const char *fileName, const char *ea_name, const void *ea_value,
const __u16 ea_value_len, const struct nls_table *nls_codepage,
- int remap)
+ struct cifs_sb_info *cifs_sb)
{
struct smb_com_transaction2_spi_req *pSMB = NULL;
struct smb_com_transaction2_spi_rsp *pSMBr = NULL;
@@ -6273,6 +6285,7 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
int rc = 0;
int bytes_returned = 0;
__u16 params, param_offset, byte_count, offset, count;
+ int remap = cifs_remap(cifs_sb);
cifs_dbg(FYI, "In SetEA\n");
SetEARetry:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 83a8f52cd879..5aa2d278ca84 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -509,7 +509,8 @@ server_unresponsive(struct TCP_Server_Info *server)
* 65s kernel_recvmsg times out, and we see that we haven't gotten
* a response in >60s.
*/
- if (server->tcpStatus == CifsGood &&
+ if ((server->tcpStatus == CifsGood ||
+ server->tcpStatus == CifsNeedNegotiate) &&
time_after(jiffies, server->lstrp + 2 * server->echo_interval)) {
cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n",
server->hostname, (2 * server->echo_interval) / HZ);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index cfacf2c97e94..fb2934b9b97c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -426,6 +426,194 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+static ssize_t
+move_smb2_ea_to_cifs(char *dst, size_t dst_size,
+ struct smb2_file_full_ea_info *src, size_t src_size,
+ const unsigned char *ea_name)
+{
+ int rc = 0;
+ unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0;
+ char *name, *value;
+ size_t name_len, value_len, user_name_len;
+
+ while (src_size > 0) {
+ name = &src->ea_data[0];
+ name_len = (size_t)src->ea_name_length;
+ value = &src->ea_data[src->ea_name_length + 1];
+ value_len = (size_t)le16_to_cpu(src->ea_value_length);
+
+ if (name_len == 0) {
+ break;
+ }
+
+ if (src_size < 8 + name_len + 1 + value_len) {
+ cifs_dbg(FYI, "EA entry goes beyond length of list\n");
+ rc = -EIO;
+ goto out;
+ }
+
+ if (ea_name) {
+ if (ea_name_len == name_len &&
+ memcmp(ea_name, name, name_len) == 0) {
+ rc = value_len;
+ if (dst_size == 0)
+ goto out;
+ if (dst_size < value_len) {
+ rc = -ERANGE;
+ goto out;
+ }
+ memcpy(dst, value, value_len);
+ goto out;
+ }
+ } else {
+ /* 'user.' plus a terminating null */
+ user_name_len = 5 + 1 + name_len;
+
+ rc += user_name_len;
+
+ if (dst_size >= user_name_len) {
+ dst_size -= user_name_len;
+ memcpy(dst, "user.", 5);
+ dst += 5;
+ memcpy(dst, src->ea_data, name_len);
+ dst += name_len;
+ *dst = 0;
+ ++dst;
+ } else if (dst_size == 0) {
+ /* skip copy - calc size only */
+ } else {
+ /* stop before overrun buffer */
+ rc = -ERANGE;
+ break;
+ }
+ }
+
+ if (!src->next_entry_offset)
+ break;
+
+ if (src_size < le32_to_cpu(src->next_entry_offset)) {
+ /* stop before overrun buffer */
+ rc = -ERANGE;
+ break;
+ }
+ src_size -= le32_to_cpu(src->next_entry_offset);
+ src = (void *)((char *)src +
+ le32_to_cpu(src->next_entry_offset));
+ }
+
+ /* didn't find the named attribute */
+ if (ea_name)
+ rc = -ENODATA;
+
+out:
+ return (ssize_t)rc;
+}
+
+static ssize_t
+smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
+ const unsigned char *path, const unsigned char *ea_name,
+ char *ea_data, size_t buf_size,
+ struct cifs_sb_info *cifs_sb)
+{
+ int rc;
+ __le16 *utf16_path;
+ __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ struct cifs_open_parms oparms;
+ struct cifs_fid fid;
+ struct smb2_file_full_ea_info *smb2_data;
+
+ utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
+ if (!utf16_path)
+ return -ENOMEM;
+
+ oparms.tcon = tcon;
+ oparms.desired_access = FILE_READ_EA;
+ oparms.disposition = FILE_OPEN;
+ oparms.create_options = 0;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+
+ rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
+ kfree(utf16_path);
+ if (rc) {
+ cifs_dbg(FYI, "open failed rc=%d\n", rc);
+ return rc;
+ }
+
+ smb2_data = kzalloc(SMB2_MAX_EA_BUF, GFP_KERNEL);
+ if (smb2_data == NULL) {
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ return -ENOMEM;
+ }
+
+ rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ smb2_data);
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+
+ if (!rc)
+ rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data,
+ SMB2_MAX_EA_BUF, ea_name);
+
+ kfree(smb2_data);
+ return rc;
+}
+
+
+static int
+smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
+ const char *path, const char *ea_name, const void *ea_value,
+ const __u16 ea_value_len, const struct nls_table *nls_codepage,
+ struct cifs_sb_info *cifs_sb)
+{
+ int rc;
+ __le16 *utf16_path;
+ __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ struct cifs_open_parms oparms;
+ struct cifs_fid fid;
+ struct smb2_file_full_ea_info *ea;
+ int ea_name_len = strlen(ea_name);
+ int len;
+
+ if (ea_name_len > 255)
+ return -EINVAL;
+
+ utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
+ if (!utf16_path)
+ return -ENOMEM;
+
+ oparms.tcon = tcon;
+ oparms.desired_access = FILE_WRITE_EA;
+ oparms.disposition = FILE_OPEN;
+ oparms.create_options = 0;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+
+ rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
+ kfree(utf16_path);
+ if (rc) {
+ cifs_dbg(FYI, "open failed rc=%d\n", rc);
+ return rc;
+ }
+
+ len = sizeof(ea) + ea_name_len + ea_value_len + 1;
+ ea = kzalloc(len, GFP_KERNEL);
+ if (ea == NULL) {
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+ return -ENOMEM;
+ }
+
+ ea->ea_name_length = ea_name_len;
+ ea->ea_value_length = cpu_to_le16(ea_value_len);
+ memcpy(ea->ea_data, ea_name, ea_name_len + 1);
+ memcpy(ea->ea_data + ea_name_len + 1, ea_value, ea_value_len);
+
+ rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea,
+ len);
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+
+ return rc;
+}
+
static bool
smb2_can_echo(struct TCP_Server_Info *server)
{
@@ -2572,6 +2760,10 @@ struct smb_version_operations smb20_operations = {
.dir_needs_close = smb2_dir_needs_close,
.get_dfs_refer = smb2_get_dfs_refer,
.select_sectype = smb2_select_sectype,
+#ifdef CONFIG_CIFS_XATTR
+ .query_all_EAs = smb2_query_eas,
+ .set_EA = smb2_set_ea,
+#endif /* CIFS_XATTR */
#ifdef CONFIG_CIFS_ACL
.get_acl = get_smb2_acl,
.get_acl_by_fid = get_smb2_acl_by_fid,
@@ -2662,6 +2854,10 @@ struct smb_version_operations smb21_operations = {
.enum_snapshots = smb3_enum_snapshots,
.get_dfs_refer = smb2_get_dfs_refer,
.select_sectype = smb2_select_sectype,
+#ifdef CONFIG_CIFS_XATTR
+ .query_all_EAs = smb2_query_eas,
+ .set_EA = smb2_set_ea,
+#endif /* CIFS_XATTR */
#ifdef CONFIG_CIFS_ACL
.get_acl = get_smb2_acl,
.get_acl_by_fid = get_smb2_acl_by_fid,
@@ -2762,6 +2958,10 @@ struct smb_version_operations smb30_operations = {
.receive_transform = smb3_receive_transform,
.get_dfs_refer = smb2_get_dfs_refer,
.select_sectype = smb2_select_sectype,
+#ifdef CONFIG_CIFS_XATTR
+ .query_all_EAs = smb2_query_eas,
+ .set_EA = smb2_set_ea,
+#endif /* CIFS_XATTR */
#ifdef CONFIG_CIFS_ACL
.get_acl = get_smb2_acl,
.get_acl_by_fid = get_smb2_acl_by_fid,
@@ -2863,6 +3063,10 @@ struct smb_version_operations smb311_operations = {
.receive_transform = smb3_receive_transform,
.get_dfs_refer = smb2_get_dfs_refer,
.select_sectype = smb2_select_sectype,
+#ifdef CONFIG_CIFS_XATTR
+ .query_all_EAs = smb2_query_eas,
+ .set_EA = smb2_set_ea,
+#endif /* CIFS_XATTR */
};
#endif /* CIFS_SMB311 */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 7aa67206f6da..5531e7ee1210 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -238,6 +238,18 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
* the same SMB session
*/
mutex_lock(&tcon->ses->session_mutex);
+
+ /*
+ * Recheck after acquire mutex. If another thread is negotiating
+ * and the server never sends an answer the socket will be closed
+ * and tcpStatus set to reconnect.
+ */
+ if (server->tcpStatus == CifsNeedReconnect) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&tcon->ses->session_mutex);
+ goto out;
+ }
+
rc = cifs_negotiate_protocol(0, tcon->ses);
if (!rc && tcon->ses->need_reconnect)
rc = cifs_setup_session(0, tcon->ses, nls_codepage);
@@ -2145,6 +2157,18 @@ qinf_exit:
return rc;
}
+int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid,
+ struct smb2_file_full_ea_info *data)
+{
+ return query_info(xid, tcon, persistent_fid, volatile_fid,
+ FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0,
+ SMB2_MAX_EA_BUF,
+ sizeof(struct smb2_file_full_ea_info),
+ (void **)&data,
+ NULL);
+}
+
int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, struct smb2_file_all_info *data)
{
@@ -3185,6 +3209,16 @@ SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
}
int
+SMB2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid,
+ struct smb2_file_full_ea_info *buf, int len)
+{
+ return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+ current->tgid, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE,
+ 0, 1, (void **)&buf, &len);
+}
+
+int
SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
const u64 persistent_fid, const u64 volatile_fid,
__u8 oplock_level)
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 2826882c81d1..393ed5f4e1b6 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -1178,6 +1178,16 @@ struct smb2_file_link_info { /* encoding of request for level 11 */
char FileName[0]; /* Name to be assigned to new link */
} __packed; /* level 11 Set */
+#define SMB2_MAX_EA_BUF 2048
+
+struct smb2_file_full_ea_info { /* encoding of response for level 15 */
+ __le32 next_entry_offset;
+ __u8 flags;
+ __u8 ea_name_length;
+ __le16 ea_value_length;
+ char ea_data[0]; /* \0 terminated name plus value */
+} __packed; /* level 15 Set */
+
/*
* This level 18, although with struct with same name is different from cifs
* level 0x107. Level 0x107 has an extra u64 between AccessFlags and
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 1cadaf9f3c58..003217099ef3 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -132,6 +132,9 @@ extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
+extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_file_id, u64 volatile_file_id,
+ struct smb2_file_full_ea_info *data);
extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct smb2_file_all_info *data);
@@ -169,6 +172,9 @@ extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
struct cifs_ntsd *pnntsd, int pacllen, int aclflag);
+extern int SMB2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid,
+ struct smb2_file_full_ea_info *buf, int len);
extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid);
extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index de50e749ff05..52f975d848a0 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -84,7 +84,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
if (pTcon->ses->server->ops->set_EA)
rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
full_path, name, value, (__u16)size,
- cifs_sb->local_nls, cifs_remap(cifs_sb));
+ cifs_sb->local_nls, cifs_sb);
break;
case XATTR_CIFS_ACL: {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 2dd4a7af7dd7..d27b326d96f4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1331,8 +1331,6 @@ COMPATIBLE_IOCTL(DMX_SET_FILTER)
COMPATIBLE_IOCTL(DMX_SET_PES_FILTER)
COMPATIBLE_IOCTL(DMX_SET_BUFFER_SIZE)
COMPATIBLE_IOCTL(DMX_GET_PES_PIDS)
-COMPATIBLE_IOCTL(DMX_GET_CAPS)
-COMPATIBLE_IOCTL(DMX_SET_SOURCE)
COMPATIBLE_IOCTL(DMX_GET_STC)
COMPATIBLE_IOCTL(FE_GET_INFO)
COMPATIBLE_IOCTL(FE_DISEQC_RESET_OVERLOAD)
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 6181e9526860..483784d5eb73 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -115,7 +115,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
err = -ENOMEM;
goto errout;
}
- bio->bi_bdev = inode->i_sb->s_bdev;
+ bio_set_dev(bio, inode->i_sb->s_bdev);
bio->bi_iter.bi_sector =
pblk << (inode->i_sb->s_blocksize_bits - 9);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
diff --git a/fs/dax.c b/fs/dax.c
index ab925dc6647a..6afcacb3a87b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -42,6 +42,9 @@
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
+/* The 'colour' (ie low bits) within a PMD of a page offset. */
+#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
+
static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
static int __init init_dax_wait_table(void)
@@ -54,6 +57,40 @@ static int __init init_dax_wait_table(void)
}
fs_initcall(init_dax_wait_table);
+/*
+ * We use lowest available bit in exceptional entry for locking, one bit for
+ * the entry size (PMD) and two more to tell us if the entry is a zero page or
+ * an empty entry that is just used for locking. In total four special bits.
+ *
+ * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
+ * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
+ * block allocation.
+ */
+#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
+#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
+#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
+#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
+#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
+
+static unsigned long dax_radix_sector(void *entry)
+{
+ return (unsigned long)entry >> RADIX_DAX_SHIFT;
+}
+
+static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+{
+ return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
+ ((unsigned long)sector << RADIX_DAX_SHIFT) |
+ RADIX_DAX_ENTRY_LOCK);
+}
+
+static unsigned int dax_radix_order(void *entry)
+{
+ if ((unsigned long)entry & RADIX_DAX_PMD)
+ return PMD_SHIFT - PAGE_SHIFT;
+ return 0;
+}
+
static int dax_is_pmd_entry(void *entry)
{
return (unsigned long)entry & RADIX_DAX_PMD;
@@ -66,7 +103,7 @@ static int dax_is_pte_entry(void *entry)
static int dax_is_zero_entry(void *entry)
{
- return (unsigned long)entry & RADIX_DAX_HZP;
+ return (unsigned long)entry & RADIX_DAX_ZERO_PAGE;
}
static int dax_is_empty_entry(void *entry)
@@ -98,7 +135,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
* the range covered by the PMD map to the same bit lock.
*/
if (dax_is_pmd_entry(entry))
- index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
+ index &= ~PG_PMD_COLOUR;
key->mapping = mapping;
key->entry_start = index;
@@ -121,6 +158,31 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mo
}
/*
+ * We do not necessarily hold the mapping->tree_lock when we call this
+ * function so it is possible that 'entry' is no longer a valid item in the
+ * radix tree. This is okay because all we really need to do is to find the
+ * correct waitqueue where tasks might be waiting for that old 'entry' and
+ * wake them.
+ */
+static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+ pgoff_t index, void *entry, bool wake_all)
+{
+ struct exceptional_entry_key key;
+ wait_queue_head_t *wq;
+
+ wq = dax_entry_waitqueue(mapping, index, entry, &key);
+
+ /*
+ * Checking for locked entry and prepare_to_wait_exclusive() happens
+ * under mapping->tree_lock, ditto for entry handling in our callers.
+ * So at this point all tasks that could have seen our entry locked
+ * must be in the waitqueue and the following check will see them.
+ */
+ if (waitqueue_active(wq))
+ __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
+}
+
+/*
* Check whether the given slot is locked. The function must be called with
* mapping->tree_lock held
*/
@@ -181,7 +243,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
for (;;) {
entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
&slot);
- if (!entry || !radix_tree_exceptional_entry(entry) ||
+ if (!entry ||
+ WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
!slot_locked(mapping, slot)) {
if (slotp)
*slotp = slot;
@@ -216,14 +279,9 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
}
static void put_locked_mapping_entry(struct address_space *mapping,
- pgoff_t index, void *entry)
+ pgoff_t index)
{
- if (!radix_tree_exceptional_entry(entry)) {
- unlock_page(entry);
- put_page(entry);
- } else {
- dax_unlock_mapping_entry(mapping, index);
- }
+ dax_unlock_mapping_entry(mapping, index);
}
/*
@@ -233,7 +291,7 @@ static void put_locked_mapping_entry(struct address_space *mapping,
static void put_unlocked_mapping_entry(struct address_space *mapping,
pgoff_t index, void *entry)
{
- if (!radix_tree_exceptional_entry(entry))
+ if (!entry)
return;
/* We have to wake up next waiter for the radix tree entry lock */
@@ -241,15 +299,15 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
}
/*
- * Find radix tree entry at given index. If it points to a page, return with
- * the page locked. If it points to the exceptional entry, return with the
- * radix tree entry locked. If the radix tree doesn't contain given index,
- * create empty exceptional entry for the index and return with it locked.
+ * Find radix tree entry at given index. If it points to an exceptional entry,
+ * return it with the radix tree entry locked. If the radix tree doesn't
+ * contain given index, create an empty exceptional entry for the index and
+ * return with it locked.
*
* When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
* either return that locked entry or will return an error. This error will
- * happen if there are any 4k entries (either zero pages or DAX entries)
- * within the 2MiB range that we are requesting.
+ * happen if there are any 4k entries within the 2MiB range that we are
+ * requesting.
*
* We always favor 4k entries over 2MiB entries. There isn't a flow where we
* evict 4k entries in order to 'upgrade' them to a 2MiB entry. A 2MiB
@@ -276,18 +334,21 @@ restart:
spin_lock_irq(&mapping->tree_lock);
entry = get_unlocked_mapping_entry(mapping, index, &slot);
+ if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
+ entry = ERR_PTR(-EIO);
+ goto out_unlock;
+ }
+
if (entry) {
if (size_flag & RADIX_DAX_PMD) {
- if (!radix_tree_exceptional_entry(entry) ||
- dax_is_pte_entry(entry)) {
+ if (dax_is_pte_entry(entry)) {
put_unlocked_mapping_entry(mapping, index,
entry);
entry = ERR_PTR(-EEXIST);
goto out_unlock;
}
} else { /* trying to grab a PTE entry */
- if (radix_tree_exceptional_entry(entry) &&
- dax_is_pmd_entry(entry) &&
+ if (dax_is_pmd_entry(entry) &&
(dax_is_zero_entry(entry) ||
dax_is_empty_entry(entry))) {
pmd_downgrade = true;
@@ -321,7 +382,7 @@ restart:
mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
if (err) {
if (pmd_downgrade)
- put_locked_mapping_entry(mapping, index, entry);
+ put_locked_mapping_entry(mapping, index);
return ERR_PTR(err);
}
spin_lock_irq(&mapping->tree_lock);
@@ -371,52 +432,12 @@ restart:
spin_unlock_irq(&mapping->tree_lock);
return entry;
}
- /* Normal page in radix tree? */
- if (!radix_tree_exceptional_entry(entry)) {
- struct page *page = entry;
-
- get_page(page);
- spin_unlock_irq(&mapping->tree_lock);
- lock_page(page);
- /* Page got truncated? Retry... */
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- put_page(page);
- goto restart;
- }
- return page;
- }
entry = lock_slot(mapping, slot);
out_unlock:
spin_unlock_irq(&mapping->tree_lock);
return entry;
}
-/*
- * We do not necessarily hold the mapping->tree_lock when we call this
- * function so it is possible that 'entry' is no longer a valid item in the
- * radix tree. This is okay because all we really need to do is to find the
- * correct waitqueue where tasks might be waiting for that old 'entry' and
- * wake them.
- */
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
- pgoff_t index, void *entry, bool wake_all)
-{
- struct exceptional_entry_key key;
- wait_queue_head_t *wq;
-
- wq = dax_entry_waitqueue(mapping, index, entry, &key);
-
- /*
- * Checking for locked entry and prepare_to_wait_exclusive() happens
- * under mapping->tree_lock, ditto for entry handling in our callers.
- * So at this point all tasks that could have seen our entry locked
- * must be in the waitqueue and the following check will see them.
- */
- if (waitqueue_active(wq))
- __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
-}
-
static int __dax_invalidate_mapping_entry(struct address_space *mapping,
pgoff_t index, bool trunc)
{
@@ -426,7 +447,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
spin_lock_irq(&mapping->tree_lock);
entry = get_unlocked_mapping_entry(mapping, index, NULL);
- if (!entry || !radix_tree_exceptional_entry(entry))
+ if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
goto out;
if (!trunc &&
(radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
@@ -468,50 +489,6 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
return __dax_invalidate_mapping_entry(mapping, index, false);
}
-/*
- * The user has performed a load from a hole in the file. Allocating
- * a new page in the file would cause excessive storage usage for
- * workloads with sparse files. We allocate a page cache page instead.
- * We'll kick it out of the page cache if it's ever written to,
- * otherwise it will simply fall out of the page cache under memory
- * pressure without ever having been dirtied.
- */
-static int dax_load_hole(struct address_space *mapping, void **entry,
- struct vm_fault *vmf)
-{
- struct inode *inode = mapping->host;
- struct page *page;
- int ret;
-
- /* Hole page already exists? Return it... */
- if (!radix_tree_exceptional_entry(*entry)) {
- page = *entry;
- goto finish_fault;
- }
-
- /* This will replace locked radix tree entry with a hole page */
- page = find_or_create_page(mapping, vmf->pgoff,
- vmf->gfp_mask | __GFP_ZERO);
- if (!page) {
- ret = VM_FAULT_OOM;
- goto out;
- }
-
-finish_fault:
- vmf->page = page;
- ret = finish_fault(vmf);
- vmf->page = NULL;
- *entry = page;
- if (!ret) {
- /* Grab reference for PTE that is now referencing the page */
- get_page(page);
- ret = VM_FAULT_NOPAGE;
- }
-out:
- trace_dax_load_hole(inode, vmf, ret);
- return ret;
-}
-
static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
sector_t sector, size_t size, struct page *to,
unsigned long vaddr)
@@ -552,47 +529,27 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
unsigned long flags)
{
struct radix_tree_root *page_tree = &mapping->page_tree;
- int error = 0;
- bool hole_fill = false;
void *new_entry;
pgoff_t index = vmf->pgoff;
if (vmf->flags & FAULT_FLAG_WRITE)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- /* Replacing hole page with block mapping? */
- if (!radix_tree_exceptional_entry(entry)) {
- hole_fill = true;
- /*
- * Unmap the page now before we remove it from page cache below.
- * The page is locked so it cannot be faulted in again.
- */
- unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
- PAGE_SIZE, 0);
- error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
- if (error)
- return ERR_PTR(error);
- } else if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_HZP)) {
- /* replacing huge zero page with PMD block mapping */
- unmap_mapping_range(mapping,
- (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
+ if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
+ /* we are replacing a zero page with block mapping */
+ if (dax_is_pmd_entry(entry))
+ unmap_mapping_range(mapping,
+ (vmf->pgoff << PAGE_SHIFT) & PMD_MASK,
+ PMD_SIZE, 0);
+ else /* pte entry */
+ unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
+ PAGE_SIZE, 0);
}
spin_lock_irq(&mapping->tree_lock);
new_entry = dax_radix_locked_entry(sector, flags);
- if (hole_fill) {
- __delete_from_page_cache(entry, NULL);
- /* Drop pagecache reference */
- put_page(entry);
- error = __radix_tree_insert(page_tree, index,
- dax_radix_order(new_entry), new_entry);
- if (error) {
- new_entry = ERR_PTR(error);
- goto unlock;
- }
- mapping->nrexceptional++;
- } else if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+ if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
/*
* Only swap our new entry into the radix tree if the current
* entry is a zero page or an empty entry. If a normal PTE or
@@ -609,23 +566,14 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
WARN_ON_ONCE(ret != entry);
__radix_tree_replace(page_tree, node, slot,
new_entry, NULL, NULL);
+ entry = new_entry;
}
+
if (vmf->flags & FAULT_FLAG_WRITE)
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
- unlock:
+
spin_unlock_irq(&mapping->tree_lock);
- if (hole_fill) {
- radix_tree_preload_end();
- /*
- * We don't need hole page anymore, it has been replaced with
- * locked radix tree entry now.
- */
- if (mapping->a_ops->freepage)
- mapping->a_ops->freepage(entry);
- unlock_page(entry);
- put_page(entry);
- }
- return new_entry;
+ return entry;
}
static inline unsigned long
@@ -727,7 +675,7 @@ static int dax_writeback_one(struct block_device *bdev,
spin_lock_irq(&mapping->tree_lock);
entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
/* Entry got punched out / reallocated? */
- if (!entry2 || !radix_tree_exceptional_entry(entry2))
+ if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
goto put_unlocked;
/*
* Entry got reallocated elsewhere? No need to writeback. We have to
@@ -799,7 +747,7 @@ static int dax_writeback_one(struct block_device *bdev,
trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
dax_unlock:
dax_read_unlock(id);
- put_locked_mapping_entry(mapping, index, entry);
+ put_locked_mapping_entry(mapping, index);
return ret;
put_unlocked:
@@ -874,11 +822,10 @@ EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
static int dax_insert_mapping(struct address_space *mapping,
struct block_device *bdev, struct dax_device *dax_dev,
- sector_t sector, size_t size, void **entryp,
+ sector_t sector, size_t size, void *entry,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
unsigned long vaddr = vmf->address;
- void *entry = *entryp;
void *ret, *kaddr;
pgoff_t pgoff;
int id, rc;
@@ -899,47 +846,48 @@ static int dax_insert_mapping(struct address_space *mapping,
ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0);
if (IS_ERR(ret))
return PTR_ERR(ret);
- *entryp = ret;
trace_dax_insert_mapping(mapping->host, vmf, ret);
- return vm_insert_mixed(vma, vaddr, pfn);
+ if (vmf->flags & FAULT_FLAG_WRITE)
+ return vm_insert_mixed_mkwrite(vma, vaddr, pfn);
+ else
+ return vm_insert_mixed(vma, vaddr, pfn);
}
-/**
- * dax_pfn_mkwrite - handle first write to DAX page
- * @vmf: The description of the fault
+/*
+ * The user has performed a load from a hole in the file. Allocating a new
+ * page in the file would cause excessive storage usage for workloads with
+ * sparse files. Instead we insert a read-only mapping of the 4k zero page.
+ * If this page is ever written to we will re-fault and change the mapping to
+ * point to real DAX storage instead.
*/
-int dax_pfn_mkwrite(struct vm_fault *vmf)
+static int dax_load_hole(struct address_space *mapping, void *entry,
+ struct vm_fault *vmf)
{
- struct file *file = vmf->vma->vm_file;
- struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- void *entry, **slot;
- pgoff_t index = vmf->pgoff;
+ unsigned long vaddr = vmf->address;
+ int ret = VM_FAULT_NOPAGE;
+ struct page *zero_page;
+ void *entry2;
- spin_lock_irq(&mapping->tree_lock);
- entry = get_unlocked_mapping_entry(mapping, index, &slot);
- if (!entry || !radix_tree_exceptional_entry(entry)) {
- if (entry)
- put_unlocked_mapping_entry(mapping, index, entry);
- spin_unlock_irq(&mapping->tree_lock);
- trace_dax_pfn_mkwrite_no_entry(inode, vmf, VM_FAULT_NOPAGE);
- return VM_FAULT_NOPAGE;
+ zero_page = ZERO_PAGE(0);
+ if (unlikely(!zero_page)) {
+ ret = VM_FAULT_OOM;
+ goto out;
}
- radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
- entry = lock_slot(mapping, slot);
- spin_unlock_irq(&mapping->tree_lock);
- /*
- * If we race with somebody updating the PTE and finish_mkwrite_fault()
- * fails, we don't care. We need to return VM_FAULT_NOPAGE and retry
- * the fault in either case.
- */
- finish_mkwrite_fault(vmf);
- put_locked_mapping_entry(mapping, index, entry);
- trace_dax_pfn_mkwrite(inode, vmf, VM_FAULT_NOPAGE);
- return VM_FAULT_NOPAGE;
+
+ entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+ RADIX_DAX_ZERO_PAGE);
+ if (IS_ERR(entry2)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+
+ vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page));
+out:
+ trace_dax_load_hole(inode, vmf, ret);
+ return ret;
}
-EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
static bool dax_range_is_aligned(struct block_device *bdev,
unsigned int offset, unsigned int length)
@@ -1059,6 +1007,11 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (map_len > end - pos)
map_len = end - pos;
+ /*
+ * The userspace address for the memory copy has already been
+ * validated via access_ok() in either vfs_read() or
+ * vfs_write(), depending on which operation we are doing.
+ */
if (iov_iter_rw(iter) == WRITE)
map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr,
map_len, iter);
@@ -1223,7 +1176,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
major = VM_FAULT_MAJOR;
}
error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
- sector, PAGE_SIZE, &entry, vmf->vma, vmf);
+ sector, PAGE_SIZE, entry, vmf->vma, vmf);
/* -EBUSY is fine, somebody else faulted on the same PTE */
if (error == -EBUSY)
error = 0;
@@ -1231,7 +1184,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (!(vmf->flags & FAULT_FLAG_WRITE)) {
- vmf_ret = dax_load_hole(mapping, &entry, vmf);
+ vmf_ret = dax_load_hole(mapping, entry, vmf);
goto finish_iomap;
}
/*FALLTHRU*/
@@ -1258,21 +1211,15 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
}
unlock_entry:
- put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ put_locked_mapping_entry(mapping, vmf->pgoff);
out:
trace_dax_pte_fault_done(inode, vmf, vmf_ret);
return vmf_ret;
}
#ifdef CONFIG_FS_DAX_PMD
-/*
- * The 'colour' (ie low bits) within a PMD of a page offset. This comes up
- * more often than one might expect in the below functions.
- */
-#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
-
static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
- loff_t pos, void **entryp)
+ loff_t pos, void *entry)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
const sector_t sector = dax_iomap_sector(iomap, pos);
@@ -1283,7 +1230,7 @@ static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
void *ret = NULL, *kaddr;
long length = 0;
pgoff_t pgoff;
- pfn_t pfn;
+ pfn_t pfn = {};
int id;
if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0)
@@ -1303,11 +1250,10 @@ static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
goto unlock_fallback;
dax_read_unlock(id);
- ret = dax_insert_mapping_entry(mapping, vmf, *entryp, sector,
+ ret = dax_insert_mapping_entry(mapping, vmf, entry, sector,
RADIX_DAX_PMD);
if (IS_ERR(ret))
goto fallback;
- *entryp = ret;
trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret);
return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
@@ -1321,7 +1267,7 @@ fallback:
}
static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
- void **entryp)
+ void *entry)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
@@ -1336,11 +1282,10 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
if (unlikely(!zero_page))
goto fallback;
- ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
- RADIX_DAX_PMD | RADIX_DAX_HZP);
+ ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+ RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE);
if (IS_ERR(ret))
goto fallback;
- *entryp = ret;
ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (!pmd_none(*(vmf->pmd))) {
@@ -1416,10 +1361,10 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
goto fallback;
/*
- * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
- * PMD or a HZP entry. If it can't (because a 4k page is already in
- * the tree, for instance), it will return -EEXIST and we just fall
- * back to 4k entries.
+ * grab_mapping_entry() will make sure we get a 2MiB empty entry, a
+ * 2MiB zero page entry or a DAX PMD. If it can't (because a 4k page
+ * is already in the tree, for instance), it will return -EEXIST and
+ * we just fall back to 4k entries.
*/
entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
if (IS_ERR(entry))
@@ -1452,13 +1397,13 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
switch (iomap.type) {
case IOMAP_MAPPED:
- result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
+ result = dax_pmd_insert_mapping(vmf, &iomap, pos, entry);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
break;
- result = dax_pmd_load_hole(vmf, &iomap, &entry);
+ result = dax_pmd_load_hole(vmf, &iomap, entry);
break;
default:
WARN_ON_ONCE(1);
@@ -1481,7 +1426,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
&iomap);
}
unlock_entry:
- put_locked_mapping_entry(mapping, pgoff, entry);
+ put_locked_mapping_entry(mapping, pgoff);
fallback:
if (result == VM_FAULT_FALLBACK) {
split_huge_pmd(vma, vmf->pmd, vmf->address);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 08cf27811e5a..5fa2211e49ae 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -111,7 +111,7 @@ struct dio {
int op;
int op_flags;
blk_qc_t bio_cookie;
- struct block_device *bio_bdev;
+ struct gendisk *bio_disk;
struct inode *inode;
loff_t i_size; /* i_size when submitted */
dio_iodone_t *end_io; /* IO completion function */
@@ -377,7 +377,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
*/
bio = bio_alloc(GFP_KERNEL, nr_vecs);
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_sector;
bio_set_op_attrs(bio, dio->op, dio->op_flags);
if (dio->is_async)
@@ -412,7 +412,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty)
bio_set_pages_dirty(bio);
- dio->bio_bdev = bio->bi_bdev;
+ dio->bio_disk = bio->bi_disk;
if (sdio->submit_io) {
sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio);
@@ -458,7 +458,7 @@ static struct bio *dio_await_one(struct dio *dio)
dio->waiter = current;
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
- !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
+ !blk_mq_poll(dio->bio_disk->queue, dio->bio_cookie))
io_schedule();
/* wake up sets us TASK_RUNNING */
spin_lock_irqsave(&dio->bio_lock, flags);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index adbe328b957c..2fabd19cdeea 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -205,7 +205,7 @@ struct eventpoll {
struct list_head rdllist;
/* RB tree root used to store monitored fd structs */
- struct rb_root rbr;
+ struct rb_root_cached rbr;
/*
* This is a single linked list that chains all the "struct epitem" that
@@ -796,7 +796,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
list_del_rcu(&epi->fllink);
spin_unlock(&file->f_lock);
- rb_erase(&epi->rbn, &ep->rbr);
+ rb_erase_cached(&epi->rbn, &ep->rbr);
spin_lock_irqsave(&ep->lock, flags);
if (ep_is_linked(&epi->rdllink))
@@ -840,7 +840,7 @@ static void ep_free(struct eventpoll *ep)
/*
* Walks through the whole tree by unregistering poll callbacks.
*/
- for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
ep_unregister_pollwait(ep, epi);
@@ -856,7 +856,7 @@ static void ep_free(struct eventpoll *ep)
* a lockdep warning.
*/
mutex_lock(&ep->mtx);
- while ((rbp = rb_first(&ep->rbr)) != NULL) {
+ while ((rbp = rb_first_cached(&ep->rbr)) != NULL) {
epi = rb_entry(rbp, struct epitem, rbn);
ep_remove(ep, epi);
cond_resched();
@@ -963,7 +963,7 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
struct rb_node *rbp;
mutex_lock(&ep->mtx);
- for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
struct inode *inode = file_inode(epi->ffd.file);
@@ -1040,7 +1040,7 @@ static int ep_alloc(struct eventpoll **pep)
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
- ep->rbr = RB_ROOT;
+ ep->rbr = RB_ROOT_CACHED;
ep->ovflist = EP_UNACTIVE_PTR;
ep->user = user;
@@ -1066,7 +1066,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
struct epoll_filefd ffd;
ep_set_ffd(&ffd, file, fd);
- for (rbp = ep->rbr.rb_node; rbp; ) {
+ for (rbp = ep->rbr.rb_root.rb_node; rbp; ) {
epi = rb_entry(rbp, struct epitem, rbn);
kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
if (kcmp > 0)
@@ -1088,7 +1088,7 @@ static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long t
struct rb_node *rbp;
struct epitem *epi;
- for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (epi->ffd.fd == tfd) {
if (toff == 0)
@@ -1273,20 +1273,22 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
{
int kcmp;
- struct rb_node **p = &ep->rbr.rb_node, *parent = NULL;
+ struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL;
struct epitem *epic;
+ bool leftmost = true;
while (*p) {
parent = *p;
epic = rb_entry(parent, struct epitem, rbn);
kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
- if (kcmp > 0)
+ if (kcmp > 0) {
p = &parent->rb_right;
- else
+ leftmost = false;
+ } else
p = &parent->rb_left;
}
rb_link_node(&epi->rbn, parent, p);
- rb_insert_color(&epi->rbn, &ep->rbr);
+ rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost);
}
@@ -1530,7 +1532,7 @@ error_remove_epi:
list_del_rcu(&epi->fllink);
spin_unlock(&tfile->f_lock);
- rb_erase(&epi->rbn, &ep->rbr);
+ rb_erase_cached(&epi->rbn, &ep->rbr);
error_unregister:
ep_unregister_pollwait(ep, epi);
@@ -1878,7 +1880,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
mutex_lock_nested(&ep->mtx, call_nests + 1);
ep->visited = 1;
list_add(&ep->visited_list_link, &visited_list);
- for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
diff --git a/fs/exec.c b/fs/exec.c
index 62175cbcc801..01a9fb9d8ac3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1259,6 +1259,12 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
perf_event_comm(tsk, exec);
}
+/*
+ * Calling this is the point of no return. None of the failures will be
+ * seen by userspace since either the process is already taking a fatal
+ * signal (via de_thread() or coredump), or will have SEGV raised
+ * (after exec_mmap()) by search_binary_handlers (see below).
+ */
int flush_old_exec(struct linux_binprm * bprm)
{
int retval;
@@ -1286,7 +1292,13 @@ int flush_old_exec(struct linux_binprm * bprm)
if (retval)
goto out;
- bprm->mm = NULL; /* We're using it now */
+ /*
+ * After clearing bprm->mm (to mark that current is using the
+ * prepared mm now), we have nothing left of the original
+ * process. If anything from here on returns an error, the check
+ * in search_binary_handler() will SEGV current.
+ */
+ bprm->mm = NULL;
set_fs(USER_DS);
current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
@@ -1331,15 +1343,38 @@ EXPORT_SYMBOL(would_dump);
void setup_new_exec(struct linux_binprm * bprm)
{
+ /*
+ * Once here, prepare_binrpm() will not be called any more, so
+ * the final state of setuid/setgid/fscaps can be merged into the
+ * secureexec flag.
+ */
+ bprm->secureexec |= bprm->cap_elevated;
+
+ if (bprm->secureexec) {
+ /* Make sure parent cannot signal privileged process. */
+ current->pdeath_signal = 0;
+
+ /*
+ * For secureexec, reset the stack limit to sane default to
+ * avoid bad behavior from the prior rlimits. This has to
+ * happen before arch_pick_mmap_layout(), which examines
+ * RLIMIT_STACK, but after the point of no return to avoid
+ * needing to clean up the change on failure.
+ */
+ if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
+ current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+ }
+
arch_pick_mmap_layout(current->mm);
- /* This is the point of no return */
current->sas_ss_sp = current->sas_ss_size = 0;
- if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
- set_dumpable(current->mm, SUID_DUMP_USER);
- else
+ /* Figure out dumpability. */
+ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
+ bprm->secureexec)
set_dumpable(current->mm, suid_dumpable);
+ else
+ set_dumpable(current->mm, SUID_DUMP_USER);
arch_setup_new_exec();
perf_event_exec();
@@ -1351,15 +1386,6 @@ void setup_new_exec(struct linux_binprm * bprm)
*/
current->mm->task_size = TASK_SIZE;
- /* install the new credentials */
- if (!uid_eq(bprm->cred->uid, current_euid()) ||
- !gid_eq(bprm->cred->gid, current_egid())) {
- current->pdeath_signal = 0;
- } else {
- if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
- set_dumpable(current->mm, suid_dumpable);
- }
-
/* An exec changes our domain. We are no longer part of the thread
group */
current->self_exec_id++;
@@ -1548,7 +1574,7 @@ int prepare_binprm(struct linux_binprm *bprm)
retval = security_bprm_set_creds(bprm);
if (retval)
return retval;
- bprm->cred_prepared = 1;
+ bprm->called_set_creds = 1;
memset(bprm->buf, 0, BINPRM_BUF_SIZE);
return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 8bb72807e70d..3c6a9c156b7a 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -869,7 +869,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
goto out;
}
- bio->bi_bdev = NULL;
+ bio->bi_disk = NULL;
bio->bi_next = NULL;
per_dev->offset = master_dev->offset;
per_dev->length = master_dev->length;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index d34d32bdc944..ff3a3636a5ca 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -107,29 +107,6 @@ static int ext2_dax_fault(struct vm_fault *vmf)
return ret;
}
-static int ext2_dax_pfn_mkwrite(struct vm_fault *vmf)
-{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct ext2_inode_info *ei = EXT2_I(inode);
- loff_t size;
- int ret;
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vmf->vma->vm_file);
- down_read(&ei->dax_sem);
-
- /* check that the faulting page hasn't raced with truncate */
- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (vmf->pgoff >= size)
- ret = VM_FAULT_SIGBUS;
- else
- ret = dax_pfn_mkwrite(vmf);
-
- up_read(&ei->dax_sem);
- sb_end_pagefault(inode->i_sb);
- return ret;
-}
-
static const struct vm_operations_struct ext2_dax_vm_ops = {
.fault = ext2_dax_fault,
/*
@@ -138,7 +115,7 @@ static const struct vm_operations_struct ext2_dax_vm_ops = {
* will always fail and fail back to regular faults.
*/
.page_mkwrite = ext2_dax_fault,
- .pfn_mkwrite = ext2_dax_pfn_mkwrite,
+ .pfn_mkwrite = ext2_dax_fault,
};
static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 197653ea6041..57dcaea762c3 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -324,41 +324,11 @@ static int ext4_dax_fault(struct vm_fault *vmf)
return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
}
-/*
- * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
- * handler we check for races agaist truncate. Note that since we cycle through
- * i_mmap_sem, we are sure that also any hole punching that began before we
- * were called is finished by now and so if it included part of the file we
- * are working on, our pte will get unmapped and the check for pte_same() in
- * wp_pfn_shared() fails. Thus fault gets retried and things work out as
- * desired.
- */
-static int ext4_dax_pfn_mkwrite(struct vm_fault *vmf)
-{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct super_block *sb = inode->i_sb;
- loff_t size;
- int ret;
-
- sb_start_pagefault(sb);
- file_update_time(vmf->vma->vm_file);
- down_read(&EXT4_I(inode)->i_mmap_sem);
- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (vmf->pgoff >= size)
- ret = VM_FAULT_SIGBUS;
- else
- ret = dax_pfn_mkwrite(vmf);
- up_read(&EXT4_I(inode)->i_mmap_sem);
- sb_end_pagefault(sb);
-
- return ret;
-}
-
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.huge_fault = ext4_dax_huge_fault,
.page_mkwrite = ext4_dax_fault,
- .pfn_mkwrite = ext4_dax_pfn_mkwrite,
+ .pfn_mkwrite = ext4_dax_fault,
};
#else
#define ext4_dax_vm_ops ext4_file_vm_ops
@@ -507,12 +477,11 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
pagevec_init(&pvec, 0);
do {
- int i, num;
+ int i;
unsigned long nr_pages;
- num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
- (pgoff_t)num);
+ nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
+ &index, end);
if (nr_pages == 0)
break;
@@ -531,9 +500,6 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
goto out;
}
- if (page->index > end)
- goto out;
-
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping)) {
@@ -576,14 +542,10 @@ next:
unlock_page(page);
}
- /* The no. of pages is less than our desired, we are done. */
- if (nr_pages < num)
- break;
-
- index = pvec.pages[i - 1]->index + 1;
pagevec_release(&pvec);
} while (index <= end);
+ /* There are no pages upto endoff - that would be a hole in there. */
if (whence == SEEK_HOLE && lastoff < endoff) {
found = 1;
*offset = lastoff;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 714396760616..e963508ea35f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1720,13 +1720,12 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
pagevec_init(&pvec, 0);
while (index <= end) {
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+ nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- if (page->index > end)
- break;
+
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (invalidate) {
@@ -1737,7 +1736,6 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
}
unlock_page(page);
}
- index = pvec.pages[nr_pages - 1]->index + 1;
pagevec_release(&pvec);
}
}
@@ -2348,17 +2346,13 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
pagevec_init(&pvec, 0);
while (start <= end) {
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
- PAGEVEC_SIZE);
+ nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
+ &start, end);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- if (page->index > end)
- break;
- /* Up to 'end' pages must be contiguous */
- BUG_ON(page->index != start);
bh = head = page_buffers(page);
do {
if (lblk < mpd->map.m_lblk)
@@ -2403,7 +2397,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
pagevec_release(&pvec);
return err;
}
- start++;
}
pagevec_release(&pvec);
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index c2fce4478cca..55ad7dd149d0 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -300,7 +300,7 @@ static void ext4_end_bio(struct bio *bio)
char b[BDEVNAME_SIZE];
if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
- bdevname(bio->bi_bdev, b),
+ bio_devname(bio, b),
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
bio->bi_status)) {
@@ -375,7 +375,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
return -ENOMEM;
wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 40a5497b0f60..04c90643af7a 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -254,7 +254,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
fscrypt_release_ctx(ctx);
goto set_error_page;
}
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
bio->bi_private = ctx;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c9e7be58756b..93aece6891f2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5215,7 +5215,7 @@ static int ext4_statfs_project(struct super_block *sb,
dquot = dqget(sb, qid);
if (IS_ERR(dquot))
return PTR_ERR(dquot);
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
limit = (dquot->dq_dqb.dqb_bsoftlimit ?
dquot->dq_dqb.dqb_bsoftlimit :
@@ -5238,7 +5238,7 @@ static int ext4_statfs_project(struct super_block *sb,
(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
}
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
dqput(dquot);
return 0;
}
@@ -5284,18 +5284,13 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-/* Helper function for writing quotas on sync - we need to start transaction
- * before quota file is locked for write. Otherwise the are possible deadlocks:
- * Process 1 Process 2
- * ext4_create() quota_sync()
- * jbd2_journal_start() write_dquot()
- * dquot_initialize() down(dqio_mutex)
- * down(dqio_mutex) jbd2_journal_start()
- *
- */
#ifdef CONFIG_QUOTA
+/*
+ * Helper functions so that transaction is started before we acquire dqio_sem
+ * to keep correct lock ordering of transaction > dqio_sem
+ */
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
@@ -5430,6 +5425,13 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
ext4_msg(sb, KERN_WARNING,
"Quota file not on filesystem root. "
"Journaled quota will not work");
+ sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
+ } else {
+ /*
+ * Clear the flag just in case mount options changed since
+ * last time.
+ */
+ sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
}
/*
@@ -5526,7 +5528,7 @@ static int ext4_enable_quotas(struct super_block *sb)
test_opt(sb, PRJQUOTA),
};
- sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 87c1f4150c64..fb96bb71da00 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -142,7 +142,7 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
}
}
if (bio) {
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
}
return bdev;
@@ -161,7 +161,8 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
static bool __same_bdev(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio)
{
- return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
+ struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
+ return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}
/*
@@ -2252,7 +2253,10 @@ int f2fs_migrate_page(struct address_space *mapping,
SetPagePrivate(newpage);
set_page_private(newpage, page_private(page));
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f964b68718c1..6f8fc4a6e701 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -447,7 +447,7 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi,
int ret;
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
ret = submit_bio_wait(bio);
bio_put(bio);
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6a7152d0c250..02c066663a3a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -19,6 +19,8 @@
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/namei.h>
+#include <linux/kernel.h>
+
#include "fat.h"
static inline unsigned long vfat_d_version(struct dentry *dentry)
@@ -510,10 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
struct nls_table *nls)
{
const unsigned char *ip;
- unsigned char nc;
unsigned char *op;
- unsigned int ec;
- int i, k, fill;
+ int i, fill;
int charlen;
if (utf8) {
@@ -530,33 +530,22 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
i < len && *outlen < FAT_LFN_LEN;
*outlen += 1) {
if (escape && (*ip == ':')) {
+ u8 uc[2];
+
if (i > len - 5)
return -EINVAL;
- ec = 0;
- for (k = 1; k < 5; k++) {
- nc = ip[k];
- ec <<= 4;
- if (nc >= '0' && nc <= '9') {
- ec |= nc - '0';
- continue;
- }
- if (nc >= 'a' && nc <= 'f') {
- ec |= nc - ('a' - 10);
- continue;
- }
- if (nc >= 'A' && nc <= 'F') {
- ec |= nc - ('A' - 10);
- continue;
- }
+
+ if (hex2bin(uc, ip + 1, 2) < 0)
return -EINVAL;
- }
- *op++ = ec & 0xFF;
- *op++ = ec >> 8;
+
+ *(wchar_t *)op = uc[0] << 8 | uc[1];
+
+ op += 2;
ip += 5;
i += 5;
} else {
charlen = nls->char2uni(ip, len - i,
- (wchar_t *)op);
+ (wchar_t *)op);
if (charlen < 0)
return -EINVAL;
ip += charlen;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index c8c4f79c7ce1..0ad3fd3ad0b4 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -1178,11 +1178,10 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
pagevec_init(&pvec, 0);
next = 0;
do {
- if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
+ if (!pagevec_lookup(&pvec, mapping, &next))
break;
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
- next = page->index;
if (PageFsCache(page)) {
__fscache_wait_on_page_write(cookie, page);
__fscache_uncache_page(cookie, page);
@@ -1190,7 +1189,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
}
pagevec_release(&pvec);
cond_resched();
- } while (++next);
+ } while (next);
_leave("");
}
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 7dabbe721dba..c8ff7b7954f0 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -268,7 +268,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
- bio->bi_bdev = sb->s_bdev;
+ bio_set_dev(bio, sb->s_bdev);
bio->bi_end_io = gfs2_end_log_write;
bio->bi_private = sdp;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 61ef6c9be816..52de1036d9f9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -221,7 +221,7 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
bio = bio_alloc(GFP_NOIO, num);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
while (num > 0) {
bh = *bhs;
if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c0a4b3778f3f..84593587691d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -242,7 +242,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
- bio->bi_bdev = sb->s_bdev;
+ bio_set_dev(bio, sb->s_bdev);
bio_add_page(bio, page, PAGE_SIZE, 0);
bio->bi_end_io = end_bio_io_page;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index e254fa0f0697..10032b919a85 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -65,7 +65,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = sb->s_bdev;
+ bio_set_dev(bio, sb->s_bdev);
bio_set_op_attrs(bio, op, op_flags);
if (op != WRITE && data)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 28d2753be094..59073e9f01a4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -334,7 +334,7 @@ static void remove_huge_page(struct page *page)
}
static void
-hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end)
+hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
{
struct vm_area_struct *vma;
@@ -401,9 +401,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
const pgoff_t end = lend >> huge_page_shift(h);
struct vm_area_struct pseudo_vma;
struct pagevec pvec;
- pgoff_t next;
+ pgoff_t next, index;
int i, freed = 0;
- long lookup_nr = PAGEVEC_SIZE;
bool truncate_op = (lend == LLONG_MAX);
memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
@@ -412,33 +411,19 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
next = start;
while (next < end) {
/*
- * Don't grab more pages than the number left in the range.
- */
- if (end - next < lookup_nr)
- lookup_nr = end - next;
-
- /*
* When no more pages are found, we are done.
*/
- if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
+ if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1))
break;
for (i = 0; i < pagevec_count(&pvec); ++i) {
struct page *page = pvec.pages[i];
u32 hash;
- /*
- * The page (index) could be beyond end. This is
- * only possible in the punch hole case as end is
- * max page offset in the truncate case.
- */
- next = page->index;
- if (next >= end)
- break;
-
+ index = page->index;
hash = hugetlb_fault_mutex_hash(h, current->mm,
&pseudo_vma,
- mapping, next, 0);
+ mapping, index, 0);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/*
@@ -455,8 +440,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
i_mmap_lock_write(mapping);
hugetlb_vmdelete_list(&mapping->i_mmap,
- next * pages_per_huge_page(h),
- (next + 1) * pages_per_huge_page(h));
+ index * pages_per_huge_page(h),
+ (index + 1) * pages_per_huge_page(h));
i_mmap_unlock_write(mapping);
}
@@ -475,14 +460,13 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
freed++;
if (!truncate_op) {
if (unlikely(hugetlb_unreserve_pages(inode,
- next, next + 1, 1)))
+ index, index + 1, 1)))
hugetlb_fix_reserve_counts(inode);
}
unlock_page(page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
- ++next;
huge_pagevec_release(&pvec);
cond_resched();
}
@@ -514,7 +498,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
i_size_write(inode, offset);
i_mmap_lock_write(mapping);
- if (!RB_EMPTY_ROOT(&mapping->i_mmap))
+ if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, offset, LLONG_MAX);
@@ -539,7 +523,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
inode_lock(inode);
i_mmap_lock_write(mapping);
- if (!RB_EMPTY_ROOT(&mapping->i_mmap))
+ if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap,
hole_start >> PAGE_SHIFT,
hole_end >> PAGE_SHIFT);
@@ -846,7 +830,10 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
rc = migrate_huge_page_move_mapping(mapping, newpage, page);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
diff --git a/fs/inode.c b/fs/inode.c
index 6a1626e0edaf..210054157a49 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -353,7 +353,7 @@ void address_space_init_once(struct address_space *mapping)
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
- mapping->i_mmap = RB_ROOT;
+ mapping->i_mmap = RB_ROOT_CACHED;
}
EXPORT_SYMBOL(address_space_init_once);
diff --git a/fs/iomap.c b/fs/iomap.c
index 8554a8d75281..269b24a01f32 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -805,7 +805,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 1);
- bio->bi_bdev = iomap->bdev;
+ bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
bio->bi_private = dio;
@@ -884,7 +884,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
return 0;
bio = bio_alloc(GFP_KERNEL, nr_pages);
- bio->bi_bdev = iomap->bdev;
+ bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
bio->bi_write_hint = dio->iocb->ki_hint;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 217a5e7815da..f1ed935322db 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -96,7 +96,7 @@ static int __init init_inodecache(void)
0, (SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
- if (isofs_inode_cachep == NULL)
+ if (!isofs_inode_cachep)
return -ENOMEM;
return 0;
}
@@ -678,7 +678,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
if (isonum_711(vdp->type) == ISO_VD_END)
break;
if (isonum_711(vdp->type) == ISO_VD_PRIMARY) {
- if (pri == NULL) {
+ if (!pri) {
pri = (struct iso_primary_descriptor *)vdp;
/* Save the buffer in case we need it ... */
pri_bh = bh;
@@ -742,7 +742,7 @@ root_found:
goto out_freebh;
}
- if (joliet_level && (pri == NULL || !opt.rock)) {
+ if (joliet_level && (!pri || !opt.rock)) {
/* This is the case of Joliet with the norock mount flag.
* A disc with both Joliet and Rock Ridge is handled later
*/
@@ -1298,7 +1298,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
unsigned long block;
int high_sierra = sbi->s_high_sierra;
- struct buffer_head *bh = NULL;
+ struct buffer_head *bh;
struct iso_directory_record *de;
struct iso_directory_record *tmpde = NULL;
unsigned int de_len;
@@ -1320,8 +1320,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
int frag1 = bufsize - offset;
tmpde = kmalloc(de_len, GFP_KERNEL);
- if (tmpde == NULL) {
- printk(KERN_INFO "%s: out of memory\n", __func__);
+ if (!tmpde) {
ret = -ENOMEM;
goto fail;
}
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index a21f0e9eecd4..0e5d412c0b01 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1995,7 +1995,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
- bio->bi_bdev = log->bdev;
+ bio_set_dev(bio, log->bdev);
bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
@@ -2139,7 +2139,7 @@ static void lbmStartIO(struct lbuf * bp)
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
- bio->bi_bdev = log->bdev;
+ bio_set_dev(bio, log->bdev);
bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 65120a471729..1c4b9ad4d7ab 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -430,7 +430,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);
bio = bio_alloc(GFP_NOFS, 1);
- bio->bi_bdev = inode->i_sb->s_bdev;
+ bio_set_dev(bio, inode->i_sb->s_bdev);
bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_write_end_io;
bio->bi_private = page;
@@ -510,7 +510,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
submit_bio(bio);
bio = bio_alloc(GFP_NOFS, 1);
- bio->bi_bdev = inode->i_sb->s_bdev;
+ bio_set_dev(bio, inode->i_sb->s_bdev);
bio->bi_iter.bi_sector =
pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_read_end_io;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index db5900aaa55a..89d1dc19340b 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -21,6 +21,7 @@
DEFINE_MUTEX(kernfs_mutex);
static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
+static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
@@ -507,6 +508,10 @@ void kernfs_put(struct kernfs_node *kn)
struct kernfs_node *parent;
struct kernfs_root *root;
+ /*
+ * kernfs_node is freed with ->count 0, kernfs_find_and_get_node_by_ino
+ * depends on this to filter reused stale node
+ */
if (!kn || !atomic_dec_and_test(&kn->count))
return;
root = kernfs_root(kn);
@@ -533,7 +538,9 @@ void kernfs_put(struct kernfs_node *kn)
simple_xattrs_free(&kn->iattr->xattrs);
}
kfree(kn->iattr);
- ida_simple_remove(&root->ino_ida, kn->ino);
+ spin_lock(&kernfs_idr_lock);
+ idr_remove(&root->ino_idr, kn->id.ino);
+ spin_unlock(&kernfs_idr_lock);
kmem_cache_free(kernfs_node_cache, kn);
kn = parent;
@@ -542,7 +549,7 @@ void kernfs_put(struct kernfs_node *kn)
goto repeat;
} else {
/* just released the root kn, free @root too */
- ida_destroy(&root->ino_ida);
+ idr_destroy(&root->ino_idr);
kfree(root);
}
}
@@ -559,7 +566,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
if (d_really_is_negative(dentry))
goto out_bad_unlocked;
- kn = dentry->d_fsdata;
+ kn = kernfs_dentry_node(dentry);
mutex_lock(&kernfs_mutex);
/* The kernfs node has been deactivated */
@@ -567,7 +574,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
goto out_bad;
/* The kernfs node has been moved? */
- if (dentry->d_parent->d_fsdata != kn->parent)
+ if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
goto out_bad;
/* The kernfs node has been renamed */
@@ -587,14 +594,8 @@ out_bad_unlocked:
return 0;
}
-static void kernfs_dop_release(struct dentry *dentry)
-{
- kernfs_put(dentry->d_fsdata);
-}
-
const struct dentry_operations kernfs_dops = {
.d_revalidate = kernfs_dop_revalidate,
- .d_release = kernfs_dop_release,
};
/**
@@ -610,8 +611,9 @@ const struct dentry_operations kernfs_dops = {
*/
struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
{
- if (dentry->d_sb->s_op == &kernfs_sops)
- return dentry->d_fsdata;
+ if (dentry->d_sb->s_op == &kernfs_sops &&
+ !d_really_is_negative(dentry))
+ return kernfs_dentry_node(dentry);
return NULL;
}
@@ -620,6 +622,8 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
unsigned flags)
{
struct kernfs_node *kn;
+ u32 gen;
+ int cursor;
int ret;
name = kstrdup_const(name, GFP_KERNEL);
@@ -630,11 +634,25 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
if (!kn)
goto err_out1;
- ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
+ idr_preload(GFP_KERNEL);
+ spin_lock(&kernfs_idr_lock);
+ cursor = idr_get_cursor(&root->ino_idr);
+ ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
+ if (ret >= 0 && ret < cursor)
+ root->next_generation++;
+ gen = root->next_generation;
+ spin_unlock(&kernfs_idr_lock);
+ idr_preload_end();
if (ret < 0)
goto err_out2;
- kn->ino = ret;
+ kn->id.ino = ret;
+ kn->id.generation = gen;
+ /*
+ * set ino first. This barrier is paired with atomic_inc_not_zero in
+ * kernfs_find_and_get_node_by_ino
+ */
+ smp_mb__before_atomic();
atomic_set(&kn->count, 1);
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
RB_CLEAR_NODE(&kn->rb);
@@ -666,6 +684,54 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
return kn;
}
+/*
+ * kernfs_find_and_get_node_by_ino - get kernfs_node from inode number
+ * @root: the kernfs root
+ * @ino: inode number
+ *
+ * RETURNS:
+ * NULL on failure. Return a kernfs node with reference counter incremented
+ */
+struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
+ unsigned int ino)
+{
+ struct kernfs_node *kn;
+
+ rcu_read_lock();
+ kn = idr_find(&root->ino_idr, ino);
+ if (!kn)
+ goto out;
+
+ /*
+ * Since kernfs_node is freed in RCU, it's possible an old node for ino
+ * is freed, but reused before RCU grace period. But a freed node (see
+ * kernfs_put) or an incompletedly initialized node (see
+ * __kernfs_new_node) should have 'count' 0. We can use this fact to
+ * filter out such node.
+ */
+ if (!atomic_inc_not_zero(&kn->count)) {
+ kn = NULL;
+ goto out;
+ }
+
+ /*
+ * The node could be a new node or a reused node. If it's a new node,
+ * we are ok. If it's reused because of RCU (because of
+ * SLAB_TYPESAFE_BY_RCU), the __kernfs_new_node always sets its 'ino'
+ * before 'count'. So if 'count' is uptodate, 'ino' should be uptodate,
+ * hence we can use 'ino' to filter stale node.
+ */
+ if (kn->id.ino != ino)
+ goto out;
+ rcu_read_unlock();
+
+ return kn;
+out:
+ rcu_read_unlock();
+ kernfs_put(kn);
+ return NULL;
+}
+
/**
* kernfs_add_one - add kernfs_node to parent without warning
* @kn: kernfs_node to be added
@@ -875,13 +941,14 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
if (!root)
return ERR_PTR(-ENOMEM);
- ida_init(&root->ino_ida);
+ idr_init(&root->ino_idr);
INIT_LIST_HEAD(&root->supers);
+ root->next_generation = 1;
kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
KERNFS_DIR);
if (!kn) {
- ida_destroy(&root->ino_ida);
+ idr_destroy(&root->ino_idr);
kfree(root);
return ERR_PTR(-ENOMEM);
}
@@ -984,7 +1051,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
unsigned int flags)
{
struct dentry *ret;
- struct kernfs_node *parent = dentry->d_parent->d_fsdata;
+ struct kernfs_node *parent = dir->i_private;
struct kernfs_node *kn;
struct inode *inode;
const void *ns = NULL;
@@ -1001,8 +1068,6 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
ret = NULL;
goto out_unlock;
}
- kernfs_get(kn);
- dentry->d_fsdata = kn;
/* attach dentry and inode */
inode = kernfs_get_inode(dir->i_sb, kn);
@@ -1039,7 +1104,7 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
{
- struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *kn = kernfs_dentry_node(dentry);
struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
int ret;
@@ -1059,7 +1124,7 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- struct kernfs_node *kn = old_dentry->d_fsdata;
+ struct kernfs_node *kn = kernfs_dentry_node(old_dentry);
struct kernfs_node *new_parent = new_dir->i_private;
struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
int ret;
@@ -1572,7 +1637,7 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dentry = file->f_path.dentry;
- struct kernfs_node *parent = dentry->d_fsdata;
+ struct kernfs_node *parent = kernfs_dentry_node(dentry);
struct kernfs_node *pos = file->private_data;
const void *ns = NULL;
@@ -1589,7 +1654,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
const char *name = pos->name;
unsigned int type = dt_type(pos);
int len = strlen(name);
- ino_t ino = pos->ino;
+ ino_t ino = pos->id.ino;
ctx->pos = pos->hash;
file->private_data = pos;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e6c8954a4e89..9698e51656b1 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -616,7 +616,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
static int kernfs_fop_open(struct inode *inode, struct file *file)
{
- struct kernfs_node *kn = file->f_path.dentry->d_fsdata;
+ struct kernfs_node *kn = inode->i_private;
struct kernfs_root *root = kernfs_root(kn);
const struct kernfs_ops *ops;
struct kernfs_open_file *of;
@@ -768,7 +768,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
- struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
+ struct kernfs_node *kn = inode->i_private;
struct kernfs_open_file *of = kernfs_of(filp);
if (kn->flags & KERNFS_HAS_RELEASE) {
@@ -835,7 +835,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
{
struct kernfs_open_file *of = kernfs_of(filp);
- struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
+ struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
struct kernfs_open_node *on = kn->attr.open;
if (!kernfs_get_active(kn))
@@ -895,7 +895,7 @@ repeat:
* have the matching @file available. Look up the inodes
* and generate the events manually.
*/
- inode = ilookup(info->sb, kn->ino);
+ inode = ilookup(info->sb, kn->id.ino);
if (!inode)
continue;
@@ -903,7 +903,7 @@ repeat:
if (parent) {
struct inode *p_inode;
- p_inode = ilookup(info->sb, parent->ino);
+ p_inode = ilookup(info->sb, parent->id.ino);
if (p_inode) {
fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
inode, FSNOTIFY_EVENT_INODE, kn->name, 0);
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index fb4b4a79a0d6..a34303981deb 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -112,7 +112,7 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
- struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *kn = inode->i_private;
int error;
if (!kn)
@@ -154,7 +154,7 @@ static int kernfs_node_setsecdata(struct kernfs_iattrs *attrs, void **secdata,
ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
{
- struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *kn = kernfs_dentry_node(dentry);
struct kernfs_iattrs *attrs;
attrs = kernfs_iattrs(kn);
@@ -203,8 +203,8 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
- struct kernfs_node *kn = path->dentry->d_fsdata;
struct inode *inode = d_inode(path->dentry);
+ struct kernfs_node *kn = inode->i_private;
mutex_lock(&kernfs_mutex);
kernfs_refresh_inode(kn, inode);
@@ -220,6 +220,7 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
inode->i_private = kn;
inode->i_mapping->a_ops = &kernfs_aops;
inode->i_op = &kernfs_iops;
+ inode->i_generation = kn->id.generation;
set_default_inode_attr(inode, kn->mode);
kernfs_refresh_inode(kn, inode);
@@ -265,7 +266,7 @@ struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
{
struct inode *inode;
- inode = iget_locked(sb, kn->ino);
+ inode = iget_locked(sb, kn->id.ino);
if (inode && (inode->i_state & I_NEW))
kernfs_init_inode(kn, inode);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 2d5144ab4251..0f260dcca177 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -70,6 +70,13 @@ struct kernfs_super_info {
};
#define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))
+static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry)
+{
+ if (d_really_is_negative(dentry))
+ return NULL;
+ return d_inode(dentry)->i_private;
+}
+
extern const struct super_operations kernfs_sops;
extern struct kmem_cache *kernfs_node_cache;
@@ -98,6 +105,8 @@ int kernfs_add_one(struct kernfs_node *kn);
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
const char *name, umode_t mode,
unsigned flags);
+struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
+ unsigned int ino);
/*
* file.c
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d5b149a45be1..95a7c88baed9 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -16,6 +16,7 @@
#include <linux/pagemap.h>
#include <linux/namei.h>
#include <linux/seq_file.h>
+#include <linux/exportfs.h>
#include "kernfs-internal.h"
@@ -33,7 +34,7 @@ static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
{
- struct kernfs_root *root = kernfs_root(dentry->d_fsdata);
+ struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
struct kernfs_syscall_ops *scops = root->syscall_ops;
if (scops && scops->show_options)
@@ -43,7 +44,7 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
{
- struct kernfs_node *node = dentry->d_fsdata;
+ struct kernfs_node *node = kernfs_dentry_node(dentry);
struct kernfs_root *root = kernfs_root(node);
struct kernfs_syscall_ops *scops = root->syscall_ops;
@@ -64,6 +65,78 @@ const struct super_operations kernfs_sops = {
.show_path = kernfs_sop_show_path,
};
+/*
+ * Similar to kernfs_fh_get_inode, this one gets kernfs node from inode
+ * number and generation
+ */
+struct kernfs_node *kernfs_get_node_by_id(struct kernfs_root *root,
+ const union kernfs_node_id *id)
+{
+ struct kernfs_node *kn;
+
+ kn = kernfs_find_and_get_node_by_ino(root, id->ino);
+ if (!kn)
+ return NULL;
+ if (kn->id.generation != id->generation) {
+ kernfs_put(kn);
+ return NULL;
+ }
+ return kn;
+}
+
+static struct inode *kernfs_fh_get_inode(struct super_block *sb,
+ u64 ino, u32 generation)
+{
+ struct kernfs_super_info *info = kernfs_info(sb);
+ struct inode *inode;
+ struct kernfs_node *kn;
+
+ if (ino == 0)
+ return ERR_PTR(-ESTALE);
+
+ kn = kernfs_find_and_get_node_by_ino(info->root, ino);
+ if (!kn)
+ return ERR_PTR(-ESTALE);
+ inode = kernfs_get_inode(sb, kn);
+ kernfs_put(kn);
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+
+ if (generation && inode->i_generation != generation) {
+ /* we didn't find the right inode.. */
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+ return inode;
+}
+
+static struct dentry *kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+ kernfs_fh_get_inode);
+}
+
+static struct dentry *kernfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+ kernfs_fh_get_inode);
+}
+
+static struct dentry *kernfs_get_parent_dentry(struct dentry *child)
+{
+ struct kernfs_node *kn = kernfs_dentry_node(child);
+
+ return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent));
+}
+
+static const struct export_operations kernfs_export_ops = {
+ .fh_to_dentry = kernfs_fh_to_dentry,
+ .fh_to_parent = kernfs_fh_to_parent,
+ .get_parent = kernfs_get_parent_dentry,
+};
+
/**
* kernfs_root_from_sb - determine kernfs_root associated with a super_block
* @sb: the super_block in question
@@ -159,6 +232,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
sb->s_magic = magic;
sb->s_op = &kernfs_sops;
sb->s_xattr = kernfs_xattr_handlers;
+ if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
+ sb->s_export_op = &kernfs_export_ops;
sb->s_time_gran = 1;
/* get root inode, initialize and unlock it */
@@ -176,8 +251,6 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
pr_debug("%s: could not get root dentry!\n", __func__);
return -ENOMEM;
}
- kernfs_get(info->root->kn);
- root->d_fsdata = info->root->kn;
sb->s_root = root;
sb->s_d_op = &kernfs_dops;
return 0;
@@ -283,7 +356,6 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
void kernfs_kill_sb(struct super_block *sb)
{
struct kernfs_super_info *info = kernfs_info(sb);
- struct kernfs_node *root_kn = sb->s_root->d_fsdata;
mutex_lock(&kernfs_mutex);
list_del(&info->node);
@@ -295,7 +367,6 @@ void kernfs_kill_sb(struct super_block *sb)
*/
kill_anon_super(sb);
kfree(info);
- kernfs_put(root_kn);
}
/**
@@ -330,7 +401,16 @@ struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
void __init kernfs_init(void)
{
+
+ /*
+ * the slab is freed in RCU context, so kernfs_find_and_get_node_by_ino
+ * can access the slab lock free. This could introduce stale nodes,
+ * please see how kernfs_find_and_get_node_by_ino filters out stale
+ * nodes.
+ */
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
sizeof(struct kernfs_node),
- 0, SLAB_PANIC, NULL);
+ 0,
+ SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
+ NULL);
}
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 1684af4a8b9b..08ccabd7047f 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -98,9 +98,9 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
return 0;
}
-static int kernfs_getlink(struct dentry *dentry, char *path)
+static int kernfs_getlink(struct inode *inode, char *path)
{
- struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *kn = inode->i_private;
struct kernfs_node *parent = kn->parent;
struct kernfs_node *target = kn->symlink.target_kn;
int error;
@@ -124,7 +124,7 @@ static const char *kernfs_iop_get_link(struct dentry *dentry,
body = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!body)
return ERR_PTR(-ENOMEM);
- error = kernfs_getlink(dentry, body);
+ error = kernfs_getlink(inode, body);
if (unlikely(error < 0)) {
kfree(body);
return ERR_PTR(error);
diff --git a/fs/mpage.c b/fs/mpage.c
index 2e4c41ccb5c9..37bb77c1302c 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -83,7 +83,7 @@ mpage_alloc(struct block_device *bdev,
}
if (bio) {
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_sector;
}
return bio;
diff --git a/fs/namei.c b/fs/namei.c
index ddb6a7c2b3d4..1180f9c58093 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1129,9 +1129,18 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* of the daemon to instantiate them before they can be used.
*/
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
- LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
- path->dentry->d_inode)
- return -EISDIR;
+ LOOKUP_OPEN | LOOKUP_CREATE |
+ LOOKUP_AUTOMOUNT))) {
+ /* Positive dentry that isn't meant to trigger an
+ * automount, EISDIR will allow it to be used,
+ * otherwise there's no mount here "now" so return
+ * ENOENT.
+ */
+ if (path->dentry->d_inode)
+ return -EISDIR;
+ else
+ return -ENOENT;
+ }
if (path->dentry->d_sb->s_user_ns != &init_user_ns)
return -EACCES;
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index d8863a804b15..995d707537da 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -130,7 +130,7 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
if (bio) {
bio->bi_iter.bi_sector = disk_sector;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_end_io = end_io;
bio->bi_private = par;
}
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
index 777b055063f6..3025fe8584a0 100644
--- a/fs/nfs/fscache-index.c
+++ b/fs/nfs/fscache-index.c
@@ -252,45 +252,6 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
}
/*
- * Indication from FS-Cache that the cookie is no longer cached
- * - This function is called when the backing store currently caching a cookie
- * is removed
- * - The netfs should use this to clean up any markers indicating cached pages
- * - This is mandatory for any object that may have data
- */
-static void nfs_fscache_inode_now_uncached(void *cookie_netfs_data)
-{
- struct nfs_inode *nfsi = cookie_netfs_data;
- struct pagevec pvec;
- pgoff_t first;
- int loop, nr_pages;
-
- pagevec_init(&pvec, 0);
- first = 0;
-
- dprintk("NFS: nfs_inode_now_uncached: nfs_inode 0x%p\n", nfsi);
-
- for (;;) {
- /* grab a bunch of pages to unmark */
- nr_pages = pagevec_lookup(&pvec,
- nfsi->vfs_inode.i_mapping,
- first,
- PAGEVEC_SIZE - pagevec_count(&pvec));
- if (!nr_pages)
- break;
-
- for (loop = 0; loop < nr_pages; loop++)
- ClearPageFsCache(pvec.pages[loop]);
-
- first = pvec.pages[nr_pages - 1]->index + 1;
-
- pvec.nr = nr_pages;
- pagevec_release(&pvec);
- cond_resched();
- }
-}
-
-/*
* Get an extra reference on a read context.
* - This function can be absent if the completion function doesn't require a
* context.
@@ -330,7 +291,6 @@ const struct fscache_cookie_def nfs_fscache_inode_object_def = {
.get_attr = nfs_fscache_inode_get_attr,
.get_aux = nfs_fscache_inode_get_aux,
.check_aux = nfs_fscache_inode_check_aux,
- .now_uncached = nfs_fscache_inode_now_uncached,
.get_context = nfs_fh_get_context,
.put_context = nfs_fh_put_context,
};
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index f11a3ad2df0c..8616c46d33da 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -312,10 +312,9 @@ void nilfs_copy_back_pages(struct address_space *dmap,
pagevec_init(&pvec, 0);
repeat:
- n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
+ n = pagevec_lookup(&pvec, smap, &index);
if (!n)
return;
- index = pvec.pages[n - 1]->index + 1;
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i], *dpage;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index e73c86d9855c..6c5009cc4e6f 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -400,7 +400,7 @@ static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
bio = bio_alloc(GFP_NOIO, nr_vecs);
}
if (likely(bio)) {
- bio->bi_bdev = nilfs->ns_bdev;
+ bio_set_dev(bio, nilfs->ns_bdev);
bio->bi_iter.bi_sector =
start << (nilfs->ns_blocksize_bits - 9);
}
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 2430a0415995..cba328315929 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -133,7 +133,7 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
kmem_cache_free(dnotify_mark_cache, dn_mark);
}
-static struct fsnotify_ops dnotify_fsnotify_ops = {
+static const struct fsnotify_ops dnotify_fsnotify_ops = {
.handle_event = dnotify_handle_event,
.free_mark = dnotify_free_mark,
};
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index e50a387959bf..40b5cc97f7b0 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -221,7 +221,7 @@ out:
/*
* Set the access or default ACL of an inode.
*/
-int ocfs2_set_acl(handle_t *handle,
+static int ocfs2_set_acl(handle_t *handle,
struct inode *inode,
struct buffer_head *di_bh,
int type,
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 2783a75b3999..7be0bb756286 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -28,13 +28,6 @@ struct ocfs2_acl_entry {
struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type);
int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-int ocfs2_set_acl(handle_t *handle,
- struct inode *inode,
- struct buffer_head *di_bh,
- int type,
- struct posix_acl *acl,
- struct ocfs2_alloc_context *meta_ac,
- struct ocfs2_alloc_context *data_ac);
extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index fb15a96df0b6..a177eae3aa1a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -955,8 +955,7 @@ int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
/*
* How many free extents have we got before we need more meta data?
*/
-int ocfs2_num_free_extents(struct ocfs2_super *osb,
- struct ocfs2_extent_tree *et)
+int ocfs2_num_free_extents(struct ocfs2_extent_tree *et)
{
int retval;
struct ocfs2_extent_list *el = NULL;
@@ -1933,14 +1932,12 @@ out:
* the new changes.
*
* left_rec: the record on the left.
- * left_child_el: is the child list pointed to by left_rec
* right_rec: the record to the right of left_rec
* right_child_el: is the child list pointed to by right_rec
*
* By definition, this only works on interior nodes.
*/
static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
- struct ocfs2_extent_list *left_child_el,
struct ocfs2_extent_rec *right_rec,
struct ocfs2_extent_list *right_child_el)
{
@@ -2003,7 +2000,7 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
*/
BUG_ON(i >= (le16_to_cpu(root_el->l_next_free_rec) - 1));
- ocfs2_adjust_adjacent_records(&root_el->l_recs[i], left_el,
+ ocfs2_adjust_adjacent_records(&root_el->l_recs[i],
&root_el->l_recs[i + 1], right_el);
}
@@ -2060,8 +2057,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
el = right_path->p_node[i].el;
right_rec = &el->l_recs[0];
- ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
- right_el);
+ ocfs2_adjust_adjacent_records(left_rec, right_rec, right_el);
ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
@@ -2509,7 +2505,7 @@ out_ret_path:
static int ocfs2_update_edge_lengths(handle_t *handle,
struct ocfs2_extent_tree *et,
- int subtree_index, struct ocfs2_path *path)
+ struct ocfs2_path *path)
{
int i, idx, ret;
struct ocfs2_extent_rec *rec;
@@ -2755,8 +2751,7 @@ static int ocfs2_rotate_subtree_left(handle_t *handle,
if (del_right_subtree) {
ocfs2_unlink_subtree(handle, et, left_path, right_path,
subtree_index, dealloc);
- ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
- left_path);
+ ret = ocfs2_update_edge_lengths(handle, et, left_path);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3060,8 +3055,7 @@ static int ocfs2_remove_rightmost_path(handle_t *handle,
ocfs2_unlink_subtree(handle, et, left_path, path,
subtree_index, dealloc);
- ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
- left_path);
+ ret = ocfs2_update_edge_lengths(handle, et, left_path);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4790,7 +4784,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
if (mark_unwritten)
flags = OCFS2_EXT_UNWRITTEN;
- free_extents = ocfs2_num_free_extents(osb, et);
+ free_extents = ocfs2_num_free_extents(et);
if (free_extents < 0) {
status = free_extents;
mlog_errno(status);
@@ -5668,7 +5662,7 @@ static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
*ac = NULL;
- num_free_extents = ocfs2_num_free_extents(osb, et);
+ num_free_extents = ocfs2_num_free_extents(et);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 4a5152ec88a3..27b75cf32cfa 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -144,8 +144,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_cached_dealloc_ctxt *dealloc,
u64 refcount_loc, bool refcount_tree_locked);
-int ocfs2_num_free_extents(struct ocfs2_super *osb,
- struct ocfs2_extent_tree *et);
+int ocfs2_num_free_extents(struct ocfs2_extent_tree *et);
/*
* how many new metadata chunks would an allocation need at maximum?
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index ffe003982d95..d0206042d068 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -505,8 +505,7 @@ static inline void o2hb_bio_wait_dec(struct o2hb_bio_wait_ctxt *wc,
}
}
-static void o2hb_wait_on_io(struct o2hb_region *reg,
- struct o2hb_bio_wait_ctxt *wc)
+static void o2hb_wait_on_io(struct o2hb_bio_wait_ctxt *wc)
{
o2hb_bio_wait_dec(wc, 1);
wait_for_completion(&wc->wc_io_complete);
@@ -554,7 +553,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
/* Must put everything in 512 byte sectors for the bio... */
bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9);
- bio->bi_bdev = reg->hr_bdev;
+ bio_set_dev(bio, reg->hr_bdev);
bio->bi_private = wc;
bio->bi_end_io = o2hb_bio_end_io;
bio_set_op_attrs(bio, op, op_flags);
@@ -608,7 +607,7 @@ static int o2hb_read_slots(struct o2hb_region *reg,
status = 0;
bail_and_wait:
- o2hb_wait_on_io(reg, &wc);
+ o2hb_wait_on_io(&wc);
if (wc.wc_error && !status)
status = wc.wc_error;
@@ -1162,7 +1161,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
* before we can go to steady state. This ensures that
* people we find in our steady state have seen us.
*/
- o2hb_wait_on_io(reg, &write_wc);
+ o2hb_wait_on_io(&write_wc);
if (write_wc.wc_error) {
/* Do not re-arm the write timeout on I/O error - we
* can't be sure that the new block ever made it to
@@ -1275,7 +1274,7 @@ static int o2hb_thread(void *data)
o2hb_prepare_block(reg, 0);
ret = o2hb_issue_node_write(reg, &write_wc);
if (ret == 0)
- o2hb_wait_on_io(reg, &write_wc);
+ o2hb_wait_on_io(&write_wc);
else
mlog_errno(ret);
}
@@ -2576,22 +2575,6 @@ void o2hb_unregister_callback(const char *region_uuid,
}
EXPORT_SYMBOL_GPL(o2hb_unregister_callback);
-int o2hb_check_node_heartbeating(u8 node_num)
-{
- unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
-
- o2hb_fill_node_map(testing_map, sizeof(testing_map));
- if (!test_bit(node_num, testing_map)) {
- mlog(ML_HEARTBEAT,
- "node (%u) does not have heartbeating enabled.\n",
- node_num);
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating);
-
int o2hb_check_node_heartbeating_no_sem(u8 node_num)
{
unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
@@ -2626,23 +2609,6 @@ int o2hb_check_node_heartbeating_from_callback(u8 node_num)
}
EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating_from_callback);
-/* Makes sure our local node is configured with a node number, and is
- * heartbeating. */
-int o2hb_check_local_node_heartbeating(void)
-{
- u8 node_num;
-
- /* if this node was set then we have networking */
- node_num = o2nm_this_node();
- if (node_num == O2NM_MAX_NODES) {
- mlog(ML_HEARTBEAT, "this node has not been configured.\n");
- return 0;
- }
-
- return o2hb_check_node_heartbeating(node_num);
-}
-EXPORT_SYMBOL_GPL(o2hb_check_local_node_heartbeating);
-
/*
* this is just a hack until we get the plumbing which flips file systems
* read only and drops the hb ref instead of killing the node dead.
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3ecb9f337b7d..febe6312ceff 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3249,7 +3249,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
spin_unlock(&OCFS2_I(dir)->ip_lock);
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir),
parent_fe_bh);
- num_free_extents = ocfs2_num_free_extents(osb, &et);
+ num_free_extents = ocfs2_num_free_extents(&et);
if (num_free_extents < 0) {
status = num_free_extents;
mlog_errno(status);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 66e59d3163ea..6e41fc8fabbe 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -713,13 +713,6 @@ leave:
return status;
}
-int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
- u32 clusters_to_add, int mark_unwritten)
-{
- return __ocfs2_extend_allocation(inode, logical_start,
- clusters_to_add, mark_unwritten);
-}
-
/*
* While a write will already be ordering the data, a truncate will not.
* Thus, we need to explicitly order the zeroed pages.
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index d5e5fa7f0743..36304434eacf 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1348,7 +1348,6 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
ocfs2_schedule_truncate_log_flush(osb, 0);
osb->local_alloc_copy = NULL;
- osb->dirty = 0;
/* queue to recover orphan slots for all offline slots */
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index e52a2852d50d..7eb3b0a6347e 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -175,7 +175,7 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- num_free_extents = ocfs2_num_free_extents(osb, et);
+ num_free_extents = ocfs2_num_free_extents(et);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 0c39d71c67a1..9a50f222ac97 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -320,7 +320,6 @@ struct ocfs2_super
u64 system_dir_blkno;
u64 bitmap_blkno;
u32 bitmap_cpg;
- u8 *uuid;
char *uuid_str;
u32 uuid_hash;
u8 *vol_label;
@@ -388,9 +387,8 @@ struct ocfs2_super
unsigned int osb_resv_level;
unsigned int osb_dir_resv_level;
- /* Next three fields are for local node slot recovery during
+ /* Next two fields are for local node slot recovery during
* mount. */
- int dirty;
struct ocfs2_dinode *local_alloc_copy;
struct ocfs2_quota_recovery *quota_rec;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index cec495a921e3..c94b6baaa551 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -33,7 +33,7 @@
* Locking of quotas with OCFS2 is rather complex. Here are rules that
* should be obeyed by all the functions:
* - any write of quota structure (either to local or global file) is protected
- * by dqio_mutex or dquot->dq_lock.
+ * by dqio_sem or dquot->dq_lock.
* - any modification of global quota file holds inode cluster lock, i_mutex,
* and ip_alloc_sem of the global quota file (achieved by
* ocfs2_lock_global_qf). It also has to hold qinfo_lock.
@@ -42,9 +42,9 @@
*
* A rough sketch of locking dependencies (lf = local file, gf = global file):
* Normal filesystem operation:
- * start_trans -> dqio_mutex -> write to lf
+ * start_trans -> dqio_sem -> write to lf
* Syncing of local and global file:
- * ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock ->
+ * ocfs2_lock_global_qf -> start_trans -> dqio_sem -> qinfo_lock ->
* write to gf
* -> write to lf
* Acquire dquot for the first time:
@@ -60,7 +60,7 @@
* Recovery:
* inode cluster lock of recovered lf
* -> read bitmaps -> ip_alloc_sem of lf
- * -> ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock ->
+ * -> ocfs2_lock_global_qf -> start_trans -> dqio_sem -> qinfo_lock ->
* write to gf
*/
@@ -443,13 +443,17 @@ static int __ocfs2_global_write_info(struct super_block *sb, int type)
int ocfs2_global_write_info(struct super_block *sb, int type)
{
int err;
- struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+ struct quota_info *dqopt = sb_dqopt(sb);
+ struct ocfs2_mem_dqinfo *info = dqopt->info[type].dqi_priv;
+ down_write(&dqopt->dqio_sem);
err = ocfs2_qinfo_lock(info, 1);
if (err < 0)
- return err;
+ goto out_sem;
err = __ocfs2_global_write_info(sb, type);
ocfs2_qinfo_unlock(info, 1);
+out_sem:
+ up_write(&dqopt->dqio_sem);
return err;
}
@@ -500,7 +504,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
/* Update space and inode usage. Get also other information from
* global quota file so that we don't overwrite any changes there.
* We are */
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
spacechange = dquot->dq_dqb.dqb_curspace -
OCFS2_DQUOT(dquot)->dq_origspace;
inodechange = dquot->dq_dqb.dqb_curinodes -
@@ -556,7 +560,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
__clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
err = ocfs2_qinfo_lock(info, freeing);
if (err < 0) {
mlog(ML_ERROR, "Failed to lock quota info, losing quota write"
@@ -611,7 +615,7 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
mlog_errno(status);
goto out_ilock;
}
- mutex_lock(&sb_dqopt(sb)->dqio_mutex);
+ down_write(&sb_dqopt(sb)->dqio_sem);
status = ocfs2_sync_dquot(dquot);
if (status < 0)
mlog_errno(status);
@@ -619,7 +623,7 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
status = ocfs2_local_write_dquot(dquot);
if (status < 0)
mlog_errno(status);
- mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+ up_write(&sb_dqopt(sb)->dqio_sem);
ocfs2_commit_trans(osb, handle);
out_ilock:
ocfs2_unlock_global_qf(oinfo, 1);
@@ -666,9 +670,9 @@ static int ocfs2_write_dquot(struct dquot *dquot)
mlog_errno(status);
goto out;
}
- mutex_lock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
+ down_write(&sb_dqopt(dquot->dq_sb)->dqio_sem);
status = ocfs2_local_write_dquot(dquot);
- mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
+ up_write(&sb_dqopt(dquot->dq_sb)->dqio_sem);
ocfs2_commit_trans(osb, handle);
out:
return status;
@@ -920,10 +924,10 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
/* In case user set some limits, sync dquot immediately to global
* quota file so that information propagates quicker */
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
if (dquot->dq_flags & mask)
sync = 1;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
/* This is a slight hack but we can't afford getting global quota
* lock if we already have a transaction started. */
if (!sync || journal_current_handle()) {
@@ -939,7 +943,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
mlog_errno(status);
goto out_ilock;
}
- mutex_lock(&sb_dqopt(sb)->dqio_mutex);
+ down_write(&sb_dqopt(sb)->dqio_sem);
status = ocfs2_sync_dquot(dquot);
if (status < 0) {
mlog_errno(status);
@@ -948,7 +952,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
/* Now write updated local dquot structure */
status = ocfs2_local_write_dquot(dquot);
out_dlock:
- mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+ up_write(&sb_dqopt(sb)->dqio_sem);
ocfs2_commit_trans(osb, handle);
out_ilock:
ocfs2_unlock_global_qf(oinfo, 1);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 32c5a40c1257..aa700fd10610 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -520,8 +520,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
mlog_errno(status);
goto out_drop_lock;
}
- mutex_lock(&sb_dqopt(sb)->dqio_mutex);
- spin_lock(&dq_data_lock);
+ down_write(&sb_dqopt(sb)->dqio_sem);
+ spin_lock(&dquot->dq_dqb_lock);
/* Add usage from quota entry into quota changes
* of our node. Auxiliary variables are important
* due to signedness */
@@ -529,7 +529,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
inodechange = le64_to_cpu(dqblk->dqb_inodemod);
dquot->dq_dqb.dqb_curspace += spacechange;
dquot->dq_dqb.dqb_curinodes += inodechange;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
/* We want to drop reference held by the crashed
* node. Since we have our own reference we know
* global structure actually won't be freed. */
@@ -553,7 +553,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
unlock_buffer(qbh);
ocfs2_journal_dirty(handle, qbh);
out_commit:
- mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+ up_write(&sb_dqopt(sb)->dqio_sem);
ocfs2_commit_trans(OCFS2_SB(sb), handle);
out_drop_lock:
ocfs2_unlock_global_qf(oinfo, 1);
@@ -691,9 +691,6 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
struct ocfs2_quota_recovery *rec;
int locked = 0;
- /* We don't need the lock and we have to acquire quota file locks
- * which will later depend on this lock */
- mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
info->dqi_max_spc_limit = 0x7fffffffffffffffLL;
info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
@@ -772,7 +769,6 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
goto out_err;
}
- mutex_lock(&sb_dqopt(sb)->dqio_mutex);
return 0;
out_err:
if (oinfo) {
@@ -786,7 +782,6 @@ out_err:
kfree(oinfo);
}
brelse(bh);
- mutex_lock(&sb_dqopt(sb)->dqio_mutex);
return -1;
}
@@ -882,12 +877,12 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
dqblk->dqb_id = cpu_to_le64(from_kqid(&init_user_ns,
od->dq_dquot.dq_id));
- spin_lock(&dq_data_lock);
+ spin_lock(&od->dq_dquot.dq_dqb_lock);
dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
od->dq_origspace);
dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
od->dq_originodes);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&od->dq_dquot.dq_dqb_lock);
trace_olq_set_dquot(
(unsigned long long)le64_to_cpu(dqblk->dqb_spacemod),
(unsigned long long)le64_to_cpu(dqblk->dqb_inodemod),
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index f8933cb53d68..ab156e35ec00 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2851,7 +2851,7 @@ static int ocfs2_lock_refcount_allocators(struct super_block *sb,
int *credits)
{
int ret = 0, meta_add = 0;
- int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et);
+ int num_free_extents = ocfs2_num_free_extents(et);
if (num_free_extents < 0) {
ret = num_free_extents;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 6ad3533940ba..71f22c8fbffd 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2700,7 +2700,7 @@ int ocfs2_lock_allocators(struct inode *inode,
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
- num_free_extents = ocfs2_num_free_extents(osb, et);
+ num_free_extents = ocfs2_num_free_extents(et);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 83005f486451..3f936be379a9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2486,7 +2486,6 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
if (dirty) {
/* Recovery will be completed after we've mounted the
* rest of the volume. */
- osb->dirty = 1;
osb->local_alloc_copy = local_alloc;
local_alloc = NULL;
}
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index f70c3778d600..5fdf269ba82e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -6800,7 +6800,7 @@ static int ocfs2_lock_reflink_xattr_rec_allocators(
*credits += 1;
/* count in the xattr tree change. */
- num_free_extents = ocfs2_num_free_extents(osb, xt_et);
+ num_free_extents = ocfs2_num_free_extents(xt_et);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 98fd8f6df851..e5d89a0d0b8a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2931,6 +2931,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
+ REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
@@ -3324,6 +3325,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_tid_smaps_operations),
+ REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index e3cda0b5968f..793a67574668 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -40,8 +40,8 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry
static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
{
- return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
- subdir_node);
+ return rb_entry_safe(rb_first_cached(&dir->subdir),
+ struct proc_dir_entry, subdir_node);
}
static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
@@ -54,7 +54,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
const char *name,
unsigned int len)
{
- struct rb_node *node = dir->subdir.rb_node;
+ struct rb_node *node = dir->subdir.rb_root.rb_node;
while (node) {
struct proc_dir_entry *de = rb_entry(node,
@@ -75,8 +75,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
static bool pde_subdir_insert(struct proc_dir_entry *dir,
struct proc_dir_entry *de)
{
- struct rb_root *root = &dir->subdir;
- struct rb_node **new = &root->rb_node, *parent = NULL;
+ struct rb_root_cached *root = &dir->subdir;
+ struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
+ bool leftmost = true;
/* Figure out where to put new node */
while (*new) {
@@ -88,15 +89,16 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
parent = *new;
if (result < 0)
new = &(*new)->rb_left;
- else if (result > 0)
+ else if (result > 0) {
new = &(*new)->rb_right;
- else
+ leftmost = false;
+ } else
return false;
}
/* Add new node and rebalance tree. */
rb_link_node(&de->subdir_node, parent, new);
- rb_insert_color(&de->subdir_node, root);
+ rb_insert_color_cached(&de->subdir_node, root, leftmost);
return true;
}
@@ -369,7 +371,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
ent->namelen = qstr.len;
ent->mode = mode;
ent->nlink = nlink;
- ent->subdir = RB_ROOT;
+ ent->subdir = RB_ROOT_CACHED;
atomic_set(&ent->count, 1);
spin_lock_init(&ent->pde_unload_lock);
INIT_LIST_HEAD(&ent->pde_openers);
@@ -499,6 +501,14 @@ out:
}
EXPORT_SYMBOL(proc_create_data);
+struct proc_dir_entry *proc_create(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ const struct file_operations *proc_fops)
+{
+ return proc_create_data(name, mode, parent, proc_fops, NULL);
+}
+EXPORT_SYMBOL(proc_create);
+
void proc_set_size(struct proc_dir_entry *de, loff_t size)
{
de->size = size;
@@ -545,7 +555,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
de = pde_subdir_find(parent, fn, len);
if (de)
- rb_erase(&de->subdir_node, &parent->subdir);
+ rb_erase_cached(&de->subdir_node, &parent->subdir);
write_unlock(&proc_subdir_lock);
if (!de) {
WARN(1, "name '%s'\n", name);
@@ -582,13 +592,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
write_unlock(&proc_subdir_lock);
return -ENOENT;
}
- rb_erase(&root->subdir_node, &parent->subdir);
+ rb_erase_cached(&root->subdir_node, &parent->subdir);
de = root;
while (1) {
next = pde_subdir_first(de);
if (next) {
- rb_erase(&next->subdir_node, &de->subdir);
+ rb_erase_cached(&next->subdir_node, &de->subdir);
de = next;
continue;
}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index aa2b89071630..a34195e92b20 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -40,7 +40,7 @@ struct proc_dir_entry {
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
struct proc_dir_entry *parent;
- struct rb_root subdir;
+ struct rb_root_cached subdir;
struct rb_node subdir_node;
void *data;
atomic_t count; /* use count */
@@ -269,10 +269,12 @@ extern int proc_remount(struct super_block *, int *, char *);
/*
* task_[no]mmu.c
*/
+struct mem_size_stats;
struct proc_maps_private {
struct inode *inode;
struct task_struct *task;
struct mm_struct *mm;
+ struct mem_size_stats *rollup;
#ifdef CONFIG_MMU
struct vm_area_struct *tail_vma;
#endif
@@ -288,6 +290,7 @@ extern const struct file_operations proc_tid_maps_operations;
extern const struct file_operations proc_pid_numa_maps_operations;
extern const struct file_operations proc_tid_numa_maps_operations;
extern const struct file_operations proc_pid_smaps_operations;
+extern const struct file_operations proc_pid_smaps_rollup_operations;
extern const struct file_operations proc_tid_smaps_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 509a61668d90..cdd979724c74 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -80,7 +80,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "Active(file): ", pages[LRU_ACTIVE_FILE]);
show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]);
show_val_kb(m, "Unevictable: ", pages[LRU_UNEVICTABLE]);
- show_val_kb(m, "Mlocked: ", global_page_state(NR_MLOCK));
+ show_val_kb(m, "Mlocked: ", global_zone_page_state(NR_MLOCK));
#ifdef CONFIG_HIGHMEM
show_val_kb(m, "HighTotal: ", i.totalhigh);
@@ -114,9 +114,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ",
global_node_page_state(NR_SLAB_UNRECLAIMABLE));
seq_printf(m, "KernelStack: %8lu kB\n",
- global_page_state(NR_KERNEL_STACK_KB));
+ global_zone_page_state(NR_KERNEL_STACK_KB));
show_val_kb(m, "PageTables: ",
- global_page_state(NR_PAGETABLE));
+ global_zone_page_state(NR_PAGETABLE));
#ifdef CONFIG_QUICKLIST
show_val_kb(m, "Quicklists: ", quicklist_total_size());
#endif
@@ -124,7 +124,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "NFS_Unstable: ",
global_node_page_state(NR_UNSTABLE_NFS));
show_val_kb(m, "Bounce: ",
- global_page_state(NR_BOUNCE));
+ global_zone_page_state(NR_BOUNCE));
show_val_kb(m, "WritebackTmp: ",
global_node_page_state(NR_WRITEBACK_TEMP));
show_val_kb(m, "CommitLimit: ", vm_commit_limit());
@@ -151,7 +151,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#ifdef CONFIG_CMA
show_val_kb(m, "CmaTotal: ", totalcma_pages);
show_val_kb(m, "CmaFree: ",
- global_page_state(NR_FREE_CMA_PAGES));
+ global_zone_page_state(NR_FREE_CMA_PAGES));
#endif
hugetlb_report_meminfo(m);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index d72fc40241d9..a2bf369c923d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -196,7 +196,7 @@ static __net_init int proc_net_ns_init(struct net *net)
if (!netd)
goto out;
- netd->subdir = RB_ROOT;
+ netd->subdir = RB_ROOT_CACHED;
netd->data = net;
netd->nlink = 2;
netd->namelen = 3;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index deecb397daa3..926fb27f4ca2 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -210,7 +210,7 @@ struct proc_dir_entry proc_root = {
.proc_iops = &proc_root_inode_operations,
.proc_fops = &proc_root_operations,
.parent = &proc_root,
- .subdir = RB_ROOT,
+ .subdir = RB_ROOT_CACHED,
.name = "/proc",
};
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fe8f3265e877..7b40e11ede9b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -253,6 +253,7 @@ static int proc_map_release(struct inode *inode, struct file *file)
if (priv->mm)
mmdrop(priv->mm);
+ kfree(priv->rollup);
return seq_release_private(inode, file);
}
@@ -267,8 +268,7 @@ static int do_maps_open(struct inode *inode, struct file *file,
* Indicate if the VMA is a stack for the given task; for
* /proc/PID/maps that is the stack of the main task.
*/
-static int is_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma)
+static int is_stack(struct vm_area_struct *vma)
{
/*
* We make no effort to guess what a given thread considers to be
@@ -279,12 +279,28 @@ static int is_stack(struct proc_maps_private *priv,
vma->vm_end >= vma->vm_mm->start_stack;
}
+static void show_vma_header_prefix(struct seq_file *m,
+ unsigned long start, unsigned long end,
+ vm_flags_t flags, unsigned long long pgoff,
+ dev_t dev, unsigned long ino)
+{
+ seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
+ seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+ start,
+ end,
+ flags & VM_READ ? 'r' : '-',
+ flags & VM_WRITE ? 'w' : '-',
+ flags & VM_EXEC ? 'x' : '-',
+ flags & VM_MAYSHARE ? 's' : 'p',
+ pgoff,
+ MAJOR(dev), MINOR(dev), ino);
+}
+
static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
{
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
- struct proc_maps_private *priv = m->private;
vm_flags_t flags = vma->vm_flags;
unsigned long ino = 0;
unsigned long long pgoff = 0;
@@ -301,17 +317,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
start = vma->vm_start;
end = vma->vm_end;
-
- seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
- seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
- start,
- end,
- flags & VM_READ ? 'r' : '-',
- flags & VM_WRITE ? 'w' : '-',
- flags & VM_EXEC ? 'x' : '-',
- flags & VM_MAYSHARE ? 's' : 'p',
- pgoff,
- MAJOR(dev), MINOR(dev), ino);
+ show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
/*
* Print the dentry name for named mappings, and a
@@ -342,7 +348,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
goto done;
}
- if (is_stack(priv, vma))
+ if (is_stack(vma))
name = "[stack]";
}
@@ -430,6 +436,7 @@ const struct file_operations proc_tid_maps_operations = {
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
+ bool first;
unsigned long resident;
unsigned long shared_clean;
unsigned long shared_dirty;
@@ -443,7 +450,9 @@ struct mem_size_stats {
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
+ unsigned long first_vma_start;
u64 pss;
+ u64 pss_locked;
u64 swap_pss;
bool check_shmem_swap;
};
@@ -538,6 +547,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
}
} else if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
+ else if (is_device_private_entry(swpent))
+ page = device_private_entry_to_page(swpent);
} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
&& pte_none(*pte))) {
page = find_get_entry(vma->vm_file->f_mapping,
@@ -597,13 +608,14 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
- smaps_pmd_entry(pmd, addr, walk);
+ if (pmd_present(*pmd))
+ smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
- return 0;
+ goto out;
}
if (pmd_trans_unstable(pmd))
- return 0;
+ goto out;
/*
* The mmap_sem held all the way back in m_start() is what
* keeps khugepaged out of here and from collapsing things
@@ -613,6 +625,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
for (; addr != end; pte++, addr += PAGE_SIZE)
smaps_pte_entry(pte, addr, walk);
pte_unmap_unlock(pte - 1, ptl);
+out:
cond_resched();
return 0;
}
@@ -652,6 +665,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_NORESERVE)] = "nr",
[ilog2(VM_HUGETLB)] = "ht",
[ilog2(VM_ARCH_1)] = "ar",
+ [ilog2(VM_WIPEONFORK)] = "wf",
[ilog2(VM_DONTDUMP)] = "dd",
#ifdef CONFIG_MEM_SOFT_DIRTY
[ilog2(VM_SOFTDIRTY)] = "sd",
@@ -700,6 +714,8 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
+ else if (is_device_private_entry(swpent))
+ page = device_private_entry_to_page(swpent);
}
if (page) {
int mapcount = page_mapcount(page);
@@ -719,18 +735,36 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
static int show_smap(struct seq_file *m, void *v, int is_pid)
{
+ struct proc_maps_private *priv = m->private;
struct vm_area_struct *vma = v;
- struct mem_size_stats mss;
+ struct mem_size_stats mss_stack;
+ struct mem_size_stats *mss;
struct mm_walk smaps_walk = {
.pmd_entry = smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
.hugetlb_entry = smaps_hugetlb_range,
#endif
.mm = vma->vm_mm,
- .private = &mss,
};
+ int ret = 0;
+ bool rollup_mode;
+ bool last_vma;
+
+ if (priv->rollup) {
+ rollup_mode = true;
+ mss = priv->rollup;
+ if (mss->first) {
+ mss->first_vma_start = vma->vm_start;
+ mss->first = false;
+ }
+ last_vma = !m_next_vma(priv, vma);
+ } else {
+ rollup_mode = false;
+ memset(&mss_stack, 0, sizeof(mss_stack));
+ mss = &mss_stack;
+ }
- memset(&mss, 0, sizeof mss);
+ smaps_walk.private = mss;
#ifdef CONFIG_SHMEM
if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
@@ -748,9 +782,9 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
!(vma->vm_flags & VM_WRITE)) {
- mss.swap = shmem_swapped;
+ mss->swap = shmem_swapped;
} else {
- mss.check_shmem_swap = true;
+ mss->check_shmem_swap = true;
smaps_walk.pte_hole = smaps_pte_hole;
}
}
@@ -758,54 +792,71 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
/* mmap_sem is held in m_start */
walk_page_vma(vma, &smaps_walk);
+ if (vma->vm_flags & VM_LOCKED)
+ mss->pss_locked += mss->pss;
+
+ if (!rollup_mode) {
+ show_map_vma(m, vma, is_pid);
+ } else if (last_vma) {
+ show_vma_header_prefix(
+ m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
+ seq_pad(m, ' ');
+ seq_puts(m, "[rollup]\n");
+ } else {
+ ret = SEQ_SKIP;
+ }
- show_map_vma(m, vma, is_pid);
-
- seq_printf(m,
- "Size: %8lu kB\n"
- "Rss: %8lu kB\n"
- "Pss: %8lu kB\n"
- "Shared_Clean: %8lu kB\n"
- "Shared_Dirty: %8lu kB\n"
- "Private_Clean: %8lu kB\n"
- "Private_Dirty: %8lu kB\n"
- "Referenced: %8lu kB\n"
- "Anonymous: %8lu kB\n"
- "LazyFree: %8lu kB\n"
- "AnonHugePages: %8lu kB\n"
- "ShmemPmdMapped: %8lu kB\n"
- "Shared_Hugetlb: %8lu kB\n"
- "Private_Hugetlb: %7lu kB\n"
- "Swap: %8lu kB\n"
- "SwapPss: %8lu kB\n"
- "KernelPageSize: %8lu kB\n"
- "MMUPageSize: %8lu kB\n"
- "Locked: %8lu kB\n",
- (vma->vm_end - vma->vm_start) >> 10,
- mss.resident >> 10,
- (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
- mss.shared_clean >> 10,
- mss.shared_dirty >> 10,
- mss.private_clean >> 10,
- mss.private_dirty >> 10,
- mss.referenced >> 10,
- mss.anonymous >> 10,
- mss.lazyfree >> 10,
- mss.anonymous_thp >> 10,
- mss.shmem_thp >> 10,
- mss.shared_hugetlb >> 10,
- mss.private_hugetlb >> 10,
- mss.swap >> 10,
- (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)),
- vma_kernel_pagesize(vma) >> 10,
- vma_mmu_pagesize(vma) >> 10,
- (vma->vm_flags & VM_LOCKED) ?
- (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
-
- arch_show_smap(m, vma);
- show_smap_vma_flags(m, vma);
+ if (!rollup_mode)
+ seq_printf(m,
+ "Size: %8lu kB\n"
+ "KernelPageSize: %8lu kB\n"
+ "MMUPageSize: %8lu kB\n",
+ (vma->vm_end - vma->vm_start) >> 10,
+ vma_kernel_pagesize(vma) >> 10,
+ vma_mmu_pagesize(vma) >> 10);
+
+
+ if (!rollup_mode || last_vma)
+ seq_printf(m,
+ "Rss: %8lu kB\n"
+ "Pss: %8lu kB\n"
+ "Shared_Clean: %8lu kB\n"
+ "Shared_Dirty: %8lu kB\n"
+ "Private_Clean: %8lu kB\n"
+ "Private_Dirty: %8lu kB\n"
+ "Referenced: %8lu kB\n"
+ "Anonymous: %8lu kB\n"
+ "LazyFree: %8lu kB\n"
+ "AnonHugePages: %8lu kB\n"
+ "ShmemPmdMapped: %8lu kB\n"
+ "Shared_Hugetlb: %8lu kB\n"
+ "Private_Hugetlb: %7lu kB\n"
+ "Swap: %8lu kB\n"
+ "SwapPss: %8lu kB\n"
+ "Locked: %8lu kB\n",
+ mss->resident >> 10,
+ (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
+ mss->shared_clean >> 10,
+ mss->shared_dirty >> 10,
+ mss->private_clean >> 10,
+ mss->private_dirty >> 10,
+ mss->referenced >> 10,
+ mss->anonymous >> 10,
+ mss->lazyfree >> 10,
+ mss->anonymous_thp >> 10,
+ mss->shmem_thp >> 10,
+ mss->shared_hugetlb >> 10,
+ mss->private_hugetlb >> 10,
+ mss->swap >> 10,
+ (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
+ (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+
+ if (!rollup_mode) {
+ arch_show_smap(m, vma);
+ show_smap_vma_flags(m, vma);
+ }
m_cache_vma(m, vma);
- return 0;
+ return ret;
}
static int show_pid_smap(struct seq_file *m, void *v)
@@ -837,6 +888,25 @@ static int pid_smaps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_smaps_op);
}
+static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ struct proc_maps_private *priv;
+ int ret = do_maps_open(inode, file, &proc_pid_smaps_op);
+
+ if (ret < 0)
+ return ret;
+ seq = file->private_data;
+ priv = seq->private;
+ priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
+ if (!priv->rollup) {
+ proc_map_release(inode, file);
+ return -ENOMEM;
+ }
+ priv->rollup->first = true;
+ return 0;
+}
+
static int tid_smaps_open(struct inode *inode, struct file *file)
{
return do_maps_open(inode, file, &proc_tid_smaps_op);
@@ -849,6 +919,13 @@ const struct file_operations proc_pid_smaps_operations = {
.release = proc_map_release,
};
+const struct file_operations proc_pid_smaps_rollup_operations = {
+ .open = pid_smaps_rollup_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = proc_map_release,
+};
+
const struct file_operations proc_tid_smaps_operations = {
.open = tid_smaps_open,
.read = seq_read,
@@ -904,17 +981,22 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
{
pmd_t pmd = *pmdp;
- /* See comment in change_huge_pmd() */
- pmdp_invalidate(vma, addr, pmdp);
- if (pmd_dirty(*pmdp))
- pmd = pmd_mkdirty(pmd);
- if (pmd_young(*pmdp))
- pmd = pmd_mkyoung(pmd);
-
- pmd = pmd_wrprotect(pmd);
- pmd = pmd_clear_soft_dirty(pmd);
-
- set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+ if (pmd_present(pmd)) {
+ /* See comment in change_huge_pmd() */
+ pmdp_invalidate(vma, addr, pmdp);
+ if (pmd_dirty(*pmdp))
+ pmd = pmd_mkdirty(pmd);
+ if (pmd_young(*pmdp))
+ pmd = pmd_mkyoung(pmd);
+
+ pmd = pmd_wrprotect(pmd);
+ pmd = pmd_clear_soft_dirty(pmd);
+
+ set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+ } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+ pmd = pmd_swp_clear_soft_dirty(pmd);
+ set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+ }
}
#else
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
@@ -939,6 +1021,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
goto out;
}
+ if (!pmd_present(*pmd))
+ goto out;
+
page = pmd_page(*pmd);
/* Clear accessed and referenced bits. */
@@ -1181,7 +1266,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pm->show_pfn)
frame = pte_pfn(pte);
flags |= PM_PRESENT;
- page = vm_normal_page(vma, addr, pte);
+ page = _vm_normal_page(vma, addr, pte, true);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
@@ -1194,6 +1279,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_SWAP;
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
+
+ if (is_device_private_entry(entry))
+ page = device_private_entry_to_page(entry);
}
if (page && !PageAnon(page))
@@ -1220,27 +1308,33 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+ struct page *page = NULL;
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
- /*
- * Currently pmd for thp is always present because thp
- * can not be swapped-out, migrated, or HWPOISONed
- * (split in such cases instead.)
- * This if-check is just to prepare for future implementation.
- */
if (pmd_present(pmd)) {
- struct page *page = pmd_page(pmd);
-
- if (page_mapcount(page) == 1)
- flags |= PM_MMAP_EXCLUSIVE;
+ page = pmd_page(pmd);
flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pmd_pfn(pmd) +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
}
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+ else if (is_swap_pmd(pmd)) {
+ swp_entry_t entry = pmd_to_swp_entry(pmd);
+
+ frame = swp_type(entry) |
+ (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+ flags |= PM_SWAP;
+ VM_BUG_ON(!is_pmd_migration_entry(pmd));
+ page = migration_entry_to_page(entry);
+ }
+#endif
+
+ if (page && page_mapcount(page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
for (; addr != end; addr += PAGE_SIZE) {
pagemap_entry_t pme = make_pme(frame, flags);
@@ -1673,7 +1767,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
seq_file_path(m, file, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_puts(m, " heap");
- } else if (is_stack(proc_priv, vma)) {
+ } else if (is_stack(vma)) {
seq_puts(m, " stack");
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 23266694db11..dea90b566a6e 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -125,8 +125,7 @@ unsigned long task_statm(struct mm_struct *mm,
return size;
}
-static int is_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma)
+static int is_stack(struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
@@ -178,7 +177,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "");
- } else if (mm && is_stack(priv, vma)) {
+ } else if (mm && is_stack(vma)) {
seq_pad(m, ' ');
seq_printf(m, "[stack]");
}
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index fefd22611cf6..d814723fb27d 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -36,7 +36,6 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
-#include <linux/syslog.h>
#include "internal.h"
@@ -132,18 +131,6 @@ static const struct seq_operations pstore_ftrace_seq_ops = {
.show = pstore_ftrace_seq_show,
};
-static int pstore_check_syslog_permissions(struct pstore_private *ps)
-{
- switch (ps->record->type) {
- case PSTORE_TYPE_DMESG:
- case PSTORE_TYPE_CONSOLE:
- return check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
- SYSLOG_FROM_READER);
- default:
- return 0;
- }
-}
-
static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
@@ -163,10 +150,6 @@ static int pstore_file_open(struct inode *inode, struct file *file)
int err;
const struct seq_operations *sops = NULL;
- err = pstore_check_syslog_permissions(ps);
- if (err)
- return err;
-
if (ps->record->type == PSTORE_TYPE_FTRACE)
sops = &pstore_ftrace_seq_ops;
@@ -204,11 +187,6 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = d_inode(dentry)->i_private;
struct pstore_record *record = p->record;
- int err;
-
- err = pstore_check_syslog_permissions(p);
- if (err)
- return err;
if (!record->psi->erase)
return -EPERM;
@@ -471,7 +449,7 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent)
inode = pstore_get_inode(sb);
if (inode) {
- inode->i_mode = S_IFDIR | 0755;
+ inode->i_mode = S_IFDIR | 0750;
inode->i_op = &pstore_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
inc_nlink(inode);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 566e6ef99f07..8381db9db6d9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,16 +82,19 @@
#include <linux/uaccess.h>
/*
- * There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats.
- * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
- * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
- * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
- * in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects
- * modifications of quota state (on quotaon and quotaoff) and readers who care
- * about latest values take it as well.
+ * There are five quota SMP locks:
+ * * dq_list_lock protects all lists with quotas and quota formats.
+ * * dquot->dq_dqb_lock protects data from dq_dqb
+ * * inode->i_lock protects inode->i_blocks, i_bytes and also guards
+ * consistency of dquot->dq_dqb with inode->i_blocks, i_bytes so that
+ * dquot_transfer() can stabilize amount it transfers
+ * * dq_data_lock protects mem_dqinfo structures and modifications of dquot
+ * pointers in the inode
+ * * dq_state_lock protects modifications of quota state (on quotaon and
+ * quotaoff) and readers who care about latest values take it as well.
*
- * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock,
+ * The spinlock ordering is hence:
+ * dq_data_lock > dq_list_lock > i_lock > dquot->dq_dqb_lock,
* dq_list_lock > dq_state_lock
*
* Note that some things (eg. sb pointer, type, id) doesn't change during
@@ -110,17 +113,14 @@
* sure they cannot race with quotaon which first sets S_NOQUOTA flag and
* then drops all pointers to dquots from an inode.
*
- * Each dquot has its dq_lock mutex. Locked dquots might not be referenced
- * from inodes (dquot_alloc_space() and such don't check the dq_lock).
- * Currently dquot is locked only when it is being read to memory (or space for
- * it is being allocated) on the first dqget() and when it is being released on
- * the last dqput(). The allocation and release oparations are serialized by
- * the dq_lock and by checking the use count in dquot_release(). Write
- * operations on dquots don't hold dq_lock as they copy data under dq_data_lock
- * spinlock to internal buffers before writing.
+ * Each dquot has its dq_lock mutex. Dquot is locked when it is being read to
+ * memory (or space for it is being allocated) on the first dqget(), when it is
+ * being written out, and when it is being released on the last dqput(). The
+ * allocation and release operations are serialized by the dq_lock and by
+ * checking the use count in dquot_release().
*
* Lock ordering (including related VFS locks) is the following:
- * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
+ * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_sem
*/
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
@@ -129,6 +129,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
DEFINE_STATIC_SRCU(dquot_srcu);
+static DECLARE_WAIT_QUEUE_HEAD(dquot_ref_wq);
+
void __quota_error(struct super_block *sb, const char *func,
const char *fmt, ...)
{
@@ -247,6 +249,7 @@ struct dqstats dqstats;
EXPORT_SYMBOL(dqstats);
static qsize_t inode_get_rsv_space(struct inode *inode);
+static qsize_t __inode_get_rsv_space(struct inode *inode);
static int __dquot_initialize(struct inode *inode, int type);
static inline unsigned int
@@ -342,6 +345,12 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
{
int ret = 1;
+ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ return 0;
+
+ if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
+ return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags);
+
/* If quota is dirty already, we don't have to acquire dq_list_lock */
if (test_bit(DQ_MOD_B, &dquot->dq_flags))
return 1;
@@ -381,18 +390,26 @@ static inline void dqput_all(struct dquot **dquot)
dqput(dquot[cnt]);
}
-/* This function needs dq_list_lock */
static inline int clear_dquot_dirty(struct dquot *dquot)
{
- if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags))
+ if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
+ return test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags);
+
+ spin_lock(&dq_list_lock);
+ if (!test_and_clear_bit(DQ_MOD_B, &dquot->dq_flags)) {
+ spin_unlock(&dq_list_lock);
return 0;
+ }
list_del_init(&dquot->dq_dirty);
+ spin_unlock(&dq_list_lock);
return 1;
}
void mark_info_dirty(struct super_block *sb, int type)
{
- set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags);
+ spin_lock(&dq_data_lock);
+ sb_dqopt(sb)->info[type].dqi_flags |= DQF_INFO_DIRTY;
+ spin_unlock(&dq_data_lock);
}
EXPORT_SYMBOL(mark_info_dirty);
@@ -406,7 +423,6 @@ int dquot_acquire(struct dquot *dquot)
struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
mutex_lock(&dquot->dq_lock);
- mutex_lock(&dqopt->dqio_mutex);
if (!test_bit(DQ_READ_B, &dquot->dq_flags))
ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
if (ret < 0)
@@ -436,7 +452,6 @@ int dquot_acquire(struct dquot *dquot)
smp_mb__before_atomic();
set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_iolock:
- mutex_unlock(&dqopt->dqio_mutex);
mutex_unlock(&dquot->dq_lock);
return ret;
}
@@ -450,21 +465,17 @@ int dquot_commit(struct dquot *dquot)
int ret = 0;
struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
- mutex_lock(&dqopt->dqio_mutex);
- spin_lock(&dq_list_lock);
- if (!clear_dquot_dirty(dquot)) {
- spin_unlock(&dq_list_lock);
- goto out_sem;
- }
- spin_unlock(&dq_list_lock);
+ mutex_lock(&dquot->dq_lock);
+ if (!clear_dquot_dirty(dquot))
+ goto out_lock;
/* Inactive dquot can be only if there was error during read/init
* => we have better not writing it */
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
else
ret = -EIO;
-out_sem:
- mutex_unlock(&dqopt->dqio_mutex);
+out_lock:
+ mutex_unlock(&dquot->dq_lock);
return ret;
}
EXPORT_SYMBOL(dquot_commit);
@@ -481,7 +492,6 @@ int dquot_release(struct dquot *dquot)
/* Check whether we are not racing with some other dqget() */
if (atomic_read(&dquot->dq_count) > 1)
goto out_dqlock;
- mutex_lock(&dqopt->dqio_mutex);
if (dqopt->ops[dquot->dq_id.type]->release_dqblk) {
ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot);
/* Write the info */
@@ -493,7 +503,6 @@ int dquot_release(struct dquot *dquot)
ret = ret2;
}
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
- mutex_unlock(&dqopt->dqio_mutex);
out_dqlock:
mutex_unlock(&dquot->dq_lock);
return ret;
@@ -530,22 +539,18 @@ restart:
continue;
/* Wait for dquot users */
if (atomic_read(&dquot->dq_count)) {
- DEFINE_WAIT(wait);
-
dqgrab(dquot);
- prepare_to_wait(&dquot->dq_wait_unused, &wait,
- TASK_UNINTERRUPTIBLE);
spin_unlock(&dq_list_lock);
- /* Once dqput() wakes us up, we know it's time to free
+ /*
+ * Once dqput() wakes us up, we know it's time to free
* the dquot.
* IMPORTANT: we rely on the fact that there is always
* at most one process waiting for dquot to free.
* Otherwise dq_count would be > 1 and we would never
* wake up.
*/
- if (atomic_read(&dquot->dq_count) > 1)
- schedule();
- finish_wait(&dquot->dq_wait_unused, &wait);
+ wait_event(dquot_ref_wq,
+ atomic_read(&dquot->dq_count) == 1);
dqput(dquot);
/* At this moment dquot() need not exist (it could be
* reclaimed by prune_dqcache(). Hence we must
@@ -629,11 +634,9 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
while (!list_empty(dirty)) {
dquot = list_first_entry(dirty, struct dquot,
dq_dirty);
- /* Dirty and inactive can be only bad dquot... */
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
- clear_dquot_dirty(dquot);
- continue;
- }
+
+ WARN_ON(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags));
+
/* Now we have active dquot from which someone is
* holding reference so we can safely just increase
* use count */
@@ -759,12 +762,12 @@ we_slept:
/* Releasing dquot during quotaoff phase? */
if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) &&
atomic_read(&dquot->dq_count) == 1)
- wake_up(&dquot->dq_wait_unused);
+ wake_up(&dquot_ref_wq);
spin_unlock(&dq_list_lock);
return;
}
/* Need to release dquot? */
- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
+ if (dquot_dirty(dquot)) {
spin_unlock(&dq_list_lock);
/* Commit dquot before releasing */
ret = dquot->dq_sb->dq_op->write_dquot(dquot);
@@ -776,14 +779,10 @@ we_slept:
* We clear dirty bit anyway, so that we avoid
* infinite loop here
*/
- spin_lock(&dq_list_lock);
clear_dquot_dirty(dquot);
- spin_unlock(&dq_list_lock);
}
goto we_slept;
}
- /* Clear flag in case dquot was inactive (something bad happened) */
- clear_dquot_dirty(dquot);
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
spin_unlock(&dq_list_lock);
dquot->dq_sb->dq_op->release_dquot(dquot);
@@ -818,10 +817,10 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
INIT_LIST_HEAD(&dquot->dq_inuse);
INIT_HLIST_NODE(&dquot->dq_hash);
INIT_LIST_HEAD(&dquot->dq_dirty);
- init_waitqueue_head(&dquot->dq_wait_unused);
dquot->dq_sb = sb;
dquot->dq_id = make_kqid_invalid(type);
atomic_set(&dquot->dq_count, 1);
+ spin_lock_init(&dquot->dq_dqb_lock);
return dquot;
}
@@ -1079,42 +1078,6 @@ static void drop_dquot_ref(struct super_block *sb, int type)
}
}
-static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number)
-{
- dquot->dq_dqb.dqb_curinodes += number;
-}
-
-static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
-{
- dquot->dq_dqb.dqb_curspace += number;
-}
-
-static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
-{
- dquot->dq_dqb.dqb_rsvspace += number;
-}
-
-/*
- * Claim reserved quota space
- */
-static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
-{
- if (dquot->dq_dqb.dqb_rsvspace < number) {
- WARN_ON_ONCE(1);
- number = dquot->dq_dqb.dqb_rsvspace;
- }
- dquot->dq_dqb.dqb_curspace += number;
- dquot->dq_dqb.dqb_rsvspace -= number;
-}
-
-static void dquot_reclaim_reserved_space(struct dquot *dquot, qsize_t number)
-{
- if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
- number = dquot->dq_dqb.dqb_curspace;
- dquot->dq_dqb.dqb_rsvspace += number;
- dquot->dq_dqb.dqb_curspace -= number;
-}
-
static inline
void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
{
@@ -1278,21 +1241,24 @@ static int ignore_hardlimit(struct dquot *dquot)
!(info->dqi_flags & DQF_ROOT_SQUASH));
}
-/* needs dq_data_lock */
-static int check_idq(struct dquot *dquot, qsize_t inodes,
- struct dquot_warn *warn)
+static int dquot_add_inodes(struct dquot *dquot, qsize_t inodes,
+ struct dquot_warn *warn)
{
- qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
+ qsize_t newinodes;
+ int ret = 0;
+ spin_lock(&dquot->dq_dqb_lock);
+ newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
- return 0;
+ goto add;
if (dquot->dq_dqb.dqb_ihardlimit &&
newinodes > dquot->dq_dqb.dqb_ihardlimit &&
!ignore_hardlimit(dquot)) {
prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN);
- return -EDQUOT;
+ ret = -EDQUOT;
+ goto out;
}
if (dquot->dq_dqb.dqb_isoftlimit &&
@@ -1301,7 +1267,8 @@ static int check_idq(struct dquot *dquot, qsize_t inodes,
ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime &&
!ignore_hardlimit(dquot)) {
prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN);
- return -EDQUOT;
+ ret = -EDQUOT;
+ goto out;
}
if (dquot->dq_dqb.dqb_isoftlimit &&
@@ -1311,30 +1278,40 @@ static int check_idq(struct dquot *dquot, qsize_t inodes,
dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() +
sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace;
}
+add:
+ dquot->dq_dqb.dqb_curinodes = newinodes;
- return 0;
+out:
+ spin_unlock(&dquot->dq_dqb_lock);
+ return ret;
}
-/* needs dq_data_lock */
-static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
- struct dquot_warn *warn)
+static int dquot_add_space(struct dquot *dquot, qsize_t space,
+ qsize_t rsv_space, unsigned int flags,
+ struct dquot_warn *warn)
{
qsize_t tspace;
struct super_block *sb = dquot->dq_sb;
+ int ret = 0;
+ spin_lock(&dquot->dq_dqb_lock);
if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags))
- return 0;
+ goto add;
tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
- + space;
+ + space + rsv_space;
+
+ if (flags & DQUOT_SPACE_NOFAIL)
+ goto add;
if (dquot->dq_dqb.dqb_bhardlimit &&
tspace > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) {
- if (!prealloc)
+ if (flags & DQUOT_SPACE_WARN)
prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN);
- return -EDQUOT;
+ ret = -EDQUOT;
+ goto out;
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1342,28 +1319,34 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
dquot->dq_dqb.dqb_btime &&
ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime &&
!ignore_hardlimit(dquot)) {
- if (!prealloc)
+ if (flags & DQUOT_SPACE_WARN)
prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN);
- return -EDQUOT;
+ ret = -EDQUOT;
+ goto out;
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
tspace > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime == 0) {
- if (!prealloc) {
+ if (flags & DQUOT_SPACE_WARN) {
prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN);
dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() +
sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace;
- }
- else
+ } else {
/*
* We don't allow preallocation to exceed softlimit so exceeding will
* be always printed
*/
- return -EDQUOT;
+ ret = -EDQUOT;
+ goto out;
+ }
}
-
- return 0;
+add:
+ dquot->dq_dqb.dqb_rsvspace += rsv_space;
+ dquot->dq_dqb.dqb_curspace += space;
+out:
+ spin_unlock(&dquot->dq_dqb_lock);
+ return ret;
}
static int info_idq_free(struct dquot *dquot, qsize_t inodes)
@@ -1502,8 +1485,15 @@ static int __dquot_initialize(struct inode *inode, int type)
* did a write before quota was turned on
*/
rsv = inode_get_rsv_space(inode);
- if (unlikely(rsv))
- dquot_resv_space(dquots[cnt], rsv);
+ if (unlikely(rsv)) {
+ spin_lock(&inode->i_lock);
+ /* Get reservation again under proper lock */
+ rsv = __inode_get_rsv_space(inode);
+ spin_lock(&dquots[cnt]->dq_dqb_lock);
+ dquots[cnt]->dq_dqb.dqb_rsvspace += rsv;
+ spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ spin_unlock(&inode->i_lock);
+ }
}
}
out_lock:
@@ -1598,39 +1588,12 @@ static qsize_t *inode_reserved_space(struct inode * inode)
return inode->i_sb->dq_op->get_reserved_space(inode);
}
-void inode_add_rsv_space(struct inode *inode, qsize_t number)
-{
- spin_lock(&inode->i_lock);
- *inode_reserved_space(inode) += number;
- spin_unlock(&inode->i_lock);
-}
-EXPORT_SYMBOL(inode_add_rsv_space);
-
-void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+static qsize_t __inode_get_rsv_space(struct inode *inode)
{
- spin_lock(&inode->i_lock);
- *inode_reserved_space(inode) -= number;
- __inode_add_bytes(inode, number);
- spin_unlock(&inode->i_lock);
-}
-EXPORT_SYMBOL(inode_claim_rsv_space);
-
-void inode_reclaim_rsv_space(struct inode *inode, qsize_t number)
-{
- spin_lock(&inode->i_lock);
- *inode_reserved_space(inode) += number;
- __inode_sub_bytes(inode, number);
- spin_unlock(&inode->i_lock);
-}
-EXPORT_SYMBOL(inode_reclaim_rsv_space);
-
-void inode_sub_rsv_space(struct inode *inode, qsize_t number)
-{
- spin_lock(&inode->i_lock);
- *inode_reserved_space(inode) -= number;
- spin_unlock(&inode->i_lock);
+ if (!inode->i_sb->dq_op->get_reserved_space)
+ return 0;
+ return *inode_reserved_space(inode);
}
-EXPORT_SYMBOL(inode_sub_rsv_space);
static qsize_t inode_get_rsv_space(struct inode *inode)
{
@@ -1639,28 +1602,11 @@ static qsize_t inode_get_rsv_space(struct inode *inode)
if (!inode->i_sb->dq_op->get_reserved_space)
return 0;
spin_lock(&inode->i_lock);
- ret = *inode_reserved_space(inode);
+ ret = __inode_get_rsv_space(inode);
spin_unlock(&inode->i_lock);
return ret;
}
-static void inode_incr_space(struct inode *inode, qsize_t number,
- int reserve)
-{
- if (reserve)
- inode_add_rsv_space(inode, number);
- else
- inode_add_bytes(inode, number);
-}
-
-static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
-{
- if (reserve)
- inode_sub_rsv_space(inode, number);
- else
- inode_sub_bytes(inode, number);
-}
-
/*
* This functions updates i_blocks+i_bytes fields and quota information
* (together with appropriate checks).
@@ -1682,7 +1628,13 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
struct dquot **dquots;
if (!dquot_active(inode)) {
- inode_incr_space(inode, number, reserve);
+ if (reserve) {
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) += number;
+ spin_unlock(&inode->i_lock);
+ } else {
+ inode_add_bytes(inode, number);
+ }
goto out;
}
@@ -1691,27 +1643,41 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
- spin_lock(&dq_data_lock);
+ spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
continue;
- ret = check_bdq(dquots[cnt], number,
- !(flags & DQUOT_SPACE_WARN), &warn[cnt]);
- if (ret && !(flags & DQUOT_SPACE_NOFAIL)) {
- spin_unlock(&dq_data_lock);
+ if (flags & DQUOT_SPACE_RESERVE) {
+ ret = dquot_add_space(dquots[cnt], 0, number, flags,
+ &warn[cnt]);
+ } else {
+ ret = dquot_add_space(dquots[cnt], number, 0, flags,
+ &warn[cnt]);
+ }
+ if (ret) {
+ /* Back out changes we already did */
+ for (cnt--; cnt >= 0; cnt--) {
+ if (!dquots[cnt])
+ continue;
+ spin_lock(&dquots[cnt]->dq_dqb_lock);
+ if (flags & DQUOT_SPACE_RESERVE) {
+ dquots[cnt]->dq_dqb.dqb_rsvspace -=
+ number;
+ } else {
+ dquots[cnt]->dq_dqb.dqb_curspace -=
+ number;
+ }
+ spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ }
+ spin_unlock(&inode->i_lock);
goto out_flush_warn;
}
}
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!dquots[cnt])
- continue;
- if (reserve)
- dquot_resv_space(dquots[cnt], number);
- else
- dquot_incr_space(dquots[cnt], number);
- }
- inode_incr_space(inode, number, reserve);
- spin_unlock(&dq_data_lock);
+ if (reserve)
+ *inode_reserved_space(inode) += number;
+ else
+ __inode_add_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
if (reserve)
goto out_flush_warn;
@@ -1740,23 +1706,26 @@ int dquot_alloc_inode(struct inode *inode)
dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
- spin_lock(&dq_data_lock);
+ spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
continue;
- ret = check_idq(dquots[cnt], 1, &warn[cnt]);
- if (ret)
+ ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]);
+ if (ret) {
+ for (cnt--; cnt >= 0; cnt--) {
+ if (!dquots[cnt])
+ continue;
+ /* Back out changes we already did */
+ spin_lock(&dquots[cnt]->dq_dqb_lock);
+ dquots[cnt]->dq_dqb.dqb_curinodes--;
+ spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ }
goto warn_put_all;
- }
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!dquots[cnt])
- continue;
- dquot_incr_inodes(dquots[cnt], 1);
+ }
}
warn_put_all:
- spin_unlock(&dq_data_lock);
+ spin_unlock(&inode->i_lock);
if (ret == 0)
mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index);
@@ -1774,21 +1743,33 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
int cnt, index;
if (!dquot_active(inode)) {
- inode_claim_rsv_space(inode, number);
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ __inode_add_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
return 0;
}
dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
- spin_lock(&dq_data_lock);
+ spin_lock(&inode->i_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (dquots[cnt])
- dquot_claim_reserved_space(dquots[cnt], number);
+ if (dquots[cnt]) {
+ struct dquot *dquot = dquots[cnt];
+
+ spin_lock(&dquot->dq_dqb_lock);
+ if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number))
+ number = dquot->dq_dqb.dqb_rsvspace;
+ dquot->dq_dqb.dqb_curspace += number;
+ dquot->dq_dqb.dqb_rsvspace -= number;
+ spin_unlock(&dquot->dq_dqb_lock);
+ }
}
/* Update inode bytes */
- inode_claim_rsv_space(inode, number);
- spin_unlock(&dq_data_lock);
+ *inode_reserved_space(inode) -= number;
+ __inode_add_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index);
return 0;
@@ -1804,21 +1785,33 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
int cnt, index;
if (!dquot_active(inode)) {
- inode_reclaim_rsv_space(inode, number);
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) += number;
+ __inode_sub_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
return;
}
dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
- spin_lock(&dq_data_lock);
+ spin_lock(&inode->i_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (dquots[cnt])
- dquot_reclaim_reserved_space(dquots[cnt], number);
+ if (dquots[cnt]) {
+ struct dquot *dquot = dquots[cnt];
+
+ spin_lock(&dquot->dq_dqb_lock);
+ if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
+ number = dquot->dq_dqb.dqb_curspace;
+ dquot->dq_dqb.dqb_rsvspace += number;
+ dquot->dq_dqb.dqb_curspace -= number;
+ spin_unlock(&dquot->dq_dqb_lock);
+ }
}
/* Update inode bytes */
- inode_reclaim_rsv_space(inode, number);
- spin_unlock(&dq_data_lock);
+ *inode_reserved_space(inode) += number;
+ __inode_sub_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index);
return;
@@ -1836,19 +1829,26 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
int reserve = flags & DQUOT_SPACE_RESERVE, index;
if (!dquot_active(inode)) {
- inode_decr_space(inode, number, reserve);
+ if (reserve) {
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ spin_unlock(&inode->i_lock);
+ } else {
+ inode_sub_bytes(inode, number);
+ }
return;
}
dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
- spin_lock(&dq_data_lock);
+ spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype;
warn[cnt].w_type = QUOTA_NL_NOWARN;
if (!dquots[cnt])
continue;
+ spin_lock(&dquots[cnt]->dq_dqb_lock);
wtype = info_bdq_free(dquots[cnt], number);
if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn[cnt], dquots[cnt], wtype);
@@ -1856,9 +1856,13 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
dquot_free_reserved_space(dquots[cnt], number);
else
dquot_decr_space(dquots[cnt], number);
+ spin_unlock(&dquots[cnt]->dq_dqb_lock);
}
- inode_decr_space(inode, number, reserve);
- spin_unlock(&dq_data_lock);
+ if (reserve)
+ *inode_reserved_space(inode) -= number;
+ else
+ __inode_sub_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
if (reserve)
goto out_unlock;
@@ -1884,19 +1888,21 @@ void dquot_free_inode(struct inode *inode)
dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
- spin_lock(&dq_data_lock);
+ spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype;
warn[cnt].w_type = QUOTA_NL_NOWARN;
if (!dquots[cnt])
continue;
+ spin_lock(&dquots[cnt]->dq_dqb_lock);
wtype = info_idq_free(dquots[cnt], 1);
if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn[cnt], dquots[cnt], wtype);
dquot_decr_inodes(dquots[cnt], 1);
+ spin_unlock(&dquots[cnt]->dq_dqb_lock);
}
- spin_unlock(&dq_data_lock);
+ spin_unlock(&inode->i_lock);
mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn);
@@ -1917,7 +1923,7 @@ EXPORT_SYMBOL(dquot_free_inode);
*/
int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
{
- qsize_t space, cur_space;
+ qsize_t cur_space;
qsize_t rsv_space = 0;
qsize_t inode_usage = 1;
struct dquot *transfer_from[MAXQUOTAS] = {};
@@ -1944,14 +1950,18 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
}
spin_lock(&dq_data_lock);
+ spin_lock(&inode->i_lock);
if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
+ spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock);
return 0;
}
- cur_space = inode_get_bytes(inode);
- rsv_space = inode_get_rsv_space(inode);
- space = cur_space + rsv_space;
- /* Build the transfer_from list and check the limits */
+ cur_space = __inode_get_bytes(inode);
+ rsv_space = __inode_get_rsv_space(inode);
+ /*
+ * Build the transfer_from list, check limits, and update usage in
+ * the target structures.
+ */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
/*
* Skip changes for same uid or gid or for turned off quota-type.
@@ -1963,28 +1973,33 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
continue;
is_valid[cnt] = 1;
transfer_from[cnt] = i_dquot(inode)[cnt];
- ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]);
+ ret = dquot_add_inodes(transfer_to[cnt], inode_usage,
+ &warn_to[cnt]);
if (ret)
goto over_quota;
- ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]);
- if (ret)
+ ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
+ &warn_to[cnt]);
+ if (ret) {
+ dquot_decr_inodes(transfer_to[cnt], inode_usage);
goto over_quota;
+ }
}
- /*
- * Finally perform the needed transfer from transfer_from to transfer_to
- */
+ /* Decrease usage for source structures and update quota pointers */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!is_valid[cnt])
continue;
/* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) {
int wtype;
+
+ spin_lock(&transfer_from[cnt]->dq_dqb_lock);
wtype = info_idq_free(transfer_from[cnt], inode_usage);
if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn_from_inodes[cnt],
transfer_from[cnt], wtype);
- wtype = info_bdq_free(transfer_from[cnt], space);
+ wtype = info_bdq_free(transfer_from[cnt],
+ cur_space + rsv_space);
if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn_from_space[cnt],
transfer_from[cnt], wtype);
@@ -1992,14 +2007,11 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
dquot_decr_space(transfer_from[cnt], cur_space);
dquot_free_reserved_space(transfer_from[cnt],
rsv_space);
+ spin_unlock(&transfer_from[cnt]->dq_dqb_lock);
}
-
- dquot_incr_inodes(transfer_to[cnt], inode_usage);
- dquot_incr_space(transfer_to[cnt], cur_space);
- dquot_resv_space(transfer_to[cnt], rsv_space);
-
i_dquot(inode)[cnt] = transfer_to[cnt];
}
+ spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(transfer_from);
@@ -2013,6 +2025,17 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
transfer_to[cnt] = transfer_from[cnt];
return 0;
over_quota:
+ /* Back out changes we already did */
+ for (cnt--; cnt >= 0; cnt--) {
+ if (!is_valid[cnt])
+ continue;
+ spin_lock(&transfer_to[cnt]->dq_dqb_lock);
+ dquot_decr_inodes(transfer_to[cnt], inode_usage);
+ dquot_decr_space(transfer_to[cnt], cur_space);
+ dquot_free_reserved_space(transfer_to[cnt], rsv_space);
+ spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
+ }
+ spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock);
flush_warnings(warn_to);
return ret;
@@ -2066,29 +2089,21 @@ EXPORT_SYMBOL(dquot_transfer);
*/
int dquot_commit_info(struct super_block *sb, int type)
{
- int ret;
struct quota_info *dqopt = sb_dqopt(sb);
- mutex_lock(&dqopt->dqio_mutex);
- ret = dqopt->ops[type]->write_file_info(sb, type);
- mutex_unlock(&dqopt->dqio_mutex);
- return ret;
+ return dqopt->ops[type]->write_file_info(sb, type);
}
EXPORT_SYMBOL(dquot_commit_info);
int dquot_get_next_id(struct super_block *sb, struct kqid *qid)
{
struct quota_info *dqopt = sb_dqopt(sb);
- int err;
if (!sb_has_quota_active(sb, qid->type))
return -ESRCH;
if (!dqopt->ops[qid->type]->get_next_id)
return -ENOSYS;
- mutex_lock(&dqopt->dqio_mutex);
- err = dqopt->ops[qid->type]->get_next_id(sb, qid);
- mutex_unlock(&dqopt->dqio_mutex);
- return err;
+ return dqopt->ops[qid->type]->get_next_id(sb, qid);
}
EXPORT_SYMBOL(dquot_get_next_id);
@@ -2337,15 +2352,14 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
dqopt->info[type].dqi_format = fmt;
dqopt->info[type].dqi_fmt_id = format_id;
INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list);
- mutex_lock(&dqopt->dqio_mutex);
error = dqopt->ops[type]->read_file_info(sb, type);
- if (error < 0) {
- mutex_unlock(&dqopt->dqio_mutex);
+ if (error < 0)
goto out_file_init;
- }
- if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) {
+ spin_lock(&dq_data_lock);
dqopt->info[type].dqi_flags |= DQF_SYS_FILE;
- mutex_unlock(&dqopt->dqio_mutex);
+ spin_unlock(&dq_data_lock);
+ }
spin_lock(&dq_state_lock);
dqopt->flags |= dquot_state_flag(flags, type);
spin_unlock(&dq_state_lock);
@@ -2572,7 +2586,7 @@ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di)
struct mem_dqblk *dm = &dquot->dq_dqb;
memset(di, 0, sizeof(*di));
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
di->d_spc_hardlimit = dm->dqb_bhardlimit;
di->d_spc_softlimit = dm->dqb_bsoftlimit;
di->d_ino_hardlimit = dm->dqb_ihardlimit;
@@ -2581,7 +2595,7 @@ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di)
di->d_ino_count = dm->dqb_curinodes;
di->d_spc_timer = dm->dqb_btime;
di->d_ino_timer = dm->dqb_itime;
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
}
int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
@@ -2645,7 +2659,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di)
(di->d_ino_hardlimit > dqi->dqi_max_ino_limit)))
return -ERANGE;
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
if (di->d_fieldmask & QC_SPACE) {
dm->dqb_curspace = di->d_space - dm->dqb_rsvspace;
check_blim = 1;
@@ -2711,7 +2725,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di)
clear_bit(DQ_FAKE_B, &dquot->dq_flags);
else
set_bit(DQ_FAKE_B, &dquot->dq_flags);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
mark_dquot_dirty(dquot);
return 0;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 0738972e8d3f..bb3f59bcfcf5 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -379,7 +379,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
if (!ddquot)
return -ENOMEM;
- /* dq_off is guarded by dqio_mutex */
+ /* dq_off is guarded by dqio_sem */
if (!dquot->dq_off) {
ret = dq_insert_tree(info, dquot);
if (ret < 0) {
@@ -389,9 +389,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
return ret;
}
}
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
dquot->dq_off);
if (ret != info->dqi_entry_size) {
@@ -649,14 +649,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
kfree(ddquot);
goto out;
}
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
if (!dquot->dq_dqb.dqb_bhardlimit &&
!dquot->dq_dqb.dqb_bsoftlimit &&
!dquot->dq_dqb.dqb_ihardlimit &&
!dquot->dq_dqb.dqb_isoftlimit)
set_bit(DQ_FAKE_B, &dquot->dq_flags);
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
kfree(ddquot);
out:
dqstats_inc(DQST_READS);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 8fe79beced5c..7ac5298aba70 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -56,8 +56,9 @@ static int v1_read_dqblk(struct dquot *dquot)
{
int type = dquot->dq_id.type;
struct v1_disk_dqblk dqblk;
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
- if (!sb_dqopt(dquot->dq_sb)->files[type])
+ if (!dqopt->files[type])
return -EINVAL;
/* Set structure to 0s in case read fails/is after end of file */
@@ -160,6 +161,7 @@ static int v1_read_file_info(struct super_block *sb, int type)
struct v1_disk_dqblk dqblk;
int ret;
+ down_read(&dqopt->dqio_sem);
ret = sb->s_op->quota_read(sb, type, (char *)&dqblk,
sizeof(struct v1_disk_dqblk), v1_dqoff(0));
if (ret != sizeof(struct v1_disk_dqblk)) {
@@ -176,6 +178,7 @@ static int v1_read_file_info(struct super_block *sb, int type)
dqopt->info[type].dqi_bgrace =
dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME;
out:
+ up_read(&dqopt->dqio_sem);
return ret;
}
@@ -185,7 +188,7 @@ static int v1_write_file_info(struct super_block *sb, int type)
struct v1_disk_dqblk dqblk;
int ret;
- dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY;
+ down_write(&dqopt->dqio_sem);
ret = sb->s_op->quota_read(sb, type, (char *)&dqblk,
sizeof(struct v1_disk_dqblk), v1_dqoff(0));
if (ret != sizeof(struct v1_disk_dqblk)) {
@@ -193,8 +196,11 @@ static int v1_write_file_info(struct super_block *sb, int type)
ret = -EIO;
goto out;
}
+ spin_lock(&dq_data_lock);
+ dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY;
dqblk.dqb_itime = dqopt->info[type].dqi_igrace;
dqblk.dqb_btime = dqopt->info[type].dqi_bgrace;
+ spin_unlock(&dq_data_lock);
ret = sb->s_op->quota_write(sb, type, (char *)&dqblk,
sizeof(struct v1_disk_dqblk), v1_dqoff(0));
if (ret == sizeof(struct v1_disk_dqblk))
@@ -202,6 +208,7 @@ static int v1_write_file_info(struct super_block *sb, int type)
else if (ret > 0)
ret = -EIO;
out:
+ up_write(&dqopt->dqio_sem);
return ret;
}
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index ca71bf881ad1..c0187cda2c1e 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -65,9 +65,11 @@ static int v2_read_header(struct super_block *sb, int type,
if (size != sizeof(struct v2_disk_dqheader)) {
quota_error(sb, "Failed header read: expected=%zd got=%zd",
sizeof(struct v2_disk_dqheader), size);
- return 0;
+ if (size < 0)
+ return size;
+ return -EIO;
}
- return 1;
+ return 0;
}
/* Check whether given file is really vfsv0 quotafile */
@@ -77,7 +79,7 @@ static int v2_check_quota_file(struct super_block *sb, int type)
static const uint quota_magics[] = V2_INITQMAGICS;
static const uint quota_versions[] = V2_INITQVERSIONS;
- if (!v2_read_header(sb, type, &dqhead))
+ if (v2_read_header(sb, type, &dqhead))
return 0;
if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
le32_to_cpu(dqhead.dqh_version) > quota_versions[type])
@@ -90,29 +92,38 @@ static int v2_read_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
struct v2_disk_dqheader dqhead;
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct quota_info *dqopt = sb_dqopt(sb);
+ struct mem_dqinfo *info = &dqopt->info[type];
struct qtree_mem_dqinfo *qinfo;
ssize_t size;
unsigned int version;
+ int ret;
- if (!v2_read_header(sb, type, &dqhead))
- return -1;
+ down_read(&dqopt->dqio_sem);
+ ret = v2_read_header(sb, type, &dqhead);
+ if (ret < 0)
+ goto out;
version = le32_to_cpu(dqhead.dqh_version);
if ((info->dqi_fmt_id == QFMT_VFS_V0 && version != 0) ||
- (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1))
- return -1;
+ (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1)) {
+ ret = -EINVAL;
+ goto out;
+ }
size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
if (size != sizeof(struct v2_disk_dqinfo)) {
quota_error(sb, "Can't read info structure");
- return -1;
+ if (size < 0)
+ ret = size;
+ else
+ ret = -EIO;
+ goto out;
}
info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
if (!info->dqi_priv) {
- printk(KERN_WARNING
- "Not enough memory for quota information structure.\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
qinfo = info->dqi_priv;
if (version == 0) {
@@ -147,17 +158,22 @@ static int v2_read_file_info(struct super_block *sb, int type)
qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
qinfo->dqi_ops = &v2r1_qtree_ops;
}
- return 0;
+ ret = 0;
+out:
+ up_read(&dqopt->dqio_sem);
+ return ret;
}
/* Write information header to quota file */
static int v2_write_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct quota_info *dqopt = sb_dqopt(sb);
+ struct mem_dqinfo *info = &dqopt->info[type];
struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
ssize_t size;
+ down_write(&dqopt->dqio_sem);
spin_lock(&dq_data_lock);
info->dqi_flags &= ~DQF_INFO_DIRTY;
dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
@@ -170,6 +186,7 @@ static int v2_write_file_info(struct super_block *sb, int type)
dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
+ up_write(&dqopt->dqio_sem);
if (size != sizeof(struct v2_disk_dqinfo)) {
quota_error(sb, "Can't write info structure");
return -1;
@@ -285,17 +302,51 @@ static int v2r1_is_id(void *dp, struct dquot *dquot)
static int v2_read_dquot(struct dquot *dquot)
{
- return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+ int ret;
+
+ down_read(&dqopt->dqio_sem);
+ ret = qtree_read_dquot(
+ sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
+ dquot);
+ up_read(&dqopt->dqio_sem);
+ return ret;
}
static int v2_write_dquot(struct dquot *dquot)
{
- return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+ int ret;
+ bool alloc = false;
+
+ /*
+ * If space for dquot is already allocated, we don't need any
+ * protection as we'll only overwrite the place of dquot. We are
+ * still protected by concurrent writes of the same dquot by
+ * dquot->dq_lock.
+ */
+ if (!dquot->dq_off) {
+ alloc = true;
+ down_write(&dqopt->dqio_sem);
+ }
+ ret = qtree_write_dquot(
+ sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
+ dquot);
+ if (alloc)
+ up_write(&dqopt->dqio_sem);
+ return ret;
}
static int v2_release_dquot(struct dquot *dquot)
{
- return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+ int ret;
+
+ down_write(&dqopt->dqio_sem);
+ ret = qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
+ up_write(&dqopt->dqio_sem);
+
+ return ret;
}
static int v2_free_file_info(struct super_block *sb, int type)
@@ -306,7 +357,13 @@ static int v2_free_file_info(struct super_block *sb, int type)
static int v2_get_next_id(struct super_block *sb, struct kqid *qid)
{
- return qtree_get_next_id(sb_dqinfo(sb, qid->type)->dqi_priv, qid);
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int ret;
+
+ down_read(&dqopt->dqio_sem);
+ ret = qtree_get_next_id(sb_dqinfo(sb, qid->type)->dqi_priv, qid);
+ up_read(&dqopt->dqio_sem);
+ return ret;
}
static const struct quota_format_ops v2_format_ops = {
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 2ef7ce75c062..3ac1f2387083 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -228,7 +228,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
if (!pages)
goto out_free;
- nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages);
+ nr = find_get_pages(inode->i_mapping, &pgoff, lpages, pages);
if (nr != lpages)
goto out_free_pages; /* leave if some pages were missing */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index a11d773e5ff3..0c882a0e2a6e 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1481,7 +1481,7 @@ static int flush_journal_list(struct super_block *s,
if ((!was_jwait) && !buffer_locked(saved_bh)) {
reiserfs_warning(s, "journal-813",
"BAD! buffer %llu %cdirty %cjwait, "
- "not in a newer tranasction",
+ "not in a newer transaction",
(unsigned long long)saved_bh->
b_blocknr, was_dirty ? ' ' : '!',
was_jwait ? ' ' : '!');
diff --git a/fs/stat.c b/fs/stat.c
index c35610845ab1..8a6aa8caf891 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -710,7 +710,7 @@ loff_t inode_get_bytes(struct inode *inode)
loff_t ret;
spin_lock(&inode->i_lock);
- ret = (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
+ ret = __inode_get_bytes(inode);
spin_unlock(&inode->i_lock);
return ret;
}
diff --git a/fs/super.c b/fs/super.c
index 6bc3352adcf3..221cfa1f4e92 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -242,7 +242,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
atomic_set(&s->s_active, 1);
mutex_init(&s->s_vfs_rename_mutex);
lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
- mutex_init(&s->s_dquot.dqio_mutex);
+ init_rwsem(&s->s_dquot.dqio_sem);
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
diff --git a/fs/sync.c b/fs/sync.c
index 27d6b8bbcb6a..2e3fd7d94d2d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -335,11 +335,6 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
goto out_put;
mapping = f.file->f_mapping;
- if (!mapping) {
- ret = -EINVAL;
- goto out_put;
- }
-
ret = 0;
if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
ret = file_fdatawait_range(f.file, offset, endbyte);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index f90a466ea5db..a02aa59d1e24 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1490,7 +1490,10 @@ static int ubifs_migrate_page(struct address_space *mapping,
SetPagePrivate(newpage);
}
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
#endif
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 18fdb9d90812..8dacf4f57414 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -114,7 +114,7 @@ static void udf_update_extent_cache(struct inode *inode, loff_t estart,
__udf_clear_extent_cache(inode);
if (pos->bh)
get_bh(pos->bh);
- memcpy(&iinfo->cached_extent.epos, pos, sizeof(struct extent_position));
+ memcpy(&iinfo->cached_extent.epos, pos, sizeof(*pos));
iinfo->cached_extent.lstart = estart;
switch (iinfo->i_alloc_type) {
case ICBTAG_FLAG_AD_SHORT:
@@ -1572,13 +1572,8 @@ static int udf_alloc_i_data(struct inode *inode, size_t size)
{
struct udf_inode_info *iinfo = UDF_I(inode);
iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL);
-
- if (!iinfo->i_ext.i_data) {
- udf_err(inode->i_sb, "(ino %ld) no free memory\n",
- inode->i_ino);
+ if (!iinfo->i_ext.i_data)
return -ENOMEM;
- }
-
return 0;
}
@@ -1703,7 +1698,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
dsea->impUseLength = cpu_to_le32(sizeof(struct regid));
}
eid = (struct regid *)dsea->impUse;
- memset(eid, 0, sizeof(struct regid));
+ memset(eid, 0, sizeof(*eid));
strcpy(eid->ident, UDF_ID_DEVELOPER);
eid->identSuffix[0] = UDF_OS_CLASS_UNIX;
eid->identSuffix[1] = UDF_OS_ID_LINUX;
@@ -1754,7 +1749,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime);
udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime);
- memset(&(efe->impIdent), 0, sizeof(struct regid));
+ memset(&(efe->impIdent), 0, sizeof(efe->impIdent));
strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER);
efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 385ee89d5824..885198dfd9f8 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1184,7 +1184,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
ncfi.fileVersionNum = ocfi.fileVersionNum;
ncfi.fileCharacteristics = ocfi.fileCharacteristics;
- memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(struct long_ad));
+ memcpy(&(ncfi.icb), &(ocfi.icb), sizeof(ocfi.icb));
udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL);
/* The old fid may have moved - find it again */
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 462ac2e9258c..93c59630512b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -266,11 +266,8 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
{
struct udf_sb_info *sbi = UDF_SB(sb);
- sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
- GFP_KERNEL);
+ sbi->s_partmaps = kcalloc(count, sizeof(*sbi->s_partmaps), GFP_KERNEL);
if (!sbi->s_partmaps) {
- udf_err(sb, "Unable to allocate space for %d partition maps\n",
- count);
sbi->s_partitions = 0;
return -ENOMEM;
}
@@ -324,7 +321,8 @@ static void udf_sb_free_partitions(struct super_block *sb)
{
struct udf_sb_info *sbi = UDF_SB(sb);
int i;
- if (sbi->s_partmaps == NULL)
+
+ if (!sbi->s_partmaps)
return;
for (i = 0; i < sbi->s_partitions; i++)
udf_free_partition(&sbi->s_partmaps[i]);
@@ -1071,7 +1069,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
else
bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
- if (bitmap == NULL)
+ if (!bitmap)
return NULL;
bitmap->s_nr_groups = nr_groups;
@@ -2099,7 +2097,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
uopt.fmode = UDF_INVALID_MODE;
uopt.dmode = UDF_INVALID_MODE;
- sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL);
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 886085b47c75..ef4b48d1ea42 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -178,7 +178,8 @@ static inline void msg_init(struct uffd_msg *msg)
static inline struct uffd_msg userfault_msg(unsigned long address,
unsigned int flags,
- unsigned long reason)
+ unsigned long reason,
+ unsigned int features)
{
struct uffd_msg msg;
msg_init(&msg);
@@ -202,6 +203,8 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
* write protect fault.
*/
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
+ if (features & UFFD_FEATURE_THREAD_ID)
+ msg.arg.pagefault.feat.ptid = task_pid_vnr(current);
return msg;
}
@@ -370,13 +373,34 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
+ if (ctx->features & UFFD_FEATURE_SIGBUS)
+ goto out;
+
/*
* If it's already released don't get it. This avoids to loop
* in __get_user_pages if userfaultfd_release waits on the
* caller of handle_userfault to release the mmap_sem.
*/
- if (unlikely(ACCESS_ONCE(ctx->released)))
+ if (unlikely(ACCESS_ONCE(ctx->released))) {
+ /*
+ * Don't return VM_FAULT_SIGBUS in this case, so a non
+ * cooperative manager can close the uffd after the
+ * last UFFDIO_COPY, without risking to trigger an
+ * involuntary SIGBUS if the process was starting the
+ * userfaultfd while the userfaultfd was still armed
+ * (but after the last UFFDIO_COPY). If the uffd
+ * wasn't already closed when the userfault reached
+ * this point, that would normally be solved by
+ * userfaultfd_must_wait returning 'false'.
+ *
+ * If we were to return VM_FAULT_SIGBUS here, the non
+ * cooperative manager would be instead forced to
+ * always call UFFDIO_UNREGISTER before it can safely
+ * close the uffd.
+ */
+ ret = VM_FAULT_NOPAGE;
goto out;
+ }
/*
* Check that we can return VM_FAULT_RETRY.
@@ -419,7 +443,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current;
- uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
+ uwq.msg = userfault_msg(vmf->address, vmf->flags, reason,
+ ctx->features);
uwq.ctx = ctx;
uwq.waken = false;
@@ -1194,7 +1219,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
struct uffdio_register __user *user_uffdio_register;
unsigned long vm_flags, new_flags;
bool found;
- bool non_anon_pages;
+ bool basic_ioctls;
unsigned long start, end, vma_end;
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1260,7 +1285,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* Search for not compatible vmas.
*/
found = false;
- non_anon_pages = false;
+ basic_ioctls = false;
for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
cond_resched();
@@ -1299,8 +1324,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
/*
* Note vmas containing huge pages
*/
- if (is_vm_hugetlb_page(cur) || vma_is_shmem(cur))
- non_anon_pages = true;
+ if (is_vm_hugetlb_page(cur))
+ basic_ioctls = true;
found = true;
}
@@ -1371,7 +1396,7 @@ out_unlock:
* userland which ioctls methods are guaranteed to
* succeed on this range.
*/
- if (put_user(non_anon_pages ? UFFD_API_RANGE_IOCTLS_BASIC :
+ if (put_user(basic_ioctls ? UFFD_API_RANGE_IOCTLS_BASIC :
UFFD_API_RANGE_IOCTLS,
&user_uffdio_register->ioctls))
ret = -EFAULT;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f9efd67f6fa1..fffae1390d7f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -540,7 +540,7 @@ xfs_init_bio_from_bh(
struct buffer_head *bh)
{
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
}
static struct xfs_ioend *
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 72f038492ba8..b1c9711e79a4 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1281,7 +1281,7 @@ next_chunk:
nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
bio = bio_alloc(GFP_NOIO, nr_pages);
- bio->bi_bdev = bp->b_target->bt_bdev;
+ bio_set_dev(bio, bp->b_target->bt_bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = xfs_buf_bio_end_io;
bio->bi_private = bp;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 0debbc7e3f03..ec3e44fcf771 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1101,7 +1101,7 @@ xfs_filemap_pfn_mkwrite(
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else if (IS_DAX(inode))
- ret = dax_pfn_mkwrite(vmf);
+ ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
OpenPOWER on IntegriCloud