summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/coredump.c30
-rw-r--r--fs/ecryptfs/keystore.c6
-rw-r--r--fs/eventfd.c42
-rw-r--r--fs/ext4/dir.c2
-rw-r--r--fs/fat/Kconfig18
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fhandle.c2
-rw-r--r--fs/fs-writeback.c37
-rw-r--r--fs/jffs2/gc.c64
-rw-r--r--fs/jffs2/jffs2_fs_sb.h2
-rw-r--r--fs/jffs2/nodemgmt.c4
-rw-r--r--fs/jffs2/wbuf.c6
-rw-r--r--fs/nfs/blocklayout/blocklayout.c59
-rw-r--r--fs/nfs/blocklayout/blocklayout.h14
-rw-r--r--fs/nfs/blocklayout/dev.c144
-rw-r--r--fs/nfs/blocklayout/extent_tree.c44
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c2
-rw-r--r--fs/nfsd/Kconfig28
-rw-r--r--fs/nfsd/Makefile4
-rw-r--r--fs/nfsd/blocklayout.c298
-rw-r--r--fs/nfsd/blocklayoutxdr.c77
-rw-r--r--fs/nfsd/blocklayoutxdr.h14
-rw-r--r--fs/nfsd/nfs3proc.c7
-rw-r--r--fs/nfsd/nfs4layouts.c31
-rw-r--r--fs/nfsd/nfs4proc.c9
-rw-r--r--fs/nfsd/nfs4recover.c1
-rw-r--r--fs/nfsd/nfs4state.c29
-rw-r--r--fs/nfsd/nfs4xdr.c26
-rw-r--r--fs/nfsd/pnfs.h8
-rw-r--r--fs/nfsd/vfs.h19
-rw-r--r--fs/ocfs2/Makefile3
-rw-r--r--fs/ocfs2/filecheck.c606
-rw-r--r--fs/ocfs2/filecheck.h49
-rw-r--r--fs/ocfs2/inode.c225
-rw-r--r--fs/ocfs2/inode.h3
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/ocfs2/stackglue.c3
-rw-r--r--fs/ocfs2/stackglue.h2
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/open.c6
-rw-r--r--fs/ubifs/Makefile1
-rw-r--r--fs/ubifs/misc.c57
-rw-r--r--fs/ubifs/ubifs.h41
-rw-r--r--fs/ubifs/xattr.c1
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/xfs_export.c2
-rw-r--r--fs/xfs/xfs_pnfs.h2
47 files changed, 1834 insertions, 208 deletions
diff --git a/fs/coredump.c b/fs/coredump.c
index 9ea87e9fdccf..47c32c3bfa1d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -32,6 +32,9 @@
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/path.h>
#include <linux/timekeeping.h>
#include <asm/uaccess.h>
@@ -649,6 +652,8 @@ void do_coredump(const siginfo_t *siginfo)
}
} else {
struct inode *inode;
+ int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+ O_LARGEFILE | O_EXCL;
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
@@ -687,10 +692,27 @@ void do_coredump(const siginfo_t *siginfo)
* what matters is that at least one of the two processes
* writes its coredump successfully, not which one.
*/
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW |
- O_LARGEFILE | O_EXCL,
- 0600);
+ if (need_suid_safe) {
+ /*
+ * Using user namespaces, normal user tasks can change
+ * their current->fs->root to point to arbitrary
+ * directories. Since the intention of the "only dump
+ * with a fully qualified path" rule is to control where
+ * coredumps may be placed using root privileges,
+ * current->fs->root must not be used. Instead, use the
+ * root directory of init_task.
+ */
+ struct path root;
+
+ task_lock(&init_task);
+ get_fs_root(init_task.fs, &root);
+ task_unlock(&init_task);
+ cprm.file = file_open_root(root.dentry, root.mnt,
+ cn.corename, open_flags, 0600);
+ path_put(&root);
+ } else {
+ cprm.file = filp_open(cn.corename, open_flags, 0600);
+ }
if (IS_ERR(cprm.file))
goto fail_unlock;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index c5c84dfb5b3e..9893d1538122 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -635,8 +635,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
if (!s) {
printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
"[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
- rc = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
(*packet_size) = 0;
rc = ecryptfs_find_auth_tok_for_sig(
@@ -922,8 +921,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
if (!s) {
printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
"[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
- rc = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
diff --git a/fs/eventfd.c b/fs/eventfd.c
index ed70cf9fdc7b..1231cd1999d8 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -121,8 +121,46 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
u64 count;
poll_wait(file, &ctx->wqh, wait);
- smp_rmb();
- count = ctx->count;
+
+ /*
+ * All writes to ctx->count occur within ctx->wqh.lock. This read
+ * can be done outside ctx->wqh.lock because we know that poll_wait
+ * takes that lock (through add_wait_queue) if our caller will sleep.
+ *
+ * The read _can_ therefore seep into add_wait_queue's critical
+ * section, but cannot move above it! add_wait_queue's spin_lock acts
+ * as an acquire barrier and ensures that the read be ordered properly
+ * against the writes. The following CAN happen and is safe:
+ *
+ * poll write
+ * ----------------- ------------
+ * lock ctx->wqh.lock (in poll_wait)
+ * count = ctx->count
+ * __add_wait_queue
+ * unlock ctx->wqh.lock
+ * lock ctx->qwh.lock
+ * ctx->count += n
+ * if (waitqueue_active)
+ * wake_up_locked_poll
+ * unlock ctx->qwh.lock
+ * eventfd_poll returns 0
+ *
+ * but the following, which would miss a wakeup, cannot happen:
+ *
+ * poll write
+ * ----------------- ------------
+ * count = ctx->count (INVALID!)
+ * lock ctx->qwh.lock
+ * ctx->count += n
+ * **waitqueue_active is false**
+ * **no wake_up_locked_poll!**
+ * unlock ctx->qwh.lock
+ * lock ctx->wqh.lock (in poll_wait)
+ * __add_wait_queue
+ * unlock ctx->wqh.lock
+ * eventfd_poll returns 0
+ */
+ count = READ_ONCE(ctx->count);
if (count > 0)
events |= POLLIN;
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 33f5e2a50cf8..50ba27cbed03 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -285,7 +285,7 @@ errout:
static inline int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
- return is_compat_task();
+ return in_compat_syscall();
#else
return (BITS_PER_LONG == 32);
#endif
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index 182f9ffe2b51..3ff1772f612e 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -93,8 +93,24 @@ config FAT_DEFAULT_IOCHARSET
that most of your FAT filesystems use, and can be overridden
with the "iocharset" mount option for FAT filesystems.
Note that "utf8" is not recommended for FAT filesystems.
- If unsure, you shouldn't set "utf8" here.
+ If unsure, you shouldn't set "utf8" here - select the next option
+ instead if you would like to use UTF-8 encoded file names by default.
See <file:Documentation/filesystems/vfat.txt> for more information.
Enable any character sets you need in File Systems/Native Language
Support.
+
+config FAT_DEFAULT_UTF8
+ bool "Enable FAT UTF-8 option by default"
+ depends on VFAT_FS
+ default n
+ help
+ Set this if you would like to have "utf8" mount option set
+ by default when mounting FAT filesystems.
+
+ Even if you say Y here can always disable UTF-8 for
+ particular mount by adding "utf8=0" to mount options.
+
+ Say Y if you use UTF-8 encoding for file names, N otherwise.
+
+ See <file:Documentation/filesystems/vfat.txt> for more information.
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a5599052116c..226281068a46 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1127,7 +1127,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
}
opts->name_check = 'n';
opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
- opts->utf8 = opts->unicode_xlate = 0;
+ opts->unicode_xlate = 0;
opts->numtail = 1;
opts->usefree = opts->nocase = 0;
opts->tz_set = 0;
@@ -1135,6 +1135,8 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
opts->errors = FAT_ERRORS_RO;
*debug = 0;
+ opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
+
if (!options)
goto out;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index d59712dfa3e7..ca3c3dd01789 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -228,7 +228,7 @@ long do_handle_open(int mountdirfd,
path_put(&path);
return fd;
}
- file = file_open_root(path.dentry, path.mnt, "", open_flag);
+ file = file_open_root(path.dentry, path.mnt, "", open_flag, 0);
if (IS_ERR(file)) {
put_unused_fd(fd);
retval = PTR_ERR(file);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5c46ed9f3e14..fee81e8768c9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -281,13 +281,15 @@ locked_inode_to_wb_and_lock_list(struct inode *inode)
wb_get(wb);
spin_unlock(&inode->i_lock);
spin_lock(&wb->list_lock);
- wb_put(wb); /* not gonna deref it anymore */
/* i_wb may have changed inbetween, can't use inode_to_wb() */
- if (likely(wb == inode->i_wb))
- return wb; /* @inode already has ref */
+ if (likely(wb == inode->i_wb)) {
+ wb_put(wb); /* @inode already has ref */
+ return wb;
+ }
spin_unlock(&wb->list_lock);
+ wb_put(wb);
cpu_relax();
spin_lock(&inode->i_lock);
}
@@ -1337,10 +1339,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
* and does more profound writeback list handling in writeback_sb_inodes().
*/
-static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_single_inode(struct inode *inode,
+ struct writeback_control *wbc)
{
+ struct bdi_writeback *wb;
int ret = 0;
spin_lock(&inode->i_lock);
@@ -1378,7 +1380,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
ret = __writeback_single_inode(inode, wbc);
wbc_detach_inode(wbc);
- spin_lock(&wb->list_lock);
+
+ wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
/*
* If inode is clean, remove it from writeback lists. Otherwise don't
@@ -1453,6 +1456,7 @@ static long writeback_sb_inodes(struct super_block *sb,
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);
+ struct bdi_writeback *tmp_wb;
if (inode->i_sb != sb) {
if (work->sb) {
@@ -1543,15 +1547,23 @@ static long writeback_sb_inodes(struct super_block *sb,
cond_resched();
}
-
- spin_lock(&wb->list_lock);
+ /*
+ * Requeue @inode if still dirty. Be careful as @inode may
+ * have been switched to another wb in the meantime.
+ */
+ tmp_wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY_ALL))
wrote++;
- requeue_inode(inode, wb, &wbc);
+ requeue_inode(inode, tmp_wb, &wbc);
inode_sync_complete(inode);
spin_unlock(&inode->i_lock);
+ if (unlikely(tmp_wb != wb)) {
+ spin_unlock(&tmp_wb->list_lock);
+ spin_lock(&wb->list_lock);
+ }
+
/*
* bail out to wb_writeback() often enough to check
* background threshold and other termination conditions.
@@ -2338,7 +2350,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
*/
int write_inode_now(struct inode *inode, int sync)
{
- struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -2350,7 +2361,7 @@ int write_inode_now(struct inode *inode, int sync)
wbc.nr_to_write = 0;
might_sleep();
- return writeback_single_inode(inode, wb, &wbc);
+ return writeback_single_inode(inode, &wbc);
}
EXPORT_SYMBOL(write_inode_now);
@@ -2367,7 +2378,7 @@ EXPORT_SYMBOL(write_inode_now);
*/
int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
- return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
+ return writeback_single_inode(inode, wbc);
}
EXPORT_SYMBOL(sync_inode);
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 95d5880a63ee..7e553f286775 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -134,37 +134,59 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
if (mutex_lock_interruptible(&c->alloc_sem))
return -EINTR;
+
for (;;) {
+ /* We can't start doing GC until we've finished checking
+ the node CRCs etc. */
+ int bucket, want_ino;
+
spin_lock(&c->erase_completion_lock);
if (!c->unchecked_size)
break;
-
- /* We can't start doing GC yet. We haven't finished checking
- the node CRCs etc. Do it now. */
-
- /* checked_ino is protected by the alloc_sem */
- if (c->checked_ino > c->highest_ino && xattr) {
- pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
- c->unchecked_size);
- jffs2_dbg_dump_block_lists_nolock(c);
- spin_unlock(&c->erase_completion_lock);
- mutex_unlock(&c->alloc_sem);
- return -ENOSPC;
- }
-
spin_unlock(&c->erase_completion_lock);
if (!xattr)
xattr = jffs2_verify_xattr(c);
spin_lock(&c->inocache_lock);
+ /* Instead of doing the inodes in numeric order, doing a lookup
+ * in the hash for each possible number, just walk the hash
+ * buckets of *existing* inodes. This means that we process
+ * them out-of-order, but it can be a lot faster if there's
+ * a sparse inode# space. Which there often is. */
+ want_ino = c->check_ino;
+ for (bucket = c->check_ino % c->inocache_hashsize ; bucket < c->inocache_hashsize; bucket++) {
+ for (ic = c->inocache_list[bucket]; ic; ic = ic->next) {
+ if (ic->ino < want_ino)
+ continue;
+
+ if (ic->state != INO_STATE_CHECKEDABSENT &&
+ ic->state != INO_STATE_PRESENT)
+ goto got_next; /* with inocache_lock held */
+
+ jffs2_dbg(1, "Skipping ino #%u already checked\n",
+ ic->ino);
+ }
+ want_ino = 0;
+ }
- ic = jffs2_get_ino_cache(c, c->checked_ino++);
+ /* Point c->check_ino past the end of the last bucket. */
+ c->check_ino = ((c->highest_ino + c->inocache_hashsize + 1) &
+ ~c->inocache_hashsize) - 1;
- if (!ic) {
- spin_unlock(&c->inocache_lock);
- continue;
- }
+ spin_unlock(&c->inocache_lock);
+
+ pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
+ c->unchecked_size);
+ jffs2_dbg_dump_block_lists_nolock(c);
+ mutex_unlock(&c->alloc_sem);
+ return -ENOSPC;
+
+ got_next:
+ /* For next time round the loop, we want c->checked_ino to indicate
+ * the *next* one we want to check. And since we're walking the
+ * buckets rather than doing it sequentially, it's: */
+ c->check_ino = ic->ino + c->inocache_hashsize;
if (!ic->pino_nlink) {
jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
@@ -176,8 +198,6 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
switch(ic->state) {
case INO_STATE_CHECKEDABSENT:
case INO_STATE_PRESENT:
- jffs2_dbg(1, "Skipping ino #%u already checked\n",
- ic->ino);
spin_unlock(&c->inocache_lock);
continue;
@@ -196,7 +216,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
ic->ino);
/* We need to come back again for the _same_ inode. We've
made no progress in this case, but that should be OK */
- c->checked_ino--;
+ c->check_ino = ic->ino;
mutex_unlock(&c->alloc_sem);
sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 046fee8b6e9b..778275f48a87 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -49,7 +49,7 @@ struct jffs2_sb_info {
struct mtd_info *mtd;
uint32_t highest_ino;
- uint32_t checked_ino;
+ uint32_t check_ino; /* *NEXT* inode to be checked */
unsigned int flags;
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index b6bd4affd9ad..cda0774c2c9c 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -846,8 +846,8 @@ int jffs2_thread_should_wake(struct jffs2_sb_info *c)
return 1;
if (c->unchecked_size) {
- jffs2_dbg(1, "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n",
- c->unchecked_size, c->checked_ino);
+ jffs2_dbg(1, "jffs2_thread_should_wake(): unchecked_size %d, check_ino #%d\n",
+ c->unchecked_size, c->check_ino);
return 1;
}
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 5a3da3f52908..b25d28a21212 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1183,22 +1183,20 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c)
int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
{
- struct nand_ecclayout *oinfo = c->mtd->ecclayout;
-
if (!c->mtd->oobsize)
return 0;
/* Cleanmarker is out-of-band, so inline size zero */
c->cleanmarker_size = 0;
- if (!oinfo || oinfo->oobavail == 0) {
+ if (c->mtd->oobavail == 0) {
pr_err("inconsistent device description\n");
return -EINVAL;
}
jffs2_dbg(1, "using OOB on NAND\n");
- c->oobavail = oinfo->oobavail;
+ c->oobavail = c->mtd->oobavail;
/* Initialise write buffer */
init_rwsem(&c->wbuf_sem);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 8bc870e4c467..02e4d87d2ed3 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -446,8 +446,8 @@ static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
kfree(bl);
}
-static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
- gfp_t gfp_flags)
+static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
+ gfp_t gfp_flags, bool is_scsi_layout)
{
struct pnfs_block_layout *bl;
@@ -460,9 +460,22 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
bl->bl_ext_ro = RB_ROOT;
spin_lock_init(&bl->bl_ext_lock);
+ bl->bl_scsi_layout = is_scsi_layout;
return &bl->bl_layout;
}
+static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
+ gfp_t gfp_flags)
+{
+ return __bl_alloc_layout_hdr(inode, gfp_flags, false);
+}
+
+static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
+ gfp_t gfp_flags)
+{
+ return __bl_alloc_layout_hdr(inode, gfp_flags, true);
+}
+
static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
dprintk("%s enter\n", __func__);
@@ -889,22 +902,53 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
.sync = pnfs_generic_sync,
};
+static struct pnfs_layoutdriver_type scsilayout_type = {
+ .id = LAYOUT_SCSI,
+ .name = "LAYOUT_SCSI",
+ .owner = THIS_MODULE,
+ .flags = PNFS_LAYOUTRET_ON_SETATTR |
+ PNFS_READ_WHOLE_PAGE,
+ .read_pagelist = bl_read_pagelist,
+ .write_pagelist = bl_write_pagelist,
+ .alloc_layout_hdr = sl_alloc_layout_hdr,
+ .free_layout_hdr = bl_free_layout_hdr,
+ .alloc_lseg = bl_alloc_lseg,
+ .free_lseg = bl_free_lseg,
+ .return_range = bl_return_range,
+ .prepare_layoutcommit = bl_prepare_layoutcommit,
+ .cleanup_layoutcommit = bl_cleanup_layoutcommit,
+ .set_layoutdriver = bl_set_layoutdriver,
+ .alloc_deviceid_node = bl_alloc_deviceid_node,
+ .free_deviceid_node = bl_free_deviceid_node,
+ .pg_read_ops = &bl_pg_read_ops,
+ .pg_write_ops = &bl_pg_write_ops,
+ .sync = pnfs_generic_sync,
+};
+
+
static int __init nfs4blocklayout_init(void)
{
int ret;
dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
- ret = pnfs_register_layoutdriver(&blocklayout_type);
+ ret = bl_init_pipefs();
if (ret)
goto out;
- ret = bl_init_pipefs();
+
+ ret = pnfs_register_layoutdriver(&blocklayout_type);
if (ret)
- goto out_unregister;
+ goto out_cleanup_pipe;
+
+ ret = pnfs_register_layoutdriver(&scsilayout_type);
+ if (ret)
+ goto out_unregister_block;
return 0;
-out_unregister:
+out_unregister_block:
pnfs_unregister_layoutdriver(&blocklayout_type);
+out_cleanup_pipe:
+ bl_cleanup_pipefs();
out:
return ret;
}
@@ -914,8 +958,9 @@ static void __exit nfs4blocklayout_exit(void)
dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
__func__);
- bl_cleanup_pipefs();
+ pnfs_unregister_layoutdriver(&scsilayout_type);
pnfs_unregister_layoutdriver(&blocklayout_type);
+ bl_cleanup_pipefs();
}
MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index c556640dcf3b..bc21205309e0 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -55,7 +55,6 @@ struct pnfs_block_dev;
*/
#define PNFS_BLOCK_UUID_LEN 128
-
struct pnfs_block_volume {
enum pnfs_block_volume_type type;
union {
@@ -82,6 +81,13 @@ struct pnfs_block_volume {
u32 volumes_count;
u32 volumes[PNFS_BLOCK_MAX_DEVICES];
} stripe;
+ struct {
+ enum scsi_code_set code_set;
+ enum scsi_designator_type designator_type;
+ int designator_len;
+ u8 designator[256];
+ u64 pr_key;
+ } scsi;
};
};
@@ -106,6 +112,9 @@ struct pnfs_block_dev {
struct block_device *bdev;
u64 disk_offset;
+ u64 pr_key;
+ bool pr_registered;
+
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
@@ -131,6 +140,7 @@ struct pnfs_block_layout {
struct rb_root bl_ext_rw;
struct rb_root bl_ext_ro;
spinlock_t bl_ext_lock; /* Protects list manipulation */
+ bool bl_scsi_layout;
};
static inline struct pnfs_block_layout *
@@ -182,6 +192,6 @@ void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
dev_t bl_resolve_deviceid(struct nfs_server *server,
struct pnfs_block_volume *b, gfp_t gfp_mask);
int __init bl_init_pipefs(void);
-void __exit bl_cleanup_pipefs(void);
+void bl_cleanup_pipefs(void);
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index a861bbdfe577..e5b89675263e 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -1,11 +1,12 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
+#include <linux/pr.h>
#include "blocklayout.h"
@@ -21,6 +22,17 @@ bl_free_device(struct pnfs_block_dev *dev)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
+ if (dev->pr_registered) {
+ const struct pr_ops *ops =
+ dev->bdev->bd_disk->fops->pr_ops;
+ int error;
+
+ error = ops->pr_register(dev->bdev, dev->pr_key, 0,
+ false);
+ if (error)
+ pr_err("failed to unregister PR key.\n");
+ }
+
if (dev->bdev)
blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
}
@@ -113,6 +125,24 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
for (i = 0; i < b->stripe.volumes_count; i++)
b->stripe.volumes[i] = be32_to_cpup(p++);
break;
+ case PNFS_BLOCK_VOLUME_SCSI:
+ p = xdr_inline_decode(xdr, 4 + 4 + 4);
+ if (!p)
+ return -EIO;
+ b->scsi.code_set = be32_to_cpup(p++);
+ b->scsi.designator_type = be32_to_cpup(p++);
+ b->scsi.designator_len = be32_to_cpup(p++);
+ p = xdr_inline_decode(xdr, b->scsi.designator_len);
+ if (!p)
+ return -EIO;
+ if (b->scsi.designator_len > 256)
+ return -EIO;
+ memcpy(&b->scsi.designator, p, b->scsi.designator_len);
+ p = xdr_inline_decode(xdr, 8);
+ if (!p)
+ return -EIO;
+ p = xdr_decode_hyper(p, &b->scsi.pr_key);
+ break;
default:
dprintk("unknown volume type!\n");
return -EIO;
@@ -216,6 +246,116 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
return 0;
}
+static bool
+bl_validate_designator(struct pnfs_block_volume *v)
+{
+ switch (v->scsi.designator_type) {
+ case PS_DESIGNATOR_EUI64:
+ if (v->scsi.code_set != PS_CODE_SET_BINARY)
+ return false;
+
+ if (v->scsi.designator_len != 8 &&
+ v->scsi.designator_len != 10 &&
+ v->scsi.designator_len != 16)
+ return false;
+
+ return true;
+ case PS_DESIGNATOR_NAA:
+ if (v->scsi.code_set != PS_CODE_SET_BINARY)
+ return false;
+
+ if (v->scsi.designator_len != 8 &&
+ v->scsi.designator_len != 16)
+ return false;
+
+ return true;
+ case PS_DESIGNATOR_T10:
+ case PS_DESIGNATOR_NAME:
+ pr_err("pNFS: unsupported designator "
+ "(code set %d, type %d, len %d.\n",
+ v->scsi.code_set,
+ v->scsi.designator_type,
+ v->scsi.designator_len);
+ return false;
+ default:
+ pr_err("pNFS: invalid designator "
+ "(code set %d, type %d, len %d.\n",
+ v->scsi.code_set,
+ v->scsi.designator_type,
+ v->scsi.designator_len);
+ return false;
+ }
+}
+
+static int
+bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
+ struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+ struct pnfs_block_volume *v = &volumes[idx];
+ const struct pr_ops *ops;
+ const char *devname;
+ int error;
+
+ if (!bl_validate_designator(v))
+ return -EINVAL;
+
+ switch (v->scsi.designator_len) {
+ case 8:
+ devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
+ v->scsi.designator);
+ break;
+ case 12:
+ devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
+ v->scsi.designator);
+ break;
+ case 16:
+ devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
+ v->scsi.designator);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
+ if (IS_ERR(d->bdev)) {
+ pr_warn("pNFS: failed to open device %s (%ld)\n",
+ devname, PTR_ERR(d->bdev));
+ kfree(devname);
+ return PTR_ERR(d->bdev);
+ }
+
+ kfree(devname);
+
+ d->len = i_size_read(d->bdev->bd_inode);
+ d->map = bl_map_simple;
+ d->pr_key = v->scsi.pr_key;
+
+ pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
+ d->bdev->bd_disk->disk_name, d->pr_key);
+
+ ops = d->bdev->bd_disk->fops->pr_ops;
+ if (!ops) {
+ pr_err("pNFS: block device %s does not support reservations.",
+ d->bdev->bd_disk->disk_name);
+ error = -EINVAL;
+ goto out_blkdev_put;
+ }
+
+ error = ops->pr_register(d->bdev, 0, d->pr_key, true);
+ if (error) {
+ pr_err("pNFS: failed to register key for block device %s.",
+ d->bdev->bd_disk->disk_name);
+ goto out_blkdev_put;
+ }
+
+ d->pr_registered = true;
+ return 0;
+
+out_blkdev_put:
+ blkdev_put(d->bdev, FMODE_READ);
+ return error;
+}
+
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -303,6 +443,8 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return bl_parse_concat(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
+ case PNFS_BLOCK_VOLUME_SCSI:
+ return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
dprintk("unsupported volume type: %d\n", volumes[idx].type);
return -EIO;
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 35ab51c04814..720b3ff55fa9 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/vmalloc.h>
@@ -462,10 +462,12 @@ out:
return err;
}
-static size_t ext_tree_layoutupdate_size(size_t count)
+static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
{
- return sizeof(__be32) /* number of entries */ +
- PNFS_BLOCK_EXTENT_SIZE * count;
+ if (bl->bl_scsi_layout)
+ return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
+ else
+ return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
}
static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
@@ -483,6 +485,23 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
}
}
+static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
+{
+ p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
+ NFS4_DEVICEID4_SIZE);
+ p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+ p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+ p = xdr_encode_hyper(p, 0LL);
+ *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
+ return p;
+}
+
+static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
+{
+ p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+ return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+}
+
static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
size_t buffer_size, size_t *count)
{
@@ -496,19 +515,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
continue;
(*count)++;
- if (ext_tree_layoutupdate_size(*count) > buffer_size) {
+ if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
/* keep counting.. */
ret = -ENOSPC;
continue;
}
- p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
- NFS4_DEVICEID4_SIZE);
- p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
- p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
- p = xdr_encode_hyper(p, 0LL);
- *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
-
+ if (bl->bl_scsi_layout)
+ p = encode_scsi_range(be, p);
+ else
+ p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING;
}
spin_unlock(&bl->bl_ext_lock);
@@ -537,7 +553,7 @@ retry:
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
- buffer_size = ext_tree_layoutupdate_size(count);
+ buffer_size = ext_tree_layoutupdate_size(bl, count);
count = 0;
arg->layoutupdate_pages =
@@ -556,7 +572,7 @@ retry:
}
*start_p = cpu_to_be32(count);
- arg->layoutupdate_len = ext_tree_layoutupdate_size(count);
+ arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
void *p = start_p, *end = p + arg->layoutupdate_len;
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index dbe5839cdeba..9fb067a6f7e0 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -281,7 +281,7 @@ out:
return ret;
}
-void __exit bl_cleanup_pipefs(void)
+void bl_cleanup_pipefs(void)
{
rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
unregister_pernet_subsys(&nfs4blocklayout_net_ops);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index a0b77fc1bd39..c9f583d7bac8 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -84,12 +84,30 @@ config NFSD_V4
If unsure, say N.
config NFSD_PNFS
- bool "NFSv4.1 server support for Parallel NFS (pNFS)"
- depends on NFSD_V4
+ bool
+
+config NFSD_BLOCKLAYOUT
+ bool "NFSv4.1 server support for pNFS block layouts"
+ depends on NFSD_V4 && BLOCK
+ select NFSD_PNFS
+ help
+ This option enables support for the exporting pNFS block layouts
+ in the kernel's NFS server. The pNFS block layout enables NFS
+ clients to directly perform I/O to block devices accesible to both
+ the server and the clients. See RFC 5663 for more details.
+
+ If unsure, say N.
+
+config NFSD_SCSILAYOUT
+ bool "NFSv4.1 server support for pNFS SCSI layouts"
+ depends on NFSD_V4 && BLOCK
+ select NFSD_PNFS
help
- This option enables support for the parallel NFS features of the
- minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
- server.
+ This option enables support for the exporting pNFS SCSI layouts
+ in the kernel's NFS server. The pNFS SCSI layout enables NFS
+ clients to directly perform I/O to SCSI devices accesible to both
+ the server and the clients. See draft-ietf-nfsv4-scsi-layout for
+ more details.
If unsure, say N.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9a6028e120c6..3ae5f3c77e28 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -17,4 +17,6 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfs4acl.o nfs4callback.o nfs4recover.o
-nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
+nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index c29d9421bd5e..e55b5242614d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -1,11 +1,14 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/exportfs.h>
#include <linux/genhd.h>
#include <linux/slab.h>
+#include <linux/pr.h>
#include <linux/nfsd/debug.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_common.h>
#include "blocklayoutxdr.h"
#include "pnfs.h"
@@ -13,37 +16,6 @@
#define NFSDDBG_FACILITY NFSDDBG_PNFS
-static int
-nfsd4_block_get_device_info_simple(struct super_block *sb,
- struct nfsd4_getdeviceinfo *gdp)
-{
- struct pnfs_block_deviceaddr *dev;
- struct pnfs_block_volume *b;
-
- dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
- sizeof(struct pnfs_block_volume), GFP_KERNEL);
- if (!dev)
- return -ENOMEM;
- gdp->gd_device = dev;
-
- dev->nr_volumes = 1;
- b = &dev->volumes[0];
-
- b->type = PNFS_BLOCK_VOLUME_SIMPLE;
- b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
- return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
- &b->simple.offset);
-}
-
-static __be32
-nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
- struct nfsd4_getdeviceinfo *gdp)
-{
- if (sb->s_bdev != sb->s_bdev->bd_contains)
- return nfserr_inval;
- return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
-}
-
static __be32
nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
struct nfsd4_layoutget *args)
@@ -141,20 +113,13 @@ out_layoutunavailable:
}
static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
- struct nfsd4_layoutcommit *lcp)
+nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
+ struct iomap *iomaps, int nr_iomaps)
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
- struct iomap *iomaps;
- int nr_iomaps;
int error;
- nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
- if (nr_iomaps < 0)
- return nfserrno(nr_iomaps);
-
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
lcp->lc_mtime = current_fs_time(inode->i_sb);
@@ -172,6 +137,54 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
return nfserrno(error);
}
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
+static int
+nfsd4_block_get_device_info_simple(struct super_block *sb,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ struct pnfs_block_deviceaddr *dev;
+ struct pnfs_block_volume *b;
+
+ dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+ sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+ gdp->gd_device = dev;
+
+ dev->nr_volumes = 1;
+ b = &dev->volumes[0];
+
+ b->type = PNFS_BLOCK_VOLUME_SIMPLE;
+ b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
+ return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
+ &b->simple.offset);
+}
+
+static __be32
+nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ if (sb->s_bdev != sb->s_bdev->bd_contains)
+ return nfserr_inval;
+ return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
+}
+
+static __be32
+nfsd4_block_proc_layoutcommit(struct inode *inode,
+ struct nfsd4_layoutcommit *lcp)
+{
+ struct iomap *iomaps;
+ int nr_iomaps;
+
+ nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+ lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ if (nr_iomaps < 0)
+ return nfserrno(nr_iomaps);
+
+ return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
const struct nfsd4_layout_ops bl_layout_ops = {
/*
* Pretend that we send notification to the client. This is a blatant
@@ -190,3 +203,206 @@ const struct nfsd4_layout_ops bl_layout_ops = {
.encode_layoutget = nfsd4_block_encode_layoutget,
.proc_layoutcommit = nfsd4_block_proc_layoutcommit,
};
+#endif /* CONFIG_NFSD_BLOCKLAYOUT */
+
+#ifdef CONFIG_NFSD_SCSILAYOUT
+static int nfsd4_scsi_identify_device(struct block_device *bdev,
+ struct pnfs_block_volume *b)
+{
+ struct request_queue *q = bdev->bd_disk->queue;
+ struct request *rq;
+ size_t bufflen = 252, len, id_len;
+ u8 *buf, *d, type, assoc;
+ int error;
+
+ buf = kzalloc(bufflen, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ rq = blk_get_request(q, READ, GFP_KERNEL);
+ if (IS_ERR(rq)) {
+ error = -ENOMEM;
+ goto out_free_buf;
+ }
+ blk_rq_set_block_pc(rq);
+
+ error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
+ if (error)
+ goto out_put_request;
+
+ rq->cmd[0] = INQUIRY;
+ rq->cmd[1] = 1;
+ rq->cmd[2] = 0x83;
+ rq->cmd[3] = bufflen >> 8;
+ rq->cmd[4] = bufflen & 0xff;
+ rq->cmd_len = COMMAND_SIZE(INQUIRY);
+
+ error = blk_execute_rq(rq->q, NULL, rq, 1);
+ if (error) {
+ pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
+ rq->errors);
+ goto out_put_request;
+ }
+
+ len = (buf[2] << 8) + buf[3] + 4;
+ if (len > bufflen) {
+ pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
+ len);
+ goto out_put_request;
+ }
+
+ d = buf + 4;
+ for (d = buf + 4; d < buf + len; d += id_len + 4) {
+ id_len = d[3];
+ type = d[1] & 0xf;
+ assoc = (d[1] >> 4) & 0x3;
+
+ /*
+ * We only care about a EUI-64 and NAA designator types
+ * with LU association.
+ */
+ if (assoc != 0x00)
+ continue;
+ if (type != 0x02 && type != 0x03)
+ continue;
+ if (id_len != 8 && id_len != 12 && id_len != 16)
+ continue;
+
+ b->scsi.code_set = PS_CODE_SET_BINARY;
+ b->scsi.designator_type = type == 0x02 ?
+ PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
+ b->scsi.designator_len = id_len;
+ memcpy(b->scsi.designator, d + 4, id_len);
+
+ /*
+ * If we found a 8 or 12 byte descriptor continue on to
+ * see if a 16 byte one is available. If we find a
+ * 16 byte descriptor we're done.
+ */
+ if (id_len == 16)
+ break;
+ }
+
+out_put_request:
+ blk_put_request(rq);
+out_free_buf:
+ kfree(buf);
+ return error;
+}
+
+#define NFSD_MDS_PR_KEY 0x0100000000000000
+
+/*
+ * We use the client ID as a unique key for the reservations.
+ * This allows us to easily fence a client when recalls fail.
+ */
+static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
+{
+ return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
+}
+
+static int
+nfsd4_block_get_device_info_scsi(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ struct pnfs_block_deviceaddr *dev;
+ struct pnfs_block_volume *b;
+ const struct pr_ops *ops;
+ int error;
+
+ dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+ sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+ gdp->gd_device = dev;
+
+ dev->nr_volumes = 1;
+ b = &dev->volumes[0];
+
+ b->type = PNFS_BLOCK_VOLUME_SCSI;
+ b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
+
+ error = nfsd4_scsi_identify_device(sb->s_bdev, b);
+ if (error)
+ return error;
+
+ ops = sb->s_bdev->bd_disk->fops->pr_ops;
+ if (!ops) {
+ pr_err("pNFS: device %s does not support PRs.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
+ if (error) {
+ pr_err("pNFS: failed to register key for device %s.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
+ PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
+ if (error) {
+ pr_err("pNFS: failed to reserve device %s.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static __be32
+nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ if (sb->s_bdev != sb->s_bdev->bd_contains)
+ return nfserr_inval;
+ return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
+}
+static __be32
+nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+ struct nfsd4_layoutcommit *lcp)
+{
+ struct iomap *iomaps;
+ int nr_iomaps;
+
+ nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+ lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ if (nr_iomaps < 0)
+ return nfserrno(nr_iomaps);
+
+ return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
+static void
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+{
+ struct nfs4_client *clp = ls->ls_stid.sc_client;
+ struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+
+ bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
+ nfsd4_scsi_pr_key(clp), 0, true);
+}
+
+const struct nfsd4_layout_ops scsi_layout_ops = {
+ /*
+ * Pretend that we send notification to the client. This is a blatant
+ * lie to force recent Linux clients to cache our device IDs.
+ * We rarely ever change the device ID, so the harm of leaking deviceids
+ * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
+ * in this regard, but I filed errata 4119 for this a while ago, and
+ * hopefully the Linux client will eventually start caching deviceids
+ * without this again.
+ */
+ .notify_types =
+ NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
+ .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo,
+ .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
+ .proc_layoutget = nfsd4_block_proc_layoutget,
+ .encode_layoutget = nfsd4_block_encode_layoutget,
+ .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit,
+ .fence_client = nfsd4_scsi_fence_client,
+};
+#endif /* CONFIG_NFSD_SCSILAYOUT */
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 6d834dc9bbc8..6c3b316f932e 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/sunrpc/svc.h>
#include <linux/exportfs.h>
@@ -53,6 +53,18 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_encode_hyper(p, b->simple.offset);
p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
break;
+ case PNFS_BLOCK_VOLUME_SCSI:
+ len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8;
+ p = xdr_reserve_space(xdr, len);
+ if (!p)
+ return -ETOOSMALL;
+
+ *p++ = cpu_to_be32(b->type);
+ *p++ = cpu_to_be32(b->scsi.code_set);
+ *p++ = cpu_to_be32(b->scsi.designator_type);
+ p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len);
+ p = xdr_encode_hyper(p, b->scsi.pr_key);
+ break;
default:
return -ENOTSUPP;
}
@@ -93,18 +105,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
u32 block_size)
{
struct iomap *iomaps;
- u32 nr_iomaps, expected, i;
+ u32 nr_iomaps, i;
if (len < sizeof(u32)) {
dprintk("%s: extent array too small: %u\n", __func__, len);
return -EINVAL;
}
+ len -= sizeof(u32);
+ if (len % PNFS_BLOCK_EXTENT_SIZE) {
+ dprintk("%s: extent array invalid: %u\n", __func__, len);
+ return -EINVAL;
+ }
nr_iomaps = be32_to_cpup(p++);
- expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
- if (len != expected) {
+ if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
dprintk("%s: extent array size mismatch: %u/%u\n",
- __func__, len, expected);
+ __func__, len, nr_iomaps);
return -EINVAL;
}
@@ -155,3 +171,54 @@ fail:
kfree(iomaps);
return -EINVAL;
}
+
+int
+nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+ u32 block_size)
+{
+ struct iomap *iomaps;
+ u32 nr_iomaps, expected, i;
+
+ if (len < sizeof(u32)) {
+ dprintk("%s: extent array too small: %u\n", __func__, len);
+ return -EINVAL;
+ }
+
+ nr_iomaps = be32_to_cpup(p++);
+ expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
+ if (len != expected) {
+ dprintk("%s: extent array size mismatch: %u/%u\n",
+ __func__, len, expected);
+ return -EINVAL;
+ }
+
+ iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+ if (!iomaps) {
+ dprintk("%s: failed to allocate extent array\n", __func__);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nr_iomaps; i++) {
+ u64 val;
+
+ p = xdr_decode_hyper(p, &val);
+ if (val & (block_size - 1)) {
+ dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
+ goto fail;
+ }
+ iomaps[i].offset = val;
+
+ p = xdr_decode_hyper(p, &val);
+ if (val & (block_size - 1)) {
+ dprintk("%s: unaligned length 0x%llx\n", __func__, val);
+ goto fail;
+ }
+ iomaps[i].length = val;
+ }
+
+ *iomapp = iomaps;
+ return nr_iomaps;
+fail:
+ kfree(iomaps);
+ return -EINVAL;
+}
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index 6de925fe8499..397bc7563a49 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -15,6 +15,11 @@ struct pnfs_block_extent {
enum pnfs_block_extent_state es;
};
+struct pnfs_block_range {
+ u64 foff;
+ u64 len;
+};
+
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
* Not actually limited by the protocol.
@@ -29,6 +34,13 @@ struct pnfs_block_volume {
u32 sig_len;
u8 sig[PNFS_BLOCK_UUID_LEN];
} simple;
+ struct {
+ enum scsi_code_set code_set;
+ enum scsi_designator_type designator_type;
+ int designator_len;
+ u8 designator[256];
+ u64 pr_key;
+ } scsi;
};
};
@@ -43,5 +55,7 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
struct nfsd4_layoutget *lgp);
int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
u32 block_size);
+int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+ u32 block_size);
#endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7f785c..51c3b06e8036 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
{
__be32 nfserr;
u32 max_blocksize = svc_max_payload(rqstp);
+ unsigned long cnt = min(argp->count, max_blocksize);
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
- resp->count = min(argp->count, max_blocksize);
+ resp->count = cnt;
svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
@@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
&resp->count);
if (nfserr == 0) {
struct inode *inode = d_inode(resp->fh.fh_dentry);
-
- resp->eof = (argp->offset + resp->count) >= inode->i_size;
+ resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
+ inode->i_size);
}
RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ce2d010d3b17..825c7bc8d789 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
*/
+#include <linux/blkdev.h>
#include <linux/kmod.h>
#include <linux/file.h>
#include <linux/jhash.h>
@@ -26,7 +27,12 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
static const struct lock_manager_operations nfsd4_layouts_lm_ops;
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
[LAYOUT_BLOCK_VOLUME] = &bl_layout_ops,
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ [LAYOUT_SCSI] = &scsi_layout_ops,
+#endif
};
/* pNFS device ID to export fsid mapping */
@@ -121,10 +127,24 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
if (!(exp->ex_flags & NFSEXP_PNFS))
return;
+ /*
+ * Check if the file system supports exporting a block-like layout.
+ * If the block device supports reservations prefer the SCSI layout,
+ * otherwise advertise the block layout.
+ */
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
if (sb->s_export_op->get_uuid &&
sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks)
exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ /* overwrite block layout selection if needed */
+ if (sb->s_export_op->map_blocks &&
+ sb->s_export_op->commit_blocks &&
+ sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
+ exp->ex_layout_type = LAYOUT_SCSI;
+#endif
}
static void
@@ -590,8 +610,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
- trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
printk(KERN_WARNING
"nfsd: client %s failed to respond to layout recall. "
" Fencing..\n", addr_str);
@@ -626,6 +644,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
container_of(cb, struct nfs4_layout_stateid, ls_recall);
struct nfsd_net *nn;
ktime_t now, cutoff;
+ const struct nfsd4_layout_ops *ops;
LIST_HEAD(reaplist);
@@ -661,7 +680,13 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
/*
* Unknown error or non-responding client, we'll need to fence.
*/
- nfsd4_cb_layout_fail(ls);
+ trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
+
+ ops = nfsd4_layout_ops[ls->ls_layout_type];
+ if (ops->fence_client)
+ ops->fence_client(ls);
+ else
+ nfsd4_cb_layout_fail(ls);
return -1;
}
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4cba7865f496..de1ff1d98bb1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -864,12 +864,10 @@ static __be32
nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_secinfo *secinfo)
{
- struct svc_fh resfh;
struct svc_export *exp;
struct dentry *dentry;
__be32 err;
- fh_init(&resfh, NFS4_FHSIZE);
err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC);
if (err)
return err;
@@ -878,6 +876,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&exp, &dentry);
if (err)
return err;
+ fh_unlock(&cstate->current_fh);
if (d_really_is_negative(dentry)) {
exp_put(exp);
err = nfserr_noent;
@@ -1269,8 +1268,10 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
goto out;
nfserr = nfs_ok;
- if (gdp->gd_maxcount != 0)
- nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
+ if (gdp->gd_maxcount != 0) {
+ nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
+ cstate->session->se_client, gdp);
+ }
gdp->gd_notify_types &= ops->notify_types;
out:
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 195fe2668207..66eaeb1e8c2c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1266,6 +1266,7 @@ nfsd4_umh_cltrack_init(struct net *net)
/* XXX: The usermode helper s not working in container yet. */
if (net != &init_net) {
pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
+ kfree(grace_start);
return -EINVAL;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c484a2b6cd10..0462eeddfff9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2408,7 +2408,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
default: /* checked by xdr code */
WARN_ON_ONCE(1);
case SP4_SSV:
- return nfserr_encr_alg_unsupp;
+ status = nfserr_encr_alg_unsupp;
+ goto out_nolock;
}
/* Cases below refer to rfc 5661 section 18.35.4: */
@@ -2586,21 +2587,26 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
return nfs_ok;
}
+/*
+ * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now.
+ * These are based on similar macros in linux/sunrpc/msg_prot.h .
+ */
+#define RPC_MAX_HEADER_WITH_AUTH_SYS \
+ (RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK))
+
+#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \
+ (RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK))
+
#define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \
- RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))
+ RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32))
#define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \
- RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32))
+ RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \
+ sizeof(__be32))
static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
{
ca->headerpadsz = 0;
- /*
- * These RPC_MAX_HEADER macros are overkill, especially since we
- * don't even do gss on the backchannel yet. But this is still
- * less than 1k. Tighten up this estimate in the unlikely event
- * it turns out to be a problem for some client:
- */
if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
return nfserr_toosmall;
if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
@@ -2710,10 +2716,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
goto out_free_conn;
}
status = nfs_ok;
- /*
- * We do not support RDMA or persistent sessions
- */
+ /* Persistent sessions are not supported */
cr_ses->flags &= ~SESSION4_PERSIST;
+ /* Upshifting from TCP to RDMA is not supported */
cr_ses->flags &= ~SESSION4_RDMA;
init_session(rqstp, new, conf, cr_ses);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d6ef0955a979..9df898ba648f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1072,8 +1072,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
READ_BUF(4);
rename->rn_snamelen = be32_to_cpup(p++);
- READ_BUF(rename->rn_snamelen + 4);
+ READ_BUF(rename->rn_snamelen);
SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+ READ_BUF(4);
rename->rn_tnamelen = be32_to_cpup(p++);
READ_BUF(rename->rn_tnamelen);
SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
@@ -1155,13 +1156,14 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
READ_BUF(8);
setclientid->se_callback_prog = be32_to_cpup(p++);
setclientid->se_callback_netid_len = be32_to_cpup(p++);
-
- READ_BUF(setclientid->se_callback_netid_len + 4);
+ READ_BUF(setclientid->se_callback_netid_len);
SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+ READ_BUF(4);
setclientid->se_callback_addr_len = be32_to_cpup(p++);
- READ_BUF(setclientid->se_callback_addr_len + 4);
+ READ_BUF(setclientid->se_callback_addr_len);
SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+ READ_BUF(4);
setclientid->se_callback_ident = be32_to_cpup(p++);
DECODE_TAIL;
@@ -1835,8 +1837,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
READ_BUF(4);
argp->taglen = be32_to_cpup(p++);
- READ_BUF(argp->taglen + 8);
+ READ_BUF(argp->taglen);
SAVEMEM(argp->tag, argp->taglen);
+ READ_BUF(8);
argp->minorversion = be32_to_cpup(p++);
argp->opcnt = be32_to_cpup(p++);
max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
@@ -3060,7 +3063,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
NFS4_MAX_SESSIONID_LEN);
*p++ = cpu_to_be32(bcts->dir);
- /* Sorry, we do not yet support RDMA over 4.1: */
+ /* Upshifting from TCP to RDMA is not supported */
*p++ = cpu_to_be32(0);
}
return nfserr;
@@ -3362,6 +3365,7 @@ static __be32 nfsd4_encode_splice_read(
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
u32 eof;
+ long len;
int space_left;
__be32 nfserr;
__be32 *p = xdr->p - 2;
@@ -3370,6 +3374,7 @@ static __be32 nfsd4_encode_splice_read(
if (xdr->end - xdr->p < 1)
return nfserr_resource;
+ len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, file,
read->rd_offset, &maxcount);
if (nfserr) {
@@ -3382,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}
- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);
*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
@@ -3453,14 +3458,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
}
read->rd_vlen = v;
+ len = maxcount;
nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
read->rd_vlen, &maxcount);
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);
tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index d4c4453674c6..7d073b9b1553 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -21,6 +21,7 @@ struct nfsd4_layout_ops {
u32 notify_types;
__be32 (*proc_getdeviceinfo)(struct super_block *sb,
+ struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdevp);
__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
struct nfsd4_getdeviceinfo *gdevp);
@@ -32,10 +33,17 @@ struct nfsd4_layout_ops {
__be32 (*proc_layoutcommit)(struct inode *inode,
struct nfsd4_layoutcommit *lcp);
+
+ void (*fence_client)(struct nfs4_layout_stateid *ls);
};
extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
extern const struct nfsd4_layout_ops bl_layout_ops;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+extern const struct nfsd4_layout_ops scsi_layout_ops;
+#endif
__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c11ba316f23f..2d573ec057f8 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -139,4 +139,23 @@ static inline int nfsd_create_is_exclusive(int createmode)
|| createmode == NFS4_CREATE_EXCLUSIVE4_1;
}
+static inline bool nfsd_eof_on_read(long requested, long read,
+ loff_t offset, loff_t size)
+{
+ /* We assume a short read means eof: */
+ if (requested > read)
+ return true;
+ /*
+ * A non-short read might also reach end of file. The spec
+ * still requires us to set eof in that case.
+ *
+ * Further operations may have modified the file size since
+ * the read, so the following check is not atomic with the read.
+ * We've only seen that cause a problem for a client in the case
+ * where the read returned a count of 0 without setting eof.
+ * That case was fixed by the addition of the above check.
+ */
+ return (offset + read >= size);
+}
+
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index ce210d4951a1..e27e6527912b 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -41,7 +41,8 @@ ocfs2-objs := \
quota_local.o \
quota_global.o \
xattr.o \
- acl.o
+ acl.o \
+ filecheck.o
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
new file mode 100644
index 000000000000..2cabbcf2f28e
--- /dev/null
+++ b/fs/ocfs2/filecheck.c
@@ -0,0 +1,606 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * filecheck.c
+ *
+ * Code which implements online file check.
+ *
+ * Copyright (C) 2016 SuSE. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/sysctl.h>
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "ocfs2_fs.h"
+#include "stackglue.h"
+#include "inode.h"
+
+#include "filecheck.h"
+
+
+/* File check error strings,
+ * must correspond with error number in header file.
+ */
+static const char * const ocfs2_filecheck_errs[] = {
+ "SUCCESS",
+ "FAILED",
+ "INPROGRESS",
+ "READONLY",
+ "INJBD",
+ "INVALIDINO",
+ "BLOCKECC",
+ "BLOCKNO",
+ "VALIDFLAG",
+ "GENERATION",
+ "UNSUPPORTED"
+};
+
+static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock);
+static LIST_HEAD(ocfs2_filecheck_sysfs_list);
+
+struct ocfs2_filecheck {
+ struct list_head fc_head; /* File check entry list head */
+ spinlock_t fc_lock;
+ unsigned int fc_max; /* Maximum number of entry in list */
+ unsigned int fc_size; /* Current entry count in list */
+ unsigned int fc_done; /* Finished entry count in list */
+};
+
+struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per mounting */
+ struct list_head fs_list;
+ atomic_t fs_count;
+ struct super_block *fs_sb;
+ struct kset *fs_devicekset;
+ struct kset *fs_fcheckkset;
+ struct ocfs2_filecheck *fs_fcheck;
+};
+
+#define OCFS2_FILECHECK_MAXSIZE 100
+#define OCFS2_FILECHECK_MINSIZE 10
+
+/* File check operation type */
+enum {
+ OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */
+ OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */
+ OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */
+};
+
+struct ocfs2_filecheck_entry {
+ struct list_head fe_list;
+ unsigned long fe_ino;
+ unsigned int fe_type;
+ unsigned int fe_done:1;
+ unsigned int fe_status:31;
+};
+
+struct ocfs2_filecheck_args {
+ unsigned int fa_type;
+ union {
+ unsigned long fa_ino;
+ unsigned int fa_len;
+ };
+};
+
+static const char *
+ocfs2_filecheck_error(int errno)
+{
+ if (!errno)
+ return ocfs2_filecheck_errs[errno];
+
+ BUG_ON(errno < OCFS2_FILECHECK_ERR_START ||
+ errno > OCFS2_FILECHECK_ERR_END);
+ return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1];
+}
+
+static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf);
+static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count);
+static struct kobj_attribute ocfs2_attr_filecheck_chk =
+ __ATTR(check, S_IRUSR | S_IWUSR,
+ ocfs2_filecheck_show,
+ ocfs2_filecheck_store);
+static struct kobj_attribute ocfs2_attr_filecheck_fix =
+ __ATTR(fix, S_IRUSR | S_IWUSR,
+ ocfs2_filecheck_show,
+ ocfs2_filecheck_store);
+static struct kobj_attribute ocfs2_attr_filecheck_set =
+ __ATTR(set, S_IRUSR | S_IWUSR,
+ ocfs2_filecheck_show,
+ ocfs2_filecheck_store);
+
+static int ocfs2_filecheck_sysfs_wait(atomic_t *p)
+{
+ schedule();
+ return 0;
+}
+
+static void
+ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+ struct ocfs2_filecheck_entry *p;
+
+ if (!atomic_dec_and_test(&entry->fs_count))
+ wait_on_atomic_t(&entry->fs_count, ocfs2_filecheck_sysfs_wait,
+ TASK_UNINTERRUPTIBLE);
+
+ spin_lock(&entry->fs_fcheck->fc_lock);
+ while (!list_empty(&entry->fs_fcheck->fc_head)) {
+ p = list_first_entry(&entry->fs_fcheck->fc_head,
+ struct ocfs2_filecheck_entry, fe_list);
+ list_del(&p->fe_list);
+ BUG_ON(!p->fe_done); /* To free a undone file check entry */
+ kfree(p);
+ }
+ spin_unlock(&entry->fs_fcheck->fc_lock);
+
+ kset_unregister(entry->fs_fcheckkset);
+ kset_unregister(entry->fs_devicekset);
+ kfree(entry->fs_fcheck);
+ kfree(entry);
+}
+
+static void
+ocfs2_filecheck_sysfs_add(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+ spin_lock(&ocfs2_filecheck_sysfs_lock);
+ list_add_tail(&entry->fs_list, &ocfs2_filecheck_sysfs_list);
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+}
+
+static int ocfs2_filecheck_sysfs_del(const char *devname)
+{
+ struct ocfs2_filecheck_sysfs_entry *p;
+
+ spin_lock(&ocfs2_filecheck_sysfs_lock);
+ list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
+ if (!strcmp(p->fs_sb->s_id, devname)) {
+ list_del(&p->fs_list);
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ ocfs2_filecheck_sysfs_free(p);
+ return 0;
+ }
+ }
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ return 1;
+}
+
+static void
+ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+ if (atomic_dec_and_test(&entry->fs_count))
+ wake_up_atomic_t(&entry->fs_count);
+}
+
+static struct ocfs2_filecheck_sysfs_entry *
+ocfs2_filecheck_sysfs_get(const char *devname)
+{
+ struct ocfs2_filecheck_sysfs_entry *p = NULL;
+
+ spin_lock(&ocfs2_filecheck_sysfs_lock);
+ list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
+ if (!strcmp(p->fs_sb->s_id, devname)) {
+ atomic_inc(&p->fs_count);
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ return p;
+ }
+ }
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ return NULL;
+}
+
+int ocfs2_filecheck_create_sysfs(struct super_block *sb)
+{
+ int ret = 0;
+ struct kset *device_kset = NULL;
+ struct kset *fcheck_kset = NULL;
+ struct ocfs2_filecheck *fcheck = NULL;
+ struct ocfs2_filecheck_sysfs_entry *entry = NULL;
+ struct attribute **attrs = NULL;
+ struct attribute_group attrgp;
+
+ if (!ocfs2_kset)
+ return -ENOMEM;
+
+ attrs = kmalloc(sizeof(struct attribute *) * 4, GFP_NOFS);
+ if (!attrs) {
+ ret = -ENOMEM;
+ goto error;
+ } else {
+ attrs[0] = &ocfs2_attr_filecheck_chk.attr;
+ attrs[1] = &ocfs2_attr_filecheck_fix.attr;
+ attrs[2] = &ocfs2_attr_filecheck_set.attr;
+ attrs[3] = NULL;
+ memset(&attrgp, 0, sizeof(attrgp));
+ attrgp.attrs = attrs;
+ }
+
+ fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS);
+ if (!fcheck) {
+ ret = -ENOMEM;
+ goto error;
+ } else {
+ INIT_LIST_HEAD(&fcheck->fc_head);
+ spin_lock_init(&fcheck->fc_lock);
+ fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
+ fcheck->fc_size = 0;
+ fcheck->fc_done = 0;
+ }
+
+ if (strlen(sb->s_id) <= 0) {
+ mlog(ML_ERROR,
+ "Cannot get device basename when create filecheck sysfs\n");
+ ret = -ENODEV;
+ goto error;
+ }
+
+ device_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj);
+ if (!device_kset) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ fcheck_kset = kset_create_and_add("filecheck", NULL,
+ &device_kset->kobj);
+ if (!fcheck_kset) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ret = sysfs_create_group(&fcheck_kset->kobj, &attrgp);
+ if (ret)
+ goto error;
+
+ entry = kmalloc(sizeof(struct ocfs2_filecheck_sysfs_entry), GFP_NOFS);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto error;
+ } else {
+ atomic_set(&entry->fs_count, 1);
+ entry->fs_sb = sb;
+ entry->fs_devicekset = device_kset;
+ entry->fs_fcheckkset = fcheck_kset;
+ entry->fs_fcheck = fcheck;
+ ocfs2_filecheck_sysfs_add(entry);
+ }
+
+ kfree(attrs);
+ return 0;
+
+error:
+ kfree(attrs);
+ kfree(entry);
+ kfree(fcheck);
+ kset_unregister(fcheck_kset);
+ kset_unregister(device_kset);
+ return ret;
+}
+
+int ocfs2_filecheck_remove_sysfs(struct super_block *sb)
+{
+ return ocfs2_filecheck_sysfs_del(sb->s_id);
+}
+
+static int
+ocfs2_filecheck_erase_entries(struct ocfs2_filecheck_sysfs_entry *ent,
+ unsigned int count);
+static int
+ocfs2_filecheck_adjust_max(struct ocfs2_filecheck_sysfs_entry *ent,
+ unsigned int len)
+{
+ int ret;
+
+ if ((len < OCFS2_FILECHECK_MINSIZE) || (len > OCFS2_FILECHECK_MAXSIZE))
+ return -EINVAL;
+
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) {
+ mlog(ML_ERROR,
+ "Cannot set online file check maximum entry number "
+ "to %u due to too many pending entries(%u)\n",
+ len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done);
+ ret = -EBUSY;
+ } else {
+ if (len < ent->fs_fcheck->fc_size)
+ BUG_ON(!ocfs2_filecheck_erase_entries(ent,
+ ent->fs_fcheck->fc_size - len));
+
+ ent->fs_fcheck->fc_max = len;
+ ret = 0;
+ }
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+
+ return ret;
+}
+
+#define OCFS2_FILECHECK_ARGS_LEN 24
+static int
+ocfs2_filecheck_args_get_long(const char *buf, size_t count,
+ unsigned long *val)
+{
+ char buffer[OCFS2_FILECHECK_ARGS_LEN];
+
+ memcpy(buffer, buf, count);
+ buffer[count] = '\0';
+
+ if (kstrtoul(buffer, 0, val))
+ return 1;
+
+ return 0;
+}
+
+static int
+ocfs2_filecheck_type_parse(const char *name, unsigned int *type)
+{
+ if (!strncmp(name, "fix", 4))
+ *type = OCFS2_FILECHECK_TYPE_FIX;
+ else if (!strncmp(name, "check", 6))
+ *type = OCFS2_FILECHECK_TYPE_CHK;
+ else if (!strncmp(name, "set", 4))
+ *type = OCFS2_FILECHECK_TYPE_SET;
+ else
+ return 1;
+
+ return 0;
+}
+
+static int
+ocfs2_filecheck_args_parse(const char *name, const char *buf, size_t count,
+ struct ocfs2_filecheck_args *args)
+{
+ unsigned long val = 0;
+ unsigned int type;
+
+ /* too short/long args length */
+ if ((count < 1) || (count >= OCFS2_FILECHECK_ARGS_LEN))
+ return 1;
+
+ if (ocfs2_filecheck_type_parse(name, &type))
+ return 1;
+ if (ocfs2_filecheck_args_get_long(buf, count, &val))
+ return 1;
+
+ if (val <= 0)
+ return 1;
+
+ args->fa_type = type;
+ if (type == OCFS2_FILECHECK_TYPE_SET)
+ args->fa_len = (unsigned int)val;
+ else
+ args->fa_ino = val;
+
+ return 0;
+}
+
+static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+
+ ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
+ unsigned int type;
+ struct ocfs2_filecheck_entry *p;
+ struct ocfs2_filecheck_sysfs_entry *ent;
+
+ if (ocfs2_filecheck_type_parse(attr->attr.name, &type))
+ return -EINVAL;
+
+ ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
+ if (!ent) {
+ mlog(ML_ERROR,
+ "Cannot get the corresponding entry via device basename %s\n",
+ kobj->name);
+ return -ENODEV;
+ }
+
+ if (type == OCFS2_FILECHECK_TYPE_SET) {
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max);
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+ goto exit;
+ }
+
+ ret = snprintf(buf, remain, "INO\t\tDONE\tERROR\n");
+ total += ret;
+ remain -= ret;
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
+ if (p->fe_type != type)
+ continue;
+
+ ret = snprintf(buf + total, remain, "%lu\t\t%u\t%s\n",
+ p->fe_ino, p->fe_done,
+ ocfs2_filecheck_error(p->fe_status));
+ if (ret < 0) {
+ total = ret;
+ break;
+ }
+ if (ret == remain) {
+ /* snprintf() didn't fit */
+ total = -E2BIG;
+ break;
+ }
+ total += ret;
+ remain -= ret;
+ }
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+
+exit:
+ ocfs2_filecheck_sysfs_put(ent);
+ return total;
+}
+
+static int
+ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent)
+{
+ struct ocfs2_filecheck_entry *p;
+
+ list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
+ if (p->fe_done) {
+ list_del(&p->fe_list);
+ kfree(p);
+ ent->fs_fcheck->fc_size--;
+ ent->fs_fcheck->fc_done--;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+ocfs2_filecheck_erase_entries(struct ocfs2_filecheck_sysfs_entry *ent,
+ unsigned int count)
+{
+ unsigned int i = 0;
+ unsigned int ret = 0;
+
+ while (i++ < count) {
+ if (ocfs2_filecheck_erase_entry(ent))
+ ret++;
+ else
+ break;
+ }
+
+ return (ret == count ? 1 : 0);
+}
+
+static void
+ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent,
+ struct ocfs2_filecheck_entry *entry)
+{
+ entry->fe_done = 1;
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ ent->fs_fcheck->fc_done++;
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+}
+
+static unsigned int
+ocfs2_filecheck_handle(struct super_block *sb,
+ unsigned long ino, unsigned int flags)
+{
+ unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS;
+ struct inode *inode = NULL;
+ int rc;
+
+ inode = ocfs2_iget(OCFS2_SB(sb), ino, flags, 0);
+ if (IS_ERR(inode)) {
+ rc = (int)(-(long)inode);
+ if (rc >= OCFS2_FILECHECK_ERR_START &&
+ rc < OCFS2_FILECHECK_ERR_END)
+ ret = rc;
+ else
+ ret = OCFS2_FILECHECK_ERR_FAILED;
+ } else
+ iput(inode);
+
+ return ret;
+}
+
+static void
+ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
+ struct ocfs2_filecheck_entry *entry)
+{
+ if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK)
+ entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+ entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK);
+ else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX)
+ entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+ entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX);
+ else
+ entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED;
+
+ ocfs2_filecheck_done_entry(ent, entry);
+}
+
+static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ocfs2_filecheck_args args;
+ struct ocfs2_filecheck_entry *entry;
+ struct ocfs2_filecheck_sysfs_entry *ent;
+ ssize_t ret = 0;
+
+ if (count == 0)
+ return count;
+
+ if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) {
+ mlog(ML_ERROR, "Invalid arguments for online file check\n");
+ return -EINVAL;
+ }
+
+ ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
+ if (!ent) {
+ mlog(ML_ERROR,
+ "Cannot get the corresponding entry via device basename %s\n",
+ kobj->parent->name);
+ return -ENODEV;
+ }
+
+ if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) {
+ ret = ocfs2_filecheck_adjust_max(ent, args.fa_len);
+ goto exit;
+ }
+
+ entry = kmalloc(sizeof(struct ocfs2_filecheck_entry), GFP_NOFS);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
+ (ent->fs_fcheck->fc_done == 0)) {
+ mlog(ML_ERROR,
+ "Cannot do more file check "
+ "since file check queue(%u) is full now\n",
+ ent->fs_fcheck->fc_max);
+ ret = -EBUSY;
+ kfree(entry);
+ } else {
+ if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
+ (ent->fs_fcheck->fc_done > 0)) {
+ /* Delete the oldest entry which was done,
+ * make sure the entry size in list does
+ * not exceed maximum value
+ */
+ BUG_ON(!ocfs2_filecheck_erase_entry(ent));
+ }
+
+ entry->fe_ino = args.fa_ino;
+ entry->fe_type = args.fa_type;
+ entry->fe_done = 0;
+ entry->fe_status = OCFS2_FILECHECK_ERR_INPROGRESS;
+ list_add_tail(&entry->fe_list, &ent->fs_fcheck->fc_head);
+ ent->fs_fcheck->fc_size++;
+ }
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+
+ if (!ret)
+ ocfs2_filecheck_handle_entry(ent, entry);
+
+exit:
+ ocfs2_filecheck_sysfs_put(ent);
+ return (!ret ? count : ret);
+}
diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h
new file mode 100644
index 000000000000..e5cd002a2c09
--- /dev/null
+++ b/fs/ocfs2/filecheck.h
@@ -0,0 +1,49 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * filecheck.h
+ *
+ * Online file check.
+ *
+ * Copyright (C) 2016 SuSE. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+
+#ifndef FILECHECK_H
+#define FILECHECK_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+
+/* File check errno */
+enum {
+ OCFS2_FILECHECK_ERR_SUCCESS = 0, /* Success */
+ OCFS2_FILECHECK_ERR_FAILED = 1000, /* Other failure */
+ OCFS2_FILECHECK_ERR_INPROGRESS, /* In progress */
+ OCFS2_FILECHECK_ERR_READONLY, /* Read only */
+ OCFS2_FILECHECK_ERR_INJBD, /* Buffer in jbd */
+ OCFS2_FILECHECK_ERR_INVALIDINO, /* Invalid ino */
+ OCFS2_FILECHECK_ERR_BLOCKECC, /* Block ecc */
+ OCFS2_FILECHECK_ERR_BLOCKNO, /* Block number */
+ OCFS2_FILECHECK_ERR_VALIDFLAG, /* Inode valid flag */
+ OCFS2_FILECHECK_ERR_GENERATION, /* Inode generation */
+ OCFS2_FILECHECK_ERR_UNSUPPORTED /* Unsupported */
+};
+
+#define OCFS2_FILECHECK_ERR_START OCFS2_FILECHECK_ERR_FAILED
+#define OCFS2_FILECHECK_ERR_END OCFS2_FILECHECK_ERR_UNSUPPORTED
+
+int ocfs2_filecheck_create_sysfs(struct super_block *sb);
+int ocfs2_filecheck_remove_sysfs(struct super_block *sb);
+
+#endif /* FILECHECK_H */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 36294446d960..ba495beff1c2 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -53,6 +53,7 @@
#include "xattr.h"
#include "refcounttree.h"
#include "ocfs2_trace.h"
+#include "filecheck.h"
#include "buffer_head_io.h"
@@ -74,6 +75,14 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh);
+static int ocfs2_filecheck_read_inode_block_full(struct inode *inode,
+ struct buffer_head **bh,
+ int flags, int type);
+static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
+ struct buffer_head *bh);
+static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
+ struct buffer_head *bh);
+
void ocfs2_set_inode_flags(struct inode *inode)
{
unsigned int flags = OCFS2_I(inode)->ip_attr;
@@ -127,6 +136,7 @@ struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
int sysfile_type)
{
+ int rc = 0;
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
struct ocfs2_find_inode_args args;
@@ -161,12 +171,17 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
}
trace_ocfs2_iget5_locked(inode->i_state);
if (inode->i_state & I_NEW) {
- ocfs2_read_locked_inode(inode, &args);
+ rc = ocfs2_read_locked_inode(inode, &args);
unlock_new_inode(inode);
}
if (is_bad_inode(inode)) {
iput(inode);
- inode = ERR_PTR(-ESTALE);
+ if ((flags & OCFS2_FI_FLAG_FILECHECK_CHK) ||
+ (flags & OCFS2_FI_FLAG_FILECHECK_FIX))
+ /* Return OCFS2_FILECHECK_ERR_XXX related errno */
+ inode = ERR_PTR(rc);
+ else
+ inode = ERR_PTR(-ESTALE);
goto bail;
}
@@ -410,7 +425,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
struct ocfs2_super *osb;
struct ocfs2_dinode *fe;
struct buffer_head *bh = NULL;
- int status, can_lock;
+ int status, can_lock, lock_level = 0;
u32 generation = 0;
status = -EINVAL;
@@ -478,7 +493,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
mlog_errno(status);
return status;
}
- status = ocfs2_inode_lock(inode, NULL, 0);
+ status = ocfs2_inode_lock(inode, NULL, lock_level);
if (status) {
make_bad_inode(inode);
mlog_errno(status);
@@ -495,16 +510,32 @@ static int ocfs2_read_locked_inode(struct inode *inode,
}
if (can_lock) {
- status = ocfs2_read_inode_block_full(inode, &bh,
- OCFS2_BH_IGNORE_CACHE);
+ if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK)
+ status = ocfs2_filecheck_read_inode_block_full(inode,
+ &bh, OCFS2_BH_IGNORE_CACHE, 0);
+ else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX)
+ status = ocfs2_filecheck_read_inode_block_full(inode,
+ &bh, OCFS2_BH_IGNORE_CACHE, 1);
+ else
+ status = ocfs2_read_inode_block_full(inode,
+ &bh, OCFS2_BH_IGNORE_CACHE);
} else {
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
/*
* If buffer is in jbd, then its checksum may not have been
* computed as yet.
*/
- if (!status && !buffer_jbd(bh))
- status = ocfs2_validate_inode_block(osb->sb, bh);
+ if (!status && !buffer_jbd(bh)) {
+ if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK)
+ status = ocfs2_filecheck_validate_inode_block(
+ osb->sb, bh);
+ else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX)
+ status = ocfs2_filecheck_repair_inode_block(
+ osb->sb, bh);
+ else
+ status = ocfs2_validate_inode_block(
+ osb->sb, bh);
+ }
}
if (status < 0) {
mlog_errno(status);
@@ -532,11 +563,24 @@ static int ocfs2_read_locked_inode(struct inode *inode,
BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
+ if (buffer_dirty(bh) && !buffer_jbd(bh)) {
+ if (can_lock) {
+ ocfs2_inode_unlock(inode, lock_level);
+ lock_level = 1;
+ ocfs2_inode_lock(inode, NULL, lock_level);
+ }
+ status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
status = 0;
bail:
if (can_lock)
- ocfs2_inode_unlock(inode, 0);
+ ocfs2_inode_unlock(inode, lock_level);
if (status < 0)
make_bad_inode(inode);
@@ -1397,6 +1441,169 @@ bail:
return rc;
}
+static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int rc = 0;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+ trace_ocfs2_filecheck_validate_inode_block(
+ (unsigned long long)bh->b_blocknr);
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ /*
+ * Call ocfs2_validate_meta_ecc() first since it has ecc repair
+ * function, but we should not return error immediately when ecc
+ * validation fails, because the reason is quite likely the invalid
+ * inode number inputed.
+ */
+ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
+ if (rc) {
+ mlog(ML_ERROR,
+ "Filecheck: checksum failed for dinode %llu\n",
+ (unsigned long long)bh->b_blocknr);
+ rc = -OCFS2_FILECHECK_ERR_BLOCKECC;
+ }
+
+ if (!OCFS2_IS_VALID_DINODE(di)) {
+ mlog(ML_ERROR,
+ "Filecheck: invalid dinode #%llu: signature = %.*s\n",
+ (unsigned long long)bh->b_blocknr, 7, di->i_signature);
+ rc = -OCFS2_FILECHECK_ERR_INVALIDINO;
+ goto bail;
+ } else if (rc)
+ goto bail;
+
+ if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+ mlog(ML_ERROR,
+ "Filecheck: invalid dinode #%llu: i_blkno is %llu\n",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ rc = -OCFS2_FILECHECK_ERR_BLOCKNO;
+ goto bail;
+ }
+
+ if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+ mlog(ML_ERROR,
+ "Filecheck: invalid dinode #%llu: OCFS2_VALID_FL "
+ "not set\n",
+ (unsigned long long)bh->b_blocknr);
+ rc = -OCFS2_FILECHECK_ERR_VALIDFLAG;
+ goto bail;
+ }
+
+ if (le32_to_cpu(di->i_fs_generation) !=
+ OCFS2_SB(sb)->fs_generation) {
+ mlog(ML_ERROR,
+ "Filecheck: invalid dinode #%llu: fs_generation is %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(di->i_fs_generation));
+ rc = -OCFS2_FILECHECK_ERR_GENERATION;
+ goto bail;
+ }
+
+bail:
+ return rc;
+}
+
+static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int changed = 0;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+ if (!ocfs2_filecheck_validate_inode_block(sb, bh))
+ return 0;
+
+ trace_ocfs2_filecheck_repair_inode_block(
+ (unsigned long long)bh->b_blocknr);
+
+ if (ocfs2_is_hard_readonly(OCFS2_SB(sb)) ||
+ ocfs2_is_soft_readonly(OCFS2_SB(sb))) {
+ mlog(ML_ERROR,
+ "Filecheck: cannot repair dinode #%llu "
+ "on readonly filesystem\n",
+ (unsigned long long)bh->b_blocknr);
+ return -OCFS2_FILECHECK_ERR_READONLY;
+ }
+
+ if (buffer_jbd(bh)) {
+ mlog(ML_ERROR,
+ "Filecheck: cannot repair dinode #%llu, "
+ "its buffer is in jbd\n",
+ (unsigned long long)bh->b_blocknr);
+ return -OCFS2_FILECHECK_ERR_INJBD;
+ }
+
+ if (!OCFS2_IS_VALID_DINODE(di)) {
+ /* Cannot fix invalid inode block */
+ return -OCFS2_FILECHECK_ERR_INVALIDINO;
+ }
+
+ if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+ /* Cannot just add VALID_FL flag back as a fix,
+ * need more things to check here.
+ */
+ return -OCFS2_FILECHECK_ERR_VALIDFLAG;
+ }
+
+ if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+ di->i_blkno = cpu_to_le64(bh->b_blocknr);
+ changed = 1;
+ mlog(ML_ERROR,
+ "Filecheck: reset dinode #%llu: i_blkno to %llu\n",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ }
+
+ if (le32_to_cpu(di->i_fs_generation) !=
+ OCFS2_SB(sb)->fs_generation) {
+ di->i_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
+ changed = 1;
+ mlog(ML_ERROR,
+ "Filecheck: reset dinode #%llu: fs_generation to %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(di->i_fs_generation));
+ }
+
+ if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) {
+ ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check);
+ mark_buffer_dirty(bh);
+ mlog(ML_ERROR,
+ "Filecheck: reset dinode #%llu: compute meta ecc\n",
+ (unsigned long long)bh->b_blocknr);
+ }
+
+ return 0;
+}
+
+static int
+ocfs2_filecheck_read_inode_block_full(struct inode *inode,
+ struct buffer_head **bh,
+ int flags, int type)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ if (!type) /* Check inode block */
+ rc = ocfs2_read_blocks(INODE_CACHE(inode),
+ OCFS2_I(inode)->ip_blkno,
+ 1, &tmp, flags,
+ ocfs2_filecheck_validate_inode_block);
+ else /* Repair inode block */
+ rc = ocfs2_read_blocks(INODE_CACHE(inode),
+ OCFS2_I(inode)->ip_blkno,
+ 1, &tmp, flags,
+ ocfs2_filecheck_repair_inode_block);
+
+ /* If ocfs2_read_blocks() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
int flags)
{
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index aac8b86f312e..01635e016b3e 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -139,6 +139,9 @@ int ocfs2_drop_inode(struct inode *inode);
/* Flags for ocfs2_iget() */
#define OCFS2_FI_FLAG_SYSFILE 0x1
#define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2
+#define OCFS2_FI_FLAG_FILECHECK_CHK 0x4
+#define OCFS2_FI_FLAG_FILECHECK_FIX 0x8
+
struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff);
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type);
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index a52a2dbc064e..24b7e7f591dc 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1540,6 +1540,8 @@ DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_read_locked_inode);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_check_orphan_recovery_state);
DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_inode_block);
+DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_validate_inode_block);
+DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_repair_inode_block);
TRACE_EVENT(ocfs2_inode_is_valid_to_delete,
TP_PROTO(void *task, void *dc_task, unsigned long long ino,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 5d965e83bd43..13219ed73e1d 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -629,7 +629,8 @@ static struct attribute_group ocfs2_attr_group = {
.attrs = ocfs2_attrs,
};
-static struct kset *ocfs2_kset;
+struct kset *ocfs2_kset;
+EXPORT_SYMBOL_GPL(ocfs2_kset);
static void ocfs2_sysfs_exit(void)
{
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 66334a30cea8..f2dce10fae54 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,4 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
+extern struct kset *ocfs2_kset;
+
#endif /* STACKGLUE_H */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 302854ee0985..ccc9386c42c5 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -74,6 +74,7 @@
#include "suballoc.h"
#include "buffer_head_io.h"
+#include "filecheck.h"
static struct kmem_cache *ocfs2_inode_cachep;
struct kmem_cache *ocfs2_dquot_cachep;
@@ -1205,6 +1206,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
/* Start this when the mount is almost sure of being successful */
ocfs2_orphan_scan_start(osb);
+ /* Create filecheck sysfile /sys/fs/ocfs2/<devname>/filecheck */
+ ocfs2_filecheck_create_sysfs(sb);
+
return status;
read_super_error:
@@ -1668,6 +1672,7 @@ static void ocfs2_put_super(struct super_block *sb)
ocfs2_sync_blockdev(sb);
ocfs2_dismount_volume(sb, 0);
+ ocfs2_filecheck_remove_sysfs(sb);
}
static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/open.c b/fs/open.c
index 55bdc75e2172..17cb6b1dab75 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -992,14 +992,12 @@ struct file *filp_open(const char *filename, int flags, umode_t mode)
EXPORT_SYMBOL(filp_open);
struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
- const char *filename, int flags)
+ const char *filename, int flags, umode_t mode)
{
struct open_flags op;
- int err = build_open_flags(flags, 0, &op);
+ int err = build_open_flags(flags, mode, &op);
if (err)
return ERR_PTR(err);
- if (flags & O_CREAT)
- return ERR_PTR(-EINVAL);
return do_file_open_root(dentry, mnt, filename, &op);
}
EXPORT_SYMBOL(file_open_root);
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
index 2c6f0cb816b4..c54a24360f85 100644
--- a/fs/ubifs/Makefile
+++ b/fs/ubifs/Makefile
@@ -4,3 +4,4 @@ ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o
ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o xattr.o debug.o
+ubifs-y += misc.o
diff --git a/fs/ubifs/misc.c b/fs/ubifs/misc.c
new file mode 100644
index 000000000000..486a2844949f
--- /dev/null
+++ b/fs/ubifs/misc.c
@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include "ubifs.h"
+
+/* Normal UBIFS messages */
+void ubifs_msg(const struct ubifs_info *c, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_notice("UBIFS (ubi%d:%d): %pV\n",
+ c->vi.ubi_num, c->vi.vol_id, &vaf);
+
+ va_end(args);
+} \
+
+/* UBIFS error messages */
+void ubifs_err(const struct ubifs_info *c, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_err("UBIFS error (ubi%d:%d pid %d): %ps: %pV\n",
+ c->vi.ubi_num, c->vi.vol_id, current->pid,
+ __builtin_return_address(0),
+ &vaf);
+
+ va_end(args);
+} \
+
+/* UBIFS warning messages */
+void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_warn("UBIFS warning (ubi%d:%d pid %d): %ps: %pV\n",
+ c->vi.ubi_num, c->vi.vol_id, current->pid,
+ __builtin_return_address(0),
+ &vaf);
+
+ va_end(args);
+}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a5697de763f5..c2a57e193a81 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -42,30 +42,6 @@
/* Version of this UBIFS implementation */
#define UBIFS_VERSION 1
-/* Normal UBIFS messages */
-#define ubifs_msg(c, fmt, ...) \
- pr_notice("UBIFS (ubi%d:%d): " fmt "\n", \
- (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)
-/* UBIFS error messages */
-#define ubifs_err(c, fmt, ...) \
- pr_err("UBIFS error (ubi%d:%d pid %d): %s: " fmt "\n", \
- (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
- __func__, ##__VA_ARGS__)
-/* UBIFS warning messages */
-#define ubifs_warn(c, fmt, ...) \
- pr_warn("UBIFS warning (ubi%d:%d pid %d): %s: " fmt "\n", \
- (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
- __func__, ##__VA_ARGS__)
-/*
- * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
- * object as an argument.
- */
-#define ubifs_errc(c, fmt, ...) \
- do { \
- if (!(c)->probing) \
- ubifs_err(c, fmt, ##__VA_ARGS__); \
- } while (0)
-
/* UBIFS file system VFS magic number */
#define UBIFS_SUPER_MAGIC 0x24051905
@@ -1802,4 +1778,21 @@ int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len,
#include "misc.h"
#include "key.h"
+/* Normal UBIFS messages */
+__printf(2, 3)
+void ubifs_msg(const struct ubifs_info *c, const char *fmt, ...);
+__printf(2, 3)
+void ubifs_err(const struct ubifs_info *c, const char *fmt, ...);
+__printf(2, 3)
+void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...);
+/*
+ * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
+ * object as an argument.
+ */
+#define ubifs_errc(c, fmt, ...) \
+do { \
+ if (!(c)->probing) \
+ ubifs_err(c, fmt, ##__VA_ARGS__); \
+} while (0)
+
#endif /* !__UBIFS_H__ */
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c7f4d434d098..b043e044121d 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -59,7 +59,6 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
/*
* Limit the number of extended attributes per inode so that the total size
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index f64639176670..3542d94fddce 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -121,4 +121,5 @@ xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
-xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o
+xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o
+xfs-$(CONFIG_NFSD_SCSILAYOUT) += xfs_pnfs.o
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 2816d42507bc..a1b2dd828b9d 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -246,7 +246,7 @@ const struct export_operations xfs_export_operations = {
.fh_to_parent = xfs_fs_fh_to_parent,
.get_parent = xfs_fs_get_parent,
.commit_metadata = xfs_fs_nfs_commit_metadata,
-#ifdef CONFIG_NFSD_PNFS
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
.get_uuid = xfs_fs_get_uuid,
.map_blocks = xfs_fs_map_blocks,
.commit_blocks = xfs_fs_commit_blocks,
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h
index 8147ac108820..93f74853961b 100644
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h
@@ -1,7 +1,7 @@
#ifndef _XFS_PNFS_H
#define _XFS_PNFS_H 1
-#ifdef CONFIG_NFSD_PNFS
+#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
struct iomap *iomap, bool write, u32 *device_generation);
OpenPOWER on IntegriCloud