summaryrefslogtreecommitdiffstats
path: root/fs/nfsd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/Kconfig16
-rw-r--r--fs/nfsd/Makefile3
-rw-r--r--fs/nfsd/acl.h8
-rw-r--r--fs/nfsd/blocklayout.c3
-rw-r--r--fs/nfsd/export.c13
-rw-r--r--fs/nfsd/filecache.c1093
-rw-r--r--fs/nfsd/filecache.h64
-rw-r--r--fs/nfsd/netns.h10
-rw-r--r--fs/nfsd/nfs3proc.c14
-rw-r--r--fs/nfsd/nfs3xdr.c37
-rw-r--r--fs/nfsd/nfs4callback.c150
-rw-r--r--fs/nfsd/nfs4layouts.c14
-rw-r--r--fs/nfsd/nfs4proc.c523
-rw-r--r--fs/nfsd/nfs4recover.c407
-rw-r--r--fs/nfsd/nfs4state.c514
-rw-r--r--fs/nfsd/nfs4xdr.c228
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c42
-rw-r--r--fs/nfsd/nfsd.h37
-rw-r--r--fs/nfsd/nfsfh.h9
-rw-r--r--fs/nfsd/nfsproc.c12
-rw-r--r--fs/nfsd/nfssvc.c70
-rw-r--r--fs/nfsd/state.h56
-rw-r--r--fs/nfsd/stats.c12
-rw-r--r--fs/nfsd/trace.h146
-rw-r--r--fs/nfsd/vfs.c470
-rw-r--r--fs/nfsd/vfs.h55
-rw-r--r--fs/nfsd/xdr3.h6
-rw-r--r--fs/nfsd/xdr4.h58
29 files changed, 3251 insertions, 821 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index d25f6bbe7006..f368f3215f88 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -3,6 +3,7 @@ config NFSD
tristate "NFS server support"
depends on INET
depends on FILE_LOCKING
+ depends on FSNOTIFY
select LOCKD
select SUNRPC
select EXPORTFS
@@ -72,7 +73,8 @@ config NFSD_V4
select NFSD_V3
select FS_POSIX_ACL
select SUNRPC_GSS
- select CRYPTO
+ select CRYPTO_MD5
+ select CRYPTO_SHA256
select GRACE_PERIOD
help
This option enables support in your system's NFS server for
@@ -132,6 +134,16 @@ config NFSD_FLEXFILELAYOUT
If unsure, say N.
+config NFSD_V4_2_INTER_SSC
+ bool "NFSv4.2 inter server to server COPY"
+ depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
+ help
+ This option enables support for NFSv4.2 inter server to
+ server copy where the destination server calls the NFSv4.2
+ client to read the data to copy from the source server.
+
+ If unsure, say N.
+
config NFSD_V4_SECURITY_LABEL
bool "Provide Security Label support for NFSv4 server"
depends on NFSD_V4 && SECURITY
@@ -147,7 +159,7 @@ config NFSD_V4_SECURITY_LABEL
config NFSD_FAULT_INJECTION
bool "NFS server manual fault injection"
- depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
+ depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN
help
This option enables support for manually injecting faults
into the NFS server. This is intended to be used for
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 2bfb58eefad1..6a40b1afe703 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -11,7 +11,8 @@ obj-$(CONFIG_NFSD) += nfsd.o
nfsd-y += trace.o
nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
- export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+ export.o auth.o lockd.o nfscache.o nfsxdr.o \
+ stats.o filecache.o
nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 4cd7c69a6cb9..ba14d2f4b64f 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -39,14 +39,6 @@ struct nfs4_acl;
struct svc_fh;
struct svc_rqst;
-/*
- * Maximum ACL we'll accept from a client; chosen (somewhat
- * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
- * high-order allocation. This allows 204 ACEs on x86_64:
- */
-#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
- / sizeof(struct nfs4_ace))
-
int nfs4_acl_bytes(int entries);
int nfs4_acl_get_whotype(char *, u32);
__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 66d4c55eb48e..9bbaa671c079 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -15,6 +15,7 @@
#include "blocklayoutxdr.h"
#include "pnfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
@@ -404,7 +405,7 @@ static void
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
- struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+ struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp), 0, true);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index baa01956a5b3..15422c951fd1 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -22,6 +22,7 @@
#include "nfsfh.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
@@ -232,6 +233,17 @@ static struct cache_head *expkey_alloc(void)
return NULL;
}
+static void expkey_flush(void)
+{
+ /*
+ * Take the nfsd_mutex here to ensure that the file cache is not
+ * destroyed while we're in the middle of flushing.
+ */
+ mutex_lock(&nfsd_mutex);
+ nfsd_file_cache_purge(current->nsproxy->net_ns);
+ mutex_unlock(&nfsd_mutex);
+}
+
static const struct cache_detail svc_expkey_cache_template = {
.owner = THIS_MODULE,
.hash_size = EXPKEY_HASHMAX,
@@ -244,6 +256,7 @@ static const struct cache_detail svc_expkey_cache_template = {
.init = expkey_init,
.update = expkey_update,
.alloc = expkey_alloc,
+ .flush = expkey_flush,
};
static int
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 000000000000..22e77ede9f14
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,1093 @@
+/*
+ * Open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ */
+
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/list_lru.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+
+#include "vfs.h"
+#include "nfsd.h"
+#include "nfsfh.h"
+#include "netns.h"
+#include "filecache.h"
+#include "trace.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_FH
+
+/* FIXME: dynamically size this for the machine somehow? */
+#define NFSD_FILE_HASH_BITS 12
+#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
+#define NFSD_LAUNDRETTE_DELAY (2 * HZ)
+
+#define NFSD_FILE_SHUTDOWN (1)
+#define NFSD_FILE_LRU_THRESHOLD (4096UL)
+#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2)
+
+/* We only care about NFSD_MAY_READ/WRITE for this cache */
+#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
+
+struct nfsd_fcache_bucket {
+ struct hlist_head nfb_head;
+ spinlock_t nfb_lock;
+ unsigned int nfb_count;
+ unsigned int nfb_maxcount;
+};
+
+static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+
+struct nfsd_fcache_disposal {
+ struct list_head list;
+ struct work_struct work;
+ struct net *net;
+ spinlock_t lock;
+ struct list_head freeme;
+ struct rcu_head rcu;
+};
+
+static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
+
+static struct kmem_cache *nfsd_file_slab;
+static struct kmem_cache *nfsd_file_mark_slab;
+static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
+static struct list_lru nfsd_file_lru;
+static long nfsd_file_lru_flags;
+static struct fsnotify_group *nfsd_file_fsnotify_group;
+static atomic_long_t nfsd_filecache_count;
+static struct delayed_work nfsd_filecache_laundrette;
+static DEFINE_SPINLOCK(laundrette_lock);
+static LIST_HEAD(laundrettes);
+
+static void nfsd_file_gc(void);
+
+static void
+nfsd_file_schedule_laundrette(void)
+{
+ long count = atomic_long_read(&nfsd_filecache_count);
+
+ if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
+ return;
+
+ queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+ NFSD_LAUNDRETTE_DELAY);
+}
+
+static void
+nfsd_file_slab_free(struct rcu_head *rcu)
+{
+ struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
+
+ put_cred(nf->nf_cred);
+ kmem_cache_free(nfsd_file_slab, nf);
+}
+
+static void
+nfsd_file_mark_free(struct fsnotify_mark *mark)
+{
+ struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
+ nfm_mark);
+
+ kmem_cache_free(nfsd_file_mark_slab, nfm);
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_get(struct nfsd_file_mark *nfm)
+{
+ if (!refcount_inc_not_zero(&nfm->nfm_ref))
+ return NULL;
+ return nfm;
+}
+
+static void
+nfsd_file_mark_put(struct nfsd_file_mark *nfm)
+{
+ if (refcount_dec_and_test(&nfm->nfm_ref)) {
+ fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
+ fsnotify_put_mark(&nfm->nfm_mark);
+ }
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+{
+ int err;
+ struct fsnotify_mark *mark;
+ struct nfsd_file_mark *nfm = NULL, *new;
+ struct inode *inode = nf->nf_inode;
+
+ do {
+ mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
+ mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
+ nfsd_file_fsnotify_group);
+ if (mark) {
+ nfm = nfsd_file_mark_get(container_of(mark,
+ struct nfsd_file_mark,
+ nfm_mark));
+ mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+ if (nfm) {
+ fsnotify_put_mark(mark);
+ break;
+ }
+ /* Avoid soft lockup race with nfsd_file_mark_put() */
+ fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
+ fsnotify_put_mark(mark);
+ } else
+ mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
+
+ /* allocate a new nfm */
+ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
+ new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
+ refcount_set(&new->nfm_ref, 1);
+
+ err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
+
+ /*
+ * If the add was successful, then return the object.
+ * Otherwise, we need to put the reference we hold on the
+ * nfm_mark. The fsnotify code will take a reference and put
+ * it on failure, so we can't just free it directly. It's also
+ * not safe to call fsnotify_destroy_mark on it as the
+ * mark->group will be NULL. Thus, we can't let the nfm_ref
+ * counter drive the destruction at this point.
+ */
+ if (likely(!err))
+ nfm = new;
+ else
+ fsnotify_put_mark(&new->nfm_mark);
+ } while (unlikely(err == -EEXIST));
+
+ return nfm;
+}
+
+static struct nfsd_file *
+nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+ struct net *net)
+{
+ struct nfsd_file *nf;
+
+ nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+ if (nf) {
+ INIT_HLIST_NODE(&nf->nf_node);
+ INIT_LIST_HEAD(&nf->nf_lru);
+ nf->nf_file = NULL;
+ nf->nf_cred = get_current_cred();
+ nf->nf_net = net;
+ nf->nf_flags = 0;
+ nf->nf_inode = inode;
+ nf->nf_hashval = hashval;
+ refcount_set(&nf->nf_ref, 1);
+ nf->nf_may = may & NFSD_FILE_MAY_MASK;
+ if (may & NFSD_MAY_NOT_BREAK_LEASE) {
+ if (may & NFSD_MAY_WRITE)
+ __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+ if (may & NFSD_MAY_READ)
+ __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+ }
+ nf->nf_mark = NULL;
+ init_rwsem(&nf->nf_rwsem);
+ trace_nfsd_file_alloc(nf);
+ }
+ return nf;
+}
+
+/*
+ * Release everything an nfsd_file holds once its last reference is gone:
+ * drop the reference on its fsnotify mark (if any), close the open file
+ * (if any), and free the structure itself through call_rcu().
+ *
+ * Returns true if an open file was closed here. nfsd_file_dispose_list_sync()
+ * uses that to decide whether it needs to call flush_delayed_fput().
+ */
+static bool
+nfsd_file_free(struct nfsd_file *nf)
+{
+ bool flush = false;
+
+ trace_nfsd_file_put_final(nf);
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file, NULL);
+ fput(nf->nf_file);
+ flush = true;
+ }
+ /* nf_cred is put and the slab object freed in nfsd_file_slab_free() */
+ call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+ return flush;
+}
+
+static bool
+nfsd_file_check_writeback(struct nfsd_file *nf)
+{
+ struct file *file = nf->nf_file;
+ struct address_space *mapping;
+
+ if (!file || !(file->f_mode & FMODE_WRITE))
+ return false;
+ mapping = file->f_mapping;
+ return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
+ mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+}
+
+static int
+nfsd_file_check_write_error(struct nfsd_file *nf)
+{
+ struct file *file = nf->nf_file;
+
+ if (!file || !(file->f_mode & FMODE_WRITE))
+ return 0;
+ return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+}
+
+static void
+nfsd_file_do_unhash(struct nfsd_file *nf)
+{
+ lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+ trace_nfsd_file_unhash(nf);
+
+ if (nfsd_file_check_write_error(nf))
+ nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
+ hlist_del_rcu(&nf->nf_node);
+ atomic_long_dec(&nfsd_filecache_count);
+}
+
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+ if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ nfsd_file_do_unhash(nf);
+ if (!list_empty(&nf->nf_lru))
+ list_lru_del(&nfsd_file_lru, &nf->nf_lru);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Return true if the file was unhashed.
+ */
+static bool
+nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+{
+ lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+ trace_nfsd_file_unhash_and_release_locked(nf);
+ if (!nfsd_file_unhash(nf))
+ return false;
+ /* keep final reference for the caller's dispose list */
+ if (refcount_dec_not_one(&nf->nf_ref))
+ return true;
+
+ list_add(&nf->nf_lru, dispose);
+ return true;
+}
+
+static void
+nfsd_file_put_noref(struct nfsd_file *nf)
+{
+ trace_nfsd_file_put(nf);
+
+ if (refcount_dec_and_test(&nf->nf_ref)) {
+ WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+ nfsd_file_free(nf);
+ }
+}
+
+/*
+ * Drop a caller's reference to an nfsd_file.
+ *
+ * The entry is first flagged NFSD_FILE_REFERENCED, which makes
+ * nfsd_file_lru_cb() skip it on its next pass. If other users still hold
+ * references, or no file was ever opened, the reference is simply dropped.
+ * Otherwise writeback is started on the file's mapping, the reference is
+ * dropped, and the garbage collector is either scheduled (entry still
+ * hashed) or run immediately (cache at or above NFSD_FILE_LRU_LIMIT).
+ */
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+ bool is_hashed;
+
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
+ nfsd_file_put_noref(nf);
+ return;
+ }
+
+ filemap_flush(nf->nf_file->f_mapping);
+ /* sample HASHED before the put: nf may be freed by nfsd_file_put_noref() */
+ is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+ nfsd_file_put_noref(nf);
+ if (is_hashed)
+ nfsd_file_schedule_laundrette();
+ if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+ nfsd_file_gc();
+}
+
+struct nfsd_file *
+nfsd_file_get(struct nfsd_file *nf)
+{
+ if (likely(refcount_inc_not_zero(&nf->nf_ref)))
+ return nf;
+ return NULL;
+}
+
+static void
+nfsd_file_dispose_list(struct list_head *dispose)
+{
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del(&nf->nf_lru);
+ nfsd_file_put_noref(nf);
+ }
+}
+
+static void
+nfsd_file_dispose_list_sync(struct list_head *dispose)
+{
+ bool flush = false;
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del(&nf->nf_lru);
+ if (!refcount_dec_and_test(&nf->nf_ref))
+ continue;
+ if (nfsd_file_free(nf))
+ flush = true;
+ }
+ if (flush)
+ flush_delayed_fput();
+}
+
+static void
+nfsd_file_list_remove_disposal(struct list_head *dst,
+ struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&l->lock);
+ list_splice_init(&l->freeme, dst);
+ spin_unlock(&l->lock);
+}
+
+static void
+nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(l, &laundrettes, list) {
+ if (l->net == net) {
+ spin_lock(&l->lock);
+ list_splice_tail_init(files, &l->freeme);
+ spin_unlock(&l->lock);
+ queue_work(nfsd_filecache_wq, &l->work);
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+
+static void
+nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
+ struct net *net)
+{
+ struct nfsd_file *nf, *tmp;
+
+ list_for_each_entry_safe(nf, tmp, src, nf_lru) {
+ if (nf->nf_net == net)
+ list_move_tail(&nf->nf_lru, dst);
+ }
+}
+
+static void
+nfsd_file_dispose_list_delayed(struct list_head *dispose)
+{
+ LIST_HEAD(list);
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
+ nfsd_file_list_add_disposal(&list, nf->nf_net);
+ }
+}
+
+/*
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static enum lru_status
+nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ spinlock_t *lock, void *arg)
+ __releases(lock)
+ __acquires(lock)
+{
+ struct list_head *head = arg;
+ struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+ /*
+ * Do a lockless refcount check. The hashtable holds one reference, so
+ * we look to see if anything else has a reference, or if any have
+ * been put since the shrinker last ran. Those don't get unhashed and
+ * released.
+ *
+ * Note that in the put path, we set the flag and then decrement the
+ * counter. Here we check the counter and then test and clear the flag.
+ * That order is deliberate to ensure that we can do this locklessly.
+ */
+ if (refcount_read(&nf->nf_ref) > 1)
+ goto out_skip;
+
+ /*
+ * Don't throw out files that are still undergoing I/O or
+ * that have uncleared errors pending.
+ */
+ if (nfsd_file_check_writeback(nf))
+ goto out_skip;
+
+ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
+ goto out_skip;
+
+ if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+ goto out_skip;
+
+ list_lru_isolate_move(lru, &nf->nf_lru, head);
+ return LRU_REMOVED;
+out_skip:
+ return LRU_SKIP;
+}
+
+static unsigned long
+nfsd_file_lru_walk_list(struct shrink_control *sc)
+{
+ LIST_HEAD(head);
+ struct nfsd_file *nf;
+ unsigned long ret;
+
+ if (sc)
+ ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+ nfsd_file_lru_cb, &head);
+ else
+ ret = list_lru_walk(&nfsd_file_lru,
+ nfsd_file_lru_cb,
+ &head, LONG_MAX);
+ list_for_each_entry(nf, &head, nf_lru) {
+ spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+ nfsd_file_do_unhash(nf);
+ spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+ }
+ nfsd_file_dispose_list_delayed(&head);
+ return ret;
+}
+
+static void
+nfsd_file_gc(void)
+{
+ nfsd_file_lru_walk_list(NULL);
+}
+
+static void
+nfsd_file_gc_worker(struct work_struct *work)
+{
+ nfsd_file_gc();
+ nfsd_file_schedule_laundrette();
+}
+
+static unsigned long
+nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
+{
+ return list_lru_count(&nfsd_file_lru);
+}
+
+static unsigned long
+nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ return nfsd_file_lru_walk_list(sc);
+}
+
+static struct shrinker nfsd_file_shrinker = {
+ .scan_objects = nfsd_file_lru_scan,
+ .count_objects = nfsd_file_lru_count,
+ .seeks = 1,
+};
+
+static void
+__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
+ struct list_head *dispose)
+{
+ struct nfsd_file *nf;
+ struct hlist_node *tmp;
+
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
+ if (inode == nf->nf_inode)
+ nfsd_file_unhash_and_release_locked(nf, dispose);
+ }
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put. Also ensure that any of the
+ * fputs also have their final __fput done as well.
+ */
+void
+nfsd_file_close_inode_sync(struct inode *inode)
+{
+ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+ NFSD_FILE_HASH_BITS);
+ LIST_HEAD(dispose);
+
+ __nfsd_file_close_inode(inode, hashval, &dispose);
+ trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+ nfsd_file_dispose_list_sync(&dispose);
+}
+
+/**
+ * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put.
+ */
+static void
+nfsd_file_close_inode(struct inode *inode)
+{
+ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+ NFSD_FILE_HASH_BITS);
+ LIST_HEAD(dispose);
+
+ __nfsd_file_close_inode(inode, hashval, &dispose);
+ trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+ nfsd_file_dispose_list_delayed(&dispose);
+}
+
+/**
+ * nfsd_file_delayed_close - close unused nfsd_files
+ * @work: work item embedded in a struct nfsd_fcache_disposal
+ *
+ * Take every entry queued on that disposal's freeme list and drop the
+ * reference held on it.
+ *
+ * Note this can deadlock with nfsd_file_cache_purge.
+ */
+static void
+nfsd_file_delayed_close(struct work_struct *work)
+{
+ LIST_HEAD(head);
+ struct nfsd_fcache_disposal *l = container_of(work,
+ struct nfsd_fcache_disposal, work);
+
+ nfsd_file_list_remove_disposal(&head, l);
+ nfsd_file_dispose_list(&head);
+}
+
+static int
+nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
+ void *data)
+{
+ struct file_lock *fl = data;
+
+ /* Only close files for F_SETLEASE leases */
+ if (fl->fl_flags & FL_LEASE)
+ nfsd_file_close_inode_sync(file_inode(fl->fl_file));
+ return 0;
+}
+
+static struct notifier_block nfsd_file_lease_notifier = {
+ .notifier_call = nfsd_file_lease_notifier_call,
+};
+
+static int
+nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
+ struct inode *inode,
+ u32 mask, const void *data, int data_type,
+ const struct qstr *file_name, u32 cookie,
+ struct fsnotify_iter_info *iter_info)
+{
+ trace_nfsd_file_fsnotify_handle_event(inode, mask);
+
+ /* Should be no marks on non-regular files */
+ if (!S_ISREG(inode->i_mode)) {
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ /* don't close files if this was not the last link */
+ if (mask & FS_ATTRIB) {
+ if (inode->i_nlink)
+ return 0;
+ }
+
+ nfsd_file_close_inode(inode);
+ return 0;
+}
+
+
+static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
+ .handle_event = nfsd_file_fsnotify_handle_event,
+ .free_mark = nfsd_file_mark_free,
+};
+
+/**
+ * nfsd_file_cache_init - set up the global state of the open file cache
+ *
+ * Allocates the disposal workqueue, the hash table and the two slab caches,
+ * then initialises the LRU, registers the shrinker and the lease notifier,
+ * creates the fsnotify group, and finally initialises every hash bucket and
+ * the laundrette delayed work.
+ *
+ * Returns 0 on success, or if the hash table already exists (the cache was
+ * initialised earlier). On failure everything set up so far is torn down
+ * and a negative errno is returned.
+ */
+int
+nfsd_file_cache_init(void)
+{
+ int ret = -ENOMEM;
+ unsigned int i;
+
+ clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+ if (nfsd_file_hashtbl)
+ return 0;
+
+ nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
+ if (!nfsd_filecache_wq)
+ goto out;
+
+ nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+ sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+ if (!nfsd_file_hashtbl) {
+ pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+ goto out_err;
+ }
+
+ nfsd_file_slab = kmem_cache_create("nfsd_file",
+ sizeof(struct nfsd_file), 0, 0, NULL);
+ if (!nfsd_file_slab) {
+ pr_err("nfsd: unable to create nfsd_file_slab\n");
+ goto out_err;
+ }
+
+ nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
+ sizeof(struct nfsd_file_mark), 0, 0, NULL);
+ if (!nfsd_file_mark_slab) {
+ pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
+ goto out_err;
+ }
+
+
+ ret = list_lru_init(&nfsd_file_lru);
+ if (ret) {
+ pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
+ goto out_err;
+ }
+
+ ret = register_shrinker(&nfsd_file_shrinker);
+ if (ret) {
+ pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
+ goto out_lru;
+ }
+
+ ret = lease_register_notifier(&nfsd_file_lease_notifier);
+ if (ret) {
+ pr_err("nfsd: unable to register lease notifier: %d\n", ret);
+ goto out_shrinker;
+ }
+
+ nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
+ if (IS_ERR(nfsd_file_fsnotify_group)) {
+ pr_err("nfsd: unable to create fsnotify group: %ld\n",
+ PTR_ERR(nfsd_file_fsnotify_group));
+ /*
+ * ret is still 0 from lease_register_notifier(); without this
+ * the teardown below would be reported to the caller as success.
+ */
+ ret = PTR_ERR(nfsd_file_fsnotify_group);
+ nfsd_file_fsnotify_group = NULL;
+ goto out_notifier;
+ }
+
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+ spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+ }
+
+ INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
+out:
+ return ret;
+out_notifier:
+ lease_unregister_notifier(&nfsd_file_lease_notifier);
+out_shrinker:
+ unregister_shrinker(&nfsd_file_shrinker);
+out_lru:
+ list_lru_destroy(&nfsd_file_lru);
+out_err:
+ /* slab pointers that were never allocated are still NULL here */
+ kmem_cache_destroy(nfsd_file_slab);
+ nfsd_file_slab = NULL;
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+ kfree(nfsd_file_hashtbl);
+ nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+ goto out;
+}
+
+/*
+ * Note this can deadlock with nfsd_file_lru_cb.
+ */
+void
+nfsd_file_cache_purge(struct net *net)
+{
+ unsigned int i;
+ struct nfsd_file *nf;
+ struct hlist_node *next;
+ LIST_HEAD(dispose);
+ bool del;
+
+ if (!nfsd_file_hashtbl)
+ return;
+
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
+
+ spin_lock(&nfb->nfb_lock);
+ hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
+ if (net && nf->nf_net != net)
+ continue;
+ del = nfsd_file_unhash_and_release_locked(nf, &dispose);
+
+ /*
+ * Deadlock detected! Something marked this entry as
+ * unhashed, but hasn't removed it from the hash list.
+ */
+ WARN_ON_ONCE(!del);
+ }
+ spin_unlock(&nfb->nfb_lock);
+ nfsd_file_dispose_list(&dispose);
+ }
+}
+
+static struct nfsd_fcache_disposal *
+nfsd_alloc_fcache_disposal(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ l = kmalloc(sizeof(*l), GFP_KERNEL);
+ if (!l)
+ return NULL;
+ INIT_WORK(&l->work, nfsd_file_delayed_close);
+ l->net = net;
+ spin_lock_init(&l->lock);
+ INIT_LIST_HEAD(&l->freeme);
+ return l;
+}
+
+static void
+nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ rcu_assign_pointer(l->net, NULL);
+ cancel_work_sync(&l->work);
+ nfsd_file_dispose_list(&l->freeme);
+ kfree_rcu(l, rcu);
+}
+
+static void
+nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&laundrette_lock);
+ list_add_tail_rcu(&l->list, &laundrettes);
+ spin_unlock(&laundrette_lock);
+}
+
+static void
+nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&laundrette_lock);
+ list_del_rcu(&l->list);
+ spin_unlock(&laundrette_lock);
+}
+
+static int
+nfsd_alloc_fcache_disposal_net(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ l = nfsd_alloc_fcache_disposal(net);
+ if (!l)
+ return -ENOMEM;
+ nfsd_add_fcache_disposal(l);
+ return 0;
+}
+
+static void
+nfsd_free_fcache_disposal_net(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(l, &laundrettes, list) {
+ if (l->net != net)
+ continue;
+ nfsd_del_fcache_disposal(l);
+ rcu_read_unlock();
+ nfsd_free_fcache_disposal(l);
+ return;
+ }
+ rcu_read_unlock();
+}
+
+int
+nfsd_file_cache_start_net(struct net *net)
+{
+ return nfsd_alloc_fcache_disposal_net(net);
+}
+
+void
+nfsd_file_cache_shutdown_net(struct net *net)
+{
+ nfsd_file_cache_purge(net);
+ nfsd_free_fcache_disposal_net(net);
+}
+
+void
+nfsd_file_cache_shutdown(void)
+{
+ set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+ lease_unregister_notifier(&nfsd_file_lease_notifier);
+ unregister_shrinker(&nfsd_file_shrinker);
+ /*
+ * make sure all callers of nfsd_file_lru_cb are done before
+ * calling nfsd_file_cache_purge
+ */
+ cancel_delayed_work_sync(&nfsd_filecache_laundrette);
+ nfsd_file_cache_purge(NULL);
+ list_lru_destroy(&nfsd_file_lru);
+ rcu_barrier();
+ fsnotify_put_group(nfsd_file_fsnotify_group);
+ nfsd_file_fsnotify_group = NULL;
+ kmem_cache_destroy(nfsd_file_slab);
+ nfsd_file_slab = NULL;
+ fsnotify_wait_marks_destroyed();
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+ kfree(nfsd_file_hashtbl);
+ nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+}
+
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+ int i;
+
+ if (!uid_eq(c1->fsuid, c2->fsuid))
+ return false;
+ if (!gid_eq(c1->fsgid, c2->fsgid))
+ return false;
+ if (c1->group_info == NULL || c2->group_info == NULL)
+ return c1->group_info == c2->group_info;
+ if (c1->group_info->ngroups != c2->group_info->ngroups)
+ return false;
+ for (i = 0; i < c1->group_info->ngroups; i++) {
+ if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
+ return false;
+ }
+ return true;
+}
+
+static struct nfsd_file *
+nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+ unsigned int hashval, struct net *net)
+{
+ struct nfsd_file *nf;
+ unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+
+ hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+ nf_node) {
+ if ((need & nf->nf_may) != need)
+ continue;
+ if (nf->nf_inode != inode)
+ continue;
+ if (nf->nf_net != net)
+ continue;
+ if (!nfsd_match_cred(nf->nf_cred, current_cred()))
+ continue;
+ if (nfsd_file_get(nf) != NULL)
+ return nf;
+ }
+ return NULL;
+}
+
+/**
+ * nfsd_file_is_cached - are there any cached open files for this inode?
+ * @inode: inode of the file to check
+ *
+ * Scan the inode's hash bucket for open files that match it. Returns true if
+ * there are any, and false if not.
+ */
+bool
+nfsd_file_is_cached(struct inode *inode)
+{
+ bool ret = false;
+ struct nfsd_file *nf;
+ unsigned int hashval;
+
+ hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+ nf_node) {
+ if (inode == nf->nf_inode) {
+ ret = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ trace_nfsd_file_is_cached(inode, hashval, (int)ret);
+ return ret;
+}
+
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ __be32 status;
+ struct net *net = SVC_NET(rqstp);
+ struct nfsd_file *nf, *new;
+ struct inode *inode;
+ unsigned int hashval;
+ bool retry = true;
+
+ /* FIXME: skip this if fh_dentry is already set? */
+ status = fh_verify(rqstp, fhp, S_IFREG,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ if (status != nfs_ok)
+ return status;
+
+ inode = d_inode(fhp->fh_dentry);
+ hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+retry:
+ rcu_read_lock();
+ nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+ rcu_read_unlock();
+ if (nf)
+ goto wait_for_construction;
+
+ new = nfsd_file_alloc(inode, may_flags, hashval, net);
+ if (!new) {
+ trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
+ NULL, nfserr_jukebox);
+ return nfserr_jukebox;
+ }
+
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+ if (nf == NULL)
+ goto open_file;
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ nfsd_file_slab_free(&new->nf_rcu);
+
+wait_for_construction:
+ wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+
+ /* Did construction of this file fail? */
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ if (!retry) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+ retry = false;
+ nfsd_file_put_noref(nf);
+ goto retry;
+ }
+
+ this_cpu_inc(nfsd_file_cache_hits);
+
+ if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
+ bool write = (may_flags & NFSD_MAY_WRITE);
+
+ if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
+ (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
+ status = nfserrno(nfsd_open_break_lease(
+ file_inode(nf->nf_file), may_flags));
+ if (status == nfs_ok) {
+ clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+ if (write)
+ clear_bit(NFSD_FILE_BREAK_WRITE,
+ &nf->nf_flags);
+ }
+ }
+ }
+out:
+ if (status == nfs_ok) {
+ *pnf = nf;
+ } else {
+ nfsd_file_put(nf);
+ nf = NULL;
+ }
+
+ trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
+ return status;
+open_file:
+ nf = new;
+ /* Take reference for the hashtable */
+ refcount_inc(&nf->nf_ref);
+ __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+ __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ list_lru_add(&nfsd_file_lru, &nf->nf_lru);
+ hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+ ++nfsd_file_hashtbl[hashval].nfb_count;
+ nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+ nfsd_file_hashtbl[hashval].nfb_count);
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
+ nfsd_file_gc();
+
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+ if (nf->nf_mark)
+ status = nfsd_open_verified(rqstp, fhp, S_IFREG,
+ may_flags, &nf->nf_file);
+ else
+ status = nfserr_jukebox;
+ /*
+ * If construction failed, or we raced with a call to unlink()
+ * then unhash.
+ */
+ if (status != nfs_ok || inode->i_nlink == 0) {
+ bool do_free;
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ do_free = nfsd_file_unhash(nf);
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ if (do_free)
+ nfsd_file_put_noref(nf);
+ }
+ clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+ goto out;
+}
+
+/*
+ * Note that fields may be added, removed or reordered in the future. Programs
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+{
+ unsigned int i, count = 0, longest = 0;
+ unsigned long hits = 0;
+
+ /*
+ * No need for spinlocks here since we're not terribly interested in
+ * accuracy. We do take the nfsd_mutex simply to ensure that we
+ * don't end up racing with server shutdown
+ */
+ mutex_lock(&nfsd_mutex);
+ if (nfsd_file_hashtbl) {
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ count += nfsd_file_hashtbl[i].nfb_count;
+ longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
+ }
+ }
+ mutex_unlock(&nfsd_mutex);
+
+ for_each_possible_cpu(i)
+ hits += per_cpu(nfsd_file_cache_hits, i);
+
+ seq_printf(m, "total entries: %u\n", count);
+ seq_printf(m, "longest chain: %u\n", longest);
+ seq_printf(m, "cache hits: %lu\n", hits);
+ return 0;
+}
+
+int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, nfsd_file_cache_stats_show, NULL);
+}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644
index 000000000000..7872df5a0fe3
--- /dev/null
+++ b/fs/nfsd/filecache.h
@@ -0,0 +1,64 @@
+#ifndef _FS_NFSD_FILECACHE_H
+#define _FS_NFSD_FILECACHE_H
+
+#include <linux/fsnotify_backend.h>
+
+/*
+ * This is the fsnotify_mark container that nfsd attaches to the files that it
+ * is holding open. Note that we have a separate refcount here aside from the
+ * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
+ * the inode, and for each nfsd_file to hold a reference to it.
+ *
+ * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
+ * how to put that reference. If there are still outstanding nfsd_files that
+ * reference the mark, then we would want to call fsnotify_put_mark on it.
+ * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
+ * can't really tell the difference, we use nfm_ref to keep track of how
+ * many nfsd_files hold references to the mark. When that counter goes to zero
+ * then we know to call fsnotify_destroy_mark on it.
+ */
+struct nfsd_file_mark {
+ struct fsnotify_mark nfm_mark;
+ refcount_t nfm_ref;
+};
+
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode pointer value. Note that this object doesn't
+ * hold a reference to the inode by itself, so the nf_inode pointer should
+ * never be dereferenced, only used for comparison.
+ *
+ * nf_flags holds the NFSD_FILE_* values defined inside the struct; they are
+ * bit numbers for the bitops, not masks. NFSD_FILE_PENDING is cleared with
+ * clear_bit_unlock() and its waiters are woken with wake_up_bit() once
+ * construction of the entry has finished. During construction nf_mark is set
+ * from nfsd_file_mark_find_or_create() and nf_file is filled in by
+ * nfsd_open_verified().
+ *
+ * NOTE(review): nf_hashval presumably caches the hash bucket index and nf_may
+ * the NFSD_MAY_* access the file was opened with - confirm in filecache.c.
+ */
+struct nfsd_file {
+ struct hlist_node nf_node;
+ struct list_head nf_lru;
+ struct rcu_head nf_rcu;
+ struct file *nf_file;
+ const struct cred *nf_cred;
+ struct net *nf_net;
+#define NFSD_FILE_HASHED (0)
+#define NFSD_FILE_PENDING (1)
+#define NFSD_FILE_BREAK_READ (2)
+#define NFSD_FILE_BREAK_WRITE (3)
+#define NFSD_FILE_REFERENCED (4)
+ unsigned long nf_flags;
+ struct inode *nf_inode;
+ unsigned int nf_hashval;
+ refcount_t nf_ref;
+ unsigned char nf_may;
+ struct nfsd_file_mark *nf_mark;
+ struct rw_semaphore nf_rwsem;
+};
+
+int nfsd_file_cache_init(void);
+void nfsd_file_cache_purge(struct net *);
+void nfsd_file_cache_shutdown(void);
+int nfsd_file_cache_start_net(struct net *net);
+void nfsd_file_cache_shutdown_net(struct net *net);
+void nfsd_file_put(struct nfsd_file *nf);
+struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+void nfsd_file_close_inode_sync(struct inode *inode);
+bool nfsd_file_is_cached(struct inode *inode);
+__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+int nfsd_file_cache_stats_open(struct inode *, struct file *);
+#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index bdfe5bcb3dcd..2baf32311e00 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -40,7 +40,7 @@ struct nfsd_net {
struct lock_manager nfsd4_manager;
bool grace_ended;
- time_t boot_time;
+ time64_t boot_time;
/* internal mount of the "nfsd" pseudofilesystem: */
struct vfsmount *nfsd_mnt;
@@ -92,8 +92,8 @@ struct nfsd_net {
bool in_grace;
const struct nfsd4_client_tracking_ops *client_tracking_ops;
- time_t nfsd4_lease;
- time_t nfsd4_grace;
+ time64_t nfsd4_lease;
+ time64_t nfsd4_grace;
bool somebody_reclaimed;
bool track_reclaim_completes;
@@ -104,6 +104,7 @@ struct nfsd_net {
/* Time of server startup */
struct timespec64 nfssvc_boot;
+ seqlock_t boot_lock;
/*
* Max number of connections this nfsd container will allow. Defaults
@@ -179,4 +180,7 @@ struct nfsd_net {
extern void nfsd_netns_free_versions(struct nfsd_net *nn);
extern unsigned int nfsd_net_id;
+
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_boot_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 9bc32af4e2da..288bc76b4574 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -172,13 +172,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
nfserr = nfsd_read(rqstp, &resp->fh,
argp->offset,
rqstp->rq_vec, argp->vlen,
- &resp->count);
- if (nfserr == 0) {
- struct inode *inode = d_inode(resp->fh.fh_dentry);
- resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
- inode->i_size);
- }
-
+ &resp->count,
+ &resp->eof);
RETURN_STATUS(nfserr);
}
@@ -208,7 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
RETURN_STATUS(nfserr_io);
nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, nvecs, &cnt,
- resp->committed);
+ resp->committed, resp->verf);
resp->count = cnt;
RETURN_STATUS(nfserr);
}
@@ -688,7 +683,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
RETURN_STATUS(nfserr_inval);
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count);
+ nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count,
+ resp->verf);
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index fcf31822c74c..aae514d40b64 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -27,18 +27,19 @@ static u32 nfs3_ftypes[] = {
NF3SOCK, NF3BAD, NF3LNK, NF3BAD,
};
+
/*
* XDR functions for basic NFS types
*/
static __be32 *
-encode_time3(__be32 *p, struct timespec *time)
+encode_time3(__be32 *p, struct timespec64 *time)
{
*p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
return p;
}
static __be32 *
-decode_time3(__be32 *p, struct timespec *time)
+decode_time3(__be32 *p, struct timespec64 *time)
{
time->tv_sec = ntohl(*p++);
time->tv_nsec = ntohl(*p++);
@@ -166,7 +167,6 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
struct kstat *stat)
{
struct user_namespace *userns = nfsd_user_namespace(rqstp);
- struct timespec ts;
*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
*p++ = htonl((u32) (stat->mode & S_IALLUGO));
*p++ = htonl((u32) stat->nlink);
@@ -182,12 +182,9 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
*p++ = htonl((u32) MINOR(stat->rdev));
p = encode_fsid(p, fhp);
p = xdr_encode_hyper(p, stat->ino);
- ts = timespec64_to_timespec(stat->atime);
- p = encode_time3(p, &ts);
- ts = timespec64_to_timespec(stat->mtime);
- p = encode_time3(p, &ts);
- ts = timespec64_to_timespec(stat->ctime);
- p = encode_time3(p, &ts);
+ p = encode_time3(p, &stat->atime);
+ p = encode_time3(p, &stat->mtime);
+ p = encode_time3(p, &stat->ctime);
return p;
}
@@ -276,8 +273,8 @@ void fill_pre_wcc(struct svc_fh *fhp)
stat.size = inode->i_size;
}
- fhp->fh_pre_mtime = timespec64_to_timespec(stat.mtime);
- fhp->fh_pre_ctime = timespec64_to_timespec(stat.ctime);
+ fhp->fh_pre_mtime = stat.mtime;
+ fhp->fh_pre_ctime = stat.ctime;
fhp->fh_pre_size = stat.size;
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
fhp->fh_pre_saved = true;
@@ -329,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
if ((args->check_guard = ntohl(*p++)) != 0) {
- struct timespec time;
+ struct timespec64 time;
p = decode_time3(p, &time);
args->guardtime = time.tv_sec;
}
@@ -750,15 +747,13 @@ int
nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_writeres *resp = rqstp->rq_resp;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
p = encode_wcc_data(rqstp, p, &resp->fh);
if (resp->status == 0) {
*p++ = htonl(resp->count);
*p++ = htonl(resp->committed);
- /* unique identifier, y2038 overflow can be ignored */
- *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
- *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+ *p++ = resp->verf[0];
+ *p++ = resp->verf[1];
}
return xdr_ressize_check(rqstp, p);
}
@@ -860,13 +855,11 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
- dchild = lookup_one_len_unlocked(name, dparent, namlen);
+ dchild = lookup_positive_unlocked(name, dparent, namlen);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
goto out;
- if (d_really_is_negative(dchild))
- goto out;
if (dchild->d_inode->i_ino != ino)
goto out;
rv = fh_compose(fhp, exp, dchild, &cd->fh);
@@ -1124,14 +1117,12 @@ int
nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_commitres *resp = rqstp->rq_resp;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
p = encode_wcc_data(rqstp, p, &resp->fh);
/* Write verifier */
if (resp->status == 0) {
- /* unique identifier, y2038 overflow can be ignored */
- *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
- *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+ *p++ = resp->verf[0];
+ *p++ = resp->verf[1];
}
return xdr_ressize_check(rqstp, p);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 397eb7820929..c3b11a715082 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -512,11 +512,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
- if (cb != NULL) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
@@ -604,11 +602,10 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
- if (cb) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
}
#endif /* CONFIG_NFSD_PNFS */
@@ -663,11 +660,10 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
- if (cb) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
}
@@ -759,11 +755,10 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
- if (cb) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
}
/*
@@ -828,7 +823,41 @@ static const struct rpc_program cb_program = {
static int max_cb_time(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
+
+ /*
+ * nfsd4_lease is set to at most one hour in __nfsd4_write_time,
+ * so we can use 32-bit math on it. Warn if that assumption
+ * ever stops being true.
+ */
+ if (WARN_ON_ONCE(nn->nfsd4_lease > 3600))
+ return 360 * HZ;
+
+ return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ;
+}
+
+static struct workqueue_struct *callback_wq;
+
+/*
+ * Queue @cb's work item on callback_wq and hand back queue_work()'s result.
+ * Unlike nfsd4_run_cb() this does not touch the client's in-flight count, so
+ * it is the right call for requeueing a callback that is already accounted.
+ */
+static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
+{
+ return queue_work(callback_wq, &cb->cb_work);
+}
+
+/* Account for one more callback in flight on @clp. */
+static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
+{
+ atomic_inc(&clp->cl_cb_inflight);
+}
+
+/*
+ * Drop one in-flight callback from @clp. When the count reaches zero, wake
+ * anyone sleeping in nfsd41_cb_inflight_wait_complete().
+ */
+static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
+{
+
+ if (atomic_dec_and_test(&clp->cl_cb_inflight))
+ wake_up_var(&clp->cl_cb_inflight);
+}
+
+/* Sleep until @clp's in-flight callback count reads zero. */
+static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
+{
+ wait_var_event(&clp->cl_cb_inflight,
+ !atomic_read(&clp->cl_cb_inflight));
+}
static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
@@ -942,14 +971,21 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
clp->cl_cb_state = NFSD4_CB_UP;
}
+/*
+ * rpc_release method of nfsd4_cb_probe_ops. @calldata is treated as the
+ * cl_cb_null callback embedded in an nfs4_client; recover the client from it
+ * and drop its in-flight callback count.
+ */
+static void nfsd4_cb_probe_release(void *calldata)
+{
+ struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
+
+ nfsd41_cb_inflight_end(clp);
+
+}
+
static const struct rpc_call_ops nfsd4_cb_probe_ops = {
/* XXX: release method to ensure we set the cb channel down if
* necessary on early failure? */
.rpc_call_done = nfsd4_cb_probe_done,
+ .rpc_release = nfsd4_cb_probe_release,
};
-static struct workqueue_struct *callback_wq;
-
/*
* Poke the callback thread to process any updates to the callback
* parameters, and send a null probe.
@@ -980,9 +1016,12 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
* If the slot is available, then mark it busy. Otherwise, set the
* thread for sleeping on the callback RPC wait queue.
*/
-static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
+static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
- if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (!cb->cb_holds_slot &&
+ test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
/* Race breaker */
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
@@ -991,9 +1030,31 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
}
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
+ cb->cb_holds_slot = true;
return true;
}
+/*
+ * Give up the backchannel slot if @cb currently holds it: clear
+ * cb_holds_slot, clear bit 0 of the client's cl_cb_slot_busy and wake the
+ * next task waiting on cl_cb_waitq. Does nothing when @cb holds no slot, so
+ * it is safe to call unconditionally.
+ */
+static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (cb->cb_holds_slot) {
+ cb->cb_holds_slot = false;
+ clear_bit(0, &clp->cl_cb_slot_busy);
+ rpc_wake_up_next(&clp->cl_cb_waitq);
+ }
+}
+
+/*
+ * Finish with @cb: release its slot (if held), call the ops' ->release
+ * method when one is provided, then drop the client's in-flight count.
+ *
+ * The client pointer is read before ->release runs and @cb is not touched
+ * afterwards - presumably because ->release may free @cb; confirm against
+ * the individual release methods.
+ */
+static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ nfsd41_cb_release_slot(cb);
+ if (cb->cb_ops && cb->cb_ops->release)
+ cb->cb_ops->release(cb);
+ nfsd41_cb_inflight_end(clp);
+}
+
/*
* TODO: cb_sequence should support referring call lists, cachethis, multiple
* slots, and mark callback channel down on communication errors.
@@ -1010,11 +1071,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
*/
cb->cb_seq_status = 1;
cb->cb_status = 0;
- if (minorversion) {
- if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
- return;
- cb->cb_holds_slot = true;
- }
+ if (minorversion && !nfsd41_cb_get_slot(cb, task))
+ return;
rpc_call_start(task);
}
@@ -1077,13 +1135,12 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
}
break;
default:
+ nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
dprintk("%s: unprocessed error %d\n", __func__,
cb->cb_seq_status);
}
- cb->cb_holds_slot = false;
- clear_bit(0, &clp->cl_cb_slot_busy);
- rpc_wake_up_next(&clp->cl_cb_waitq);
+ nfsd41_cb_release_slot(cb);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
@@ -1096,8 +1153,10 @@ retry_nowait:
ret = false;
goto out;
need_restart:
- task->tk_status = 0;
- cb->cb_need_restart = true;
+ if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+ task->tk_status = 0;
+ cb->cb_need_restart = true;
+ }
return false;
}
@@ -1139,9 +1198,9 @@ static void nfsd4_cb_release(void *calldata)
struct nfsd4_callback *cb = calldata;
if (cb->cb_need_restart)
- nfsd4_run_cb(cb);
+ nfsd4_queue_cb(cb);
else
- cb->cb_ops->release(cb);
+ nfsd41_destroy_cb(cb);
}
@@ -1175,6 +1234,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
*/
nfsd4_run_cb(&clp->cl_cb_null);
flush_workqueue(callback_wq);
+ nfsd41_cb_inflight_wait_complete(clp);
}
/* requires cl_lock: */
@@ -1192,6 +1252,12 @@ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
return NULL;
}
+/*
+ * Note there isn't a lot of locking in this code; instead we depend on
+ * the fact that it is run from the callback_wq, which won't run two
+ * work items at once. So, for example, callback_wq handles all access
+ * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
+ */
static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
{
struct nfs4_cb_conn conn;
@@ -1260,8 +1326,7 @@ nfsd4_run_cb_work(struct work_struct *work)
clnt = clp->cl_cb_client;
if (!clnt) {
/* Callback channel broken, or client killed; give up: */
- if (cb->cb_ops && cb->cb_ops->release)
- cb->cb_ops->release(cb);
+ nfsd41_destroy_cb(cb);
return;
}
@@ -1270,6 +1335,7 @@ nfsd4_run_cb_work(struct work_struct *work)
*/
if (!cb->cb_ops && clp->cl_minorversion) {
clp->cl_cb_state = NFSD4_CB_UP;
+ nfsd41_destroy_cb(cb);
return;
}
@@ -1295,5 +1361,9 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
void nfsd4_run_cb(struct nfsd4_callback *cb)
{
- queue_work(callback_wq, &cb->cb_work);
+ struct nfs4_client *clp = cb->cb_clp;
+
+ nfsd41_cb_inflight_begin(clp);
+ if (!nfsd4_queue_cb(cb))
+ nfsd41_cb_inflight_end(clp);
}
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index a79e24b79095..e12409eca7cc 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -169,8 +169,8 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
spin_unlock(&fp->fi_lock);
if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
- vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
- fput(ls->ls_file);
+ vfs_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
+ nfsd_file_put(ls->ls_file);
if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -197,7 +197,7 @@ nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
fl->fl_end = OFFSET_MAX;
fl->fl_owner = ls;
fl->fl_pid = current->tgid;
- fl->fl_file = ls->ls_file;
+ fl->fl_file = ls->ls_file->nf_file;
status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
if (status) {
@@ -236,13 +236,13 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
NFSPROC4_CLNT_CB_LAYOUT);
if (parent->sc_type == NFS4_DELEG_STID)
- ls->ls_file = get_file(fp->fi_deleg_file);
+ ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else
ls->ls_file = find_any_file(fp);
BUG_ON(!ls->ls_file);
if (nfsd4_layout_setlease(ls)) {
- fput(ls->ls_file);
+ nfsd_file_put(ls->ls_file);
put_nfs4_file(fp);
kmem_cache_free(nfs4_layout_stateid_cache, ls);
return NULL;
@@ -626,7 +626,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
argv[0] = (char *)nfsd_recall_failed;
argv[1] = addr_str;
- argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
+ argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL;
error = call_usermodehelper(nfsd_recall_failed, argv, envp,
@@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
/* Client gets 2 lease periods to return it */
cutoff = ktime_add_ns(task->tk_start,
- nn->nfsd4_lease * NSEC_PER_SEC * 2);
+ (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2);
if (ktime_before(now, cutoff)) {
rpc_delay(task, HZ/100); /* 10 mili-seconds */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8beda999e134..0e75f7fb5fec 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,6 +37,7 @@
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/kthread.h>
+#include <linux/sunrpc/addr.h>
#include "idmap.h"
#include "cache.h"
@@ -232,7 +233,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
if (!*resfh)
return nfserr_jukebox;
fh_init(*resfh, NFS4_FHSIZE);
- open->op_truncate = 0;
+ open->op_truncate = false;
if (open->op_create) {
/* FIXME: check session persistence and pnfs flags.
@@ -365,7 +366,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
return nfserr_inval;
- open->op_created = 0;
+ open->op_created = false;
/*
* RFC5661 18.51.3
* Before RECLAIM_COMPLETE done, server should deny new lock
@@ -503,12 +504,20 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_putfh *putfh = &u->putfh;
+ __be32 ret;
fh_put(&cstate->current_fh);
cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
putfh->pf_fhlen);
- return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+ ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ if (ret == nfserr_stale && putfh->no_verify) {
+ SET_FH_FLAG(&cstate->current_fh, NFSD4_FH_FOREIGN);
+ ret = 0;
+ }
+#endif
+ return ret;
}
static __be32
@@ -530,9 +539,9 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_restorefh;
fh_dup2(&cstate->current_fh, &cstate->save_fh);
- if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) {
+ if (HAS_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG)) {
memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
return nfs_ok;
}
@@ -542,9 +551,9 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
fh_dup2(&cstate->save_fh, &cstate->current_fh);
- if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
+ if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG)) {
memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG);
}
return nfs_ok;
}
@@ -568,17 +577,11 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
{
- __be32 verf[2];
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ __be32 *verf = (__be32 *)verifier->data;
- /*
- * This is opaque to client, so no need to byte-swap. Use
- * __force to keep sparse happy. y2038 time_t overflow is
- * irrelevant in this usage.
- */
- verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
- memcpy(verifier->data, verf, sizeof(verifier->data));
+ BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
+
+ nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
}
static __be32
@@ -587,9 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_commit *commit = &u->commit;
- gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
- commit->co_count);
+ commit->co_count,
+ (__be32 *)commit->co_verf.data);
}
static __be32
@@ -761,7 +764,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_read *read = &u->read;
__be32 status;
- read->rd_filp = NULL;
+ read->rd_nf = NULL;
if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval;
@@ -782,7 +785,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&read->rd_stateid, RD_STATE,
- &read->rd_filp, &read->rd_tmp_file);
+ &read->rd_nf, NULL);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
@@ -798,8 +801,8 @@ out:
static void
nfsd4_read_release(union nfsd4_op_u *u)
{
- if (u->read.rd_filp)
- fput(u->read.rd_filp);
+ if (u->read.rd_nf)
+ nfsd_file_put(u->read.rd_nf);
trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
u->read.rd_offset, u->read.rd_length);
}
@@ -981,7 +984,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
- 0, (time_t)0);
+ 0, (time64_t)0);
out:
fh_drop_write(&cstate->current_fh);
return status;
@@ -993,7 +996,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_write *write = &u->write;
stateid_t *stateid = &write->wr_stateid;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
__be32 status = nfs_ok;
unsigned long cnt;
int nvecs;
@@ -1005,23 +1008,23 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
trace_nfsd_write_start(rqstp, &cstate->current_fh,
write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
- stateid, WR_STATE, &filp, NULL);
+ stateid, WR_STATE, &nf, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
}
write->wr_how_written = write->wr_stable_how;
- gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
&write->wr_head, write->wr_buflen);
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
- status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- write->wr_how_written);
- fput(filp);
+ write->wr_how_written,
+ (__be32 *)write->wr_verifier.data);
+ nfsd_file_put(nf);
write->wr_bytes_written = cnt;
trace_nfsd_write_done(rqstp, &cstate->current_fh,
@@ -1031,8 +1034,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static __be32
nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
- stateid_t *src_stateid, struct file **src,
- stateid_t *dst_stateid, struct file **dst)
+ stateid_t *src_stateid, struct nfsd_file **src,
+ stateid_t *dst_stateid, struct nfsd_file **dst)
{
__be32 status;
@@ -1054,8 +1057,8 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
/* fix up for NFS-specific error code */
- if (!S_ISREG(file_inode(*src)->i_mode) ||
- !S_ISREG(file_inode(*dst)->i_mode)) {
+ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+ !S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
status = nfserr_wrong_type;
goto out_put_dst;
}
@@ -1063,9 +1066,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
out:
return status;
out_put_dst:
- fput(*dst);
+ nfsd_file_put(*dst);
out_put_src:
- fput(*src);
+ nfsd_file_put(*src);
goto out;
}
@@ -1074,7 +1077,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_clone *clone = &u->clone;
- struct file *src, *dst;
+ struct nfsd_file *src, *dst;
__be32 status;
status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
@@ -1083,10 +1086,11 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
status = nfsd4_clone_file_range(src, clone->cl_src_pos,
- dst, clone->cl_dst_pos, clone->cl_count);
+ dst, clone->cl_dst_pos, clone->cl_count,
+ EX_ISSYNC(cstate->current_fh.fh_export));
- fput(dst);
- fput(src);
+ nfsd_file_put(dst);
+ nfsd_file_put(src);
out:
return status;
}
@@ -1140,6 +1144,207 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp)
while ((copy = nfsd4_get_copy(clp)) != NULL)
nfsd4_stop_copy(copy);
}
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+
+extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh,
+ nfs4_stateid *stateid);
+extern void nfs42_ssc_close(struct file *filep);
+
+extern void nfs_sb_deactive(struct super_block *sb);
+
+#define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys"
+
+/*
+ * Support one copy source server for now.
+ *
+ * Mount the copy source server described by @nss with an internal NFS mount
+ * and return the mount in *@mount.
+ *
+ * The universal address in @nss is parsed in the network namespace of
+ * @rqstp, and the netid must match the address family ("tcp" for IPv4,
+ * "tcp6" for IPv6).
+ *
+ * Returns 0 on success. Returns nfserr_inval if the address cannot be
+ * parsed, the netid does not match, or one of the first two allocations
+ * fails. Returns nfserr_nodev if the "nfs" filesystem type is unavailable,
+ * the device name cannot be allocated, or the mount itself fails. *@mount is
+ * only written on success.
+ */
+static __be32
+nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+ struct vfsmount **mount)
+{
+ struct file_system_type *type;
+ struct vfsmount *ss_mnt;
+ struct nfs42_netaddr *naddr;
+ struct sockaddr_storage tmp_addr;
+ size_t tmp_addrlen, match_netid_len = 3;
+ char *startsep = "", *endsep = "", *match_netid = "tcp";
+ char *ipaddr, *dev_name, *raw_data;
+ int len, raw_len;
+ __be32 status = nfserr_inval;
+
+ naddr = &nss->u.nl4_addr;
+ tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
+ naddr->addr_len,
+ (struct sockaddr *)&tmp_addr,
+ sizeof(tmp_addr));
+ /* A zero length means the universal address could not be parsed */
+ if (tmp_addrlen == 0)
+ goto out_err;
+
+ if (tmp_addr.ss_family == AF_INET6) {
+ startsep = "[";
+ endsep = "]";
+ match_netid = "tcp6";
+ match_netid_len = 4;
+ }
+
+ if (naddr->netid_len != match_netid_len ||
+ strncmp(naddr->netid, match_netid, naddr->netid_len))
+ goto out_err;
+
+ /* Construct the raw data for the vfs_kern_mount call */
+ len = RPC_MAX_ADDRBUFLEN + 1;
+ ipaddr = kzalloc(len, GFP_KERNEL);
+ if (!ipaddr)
+ goto out_err;
+
+ rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len);
+
+ /*
+ * The "%s" in the format is replaced by ipaddr, so the sum of the two
+ * string lengths already leaves room for the trailing '\0'.
+ */
+ raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr);
+ raw_data = kzalloc(raw_len, GFP_KERNEL);
+ if (!raw_data)
+ goto out_free_ipaddr;
+
+ snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr);
+
+ status = nfserr_nodev;
+ type = get_fs_type("nfs");
+ if (!type)
+ goto out_free_rawdata;
+
+ /*
+ * Set the server:<export> for the vfs_kern_mount call. On top of len:
+ * 2 for the ipv6 startsep and endsep, 3 for ":/" and the trailing '\0'.
+ */
+ dev_name = kzalloc(len + 5, GFP_KERNEL);
+ if (!dev_name) {
+ /* Don't leak the module reference held via get_fs_type() */
+ module_put(type->owner);
+ goto out_free_rawdata;
+ }
+ snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
+
+ /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
+ ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
+ module_put(type->owner);
+ if (IS_ERR(ss_mnt))
+ goto out_free_devname;
+
+ status = 0;
+ *mount = ss_mnt;
+
+out_free_devname:
+ kfree(dev_name);
+out_free_rawdata:
+ kfree(raw_data);
+out_free_ipaddr:
+ kfree(ipaddr);
+out_err:
+ return status;
+}
+
+/*
+ * Undo nfsd4_interssc_connect(): deactivate the superblock behind @ss_mnt
+ * with nfs_sb_deactive() and drop the mount reference.
+ */
+static void
+nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
+{
+ nfs_sb_deactive(ss_mnt->mnt_sb);
+ mntput(ss_mnt);
+}
+
+/**
+ * nfsd4_setup_inter_ssc
+ *
+ * Verify COPY destination stateid (this also sets copy->nf_dst).
+ * Connect to the source server; the mount options used request vers=4.2.
+ * Record the source filehandle and source stateid in copy->c_fh and
+ * copy->stateid. The source file itself is not opened here.
+ * Called with COPY cstate:
+ * SAVED_FH: source filehandle
+ * CURRENT_FH: destination filehandle
+ *
+ * Returns 0 on success, otherwise the status from the stateid check or from
+ * nfsd4_interssc_connect().
+ *
+ * NOTE(review): if the connect step fails, copy->nf_dst obtained from the
+ * stateid check is still held on return - confirm that the caller puts it.
+ */
+static __be32
+nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy, struct vfsmount **mount)
+{
+ struct svc_fh *s_fh = NULL;
+ stateid_t *s_stid = &copy->cp_src_stateid;
+ __be32 status = nfserr_inval;
+
+ /* Verify the destination stateid and set dst struct file*/
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &copy->cp_dst_stateid,
+ WR_STATE, &copy->nf_dst, NULL);
+ if (status)
+ goto out;
+
+ status = nfsd4_interssc_connect(&copy->cp_src, rqstp, mount);
+ if (status)
+ goto out;
+
+ s_fh = &cstate->save_fh;
+
+ /* Stash the source filehandle and stateid for the later source open */
+ copy->c_fh.size = s_fh->fh_handle.fh_size;
+ memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size);
+ copy->stateid.seqid = cpu_to_be32(s_stid->si_generation);
+ memcpy(copy->stateid.other, (void *)&s_stid->si_opaque,
+ sizeof(stateid_opaque_t));
+
+ status = 0;
+out:
+ return status;
+}
+
+/*
+ * Tear down an inter-server copy: close the source file with
+ * nfs42_ssc_close(), put both nfsd_file references and drop the mount
+ * reference on @ss_mnt.
+ *
+ * NOTE(review): unlike nfsd4_interssc_disconnect() this does not call
+ * nfs_sb_deactive() - confirm that the difference is intentional.
+ */
+static void
+nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
+ struct nfsd_file *dst)
+{
+ nfs42_ssc_close(src->nf_file);
+ nfsd_file_put(src);
+ nfsd_file_put(dst);
+ mntput(ss_mnt);
+}
+
+#else /* CONFIG_NFSD_V4_2_INTER_SSC */
+
+/*
+ * Stub for builds without CONFIG_NFSD_V4_2_INTER_SSC: inter-server copy is
+ * not available, so clear *@mount and fail with nfserr_inval.
+ */
+static __be32
+nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy,
+ struct vfsmount **mount)
+{
+ *mount = NULL;
+ return nfserr_inval;
+}
+
+/* Stub for builds without CONFIG_NFSD_V4_2_INTER_SSC: nothing to clean up. */
+static void
+nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
+ struct nfsd_file *dst)
+{
+}
+
+/* Stub for builds without CONFIG_NFSD_V4_2_INTER_SSC: nothing to disconnect. */
+static void
+nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
+{
+}
+
+/*
+ * Stub for builds without CONFIG_NFSD_V4_2_INTER_SSC: there is no source
+ * server to open a file on, so always return NULL.
+ */
+static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh,
+ nfs4_stateid *stateid)
+{
+ return NULL;
+}
+#endif /* CONFIG_NFSD_V4_2_INTER_SSC */
+
+/*
+ * Set up a copy where source and destination live on this server: let
+ * nfsd4_verify_copy() check both stateids and fill in copy->nf_src and
+ * copy->nf_dst. Returns the status from nfsd4_verify_copy().
+ */
+static __be32
+nfsd4_setup_intra_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy)
+{
+ return nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
+ &copy->nf_src, &copy->cp_dst_stateid,
+ &copy->nf_dst);
+}
+
+/* Drop the references on the source and destination files of an intra-server copy. */
+static void
+nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst)
+{
+ nfsd_file_put(src);
+ nfsd_file_put(dst);
+}
static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
{
@@ -1176,8 +1381,9 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
do {
if (kthread_should_stop())
break;
- bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos,
- copy->file_dst, dst_pos, bytes_total);
+ bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
+ src_pos, copy->nf_dst->nf_file, dst_pos,
+ bytes_total);
if (bytes_copied <= 0)
break;
bytes_total -= bytes_copied;
@@ -1204,12 +1410,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
status = nfs_ok;
}
- fput(copy->file_src);
- fput(copy->file_dst);
+ if (!copy->cp_intra) /* Inter server SSC */
+ nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src,
+ copy->nf_dst);
+ else
+ nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+
return status;
}
-static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
{
dst->cp_src_pos = src->cp_src_pos;
dst->cp_dst_pos = src->cp_dst_pos;
@@ -1218,16 +1428,26 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
memcpy(&dst->fh, &src->fh, sizeof(src->fh));
dst->cp_clp = src->cp_clp;
- dst->file_dst = get_file(src->file_dst);
- dst->file_src = get_file(src->file_src);
+ dst->nf_dst = nfsd_file_get(src->nf_dst);
+ dst->cp_intra = src->cp_intra;
+ if (src->cp_intra) /* for inter, file_src doesn't exist yet */
+ dst->nf_src = nfsd_file_get(src->nf_src);
+
memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
+ memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server));
+ memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
+ memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
+ dst->ss_mnt = src->ss_mnt;
+
+ return 0;
}
static void cleanup_async_copy(struct nfsd4_copy *copy)
{
- nfs4_free_cp_state(copy);
- fput(copy->file_dst);
- fput(copy->file_src);
+ nfs4_free_copy_state(copy);
+ nfsd_file_put(copy->nf_dst);
+ if (copy->cp_intra)
+ nfsd_file_put(copy->nf_src);
spin_lock(&copy->cp_clp->async_lock);
list_del(&copy->copies);
spin_unlock(&copy->cp_clp->async_lock);
@@ -1239,7 +1459,24 @@ static int nfsd4_do_async_copy(void *data)
struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
struct nfsd4_copy *cb_copy;
+ if (!copy->cp_intra) { /* Inter server SSC */
+ copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL);
+ if (!copy->nf_src) {
+ copy->nfserr = nfserr_serverfault;
+ nfsd4_interssc_disconnect(copy->ss_mnt);
+ goto do_callback;
+ }
+ copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
+ &copy->stateid);
+ if (IS_ERR(copy->nf_src->nf_file)) {
+ copy->nfserr = nfserr_offload_denied;
+ nfsd4_interssc_disconnect(copy->ss_mnt);
+ goto do_callback;
+ }
+ }
+
copy->nfserr = nfsd4_do_copy(copy, 0);
+do_callback:
cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!cb_copy)
goto out;
@@ -1251,6 +1488,8 @@ static int nfsd4_do_async_copy(void *data)
&nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
nfsd4_run_cb(&cb_copy->cp_cb);
out:
+ if (!copy->cp_intra)
+ kfree(copy->nf_src);
cleanup_async_copy(copy);
return 0;
}
@@ -1263,11 +1502,20 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd4_copy *async_copy = NULL;
- status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
- &copy->file_src, &copy->cp_dst_stateid,
- &copy->file_dst);
- if (status)
- goto out;
+ if (!copy->cp_intra) { /* Inter server SSC */
+ if (!inter_copy_offload_enable || copy->cp_synchronous) {
+ status = nfserr_notsupp;
+ goto out;
+ }
+ status = nfsd4_setup_inter_ssc(rqstp, cstate, copy,
+ &copy->ss_mnt);
+ if (status)
+ return nfserr_offload_denied;
+ } else {
+ status = nfsd4_setup_intra_ssc(rqstp, cstate, copy);
+ if (status)
+ return status;
+ }
copy->cp_clp = cstate->clp;
memcpy(&copy->fh, &cstate->current_fh.fh_handle,
@@ -1278,15 +1526,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserrno(-ENOMEM);
async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!async_copy)
- goto out;
- if (!nfs4_init_cp_state(nn, copy)) {
- kfree(async_copy);
- goto out;
- }
+ goto out_err;
+ if (!nfs4_init_copy_state(nn, copy))
+ goto out_err;
refcount_set(&async_copy->refcount, 1);
memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid,
sizeof(copy->cp_stateid));
- dup_copy_fields(copy, async_copy);
+ status = dup_copy_fields(copy, async_copy);
+ if (status)
+ goto out_err;
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
async_copy, "%s", "copy thread");
if (IS_ERR(async_copy->copy_task))
@@ -1297,12 +1545,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
spin_unlock(&async_copy->cp_clp->async_lock);
wake_up_process(async_copy->copy_task);
status = nfs_ok;
- } else
+ } else {
status = nfsd4_do_copy(copy, 1);
+ }
out:
return status;
out_err:
- cleanup_async_copy(async_copy);
+ if (async_copy)
+ cleanup_async_copy(async_copy);
+ status = nfserrno(-ENOMEM);
+ if (!copy->cp_intra)
+ nfsd4_interssc_disconnect(copy->ss_mnt);
goto out;
}
@@ -1313,7 +1566,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
spin_lock(&clp->async_lock);
list_for_each_entry(copy, &clp->async_copies, copies) {
- if (memcmp(&copy->cp_stateid, stateid, NFS4_STATEID_SIZE))
+ if (memcmp(&copy->cp_stateid.stid, stateid, NFS4_STATEID_SIZE))
continue;
refcount_inc(&copy->refcount);
spin_unlock(&clp->async_lock);
@@ -1329,16 +1582,61 @@ nfsd4_offload_cancel(struct svc_rqst *rqstp,
union nfsd4_op_u *u)
{
struct nfsd4_offload_status *os = &u->offload_status;
- __be32 status = 0;
struct nfsd4_copy *copy;
struct nfs4_client *clp = cstate->clp;
copy = find_async_copy(clp, &os->stateid);
- if (copy)
+ if (!copy) {
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ return manage_cpntf_state(nn, &os->stateid, clp, NULL);
+ } else
nfsd4_stop_copy(copy);
- else
- status = nfserr_bad_stateid;
+ return nfs_ok;
+}
+
+/*
+ * COPY_NOTIFY operation.
+ *
+ * Validates cpn_src_stateid for reading against the current filehandle,
+ * allocates a copy-notify state tied to that stateid, and fills in the
+ * reply: the lease time (nn->nfsd4_lease seconds, 0 nsec), the new
+ * copy-notify stateid, and a single source address.
+ *
+ * Returns nfs_ok on success; the nfs4_preprocess_stateid_op() status,
+ * nfserr_bad_stateid, nfserrno(-ENOMEM) or the nfsd4_set_netaddr()
+ * status on failure.
+ */
+static __be32
+nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		  union nfsd4_op_u *u)
+{
+	struct nfsd4_copy_notify *cn = &u->copy_notify;
+	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfs4_stid *stid = NULL;
+	struct nfs4_cpntf_state *cps;
+	struct nfs4_client *clp = cstate->clp;
+
+	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+					    &cn->cpn_src_stateid, RD_STATE, NULL,
+					    &stid);
+	if (status)
+		return status;
+	/*
+	 * Everything below dereferences and finally puts 'stid'.  Refuse
+	 * the request if the stateid check succeeded without handing one
+	 * back.  NOTE(review): this is presumably possible for special
+	 * stateids; confirm against nfs4_preprocess_stateid_op().
+	 */
+	if (!stid)
+		return nfserr_bad_stateid;
+
+	cn->cpn_sec = nn->nfsd4_lease;
+	cn->cpn_nsec = 0;
+
+	status = nfserrno(-ENOMEM);
+	cps = nfs4_alloc_init_cpntf_state(nn, stid);
+	if (!cps)
+		goto out;
+	memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t));
+	memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t));
+	memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t));
+
+	/* For now, only return one server address in cpn_src, the
+	 * address used by the client to connect to this server.
+	 */
+	cn->cpn_src.nl4_type = NL4_NETADDR;
+	status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
+				   &cn->cpn_src.u.nl4_addr);
+	WARN_ON_ONCE(status);
+	/* The copy-notify state is useless without a source address. */
+	if (status)
+		nfs4_put_cpntf_state(nn, cps);
+out:
+	nfs4_put_stid(stid);
return status;
}
@@ -1347,21 +1645,21 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
__be32 status;
- struct file *file;
+ struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
- WR_STATE, &file, NULL);
+ WR_STATE, &nf, NULL);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
}
- status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+ status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
fallocate->falloc_offset,
fallocate->falloc_length,
flags);
- fput(file);
+ nfsd_file_put(nf);
return status;
}
static __be32
@@ -1406,11 +1704,11 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_seek *seek = &u->seek;
int whence;
__be32 status;
- struct file *file;
+ struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
- RD_STATE, &file, NULL);
+ RD_STATE, &nf, NULL);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
@@ -1432,14 +1730,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* Note: This call does change file->f_pos, but nothing in NFSD
* should ever file->f_pos.
*/
- seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+ seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
if (seek->seek_pos < 0)
status = nfserrno(seek->seek_pos);
- else if (seek->seek_pos >= i_size_read(file_inode(file)))
+ else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
seek->seek_eof = true;
out:
- fput(file);
+ nfsd_file_put(nf);
return status;
}
@@ -1915,6 +2213,45 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
- rqstp->rq_auth_slack;
}
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+/*
+ * Walk the operations of a compound, tracking which PUTFH currently
+ * supplies the current and the saved filehandle.  For every COPY that
+ * is not an intra-server copy, flag the PUTFH behind the saved (source)
+ * filehandle with no_verify.  A COPY seen with no saved filehandle gets
+ * nfserr_nofilehandle and ends the walk.
+ */
+static void
+check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
+{
+	struct nfsd4_op *cur_fh_op = NULL, *saved_fh_op = NULL;
+	int idx;
+
+	for (idx = 0; idx < args->opcnt; idx++) {
+		struct nfsd4_op *op = &args->ops[idx];
+		struct nfsd4_copy *copy;
+		struct nfsd4_putfh *putfh;
+
+		switch (op->opnum) {
+		case OP_PUTFH:
+			cur_fh_op = op;
+			break;
+		case OP_SAVEFH:
+			saved_fh_op = cur_fh_op;
+			break;
+		case OP_RESTOREFH:
+			cur_fh_op = saved_fh_op;
+			break;
+		case OP_COPY:
+			if (!saved_fh_op) {
+				op->status = nfserr_nofilehandle;
+				return;
+			}
+			copy = (struct nfsd4_copy *)&op->u;
+			putfh = (struct nfsd4_putfh *)&saved_fh_op->u;
+			if (!copy->cp_intra)
+				putfh->no_verify = true;
+			break;
+		default:
+			break;
+		}
+	}
+}
+#else
+/*
+ * Stub used when CONFIG_NFSD_V4_2_INTER_SSC is not set: no PUTFH is
+ * ever marked no_verify.
+ */
+static void
+check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
+{
+}
+#endif
+
/*
* COMPOUND call.
*/
@@ -1963,6 +2300,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
resp->opcnt = 1;
goto encode_op;
}
+ check_if_stalefh_allowed(args);
trace_nfsd_compound(rqstp, args->opcnt);
while (!status && resp->opcnt < args->opcnt) {
@@ -1978,13 +2316,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
op->status = nfsd4_open_omfg(rqstp, cstate, op);
goto encode_op;
}
-
- if (!current_fh->fh_dentry) {
+ if (!current_fh->fh_dentry &&
+ !HAS_FH_FLAG(current_fh, NFSD4_FH_FOREIGN)) {
if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
op->status = nfserr_nofilehandle;
goto encode_op;
}
- } else if (current_fh->fh_export->ex_fslocs.migrated &&
+ } else if (current_fh->fh_export &&
+ current_fh->fh_export->ex_fslocs.migrated &&
!(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
op->status = nfserr_moved;
goto encode_op;
@@ -2028,7 +2367,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (op->opdesc->op_flags & OP_CLEAR_STATEID)
clear_current_stateid(cstate);
- if (need_wrongsec_check(rqstp))
+ if (current_fh->fh_export &&
+ need_wrongsec_check(rqstp))
op->status = check_nfsd_access(current_fh->fh_export, rqstp);
}
encode_op:
@@ -2295,6 +2635,21 @@ static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
}
+/*
+ * Reply-size estimate, in bytes, for COPY_NOTIFY.  The size is summed
+ * in 32-bit XDR words and converted to bytes on return.
+ */
+static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
+					  struct nfsd4_op *op)
+{
+	u32 words = op_encode_hdr_size;
+
+	words += 3;				/* cnr_lease_time */
+	words += 1;				/* We support one cnr_source_server */
+	words += 1;				/* cnr_stateid seq */
+	words += op_encode_stateid_maxsz;	/* cnr_stateid */
+	words += 1;				/* num cnr_source_server*/
+	words += 1;				/* nl4_type */
+	words += 1;				/* nl4 size */
+	words += XDR_QUADLEN(NFS4_OPAQUE_LIMIT);	/* nl4_loc + nl4_loc_sz */
+
+	return words * sizeof(__be32);
+}
+
#ifdef CONFIG_NFSD_PNFS
static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
@@ -2719,6 +3074,12 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_OFFLOAD_CANCEL",
.op_rsize_bop = nfsd4_only_status_rsize,
},
+ [OP_COPY_NOTIFY] = {
+ .op_func = nfsd4_copy_notify,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_COPY_NOTIFY",
+ .op_rsize_bop = nfsd4_copy_notify_rsize,
+ },
};
/**
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 87679557d0d6..a8fb18609146 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -59,8 +59,13 @@ struct nfsd4_client_tracking_ops {
void (*remove)(struct nfs4_client *);
int (*check)(struct nfs4_client *);
void (*grace_done)(struct nfsd_net *);
+ uint8_t version;
+ size_t msglen;
};
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
+
/* Globals */
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
@@ -173,6 +178,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
const char *dname, int len, struct nfsd_net *nn)
{
struct xdr_netobj name;
+ struct xdr_netobj princhash = { .len = 0, .data = NULL };
struct nfs4_client_reclaim *crp;
name.data = kmemdup(dname, len, GFP_KERNEL);
@@ -182,7 +188,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
return;
}
name.len = len;
- crp = nfs4_client_to_reclaim(name, nn);
+ crp = nfs4_client_to_reclaim(name, princhash, nn);
if (!crp) {
kfree(name.data);
return;
@@ -482,6 +488,7 @@ static int
load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
{
struct xdr_netobj name;
+ struct xdr_netobj princhash = { .len = 0, .data = NULL };
if (child->d_name.len != HEXDIR_LEN - 1) {
printk("%s: illegal name %pd in recovery directory\n",
@@ -496,7 +503,7 @@ load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
goto out;
}
name.len = HEXDIR_LEN;
- if (!nfs4_client_to_reclaim(name, nn))
+ if (!nfs4_client_to_reclaim(name, princhash, nn))
kfree(name.data);
out:
return 0;
@@ -718,6 +725,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
.remove = nfsd4_remove_clid_dir,
.check = nfsd4_check_legacy_client,
.grace_done = nfsd4_recdir_purge_old,
+ .version = 1,
+ .msglen = 0,
};
/* Globals */
@@ -731,25 +740,32 @@ struct cld_net {
struct list_head cn_list;
unsigned int cn_xid;
bool cn_has_legacy;
+ struct crypto_shash *cn_tfm;
};
struct cld_upcall {
struct list_head cu_list;
struct cld_net *cu_net;
struct completion cu_done;
- struct cld_msg cu_msg;
+ union {
+ struct cld_msg_hdr cu_hdr;
+ struct cld_msg cu_msg;
+ struct cld_msg_v2 cu_msg_v2;
+ } cu_u;
};
static int
-__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
{
int ret;
struct rpc_pipe_msg msg;
- struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
+ struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
+ nfsd_net_id);
memset(&msg, 0, sizeof(msg));
msg.data = cmsg;
- msg.len = sizeof(*cmsg);
+ msg.len = nn->client_tracking_ops->msglen;
ret = rpc_queue_upcall(pipe, &msg);
if (ret < 0) {
@@ -765,7 +781,7 @@ out:
}
static int
-cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
{
int ret;
@@ -781,11 +797,11 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
}
static ssize_t
-__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
struct nfsd_net *nn)
{
- uint8_t cmd;
- struct xdr_netobj name;
+ uint8_t cmd, princhashlen;
+ struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
uint16_t namelen;
struct cld_net *cn = nn->cld_net;
@@ -794,22 +810,48 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
return -EFAULT;
}
if (cmd == Cld_GraceStart) {
- if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
- return -EFAULT;
- name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
- if (IS_ERR_OR_NULL(name.data))
- return -EFAULT;
- name.len = namelen;
+ if (nn->client_tracking_ops->version >= 2) {
+ const struct cld_clntinfo __user *ci;
+
+ ci = &cmsg->cm_u.cm_clntinfo;
+ if (get_user(namelen, &ci->cc_name.cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+ if (IS_ERR_OR_NULL(name.data))
+ return -EFAULT;
+ name.len = namelen;
+ get_user(princhashlen, &ci->cc_princhash.cp_len);
+ if (princhashlen > 0) {
+ princhash.data = memdup_user(
+ &ci->cc_princhash.cp_data,
+ princhashlen);
+ if (IS_ERR_OR_NULL(princhash.data))
+ return -EFAULT;
+ princhash.len = princhashlen;
+ } else
+ princhash.len = 0;
+ } else {
+ const struct cld_name __user *cnm;
+
+ cnm = &cmsg->cm_u.cm_name;
+ if (get_user(namelen, &cnm->cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&cnm->cn_id, namelen);
+ if (IS_ERR_OR_NULL(name.data))
+ return -EFAULT;
+ name.len = namelen;
+ }
if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
name.len = name.len - 5;
memmove(name.data, name.data + 5, name.len);
cn->cn_has_legacy = true;
}
- if (!nfs4_client_to_reclaim(name, nn)) {
+ if (!nfs4_client_to_reclaim(name, princhash, nn)) {
kfree(name.data);
+ kfree(princhash.data);
return -EFAULT;
}
- return sizeof(*cmsg);
+ return nn->client_tracking_ops->msglen;
}
return -EFAULT;
}
@@ -818,21 +860,22 @@ static ssize_t
cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct cld_upcall *tmp, *cup;
- struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
+ struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
+ struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
uint32_t xid;
struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
nfsd_net_id);
struct cld_net *cn = nn->cld_net;
int16_t status;
- if (mlen != sizeof(*cmsg)) {
+ if (mlen != nn->client_tracking_ops->msglen) {
dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
- sizeof(*cmsg));
+ nn->client_tracking_ops->msglen);
return -EINVAL;
}
/* copy just the xid so we can try to find that */
- if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+ if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
dprintk("%s: error when copying xid from userspace", __func__);
return -EFAULT;
}
@@ -842,7 +885,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
* list (for -EINPROGRESS, we just want to make sure the xid is
* valid, not remove the upcall from the list)
*/
- if (get_user(status, &cmsg->cm_status)) {
+ if (get_user(status, &hdr->cm_status)) {
dprintk("%s: error when copying status from userspace", __func__);
return -EFAULT;
}
@@ -851,7 +894,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
cup = NULL;
spin_lock(&cn->cn_lock);
list_for_each_entry(tmp, &cn->cn_list, cu_list) {
- if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+ if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
cup = tmp;
if (status != -EINPROGRESS)
list_del_init(&cup->cu_list);
@@ -869,7 +912,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (status == -EINPROGRESS)
return __cld_pipe_inprogress_downcall(cmsg, nn);
- if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+ if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
return -EFAULT;
complete(&cup->cu_done);
@@ -881,7 +924,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
struct cld_msg *cmsg = msg->data;
struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
- cu_msg);
+ cu_u.cu_msg);
/* errno >= 0 means we got a downcall */
if (msg->errno >= 0)
@@ -1007,14 +1050,17 @@ nfsd4_remove_cld_pipe(struct net *net)
nfsd4_cld_unregister_net(net, cn->cn_pipe);
rpc_destroy_pipe_data(cn->cn_pipe);
+ if (cn->cn_tfm)
+ crypto_free_shash(cn->cn_tfm);
kfree(nn->cld_net);
nn->cld_net = NULL;
}
static struct cld_upcall *
-alloc_cld_upcall(struct cld_net *cn)
+alloc_cld_upcall(struct nfsd_net *nn)
{
struct cld_upcall *new, *tmp;
+ struct cld_net *cn = nn->cld_net;
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
@@ -1024,20 +1070,20 @@ alloc_cld_upcall(struct cld_net *cn)
restart_search:
spin_lock(&cn->cn_lock);
list_for_each_entry(tmp, &cn->cn_list, cu_list) {
- if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+ if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
cn->cn_xid++;
spin_unlock(&cn->cn_lock);
goto restart_search;
}
}
init_completion(&new->cu_done);
- new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
- put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+ new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+ put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
new->cu_net = cn;
list_add(&new->cu_list, &cn->cn_list);
spin_unlock(&cn->cn_lock);
- dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+ dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
return new;
}
@@ -1066,20 +1112,20 @@ nfsd4_cld_create(struct nfs4_client *clp)
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_Create;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -1092,6 +1138,75 @@ out_err:
/* Ask daemon to create a new record */
+/*
+ * Version 2 of the "create" upcall: besides the client name, the
+ * message carries a hash (computed with cn->cn_tfm) of the client's
+ * principal, when one is known.  cr_raw_principal is preferred over
+ * cr_principal.  With no principal, cp_len is 0.
+ *
+ * Nothing is sent if the client is already flagged
+ * NFSD4_CLIENT_STABLE.  Failures are only logged.
+ */
static void
+nfsd4_cld_create_v2(struct nfs4_client *clp)
+{
+	int ret;
+	struct cld_upcall *cup;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+	struct cld_net *cn = nn->cld_net;
+	struct cld_msg_v2 *cmsg;
+	struct crypto_shash *tfm = cn->cn_tfm;
+	char *principal = NULL;
+	SHASH_DESC_ON_STACK(desc, tfm);
+
+	/* Don't upcall if it's already stored */
+	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+		return;
+
+	cup = alloc_cld_upcall(nn);
+	if (!cup) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	cmsg = &cup->cu_u.cu_msg_v2;
+	cmsg->cm_cmd = Cld_Create;
+	cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
+	memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
+			clp->cl_name.len);
+	if (clp->cl_cred.cr_raw_principal)
+		principal = clp->cl_cred.cr_raw_principal;
+	else if (clp->cl_cred.cr_principal)
+		principal = clp->cl_cred.cr_principal;
+	if (principal) {
+		/*
+		 * Hash straight into the message.  This writes exactly the
+		 * digest-size bytes that used to be copied in from a
+		 * temporary buffer, without the extra allocation and its
+		 * failure path.
+		 */
+		desc->tfm = tfm;
+		ret = crypto_shash_digest(desc, principal, strlen(principal),
+				cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data);
+		shash_desc_zero(desc);
+		if (ret)
+			goto out;
+		cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len =
+				crypto_shash_digestsize(tfm);
+	} else
+		cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
+
+	ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
+	if (!ret) {
+		ret = cmsg->cm_status;
+		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+	}
+
+out:
+	free_cld_upcall(cup);
+out_err:
+	if (ret)
+		pr_err("NFSD: Unable to create client record on stable storage: %d\n",
+				ret);
+}
+
+/* Ask daemon to remove a client record */
+static void
nfsd4_cld_remove(struct nfs4_client *clp)
{
int ret;
@@ -1103,20 +1218,20 @@ nfsd4_cld_remove(struct nfs4_client *clp)
if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_Remove;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -1145,21 +1260,21 @@ nfsd4_cld_check_v0(struct nfs4_client *clp)
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return 0;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
printk(KERN_ERR "NFSD: Unable to check client record on "
"stable storage: %d\n", -ENOMEM);
return -ENOMEM;
}
- cup->cu_msg.cm_cmd = Cld_Check;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -1217,22 +1332,95 @@ found:
}
+/*
+ * Version 2 of the reclaim check.
+ *
+ * Looks the client up in the reclaim table by name and, when
+ * cn_has_legacy is set, also by the name produced by
+ * nfs4_make_rec_clidname().  If the record found carries a principal
+ * hash, the client's principal (cr_raw_principal preferred over
+ * cr_principal) is hashed with cn->cn_tfm and must match it.
+ *
+ * Returns 0 if the client is already flagged NFSD4_CLIENT_STABLE or a
+ * matching record was found (the record is then linked to @clp), and
+ * -ENOENT in every other case, including allocation and hashing
+ * failures.
+ */
static int
+nfsd4_cld_check_v2(struct nfs4_client *clp)
+{
+	struct nfs4_client_reclaim *crp;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+	struct cld_net *cn = nn->cld_net;
+	int status;
+	char dname[HEXDIR_LEN];
+	struct xdr_netobj name;
+	struct crypto_shash *tfm = cn->cn_tfm;
+	struct xdr_netobj cksum;
+	char *principal = NULL;
+	bool mismatch;
+	SHASH_DESC_ON_STACK(desc, tfm);
+
+	/* did we already find that this client is stable? */
+	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+		return 0;
+
+	/* look for it in the reclaim hashtable otherwise */
+	crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+	if (crp)
+		goto found;
+
+	if (cn->cn_has_legacy) {
+		status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+		if (status)
+			return -ENOENT;
+
+		name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+		if (!name.data) {
+			dprintk("%s: failed to allocate memory for name.data\n",
+					__func__);
+			return -ENOENT;
+		}
+		name.len = HEXDIR_LEN;
+		crp = nfsd4_find_reclaim_client(name, nn);
+		kfree(name.data);
+		if (crp)
+			goto found;
+
+	}
+	return -ENOENT;
+found:
+	if (crp->cr_princhash.len) {
+		if (clp->cl_cred.cr_raw_principal)
+			principal = clp->cl_cred.cr_raw_principal;
+		else if (clp->cl_cred.cr_principal)
+			principal = clp->cl_cred.cr_principal;
+		if (principal == NULL)
+			return -ENOENT;
+		desc->tfm = tfm;
+		cksum.len = crypto_shash_digestsize(tfm);
+		/*
+		 * The stored hash length comes from the userspace downcall.
+		 * A hash of a different size can never match, and comparing
+		 * cr_princhash.len bytes against a digest-sized buffer
+		 * would read past the end of cksum.data.
+		 */
+		if (crp->cr_princhash.len != cksum.len)
+			return -ENOENT;
+		cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+		if (cksum.data == NULL)
+			return -ENOENT;
+		status = crypto_shash_digest(desc, principal, strlen(principal),
+					     cksum.data);
+		shash_desc_zero(desc);
+		/* memcmp() is skipped when hashing itself failed */
+		mismatch = status || memcmp(crp->cr_princhash.data, cksum.data,
+					    cksum.len);
+		kfree(cksum.data);
+		if (mismatch)
+			return -ENOENT;
+	}
+	crp->cr_clp = clp;
+	return 0;
+}
+
+static int
nfsd4_cld_grace_start(struct nfsd_net *nn)
{
int ret;
struct cld_upcall *cup;
struct cld_net *cn = nn->cld_net;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_GraceStart;
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret)
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
free_cld_upcall(cup);
out_err:
@@ -1250,17 +1438,17 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
struct cld_upcall *cup;
struct cld_net *cn = nn->cld_net;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_GraceDone;
- cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+ cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret)
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
free_cld_upcall(cup);
out_err:
@@ -1279,16 +1467,16 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
struct cld_upcall *cup;
struct cld_net *cn = nn->cld_net;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_GraceDone;
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret)
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
free_cld_upcall(cup);
out_err:
@@ -1337,12 +1525,60 @@ cld_running(struct nfsd_net *nn)
}
+/*
+ * Ask the daemon which upcall version it speaks and select the
+ * matching client tracking ops.  The reported version is clamped to
+ * the range 1..CLD_UPCALL_VERSION.  Version 1 selects
+ * nfsd4_cld_tracking_ops and version 2 selects
+ * nfsd4_cld_tracking_ops_v2; any other value leaves the ops unchanged.
+ *
+ * Returns 0 on success, -ENOMEM if no upcall could be allocated, or
+ * the upcall error / status reported by the daemon.
+ */
static int
+nfsd4_cld_get_version(struct nfsd_net *nn)
+{
+	int ret = 0;
+	struct cld_upcall *cup;
+	struct cld_net *cn = nn->cld_net;
+	struct cld_msg *msg;
+	uint8_t version;
+
+	cup = alloc_cld_upcall(nn);
+	if (!cup) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	msg = &cup->cu_u.cu_msg;
+	msg->cm_cmd = Cld_GetVersion;
+	ret = cld_pipe_upcall(cn->cn_pipe, msg);
+	if (ret)
+		goto out_free;
+
+	ret = msg->cm_status;
+	if (ret)
+		goto out_free;
+
+	version = msg->cm_u.cm_version;
+	dprintk("%s: userspace returned version %u\n",
+			__func__, version);
+
+	/* clamp to the versions this kernel knows about */
+	if (version < 1)
+		version = 1;
+	else if (version > CLD_UPCALL_VERSION)
+		version = CLD_UPCALL_VERSION;
+
+	if (version == 1)
+		nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+	else if (version == 2)
+		nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
+
+out_free:
+	free_cld_upcall(cup);
+out_err:
+	if (ret)
+		dprintk("%s: Unable to get version from userspace: %d\n",
+			__func__, ret);
+	return ret;
+}
+
+static int
nfsd4_cld_tracking_init(struct net *net)
{
int status;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
bool running;
int retries = 10;
+ struct crypto_shash *tfm;
status = nfs4_cld_state_init(net);
if (status)
@@ -1367,11 +1603,21 @@ nfsd4_cld_tracking_init(struct net *net)
status = -ETIMEDOUT;
goto err_remove;
}
+ tfm = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(tfm)) {
+ status = PTR_ERR(tfm);
+ goto err_remove;
+ }
+ nn->cld_net->cn_tfm = tfm;
+
+ status = nfsd4_cld_get_version(nn);
+ if (status == -EOPNOTSUPP)
+ pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
status = nfsd4_cld_grace_start(nn);
if (status) {
if (status == -EOPNOTSUPP)
- printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
+ pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
nfs4_release_reclaim(nn);
goto err_remove;
} else
@@ -1403,6 +1649,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
.remove = nfsd4_cld_remove,
.check = nfsd4_cld_check_v0,
.grace_done = nfsd4_cld_grace_done_v0,
+ .version = 1,
+ .msglen = sizeof(struct cld_msg),
};
/* For newer nfsdcld's */
@@ -1413,6 +1661,20 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
.remove = nfsd4_cld_remove,
.check = nfsd4_cld_check,
.grace_done = nfsd4_cld_grace_done,
+ .version = 1,
+ .msglen = sizeof(struct cld_msg),
+};
+
+/*
+ * Tracking ops for daemons speaking upcall version 2.  The v2
+ * create/check ops include the principal, if available; the remaining
+ * callbacks are shared with nfsd4_cld_tracking_ops.  msglen is the
+ * size of the v2 message, which the pipe upcall/downcall code uses as
+ * the expected message length.
+ */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
+ .init = nfsd4_cld_tracking_init,
+ .exit = nfsd4_cld_tracking_exit,
+ .create = nfsd4_cld_create_v2,
+ .remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check_v2,
+ .grace_done = nfsd4_cld_grace_done,
+ .version = 2,
+ .msglen = sizeof(struct cld_msg_v2),
+};
/* upcall via usermodehelper */
@@ -1520,7 +1782,7 @@ nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
}
static char *
-nfsd4_cltrack_grace_start(time_t grace_start)
+nfsd4_cltrack_grace_start(time64_t grace_start)
{
int copied;
size_t len;
@@ -1533,7 +1795,7 @@ nfsd4_cltrack_grace_start(time_t grace_start)
if (!result)
return result;
- copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
+ copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld",
grace_start);
if (copied >= len) {
/* just return nothing if output was truncated */
@@ -1590,19 +1852,14 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
static char *
bin_to_hex_dup(const unsigned char *src, int srclen)
{
- int i;
- char *buf, *hex;
+ char *buf;
/* +1 for terminating NULL */
- buf = kmalloc((srclen * 2) + 1, GFP_KERNEL);
+ buf = kzalloc((srclen * 2) + 1, GFP_KERNEL);
if (!buf)
return buf;
- hex = buf;
- for (i = 0; i < srclen; i++) {
- sprintf(hex, "%2.2x", *src++);
- hex += 2;
- }
+ bin2hex(buf, src, srclen);
return buf;
}
@@ -1747,7 +2004,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
char *legacy;
char timestr[22]; /* FIXME: better way to determine max size? */
- sprintf(timestr, "%ld", nn->boot_time);
+ sprintf(timestr, "%lld", nn->boot_time);
legacy = nfsd4_cltrack_legacy_topdir();
nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
kfree(legacy);
@@ -1760,6 +2017,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
.remove = nfsd4_umh_cltrack_remove,
.check = nfsd4_umh_cltrack_check,
.grace_done = nfsd4_umh_cltrack_grace_done,
+ .version = 1,
+ .msglen = 0,
};
int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7857942c5ca6..65cfe9ab47be 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -50,6 +50,7 @@
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -79,6 +80,7 @@ static u64 current_sessionid = 1;
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
void nfsd4_end_grace(struct nfsd_net *nn);
+static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
/* Locking: */
@@ -169,7 +171,7 @@ renew_client_locked(struct nfs4_client *clp)
clp->cl_clientid.cl_boot,
clp->cl_clientid.cl_id);
list_move_tail(&clp->cl_lru, &nn->client_lru);
- clp->cl_time = get_seconds();
+ clp->cl_time = ktime_get_boottime_seconds();
}
static void put_client_renew_locked(struct nfs4_client *clp)
@@ -429,18 +431,18 @@ put_nfs4_file(struct nfs4_file *fi)
}
}
-static struct file *
+static struct nfsd_file *
__nfs4_get_fd(struct nfs4_file *f, int oflag)
{
if (f->fi_fds[oflag])
- return get_file(f->fi_fds[oflag]);
+ return nfsd_file_get(f->fi_fds[oflag]);
return NULL;
}
-static struct file *
+static struct nfsd_file *
find_writeable_file_locked(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
@@ -450,10 +452,10 @@ find_writeable_file_locked(struct nfs4_file *f)
return ret;
}
-static struct file *
+static struct nfsd_file *
find_writeable_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = find_writeable_file_locked(f);
@@ -462,9 +464,10 @@ find_writeable_file(struct nfs4_file *f)
return ret;
}
-static struct file *find_readable_file_locked(struct nfs4_file *f)
+static struct nfsd_file *
+find_readable_file_locked(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
@@ -474,10 +477,10 @@ static struct file *find_readable_file_locked(struct nfs4_file *f)
return ret;
}
-static struct file *
+static struct nfsd_file *
find_readable_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = find_readable_file_locked(f);
@@ -486,10 +489,10 @@ find_readable_file(struct nfs4_file *f)
return ret;
}
-struct file *
+struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = __nfs4_get_fd(f, O_RDWR);
@@ -590,17 +593,17 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
might_lock(&fp->fi_lock);
if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
- struct file *f1 = NULL;
- struct file *f2 = NULL;
+ struct nfsd_file *f1 = NULL;
+ struct nfsd_file *f2 = NULL;
swap(f1, fp->fi_fds[oflag]);
if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
swap(f2, fp->fi_fds[O_RDWR]);
spin_unlock(&fp->fi_lock);
if (f1)
- fput(f1);
+ nfsd_file_put(f1);
if (f2)
- fput(f2);
+ nfsd_file_put(f2);
}
}
@@ -720,6 +723,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
/* Will be incremented before return to client: */
refcount_set(&stid->sc_count, 1);
spin_lock_init(&stid->sc_lock);
+ INIT_LIST_HEAD(&stid->sc_cp_list);
/*
* It shouldn't be a problem to reuse an opaque stateid value.
@@ -739,30 +743,76 @@ out_free:
/*
* Create a unique stateid_t to represent each COPY.
*/
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
+ unsigned char sc_type)
{
int new_id;
+ stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
+ stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+ stid->sc_type = sc_type;
+
idr_preload(GFP_KERNEL);
spin_lock(&nn->s2s_cp_lock);
- new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
+ new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
+ stid->stid.si_opaque.so_id = new_id;
spin_unlock(&nn->s2s_cp_lock);
idr_preload_end();
if (new_id < 0)
return 0;
- copy->cp_stateid.si_opaque.so_id = new_id;
- copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
- copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
return 1;
}
-void nfs4_free_cp_state(struct nfsd4_copy *copy)
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy) /* register copy->cp_stateid as an NFS4_COPY_STID */
+{
+ return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID); /* 1 on success, 0 if idr allocation failed */
+}
+
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_stid *p_stid) /* new copy-notify state linked to p_stid; NULL on failure */
+{
+ struct nfs4_cpntf_state *cps;
+
+ cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
+ if (!cps)
+ return NULL;
+ cps->cpntf_time = ktime_get_boottime_seconds(); /* creation time; nfs4_laundromat() compares it with its lease cutoff */
+ refcount_set(&cps->cp_stateid.sc_count, 1); /* initial reference, dropped via _free_cpntf_state_locked() */
+ if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) /* 0 means no idr slot was allocated */
+ goto out_free;
+ spin_lock(&nn->s2s_cp_lock);
+ list_add(&cps->cp_list, &p_stid->sc_cp_list); /* lets nfs4_free_cpntf_statelist() find it from p_stid */
+ spin_unlock(&nn->s2s_cp_lock);
+ return cps;
+out_free:
+ kfree(cps);
+ return NULL;
+}
+
+void nfs4_free_copy_state(struct nfsd4_copy *copy)
{
struct nfsd_net *nn;
+ WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID);
nn = net_generic(copy->cp_clp->net, nfsd_net_id);
spin_lock(&nn->s2s_cp_lock);
- idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
+ idr_remove(&nn->s2s_cp_stateids,
+ copy->cp_stateid.stid.si_opaque.so_id);
+ spin_unlock(&nn->s2s_cp_lock);
+}
+
+static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid) /* release every copy-notify state on stid->sc_cp_list */
+{
+ struct nfs4_cpntf_state *cps;
+ struct nfsd_net *nn;
+
+ nn = net_generic(net, nfsd_net_id);
+ spin_lock(&nn->s2s_cp_lock); /* nfs4_put_stid() calls this with clp->cl_lock already held */
+ while (!list_empty(&stid->sc_cp_list)) {
+ cps = list_first_entry(&stid->sc_cp_list,
+ struct nfs4_cpntf_state, cp_list);
+ _free_cpntf_state_locked(nn, cps); /* NOTE(review): drops one ref per pass; an entry with extra holders is dropped repeatedly until freed - confirm intended */
+ }
spin_unlock(&nn->s2s_cp_lock);
}
@@ -804,7 +854,7 @@ static void nfs4_free_deleg(struct nfs4_stid *stid)
static DEFINE_SPINLOCK(blocked_delegations_lock);
static struct bloom_pair {
int entries, old_entries;
- time_t swap_time;
+ time64_t swap_time;
int new; /* index into 'set' */
DECLARE_BITMAP(set[2], 256);
} blocked_delegations;
@@ -816,15 +866,15 @@ static int delegation_blocked(struct knfsd_fh *fh)
if (bd->entries == 0)
return 0;
- if (seconds_since_boot() - bd->swap_time > 30) {
+ if (ktime_get_seconds() - bd->swap_time > 30) {
spin_lock(&blocked_delegations_lock);
- if (seconds_since_boot() - bd->swap_time > 30) {
+ if (ktime_get_seconds() - bd->swap_time > 30) {
bd->entries -= bd->old_entries;
bd->old_entries = bd->entries;
memset(bd->set[bd->new], 0,
sizeof(bd->set[0]));
bd->new = 1-bd->new;
- bd->swap_time = seconds_since_boot();
+ bd->swap_time = ktime_get_seconds();
}
spin_unlock(&blocked_delegations_lock);
}
@@ -854,7 +904,7 @@ static void block_delegations(struct knfsd_fh *fh)
__set_bit((hash>>8)&255, bd->set[bd->new]);
__set_bit((hash>>16)&255, bd->set[bd->new]);
if (bd->entries == 0)
- bd->swap_time = seconds_since_boot();
+ bd->swap_time = ktime_get_seconds();
bd->entries += 1;
spin_unlock(&blocked_delegations_lock);
}
@@ -913,6 +963,7 @@ nfs4_put_stid(struct nfs4_stid *s)
return;
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ nfs4_free_cpntf_statelist(clp->net, s);
spin_unlock(&clp->cl_lock);
s->sc_free(s);
if (fp)
@@ -933,25 +984,25 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
static void put_deleg_file(struct nfs4_file *fp)
{
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
spin_lock(&fp->fi_lock);
if (--fp->fi_delegees == 0)
- swap(filp, fp->fi_deleg_file);
+ swap(nf, fp->fi_deleg_file);
spin_unlock(&fp->fi_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
}
static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
- struct file *filp = fp->fi_deleg_file;
+ struct nfsd_file *nf = fp->fi_deleg_file;
WARN_ON_ONCE(!fp->fi_delegees);
- vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+ vfs_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
put_deleg_file(fp);
}
@@ -1289,11 +1340,14 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
{
struct nfs4_ol_stateid *stp = openlockstateid(stid);
struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
- struct file *file;
+ struct nfsd_file *nf;
- file = find_any_file(stp->st_stid.sc_file);
- if (file)
- filp_close(file, (fl_owner_t)lo);
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (nf) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file, (fl_owner_t)lo);
+ nfsd_file_put(nf);
+ }
nfs4_free_ol_stateid(stid);
}
@@ -1563,21 +1617,39 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
* re-negotiate active sessions and reduce their slot usage to make
* room for new connections. For now we just fail the create session.
*/
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
+static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
{
u32 slotsize = slot_bytes(ca);
u32 num = ca->maxreqs;
unsigned long avail, total_avail;
+ unsigned int scale_factor;
spin_lock(&nfsd_drc_lock);
- total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+ if (nfsd_drc_max_mem > nfsd_drc_mem_used)
+ total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
+ else
+ /* We have handed out more space than we chose in
+ * set_max_drc() to allow. That isn't really a
+ * problem as long as that doesn't make us think we
+ * have lots more due to integer overflow.
+ */
+ total_avail = 0;
avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
/*
- * Never use more than a third of the remaining memory,
- * unless it's the only way to give this client a slot:
+ * Never use more than a fraction of the remaining memory,
+ * unless it's the only way to give this client a slot.
+ * The chosen fraction is either 1/8 or 1/number of threads,
+ * whichever is smaller. This ensures there are adequate
+ * slots to support multiple clients per thread.
+ * Give the client one slot even if that would require
+ * over-allocation--it is better than failure.
*/
- avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
+ scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
+
+ avail = clamp_t(unsigned long, avail, slotsize,
+ total_avail/scale_factor);
num = min_t(int, num, avail / slotsize);
+ num = max_t(int, num, 1);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
@@ -1839,7 +1911,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
*/
if (clid->cl_boot == (u32)nn->boot_time)
return 0;
- dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
+ dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n",
clid->cl_boot, clid->cl_id, nn->boot_time);
return 1;
}
@@ -2192,14 +2264,14 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
* This is opaque to client, so no need to byte-swap. Use
* __force to keep sparse happy
*/
- verf[0] = (__force __be32)get_seconds();
+ verf[0] = (__force __be32)(u32)ktime_get_real_seconds();
verf[1] = (__force __be32)nn->clverifier_counter++;
memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
}
static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
{
- clp->cl_clientid.cl_boot = nn->boot_time;
+ clp->cl_clientid.cl_boot = (u32)nn->boot_time;
clp->cl_clientid.cl_id = nn->clientid_counter++;
gen_confirm(clp, nn);
}
@@ -2269,7 +2341,7 @@ static int client_info_show(struct seq_file *m, void *v)
clp->cl_nii_domain.len);
seq_printf(m, "\nImplementation name: ");
seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
- seq_printf(m, "\nImplementation time: [%ld, %ld]\n",
+ seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
}
drop_client(clp);
@@ -2323,9 +2395,9 @@ static void states_stop(struct seq_file *s, void *v)
spin_unlock(&clp->cl_lock);
}
-static void nfs4_show_superblock(struct seq_file *s, struct file *f)
+static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
{
- struct inode *inode = file_inode(f);
+ struct inode *inode = f->nf_inode;
seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
MAJOR(inode->i_sb->s_dev),
@@ -2343,7 +2415,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_ol_stateid *ols;
struct nfs4_file *nf;
- struct file *file;
+ struct nfsd_file *file;
struct nfs4_stateowner *oo;
unsigned int access, deny;
@@ -2359,10 +2431,10 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
access = bmap_to_share_mode(ols->st_access_bmap);
deny = bmap_to_share_mode(ols->st_deny_bmap);
- seq_printf(s, "access: \%s\%s, ",
+ seq_printf(s, "access: %s%s, ",
access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
- seq_printf(s, "deny: \%s\%s, ",
+ seq_printf(s, "deny: %s%s, ",
deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
@@ -2370,7 +2442,7 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
seq_printf(s, ", ");
nfs4_show_owner(s, oo);
seq_printf(s, " }\n");
- fput(file);
+ nfsd_file_put(file);
return 0;
}
@@ -2379,7 +2451,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_ol_stateid *ols;
struct nfs4_file *nf;
- struct file *file;
+ struct nfsd_file *file;
struct nfs4_stateowner *oo;
ols = openlockstateid(st);
@@ -2401,7 +2473,7 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
seq_printf(s, ", ");
nfs4_show_owner(s, oo);
seq_printf(s, " }\n");
- fput(file);
+ nfsd_file_put(file);
return 0;
}
@@ -2410,7 +2482,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_delegation *ds;
struct nfs4_file *nf;
- struct file *file;
+ struct nfsd_file *file;
ds = delegstateid(st);
nf = st->sc_file;
@@ -2433,7 +2505,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_layout_stateid *ls;
- struct file *file;
+ struct nfsd_file *file;
ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
file = ls->ls_file;
@@ -2589,7 +2661,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
gen_clid(clp, nn);
kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
- clp->cl_time = get_seconds();
+ clp->cl_time = ktime_get_boottime_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
@@ -2923,8 +2995,7 @@ static __be32 copy_impl_id(struct nfs4_client *clp,
xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
if (!clp->cl_nii_name.data)
return nfserr_jukebox;
- clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec;
- clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec;
+ clp->cl_nii_time = exid->nii_time;
return 0;
}
@@ -3169,10 +3240,10 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
* performance. When short on memory we therefore prefer to
* decrease number of slots instead of their size. Clients that
* request larger slots than they need will get poor results:
+ * Note that we always allow at least one slot, because our
+ * accounting is soft and provides no guarantees either way.
*/
- ca->maxreqs = nfsd4_get_drc_mem(ca);
- if (!ca->maxreqs)
- return nfserr_jukebox;
+ ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
return nfs_ok;
}
@@ -3350,7 +3421,7 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
case NFS4_CDFC4_BACK_OR_BOTH:
*dir = NFS4_CDFC4_BOTH;
return nfs_ok;
- };
+ }
return nfserr_inval;
}
@@ -3525,12 +3596,17 @@ static bool replay_matches_cache(struct svc_rqst *rqstp,
(bool)seq->cachethis)
return false;
/*
- * If there's an error than the reply can have fewer ops than
- * the call. But if we cached a reply with *more* ops than the
- * call you're sending us now, then this new call is clearly not
- * really a replay of the old one:
+ * If there's an error then the reply can have fewer ops than
+ * the call.
+ */
+ if (slot->sl_opcnt < argp->opcnt && !slot->sl_status)
+ return false;
+ /*
+ * But if we cached a reply with *more* ops than the call you're
+ * sending us now, then this new call is clearly not really a
+ * replay of the old one:
*/
- if (slot->sl_opcnt < argp->opcnt)
+ if (slot->sl_opcnt > argp->opcnt)
return false;
/* This is the only check explicitly called by spec: */
if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
@@ -4255,7 +4331,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
last = oo->oo_last_closed_stid;
oo->oo_last_closed_stid = s;
list_move_tail(&oo->oo_close_lru, &nn->close_lru);
- oo->oo_time = get_seconds();
+ oo->oo_time = ktime_get_boottime_seconds();
spin_unlock(&nn->client_lock);
if (last)
nfs4_put_stid(&last->st_stid);
@@ -4350,7 +4426,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
*/
spin_lock(&state_lock);
if (dp->dl_time == 0) {
- dp->dl_time = get_seconds();
+ dp->dl_time = ktime_get_boottime_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
spin_unlock(&state_lock);
@@ -4462,7 +4538,8 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
static __be32 lookup_clientid(clientid_t *clid,
struct nfsd4_compound_state *cstate,
- struct nfsd_net *nn)
+ struct nfsd_net *nn,
+ bool sessions)
{
struct nfs4_client *found;
@@ -4483,7 +4560,7 @@ static __be32 lookup_clientid(clientid_t *clid,
*/
WARN_ON_ONCE(cstate->session);
spin_lock(&nn->client_lock);
- found = find_confirmed_client(clid, false, nn);
+ found = find_confirmed_client(clid, sessions, nn);
if (!found) {
spin_unlock(&nn->client_lock);
return nfserr_expired;
@@ -4516,7 +4593,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
if (open->op_file == NULL)
return nfserr_jukebox;
- status = lookup_clientid(clientid, cstate, nn);
+ status = lookup_clientid(clientid, cstate, nn, false);
if (status)
return status;
clp = cstate->clp;
@@ -4644,14 +4721,14 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
return 0;
if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
- return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+ return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0);
}
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
struct nfsd4_open *open)
{
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
__be32 status;
int oflag = nfs4_access_to_omode(open->op_share_access);
int access = nfs4_access_to_access(open->op_share_access);
@@ -4687,18 +4764,18 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
+ status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
if (status)
goto out_put_access;
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
- fp->fi_fds[oflag] = filp;
- filp = NULL;
+ fp->fi_fds[oflag] = nf;
+ nf = NULL;
}
}
spin_unlock(&fp->fi_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status)
@@ -4767,7 +4844,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
fl->fl_end = OFFSET_MAX;
fl->fl_owner = (fl_owner_t)dp;
fl->fl_pid = current->tgid;
- fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
+ fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
return fl;
}
@@ -4777,7 +4854,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
{
int status = 0;
struct nfs4_delegation *dp;
- struct file *filp;
+ struct nfsd_file *nf;
struct file_lock *fl;
/*
@@ -4788,8 +4865,8 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- filp = find_readable_file(fp);
- if (!filp) {
+ nf = find_readable_file(fp);
+ if (!nf) {
/* We should always have a readable file here */
WARN_ON_ONCE(1);
return ERR_PTR(-EBADF);
@@ -4799,17 +4876,17 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (nfs4_delegation_exists(clp, fp))
status = -EAGAIN;
else if (!fp->fi_deleg_file) {
- fp->fi_deleg_file = filp;
+ fp->fi_deleg_file = nf;
/* increment early to prevent fi_deleg_file from being
* cleared */
fp->fi_delegees = 1;
- filp = NULL;
+ nf = NULL;
} else
fp->fi_delegees++;
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
if (status)
return ERR_PTR(status);
@@ -4822,7 +4899,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (!fl)
goto out_clnt_odstate;
- status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+ status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
if (fl)
locks_free_lock(fl);
if (status)
@@ -4842,7 +4919,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
return dp;
out_unlock:
- vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp);
+ vfs_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
out_clnt_odstate:
put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_stid(&dp->dl_stid);
@@ -5105,7 +5182,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("process_renew(%08x/%08x): starting\n",
clid->cl_boot, clid->cl_id);
- status = lookup_clientid(clid, cstate, nn);
+ status = lookup_clientid(clid, cstate, nn, false);
if (status)
goto out;
clp = cstate->clp;
@@ -5156,9 +5233,8 @@ nfsd4_end_grace(struct nfsd_net *nn)
*/
static bool clients_still_reclaiming(struct nfsd_net *nn)
{
- unsigned long now = get_seconds();
- unsigned long double_grace_period_end = nn->boot_time +
- 2 * nn->nfsd4_lease;
+ time64_t double_grace_period_end = nn->boot_time +
+ 2 * nn->nfsd4_lease;
if (nn->track_reclaim_completes &&
atomic_read(&nn->nr_reclaim_complete) ==
@@ -5171,12 +5247,12 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
* If we've given them *two* lease times to reclaim, and they're
* still not done, give up:
*/
- if (time_after(now, double_grace_period_end))
+ if (ktime_get_boottime_seconds() > double_grace_period_end)
return false;
return true;
}
-static time_t
+static time64_t
nfs4_laundromat(struct nfsd_net *nn)
{
struct nfs4_client *clp;
@@ -5185,8 +5261,11 @@ nfs4_laundromat(struct nfsd_net *nn)
struct nfs4_ol_stateid *stp;
struct nfsd4_blocked_lock *nbl;
struct list_head *pos, *next, reaplist;
- time_t cutoff = get_seconds() - nn->nfsd4_lease;
- time_t t, new_timeo = nn->nfsd4_lease;
+ time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease;
+ time64_t t, new_timeo = nn->nfsd4_lease;
+ struct nfs4_cpntf_state *cps;
+ copy_stateid_t *cps_t;
+ int i;
dprintk("NFSD: laundromat service - starting\n");
@@ -5197,10 +5276,20 @@ nfs4_laundromat(struct nfsd_net *nn)
dprintk("NFSD: end of grace period\n");
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);
+
+ spin_lock(&nn->s2s_cp_lock);
+ idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
+ cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); /* idr also holds NFS4_COPY_STID entries; only use cps once sc_type matches */
+ if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
+ cps->cpntf_time <= cutoff) /* expired: not refreshed within the lease (test was inverted as "> cutoff") */
+ _free_cpntf_state_locked(nn, cps);
+ }
+ spin_unlock(&nn->s2s_cp_lock);
+
spin_lock(&nn->client_lock);
list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
- if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+ if (clp->cl_time > cutoff) {
t = clp->cl_time - cutoff;
new_timeo = min(new_timeo, t);
break;
@@ -5223,7 +5312,7 @@ nfs4_laundromat(struct nfsd_net *nn)
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
+ if (dp->dl_time > cutoff) {
t = dp->dl_time - cutoff;
new_timeo = min(new_timeo, t);
break;
@@ -5243,8 +5332,7 @@ nfs4_laundromat(struct nfsd_net *nn)
while (!list_empty(&nn->close_lru)) {
oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
oo_close_lru);
- if (time_after((unsigned long)oo->oo_time,
- (unsigned long)cutoff)) {
+ if (oo->oo_time > cutoff) {
t = oo->oo_time - cutoff;
new_timeo = min(new_timeo, t);
break;
@@ -5274,8 +5362,7 @@ nfs4_laundromat(struct nfsd_net *nn)
while (!list_empty(&nn->blocked_locks_lru)) {
nbl = list_first_entry(&nn->blocked_locks_lru,
struct nfsd4_blocked_lock, nbl_lru);
- if (time_after((unsigned long)nbl->nbl_time,
- (unsigned long)cutoff)) {
+ if (nbl->nbl_time > cutoff) {
t = nbl->nbl_time - cutoff;
new_timeo = min(new_timeo, t);
break;
@@ -5292,7 +5379,7 @@ nfs4_laundromat(struct nfsd_net *nn)
free_blocked_lock(nbl);
}
out:
- new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
+ new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
return new_timeo;
}
@@ -5302,13 +5389,13 @@ static void laundromat_main(struct work_struct *);
static void
laundromat_main(struct work_struct *laundry)
{
- time_t t;
+ time64_t t;
struct delayed_work *dwork = to_delayed_work(laundry);
struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
laundromat_work);
t = nfs4_laundromat(nn);
- dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
+ dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t);
queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
}
@@ -5493,7 +5580,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
return nfserr_bad_stateid;
- status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
+ status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn,
+ false);
if (status == nfserr_stale_clientid) {
if (cstate->session)
return nfserr_bad_stateid;
@@ -5513,7 +5601,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
return nfs_ok;
}
-static struct file *
+static struct nfsd_file *
nfs4_find_file(struct nfs4_stid *s, int flags)
{
if (!s)
@@ -5523,7 +5611,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
case NFS4_DELEG_STID:
if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
return NULL;
- return get_file(s->sc_file->fi_deleg_file);
+ return nfsd_file_get(s->sc_file->fi_deleg_file);
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
if (flags & RD_STATE)
@@ -5549,41 +5637,117 @@ nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
static __be32
nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
- struct file **filpp, bool *tmp_file, int flags)
+ struct nfsd_file **nfp, int flags)
{
int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
- struct file *file;
+ struct nfsd_file *nf;
__be32 status;
- file = nfs4_find_file(s, flags);
- if (file) {
+ nf = nfs4_find_file(s, flags);
+ if (nf) {
status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
acc | NFSD_MAY_OWNER_OVERRIDE);
if (status) {
- fput(file);
- return status;
+ nfsd_file_put(nf);
+ goto out;
}
-
- *filpp = file;
} else {
- status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+ status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
if (status)
return status;
-
- if (tmp_file)
- *tmp_file = true;
}
+ *nfp = nf;
+out:
+ return status;
+}
+static void
+_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) /* drop one reference; visible callers hold nn->s2s_cp_lock */
+{
+ WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID);
+ if (!refcount_dec_and_test(&cps->cp_stateid.sc_count))
+ return; /* other references remain: keep the state */
+ list_del(&cps->cp_list); /* unlink from the parent stid's sc_cp_list */
+ idr_remove(&nn->s2s_cp_stateids,
+ cps->cp_stateid.stid.si_opaque.so_id);
+ kfree(cps); /* last reference gone */
+}
+/*
+ * A READ from an inter server to server COPY will have a copy stateid.
+ * Look up the matching copy notify state in the idr. With a NULL clp take
+ * a reference and return it in *cps; otherwise drop a reference instead.
+ */
+__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_client *clp,
+ struct nfs4_cpntf_state **cps)
+{
+ copy_stateid_t *cps_t;
+ struct nfs4_cpntf_state *state = NULL;
+ if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id) /* cl_id differs from the one nfs4_init_cp_state() stamps */
+ return nfserr_bad_stateid;
+ spin_lock(&nn->s2s_cp_lock);
+ cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id);
+ if (cps_t) {
+ state = container_of(cps_t, struct nfs4_cpntf_state,
+ cp_stateid);
+ if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) { /* the idr also holds NFS4_COPY_STID entries */
+ state = NULL;
+ goto unlock;
+ }
+ if (!clp)
+ refcount_inc(&state->cp_stateid.sc_count);
+ else
+ _free_cpntf_state_locked(nn, state); /* may free *state; it is only NULL-tested afterwards */
+ }
+unlock:
+ spin_unlock(&nn->s2s_cp_lock);
+ if (!state)
+ return nfserr_bad_stateid;
+ if (!clp && state) /* *cps is left untouched when clp is non-NULL */
+ *cps = state;
return 0;
}
+static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_stid **stid) /* resolve copy-notify stateid st to the stid named by its cp_p_stateid */
+{
+ __be32 status;
+ struct nfs4_cpntf_state *cps = NULL;
+ struct nfsd4_compound_state cstate;
+
+ status = manage_cpntf_state(nn, st, NULL, &cps); /* NULL clp: takes a reference on cps */
+ if (status)
+ return status;
+
+ cps->cpntf_time = ktime_get_boottime_seconds(); /* refresh the timestamp nfs4_laundromat() checks */
+ memset(&cstate, 0, sizeof(cstate)); /* scratch compound state used only for the lookups below */
+ status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true); /* true is forwarded as find_confirmed_client()'s sessions argument */
+ if (status)
+ goto out;
+ status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid,
+ NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+ stid, nn);
+ put_client_renew(cstate.clp); /* presumably balances the client reference taken by lookup_clientid() - TODO confirm */
+out:
+ nfs4_put_cpntf_state(nn, cps); /* drop the reference taken by manage_cpntf_state() above */
+ return status;
+}
+
+void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) /* drop one reference on cps, taking nn->s2s_cp_lock itself */
+{
+ spin_lock(&nn->s2s_cp_lock);
+ _free_cpntf_state_locked(nn, cps); /* frees cps if this was the last reference */
+ spin_unlock(&nn->s2s_cp_lock);
+}
+
/*
* Checks for stateid operations
*/
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
- stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
+ stateid_t *stateid, int flags, struct nfsd_file **nfp,
+ struct nfs4_stid **cstid)
{
struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
@@ -5591,10 +5755,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfs4_stid *s = NULL;
__be32 status;
- if (filpp)
- *filpp = NULL;
- if (tmp_file)
- *tmp_file = false;
+ if (nfp)
+ *nfp = NULL;
if (grace_disallows_io(net, ino))
return nfserr_grace;
@@ -5607,6 +5769,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
status = nfsd4_lookup_stateid(cstate, stateid,
NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
&s, nn);
+ if (status == nfserr_bad_stateid)
+ status = find_cpntf_state(nn, stateid, &s);
if (status)
return status;
status = nfsd4_stid_check_stateid_generation(stateid, s,
@@ -5631,11 +5795,15 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
status = nfs4_check_fh(fhp, s);
done:
- if (!status && filpp)
- status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
+ if (status == nfs_ok && nfp)
+ status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
out:
- if (s)
- nfs4_put_stid(s);
+ if (s) {
+ if (!status && cstid)
+ *cstid = s;
+ else
+ nfs4_put_stid(s);
+ }
return status;
}
@@ -6392,7 +6560,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *lock_stp = NULL;
struct nfs4_ol_stateid *open_stp = NULL;
struct nfs4_file *fp;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
@@ -6474,8 +6642,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* Fallthrough */
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
- filp = find_readable_file_locked(fp);
- if (filp)
+ nf = find_readable_file_locked(fp);
+ if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
spin_unlock(&fp->fi_lock);
fl_type = F_RDLCK;
@@ -6486,8 +6654,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* Fallthrough */
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
- filp = find_writeable_file_locked(fp);
- if (filp)
+ nf = find_writeable_file_locked(fp);
+ if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
spin_unlock(&fp->fi_lock);
fl_type = F_WRLCK;
@@ -6497,7 +6665,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- if (!filp) {
+ if (!nf) {
status = nfserr_openmode;
goto out;
}
@@ -6513,7 +6681,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
file_lock->fl_type = fl_type;
file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
file_lock->fl_pid = current->tgid;
- file_lock->fl_file = filp;
+ file_lock->fl_file = nf->nf_file;
file_lock->fl_flags = fl_flags;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = lock->lk_offset;
@@ -6528,14 +6696,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
if (fl_flags & FL_SLEEP) {
- nbl->nbl_time = jiffies;
+ nbl->nbl_time = ktime_get_boottime_seconds();
spin_lock(&nn->blocked_locks_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
spin_unlock(&nn->blocked_locks_lock);
}
- err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
+ err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
switch (err) {
case 0: /* success! */
nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
@@ -6570,8 +6738,8 @@ out:
}
free_blocked_lock(nbl);
}
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
if (lock_stp) {
/* Bump seqid manually if the 4.0 replay owner is openowner */
if (cstate->replay_owner &&
@@ -6606,11 +6774,11 @@ out:
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
- struct file *file;
- __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ struct nfsd_file *nf;
+ __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
if (!err) {
- err = nfserrno(vfs_test_lock(file, lock));
- fput(file);
+ err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ nfsd_file_put(nf);
}
return err;
}
@@ -6635,7 +6803,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
if (!nfsd4_has_session(cstate)) {
- status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
+ status = lookup_clientid(&lockt->lt_clientid, cstate, nn,
+ false);
if (status)
goto out;
}
@@ -6698,7 +6867,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_locku *locku = &u->locku;
struct nfs4_ol_stateid *stp;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
struct file_lock *file_lock = NULL;
__be32 status;
int err;
@@ -6716,8 +6885,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&stp, nn);
if (status)
goto out;
- filp = find_any_file(stp->st_stid.sc_file);
- if (!filp) {
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (!nf) {
status = nfserr_lock_range;
goto put_stateid;
}
@@ -6725,13 +6894,13 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
status = nfserr_jukebox;
- goto fput;
+ goto put_file;
}
file_lock->fl_type = F_UNLCK;
file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
file_lock->fl_pid = current->tgid;
- file_lock->fl_file = filp;
+ file_lock->fl_file = nf->nf_file;
file_lock->fl_flags = FL_POSIX;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = locku->lu_offset;
@@ -6740,14 +6909,14 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
locku->lu_length);
nfs4_transform_lock_offset(file_lock);
- err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
+ err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
if (err) {
dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
goto out_nfserr;
}
nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
-fput:
- fput(filp);
+put_file:
+ nfsd_file_put(nf);
put_stateid:
mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
@@ -6759,7 +6928,7 @@ out:
out_nfserr:
status = nfserrno(err);
- goto fput;
+ goto put_file;
}
/*
@@ -6772,17 +6941,17 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock *fl;
int status = false;
- struct file *filp = find_any_file(fp);
+ struct nfsd_file *nf = find_any_file(fp);
struct inode *inode;
struct file_lock_context *flctx;
- if (!filp) {
+ if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
return status;
}
- inode = locks_inode(filp);
+ inode = locks_inode(nf->nf_file);
flctx = inode->i_flctx;
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -6795,7 +6964,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
spin_unlock(&flctx->flc_lock);
}
- fput(filp);
+ nfsd_file_put(nf);
return status;
}
@@ -6819,7 +6988,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
- status = lookup_clientid(clid, cstate, nn);
+ status = lookup_clientid(clid, cstate, nn, false);
if (status)
return status;
@@ -6888,7 +7057,8 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
* will be freed in nfs4_remove_reclaim_record in the normal case).
*/
struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
+ struct nfsd_net *nn)
{
unsigned int strhashval;
struct nfs4_client_reclaim *crp;
@@ -6901,6 +7071,8 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
crp->cr_name.data = name.data;
crp->cr_name.len = name.len;
+ crp->cr_princhash.data = princhash.data;
+ crp->cr_princhash.len = princhash.len;
crp->cr_clp = NULL;
nn->reclaim_str_hashtbl_size++;
}
@@ -6912,6 +7084,7 @@ nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
{
list_del(&crp->cr_strhash);
kfree(crp->cr_name.data);
+ kfree(crp->cr_princhash.data);
kfree(crp);
nn->reclaim_str_hashtbl_size--;
}
@@ -6962,7 +7135,7 @@ nfs4_check_open_reclaim(clientid_t *clid,
__be32 status;
/* find clientid in conf_id_hashtbl */
- status = lookup_clientid(clid, cstate, nn);
+ status = lookup_clientid(clid, cstate, nn, false);
if (status)
return nfserr_reclaim_bad;
@@ -7615,7 +7788,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
nn->conf_name_tree = RB_ROOT;
nn->unconf_name_tree = RB_ROOT;
- nn->boot_time = get_seconds();
+ nn->boot_time = ktime_get_real_seconds();
nn->grace_ended = false;
nn->nfsd4_manager.block_opens = true;
INIT_LIST_HEAD(&nn->nfsd4_manager.list);
@@ -7684,7 +7857,7 @@ nfs4_state_start_net(struct net *net)
nfsd4_client_tracking_init(net);
if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
goto skip_grace;
- printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
+ printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n",
nn->nfsd4_grace, net->ns.inum);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;
@@ -7760,7 +7933,8 @@ nfs4_state_shutdown(void)
static void
get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
{
- if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
+ if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) &&
+ CURRENT_STATEID(stateid))
memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
}
@@ -7769,14 +7943,14 @@ put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
{
if (cstate->minorversion) {
memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
}
void
clear_current_stateid(struct nfsd4_compound_state *cstate)
{
- CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
/*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 442811809f3d..9761512674a0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,6 +40,7 @@
#include <linux/utsname.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/addr.h>
#include "idmap.h"
#include "acl.h"
@@ -49,6 +50,7 @@
#include "cache.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#include <linux/security.h>
@@ -203,6 +205,13 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
return p;
}
+/*
+ * Bytes left in the compound arguments: the span from argp->p up to
+ * argp->end, plus argp->pagelen.
+ */
+static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
+{
+	unsigned int in_current_buf;
+
+	in_current_buf = (char *)argp->end - (char *)argp->p;
+	return argp->pagelen + in_current_buf;
+}
+
static int zero_clientid(clientid_t *clid)
{
return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -211,10 +220,10 @@ static int zero_clientid(clientid_t *clid)
/**
* svcxdr_tmpalloc - allocate memory to be freed after compound processing
* @argp: NFSv4 compound argument structure
- * @p: pointer to be freed (with kfree())
+ * @len: length of buffer to allocate
*
- * Marks @p to be freed when processing the compound operation
- * described in @argp finishes.
+ * Allocates a buffer of size @len to be freed when processing the compound
+ * operation described in @argp finishes.
*/
static void *
svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
@@ -347,7 +356,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
READ_BUF(4); len += 4;
nace = be32_to_cpup(p++);
- if (nace > NFS4_ACL_MAX)
+ if (nace > compoundargs_bytes_left(argp)/20)
+ /*
+ * Even with 4-byte names there wouldn't be
+ * space for that many aces; something fishy is
+ * going on:
+ */
return nfserr_fbig;
*acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
@@ -1418,7 +1432,6 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
struct nfsd4_create_session *sess)
{
DECODE_HEAD;
- u32 dummy;
READ_BUF(16);
COPYMEM(&sess->clientid, 8);
@@ -1427,7 +1440,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
/* Fore channel attrs */
READ_BUF(28);
- dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+ p++; /* headerpadsz is always 0 */
sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1444,7 +1457,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
/* Back channel attrs */
READ_BUF(28);
- dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+ p++; /* headerpadsz is always 0 */
sess->back_channel.maxreq_sz = be32_to_cpup(p++);
sess->back_channel.maxresp_sz = be32_to_cpup(p++);
sess->back_channel.maxresp_cached = be32_to_cpup(p++);
@@ -1732,11 +1745,47 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
DECODE_TAIL;
}
+/*
+ * Decode one server location from the compound arguments into @ns.
+ *
+ * The type word is stored in ns->nl4_type.  For NL4_NETADDR the netid and
+ * the universal address are copied into ns->u.nl4_addr, each preceded by
+ * its length; a netid longer than RPCBIND_MAXNETIDLEN or an address longer
+ * than RPCBIND_MAXUADDRLEN is rejected via the xdr_error path, as is any
+ * other nl4_type.
+ *
+ * NOTE(review): the xdr_error label, the local "p" and the return value are
+ * presumably supplied by the DECODE_HEAD/READ_BUF/DECODE_TAIL macros, which
+ * are defined outside this view -- confirm against their definitions.
+ */
+static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
+ struct nl4_server *ns)
+{
+ DECODE_HEAD;
+ struct nfs42_netaddr *naddr;
+
+ READ_BUF(4);
+ ns->nl4_type = be32_to_cpup(p++);
+
+ /* only the NL4_NETADDR form is decoded; any other type is an XDR error */
+ switch (ns->nl4_type) {
+ case NL4_NETADDR:
+ naddr = &ns->u.nl4_addr;
+
+ READ_BUF(4);
+ naddr->netid_len = be32_to_cpup(p++);
+ /* bound the length before it is used as a copy size below */
+ if (naddr->netid_len > RPCBIND_MAXNETIDLEN)
+ goto xdr_error;
+
+ READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */
+ COPYMEM(naddr->netid, naddr->netid_len);
+
+ naddr->addr_len = be32_to_cpup(p++);
+ /* same bound check for the universal address */
+ if (naddr->addr_len > RPCBIND_MAXUADDRLEN)
+ goto xdr_error;
+
+ READ_BUF(naddr->addr_len);
+ COPYMEM(naddr->addr, naddr->addr_len);
+ break;
+ default:
+ goto xdr_error;
+ }
+ DECODE_TAIL;
+}
+
static __be32
nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
{
DECODE_HEAD;
- unsigned int tmp;
+ struct nl4_server *ns_dummy;
+ int i, count;
status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
if (status)
@@ -1751,7 +1800,32 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
p = xdr_decode_hyper(p, &copy->cp_count);
p++; /* ca_consecutive: we always do consecutive copies */
copy->cp_synchronous = be32_to_cpup(p++);
- tmp = be32_to_cpup(p); /* Source server list not supported */
+
+ count = be32_to_cpup(p++);
+
+ copy->cp_intra = false;
+ if (count == 0) { /* intra-server copy */
+ copy->cp_intra = true;
+ goto intra;
+ }
+
+ /* decode all the supplied server addresses but use first */
+ status = nfsd4_decode_nl4_server(argp, &copy->cp_src);
+ if (status)
+ return status;
+
+ ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL);
+ if (ns_dummy == NULL)
+ return nfserrno(-ENOMEM);
+ for (i = 0; i < count - 1; i++) {
+ status = nfsd4_decode_nl4_server(argp, ns_dummy);
+ if (status) {
+ kfree(ns_dummy);
+ return status;
+ }
+ }
+ kfree(ns_dummy);
+intra:
DECODE_TAIL;
}
@@ -1764,6 +1838,18 @@ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
}
+/*
+ * Decode the COPY_NOTIFY arguments: the source stateid goes into
+ * cn->cpn_src_stateid, then the destination server location goes into
+ * cn->cpn_dst.
+ *
+ * Returns the stateid decode error if there is one, otherwise whatever
+ * nfsd4_decode_nl4_server() returns.
+ */
static __be32
+nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+			 struct nfsd4_copy_notify *cn)
+{
+	/* __be32, not int: this value is returned unchanged as an nfserr */
+	__be32 status;
+
+	status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
+	if (status)
+		return status;
+	return nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
+}
+
+static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
@@ -1864,7 +1950,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
/* new operations for NFSv4.2 */
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
- [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2013,11 +2099,11 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
*/
static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
{
- struct timespec ts;
+ struct timespec64 ts;
u32 ns;
ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran);
- ts = ns_to_timespec(ns);
+ ts = ns_to_timespec64(ns);
p = xdr_encode_hyper(p, ts.tv_sec);
*p++ = cpu_to_be32(ts.tv_nsec);
@@ -2980,18 +3066,9 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
- if (d_really_is_negative(dentry)) {
- /*
- * we're not holding the i_mutex here, so there's
- * a window where this directory entry could have gone
- * away.
- */
- dput(dentry);
- return nfserr_noent;
- }
exp_get(exp);
/*
@@ -3217,9 +3294,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
if (!p)
return nfserr_resource;
encode_cinfo(p, &create->cr_cinfo);
- nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
+ return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
create->cr_bmval[1], create->cr_bmval[2]);
- return 0;
}
static __be32
@@ -3451,7 +3527,6 @@ static __be32 nfsd4_encode_splice_read(
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
u32 eof;
- long len;
int space_left;
__be32 nfserr;
__be32 *p = xdr->p - 2;
@@ -3460,9 +3535,8 @@ static __be32 nfsd4_encode_splice_read(
if (xdr->end - xdr->p < 1)
return nfserr_resource;
- len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
- file, read->rd_offset, &maxcount);
+ file, read->rd_offset, &maxcount, &eof);
read->rd_length = maxcount;
if (nfserr) {
/*
@@ -3474,9 +3548,6 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}
- eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
- d_inode(read->rd_fhp->fh_dentry)->i_size);
-
*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
@@ -3547,15 +3618,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
len = maxcount;
nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+ resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
+ &eof);
read->rd_length = maxcount;
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
- eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
- d_inode(read->rd_fhp->fh_dentry)->i_size);
-
tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
tmp = htonl(maxcount);
@@ -3574,11 +3643,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
{
unsigned long maxcount;
struct xdr_stream *xdr = &resp->xdr;
- struct file *file = read->rd_filp;
+ struct file *file;
int starting_len = xdr->buf->len;
- struct raparms *ra = NULL;
__be32 *p;
+ if (nfserr)
+ return nfserr;
+ file = read->rd_nf->nf_file;
+
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
@@ -3596,18 +3668,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
(xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
- if (read->rd_tmp_file)
- ra = nfsd_init_raparms(file);
-
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
- if (ra)
- nfsd_put_raparams(file, ra);
-
if (nfserr)
xdr_truncate_encode(xdr, starting_len);
@@ -4253,6 +4319,46 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
}
+/*
+ * Encode one server location @ns into the reply stream of @resp: the type
+ * word first, then for NL4_NETADDR the netid and universal address, each as
+ * a length word followed by the padded bytes.
+ *
+ * Returns 0 on success, nfserr_resource if the stream has no room, and
+ * nfserr_inval (with a one-time warning) for any type other than
+ * NL4_NETADDR.
+ */
static __be32
+nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	struct nfs42_netaddr *addr;
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4);
+	/* reservation can fail; do not write through a NULL pointer */
+	if (!p)
+		return nfserr_resource;
+	*p++ = cpu_to_be32(ns->nl4_type);
+
+	switch (ns->nl4_type) {
+	case NL4_NETADDR:
+		addr = &ns->u.nl4_addr;
+
+		/* netid_len, netid, uaddr_len, uaddr (port included
+		 * in RPCBIND_MAXUADDRLEN)
+		 */
+		p = xdr_reserve_space(xdr,
+			4 /* netid len */ +
+			(XDR_QUADLEN(addr->netid_len) * 4) +
+			4 /* uaddr len */ +
+			(XDR_QUADLEN(addr->addr_len) * 4));
+		if (!p)
+			return nfserr_resource;
+
+		*p++ = cpu_to_be32(addr->netid_len);
+		p = xdr_encode_opaque_fixed(p, addr->netid,
+					    addr->netid_len);
+		*p++ = cpu_to_be32(addr->addr_len);
+		p = xdr_encode_opaque_fixed(p, addr->addr,
+					    addr->addr_len);
+		break;
+	default:
+		WARN_ON_ONCE(ns->nl4_type != NL4_NETADDR);
+		return nfserr_inval;
+	}
+
+	return 0;
+}
+
+static __be32
nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_copy *copy)
{
@@ -4286,6 +4392,40 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
}
+/*
+ * Encode the COPY_NOTIFY result: lease time (cpn_sec, cpn_nsec), the
+ * cnr stateid, a source-server count of one, and then that one server
+ * location.  A non-zero @nfserr is passed straight back without encoding
+ * anything.
+ */
static __be32
+nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+			 struct nfsd4_copy_notify *cn)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	__be32 *lease, *nsvr;
+	__be32 status = nfserr;
+
+	if (status)
+		return status;
+
+	/* cnr_lease_time: 8 bytes of seconds followed by 4 of nanoseconds */
+	lease = xdr_reserve_space(xdr, 8 + 4);
+	if (lease == NULL)
+		return nfserr_resource;
+	lease = xdr_encode_hyper(lease, cn->cpn_sec);
+	*lease = cpu_to_be32(cn->cpn_nsec);
+
+	/* cnr_stateid */
+	status = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid);
+	if (status)
+		return status;
+
+	/* cnr_src.nl_nsvr: exactly one source server follows */
+	nsvr = xdr_reserve_space(xdr, 4);
+	if (nsvr == NULL)
+		return nfserr_resource;
+	*nsvr = cpu_to_be32(1);
+
+	return nfsd42_encode_nl4_server(resp, &cn->cpn_src);
+}
+
+static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_seek *seek)
{
@@ -4382,7 +4522,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
/* NFSv4.2 operations */
[OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
- [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify,
[OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
@@ -4509,8 +4649,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
__be32 *p;
struct nfs4_replay *rp = op->replay;
- BUG_ON(!rp);
-
p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
if (!p) {
WARN_ON_ONCE(1);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 26ad75ae2be0..96352ab7bd81 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -571,7 +571,7 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
*/
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
- struct nfsd_net *nn = v;
+ struct nfsd_net *nn = m->private;
seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 13c548733860..e109a1007704 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -157,11 +157,11 @@ static int exports_proc_open(struct inode *inode, struct file *file)
return exports_net_open(current->nsproxy->net_ns, file);
}
-static const struct file_operations exports_proc_operations = {
- .open = exports_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
+static const struct proc_ops exports_proc_ops = {
+ .proc_open = exports_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
};
static int exports_nfsd_open(struct inode *inode, struct file *file)
@@ -956,7 +956,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
#ifdef CONFIG_NFSD_V4
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
- time_t *time, struct nfsd_net *nn)
+ time64_t *time, struct nfsd_net *nn)
{
char *mesg = buf;
int rv, i;
@@ -984,11 +984,11 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
*time = i;
}
- return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
+ return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time);
}
static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
- time_t *time, struct nfsd_net *nn)
+ time64_t *time, struct nfsd_net *nn)
{
ssize_t rv;
@@ -1171,13 +1171,17 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
return inode;
}
-static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, struct nfsdfs_client *ncl)
{
struct inode *inode;
inode = nfsd_get_inode(dir->i_sb, mode);
if (!inode)
return -ENOMEM;
+ if (ncl) {
+ inode->i_private = ncl;
+ kref_get(&ncl->cl_ref);
+ }
d_add(dentry, inode);
inc_nlink(dir);
fsnotify_mkdir(dir, dentry);
@@ -1194,17 +1198,14 @@ static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *nc
dentry = d_alloc_name(parent, name);
if (!dentry)
goto out_err;
- ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600);
+ ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600, ncl);
if (ret)
goto out_err;
- if (ncl) {
- d_inode(dentry)->i_private = ncl;
- kref_get(&ncl->cl_ref);
- }
out:
inode_unlock(dir);
return dentry;
out_err:
+ dput(dentry);
dentry = ERR_PTR(ret);
goto out;
}
@@ -1214,11 +1215,9 @@ static void clear_ncl(struct inode *inode)
struct nfsdfs_client *ncl = inode->i_private;
inode->i_private = NULL;
- synchronize_rcu();
kref_put(&ncl->cl_ref, ncl->cl_release);
}
-
static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode)
{
struct nfsdfs_client *nc = inode->i_private;
@@ -1232,9 +1231,9 @@ struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
{
struct nfsdfs_client *nc;
- rcu_read_lock();
+ inode_lock_shared(inode);
nc = __get_nfsdfs_client(inode);
- rcu_read_unlock();
+ inode_unlock_shared(inode);
return nc;
}
/* from __rpc_unlink */
@@ -1386,8 +1385,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
static int nfsd_fs_get_tree(struct fs_context *fc)
{
- fc->s_fs_info = get_net(fc->net_ns);
- return vfs_get_super(fc, vfs_get_keyed_super, nfsd_fill_super);
+ return get_tree_keyed(fc, nfsd_fill_super, get_net(fc->net_ns));
}
static void nfsd_fs_free_fc(struct fs_context *fc)
@@ -1433,8 +1431,7 @@ static int create_proc_exports_entry(void)
entry = proc_mkdir("fs/nfs", NULL);
if (!entry)
return -ENOMEM;
- entry = proc_create("exports", 0, entry,
- &exports_proc_operations);
+ entry = proc_create("exports", 0, entry, &exports_proc_ops);
if (!entry) {
remove_proc_entry("fs/nfs", NULL);
return -ENOMEM;
@@ -1478,6 +1475,7 @@ static __net_init int nfsd_init_net(struct net *net)
atomic_set(&nn->ntf_refcnt, 0);
init_waitqueue_head(&nn->ntf_wq);
+ seqlock_init(&nn->boot_lock);
mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
if (IS_ERR(mnt)) {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index af2947551e9c..2ab5569126b8 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -19,6 +19,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/addr.h>
#include <uapi/linux/nfsd/debug.h>
@@ -142,7 +143,6 @@ int nfs4_state_start(void);
int nfs4_state_start_net(struct net *net);
void nfs4_state_shutdown(void);
void nfs4_state_shutdown_net(struct net *net);
-void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void);
bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
@@ -153,7 +153,6 @@ static inline int nfs4_state_start(void) { return 0; }
static inline int nfs4_state_start_net(struct net *net) { return 0; }
static inline void nfs4_state_shutdown(void) { }
static inline void nfs4_state_shutdown_net(struct net *net) { }
-static inline void nfs4_reset_lease(time_t leasetime) { }
static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
static inline char * nfs4_recoverydir(void) {return NULL; }
static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
@@ -280,7 +279,8 @@ void nfsd_lockd_shutdown(void);
#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP)
#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED)
#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS)
-#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)
+#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)
+#define nfserr_file_open cpu_to_be32(NFS4ERR_FILE_OPEN)
/* error codes for internal use */
/* if a request fails due to kmalloc failure, it gets dropped.
@@ -386,6 +386,37 @@ void nfsd_lockd_shutdown(void);
extern const u32 nfsd_suppattrs[3][3];
+/*
+ * Fill @netaddr from socket address @addr.
+ *
+ * The netid is set to "tcp" for AF_INET and "tcp6" for AF_INET6; any other
+ * family returns nfserr_inval.  The address text is whatever rpc_ntop()
+ * produces, with the port appended as ".<high byte>.<low byte>".
+ * Returns 0 on success.
+ *
+ * NOTE(review): if the port suffix is truncated this only warns, and
+ * addr_len is still set from the untruncated length -- confirm that this
+ * cannot happen with the buffer sizes in use.
+ */
+static inline __be32 nfsd4_set_netaddr(struct sockaddr *addr,
+				       struct nfs42_netaddr *netaddr)
+{
+	unsigned int port;
+	size_t addr_chars, port_chars, room;
+
+	if (addr->sa_family == AF_INET) {
+		port = ntohs(((struct sockaddr_in *)addr)->sin_port);
+		sprintf(netaddr->netid, "tcp");
+		netaddr->netid_len = 3;
+	} else if (addr->sa_family == AF_INET6) {
+		port = ntohs(((struct sockaddr_in6 *)addr)->sin6_port);
+		sprintf(netaddr->netid, "tcp6");
+		netaddr->netid_len = 4;
+	} else {
+		return nfserr_inval;
+	}
+
+	addr_chars = rpc_ntop(addr, netaddr->addr, sizeof(netaddr->addr));
+
+	/* space remaining after the presentation address, NUL included */
+	room = RPCBIND_MAXUADDRLEN + 1 - addr_chars;
+	port_chars = snprintf(netaddr->addr + addr_chars, room,
+			      ".%u.%u", port >> 8, port & 0xff);
+	WARN_ON(port_chars >= room);
+
+	netaddr->addr_len = addr_chars + port_chars;
+	return 0;
+}
+
static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
{
return !((bm1[0] & ~bm2[0]) ||
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 755e256a9103..56cfbc361561 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -35,15 +35,15 @@ typedef struct svc_fh {
bool fh_locked; /* inode locked by us */
bool fh_want_write; /* remount protection taken */
-
+ int fh_flags; /* FH flags */
#ifdef CONFIG_NFSD_V3
bool fh_post_saved; /* post-op attrs saved */
bool fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
- struct timespec fh_pre_mtime; /* mtime before oper */
- struct timespec fh_pre_ctime; /* ctime before oper */
+ struct timespec64 fh_pre_mtime; /* mtime before oper */
+ struct timespec64 fh_pre_ctime; /* ctime before oper */
/*
* pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
* to find out if it is valid.
@@ -56,6 +56,9 @@ typedef struct svc_fh {
#endif /* CONFIG_NFSD_V3 */
} svc_fh;
+#define NFSD4_FH_FOREIGN (1<<0)
+#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
+#define HAS_FH_FLAG(c, f) ((c)->fh_flags & (f))
enum nfsd_fsid {
FSID_DEV = 0,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0d20fd161225..543bbe0a556e 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -94,7 +94,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
* Solaris, at least, doesn't seem to care what the time
* request is. We require it be within 30 minutes of now.
*/
- time_t delta = iap->ia_atime.tv_sec - get_seconds();
+ time64_t delta = iap->ia_atime.tv_sec - ktime_get_real_seconds();
nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
if (nfserr)
@@ -113,7 +113,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
}
}
- nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
+ nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0);
done:
return nfsd_return_attrs(nfserr, resp);
}
@@ -172,6 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
struct nfsd_readargs *argp = rqstp->rq_argp;
struct nfsd_readres *resp = rqstp->rq_resp;
__be32 nfserr;
+ u32 eof;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
@@ -195,7 +196,8 @@ nfsd_proc_read(struct svc_rqst *rqstp)
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
rqstp->rq_vec, argp->vlen,
- &resp->count);
+ &resp->count,
+ &eof);
if (nfserr) return nfserr;
return fh_getattr(&resp->fh, &resp->stat);
@@ -224,7 +226,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
return nfserr_io;
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, rqstp->rq_vec, nvecs,
- &cnt, NFS_DATA_SYNC);
+ &cnt, NFS_DATA_SYNC, NULL);
return nfsd_return_attrs(nfserr, resp);
}
@@ -378,7 +380,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid)
- nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0);
+ nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time64_t)0);
}
out_unlock:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 18d94ea984ba..3b77b904212d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -27,9 +27,16 @@
#include "cache.h"
#include "vfs.h"
#include "netns.h"
+#include "filecache.h"
#define NFSDDBG_FACILITY NFSDDBG_SVC
+bool inter_copy_offload_enable;
+EXPORT_SYMBOL_GPL(inter_copy_offload_enable);
+module_param(inter_copy_offload_enable, bool, 0644);
+MODULE_PARM_DESC(inter_copy_offload_enable,
+ "Enable inter server to server copy offload. Default: false");
+
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -94,12 +101,11 @@ static const struct svc_version *nfsd_acl_version[] = {
#define NFSD_ACL_MINVERS 2
#define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version)
-static const struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
static struct svc_program nfsd_acl_program = {
.pg_prog = NFS_ACL_PROGRAM,
.pg_nvers = NFSD_ACL_NRVERS,
- .pg_vers = nfsd_acl_versions,
+ .pg_vers = nfsd_acl_version,
.pg_name = "nfsacl",
.pg_class = "nfsd",
.pg_stats = &nfsd_acl_svcstats,
@@ -313,22 +319,17 @@ static int nfsd_startup_generic(int nrservs)
if (nfsd_users++)
return 0;
- /*
- * Readahead param cache - will no-op if it already exists.
- * (Note therefore results will be suboptimal if number of
- * threads is modified after nfsd start.)
- */
- ret = nfsd_racache_init(2*nrservs);
+ ret = nfsd_file_cache_init();
if (ret)
goto dec_users;
ret = nfs4_state_start();
if (ret)
- goto out_racache;
+ goto out_file_cache;
return 0;
-out_racache:
- nfsd_racache_shutdown();
+out_file_cache:
+ nfsd_file_cache_shutdown();
dec_users:
nfsd_users--;
return ret;
@@ -340,7 +341,7 @@ static void nfsd_shutdown_generic(void)
return;
nfs4_state_shutdown();
- nfsd_racache_shutdown();
+ nfsd_file_cache_shutdown();
}
static bool nfsd_needs_lockd(struct nfsd_net *nn)
@@ -348,6 +349,35 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
}
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+{
+ int seq = 0;
+
+ do {
+ read_seqbegin_or_lock(&nn->boot_lock, &seq);
+ /*
+ * This is opaque to the client, so no need to byte-swap. Use
+ * __force to keep sparse happy. y2038 time_t overflow is
+ * irrelevant in this usage.
+ */
+ verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+ verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
+ } while (need_seqretry(&nn->boot_lock, seq));
+ done_seqretry(&nn->boot_lock, seq);
+}
+
+static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+{
+ ktime_get_real_ts64(&nn->nfssvc_boot);
+}
+
+void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+{
+ write_seqlock(&nn->boot_lock);
+ nfsd_reset_boot_verifier_locked(nn);
+ write_sequnlock(&nn->boot_lock);
+}
+
static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -367,20 +397,25 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
ret = lockd_up(net, cred);
if (ret)
goto out_socks;
- nn->lockd_up = 1;
+ nn->lockd_up = true;
}
- ret = nfs4_state_start_net(net);
+ ret = nfsd_file_cache_start_net(net);
if (ret)
goto out_lockd;
+ ret = nfs4_state_start_net(net);
+ if (ret)
+ goto out_filecache;
nn->nfsd_net_up = true;
return 0;
+out_filecache:
+ nfsd_file_cache_shutdown_net(net);
out_lockd:
if (nn->lockd_up) {
lockd_down(net);
- nn->lockd_up = 0;
+ nn->lockd_up = false;
}
out_socks:
nfsd_shutdown_generic();
@@ -391,10 +426,11 @@ static void nfsd_shutdown_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ nfsd_file_cache_shutdown_net(net);
nfs4_state_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
- nn->lockd_up = 0;
+ nn->lockd_up = false;
}
nn->nfsd_net_up = false;
nfsd_shutdown_generic();
@@ -599,7 +635,7 @@ int nfsd_create_serv(struct net *net)
#endif
}
atomic_inc(&nn->ntf_refcnt);
- ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
+ nfsd_reset_boot_verifier(nn);
return 0;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 5dbd16946e8e..68d3f30ee760 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -56,6 +56,14 @@ typedef struct {
stateid_opaque_t si_opaque;
} stateid_t;
+typedef struct {
+ stateid_t stid;
+#define NFS4_COPY_STID 1
+#define NFS4_COPYNOTIFY_STID 2
+ unsigned char sc_type;
+ refcount_t sc_count;
+} copy_stateid_t;
+
#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
#define STATEID_VAL(s) \
(s)->si_opaque.so_clid.cl_boot, \
@@ -96,6 +104,7 @@ struct nfs4_stid {
#define NFS4_REVOKED_DELEG_STID 16
#define NFS4_CLOSED_DELEG_STID 32
#define NFS4_LAYOUT_STID 64
+ struct list_head sc_cp_list;
unsigned char sc_type;
stateid_t sc_stateid;
spinlock_t sc_lock;
@@ -104,6 +113,17 @@ struct nfs4_stid {
void (*sc_free)(struct nfs4_stid *);
};
+/* Keep a list of the stateids issued by COPY_NOTIFY, and associate each one
+ * with its parent OPEN/LOCK/DELEG stateid.
+ */
+struct nfs4_cpntf_state {
+ copy_stateid_t cp_stateid;
+ struct list_head cp_list; /* per parent nfs4_stid */
+ stateid_t cp_p_stateid; /* copy of parent's stateid */
+ clientid_t cp_p_clid; /* copy of parent's clid */
+ time64_t cpntf_time; /* last time stateid used */
+};
+
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -132,7 +152,7 @@ struct nfs4_delegation {
struct list_head dl_recall_lru; /* delegation recalled */
struct nfs4_clnt_odstate *dl_clnt_odstate;
u32 dl_type;
- time_t dl_time;
+ time64_t dl_time;
/* For recall: */
int dl_retries;
struct nfsd4_callback dl_recall;
@@ -310,7 +330,7 @@ struct nfs4_client {
#endif
struct xdr_netobj cl_name; /* id generated by client */
nfs4_verifier cl_verifier; /* generated by client */
- time_t cl_time; /* time of last lease renewal */
+ time64_t cl_time; /* time of last lease renewal */
struct sockaddr_storage cl_addr; /* client ipaddress */
bool cl_mach_cred; /* SP4_MACH_CRED in force */
struct svc_cred cl_cred; /* setclientid principal */
@@ -320,7 +340,7 @@ struct nfs4_client {
/* NFSv4.1 client implementation id: */
struct xdr_netobj cl_nii_domain;
struct xdr_netobj cl_nii_name;
- struct timespec cl_nii_time;
+ struct timespec64 cl_nii_time;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
@@ -367,6 +387,7 @@ struct nfs4_client {
struct net *net;
struct list_head async_copies; /* list of async copies */
spinlock_t async_lock; /* lock for async copies */
+ atomic_t cl_cb_inflight; /* Outstanding callbacks */
};
/* struct nfs4_client_reset
@@ -378,6 +399,7 @@ struct nfs4_client_reclaim {
struct list_head cr_strhash; /* hash by cr_name */
struct nfs4_client *cr_clp; /* pointer to associated clp */
struct xdr_netobj cr_name; /* recovery dir name */
+ struct xdr_netobj cr_princhash;
};
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
@@ -447,7 +469,7 @@ struct nfs4_openowner {
*/
struct list_head oo_close_lru;
struct nfs4_ol_stateid *oo_last_closed_stid;
- time_t oo_time; /* time of placement on so_close_lru */
+ time64_t oo_time; /* time of placement on so_close_lru */
#define NFS4_OO_CONFIRMED 1
unsigned char oo_flags;
};
@@ -506,7 +528,7 @@ struct nfs4_file {
};
struct list_head fi_clnt_odstate;
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
- struct file * fi_fds[3];
+ struct nfsd_file *fi_fds[3];
/*
* Each open or lock stateid contributes 0-4 to the counts
* below depending on which bits are set in st_access_bitmap:
@@ -516,7 +538,7 @@ struct nfs4_file {
*/
atomic_t fi_access[2];
u32 fi_share_deny;
- struct file *fi_deleg_file;
+ struct nfsd_file *fi_deleg_file;
int fi_delegees;
struct knfsd_fh fi_fhandle;
bool fi_had_conflict;
@@ -565,7 +587,7 @@ struct nfs4_layout_stateid {
spinlock_t ls_lock;
struct list_head ls_layouts;
u32 ls_layout_type;
- struct file *ls_file;
+ struct nfsd_file *ls_file;
struct nfsd4_callback ls_recall;
stateid_t ls_recall_sid;
bool ls_recalled;
@@ -604,7 +626,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
struct nfsd4_blocked_lock {
struct list_head nbl_list;
struct list_head nbl_lru;
- unsigned long nbl_time;
+ time64_t nbl_time;
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
@@ -616,14 +638,17 @@ struct nfsd4_copy;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
- stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
+ stateid_t *stateid, int flags, struct nfsd_file **filp,
+ struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
-void nfs4_free_cp_state(struct nfsd4_copy *copy);
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
+void nfs4_free_copy_state(struct nfsd4_copy *copy);
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_stid *p_stid);
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
@@ -645,7 +670,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
- struct nfsd_net *nn);
+ struct xdr_netobj princhash, struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
struct nfs4_file *find_file(struct knfsd_fh *fh);
@@ -653,11 +678,16 @@ void put_nfs4_file(struct nfs4_file *fi);
extern void nfs4_put_copy(struct nfsd4_copy *copy);
extern struct nfsd4_copy *
find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
+extern void nfs4_put_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_cpntf_state *cps);
+extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_client *clp,
+ struct nfs4_cpntf_state **cps);
static inline void get_nfs4_file(struct nfs4_file *fi)
{
refcount_inc(&fi->fi_ref);
}
-struct file *find_any_file(struct nfs4_file *f);
+struct nfsd_file *find_any_file(struct nfs4_file *f);
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 9bce3b913189..b1bc582b0493 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -84,17 +84,17 @@ static int nfsd_proc_open(struct inode *inode, struct file *file)
return single_open(file, nfsd_proc_show, NULL);
}
-static const struct file_operations nfsd_proc_fops = {
- .open = nfsd_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+static const struct proc_ops nfsd_proc_ops = {
+ .proc_open = nfsd_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
};
void
nfsd_stat_init(void)
{
- svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops);
+ svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
}
void
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 80933e4334d8..06dd0d337049 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -126,6 +126,8 @@ DEFINE_NFSD_ERR_EVENT(read_err);
DEFINE_NFSD_ERR_EVENT(write_err);
#include "state.h"
+#include "filecache.h"
+#include "vfs.h"
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
@@ -164,6 +166,150 @@ DEFINE_STATEID_EVENT(layout_recall_done);
DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release);
+TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
+TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
+TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
+TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
+TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
+
+#define show_nf_flags(val) \
+ __print_flags(val, "|", \
+ { 1 << NFSD_FILE_HASHED, "HASHED" }, \
+ { 1 << NFSD_FILE_PENDING, "PENDING" }, \
+ { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
+ { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
+ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
+
+/* FIXME: This should probably be fleshed out in the future. */
+#define show_nf_may(val) \
+ __print_flags(val, "|", \
+ { NFSD_MAY_READ, "READ" }, \
+ { NFSD_MAY_WRITE, "WRITE" }, \
+ { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" })
+
+DECLARE_EVENT_CLASS(nfsd_file_class,
+ TP_PROTO(struct nfsd_file *nf),
+ TP_ARGS(nf),
+ TP_STRUCT__entry(
+ __field(unsigned int, nf_hashval)
+ __field(void *, nf_inode)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned char, nf_may)
+ __field(struct file *, nf_file)
+ ),
+ TP_fast_assign(
+ __entry->nf_hashval = nf->nf_hashval;
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+ TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
+ __entry->nf_hashval,
+ __entry->nf_inode,
+ __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nf_may(__entry->nf_may),
+ __entry->nf_file)
+)
+
+#define DEFINE_NFSD_FILE_EVENT(name) \
+DEFINE_EVENT(nfsd_file_class, name, \
+ TP_PROTO(struct nfsd_file *nf), \
+ TP_ARGS(nf))
+
+DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
+
+TRACE_EVENT(nfsd_file_acquire,
+ TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
+ struct inode *inode, unsigned int may_flags,
+ struct nfsd_file *nf, __be32 status),
+
+ TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
+
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(unsigned int, hash)
+ __field(void *, inode)
+ __field(unsigned int, may_flags)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned char, nf_may)
+ __field(struct file *, nf_file)
+ __field(u32, status)
+ ),
+
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->hash = hash;
+ __entry->inode = inode;
+ __entry->may_flags = may_flags;
+ __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0;
+ __entry->nf_flags = nf ? nf->nf_flags : 0;
+ __entry->nf_may = nf ? nf->nf_may : 0;
+ __entry->nf_file = nf ? nf->nf_file : NULL;
+ __entry->status = be32_to_cpu(status);
+ ),
+
+ TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
+ __entry->xid, __entry->hash, __entry->inode,
+ show_nf_may(__entry->may_flags), __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nf_may(__entry->nf_may), __entry->nf_file,
+ __entry->status)
+);
+
+DECLARE_EVENT_CLASS(nfsd_file_search_class,
+ TP_PROTO(struct inode *inode, unsigned int hash, int found),
+ TP_ARGS(inode, hash, found),
+ TP_STRUCT__entry(
+ __field(struct inode *, inode)
+ __field(unsigned int, hash)
+ __field(int, found)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->hash = hash;
+ __entry->found = found;
+ ),
+ TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
+ __entry->inode, __entry->found)
+);
+
+#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
+DEFINE_EVENT(nfsd_file_search_class, name, \
+ TP_PROTO(struct inode *inode, unsigned int hash, int found), \
+ TP_ARGS(inode, hash, found))
+
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
+
+TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+ TP_PROTO(struct inode *inode, u32 mask),
+ TP_ARGS(inode, mask),
+ TP_STRUCT__entry(
+ __field(struct inode *, inode)
+ __field(unsigned int, nlink)
+ __field(umode_t, mode)
+ __field(u32, mask)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->nlink = inode->i_nlink;
+ __entry->mode = inode->i_mode;
+ __entry->mask = mask;
+ ),
+ TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
+ __entry->nlink, __entry->mode, __entry->mask)
+);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c85783e536d5..0aa02eb18bd3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -44,38 +44,11 @@
#include "nfsd.h"
#include "vfs.h"
+#include "filecache.h"
#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
-
-/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
- * If you increase the number of cached files very much, you'll need to
- * add a hash table here.
- */
-struct raparms {
- struct raparms *p_next;
- unsigned int p_count;
- ino_t p_ino;
- dev_t p_dev;
- int p_set;
- struct file_ra_state p_ra;
- unsigned int p_hindex;
-};
-
-struct raparm_hbucket {
- struct raparms *pb_head;
- spinlock_t pb_lock;
-} ____cacheline_aligned_in_smp;
-
-#define RAPARM_HASH_BITS 4
-#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
-
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
* a mount point.
@@ -307,19 +280,25 @@ out:
* Commit metadata changes to stable storage.
*/
static int
-commit_metadata(struct svc_fh *fhp)
+commit_inode_metadata(struct inode *inode)
{
- struct inode *inode = d_inode(fhp->fh_dentry);
const struct export_operations *export_ops = inode->i_sb->s_export_op;
- if (!EX_ISSYNC(fhp->fh_export))
- return 0;
-
if (export_ops->commit_metadata)
return export_ops->commit_metadata(inode);
return sync_inode_metadata(inode, 1);
}
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+ struct inode *inode = d_inode(fhp->fh_dentry);
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+ return commit_inode_metadata(inode);
+}
+
/*
* Go over the attributes and take care of the small differences between
* NFS semantics and what Linux expects.
@@ -385,7 +364,7 @@ out_nfserrno:
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+ int check_guard, time64_t guardtime)
{
struct dentry *dentry;
struct inode *inode;
@@ -551,17 +530,39 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
-__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
- u64 dst_pos, u64 count)
+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
{
+ struct file *src = nf_src->nf_file;
+ struct file *dst = nf_dst->nf_file;
loff_t cloned;
+ __be32 ret = 0;
+ down_write(&nf_dst->nf_rwsem);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
- if (cloned < 0)
- return nfserrno(cloned);
- if (count && cloned != count)
- return nfserrno(-EINVAL);
- return 0;
+ if (cloned < 0) {
+ ret = nfserrno(cloned);
+ goto out_err;
+ }
+ if (count && cloned != count) {
+ ret = nfserrno(-EINVAL);
+ goto out_err;
+ }
+ if (sync) {
+ loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
+ int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
+
+ if (!status)
+ status = commit_inode_metadata(file_inode(src));
+ if (status < 0) {
+ nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
+ nfsd_net_id));
+ ret = nfserrno(status);
+ }
+ }
+out_err:
+ up_write(&nf_dst->nf_rwsem);
+ return ret;
}
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
@@ -699,7 +700,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
}
#endif /* CONFIG_NFSD_V3 */
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int nfsd_open_break_lease(struct inode *inode, int access)
{
unsigned int mode;
@@ -715,8 +716,8 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
* and additional flags.
* N.B. After this call fhp needs an fh_put
*/
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+static __be32
+__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int may_flags, struct file **filp)
{
struct path path;
@@ -726,25 +727,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
__be32 err;
int host_err = 0;
- validate_process_creds();
-
- /*
- * If we get here, then the client has already done an "open",
- * and (hopefully) checked permission - so allow OWNER_OVERRIDE
- * in case a chmod has now revoked permission.
- *
- * Arguably we should also allow the owner override for
- * directories, but we never have and it doesn't seem to have
- * caused anyone a problem. If we were to change this, note
- * also that our filldir callbacks would need a variant of
- * lookup_one_len that doesn't check permissions.
- */
- if (type == S_IFREG)
- may_flags |= NFSD_MAY_OWNER_OVERRIDE;
- err = fh_verify(rqstp, fhp, type, may_flags);
- if (err)
- goto out;
-
path.mnt = fhp->fh_export->ex_path.mnt;
path.dentry = fhp->fh_dentry;
inode = d_inode(path.dentry);
@@ -798,67 +780,46 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
out_nfserr:
err = nfserrno(host_err);
out:
- validate_process_creds();
return err;
}
-struct raparms *
-nfsd_init_raparms(struct file *file)
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
{
- struct inode *inode = file_inode(file);
- dev_t dev = inode->i_sb->s_dev;
- ino_t ino = inode->i_ino;
- struct raparms *ra, **rap, **frap = NULL;
- int depth = 0;
- unsigned int hash;
- struct raparm_hbucket *rab;
-
- hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
- rab = &raparm_hash[hash];
-
- spin_lock(&rab->pb_lock);
- for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
- if (ra->p_ino == ino && ra->p_dev == dev)
- goto found;
- depth++;
- if (ra->p_count == 0)
- frap = rap;
- }
- depth = nfsdstats.ra_size;
- if (!frap) {
- spin_unlock(&rab->pb_lock);
- return NULL;
- }
- rap = frap;
- ra = *frap;
- ra->p_dev = dev;
- ra->p_ino = ino;
- ra->p_set = 0;
- ra->p_hindex = hash;
-found:
- if (rap != &rab->pb_head) {
- *rap = ra->p_next;
- ra->p_next = rab->pb_head;
- rab->pb_head = ra;
- }
- ra->p_count++;
- nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&rab->pb_lock);
+ __be32 err;
- if (ra->p_set)
- file->f_ra = ra->p_ra;
- return ra;
+ validate_process_creds();
+ /*
+ * If we get here, then the client has already done an "open",
+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+ * in case a chmod has now revoked permission.
+ *
+ * Arguably we should also allow the owner override for
+ * directories, but we never have and it doesn't seem to have
+ * caused anyone a problem. If we were to change this, note
+ * also that our filldir callbacks would need a variant of
+ * lookup_one_len that doesn't check permissions.
+ */
+ if (type == S_IFREG)
+ may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+ err = fh_verify(rqstp, fhp, type, may_flags);
+ if (!err)
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ validate_process_creds();
+ return err;
}
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
+__be32
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
{
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+ __be32 err;
- spin_lock(&rab->pb_lock);
- ra->p_ra = file->f_ra;
- ra->p_set = 1;
- ra->p_count--;
- spin_unlock(&rab->pb_lock);
+ validate_process_creds();
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ validate_process_creds();
+ return err;
}
/*
@@ -901,12 +862,23 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
+ size_t expected)
+{
+ if (expected != 0 && len == 0)
+ return 1;
+ if (offset+len >= i_size_read(file_inode(file)))
+ return 1;
+ return 0;
+}
+
static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- unsigned long *count, int host_err)
+ unsigned long *count, u32 *eof, ssize_t host_err)
{
if (host_err >= 0) {
nfsdstats.io_read += host_err;
+ *eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err;
fsnotify_access(file);
trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -918,7 +890,8 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset, unsigned long *count)
+ struct file *file, loff_t offset, unsigned long *count,
+ u32 *eof)
{
struct splice_desc sd = {
.len = 0,
@@ -926,25 +899,27 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
.pos = offset,
.u.data = rqstp,
};
- int host_err;
+ ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *count)
+ struct kvec *vec, int vlen, unsigned long *count,
+ u32 *eof)
{
struct iov_iter iter;
- int host_err;
+ loff_t ppos = offset;
+ ssize_t host_err;
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
iov_iter_kvec(&iter, READ, vec, vlen, *count);
- host_err = vfs_iter_read(file, &iter, &offset, 0);
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+ host_err = vfs_iter_read(file, &iter, &ppos, 0);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
/*
@@ -985,10 +960,12 @@ static int wait_for_concurrent_writes(struct file *file)
}
__be32
-nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int stable)
+ unsigned long *cnt, int stable,
+ __be32 *verf)
{
+ struct file *file = nf->nf_file;
struct svc_export *exp;
struct iov_iter iter;
__be32 nfserr;
@@ -1019,14 +996,37 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
flags |= RWF_SYNC;
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- if (host_err < 0)
+ if (flags & RWF_SYNC) {
+ down_write(&nf->nf_rwsem);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ if (host_err < 0)
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ up_write(&nf->nf_rwsem);
+ } else {
+ down_read(&nf->nf_rwsem);
+ if (verf)
+ nfsd_copy_boot_verifier(verf,
+ net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ up_read(&nf->nf_rwsem);
+ }
+ if (host_err < 0) {
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
goto out_nfserr;
+ }
+ *cnt = host_err;
nfsdstats.io_write += *cnt;
fsnotify_modify(file);
- if (stable && use_wgather)
+ if (stable && use_wgather) {
host_err = wait_for_concurrent_writes(file);
+ if (host_err < 0)
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ }
out_nfserr:
if (host_err >= 0) {
@@ -1047,27 +1047,25 @@ out_nfserr:
* N.B. After this call fhp needs an fh_put
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
+ u32 *eof)
{
+ struct nfsd_file *nf;
struct file *file;
- struct raparms *ra;
__be32 err;
trace_nfsd_read_start(rqstp, fhp, offset, *count);
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
if (err)
return err;
- ra = nfsd_init_raparms(file);
-
+ file = nf->nf_file;
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
- err = nfsd_splice_read(rqstp, fhp, file, offset, count);
+ err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
else
- err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
+ err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
- if (ra)
- nfsd_put_raparams(file, ra);
- fput(file);
+ nfsd_file_put(nf);
trace_nfsd_read_done(rqstp, fhp, offset, *count);
@@ -1081,19 +1079,21 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt, int stable)
+ struct kvec *vec, int vlen, unsigned long *cnt, int stable,
+ __be32 *verf)
{
- struct file *file = NULL;
- __be32 err = 0;
+ struct nfsd_file *nf;
+ __be32 err;
trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
if (err)
goto out;
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
- fput(file);
+ err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
+ vlen, cnt, stable, verf);
+ nfsd_file_put(nf);
out:
trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
return err;
@@ -1111,11 +1111,11 @@ out:
*/
__be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count)
+ loff_t offset, unsigned long count, __be32 *verf)
{
- struct file *file;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
+ struct nfsd_file *nf;
+ loff_t end = LLONG_MAX;
+ __be32 err = nfserr_inval;
if (offset < 0)
goto out;
@@ -1125,20 +1125,34 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
}
- err = nfsd_open(rqstp, fhp, S_IFREG,
- NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
+ err = nfsd_file_acquire(rqstp, fhp,
+ NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
if (err)
goto out;
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = vfs_fsync_range(file, offset, end, 0);
-
- if (err2 != -EINVAL)
- err = nfserrno(err2);
- else
+ int err2;
+
+ down_write(&nf->nf_rwsem);
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ switch (err2) {
+ case 0:
+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+ nfsd_net_id));
+ break;
+ case -EINVAL:
err = nfserr_notsupp;
- }
+ break;
+ default:
+ err = nfserrno(err2);
+ nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+ nfsd_net_id));
+ }
+ up_write(&nf->nf_rwsem);
+ } else
+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+ nfsd_net_id));
- fput(file);
+ nfsd_file_put(nf);
out:
return err;
}
@@ -1160,7 +1174,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
/* Callers expect file metadata to be committed here */
return nfserrno(commit_metadata(resfhp));
}
@@ -1423,7 +1437,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
&& d_inode(dchild)->i_atime.tv_sec == v_atime
&& d_inode(dchild)->i_size == 0 ) {
if (created)
- *created = 1;
+ *created = true;
break;
}
/* fall through */
@@ -1432,7 +1446,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
&& d_inode(dchild)->i_atime.tv_sec == v_atime
&& d_inode(dchild)->i_size == 0 ) {
if (created)
- *created = 1;
+ *created = true;
goto set_attr;
}
/* fall through */
@@ -1449,7 +1463,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
if (created)
- *created = 1;
+ *created = true;
nfsd_check_ignore_resizing(iap);
@@ -1659,6 +1673,26 @@ out_nfserr:
goto out_unlock;
}
+static void
+nfsd_close_cached_files(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+
+ if (inode && S_ISREG(inode->i_mode))
+ nfsd_file_close_inode_sync(inode);
+}
+
+static bool
+nfsd_has_cached_files(struct dentry *dentry)
+{
+ bool ret = false;
+ struct inode *inode = d_inode(dentry);
+
+ if (inode && S_ISREG(inode->i_mode))
+ ret = nfsd_file_is_cached(inode);
+ return ret;
+}
+
/*
* Rename a file
* N.B. After this call _both_ ffhp and tfhp need an fh_put
@@ -1671,6 +1705,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
struct inode *fdir, *tdir;
__be32 err;
int host_err;
+ bool has_cached = false;
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
@@ -1689,6 +1724,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
+retry:
host_err = fh_want_write(ffhp);
if (host_err) {
err = nfserrno(host_err);
@@ -1728,11 +1764,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
goto out_dput_new;
- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
- if (!host_err) {
- host_err = commit_metadata(tfhp);
- if (!host_err)
- host_err = commit_metadata(ffhp);
+ if (nfsd_has_cached_files(ndentry)) {
+ has_cached = true;
+ goto out_dput_old;
+ } else {
+ host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+ if (!host_err) {
+ host_err = commit_metadata(tfhp);
+ if (!host_err)
+ host_err = commit_metadata(ffhp);
+ }
}
out_dput_new:
dput(ndentry);
@@ -1745,12 +1786,26 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
* as that would do the wrong thing if the two directories
* were the same, so again we do it by hand.
*/
- fill_post_wcc(ffhp);
- fill_post_wcc(tfhp);
+ if (!has_cached) {
+ fill_post_wcc(ffhp);
+ fill_post_wcc(tfhp);
+ }
unlock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = false;
fh_drop_write(ffhp);
+ /*
+ * If the target dentry has cached open files, then we need to try to
+ * close them prior to doing the rename. Flushing delayed fput
+ * shouldn't be done with locks held however, so we delay it until this
+ * point and then reattempt the whole shebang.
+ */
+ if (has_cached) {
+ has_cached = false;
+ nfsd_close_cached_files(ndentry);
+ dput(ndentry);
+ goto retry;
+ }
out:
return err;
}
@@ -1797,10 +1852,13 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
- if (type != S_IFDIR)
+ if (type != S_IFDIR) {
+ nfsd_close_cached_files(rdentry);
host_err = vfs_unlink(dirp, rdentry, NULL);
- else
+ } else {
host_err = vfs_rmdir(dirp, rdentry);
+ }
+
if (!host_err)
host_err = commit_metadata(fhp);
dput(rdentry);
@@ -1808,7 +1866,17 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
out_drop_write:
fh_drop_write(fhp);
out_nfserr:
- err = nfserrno(host_err);
+ if (host_err == -EBUSY) {
+ /* name is mounted-on. There is no perfect
+ * error status.
+ */
+ if (nfsd_v4client(rqstp))
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ } else {
+ err = nfserrno(host_err);
+ }
out:
return err;
}
@@ -2074,63 +2142,3 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
return err? nfserrno(err) : 0;
}
-
-void
-nfsd_racache_shutdown(void)
-{
- struct raparms *raparm, *last_raparm;
- unsigned int i;
-
- dprintk("nfsd: freeing readahead buffers.\n");
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- raparm = raparm_hash[i].pb_head;
- while(raparm) {
- last_raparm = raparm;
- raparm = raparm->p_next;
- kfree(last_raparm);
- }
- raparm_hash[i].pb_head = NULL;
- }
-}
-/*
- * Initialize readahead param cache
- */
-int
-nfsd_racache_init(int cache_size)
-{
- int i;
- int j = 0;
- int nperbucket;
- struct raparms **raparm = NULL;
-
-
- if (raparm_hash[0].pb_head)
- return 0;
- nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
- nperbucket = max(2, nperbucket);
- cache_size = nperbucket * RAPARM_HASH_SIZE;
-
- dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- spin_lock_init(&raparm_hash[i].pb_lock);
-
- raparm = &raparm_hash[i].pb_head;
- for (j = 0; j < nperbucket; j++) {
- *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
- if (!*raparm)
- goto out_nomem;
- raparm = &(*raparm)->p_next;
- }
- *raparm = NULL;
- }
-
- nfsdstats.ra_size = cache_size;
- return 0;
-
-out_nomem:
- dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
- nfsd_racache_shutdown();
- return -ENOMEM;
-}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index db351247892d..3eb660ad80d1 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -34,14 +34,14 @@
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
+struct nfsd_file;
+
/*
* Callback function for readdir
*/
typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
/* nfsd/vfs.c */
-int nfsd_racache_init(int);
-void nfsd_racache_shutdown(void);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -50,15 +50,16 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
- struct iattr *, int, time_t);
+ struct iattr *, int, time64_t);
int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
-__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
- u64, u64);
+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
@@ -73,26 +74,32 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long);
+ loff_t, unsigned long, __be32 *verf);
#endif /* CONFIG_NFSD_V3 */
+int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-struct raparms;
+__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+ int, struct file **);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- unsigned long *count);
+ unsigned long *count,
+ u32 *eof);
__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen,
- unsigned long *count);
+ unsigned long *count,
+ u32 *eof);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
- loff_t, struct kvec *, int, unsigned long *);
+ loff_t, struct kvec *, int, unsigned long *,
+ u32 *eof);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
- struct kvec *, int, unsigned long *, int);
+ struct kvec *, int, unsigned long *,
+ int stable, __be32 *verf);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset,
+ struct nfsd_file *nf, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
- int stable);
+ int stable, __be32 *verf);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
@@ -115,9 +122,6 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
-struct raparms *nfsd_init_raparms(struct file *file);
-void nfsd_put_raparams(struct file *file, struct raparms *ra);
-
static inline int fh_want_write(struct svc_fh *fh)
{
int ret;
@@ -152,23 +156,4 @@ static inline int nfsd_create_is_exclusive(int createmode)
|| createmode == NFS4_CREATE_EXCLUSIVE4_1;
}
-static inline bool nfsd_eof_on_read(long requested, long read,
- loff_t offset, loff_t size)
-{
- /* We assume a short read means eof: */
- if (requested > read)
- return true;
- /*
- * A non-short read might also reach end of file. The spec
- * still requires us to set eof in that case.
- *
- * Further operations may have modified the file size since
- * the read, so the following check is not atomic with the read.
- * We've only seen that cause a problem for a client in the case
- * where the read returned a count of 0 without setting eof.
- * That case was fixed by the addition of the above check.
- */
- return (offset + read >= size);
-}
-
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 2cb29e961a76..4155fd71714c 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
struct svc_fh fh;
struct iattr attrs;
int check_guard;
- time_t guardtime;
+ time64_t guardtime;
};
struct nfsd3_diropargs {
@@ -151,7 +151,7 @@ struct nfsd3_readres {
__be32 status;
struct svc_fh fh;
unsigned long count;
- int eof;
+ __u32 eof;
};
struct nfsd3_writeres {
@@ -159,6 +159,7 @@ struct nfsd3_writeres {
struct svc_fh fh;
unsigned long count;
int committed;
+ __be32 verf[2];
};
struct nfsd3_renameres {
@@ -223,6 +224,7 @@ struct nfsd3_pathconfres {
struct nfsd3_commitres {
__be32 status;
struct svc_fh fh;
+ __be32 verf[2];
};
struct nfsd3_getaclres {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d64c870f998a..db63d39b1507 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -46,9 +46,9 @@
#define CURRENT_STATE_ID_FLAG (1<<0)
#define SAVED_STATE_ID_FLAG (1<<1)
-#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f))
-#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f))
-#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f))
+#define SET_CSTATE_FLAG(c, f) ((c)->sid_flags |= (f))
+#define HAS_CSTATE_FLAG(c, f) ((c)->sid_flags & (f))
+#define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f))
struct nfsd4_compound_state {
struct svc_fh current_fh;
@@ -221,6 +221,7 @@ struct nfsd4_lookup {
struct nfsd4_putfh {
u32 pf_fhlen; /* request */
char *pf_fhval; /* request */
+ bool no_verify; /* represents foreign fh */
};
struct nfsd4_open {
@@ -273,15 +274,14 @@ struct nfsd4_open_downgrade {
struct nfsd4_read {
- stateid_t rd_stateid; /* request */
- u64 rd_offset; /* request */
- u32 rd_length; /* request */
- int rd_vlen;
- struct file *rd_filp;
- bool rd_tmp_file;
+ stateid_t rd_stateid; /* request */
+ u64 rd_offset; /* request */
+ u32 rd_length; /* request */
+ int rd_vlen;
+ struct nfsd_file *rd_nf;
- struct svc_rqst *rd_rqstp; /* response */
- struct svc_fh * rd_fhp; /* response */
+ struct svc_rqst *rd_rqstp; /* response */
+ struct svc_fh *rd_fhp; /* response */
};
struct nfsd4_readdir {
@@ -519,11 +519,13 @@ struct nfsd42_write_res {
struct nfsd4_copy {
/* request */
- stateid_t cp_src_stateid;
- stateid_t cp_dst_stateid;
- u64 cp_src_pos;
- u64 cp_dst_pos;
- u64 cp_count;
+ stateid_t cp_src_stateid;
+ stateid_t cp_dst_stateid;
+ u64 cp_src_pos;
+ u64 cp_dst_pos;
+ u64 cp_count;
+ struct nl4_server cp_src;
+ bool cp_intra;
/* both */
bool cp_synchronous;
@@ -538,16 +540,21 @@ struct nfsd4_copy {
struct nfs4_client *cp_clp;
- struct file *file_src;
- struct file *file_dst;
+ struct nfsd_file *nf_src;
+ struct nfsd_file *nf_dst;
- stateid_t cp_stateid;
+ copy_stateid_t cp_stateid;
struct list_head copies;
struct task_struct *copy_task;
refcount_t refcount;
bool stopped;
+
+ struct vfsmount *ss_mnt;
+ struct nfs_fh c_fh;
+ nfs4_stateid stateid;
};
+extern bool inter_copy_offload_enable;
struct nfsd4_seek {
/* request */
@@ -569,6 +576,18 @@ struct nfsd4_offload_status {
u32 status;
};
+struct nfsd4_copy_notify {
+ /* request */
+ stateid_t cpn_src_stateid;
+ struct nl4_server cpn_dst;
+
+ /* response */
+ stateid_t cpn_cnr_stateid;
+ u64 cpn_sec;
+ u32 cpn_nsec;
+ struct nl4_server cpn_src;
+};
+
struct nfsd4_op {
int opnum;
const struct nfsd4_operation * opdesc;
@@ -628,6 +647,7 @@ struct nfsd4_op {
struct nfsd4_clone clone;
struct nfsd4_copy copy;
struct nfsd4_offload_status offload_status;
+ struct nfsd4_copy_notify copy_notify;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
OpenPOWER on IntegriCloud