summaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/nfs
downloadblackbird-op-linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz
blackbird-op-linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Makefile15
-rw-r--r--fs/nfs/callback.c187
-rw-r--r--fs/nfs/callback.h70
-rw-r--r--fs/nfs/callback_proc.c85
-rw-r--r--fs/nfs/callback_xdr.c481
-rw-r--r--fs/nfs/delegation.c342
-rw-r--r--fs/nfs/delegation.h57
-rw-r--r--fs/nfs/dir.c1562
-rw-r--r--fs/nfs/direct.c808
-rw-r--r--fs/nfs/file.c484
-rw-r--r--fs/nfs/idmap.c498
-rw-r--r--fs/nfs/inode.c2003
-rw-r--r--fs/nfs/mount_clnt.c183
-rw-r--r--fs/nfs/nfs2xdr.c711
-rw-r--r--fs/nfs/nfs3proc.c859
-rw-r--r--fs/nfs/nfs3xdr.c1023
-rw-r--r--fs/nfs/nfs4proc.c2786
-rw-r--r--fs/nfs/nfs4renewd.c148
-rw-r--r--fs/nfs/nfs4state.c932
-rw-r--r--fs/nfs/nfs4xdr.c4034
-rw-r--r--fs/nfs/nfsroot.c513
-rw-r--r--fs/nfs/pagelist.c309
-rw-r--r--fs/nfs/proc.c655
-rw-r--r--fs/nfs/read.c618
-rw-r--r--fs/nfs/symlink.c117
-rw-r--r--fs/nfs/unlink.c227
-rw-r--r--fs/nfs/write.c1431
27 files changed, 21138 insertions, 0 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
new file mode 100644
index 000000000000..b4baa031edf4
--- /dev/null
+++ b/fs/nfs/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for the Linux nfs filesystem routines.
+#
+
+# nfs.o is built when CONFIG_NFS_FS is set.
+obj-$(CONFIG_NFS_FS) += nfs.o
+
+# Objects that are always part of nfs.o.
+nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \
+ proc.o read.o symlink.o unlink.o write.o
+# Optional objects: each list is appended to nfs-y only when the
+# corresponding config symbol evaluates to y.
+nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
+nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
+nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
+ delegation.o idmap.o \
+ callback.o callback_xdr.o callback_proc.o
+nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
+nfs-objs := $(nfs-y)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
new file mode 100644
index 000000000000..560d6175dd58
--- /dev/null
+++ b/fs/nfs/callback.c
@@ -0,0 +1,187 @@
+/*
+ * linux/fs/nfs/callback.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback handling
+ */
+
+#include <linux/config.h>
+#include <linux/completion.h>
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+/* Bookkeeping for the callback service thread. */
+struct nfs_callback_data {
+ unsigned int users; /* incremented by nfs_callback_up(), decremented by nfs_callback_down() */
+ struct svc_serv *serv; /* RPC service created in nfs_callback_up() */
+ pid_t pid; /* pid of the callback thread; 0 while it is not running */
+ struct completion started; /* completed by the thread once it has started */
+ struct completion stopped; /* completed by the thread just before it exits */
+};
+
+/* State of the callback service thread. */
+static struct nfs_callback_data nfs_callback_info;
+/* Taken by nfs_callback_up() and nfs_callback_down() around their work. */
+static DECLARE_MUTEX(nfs_callback_sema);
+/* Forward declaration; the initialiser is at the end of this file. */
+static struct svc_program nfs4_callback_program;
+
+/* Port of the callback TCP socket in host byte order; set by nfs_callback_up(). */
+unsigned short nfs_callback_tcpport;
+
+/*
+ * This is the callback kernel thread.
+ *
+ * Started through svc_create_thread() by nfs_callback_up(). It records its
+ * pid, signals 'started', then receives and processes callback requests
+ * until there are no users left and a signal is pending, or until
+ * svc_recv() fails with an error other than -EAGAIN/-EINTR. On the way
+ * out it clears the pid and signals 'stopped'.
+ */
+static void nfs_callback_svc(struct svc_rqst *rqstp)
+{
+ struct svc_serv *serv = rqstp->rq_server;
+ int err;
+
+ /* Module reference is dropped again by module_put_and_exit() below. */
+ __module_get(THIS_MODULE);
+ lock_kernel();
+
+ nfs_callback_info.pid = current->pid;
+ daemonize("nfsv4-svc");
+ /* Process request with signals blocked, but allow SIGKILL. */
+ allow_signal(SIGKILL);
+
+ /* Wake up nfs_callback_up(), which waits for this completion. */
+ complete(&nfs_callback_info.started);
+
+ /* Keep serving while there are users; once there are none, stop on a signal. */
+ while (nfs_callback_info.users != 0 || !signalled()) {
+ /*
+ * Listen for a request on the socket
+ */
+ err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT);
+ if (err == -EAGAIN || err == -EINTR)
+ continue;
+ if (err < 0) {
+ printk(KERN_WARNING
+ "%s: terminating on error %d\n",
+ __FUNCTION__, -err);
+ break;
+ }
+ dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
+ NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
+ svc_process(serv, rqstp);
+ }
+
+ /* pid == 0 tells nfs_callback_up()/nfs_callback_down() that no thread is running. */
+ nfs_callback_info.pid = 0;
+ /* Wake up nfs_callback_down(), which waits for this completion. */
+ complete(&nfs_callback_info.stopped);
+ unlock_kernel();
+ module_put_and_exit(0);
+}
+
+/*
+ * Bring up the server process if it is not already up.
+ *
+ * Always takes a user reference. Only the first user, and only when no
+ * thread is running, creates the RPC service, its TCP socket and the
+ * service thread, and then waits for the thread to signal 'started'.
+ *
+ * Returns 0 on success, -ENOMEM if svc_create() fails, or the negative
+ * value returned by svc_makesock()/svc_create_thread(). On failure the
+ * user reference taken here is dropped again.
+ */
+int nfs_callback_up(void)
+{
+ struct svc_serv *serv;
+ struct svc_sock *svsk;
+ int ret = 0;
+
+ lock_kernel();
+ down(&nfs_callback_sema);
+ /* The user count is bumped even when we skip the setup below. */
+ if (nfs_callback_info.users++ || nfs_callback_info.pid != 0)
+ goto out;
+ init_completion(&nfs_callback_info.started);
+ init_completion(&nfs_callback_info.stopped);
+ serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE);
+ ret = -ENOMEM;
+ if (!serv)
+ goto out_err;
+ /* FIXME: We don't want to register this socket with the portmapper */
+ ret = svc_makesock(serv, IPPROTO_TCP, 0);
+ if (ret < 0)
+ goto out_destroy;
+ /* Read the port back from the first permanent socket of the service. */
+ if (!list_empty(&serv->sv_permsocks)) {
+ svsk = list_entry(serv->sv_permsocks.next,
+ struct svc_sock, sk_list);
+ nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
+ dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
+ } else
+ BUG();
+ ret = svc_create_thread(nfs_callback_svc, serv);
+ if (ret < 0)
+ goto out_destroy;
+ nfs_callback_info.serv = serv;
+ /* nfs_callback_svc() completes 'started' after recording its pid. */
+ wait_for_completion(&nfs_callback_info.started);
+out:
+ up(&nfs_callback_sema);
+ unlock_kernel();
+ return ret;
+out_destroy:
+ svc_destroy(serv);
+out_err:
+ /* Undo the user reference taken at the top. */
+ nfs_callback_info.users--;
+ goto out;
+}
+
+/*
+ * Drop one user of the callback server.
+ *
+ * When the last user goes away and the service thread is still running,
+ * send it SIGKILL and wait until it has signalled 'stopped'.
+ * Always returns 0.
+ */
+int nfs_callback_down(void)
+{
+	lock_kernel();
+	down(&nfs_callback_sema);
+	nfs_callback_info.users--;
+	if (nfs_callback_info.users == 0 && nfs_callback_info.pid != 0) {
+		kill_proc(nfs_callback_info.pid, SIGKILL, 1);
+		wait_for_completion(&nfs_callback_info.stopped);
+	}
+	up(&nfs_callback_sema);
+	unlock_kernel();
+	return 0;
+}
+
+/*
+ * Authenticate an incoming callback request.
+ *
+ * Requests whose source address does not match a client known to
+ * nfs4_find_client() are dropped (SVC_DROP). Otherwise the decision
+ * depends on the RPC auth flavour:
+ *   RPC_AUTH_NULL - accepted for CB_NULL only, denied for anything else
+ *   RPC_AUTH_UNIX - accepted
+ *   anything else - denied (RPCSEC_GSS is not handled yet)
+ *
+ * Returns SVC_OK, SVC_DENIED or SVC_DROP.
+ */
+static int nfs_callback_authenticate(struct svc_rqst *rqstp)
+{
+	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
+	struct nfs4_client *clp;
+
+	/* Don't talk to strangers */
+	clp = nfs4_find_client(addr);
+	if (clp == NULL)
+		return SVC_DROP;
+	/*
+	 * Pass the address itself to NIPQUAD(), as nfs_callback_svc() does;
+	 * passing the pointer would print the bytes of the pointer variable.
+	 */
+	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__,
+			NIPQUAD(addr->s_addr));
+	/* The lookup was only an existence check; drop the reference again. */
+	nfs4_put_client(clp);
+	switch (rqstp->rq_authop->flavour) {
+		case RPC_AUTH_NULL:
+			if (rqstp->rq_proc != CB_NULL)
+				return SVC_DENIED;
+			break;
+		case RPC_AUTH_UNIX:
+			break;
+		case RPC_AUTH_GSS:
+			/* FIXME: RPCSEC_GSS handling? */
+			/* fall through: denied until GSS is supported */
+		default:
+			return SVC_DENIED;
+	}
+	return SVC_OK;
+}
+
+/*
+ * Define NFS4 callback program
+ */
+/* Defined in callback_xdr.c. */
+extern struct svc_version nfs4_callback_version1;
+
+/* Version table indexed by version number; only version 1 is populated. */
+static struct svc_version *nfs4_callback_version[] = {
+ [1] = &nfs4_callback_version1,
+};
+
+/* Statistics block referenced by the program definition below. */
+static struct svc_stat nfs4_callback_stats;
+
+/* The RPC program handed to svc_create() in nfs_callback_up(). */
+static struct svc_program nfs4_callback_program = {
+ .pg_prog = NFS4_CALLBACK, /* RPC service number */
+ .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
+ .pg_vers = nfs4_callback_version, /* version table */
+ .pg_name = "NFSv4 callback", /* service name */
+ .pg_class = "nfs", /* authentication class */
+ .pg_stats = &nfs4_callback_stats,
+ .pg_authenticate = nfs_callback_authenticate,
+};
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
new file mode 100644
index 000000000000..a0db2d4f9415
--- /dev/null
+++ b/fs/nfs/callback.h
@@ -0,0 +1,70 @@
+/*
+ * linux/fs/nfs/callback.h
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback definitions
+ */
+#ifndef __LINUX_FS_NFS_CALLBACK_H
+#define __LINUX_FS_NFS_CALLBACK_H
+
+/* RPC program number of the callback service (.pg_prog in callback.c). */
+#define NFS4_CALLBACK 0x40000000
+/* Value used for .vs_xdrsize of the callback version. */
+#define NFS4_CALLBACK_XDRSIZE 2048
+/* Buffer size passed to svc_create() and used as .pc_xdrressize of CB_COMPOUND. */
+#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
+
+/* RPC procedure numbers; they index nfs4_callback_procedures1[]. */
+enum nfs4_callback_procnum {
+ CB_NULL = 0,
+ CB_COMPOUND = 1,
+};
+
+/*
+ * Operation numbers inside a CB_COMPOUND; they index callback_ops[].
+ * OP_CB_ILLEGAL is what process_op() reports for any other operation.
+ */
+enum nfs4_callback_opnum {
+ OP_CB_GETATTR = 3,
+ OP_CB_RECALL = 4,
+ OP_CB_ILLEGAL = 10044,
+};
+
+/* Decoded CB_COMPOUND request header (see decode_compound_hdr_arg()). */
+struct cb_compound_hdr_arg {
+ int taglen; /* length of the tag in bytes */
+ const char *tag; /* points into the request buffer; NULL if taglen is 0 */
+ unsigned int callback_ident;
+ unsigned nops; /* number of operations announced by the sender */
+};
+
+/* CB_COMPOUND reply header (see encode_compound_hdr_res()). */
+struct cb_compound_hdr_res {
+ uint32_t *status; /* reserved slot in the reply, filled in after processing */
+ int taglen; /* tag echoed back from the request */
+ const char *tag;
+ uint32_t *nops; /* reserved slot in the reply, filled in after processing */
+};
+
+/* Arguments of OP_CB_GETATTR. */
+struct cb_getattrargs {
+ struct sockaddr_in *addr; /* address of the caller, taken from the svc_rqst */
+ struct nfs_fh fh; /* file handle the attributes are requested for */
+ uint32_t bitmap[2]; /* requested attribute bitmap */
+};
+
+/* Result of OP_CB_GETATTR. */
+struct cb_getattrres {
+ uint32_t status; /* 0 or an NFSv4 error in network byte order */
+ uint32_t bitmap[2]; /* attributes actually returned */
+ uint64_t size;
+ uint64_t change_attr;
+ struct timespec ctime;
+ struct timespec mtime;
+};
+
+/* Arguments of OP_CB_RECALL. */
+struct cb_recallargs {
+ struct sockaddr_in *addr; /* address of the caller, taken from the svc_rqst */
+ struct nfs_fh fh; /* file handle of the delegated file */
+ nfs4_stateid stateid; /* stateid of the delegation being recalled */
+ uint32_t truncate; /* decoded from the request; not used in callback_proc.c */
+};
+
+/* Operation handlers, implemented in callback_proc.c. */
+extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+
+/* Start/stop the callback service, implemented in callback.c. */
+extern int nfs_callback_up(void);
+extern int nfs_callback_down(void);
+
+/* TCP port of the callback socket, host byte order. */
+extern unsigned short nfs_callback_tcpport;
+
+#endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
new file mode 100644
index 000000000000..ece27e42b93b
--- /dev/null
+++ b/fs/nfs/callback_proc.c
@@ -0,0 +1,85 @@
+/*
+ * linux/fs/nfs/callback_proc.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback procedures
+ */
+#include <linux/config.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+#include "delegation.h"
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+/*
+ * Handle OP_CB_GETATTR.
+ *
+ * Looks up the client by the caller's address and the inode by file
+ * handle. If the inode holds a write delegation, size, change attribute,
+ * ctime and mtime are copied into @res and the reply bitmap is set to the
+ * requested subset of those four attributes.
+ *
+ * Returns res->status: 0 on success, otherwise NFS4ERR_BADHANDLE in
+ * network byte order (with an empty bitmap).
+ */
+unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+{
+	struct nfs4_client *client;
+	struct inode *ino;
+
+	/* Pessimistic defaults; overwritten only on full success. */
+	res->bitmap[0] = 0;
+	res->bitmap[1] = 0;
+	res->status = htonl(NFS4ERR_BADHANDLE);
+
+	client = nfs4_find_client(&args->addr->sin_addr);
+	if (client == NULL)
+		goto done;
+
+	ino = nfs_delegation_find_inode(client, &args->fh);
+	if (ino != NULL) {
+		struct nfs_inode *nfsi = NFS_I(ino);
+		struct nfs_delegation *deleg;
+
+		down_read(&nfsi->rwsem);
+		deleg = nfsi->delegation;
+		/* Only a write delegation is answered. */
+		if (deleg != NULL && (deleg->type & FMODE_WRITE) != 0) {
+			res->size = i_size_read(ino);
+			res->change_attr = NFS_CHANGE_ATTR(ino);
+			res->ctime = ino->i_ctime;
+			res->mtime = ino->i_mtime;
+			res->bitmap[0] = args->bitmap[0] &
+				(FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE);
+			res->bitmap[1] = args->bitmap[1] &
+				(FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY);
+			res->status = 0;
+		}
+		up_read(&nfsi->rwsem);
+		iput(ino);
+	}
+	nfs4_put_client(client);
+done:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
+	return res->status;
+}
+
+/*
+ * Handle OP_CB_RECALL.
+ *
+ * Looks up the client by the caller's address and the inode by file
+ * handle, then asks nfs_async_inode_return_delegation() to return the
+ * delegation identified by args->stateid.
+ *
+ * Returns, in network byte order:
+ *   0                    - the return was started
+ *   NFS4ERR_BADHANDLE    - unknown client or file handle
+ *   NFS4ERR_BAD_STATEID  - the helper reported -ENOENT
+ *   NFS4ERR_RESOURCE     - any other helper failure
+ */
+unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+{
+	struct nfs4_client *client;
+	struct inode *ino;
+	unsigned status = htonl(NFS4ERR_BADHANDLE);
+	int err;
+
+	client = nfs4_find_client(&args->addr->sin_addr);
+	if (client != NULL) {
+		ino = nfs_delegation_find_inode(client, &args->fh);
+		if (ino != NULL) {
+			/* Set up a helper thread to actually return the delegation */
+			err = nfs_async_inode_return_delegation(ino, &args->stateid);
+			if (err == 0)
+				status = 0;
+			else if (err == -ENOENT)
+				status = htonl(NFS4ERR_BAD_STATEID);
+			else
+				status = htonl(NFS4ERR_RESOURCE);
+			iput(ino);
+		}
+		nfs4_put_client(client);
+	}
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status));
+	return status;
+}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
new file mode 100644
index 000000000000..d271df9df2b2
--- /dev/null
+++ b/fs/nfs/callback_xdr.c
@@ -0,0 +1,481 @@
+/*
+ * linux/fs/nfs/callback_xdr.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFSv4 callback encode/decode procedures
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include "callback.h"
+
+/*
+ * Size bounds for the reply.
+ * CB_OP_TAGLEN_MAXSZ is compared against the tag length in bytes in
+ * decode_compound_hdr_arg().
+ * NOTE(review): the *_RES_MAXSZ values only feed .res_maxsize below, which
+ * nothing in this file reads; their unit (bytes vs. 32-bit words) cannot be
+ * confirmed from here.
+ */
+#define CB_OP_TAGLEN_MAXSZ (512)
+#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
+#define CB_OP_GETATTR_BITMAP_MAXSZ (4)
+#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
+ CB_OP_GETATTR_BITMAP_MAXSZ + \
+ 2 + 2 + 3 + 3)
+#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
+
+#define NFSDBG_FACILITY NFSDBG_CALLBACK
+
+/*
+ * Generic signatures for the per-operation hooks. The real handlers take
+ * typed argument/result structures and are cast to these in callback_ops[].
+ * All return 0 or an NFSv4 status in network byte order.
+ */
+typedef unsigned (*callback_process_op_t)(void *, void *);
+typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
+typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
+
+
+/* One entry of the operation dispatch table; any hook may be NULL. */
+struct callback_op {
+ callback_process_op_t process_op; /* executes the operation */
+ callback_decode_arg_t decode_args; /* decodes the operation's arguments */
+ callback_encode_res_t encode_res; /* encodes the operation's result */
+ long res_maxsize; /* not read anywhere in this file */
+};
+
+/* Forward declaration; the table is defined after nfs4_callback_compound(). */
+static struct callback_op callback_ops[];
+
+/* CB_NULL procedure: does nothing and reports NFS4_OK (network byte order). */
+static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ return htonl(NFS4_OK);
+}
+
+/* Decoder for a procedure without arguments: only runs the argument size check. */
+static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+ return xdr_argsize_check(rqstp, p);
+}
+
+/* Encoder for a procedure without an XDR result body: only runs the result size check. */
+static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+ return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * Fetch the next @nbytes bytes of the request via xdr_inline_decode().
+ * Returns a pointer to them, or NULL (after logging a warning) when the
+ * stream cannot supply that many bytes.
+ */
+static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes)
+{
+	uint32_t *buf = xdr_inline_decode(xdr, nbytes);
+
+	if (likely(buf != NULL))
+		return buf;
+	printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n");
+	return NULL;
+}
+
+/*
+ * Decode a counted string: a 32-bit length followed by that many bytes.
+ * On success *len holds the length and *str points into the request
+ * buffer (NULL for an empty string); the bytes are not copied and not
+ * NUL-terminated here.
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) if the buffer is short.
+ */
+static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str)
+{
+	uint32_t *p = read_buf(xdr, 4);
+
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*len = ntohl(*p);
+
+	if (*len == 0) {
+		*str = NULL;
+		return 0;
+	}
+
+	p = read_buf(xdr, *len);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*str = (const char *)p;
+	return 0;
+}
+
+/*
+ * Decode a file handle: a 32-bit size followed by that many bytes.
+ * The handle is copied into @fh and the unused tail of fh->data is zeroed.
+ * Returns 0, NFS4ERR_BADHANDLE if the size exceeds NFS4_FHSIZE, or
+ * NFS4ERR_RESOURCE if the buffer is short (both in network byte order).
+ */
+static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	uint32_t *p = read_buf(xdr, 4);
+
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+
+	fh->size = ntohl(*p);
+	if (fh->size > NFS4_FHSIZE)
+		return htonl(NFS4ERR_BADHANDLE);
+
+	p = read_buf(xdr, fh->size);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+
+	memcpy(fh->data, p, fh->size);
+	/* Clear whatever follows the handle inside the fixed-size array. */
+	memset(fh->data + fh->size, 0, sizeof(fh->data) - fh->size);
+	return 0;
+}
+
+/*
+ * Decode an attribute bitmap: a 32-bit word count followed by that many
+ * 32-bit words.
+ *
+ * Only the first two words are stored, so @bitmap must have room for two
+ * entries. Words the sender did not supply are reported as zero.
+ *
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) when the buffer is
+ * short or the announced word count is absurdly large.
+ */
+static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+	uint32_t *p;
+	unsigned int attrlen;
+
+	/* Never leave stale data behind when fewer than two words arrive. */
+	bitmap[0] = 0;
+	bitmap[1] = 0;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	attrlen = ntohl(*p);
+	/*
+	 * The count comes off the wire: refuse values whose byte length
+	 * would wrap in the shift below or not fit read_buf()'s int.
+	 */
+	if (unlikely(attrlen > (INT_MAX >> 2)))
+		return htonl(NFS4ERR_RESOURCE);
+	p = read_buf(xdr, attrlen << 2);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	if (likely(attrlen > 0))
+		bitmap[0] = ntohl(*p++);
+	if (attrlen > 1)
+		bitmap[1] = ntohl(*p);
+	return 0;
+}
+
+/*
+ * Decode a stateid: 16 opaque bytes copied verbatim into stateid->data.
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) if the buffer is short.
+ */
+static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	uint32_t *raw = read_buf(xdr, 16);
+
+	if (likely(raw != NULL)) {
+		memcpy(stateid->data, raw, 16);
+		return 0;
+	}
+	return htonl(NFS4ERR_RESOURCE);
+}
+
+/*
+ * Decode the CB_COMPOUND request header: tag, minor version,
+ * callback_ident and operation count.
+ *
+ * Returns 0 on success, NFS4ERR_RESOURCE if the buffer is short or the
+ * tag is too long, or NFS4ERR_MINOR_VERS_MISMATCH if the minor version is
+ * not zero (all in network byte order). On a minor version mismatch
+ * hdr->callback_ident and hdr->nops are left untouched.
+ */
+static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
+{
+ uint32_t *p;
+ unsigned int minor_version;
+ unsigned status;
+
+ /* NOTE(review): hdr->taglen is an int but is passed as unsigned int *. */
+ status = decode_string(xdr, &hdr->taglen, &hdr->tag);
+ if (unlikely(status != 0))
+ return status;
+ /* We do not like overly long tags! */
+ /* A length above INT_MAX shows up as negative here and is rejected too. */
+ if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) {
+ printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
+ __FUNCTION__, hdr->taglen);
+ return htonl(NFS4ERR_RESOURCE);
+ }
+ /* Three 32-bit words follow: minor version, callback_ident, nops. */
+ p = read_buf(xdr, 12);
+ if (unlikely(p == NULL))
+ return htonl(NFS4ERR_RESOURCE);
+ minor_version = ntohl(*p++);
+ /* Check minor version is zero. */
+ if (minor_version != 0) {
+ printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n",
+ __FUNCTION__, minor_version);
+ return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+ }
+ hdr->callback_ident = ntohl(*p++);
+ hdr->nops = ntohl(*p);
+ return 0;
+}
+
+/*
+ * Decode the 32-bit operation number that starts every operation.
+ * Returns 0 with *op set, or NFS4ERR_RESOURCE (network byte order) if the
+ * buffer is short, in which case *op is left untouched.
+ */
+static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
+{
+	uint32_t *word = read_buf(xdr, 4);
+
+	if (likely(word != NULL)) {
+		*op = ntohl(*word);
+		return 0;
+	}
+	return htonl(NFS4ERR_RESOURCE);
+}
+
+/*
+ * Decode the arguments of OP_CB_GETATTR: file handle, then attribute
+ * bitmap. args->addr is pointed at the caller's address once the file
+ * handle has been decoded.
+ * Returns 0 or the status of the failing decoder.
+ */
+static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+{
+	unsigned status = decode_fh(xdr, &args->fh);
+
+	if (likely(status == 0)) {
+		args->addr = &rqstp->rq_addr;
+		status = decode_bitmap(xdr, args->bitmap);
+	}
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
+ * Decode the arguments of OP_CB_RECALL: stateid, truncate flag, file handle.
+ * args->addr is pointed at the caller's address.
+ *
+ * Returns 0 on success, otherwise the status of the failing step
+ * (NFS4ERR_RESOURCE or NFS4ERR_BADHANDLE, network byte order), so that
+ * process_op() does not run the recall on half-decoded arguments.
+ */
+static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+{
+	uint32_t *p;
+	unsigned status;
+
+	args->addr = &rqstp->rq_addr;
+	status = decode_stateid(xdr, &args->stateid);
+	if (unlikely(status != 0))
+		goto out;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL)) {
+		status = htonl(NFS4ERR_RESOURCE);
+		goto out;
+	}
+	args->truncate = ntohl(*p);
+	status = decode_fh(xdr, &args->fh);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	/* Propagate the decode result; it used to be discarded here. */
+	return status;
+}
+
+/*
+ * Encode a counted string: reserves 4 + @len bytes in the reply and lets
+ * xdr_encode_opaque() write length and data.
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) if no space is left.
+ */
+static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
+{
+	uint32_t *buf = xdr_reserve_space(xdr, 4 + len);
+
+	if (likely(buf != NULL)) {
+		xdr_encode_opaque(buf, str, len);
+		return 0;
+	}
+	return htonl(NFS4ERR_RESOURCE);
+}
+
+/* Attributes this client is able to return in a CB_GETATTR reply. */
+#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
+#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
+/*
+ * Encode the reply attribute bitmap, masked to the supported attributes,
+ * using the fewest words possible (2, 1 or 0).
+ *
+ * Each branch reserves one 32-bit word more than it writes. *savep is set
+ * to that extra word, which encode_getattr_res() later fills with the
+ * length of the attribute data that follows.
+ *
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) if no space is left,
+ * in which case *savep is not set.
+ */
+static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
+{
+ uint32_t bm[2];
+ uint32_t *p;
+
+ bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0);
+ bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1);
+ if (bm[1] != 0) {
+ /* word count + two bitmap words + length slot */
+ p = xdr_reserve_space(xdr, 16);
+ if (unlikely(p == NULL))
+ return htonl(NFS4ERR_RESOURCE);
+ *p++ = htonl(2);
+ *p++ = bm[0];
+ *p++ = bm[1];
+ } else if (bm[0] != 0) {
+ /* word count + one bitmap word + length slot */
+ p = xdr_reserve_space(xdr, 12);
+ if (unlikely(p == NULL))
+ return htonl(NFS4ERR_RESOURCE);
+ *p++ = htonl(1);
+ *p++ = bm[0];
+ } else {
+ /* word count + length slot */
+ p = xdr_reserve_space(xdr, 8);
+ if (unlikely(p == NULL))
+ return htonl(NFS4ERR_RESOURCE);
+ *p++ = htonl(0);
+ }
+ /* p now points at the still unwritten length slot. */
+ *savep = p;
+ return 0;
+}
+
+/*
+ * Encode the 64-bit change attribute if FATTR4_WORD0_CHANGE is set in
+ * @bitmap; otherwise encode nothing.
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) if no space is left.
+ */
+static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change)
+{
+	uint32_t *buf;
+
+	if ((bitmap[0] & FATTR4_WORD0_CHANGE) == 0)
+		return 0;
+	buf = xdr_reserve_space(xdr, 8);
+	if (unlikely(buf == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	xdr_encode_hyper(buf, change);
+	return 0;
+}
+
+/*
+ * Encode the 64-bit file size if FATTR4_WORD0_SIZE is set in @bitmap;
+ * otherwise encode nothing.
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) if no space is left.
+ */
+static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size)
+{
+	uint32_t *buf;
+
+	if ((bitmap[0] & FATTR4_WORD0_SIZE) == 0)
+		return 0;
+	buf = xdr_reserve_space(xdr, 8);
+	if (unlikely(buf == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	xdr_encode_hyper(buf, size);
+	return 0;
+}
+
+/*
+ * Encode a timestamp as 12 bytes: 64-bit seconds followed by 32-bit
+ * nanoseconds.
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) if no space is left.
+ */
+static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time)
+{
+	uint32_t *buf = xdr_reserve_space(xdr, 12);
+
+	if (unlikely(buf == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	/* xdr_encode_hyper() hands back the position after the 64-bit value. */
+	buf = xdr_encode_hyper(buf, time->tv_sec);
+	*buf = htonl(time->tv_nsec);
+	return 0;
+}
+
+/*
+ * Encode the ctime if FATTR4_WORD1_TIME_METADATA is set in @bitmap;
+ * otherwise encode nothing and return 0.
+ */
+static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+{
+	if (bitmap[1] & FATTR4_WORD1_TIME_METADATA)
+		return encode_attr_time(xdr, time);
+	return 0;
+}
+
+/*
+ * Encode the mtime if FATTR4_WORD1_TIME_MODIFY is set in @bitmap;
+ * otherwise encode nothing and return 0.
+ */
+static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time)
+{
+	if (bitmap[1] & FATTR4_WORD1_TIME_MODIFY)
+		return encode_attr_time(xdr, time);
+	return 0;
+}
+
+/*
+ * Lay out the CB_COMPOUND reply header: a status slot, the tag, and an
+ * operation-count slot. The two slots are only reserved here; their
+ * addresses are stored in @hdr so the caller can fill them in later.
+ * Returns 0, or a non-zero NFSv4 status (network byte order) if the reply
+ * buffer has no room.
+ */
+static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr)
+{
+	unsigned err;
+
+	hdr->status = xdr_reserve_space(xdr, 4);
+	if (unlikely(hdr->status == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+
+	err = encode_string(xdr, hdr->taglen, hdr->tag);
+	if (unlikely(err != 0))
+		return err;
+
+	hdr->nops = xdr_reserve_space(xdr, 4);
+	if (unlikely(hdr->nops == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+
+	return 0;
+}
+
+/*
+ * Encode the per-operation reply header: operation number followed by the
+ * operation status. @op is converted to network byte order here; @res is
+ * written as passed in.
+ * Returns 0, or NFS4ERR_RESOURCE (network byte order) if no space is left.
+ */
+static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
+{
+	uint32_t *buf = xdr_reserve_space(xdr, 8);
+
+	if (unlikely(buf == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	buf[0] = htonl(op);
+	buf[1] = res;
+	return 0;
+}
+
+/*
+ * Encode the result of OP_CB_GETATTR: attribute bitmap, attribute data
+ * length, then change, size, ctime and mtime as selected by res->bitmap.
+ *
+ * Nothing is encoded when res->status is already non-zero. Returns the
+ * first non-zero status encountered, or 0.
+ */
+static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
+{
+ uint32_t *savep;
+ unsigned status = res->status;
+
+ if (unlikely(status != 0))
+ goto out;
+ /* savep receives the slot reserved for the attribute data length. */
+ status = encode_attr_bitmap(xdr, res->bitmap, &savep);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_change(xdr, res->bitmap, res->change_attr);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_size(xdr, res->bitmap, res->size);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_ctime(xdr, res->bitmap, &res->ctime);
+ if (unlikely(status != 0))
+ goto out;
+ status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
+ /*
+ * Length in bytes of everything encoded after the length slot.
+ * This is written even if encode_attr_mtime() failed.
+ */
+ *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
+out:
+ dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+ return status;
+}
+
+/*
+ * Decode, execute and encode a single operation of a CB_COMPOUND.
+ *
+ * The operation number is read from @xdr_in. Anything other than
+ * OP_CB_GETATTR/OP_CB_RECALL (or a header that cannot be decoded) is
+ * answered as OP_CB_ILLEGAL through the hook-less callback_ops[0] entry.
+ * @argp and @resp are scratch buffers handed to the operation's hooks.
+ *
+ * Returns 0 or an NFSv4 status in network byte order.
+ */
+static unsigned process_op(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr_in, void *argp,
+ struct xdr_stream *xdr_out, void *resp)
+{
+ struct callback_op *op;
+ unsigned int op_nr;
+ unsigned int status = 0;
+ long maxlen;
+ unsigned res;
+
+ dprintk("%s: start\n", __FUNCTION__);
+ status = decode_op_hdr(xdr_in, &op_nr);
+ if (unlikely(status != 0)) {
+ /* Header unreadable: keep the decode status, report OP_CB_ILLEGAL. */
+ op_nr = OP_CB_ILLEGAL;
+ op = &callback_ops[0];
+ } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
+ op_nr = OP_CB_ILLEGAL;
+ op = &callback_ops[0];
+ status = htonl(NFS4ERR_OP_ILLEGAL);
+ } else
+ op = &callback_ops[op_nr];
+
+ /*
+ * Only run the operation while there is room left in the reply.
+ * NOTE(review): end - p is a pointer difference while PAGE_SIZE is in
+ * bytes; confirm the intended unit against struct xdr_stream.
+ */
+ maxlen = xdr_out->end - xdr_out->p;
+ if (maxlen > 0 && maxlen < PAGE_SIZE) {
+ if (likely(status == 0 && op->decode_args != NULL))
+ status = op->decode_args(rqstp, xdr_in, argp);
+ if (likely(status == 0 && op->process_op != NULL))
+ status = op->process_op(argp, resp);
+ } else
+ status = htonl(NFS4ERR_RESOURCE);
+
+ /* The op header is always encoded; its failure only matters if status was 0. */
+ res = encode_op_hdr(xdr_out, op_nr, status);
+ if (status == 0)
+ status = res;
+ if (op->encode_res != NULL && status == 0)
+ status = op->encode_res(rqstp, xdr_out, resp);
+ dprintk("%s: done, status = %d\n", __FUNCTION__, status);
+ return status;
+}
+
+/*
+ * Decode, process and encode a COMPOUND
+ *
+ * The reply is built directly behind whatever is already in the first
+ * result iovec. Operations are processed in order until one fails or the
+ * announced number of operations has been handled; the reply header then
+ * receives the status of the last operation and the number of operations
+ * processed (a failing operation is counted).
+ *
+ * Returns rpc_success once a reply has been built, rpc_garbage_args if
+ * the request header cannot be decoded at all, and rpc_system_err if the
+ * reply header does not fit.
+ */
+static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	struct cb_compound_hdr_arg hdr_arg;
+	struct cb_compound_hdr_res hdr_res;
+	struct xdr_stream xdr_in, xdr_out;
+	uint32_t *p;
+	unsigned int status;
+	unsigned int nops = 0;
+
+	dprintk("%s: start\n", __FUNCTION__);
+
+	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+
+	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
+	rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
+	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+
+	/*
+	 * NFS4ERR_RESOURCE means the header was truncated or the tag was
+	 * rejected; the tag may then be unset, so it cannot be echoed back.
+	 */
+	status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
+	if (status == htonl(NFS4ERR_RESOURCE))
+		return rpc_garbage_args;
+
+	hdr_res.taglen = hdr_arg.taglen;
+	hdr_res.tag = hdr_arg.tag;
+	/* Without a header the status/nops slots written below do not exist. */
+	if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
+		return rpc_system_err;
+
+	/*
+	 * A remaining decode error (minor version mismatch) skips the loop
+	 * and is reported with zero operations; hdr_arg.nops is not valid in
+	 * that case and is not read.
+	 */
+	while (status == 0 && nops != hdr_arg.nops) {
+		status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp);
+		nops++;
+	}
+
+	*hdr_res.status = status;
+	*hdr_res.nops = htonl(nops);
+	dprintk("%s: done, status = %u\n", __FUNCTION__, status);
+	return rpc_success;
+}
+
+/*
+ * Define NFS4 callback COMPOUND ops.
+ *
+ * Indexed by operation number. Entry 0 has no hooks and is what
+ * process_op() uses for illegal operations. OP_CB_RECALL has no
+ * encode_res hook, so only its op header is encoded.
+ */
+static struct callback_op callback_ops[] = {
+ [0] = {
+ .res_maxsize = CB_OP_HDR_RES_MAXSZ,
+ },
+ [OP_CB_GETATTR] = {
+ .process_op = (callback_process_op_t)nfs4_callback_getattr,
+ .decode_args = (callback_decode_arg_t)decode_getattr_args,
+ .encode_res = (callback_encode_res_t)encode_getattr_res,
+ .res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
+ },
+ [OP_CB_RECALL] = {
+ .process_op = (callback_process_op_t)nfs4_callback_recall,
+ .decode_args = (callback_decode_arg_t)decode_recall_args,
+ .res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+ }
+};
+
+/*
+ * Define NFS4 callback procedures
+ *
+ * Indexed by RPC procedure number. CB_COMPOUND has no .pc_decode hook:
+ * nfs4_callback_compound() decodes the request and encodes the reply
+ * itself, so its .pc_encode only performs the final size check.
+ */
+static struct svc_procedure nfs4_callback_procedures1[] = {
+ [CB_NULL] = {
+ .pc_func = nfs4_callback_null,
+ .pc_decode = (kxdrproc_t)nfs4_decode_void,
+ .pc_encode = (kxdrproc_t)nfs4_encode_void,
+ .pc_xdrressize = 1,
+ },
+ [CB_COMPOUND] = {
+ .pc_func = nfs4_callback_compound,
+ .pc_encode = (kxdrproc_t)nfs4_encode_void,
+ /* NOTE(review): presumably sizes the argp/resp scratch buffers - confirm in sunrpc. */
+ .pc_argsize = 256,
+ .pc_ressize = 256,
+ .pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
+ }
+};
+
+/* Version 1 of the callback program; referenced from the version table in callback.c. */
+struct svc_version nfs4_callback_version1 = {
+ .vs_vers = 1,
+ .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
+ .vs_proc = nfs4_callback_procedures1,
+ .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
+ .vs_dispatch = NULL, /* no version-specific dispatch function */
+};
+
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
new file mode 100644
index 000000000000..5b9c60f97791
--- /dev/null
+++ b/fs/nfs/delegation.c
@@ -0,0 +1,342 @@
+/*
+ * linux/fs/nfs/delegation.c
+ *
+ * Copyright (C) 2004 Trond Myklebust
+ *
+ * NFS file delegation management
+ *
+ */
+#include <linux/config.h>
+#include <linux/completion.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+
+#include "delegation.h"
+
+/*
+ * Allocate a delegation structure with GFP_KERNEL.
+ * The memory is NOT zeroed; returns NULL on allocation failure.
+ */
+static struct nfs_delegation *nfs_alloc_delegation(void)
+{
+	struct nfs_delegation *deleg;
+
+	deleg = kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL);
+	return deleg;
+}
+
+/*
+ * Release a delegation structure: drop its credential reference, if it
+ * holds one, and free the memory.
+ */
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+	struct rpc_cred *cred = delegation->cred;
+
+	if (cred != NULL)
+		put_rpccred(cred);
+	kfree(delegation);
+}
+
+/*
+ * Convert every delegated open on @inode through
+ * nfs4_open_delegation_recall().
+ *
+ * The open-context list is walked under inode->i_lock. For each context
+ * whose state is flagged NFS_DELEGATED_STATE the lock is dropped (a
+ * reference keeps the context alive), the open is recalled, and the walk
+ * restarts from the head of the list. The function gives up silently on
+ * the first recall that fails.
+ */
+static void nfs_delegation_claim_opens(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state;
+	int err;
+
+again:
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(ctx, &nfsi->open_files, list) {
+		state = ctx->state;
+		if (state == NULL)
+			continue;
+		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+			continue;
+		get_nfs_open_context(ctx);
+		spin_unlock(&inode->i_lock);
+		err = nfs4_open_delegation_recall(ctx->dentry, state);
+		/* Drop our reference on every path, not only on success. */
+		put_nfs_open_context(ctx);
+		if (err < 0)
+			return;
+		/* The list may have changed while unlocked: start over. */
+		goto again;
+	}
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Refresh an existing delegation on an inode after it has been reclaimed
+ *
+ * Copies the stateid, type and maximum size from the open result @res
+ * into the inode's current delegation, switches it over to @cred and
+ * clears NFS_DELEGATION_NEED_RECLAIM. Does nothing if the inode holds no
+ * delegation. The caller keeps its own reference on @cred.
+ */
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+	struct rpc_cred *oldcred;
+
+	if (delegation == NULL)
+		return;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	/*
+	 * Take the new reference before dropping the old one, so that the
+	 * credential stays referenced even when both are the same object.
+	 */
+	oldcred = delegation->cred;
+	delegation->cred = get_rpccred(cred);
+	delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
+	NFS_I(inode)->delegation_state = delegation->type;
+	/* Pairs with the smp_rmb() in nfs_have_delegation(). */
+	smp_wmb();
+	if (oldcred != NULL)
+		put_rpccred(oldcred);
+}
+
+/*
+ * Set up a delegation on an inode
+ *
+ * Builds a delegation from the open result @res and attaches it to
+ * @inode and to the client's delegation list. If the inode already holds
+ * a delegation the new one is discarded; a mismatch in stateid or type is
+ * then reported as a duplicate.
+ *
+ * Returns 0 on success (including the "identical delegation already
+ * present" case), -ENOMEM on allocation failure, -EIO on a conflicting
+ * duplicate. The caller keeps its own reference on @cred.
+ */
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int status = 0;
+
+	delegation = nfs_alloc_delegation();
+	if (delegation == NULL)
+		return -ENOMEM;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	/* kmalloc() does not zero: start without NFS_DELEGATION_NEED_RECLAIM. */
+	delegation->flags = 0;
+	delegation->cred = get_rpccred(cred);
+	delegation->inode = inode;
+
+	spin_lock(&clp->cl_lock);
+	if (nfsi->delegation == NULL) {
+		list_add(&delegation->super_list, &clp->cl_delegations);
+		nfsi->delegation = delegation;
+		nfsi->delegation_state = delegation->type;
+		/* Ownership has passed to the inode; nothing to free below. */
+		delegation = NULL;
+	} else {
+		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
+					sizeof(delegation->stateid)) != 0 ||
+				delegation->type != nfsi->delegation->type) {
+			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
+					__FUNCTION__, NIPQUAD(clp->cl_addr));
+			status = -EIO;
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	/* An unused delegation still holds a cred reference: release both. */
+	if (delegation != NULL)
+		nfs_free_delegation(delegation);
+	return status;
+}
+
+/*
+ * Hand a delegation back to the server and free it.
+ *
+ * The inode is revalidated first, then DELEGRETURN is sent with the
+ * delegation's credential and stateid. The delegation structure is freed
+ * whatever the outcome. Returns the result of nfs4_proc_delegreturn().
+ */
+static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
+{
+	int status;
+
+	__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+	status = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
+	nfs_free_delegation(delegation);
+	return status;
+}
+
+/*
+ * Sync all data to disk upon delegation return
+ *
+ * Starts writeback of the inode's mapping, calls nfs_wb_all(), then waits
+ * for the mapping's writeback to finish. Return values are ignored.
+ */
+static void nfs_msync_inode(struct inode *inode)
+{
+ filemap_fdatawrite(inode->i_mapping);
+ nfs_wb_all(inode);
+ filemap_fdatawait(inode->i_mapping);
+}
+
+/*
+ * Basic procedure for returning a delegation to the server
+ *
+ * Flushes dirty data, detaches the delegation (if any) from the inode and
+ * from the client's list, recalls the delegated opens, flushes again and
+ * finally returns the delegation to the server.
+ *
+ * Returns 0 if the inode held no delegation, otherwise the result of
+ * nfs_do_return_delegation().
+ */
+int nfs_inode_return_delegation(struct inode *inode)
+{
+ struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_delegation *delegation;
+ int res = 0;
+
+ nfs_msync_inode(inode);
+ down_read(&clp->cl_sem);
+ /* Guard against new delegated open calls */
+ down_write(&nfsi->rwsem);
+ /* cl_lock protects both nfsi->delegation and the client's delegation list. */
+ spin_lock(&clp->cl_lock);
+ delegation = nfsi->delegation;
+ if (delegation != NULL) {
+ list_del_init(&delegation->super_list);
+ nfsi->delegation = NULL;
+ nfsi->delegation_state = 0;
+ }
+ spin_unlock(&clp->cl_lock);
+ /* Runs even when no delegation was found. */
+ nfs_delegation_claim_opens(inode);
+ up_write(&nfsi->rwsem);
+ up_read(&clp->cl_sem);
+ nfs_msync_inode(inode);
+
+ /* From here on the detached delegation is owned by this function. */
+ if (delegation != NULL)
+ res = nfs_do_return_delegation(inode, delegation);
+ return res;
+}
+
+/*
+ * Return all delegations associated to a super block
+ *
+ * Walks the client's delegation list and returns every delegation whose
+ * inode belongs to @sb. Does nothing if the super block has no NFSv4
+ * client state.
+ */
+void nfs_return_all_delegations(struct super_block *sb)
+{
+ struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+ struct nfs_delegation *delegation;
+ struct inode *inode;
+
+ if (clp == NULL)
+ return;
+restart:
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+ if (delegation->inode->i_sb != sb)
+ continue;
+ /* Skip inodes we cannot get a reference on. */
+ inode = igrab(delegation->inode);
+ if (inode == NULL)
+ continue;
+ /*
+ * The return is done without cl_lock held and removes the entry
+ * from the list, so the walk has to start over afterwards.
+ */
+ spin_unlock(&clp->cl_lock);
+ nfs_inode_return_delegation(inode);
+ iput(inode);
+ goto restart;
+ }
+ spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
+ *
+ * Same walk as nfs_return_all_delegations(), but for every delegation of
+ * the client regardless of super block. Does nothing if @clp is NULL.
+ */
+void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+{
+ struct nfs_delegation *delegation;
+ struct inode *inode;
+
+ if (clp == NULL)
+ return;
+restart:
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+ /* Skip inodes we cannot get a reference on. */
+ inode = igrab(delegation->inode);
+ if (inode == NULL)
+ continue;
+ /* The list changes while unlocked, so restart the walk afterwards. */
+ spin_unlock(&clp->cl_lock);
+ nfs_inode_return_delegation(inode);
+ iput(inode);
+ goto restart;
+ }
+ spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Arguments handed to recall_thread(). The structure lives on the stack
+ * of nfs_async_inode_return_delegation() and is only valid until
+ * 'started' has been completed.
+ */
+struct recall_threadargs {
+ struct inode *inode; /* inode whose delegation is recalled */
+ struct nfs4_client *clp; /* not set or read by the code in this file */
+ const nfs4_stateid *stateid; /* stateid named by the server in CB_RECALL */
+
+ struct completion started; /* completed once 'result' is valid */
+ int result; /* 0, or -ENOENT if the stateid does not match */
+};
+
+/*
+ * Helper thread that returns one delegation on behalf of CB_RECALL.
+ *
+ * If the inode's delegation matches args->stateid it is detached and
+ * args->result is set to 0, otherwise args->result is -ENOENT. The
+ * spawning function is released through 'started' as soon as the result
+ * is known; the remaining work (recalling opens, flushing, DELEGRETURN)
+ * continues asynchronously.
+ */
+static int recall_thread(void *data)
+{
+ struct recall_threadargs *args = (struct recall_threadargs *)data;
+ /* NOTE(review): igrab() can return NULL, which is not checked here. */
+ struct inode *inode = igrab(args->inode);
+ struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_delegation *delegation;
+
+ daemonize("nfsv4-delegreturn");
+
+ nfs_msync_inode(inode);
+ down_read(&clp->cl_sem);
+ down_write(&nfsi->rwsem);
+ spin_lock(&clp->cl_lock);
+ delegation = nfsi->delegation;
+ if (delegation != NULL && memcmp(delegation->stateid.data,
+ args->stateid->data,
+ sizeof(delegation->stateid.data)) == 0) {
+ list_del_init(&delegation->super_list);
+ nfsi->delegation = NULL;
+ nfsi->delegation_state = 0;
+ args->result = 0;
+ } else {
+ delegation = NULL;
+ args->result = -ENOENT;
+ }
+ spin_unlock(&clp->cl_lock);
+ /*
+ * Releases nfs_async_inode_return_delegation(). 'args' points into
+ * that function's stack frame and must not be touched after this.
+ */
+ complete(&args->started);
+ nfs_delegation_claim_opens(inode);
+ up_write(&nfsi->rwsem);
+ up_read(&clp->cl_sem);
+ nfs_msync_inode(inode);
+
+ if (delegation != NULL)
+ nfs_do_return_delegation(inode, delegation);
+ iput(inode);
+ /* Drops the module reference taken by nfs_async_inode_return_delegation(). */
+ module_put_and_exit(0);
+}
+
+/*
+ * Asynchronous delegation recall!
+ *
+ * Spawns recall_thread() for @inode/@stateid and waits only until the
+ * thread has decided whether the stateid matches the inode's delegation.
+ * A module reference is taken for the lifetime of the thread.
+ *
+ * Returns the thread's verdict (0 or -ENOENT), or the negative value from
+ * kernel_thread() if the thread could not be created.
+ */
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+{
+	struct recall_threadargs data = {
+		.inode = inode,
+		.stateid = stateid,
+	};
+	int pid;
+
+	init_completion(&data.started);
+	__module_get(THIS_MODULE);
+	pid = kernel_thread(recall_thread, &data, CLONE_KERNEL);
+	if (pid < 0) {
+		/* No thread will drop the module reference for us. */
+		module_put(THIS_MODULE);
+		return pid;
+	}
+	/* 'data' lives on this stack: stay until the thread is done with it. */
+	wait_for_completion(&data.started);
+	return data.result;
+}
+
+/*
+ * Retrieve the inode associated with a delegation
+ *
+ * Searches the client's delegation list for the first delegation whose
+ * inode has file handle @fhandle. Returns that inode with a reference
+ * obtained through igrab() (which may itself yield NULL), or NULL if no
+ * file handle matches. The caller must iput() a non-NULL result.
+ */
+struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+{
+	struct nfs_delegation *deleg;
+	struct inode *found = NULL;
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(deleg, &clp->cl_delegations, super_list) {
+		if (nfs_compare_fh(fhandle, &NFS_I(deleg->inode)->fh) != 0)
+			continue;
+		found = igrab(deleg->inode);
+		break;
+	}
+	spin_unlock(&clp->cl_lock);
+	return found;
+}
+
+/*
+ * Mark all delegations as needing to be reclaimed
+ *
+ * Sets NFS_DELEGATION_NEED_RECLAIM on every delegation of @clp, under
+ * cl_lock.
+ */
+void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+{
+	struct nfs_delegation *deleg;
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(deleg, &clp->cl_delegations, super_list) {
+		deleg->flags = deleg->flags | NFS_DELEGATION_NEED_RECLAIM;
+	}
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Reap all unclaimed delegations after reboot recovery is done
+ *
+ * Every delegation still flagged NFS_DELEGATION_NEED_RECLAIM is detached
+ * from its inode and moved to a private list under cl_lock, then freed
+ * once the lock has been dropped. Nothing is sent to the server.
+ */
+void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+{
+ struct nfs_delegation *delegation, *n;
+ LIST_HEAD(head);
+ spin_lock(&clp->cl_lock);
+ /* The _safe iterator is needed because entries are moved off the list. */
+ list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+ if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
+ continue;
+ list_move(&delegation->super_list, &head);
+ NFS_I(delegation->inode)->delegation = NULL;
+ NFS_I(delegation->inode)->delegation_state = 0;
+ }
+ spin_unlock(&clp->cl_lock);
+ /* Free the collected delegations outside the spinlock. */
+ while(!list_empty(&head)) {
+ delegation = list_entry(head.next, struct nfs_delegation, super_list);
+ list_del(&delegation->super_list);
+ nfs_free_delegation(delegation);
+ }
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
new file mode 100644
index 000000000000..3f6c45a29d6a
--- /dev/null
+++ b/fs/nfs/delegation.h
@@ -0,0 +1,57 @@
+/*
+ * linux/fs/nfs/delegation.h
+ *
+ * Copyright (c) Trond Myklebust
+ *
+ * Definitions pertaining to NFS delegated files
+ */
+#ifndef FS_NFS_DELEGATION_H
+#define FS_NFS_DELEGATION_H
+
+#if defined(CONFIG_NFS_V4)
+/*
+ * NFSv4 delegation
+ *
+ * One of these exists per delegated inode.  Field notes below are limited
+ * to what the users of this header show; anything else is marked as an
+ * assumption.
+ */
+struct nfs_delegation {
+ /* Link in the owning nfs4_client's cl_delegations list */
+ struct list_head super_list;
+ /* presumably the RPC credential the delegation was obtained with - TODO confirm */
+ struct rpc_cred *cred;
+ /* Inode this delegation belongs to */
+ struct inode *inode;
+ nfs4_stateid stateid;
+ /* NOTE(review): looks like the delegation type (read/write) - confirm in delegation.c */
+ int type;
+#define NFS_DELEGATION_NEED_RECLAIM 1
+ /* Bit flags; NFS_DELEGATION_NEED_RECLAIM is set by nfs_delegation_mark_reclaim() */
+ long flags;
+ loff_t maxsize;
+};
+
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+int nfs_inode_return_delegation(struct inode *inode);
+int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+
+struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+void nfs_return_all_delegations(struct super_block *sb);
+void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+
+void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+
+/* NFSv4 delegation-related procedures */
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
+int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+
+/*
+ * Test whether the inode's cached delegation state covers the requested
+ * open mode.  Only the FMODE_READ and FMODE_WRITE bits of @flags are
+ * considered.  Returns 1 when every requested bit is present in
+ * NFS_I(inode)->delegation_state, 0 otherwise.
+ */
+static inline int nfs_have_delegation(struct inode *inode, int flags)
+{
+	int wanted = flags & (FMODE_READ|FMODE_WRITE);
+
+	/* Read barrier is issued before delegation_state is sampled */
+	smp_rmb();
+	return (NFS_I(inode)->delegation_state & wanted) == wanted;
+}
+#else
+/* Without CONFIG_NFS_V4 there are no delegations: always report "none". */
+static inline int nfs_have_delegation(struct inode *inode, int flags)
+{
+ return 0;
+}
+#endif
+
+#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
new file mode 100644
index 000000000000..73f96acd5d37
--- /dev/null
+++ b/fs/nfs/dir.c
@@ -0,0 +1,1562 @@
+/*
+ * linux/fs/nfs/dir.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ *
+ * nfs directory handling functions
+ *
+ * 10 Apr 1996 Added silly rename for unlink --okir
+ * 28 Sep 1996 Improved directory cache --okir
+ * 23 Aug 1997 Claus Heine claus@momo.math.rwth-aachen.de
+ * Re-implemented silly rename for unlink, newly implemented
+ * silly rename for nfs_rename() following the suggestions
+ * of Olaf Kirch (okir) found in this file.
+ * Following Linus comments on my original hack, this version
+ * depends only on the dcache stuff and doesn't touch the inode
+ * layer (iput() and friends).
+ * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM
+ */
+
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+#include <linux/namei.h>
+
+#include "delegation.h"
+
+#define NFS_PARANOIA 1
+/* #define NFS_DEBUG_VERBOSE 1 */
+
+static int nfs_opendir(struct inode *, struct file *);
+static int nfs_readdir(struct file *, void *, filldir_t);
+static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
+static int nfs_mkdir(struct inode *, struct dentry *, int);
+static int nfs_rmdir(struct inode *, struct dentry *);
+static int nfs_unlink(struct inode *, struct dentry *);
+static int nfs_symlink(struct inode *, struct dentry *, const char *);
+static int nfs_link(struct dentry *, struct inode *, struct dentry *);
+static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
+static int nfs_rename(struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
+static int nfs_fsync_dir(struct file *, struct dentry *, int);
+
+/*
+ * File operations for NFS directories.  Directory listing goes through
+ * nfs_readdir(), open through nfs_opendir(), and fsync through
+ * nfs_fsync_dir() (which simply returns 0).
+ */
+struct file_operations nfs_dir_operations = {
+ .read = generic_read_dir,
+ .readdir = nfs_readdir,
+ .open = nfs_opendir,
+ .release = nfs_release,
+ .fsync = nfs_fsync_dir,
+};
+
+/*
+ * Inode operations for NFS directories.  Lookups use the plain
+ * nfs_lookup(); the NFSv4 table below differs only in its .lookup entry.
+ */
+struct inode_operations nfs_dir_inode_operations = {
+ .create = nfs_create,
+ .lookup = nfs_lookup,
+ .link = nfs_link,
+ .unlink = nfs_unlink,
+ .symlink = nfs_symlink,
+ .mkdir = nfs_mkdir,
+ .rmdir = nfs_rmdir,
+ .mknod = nfs_mknod,
+ .rename = nfs_rename,
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+};
+
+#ifdef CONFIG_NFS_V4
+
+static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
+/*
+ * NFSv4 directory inode operations.  Identical to
+ * nfs_dir_inode_operations except that .lookup is nfs_atomic_lookup().
+ */
+struct inode_operations nfs4_dir_inode_operations = {
+ .create = nfs_create,
+ .lookup = nfs_atomic_lookup,
+ .link = nfs_link,
+ .unlink = nfs_unlink,
+ .symlink = nfs_symlink,
+ .mkdir = nfs_mkdir,
+ .rmdir = nfs_rmdir,
+ .mknod = nfs_mknod,
+ .rename = nfs_rename,
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+};
+
+#endif /* CONFIG_NFS_V4 */
+
+/*
+ * Open a directory.
+ *
+ * Simply hands off to the generic nfs_open() (so that credentials get
+ * cached) with the big kernel lock held, and returns its result.
+ */
+static int
+nfs_opendir(struct inode *inode, struct file *filp)
+{
+	int status;
+
+	lock_kernel();
+	status = nfs_open(inode, filp);
+	unlock_kernel();
+	return status;
+}
+
+/*
+ * Per-protocol dirent decoder.  Called as decode(ptr, entry, plus); the
+ * callers below treat the return value as either the advanced buffer
+ * pointer or an ERR_PTR() encoded error.
+ */
+typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int);
+/*
+ * State carried through one nfs_readdir() call.
+ */
+typedef struct {
+ struct file *file; /* directory being read */
+ struct page *page; /* page currently held and kmap()ed, or NULL */
+ unsigned long page_index; /* index of the page cache page being searched */
+ u32 *ptr; /* decode cursor inside the mapped page */
+ u64 target; /* cookie being searched for (initialised from f_pos) */
+ struct nfs_entry *entry; /* most recently decoded entry */
+ decode_dirent_t decode; /* NFS_PROTO(inode)->decode_dirent */
+ int plus; /* non-zero when READDIRPLUS is in use */
+ int error; /* RPC error recorded by the readdir calls, if any */
+} nfs_readdir_descriptor_t;
+
+/* Now we cache directories properly, by stuffing the dirent
+ * data directly in the page cache.
+ *
+ * Inode invalidation due to refresh etc. takes care of
+ * _everything_, no sloppy entry flushing logic, no extraneous
+ * copying, network direct to page cache, the way it was meant
+ * to be.
+ *
+ * NOTE: Dirent information verification is done always by the
+ * page-in of the RPC reply, nowhere else, this simplifies
+ * things substantially.
+ *
+ * This is the filler handed to read_cache_page(): it issues a READDIR
+ * starting at desc->entry->cookie into @page.  On success the page is
+ * marked uptodate, unlocked, and 0 is returned.  On failure the RPC
+ * error is saved in desc->error, the page is flagged with SetPageError()
+ * and unlocked, the inode caches are zapped, and -EIO is returned.
+ */
+static
+int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
+{
+ struct file *file = desc->file;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct rpc_cred *cred = nfs_file_cred(file);
+ unsigned long timestamp;
+ int error;
+
+ dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+
+ again:
+ /* Sampled before the RPC so that readdir_timestamp never postdates the data */
+ timestamp = jiffies;
+ error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page,
+ NFS_SERVER(inode)->dtsize, desc->plus);
+ if (error < 0) {
+ /* We requested READDIRPLUS, but the server doesn't grok it */
+ if (error == -ENOTSUPP && desc->plus) {
+ /* Turn READDIRPLUS off for the server and this inode, then retry */
+ NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
+ NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+ desc->plus = 0;
+ goto again;
+ }
+ goto error;
+ }
+ SetPageUptodate(page);
+ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+ /* Ensure consistent page alignment of the data.
+ * Note: assumes we have exclusive access to this mapping either
+ * through inode->i_sem or some other mechanism.
+ */
+ if (page->index == 0) {
+ invalidate_inode_pages(inode->i_mapping);
+ NFS_I(inode)->readdir_timestamp = timestamp;
+ }
+ unlock_page(page);
+ return 0;
+ error:
+ SetPageError(page);
+ unlock_page(page);
+ nfs_zap_caches(inode);
+ /* The real error is kept in the descriptor; the caller only sees -EIO */
+ desc->error = error;
+ return -EIO;
+}
+
+/*
+ * Decode the entry at desc->ptr into desc->entry.
+ *
+ * On success the cursor is advanced and 0 is returned.  If the decoder
+ * hands back an ERR_PTR(), the cursor is left untouched and the encoded
+ * error is returned.
+ */
+static inline
+int dir_decode(nfs_readdir_descriptor_t *desc)
+{
+	u32 *next = desc->decode(desc->ptr, desc->entry, desc->plus);
+
+	if (IS_ERR(next))
+		return PTR_ERR(next);
+	desc->ptr = next;
+	return 0;
+}
+
+/*
+ * Undo the kmap() and drop the page reference held in the descriptor,
+ * then clear desc->page and desc->ptr so they cannot be reused.
+ */
+static inline
+void dir_page_release(nfs_readdir_descriptor_t *desc)
+{
+ kunmap(desc->page);
+ page_cache_release(desc->page);
+ desc->page = NULL;
+ desc->ptr = NULL;
+}
+
+/*
+ * Scan a buffer that has already been filled by a call to readdir,
+ * looking for the entry whose prev_cookie equals desc->target.
+ *
+ * Returns 0 once that entry has been decoded into desc->entry.  If
+ * decoding stops first, the non-zero status of dir_decode() is returned
+ * (readdir_search_pagecache() treats -EAGAIN as "try the next page").
+ * The CPU is yielded every couple of hundred entries.
+ */
+static inline
+int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page)
+{
+	struct nfs_entry *entry = desc->entry;
+	int decoded = 0;
+	int status;
+
+	for (;;) {
+		status = dir_decode(desc);
+		if (status != 0)
+			break;
+		dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie);
+		if (entry->prev_cookie == desc->target)
+			break;
+		if (decoded++ > 200) {
+			decoded = 0;
+			schedule();
+		}
+	}
+	dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
+	return status;
+}
+
+/*
+ * Find the given page, and call find_dirent() in order to try to
+ * return the next entry.
+ *
+ * The page at desc->page_index is obtained through read_cache_page()
+ * with nfs_readdir_filler() as the filler.  On success (return 0) the
+ * page stays referenced and kmap()ed in desc->page / desc->ptr for the
+ * caller; on any failure the page has been released.  Returns -EIO when
+ * the page is not uptodate after the read.
+ */
+static inline
+int find_dirent_page(nfs_readdir_descriptor_t *desc)
+{
+ struct inode *inode = desc->file->f_dentry->d_inode;
+ struct page *page;
+ int status;
+
+ dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+
+ page = read_cache_page(inode->i_mapping, desc->page_index,
+ (filler_t *)nfs_readdir_filler, desc);
+ if (IS_ERR(page)) {
+ status = PTR_ERR(page);
+ goto out;
+ }
+ if (!PageUptodate(page))
+ goto read_error;
+
+ /* NOTE: Someone else may have changed the READDIRPLUS flag */
+ desc->page = page;
+ desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
+ status = find_dirent(desc, page);
+ /* Entry not found in this page: unmap it and drop our reference */
+ if (status < 0)
+ dir_page_release(desc);
+ out:
+ dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+ return status;
+ read_error:
+ /* Page was never kmap()ed on this path, so only the reference is dropped */
+ page_cache_release(page);
+ return -EIO;
+}
+
+/*
+ * Walk the directory's page cache pages one after another, starting at
+ * desc->page_index, until find_dirent_page() reports something other
+ * than -EAGAIN.
+ *
+ * The entry being looked for is the one matching 'desc->target'; on
+ * success (0) desc->entry describes it.  Any other status is passed
+ * back unchanged.
+ */
+static inline
+int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
+{
+	int iterations = 0;
+	int status;
+
+	dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target);
+	while ((status = find_dirent_page(desc)) == -EAGAIN) {
+		/* Align to beginning of next page */
+		desc->page_index ++;
+		if (iterations++ > 200) {
+			iterations = 0;
+			schedule();
+		}
+	}
+	dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", status);
+	return status;
+}
+
+/*
+ * Derive the d_type value passed to filldir from an inode: bits 12-15
+ * of i_mode.
+ */
+static inline unsigned int dt_type(struct inode *inode)
+{
+	unsigned int fmt_bits = inode->i_mode >> 12;
+
+	return fmt_bits & 15;
+}
+
+static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
+
+/*
+ * Once we've found the start of the dirent within a page: fill 'er up...
+ *
+ * Starting with the entry already decoded in desc->entry, feed entries
+ * to @filldir until it refuses one (res < 0) or the page cannot be
+ * decoded any further.  After every accepted entry file->f_pos and
+ * desc->target are advanced to that entry's cookie.  The mapped page in
+ * the descriptor is always released before returning.
+ *
+ * Returns the result of the last filldir() call.
+ */
+static
+int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
+ filldir_t filldir)
+{
+ struct file *file = desc->file;
+ struct nfs_entry *entry = desc->entry;
+ struct dentry *dentry = NULL;
+ unsigned long fileid;
+ int loop_count = 0,
+ res;
+
+ dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target);
+
+ for(;;) {
+ unsigned d_type = DT_UNKNOWN;
+ /* Note: entry->prev_cookie contains the cookie for
+ * retrieving the current dirent on the server */
+ fileid = nfs_fileid_to_ino_t(entry->ino);
+
+ /* Get a dentry if we have one */
+ if (dentry != NULL)
+ dput(dentry);
+ dentry = nfs_readdir_lookup(desc);
+
+ /* Use readdirplus info */
+ if (dentry != NULL && dentry->d_inode != NULL) {
+ d_type = dt_type(dentry->d_inode);
+ fileid = dentry->d_inode->i_ino;
+ }
+
+ res = filldir(dirent, entry->name, entry->len,
+ entry->prev_cookie, fileid, d_type);
+ if (res < 0)
+ break;
+ /* Entry accepted: the next search starts after it */
+ file->f_pos = desc->target = entry->cookie;
+ if (dir_decode(desc) != 0) {
+ /* Nothing more to decode in this page; continue with the next one */
+ desc->page_index ++;
+ break;
+ }
+ if (loop_count++ > 200) {
+ loop_count = 0;
+ schedule();
+ }
+ }
+ dir_page_release(desc);
+ if (dentry != NULL)
+ dput(dentry);
+ dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
+ return res;
+}
+
+/*
+ * If we cannot find a cookie in our cache, we suspect that this is
+ * because it points to a deleted file, so we ask the server to return
+ * whatever it thinks is the next entry. We then feed this to filldir.
+ * If all goes well, we should then be able to find our way round the
+ * cache on the next call to readdir_search_pagecache();
+ *
+ * NOTE: we cannot add the anonymous page to the pagecache because
+ * the data it contains might not be page aligned. Besides,
+ * we should already have a complete representation of the
+ * directory in the page cache by the time we get here.
+ *
+ * Returns -ENOMEM if no page could be allocated, -EIO if the READDIR
+ * call failed (the RPC error is left in desc->error), a negative
+ * dir_decode() status if the first entry could not be decoded, and
+ * otherwise the result of nfs_do_filldir().
+ */
+static inline
+int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
+ filldir_t filldir)
+{
+ struct file *file = desc->file;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct rpc_cred *cred = nfs_file_cred(file);
+ struct page *page = NULL;
+ int status;
+
+ dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target);
+
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page) {
+ status = -ENOMEM;
+ goto out;
+ }
+ desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target,
+ page,
+ NFS_SERVER(inode)->dtsize,
+ desc->plus);
+ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+ /* The page is attached and mapped even on RPC failure so that
+ * dir_page_release() can be used on every exit path below */
+ desc->page = page;
+ desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
+ if (desc->error >= 0) {
+ if ((status = dir_decode(desc)) == 0)
+ desc->entry->prev_cookie = desc->target;
+ } else
+ status = -EIO;
+ if (status < 0)
+ goto out_release;
+
+ status = nfs_do_filldir(desc, dirent, filldir);
+
+ /* Reset read descriptor so it searches the page cache from
+ * the start upon the next call to readdir_search_pagecache() */
+ desc->page_index = 0;
+ desc->entry->cookie = desc->entry->prev_cookie = 0;
+ desc->entry->eof = 0;
+ out:
+ dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+ return status;
+ out_release:
+ dir_page_release(desc);
+ goto out;
+}
+
+/* The file offset position is now represented as a true offset into the
+ * page cache as is the case in most of the other filesystems.
+ *
+ * NOTE(review): in the code below filp->f_pos is used as the cookie to
+ * search for (desc->target) and is updated to entry->cookie by
+ * nfs_do_filldir(); the sentence above may be out of date - confirm.
+ *
+ * ->readdir() entry point.  Revalidates the directory inode, then
+ * alternates between locating desc->target in the page cache and
+ * feeding entries to @filldir until end of directory, an error, or
+ * filldir refusing an entry.  Runs under the big kernel lock.
+ *
+ * Returns 0 on success (including when filldir stopped early),
+ * desc->error if an RPC error was recorded, or another negative errno.
+ */
+static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct dentry *dentry = filp->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ nfs_readdir_descriptor_t my_desc,
+ *desc = &my_desc;
+ struct nfs_entry my_entry;
+ struct nfs_fh fh;
+ struct nfs_fattr fattr;
+ long res;
+
+ lock_kernel();
+
+ res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (res < 0) {
+ unlock_kernel();
+ return res;
+ }
+
+ /*
+ * filp->f_pos points to the file offset in the page cache.
+ * but if the cache has meanwhile been zapped, we need to
+ * read from the last dirent to revalidate f_pos
+ * itself.
+ */
+ memset(desc, 0, sizeof(*desc));
+
+ desc->file = filp;
+ desc->target = filp->f_pos;
+ desc->decode = NFS_PROTO(inode)->decode_dirent;
+ desc->plus = NFS_USE_READDIRPLUS(inode);
+
+ /* Scratch entry plus storage for the filehandle/attributes it points at */
+ my_entry.cookie = my_entry.prev_cookie = 0;
+ my_entry.eof = 0;
+ my_entry.fh = &fh;
+ my_entry.fattr = &fattr;
+ desc->entry = &my_entry;
+
+ while(!desc->entry->eof) {
+ res = readdir_search_pagecache(desc);
+ if (res == -EBADCOOKIE) {
+ /* This means either end of directory */
+ if (desc->entry->cookie != desc->target) {
+ /* Or that the server has 'lost' a cookie */
+ res = uncached_readdir(desc, dirent, filldir);
+ if (res >= 0)
+ continue;
+ }
+ res = 0;
+ break;
+ }
+ /* READDIRPLUS reply did not fit: fall back to plain READDIR and retry */
+ if (res == -ETOOSMALL && desc->plus) {
+ NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
+ nfs_zap_caches(inode);
+ desc->plus = 0;
+ desc->entry->eof = 0;
+ continue;
+ }
+ if (res < 0)
+ break;
+
+ res = nfs_do_filldir(desc, dirent, filldir);
+ /* filldir refused an entry: not an error, just stop here */
+ if (res < 0) {
+ res = 0;
+ break;
+ }
+ }
+ unlock_kernel();
+ /* An RPC error recorded in the descriptor takes precedence over res */
+ if (desc->error < 0)
+ return desc->error;
+ if (res < 0)
+ return res;
+ return 0;
+}
+
+/*
+ * All directory operations under NFS are synchronous, so fsync()
+ * is a dummy operation.
+ *
+ * All arguments are ignored; always returns 0.  Note that the forward
+ * declaration near the top of this file marks this function static.
+ */
+int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
+{
+ return 0;
+}
+
+/*
+ * Decide whether a dentry can still be trusted with respect to its
+ * parent directory.
+ *
+ * Root dentries are always trusted (1).  If the directory's attributes
+ * are flagged invalid or its attribute cache has timed out the answer is
+ * 0.  Otherwise the verifier stored in dentry->d_fsdata is checked
+ * against the directory via nfs_verify_change_attribute().
+ */
+static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+{
+	unsigned long verf;
+
+	if (IS_ROOT(dentry))
+		return 1;
+	if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0)
+		return 0;
+	if (nfs_attribute_timeout(dir))
+		return 0;
+	verf = (unsigned long)dentry->d_fsdata;
+	return nfs_verify_change_attribute(dir, verf);
+}
+
+/*
+ * Store a directory change verifier in the dentry.  The value lives in
+ * d_fsdata, cast to a pointer; nfs_check_verifier() reads it back.
+ */
+static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
+{
+ dentry->d_fsdata = (void *)verf;
+}
+
+/*
+ * Whenever an NFS operation succeeds, we know that the dentry
+ * is valid, so we update the revalidation timestamp.
+ *
+ * The timestamp is dentry->d_time, set to the current jiffies value.
+ */
+static inline void nfs_renew_times(struct dentry * dentry)
+{
+ dentry->d_time = jiffies;
+}
+
+/*
+ * Revalidate an inode on behalf of a dcache lookup hit.
+ *
+ * A forced revalidation (__nfs_revalidate_inode) is used when the VFS
+ * asked for LOOKUP_REVAL, or when this is the final component of an
+ * open(2) and the mount does not have NFS_MOUNT_NOCTO set.  In every
+ * other case, including nd == NULL, the ordinary nfs_revalidate_inode()
+ * is used.  Returns whatever the chosen helper returns.
+ */
+static inline
+int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	int force = 0;
+
+	if (nd != NULL) {
+		int ndflags = nd->flags;
+
+		if (ndflags & LOOKUP_REVAL) {
+			/* VFS wants an on-the-wire revalidation */
+			force = 1;
+		} else if ((ndflags & LOOKUP_OPEN) &&
+			   !(ndflags & LOOKUP_CONTINUE) &&
+			   !(server->flags & NFS_MOUNT_NOCTO)) {
+			/* This is an open(2) */
+			force = 1;
+		}
+	}
+	if (force)
+		return __nfs_revalidate_inode(server, inode);
+	return nfs_revalidate_inode(server, inode);
+}
+
+/*
+ * Decide whether a negative dentry has to be looked up again.
+ *
+ * When the lookup is the last step of a create (LOOKUP_CREATE set and
+ * LOOKUP_CONTINUE clear) the negative dentry is kept as is.  Otherwise
+ * it needs revalidation exactly when nfs_check_verifier() no longer
+ * vouches for it.
+ *
+ * Returns non-zero if the dentry must be revalidated.
+ */
+static inline
+int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
+		       struct nameidata *nd)
+{
+	int ndflags = nd ? nd->flags : 0;
+	int creating = (ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE);
+
+	/* Don't revalidate a negative dentry if we're creating a new file */
+	if (creating)
+		return 0;
+	return !nfs_check_verifier(dir, dentry);
+}
+
+/*
+ * This is called every time the dcache has a lookup hit,
+ * and we should check whether we can really trust that
+ * lookup.
+ *
+ * NOTE! The hit can be a negative hit too, don't assume
+ * we have an inode!
+ *
+ * If the parent directory is seen to have changed, we throw out the
+ * cached dentry and do a new lookup.
+ *
+ * Returns 1 if the dentry may be used as is, 0 after it has been
+ * dropped with d_drop().  A directory dentry that has submounts is
+ * reported valid even when the checks failed.
+ */
+static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
+{
+ struct inode *dir;
+ struct inode *inode;
+ struct dentry *parent;
+ int error;
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr;
+ unsigned long verifier;
+
+ /* Reference on the parent is held until the dput() on each exit path */
+ parent = dget_parent(dentry);
+ lock_kernel();
+ dir = parent->d_inode;
+ inode = dentry->d_inode;
+
+ /* Negative dentry: only the verifier/create heuristics apply */
+ if (!inode) {
+ if (nfs_neg_need_reval(dir, dentry, nd))
+ goto out_bad;
+ goto out_valid;
+ }
+
+ if (is_bad_inode(inode)) {
+ dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ goto out_bad;
+ }
+
+ /* Revalidate parent directory attribute cache */
+ if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+ goto out_zap_parent;
+
+ /* Force a full look up iff the parent directory has changed */
+ if (nfs_check_verifier(dir, dentry)) {
+ if (nfs_lookup_verify_inode(inode, nd))
+ goto out_zap_parent;
+ goto out_valid;
+ }
+
+ if (NFS_STALE(inode))
+ goto out_bad;
+
+ /* Sample the verifier before the LOOKUP so a concurrent directory
+ * change is not masked by the value stored afterwards */
+ verifier = nfs_save_change_attribute(dir);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+ if (error)
+ goto out_bad;
+ /* The name now refers to a different filehandle */
+ if (nfs_compare_fh(NFS_FH(inode), &fhandle))
+ goto out_bad;
+ if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
+ goto out_bad;
+
+ nfs_renew_times(dentry);
+ nfs_set_verifier(dentry, verifier);
+ out_valid:
+ unlock_kernel();
+ dput(parent);
+ return 1;
+out_zap_parent:
+ nfs_zap_caches(dir);
+ out_bad:
+ NFS_CACHEINV(dir);
+ if (inode && S_ISDIR(inode->i_mode)) {
+ /* Purge readdir caches. */
+ nfs_zap_caches(inode);
+ /* If we have submounts, don't unhash ! */
+ if (have_submounts(dentry))
+ goto out_valid;
+ shrink_dcache_parent(dentry);
+ }
+ d_drop(dentry);
+ unlock_kernel();
+ dput(parent);
+ return 0;
+}
+
+/*
+ * ->d_delete() hook, invoked from dput() when d_count is about to
+ * reach 0.
+ *
+ * Returns 1 (unhash the dentry) for silly-renamed dentries and for any
+ * dentry whose superblock is no longer MS_ACTIVE; returns 0 otherwise.
+ */
+static int nfs_dentry_delete(struct dentry *dentry)
+{
+	int unhash = 0;
+
+	dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		dentry->d_flags);
+
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+		/* Unhash it, so that ->d_iput() would be called */
+		unhash = 1;
+	} else if (!(dentry->d_sb->s_flags & MS_ACTIVE)) {
+		/* Unhash it, so that ancestors of killed async unlink
+		 * files will be cleaned up during umount */
+		unhash = 1;
+	}
+	return unhash;
+}
+
+/*
+ * Called when the dentry loses inode.
+ * We use it to clean up silly-renamed files.
+ *
+ * For a silly-renamed dentry the inode's link count is decremented and
+ * nfs_complete_unlink() is called, both under the big kernel lock.  In
+ * every case d_time is refreshed and the inode reference is dropped.
+ */
+static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
+ lock_kernel();
+ inode->i_nlink--;
+ nfs_complete_unlink(dentry);
+ unlock_kernel();
+ }
+ /* When creating a negative dentry, we want to renew d_time */
+ nfs_renew_times(dentry);
+ iput(inode);
+}
+
+/*
+ * Dentry operations for the non-atomic-open lookup path; revalidation
+ * goes through nfs_lookup_revalidate().
+ */
+struct dentry_operations nfs_dentry_operations = {
+ .d_revalidate = nfs_lookup_revalidate,
+ .d_delete = nfs_dentry_delete,
+ .d_iput = nfs_dentry_iput,
+};
+
+/*
+ * Tell whether this lookup is the final step of an O_EXCL create.
+ *
+ * Always 0 for NFS version 2, when no nameidata is supplied, when more
+ * path components follow (LOOKUP_CONTINUE), or when the lookup is not
+ * for a create.  Otherwise 1 if the open intent carries O_EXCL, else 0.
+ */
+static inline
+int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
+{
+	if (NFS_PROTO(dir)->version == 2)
+		return 0;
+	if (!nd)
+		return 0;
+	if (nd->flags & LOOKUP_CONTINUE)
+		return 0;
+	if (!(nd->flags & LOOKUP_CREATE))
+		return 0;
+	return (nd->intent.open.flags & O_EXCL) != 0;
+}
+
+/*
+ * ->lookup() for NFS directories.
+ *
+ * Issues a LOOKUP for dentry->d_name in @dir (skipped for exclusive
+ * creates) and attaches the resulting inode, or a negative entry on
+ * -ENOENT, with d_add_unique().
+ *
+ * Returns NULL when @dentry itself was used, the alias dentry returned
+ * by d_add_unique() when there was one, or an ERR_PTR():
+ * -ENAMETOOLONG if the name exceeds the server's limit, -EACCES if
+ * nfs_fhget() returned no inode, or the error from revalidating the
+ * directory or from the LOOKUP call.
+ */
+static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+{
+ struct dentry *res;
+ struct inode *inode = NULL;
+ int error;
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr;
+
+ dfprintk(VFS, "NFS: lookup(%s/%s)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+
+ res = ERR_PTR(-ENAMETOOLONG);
+ if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+ goto out;
+
+ res = ERR_PTR(-ENOMEM);
+ dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+ lock_kernel();
+ /* Revalidate parent directory attribute cache */
+ error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+ if (error < 0) {
+ res = ERR_PTR(error);
+ goto out_unlock;
+ }
+
+ /* If we're doing an exclusive create, optimize away the lookup */
+ if (nfs_is_exclusive_create(dir, nd))
+ goto no_entry;
+
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
+ /* -ENOENT is not a failure: it produces a negative dentry (inode == NULL) */
+ if (error == -ENOENT)
+ goto no_entry;
+ if (error < 0) {
+ res = ERR_PTR(error);
+ goto out_unlock;
+ }
+ res = ERR_PTR(-EACCES);
+ inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
+ if (!inode)
+ goto out_unlock;
+no_entry:
+ res = d_add_unique(dentry, inode);
+ /* If an alias was returned, stamp that dentry instead of ours */
+ if (res != NULL)
+ dentry = res;
+ nfs_renew_times(dentry);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+out_unlock:
+ unlock_kernel();
+out:
+ return res;
+}
+
+#ifdef CONFIG_NFS_V4
+static int nfs_open_revalidate(struct dentry *, struct nameidata *);
+
+/*
+ * NFSv4 dentry operations.  Same as nfs_dentry_operations except that
+ * revalidation goes through nfs_open_revalidate().
+ */
+struct dentry_operations nfs4_dentry_operations = {
+ .d_revalidate = nfs_open_revalidate,
+ .d_delete = nfs_dentry_delete,
+ .d_iput = nfs_dentry_iput,
+};
+
+/*
+ * Decide whether a lookup can be served by an NFSv4 atomic open.
+ *
+ * Returns 1 only when nameidata is present, this is the last path
+ * component of an open (LOOKUP_OPEN without LOOKUP_CONTINUE), the
+ * target is not required to be a directory, and the request would not
+ * create, truncate or write on a read-only directory.  Returns 0 in all
+ * other cases.
+ */
+static int is_atomic_open(struct inode *dir, struct nameidata *nd)
+{
+	if (!nd)
+		return 0;
+	/* Check that we are indeed trying to open this file */
+	if (nd->flags & LOOKUP_CONTINUE)
+		return 0;
+	if (!(nd->flags & LOOKUP_OPEN))
+		return 0;
+	/* NFS does not (yet) have a stateful open for directories */
+	if (nd->flags & LOOKUP_DIRECTORY)
+		return 0;
+	/* Are we trying to write to a read only partition? */
+	if (IS_RDONLY(dir)) {
+		if (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * NFSv4 ->lookup() that folds the OPEN into the lookup when possible.
+ *
+ * If is_atomic_open() says this is not the final step of an open, the
+ * work is delegated to nfs_lookup().  O_EXCL opens only get a negative
+ * dentry here and leave the create to vfs_create().  Otherwise the file
+ * is opened on the server with nfs4_atomic_open() under the big kernel
+ * lock and the resulting inode is attached with d_add_unique().
+ *
+ * Returns NULL when @dentry itself was used, the alias returned by
+ * d_add_unique() when there was one, or an ERR_PTR() on failure
+ * (-ENAMETOOLONG, the directory revalidation error, or the error from
+ * the open).  -ENOENT from the open yields a negative dentry; -ELOOP
+ * without O_NOFOLLOW falls back to nfs_lookup().
+ */
+static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+{
+	struct dentry *res = NULL;
+	struct inode *inode = NULL;
+	int error;
+
+	/* Check that we are indeed trying to open this file */
+	if (!is_atomic_open(dir, nd))
+		goto no_open;
+
+	if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
+		res = ERR_PTR(-ENAMETOOLONG);
+		goto out;
+	}
+	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+	/* Let vfs_create() deal with O_EXCL */
+	if (nd->intent.open.flags & O_EXCL)
+		goto no_entry;
+
+	/* Open the file on the server */
+	lock_kernel();
+	/* Revalidate parent directory attribute cache */
+	error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+	if (error < 0) {
+		res = ERR_PTR(error);
+		/* The "out" label does not unlock, so release the BKL here;
+		 * leaving without doing so would leak the lock. */
+		unlock_kernel();
+		goto out;
+	}
+
+	if (nd->intent.open.flags & O_CREAT) {
+		nfs_begin_data_update(dir);
+		inode = nfs4_atomic_open(dir, dentry, nd);
+		nfs_end_data_update(dir);
+	} else
+		inode = nfs4_atomic_open(dir, dentry, nd);
+	unlock_kernel();
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
+		switch (error) {
+			/* Make a negative dentry */
+			case -ENOENT:
+				inode = NULL;
+				break;
+			/* This turned out not to be a regular file */
+			case -ELOOP:
+				if (!(nd->intent.open.flags & O_NOFOLLOW))
+					goto no_open;
+			/* fall through: with O_NOFOLLOW, -ELOOP is reported */
+			/* case -EISDIR: */
+			/* case -EINVAL: */
+			default:
+				res = ERR_PTR(error);
+				goto out;
+		}
+	}
+no_entry:
+	res = d_add_unique(dentry, inode);
+	/* If an alias was returned, stamp that dentry instead of ours */
+	if (res != NULL)
+		dentry = res;
+	nfs_renew_times(dentry);
+	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+out:
+	return res;
+no_open:
+	return nfs_lookup(dir, dentry, nd);
+}
+
+/*
+ * NFSv4 ->d_revalidate().
+ *
+ * When the lookup is the final step of an open of a regular file, the
+ * dentry is revalidated through nfs4_open_revalidate() with O_CREAT and
+ * O_TRUNC stripped from the open flags.  Negative dentries on that path
+ * are dropped without an RPC.  In all other cases a dentry whose inode
+ * holds a read delegation is reported valid, and everything else is
+ * handed to nfs_lookup_revalidate().
+ *
+ * Returns non-zero if the dentry is valid; when 0 is returned from the
+ * open path the dentry has been d_drop()ped.
+ */
+static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct dentry *parent = NULL;
+ struct inode *inode = dentry->d_inode;
+ struct inode *dir;
+ unsigned long verifier;
+ int openflags, ret = 0;
+
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ if (!is_atomic_open(dir, nd))
+ goto no_open;
+ /* We can't create new files in nfs_open_revalidate(), so we
+ * optimize away revalidation of negative dentries.
+ */
+ if (inode == NULL)
+ goto out;
+ /* NFS only supports OPEN on regular files */
+ if (!S_ISREG(inode->i_mode))
+ goto no_open;
+ openflags = nd->intent.open.flags;
+ /* We cannot do exclusive creation on a positive dentry */
+ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
+ goto no_open;
+ /* We can't create new files, or truncate existing ones here */
+ openflags &= ~(O_CREAT|O_TRUNC);
+
+ /*
+ * Note: we're not holding inode->i_sem and so may be racing with
+ * operations that change the directory. We therefore save the
+ * change attribute *before* we do the RPC call.
+ */
+ lock_kernel();
+ verifier = nfs_save_change_attribute(dir);
+ ret = nfs4_open_revalidate(dir, dentry, openflags);
+ /* NOTE(review): the verifier is stored when ret == 0, which is also the
+ * value that leads to d_drop() below - confirm against
+ * nfs4_open_revalidate() that this is the intended condition. */
+ if (!ret)
+ nfs_set_verifier(dentry, verifier);
+ unlock_kernel();
+out:
+ dput(parent);
+ if (!ret)
+ d_drop(dentry);
+ return ret;
+no_open:
+ dput(parent);
+ /* A read delegation vouches for the dentry without further checks */
+ if (inode != NULL && nfs_have_delegation(inode, FMODE_READ))
+ return 1;
+ return nfs_lookup_revalidate(dentry, nd);
+}
+#endif /* CONFIG_NFSV4 */
+
+/*
+ * Find or create the dentry for the directory entry currently held in
+ * desc->entry.
+ *
+ * "." and ".." map to the directory itself and its parent.  Otherwise
+ * the dcache is consulted first; on a miss a new dentry is instantiated
+ * only when READDIRPLUS is in use and the entry carries valid
+ * attributes (NFS_ATTR_FATTR).
+ *
+ * Returns a referenced dentry, or NULL if none is available or an
+ * allocation/inode lookup failed.  The caller must dput() the result.
+ */
+static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
+{
+ struct dentry *parent = desc->file->f_dentry;
+ struct inode *dir = parent->d_inode;
+ struct nfs_entry *entry = desc->entry;
+ struct dentry *dentry, *alias;
+ struct qstr name = {
+ .name = entry->name,
+ .len = entry->len,
+ };
+ struct inode *inode;
+
+ switch (name.len) {
+ case 2:
+ if (name.name[0] == '.' && name.name[1] == '.')
+ return dget_parent(parent);
+ break;
+ case 1:
+ if (name.name[0] == '.')
+ return dget(parent);
+ }
+ name.hash = full_name_hash(name.name, name.len);
+ dentry = d_lookup(parent, &name);
+ if (dentry != NULL)
+ return dentry;
+ /* Without READDIRPLUS attributes there is nothing to build an inode from */
+ if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
+ return NULL;
+ /* Note: caller is already holding the dir->i_sem! */
+ dentry = d_alloc(parent, &name);
+ if (dentry == NULL)
+ return NULL;
+ dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
+ if (!inode) {
+ dput(dentry);
+ return NULL;
+ }
+ alias = d_add_unique(dentry, inode);
+ /* An alias was returned: drop our new dentry and use the alias */
+ if (alias != NULL) {
+ dput(dentry);
+ dentry = alias;
+ }
+ nfs_renew_times(dentry);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ return dentry;
+}
+
+/*
+ * Code common to create, mkdir, and mknod.
+ *
+ * Attach an inode to a freshly created dentry.  A missing filehandle
+ * (fhandle->size == 0) is obtained with a LOOKUP in the parent, and
+ * missing attributes with a GETATTR, before nfs_fhget() is called.
+ *
+ * Returns 0 on success or if the dentry already has an inode.  On
+ * failure the dentry is d_drop()ped and the LOOKUP/GETATTR error, or
+ * -ENOMEM when nfs_fhget() returns NULL, is returned.
+ */
+int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
+{
+ struct inode *inode;
+ int error = -EACCES;
+
+ /* We may have been initialized further down */
+ if (dentry->d_inode)
+ return 0;
+ if (fhandle->size == 0) {
+ struct inode *dir = dentry->d_parent->d_inode;
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ if (error)
+ goto out_err;
+ }
+ if (!(fattr->valid & NFS_ATTR_FATTR)) {
+ struct nfs_server *server = NFS_SB(dentry->d_sb);
+ error = server->rpc_ops->getattr(server, fhandle, fattr);
+ if (error < 0)
+ goto out_err;
+ }
+ error = -ENOMEM;
+ inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
+ if (inode == NULL)
+ goto out_err;
+ d_instantiate(dentry, inode);
+ return 0;
+out_err:
+ d_drop(dentry);
+ return error;
+}
+
+/*
+ * Following a failed create operation, we drop the dentry rather
+ * than retain a negative dentry. This avoids a problem in the event
+ * that the operation succeeded on the server, but an error in the
+ * reply path made it appear to have failed.
+ *
+ * ->create(): asks the protocol layer to create @dentry in @dir with
+ * the given mode.  The open intent flags are forwarded when the
+ * nameidata indicates a create.  Returns 0 on success or the protocol
+ * layer's error after dropping the dentry.
+ */
+static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
+ struct nameidata *nd)
+{
+ struct iattr attr;
+ int error;
+ int open_flags = 0;
+
+ dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id,
+ dir->i_ino, dentry->d_name.name);
+
+ attr.ia_mode = mode;
+ attr.ia_valid = ATTR_MODE;
+
+ if (nd && (nd->flags & LOOKUP_CREATE))
+ open_flags = nd->intent.open.flags;
+
+ lock_kernel();
+ nfs_begin_data_update(dir);
+ error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
+ nfs_end_data_update(dir);
+ if (error != 0)
+ goto out_err;
+ nfs_renew_times(dentry);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ unlock_kernel();
+ return 0;
+out_err:
+ unlock_kernel();
+ d_drop(dentry);
+ return error;
+}
+
+/*
+ * See the comment above nfs_create() regarding failed operations.
+ *
+ * ->mknod(): creates a special file in @dir through the protocol layer.
+ * Returns -EINVAL if @rdev fails new_valid_dev(), 0 on success, or the
+ * protocol layer's error after dropping the dentry.
+ */
+static int
+nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+{
+ struct iattr attr;
+ int status;
+
+ dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
+ dir->i_ino, dentry->d_name.name);
+
+ if (!new_valid_dev(rdev))
+ return -EINVAL;
+
+ attr.ia_mode = mode;
+ attr.ia_valid = ATTR_MODE;
+
+ lock_kernel();
+ nfs_begin_data_update(dir);
+ status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
+ nfs_end_data_update(dir);
+ if (status != 0)
+ goto out_err;
+ nfs_renew_times(dentry);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ unlock_kernel();
+ return 0;
+out_err:
+ unlock_kernel();
+ d_drop(dentry);
+ return status;
+}
+
+/*
+ * See the comment above nfs_create() regarding failed operations.
+ *
+ * ->mkdir(): creates a directory in @dir through the protocol layer,
+ * with S_IFDIR added to the requested mode.  Returns 0 on success or
+ * the protocol layer's error after dropping the dentry.
+ */
+static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+ struct iattr attr;
+ int error;
+
+ dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
+ dir->i_ino, dentry->d_name.name);
+
+ attr.ia_valid = ATTR_MODE;
+ attr.ia_mode = mode | S_IFDIR;
+
+ lock_kernel();
+ nfs_begin_data_update(dir);
+ error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
+ nfs_end_data_update(dir);
+ if (error != 0)
+ goto out_err;
+ nfs_renew_times(dentry);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ unlock_kernel();
+ return 0;
+out_err:
+ /* Unlike nfs_create()/nfs_mknod(), the dentry is dropped before the
+ * big kernel lock is released here */
+ d_drop(dentry);
+ unlock_kernel();
+ return error;
+}
+
+/*
+ * ->rmdir(): removes the directory named by @dentry from @dir through
+ * the protocol layer, under the big kernel lock.  On success the
+ * removed directory's link count is forced to 0.  Returns the protocol
+ * layer's result.
+ */
+static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ int error;
+
+ dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
+ dir->i_ino, dentry->d_name.name);
+
+ lock_kernel();
+ nfs_begin_data_update(dir);
+ error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
+ /* Ensure the VFS deletes this inode */
+ if (error == 0 && dentry->d_inode != NULL)
+ dentry->d_inode->i_nlink = 0;
+ nfs_end_data_update(dir);
+ unlock_kernel();
+
+ return error;
+}
+
+/*
+ * Rename a file that is about to be removed to a unique
+ * ".nfs<inode number><counter>" name in the same directory and queue
+ * an asynchronous unlink of it.
+ *
+ * The name is ".nfs" followed by the inode number and a global counter,
+ * both printed as zero-padded hex of fixed width; the counter is bumped
+ * until a name with no existing inode is found.
+ *
+ * Returns -EBUSY if the dentry was already silly-renamed or the lookup
+ * of a candidate name failed, the RENAME error if the rename failed,
+ * and otherwise the result of nfs_async_unlink().
+ */
+static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
+{
+ static unsigned int sillycounter;
+ /* Two hex digits per byte of the respective integer type */
+ const int i_inosize = sizeof(dir->i_ino)*2;
+ const int countersize = sizeof(sillycounter)*2;
+ /* sizeof(".nfs") counts the NUL, hence the -1 */
+ const int slen = sizeof(".nfs") + i_inosize + countersize - 1;
+ char silly[slen+1];
+ struct qstr qsilly;
+ struct dentry *sdentry;
+ int error = -EIO;
+
+ dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ atomic_read(&dentry->d_count));
+
+#ifdef NFS_PARANOIA
+if (!dentry->d_inode)
+printk("NFS: silly-renaming %s/%s, negative dentry??\n",
+dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+ /*
+ * We don't allow a dentry to be silly-renamed twice.
+ */
+ error = -EBUSY;
+ if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+ goto out;
+
+ /* NOTE(review): d_inode is dereferenced here even though the paranoia
+ * check above allows for it being NULL - presumably callers only pass
+ * positive dentries; confirm. */
+ sprintf(silly, ".nfs%*.*lx",
+ i_inosize, i_inosize, dentry->d_inode->i_ino);
+
+ sdentry = NULL;
+ do {
+ /* The counter occupies the last countersize characters of the name */
+ char *suffix = silly + slen - countersize;
+
+ dput(sdentry);
+ sillycounter++;
+ sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
+
+ dfprintk(VFS, "trying to rename %s to %s\n",
+ dentry->d_name.name, silly);
+
+ sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+ /*
+ * N.B. Better to return EBUSY here ... it could be
+ * dangerous to delete the file while it's in use.
+ */
+ if (IS_ERR(sdentry))
+ goto out;
+ } while(sdentry->d_inode != NULL); /* need negative lookup */
+
+ qsilly.name = silly;
+ qsilly.len = strlen(silly);
+ nfs_begin_data_update(dir);
+ if (dentry->d_inode) {
+ nfs_begin_data_update(dentry->d_inode);
+ error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
+ dir, &qsilly);
+ nfs_end_data_update(dentry->d_inode);
+ } else
+ error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
+ dir, &qsilly);
+ nfs_end_data_update(dir);
+ if (!error) {
+ nfs_renew_times(dentry);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ d_move(dentry, sdentry);
+ error = nfs_async_unlink(dentry);
+ /* If we return 0 we don't unlink */
+ }
+ dput(sdentry);
+out:
+ return error;
+}
+
+/*
+ * Issue the protocol's remove operation for a dentry.
+ *
+ * A dentry flagged DCACHE_NFSFS_RENAMED is not removed here: 0 is
+ * returned straight away so that the caller simply calls d_delete().
+ * Otherwise the remove is bracketed by data updates on the parent
+ * directory and, when the dentry has one, on its inode, whose link count
+ * is decremented if the remove succeeds.
+ */
+static int nfs_safe_remove(struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	dfprintk(VFS, "NFS: safe_remove(%s/%s)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	/* If the dentry was sillyrenamed, we simply call d_delete() */
+	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+		return 0;
+
+	nfs_begin_data_update(dir);
+	if (inode == NULL) {
+		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+	} else {
+		nfs_begin_data_update(inode);
+		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+		/* The VFS may want to delete this inode */
+		if (error == 0)
+			inode->i_nlink--;
+		nfs_end_data_update(inode);
+	}
+	nfs_end_data_update(dir);
+	return error;
+}
+
+/* We do silly rename. In case sillyrename() returns -EBUSY, the inode
+ * belongs to an active ".nfs..." file and we return -EBUSY.
+ *
+ * If sillyrename() returns 0, we do nothing, otherwise we unlink.
+ *
+ * The dentry's reference count is sampled under dcache_lock and d_lock:
+ * with more than one reference the dentry is silly-renamed, otherwise it
+ * is unhashed (if it was hashed) and removed via nfs_safe_remove().  If
+ * that removal fails, a dentry unhashed here is rehashed.
+ */
+static int nfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ int error;
+ /* set when this function unhashed the dentry and must undo it on failure */
+ int need_rehash = 0;
+
+ dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
+ dir->i_ino, dentry->d_name.name);
+
+ lock_kernel();
+ spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count) > 1) {
+ /* other references exist: both spinlocks are dropped before the rename */
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ error = nfs_sillyrename(dir, dentry);
+ unlock_kernel();
+ return error;
+ }
+ if (!d_unhashed(dentry)) {
+ __d_drop(dentry);
+ need_rehash = 1;
+ }
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ error = nfs_safe_remove(dentry);
+ if (!error) {
+ nfs_renew_times(dentry);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ } else if (need_rehash)
+ d_rehash(dentry);
+ unlock_kernel();
+ return error;
+}
+
+/*
+ * Create a symbolic link named by @dentry in @dir pointing at @symname.
+ *
+ * On success the dentry is instantiated from the file handle and
+ * attributes filled in by the protocol operation, and the result of
+ * nfs_instantiate() is returned.  On failure d_drop() is called on the
+ * dentry and the protocol error is returned; -EEXIST is also logged.
+ */
+static int
+nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	struct nfs_fattr sym_attr;
+	struct nfs_fh sym_fh;
+	struct qstr qsymname;
+	struct iattr attr;
+	int error;
+
+	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
+		dir->i_ino, dentry->d_name.name, symname);
+
+#ifdef NFS_PARANOIA
+	if (dentry->d_inode)
+		printk("nfs_proc_symlink: %s/%s not negative!\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name);
+#endif
+	/*
+	 * Fill in the sattr for the call.
+	 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
+	 */
+	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	attr.ia_valid = ATTR_MODE;
+
+	qsymname.len = strlen(symname);
+	qsymname.name = symname;
+
+	lock_kernel();
+	nfs_begin_data_update(dir);
+	error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
+					  &attr, &sym_fh, &sym_attr);
+	nfs_end_data_update(dir);
+	if (error != 0) {
+		if (error == -EEXIST)
+			printk("nfs_proc_symlink: %s/%s already exists??\n",
+				dentry->d_parent->d_name.name, dentry->d_name.name);
+		d_drop(dentry);
+	} else {
+		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
+	}
+	unlock_kernel();
+	return error;
+}
+
+/*
+ * Create a hard link named by @dentry in @dir to the inode behind
+ * @old_dentry.  Returns the status of the protocol's link operation.
+ */
+static int
+nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+{
+	struct inode *target = old_dentry->d_inode;
+	int status;
+
+	dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
+		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	lock_kernel();
+
+	/*
+	 * Drop the dentry in advance to force a new lookup.
+	 * Since nfs_proc_link doesn't return a file handle,
+	 * we can't use the existing dentry.
+	 */
+	d_drop(dentry);
+
+	/* Both the directory and the link target are bracketed as updated. */
+	nfs_begin_data_update(dir);
+	nfs_begin_data_update(target);
+	status = NFS_PROTO(dir)->link(target, dir, &dentry->d_name);
+	nfs_end_data_update(target);
+	nfs_end_data_update(dir);
+
+	unlock_kernel();
+	return status;
+}
+
+/*
+ * RENAME
+ * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
+ * different file handle for the same inode after a rename (e.g. when
+ * moving to a different directory). A fail-safe method to do so would
+ * be to look up old_dir/old_name, create a link to new_dir/new_name and
+ * rename the old file using the sillyrename stuff. This way, the original
+ * file in old_dir will go away when the last process iput()s the inode.
+ *
+ * FIXED.
+ *
+ * It actually works quite well. One needs to have the possibility for
+ * at least one ".nfs..." file in each directory the file ever gets
+ * moved or linked to which happens automagically with the new
+ * implementation that only depends on the dcache stuff instead of
+ * using the inode layer
+ *
+ * Unfortunately, things are a little more complicated than indicated
+ * above. For a cross-directory move, we want to make sure we can get
+ * rid of the old inode after the operation. This means there must be
+ * no pending writes (if it's a file), and the use count must be 1.
+ * If these conditions are met, we can drop the dentries before doing
+ * the rename.
+ */
+/*
+ * Rename old_dentry (in old_dir) to new_dentry (in new_dir).
+ *
+ * Returns -EBUSY, without contacting the server, when the target is a
+ * directory, when the replacement dentry cannot be allocated, or when a
+ * failed silly-rename leaves the target with more than one reference.
+ * Otherwise returns the status of the protocol's rename operation.
+ */
+static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct inode *old_inode = old_dentry->d_inode;
+ struct inode *new_inode = new_dentry->d_inode;
+ /* dentry: replacement target allocated below; rehash: dentry to rehash on exit */
+ struct dentry *dentry = NULL, *rehash = NULL;
+ int error = -EBUSY;
+
+ /*
+ * To prevent any new references to the target during the rename,
+ * we unhash the dentry and free the inode in advance.
+ */
+ lock_kernel();
+ if (!d_unhashed(new_dentry)) {
+ d_drop(new_dentry);
+ rehash = new_dentry;
+ }
+
+ dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
+ old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
+ new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
+ atomic_read(&new_dentry->d_count));
+
+ /*
+ * First check whether the target is busy ... we can't
+ * safely do _any_ rename if the target is in use.
+ *
+ * For files, make a copy of the dentry and then do a
+ * silly-rename. If the silly-rename succeeds, the
+ * copied dentry is hashed and becomes the new target.
+ */
+ if (!new_inode)
+ goto go_ahead;
+ if (S_ISDIR(new_inode->i_mode))
+ goto out;
+ else if (atomic_read(&new_dentry->d_count) > 2) {
+ int err;
+ /* copy the target dentry's name */
+ dentry = d_alloc(new_dentry->d_parent,
+ &new_dentry->d_name);
+ /* NOTE(review): allocation failure is reported as -EBUSY, not -ENOMEM */
+ if (!dentry)
+ goto out;
+
+ /* silly-rename the existing target ... */
+ err = nfs_sillyrename(new_dir, new_dentry);
+ if (!err) {
+ /* from here on the freshly allocated negative dentry is the target */
+ new_dentry = rehash = dentry;
+ new_inode = NULL;
+ /* instantiate the replacement target */
+ d_instantiate(new_dentry, NULL);
+ } else if (atomic_read(&new_dentry->d_count) > 1) {
+ /* dentry still busy? */
+#ifdef NFS_PARANOIA
+ printk("nfs_rename: target %s/%s busy, d_count=%d\n",
+ new_dentry->d_parent->d_name.name,
+ new_dentry->d_name.name,
+ atomic_read(&new_dentry->d_count));
+#endif
+ goto out;
+ }
+ }
+
+go_ahead:
+ /*
+ * ... prune child dentries and writebacks if needed.
+ */
+ if (atomic_read(&old_dentry->d_count) > 1) {
+ nfs_wb_all(old_inode);
+ shrink_dcache_parent(old_dentry);
+ }
+
+ if (new_inode)
+ d_delete(new_dentry);
+
+ /* begin/end data-update calls are nested in strict LIFO order */
+ nfs_begin_data_update(old_dir);
+ nfs_begin_data_update(new_dir);
+ nfs_begin_data_update(old_inode);
+ error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
+ new_dir, &new_dentry->d_name);
+ nfs_end_data_update(old_inode);
+ nfs_end_data_update(new_dir);
+ nfs_end_data_update(old_dir);
+out:
+ if (rehash)
+ d_rehash(rehash);
+ if (!error) {
+ /* directories are not d_move()d here; only non-directories are */
+ if (!S_ISDIR(old_inode->i_mode))
+ d_move(old_dentry, new_dentry);
+ nfs_renew_times(new_dentry);
+ nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
+ }
+
+ /* new dentry created? */
+ if (dentry)
+ dput(dentry);
+ unlock_kernel();
+ return error;
+}
+
+/*
+ * Look up the inode's single cached access entry.
+ *
+ * The entry is usable only if it was stored for @cred, is not older than
+ * NFS_ATTRTIMEO(inode) and the inode is not flagged
+ * NFS_INO_INVALID_ACCESS.  On a hit the entry is copied to @res and 0 is
+ * returned; otherwise -ENOENT is returned and @res is left untouched.
+ */
+int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+	struct nfs_access_entry *entry = &NFS_I(inode)->cache_access;
+
+	if (entry->cred != cred)
+		return -ENOENT;
+	if (time_after(jiffies, entry->jiffies + NFS_ATTRTIMEO(inode)))
+		return -ENOENT;
+	if (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS)
+		return -ENOENT;
+
+	memcpy(res, entry, sizeof(*res));
+	return 0;
+}
+
+/*
+ * Store @set as the inode's cached access entry.
+ *
+ * If the credential differs from the cached one, the old credential (if
+ * any) is released and a reference to the new one is taken.  The
+ * NFS_INO_INVALID_ACCESS flag is cleared and the timestamp and mask are
+ * copied from @set.
+ */
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct nfs_access_entry *entry = &NFS_I(inode)->cache_access;
+	struct rpc_cred *old = entry->cred;
+
+	if (old != set->cred) {
+		if (old)
+			put_rpccred(old);
+		entry->cred = get_rpccred(set->cred);
+	}
+	NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
+	entry->jiffies = set->jiffies;
+	entry->mask = set->mask;
+}
+
+/*
+ * Check whether @cred is granted every right in @mask on @inode.
+ *
+ * The cached access entry is used when nfs_access_get_cached() finds
+ * one; otherwise the protocol's access operation is called and its
+ * answer is cached.  Returns 0 if all requested bits are granted,
+ * -EACCES if not, or the error from the access operation.
+ */
+static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
+{
+	struct nfs_access_entry cache;
+	int status;
+
+	if (nfs_access_get_cached(inode, cred, &cache) != 0) {
+		/* Be clever: ask server to check for all possible rights */
+		cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
+		cache.cred = cred;
+		cache.jiffies = jiffies;
+		status = NFS_PROTO(inode)->access(inode, &cache);
+		if (status != 0)
+			return status;
+		nfs_access_add_cache(inode, &cache);
+	}
+
+	return ((cache.mask & mask) == mask) ? 0 : -EACCES;
+}
+
+/*
+ * Permission check for an NFS inode.
+ *
+ * Several cases return 0 without any further check (see the switch
+ * below).  Everything else, and every lookup flagged LOOKUP_ACCESS, is
+ * checked via nfs_do_access() when the protocol provides an access
+ * operation, or via nfs_revalidate_inode() plus generic_permission()
+ * when it does not.
+ */
+int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+ struct rpc_cred *cred;
+ int res = 0;
+
+ /* an empty mask is granted immediately */
+ if (mask == 0)
+ goto out;
+ /* Is this sys_access() ? */
+ if (nd != NULL && (nd->flags & LOOKUP_ACCESS))
+ goto force_lookup;
+
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFLNK:
+ /* symlinks are granted without a check */
+ goto out;
+ case S_IFREG:
+ /* NFSv4 has atomic_open... */
+ if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
+ && nd != NULL
+ && (nd->flags & LOOKUP_OPEN))
+ goto out;
+ break;
+ case S_IFDIR:
+ /*
+ * Optimize away all write operations, since the server
+ * will check permissions when we perform the op.
+ */
+ if ((mask & MAY_WRITE) && !(mask & MAY_READ))
+ goto out;
+ }
+
+force_lookup:
+ lock_kernel();
+
+ if (!NFS_PROTO(inode)->access)
+ goto out_notsup;
+
+ cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+ if (!IS_ERR(cred)) {
+ res = nfs_do_access(inode, cred, mask);
+ put_rpccred(cred);
+ } else
+ res = PTR_ERR(cred);
+ unlock_kernel();
+out:
+ /* reached both with and without having taken the kernel lock; it is not held here */
+ return res;
+out_notsup:
+ /* no access operation: revalidate the inode, then use the generic check */
+ res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (res == 0)
+ res = generic_permission(inode, mask, NULL);
+ unlock_kernel();
+ return res;
+}
+
+/*
+ * Local variables:
+ * version-control: t
+ * kept-new-versions: 5
+ * End:
+ */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
new file mode 100644
index 000000000000..68df803f27ca
--- /dev/null
+++ b/fs/nfs/direct.c
@@ -0,0 +1,808 @@
+/*
+ * linux/fs/nfs/direct.c
+ *
+ * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
+ *
+ * High-performance uncached I/O for the Linux NFS client
+ *
+ * There are important applications whose performance or correctness
+ * depends on uncached access to file data. Database clusters
+ * (multiple copies of the same instance running on separate hosts)
+ * implement their own cache coherency protocol that subsumes file
+ * system cache protocols. Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache. A streaming video server, for instance, has no
+ * need to cache the contents of a file.
+ *
+ * When an application requests uncached I/O, all read and write requests
+ * are made directly to the server; data stored or fetched via these
+ * requests is not cached in the Linux page cache. The client does not
+ * correct unaligned requests from applications. All requested bytes are
+ * held on permanent storage before a direct write system call returns to
+ * an application.
+ *
+ * Solaris implements an uncached I/O facility called directio() that
+ * is used for backups and sequential I/O to very large files. Solaris
+ * also supports uncaching whole NFS partitions with "-o forcedirectio,"
+ * an undocumented mount option.
+ *
+ * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
+ * help from Andrew Morton.
+ *
+ * 18 Dec 2001 Initial implementation for 2.4 --cel
+ * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
+ * 08 Jun 2003 Port to 2.5 APIs --cel
+ * 31 Mar 2004 Handle direct I/O without VFS support --cel
+ * 15 Sep 2004 Parallel async reads --cel
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/smp_lock.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/kref.h>
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#define NFSDBG_FACILITY NFSDBG_VFS
+#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
+
+static kmem_cache_t *nfs_direct_cachep;
+
+/*
+ * This represents a set of asynchronous requests that we're waiting on
+ *
+ * Instances come from nfs_direct_cachep (see nfs_direct_read_alloc) and
+ * are returned to it by nfs_direct_req_release once the last kref is
+ * dropped.
+ */
+struct nfs_direct_req {
+ struct kref kref; /* release manager */
+ struct list_head list; /* nfs_read_data structs */
+ wait_queue_head_t wait; /* wait for i/o completion */
+ struct page ** pages; /* pages in our buffer */
+ unsigned int npages; /* count of pages */
+ atomic_t complete, /* i/os we're waiting for */
+ count, /* bytes actually processed */
+ error; /* any reported error */
+};
+
+
+/**
+ * nfs_get_user_pages - find and set up pages underlying user's buffer
+ * @rw: direction (read or write)
+ * @user_addr: starting address of this segment of user's buffer
+ * @size: size of this segment
+ * @pages: returned array of page struct pointers underlying user's buffer
+ *
+ * Returns the number of pages pinned, which on success always covers the
+ * whole segment, or a negative errno:
+ *   -EFBIG   @size exceeds MAX_DIRECTIO_SIZE (*@pages is set to NULL)
+ *   -ENOMEM  the page pointer array could not be allocated
+ *   -EFAULT  only part of the segment could be pinned
+ *   other    error returned by get_user_pages()
+ *
+ * On a get_user_pages() error *@pages still points at the allocated
+ * array and the caller must kfree() it; in the other failure cases
+ * *@pages is NULL.
+ */
+static inline int
+nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
+		struct page ***pages)
+{
+	int result = -ENOMEM;
+	unsigned long page_count;
+	size_t array_size;
+
+	/* set an arbitrary limit to prevent type overflow */
+	/* XXX: this can probably be as large as INT_MAX */
+	if (size > MAX_DIRECTIO_SIZE) {
+		*pages = NULL;
+		return -EFBIG;
+	}
+
+	/* number of pages touched by [user_addr, user_addr + size) */
+	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	page_count -= user_addr >> PAGE_SHIFT;
+
+	array_size = (page_count * sizeof(struct page *));
+	*pages = kmalloc(array_size, GFP_KERNEL);
+	if (*pages) {
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					page_count, (rw == READ), 0,
+					*pages, NULL);
+		up_read(&current->mm->mmap_sem);
+
+		/*
+		 * get_user_pages() may pin fewer pages than requested.
+		 * Callers index the array across the whole segment, so a
+		 * short pin must not be reported as success: release what
+		 * was pinned and fail the request.
+		 */
+		if (result >= 0 && (unsigned long) result < page_count) {
+			int i;
+
+			for (i = 0; i < result; i++)
+				page_cache_release((*pages)[i]);
+			kfree(*pages);
+			/* callers kfree() *pages on error; avoid a double free */
+			*pages = NULL;
+			result = -EFAULT;
+		}
+	}
+	return result;
+}
+
+/**
+ * nfs_free_user_pages - tear down page struct array
+ * @pages: array of page struct pointers underlying target buffer
+ * @npages: number of pages in the array
+ * @do_dirty: dirty the pages as we release them
+ *
+ * Releases the first @npages entries in ascending order, then frees the
+ * array itself.
+ */
+static void
+nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+	int idx = 0;
+
+	while (idx < npages) {
+		struct page *pg = pages[idx++];
+
+		if (do_dirty)
+			set_page_dirty_lock(pg);
+		page_cache_release(pg);
+	}
+	kfree(pages);
+}
+
+/**
+ * nfs_direct_req_release - release nfs_direct_req structure for direct read
+ * @kref: kref object embedded in an nfs_direct_req structure
+ *
+ * Used as the kref_put() release callback; returns the enclosing
+ * nfs_direct_req to nfs_direct_cachep.
+ */
+static void nfs_direct_req_release(struct kref *kref)
+{
+	kmem_cache_free(nfs_direct_cachep,
+			container_of(kref, struct nfs_direct_req, kref));
+}
+
+/**
+ * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
+ * @nbytes: count of bytes for the read request
+ * @rsize: local rsize setting
+ *
+ * Note we also set the number of requests we have in the dreq when we are
+ * done. This prevents races with I/O completion so we will always wait
+ * until all requests have been dispatched and completed.
+ *
+ * One nfs_read_data is allocated per @rsize bytes (at least one) and
+ * queued on dreq->list.  Returns NULL if any allocation fails, after
+ * freeing everything allocated so far.  On success the dreq holds two
+ * references: the initial one plus the one taken by kref_get() below;
+ * they are dropped in nfs_direct_read_result and nfs_direct_read_wait.
+ */
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+{
+ struct list_head *list;
+ struct nfs_direct_req *dreq;
+ unsigned int reads = 0;
+
+ dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+ if (!dreq)
+ return NULL;
+
+ kref_init(&dreq->kref);
+ init_waitqueue_head(&dreq->wait);
+ INIT_LIST_HEAD(&dreq->list);
+ atomic_set(&dreq->count, 0);
+ atomic_set(&dreq->error, 0);
+
+ list = &dreq->list;
+ for(;;) {
+ struct nfs_read_data *data = nfs_readdata_alloc();
+
+ if (unlikely(!data)) {
+ /* unwind: free every nfs_read_data queued so far, then the dreq */
+ while (!list_empty(list)) {
+ data = list_entry(list->next,
+ struct nfs_read_data, pages);
+ list_del(&data->pages);
+ nfs_readdata_free(data);
+ }
+ kref_put(&dreq->kref, nfs_direct_req_release);
+ return NULL;
+ }
+
+ /* the data->pages list head is used here to link onto dreq->list */
+ INIT_LIST_HEAD(&data->pages);
+ list_add(&data->pages, list);
+
+ /* data->req carries the dreq pointer; nfs_direct_read_result casts it back */
+ data->req = (struct nfs_page *) dreq;
+ reads++;
+ if (nbytes <= rsize)
+ break;
+ nbytes -= rsize;
+ }
+ kref_get(&dreq->kref);
+ atomic_set(&dreq->complete, reads);
+ return dreq;
+}
+
+/**
+ * nfs_direct_read_result - handle a read reply for a direct read request
+ * @data: address of NFS READ operation control block
+ * @status: status of this NFS READ operation
+ *
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete. This could be long *after* we are woken up in
+ * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
+ *
+ * A non-negative status adds the reply's byte count to dreq->count; a
+ * negative one is stored in dreq->error.  The call that brings
+ * dreq->complete to zero releases the user pages (dirtying them), wakes
+ * the waiter and drops one reference on the dreq.
+ */
+static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+{
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	if (unlikely(status < 0))
+		atomic_set(&dreq->error, status);
+	else
+		atomic_add(data->res.count, &dreq->count);
+
+	/* more replies outstanding: nothing else to do yet */
+	if (likely(!atomic_dec_and_test(&dreq->complete)))
+		return;
+
+	nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+	wake_up(&dreq->wait);
+	kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
+/**
+ * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
+ * @dreq: address of nfs_direct_req struct for this request
+ * @inode: target inode
+ * @ctx: target file open context
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ *
+ * For each nfs_read_data struct that was allocated on the list, dispatch
+ * an NFS READ operation
+ *
+ * Each READ covers at most rsize bytes.  The loop takes one
+ * nfs_read_data off dreq->list per iteration without checking for an
+ * empty list, so the list must hold one entry per rsize-sized piece of
+ * @count (nfs_direct_read_alloc sizes it that way).
+ */
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
+ struct inode *inode, struct nfs_open_context *ctx,
+ unsigned long user_addr, size_t count, loff_t file_offset)
+{
+ struct list_head *list = &dreq->list;
+ struct page **pages = dreq->pages;
+ unsigned int curpage, pgbase;
+ unsigned int rsize = NFS_SERVER(inode)->rsize;
+
+ curpage = 0;
+ /* offset of the buffer start within its first page */
+ pgbase = user_addr & ~PAGE_MASK;
+ do {
+ struct nfs_read_data *data;
+ unsigned int bytes;
+
+ /* this request reads min(count, rsize) bytes */
+ bytes = rsize;
+ if (count < rsize)
+ bytes = count;
+
+ data = list_entry(list->next, struct nfs_read_data, pages);
+ list_del_init(&data->pages);
+
+ data->inode = inode;
+ data->cred = ctx->cred;
+ data->args.fh = NFS_FH(inode);
+ data->args.context = ctx;
+ data->args.offset = file_offset;
+ data->args.pgbase = pgbase;
+ data->args.pages = &pages[curpage];
+ data->args.count = bytes;
+ data->res.fattr = &data->fattr;
+ data->res.eof = 0;
+ data->res.count = bytes;
+
+ NFS_PROTO(inode)->read_setup(data);
+
+ data->task.tk_cookie = (unsigned long) inode;
+ data->task.tk_calldata = data;
+ data->task.tk_release = nfs_readdata_release;
+ data->complete = nfs_direct_read_result;
+
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+
+ dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ bytes,
+ (unsigned long long)data->args.offset);
+
+ /* advance the file offset and the (page index, in-page offset) cursor */
+ file_offset += bytes;
+ pgbase += bytes;
+ curpage += pgbase >> PAGE_SHIFT;
+ pgbase &= ~PAGE_MASK;
+
+ count -= bytes;
+ } while (count != 0);
+}
+
+/**
+ * nfs_direct_read_wait - wait for I/O completion for direct reads
+ * @dreq: request on which we are to wait
+ * @intr: whether or not this wait can be interrupted
+ *
+ * Collects and returns the final error value/byte-count: a non-zero
+ * result of the interruptible wait takes precedence, then a recorded
+ * error, then the byte count.  Drops one reference on @dreq.
+ */
+static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
+{
+	int result = 0;
+
+	if (!intr) {
+		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
+	} else {
+		result = wait_event_interruptible(dreq->wait,
+					(atomic_read(&dreq->complete) == 0));
+	}
+
+	if (result == 0) {
+		result = atomic_read(&dreq->error);
+		if (result == 0)
+			result = atomic_read(&dreq->count);
+	}
+
+	kref_put(&dreq->kref, nfs_direct_req_release);
+	return (ssize_t) result;
+}
+
+/**
+ * nfs_direct_read_seg - Read in one iov segment.  Generate separate
+ *                        read RPCs for each "rsize" bytes.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ * @pages: array of addresses of page structs defining user's buffer
+ * @nr_pages: number of pages in the array
+ *
+ * Takes ownership of @pages in every case: on the normal path the pages
+ * and the array are released by nfs_direct_read_result once the last
+ * READ completes; if the request cannot be allocated they are released
+ * here before returning -ENOMEM.
+ *
+ * Returns the value collected by nfs_direct_read_wait, or -ENOMEM.
+ */
+static ssize_t nfs_direct_read_seg(struct inode *inode,
+		struct nfs_open_context *ctx, unsigned long user_addr,
+		size_t count, loff_t file_offset, struct page **pages,
+		unsigned int nr_pages)
+{
+	ssize_t result;
+	sigset_t oldset;
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_direct_req *dreq;
+
+	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+	if (!dreq) {
+		/*
+		 * No RPC will run, so nfs_direct_read_result will never
+		 * release the pinned pages; do it here to avoid leaking
+		 * them.  Nothing was read, so don't dirty them.
+		 */
+		nfs_free_user_pages(pages, nr_pages, 0);
+		return -ENOMEM;
+	}
+
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
+				 file_offset);
+	result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+	rpc_clnt_sigunmask(clnt, &oldset);
+
+	return result;
+}
+
+/**
+ * nfs_direct_read - For each iov segment, map the user's buffer
+ *                    then generate read RPCs.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @iov: array of vectors that define I/O buffer
+ * @file_offset: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * We've already pushed out any non-direct writes so that this read
+ * will see them when we read from the server.
+ *
+ * Returns the total number of bytes read.  If a segment fails or reads
+ * nothing, the bytes read so far are returned when there are any,
+ * otherwise that segment's result.  A short read ends the loop.
+ */
+static ssize_t
+nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
+		const struct iovec *iov, loff_t file_offset,
+		unsigned long nr_segs)
+{
+	ssize_t tot_bytes = 0;
+	unsigned long seg;
+
+	for (seg = 0; (seg < nr_segs) && (tot_bytes >= 0); seg++) {
+		const struct iovec *vec = &iov[seg];
+		unsigned long user_addr = (unsigned long) vec->iov_base;
+		size_t size = vec->iov_len;
+		struct page **pages;
+		int page_count;
+		ssize_t result;
+
+		page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
+		if (page_count < 0) {
+			/* nothing is pinned; only the array (if any) needs freeing */
+			nfs_free_user_pages(pages, 0, 0);
+			return (tot_bytes > 0) ? tot_bytes : page_count;
+		}
+
+		result = nfs_direct_read_seg(inode, ctx, user_addr, size,
+				file_offset, pages, page_count);
+		if (result <= 0)
+			return (tot_bytes > 0) ? tot_bytes : result;
+
+		tot_bytes += result;
+		file_offset += result;
+		if (result < size)
+			break;
+	}
+
+	return tot_bytes;
+}
+
+/**
+ * nfs_direct_write_seg - Write out one iov segment.  Generate separate
+ *                        write RPCs for each "wsize" bytes, then commit.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @user_addr: starting address of this segment of user's buffer
+ * @count: size of this segment
+ * @file_offset: offset in file to begin the operation
+ * @pages: array of addresses of page structs defining user's buffer
+ * @nr_pages: size of pages array
+ *
+ * Writes are sent NFS_UNSTABLE unless the inode is sync, the protocol is
+ * version 2, or the segment fits in a single wsize-sized write; in those
+ * cases NFS_FILE_SYNC is used.  If any reply is not FILE_SYNC, the data
+ * is committed afterwards.  Whenever the write verifier differs from the
+ * one returned by the first write, or the commit fails, the whole
+ * segment is resent with NFS_FILE_SYNC.
+ *
+ * Returns the number of bytes written, the result of the first write if
+ * it wrote nothing, or -ENOMEM.
+ */
+static ssize_t nfs_direct_write_seg(struct inode *inode,
+		struct nfs_open_context *ctx, unsigned long user_addr,
+		size_t count, loff_t file_offset, struct page **pages,
+		int nr_pages)
+{
+	const unsigned int wsize = NFS_SERVER(inode)->wsize;
+	size_t request;
+	int curpage, need_commit;
+	ssize_t result, tot_bytes;
+	struct nfs_writeverf first_verf;
+	struct nfs_write_data *wdata;
+
+	wdata = nfs_writedata_alloc();
+	if (!wdata)
+		return -ENOMEM;
+
+	wdata->inode = inode;
+	wdata->cred = ctx->cred;
+	wdata->args.fh = NFS_FH(inode);
+	wdata->args.context = ctx;
+	wdata->args.stable = NFS_UNSTABLE;
+	if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
+		wdata->args.stable = NFS_FILE_SYNC;
+	wdata->res.fattr = &wdata->fattr;
+	wdata->res.verf = &wdata->verf;
+
+	nfs_begin_data_update(inode);
+retry:
+	/* (re)start from the beginning of the segment */
+	need_commit = 0;
+	tot_bytes = 0;
+	curpage = 0;
+	request = count;
+	wdata->args.pgbase = user_addr & ~PAGE_MASK;
+	wdata->args.offset = file_offset;
+	do {
+		wdata->args.count = request;
+		if (wdata->args.count > wsize)
+			wdata->args.count = wsize;
+		wdata->args.pages = &pages[curpage];
+
+		dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
+			wdata->args.count, (long long) wdata->args.offset,
+			user_addr + tot_bytes, wdata->args.pgbase, curpage);
+
+		lock_kernel();
+		result = NFS_PROTO(inode)->write(wdata);
+		unlock_kernel();
+
+		if (result <= 0) {
+			if (tot_bytes > 0)
+				break;
+			goto out;
+		}
+
+		/* remember the verifier returned by the first write */
+		if (tot_bytes == 0)
+			memcpy(&first_verf.verifier, &wdata->verf.verifier,
+					sizeof(first_verf.verifier));
+		if (wdata->verf.committed != NFS_FILE_SYNC) {
+			need_commit = 1;
+			/*
+			 * Resend synchronously only if the verifier changed
+			 * since the first write.  (A stray ';' after this
+			 * condition used to make the retry unconditional.)
+			 */
+			if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
+					sizeof(first_verf.verifier)))
+				goto sync_retry;
+		}
+
+		tot_bytes += result;
+
+		/* in case of a short write: stop now, let the app recover */
+		if (result < wdata->args.count)
+			break;
+
+		wdata->args.offset += result;
+		wdata->args.pgbase += result;
+		curpage += wdata->args.pgbase >> PAGE_SHIFT;
+		wdata->args.pgbase &= ~PAGE_MASK;
+		request -= result;
+	} while (request != 0);
+
+	/*
+	 * Commit data written so far, even in the event of an error
+	 */
+	if (need_commit) {
+		wdata->args.count = tot_bytes;
+		wdata->args.offset = file_offset;
+
+		lock_kernel();
+		result = NFS_PROTO(inode)->commit(wdata);
+		unlock_kernel();
+
+		if (result < 0 || memcmp(&first_verf.verifier,
+					 &wdata->verf.verifier,
+					 sizeof(first_verf.verifier)) != 0)
+			goto sync_retry;
+	}
+	result = tot_bytes;
+
+out:
+	nfs_end_data_update_defer(inode);
+	nfs_writedata_free(wdata);
+	return result;
+
+sync_retry:
+	wdata->args.stable = NFS_FILE_SYNC;
+	goto retry;
+}
+
+/**
+ * nfs_direct_write - For each iov segment, map the user's buffer
+ *                     then generate write and commit RPCs.
+ * @inode: target inode
+ * @ctx: target file open context
+ * @iov: array of vectors that define I/O buffer
+ * @file_offset: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * Upon return, generic_file_direct_IO invalidates any cached pages
+ * that non-direct readers might access, so they will pick up these
+ * writes immediately.
+ *
+ * Returns the total number of bytes written.  If a segment fails or
+ * writes nothing, the bytes written so far are returned when there are
+ * any, otherwise that segment's result.  A short write ends the loop.
+ */
+static ssize_t nfs_direct_write(struct inode *inode,
+		struct nfs_open_context *ctx, const struct iovec *iov,
+		loff_t file_offset, unsigned long nr_segs)
+{
+	ssize_t tot_bytes = 0;
+	unsigned long seg;
+
+	for (seg = 0; (seg < nr_segs) && (tot_bytes >= 0); seg++) {
+		const struct iovec *vec = &iov[seg];
+		unsigned long user_addr = (unsigned long) vec->iov_base;
+		size_t size = vec->iov_len;
+		struct page **pages;
+		int page_count;
+		ssize_t result;
+
+		page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
+		if (page_count < 0) {
+			/* nothing is pinned; only the array (if any) needs freeing */
+			nfs_free_user_pages(pages, 0, 0);
+			return (tot_bytes > 0) ? tot_bytes : page_count;
+		}
+
+		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
+				file_offset, pages, page_count);
+		/* the write is synchronous, so the pages can go right away */
+		nfs_free_user_pages(pages, page_count, 0);
+
+		if (result <= 0)
+			return (tot_bytes > 0) ? tot_bytes : result;
+
+		tot_bytes += result;
+		file_offset += result;
+		if (result < size)
+			break;
+	}
+	return tot_bytes;
+}
+
+/**
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @file_offset: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * Returns -EINVAL for an asynchronous iocb or an unknown direction,
+ * otherwise the result of nfs_direct_read or nfs_direct_write.
+ */
+ssize_t
+nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+		loff_t file_offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct dentry *dentry = file->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	struct nfs_open_context *ctx;
+
+	/*
+	 * No support for async yet
+	 */
+	if (!is_sync_kiocb(iocb))
+		return -EINVAL;
+
+	ctx = (struct nfs_open_context *)file->private_data;
+
+	if (rw == READ) {
+		dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
+				dentry->d_name.name, file_offset, nr_segs);
+
+		return nfs_direct_read(inode, ctx, iov,
+						file_offset, nr_segs);
+	}
+	if (rw == WRITE) {
+		dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
+				dentry->d_name.name, file_offset, nr_segs);
+
+		return nfs_direct_write(inode, ctx, iov,
+						file_offset, nr_segs);
+	}
+	return -EINVAL;
+}
+
+/**
+ * nfs_file_direct_read - file direct read operation for NFS files
+ * @iocb: target I/O control block
+ * @buf: user's buffer into which to read data
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
+ *
+ * We use this function for direct reads instead of calling
+ * generic_file_aio_read() in order to avoid gfar's check to see if
+ * the request starts before the end of the file.  For that check
+ * to work, we must generate a GETATTR before each direct read, and
+ * even then there is a window between the GETATTR and the subsequent
+ * READ where the file size could change.  So our preference is simply
+ * to do all reads the application wants, and the server will take
+ * care of managing the end of file boundary.
+ *
+ * This function also eliminates unnecessarily updating the file's
+ * atime locally, as the NFS server sets the file's atime, and this
+ * client must read the updated atime from the server back into its
+ * cache.
+ *
+ * Returns the number of bytes read (updating the iocb position), 0 for
+ * a zero-length request, -EINVAL for an asynchronous iocb or a count
+ * that is negative when viewed as ssize_t, -EFAULT if the buffer fails
+ * access_ok(), or an error from flushing cached pages or from the read.
+ */
+ssize_t
+nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+{
+	ssize_t retval = -EINVAL;
+	loff_t *ppos = &iocb->ki_pos;
+	struct file *file = iocb->ki_filp;
+	struct nfs_open_context *ctx =
+			(struct nfs_open_context *) file->private_data;
+	struct dentry *dentry = file->f_dentry;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = count,
+	};
+
+	dprintk("nfs: direct read(%s/%s, %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long) pos);
+
+	if (!is_sync_kiocb(iocb))
+		goto out;
+	/*
+	 * count is unsigned, so a plain "count < 0" can never be true;
+	 * check it as the signed type the return value uses.
+	 */
+	if ((ssize_t) count < 0)
+		goto out;
+	retval = -EFAULT;
+	if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+		goto out;
+	retval = 0;
+	if (!count)
+		goto out;
+
+	/* write back any cached pages first so the server has current data */
+	if (mapping->nrpages) {
+		retval = filemap_fdatawrite(mapping);
+		if (retval == 0)
+			retval = nfs_wb_all(inode);
+		if (retval == 0)
+			retval = filemap_fdatawait(mapping);
+		if (retval)
+			goto out;
+	}
+
+	retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+	if (retval > 0)
+		*ppos = pos + retval;
+
+out:
+	return retval;
+}
+
+/**
+ * nfs_file_direct_write - file direct write operation for NFS files
+ * @iocb: target I/O control block
+ * @buf: user's buffer from which to write data
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
+ *
+ * We use this function for direct writes instead of calling
+ * generic_file_aio_write() in order to avoid taking the inode
+ * semaphore and updating the i_size.  The NFS server will set
+ * the new i_size and this client must read the updated size
+ * back into its cache.  We let the server do generic write
+ * parameter checking and report problems.
+ *
+ * We also avoid an unnecessary invocation of generic_osync_inode(),
+ * as it is fairly meaningless to sync the metadata of an NFS file.
+ *
+ * We eliminate local atime updates, see direct read above.
+ *
+ * We avoid unnecessary page cache invalidations for normal cached
+ * readers of this file.
+ *
+ * Note that O_APPEND is not supported for NFS direct writes, as there
+ * is no atomic O_APPEND write facility in the NFS protocol.
+ *
+ * Returns the number of bytes written (updating the iocb position), 0
+ * for a zero-length request, -EINVAL for an asynchronous iocb, a
+ * negative @pos or a count that is negative when viewed as ssize_t,
+ * -EFAULT if the buffer fails access_ok(), a pending file->f_error,
+ * -EFBIG (with SIGXFSZ) when @pos is at or beyond RLIMIT_FSIZE, or an
+ * error from flushing cached pages or from the write.
+ */
+ssize_t
+nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+{
+	ssize_t retval = -EINVAL;
+	loff_t *ppos = &iocb->ki_pos;
+	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	struct file *file = iocb->ki_filp;
+	struct nfs_open_context *ctx =
+			(struct nfs_open_context *) file->private_data;
+	struct dentry *dentry = file->f_dentry;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct iovec iov = {
+		.iov_base = (char __user *)buf,
+		.iov_len = count,
+	};
+
+	dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		inode->i_ino, (unsigned long) count, (unsigned long) pos);
+
+	if (!is_sync_kiocb(iocb))
+		goto out;
+	/*
+	 * count is unsigned, so a plain "count < 0" can never be true;
+	 * check it as the signed type the return value uses.
+	 */
+	if ((ssize_t) count < 0)
+		goto out;
+	if (pos < 0)
+		goto out;
+	retval = -EFAULT;
+	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+		goto out;
+	/* report, and clear, an error left over on the file */
+	if (file->f_error) {
+		retval = file->f_error;
+		file->f_error = 0;
+		goto out;
+	}
+	retval = -EFBIG;
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			goto out;
+		}
+		/* clamp the write so that it ends at the file size limit */
+		if (count > limit - (unsigned long) pos)
+			count = limit - (unsigned long) pos;
+	}
+	retval = 0;
+	if (!count)
+		goto out;
+
+	/* write back any cached pages before writing directly */
+	if (mapping->nrpages) {
+		retval = filemap_fdatawrite(mapping);
+		if (retval == 0)
+			retval = nfs_wb_all(inode);
+		if (retval == 0)
+			retval = filemap_fdatawait(mapping);
+		if (retval)
+			goto out;
+	}
+
+	retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+	if (mapping->nrpages)
+		invalidate_inode_pages2(mapping);
+	if (retval > 0)
+		*ppos = pos + retval;
+
+out:
+	return retval;
+}
+
+/*
+ * Create the slab cache from which nfs_direct_req structures are
+ * allocated.  Returns 0 on success or -ENOMEM if the cache could not
+ * be created.
+ */
+int nfs_init_directcache(void)
+{
+	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
+						sizeof(struct nfs_direct_req),
+						0, SLAB_RECLAIM_ACCOUNT,
+						NULL, NULL);
+
+	return (nfs_direct_cachep != NULL) ? 0 : -ENOMEM;
+}
+
+/*
+ * Destroy the nfs_direct_req slab cache, logging a message if
+ * kmem_cache_destroy() returns non-zero.
+ */
+void nfs_destroy_directcache(void)
+{
+	int leftover = kmem_cache_destroy(nfs_direct_cachep);
+
+	if (leftover)
+		printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
+}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
new file mode 100644
index 000000000000..f06eee6dcff5
--- /dev/null
+++ b/fs/nfs/file.c
@@ -0,0 +1,484 @@
+/*
+ * linux/fs/nfs/file.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ *
+ * Changes Copyright (C) 1994 by Florian La Roche
+ * - Do not copy data too often around in the kernel.
+ * - In nfs_file_read the return value of kmalloc wasn't checked.
+ * - Put in a better version of read look-ahead buffering. Original idea
+ * and implementation by Wai S Kok elekokws@ee.nus.sg.
+ *
+ * Expire cache on write to a file by Wai S Kok (Oct 1994).
+ *
+ * Total rewrite of read side for new NFS buffer cache.. Linus.
+ *
+ * nfs regular file handling functions
+ */
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY NFSDBG_FILE
+
+/*
+ * Forward declarations of the handlers referenced by nfs_file_operations
+ * below; all of them are defined later in this file.
+ */
+static int nfs_file_open(struct inode *, struct file *);
+static int nfs_file_release(struct inode *, struct file *);
+static int nfs_file_mmap(struct file *, struct vm_area_struct *);
+static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t);
+static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t);
+static int nfs_file_flush(struct file *);
+static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
+static int nfs_check_flags(int flags);
+static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
+static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
+
+/*
+ * File operations for NFS regular files.  The .read/.write slots use the
+ * generic do_sync_read()/do_sync_write() helpers and .llseek uses
+ * remote_llseek(); the remaining slots point at the nfs_* handlers
+ * defined in this file.
+ */
+struct file_operations nfs_file_operations = {
+ .llseek = remote_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = nfs_file_read,
+ .aio_write = nfs_file_write,
+ .mmap = nfs_file_mmap,
+ .open = nfs_file_open,
+ .flush = nfs_file_flush,
+ .release = nfs_file_release,
+ .fsync = nfs_fsync,
+ .lock = nfs_lock,
+ .flock = nfs_flock,
+ .sendfile = nfs_file_sendfile,
+ .check_flags = nfs_check_flags,
+};
+
+/*
+ * Inode operations for NFS regular files: permission checks and
+ * attribute get/set are routed to the nfs_* implementations.
+ */
+struct inode_operations nfs_file_inode_operations = {
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+};
+
+/* Hack for future NFS swap support */
+/*
+ * Fallback definition: if IS_SWAPFILE is not already defined, it always
+ * evaluates to 0, so the swap-file check in nfs_file_write() never fires.
+ */
+#ifndef IS_SWAPFILE
+# define IS_SWAPFILE(inode) (0)
+#endif
+
+/*
+ * Validate a set of open flags.
+ *
+ * Returns -EINVAL when both O_APPEND and O_DIRECT are set, 0 otherwise.
+ */
+static int nfs_check_flags(int flags)
+{
+	const int forbidden = O_APPEND | O_DIRECT;
+
+	return ((flags & forbidden) == forbidden) ? -EINVAL : 0;
+}
+
+/*
+ * Open file
+ *
+ * Rejects flag combinations refused by nfs_check_flags(), then calls the
+ * file_open operation from the server's rpc_ops if one is provided.
+ * When no such operation exists the function returns 0.
+ */
+static int
+nfs_file_open(struct inode *inode, struct file *filp)
+{
+	int (*proto_open)(struct inode *, struct file *);
+	int err = nfs_check_flags(filp->f_flags);
+
+	if (err != 0)
+		return err;
+
+	lock_kernel();
+	/* Do NFSv4 open() call */
+	proto_open = NFS_SERVER(inode)->rpc_ops->file_open;
+	if (proto_open != NULL)
+		err = proto_open(inode, filp);
+	unlock_kernel();
+
+	return err;
+}
+
+/*
+ * Release file.  For files opened for writing, writeback of the mapping is
+ * started first (its result is not checked); the protocol's file_release
+ * operation then supplies the return value.
+ */
+static int
+nfs_file_release(struct inode *inode, struct file *filp)
+{
+	const int opened_for_write = (filp->f_mode & FMODE_WRITE) != 0;
+
+	/* Ensure that dirty pages are flushed out with the right creds */
+	if (opened_for_write)
+		filemap_fdatawrite(filp->f_mapping);
+
+	return NFS_PROTO(inode)->file_release(inode, filp);
+}
+
+/*
+ * Flush all dirty pages, and check for write errors.
+ *
+ * Files not opened for writing return 0 immediately.  Otherwise all
+ * pending writes are flushed with nfs_wb_all(); if that succeeds, the
+ * error recorded in the open context is returned and cleared.  When no
+ * error was recorded and no read delegation is held, the inode is
+ * revalidated.
+ */
+static int
+nfs_file_flush(struct file *file)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct inode *inode = file->f_dentry->d_inode;
+	int err;
+
+	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+	if (!(file->f_mode & FMODE_WRITE))
+		return 0;
+
+	lock_kernel();
+	/* Ensure that data+attribute caches are up to date after close() */
+	err = nfs_wb_all(inode);
+	if (err != 0)
+		goto out_unlock;
+
+	/* Report and reset any write error saved on the open context. */
+	err = ctx->error;
+	ctx->error = 0;
+	if (err != 0)
+		goto out_unlock;
+
+	if (!nfs_have_delegation(inode, FMODE_READ))
+		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+out_unlock:
+	unlock_kernel();
+	return err;
+}
+
+/*
+ * Read from a file.
+ *
+ * With CONFIG_NFS_DIRECTIO, O_DIRECT reads are handed to
+ * nfs_file_direct_read().  Otherwise the inode is revalidated and the
+ * read is performed by generic_file_aio_read().
+ *
+ * Returns the result of the read, or the error from revalidation.
+ */
+static ssize_t
+nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
+{
+	struct dentry * dentry = iocb->ki_filp->f_dentry;
+	struct inode * inode = dentry->d_inode;
+	ssize_t result;
+
+#ifdef CONFIG_NFS_DIRECTIO
+	if (iocb->ki_filp->f_flags & O_DIRECT)
+		return nfs_file_direct_read(iocb, buf, count, pos);
+#endif
+
+	/*
+	 * Print the offset as a 64-bit value (as nfs_file_sendfile() does) so
+	 * that it is not truncated where unsigned long is 32 bits wide.
+	 */
+	dfprintk(VFS, "nfs: read(%s/%s, %lu@%Lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long long) pos);
+
+	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (!result)
+		result = generic_file_aio_read(iocb, buf, count, pos);
+	return result;
+}
+
+/*
+ * sendfile() handler: revalidate the inode, then let
+ * generic_file_sendfile() do the transfer.  A revalidation error is
+ * returned as-is.
+ */
+static ssize_t
+nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
+		read_actor_t actor, void *target)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	ssize_t err;
+
+	dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		(unsigned long) count, (unsigned long long) *ppos);
+
+	err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (err != 0)
+		return err;
+
+	return generic_file_sendfile(filp, ppos, count, actor, target);
+}
+
+/*
+ * mmap() handler: revalidate the inode and, if that succeeds, set up the
+ * mapping with generic_file_mmap().
+ */
+static int
+nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	struct dentry *dentry = file->f_dentry;
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name);
+
+	err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (err != 0)
+		return err;
+
+	return generic_file_mmap(file, vma);
+}
+
+/*
+ * Flush any dirty pages for this process, and check for write errors.
+ * The return status from this call provides a reliable indication of
+ * whether any write errors occurred for this process.
+ *
+ * If nfs_wb_all() succeeds, the error saved in the open context is
+ * returned and reset to zero.  The datasync argument is not consulted.
+ */
+static int
+nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+	lock_kernel();
+	err = nfs_wb_all(inode);
+	if (err == 0) {
+		err = ctx->error;
+		ctx->error = 0;
+	}
+	unlock_kernel();
+
+	return err;
+}
+
+/*
+ * This does the "real" work of the write. The generic routine has
+ * allocated the page, locked it, done all the page alignment stuff
+ * calculations etc. Now we should just copy the data from user
+ * space and write it back to the real medium..
+ *
+ * If the writer ends up delaying the write, the writer needs to
+ * increment the page use counts until he is done with the page.
+ *
+ * nfs_prepare_write() itself only forwards to nfs_flush_incompatible();
+ * the offset and to arguments are not used here.
+ */
+static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+	int err = nfs_flush_incompatible(file, page);
+
+	return err;
+}
+
+/*
+ * Commit the byte range [offset, to) of the page by calling
+ * nfs_updatepage() under the big kernel lock, and return its status.
+ */
+static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+	const unsigned nbytes = to - offset;
+	long result;
+
+	lock_kernel();
+	result = nfs_updatepage(file, page, offset, nbytes);
+	unlock_kernel();
+
+	return result;
+}
+
+/*
+ * Address space operations for NFS regular files.  The direct_IO slot is
+ * only populated when CONFIG_NFS_DIRECTIO is enabled.
+ */
+struct address_space_operations nfs_file_aops = {
+ .readpage = nfs_readpage,
+ .readpages = nfs_readpages,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+ .writepage = nfs_writepage,
+ .writepages = nfs_writepages,
+ .prepare_write = nfs_prepare_write,
+ .commit_write = nfs_commit_write,
+#ifdef CONFIG_NFS_DIRECTIO
+ .direct_IO = nfs_direct_IO,
+#endif
+};
+
+/*
+ * Write to a file (through the page cache).
+ *
+ * With CONFIG_NFS_DIRECTIO, O_DIRECT writes are handed to
+ * nfs_file_direct_write() instead.
+ *
+ * Returns -EBUSY if the inode is an active swap file, the error from
+ * inode revalidation if that fails, 0 for a zero-length write, and
+ * otherwise the result of generic_file_aio_write().
+ */
+static ssize_t
+nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+{
+	struct dentry * dentry = iocb->ki_filp->f_dentry;
+	struct inode * inode = dentry->d_inode;
+	ssize_t result;
+
+#ifdef CONFIG_NFS_DIRECTIO
+	if (iocb->ki_filp->f_flags & O_DIRECT)
+		return nfs_file_direct_write(iocb, buf, count, pos);
+#endif
+
+	/*
+	 * Print the offset as a 64-bit value (as nfs_file_sendfile() does) so
+	 * that it is not truncated where unsigned long is 32 bits wide.
+	 */
+	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Lu)\n",
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		inode->i_ino, (unsigned long) count, (unsigned long long) pos);
+
+	if (IS_SWAPFILE(inode)) {
+		printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
+		return -EBUSY;
+	}
+
+	result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (result)
+		return result;
+
+	/* Nothing to write: report zero bytes written. */
+	if (!count)
+		return 0;
+
+	return generic_file_aio_write(iocb, buf, count, pos);
+}
+
+/*
+ * Test for a conflicting lock.
+ *
+ * On "-onolock" mounts (NFS_MOUNT_NONLM set) the test is done locally with
+ * posix_test_lock(): fl is marked F_UNLCK and, if a conflicting lock was
+ * found, overwritten with a copy of it; the return value is 0.  Otherwise
+ * the request goes to the protocol's lock operation and its status is
+ * returned.
+ */
+static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_mapping->host;
+	struct file_lock *conflict;
+	int status = 0;
+
+	lock_kernel();
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) {
+		/* Use local locking if mounted with "-onolock" */
+		conflict = posix_test_lock(filp, fl);
+		fl->fl_type = F_UNLCK;
+		if (conflict != NULL)
+			memcpy(fl, conflict, sizeof(*fl));
+	} else {
+		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+	}
+	unlock_kernel();
+
+	return status;
+}
+
+/*
+ * Apply a lock to the local VFS lock tables, choosing the POSIX or flock
+ * helper according to fl->fl_flags.  Any other flag combination is a BUG().
+ * A negative result is logged as a warning and returned to the caller.
+ */
+static int do_vfs_lock(struct file *file, struct file_lock *fl)
+{
+	const int kind = fl->fl_flags & (FL_POSIX|FL_FLOCK);
+	int res = 0;
+
+	if (kind == FL_POSIX)
+		res = posix_lock_file_wait(file, fl);
+	else if (kind == FL_FLOCK)
+		res = flock_lock_file_wait(file, fl);
+	else
+		BUG();
+
+	if (res < 0)
+		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
+				__FUNCTION__);
+	return res;
+}
+
+/*
+ * Release a lock.
+ *
+ * The signal mask is changed with rpc_clnt_sigmask() for the duration of
+ * the call and restored with rpc_clnt_sigunmask() before returning.
+ * Pending writes are flushed first; the results of those flush calls are
+ * not checked.  The unlock itself goes to the protocol's lock operation,
+ * or to do_vfs_lock() on "-onolock" mounts, and its status is returned.
+ */
+static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+ struct inode *inode = filp->f_mapping->host;
+ sigset_t oldset;
+ int status;
+
+ rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
+ /*
+ * Flush all pending writes before doing anything
+ * with locks..
+ */
+ filemap_fdatawrite(filp->f_mapping);
+ down(&inode->i_sem);
+ nfs_wb_all(inode);
+ up(&inode->i_sem);
+ filemap_fdatawait(filp->f_mapping);
+
+ /* NOTE: special case
+ * If we're signalled while cleaning up locks on process exit, we
+ * still need to complete the unlock.
+ */
+ lock_kernel();
+ /* Use local locking if mounted with "-onolock" */
+ if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+ status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+ else
+ status = do_vfs_lock(filp, fl);
+ unlock_kernel();
+ rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
+ return status;
+}
+
+/*
+ * Acquire a lock.
+ *
+ * Sequence: change the signal mask with rpc_clnt_sigmask(), flush pending
+ * writes (any flush error aborts the request), take the lock through the
+ * protocol's lock operation (or do_vfs_lock() on "-onolock" mounts), then
+ * flush again and zap the inode caches.  The second flush does not check
+ * for errors.  The signal mask is restored on every exit path.
+ *
+ * Returns 0 on success or a negative error from the flush or lock step.
+ */
+static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
+{
+ struct inode *inode = filp->f_mapping->host;
+ sigset_t oldset;
+ int status;
+
+ rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
+ /*
+ * Flush all pending writes before doing anything
+ * with locks..
+ */
+ status = filemap_fdatawrite(filp->f_mapping);
+ if (status == 0) {
+ down(&inode->i_sem);
+ status = nfs_wb_all(inode);
+ up(&inode->i_sem);
+ if (status == 0)
+ status = filemap_fdatawait(filp->f_mapping);
+ }
+ if (status < 0)
+ goto out;
+
+ lock_kernel();
+ /* Use local locking if mounted with "-onolock" */
+ if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
+ status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+ /* If we were signalled we still need to ensure that
+ * we clean up any state on the server. We therefore
+ * record the lock call as having succeeded in order to
+ * ensure that locks_remove_posix() cleans it out when
+ * the process exits.
+ */
+ if (status == -EINTR || status == -ERESTARTSYS)
+ do_vfs_lock(filp, fl);
+ } else
+ status = do_vfs_lock(filp, fl);
+ unlock_kernel();
+ /* The interrupted status is still returned to the caller here. */
+ if (status < 0)
+ goto out;
+ /*
+ * Make sure we clear the cache whenever we try to get the lock.
+ * This makes locking act as a cache coherency point.
+ */
+ filemap_fdatawrite(filp->f_mapping);
+ down(&inode->i_sem);
+ nfs_wb_all(inode); /* we may have slept */
+ up(&inode->i_sem);
+ filemap_fdatawait(filp->f_mapping);
+ nfs_zap_caches(inode);
+out:
+ rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
+ return status;
+}
+
+/*
+ * Lock a (portion of) a file
+ *
+ * Dispatches a POSIX lock request: GETLK queries go to do_getlk(), F_UNLCK
+ * requests to do_unlk(), everything else to do_setlk().
+ *
+ * Returns -EINVAL if the file has no inode, -ENOLCK if the inode's mode
+ * bits mark it for mandatory locking, otherwise the helper's status.
+ */
+static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode * inode = filp->f_mapping->host;
+
+	/* Must be checked before the debug trace below dereferences inode. */
+	if (!inode)
+		return -EINVAL;
+
+	dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
+			inode->i_sb->s_id, inode->i_ino,
+			fl->fl_type, fl->fl_flags,
+			(long long)fl->fl_start, (long long)fl->fl_end);
+
+	/* No mandatory locks over NFS */
+	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+		return -ENOLCK;
+
+	if (IS_GETLK(cmd))
+		return do_getlk(filp, cmd, fl);
+	if (fl->fl_type == F_UNLCK)
+		return do_unlk(filp, cmd, fl);
+	return do_setlk(filp, cmd, fl);
+}
+
+/*
+ * Apply or remove a flock()-style lock on a whole file
+ *
+ * The request is rewritten to cover the range 0..OFFSET_MAX with the
+ * struct file as lock owner, then handed to do_unlk() for F_UNLCK or
+ * do_setlk() otherwise.
+ *
+ * Returns -EINVAL if the file has no inode, -ENOLCK if the request is not
+ * flagged FL_FLOCK, otherwise the helper's status.
+ */
+static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode * inode = filp->f_mapping->host;
+
+	/* Must be checked before the debug trace below dereferences inode. */
+	if (!inode)
+		return -EINVAL;
+
+	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
+			inode->i_sb->s_id, inode->i_ino,
+			fl->fl_type, fl->fl_flags);
+
+	/* Only requests flagged as BSD flock() locks are handled here. */
+	if (!(fl->fl_flags & FL_FLOCK))
+		return -ENOLCK;
+
+	/* We're simulating flock() locks using posix locks on the server */
+	fl->fl_owner = (fl_owner_t)filp;
+	fl->fl_start = 0;
+	fl->fl_end = OFFSET_MAX;
+
+	if (fl->fl_type == F_UNLCK)
+		return do_unlk(filp, cmd, fl);
+	return do_setlk(filp, cmd, fl);
+}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
new file mode 100644
index 000000000000..b74c4e3a64e2
--- /dev/null
+++ b/fs/nfs/idmap.c
@@ -0,0 +1,498 @@
+/*
+ * fs/nfs/idmap.c
+ *
+ * UID and GID to name mapping for clients.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/sched.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/workqueue.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_fs.h>
+
+#include <linux/nfs_idmap.h>
+
+#define IDMAP_HASH_SZ 128
+
+/* One cached id <-> name mapping (a single hash bucket). */
+struct idmap_hashent {
+ /* Numeric id of the mapping */
+ __u32 ih_id;
+ /* Length of ih_name; idmap_lookup_id() treats 0 as "no entry" */
+ int ih_namelen;
+ /* Name; idmap_update_entry() stores it NUL-terminated */
+ char ih_name[IDMAP_NAMESZ];
+};
+
+/*
+ * Fixed-size cache of mappings with one entry per bucket; the bucket is
+ * picked by fnvhash32() of the name or id, modulo IDMAP_HASH_SZ.
+ */
+struct idmap_hashtable {
+ /* IDMAP_TYPE_USER or IDMAP_TYPE_GROUP, set in nfs_idmap_new() */
+ __u8 h_type;
+ struct idmap_hashent h_entries[IDMAP_HASH_SZ];
+};
+
+/* Per-client idmapper state: the upcall pipe plus user and group caches. */
+struct idmap {
+ /* Pipe path, built as "<client pathname>/idmap" in nfs_idmap_new() */
+ char idmap_path[48];
+ /* Dentry returned by rpc_mkpipe() for that path */
+ struct dentry *idmap_dentry;
+ /* Upcallers sleep here until a downcall or message teardown wakes them */
+ wait_queue_head_t idmap_wq;
+ /* Message buffer shared by the current upcall and its reply */
+ struct idmap_msg idmap_im;
+ struct semaphore idmap_lock; /* Serializes upcalls */
+ struct semaphore idmap_im_lock; /* Protects the hashtable */
+ struct idmap_hashtable idmap_user_hash;
+ struct idmap_hashtable idmap_group_hash;
+};
+
+/* Forward declarations: pipe callbacks and the hash function defined below. */
+static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
+ char __user *, size_t);
+static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
+ size_t);
+void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
+
+static unsigned int fnvhash32(const void *, size_t);
+
+/* Callbacks handed to rpc_mkpipe() for the idmap pipe. */
+static struct rpc_pipe_ops idmap_upcall_ops = {
+ .upcall = idmap_pipe_upcall,
+ .downcall = idmap_pipe_downcall,
+ .destroy_msg = idmap_pipe_destroy_msg,
+};
+
+/*
+ * Allocate and attach idmapper state to an NFSv4 client.
+ *
+ * Does nothing if the client already has an idmap.  Allocation or pipe
+ * creation failures are silent: clp->cl_idmap simply stays NULL.
+ */
+void
+nfs_idmap_new(struct nfs4_client *clp)
+{
+	struct idmap *idmap;
+
+	if (clp->cl_idmap != NULL)
+		return;
+	if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+		return;
+
+	memset(idmap, 0, sizeof(*idmap));
+
+	/*
+	 * Initialise the locks, wait queue and table types before the pipe
+	 * is created: rpc_mkpipe() receives this structure as its private
+	 * data, and idmap_pipe_downcall() takes idmap_im_lock, so nothing
+	 * may be left uninitialised once the pipe exists.
+	 */
+	init_MUTEX(&idmap->idmap_lock);
+	init_MUTEX(&idmap->idmap_im_lock);
+	init_waitqueue_head(&idmap->idmap_wq);
+	idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
+	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
+
+	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
+	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
+
+	idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+	    idmap, &idmap_upcall_ops, 0);
+	if (IS_ERR(idmap->idmap_dentry)) {
+		kfree(idmap);
+		return;
+	}
+
+	clp->cl_idmap = idmap;
+}
+
+/*
+ * Detach and free a client's idmapper state, unlinking its pipe first.
+ * A client without an idmap is left untouched.
+ */
+void
+nfs_idmap_delete(struct nfs4_client *clp)
+{
+	struct idmap *map = clp->cl_idmap;
+
+	if (map != NULL) {
+		rpc_unlink(map->idmap_path);
+		clp->cl_idmap = NULL;
+		kfree(map);
+	}
+}
+
+/*
+ * Helper routines for manipulating the hashtable
+ */
+
+/* Return the bucket that a name of the given length hashes to. */
+static inline struct idmap_hashent *
+idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
+{
+	unsigned int bucket = fnvhash32(name, len) % IDMAP_HASH_SZ;
+
+	return &h->h_entries[bucket];
+}
+
+/*
+ * Look up a cached entry by name.  Returns the bucket only if it holds
+ * exactly this name (same length, same bytes); otherwise NULL.
+ */
+static struct idmap_hashent *
+idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
+{
+	struct idmap_hashent *slot = idmap_name_hash(h, name, len);
+
+	if (slot->ih_namelen == len && memcmp(slot->ih_name, name, len) == 0)
+		return slot;
+	return NULL;
+}
+
+/* Return the bucket that a numeric id hashes to. */
+static inline struct idmap_hashent *
+idmap_id_hash(struct idmap_hashtable* h, __u32 id)
+{
+	unsigned int bucket = fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ;
+
+	return &h->h_entries[bucket];
+}
+
+/*
+ * Look up a cached entry by id.  Returns the bucket only if it holds this
+ * id and a non-empty name; otherwise NULL.
+ */
+static struct idmap_hashent *
+idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
+{
+	struct idmap_hashent *slot = idmap_id_hash(h, id);
+
+	if (slot->ih_id == id && slot->ih_namelen != 0)
+		return slot;
+	return NULL;
+}
+
+/*
+ * Routines for allocating new entries in the hashtable.
+ * For now, we just have 1 entry per bucket, so it's all
+ * pretty trivial.
+ */
+
+/* "Allocate" the entry for a name: simply the bucket it hashes to. */
+static inline struct idmap_hashent *
+idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len)
+{
+	struct idmap_hashent *slot = idmap_name_hash(h, name, len);
+
+	return slot;
+}
+
+/* "Allocate" the entry for an id: simply the bucket it hashes to. */
+static inline struct idmap_hashent *
+idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
+{
+	struct idmap_hashent *slot = idmap_id_hash(h, id);
+
+	return slot;
+}
+
+/*
+ * Overwrite a cache entry with a new name/id pair.  The name is stored
+ * NUL-terminated, so namelen must be smaller than the size of ih_name.
+ */
+static void
+idmap_update_entry(struct idmap_hashent *he, const char *name,
+		size_t namelen, __u32 id)
+{
+	char *dst = he->ih_name;
+
+	memcpy(dst, name, namelen);
+	dst[namelen] = '\0';
+	he->ih_namelen = namelen;
+	he->ih_id = id;
+}
+
+/*
+ * Name -> ID
+ *
+ * Translate a name into a numeric id, using table h as a cache and
+ * falling back to an upcall through the idmap pipe on a miss.
+ *
+ * Trailing NUL bytes are stripped from the name first.  Returns 0 with
+ * *id filled in on success, -EINVAL if the stripped name is empty or does
+ * not fit in IDMAP_NAMESZ, and -EIO if the upcall could not be queued or
+ * the reply did not report success.
+ */
+static int
+nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
+ const char *name, size_t namelen, __u32 *id)
+{
+ struct rpc_pipe_msg msg;
+ struct idmap_msg *im;
+ struct idmap_hashent *he;
+ DECLARE_WAITQUEUE(wq, current);
+ int ret = -EIO;
+
+ im = &idmap->idmap_im;
+
+ /*
+ * String sanity checks
+ * Note that the userland daemon expects NUL terminated strings
+ */
+ for (;;) {
+ if (namelen == 0)
+ return -EINVAL;
+ if (name[namelen-1] != '\0')
+ break;
+ namelen--;
+ }
+ if (namelen >= IDMAP_NAMESZ)
+ return -EINVAL;
+
+ down(&idmap->idmap_lock);
+ down(&idmap->idmap_im_lock);
+
+ /* Fast path: answer from the cache without an upcall. */
+ he = idmap_lookup_name(h, name, namelen);
+ if (he != NULL) {
+ *id = he->ih_id;
+ ret = 0;
+ goto out;
+ }
+
+ /* The memset leaves im_name NUL-terminated after the copy below. */
+ memset(im, 0, sizeof(*im));
+ memcpy(im->im_name, name, namelen);
+
+ im->im_type = h->h_type;
+ im->im_conv = IDMAP_CONV_NAMETOID;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.data = im;
+ msg.len = sizeof(*im);
+
+ /* Join the wait queue before queueing the upcall. */
+ add_wait_queue(&idmap->idmap_wq, &wq);
+ if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
+ remove_wait_queue(&idmap->idmap_wq, &wq);
+ goto out;
+ }
+
+ /*
+ * The task state is set while idmap_im_lock is still held; both wakers
+ * (downcall and destroy_msg) take that lock before calling wake_up().
+ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ up(&idmap->idmap_im_lock);
+ schedule();
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&idmap->idmap_wq, &wq);
+ down(&idmap->idmap_im_lock);
+
+ if (im->im_status & IDMAP_STATUS_SUCCESS) {
+ *id = im->im_id;
+ ret = 0;
+ }
+
+ out:
+ /* Clear the shared message so a late downcall cannot match it. */
+ memset(im, 0, sizeof(*im));
+ up(&idmap->idmap_im_lock);
+ up(&idmap->idmap_lock);
+ return (ret);
+}
+
+/*
+ * ID -> Name
+ *
+ * Translate a numeric id into a name, using table h as a cache and
+ * falling back to an upcall through the idmap pipe on a miss.
+ *
+ * On success the name is copied into the caller's buffer WITHOUT a
+ * terminating NUL and its length is returned.  No destination size is
+ * passed in, so the caller must provide room for the copied bytes (the
+ * upcall path copies at most IDMAP_NAMESZ).  Returns -EIO if the upcall
+ * could not be queued, the reply did not report success, or the returned
+ * name was empty.
+ */
+static int
+nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
+ __u32 id, char *name)
+{
+ struct rpc_pipe_msg msg;
+ struct idmap_msg *im;
+ struct idmap_hashent *he;
+ DECLARE_WAITQUEUE(wq, current);
+ int ret = -EIO;
+ unsigned int len;
+
+ im = &idmap->idmap_im;
+
+ down(&idmap->idmap_lock);
+ down(&idmap->idmap_im_lock);
+
+ /* Fast path: answer from the cache without an upcall. */
+ he = idmap_lookup_id(h, id);
+ if (he != 0) {
+ memcpy(name, he->ih_name, he->ih_namelen);
+ ret = he->ih_namelen;
+ goto out;
+ }
+
+ memset(im, 0, sizeof(*im));
+ im->im_type = h->h_type;
+ im->im_conv = IDMAP_CONV_IDTONAME;
+ im->im_id = id;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.data = im;
+ msg.len = sizeof(*im);
+
+ /* Join the wait queue before queueing the upcall. */
+ add_wait_queue(&idmap->idmap_wq, &wq);
+
+ if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) {
+ remove_wait_queue(&idmap->idmap_wq, &wq);
+ goto out;
+ }
+
+ /*
+ * The task state is set while idmap_im_lock is still held; both wakers
+ * (downcall and destroy_msg) take that lock before calling wake_up().
+ */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ up(&idmap->idmap_im_lock);
+ schedule();
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&idmap->idmap_wq, &wq);
+ down(&idmap->idmap_im_lock);
+
+ if (im->im_status & IDMAP_STATUS_SUCCESS) {
+ /* An empty name in a "successful" reply is treated as failure. */
+ if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
+ goto out;
+ memcpy(name, im->im_name, len);
+ ret = len;
+ }
+
+ out:
+ /* Clear the shared message so a late downcall cannot match it. */
+ memset(im, 0, sizeof(*im));
+ up(&idmap->idmap_im_lock);
+ up(&idmap->idmap_lock);
+ return ret;
+}
+
+/* RPC pipefs upcall/downcall routines */
+
+/*
+ * Copy (the rest of) a queued upcall message to the user buffer dst.
+ *
+ * At most buflen bytes are copied, starting at msg->copied.  Returns the
+ * number of bytes actually copied and advances msg->copied by that much.
+ * If bytes were requested but none could be copied, -EFAULT is returned
+ * and recorded in msg->errno.
+ */
+static ssize_t
+idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
+		char __user *dst, size_t buflen)
+{
+	char *data = (char *)msg->data + msg->copied;
+	size_t mlen = msg->len - msg->copied;
+	unsigned long left;
+
+	if (mlen > buflen)
+		mlen = buflen;
+
+	/*
+	 * copy_to_user() returns the number of bytes it could NOT copy; it is
+	 * never negative.  A complete failure is therefore "left == mlen".
+	 */
+	left = copy_to_user(dst, data, mlen);
+	if (mlen != 0 && left == mlen) {
+		msg->errno = -EFAULT;
+		return -EFAULT;
+	}
+	mlen -= left;
+	msg->copied += mlen;
+	msg->errno = 0;
+	return mlen;
+}
+
+/*
+ * Handle a reply written to the idmap pipe.
+ *
+ * The write must be exactly one struct idmap_msg (-ENOSPC otherwise;
+ * -EFAULT if it cannot be copied in).  The reply's status is stored in
+ * the shared message.  A failed status wakes the waiters immediately and
+ * the write still counts as consumed.  A successful reply is validated
+ * (-EINVAL for an empty or unterminated name, or an unknown type or
+ * conversion), delivered to the pending upcall if it matches, and always
+ * written to the cache.
+ *
+ * Returns mlen on success or a negative errno.
+ */
+static ssize_t
+idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{
+ struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
+ struct idmap *idmap = (struct idmap *)rpci->private;
+ struct idmap_msg im_in, *im = &idmap->idmap_im;
+ struct idmap_hashtable *h;
+ struct idmap_hashent *he = NULL;
+ int namelen_in;
+ int ret;
+
+ if (mlen != sizeof(im_in))
+ return (-ENOSPC);
+
+ if (copy_from_user(&im_in, src, mlen) != 0)
+ return (-EFAULT);
+
+ down(&idmap->idmap_im_lock);
+
+ ret = mlen;
+ im->im_status = im_in.im_status;
+ /* If we got an error, terminate now, and wake up pending upcalls */
+ if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) {
+ wake_up(&idmap->idmap_wq);
+ goto out;
+ }
+
+ /* Sanity checking of strings */
+ ret = -EINVAL;
+ /* A length of IDMAP_NAMESZ means no NUL was found inside the buffer. */
+ namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ);
+ if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ)
+ goto out;
+
+ switch (im_in.im_type) {
+ case IDMAP_TYPE_USER:
+ h = &idmap->idmap_user_hash;
+ break;
+ case IDMAP_TYPE_GROUP:
+ h = &idmap->idmap_group_hash;
+ break;
+ default:
+ goto out;
+ }
+
+ switch (im_in.im_conv) {
+ case IDMAP_CONV_IDTONAME:
+ /* Did we match the current upcall? */
+ if (im->im_conv == IDMAP_CONV_IDTONAME
+ && im->im_type == im_in.im_type
+ && im->im_id == im_in.im_id) {
+ /* Yes: copy string, including the terminating '\0' */
+ memcpy(im->im_name, im_in.im_name, namelen_in);
+ im->im_name[namelen_in] = '\0';
+ wake_up(&idmap->idmap_wq);
+ }
+ /* The cache slot is chosen whether or not the upcall matched. */
+ he = idmap_alloc_id(h, im_in.im_id);
+ break;
+ case IDMAP_CONV_NAMETOID:
+ /* Did we match the current upcall? */
+ if (im->im_conv == IDMAP_CONV_NAMETOID
+ && im->im_type == im_in.im_type
+ && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in
+ && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) {
+ im->im_id = im_in.im_id;
+ wake_up(&idmap->idmap_wq);
+ }
+ /* The cache slot is chosen whether or not the upcall matched. */
+ he = idmap_alloc_name(h, im_in.im_name, namelen_in);
+ break;
+ default:
+ goto out;
+ }
+
+ /* If the entry is valid, also copy it to the cache */
+ if (he != NULL)
+ idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
+ ret = mlen;
+out:
+ up(&idmap->idmap_im_lock);
+ return ret;
+}
+
+/*
+ * Pipe callback invoked when an upcall message is destroyed.  If the
+ * message carries a negative errno, the shared message is marked
+ * IDMAP_STATUS_LOOKUPFAIL and the waiters are woken; otherwise nothing
+ * is done.
+ */
+void
+idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+	struct idmap_msg *im = msg->data;
+	struct idmap *idmap = container_of(im, struct idmap, idmap_im);
+
+	if (msg->errno < 0) {
+		down(&idmap->idmap_im_lock);
+		im->im_status = IDMAP_STATUS_LOOKUPFAIL;
+		wake_up(&idmap->idmap_wq);
+		up(&idmap->idmap_im_lock);
+	}
+}
+
+/*
+ * Fowler/Noll/Vo hash
+ * http://www.isthe.com/chongo/tech/comp/fnv/
+ */
+
+#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */
+#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */
+
+/*
+ * Hash buflen bytes starting at buf.  For each byte the running value is
+ * multiplied by FNV_P_32 and then XORed with the byte; an empty buffer
+ * yields FNV_1_32.
+ */
+static unsigned int fnvhash32(const void *buf, size_t buflen)
+{
+	const unsigned char *bytes = buf;
+	unsigned int acc = FNV_1_32;
+	size_t i;
+
+	for (i = 0; i < buflen; i++)
+		acc = (acc * FNV_P_32) ^ (unsigned int)bytes[i];
+
+	return acc;
+}
+
+/* Map a user name to a uid using the client's user table. */
+int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+{
+	struct idmap *map = clp->cl_idmap;
+	struct idmap_hashtable *users = &map->idmap_user_hash;
+
+	return nfs_idmap_id(map, users, name, namelen, uid);
+}
+
+/* Map a group name to a gid (stored through uid) using the client's group table. */
+int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+{
+	struct idmap *map = clp->cl_idmap;
+	struct idmap_hashtable *groups = &map->idmap_group_hash;
+
+	return nfs_idmap_id(map, groups, name, namelen, uid);
+}
+
+/* Map a uid to a user name, written to buf, using the client's user table. */
+int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
+{
+	struct idmap *map = clp->cl_idmap;
+	struct idmap_hashtable *users = &map->idmap_user_hash;
+
+	return nfs_idmap_name(map, users, uid, buf);
+}
+/* Map a gid (passed as uid) to a group name, written to buf, using the client's group table. */
+int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
+{
+	struct idmap *map = clp->cl_idmap;
+	struct idmap_hashtable *groups = &map->idmap_group_hash;
+
+	return nfs_idmap_name(map, groups, uid, buf);
+}
+
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
new file mode 100644
index 000000000000..6345f26e87ee
--- /dev/null
+++ b/fs/nfs/inode.c
@@ -0,0 +1,2003 @@
+/*
+ * linux/fs/nfs/inode.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ *
+ * nfs inode and superblock handling functions
+ *
+ * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some
+ * experimental NFS changes. Modularisation taken straight from SYS5 fs.
+ *
+ * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts.
+ * J.S.Peatfield@damtp.cam.ac.uk
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY NFSDBG_VFS
+#define NFS_PARANOIA 1
+
+/* Maximum number of readahead requests
+ * FIXME: this should really be a sysctl so that users may tune it to suit
+ * their needs. People that do NFS over a slow network, might for
+ * instance want to reduce it to something closer to 1 for improved
+ * interactive response.
+ */
+#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
+
+static void nfs_invalidate_inode(struct inode *);
+static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long);
+
+static struct inode *nfs_alloc_inode(struct super_block *sb);
+static void nfs_destroy_inode(struct inode *);
+static int nfs_write_inode(struct inode *,int);
+static void nfs_delete_inode(struct inode *);
+static void nfs_clear_inode(struct inode *);
+static void nfs_umount_begin(struct super_block *);
+static int nfs_statfs(struct super_block *, struct kstatfs *);
+static int nfs_show_options(struct seq_file *, struct vfsmount *);
+
+static struct rpc_program nfs_program;
+
+/*
+ * Superblock operations for NFS mounts; each member points at the
+ * file-local handler declared above.
+ */
+static struct super_operations nfs_sops = {
+ .alloc_inode = nfs_alloc_inode,
+ .destroy_inode = nfs_destroy_inode,
+ .write_inode = nfs_write_inode,
+ .delete_inode = nfs_delete_inode,
+ .statfs = nfs_statfs,
+ .clear_inode = nfs_clear_inode,
+ .umount_begin = nfs_umount_begin,
+ .show_options = nfs_show_options,
+};
+
+/*
+ * RPC cruft for NFS
+ *
+ * nfs_rpcstat is the statistics block for nfs_program; the two objects
+ * point at each other (see .stats in nfs_program).
+ */
+static struct rpc_stat nfs_rpcstat = {
+ .program = &nfs_program
+};
+/*
+ * RPC version table, indexed by NFS protocol version number: slots 0 and
+ * 1 are empty and slot 2 is NFSv2.  Slot 3 holds NFSv3 when CONFIG_NFS_V3
+ * is set; when only CONFIG_NFS_V4 is set a NULL placeholder is emitted in
+ * its place so that nfs_version4 still lands in slot 4.
+ */
+static struct rpc_version * nfs_version[] = {
+ NULL,
+ NULL,
+ &nfs_version2,
+#if defined(CONFIG_NFS_V3)
+ &nfs_version3,
+#elif defined(CONFIG_NFS_V4)
+ NULL,
+#endif
+#if defined(CONFIG_NFS_V4)
+ &nfs_version4,
+#endif
+};
+
+/*
+ * RPC program descriptor for NFS.  .nrvers is derived from the size of
+ * nfs_version[], so it follows the CONFIG_NFS_V3/V4 build options.
+ */
+static struct rpc_program nfs_program = {
+ .name = "nfs",
+ .number = NFS_PROGRAM,
+ .nrvers = sizeof(nfs_version) / sizeof(nfs_version[0]),
+ .version = nfs_version,
+ .stats = &nfs_rpcstat,
+ .pipe_dir_name = "/nfs",
+};
+
+/* Inode number for @fattr: its fileid run through nfs_fileid_to_ino_t(). */
+static inline unsigned long
+nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
+{
+	unsigned long ino = nfs_fileid_to_ino_t(fattr->fileid);
+
+	return ino;
+}
+
+/*
+ * write_inode hook: commit the inode via nfs_commit_inode(), waiting for
+ * completion (FLUSH_WAIT) when @sync is non-zero.  Negative results are
+ * returned as-is; anything else is reported as 0.
+ */
+static int
+nfs_write_inode(struct inode *inode, int sync)
+{
+	int flags = 0;
+	int err;
+
+	if (sync)
+		flags = FLUSH_WAIT;
+
+	err = nfs_commit_inode(inode, 0, 0, flags);
+	return (err < 0) ? err : 0;
+}
+
+/*
+ * delete_inode hook: write everything back, complain if requests are
+ * still pending afterwards, then clear the inode.
+ */
+static void
+nfs_delete_inode(struct inode * inode)
+{
+	dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+	nfs_wb_all(inode);
+
+	/* Nothing should be left after the flush above; log it if so. */
+	if (nfs_have_writebacks(inode))
+		printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
+
+	clear_inode(inode);
+}
+
+/*
+ * clear_inode hook: flush outstanding writebacks, then drop the
+ * credential held by the inode's access cache, if any.  An inode that
+ * still has open contexts or in-progress data updates at this point is
+ * a bug.
+ */
+static void
+nfs_clear_inode(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	nfs_wb_all(inode);
+	BUG_ON(!list_empty(&nfsi->open_files));
+
+	if (nfsi->cache_access.cred != NULL)
+		put_rpccred(nfsi->cache_access.cred);
+
+	BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+}
+
+/*
+ * umount_begin hook: kill every RPC task on the mount's client, if the
+ * mount has one.
+ */
+void
+nfs_umount_begin(struct super_block *sb)
+{
+	struct rpc_clnt *rpc = NFS_SB(sb)->client;
+
+	/* -EIO all pending I/O */
+	if (rpc != NULL)
+		rpc_killall_tasks(rpc);
+}
+
+
+/*
+ * Round @bsize down to a power of two, considering bits 0..31 only.
+ *
+ * When @nrbitsp is non-NULL the bit index of the result is stored through
+ * it, and the rounding pass runs even for a value that already is a power
+ * of two.  With @nrbitsp == NULL a power-of-two @bsize is returned
+ * unchanged.  If none of bits 1..31 is set the rounding pass yields 1.
+ *
+ * The shifts use 1UL: a plain "1 << 31" overflows a signed int, and on
+ * 64-bit the sign-extended result would corrupt both the mask and the
+ * returned size.
+ */
+static inline unsigned long
+nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
+{
+	/* make sure blocksize is a power of two */
+	if ((bsize & (bsize - 1)) || nrbitsp) {
+		unsigned char	nrbits;
+
+		/* Scan down from bit 31 to the highest set bit. */
+		for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--)
+			;
+		bsize = 1UL << nrbits;
+		if (nrbitsp)
+			*nrbitsp = nrbits;
+	}
+
+	return bsize;
+}
+
+/*
+ * Calculate the number of 512byte blocks used.
+ *
+ * @tsize is a byte count; the result is rounded up and clamped to
+ * ULONG_MAX.  The round-up is done without forming "tsize + 511", which
+ * would wrap for a byte count within 511 of the u64 maximum and report a
+ * tiny block count.
+ */
+static inline unsigned long
+nfs_calc_block_size(u64 tsize)
+{
+	u64 used = (tsize >> 9) + ((tsize & 511) ? 1 : 0);
+
+	return (used > ULONG_MAX) ? ULONG_MAX : (unsigned long)used;
+}
+
+/*
+ * Compute and set NFS server blocksize
+ *
+ * Sizes below 1024 are replaced by NFS_DEF_FILE_IO_BUFFER_SIZE, sizes at
+ * or above NFS_MAX_FILE_IO_BUFFER_SIZE are capped to it, and the result
+ * is then rounded by nfs_block_bits() (which also fills @nrbitsp).
+ */
+static inline unsigned long
+nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
+{
+	if (bsize < 1024)
+		return nfs_block_bits(NFS_DEF_FILE_IO_BUFFER_SIZE, nrbitsp);
+
+	if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
+		return nfs_block_bits(NFS_MAX_FILE_IO_BUFFER_SIZE, nrbitsp);
+
+	return nfs_block_bits(bsize, nrbitsp);
+}
+
+/*
+ * Obtain the root inode of the file system.
+ *
+ * Calls the protocol's getroot operation and then looks the inode up by
+ * file handle.  Returns the inode, or an ERR_PTR: the getroot error, or
+ * -ENOMEM if nfs_fhget() produced no inode.
+ */
+static struct inode *
+nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct inode *root;
+	int status;
+
+	status = server->rpc_ops->getroot(server, rootfh, fsinfo);
+	if (status < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -status);
+		return ERR_PTR(status);
+	}
+
+	root = nfs_fhget(sb, rootfh, fsinfo->fattr);
+	if (root == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	return root;
+}
+
+/*
+ * Do NFS version-independent mount processing, and sanity checking
+ *
+ * Fetches the root inode, installs the root dentry, then derives the
+ * per-mount I/O parameters (rsize/wsize, page counts, dtsize, readahead,
+ * maximum file size) from the server's fsinfo reply and the RPC payload
+ * limit.  Returns 0 on success, or a negative errno if the root inode or
+ * the root dentry could not be set up.
+ *
+ * NOTE(review): @authflavor is not referenced anywhere in this body.
+ */
+static int
+nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
+{
+ struct nfs_server *server;
+ struct inode *root_inode;
+ struct nfs_fattr fattr;
+ struct nfs_fsinfo fsinfo = {
+ .fattr = &fattr,
+ };
+ struct nfs_pathconf pathinfo = {
+ .fattr = &fattr,
+ };
+ int no_root_error = 0;
+ unsigned long max_rpc_payload;
+
+ /* We probably want something more informative here */
+ snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
+
+ server = NFS_SB(sb);
+
+ sb->s_magic = NFS_SUPER_MAGIC;
+
+ root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
+ /* Did getting the root inode fail? */
+ if (IS_ERR(root_inode)) {
+ no_root_error = PTR_ERR(root_inode);
+ goto out_no_root;
+ }
+ sb->s_root = d_alloc_root(root_inode);
+ if (!sb->s_root) {
+ no_root_error = -ENOMEM;
+ goto out_no_root;
+ }
+ sb->s_root->d_op = server->rpc_ops->dentry_ops;
+
+ /* Get some general file system info */
+ /* pathconf is only consulted when namelen is still zero */
+ if (server->namelen == 0 &&
+ server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
+ server->namelen = pathinfo.max_namelen;
+ /* Work out a lot of parameters */
+ /* Sizes still at zero take the server's preferred transfer size */
+ if (server->rsize == 0)
+ server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
+ if (server->wsize == 0)
+ server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
+
+ /* Clamp to the server's maximum, ignoring maxima below 512 */
+ if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
+ server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
+ if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
+ server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
+
+ /* Neither size may exceed what the RPC client can carry */
+ max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+ if (server->rsize > max_rpc_payload)
+ server->rsize = max_rpc_payload;
+ if (server->wsize > max_rpc_payload)
+ server->wsize = max_rpc_payload;
+
+ /* Pages per read, rounded up; capping the pages also shrinks rsize */
+ server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (server->rpages > NFS_READ_MAXIOV) {
+ server->rpages = NFS_READ_MAXIOV;
+ server->rsize = server->rpages << PAGE_CACHE_SHIFT;
+ }
+
+ /* Same computation for writes */
+ server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (server->wpages > NFS_WRITE_MAXIOV) {
+ server->wpages = NFS_WRITE_MAXIOV;
+ server->wsize = server->wpages << PAGE_CACHE_SHIFT;
+ }
+
+ /* A block size of zero here is replaced by one derived from wsize */
+ if (sb->s_blocksize == 0)
+ sb->s_blocksize = nfs_block_bits(server->wsize,
+ &sb->s_blocksize_bits);
+ server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
+
+ /* dtsize is limited to one page and to rsize */
+ server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
+ if (server->dtsize > PAGE_CACHE_SIZE)
+ server->dtsize = PAGE_CACHE_SIZE;
+ if (server->dtsize > server->rsize)
+ server->dtsize = server->rsize;
+
+ /* NFS_MOUNT_NOAC: zero all attribute-cache timeouts, sync mount */
+ if (server->flags & NFS_MOUNT_NOAC) {
+ server->acregmin = server->acregmax = 0;
+ server->acdirmin = server->acdirmax = 0;
+ sb->s_flags |= MS_SYNCHRONOUS;
+ }
+ server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+
+ sb->s_maxbytes = fsinfo.maxfilesize;
+ if (sb->s_maxbytes > MAX_LFS_FILESIZE)
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+ server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
+ server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
+
+ /* We're airborne. Set the socket buffer size. */
+ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+ return 0;
+ /* Yargs. It didn't work out. */
+out_no_root:
+ dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
+ /* root_inode is a real inode here only when d_alloc_root() failed */
+ if (!IS_ERR(root_inode))
+ iput(root_inode);
+ return no_root_error;
+}
+
+/*
+ * Create an RPC client handle.
+ *
+ * Builds the timeout parameters from the mount data (falling back to
+ * built-in defaults when timeo or retrans is zero), creates a TCP or UDP
+ * transport to server->addr and wraps it in an RPC client for
+ * nfs_program.  Returns the client, or an ERR_PTR-encoded error; if only
+ * client creation fails the transport is destroyed again.
+ */
+static struct rpc_clnt *
+nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
+{
+	struct rpc_timeout to;
+	struct rpc_xprt *xprt = NULL;
+	struct rpc_clnt *clnt = NULL;
+	int use_tcp = (data->flags & NFS_MOUNT_TCP);
+
+	/* Initialize timeout values */
+	to.to_initval = data->timeo * HZ / 10;
+	to.to_retries = data->retrans;
+	to.to_maxval = use_tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
+	to.to_exponential = 1;
+
+	/* Zero means "not specified": substitute the defaults */
+	if (!to.to_initval)
+		to.to_initval = (use_tcp ? 600 : 11) * HZ / 10;
+	if (!to.to_retries)
+		to.to_retries = 5;
+
+	/* create transport and client */
+	xprt = xprt_create_proto(use_tcp ? IPPROTO_TCP : IPPROTO_UDP,
+				 &server->addr, &to);
+	if (IS_ERR(xprt)) {
+		printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+		return (struct rpc_clnt *)xprt;
+	}
+
+	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
+				 server->rpc_ops->version, data->pseudoflavor);
+	if (IS_ERR(clnt)) {
+		printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+		/* The transport has no owner now; tear it down ourselves */
+		xprt_destroy(xprt);
+		return clnt;
+	}
+
+	clnt->cl_intr = 1;
+	clnt->cl_softrtry = 1;
+	clnt->cl_chatty = 1;
+
+	return clnt;
+}
+
+/*
+ * The way this works is that the mount process passes a structure
+ * in the data argument which contains the server's IP address
+ * and the root file handle obtained from the server's mount
+ * daemon. We stash these away in the private superblock fields.
+ *
+ * Copies the mount options into the nfs_server, starts lockd unless
+ * NFS_MOUNT_NONLM is set, selects the v2 or v3 operations vector, creates
+ * the RPC client(s) and finally hands over to nfs_sb_init().  Returns 0
+ * or a negative errno.  @silent is not used in this body.
+ *
+ * NOTE(review): the error returns below do not undo lockd_up(), free
+ * server->hostname or release server->client; presumably the caller's
+ * teardown path does that -- confirm against the caller.
+ */
+static int
+nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
+{
+ struct nfs_server *server;
+ rpc_authflavor_t authflavor;
+
+ server = NFS_SB(sb);
+ sb->s_blocksize_bits = 0;
+ sb->s_blocksize = 0;
+ /* Zero-valued size options are left for nfs_sb_init() to fill in */
+ if (data->bsize)
+ sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+ if (data->rsize)
+ server->rsize = nfs_block_size(data->rsize, NULL);
+ if (data->wsize)
+ server->wsize = nfs_block_size(data->wsize, NULL);
+ server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+ /* Attribute-cache timeouts are scaled by HZ before being stored */
+ server->acregmin = data->acregmin*HZ;
+ server->acregmax = data->acregmax*HZ;
+ server->acdirmin = data->acdirmin*HZ;
+ server->acdirmax = data->acdirmax*HZ;
+
+ /* Start lockd here, before we might error out */
+ if (!(server->flags & NFS_MOUNT_NONLM))
+ lockd_up();
+
+ server->namelen = data->namlen;
+ /* assumes data->hostname is NUL-terminated -- TODO confirm the caller validates it */
+ server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
+ if (!server->hostname)
+ return -ENOMEM;
+ strcpy(server->hostname, data->hostname);
+
+ /* Check NFS protocol revision and initialize RPC op vector
+ * and file handle pool. */
+ if (server->flags & NFS_MOUNT_VER3) {
+#ifdef CONFIG_NFS_V3
+ server->rpc_ops = &nfs_v3_clientops;
+ server->caps |= NFS_CAP_READDIRPLUS;
+ if (data->version < 4) {
+ printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
+ return -EIO;
+ }
+#else
+ printk(KERN_NOTICE "NFS: NFSv3 not supported.\n");
+ return -EIO;
+#endif
+ } else {
+ server->rpc_ops = &nfs_v2_clientops;
+ }
+
+ /* Fill in pseudoflavor for mount version < 5 */
+ if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+ data->pseudoflavor = RPC_AUTH_UNIX;
+ authflavor = data->pseudoflavor; /* save for sb_init() */
+ /* XXX maybe we want to add a server->pseudoflavor field */
+
+ /* Create RPC client handles */
+ server->client = nfs_create_client(server, data);
+ if (IS_ERR(server->client))
+ return PTR_ERR(server->client);
+ /* RFC 2623, sec 2.3.2 */
+ /* client_sys always uses RPC_AUTH_UNIX: a clone, or the same client with an extra reference */
+ if (authflavor != RPC_AUTH_UNIX) {
+ server->client_sys = rpc_clone_client(server->client);
+ if (IS_ERR(server->client_sys))
+ return PTR_ERR(server->client_sys);
+ if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys))
+ return -ENOMEM;
+ } else {
+ atomic_inc(&server->client->cl_count);
+ server->client_sys = server->client;
+ }
+
+ /* Unset or oversized name lengths fall back to the protocol maximum */
+ if (server->flags & NFS_MOUNT_VER3) {
+ if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+ server->namelen = NFS3_MAXNAMLEN;
+ sb->s_time_gran = 1;
+ } else {
+ if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+ server->namelen = NFS2_MAXNAMLEN;
+ }
+
+ sb->s_op = &nfs_sops;
+ return nfs_sb_init(sb, authflavor);
+}
+
+/*
+ * statfs hook.  Queries the server under the big kernel lock and fills
+ * @buf.  Always returns 0: when the RPC fails, the error is logged and
+ * f_bsize/f_blocks/f_bfree/f_bavail are set to -1 instead.
+ */
+static int
+nfs_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fattr fattr;
+	struct nfs_fsstat res = {
+		.fattr = &fattr,
+	};
+	int error;
+
+	lock_kernel();
+
+	error = server->rpc_ops->statfs(server, rootfh, &res);
+	buf->f_type = NFS_SUPER_MAGIC;
+
+	if (error < 0) {
+		printk(KERN_WARNING "nfs_statfs: statfs error = %d\n", -error);
+		buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1;
+	} else {
+		unsigned char blockbits;
+		unsigned long blockres;
+
+		/*
+		 * Current versions of glibc do not correctly handle the
+		 * case where f_frsize != f_bsize.  Eventually we want to
+		 * report the value of wtmult in this field.
+		 */
+		buf->f_frsize = sb->s_blocksize;
+
+		/*
+		 * On most *nix systems, f_blocks, f_bfree, and f_bavail
+		 * are reported in units of f_frsize.  Linux hasn't had
+		 * an f_frsize field in its statfs struct until recently,
+		 * thus historically Linux's sys_statfs reports these
+		 * fields in units of f_bsize.
+		 */
+		buf->f_bsize = sb->s_blocksize;
+		blockbits = sb->s_blocksize_bits;
+		blockres = (1 << blockbits) - 1;
+
+		/* Byte counts from the server, rounded up to whole blocks */
+		buf->f_blocks = (res.tbytes + blockres) >> blockbits;
+		buf->f_bfree = (res.fbytes + blockres) >> blockbits;
+		buf->f_bavail = (res.abytes + blockres) >> blockbits;
+
+		buf->f_files = res.tfiles;
+		buf->f_ffree = res.afiles;
+
+		buf->f_namelen = server->namelen;
+	}
+
+	unlock_kernel();
+
+	return 0;
+}
+
+/*
+ * show_options hook: emit the mount options of @mnt into @m.  Version and
+ * transfer sizes are always printed, attribute-cache timeouts only when
+ * they differ from 3/60/30/60 seconds, and each flag in the table below
+ * as its "set" or "clear" spelling.  Always returns 0.
+ */
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	static struct proc_nfs_info {
+		int flag;
+		char *str;
+		char *nostr;
+	} nfs_info[] = {
+		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
+		{ NFS_MOUNT_INTR, ",intr", "" },
+		{ NFS_MOUNT_POSIX, ",posix", "" },
+		{ NFS_MOUNT_TCP, ",tcp", ",udp" },
+		{ NFS_MOUNT_NOCTO, ",nocto", "" },
+		{ NFS_MOUNT_NOAC, ",noac", "" },
+		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
+		{ 0, NULL, NULL }
+	};
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	struct proc_nfs_info *opt;
+
+	seq_printf(m, ",v%d", nfss->rpc_ops->version);
+	seq_printf(m, ",rsize=%d", nfss->rsize);
+	seq_printf(m, ",wsize=%d", nfss->wsize);
+
+	if (nfss->acregmin != 3*HZ)
+		seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
+	if (nfss->acregmax != 60*HZ)
+		seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
+	if (nfss->acdirmin != 30*HZ)
+		seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
+	if (nfss->acdirmax != 60*HZ)
+		seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
+
+	/* The table ends at the entry whose flag is zero */
+	for (opt = nfs_info; opt->flag != 0; opt++)
+		seq_puts(m, (nfss->flags & opt->flag) ? opt->str : opt->nostr);
+
+	seq_puts(m, ",addr=");
+	seq_escape(m, nfss->hostname, " \t\n\\");
+	return 0;
+}
+
+/*
+ * Invalidate the local caches
+ *
+ * Resets the attribute timeout to its minimum, clears the cookie
+ * verifier and marks attributes and access cache invalid.  Regular
+ * files, directories and symlinks also get their data marked invalid.
+ */
+void
+nfs_zap_caches(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int mode = inode->i_mode;
+	int has_data = S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode);
+
+	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
+	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
+
+	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+
+	if (has_data)
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+	else
+		nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+}
+
+/*
+ * Invalidate, but do not unhash, the inode
+ *
+ * The inode is turned into a bad inode, its original i_mode is put back
+ * afterwards, and its caches are zapped.
+ */
+static void
+nfs_invalidate_inode(struct inode *inode)
+{
+	umode_t mode_before = inode->i_mode;
+
+	make_bad_inode(inode);
+	/* Put back the mode saved before make_bad_inode() ran */
+	inode->i_mode = mode_before;
+
+	nfs_zap_caches(inode);
+}
+
+/*
+ * Lookup key handed to iget5_locked() by nfs_fhget() and consumed by
+ * nfs_find_actor() / nfs_init_locked().
+ */
+struct nfs_find_desc {
+ struct nfs_fh *fh; /* file handle to match or install */
+ struct nfs_fattr *fattr; /* attributes; only fileid is read by the callbacks */
+};
+
+/*
+ * In NFSv3 we can have 64bit inode numbers. In order to support
+ * this, and re-exported directories (also seen in NFSv2)
+ * we are forced to allow 2 different inodes to have the same
+ * i_ino.
+ *
+ * Match callback: returns 1 only if @inode has the wanted fileid and
+ * file handle and is neither bad nor stale; 0 otherwise.
+ */
+static int
+nfs_find_actor(struct inode *inode, void *opaque)
+{
+	struct nfs_find_desc *desc = opaque;
+
+	return NFS_FILEID(inode) == desc->fattr->fileid
+		&& !nfs_compare_fh(NFS_FH(inode), desc->fh)
+		&& !is_bad_inode(inode)
+		&& !NFS_STALE(inode);
+}
+
+/*
+ * Initialisation callback: record the fileid and file handle from the
+ * lookup descriptor in a freshly created inode.  Always returns 0.
+ */
+static int
+nfs_init_locked(struct inode *inode, void *opaque)
+{
+	struct nfs_find_desc *desc = opaque;
+
+	NFS_FILEID(inode) = desc->fattr->fileid;
+	nfs_copy_fh(NFS_FH(inode), desc->fh);
+
+	return 0;
+}
+
+/* Don't use READDIRPLUS on directories that we believe are too large */
+#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
+
+/*
+ * This is our front-end to iget that looks up inodes by file handle
+ * instead of inode number.
+ *
+ * Returns the inode for (@fh, @fattr), or NULL if @fattr carries no valid
+ * attributes, reports a link count of zero, or iget5_locked() fails.  A
+ * newly created inode is initialised from @fattr and unlocked; an
+ * existing one is passed to nfs_refresh_inode().
+ *
+ * NOTE(review): all three failure cases end at out_no_inode and log
+ * "iget failed", although only one of them is an iget failure.
+ */
+struct inode *
+nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+ struct nfs_find_desc desc = {
+ .fh = fh,
+ .fattr = fattr
+ };
+ struct inode *inode = NULL;
+ unsigned long hash;
+
+ if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+ goto out_no_inode;
+
+ if (!fattr->nlink) {
+ printk("NFS: Buggy server - nlink == 0!\n");
+ goto out_no_inode;
+ }
+
+ /* The hash value is the fileid folded to an ino_t */
+ hash = nfs_fattr_to_ino_t(fattr);
+
+ if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+ goto out_no_inode;
+
+ if (inode->i_state & I_NEW) {
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ /* We set i_ino for the few things that still rely on it,
+ * such as stat(2) */
+ inode->i_ino = hash;
+
+ /* We can't support update_atime(), since the server will reset it */
+ inode->i_flags |= S_NOATIME|S_NOCMTIME;
+ inode->i_mode = fattr->mode;
+ /* Why so? Because we want revalidate for devices/FIFOs, and
+ * that's precisely what we have in nfs_file_inode_operations.
+ */
+ inode->i_op = &nfs_file_inode_operations;
+ /* Pick the operation tables by file type */
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_fop = &nfs_file_operations;
+ inode->i_data.a_ops = &nfs_file_aops;
+ inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
+ inode->i_fop = &nfs_dir_operations;
+ if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
+ && fattr->size <= NFS_LIMIT_READDIRPLUS)
+ NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS;
+ } else if (S_ISLNK(inode->i_mode))
+ inode->i_op = &nfs_symlink_inode_operations;
+ else
+ init_special_inode(inode, inode->i_mode, fattr->rdev);
+
+ /* Seed the cached attributes from the server's reply */
+ nfsi->read_cache_jiffies = fattr->timestamp;
+ inode->i_atime = fattr->atime;
+ inode->i_mtime = fattr->mtime;
+ inode->i_ctime = fattr->ctime;
+ if (fattr->valid & NFS_ATTR_FATTR_V4)
+ nfsi->change_attr = fattr->change_attr;
+ inode->i_size = nfs_size_to_loff_t(fattr->size);
+ inode->i_nlink = fattr->nlink;
+ inode->i_uid = fattr->uid;
+ inode->i_gid = fattr->gid;
+ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
+ /*
+ * report the blocks in 512byte units
+ */
+ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+ inode->i_blksize = inode->i_sb->s_blocksize;
+ } else {
+ inode->i_blocks = fattr->du.nfs2.blocks;
+ inode->i_blksize = fattr->du.nfs2.blocksize;
+ }
+ /* Start with the minimum attribute timeout and no cached state */
+ nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
+ nfsi->attrtimeo_timestamp = jiffies;
+ memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ nfsi->cache_access.cred = NULL;
+
+ unlock_new_inode(inode);
+ } else
+ nfs_refresh_inode(inode, fattr);
+ dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ atomic_read(&inode->i_count));
+
+out:
+ return inode;
+
+out_no_inode:
+ printk("nfs_fhget: iget failed\n");
+ goto out;
+}
+
+#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET)
+
+/*
+ * setattr: push the attribute changes in @attr to the server and mirror
+ * them into the local inode on success.  Attributes outside
+ * NFS_VALID_ATTRS are dropped, as is a size change on a non-regular file
+ * or to the current size; if nothing remains, no RPC is sent.  Returns 0
+ * or the error from the protocol's setattr operation.
+ */
+int
+nfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+ struct nfs_fattr fattr;
+ int error;
+
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
+ attr->ia_valid &= ~ATTR_SIZE;
+ }
+
+ /* Optimization: if the end result is no change, don't RPC */
+ attr->ia_valid &= NFS_VALID_ATTRS;
+ if (attr->ia_valid == 0)
+ return 0;
+
+ lock_kernel();
+ nfs_begin_data_update(inode);
+ /* Write all dirty data if we're changing file permissions or size */
+ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
+ if (filemap_fdatawrite(inode->i_mapping) == 0)
+ filemap_fdatawait(inode->i_mapping);
+ nfs_wb_all(inode);
+ }
+ error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
+ if (error == 0) {
+ nfs_refresh_inode(inode, &fattr);
+ /* Apply the requested values locally as well */
+ if ((attr->ia_valid & ATTR_MODE) != 0) {
+ int mode;
+ /* Only the permission bits are replaced; the file type bits stay */
+ mode = inode->i_mode & ~S_IALLUGO;
+ mode |= attr->ia_mode & S_IALLUGO;
+ inode->i_mode = mode;
+ }
+ if ((attr->ia_valid & ATTR_UID) != 0)
+ inode->i_uid = attr->ia_uid;
+ if ((attr->ia_valid & ATTR_GID) != 0)
+ inode->i_gid = attr->ia_gid;
+ if ((attr->ia_valid & ATTR_SIZE) != 0) {
+ inode->i_size = attr->ia_size;
+ vmtruncate(inode, attr->ia_size);
+ }
+ }
+ /* Flagged whether or not the RPC succeeded */
+ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+ NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
+ nfs_end_data_update(inode);
+ unlock_kernel();
+ return error;
+}
+
+/*
+ * Wait for the inode to get unlocked.
+ * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING).
+ *
+ * Returns 0 at once if @flag is not set.  Otherwise holds a reference on
+ * the inode while waiting on its wait queue for @flag to clear, and
+ * returns the result of nfs_wait_event().
+ */
+static int
+nfs_wait_on_inode(struct inode *inode, int flag)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int error = 0;
+
+	if (NFS_FLAGS(inode) & flag) {
+		/* Take a reference on the inode for the duration of the wait */
+		atomic_inc(&inode->i_count);
+		error = nfs_wait_event(clnt, nfsi->nfs_i_wait,
+				       !(NFS_FLAGS(inode) & flag));
+		iput(inode);
+	}
+
+	return error;
+}
+
+/*
+ * getattr: revalidate the inode and, on success, fill @stat from it.
+ * When the cached atime is marked invalid and atime updates are not
+ * disabled (MS_NOATIME, or MS_NODIRATIME on a directory), a full
+ * revalidation is forced; otherwise the normal cached path is used.
+ * Returns 0 or the revalidation error.
+ */
+int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME;
+	int err;
+
+	if (__IS_FLG(inode, MS_NOATIME)
+	    || (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
+		need_atime = 0;
+
+	/* We may force a getattr if the user cares about atime */
+	err = need_atime
+		? __nfs_revalidate_inode(NFS_SERVER(inode), inode)
+		: nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (err == 0)
+		generic_fillattr(inode, stat);
+
+	return err;
+}
+
+/*
+ * Allocate an open context for @dentry using credential @cred.
+ *
+ * The context starts with a reference count of 1, takes its own
+ * references on @dentry and @cred, has no state attached, and records
+ * current->files as lock owner.  Returns NULL if the allocation fails.
+ *
+ * ->list and ->mode are initialised here because put_nfs_open_context()
+ * reads both (list_empty(&ctx->list), ctx->mode).  Left as kmalloc()
+ * garbage, releasing a context that was never linked into an inode's
+ * open_files list would call list_del() on wild pointers.
+ */
+struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+{
+	struct nfs_open_context *ctx;
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx != NULL) {
+		atomic_set(&ctx->count, 1);
+		INIT_LIST_HEAD(&ctx->list);
+		ctx->dentry = dget(dentry);
+		ctx->cred = get_rpccred(cred);
+		ctx->state = NULL;
+		ctx->mode = 0;
+		ctx->lockowner = current->files;
+		ctx->error = 0;
+		init_waitqueue_head(&ctx->waitq);
+	}
+	return ctx;
+}
+
+/* Take a reference on @ctx (NULL is allowed) and return it. */
+struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+{
+	if (ctx == NULL)
+		return NULL;
+
+	atomic_inc(&ctx->count);
+	return ctx;
+}
+
+/*
+ * Drop a reference on @ctx.  When the last one goes, the context is
+ * unlinked from its inode's list (under i_lock) if it is on one, any
+ * attached state is closed, the credential and dentry references are
+ * released and the context is freed.
+ */
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+	if (!atomic_dec_and_test(&ctx->count))
+		return;
+
+	if (!list_empty(&ctx->list)) {
+		struct inode *inode = ctx->dentry->d_inode;
+
+		spin_lock(&inode->i_lock);
+		list_del(&ctx->list);
+		spin_unlock(&inode->i_lock);
+	}
+
+	if (ctx->state != NULL)
+		nfs4_close_state(ctx->state, ctx->mode);
+	if (ctx->cred != NULL)
+		put_rpccred(ctx->cred);
+
+	dput(ctx->dentry);
+	kfree(ctx);
+}
+
+/*
+ * Attach @ctx to @filp: the file takes its own reference via
+ * private_data, and the context is added to the inode's open_files list
+ * under i_lock.
+ */
+void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	filp->private_data = get_nfs_open_context(ctx);
+
+	spin_lock(&inode->i_lock);
+	list_add(&ctx->list, &NFS_I(inode)->open_files);
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Return a referenced open context of @inode whose mode includes every
+ * bit of @mode, or NULL if there is none.  The first match on the
+ * open_files list wins; the list is walked under i_lock.
+ */
+struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_open_context *cur, *found = NULL;
+
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(cur, &nfsi->open_files, list) {
+		if ((cur->mode & mode) != mode)
+			continue;
+		found = get_nfs_open_context(cur);
+		break;
+	}
+	spin_unlock(&inode->i_lock);
+
+	return found;
+}
+
+/*
+ * Detach the open context from @filp, if it has one: clear private_data,
+ * move the context to the tail of the inode's open_files list and drop
+ * the file's reference.
+ */
+void nfs_file_clear_open_context(struct file *filp)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct nfs_open_context *ctx = filp->private_data;
+
+	if (ctx == NULL)
+		return;
+
+	filp->private_data = NULL;
+
+	spin_lock(&inode->i_lock);
+	list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
+	spin_unlock(&inode->i_lock);
+
+	put_nfs_open_context(ctx);
+}
+
+/*
+ * These allocate and release file read/write context information.
+ *
+ * nfs_open() looks up a credential, builds an open context for the file
+ * and attaches it.  A file opened for writing also begins a data update
+ * on the inode, which nfs_release() ends.  Returns 0, the credential
+ * lookup error, or -ENOMEM.
+ */
+int nfs_open(struct inode *inode, struct file *filp)
+{
+	struct rpc_cred *cred;
+	struct nfs_open_context *ctx;
+
+	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+
+	/* The context takes its own cred reference; ours can go at once */
+	ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+	put_rpccred(cred);
+	if (ctx == NULL)
+		return -ENOMEM;
+
+	ctx->mode = filp->f_mode;
+	nfs_file_set_open_context(filp, ctx);
+	/* Drop the allocation reference; the file keeps its own */
+	put_nfs_open_context(ctx);
+
+	if (filp->f_mode & FMODE_WRITE)
+		nfs_begin_data_update(inode);
+
+	return 0;
+}
+
+/*
+ * Release hook: end the data update begun by nfs_open() for writable
+ * files, then detach the open context.  Always returns 0.
+ */
+int nfs_release(struct inode *inode, struct file *filp)
+{
+	if (filp->f_mode & FMODE_WRITE)
+		nfs_end_data_update(inode);
+
+	nfs_file_clear_open_context(filp);
+
+	return 0;
+}
+
+/*
+ * This function is called whenever some part of NFS notices that
+ * the cached attributes have to be refreshed.
+ *
+ * Runs under the big kernel lock.  Only one revalidation per inode is in
+ * flight at a time (NFS_INO_REVALIDATING); other callers wait for it and
+ * may return without issuing their own GETATTR.  Returns 0 on success,
+ * -ESTALE for a bad or stale inode, or the error from the wait, the
+ * GETATTR or nfs_update_inode().
+ *
+ * NOTE(review): the dfprintk() below dereferences @inode before the
+ * "!inode" test further down, so that NULL check cannot protect it.
+ */
+int
+__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+{
+ int status = -ESTALE;
+ struct nfs_fattr fattr;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long verifier;
+ unsigned int flags;
+
+ dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
+ inode->i_sb->s_id, (long long)NFS_FILEID(inode));
+
+ lock_kernel();
+ if (!inode || is_bad_inode(inode))
+ goto out_nowait;
+ if (NFS_STALE(inode))
+ goto out_nowait;
+
+ /*
+ * Someone else is revalidating: wait, then return their result unless
+ * the cache is still marked invalid or the attribute timeout is zero.
+ */
+ while (NFS_REVALIDATING(inode)) {
+ status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
+ if (status < 0)
+ goto out_nowait;
+ if (NFS_ATTRTIMEO(inode) == 0)
+ continue;
+ if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
+ continue;
+ status = NFS_STALE(inode) ? -ESTALE : 0;
+ goto out_nowait;
+ }
+ NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
+
+ /* Protect against RPC races by saving the change attribute */
+ verifier = nfs_save_change_attribute(inode);
+ status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
+ if (status != 0) {
+ dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode), status);
+ /* A stale handle zaps the caches; non-directories are also marked stale */
+ if (status == -ESTALE) {
+ nfs_zap_caches(inode);
+ if (!S_ISDIR(inode->i_mode))
+ NFS_FLAGS(inode) |= NFS_INO_STALE;
+ }
+ goto out;
+ }
+
+ status = nfs_update_inode(inode, &fattr, verifier);
+ if (status) {
+ dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode), status);
+ goto out;
+ }
+ /* Snapshot the flags before the invalid bits are cleared below */
+ flags = nfsi->flags;
+ /*
+ * We may need to keep the attributes marked as invalid if
+ * we raced with nfs_end_attr_update().
+ * NOTE(review): no nfs_end_attr_update() is visible in this file;
+ * presumably nfs_end_data_update(), which bumps
+ * cache_change_attribute -- confirm.
+ */
+ if (verifier == nfsi->cache_change_attribute)
+ nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
+ /* Do the page cache invalidation */
+ if (flags & NFS_INO_INVALID_DATA) {
+ /* Regular files: write dirty pages out before dropping them */
+ if (S_ISREG(inode->i_mode)) {
+ if (filemap_fdatawrite(inode->i_mapping) == 0)
+ filemap_fdatawait(inode->i_mapping);
+ nfs_wb_all(inode);
+ }
+ nfsi->flags &= ~NFS_INO_INVALID_DATA;
+ invalidate_inode_pages2(inode->i_mapping);
+ memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+ dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode));
+ /* This ensures we revalidate dentries */
+ nfsi->cache_change_attribute++;
+ }
+ dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode));
+
+out:
+ /* Let waiters in nfs_wait_on_inode() proceed */
+ NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
+ wake_up(&nfsi->nfs_i_wait);
+ out_nowait:
+ unlock_kernel();
+ return status;
+}
+
+/*
+ * Report whether the cached attributes of @inode have expired.  Returns
+ * 0 while a read delegation is held; otherwise non-zero once jiffies has
+ * passed read_cache_jiffies + attrtimeo.
+ */
+int nfs_attribute_timeout(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int expired = 0;
+
+	if (!nfs_have_delegation(inode, FMODE_READ))
+		expired = time_after(jiffies,
+				     nfsi->read_cache_jiffies+nfsi->attrtimeo);
+
+	return expired;
+}
+
+/**
+ * nfs_revalidate_inode - Revalidate the inode attributes
+ * @server: pointer to nfs_server struct
+ * @inode: pointer to inode struct
+ *
+ * Takes the fast path when neither attributes nor data are marked
+ * invalid and the attribute timeout has not expired: the result is then
+ * -ESTALE for a stale inode and 0 otherwise.  In every other case the
+ * attributes are fetched again through __nfs_revalidate_inode().
+ */
+int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+{
+	int cache_invalid = NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+
+	if (cache_invalid || nfs_attribute_timeout(inode))
+		return __nfs_revalidate_inode(server, inode);
+
+	return NFS_STALE(inode) ? -ESTALE : 0;
+}
+
+/**
+ * nfs_begin_data_update - note the start of a data-modifying operation
+ * @inode: pointer to inode
+ *
+ * Increments the inode's count of in-progress data updates.  Pair with
+ * nfs_end_data_update() or nfs_end_data_update_defer().
+ */
+void nfs_begin_data_update(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	atomic_inc(&nfsi->data_updates);
+}
+
+/**
+ * nfs_end_data_update - note the end of a data-modifying operation
+ * @inode: pointer to inode
+ *
+ * Unless a read delegation is held, the attribute cache is marked
+ * invalid at once (directories and symlinks also get their data marked
+ * invalid).  The cache change attribute is bumped and the pending-update
+ * count dropped in every case.
+ */
+void nfs_end_data_update(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (!nfs_have_delegation(inode, FMODE_READ)) {
+		/* Directories and symlinks: invalidate page cache too */
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA;
+		else
+			nfsi->flags |= NFS_INO_INVALID_ATTR;
+	}
+
+	nfsi->cache_change_attribute ++;
+	atomic_dec(&nfsi->data_updates);
+}
+
+/**
+ * nfs_end_data_update_defer - end a data update, deferring invalidation
+ * @inode: pointer to inode
+ *
+ * Drops the pending-update count.  Only when it reaches zero are the
+ * attributes marked invalid (plus the data, for directories and
+ * symlinks) and the cache change attribute bumped.
+ */
+void nfs_end_data_update_defer(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	if (!atomic_dec_and_test(&nfsi->data_updates))
+		return;
+
+	/* Directories and symlinks: invalidate page cache too */
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA;
+	else
+		nfsi->flags |= NFS_INO_INVALID_ATTR;
+
+	nfsi->cache_change_attribute ++;
+}
+
+/**
+ * nfs_refresh_inode - verify consistency of the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * Verifies the attribute cache. If we have just changed the attributes,
+ * so that fattr carries weak cache consistency data, then it may
+ * also update the ctime/mtime/change_attribute.
+ *
+ * Apart from those pre-op-attribute driven updates, differences between
+ * the cached inode and @fattr are not copied into the inode here; they
+ * are only recorded by setting NFS_INO_INVALID_* bits in the nfs_inode
+ * flags.
+ *
+ * Returns 0 if nfs_have_delegation() reports a FMODE_READ delegation,
+ * if @fattr carries no NFS_ATTR_FATTR data, or once all checks have run.
+ * Returns -EIO if the fileid or the file type in @fattr differs from
+ * the inode's.
+ */
+int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ loff_t cur_size, new_isize;
+ int data_unstable;
+
+ /* Do we hold a delegation? */
+ if (nfs_have_delegation(inode, FMODE_READ))
+ return 0;
+
+ /* Are we in the process of updating data on the server? */
+ data_unstable = nfs_caches_unstable(inode);
+
+ if (fattr->valid & NFS_ATTR_FATTR_V4) {
+ if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
+ && nfsi->change_attr == fattr->pre_change_attr)
+ nfsi->change_attr = fattr->change_attr; /* our cached value matched the pre-op value, so adopt the post-op one */
+ if (!data_unstable && nfsi->change_attr != fattr->change_attr)
+ nfsi->flags |= NFS_INO_INVALID_ATTR;
+ }
+
+ if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+ return 0;
+
+ /* Has the inode gone and changed behind our back? */
+ if (nfsi->fileid != fattr->fileid
+ || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+ return -EIO;
+
+ cur_size = i_size_read(inode);
+ new_isize = nfs_size_to_loff_t(fattr->size);
+
+ /* If we have atomic WCC data, we may update some attributes */
+ if ((fattr->valid & NFS_ATTR_WCC) != 0) {
+ if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
+ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+ if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime))
+ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+ }
+
+ /* Verify a few of the more important attributes */
+ if (!data_unstable) {
+ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)
+ || cur_size != new_isize)
+ nfsi->flags |= NFS_INO_INVALID_ATTR;
+ } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) /* updates in flight: only a larger size on a regular file counts */
+ nfsi->flags |= NFS_INO_INVALID_ATTR;
+
+ /* Have any file permissions changed? */
+ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
+ || inode->i_uid != fattr->uid
+ || inode->i_gid != fattr->gid)
+ nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+
+ /* Has the link count changed? */
+ if (inode->i_nlink != fattr->nlink)
+ nfsi->flags |= NFS_INO_INVALID_ATTR;
+
+ if (!timespec_equal(&inode->i_atime, &fattr->atime))
+ nfsi->flags |= NFS_INO_INVALID_ATIME;
+
+ nfsi->read_cache_jiffies = fattr->timestamp;
+ return 0;
+}
+
+/*
+ * Many nfs protocol calls return the new file attributes after
+ * an operation. Here we update the inode to reflect the state
+ * of the server's inode.
+ *
+ * This is a bit tricky because we have to make sure all dirty pages
+ * have been sent off to the server before calling invalidate_inode_pages.
+ * To make sure no other process adds more write requests while we try
+ * our best to flush them, we make them sleep during the attribute refresh.
+ *
+ * A very similar scenario holds for the dir cache.
+ *
+ * @verifier is handed to nfs_verify_change_attribute(); when that check
+ * fails the data is treated as unstable: the size of a regular file is
+ * then only allowed to grow, and mtime/change_attr differences do not
+ * invalidate the caches.
+ *
+ * The size check compares the loff_t-converted server size, i.e. the
+ * very value that gets stored in i_size. Comparing the raw 64-bit size
+ * instead would report a size change on every call whenever
+ * nfs_size_to_loff_t() alters the value.
+ *
+ * Returns 0 on success (or if @fattr holds no NFS_ATTR_FATTR data), and
+ * -ESTALE, with NFS_INO_STALE set, if the fileid or the file type no
+ * longer matches the inode.
+ */
+static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	loff_t new_isize;
+	unsigned int invalid = 0;
+	loff_t cur_isize;
+	int data_unstable;
+
+	dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
+			__FUNCTION__, inode->i_sb->s_id, inode->i_ino,
+			atomic_read(&inode->i_count), fattr->valid);
+
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		return 0;
+
+	if (nfsi->fileid != fattr->fileid) {
+		printk(KERN_ERR "%s: inode number mismatch\n"
+		       "expected (%s/0x%Lx), got (%s/0x%Lx)\n",
+		       __FUNCTION__,
+		       inode->i_sb->s_id, (long long)nfsi->fileid,
+		       inode->i_sb->s_id, (long long)fattr->fileid);
+		goto out_err;
+	}
+
+	/*
+	 * Make sure the inode's type hasn't changed.
+	 */
+	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+		goto out_changed;
+
+	/*
+	 * Update the read time so we don't revalidate too often.
+	 */
+	nfsi->read_cache_jiffies = fattr->timestamp;
+
+	/* Are we racing with known updates of the metadata on the server? */
+	data_unstable = ! nfs_verify_change_attribute(inode, verifier);
+
+	/* Check if the file size agrees */
+	new_isize = nfs_size_to_loff_t(fattr->size);
+	cur_isize = i_size_read(inode);
+	if (cur_isize != new_isize) {
+#ifdef NFS_DEBUG_VERBOSE
+		printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
+#endif
+		/*
+		 * If we have pending writebacks, things can get
+		 * messy.
+		 */
+		if (S_ISREG(inode->i_mode) && data_unstable) {
+			if (new_isize > cur_isize) {
+				inode->i_size = new_isize;
+				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+			}
+		} else {
+			inode->i_size = new_isize;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+		}
+	}
+
+	/*
+	 * Note: we don't check inode->i_mtime since pipes etc.
+	 *       can change this value in VFS without requiring a
+	 *	 cache revalidation.
+	 */
+	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
+		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+#ifdef NFS_DEBUG_VERBOSE
+		printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
+#endif
+		if (!data_unstable)
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+	}
+
+	if ((fattr->valid & NFS_ATTR_FATTR_V4)
+	    && nfsi->change_attr != fattr->change_attr) {
+#ifdef NFS_DEBUG_VERBOSE
+		printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n",
+		       inode->i_sb->s_id, inode->i_ino);
+#endif
+		nfsi->change_attr = fattr->change_attr;
+		if (!data_unstable)
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
+	}
+
+	memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+	memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
+
+	/* Compare before overwriting: changed permissions invalidate access caching */
+	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
+	    inode->i_uid != fattr->uid ||
+	    inode->i_gid != fattr->gid)
+		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
+
+	inode->i_mode = fattr->mode;
+	inode->i_nlink = fattr->nlink;
+	inode->i_uid = fattr->uid;
+	inode->i_gid = fattr->gid;
+
+	if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
+		/*
+		 * report the blocks in 512byte units
+		 */
+		inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
+		inode->i_blksize = inode->i_sb->s_blocksize;
+	} else {
+		inode->i_blocks = fattr->du.nfs2.blocks;
+		inode->i_blksize = fattr->du.nfs2.blocksize;
+	}
+
+	/* Update attrtimeo value if we're out of the unstable period */
+	if (invalid & NFS_INO_INVALID_ATTR) {
+		/* Something changed: fall back to the shortest timeout */
+		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
+		nfsi->attrtimeo_timestamp = jiffies;
+	} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
+		/* Stable for a whole period: double the timeout, up to the maximum */
+		if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
+			nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+		nfsi->attrtimeo_timestamp = jiffies;
+	}
+	/* Only regular files, directories and symlinks have cached data to drop */
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
+				|| S_ISLNK(inode->i_mode)))
+		invalid &= ~NFS_INO_INVALID_DATA;
+	if (!nfs_have_delegation(inode, FMODE_READ))
+		nfsi->flags |= invalid;
+
+	return 0;
+ out_changed:
+	/*
+	 * Big trouble! The inode has become a different object.
+	 */
+#ifdef NFS_PARANOIA
+	printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n",
+			__FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode);
+#endif
+	/*
+	 * No need to worry about unhashing the dentry, as the
+	 * lookup validation will know that the inode is bad.
+	 * (But we fall through to invalidate the caches.)
+	 */
+	nfs_invalidate_inode(inode);
+ out_err:
+	NFS_FLAGS(inode) |= NFS_INO_STALE;
+	return -ESTALE;
+}
+
+/*
+ * File system information
+ */
+
+static int nfs_set_super(struct super_block *s, void *data)
+{
+ s->s_fs_info = data; /* stash the caller's data pointer (the nfs_server passed to sget()) in the superblock */
+ return set_anon_super(s, data);
+}
+
+/*
+ * sget() comparison callback: returns non-zero when the existing
+ * superblock's server has the same IPv4 address and port as the
+ * candidate in @data and nfs_compare_fh() returns 0 for their root
+ * filehandles; returns 0 otherwise.
+ */
+static int nfs_compare_super(struct super_block *sb, void *data)
+{
+	struct nfs_server *wanted = data;
+	struct nfs_server *cur = NFS_SB(sb);
+
+	if (cur->addr.sin_addr.s_addr == wanted->addr.sin_addr.s_addr &&
+	    cur->addr.sin_port == wanted->addr.sin_port)
+		return !nfs_compare_fh(&cur->fh, &wanted->fh);
+	return 0;
+}
+
+/*
+ * Mount entry point for the "nfs" filesystem type.
+ *
+ * Builds an nfs_server from the binary mount data in @raw_data, then
+ * asks sget() for a superblock matching it (see nfs_compare_super()).
+ * If an already set-up superblock is found it is returned and the
+ * freshly built nfs_server is discarded; otherwise the new superblock
+ * is filled in by nfs_fill_super().
+ *
+ * rpciod is started before sget() is called. Once sget() has handed
+ * the nfs_server to a new superblock (via nfs_set_super()), the only
+ * safe way to back out is deactivate_super(), which ends up in
+ * nfs_kill_super() and drops the rpciod reference there. Starting
+ * rpciod afterwards would leave a failure path that frees the
+ * nfs_server while the superblock still points at it.
+ *
+ * Returns the superblock, or an ERR_PTR() on failure.
+ */
+static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data)
+{
+	int error;
+	struct nfs_server *server;
+	struct super_block *s;
+	struct nfs_fh *root;
+	struct nfs_mount_data *data = raw_data;
+
+	if (!data) {
+		printk("nfs_read_super: missing data argument\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+	memset(server, 0, sizeof(struct nfs_server));
+	/* Zero out the NFS state stuff */
+	init_nfsv4_state(server);
+
+	/* Neutralise fields that older mount data layouts did not carry */
+	if (data->version != NFS_MOUNT_VERSION) {
+		printk("nfs warning: mount version %s than kernel\n",
+			data->version < NFS_MOUNT_VERSION ? "older" : "newer");
+		if (data->version < 2)
+			data->namlen = 0;
+		if (data->version < 3)
+			data->bsize = 0;
+		if (data->version < 4) {
+			data->flags &= ~NFS_MOUNT_VER3;
+			data->root.size = NFS2_FHSIZE;
+			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		}
+		if (data->version < 5)
+			data->flags &= ~NFS_MOUNT_SECFLAVOUR;
+	}
+
+	root = &server->fh;
+	if (data->flags & NFS_MOUNT_VER3)
+		root->size = data->root.size;
+	else
+		root->size = NFS2_FHSIZE;
+	if (root->size > sizeof(root->data)) {
+		printk("nfs_get_sb: invalid root filehandle\n");
+		kfree(server);
+		return ERR_PTR(-EINVAL);
+	}
+	memcpy(root->data, data->root.data, root->size);
+
+	/* We now require that the mount process passes the remote address */
+	memcpy(&server->addr, &data->addr, sizeof(server->addr));
+	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
+		printk("NFS: mount program didn't pass remote address!\n");
+		kfree(server);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * Fire up rpciod if not yet running. This must happen while we
+	 * still own 'server' outright, i.e. before sget().
+	 */
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		kfree(server);
+		return ERR_PTR(-EIO);
+	}
+
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+
+	if (IS_ERR(s) || s->s_root) {
+		/* No new superblock took ownership: undo rpciod_up() */
+		rpciod_down();
+		kfree(server);
+		return s;
+	}
+
+	s->s_flags = flags;
+
+	error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	if (error) {
+		/* nfs_kill_super() releases rpciod and frees 'server' */
+		up_write(&s->s_umount);
+		deactivate_super(s);
+		return ERR_PTR(error);
+	}
+	s->s_flags |= MS_ACTIVE;
+	return s;
+}
+
+/*
+ * Tear down an "nfs" superblock: shut down its RPC clients, drop the
+ * lockd and rpciod references, and free the nfs_server that was
+ * attached to the superblock.
+ */
+static void nfs_kill_super(struct super_block *s)
+{
+	struct nfs_server *server = NFS_SB(s);
+	struct rpc_clnt *clnt;
+
+	kill_anon_super(s);
+
+	/* Either client may be unset or hold an error pointer */
+	clnt = server->client;
+	if (clnt && !IS_ERR(clnt))
+		rpc_shutdown_client(clnt);
+
+	clnt = server->client_sys;
+	if (clnt && !IS_ERR(clnt))
+		rpc_shutdown_client(clnt);
+
+	/* release rpc.lockd, unless the mount runs without NLM */
+	if ((server->flags & NFS_MOUNT_NONLM) == 0)
+		lockd_down();
+
+	/* release rpciod */
+	rpciod_down();
+
+	/* kfree() is a no-op for a NULL hostname */
+	kfree(server->hostname);
+	kfree(server);
+}
+
+static struct file_system_type nfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs",
+ .get_sb = nfs_get_sb, /* builds a new superblock or returns a matching existing one */
+ .kill_sb = nfs_kill_super, /* shuts down RPC clients and frees the nfs_server */
+ .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+#ifdef CONFIG_NFS_V4
+
+static void nfs4_clear_inode(struct inode *);
+
+
+static struct super_operations nfs4_sops = {
+ .alloc_inode = nfs_alloc_inode,
+ .destroy_inode = nfs_destroy_inode,
+ .write_inode = nfs_write_inode,
+ .delete_inode = nfs_delete_inode,
+ .statfs = nfs_statfs,
+ .clear_inode = nfs4_clear_inode, /* the one NFSv4-specific hook here: also returns delegations and closes leftover open state */
+ .umount_begin = nfs_umount_begin,
+ .show_options = nfs_show_options,
+};
+
+/*
+ * NFSv4 flavour of clear_inode: hand back any delegation, run the
+ * generic nfs_clear_inode(), then close whatever open state is still
+ * attached to the inode. Such state can be left over from open() calls
+ * that passed nfs_atomic_lookup, but failed to call nfs_open().
+ */
+static void nfs4_clear_inode(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct list_head *head = &nfsi->open_states;
+
+	/* If we are holding a delegation, return it! */
+	if (nfsi->delegation)
+		nfs_inode_return_delegation(inode);
+
+	/* First call standard NFS clear_inode() code */
+	nfs_clear_inode(inode);
+
+	/*
+	 * Now clear out any remaining state. The loop relies on
+	 * nfs4_close_state() taking the entry off the list.
+	 */
+	while (!list_empty(head)) {
+		struct nfs4_state *leftover;
+
+		leftover = list_entry(head->next, struct nfs4_state,
+				inode_states);
+		dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
+				__FUNCTION__,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				leftover);
+		/* Nobody else may still hold a reference at this point */
+		BUG_ON(atomic_read(&leftover->count) != 1);
+		nfs4_close_state(leftover, leftover->state);
+	}
+}
+
+
+static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+{
+ struct nfs_server *server;
+ struct nfs4_client *clp = NULL;
+ struct rpc_xprt *xprt = NULL;
+ struct rpc_clnt *clnt = NULL;
+ struct rpc_timeout timeparms;
+ rpc_authflavor_t authflavour;
+ int proto, err = -EIO;
+
+ sb->s_blocksize_bits = 0;
+ sb->s_blocksize = 0;
+ server = NFS_SB(sb);
+ if (data->rsize != 0)
+ server->rsize = nfs_block_size(data->rsize, NULL);
+ if (data->wsize != 0)
+ server->wsize = nfs_block_size(data->wsize, NULL);
+ server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+ server->caps = NFS_CAP_ATOMIC_OPEN;
+
+ server->acregmin = data->acregmin*HZ;
+ server->acregmax = data->acregmax*HZ;
+ server->acdirmin = data->acdirmin*HZ;
+ server->acdirmax = data->acdirmax*HZ;
+
+ server->rpc_ops = &nfs_v4_clientops;
+ /*
+ * Initialize timeout values: data->timeo is scaled by HZ / 10, and
+ * a retrans count of zero is replaced by 5 retries.
+ */
+
+ timeparms.to_initval = data->timeo * HZ / 10;
+ timeparms.to_retries = data->retrans;
+ timeparms.to_exponential = 1;
+ if (!timeparms.to_retries)
+ timeparms.to_retries = 5;
+
+ proto = data->proto;
+ /* Which IP protocol do we use? */
+ switch (proto) {
+ case IPPROTO_TCP:
+ timeparms.to_maxval = RPC_MAX_TCP_TIMEOUT;
+ if (!timeparms.to_initval)
+ timeparms.to_initval = 600 * HZ / 10; /* default used when the mount data gave timeo == 0 */
+ break;
+ case IPPROTO_UDP:
+ timeparms.to_maxval = RPC_MAX_UDP_TIMEOUT;
+ if (!timeparms.to_initval)
+ timeparms.to_initval = 11 * HZ / 10; /* default used when the mount data gave timeo == 0 */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ clp = nfs4_get_client(&server->addr.sin_addr);
+ if (!clp) {
+ printk(KERN_WARNING "NFS: failed to create NFS4 client.\n");
+ return -EIO;
+ }
+
+ /* Now create transport and client */
+ authflavour = RPC_AUTH_UNIX;
+ if (data->auth_flavourlen != 0) {
+ if (data->auth_flavourlen > 1)
+ printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n");
+ if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { /* only the first flavour is read */
+ err = -EFAULT;
+ goto out_fail;
+ }
+ }
+
+ down_write(&clp->cl_sem); /* every error path below drops cl_sem before jumping to out_fail */
+ if (clp->cl_rpcclient == NULL) {
+ xprt = xprt_create_proto(proto, &server->addr, &timeparms);
+ if (IS_ERR(xprt)) {
+ up_write(&clp->cl_sem);
+ printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
+ err = PTR_ERR(xprt);
+ goto out_fail;
+ }
+ clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
+ server->rpc_ops->version, authflavour);
+ if (IS_ERR(clnt)) {
+ up_write(&clp->cl_sem);
+ printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+ xprt_destroy(xprt);
+ err = PTR_ERR(clnt);
+ goto out_fail;
+ }
+ clnt->cl_intr = 1;
+ clnt->cl_softrtry = 1;
+ clnt->cl_chatty = 1;
+ clp->cl_rpcclient = clnt;
+ clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0);
+ if (IS_ERR(clp->cl_cred)) {
+ up_write(&clp->cl_sem);
+ err = PTR_ERR(clp->cl_cred);
+ clp->cl_cred = NULL;
+ goto out_fail;
+ }
+ memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
+ nfs_idmap_new(clp);
+ }
+ if (list_empty(&clp->cl_superblocks)) { /* first superblock to attach to this client */
+ err = nfs4_init_client(clp);
+ if (err != 0) {
+ up_write(&clp->cl_sem);
+ goto out_fail;
+ }
+ }
+ list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
+ clnt = rpc_clone_client(clp->cl_rpcclient);
+ if (!IS_ERR(clnt))
+ server->nfs4_state = clp;
+ up_write(&clp->cl_sem);
+ clp = NULL; /* from here on out_fail will not nfs4_put_client() it */
+
+ if (IS_ERR(clnt)) {
+ printk(KERN_WARNING "NFS: cannot create RPC client.\n");
+ return PTR_ERR(clnt);
+ }
+
+ server->client = clnt;
+
+ if (server->nfs4_state->cl_idmap == NULL) {
+ printk(KERN_WARNING "NFS: failed to create idmapper.\n");
+ return -ENOMEM;
+ }
+
+ if (clnt->cl_auth->au_flavor != authflavour) {
+ if (rpcauth_create(authflavour, clnt) == NULL) {
+ printk(KERN_WARNING "NFS: couldn't create credcache!\n");
+ return -ENOMEM;
+ }
+ }
+
+ sb->s_time_gran = 1;
+
+ sb->s_op = &nfs4_sops;
+ err = nfs_sb_init(sb, authflavour);
+ if (err == 0)
+ return 0;
+out_fail:
+ if (clp)
+ nfs4_put_client(clp);
+ return err;
+}
+
+/*
+ * sget() comparison callback for "nfs4": two mounts share a superblock
+ * only if both the server hostname and the mount path strings are equal.
+ * Returns 1 on a match, 0 otherwise.
+ */
+static int nfs4_compare_super(struct super_block *sb, void *data)
+{
+	struct nfs_server *wanted = data;
+	struct nfs_server *cur = NFS_SB(sb);
+
+	return strcmp(wanted->hostname, cur->hostname) == 0 &&
+	       strcmp(wanted->mnt_path, cur->mnt_path) == 0;
+}
+
+/*
+ * Copy a user-space nfs_string into a NUL-terminated kernel buffer.
+ *
+ * At most @maxlen bytes are copied (fewer if the string is shorter),
+ * followed by a terminating '\0'. If @dst is NULL a buffer of the
+ * needed size is kmalloc()ed; the caller then owns it.
+ *
+ * Returns the destination buffer, or ERR_PTR(-EINVAL) for an empty
+ * string, ERR_PTR(-ENOMEM) if the allocation fails, ERR_PTR(-EFAULT)
+ * if the user copy fails.
+ */
+static void *
+nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+{
+	void *allocated = NULL;
+
+	if (src->len == 0)
+		return ERR_PTR(-EINVAL);
+	if (src->len < maxlen)
+		maxlen = src->len;
+
+	if (dst == NULL) {
+		allocated = kmalloc(maxlen + 1, GFP_KERNEL);
+		if (allocated == NULL)
+			return ERR_PTR(-ENOMEM);
+		dst = allocated;
+	}
+
+	if (copy_from_user(dst, src->data, maxlen) != 0) {
+		/* Only frees a buffer we allocated; kfree(NULL) is a no-op */
+		kfree(allocated);
+		return ERR_PTR(-EFAULT);
+	}
+	dst[maxlen] = '\0';
+	return dst;
+}
+
+/*
+ * Mount entry point for the "nfs4" filesystem type.
+ *
+ * Builds an nfs_server from the nfs4_mount_data in @raw_data (hostname,
+ * mount path, client address and server address are copied in from
+ * user space), then asks sget() for a superblock matching it (see
+ * nfs4_compare_super()). An already set-up superblock is returned
+ * as-is and the freshly built nfs_server is discarded; a new one is
+ * filled in by nfs4_fill_super().
+ *
+ * rpciod is started before sget() is called. Once sget() has handed
+ * the nfs_server to a new superblock, it may only be released through
+ * deactivate_super() (which ends up in nfs4_kill_super()); freeing it
+ * directly would leave the superblock pointing at freed memory.
+ *
+ * Returns the superblock, or an ERR_PTR() on failure.
+ */
+static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data)
+{
+	int error;
+	struct nfs_server *server;
+	struct super_block *s;
+	struct nfs4_mount_data *data = raw_data;
+	void *p;
+
+	if (!data) {
+		printk("nfs_read_super: missing data argument\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+	memset(server, 0, sizeof(struct nfs_server));
+	/* Zero out the NFS state stuff */
+	init_nfsv4_state(server);
+
+	if (data->version != NFS4_MOUNT_VERSION) {
+		printk("nfs warning: mount version %s than kernel\n",
+			data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
+	}
+
+	p = nfs_copy_user_string(NULL, &data->hostname, 256);
+	if (IS_ERR(p))
+		goto out_err;
+	server->hostname = p;
+
+	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
+	if (IS_ERR(p))
+		goto out_err;
+	server->mnt_path = p;
+
+	/* Copied into the fixed-size buffer inside 'server', nothing to free */
+	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
+			sizeof(server->ip_addr) - 1);
+	if (IS_ERR(p))
+		goto out_err;
+
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(server->addr)) {
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
+		s = ERR_PTR(-EFAULT);
+		goto out_free;
+	}
+	if (server->addr.sin_family != AF_INET ||
+	    server->addr.sin_addr.s_addr == INADDR_ANY) {
+		printk("NFS: mount program didn't pass remote IP address!\n");
+		s = ERR_PTR(-EINVAL);
+		goto out_free;
+	}
+
+	/*
+	 * Fire up rpciod if not yet running. This must happen while we
+	 * still own 'server' outright, i.e. before sget().
+	 */
+	if (rpciod_up() != 0) {
+		printk(KERN_WARNING "NFS: couldn't start rpciod!\n");
+		s = ERR_PTR(-EIO);
+		goto out_free;
+	}
+
+	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
+
+	/* Error, or an existing superblock: ours was never attached */
+	if (IS_ERR(s) || s->s_root)
+		goto out_rpciod_down;
+
+	s->s_flags = flags;
+
+	error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
+	if (error) {
+		/* nfs4_kill_super() releases rpciod and frees 'server' */
+		up_write(&s->s_umount);
+		deactivate_super(s);
+		return ERR_PTR(error);
+	}
+	s->s_flags |= MS_ACTIVE;
+	return s;
+out_rpciod_down:
+	rpciod_down();
+	goto out_free;
+out_err:
+	/* p holds the ERR_PTR() from the failed string copy */
+	s = (struct super_block *)p;
+out_free:
+	/* kfree() is a no-op for strings that were never allocated */
+	kfree(server->mnt_path);
+	kfree(server->hostname);
+	kfree(server);
+	return s;
+}
+
+/*
+ * Tear down an "nfs4" superblock: return all delegations, stop the
+ * renew daemon for this server, shut down the RPC client, drop the
+ * rpciod reference, destroy the NFSv4 state and free the nfs_server.
+ */
+static void nfs4_kill_super(struct super_block *sb)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct rpc_clnt *clnt;
+
+	nfs_return_all_delegations(sb);
+	kill_anon_super(sb);
+
+	nfs4_renewd_prepare_shutdown(server);
+
+	/* The client may be unset or hold an error pointer */
+	clnt = server->client;
+	if (clnt && !IS_ERR(clnt))
+		rpc_shutdown_client(clnt);
+
+	/* release rpciod */
+	rpciod_down();
+
+	destroy_nfsv4_state(server);
+
+	/* kfree() is a no-op for a NULL hostname */
+	kfree(server->hostname);
+	kfree(server);
+}
+
+static struct file_system_type nfs4_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .get_sb = nfs4_get_sb, /* builds a new superblock or returns a matching existing one */
+ .kill_sb = nfs4_kill_super, /* returns delegations, destroys NFSv4 state, frees the nfs_server */
+ .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+/*
+ * One-time initialisation of the NFSv4-only fields of an nfs_inode,
+ * invoked from the slab constructor. The argument is parenthesised at
+ * every use so that any pointer expression can be passed safely.
+ */
+#define nfs4_init_once(nfsi) \
+	do { \
+		INIT_LIST_HEAD(&(nfsi)->open_states); \
+		(nfsi)->delegation = NULL; \
+		(nfsi)->delegation_state = 0; \
+		init_rwsem(&(nfsi)->rwsem); \
+	} while(0)
+#define register_nfs4fs() register_filesystem(&nfs4_fs_type)
+#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
+#else
+#define nfs4_init_once(nfsi) \
+ do { } while (0)
+#define register_nfs4fs() (0)
+#define unregister_nfs4fs()
+#endif
+
+extern int nfs_init_nfspagecache(void);
+extern void nfs_destroy_nfspagecache(void);
+extern int nfs_init_readpagecache(void);
+extern void nfs_destroy_readpagecache(void);
+extern int nfs_init_writepagecache(void);
+extern void nfs_destroy_writepagecache(void);
+#ifdef CONFIG_NFS_DIRECTIO
+extern int nfs_init_directcache(void);
+extern void nfs_destroy_directcache(void);
+#endif
+
+static kmem_cache_t * nfs_inode_cachep;
+
+/*
+ * Allocate an nfs_inode from the inode slab cache and return the VFS
+ * inode embedded in it, or NULL if the allocation fails. Only 'flags'
+ * is reset here.
+ */
+static struct inode *nfs_alloc_inode(struct super_block *sb)
+{
+	struct nfs_inode *nfsi = kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
+
+	if (nfsi == NULL)
+		return NULL;
+
+	nfsi->flags = 0;
+	return &nfsi->vfs_inode;
+}
+
+static void nfs_destroy_inode(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	/* Hand the containing nfs_inode back to the slab cache */
+	kmem_cache_free(nfs_inode_cachep, nfsi);
+}
+
+/*
+ * Slab constructor for nfs_inode objects. Does nothing unless called
+ * as a real constructor (SLAB_CTOR_CONSTRUCTOR set, SLAB_CTOR_VERIFY
+ * clear); in that case the embedded VFS inode and the NFS lists,
+ * counters, lock and wait queue are initialised.
+ */
+static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+	struct nfs_inode *nfsi = foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) !=
+	    SLAB_CTOR_CONSTRUCTOR)
+		return;
+
+	inode_init_once(&nfsi->vfs_inode);
+	spin_lock_init(&nfsi->req_lock);
+
+	INIT_LIST_HEAD(&nfsi->dirty);
+	INIT_LIST_HEAD(&nfsi->commit);
+	INIT_LIST_HEAD(&nfsi->open_files);
+	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
+
+	atomic_set(&nfsi->data_updates, 0);
+	nfsi->ndirty = 0;
+	nfsi->ncommit = 0;
+	nfsi->npages = 0;
+
+	init_waitqueue_head(&nfsi->nfs_i_wait);
+	nfs4_init_once(nfsi);
+}
+
+/*
+ * Create the slab cache nfs_inode objects are allocated from.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created.
+ */
+int nfs_init_inodecache(void)
+{
+	nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
+					     sizeof(struct nfs_inode),
+					     0, SLAB_RECLAIM_ACCOUNT,
+					     init_once, NULL);
+
+	return nfs_inode_cachep != NULL ? 0 : -ENOMEM;
+}
+
+void nfs_destroy_inodecache(void)
+{
+	/* A non-zero return means the cache could not be destroyed cleanly */
+	int busy = kmem_cache_destroy(nfs_inode_cachep);
+
+	if (busy != 0)
+		printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n");
+}
+
+/*
+ * Initialize NFS
+ *
+ * Sets up the page, inode, read, write and (optionally) direct-I/O
+ * caches, registers the RPC statistics in /proc and finally registers
+ * the "nfs" and, if configured, "nfs4" filesystem types.
+ *
+ * On failure everything that was set up so far is undone in reverse
+ * order: each label below releases exactly the resources acquired
+ * before the step that jumped to it. Returns 0 or a negative errno.
+ */
+static int __init init_nfs_fs(void)
+{
+	int err;
+
+	err = nfs_init_nfspagecache();
+	if (err)
+		goto out4;
+
+	err = nfs_init_inodecache();
+	if (err)
+		goto out3;
+
+	err = nfs_init_readpagecache();
+	if (err)
+		goto out2;
+
+	err = nfs_init_writepagecache();
+	if (err)
+		goto out1;
+
+#ifdef CONFIG_NFS_DIRECTIO
+	err = nfs_init_directcache();
+	if (err)
+		goto out0;
+#endif
+
+#ifdef CONFIG_PROC_FS
+	rpc_proc_register(&nfs_rpcstat);
+#endif
+	err = register_filesystem(&nfs_fs_type);
+	if (err)
+		goto out;
+	err = register_nfs4fs();
+	if (err)
+		goto out_unregister;
+	return 0;
+out_unregister:
+	/* "nfs4" failed to register: do not leave "nfs" behind */
+	unregister_filesystem(&nfs_fs_type);
+out:
+#ifdef CONFIG_PROC_FS
+	rpc_proc_unregister("nfs");
+#endif
+#ifdef CONFIG_NFS_DIRECTIO
+	nfs_destroy_directcache();
+out0:
+	/* direct cache init failed: it must not be destroyed */
+#endif
+	nfs_destroy_writepagecache();
+out1:
+	nfs_destroy_readpagecache();
+out2:
+	nfs_destroy_inodecache();
+out3:
+	nfs_destroy_nfspagecache();
+out4:
+	return err;
+}
+
+static void __exit exit_nfs_fs(void)
+{
+#ifdef CONFIG_NFS_DIRECTIO
+ nfs_destroy_directcache();
+#endif
+ nfs_destroy_writepagecache();
+ nfs_destroy_readpagecache();
+ nfs_destroy_inodecache();
+ nfs_destroy_nfspagecache();
+#ifdef CONFIG_PROC_FS
+ rpc_proc_unregister("nfs");
+#endif
+ unregister_filesystem(&nfs_fs_type); /* NOTE(review): runs after the caches are destroyed -- confirm no mount can still exist at this point */
+ unregister_nfs4fs(); /* expands to nothing when CONFIG_NFS_V4 is not set */
+}
+
+/* Not quite true; I just maintain it */
+MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_LICENSE("GPL");
+
+module_init(init_nfs_fs)
+module_exit(exit_nfs_fs)
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
new file mode 100644
index 000000000000..9d3ddad96d9e
--- /dev/null
+++ b/fs/nfs/mount_clnt.c
@@ -0,0 +1,183 @@
+/*
+ * linux/fs/nfs/mount_clnt.c
+ *
+ * MOUNT client to support NFSroot.
+ *
+ * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/uio.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs_fs.h>
+
+#ifdef RPC_DEBUG
+# define NFSDBG_FACILITY NFSDBG_ROOT
+#endif
+
+/*
+#define MOUNT_PROGRAM 100005
+#define MOUNT_VERSION 1
+#define MOUNT_MNT 1
+#define MOUNT_UMNT 3
+ */
+
+static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *,
+ int, int);
+static struct rpc_program mnt_program;
+
+struct mnt_fhstatus {
+ unsigned int status; /* status word from the MOUNT reply; 0 means the file handle was filled in */
+ struct nfs_fh * fh; /* caller-supplied buffer the decoders copy the handle into */
+};
+
+/*
+ * Obtain an NFS file handle for the given host and path
+ *
+ * Creates a MOUNT client for @addr and issues the MNT call matching
+ * @version; on success the handle is stored in @fh. Returns 0, the
+ * negative error from client creation or the RPC call, or -EACCES if
+ * the reply carried a non-zero status.
+ */
+int
+nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
+		int version, int protocol)
+{
+	struct rpc_clnt		*mnt_clnt;
+	struct mnt_fhstatus	result = {
+		.fh		= fh
+	};
+	char			hostname[32];
+	int			status;
+	int			call;
+
+	dprintk("NFS:      nfs_mount(%08x:%s)\n",
+			(unsigned)ntohl(addr->sin_addr.s_addr), path);
+
+	/* The dotted-quad address doubles as the host name */
+	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
+	mnt_clnt = mnt_create(hostname, addr, version, protocol);
+	if (IS_ERR(mnt_clnt))
+		return PTR_ERR(mnt_clnt);
+
+	if (version == NFS_MNT3_VERSION)
+		call = MOUNTPROC3_MNT;
+	else
+		call = MNTPROC_MNT;
+
+	status = rpc_call(mnt_clnt, call, path, &result, 0);
+	if (status < 0)
+		return status;
+	if (result.status != 0)
+		return -EACCES;
+	return 0;
+}
+
+/*
+ * Create an RPC client for the MOUNT program at @srvaddr.
+ * Returns the client, or an ERR_PTR() if the transport or the client
+ * could not be created (the transport is destroyed in the latter case).
+ */
+static struct rpc_clnt *
+mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
+		int protocol)
+{
+	struct rpc_xprt	*xprt;
+	struct rpc_clnt	*clnt;
+
+	xprt = xprt_create_proto(protocol, srvaddr, NULL);
+	if (IS_ERR(xprt))
+		return (struct rpc_clnt *)xprt;
+
+	clnt = rpc_create_client(xprt, hostname,
+				&mnt_program, version,
+				RPC_AUTH_UNIX);
+	if (IS_ERR(clnt)) {
+		xprt_destroy(xprt);
+		return clnt;
+	}
+
+	clnt->cl_softrtry = 1;
+	clnt->cl_chatty   = 1;
+	clnt->cl_oneshot  = 1;
+	clnt->cl_intr     = 1;
+	return clnt;
+}
+
+/*
+ * XDR encode/decode functions for MOUNT
+ */
+/* Encode the MNT argument: the export path as an XDR string. */
+static int
+xdr_encode_dirpath(struct rpc_rqst *req, u32 *p, const char *path)
+{
+	u32 *end = xdr_encode_string(p, path);
+
+	/* Record how much of the send buffer is now in use */
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Decode a MOUNT v1 reply: a status word followed, when the status is
+ * zero, by a fixed NFS2_FHSIZE-byte file handle. Always returns 0; the
+ * outcome is reported through res->status.
+ */
+static int
+xdr_decode_fhstatus(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+{
+	struct nfs_fh *fh = res->fh;
+
+	res->status = ntohl(*p++);
+	if (res->status != 0)
+		return 0;
+
+	fh->size = NFS2_FHSIZE;
+	memcpy(fh->data, p, NFS2_FHSIZE);
+	return 0;
+}
+
+/*
+ * Decode a MOUNT v3 reply: a status word followed, when the status is
+ * zero, by a length-prefixed file handle.
+ *
+ * The length comes straight off the wire, so it is kept unsigned:
+ * held in a signed int, a value of 0x80000000 or more would turn
+ * negative, slip past the upper-bound check and be handed to memcpy()
+ * as an enormous size.
+ *
+ * Always returns 0; the outcome is reported through res->status, which
+ * is set to -EBADHANDLE if the advertised length exceeds NFS3_FHSIZE.
+ */
+static int
+xdr_decode_fhstatus3(struct rpc_rqst *req, u32 *p, struct mnt_fhstatus *res)
+{
+	struct nfs_fh *fh = res->fh;
+
+	if ((res->status = ntohl(*p++)) == 0) {
+		unsigned int size = ntohl(*p++);
+
+		if (size <= NFS3_FHSIZE) {
+			fh->size = size;
+			memcpy(fh->data, p, size);
+		} else
+			res->status = -EBADHANDLE;
+	}
+	return 0;
+}
+
+#define MNT_dirpath_sz (1 + 256)
+#define MNT_fhstatus_sz (1 + 8)
+
+static struct rpc_procinfo mnt_procedures[] = {
+[MNTPROC_MNT] = {
+ .p_proc = MNTPROC_MNT,
+ .p_encode = (kxdrproc_t) xdr_encode_dirpath,
+ .p_decode = (kxdrproc_t) xdr_decode_fhstatus, /* fixed-size NFS2_FHSIZE handle */
+ .p_bufsiz = MNT_dirpath_sz << 2, /* presumably MNT_dirpath_sz counts 32-bit words, hence << 2 for bytes -- confirm */
+ },
+};
+
+static struct rpc_procinfo mnt3_procedures[] = {
+[MOUNTPROC3_MNT] = {
+ .p_proc = MOUNTPROC3_MNT,
+ .p_encode = (kxdrproc_t) xdr_encode_dirpath,
+ .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, /* length-prefixed handle, at most NFS3_FHSIZE bytes */
+ .p_bufsiz = MNT_dirpath_sz << 2, /* presumably MNT_dirpath_sz counts 32-bit words, hence << 2 for bytes -- confirm */
+ },
+};
+
+
+static struct rpc_version mnt_version1 = {
+ .number = 1,
+ .nrprocs = 2,
+ .procs = mnt_procedures
+};
+
+static struct rpc_version mnt_version3 = {
+ .number = 3,
+ .nrprocs = 2,
+ .procs = mnt3_procedures
+};
+
+static struct rpc_version * mnt_version[] = {
+ NULL, /* slot 0: no such version */
+ &mnt_version1, /* slot 1 holds the table whose .number is 1 */
+ NULL, /* slot 2: no table provided */
+ &mnt_version3, /* slot 3 holds the table whose .number is 3 */
+};
+
+static struct rpc_stat mnt_stats;
+
+static struct rpc_program mnt_program = {
+ .name = "mount",
+ .number = NFS_MNT_PROGRAM,
+ .nrvers = sizeof(mnt_version)/sizeof(mnt_version[0]), /* number of slots in mnt_version[], NULL entries included */
+ .version = mnt_version,
+ .stats = &mnt_stats,
+};
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
new file mode 100644
index 000000000000..d91b69044a4d
--- /dev/null
+++ b/fs/nfs/nfs2xdr.c
@@ -0,0 +1,711 @@
+/*
+ * linux/fs/nfs/nfs2xdr.c
+ *
+ * XDR functions to encode/decode NFS RPC arguments and results.
+ *
+ * Copyright (C) 1992, 1993, 1994 Rick Sladkey
+ * Copyright (C) 1996 Olaf Kirch
+ * 04 Aug 1998 Ion Badulescu <ionut@cs.columbia.edu>
+ * FIFO's need special handling in NFSv2
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+
+#define NFSDBG_FACILITY NFSDBG_XDR
+/* #define NFS_PARANOIA 1 */
+
+extern int nfs_stat_to_errno(int stat);
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO EIO
+
+/*
+ * Declare the space requirements for NFS arguments and replies as
+ * number of 32bit-words
+ */
+#define NFS_fhandle_sz (8)
+#define NFS_sattr_sz (8)
+#define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2))
+#define NFS_path_sz (1+(NFS2_MAXPATHLEN>>2))
+#define NFS_fattr_sz (17)
+#define NFS_info_sz (5)
+#define NFS_entry_sz (NFS_filename_sz+3)
+
+#define NFS_diropargs_sz (NFS_fhandle_sz+NFS_filename_sz)
+#define NFS_sattrargs_sz (NFS_fhandle_sz+NFS_sattr_sz)
+#define NFS_readlinkargs_sz (NFS_fhandle_sz)
+#define NFS_readargs_sz (NFS_fhandle_sz+3)
+#define NFS_writeargs_sz (NFS_fhandle_sz+4)
+#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz)
+#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz)
+#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz)
+#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
+#define NFS_readdirargs_sz (NFS_fhandle_sz+2)
+
+#define NFS_attrstat_sz (1+NFS_fattr_sz)
+#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
+#define NFS_readlinkres_sz (2)
+#define NFS_readres_sz (1+NFS_fattr_sz+1)
+#define NFS_writeres_sz (NFS_attrstat_sz)
+#define NFS_stat_sz (1)
+#define NFS_readdirres_sz (1)
+#define NFS_statfsres_sz (1+NFS_info_sz)
+
+/*
+ * Common NFS XDR functions as inlines
+ */
+/*
+ * Copy the NFS2_FHSIZE bytes of @fhandle to @p and return the position
+ * just past them. No length word is written.
+ */
+static inline u32 *
+xdr_encode_fhandle(u32 *p, struct nfs_fh *fhandle)
+{
+	u32 *next = p + XDR_QUADLEN(NFS2_FHSIZE);
+
+	memcpy(p, fhandle->data, NFS2_FHSIZE);
+	return next;
+}
+
+/*
+ * Read a file handle from @p into @fhandle and return the position
+ * just past it. NFSv2 handles have a fixed length, so the size is set
+ * to NFS2_FHSIZE rather than read from the stream.
+ */
+static inline u32 *
+xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle)
+{
+	u32 *next = p + XDR_QUADLEN(NFS2_FHSIZE);
+
+	memcpy(fhandle->data, p, NFS2_FHSIZE);
+	fhandle->size = NFS2_FHSIZE;
+	return next;
+}
+
+/*
+ * Write @timep as two words, seconds then microseconds, and return the
+ * position just past them.
+ */
+static inline u32*
+xdr_encode_time(u32 *p, struct timespec *timep)
+{
+	long usec = 0;
+
+	/* The wire format carries microseconds, not nanoseconds */
+	if (timep->tv_nsec)
+		usec = timep->tv_nsec / 1000;
+
+	p[0] = htonl(timep->tv_sec);
+	p[1] = htonl(usec);
+	return p + 2;
+}
+
+/*
+ * Write a timestamp whose microseconds word is the out-of-range value
+ * 1000000. That is a Sun convention meaning "set to current server
+ * time"; it makes permission checks for the "touch" program work
+ * correctly across v2 mounts to Solaris and Irix boxes. See the
+ * description of sattr in section 6.1 of "NFS Illustrated" by
+ * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5.
+ */
+static inline u32*
+xdr_encode_current_server_time(u32 *p, struct timespec *timep)
+{
+	p[0] = htonl(timep->tv_sec);
+	p[1] = htonl(1000000);
+	return p + 2;
+}
+
+/*
+ * Read a two-word timestamp (seconds, microseconds) into @timep,
+ * scaling the second word up to nanoseconds. Returns the position
+ * just past the two words.
+ */
+static inline u32*
+xdr_decode_time(u32 *p, struct timespec *timep)
+{
+	timep->tv_sec = ntohl(p[0]);
+	timep->tv_nsec = ntohl(p[1]) * 1000;
+	return p + 2;
+}
+
+/*
+ * Decode an NFSv2 fattr into @fattr and return the position just past
+ * it. A character device whose raw rdev equals NFS2_FIFO_DEV is
+ * reported as a FIFO with rdev 0. Sets NFS_ATTR_FATTR in fattr->valid
+ * and stamps the attributes with the current jiffies.
+ */
+static u32 *
+xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
+{
+	u32 raw_rdev;
+
+	fattr->type = (enum nfs_ftype) ntohl(*p++);
+	fattr->mode = ntohl(*p++);
+	fattr->nlink = ntohl(*p++);
+	fattr->uid = ntohl(*p++);
+	fattr->gid = ntohl(*p++);
+	fattr->size = ntohl(*p++);
+	fattr->du.nfs2.blocksize = ntohl(*p++);
+	raw_rdev = ntohl(*p++);
+	fattr->du.nfs2.blocks = ntohl(*p++);
+	fattr->fsid_u.nfs3 = ntohl(*p++);
+	fattr->fileid = ntohl(*p++);
+	p = xdr_decode_time(p, &fattr->atime);
+	p = xdr_decode_time(p, &fattr->mtime);
+	p = xdr_decode_time(p, &fattr->ctime);
+
+	if (fattr->type == NFCHR && raw_rdev == NFS2_FIFO_DEV) {
+		/* FIFOs arrive disguised as a special character device */
+		fattr->type = NFFIFO;
+		fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
+		fattr->rdev = 0;
+	} else {
+		fattr->rdev = new_decode_dev(raw_rdev);
+	}
+
+	fattr->valid |= NFS_ATTR_FATTR;
+	fattr->timestamp = jiffies;
+	return p;
+}
+
+/*
+ * Encode an NFSv2 sattr from @attr and return the position just past
+ * it. Mode, uid, gid and size are each sent only when the matching
+ * ATTR_* bit is set in ia_valid; otherwise an all-ones word is written
+ * in its place. For atime and mtime, *_SET sends the given time, the
+ * plain ATTR_ATIME/ATTR_MTIME bit sends the "current server time"
+ * form, and neither sends two all-ones words.
+ */
+#define SATTR(p, attr, flag, field) \
+	*p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0
+static inline u32 *
+xdr_encode_sattr(u32 *p, struct iattr *attr)
+{
+	SATTR(p, attr, ATTR_MODE, ia_mode);
+	SATTR(p, attr, ATTR_UID, ia_uid);
+	SATTR(p, attr, ATTR_GID, ia_gid);
+	SATTR(p, attr, ATTR_SIZE, ia_size);
+
+	if (attr->ia_valid & ATTR_ATIME_SET)
+		p = xdr_encode_time(p, &attr->ia_atime);
+	else if (attr->ia_valid & ATTR_ATIME)
+		p = xdr_encode_current_server_time(p, &attr->ia_atime);
+	else {
+		p[0] = ~(u32) 0;
+		p[1] = ~(u32) 0;
+		p += 2;
+	}
+
+	if (attr->ia_valid & ATTR_MTIME_SET)
+		p = xdr_encode_time(p, &attr->ia_mtime);
+	else if (attr->ia_valid & ATTR_MTIME)
+		p = xdr_encode_current_server_time(p, &attr->ia_mtime);
+	else {
+		p[0] = ~(u32) 0;
+		p[1] = ~(u32) 0;
+		p += 2;
+	}
+	return p;
+}
+#undef SATTR
+
+/*
+ * NFS encode functions
+ */
+/*
+ * Encode a request whose only argument is a file handle
+ * (GETATTR, READLINK, STATFS). Always returns 0.
+ */
+static int
+nfs_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
+{
+	u32 *end = xdr_encode_fhandle(p, fh);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode SETATTR arguments: the file handle followed by the sattr.
+ * Always returns 0.
+ */
+static int
+nfs_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs_sattrargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fh);
+	end = xdr_encode_sattr(end, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode directory-operation arguments (LOOKUP, REMOVE, RMDIR):
+ * the directory file handle followed by the entry name.
+ * Always returns 0.
+ */
+static int
+nfs_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs_diropargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fh);
+	end = xdr_encode_array(end, args->name, args->len);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode the arguments of a READ call: file handle, offset, and the
+ * byte count (written twice). Because the data is read directly into
+ * the page cache, the reply buffer is also set up here so that the
+ * caller's pages sit right behind the expected reply header.
+ * Always returns 0.
+ */
+static int
+nfs_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	u32 start = (u32)args->offset;
+	u32 nbytes = args->count;
+	unsigned int hdr_bytes;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p[0] = htonl(start);
+	p[1] = htonl(nbytes);
+	p[2] = htonl(nbytes);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + 3);
+
+	/* Inline the page array after the reply header */
+	hdr_bytes = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, hdr_bytes,
+			 args->pages, args->pgbase, nbytes);
+	return 0;
+}
+
+/*
+ * Decode READ reply
+ *
+ * Returns the negated errno for a non-zero NFS status, -EIO when the
+ * decoded header extends past the head iovec, and otherwise the number
+ * of data bytes, clamped to what was actually received. res->count is
+ * lowered to that number when it is smaller than the stored count.
+ * res->eof is always cleared here. When the header is shorter than the
+ * head iovec, the receive buffer is shifted by the difference.
+ */
+static int
+nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
+{
+ struct kvec *iov = req->rq_rcv_buf.head;
+ int status, count, recvd, hdrlen;
+
+ if ((status = ntohl(*p++)))
+ return -nfs_stat_to_errno(status);
+ p = xdr_decode_fattr(p, res->fattr);
+
+ count = ntohl(*p++); /* data length claimed by the server */
+ res->eof = 0;
+ hdrlen = (u8 *) p - (u8 *) iov->iov_base; /* bytes consumed from the head iovec so far */
+ if (iov->iov_len < hdrlen) {
+ printk(KERN_WARNING "NFS: READ reply header overflowed:"
+ "length %d > %Zu\n", hdrlen, iov->iov_len);
+ return -errno_NFSERR_IO;
+ } else if (iov->iov_len != hdrlen) {
+ dprintk("NFS: READ header is short. iovec will be shifted.\n");
+ xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
+ }
+
+ recvd = req->rq_rcv_buf.len - hdrlen; /* bytes received beyond the header */
+ if (count > recvd) {
+ printk(KERN_WARNING "NFS: server cheating in read reply: "
+ "count %d > recvd %d\n", count, recvd);
+ count = recvd;
+ }
+
+ dprintk("RPC: readres OK count %d\n", count);
+ if (count < res->count)
+ res->count = count;
+
+ return count;
+}
+
+
+/*
+ * Encode the arguments of a WRITE call: file handle, the offset
+ * (written twice) and the byte count (written twice), then splice the
+ * pages holding the data into the send buffer. Always returns 0.
+ */
+static int
+nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+{
+	struct xdr_buf *snd = &req->rq_snd_buf;
+	u32 start = (u32)args->offset;
+	u32 nbytes = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p[0] = htonl(start);
+	p[1] = htonl(start);
+	p[2] = htonl(nbytes);
+	p[3] = htonl(nbytes);
+	snd->len = xdr_adjust_iovec(snd->head, p + 4);
+
+	/* Attach the page array carrying the data */
+	xdr_encode_pages(snd, args->pages, args->pgbase, nbytes);
+	return 0;
+}
+
+/*
+ * Encode creation arguments (CREATE, MKDIR): directory file handle,
+ * new entry name, and the initial attributes. Always returns 0.
+ */
+static int
+nfs_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs_createargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fh);
+	end = xdr_encode_array(end, args->name, args->len);
+	end = xdr_encode_sattr(end, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode RENAME arguments: source directory handle and name, then
+ * target directory handle and name. Always returns 0.
+ */
+static int
+nfs_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs_renameargs *args)
+{
+	u32 *end;
+
+	/* source */
+	end = xdr_encode_fhandle(p, args->fromfh);
+	end = xdr_encode_array(end, args->fromname, args->fromlen);
+	/* target */
+	end = xdr_encode_fhandle(end, args->tofh);
+	end = xdr_encode_array(end, args->toname, args->tolen);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode LINK arguments: handle of the existing file, then the target
+ * directory handle and the new name. Always returns 0.
+ */
+static int
+nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fromfh);
+	end = xdr_encode_fhandle(end, args->tofh);
+	end = xdr_encode_array(end, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode SYMLINK arguments: directory handle, name of the new link,
+ * the path it points to, and the attributes. Always returns 0.
+ */
+static int
+nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fromfh);
+	end = xdr_encode_array(end, args->fromname, args->fromlen);
+	end = xdr_encode_array(end, args->topath, args->tolen);
+	end = xdr_encode_sattr(end, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode the arguments of a READDIR call: directory handle, cookie and
+ * byte count. The reply buffer is set up at the same time so that the
+ * caller's pages sit right behind the expected reply header.
+ * Always returns 0.
+ */
+static int
+nfs_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs_readdirargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	u32 nbytes = args->count;
+	unsigned int hdr_bytes;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p[0] = htonl(args->cookie);
+	p[1] = htonl(nbytes);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + 2);
+
+	/* Inline the page array after the reply header */
+	hdr_bytes = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readdirres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, hdr_bytes, args->pages, 0, nbytes);
+	return 0;
+}
+
+/*
+ * Decode the result of a readdir call.
+ * We're not really decoding anymore, we just leave the buffer untouched
+ * and only check that it is syntactically correct.
+ * The real decoding happens in nfs_decode_entry below, called directly
+ * from nfs_readdir for each entry.
+ *
+ * Only the first page of the receive buffer is mapped and walked. Each
+ * entry is: a non-zero "entry follows" word, the fileid, the name
+ * length, the name padded to whole words, and the cookie.
+ *
+ * Returns the number of complete entries found, the negated errno for
+ * a non-zero NFS status, or -EIO when the header overflows the head
+ * iovec or a name is longer than NFS2_MAXNAMLEN.
+ *
+ * When the data ends in the middle of an entry, the two words at the
+ * start of that entry are zeroed so the listing stops there; if not
+ * even one entry was complete, the second word is then set to 1.
+ *
+ * NOTE(review): the loop condition reads *p before p is compared with
+ * end, so the first word of the page is read even if pglen is 0 --
+ * confirm whether callers rule that case out.
+ */
+static int
+nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+ struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+ struct kvec *iov = rcvbuf->head;
+ struct page **page;
+ int hdrlen, recvd;
+ int status, nr;
+ unsigned int len, pglen;
+ u32 *end, *entry, *kaddr;
+
+ if ((status = ntohl(*p++)))
+ return -nfs_stat_to_errno(status);
+
+ hdrlen = (u8 *) p - (u8 *) iov->iov_base; /* bytes consumed from the head iovec */
+ if (iov->iov_len < hdrlen) {
+ printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+ "length %d > %Zu\n", hdrlen, iov->iov_len);
+ return -errno_NFSERR_IO;
+ } else if (iov->iov_len != hdrlen) {
+ dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
+ xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+ }
+
+ pglen = rcvbuf->page_len;
+ recvd = rcvbuf->len - hdrlen;
+ if (pglen > recvd) /* never scan past what was actually received */
+ pglen = recvd;
+ page = rcvbuf->pages;
+ kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
+ end = (u32 *)((char *)p + pglen);
+ entry = p; /* start of the entry currently being checked */
+ for (nr = 0; *p++; nr++) {
+ if (p + 2 > end)
+ goto short_pkt;
+ p++; /* fileid */
+ len = ntohl(*p++);
+ p += XDR_QUADLEN(len) + 1; /* name plus cookie */
+ if (len > NFS2_MAXNAMLEN) {
+ printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n",
+ len);
+ goto err_unmap;
+ }
+ if (p + 2 > end)
+ goto short_pkt;
+ entry = p;
+ }
+ if (!nr && (entry[0] != 0 || entry[1] == 0))
+ goto short_pkt;
+ out:
+ kunmap_atomic(kaddr, KM_USER0);
+ return nr;
+ short_pkt:
+ entry[0] = entry[1] = 0; /* cut the listing off at the incomplete entry */
+ /* truncate listing ? */
+ if (!nr) {
+ printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+ entry[1] = 1;
+ }
+ goto out;
+err_unmap:
+ nr = -errno_NFSERR_IO;
+ goto out;
+}
+
+/*
+ * Decode one directory entry at @p into @entry and return the position
+ * of the next one.
+ *
+ * When the leading "entry follows" word is zero there is no entry: if
+ * the word after it is also zero, ERR_PTR(-EAGAIN) is returned;
+ * otherwise entry->eof is set and ERR_PTR(-EBADCOOKIE) is returned.
+ * @plus is not used here.
+ */
+u32 *
+nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{
+	if (*p++ == 0) {
+		if (*p == 0)
+			return ERR_PTR(-EAGAIN);
+		entry->eof = 1;
+		return ERR_PTR(-EBADCOOKIE);
+	}
+
+	entry->ino = ntohl(*p++);
+	entry->len = ntohl(*p++);
+	/* the name is left in place in the buffer, not copied */
+	entry->name = (const char *) p;
+	p += XDR_QUADLEN(entry->len);
+
+	entry->prev_cookie = entry->cookie;
+	entry->cookie = ntohl(*p++);
+
+	/* last entry: no entry follows and the next word is non-zero */
+	entry->eof = (p[0] == 0) && (p[1] != 0);
+
+	return p;
+}
+
+/*
+ * NFS XDR decode functions
+ */
+/*
+ * Decode a reply that consists of a status word only.
+ * Returns 0 for a zero status, otherwise the negated errno.
+ */
+static int
+nfs_xdr_stat(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+	int nfs_status = ntohl(*p++);
+
+	if (nfs_status == 0)
+		return 0;
+	return -nfs_stat_to_errno(nfs_status);
+}
+
+/*
+ * Decode an attrstat reply (GETATTR, SETATTR, WRITE): a status word
+ * followed, on success, by the file attributes.
+ * Returns 0 on success, otherwise the negated errno.
+ */
+static int
+nfs_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int nfs_status = ntohl(*p++);
+
+	if (nfs_status != 0)
+		return -nfs_stat_to_errno(nfs_status);
+
+	xdr_decode_fattr(p, fattr);
+	return 0;
+}
+
+/*
+ * Decode a diropres reply (LOOKUP, CREATE, MKDIR): a status word
+ * followed, on success, by a file handle and its attributes.
+ * Returns 0 on success, otherwise the negated errno.
+ */
+static int
+nfs_xdr_diropres(struct rpc_rqst *req, u32 *p, struct nfs_diropok *res)
+{
+	int nfs_status = ntohl(*p++);
+
+	if (nfs_status != 0)
+		return -nfs_stat_to_errno(nfs_status);
+
+	xdr_decode_fattr(xdr_decode_fhandle(p, res->fh), res->fattr);
+	return 0;
+}
+
+/*
+ * Encode READLINK arguments (just the file handle) and set up the
+ * reply buffer so that the caller's pages sit right behind the
+ * expected reply header. Always returns 0.
+ */
+static int
+nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int hdr_bytes;
+	u32 *end = xdr_encode_fhandle(p, args->fh);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+
+	/* Inline the page array after the reply header */
+	hdr_bytes = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readlinkres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, hdr_bytes,
+			 args->pages, args->pgbase, args->pglen);
+	return 0;
+}
+
+/*
+ * Decode READLINK reply
+ *
+ * Returns 0 on success, the negated errno for a non-zero NFS status,
+ * -ENAMETOOLONG when the reported length is not positive or does not
+ * leave room for a terminator within the page data, and -EIO when the
+ * header overflows the head iovec or fewer bytes were received than
+ * the reported length. On success the link text in the first page is
+ * NUL-terminated in place.
+ */
+static int
+nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
+{
+ struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+ struct kvec *iov = rcvbuf->head;
+ int hdrlen, len, recvd;
+ char *kaddr;
+ int status;
+
+ if ((status = ntohl(*p++)))
+ return -nfs_stat_to_errno(status);
+ /* Convert length of symlink */
+ len = ntohl(*p++);
+ if (len >= rcvbuf->page_len || len <= 0) { /* ">=" keeps one byte free for the terminator written below */
+ dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+ return -ENAMETOOLONG;
+ }
+ hdrlen = (u8 *) p - (u8 *) iov->iov_base; /* bytes consumed from the head iovec */
+ if (iov->iov_len < hdrlen) {
+ printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+ "length %d > %Zu\n", hdrlen, iov->iov_len);
+ return -errno_NFSERR_IO;
+ } else if (iov->iov_len != hdrlen) {
+ dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
+ xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+ }
+ recvd = req->rq_rcv_buf.len - hdrlen; /* bytes received beyond the header */
+ if (recvd < len) {
+ printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+ "count %u > recvd %u\n", len, recvd);
+ return -EIO;
+ }
+
+ /* NULL terminate the string we got */
+ kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
+ kaddr[len+rcvbuf->page_base] = '\0';
+ kunmap_atomic(kaddr, KM_USER0);
+ return 0;
+}
+
+/*
+ * Decode a WRITE reply. The reply body is an attrstat; the verifier is
+ * unconditionally marked NFS_FILE_SYNC before it is decoded.
+ * Returns whatever nfs_xdr_attrstat() returns.
+ */
+static int
+nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	int ret;
+
+	res->verf->committed = NFS_FILE_SYNC;
+	ret = nfs_xdr_attrstat(req, p, res->fattr);
+	return ret;
+}
+
+/*
+ * Decode a STATFS reply: a status word followed, on success, by five
+ * words (tsize, bsize, blocks, bfree, bavail).
+ * Returns 0 on success, otherwise the negated errno.
+ */
+static int
+nfs_xdr_statfsres(struct rpc_rqst *req, u32 *p, struct nfs2_fsstat *res)
+{
+	int nfs_status = ntohl(*p++);
+
+	if (nfs_status != 0)
+		return -nfs_stat_to_errno(nfs_status);
+
+	res->tsize = ntohl(p[0]);
+	res->bsize = ntohl(p[1]);
+	res->blocks = ntohl(p[2]);
+	res->bfree = ntohl(p[3]);
+	res->bavail = ntohl(p[4]);
+	return 0;
+}
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ *
+ * The table is scanned linearly by nfs_stat_to_errno(). The final
+ * { -1, EIO } entry terminates the scan and supplies the errno used
+ * for any status that is not listed. The errno values are stored as
+ * positive numbers; callers negate them.
+ */
+static struct {
+ int stat;
+ int errno;
+} nfs_errtbl[] = {
+ { NFS_OK, 0 },
+ { NFSERR_PERM, EPERM },
+ { NFSERR_NOENT, ENOENT },
+ { NFSERR_IO, errno_NFSERR_IO },
+ { NFSERR_NXIO, ENXIO },
+/* { NFSERR_EAGAIN, EAGAIN }, */
+ { NFSERR_ACCES, EACCES },
+ { NFSERR_EXIST, EEXIST },
+ { NFSERR_XDEV, EXDEV },
+ { NFSERR_NODEV, ENODEV },
+ { NFSERR_NOTDIR, ENOTDIR },
+ { NFSERR_ISDIR, EISDIR },
+ { NFSERR_INVAL, EINVAL },
+ { NFSERR_FBIG, EFBIG },
+ { NFSERR_NOSPC, ENOSPC },
+ { NFSERR_ROFS, EROFS },
+ { NFSERR_MLINK, EMLINK },
+ { NFSERR_NAMETOOLONG, ENAMETOOLONG },
+ { NFSERR_NOTEMPTY, ENOTEMPTY },
+ { NFSERR_DQUOT, EDQUOT },
+ { NFSERR_STALE, ESTALE },
+ { NFSERR_REMOTE, EREMOTE },
+#ifdef EWFLUSH
+ { NFSERR_WFLUSH, EWFLUSH },
+#endif
+ { NFSERR_BADHANDLE, EBADHANDLE },
+ { NFSERR_NOT_SYNC, ENOTSYNC },
+ { NFSERR_BAD_COOKIE, EBADCOOKIE },
+ { NFSERR_NOTSUPP, ENOTSUPP },
+ { NFSERR_TOOSMALL, ETOOSMALL },
+ { NFSERR_SERVERFAULT, ESERVERFAULT },
+ { NFSERR_BADTYPE, EBADTYPE },
+ { NFSERR_JUKEBOX, EJUKEBOX },
+ { -1, EIO } /* sentinel and fallback */
+};
+
+/*
+ * Convert an NFS status code to a local (positive) errno value.
+ * This one is used jointly by NFSv2 and NFSv3.
+ *
+ * A status that is not in nfs_errtbl is logged and mapped to the errno
+ * stored in the table's terminating entry.
+ */
+int
+nfs_stat_to_errno(int stat)
+{
+	int idx = 0;
+
+	while (nfs_errtbl[idx].stat != -1) {
+		if (nfs_errtbl[idx].stat == stat)
+			return nfs_errtbl[idx].errno;
+		idx++;
+	}
+
+	/* idx now sits on the terminating entry, which holds the fallback */
+	printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
+	return nfs_errtbl[idx].errno;
+}
+
+#ifndef MAX
+# define MAX(a, b) (((a) > (b))? (a) : (b)) /* evaluates each argument twice; used only on constants in PROC() */
+#endif
+
+#define PROC(proc, argtype, restype, timer) \
+[NFSPROC_##proc] = { \
+ .p_proc = NFSPROC_##proc, \
+ .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
+ .p_decode = (kxdrproc_t) nfs_xdr_##restype, \
+ .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
+ .p_timer = timer \
+ }
+struct rpc_procinfo nfs_procedures[] = { /* NFSv2 procedure table; each slot is placed at index NFSPROC_<name> by PROC() */
+ PROC(GETATTR, fhandle, attrstat, 1), /* columns: procedure, argument encoder, reply decoder, p_timer value */
+ PROC(SETATTR, sattrargs, attrstat, 0),
+ PROC(LOOKUP, diropargs, diropres, 2),
+ PROC(READLINK, readlinkargs, readlinkres, 3),
+ PROC(READ, readargs, readres, 3),
+ PROC(WRITE, writeargs, writeres, 4),
+ PROC(CREATE, createargs, diropres, 0),
+ PROC(REMOVE, diropargs, stat, 0),
+ PROC(RENAME, renameargs, stat, 0),
+ PROC(LINK, linkargs, stat, 0),
+ PROC(SYMLINK, symlinkargs, stat, 0),
+ PROC(MKDIR, createargs, diropres, 0),
+ PROC(RMDIR, diropargs, stat, 0),
+ PROC(READDIR, readdirargs, readdirres, 3),
+ PROC(STATFS, fhandle, statfsres, 0),
+};
+
+struct rpc_version nfs_version2 = { /* version descriptor tying version number 2 to nfs_procedures */
+ .number = 2,
+ .nrprocs = sizeof(nfs_procedures)/sizeof(nfs_procedures[0]), /* element count of the table */
+ .procs = nfs_procedures
+};
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
new file mode 100644
index 000000000000..3878494dfc2c
--- /dev/null
+++ b/fs/nfs/nfs3proc.c
@@ -0,0 +1,859 @@
+/*
+ * linux/fs/nfs/nfs3proc.c
+ *
+ * Client-side NFSv3 procedures stubs.
+ *
+ * Copyright (C) 1997, Olaf Kirch
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY NFSDBG_PROC
+
+extern struct rpc_procinfo nfs3_procedures[];
+
+/*
+ * Synchronous RPC call that retries while the server answers with
+ * EJUKEBOX. Between attempts the task sleeps interruptibly for
+ * NFS_JUKEBOX_RETRY_TIME; if signalled() is true after the sleep, the
+ * loop ends and -ERESTARTSYS is returned. Signals are masked with
+ * rpc_clnt_sigmask() for the duration and restored before returning.
+ */
+static int
+nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
+{
+	sigset_t saved_mask;
+	int ret;
+
+	rpc_clnt_sigmask(clnt, &saved_mask);
+	for (;;) {
+		ret = rpc_call_sync(clnt, msg, flags);
+		if (ret != -EJUKEBOX)
+			break;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
+		ret = -ERESTARTSYS;
+		if (signalled())
+			break;
+	}
+	rpc_clnt_sigunmask(clnt, &saved_mask);
+	return ret;
+}
+
+/*
+ * Build an rpc_message for NFSv3 procedure number @proc from @argp and
+ * @resp (no credential is set) and run it through nfs3_rpc_wrapper().
+ */
+static inline int
+nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
+{
+	struct rpc_message call = {
+		.rpc_resp = resp,
+		.rpc_argp = argp,
+		.rpc_proc = &nfs3_procedures[proc],
+	};
+
+	return nfs3_rpc_wrapper(clnt, &call, flags);
+}
+
+#define rpc_call(clnt, proc, argp, resp, flags) \
+ nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags) /* from here on, rpc_call() in this file goes through the EJUKEBOX retry wrapper */
+#define rpc_call_sync(clnt, msg, flags) \
+ nfs3_rpc_wrapper(clnt, msg, flags) /* likewise for rpc_call_sync(); defined after nfs3_rpc_wrapper() so that function still calls the real one */
+
+/*
+ * EJUKEBOX handling for asynchronous tasks. If the task finished with
+ * -EJUKEBOX, clear the status, restart the call after a delay of
+ * NFS_JUKEBOX_RETRY_TIME and return 1; otherwise return 0 and leave
+ * the task alone.
+ */
+static int
+nfs3_async_handle_jukebox(struct rpc_task *task)
+{
+	if (task->tk_status == -EJUKEBOX) {
+		task->tk_status = 0;
+		rpc_restart_call(task);
+		rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ *
+ * Issues FSINFO on the root handle using server->client_sys. If that
+ * reply did not deliver attributes (NFS_ATTR_FATTR not set), a GETATTR
+ * follows and its status is returned instead.
+ */
+static int
+nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_fsinfo *info)
+{
+	int err;
+
+	dprintk("%s: call fsinfo\n", __FUNCTION__);
+	info->fattr->valid = 0;
+	err = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, err);
+	if (info->fattr->valid & NFS_ATTR_FATTR)
+		return err;
+
+	err = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+	dprintk("%s: reply getattr: %d\n", __FUNCTION__, err);
+	return err;
+}
+
+/*
+ * One function for each procedure in the NFS protocol.
+ */
+/*
+ * GETATTR: fetch the attributes of @fhandle into @fattr.
+ * fattr->valid is cleared before the call. Returns the RPC status.
+ */
+static int
+nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+		  struct nfs_fattr *fattr)
+{
+	int err;
+
+	dprintk("NFS call getattr\n");
+	fattr->valid = 0;
+	err = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0);
+	dprintk("NFS reply getattr: %d\n", err);
+	return err;
+}
+
+/*
+ * SETATTR: apply @sattr to the inode behind @dentry; the attributes
+ * returned by the server land in @fattr (whose valid field is cleared
+ * first). Returns the RPC status.
+ */
+static int
+nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+		  struct iattr *sattr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nfs3_sattrargs args = {
+		.sattr = sattr,
+		.fh = NFS_FH(inode),
+	};
+	int err;
+
+	dprintk("NFS call setattr\n");
+	fattr->valid = 0;
+	err = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &args, fattr, 0);
+	dprintk("NFS reply setattr: %d\n", err);
+	return err;
+}
+
+/*
+ * LOOKUP: resolve @name in @dir, filling in @fhandle and @fattr.
+ * If the reply carried no attributes for the object, a GETATTR is
+ * issued on the new handle. When everything succeeded, the directory
+ * attributes from the reply are passed to nfs_refresh_inode() and its
+ * result becomes the return value.
+ */
+static int
+nfs3_proc_lookup(struct inode *dir, struct qstr *name,
+		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs_fattr dir_attr;
+	struct nfs3_diropargs args = {
+		.len = name->len,
+		.name = name->name,
+		.fh = NFS_FH(dir),
+	};
+	struct nfs3_diropres reply = {
+		.fattr = fattr,
+		.fh = fhandle,
+		.dir_attr = &dir_attr,
+	};
+	int err;
+
+	dprintk("NFS call lookup %s\n", name->name);
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	err = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &args, &reply, 0);
+	if (err >= 0) {
+		if (!(fattr->valid & NFS_ATTR_FATTR))
+			err = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
+				       fhandle, fattr, 0);
+	}
+	dprintk("NFS reply lookup: %d\n", err);
+	if (err < 0)
+		return err;
+	return nfs_refresh_inode(dir, &dir_attr);
+}
+
+/*
+ * ACCESS: ask the server which of the permissions in entry->mask the
+ * credential entry->cred holds on @inode.
+ *
+ * MAY_READ/MAY_WRITE/MAY_EXEC are translated into NFS3_ACCESS_* bits
+ * (write on a directory also asks for DELETE; exec asks for LOOKUP on
+ * a directory and EXECUTE otherwise). On success entry->mask is
+ * rewritten from the bits the server granted. The returned attributes
+ * are handed to nfs_refresh_inode() whatever the status.
+ */
+static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+{
+	struct nfs_fattr fattr;
+	struct nfs3_accessargs args = {
+		.fh = NFS_FH(inode),
+	};
+	struct nfs3_accessres reply = {
+		.fattr = &fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_cred = entry->cred,
+		.rpc_resp = &reply,
+		.rpc_argp = &args,
+		.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
+	};
+	int wanted = entry->mask;
+	int is_dir = S_ISDIR(inode->i_mode);
+	int err;
+
+	dprintk("NFS call access\n");
+	fattr.valid = 0;
+
+	if (wanted & MAY_READ)
+		args.access |= NFS3_ACCESS_READ;
+	if (wanted & MAY_WRITE) {
+		args.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND;
+		if (is_dir)
+			args.access |= NFS3_ACCESS_DELETE;
+	}
+	if (wanted & MAY_EXEC) {
+		if (is_dir)
+			args.access |= NFS3_ACCESS_LOOKUP;
+		else
+			args.access |= NFS3_ACCESS_EXECUTE;
+	}
+
+	err = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	nfs_refresh_inode(inode, &fattr);
+	if (err == 0) {
+		int granted = 0;
+
+		if (reply.access & NFS3_ACCESS_READ)
+			granted |= MAY_READ;
+		if (reply.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE))
+			granted |= MAY_WRITE;
+		if (reply.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
+			granted |= MAY_EXEC;
+		entry->mask = granted;
+	}
+	dprintk("NFS reply access: %d\n", err);
+	return err;
+}
+
+/*
+ * READLINK: read the target of symlink @inode into @page, using
+ * @pglen bytes starting at @pgbase. The attributes returned with the
+ * reply are handed to nfs_refresh_inode() whatever the status.
+ * Returns the RPC status.
+ */
+static int nfs3_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs_fattr fattr;
+	struct nfs3_readlinkargs args = {
+		.pages = &page,
+		.pglen = pglen,
+		.pgbase = pgbase,
+		.fh = NFS_FH(inode),
+	};
+	int err;
+
+	dprintk("NFS call readlink\n");
+	fattr.valid = 0;
+	err = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, &args, &fattr, 0);
+	nfs_refresh_inode(inode, &fattr);
+	dprintk("NFS reply readlink: %d\n", err);
+	return err;
+}
+
+/*
+ * Synchronous READ described by @rdata (arguments, result area,
+ * credential and RPC flags all come from it). On a non-negative status
+ * the returned attributes are handed to nfs_refresh_inode().
+ * Returns the RPC status.
+ */
+static int nfs3_proc_read(struct nfs_read_data *rdata)
+{
+	struct inode *inode = rdata->inode;
+	struct nfs_fattr *fattr = rdata->res.fattr;
+	int rpcflags = rdata->flags;
+	struct rpc_message msg = {
+		.rpc_cred = rdata->cred,
+		.rpc_resp = &rdata->res,
+		.rpc_argp = &rdata->args,
+		.rpc_proc = &nfs3_procedures[NFS3PROC_READ],
+	};
+	int err;
+
+	dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
+		(long long) rdata->args.offset);
+	fattr->valid = 0;
+	err = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
+	if (err >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply read: %d\n", err);
+	return err;
+}
+
+/*
+ * Synchronous WRITE described by @wdata. On a non-negative status the
+ * returned attributes are handed to nfs_refresh_inode().
+ * Returns the negative status on failure, otherwise the byte count
+ * reported in the reply (wdata->res.count).
+ */
+static int nfs3_proc_write(struct nfs_write_data *wdata)
+{
+	struct inode *inode = wdata->inode;
+	struct nfs_fattr *fattr = wdata->res.fattr;
+	int rpcflags = wdata->flags;
+	struct rpc_message msg = {
+		.rpc_cred = wdata->cred,
+		.rpc_resp = &wdata->res,
+		.rpc_argp = &wdata->args,
+		.rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
+	};
+	int err;
+
+	dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
+		(long long) wdata->args.offset);
+	fattr->valid = 0;
+	err = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
+	if (err >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply write: %d\n", err);
+	if (err < 0)
+		return err;
+	return wdata->res.count;
+}
+
+/*
+ * Synchronous COMMIT described by @cdata, always issued with RPC
+ * flags 0. On a non-negative status the returned attributes are handed
+ * to nfs_refresh_inode(). Returns the RPC status.
+ */
+static int nfs3_proc_commit(struct nfs_write_data *cdata)
+{
+	struct inode *inode = cdata->inode;
+	struct nfs_fattr *fattr = cdata->res.fattr;
+	struct rpc_message msg = {
+		.rpc_cred = cdata->cred,
+		.rpc_resp = &cdata->res,
+		.rpc_argp = &cdata->args,
+		.rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
+	};
+	int err;
+
+	dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
+		(long long) cdata->args.offset);
+	fattr->valid = 0;
+	err = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (err >= 0)
+		nfs_refresh_inode(inode, fattr);
+	dprintk("NFS reply commit: %d\n", err);
+	return err;
+}
+
+/*
+ * Create a regular file.
+ *
+ * O_EXCL in @flags selects NFS3_CREATE_EXCLUSIVE, with a verifier
+ * built from jiffies and the current pid; otherwise the create is
+ * NFS3_CREATE_UNCHECKED. If the server reports the requested mode as
+ * unsupported, the call is retried with the next weaker mode
+ * (EXCLUSIVE -> GUARDED -> UNCHECKED).
+ *
+ * After a successful create the dentry is instantiated. If the file
+ * was really created in exclusive mode, the attributes in @sattr are
+ * then applied with a separate SETATTR.
+ *
+ * Returns the status of the last operation performed (0 on success).
+ */
+static int
+nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+		 int flags)
+{
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
+	struct nfs_fattr dir_attr;
+	struct nfs3_createargs arg = {
+		.fh = NFS_FH(dir),
+		.name = dentry->d_name.name,
+		.len = dentry->d_name.len,
+		.sattr = sattr,
+	};
+	struct nfs3_diropres res = {
+		.dir_attr = &dir_attr,
+		.fh = &fhandle,
+		.fattr = &fattr
+	};
+	int status;
+
+	dprintk("NFS call create %s\n", dentry->d_name.name);
+	arg.createmode = NFS3_CREATE_UNCHECKED;
+	if (flags & O_EXCL) {
+		arg.createmode = NFS3_CREATE_EXCLUSIVE;
+		arg.verifier[0] = jiffies;
+		arg.verifier[1] = current->pid;
+	}
+
+again:
+	dir_attr.valid = 0;
+	fattr.valid = 0;
+	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+
+	/*
+	 * If the server doesn't support the requested creation semantics,
+	 * fall back to the next weaker mode. The RPC path hands back a
+	 * negative errno (the XDR decoders return -nfs_stat_to_errno()),
+	 * so the test must be against -ENOTSUPP: the raw on-the-wire
+	 * NFSERR_NOTSUPP value never shows up here.
+	 */
+	if (status == -ENOTSUPP) {
+		switch (arg.createmode) {
+			case NFS3_CREATE_EXCLUSIVE:
+				arg.createmode = NFS3_CREATE_GUARDED;
+				break;
+
+			case NFS3_CREATE_GUARDED:
+				arg.createmode = NFS3_CREATE_UNCHECKED;
+				break;
+
+			case NFS3_CREATE_UNCHECKED:
+				/* nothing weaker left to try */
+				goto out;
+		}
+		goto again;
+	}
+
+	if (status == 0)
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	if (status != 0)
+		goto out;
+
+	/* When we created the file with exclusive semantics, make
+	 * sure we set the attributes afterwards. */
+	if (arg.createmode == NFS3_CREATE_EXCLUSIVE) {
+		dprintk("NFS call setattr (post-create)\n");
+
+		/* ask for both timestamps to be set unless explicit
+		 * values were supplied */
+		if (!(sattr->ia_valid & ATTR_ATIME_SET))
+			sattr->ia_valid |= ATTR_ATIME;
+		if (!(sattr->ia_valid & ATTR_MTIME_SET))
+			sattr->ia_valid |= ATTR_MTIME;
+
+		/* Note: we could use a guarded setattr here, but I'm
+		 * not sure this buys us anything (and I'd have
+		 * to revamp the NFSv3 XDR code) */
+		status = nfs3_proc_setattr(dentry, &fattr, sattr);
+		nfs_refresh_inode(dentry->d_inode, &fattr);
+		dprintk("NFS reply setattr (post-create): %d\n", status);
+	}
+out:
+	dprintk("NFS reply create: %d\n", status);
+	return status;
+}
+
+/*
+ * REMOVE: synchronously remove entry @name from directory @dir. The
+ * directory attributes returned with the reply are handed to
+ * nfs_refresh_inode() whatever the status. Returns the RPC status.
+ */
+static int
+nfs3_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr dir_attr;
+	struct nfs3_diropargs args = {
+		.len = name->len,
+		.name = name->name,
+		.fh = NFS_FH(dir),
+	};
+	struct rpc_message msg = {
+		.rpc_resp = &dir_attr,
+		.rpc_argp = &args,
+		.rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE],
+	};
+	int err;
+
+	dprintk("NFS call remove %s\n", name->name);
+	dir_attr.valid = 0;
+	err = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply remove: %d\n", err);
+	return err;
+}
+
+/*
+ * Prepare @msg for a REMOVE of @name in directory @dir. Argument and
+ * result live together in one kmalloc'ed structure with the argument
+ * first, so msg->rpc_argp is also the address to kfree() later.
+ * Returns 0, or -ENOMEM if the allocation fails.
+ */
+static int
+nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct unlinkxdr {
+		struct nfs3_diropargs arg;
+		struct nfs_fattr res;
+	} *xdr;
+
+	xdr = kmalloc(sizeof(*xdr), GFP_KERNEL);
+	if (xdr == NULL)
+		return -ENOMEM;
+
+	xdr->arg.fh = NFS_FH(dir->d_inode);
+	xdr->arg.name = name->name;
+	xdr->arg.len = name->len;
+	xdr->res.valid = 0;
+
+	msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
+	msg->rpc_argp = &xdr->arg;
+	msg->rpc_resp = &xdr->res;
+	return 0;
+}
+
+/*
+ * Completion handler for the REMOVE set up by nfs3_proc_unlink_setup().
+ * Returns 1 if the call was restarted because of EJUKEBOX. Otherwise,
+ * if the message still has an argument buffer, the returned directory
+ * attributes are handed to nfs_refresh_inode(), the buffer is freed,
+ * and 0 is returned.
+ */
+static int
+nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+
+	if (nfs3_async_handle_jukebox(task))
+		return 1;
+	if (!msg->rpc_argp)
+		return 0;
+
+	nfs_refresh_inode(dir->d_inode, (struct nfs_fattr*)msg->rpc_resp);
+	kfree(msg->rpc_argp);
+	return 0;
+}
+
+/*
+ * RENAME: move @old_name in @old_dir to @new_name in @new_dir. The
+ * attributes returned for both directories are handed to
+ * nfs_refresh_inode() whatever the status. Returns the RPC status.
+ */
+static int
+nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		 struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs_fattr from_attr, to_attr;
+	struct nfs3_renameargs args = {
+		.tolen = new_name->len,
+		.toname = new_name->name,
+		.tofh = NFS_FH(new_dir),
+		.fromlen = old_name->len,
+		.fromname = old_name->name,
+		.fromfh = NFS_FH(old_dir),
+	};
+	struct nfs3_renameres reply = {
+		.toattr = &to_attr,
+		.fromattr = &from_attr,
+	};
+	int err;
+
+	dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
+	from_attr.valid = 0;
+	to_attr.valid = 0;
+	err = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &args, &reply, 0);
+	nfs_refresh_inode(old_dir, &from_attr);
+	nfs_refresh_inode(new_dir, &to_attr);
+	dprintk("NFS reply rename: %d\n", err);
+	return err;
+}
+
+/*
+ * LINK: create hard link @name in @dir to @inode. The attributes
+ * returned for the directory and for the file are handed to
+ * nfs_refresh_inode() whatever the status. Returns the RPC status.
+ */
+static int
+nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr dir_attr, file_attr;
+	struct nfs3_linkargs args = {
+		.tolen = name->len,
+		.toname = name->name,
+		.tofh = NFS_FH(dir),
+		.fromfh = NFS_FH(inode),
+	};
+	struct nfs3_linkres reply = {
+		.fattr = &file_attr,
+		.dir_attr = &dir_attr,
+	};
+	int err;
+
+	dprintk("NFS call link %s\n", name->name);
+	dir_attr.valid = 0;
+	file_attr.valid = 0;
+	err = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &args, &reply, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	nfs_refresh_inode(inode, &file_attr);
+	dprintk("NFS reply link: %d\n", err);
+	return err;
+}
+
+/*
+ * SYMLINK: create symlink @name in @dir pointing at @path, with
+ * attributes @sattr. The new object's handle and attributes are
+ * returned through @fhandle and @fattr. Returns -ENAMETOOLONG without
+ * contacting the server if @path is longer than NFS3_MAXPATHLEN;
+ * otherwise returns the RPC status. The directory attributes from the
+ * reply are handed to nfs_refresh_inode() whatever the status.
+ */
+static int
+nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+		  struct iattr *sattr, struct nfs_fh *fhandle,
+		  struct nfs_fattr *fattr)
+{
+	struct nfs_fattr dir_attr;
+	struct nfs3_symlinkargs args = {
+		.sattr = sattr,
+		.tolen = path->len,
+		.topath = path->name,
+		.fromlen = name->len,
+		.fromname = name->name,
+		.fromfh = NFS_FH(dir),
+	};
+	struct nfs3_diropres reply = {
+		.fattr = fattr,
+		.fh = fhandle,
+		.dir_attr = &dir_attr,
+	};
+	int err;
+
+	if (path->len > NFS3_MAXPATHLEN)
+		return -ENAMETOOLONG;
+
+	dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
+	dir_attr.valid = 0;
+	fattr->valid = 0;
+	err = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &args, &reply, 0);
+	nfs_refresh_inode(dir, &dir_attr);
+	dprintk("NFS reply symlink: %d\n", err);
+	return err;
+}
+
+/*
+ * Issue a synchronous NFSv3 MKDIR for @dentry inside @dir.  The parent's
+ * attributes are always passed to nfs_refresh_inode(); when the call
+ * succeeded the dentry is instantiated from the returned file handle and
+ * attributes, and the result of nfs_instantiate() becomes the return value.
+ */
+static int
+nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+{
+	struct nfs_fh		new_fh;
+	struct nfs_fattr	new_attr, parent_attr;
+	struct nfs3_mkdirargs	args = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr
+	};
+	struct nfs3_diropres	result = {
+		.dir_attr	= &parent_attr,
+		.fh		= &new_fh,
+		.fattr		= &new_attr
+	};
+	int			err;
+
+	dprintk("NFS call mkdir %s\n", dentry->d_name.name);
+	parent_attr.valid = 0;
+	new_attr.valid = 0;
+	err = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &args, &result, 0);
+	nfs_refresh_inode(dir, &parent_attr);
+	if (!err)
+		err = nfs_instantiate(dentry, &new_fh, &new_attr);
+	dprintk("NFS reply mkdir: %d\n", err);
+	return err;
+}
+
+/*
+ * Issue a synchronous NFSv3 RMDIR removing @name from @dir.  The reply is
+ * decoded straight into a local attribute buffer, which is then passed to
+ * nfs_refresh_inode() for the parent.  Returns the rpc_call() status.
+ */
+static int
+nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
+{
+	struct nfs_fattr	parent_attr;
+	struct nfs3_diropargs	args = {
+		.fh		= NFS_FH(dir),
+		.name		= name->name,
+		.len		= name->len
+	};
+	int			err;
+
+	dprintk("NFS call rmdir %s\n", name->name);
+	parent_attr.valid = 0;
+	err = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &args, &parent_attr, 0);
+	nfs_refresh_inode(dir, &parent_attr);
+	dprintk("NFS reply rmdir: %d\n", err);
+	return err;
+}
+
+/*
+ * READDIR is implemented in a slightly unusual way: the caller's page is
+ * handed to the argument encoder, which installs it in the receive
+ * buffer.  The reply decoder does not unpack the entries; it only checks
+ * that the reply is syntactically sane.
+ *
+ * The same routine serves plain READDIR and READDIRPLUS; @plus selects
+ * which procedure is called.  The call is made under the big kernel lock
+ * and its status is returned.
+ */
+static int
+nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		  u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs_fattr	parent_attr;
+	u32			*verf = NFS_COOKIEVERF(dir);
+	struct nfs3_readdirargs	args = {
+		.fh		= NFS_FH(dir),
+		.cookie		= cookie,
+		.verf		= {verf[0], verf[1]},
+		.plus		= plus,
+		.count		= count,
+		.pages		= &page
+	};
+	struct nfs3_readdirres	result = {
+		.dir_attr	= &parent_attr,
+		.verf		= verf,
+		.plus		= plus
+	};
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[plus ? NFS3PROC_READDIRPLUS
+							: NFS3PROC_READDIR],
+		.rpc_argp	= &args,
+		.rpc_resp	= &result,
+		.rpc_cred	= cred
+	};
+	int			err;
+
+	lock_kernel();
+
+	dprintk("NFS call readdir%s %d\n",
+			plus? "plus" : "", (unsigned int) cookie);
+
+	parent_attr.valid = 0;
+	err = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &parent_attr);
+	dprintk("NFS reply readdir: %d\n", err);
+
+	unlock_kernel();
+	return err;
+}
+
+/*
+ * Issue a synchronous NFSv3 MKNOD for @dentry inside @dir.  Only block
+ * devices, character devices, FIFOs and sockets are accepted; any other
+ * file type in sattr->ia_mode yields -EINVAL before anything is sent.
+ * On success the dentry is instantiated from the returned file handle
+ * and attributes.
+ */
+static int
+nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+		dev_t rdev)
+{
+	struct nfs_fh		new_fh;
+	struct nfs_fattr	new_attr, parent_attr;
+	struct nfs3_mknodargs	args = {
+		.fh		= NFS_FH(dir),
+		.name		= dentry->d_name.name,
+		.len		= dentry->d_name.len,
+		.sattr		= sattr,
+		.rdev		= rdev
+	};
+	struct nfs3_diropres	result = {
+		.dir_attr	= &parent_attr,
+		.fh		= &new_fh,
+		.fattr		= &new_attr
+	};
+	int			err;
+
+	/* Translate the requested S_IFMT bits into the NFSv3 file type */
+	switch (sattr->ia_mode & S_IFMT) {
+	case S_IFBLK:
+		args.type = NF3BLK;
+		break;
+	case S_IFCHR:
+		args.type = NF3CHR;
+		break;
+	case S_IFIFO:
+		args.type = NF3FIFO;
+		break;
+	case S_IFSOCK:
+		args.type = NF3SOCK;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name,
+			MAJOR(rdev), MINOR(rdev));
+	parent_attr.valid = 0;
+	new_attr.valid = 0;
+	err = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &args, &result, 0);
+	nfs_refresh_inode(dir, &parent_attr);
+	if (!err)
+		err = nfs_instantiate(dentry, &new_fh, &new_attr);
+	dprintk("NFS reply mknod: %d\n", err);
+	return err;
+}
+
+/*
+ * Issue a synchronous NFSv3 FSSTAT for @fhandle through server->client,
+ * filling in @stat.  Returns the rpc_call() status.
+ */
+static int
+nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsstat *stat)
+{
+	int err;
+
+	dprintk("NFS call fsstat\n");
+	stat->fattr->valid = 0;
+	err = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	dprintk("NFS reply statfs: %d\n", err);
+	return err;
+}
+
+/*
+ * Issue a synchronous NFSv3 FSINFO for @fhandle, filling in @info.
+ * Note that this call goes through server->client_sys rather than
+ * server->client.  Returns the rpc_call() status.
+ */
+static int
+nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		 struct nfs_fsinfo *info)
+{
+	int err;
+
+	dprintk("NFS call fsinfo\n");
+	info->fattr->valid = 0;
+	err = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	dprintk("NFS reply fsinfo: %d\n", err);
+	return err;
+}
+
+/*
+ * Issue a synchronous NFSv3 PATHCONF for @fhandle through server->client,
+ * filling in @info.  Returns the rpc_call() status.
+ */
+static int
+nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		   struct nfs_pathconf *info)
+{
+	int err;
+
+	dprintk("NFS call pathconf\n");
+	info->fattr->valid = 0;
+	err = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	dprintk("NFS reply pathconf: %d\n", err);
+	return err;
+}
+
+extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
+
+/*
+ * Completion callback installed on READ tasks by nfs3_proc_read_setup().
+ *
+ * If nfs3_async_handle_jukebox() returns non-zero nothing more is done
+ * here.  Otherwise, when the task did not fail, the attributes stored in
+ * the read descriptor are passed to nfs_refresh_inode(), and the generic
+ * nfs_readpage_result() processing is run in every case.
+ */
+static void
+nfs3_read_done(struct rpc_task *task)
+{
+	/*
+	 * READ tasks are initialised on a struct nfs_read_data (see
+	 * nfs3_proc_read_setup()), so interpret the call data as such
+	 * rather than as a struct nfs_write_data.
+	 */
+	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+	/* Call back common NFS readpage processing */
+	if (task->tk_status >= 0)
+		nfs_refresh_inode(data->inode, &data->fattr);
+	nfs_readpage_result(task);
+}
+
+/*
+ * Prepare the RPC task embedded in @data for an asynchronous NFSv3 READ:
+ * the task is initialised with nfs3_read_done() as its completion
+ * callback and bound to the READ procedure with the descriptor's
+ * argument, result and credential fields.
+ */
+static void
+nfs3_proc_read_setup(struct nfs_read_data *data)
+{
+	struct inode		*inode = data->inode;
+	struct rpc_task		*task = &data->task;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+	int			flags = RPC_TASK_ASYNC;
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	if (IS_SWAPFILE(inode))
+		flags |= NFS_RPC_SWAPFLAGS;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * Completion callback installed on WRITE tasks by nfs3_proc_write_setup().
+ * Returns early when nfs3_async_handle_jukebox() reports non-zero;
+ * otherwise refreshes the inode from the reply attributes if the task
+ * did not fail, then runs nfs_writeback_done().
+ */
+static void
+nfs3_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *wdata;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+
+	wdata = (struct nfs_write_data *)task->tk_calldata;
+	if (!(task->tk_status < 0))
+		nfs_refresh_inode(wdata->inode, wdata->res.fattr);
+	nfs_writeback_done(task);
+}
+
+/*
+ * Prepare the RPC task embedded in @data for an NFSv3 WRITE.
+ *
+ * @how selects the stability level placed in the arguments: without
+ * FLUSH_STABLE the write is NFS_UNSTABLE; with it, NFS_FILE_SYNC is used
+ * when NFS_I(inode)->ncommit is zero and NFS_DATA_SYNC otherwise.
+ * The task is asynchronous unless FLUSH_SYNC is set in @how.
+ */
+static void
+nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+{
+	struct inode		*inode = data->inode;
+	struct rpc_task		*task = &data->task;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+	int			stable;
+	int			flags;
+
+	if (!(how & FLUSH_STABLE))
+		stable = NFS_UNSTABLE;
+	else if (NFS_I(inode)->ncommit)
+		stable = NFS_DATA_SYNC;
+	else
+		stable = NFS_FILE_SYNC;
+	data->args.stable = stable;
+
+	/* Set the initial flags for the task. */
+	flags = RPC_TASK_ASYNC;
+	if (how & FLUSH_SYNC)
+		flags = 0;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * Completion callback installed on COMMIT tasks by
+ * nfs3_proc_commit_setup().  Returns early when
+ * nfs3_async_handle_jukebox() reports non-zero; otherwise refreshes the
+ * inode from the reply attributes if the task did not fail, then runs
+ * nfs_commit_done().
+ */
+static void
+nfs3_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data *wdata;
+
+	if (nfs3_async_handle_jukebox(task))
+		return;
+
+	wdata = (struct nfs_write_data *)task->tk_calldata;
+	if (!(task->tk_status < 0))
+		nfs_refresh_inode(wdata->inode, wdata->res.fattr);
+	nfs_commit_done(task);
+}
+
+/*
+ * Prepare the RPC task embedded in @data for an NFSv3 COMMIT, with
+ * nfs3_commit_done() as completion callback.  The task is asynchronous
+ * unless FLUSH_SYNC is set in @how.
+ */
+static void
+nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+{
+	struct inode		*inode = data->inode;
+	struct rpc_task		*task = &data->task;
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
+		.rpc_argp	= &data->args,
+		.rpc_resp	= &data->res,
+		.rpc_cred	= data->cred,
+	};
+	int			flags;
+
+	/* Set the initial flags for the task. */
+	flags = RPC_TASK_ASYNC;
+	if (how & FLUSH_SYNC)
+		flags = 0;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * File locking entry point for NFSv3: forwards the request for the
+ * file's inode to nlmclnt_proc() and returns its result.
+ */
+static int
+nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_dentry->d_inode;
+
+	return nlmclnt_proc(inode, cmd, fl);
+}
+
+/*
+ * Operations table for protocol version 3.  Each member points at the
+ * nfs3_proc_* routine implementing the corresponding operation; the
+ * dentry/inode operations and the open/release hooks are the shared
+ * nfs_* ones, and directory entry decoding is done by
+ * nfs3_decode_dirent().
+ */
+struct nfs_rpc_ops nfs_v3_clientops = {
+ .version = 3, /* protocol version */
+ .dentry_ops = &nfs_dentry_operations,
+ .dir_inode_ops = &nfs_dir_inode_operations,
+ .getroot = nfs3_proc_get_root,
+ .getattr = nfs3_proc_getattr,
+ .setattr = nfs3_proc_setattr,
+ .lookup = nfs3_proc_lookup,
+ .access = nfs3_proc_access,
+ .readlink = nfs3_proc_readlink,
+ .read = nfs3_proc_read,
+ .write = nfs3_proc_write,
+ .commit = nfs3_proc_commit,
+ .create = nfs3_proc_create,
+ .remove = nfs3_proc_remove,
+ .unlink_setup = nfs3_proc_unlink_setup,
+ .unlink_done = nfs3_proc_unlink_done,
+ .rename = nfs3_proc_rename,
+ .link = nfs3_proc_link,
+ .symlink = nfs3_proc_symlink,
+ .mkdir = nfs3_proc_mkdir,
+ .rmdir = nfs3_proc_rmdir,
+ .readdir = nfs3_proc_readdir,
+ .mknod = nfs3_proc_mknod,
+ .statfs = nfs3_proc_statfs,
+ .fsinfo = nfs3_proc_fsinfo,
+ .pathconf = nfs3_proc_pathconf,
+ .decode_dirent = nfs3_decode_dirent,
+ .read_setup = nfs3_proc_read_setup,
+ .write_setup = nfs3_proc_write_setup,
+ .commit_setup = nfs3_proc_commit_setup,
+ .file_open = nfs_open,
+ .file_release = nfs_release,
+ .lock = nfs3_proc_lock,
+};
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
new file mode 100644
index 000000000000..a3593d47e5ab
--- /dev/null
+++ b/fs/nfs/nfs3xdr.c
@@ -0,0 +1,1023 @@
+/*
+ * linux/fs/nfs/nfs3xdr.c
+ *
+ * XDR functions to encode/decode NFSv3 RPC arguments and results.
+ *
+ * Copyright (C) 1996, 1997 Olaf Kirch
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs3.h>
+#include <linux/nfs_fs.h>
+
+#define NFSDBG_FACILITY NFSDBG_XDR
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO EIO
+
+extern int nfs_stat_to_errno(int);
+
+/*
+ * Declare the space requirements for NFS arguments and replies as
+ * number of 32bit-words
+ *
+ * The PROC() macro further down turns the larger of a procedure's
+ * argument and result size into a byte count for .p_bufsiz.
+ */
+#define NFS3_fhandle_sz (1+16)
+#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */
+#define NFS3_sattr_sz (15)
+#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2))
+#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2))
+#define NFS3_fattr_sz (21)
+#define NFS3_wcc_attr_sz (6)
+#define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz)
+#define NFS3_post_op_attr_sz (1+NFS3_fattr_sz)
+#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz)
+#define NFS3_fsstat_sz
+#define NFS3_fsinfo_sz
+#define NFS3_pathconf_sz
+#define NFS3_entry_sz (NFS3_filename_sz+3)
+
+#define NFS3_sattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3)
+#define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz)
+#define NFS3_accessargs_sz (NFS3_fh_sz+1)
+#define NFS3_readlinkargs_sz (NFS3_fh_sz)
+#define NFS3_readargs_sz (NFS3_fh_sz+3)
+#define NFS3_writeargs_sz (NFS3_fh_sz+5)
+#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
+#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
+#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz)
+#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz)
+#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz)
+#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz)
+/*
+ * nfs3_xdr_readdirargs() emits, after the file handle, a 2-word cookie,
+ * a 2-word cookie verifier, a 1-word dircount (READDIRPLUS only) and a
+ * 1-word count.  The same size is used for both procedures, so budget
+ * for the larger READDIRPLUS form.
+ */
+#define NFS3_readdirargs_sz (NFS3_fh_sz+6)
+#define NFS3_commitargs_sz (NFS3_fh_sz+3)
+
+#define NFS3_attrstat_sz (1+NFS3_fattr_sz)
+#define NFS3_wccstat_sz (1+NFS3_wcc_data_sz)
+#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
+#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
+#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
+#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
+#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
+#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
+#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
+#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
+#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
+#define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
+#define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
+#define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
+#define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2)
+
+/*
+ * Map file type to S_IFMT bits
+ *
+ * The table is indexed with the file type word taken from the wire by
+ * xdr_decode_fattr(), which first clamps anything at or above NF3BAD to
+ * NF3BAD.  .mode holds the S_IFMT bits that get or'ed into fattr->mode and
+ * .nfs2type the value that gets stored in fattr->type.
+ */
+static struct {
+ unsigned int mode;
+ unsigned int nfs2type;
+} nfs_type2fmt[] = {
+ { 0, NFNON },
+ { S_IFREG, NFREG },
+ { S_IFDIR, NFDIR },
+ { S_IFBLK, NFBLK },
+ { S_IFCHR, NFCHR },
+ { S_IFLNK, NFLNK },
+ { S_IFSOCK, NFSOCK },
+ { S_IFIFO, NFFIFO },
+ { 0, NFBAD }
+};
+
+/*
+ * Common NFS XDR functions as inlines
+ */
+
+/*
+ * Append file handle @fh at @p as a length-prefixed array and return the
+ * position following it.
+ */
+static inline u32 *
+xdr_encode_fhandle(u32 *p, struct nfs_fh *fh)
+{
+	u32 *next = xdr_encode_array(p, fh->data, fh->size);
+
+	return next;
+}
+
+/*
+ * Read a length-prefixed file handle from @p into @fh.  Returns the
+ * position following the handle, or NULL when the advertised length
+ * exceeds NFS3_FHSIZE (fh->size has been overwritten by then, the handle
+ * data has not).
+ */
+static inline u32 *
+xdr_decode_fhandle(u32 *p, struct nfs_fh *fh)
+{
+	fh->size = ntohl(*p++);
+	if (fh->size > NFS3_FHSIZE)
+		return NULL;
+
+	memcpy(fh->data, p, fh->size);
+	return p + XDR_QUADLEN(fh->size);
+}
+
+/*
+ * Encode/decode time.
+ *
+ * A timestamp occupies two words: seconds, then nanoseconds.
+ */
+static inline u32 *
+xdr_encode_time3(u32 *p, struct timespec *timep)
+{
+	p[0] = htonl(timep->tv_sec);
+	p[1] = htonl(timep->tv_nsec);
+	return p + 2;
+}
+
+/*
+ * Read a two-word (seconds, nanoseconds) timestamp from @p into @timep
+ * and return the position following it.
+ */
+static inline u32 *
+xdr_decode_time3(u32 *p, struct timespec *timep)
+{
+	timep->tv_sec = ntohl(p[0]);
+	timep->tv_nsec = ntohl(p[1]);
+	return p + 2;
+}
+
+/*
+ * Decode a full set of NFSv3 file attributes at @p into @fattr and return
+ * the position following them.  The attributes are flagged as valid v3
+ * attributes and stamped with the current jiffies value.
+ */
+static u32 *
+xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
+{
+	unsigned int	ftype, dev_major, dev_minor;
+	int		type_bits;
+
+	/* File types at or beyond NF3BAD all use the NF3BAD table slot */
+	ftype = ntohl(*p++);
+	if (ftype > NF3BAD)
+		ftype = NF3BAD;
+	type_bits = nfs_type2fmt[ftype].mode;
+	fattr->type = nfs_type2fmt[ftype].nfs2type;
+
+	/* The S_IFMT part of the mode comes from the type, not the wire */
+	fattr->mode = (ntohl(*p++) & ~S_IFMT) | type_bits;
+	fattr->nlink = ntohl(*p++);
+	fattr->uid = ntohl(*p++);
+	fattr->gid = ntohl(*p++);
+	p = xdr_decode_hyper(p, &fattr->size);
+	p = xdr_decode_hyper(p, &fattr->du.nfs3.used);
+
+	/*
+	 * Turn remote device info into Linux-specific dev_t; if the numbers
+	 * do not survive the round trip, record no device at all.
+	 */
+	dev_major = ntohl(*p++);
+	dev_minor = ntohl(*p++);
+	fattr->rdev = MKDEV(dev_major, dev_minor);
+	if (MAJOR(fattr->rdev) != dev_major || MINOR(fattr->rdev) != dev_minor)
+		fattr->rdev = 0;
+
+	p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3);
+	p = xdr_decode_hyper(p, &fattr->fileid);
+	p = xdr_decode_time3(p, &fattr->atime);
+	p = xdr_decode_time3(p, &fattr->mtime);
+	p = xdr_decode_time3(p, &fattr->ctime);
+
+	/* Record that a complete set of v3 attributes is now present */
+	fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
+	fattr->timestamp = jiffies;
+	return p;
+}
+
+/*
+ * Encode the settable attributes selected by attr->ia_valid at @p and
+ * return the position following them.
+ *
+ * Mode, uid, gid and size are each preceded by a presence word (one or
+ * zero).  For atime and mtime the leading word is two when an explicit
+ * time follows (ATTR_*_SET), one when only ATTR_ATIME/ATTR_MTIME is set
+ * and no time is sent, and zero otherwise.
+ */
+static inline u32 *
+xdr_encode_sattr(u32 *p, struct iattr *attr)
+{
+	const unsigned int valid = attr->ia_valid;
+
+	if (!(valid & ATTR_MODE)) {
+		*p++ = xdr_zero;
+	} else {
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_mode);
+	}
+
+	if (!(valid & ATTR_UID)) {
+		*p++ = xdr_zero;
+	} else {
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_uid);
+	}
+
+	if (!(valid & ATTR_GID)) {
+		*p++ = xdr_zero;
+	} else {
+		*p++ = xdr_one;
+		*p++ = htonl(attr->ia_gid);
+	}
+
+	if (!(valid & ATTR_SIZE)) {
+		*p++ = xdr_zero;
+	} else {
+		*p++ = xdr_one;
+		p = xdr_encode_hyper(p, (__u64) attr->ia_size);
+	}
+
+	if (valid & ATTR_ATIME_SET) {
+		*p++ = xdr_two;
+		p = xdr_encode_time3(p, &attr->ia_atime);
+	} else {
+		*p++ = (valid & ATTR_ATIME) ? xdr_one : xdr_zero;
+	}
+
+	if (valid & ATTR_MTIME_SET) {
+		*p++ = xdr_two;
+		p = xdr_encode_time3(p, &attr->ia_mtime);
+	} else {
+		*p++ = (valid & ATTR_MTIME) ? xdr_one : xdr_zero;
+	}
+
+	return p;
+}
+
+/*
+ * Decode pre-operation attributes (size, mtime, ctime) at @p into the
+ * pre_* fields of @fattr, flag them with NFS_ATTR_WCC and return the
+ * position following them.
+ */
+static inline u32 *
+xdr_decode_wcc_attr(u32 *p, struct nfs_fattr *fattr)
+{
+	u32 *next;
+
+	next = xdr_decode_hyper(p, &fattr->pre_size);
+	next = xdr_decode_time3(next, &fattr->pre_mtime);
+	next = xdr_decode_time3(next, &fattr->pre_ctime);
+	fattr->valid |= NFS_ATTR_WCC;
+	return next;
+}
+
+/*
+ * Decode optional post-operation attributes: a presence word followed,
+ * when it is non-zero, by a full attribute set decoded into @fattr.
+ * Returns the position following whatever was consumed.
+ */
+static inline u32 *
+xdr_decode_post_op_attr(u32 *p, struct nfs_fattr *fattr)
+{
+	u32 present = *p++;
+
+	if (!present)
+		return p;
+	return xdr_decode_fattr(p, fattr);
+}
+
+/*
+ * Decode optional pre-operation attributes: a presence word followed,
+ * when it is non-zero, by the size/mtime/ctime triple decoded into
+ * @fattr.  Returns the position following whatever was consumed.
+ */
+static inline u32 *
+xdr_decode_pre_op_attr(u32 *p, struct nfs_fattr *fattr)
+{
+	u32 present = *p++;
+
+	if (!present)
+		return p;
+	return xdr_decode_wcc_attr(p, fattr);
+}
+
+
+/*
+ * Decode weak cache consistency data: optional pre-operation attributes
+ * followed by optional post-operation attributes, both into @fattr.
+ */
+static inline u32 *
+xdr_decode_wcc_data(u32 *p, struct nfs_fattr *fattr)
+{
+	return xdr_decode_post_op_attr(xdr_decode_pre_op_attr(p, fattr), fattr);
+}
+
+/*
+ * NFS encode functions
+ */
+
+/*
+ * Encode an argument consisting of a single file handle and record the
+ * resulting send length.  Always returns 0.
+ */
+static int
+nfs3_xdr_fhandle(struct rpc_rqst *req, u32 *p, struct nfs_fh *fh)
+{
+	u32 *end = xdr_encode_fhandle(p, fh);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode SETATTR arguments: file handle, new attributes, then the guard
+ * word, followed by the guard time only when the guard is non-zero.
+ * Always returns 0.
+ */
+static int
+nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args)
+{
+	p = xdr_encode_sattr(xdr_encode_fhandle(p, args->fh), args->sattr);
+
+	*p++ = htonl(args->guard);
+	if (args->guard != 0)
+		p = xdr_encode_time3(p, &args->guardtime);
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode directory operation arguments: the directory's file handle
+ * followed by the entry name.  Always returns 0.
+ */
+static int
+nfs3_xdr_diropargs(struct rpc_rqst *req, u32 *p, struct nfs3_diropargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fh);
+	end = xdr_encode_array(end, args->name, args->len);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode ACCESS arguments: file handle followed by the access mask.
+ * Always returns 0.
+ */
+static int
+nfs3_xdr_accessargs(struct rpc_rqst *req, u32 *p, struct nfs3_accessargs *args)
+{
+	u32 *end = xdr_encode_fhandle(p, args->fh);
+
+	*end++ = htonl(args->access);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode READ arguments (file handle, offset, count).  Because the data
+ * is read directly into the page cache, the receive buffer is prepared
+ * here as well: the caller's pages are inlined behind a reply header of
+ * the expected size.  Always returns 0.
+ */
+static int
+nfs3_xdr_readargs(struct rpc_rqst *req, u32 *p, struct nfs_readargs *args)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	u32		nbytes = args->count;
+	unsigned int	hdr_bytes;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->offset);
+	*p++ = htonl(nbytes);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	hdr_bytes = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, hdr_bytes,
+			 args->pages, args->pgbase, nbytes);
+	return 0;
+}
+
+/*
+ * Encode WRITE arguments and splice the pages to be written into the
+ * send buffer.  The byte count goes out twice: once as the count
+ * argument and once more after the stability word, ahead of the data.
+ * Always returns 0.
+ */
+static int
+nfs3_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+{
+	struct xdr_buf	*snd = &req->rq_snd_buf;
+	u32		nbytes = args->count;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->offset);
+	*p++ = htonl(nbytes);
+	*p++ = htonl(args->stable);
+	*p++ = htonl(nbytes);
+	snd->len = xdr_adjust_iovec(snd->head, p);
+
+	/* Copy the page array */
+	xdr_encode_pages(snd, args->pages, args->pgbase, nbytes);
+	return 0;
+}
+
+/*
+ * Encode CREATE arguments: directory handle, name and create mode,
+ * followed by the two verifier words for an exclusive create or by the
+ * initial attributes for every other mode.  Always returns 0.
+ */
+static int
+nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	*p++ = htonl(args->createmode);
+
+	if (args->createmode != NFS3_CREATE_EXCLUSIVE) {
+		p = xdr_encode_sattr(p, args->sattr);
+	} else {
+		/* The verifier words are copied without byte swapping */
+		*p++ = args->verifier[0];
+		*p++ = args->verifier[1];
+	}
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode MKDIR arguments: parent handle, new name, initial attributes.
+ * Always returns 0.
+ */
+static int
+nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fh);
+	end = xdr_encode_array(end, args->name, args->len);
+	end = xdr_encode_sattr(end, args->sattr);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode SYMLINK arguments: parent handle and link name, then the
+ * attributes, and the target path last.  Always returns 0.
+ */
+static int
+nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fromfh);
+	end = xdr_encode_array(end, args->fromname, args->fromlen);
+	end = xdr_encode_sattr(end, args->sattr);
+	end = xdr_encode_array(end, args->topath, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode MKNOD arguments: parent handle, name, file type and attributes.
+ * The device major/minor pair is appended for character and block
+ * devices only.  Always returns 0.
+ */
+static int
+nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args)
+{
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_array(p, args->name, args->len);
+	*p++ = htonl(args->type);
+	p = xdr_encode_sattr(p, args->sattr);
+
+	switch (args->type) {
+	case NF3CHR:
+	case NF3BLK:
+		*p++ = htonl(MAJOR(args->rdev));
+		*p++ = htonl(MINOR(args->rdev));
+		break;
+	default:
+		break;
+	}
+
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	return 0;
+}
+
+/*
+ * Encode RENAME arguments: source directory handle and name, then
+ * destination directory handle and name.  Always returns 0.
+ */
+static int
+nfs3_xdr_renameargs(struct rpc_rqst *req, u32 *p, struct nfs3_renameargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fromfh);
+	end = xdr_encode_array(end, args->fromname, args->fromlen);
+	end = xdr_encode_fhandle(end, args->tofh);
+	end = xdr_encode_array(end, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode LINK arguments: handle of the existing file, handle of the
+ * target directory, and the new name.  Always returns 0.
+ */
+static int
+nfs3_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs3_linkargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fromfh);
+	end = xdr_encode_fhandle(end, args->tofh);
+	end = xdr_encode_array(end, args->toname, args->tolen);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * Encode the arguments of a READDIR or READDIRPLUS call: directory
+ * handle, cookie, the two cookie verifier words, an extra dircount word
+ * for READDIRPLUS only, and the buffer size.  The caller's pages are
+ * then installed in the receive buffer behind the expected reply header.
+ * Always returns 0.
+ */
+static int
+nfs3_xdr_readdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_readdirargs *args)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	u32		nbytes = args->count;
+	unsigned int	hdr_bytes;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	p = xdr_encode_hyper(p, args->cookie);
+	/* The verifier words are copied without byte swapping */
+	*p++ = args->verf[0];
+	*p++ = args->verf[1];
+	/* readdirplus: need dircount + buffer size.
+	 * We just make sure we make dircount big enough */
+	if (args->plus)
+		*p++ = htonl(nbytes >> 3);
+	*p++ = htonl(nbytes);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	hdr_bytes = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readdirres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, hdr_bytes, args->pages, 0, nbytes);
+	return 0;
+}
+
+/*
+ * Decode the result of a readdir call.
+ * We just check for syntactical correctness.
+ *
+ * The status word, the directory's post-op attributes and the cookie
+ * verifier are decoded from the reply header; the entries themselves
+ * stay in the first receive page and are only walked to verify that
+ * they fit inside the data actually received.
+ *
+ * Returns the number of complete entries found, a negative errno derived
+ * from the NFS status, or -EIO for an overflowed header, an oversized
+ * file name or an oversized file handle.  When the listing turns out to
+ * be cut short, the two words at the start of the first incomplete entry
+ * are zeroed so that it ends at the last complete one.
+ */
+static int
+nfs3_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs3_readdirres *res)
+{
+ struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+ struct kvec *iov = rcvbuf->head;
+ struct page **page;
+ int hdrlen, recvd;
+ int status, nr;
+ unsigned int len, pglen;
+ u32 *entry, *end, *kaddr;
+
+ status = ntohl(*p++);
+ /* Decode post_op_attrs */
+ p = xdr_decode_post_op_attr(p, res->dir_attr);
+ if (status)
+ return -nfs_stat_to_errno(status);
+ /* Decode verifier cookie */
+ if (res->verf) {
+ res->verf[0] = *p++;
+ res->verf[1] = *p++;
+ } else {
+ p += 2;
+ }
+
+ /* Compare what we consumed with the size of the header iovec */
+ hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+ if (iov->iov_len < hdrlen) {
+ printk(KERN_WARNING "NFS: READDIR reply header overflowed:"
+ "length %d > %Zu\n", hdrlen, iov->iov_len);
+ return -errno_NFSERR_IO;
+ } else if (iov->iov_len != hdrlen) {
+ dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
+ xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+ }
+
+ /* Never look beyond the amount of page data actually received */
+ pglen = rcvbuf->page_len;
+ recvd = rcvbuf->len - hdrlen;
+ if (pglen > recvd)
+ pglen = recvd;
+ page = rcvbuf->pages;
+ kaddr = p = (u32 *)kmap_atomic(*page, KM_USER0);
+ end = (u32 *)((char *)p + pglen);
+ entry = p;
+ /* Each entry is introduced by a non-zero "entry follows" word */
+ for (nr = 0; *p++; nr++) {
+ if (p + 3 > end)
+ goto short_pkt;
+ p += 2; /* inode # */
+ len = ntohl(*p++); /* string length */
+ p += XDR_QUADLEN(len) + 2; /* name + cookie */
+ if (len > NFS3_MAXNAMLEN) {
+ printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n",
+ len);
+ goto err_unmap;
+ }
+
+ if (res->plus) {
+ /* post_op_attr */
+ if (p + 2 > end)
+ goto short_pkt;
+ if (*p++) {
+ p += 21;
+ if (p + 1 > end)
+ goto short_pkt;
+ }
+ /* post_op_fh3 */
+ if (*p++) {
+ if (p + 1 > end)
+ goto short_pkt;
+ len = ntohl(*p++);
+ if (len > NFS3_FHSIZE) {
+ printk(KERN_WARNING "NFS: giant filehandle in "
+ "readdir (len %x)!\n", len);
+ goto err_unmap;
+ }
+ p += XDR_QUADLEN(len);
+ }
+ }
+
+ if (p + 2 > end)
+ goto short_pkt;
+ /* This entry was complete; remember where the next one starts */
+ entry = p;
+ }
+ /* An empty listing that does not carry a set EOF word is also short */
+ if (!nr && (entry[0] != 0 || entry[1] == 0))
+ goto short_pkt;
+ out:
+ kunmap_atomic(kaddr, KM_USER0);
+ return nr;
+ short_pkt:
+ /* Terminate the listing in front of the incomplete entry */
+ entry[0] = entry[1] = 0;
+ /* truncate listing ? */
+ if (!nr) {
+ printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+ /* nfs3_decode_dirent() reads a non-zero second word as EOF */
+ entry[1] = 1;
+ }
+ goto out;
+err_unmap:
+ nr = -errno_NFSERR_IO;
+ goto out;
+}
+
+/*
+ * Decode one directory entry at @p into @entry; @plus says whether the
+ * entry carries the READDIRPLUS attributes and file handle.
+ *
+ * Returns the position of the next entry on success.  When no entry
+ * follows, returns ERR_PTR(-EBADCOOKIE) with entry->eof set if the next
+ * word is non-zero, and ERR_PTR(-EAGAIN) if it is zero.  ERR_PTR(-EAGAIN)
+ * is also returned, with *entry restored to its previous contents, when
+ * the file handle cannot be decoded.
+ */
+u32 *
+nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
+{
+ /* Snapshot, so that a failed decode can be rolled back */
+ struct nfs_entry old = *entry;
+
+ if (!*p++) {
+ if (!*p)
+ return ERR_PTR(-EAGAIN);
+ entry->eof = 1;
+ return ERR_PTR(-EBADCOOKIE);
+ }
+
+ p = xdr_decode_hyper(p, &entry->ino);
+ entry->len = ntohl(*p++);
+ /* The name is not copied; it points into the reply buffer */
+ entry->name = (const char *) p;
+ p += XDR_QUADLEN(entry->len);
+ entry->prev_cookie = entry->cookie;
+ p = xdr_decode_hyper(p, &entry->cookie);
+
+ if (plus) {
+ entry->fattr->valid = 0;
+ p = xdr_decode_post_op_attr(p, entry->fattr);
+ /* In fact, a post_op_fh3: */
+ if (*p++) {
+ p = xdr_decode_fhandle(p, entry->fh);
+ /* Ugh -- server reply was truncated */
+ if (p == NULL) {
+ dprintk("NFS: FH truncated\n");
+ *entry = old;
+ return ERR_PTR(-EAGAIN);
+ }
+ } else
+ memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
+ }
+
+ /* EOF when no further entry follows and the word after that is set */
+ entry->eof = !p[0] && p[1];
+ return p;
+}
+
+/*
+ * Encode COMMIT arguments: file handle, offset and count, taken from the
+ * write argument structure.  Always returns 0.
+ */
+static int
+nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
+{
+	u32 *end;
+
+	end = xdr_encode_fhandle(p, args->fh);
+	end = xdr_encode_hyper(end, args->offset);
+	*end++ = htonl(args->count);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, end);
+	return 0;
+}
+
+/*
+ * NFS XDR decode functions
+ */
+
+/*
+ * Decode a reply made of a status word followed by file attributes.
+ * The attributes are only decoded when the status is zero; otherwise
+ * the status is returned as a negative errno.
+ */
+static int
+nfs3_xdr_attrstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int status = ntohl(*p++);
+
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	xdr_decode_fattr(p, fattr);
+	return 0;
+}
+
+/*
+ * Decode a reply made of a status word followed by wcc_data
+ * (SATTR, REMOVE, RMDIR).  The wcc_data is decoded whatever the status;
+ * a non-zero status is returned as a negative errno.
+ */
+static int
+nfs3_xdr_wccstat(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	int status = ntohl(*p++);
+
+	if (status != 0)
+		status = -nfs_stat_to_errno(status);
+	xdr_decode_wcc_data(p, fattr);
+	return status;
+}
+
+/*
+ * Decode a LOOKUP reply.  On success the object's file handle and
+ * optional attributes come first; the directory's optional attributes
+ * follow in every case.  Returns 0, the NFS status as a negative errno,
+ * or -EIO when the file handle cannot be decoded.
+ */
+static int
+nfs3_xdr_lookupres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
+{
+	int status = ntohl(*p++);
+
+	if (status == 0) {
+		p = xdr_decode_fhandle(p, res->fh);
+		if (p == NULL)
+			return -errno_NFSERR_IO;
+		p = xdr_decode_post_op_attr(p, res->fattr);
+	} else {
+		status = -nfs_stat_to_errno(status);
+	}
+
+	xdr_decode_post_op_attr(p, res->dir_attr);
+	return status;
+}
+
+/*
+ * Decode an ACCESS reply: status, optional attributes and, on success,
+ * the granted access mask.  Returns 0 or the NFS status as a negative
+ * errno.
+ */
+static int
+nfs3_xdr_accessres(struct rpc_rqst *req, u32 *p, struct nfs3_accessres *res)
+{
+	int status;
+
+	status = ntohl(*p++);
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->access = ntohl(*p);
+	return 0;
+}
+
+/*
+ * Encode READLINK arguments (just the file handle) and install the
+ * caller's pages in the receive buffer behind the expected reply header.
+ * Always returns 0.
+ */
+static int
+nfs3_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_readlinkargs *args)
+{
+	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	unsigned int	hdr_bytes;
+
+	p = xdr_encode_fhandle(p, args->fh);
+	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Inline the page array */
+	hdr_bytes = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readlinkres_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, hdr_bytes,
+			 args->pages, args->pgbase, args->pglen);
+	return 0;
+}
+
+/*
+ * Decode READLINK reply
+ *
+ * The status word and the optional attributes are decoded from the reply
+ * header; the link text itself has been received into the first page of
+ * the receive buffer and is NUL terminated in place.
+ *
+ * Returns 0 on success, the NFS status as a negative errno,
+ * -ENAMETOOLONG when the advertised length is not positive or does not
+ * leave room for the terminator within the page data, and -EIO when the
+ * header overflowed or fewer bytes were received than advertised.
+ */
+static int
+nfs3_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	int hdrlen, len, recvd;
+	char	*kaddr;
+	int status;
+
+	status = ntohl(*p++);
+	p = xdr_decode_post_op_attr(p, fattr);
+
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	/* Convert length of symlink */
+	len = ntohl(*p++);
+	if (len >= rcvbuf->page_len || len <= 0) {
+		/* dprintk() messages in this file carry no KERN_ level */
+		dprintk("nfs: server returned giant symlink!\n");
+		return -ENAMETOOLONG;
+	}
+
+	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+	if (iov->iov_len < hdrlen) {
+		printk(KERN_WARNING "NFS: READLINK reply header overflowed:"
+				"length %d > %Zu\n", hdrlen, iov->iov_len);
+		return -errno_NFSERR_IO;
+	} else if (iov->iov_len != hdrlen) {
+		dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
+		xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
+	}
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (recvd < len) {
+		/* len and recvd are ints, so print them as such */
+		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+				"count %d > recvd %d\n", len, recvd);
+		return -EIO;
+	}
+
+	/* NULL terminate the string we got */
+	kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0);
+	kaddr[len+rcvbuf->page_base] = '\0';
+	kunmap_atomic(kaddr, KM_USER0);
+	return 0;
+}
+
+/*
+ * Decode READ reply
+ *
+ * Returns the number of data bytes the reply is good for (never more
+ * than was actually received), the NFS status as a negative errno, or
+ * -EIO when the two byte counts in the reply disagree or the header
+ * overflowed.  res->count is lowered to the returned count if it was
+ * larger.
+ */
+static int
+nfs3_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
+{
+ struct kvec *iov = req->rq_rcv_buf.head;
+ int status, count, ocount, recvd, hdrlen;
+
+ status = ntohl(*p++);
+ p = xdr_decode_post_op_attr(p, res->fattr);
+
+ if (status != 0)
+ return -nfs_stat_to_errno(status);
+
+ /* Decode reply count and EOF flag. NFSv3 is somewhat redundant
+ * in that it puts the count both in the res struct and in the
+ * opaque data count. */
+ count = ntohl(*p++);
+ res->eof = ntohl(*p++);
+ ocount = ntohl(*p++);
+
+ if (ocount != count) {
+ printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n");
+ return -errno_NFSERR_IO;
+ }
+
+ /* Compare what we consumed with the size of the header iovec */
+ hdrlen = (u8 *) p - (u8 *) iov->iov_base;
+ if (iov->iov_len < hdrlen) {
+ printk(KERN_WARNING "NFS: READ reply header overflowed:"
+ "length %d > %Zu\n", hdrlen, iov->iov_len);
+ return -errno_NFSERR_IO;
+ } else if (iov->iov_len != hdrlen) {
+ dprintk("NFS: READ header is short. iovec will be shifted.\n");
+ xdr_shift_buf(&req->rq_rcv_buf, iov->iov_len - hdrlen);
+ }
+
+ /* Trust the amount received over the count the server claims */
+ recvd = req->rq_rcv_buf.len - hdrlen;
+ if (count > recvd) {
+ printk(KERN_WARNING "NFS: server cheating in read reply: "
+ "count %d > recvd %d\n", count, recvd);
+ count = recvd;
+ res->eof = 0;
+ }
+
+ if (count < res->count)
+ res->count = count;
+
+ return count;
+}
+
+/*
+ * Decode a WRITE reply: status and wcc_data, followed on success by the
+ * byte count, the commit level and the two verifier words.  Returns the
+ * byte count written or the NFS status as a negative errno.
+ */
+static int
+nfs3_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	int status = ntohl(*p++);
+
+	p = xdr_decode_wcc_data(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->count = ntohl(p[0]);
+	res->verf->committed = (enum nfs3_stable_how)ntohl(p[1]);
+	/* The verifier words are copied without byte swapping */
+	res->verf->verifier[0] = p[2];
+	res->verf->verifier[1] = p[3];
+
+	return res->count;
+}
+
+/*
+ * Decode a CREATE response (this decoder also serves MKDIR, SYMLINK and
+ * MKNOD).  On success the reply holds an optional file handle and
+ * optional attributes for the new object; when the handle is absent
+ * res->fh is cleared and the attributes are decoded but then marked
+ * invalid.  The parent's wcc_data follows unless the handle could not be
+ * decoded.  Returns 0, the NFS status as a negative errno, or -EIO for
+ * an undecodable file handle.
+ */
+static int
+nfs3_xdr_createres(struct rpc_rqst *req, u32 *p, struct nfs3_diropres *res)
+{
+	int status = ntohl(*p++);
+
+	if (status != 0) {
+		status = -nfs_stat_to_errno(status);
+	} else if (*p++) {
+		/* File handle present */
+		p = xdr_decode_fhandle(p, res->fh);
+		if (p == NULL)
+			return -errno_NFSERR_IO;
+		p = xdr_decode_post_op_attr(p, res->fattr);
+	} else {
+		/* No file handle: consume the attributes but discard them */
+		memset(res->fh, 0, sizeof(*res->fh));
+		p = xdr_decode_post_op_attr(p, res->fattr);
+		res->fattr->valid = 0;
+	}
+
+	xdr_decode_wcc_data(p, res->dir_attr);
+	return status;
+}
+
+/*
+ * Decode a RENAME reply: status, then wcc_data for the source directory
+ * and for the destination directory, both decoded whatever the status.
+ * Returns 0 or the NFS status as a negative errno.
+ */
+static int
+nfs3_xdr_renameres(struct rpc_rqst *req, u32 *p, struct nfs3_renameres *res)
+{
+	int status = ntohl(*p++);
+
+	if (status != 0)
+		status = -nfs_stat_to_errno(status);
+
+	p = xdr_decode_wcc_data(p, res->fromattr);
+	xdr_decode_wcc_data(p, res->toattr);
+	return status;
+}
+
+/*
+ * Decode a LINK reply: status, optional attributes of the file, then
+ * wcc_data for the directory, both decoded whatever the status.
+ * Returns 0 or the NFS status as a negative errno.
+ */
+static int
+nfs3_xdr_linkres(struct rpc_rqst *req, u32 *p, struct nfs3_linkres *res)
+{
+	int status = ntohl(*p++);
+
+	if (status != 0)
+		status = -nfs_stat_to_errno(status);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	xdr_decode_wcc_data(p, res->dir_attr);
+	return status;
+}
+
+/*
+ * Decode an FSSTAT reply: status, optional attributes and, on success,
+ * six 64-bit counters (total/free/available bytes, then
+ * total/free/available files).  Returns 0 or the NFS status as a
+ * negative errno.
+ */
+static int
+nfs3_xdr_fsstatres(struct rpc_rqst *req, u32 *p, struct nfs_fsstat *res)
+{
+	int status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	p = xdr_decode_hyper(p, &res->tbytes);
+	p = xdr_decode_hyper(p, &res->fbytes);
+	p = xdr_decode_hyper(p, &res->abytes);
+	p = xdr_decode_hyper(p, &res->tfiles);
+	p = xdr_decode_hyper(p, &res->ffiles);
+	xdr_decode_hyper(p, &res->afiles);
+
+	/* ignore invarsec */
+	return 0;
+}
+
+/*
+ * Decode an FSINFO reply: status, optional attributes and, on success,
+ * the seven transfer size words followed by the 64-bit maximum file
+ * size.  res->lease_time is set to zero.  Returns 0 or the NFS status as
+ * a negative errno.
+ */
+static int
+nfs3_xdr_fsinfores(struct rpc_rqst *req, u32 *p, struct nfs_fsinfo *res)
+{
+	int status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->rtmax  = ntohl(p[0]);
+	res->rtpref = ntohl(p[1]);
+	res->rtmult = ntohl(p[2]);
+	res->wtmax  = ntohl(p[3]);
+	res->wtpref = ntohl(p[4]);
+	res->wtmult = ntohl(p[5]);
+	res->dtpref = ntohl(p[6]);
+	xdr_decode_hyper(p + 7, &res->maxfilesize);
+
+	/* ignore time_delta and properties */
+	res->lease_time = 0;
+	return 0;
+}
+
+/*
+ * Decode a PATHCONF reply: status, optional attributes and, on success,
+ * the maximum link count and maximum name length.  Returns 0 or the NFS
+ * status as a negative errno.
+ */
+static int
+nfs3_xdr_pathconfres(struct rpc_rqst *req, u32 *p, struct nfs_pathconf *res)
+{
+	int status = ntohl(*p++);
+
+	p = xdr_decode_post_op_attr(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	res->max_link = ntohl(p[0]);
+	res->max_namelen = ntohl(p[1]);
+
+	/* ignore remaining fields */
+	return 0;
+}
+
+/*
+ * Decode a COMMIT reply: status and wcc_data, followed on success by
+ * the two verifier words.  Returns 0 or the NFS status as a negative
+ * errno.
+ */
+static int
+nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+	int status = ntohl(*p++);
+
+	p = xdr_decode_wcc_data(p, res->fattr);
+	if (status != 0)
+		return -nfs_stat_to_errno(status);
+
+	/* The verifier words are copied without byte swapping */
+	res->verf->verifier[0] = p[0];
+	res->verf->verifier[1] = p[1];
+	return 0;
+}
+
+/* Fallback definition; note that both arguments may be evaluated twice */
+#ifndef MAX
+# define MAX(a, b) (((a) > (b))? (a) : (b))
+#endif
+
+/*
+ * Build the nfs3_procedures[] slot for procedure NFS3PROC_<proc>, using
+ * nfs3_xdr_<argtype> as encoder and nfs3_xdr_<restype> as decoder.
+ * .p_bufsiz is the larger of the two NFS3_*_sz word counts, converted to
+ * bytes by the shift; @timer is stored in .p_timer.
+ */
+#define PROC(proc, argtype, restype, timer) \
+[NFS3PROC_##proc] = { \
+ .p_proc = NFS3PROC_##proc, \
+ .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
+ .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
+ .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
+ .p_timer = timer \
+ }
+
+/*
+ * NFSv3 procedure table.  PROC() uses designated initialisers, so each
+ * entry lands at the index given by its NFS3PROC_* number.  MKDIR,
+ * SYMLINK and MKNOD share the CREATE reply decoder; REMOVE and RMDIR
+ * share the directory-op encoder and the wcc status decoder; READDIR and
+ * READDIRPLUS share both encoder and decoder.
+ */
+struct rpc_procinfo nfs3_procedures[] = {
+ PROC(GETATTR, fhandle, attrstat, 1),
+ PROC(SETATTR, sattrargs, wccstat, 0),
+ PROC(LOOKUP, diropargs, lookupres, 2),
+ PROC(ACCESS, accessargs, accessres, 1),
+ PROC(READLINK, readlinkargs, readlinkres, 3),
+ PROC(READ, readargs, readres, 3),
+ PROC(WRITE, writeargs, writeres, 4),
+ PROC(CREATE, createargs, createres, 0),
+ PROC(MKDIR, mkdirargs, createres, 0),
+ PROC(SYMLINK, symlinkargs, createres, 0),
+ PROC(MKNOD, mknodargs, createres, 0),
+ PROC(REMOVE, diropargs, wccstat, 0),
+ PROC(RMDIR, diropargs, wccstat, 0),
+ PROC(RENAME, renameargs, renameres, 0),
+ PROC(LINK, linkargs, linkres, 0),
+ PROC(READDIR, readdirargs, readdirres, 3),
+ PROC(READDIRPLUS, readdirargs, readdirres, 3),
+ PROC(FSSTAT, fhandle, fsstatres, 0),
+ PROC(FSINFO, fhandle, fsinfores, 0),
+ PROC(PATHCONF, fhandle, pathconfres, 0),
+ PROC(COMMIT, commitargs, commitres, 5),
+};
+
+/*
+ * RPC version descriptor for NFS version 3; .nrprocs is the number of
+ * slots in nfs3_procedures[].
+ */
+struct rpc_version nfs_version3 = {
+ .number = 3,
+ .nrprocs = sizeof(nfs3_procedures)/sizeof(nfs3_procedures[0]),
+ .procs = nfs3_procedures
+};
+
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
new file mode 100644
index 000000000000..1d5cb3e80c3e
--- /dev/null
+++ b/fs/nfs/nfs4proc.c
@@ -0,0 +1,2786 @@
+/*
+ * fs/nfs/nfs4proc.c
+ *
+ * Client-side procedure declarations for NFSv4.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+#include <linux/namei.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY NFSDBG_PROC
+
+#define NFS4_POLL_RETRY_MIN (1*HZ)
+#define NFS4_POLL_RETRY_MAX (15*HZ)
+
+static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
+static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
+static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
+extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+extern struct rpc_procinfo nfs4_procedures[];
+
+extern nfs4_stateid zero_stateid;
+
+/*
+ * Prevent leaks of NFSv4 errors into userland: any value below -1000 is
+ * logged and replaced by -EIO; everything else is returned untouched.
+ */
+int nfs4_map_errors(int err)
+{
+	if (err >= -1000)
+		return err;
+
+	dprintk("%s could not handle NFSv4 error %d\n",
+			__FUNCTION__, -err);
+	return -EIO;
+}
+
+/*
+ * This is our standard bitmap for GETATTR requests.
+ * Element 0 carries FATTR4_WORD0_* bits, element 1 FATTR4_WORD1_* bits;
+ * the same layout is used by all the bitmaps below.
+ */
+const u32 nfs4_fattr_bitmap[2] = {
+ FATTR4_WORD0_TYPE
+ | FATTR4_WORD0_CHANGE
+ | FATTR4_WORD0_SIZE
+ | FATTR4_WORD0_FSID
+ | FATTR4_WORD0_FILEID,
+ FATTR4_WORD1_MODE
+ | FATTR4_WORD1_NUMLINKS
+ | FATTR4_WORD1_OWNER
+ | FATTR4_WORD1_OWNER_GROUP
+ | FATTR4_WORD1_RAWDEV
+ | FATTR4_WORD1_SPACE_USED
+ | FATTR4_WORD1_TIME_ACCESS
+ | FATTR4_WORD1_TIME_METADATA
+ | FATTR4_WORD1_TIME_MODIFY
+};
+
+/* Bitmap selecting the file-count (word 0) and space (word 1) attributes. */
+const u32 nfs4_statfs_bitmap[2] = {
+ FATTR4_WORD0_FILES_AVAIL
+ | FATTR4_WORD0_FILES_FREE
+ | FATTR4_WORD0_FILES_TOTAL,
+ FATTR4_WORD1_SPACE_AVAIL
+ | FATTR4_WORD1_SPACE_FREE
+ | FATTR4_WORD1_SPACE_TOTAL
+};
+
+/*
+ * Bitmap selecting the MAXLINK and MAXNAME attributes.
+ * NOTE(review): unlike the neighbouring bitmaps this one is not const;
+ * confirm whether that is intentional.
+ */
+u32 nfs4_pathconf_bitmap[2] = {
+ FATTR4_WORD0_MAXLINK
+ | FATTR4_WORD0_MAXNAME,
+ 0
+};
+
+/* Bitmap selecting MAXFILESIZE, MAXREAD, MAXWRITE and LEASE_TIME. */
+const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
+ | FATTR4_WORD0_MAXREAD
+ | FATTR4_WORD0_MAXWRITE
+ | FATTR4_WORD0_LEASE_TIME,
+ 0
+};
+
+/*
+ * Fill in the cookie, verifier, page offset and count of a READDIR
+ * argument block.
+ *
+ * @cookie:   position requested by the caller
+ * @verifier: cookie verifier, only used when @cookie > 2
+ * @dentry:   directory being read; supplies the inode numbers that are
+ *            written into the fake '.' and '..' entries
+ * @readdir:  argument block to fill in; readdir->count must be at
+ *            least 80, which is the size of the two fake entries
+ *            (10 XDR words each)
+ *
+ * For @cookie > 2 the cookie and verifier are passed through unchanged.
+ * Otherwise cookie 0 and a zero verifier are sent to the server, and
+ * for @cookie 0 or 1 the missing '.'/'..' entries are pre-encoded at
+ * the start of the first page, with pgbase/count adjusted to skip them.
+ */
+static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
+		struct nfs4_readdir_arg *readdir)
+{
+	u32 *start, *p;
+
+	BUG_ON(readdir->count < 80);
+	if (cookie > 2) {
+		readdir->cookie = cookie;
+		memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
+		return;
+	}
+
+	readdir->cookie = 0;
+	memset(&readdir->verifier, 0, sizeof(readdir->verifier));
+	if (cookie == 2)
+		return;
+
+	/*
+	 * NFSv4 servers do not return entries for '.' and '..'
+	 * Therefore, we fake these entries here.  We let '.'
+	 * have cookie 0 and '..' have cookie 1.  Note that
+	 * when talking to the server, we always send cookie 0
+	 * instead of 1 or 2.
+	 */
+	start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0);
+
+	if (cookie == 0) {
+		*p++ = xdr_one;                                  /* next */
+		*p++ = xdr_zero;                   /* cookie, first word */
+		*p++ = xdr_one;                   /* cookie, second word */
+		*p++ = xdr_one;                             /* entry len */
+		memcpy(p, ".\0\0\0", 4);                        /* entry */
+		p++;
+		*p++ = xdr_one;                         /* bitmap length */
+		*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+		*p++ = htonl(8);              /* attribute buffer length */
+		p = xdr_encode_hyper(p, dentry->d_inode->i_ino);
+	}
+
+	*p++ = xdr_one;                                  /* next */
+	*p++ = xdr_zero;                   /* cookie, first word */
+	*p++ = xdr_two;                   /* cookie, second word */
+	*p++ = xdr_two;                             /* entry len */
+	memcpy(p, "..\0\0", 4);                         /* entry */
+	p++;
+	*p++ = xdr_one;                         /* bitmap length */
+	*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
+	*p++ = htonl(8);              /* attribute buffer length */
+	p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino);
+
+	/* The server's reply is placed after the entries encoded above */
+	readdir->pgbase = (char *)p - (char *)start;
+	readdir->count -= readdir->pgbase;
+	kunmap_atomic(start, KM_USER0);
+}
+
+/*
+ * Move the client's last-renewal time forward to @timestamp.  The update
+ * is made under cl_lock and is skipped unless the stored time is before
+ * @timestamp.
+ */
+static void
+renew_lease(struct nfs_server *server, unsigned long timestamp)
+{
+	struct nfs4_client *client = server->nfs4_state;
+
+	spin_lock(&client->cl_lock);
+	if (time_before(client->cl_last_renewal, timestamp)) {
+		client->cl_last_renewal = timestamp;
+	}
+	spin_unlock(&client->cl_lock);
+}
+
+/*
+ * Adopt the post-operation change attribute from @cinfo, but only when
+ * the update was atomic and the cached value equals cinfo->before.
+ */
+static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinfo)
+{
+	struct nfs_inode *nfs_ino = NFS_I(inode);
+
+	if (cinfo->before != nfs_ino->change_attr)
+		return;
+	if (!cinfo->atomic)
+		return;
+	nfs_ino->change_attr = cinfo->after;
+}
+
+/*
+ * Record an open in @state: merge the FMODE_READ/FMODE_WRITE bits of
+ * @open_flags into state->state, bump the matching reader/writer
+ * counters and install @stateid.  Everything is done under
+ * inode->i_lock.
+ */
+static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+{
+ struct inode *inode = state->inode;
+
+ open_flags &= (FMODE_READ|FMODE_WRITE);
+ /* Protect against nfs4_find_state() */
+ spin_lock(&inode->i_lock);
+ state->state |= open_flags;
+ /* NB! List reordering - see the reclaim code for why. */
+ /* nwriters is always incremented for a write open; the list_move only happens when it was 0 */
+ if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++)
+ list_move(&state->open_states, &state->owner->so_states);
+ if (open_flags & FMODE_READ)
+ state->nreaders++;
+ memcpy(&state->stateid, stateid, sizeof(state->stateid));
+ spin_unlock(&inode->i_lock);
+}
+
+/*
+ * OPEN_RECLAIM:
+ * reclaim state on the server after a reboot.
+ * Assumes caller is holding the sp->so_sema
+ *
+ * If the inode holds a delegation that is not flagged NEED_RECLAIM, the
+ * delegation stateid is copied into @state and no RPC is sent.
+ * Otherwise an OPEN with NFS4_OPEN_CLAIM_PREVIOUS is issued for the
+ * inode's own file handle, using state->state as the open flags.
+ * Returns 0 or the status of the RPC.
+ */
+static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ struct inode *inode = state->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+ struct nfs_openargs o_arg = {
+ .fh = NFS_FH(inode),
+ .seqid = sp->so_seqid,
+ .id = sp->so_id,
+ .open_flags = state->state,
+ .clientid = server->nfs4_state->cl_clientid,
+ .claim = NFS4_OPEN_CLAIM_PREVIOUS,
+ .bitmask = server->attr_bitmask,
+ };
+ struct nfs_openres o_res = {
+ .server = server, /* Grrr */
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR],
+ .rpc_argp = &o_arg,
+ .rpc_resp = &o_res,
+ .rpc_cred = sp->so_cred,
+ };
+ int status;
+
+ if (delegation != NULL) {
+ /* A delegation that needs no reclaim is reused as-is: no RPC */
+ if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+ memcpy(&state->stateid, &delegation->stateid,
+ sizeof(state->stateid));
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
+ return 0;
+ }
+ o_arg.u.delegation_type = delegation->type;
+ }
+ status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+ /* The RPC status is handed to nfs4_increment_seqid() together with the owner */
+ nfs4_increment_seqid(status, sp);
+ if (status == 0) {
+ memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
+ if (o_res.delegation_type != 0) {
+ nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
+ /* Did the server issue an immediate delegation recall? */
+ if (o_res.do_recall)
+ nfs_async_inode_return_delegation(inode, &o_res.stateid);
+ }
+ }
+ /* Reached whether or not the RPC succeeded */
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ /* Ensure we update the inode attributes */
+ NFS_CACHEINV(inode);
+ return status;
+}
+
+/*
+ * Retry wrapper around _nfs4_open_reclaim().  Success and the errors
+ * STALE_CLIENTID, STALE_STATEID and EXPIRED are returned to the caller
+ * directly; any other result goes through nfs4_handle_exception(),
+ * which decides whether the call is repeated.
+ */
+static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+	struct nfs_server *server = NFS_SERVER(state->inode);
+	struct nfs4_exception exception = { };
+	int result;
+
+	for (;;) {
+		result = _nfs4_open_reclaim(sp, state);
+		if (result == 0 ||
+		    result == -NFS4ERR_STALE_CLIENTID ||
+		    result == -NFS4ERR_STALE_STATEID ||
+		    result == -NFS4ERR_EXPIRED)
+			return result;
+		result = nfs4_handle_exception(server, result, &exception);
+		if (!exception.retry)
+			return result;
+	}
+}
+
+/*
+ * Convert a delegated open in @state into a regular open on the server.
+ * An OPEN with NFS4_OPEN_CLAIM_DELEGATE_CUR is sent for @dentry's name
+ * in its parent directory, presenting the current stateid as the
+ * delegation.  Nothing is sent (and 0 is returned) when the state is
+ * not flagged NFS_DELEGATED_STATE or has no open mode.
+ * Takes and releases sp->so_sema and a reference on the parent dentry.
+ */
+static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+{
+ struct nfs4_state_owner *sp = state->owner;
+ struct inode *inode = dentry->d_inode;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct dentry *parent = dget_parent(dentry);
+ struct nfs_openargs arg = {
+ .fh = NFS_FH(parent->d_inode),
+ .clientid = server->nfs4_state->cl_clientid,
+ .name = &dentry->d_name,
+ .id = sp->so_id,
+ .server = server,
+ .bitmask = server->attr_bitmask,
+ .claim = NFS4_OPEN_CLAIM_DELEGATE_CUR,
+ };
+ struct nfs_openres res = {
+ .server = server,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ .rpc_cred = sp->so_cred,
+ };
+ int status = 0;
+
+ down(&sp->so_sema);
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ goto out;
+ if (state->state == 0)
+ goto out;
+ /* seqid and open flags are read only after so_sema has been taken */
+ arg.seqid = sp->so_seqid;
+ arg.open_flags = state->state;
+ memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
+ status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+ nfs4_increment_seqid(status, sp);
+ if (status >= 0) {
+ /* Replace the delegation stateid with the one returned by the OPEN */
+ memcpy(state->stateid.data, res.stateid.data,
+ sizeof(state->stateid.data));
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ }
+out:
+ up(&sp->so_sema);
+ dput(parent);
+ return status;
+}
+
+/*
+ * Retry wrapper around _nfs4_open_delegation_recall().  On
+ * STALE_CLIENTID, STALE_STATEID or EXPIRED state recovery is scheduled
+ * and the error is returned; other failures are passed to
+ * nfs4_handle_exception(), which may request another attempt.
+ */
+int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+{
+	struct nfs4_exception exception = { };
+	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	int result;
+
+	for (;;) {
+		result = _nfs4_open_delegation_recall(dentry, state);
+		if (result == 0)
+			return result;
+		if (result == -NFS4ERR_STALE_CLIENTID ||
+		    result == -NFS4ERR_STALE_STATEID ||
+		    result == -NFS4ERR_EXPIRED) {
+			/* Don't recall a delegation if it was lost */
+			nfs4_schedule_state_recovery(server->nfs4_state);
+			return result;
+		}
+		result = nfs4_handle_exception(server, result, &exception);
+		if (!exception.retry)
+			return result;
+	}
+}
+
+/*
+ * Send OPEN_CONFIRM for @stateid on file handle @fh using the owner's
+ * current sequence id.  When the call returns a non-negative status
+ * the confirmed stateid is written back through @stateid.
+ */
+static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid)
+{
+	struct nfs_open_confirmargs confirm_args = {
+		.fh = fh,
+		.seqid = sp->so_seqid,
+		.stateid = *stateid,
+	};
+	struct nfs_open_confirmres confirm_res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
+		.rpc_argp = &confirm_args,
+		.rpc_resp = &confirm_res,
+		.rpc_cred = sp->so_cred,
+	};
+	int rc;
+
+	rc = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
+	nfs4_increment_seqid(rc, sp);
+	if (rc < 0)
+		return rc;
+	memcpy(stateid, &confirm_res.stateid, sizeof(*stateid));
+	return rc;
+}
+
+/*
+ * Issue one OPEN call in directory @dir on behalf of state owner @sp.
+ * The seqid, owner id and clientid in @o_arg are filled in here; the
+ * remaining arguments must have been set up by the caller.  On success
+ * the directory change attribute is updated, the open is confirmed if
+ * the server asked for it, and attributes are fetched with a separate
+ * GETATTR when the reply did not carry valid ones.
+ */
+static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, struct nfs_openargs *o_arg, struct nfs_openres *o_res)
+{
+ struct nfs_server *server = NFS_SERVER(dir);
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
+ .rpc_argp = o_arg,
+ .rpc_resp = o_res,
+ .rpc_cred = sp->so_cred,
+ };
+ int status;
+
+ /* Update sequence id. The caller must serialize! */
+ o_arg->seqid = sp->so_seqid;
+ o_arg->id = sp->so_id;
+ o_arg->clientid = sp->so_client->cl_clientid;
+
+ status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+ nfs4_increment_seqid(status, sp);
+ if (status != 0)
+ goto out;
+ update_changeattr(dir, &o_res->cinfo);
+ /* The server flagged this open as needing an OPEN_CONFIRM */
+ if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
+ status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
+ sp, &o_res->stateid);
+ if (status != 0)
+ goto out;
+ }
+ /* No valid attributes in the reply: fetch them; the GETATTR status becomes the result */
+ if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
+ status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+out:
+ return status;
+}
+
+/*
+ * Check whether @cred may open @inode with the FMODE_READ/FMODE_WRITE
+ * bits in @openflags.  The access cache is consulted first; on a miss
+ * the server is asked for read, write and execute rights at once and
+ * the answer is added to the cache.
+ * Returns 0 if all wanted rights are granted, -EACCES if not, or the
+ * error from the access call.
+ */
+static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags)
+{
+	struct nfs_access_entry entry;
+	int wanted = 0;
+	int err;
+
+	if (openflags & FMODE_READ)
+		wanted |= MAY_READ;
+	if (openflags & FMODE_WRITE)
+		wanted |= MAY_WRITE;
+
+	err = nfs_access_get_cached(inode, cred, &entry);
+	if (err != 0) {
+		/* Be clever: ask server to check for all possible rights */
+		entry.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
+		entry.cred = cred;
+		entry.jiffies = jiffies;
+		err = _nfs4_proc_access(inode, &entry);
+		if (err != 0)
+			return err;
+		nfs_access_add_cache(inode, &entry);
+	}
+
+	return ((entry.mask & wanted) == wanted) ? 0 : -EACCES;
+}
+
+/*
+ * OPEN_EXPIRED:
+ * 	reclaim state on the server after a network partition.
+ * 	Assumes caller holds the appropriate lock
+ *
+ * @sp:     state owner the open belongs to
+ * @state:  open state to re-establish
+ * @dentry: a dentry for the file; the OPEN is done by name in its parent
+ *
+ * If the inode holds a delegation that does not need reclaiming, access
+ * is checked and the delegation stateid is reused without an OPEN.
+ * Otherwise a fresh OPEN (CLAIM_NULL) is sent and the result is checked
+ * against the cached inode: a different file type, or a changed file
+ * handle whose change attribute or size no longer match, yields -ESTALE
+ * and drops both the state owner and the dentry.
+ *
+ * Returns 0 on success or a negative error.
+ */
+static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+{
+	struct dentry *parent = dget_parent(dentry);
+	struct inode *dir = parent->d_inode;
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+	struct nfs_fattr f_attr = {
+		.valid = 0,
+	};
+	struct nfs_openargs o_arg = {
+		.fh = NFS_FH(dir),
+		.open_flags = state->state,
+		.name = &dentry->d_name,
+		.bitmask = server->attr_bitmask,
+		.claim = NFS4_OPEN_CLAIM_NULL,
+	};
+	struct nfs_openres o_res = {
+		.f_attr = &f_attr,
+		.server = server,
+	};
+	int status = 0;
+
+	if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+		status = _nfs4_do_access(inode, sp->so_cred, state->state);
+		if (status < 0)
+			goto out;
+		memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+		goto out;
+	}
+	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
+	if (status != 0)
+		goto out_nodeleg;
+	/* Check if files differ */
+	if ((f_attr.mode & S_IFMT) != (inode->i_mode & S_IFMT))
+		goto out_stale;
+	/* Has the file handle changed? */
+	if (nfs_compare_fh(&o_res.fh, NFS_FH(inode)) != 0) {
+		/* Verify if the change attributes are the same */
+		if (f_attr.change_attr != NFS_I(inode)->change_attr)
+			goto out_stale;
+		if (nfs_size_to_loff_t(f_attr.size) != inode->i_size)
+			goto out_stale;
+		/* Lets just pretend that this is the same file */
+		nfs_copy_fh(NFS_FH(inode), &o_res.fh);
+		NFS_I(inode)->fileid = f_attr.fileid;
+	}
+	memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
+	if (o_res.delegation_type != 0) {
+		/*
+		 * We get here either with no delegation at all or with
+		 * one flagged NEED_RECLAIM, so the pointer must be
+		 * tested before it is dereferenced.
+		 */
+		if (delegation == NULL ||
+		    !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM))
+			nfs_inode_set_delegation(inode, sp->so_cred, &o_res);
+		else
+			nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
+	}
+out_nodeleg:
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
+out:
+	dput(parent);
+	return status;
+out_stale:
+	status = -ESTALE;
+	/* Invalidate the state owner so we don't ever use it again */
+	nfs4_drop_state_owner(sp);
+	d_drop(dentry);
+	/* Should we be trying to close that stateid? */
+	goto out_nodeleg;
+}
+
+/*
+ * Re-establish @state after its lease expired.  The inode's list of
+ * open contexts is searched for the first one that refers to @state;
+ * its dentry is then used for the OPEN-by-name in _nfs4_open_expired().
+ * Returns -ENOENT when no open context uses @state.
+ */
+static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ struct nfs_inode *nfsi = NFS_I(state->inode);
+ struct nfs_open_context *ctx;
+ int status;
+
+ spin_lock(&state->inode->i_lock);
+ list_for_each_entry(ctx, &nfsi->open_files, list) {
+ if (ctx->state != state)
+ continue;
+ /* Take a reference on the context, then drop i_lock before the call below */
+ get_nfs_open_context(ctx);
+ spin_unlock(&state->inode->i_lock);
+ status = _nfs4_open_expired(sp, state, ctx->dentry);
+ put_nfs_open_context(ctx);
+ return status;
+ }
+ spin_unlock(&state->inode->i_lock);
+ return -ENOENT;
+}
+
+/*
+ * Returns an nfs4_state + an extra reference to the inode
+ *
+ * Try to satisfy an open of @inode locally from a delegation, without
+ * sending an OPEN to the server.  Fails with -ENOENT when there is no
+ * delegation covering the requested mode, or when the existing state is
+ * open with a mode that does not include the requested one.
+ * On success *res is set and 0 is returned.
+ */
+static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
+{
+ struct nfs_delegation *delegation;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs4_state_owner *sp = NULL;
+ struct nfs4_state *state = NULL;
+ int open_flags = flags & (FMODE_READ|FMODE_WRITE);
+ int err;
+
+ /* Protect against reboot recovery - NOTE ORDER! */
+ down_read(&clp->cl_sem);
+ /* Protect against delegation recall */
+ down_read(&nfsi->rwsem);
+ delegation = NFS_I(inode)->delegation;
+ err = -ENOENT;
+ /* The delegation type must include every requested mode bit */
+ if (delegation == NULL || (delegation->type & open_flags) != open_flags)
+ goto out_err;
+ err = -ENOMEM;
+ if (!(sp = nfs4_get_state_owner(server, cred))) {
+ dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
+ goto out_err;
+ }
+ down(&sp->so_sema);
+ state = nfs4_get_open_state(inode, sp);
+ /* err is still -ENOMEM here */
+ if (state == NULL)
+ goto out_err;
+
+ err = -ENOENT;
+ /* Already open with the requested mode: only bump the counters */
+ if ((state->state & open_flags) == open_flags) {
+ spin_lock(&inode->i_lock);
+ if (open_flags & FMODE_READ)
+ state->nreaders++;
+ if (open_flags & FMODE_WRITE)
+ state->nwriters++;
+ spin_unlock(&inode->i_lock);
+ goto out_ok;
+ } else if (state->state != 0)
+ goto out_err;
+
+ lock_kernel();
+ err = _nfs4_do_access(inode, cred, open_flags);
+ unlock_kernel();
+ if (err != 0)
+ goto out_err;
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
+ update_open_stateid(state, &delegation->stateid, open_flags);
+out_ok:
+ up(&sp->so_sema);
+ nfs4_put_state_owner(sp);
+ up_read(&nfsi->rwsem);
+ up_read(&clp->cl_sem);
+ /* This is the extra inode reference promised to the caller; the return value is not checked */
+ igrab(inode);
+ *res = state;
+ return 0;
+out_err:
+ /* sp is NULL when we failed before taking so_sema */
+ if (sp != NULL) {
+ if (state != NULL)
+ nfs4_put_open_state(state);
+ up(&sp->so_sema);
+ nfs4_put_state_owner(sp);
+ }
+ up_read(&nfsi->rwsem);
+ up_read(&clp->cl_sem);
+ return err;
+}
+
+/*
+ * Retry wrapper around _nfs4_open_delegated().  Returns the open state
+ * on success; otherwise an ERR_PTR() built from the value returned by
+ * nfs4_handle_exception() for the last attempt.
+ */
+static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
+{
+	struct nfs4_exception exception = { };
+	struct nfs4_state *open_state;
+	int rc;
+
+	do {
+		rc = _nfs4_open_delegated(inode, flags, cred, &open_state);
+		if (rc == 0)
+			break;
+		rc = nfs4_handle_exception(NFS_SERVER(inode), rc, &exception);
+		open_state = ERR_PTR(rc);
+	} while (exception.retry);
+
+	return open_state;
+}
+
+/*
+ * Returns an nfs4_state + a referenced inode
+ *
+ * Open @dentry's name in directory @dir on the server.  For O_EXCL the
+ * create verifier is built from jiffies and the current pid; otherwise
+ * @sattr supplies the attributes sent with the OPEN.  On success *res
+ * holds the open state and 0 is returned; on failure *res is NULL and
+ * a negative error is returned.
+ */
+static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+{
+ struct nfs4_state_owner *sp;
+ struct nfs4_state *state = NULL;
+ struct nfs_server *server = NFS_SERVER(dir);
+ struct nfs4_client *clp = server->nfs4_state;
+ struct inode *inode = NULL;
+ int status;
+ struct nfs_fattr f_attr = {
+ .valid = 0,
+ };
+ struct nfs_openargs o_arg = {
+ .fh = NFS_FH(dir),
+ .open_flags = flags,
+ .name = &dentry->d_name,
+ .server = server,
+ .bitmask = server->attr_bitmask,
+ .claim = NFS4_OPEN_CLAIM_NULL,
+ };
+ struct nfs_openres o_res = {
+ .f_attr = &f_attr,
+ .server = server,
+ };
+
+ /* Protect against reboot recovery conflicts */
+ down_read(&clp->cl_sem);
+ status = -ENOMEM;
+ if (!(sp = nfs4_get_state_owner(server, cred))) {
+ dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
+ goto out_err;
+ }
+ /* o_arg.u carries either the exclusive-create verifier or the attributes */
+ if (flags & O_EXCL) {
+ u32 *p = (u32 *) o_arg.u.verifier.data;
+ p[0] = jiffies;
+ p[1] = current->pid;
+ } else
+ o_arg.u.attrs = sattr;
+ /* Serialization for the sequence id */
+ down(&sp->so_sema);
+
+ status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
+ if (status != 0)
+ goto out_err;
+
+ status = -ENOMEM;
+ inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr);
+ if (!inode)
+ goto out_err;
+ state = nfs4_get_open_state(inode, sp);
+ if (!state)
+ goto out_err;
+ update_open_stateid(state, &o_res.stateid, flags);
+ if (o_res.delegation_type != 0)
+ nfs_inode_set_delegation(inode, cred, &o_res);
+ up(&sp->so_sema);
+ nfs4_put_state_owner(sp);
+ up_read(&clp->cl_sem);
+ *res = state;
+ return 0;
+out_err:
+ /*
+ * NOTE(review): state is still NULL on every path that jumps here,
+ * so the nfs4_put_open_state() call below looks unreachable - confirm.
+ */
+ if (sp != NULL) {
+ if (state != NULL)
+ nfs4_put_open_state(state);
+ up(&sp->so_sema);
+ nfs4_put_state_owner(sp);
+ }
+ /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
+ up_read(&clp->cl_sem);
+ if (inode != NULL)
+ iput(inode);
+ *res = NULL;
+ return status;
+}
+
+
+/*
+ * Retry wrapper around _nfs4_do_open().  Returns the open state on
+ * success, or an ERR_PTR() built from the value nfs4_handle_exception()
+ * returned for the last failed attempt.  NFS4ERR_BAD_SEQID is handled
+ * here: a warning is printed and the open is retried unconditionally.
+ */
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
+{
+ struct nfs4_exception exception = { };
+ struct nfs4_state *res;
+ int status;
+
+ do {
+ status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
+ if (status == 0)
+ break;
+ /* NOTE: BAD_SEQID means the server and client disagree about the
+ * book-keeping w.r.t. state-changing operations
+ * (OPEN/CLOSE/LOCK/LOCKU...)
+ * It is actually a sign of a bug on the client or on the server.
+ *
+ * If we receive a BAD_SEQID error in the particular case of
+ * doing an OPEN, we assume that nfs4_increment_seqid() will
+ * have unhashed the old state_owner for us, and that we can
+ * therefore safely retry using a new one. We should still warn
+ * the user though...
+ */
+ if (status == -NFS4ERR_BAD_SEQID) {
+ printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
+ /* 'continue' jumps to the loop condition, which is now true */
+ exception.retry = 1;
+ continue;
+ }
+ res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
+ status, &exception));
+ } while (exception.retry);
+ return res;
+}
+
+/*
+ * Send one SETATTR call for @fhandle with the attributes in @sattr.
+ * When @state is given, its owner's credential is used for the call.
+ * A size change takes its stateid from @state; every other attribute
+ * change is sent with the all-zero stateid.  @fattr is invalidated
+ * before the call and receives the returned attributes.
+ */
+static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
+		struct nfs_fh *fhandle, struct iattr *sattr,
+		struct nfs4_state *state)
+{
+	struct nfs_setattrargs args = {
+		.fh = fhandle,
+		.iap = sattr,
+		.server = server,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_setattrres result = {
+		.fattr = fattr,
+		.server = server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
+		.rpc_argp = &args,
+		.rpc_resp = &result,
+	};
+
+	fattr->valid = 0;
+
+	if (state != NULL)
+		msg.rpc_cred = state->owner->so_cred;
+
+	if (!(sattr->ia_valid & ATTR_SIZE))
+		memcpy(&args.stateid, &zero_stateid, sizeof(args.stateid));
+	else
+		nfs4_copy_stateid(&args.stateid, state, NULL);
+
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+/*
+ * Retry wrapper: repeat _nfs4_do_setattr() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
+		struct nfs_fh *fhandle, struct iattr *sattr,
+		struct nfs4_state *state)
+{
+	struct nfs4_exception exception = { };
+	int rc;
+
+	for (;;) {
+		rc = _nfs4_do_setattr(server, fattr, fhandle, sattr, state);
+		rc = nfs4_handle_exception(server, rc, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return rc;
+}
+
+/*
+ * Per-call data for an asynchronous CLOSE / OPEN_DOWNGRADE.  Allocated
+ * in nfs4_do_close() and freed in nfs4_close_done().
+ */
+struct nfs4_closedata {
+ struct inode *inode; /* inode being closed */
+ struct nfs4_state *state; /* open state the call operates on */
+ struct nfs_closeargs arg; /* RPC arguments */
+ struct nfs_closeres res; /* RPC results */
+};
+
+/*
+ * Completion callback for the asynchronous CLOSE / OPEN_DOWNGRADE
+ * started by nfs4_close_call().  Unless the call is restarted, it
+ * records the new open mode, drops the state and state owner
+ * references, releases so_sema and cl_sem, and frees the call data.
+ */
+static void nfs4_close_done(struct rpc_task *task)
+{
+ struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+ struct nfs4_state *state = calldata->state;
+ struct nfs4_state_owner *sp = state->owner;
+ struct nfs_server *server = NFS_SERVER(calldata->inode);
+
+ /* hmm. we are done with the inode, and in the process of freeing
+ * the state_owner. we keep this around to process errors
+ */
+ nfs4_increment_seqid(task->tk_status, sp);
+ switch (task->tk_status) {
+ case 0:
+ memcpy(&state->stateid, &calldata->res.stateid,
+ sizeof(state->stateid));
+ break;
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ state->state = calldata->arg.open_flags;
+ nfs4_schedule_state_recovery(server->nfs4_state);
+ break;
+ default:
+ /* On -EAGAIN the call is restarted and nothing is released yet */
+ if (nfs4_async_handle_error(task, server) == -EAGAIN) {
+ rpc_restart_call(task);
+ return;
+ }
+ }
+ state->state = calldata->arg.open_flags;
+ nfs4_put_open_state(state);
+ up(&sp->so_sema);
+ nfs4_put_state_owner(sp);
+ up_read(&server->nfs4_state->cl_sem);
+ kfree(calldata);
+}
+
+/*
+ * Launch the asynchronous call described by @calldata: a CLOSE when no
+ * open mode is to remain (arg.open_flags == 0), otherwise an
+ * OPEN_DOWNGRADE.  nfs4_close_done() is the completion callback.
+ */
+static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
+{
+	int procnum = (calldata->arg.open_flags != 0) ?
+			NFSPROC4_CLNT_OPEN_DOWNGRADE : NFSPROC4_CLNT_CLOSE;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[procnum],
+		.rpc_argp = &calldata->arg,
+		.rpc_resp = &calldata->res,
+		.rpc_cred = calldata->state->owner->so_cred,
+	};
+
+	return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
+}
+
+/* 
+ * It is possible for data to be read/written from a mem-mapped file 
+ * after the sys_close call (which hits the vfs layer as a flush).
+ * This means that we can't safely call nfsv4 close on a file until 
+ * the inode is cleared. This in turn means that we are not good
+ * NFSv4 citizens - we do not indicate to the server to update the file's 
+ * share state even when we are done with one of the three share 
+ * stateid's in the inode.
+ *
+ * NOTE: Caller must be holding the sp->so_sema semaphore!
+ *
+ * @inode: inode being closed
+ * @state: open state to close or downgrade
+ * @mode:  open mode that should remain (0 for a full close)
+ *
+ * Returns 0 when nothing had to be sent (delegated state), -EINPROGRESS
+ * when an asynchronous call was launched, or a negative error when the
+ * call could not be started.
+ */
+int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
+{
+	struct nfs4_closedata *calldata;
+	int status;
+
+	/* Tell caller we're done */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+		state->state = mode;
+		return 0;
+	}
+	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+	if (calldata == NULL)
+		return -ENOMEM;
+	calldata->inode = inode;
+	calldata->state = state;
+	calldata->arg.fh = NFS_FH(inode);
+	/* Serialization for the sequence id */
+	calldata->arg.seqid = state->owner->so_seqid;
+	calldata->arg.open_flags = mode;
+	memcpy(&calldata->arg.stateid, &state->stateid,
+			sizeof(calldata->arg.stateid));
+	status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
+	if (status != 0) {
+		/*
+		 * The call was not launched, so nfs4_close_done() will
+		 * never run to free calldata: release it here.
+		 * NOTE(review): relies on rpc_call_async() not invoking
+		 * the completion callback when it returns an error.
+		 */
+		kfree(calldata);
+		return status;
+	}
+	/*
+	 * Return -EINPROGRESS on success in order to indicate to the
+	 * caller that an asynchronous RPC call has been launched, and
+	 * that it will release the semaphores on completion.
+	 */
+	return -EINPROGRESS;
+}
+
+/*
+ * Open @dentry in @dir as part of a lookup with open intent.  With
+ * LOOKUP_CREATE the create mode from the intent is passed on (masked by
+ * the umask unless the directory uses POSIX ACLs); without it no
+ * attributes are sent and O_CREAT must not be set.
+ * Returns the inode of the opened file, or an error pointer.
+ */
+struct inode *
+nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+{
+	struct nfs4_state *open_state;
+	struct rpc_cred *cred;
+	struct iattr attr;
+
+	if (!(nd->flags & LOOKUP_CREATE)) {
+		attr.ia_valid = 0;
+		BUG_ON(nd->intent.open.flags & O_CREAT);
+	} else {
+		attr.ia_mode = nd->intent.open.create_mode;
+		attr.ia_valid = ATTR_MODE;
+		if (!IS_POSIXACL(dir))
+			attr.ia_mode &= ~current->fs->umask;
+	}
+
+	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return (struct inode *)cred;
+
+	open_state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
+	put_rpccred(cred);
+
+	return IS_ERR(open_state) ? (struct inode *)open_state : open_state->inode;
+}
+
+/*
+ * Revalidate @dentry by opening it: first from a delegation, then with
+ * an OPEN on the server if that fails.
+ *
+ * Returns 1 when the dentry is still valid (the open yielded the same
+ * inode, or the name does not exist and the dentry is negative),
+ * 0 when it is not, or a negative error if no credential was found.
+ * When the open yields a different inode the dentry is dropped and the
+ * new open state is closed again.
+ */
+int
+nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
+{
+	struct rpc_cred *cred;
+	struct nfs4_state *state;
+	struct inode *inode;
+
+	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
+	if (IS_ERR(state))
+		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
+	put_rpccred(cred);
+	if (state == ERR_PTR(-ENOENT) && dentry->d_inode == NULL)
+		return 1;
+	if (IS_ERR(state))
+		return 0;
+	inode = state->inode;
+	if (inode == dentry->d_inode) {
+		/* Same file: drop only the inode reference, the open state is kept */
+		iput(inode);
+		return 1;
+	}
+	d_drop(dentry);
+	nfs4_close_state(state, openflags);
+	iput(inode);
+	return 0;
+}
+
+
+/*
+ * Query the server's capabilities for @fhandle.  On success the
+ * supported-attribute bitmask and the ACL bitmask are stored in
+ * @server, and the ACL, hard-link and symlink capability flags are set
+ * when the reply reports them.  Flags are only ever added here.
+ */
+static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
+{
+	struct nfs4_server_caps_res caps = {};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS],
+		.rpc_argp = fhandle,
+		.rpc_resp = &caps,
+	};
+	int status;
+
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (status != 0)
+		return status;
+
+	memcpy(server->attr_bitmask, caps.attr_bitmask, sizeof(server->attr_bitmask));
+	if (caps.attr_bitmask[0] & FATTR4_WORD0_ACL)
+		server->caps |= NFS_CAP_ACLS;
+	if (caps.has_links != 0)
+		server->caps |= NFS_CAP_HARDLINKS;
+	if (caps.has_symlinks != 0)
+		server->caps |= NFS_CAP_SYMLINKS;
+	server->acl_bitmask = caps.acl_bitmask;
+	return status;
+}
+
+/*
+ * Retry wrapper: repeat _nfs4_server_capabilities() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
+{
+	struct nfs4_exception exception = { };
+	int rc;
+
+	for (;;) {
+		rc = _nfs4_server_capabilities(server, fhandle);
+		rc = nfs4_handle_exception(server, rc, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return rc;
+}
+
+/*
+ * Send one LOOKUP_ROOT call.  The file handle is returned in @fhandle
+ * and the attributes selected by nfs4_fattr_bitmap in info->fattr,
+ * which is invalidated before the call.
+ */
+static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsinfo *info)
+{
+	struct nfs_fattr *root_attr = info->fattr;
+	struct nfs4_lookup_root_arg lookup_args = {
+		.bitmask = nfs4_fattr_bitmap,
+	};
+	struct nfs4_lookup_res lookup_res = {
+		.server = server,
+		.fattr = root_attr,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP_ROOT],
+		.rpc_argp = &lookup_args,
+		.rpc_resp = &lookup_res,
+	};
+
+	root_attr->valid = 0;
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+/*
+ * Retry wrapper: repeat _nfs4_lookup_root() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsinfo *info)
+{
+	struct nfs4_exception exception = { };
+	int rc;
+
+	for (;;) {
+		rc = _nfs4_lookup_root(server, fhandle, info);
+		rc = nfs4_handle_exception(server, rc, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return rc;
+}
+
+/*
+ * Obtain the file handle and attributes of the mount point.
+ * Starting from the server's root, each '/'-separated component of
+ * server->mnt_path is looked up in turn; @fhandle is both the directory
+ * handle passed in and the handle returned, so it advances one
+ * component per LOOKUP.  On success the server capabilities and fsinfo
+ * are then fetched for the final handle.
+ */
+static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsinfo *info)
+{
+ struct nfs_fattr * fattr = info->fattr;
+ unsigned char * p;
+ struct qstr q;
+ struct nfs4_lookup_arg args = {
+ .dir_fh = fhandle,
+ .name = &q,
+ .bitmask = nfs4_fattr_bitmap,
+ };
+ struct nfs4_lookup_res res = {
+ .server = server,
+ .fattr = fattr,
+ .fh = fhandle,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+ int status;
+
+ /*
+ * Now we do a separate LOOKUP for each component of the mount path.
+ * The LOOKUPs are done separately so that we can conveniently
+ * catch an ERR_WRONGSEC if it occurs along the way...
+ */
+ status = nfs4_lookup_root(server, fhandle, info);
+ if (status)
+ goto out;
+
+ p = server->mnt_path;
+ for (;;) {
+ /* A fresh exception state is used for every path component */
+ struct nfs4_exception exception = { };
+
+ /* Skip any run of slashes; stop at the end of the path */
+ while (*p == '/')
+ p++;
+ if (!*p)
+ break;
+ /* q describes the component in place inside mnt_path */
+ q.name = p;
+ while (*p && (*p != '/'))
+ p++;
+ q.len = p - q.name;
+
+ do {
+ fattr->valid = 0;
+ status = nfs4_handle_exception(server,
+ rpc_call_sync(server->client, &msg, 0),
+ &exception);
+ } while (exception.retry);
+ if (status == 0)
+ continue;
+ if (status == -ENOENT) {
+ printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
+ printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
+ }
+ break;
+ }
+ if (status == 0)
+ status = nfs4_server_capabilities(server, fhandle);
+ if (status == 0)
+ status = nfs4_do_fsinfo(server, fhandle, info);
+out:
+ return status;
+}
+
+/*
+ * Send one GETATTR call for @fhandle, requesting the attributes in the
+ * server's attr_bitmask.  @fattr is invalidated before the call and
+ * receives the result.
+ */
+static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_getattr_arg getattr_args = {
+		.fh = fhandle,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_getattr_res getattr_res = {
+		.fattr = fattr,
+		.server = server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR],
+		.rpc_argp = &getattr_args,
+		.rpc_resp = &getattr_res,
+	};
+
+	fattr->valid = 0;
+	return rpc_call_sync(server->client, &msg, 0);
+}
+
+/*
+ * Retry wrapper: repeat _nfs4_proc_getattr() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int rc;
+
+	for (;;) {
+		rc = _nfs4_proc_getattr(server, fhandle, fattr);
+		rc = nfs4_handle_exception(server, rc, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return rc;
+}
+
+/*
+ * The file is not closed if it is opened due to the a request to change
+ * the size of the file. The open call will not be needed once the
+ * VFS layer lookup-intents are implemented.
+ *
+ * Close is called when the inode is destroyed.
+ * If we haven't opened the file for O_WRONLY, we
+ * need to in the size_change case to obtain a stateid.
+ *
+ * Got race?
+ * Because OPEN is always done by name in nfsv4, it is
+ * possible that we opened a different file by the same
+ * name. We can recognize this race condition, but we
+ * can't do anything about it besides returning an error.
+ *
+ * This will be fixed with VFS changes (lookup-intent).
+ */
+static int
+nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+ struct iattr *sattr)
+{
+ struct inode * inode = dentry->d_inode;
+ int size_change = sattr->ia_valid & ATTR_SIZE;
+ struct nfs4_state *state = NULL;
+ int need_iput = 0;
+ int status;
+
+ fattr->valid = 0;
+
+ /* Only a size change looks up (or opens) a write state below */
+ if (size_change) {
+ struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+ if (IS_ERR(cred))
+ return PTR_ERR(cred);
+ /* Reuse an existing FMODE_WRITE state if there is one... */
+ state = nfs4_find_state(inode, cred, FMODE_WRITE);
+ if (state == NULL) {
+ /* ...else try a delegated open, then fall back to a real OPEN */
+ state = nfs4_open_delegated(dentry->d_inode,
+ FMODE_WRITE, cred);
+ if (IS_ERR(state))
+ state = nfs4_do_open(dentry->d_parent->d_inode,
+ dentry, FMODE_WRITE,
+ NULL, cred);
+ /* NOTE(review): presumably the open paths return with an inode reference held -- confirm */
+ need_iput = 1;
+ }
+ put_rpccred(cred);
+ if (IS_ERR(state))
+ return PTR_ERR(state);
+
+ /* OPEN is by name: detect that we opened a different file */
+ if (state->inode != inode) {
+ printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode);
+ status = -EIO;
+ goto out;
+ }
+ }
+ /* state stays NULL here when no size change was requested */
+ status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
+ NFS_FH(inode), sattr, state);
+out:
+ if (state) {
+ /* Re-read the inode from the state: it differs from ours in the race case */
+ inode = state->inode;
+ nfs4_close_state(state, FMODE_WRITE);
+ if (need_iput)
+ iput(inode);
+ }
+ return status;
+}
+
+/*
+ * Send one synchronous LOOKUP of @name in directory @dir.  The reply's
+ * file handle and attributes are decoded into @fhandle and @fattr.
+ * Returns the rpc_call_sync() status; retries are left to the caller.
+ */
+static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs4_lookup_arg lookup_args = {
+		.bitmask = server->attr_bitmask,
+		.dir_fh = NFS_FH(dir),
+		.name = name,
+	};
+	struct nfs4_lookup_res lookup_res = {
+		.server = server,
+		.fattr = fattr,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+		.rpc_argp = &lookup_args,
+		.rpc_resp = &lookup_res,
+	};
+	int err;
+
+	/* Mark the caller's attributes invalid before issuing the call */
+	fattr->valid = 0;
+
+	dprintk("NFS call lookup %s\n", name->name);
+	err = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	dprintk("NFS reply lookup: %d\n", err);
+	return err;
+}
+
+/*
+ * LOOKUP entry point: repeat _nfs4_proc_lookup() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_lookup(dir, name, fhandle, fattr);
+
+		status = nfs4_handle_exception(NFS_SERVER(dir), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous ACCESS call for @inode using entry->cred.
+ * The MAY_* bits in entry->mask select which NFS4_ACCESS_* bits are
+ * requested; on success entry->mask is replaced by the MAY_* bits the
+ * server granted.  On failure entry->mask is left untouched.
+ */
+static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+{
+	struct nfs4_accessargs args = {
+		.fh = NFS_FH(inode),
+	};
+	struct nfs4_accessres res = { 0 };
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+		.rpc_cred = entry->cred,
+	};
+	const int wanted = entry->mask;
+	const int is_dir = S_ISDIR(inode->i_mode);
+	int granted = 0;
+	int status;
+
+	/* Translate the VFS permission bits into NFSv4 access bits */
+	if (wanted & MAY_READ)
+		args.access |= NFS4_ACCESS_READ;
+	if (wanted & MAY_WRITE) {
+		args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND;
+		/* Write access to a directory also asks for DELETE */
+		if (is_dir)
+			args.access |= NFS4_ACCESS_DELETE;
+	}
+	if (wanted & MAY_EXEC) {
+		if (is_dir)
+			args.access |= NFS4_ACCESS_LOOKUP;
+		else
+			args.access |= NFS4_ACCESS_EXECUTE;
+	}
+
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (status)
+		return status;
+
+	/* Translate the server's answer back into VFS permission bits */
+	if (res.access & NFS4_ACCESS_READ)
+		granted |= MAY_READ;
+	if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE))
+		granted |= MAY_WRITE;
+	if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
+		granted |= MAY_EXEC;
+	entry->mask = granted;
+	return status;
+}
+
+/*
+ * ACCESS entry point: repeat _nfs4_proc_access() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_access(inode, entry);
+
+		status = nfs4_handle_exception(NFS_SERVER(inode), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * TODO: For the time being, we don't try to get any attributes
+ * along with any of the zero-copy operations READ, READDIR,
+ * READLINK, WRITE.
+ *
+ * In the case of the first three, we want to put the GETATTR
+ * after the read-type operation -- this is because it is hard
+ * to predict the length of a GETATTR response in v4, and thus
+ * align the READ data correctly. This means that the GETATTR
+ * may end up partially falling into the page cache, and we should
+ * shift it into the 'tail' of the xdr_buf before processing.
+ * To do this efficiently, we need to know the total length
+ * of data received, which doesn't seem to be available outside
+ * of the RPC layer.
+ *
+ * In the case of WRITE, we also want to put the GETATTR after
+ * the operation -- in this case because we want to make sure
+ * we get the post-operation mtime and size. This means that
+ * we can't use xdr_encode_pages() as written: we need a variant
+ * of it which would leave room in the 'tail' iovec.
+ *
+ * Both of these changes to the XDR layer would in fact be quite
+ * minor, but I decided to leave them for a subsequent patch.
+ */
+/*
+ * Send one synchronous READLINK for @inode; the reply data is directed
+ * at @page, starting at @pgbase for up to @pglen bytes.  No separate
+ * result structure is used (rpc_resp is NULL).
+ */
+static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs4_readlink readlink_args = {
+		.fh = NFS_FH(inode),
+		.pgbase = pgbase,
+		.pglen = pglen,
+		.pages = &page,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK],
+		.rpc_argp = &readlink_args,
+		.rpc_resp = NULL,
+	};
+	int status;
+
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	return status;
+}
+
+/*
+ * READLINK entry point: repeat _nfs4_proc_readlink() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_readlink(struct inode *inode, struct page *page,
+		unsigned int pgbase, unsigned int pglen)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_readlink(inode, page, pgbase, pglen);
+
+		status = nfs4_handle_exception(NFS_SERVER(inode), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous READ described by @rdata (args, res, cred and
+ * RPC flags all come from it).  A zero status renews the lease using
+ * the jiffies value sampled before the call was made.
+ */
+static int _nfs4_proc_read(struct nfs_read_data *rdata)
+{
+	struct inode *inode = rdata->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr *fattr = rdata->res.fattr;
+	int rpcflags = rdata->flags;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
+		.rpc_argp = &rdata->args,
+		.rpc_resp = &rdata->res,
+		.rpc_cred = rdata->cred,
+	};
+	/* Sampled before the call so the lease is dated from the request */
+	unsigned long started = jiffies;
+	int err;
+
+	dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
+			(long long) rdata->args.offset);
+
+	fattr->valid = 0;
+	err = rpc_call_sync(server->client, &msg, rpcflags);
+	if (err == 0)
+		renew_lease(server, started);
+	dprintk("NFS reply read: %d\n", err);
+	return err;
+}
+
+/*
+ * READ entry point: repeat _nfs4_proc_read() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_read(struct nfs_read_data *rdata)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_read(rdata);
+
+		status = nfs4_handle_exception(NFS_SERVER(rdata->inode),
+				rpc_status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous WRITE described by @wdata (args, res, cred and
+ * RPC flags all come from it).  Returns the rpc_call_sync() status.
+ */
+static int _nfs4_proc_write(struct nfs_write_data *wdata)
+{
+	struct inode *inode = wdata->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr *fattr = wdata->res.fattr;
+	int flags = wdata->flags;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
+		.rpc_argp = &wdata->args,
+		.rpc_resp = &wdata->res,
+		.rpc_cred = wdata->cred,
+	};
+	int err;
+
+	dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
+			(long long) wdata->args.offset);
+
+	/* Mark the result attributes invalid before issuing the call */
+	fattr->valid = 0;
+	err = rpc_call_sync(server->client, &msg, flags);
+	dprintk("NFS reply write: %d\n", err);
+	return err;
+}
+
+/*
+ * WRITE entry point: repeat _nfs4_proc_write() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_write(struct nfs_write_data *wdata)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_write(wdata);
+
+		status = nfs4_handle_exception(NFS_SERVER(wdata->inode),
+				rpc_status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous COMMIT described by @cdata (args, res and cred
+ * come from it).  Returns the rpc_call_sync() status.
+ */
+static int _nfs4_proc_commit(struct nfs_write_data *cdata)
+{
+	struct inode *inode = cdata->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr *fattr = cdata->res.fattr;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
+		.rpc_argp = &cdata->args,
+		.rpc_resp = &cdata->res,
+		.rpc_cred = cdata->cred,
+	};
+	int err;
+
+	dprintk("NFS call commit %d @ %Ld\n", cdata->args.count,
+			(long long) cdata->args.offset);
+
+	/* Mark the result attributes invalid before issuing the call */
+	fattr->valid = 0;
+	err = rpc_call_sync(server->client, &msg, 0);
+	dprintk("NFS reply commit: %d\n", err);
+	return err;
+}
+
+/*
+ * COMMIT entry point: repeat _nfs4_proc_commit() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_commit(struct nfs_write_data *cdata)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_commit(cdata);
+
+		status = nfs4_handle_exception(NFS_SERVER(cdata->inode),
+				rpc_status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Got race?
+ * We will need to arrange for the VFS layer to provide an atomic open.
+ * Until then, this create/open method is prone to inefficiency and race
+ * conditions due to the lookup, create, and open VFS calls from sys_open()
+ * placed on the wire.
+ *
+ * Given the above sorry state of affairs, I'm simply sending an OPEN.
+ * The file will be opened again in the subsequent VFS open call
+ * (nfs4_proc_file_open).
+ *
+ * The open for read will just hang around to be used by any process that
+ * opens the file O_RDONLY. This will all be resolved with the VFS changes.
+ */
+
+static int
+nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ int flags)
+{
+ struct nfs4_state *state;
+ struct rpc_cred *cred;
+ int status = 0;
+
+ cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+ if (IS_ERR(cred)) {
+ status = PTR_ERR(cred);
+ goto out;
+ }
+ /* The create is done by sending an OPEN carrying the new attributes */
+ state = nfs4_do_open(dir, dentry, flags, sattr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state)) {
+ status = PTR_ERR(state);
+ goto out;
+ }
+ d_instantiate(dentry, state->inode);
+ if (flags & O_EXCL) {
+ /* Exclusive create: follow up with a SETATTR of sattr */
+ struct nfs_fattr fattr;
+ status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
+ NFS_FH(state->inode), sattr, state);
+ /* On success the open state is kept (not closed here) */
+ if (status == 0)
+ goto out;
+ } else if (flags != 0)
+ /* Non-exclusive with any flag set: also keep the open state */
+ goto out;
+ /* Reached when flags == 0, or when the O_EXCL SETATTR failed */
+ nfs4_close_state(state, flags);
+out:
+ return status;
+}
+
+/*
+ * Send one synchronous REMOVE of @name from directory @dir.  On success
+ * the returned change info is passed to update_changeattr() for @dir.
+ */
+static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs4_remove_arg remove_args = {
+		.fh = NFS_FH(dir),
+		.name = name,
+	};
+	struct nfs4_change_info cinfo;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE],
+		.rpc_argp = &remove_args,
+		.rpc_resp = &cinfo,
+	};
+	int err;
+
+	err = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (err != 0)
+		return err;
+	/* cinfo is only read once the call has succeeded */
+	update_changeattr(dir, &cinfo);
+	return err;
+}
+
+/*
+ * REMOVE entry point: repeat _nfs4_proc_remove() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_remove(struct inode *dir, struct qstr *name)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_remove(dir, name);
+
+		status = nfs4_handle_exception(NFS_SERVER(dir), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Argument/result pair for an unlink REMOVE: allocated by
+ * nfs4_proc_unlink_setup() and freed by nfs4_proc_unlink_done().
+ */
+struct unlink_desc {
+ struct nfs4_remove_arg args;
+ /* nfs4_proc_unlink_done() recovers the descriptor from this member via container_of() */
+ struct nfs4_change_info res;
+};
+
+/*
+ * Prepare @msg for a REMOVE of @name from directory @dir.
+ *
+ * Allocates a struct unlink_desc holding the arguments and the result
+ * and points msg->rpc_argp / msg->rpc_resp at it;
+ * nfs4_proc_unlink_done() frees it.
+ *
+ * Returns 0 on success or -ENOMEM if the descriptor cannot be allocated.
+ */
+static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
+		struct qstr *name)
+{
+	struct unlink_desc *up;
+
+	up = kmalloc(sizeof(*up), GFP_KERNEL);
+	if (!up)
+		return -ENOMEM;
+
+	/*
+	 * nfs4_proc_unlink_done() hands up->res to update_changeattr()
+	 * without looking at the task status, so make sure it never sees
+	 * uninitialised heap contents if no reply was decoded.
+	 */
+	memset(&up->res, 0, sizeof(up->res));
+	up->args.fh = NFS_FH(dir->d_inode);
+	up->args.name = name;
+
+	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
+	msg->rpc_argp = &up->args;
+	msg->rpc_resp = &up->res;
+	return 0;
+}
+
+/*
+ * Finish the REMOVE prepared by nfs4_proc_unlink_setup(): pass the
+ * change info to update_changeattr() for the directory inode, free the
+ * descriptor and clear the message pointers.  Does nothing if
+ * msg->rpc_resp is already NULL.  Always returns 0.
+ */
+static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	struct rpc_message *msg = &task->tk_msg;
+	struct unlink_desc *desc;
+
+	if (msg->rpc_resp == NULL)
+		return 0;
+
+	/* rpc_resp points at the 'res' member of the descriptor */
+	desc = container_of(msg->rpc_resp, struct unlink_desc, res);
+	update_changeattr(dir->d_inode, &desc->res);
+	kfree(desc);
+	msg->rpc_resp = NULL;
+	msg->rpc_argp = NULL;
+	return 0;
+}
+
+/*
+ * Send one synchronous RENAME of @old_name in @old_dir to @new_name in
+ * @new_dir.  On success the change info of both directories is passed
+ * to update_changeattr().
+ */
+static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs4_rename_arg rename_args = {
+		.old_dir = NFS_FH(old_dir),
+		.new_dir = NFS_FH(new_dir),
+		.old_name = old_name,
+		.new_name = new_name,
+	};
+	struct nfs4_rename_res rename_res = { };
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
+		.rpc_argp = &rename_args,
+		.rpc_resp = &rename_res,
+	};
+	int err;
+
+	err = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
+	if (err)
+		return err;
+
+	update_changeattr(old_dir, &rename_res.old_cinfo);
+	update_changeattr(new_dir, &rename_res.new_cinfo);
+	return err;
+}
+
+/*
+ * RENAME entry point: repeat _nfs4_proc_rename() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
+		struct inode *new_dir, struct qstr *new_name)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_rename(old_dir, old_name,
+				new_dir, new_name);
+
+		status = nfs4_handle_exception(NFS_SERVER(old_dir), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous LINK creating @name in directory @dir for
+ * @inode.  On success the change info is passed to update_changeattr()
+ * for @dir.
+ */
+static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs4_link_arg link_args = {
+		.fh = NFS_FH(inode),
+		.dir_fh = NFS_FH(dir),
+		.name = name,
+	};
+	struct nfs4_change_info dir_cinfo = { };
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
+		.rpc_argp = &link_args,
+		.rpc_resp = &dir_cinfo,
+	};
+	int err;
+
+	err = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	if (err == 0)
+		update_changeattr(dir, &dir_cinfo);
+	return err;
+}
+
+/*
+ * LINK entry point: repeat _nfs4_proc_link() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_link(inode, dir, name);
+
+		status = nfs4_handle_exception(NFS_SERVER(inode), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous SYMLINK creating @name in @dir with target
+ * @path and attributes @sattr.  The new object's file handle and
+ * attributes are decoded into @fhandle and @fattr.
+ *
+ * Returns -ENAMETOOLONG without contacting the server when path->len
+ * exceeds NFS4_MAXPATHLEN, otherwise the rpc_call_sync() status.
+ */
+static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
+		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs4_create_arg create_args = {
+		.dir_fh = NFS_FH(dir),
+		.server = server,
+		.name = name,
+		.attrs = sattr,
+		.ftype = NF4LNK,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_create_res create_res = {
+		.server = server,
+		.fh = fhandle,
+		.fattr = fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK],
+		.rpc_argp = &create_args,
+		.rpc_resp = &create_res,
+	};
+	int err;
+
+	if (path->len > NFS4_MAXPATHLEN)
+		return -ENAMETOOLONG;
+
+	create_args.u.symlink = path;
+	fattr->valid = 0;
+
+	err = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (err == 0)
+		update_changeattr(dir, &create_res.dir_cinfo);
+	return err;
+}
+
+/*
+ * SYMLINK entry point: repeat _nfs4_proc_symlink() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_symlink(struct inode *dir, struct qstr *name,
+		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_symlink(dir, name, path, sattr,
+				fhandle, fattr);
+
+		status = nfs4_handle_exception(NFS_SERVER(dir), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous CREATE of type NF4DIR for @dentry in @dir with
+ * attributes @sattr.  On success the directory change info is passed
+ * to update_changeattr() and the dentry is instantiated from the
+ * returned file handle and attributes.
+ */
+static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+		struct iattr *sattr)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_fh new_fh;
+	struct nfs_fattr new_fattr;
+	struct nfs4_create_arg create_args = {
+		.dir_fh = NFS_FH(dir),
+		.server = server,
+		.name = &dentry->d_name,
+		.attrs = sattr,
+		.ftype = NF4DIR,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_create_res create_res = {
+		.server = server,
+		.fh = &new_fh,
+		.fattr = &new_fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
+		.rpc_argp = &create_args,
+		.rpc_resp = &create_res,
+	};
+	int err;
+
+	new_fattr.valid = 0;
+
+	err = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (err)
+		return err;
+
+	update_changeattr(dir, &create_res.dir_cinfo);
+	return nfs_instantiate(dentry, &new_fh, &new_fattr);
+}
+
+/*
+ * MKDIR entry point: repeat _nfs4_proc_mkdir() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
+		struct iattr *sattr)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_mkdir(dir, dentry, sattr);
+
+		status = nfs4_handle_exception(NFS_SERVER(dir), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous READDIR for the directory behind @dentry,
+ * starting at @cookie and directing up to @count bytes at @page.
+ * The whole operation runs under lock_kernel(); on success the
+ * verifier from the reply is copied into the directory's cookie
+ * verifier.  @plus is not used by this function.
+ */
+static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode *dir = dentry->d_inode;
+	struct nfs4_readdir_arg readdir_args = {
+		.fh = NFS_FH(dir),
+		.pages = &page,
+		.pgbase = 0,
+		.count = count,
+		.bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
+	};
+	struct nfs4_readdir_res readdir_res;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR],
+		.rpc_argp = &readdir_args,
+		.rpc_resp = &readdir_res,
+		.rpc_cred = cred,
+	};
+	int err;
+
+	lock_kernel();
+	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &readdir_args);
+	/* The setup call may have changed pgbase; mirror it in the result */
+	readdir_res.pgbase = readdir_args.pgbase;
+	err = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (!err)
+		memcpy(NFS_COOKIEVERF(dir), readdir_res.verifier.data,
+				NFS4_VERIFIER_SIZE);
+	unlock_kernel();
+	return err;
+}
+
+/*
+ * READDIR entry point: repeat _nfs4_proc_readdir() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_readdir(dentry, cred, cookie,
+				page, count, plus);
+
+		status = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
+				rpc_status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous CREATE for a special file (FIFO, block device,
+ * character device or socket) named by @dentry in @dir.
+ *
+ * @sattr must carry ATTR_MODE and a mode of one of those four types;
+ * anything else hits BUG_ON().  For block and character devices the
+ * major/minor numbers of @rdev are sent as specdata1/specdata2.
+ * On success the directory change info is passed to update_changeattr()
+ * and the dentry is instantiated.
+ */
+static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
+		struct iattr *sattr, dev_t rdev)
+{
+	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_fh new_fh;
+	struct nfs_fattr new_fattr;
+	struct nfs4_create_arg create_args = {
+		.dir_fh = NFS_FH(dir),
+		.server = server,
+		.name = &dentry->d_name,
+		.attrs = sattr,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs4_create_res create_res = {
+		.server = server,
+		.fh = &new_fh,
+		.fattr = &new_fattr,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
+		.rpc_argp = &create_args,
+		.rpc_resp = &create_res,
+	};
+	int mode = sattr->ia_mode;
+	int err;
+
+	new_fattr.valid = 0;
+
+	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
+	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
+
+	/* Pick the NFSv4 file type; only device nodes carry specdata */
+	if (S_ISFIFO(mode)) {
+		create_args.ftype = NF4FIFO;
+	} else if (S_ISBLK(mode) || S_ISCHR(mode)) {
+		create_args.ftype = S_ISBLK(mode) ? NF4BLK : NF4CHR;
+		create_args.u.device.specdata1 = MAJOR(rdev);
+		create_args.u.device.specdata2 = MINOR(rdev);
+	} else {
+		create_args.ftype = NF4SOCK;
+	}
+
+	err = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (err != 0)
+		return err;
+
+	update_changeattr(dir, &create_res.dir_cinfo);
+	return nfs_instantiate(dentry, &new_fh, &new_fattr);
+}
+
+/*
+ * MKNOD entry point: repeat _nfs4_proc_mknod() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
+		struct iattr *sattr, dev_t rdev)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_mknod(dir, dentry, sattr, rdev);
+
+		status = nfs4_handle_exception(NFS_SERVER(dir), rpc_status,
+				&exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous STATFS for @fhandle; the reply is decoded
+ * directly into @fsstat.  Returns the rpc_call_sync() status.
+ */
+static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsstat *fsstat)
+{
+	struct nfs4_statfs_arg statfs_args = {
+		.fh = fhandle,
+		.bitmask = server->attr_bitmask,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS],
+		.rpc_argp = &statfs_args,
+		.rpc_resp = fsstat,
+	};
+	int status;
+
+	/* Mark the attached attributes invalid before issuing the call */
+	fsstat->fattr->valid = 0;
+	status = rpc_call_sync(server->client, &msg, 0);
+	return status;
+}
+
+/*
+ * STATFS entry point: repeat _nfs4_proc_statfs() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_statfs(server, fhandle, fsstat);
+
+		status = nfs4_handle_exception(server, rpc_status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Send one synchronous FSINFO for @fhandle; the reply is decoded
+ * directly into @fsinfo.  Returns the rpc_call_sync() status.
+ */
+static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsinfo *fsinfo)
+{
+	struct nfs4_fsinfo_arg fsinfo_args = {
+		.fh = fhandle,
+		.bitmask = server->attr_bitmask,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO],
+		.rpc_argp = &fsinfo_args,
+		.rpc_resp = fsinfo,
+	};
+	int status;
+
+	status = rpc_call_sync(server->client, &msg, 0);
+	return status;
+}
+
+/*
+ * FSINFO with retry: repeat _nfs4_do_fsinfo() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_do_fsinfo(server, fhandle, fsinfo);
+
+		status = nfs4_handle_exception(server, rpc_status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * FSINFO entry point: mark the attached attributes invalid, then hand
+ * over to nfs4_do_fsinfo().
+ */
+static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
+{
+	struct nfs_fattr *fattr = fsinfo->fattr;
+
+	fattr->valid = 0;
+	return nfs4_do_fsinfo(server, fhandle, fsinfo);
+}
+
+/*
+ * Send one synchronous PATHCONF for @fhandle, decoding into @pathconf.
+ *
+ * If none of the bits in nfs4_pathconf_bitmap[0] are set in the
+ * server's attribute bitmask, no call is made: @pathconf is zeroed
+ * and 0 is returned.
+ */
+static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_pathconf *pathconf)
+{
+	struct nfs4_pathconf_arg pathconf_args = {
+		.fh = fhandle,
+		.bitmask = server->attr_bitmask,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF],
+		.rpc_argp = &pathconf_args,
+		.rpc_resp = pathconf,
+	};
+
+	/* None of the pathconf attributes are mandatory to implement */
+	if (pathconf_args.bitmask[0] & nfs4_pathconf_bitmap[0]) {
+		pathconf->fattr->valid = 0;
+		return rpc_call_sync(server->client, &msg, 0);
+	}
+
+	memset(pathconf, 0, sizeof(*pathconf));
+	return 0;
+}
+
+/*
+ * PATHCONF entry point: repeat _nfs4_proc_pathconf() for as long as
+ * nfs4_handle_exception() asks for a retry, then return its result.
+ */
+static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_pathconf *pathconf)
+{
+	struct nfs4_exception exception = { };
+	int status;
+
+	for (;;) {
+		int rpc_status = _nfs4_proc_pathconf(server, fhandle, pathconf);
+
+		status = nfs4_handle_exception(server, rpc_status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return status;
+}
+
+/*
+ * Completion callback for an asynchronous READ.  Restarts the call if
+ * nfs4_async_handle_error() returns -EAGAIN; otherwise renews the lease
+ * when tk_status is positive and hands over to the common readpage
+ * completion code.
+ * NOTE(review): the lease is renewed only for tk_status > 0 here, while
+ * nfs4_write_done() uses >= 0 -- confirm whether that is intended.
+ */
+static void
+nfs4_read_done(struct rpc_task *task)
+{
+	struct nfs_read_data *rdata = (struct nfs_read_data *) task->tk_calldata;
+	struct inode *inode = rdata->inode;
+
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) != -EAGAIN) {
+		if (task->tk_status > 0)
+			renew_lease(NFS_SERVER(inode), rdata->timestamp);
+		/* Call back common NFS readpage processing */
+		nfs_readpage_result(task);
+		return;
+	}
+	rpc_restart_call(task);
+}
+
+/*
+ * Initialise the RPC task embedded in @data for an asynchronous READ:
+ * record the start time, then set up the task with nfs4_read_done() as
+ * its completion callback.
+ */
+static void
+nfs4_proc_read_setup(struct nfs_read_data *data)
+{
+	struct inode *inode = data->inode;
+	struct rpc_task *task = &data->task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
+	};
+	int flags = RPC_TASK_ASYNC;
+
+	data->timestamp = jiffies;
+
+	/* N.B. Do we need to test? Never called for swapfile inode */
+	if (IS_SWAPFILE(inode))
+		flags |= NFS_RPC_SWAPFLAGS;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * Completion callback for a WRITE task.  Restarts the call if
+ * nfs4_async_handle_error() returns -EAGAIN; otherwise renews the lease
+ * when tk_status is non-negative and hands over to the common writeback
+ * completion code.
+ */
+static void
+nfs4_write_done(struct rpc_task *task)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *) task->tk_calldata;
+	struct inode *inode = wdata->inode;
+
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) != -EAGAIN) {
+		if (task->tk_status >= 0)
+			renew_lease(NFS_SERVER(inode), wdata->timestamp);
+		/* Call back common NFS writeback processing */
+		nfs_writeback_done(task);
+		return;
+	}
+	rpc_restart_call(task);
+}
+
+/*
+ * Initialise the RPC task embedded in @data for a WRITE.
+ *
+ * @how selects the stability level (FLUSH_STABLE) and whether the task
+ * runs synchronously (FLUSH_SYNC) or asynchronously.
+ */
+static void
+nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+{
+	struct inode *inode = data->inode;
+	struct rpc_task *task = &data->task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
+	};
+	int stable;
+	int flags;
+
+	/*
+	 * A stable write asks for FILE_SYNC when the inode has no
+	 * commits outstanding (ncommit == 0), DATA_SYNC otherwise.
+	 */
+	if (!(how & FLUSH_STABLE))
+		stable = NFS_UNSTABLE;
+	else if (NFS_I(inode)->ncommit)
+		stable = NFS_DATA_SYNC;
+	else
+		stable = NFS_FILE_SYNC;
+	data->args.stable = stable;
+
+	data->timestamp = jiffies;
+
+	/* Set the initial flags for the task. */
+	if (how & FLUSH_SYNC)
+		flags = 0;
+	else
+		flags = RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * Completion callback for a COMMIT task.  Restarts the call if
+ * nfs4_async_handle_error() returns -EAGAIN; otherwise hands over to
+ * the common commit completion code.
+ */
+static void
+nfs4_commit_done(struct rpc_task *task)
+{
+	struct nfs_write_data *cdata = (struct nfs_write_data *) task->tk_calldata;
+	struct inode *inode = cdata->inode;
+
+	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) != -EAGAIN) {
+		/* Call back common NFS writeback processing */
+		nfs_commit_done(task);
+		return;
+	}
+	rpc_restart_call(task);
+}
+
+/*
+ * Initialise the RPC task embedded in @data for a COMMIT.  The task is
+ * asynchronous unless @how contains FLUSH_SYNC.
+ */
+static void
+nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+{
+	struct inode *inode = data->inode;
+	struct rpc_task *task = &data->task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
+	};
+	int flags;
+
+	/* Set the initial flags for the task. */
+	if (how & FLUSH_SYNC)
+		flags = 0;
+	else
+		flags = RPC_TASK_ASYNC;
+
+	/* Finalize the task. */
+	rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags);
+	rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * renew_done(): completion callback for the asynchronous RENEW queued
+ * by nfs4_proc_async_renew(), which is not one of the nfs_rpc_ops but a
+ * special standalone procedure.
+ *
+ * The client is taken from the message argument and the start time from
+ * tk_calldata.  On STALE_CLIENTID, EXPIRED or CB_PATH_DOWN state
+ * recovery is scheduled; other errors are ignored.  On success
+ * cl_last_renewal is advanced to the start time if that is later.
+ */
+static void
+renew_done(struct rpc_task *task)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	unsigned long started = (unsigned long)task->tk_calldata;
+	int status = task->tk_status;
+
+	if (status < 0) {
+		if (status == -NFS4ERR_STALE_CLIENTID ||
+		    status == -NFS4ERR_EXPIRED ||
+		    status == -NFS4ERR_CB_PATH_DOWN)
+			nfs4_schedule_state_recovery(clp);
+		return;
+	}
+
+	spin_lock(&clp->cl_lock);
+	/* Never move the renewal time backwards */
+	if (time_before(clp->cl_last_renewal,started))
+		clp->cl_last_renewal = started;
+	spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Queue an asynchronous, soft RENEW for @clp.  The current jiffies
+ * value travels as the task's calldata so renew_done() can date the
+ * renewal from the time of the request.
+ */
+int
+nfs4_proc_async_renew(struct nfs4_client *clp)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
+		.rpc_argp = clp,
+		.rpc_cred = clp->cl_cred,
+	};
+	void *started = (void *)jiffies;
+
+	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+			renew_done, started);
+}
+
+/*
+ * Send a synchronous RENEW for @clp.  Returns a negative status on
+ * failure; otherwise advances cl_last_renewal to the time the call was
+ * started (if later) and returns 0.
+ */
+int
+nfs4_proc_renew(struct nfs4_client *clp)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
+		.rpc_argp = clp,
+		.rpc_cred = clp->cl_cred,
+	};
+	unsigned long started = jiffies;
+	int err;
+
+	err = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+	if (err >= 0) {
+		spin_lock(&clp->cl_lock);
+		/* Never move the renewal time backwards */
+		if (time_before(clp->cl_last_renewal,started))
+			clp->cl_last_renewal = started;
+		spin_unlock(&clp->cl_lock);
+		err = 0;
+	}
+	return err;
+}
+
+/*
+ * We will need to arrange for the VFS layer to provide an atomic open.
+ * Until then, this open method is prone to inefficiency and race conditions
+ * due to the lookup, potential create, and open VFS calls from sys_open()
+ * placed on the wire.
+ *
+ * Attach an open context to @filp: look up the caller's credential,
+ * allocate a context for it, and bind the already-existing open state
+ * matching filp->f_mode to that context.
+ *
+ * Returns 0 on success, the rpcauth_lookupcred() error, -ENOMEM if no
+ * context can be allocated, or -EIO if no matching state is found.
+ */
+static int
+nfs4_proc_file_open(struct inode *inode, struct file *filp)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state;
+	struct rpc_cred *cred;
+
+	dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
+			(int)dentry->d_parent->d_name.len,
+			dentry->d_parent->d_name.name,
+			(int)dentry->d_name.len, dentry->d_name.name);
+
+
+	/* Find our open stateid */
+	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	ctx = alloc_nfs_open_context(dentry, cred);
+	if (unlikely(ctx == NULL)) {
+		put_rpccred(cred);
+		return -ENOMEM;
+	}
+	state = nfs4_find_state(inode, cred, filp->f_mode);
+	/*
+	 * Drop our own reference only after the last use of cred above,
+	 * so this function never depends on someone else pinning it.
+	 */
+	put_rpccred(cred);
+	if (unlikely(state == NULL))
+		goto no_state;
+	ctx->state = state;
+	nfs4_close_state(state, filp->f_mode);
+	ctx->mode = filp->f_mode;
+	nfs_file_set_open_context(filp, ctx);
+	put_nfs_open_context(ctx);
+	if (filp->f_mode & FMODE_WRITE)
+		nfs_begin_data_update(inode);
+	return 0;
+no_state:
+	printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
+	put_nfs_open_context(ctx);
+	return -EIO; /* ERACE actually */
+}
+
+/*
+ * Release our state: end the data update started at open time for
+ * files opened for writing, then detach the open context from @filp.
+ * Always returns 0.
+ */
+static int
+nfs4_proc_file_release(struct inode *inode, struct file *filp)
+{
+	const int opened_for_write = filp->f_mode & FMODE_WRITE;
+
+	if (opened_for_write) {
+		nfs_end_data_update(inode);
+	}
+	nfs_file_clear_open_context(filp);
+	return 0;
+}
+
+/*
+ * Error handling for asynchronous RPC tasks (called from the *_done
+ * callbacks).
+ *
+ * Returns -EAGAIN, with task->tk_status reset to 0, when the caller
+ * should restart the call.  Returns 0 otherwise; in that case a negative
+ * tk_status has been passed through nfs4_map_errors().
+ */
+static int
+nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
+{
+ struct nfs4_client *clp = server->nfs4_state;
+
+ /* Nothing to do without client state or when the task did not fail */
+ if (!clp || task->tk_status >= 0)
+ return 0;
+ switch(task->tk_status) {
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ /* Queue the task on the client's wait queue, then kick off recovery */
+ rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
+ nfs4_schedule_state_recovery(clp);
+ /* If the client is flagged OK, wake the task again straight away */
+ if (test_bit(NFS4CLNT_OK, &clp->cl_state))
+ rpc_wake_up_task(task);
+ task->tk_status = 0;
+ return -EAGAIN;
+ case -NFS4ERR_GRACE:
+ case -NFS4ERR_DELAY:
+ /* Delay the task by NFS4_POLL_RETRY_MAX before it is retried */
+ rpc_delay(task, NFS4_POLL_RETRY_MAX);
+ task->tk_status = 0;
+ return -EAGAIN;
+ case -NFS4ERR_OLD_STATEID:
+ /* Retry immediately, without any delay */
+ task->tk_status = 0;
+ return -EAGAIN;
+ }
+ /* Not retryable here: map the error for the caller */
+ task->tk_status = nfs4_map_errors(task->tk_status);
+ return 0;
+}
+
+/*
+ * Schedule state recovery for @clp and sleep on clp->cl_waitq unless
+ * the client is already flagged NFS4CLNT_OK.
+ *
+ * The sleep is interruptible only when clnt->cl_intr is set.  Returns
+ * -ERESTARTSYS if cl_intr is set and a signal is pending before going
+ * to sleep, 0 otherwise.
+ */
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
+{
+ DEFINE_WAIT(wait);
+ sigset_t oldset;
+ int interruptible, res = 0;
+
+ might_sleep();
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ interruptible = TASK_UNINTERRUPTIBLE;
+ if (clnt->cl_intr)
+ interruptible = TASK_INTERRUPTIBLE;
+ /* Get on the wait queue before triggering recovery and testing the flag */
+ prepare_to_wait(&clp->cl_waitq, &wait, interruptible);
+ nfs4_schedule_state_recovery(clp);
+ if (clnt->cl_intr && signalled())
+ res = -ERESTARTSYS;
+ else if (!test_bit(NFS4CLNT_OK, &clp->cl_state))
+ schedule();
+ finish_wait(&clp->cl_waitq, &wait);
+ rpc_clnt_sigunmask(clnt, &oldset);
+ return res;
+}
+
+/*
+ * Sleep for *timeout jiffies, clamped to the range
+ * [NFS4_POLL_RETRY_MIN, NFS4_POLL_RETRY_MAX] (a non-positive value
+ * becomes the minimum), then double *timeout for the next round.
+ *
+ * The sleep is interruptible only when clnt->cl_intr is set; in that
+ * case -ERESTARTSYS is returned if a signal is pending afterwards.
+ * Returns 0 otherwise.
+ */
+static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
+{
+	sigset_t saved_set;
+	int ret = 0;
+
+	might_sleep();
+
+	/* Clamp the requested delay into the allowed window */
+	if (*timeout <= 0)
+		*timeout = NFS4_POLL_RETRY_MIN;
+	if (*timeout > NFS4_POLL_RETRY_MAX)
+		*timeout = NFS4_POLL_RETRY_MAX;
+
+	rpc_clnt_sigmask(clnt, &saved_set);
+	if (!clnt->cl_intr) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(*timeout);
+	} else {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(*timeout);
+		if (signalled())
+			ret = -ERESTARTSYS;
+	}
+	rpc_clnt_sigunmask(clnt, &saved_set);
+
+	/* Exponential back-off: the next delay is twice as long */
+	*timeout <<= 1;
+	return ret;
+}
+
+/* This is the error handling routine for processes that are allowed
+ * to sleep.
+ *
+ * @errorcode is the status of the call that was just made.  On return
+ * exception->retry is non-zero if the caller should repeat the call:
+ *  - STALE_CLIENTID / STALE_STATEID / EXPIRED: wait for state recovery,
+ *    retry if the wait was not interrupted;
+ *  - GRACE / DELAY: sleep with back-off (exception->timeout), retry if
+ *    the sleep was not interrupted;
+ *  - OLD_STATEID: retry immediately, as nfs4_async_handle_error() does.
+ *
+ * Returns 0 for a zero @errorcode, otherwise the (possibly replaced)
+ * error passed through nfs4_map_errors().
+ */
+int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+	int ret = errorcode;
+
+	exception->retry = 0;
+	switch(errorcode) {
+		case 0:
+			return 0;
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			ret = nfs4_wait_clnt_recover(server->client, clp);
+			if (ret == 0)
+				exception->retry = 1;
+			break;
+		case -NFS4ERR_GRACE:
+		case -NFS4ERR_DELAY:
+			ret = nfs4_delay(server->client, &exception->timeout);
+			if (ret == 0)
+				exception->retry = 1;
+			break;
+		case -NFS4ERR_OLD_STATEID:
+			/*
+			 * ret still holds the error code here, so the old
+			 * "if (ret == 0)" test could never be true and the
+			 * retry was never requested.
+			 */
+			exception->retry = 1;
+			break;
+	}
+	/* We failed to handle the error */
+	return nfs4_map_errors(ret);
+}
+
+/*
+ * Send SETCLIENTID for @clp, advertising callback program @program on
+ * @port.
+ *
+ * The verifier is built from the client's boot time.  While the server
+ * answers NFS4ERR_CLID_INUSE the call is repeated, alternating between
+ * bumping cl_id_uniquifier and sleeping for the lease time plus one
+ * second.  The loop also ends on a pending signal or if the uniquifier
+ * wraps to 0.  Returns the status of the last call.
+ */
+int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port)
+{
+ nfs4_verifier sc_verifier;
+ struct nfs4_setclientid setclientid = {
+ .sc_verifier = &sc_verifier,
+ .sc_prog = program,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
+ .rpc_argp = &setclientid,
+ .rpc_resp = clp,
+ .rpc_cred = clp->cl_cred,
+ };
+ u32 *p;
+ int loop = 0;
+ int status;
+
+ /* Verifier = boot time (seconds, nanoseconds) in network byte order */
+ p = (u32*)sc_verifier.data;
+ *p++ = htonl((u32)clp->cl_boot_time.tv_sec);
+ *p = htonl((u32)clp->cl_boot_time.tv_nsec);
+
+ for(;;) {
+ /* The name is rebuilt each round because the uniquifier may have changed */
+ setclientid.sc_name_len = scnprintf(setclientid.sc_name,
+ sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
+ clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
+ clp->cl_cred->cr_ops->cr_name,
+ clp->cl_id_uniquifier);
+ setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
+ sizeof(setclientid.sc_netid), "tcp");
+ /* Callback address: IP address followed by the port's high and low byte */
+ setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
+ sizeof(setclientid.sc_uaddr), "%s.%d.%d",
+ clp->cl_ipaddr, port >> 8, port & 255);
+
+ status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+ if (status != -NFS4ERR_CLID_INUSE)
+ break;
+ if (signalled())
+ break;
+ /* Even rounds: try a new uniquifier; odd rounds: wait out the lease */
+ if (loop++ & 1)
+ ssleep(clp->cl_lease_time + 1);
+ else
+ if (++clp->cl_id_uniquifier == 0)
+ break;
+ }
+ return status;
+}
+
+/*
+ * Send a synchronous SETCLIENTID_CONFIRM for @clp.  On success the
+ * lease time from the reply (multiplied by HZ) and the time the call
+ * was started are recorded in @clp under cl_lock.
+ */
+int
+nfs4_proc_setclientid_confirm(struct nfs4_client *clp)
+{
+	struct nfs_fsinfo fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
+		.rpc_argp = clp,
+		.rpc_resp = &fsinfo,
+		.rpc_cred = clp->cl_cred,
+	};
+	unsigned long started = jiffies;
+	int err;
+
+	err = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+	if (err != 0)
+		return err;
+
+	spin_lock(&clp->cl_lock);
+	clp->cl_lease_time = fsinfo.lease_time * HZ;
+	clp->cl_last_renewal = started;
+	spin_unlock(&clp->cl_lock);
+	return err;
+}
+
+/*
+ * Issue a single synchronous DELEGRETURN for 'stateid' on 'inode' using
+ * 'cred'. No error handling or retry is done here.
+ */
+static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+{
+	struct nfs4_delegreturnargs args = {
+		.stateid = stateid,
+		.fhandle = NFS_FH(inode),
+	};
+	struct rpc_message msg = {
+		.rpc_cred = cred,
+		.rpc_argp = &args,
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
+	};
+	int status;
+
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+	return status;
+}
+
+/*
+ * Return a delegation, retrying for as long as nfs4_handle_exception()
+ * asks for a retry.
+ *
+ * A stale or expired stateid is not reported as an error: state recovery
+ * is scheduled and 0 is returned, exactly as for a successful call.
+ */
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs4_exception exception = { };
+	int err;
+
+	for (;;) {
+		err = _nfs4_proc_delegreturn(inode, cred, stateid);
+		if (err == -NFS4ERR_STALE_STATEID || err == -NFS4ERR_EXPIRED) {
+			nfs4_schedule_state_recovery(server->nfs4_state);
+			return 0;
+		}
+		if (err == 0)
+			return 0;
+		err = nfs4_handle_exception(server, err, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return err;
+}
+
+#define NFS4_LOCK_MINTIMEOUT (1 * HZ)
+#define NFS4_LOCK_MAXTIMEOUT (30 * HZ)
+
+/*
+ * Back off before retrying the LOCK operation: sleep interruptibly for
+ * 'timeout' jiffies, then return the timeout to use next time, which is
+ * twice the current one, capped at NFS4_LOCK_MAXTIMEOUT.
+ */
+static unsigned long
+nfs4_set_lock_task_retry(unsigned long timeout)
+{
+	unsigned long next;
+
+	current->state = TASK_INTERRUPTIBLE;
+	schedule_timeout(timeout);
+
+	next = timeout << 1;
+	return (next > NFS4_LOCK_MAXTIMEOUT) ? NFS4_LOCK_MAXTIMEOUT : next;
+}
+
+/*
+ * Translate the lock type of 'request' into the NFSv4 lock type.
+ * Read and write locks map to the blocking ("W") variant when cmd is a
+ * SETLKW; an unlock is sent as NFS4_WRITE_LT. Any other fl_type is a bug.
+ */
+static inline int
+nfs4_lck_type(int cmd, struct file_lock *request)
+{
+	if (request->fl_type == F_RDLCK) {
+		if (IS_SETLKW(cmd))
+			return NFS4_READW_LT;
+		return NFS4_READ_LT;
+	}
+	if (request->fl_type == F_WRLCK) {
+		if (IS_SETLKW(cmd))
+			return NFS4_WRITEW_LT;
+		return NFS4_WRITE_LT;
+	}
+	if (request->fl_type == F_UNLCK)
+		return NFS4_WRITE_LT;
+	BUG();
+	return 0;
+}
+
+/*
+ * Length of the byte range described by 'request'. A range that ends at
+ * OFFSET_MAX is reported as all ones; otherwise the length is
+ * fl_end - fl_start + 1.
+ */
+static inline uint64_t
+nfs4_lck_length(struct file_lock *request)
+{
+	uint64_t length = ~(uint64_t)0;
+
+	if (request->fl_end != OFFSET_MAX)
+		length = request->fl_end - request->fl_start + 1;
+	return length;
+}
+
+/*
+ * Test for a conflicting lock with a single LOCKT call.
+ *
+ * Takes clp->cl_sem for reading and state->lock_sema around the call.
+ * The lock owner id is taken from an existing lock_state for
+ * request->fl_owner if there is one, otherwise a fresh id is allocated.
+ *
+ * On success request->fl_type is set to F_UNLCK. If the server answers
+ * NFS4ERR_DENIED, 'request' is overwritten with the range and type of the
+ * conflicting lock and 0 is returned. Any other status is returned as is.
+ */
+static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+ struct inode *inode = state->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_lockargs arg = {
+ .fh = NFS_FH(inode),
+ .type = nfs4_lck_type(cmd, request),
+ .offset = request->fl_start,
+ .length = nfs4_lck_length(request),
+ };
+ struct nfs_lockres res = {
+ .server = server,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ .rpc_cred = state->owner->so_cred,
+ };
+ struct nfs_lowner nlo;
+ struct nfs4_lock_state *lsp;
+ int status;
+
+ down_read(&clp->cl_sem);
+ nlo.clientid = clp->cl_clientid;
+ down(&state->lock_sema);
+ lsp = nfs4_find_lock_state(state, request->fl_owner);
+ if (lsp)
+ nlo.id = lsp->ls_id;
+ else {
+ /* No lock_state for this owner yet: use a fresh lockowner id */
+ spin_lock(&clp->cl_lock);
+ nlo.id = nfs4_alloc_lockowner_id(clp);
+ spin_unlock(&clp->cl_lock);
+ }
+ arg.u.lockt = &nlo;
+ status = rpc_call_sync(server->client, &msg, 0);
+ if (!status) {
+ request->fl_type = F_UNLCK;
+ } else if (status == -NFS4ERR_DENIED) {
+ int64_t len, start, end;
+ start = res.u.denied.offset;
+ len = res.u.denied.length;
+ end = start + len - 1;
+ /* A zero length or an end that went negative means "to end of file" */
+ if (end < 0 || len == 0)
+ request->fl_end = OFFSET_MAX;
+ else
+ request->fl_end = (loff_t)end;
+ request->fl_start = (loff_t)start;
+ request->fl_type = F_WRLCK;
+ /* Odd type values are reported as read locks
+ * (presumably the NFS4_READ*_LT values - confirm against nfs4.h) */
+ if (res.u.denied.type & 1)
+ request->fl_type = F_RDLCK;
+ request->fl_pid = 0;
+ status = 0;
+ }
+ if (lsp)
+ nfs4_put_lock_state(lsp);
+ up(&state->lock_sema);
+ up_read(&clp->cl_sem);
+ return status;
+}
+
+/*
+ * GETLK entry point: run _nfs4_proc_getlk() and feed its status to
+ * nfs4_handle_exception(), repeating while a retry is requested.
+ */
+static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	for (;;) {
+		int status = _nfs4_proc_getlk(state, cmd, request);
+
+		err = nfs4_handle_exception(NFS_SERVER(state->inode),
+				status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return err;
+}
+
+/*
+ * Apply 'fl' to the local VFS lock tables, using the POSIX or the flock
+ * variant according to fl->fl_flags. Exactly one of FL_POSIX and FL_FLOCK
+ * must be set; anything else is a bug. A negative result is logged and
+ * returned to the caller.
+ */
+static int do_vfs_lock(struct file *file, struct file_lock *fl)
+{
+	int kind = fl->fl_flags & (FL_POSIX|FL_FLOCK);
+	int res = 0;
+
+	if (kind == FL_POSIX)
+		res = posix_lock_file_wait(file, fl);
+	else if (kind == FL_FLOCK)
+		res = flock_lock_file_wait(file, fl);
+	else
+		BUG();
+
+	if (res < 0)
+		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
+				__FUNCTION__);
+	return res;
+}
+
+/*
+ * Release a byte-range lock with a single LOCKU call.
+ *
+ * Takes clp->cl_sem for reading and state->lock_sema. If there is no
+ * lock_state for request->fl_owner, or the lock_state was never
+ * initialized on the server, no RPC is sent and status stays 0.
+ * Whenever status is 0 the unlock is also applied to the local VFS lock
+ * tables through do_vfs_lock(). Returns the RPC status.
+ */
+static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+ struct inode *inode = state->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs4_client *clp = server->nfs4_state;
+ struct nfs_lockargs arg = {
+ .fh = NFS_FH(inode),
+ .type = nfs4_lck_type(cmd, request),
+ .offset = request->fl_start,
+ .length = nfs4_lck_length(request),
+ };
+ struct nfs_lockres res = {
+ .server = server,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ .rpc_cred = state->owner->so_cred,
+ };
+ struct nfs4_lock_state *lsp;
+ struct nfs_locku_opargs luargs;
+ int status = 0;
+
+ down_read(&clp->cl_sem);
+ down(&state->lock_sema);
+ lsp = nfs4_find_lock_state(state, request->fl_owner);
+ if (!lsp)
+ goto out;
+ /* We might have lost the locks! */
+ if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
+ luargs.seqid = lsp->ls_seqid;
+ memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
+ arg.u.locku = &luargs;
+ status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+ nfs4_increment_lock_seqid(status, lsp);
+ }
+
+ /* NOTE(review): this also runs when no LOCKU was sent, in which case
+ * res.u.stateid still holds its initializer value - confirm intended */
+ if (status == 0) {
+ memcpy(&lsp->ls_stateid, &res.u.stateid,
+ sizeof(lsp->ls_stateid));
+ nfs4_notify_unlck(state, request, lsp);
+ }
+ nfs4_put_lock_state(lsp);
+out:
+ up(&state->lock_sema);
+ /* The result of the local unlock is not propagated to the caller */
+ if (status == 0)
+ do_vfs_lock(request->fl_file, request);
+ up_read(&clp->cl_sem);
+ return status;
+}
+
+/*
+ * Unlock entry point: run _nfs4_proc_unlck() and feed its status to
+ * nfs4_handle_exception(), repeating while a retry is requested.
+ */
+static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	for (;;) {
+		int status = _nfs4_proc_unlck(state, cmd, request);
+
+		err = nfs4_handle_exception(NFS_SERVER(state->inode),
+				status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return err;
+}
+
+/*
+ * Acquire (or, when 'reclaim' is non-zero, reclaim) a byte-range lock
+ * with a single LOCK call.
+ *
+ * If the lock_state for request->fl_owner has not been initialized on
+ * the server yet, the call is sent as an open-to-lock transition using
+ * the open stateid and the open owner's seqid (serialized by
+ * owner->so_sema). Otherwise the existing lock stateid and seqid are used.
+ *
+ * Returns 0 on success, -ENOMEM if no lock_state could be obtained,
+ * -EAGAIN if the server denied the lock, or the RPC status otherwise.
+ * No locks are taken here apart from owner->so_sema; the visible callers
+ * differ in what they hold around the call.
+ */
+static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim)
+{
+ struct inode *inode = state->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs4_lock_state *lsp;
+ struct nfs_lockargs arg = {
+ .fh = NFS_FH(inode),
+ .type = nfs4_lck_type(cmd, request),
+ .offset = request->fl_start,
+ .length = nfs4_lck_length(request),
+ };
+ struct nfs_lockres res = {
+ .server = server,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
+ .rpc_argp = &arg,
+ .rpc_resp = &res,
+ .rpc_cred = state->owner->so_cred,
+ };
+ struct nfs_lock_opargs largs = {
+ .reclaim = reclaim,
+ .new_lock_owner = 0,
+ };
+ int status;
+
+ lsp = nfs4_get_lock_state(state, request->fl_owner);
+ if (lsp == NULL)
+ return -ENOMEM;
+ if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
+ /* First lock for this owner: open-to-lock transition */
+ struct nfs4_state_owner *owner = state->owner;
+ struct nfs_open_to_lock otl = {
+ .lock_owner = {
+ .clientid = server->nfs4_state->cl_clientid,
+ },
+ };
+
+ otl.lock_seqid = lsp->ls_seqid;
+ otl.lock_owner.id = lsp->ls_id;
+ memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid));
+ largs.u.open_lock = &otl;
+ largs.new_lock_owner = 1;
+ arg.u.lock = &largs;
+ down(&owner->so_sema);
+ otl.open_seqid = owner->so_seqid;
+ status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+ /* increment open_owner seqid on success, and
+ * seqid mutating errors */
+ nfs4_increment_seqid(status, owner);
+ up(&owner->so_sema);
+ } else {
+ /* The server already knows this lock owner */
+ struct nfs_exist_lock el = {
+ .seqid = lsp->ls_seqid,
+ };
+ memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
+ largs.u.exist_lock = &el;
+ largs.new_lock_owner = 0;
+ arg.u.lock = &largs;
+ status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+ }
+ /* increment lock seqid on success, and on seqid mutating errors */
+ nfs4_increment_lock_seqid(status, lsp);
+ /* save the returned stateid. */
+ if (status == 0) {
+ memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
+ lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+ if (!reclaim)
+ nfs4_notify_setlk(state, request, lsp);
+ } else if (status == -NFS4ERR_DENIED)
+ status = -EAGAIN;
+ nfs4_put_lock_state(lsp);
+ return status;
+}
+
+/*
+ * Lock recovery callback installed in nfs4_reboot_recovery_ops: re-request
+ * the lock as a non-blocking F_SETLK with the reclaim flag set.
+ */
+static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
+{
+ return _nfs4_do_setlk(state, F_SETLK, request, 1);
+}
+
+/*
+ * Lock recovery callback installed in nfs4_network_partition_recovery_ops:
+ * re-request the lock as an ordinary non-blocking F_SETLK (reclaim flag
+ * clear).
+ */
+static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
+{
+ return _nfs4_do_setlk(state, F_SETLK, request, 0);
+}
+
+/*
+ * Make one attempt at acquiring a lock on the server and, on success,
+ * mirror it in the local VFS lock tables.
+ *
+ * clp->cl_sem is held for reading across the whole operation;
+ * state->lock_sema only covers the server call. Returns the status of
+ * _nfs4_do_setlk(); a failure of the local VFS lock is logged but not
+ * returned.
+ */
+static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+ struct nfs4_client *clp = state->owner->so_client;
+ int status;
+
+ down_read(&clp->cl_sem);
+ down(&state->lock_sema);
+ status = _nfs4_do_setlk(state, cmd, request, 0);
+ up(&state->lock_sema);
+ if (status == 0) {
+ /* Note: we always want to sleep here! */
+ request->fl_flags |= FL_SLEEP;
+ /* do_vfs_lock() already logs this condition, so a failure is
+ * reported twice (once per function name) */
+ if (do_vfs_lock(request->fl_file, request) < 0)
+ printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
+ }
+ up_read(&clp->cl_sem);
+ return status;
+}
+
+/*
+ * SETLK entry point: run _nfs4_proc_setlk() and feed its status to
+ * nfs4_handle_exception(), repeating while a retry is requested.
+ */
+static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_exception exception = { };
+	int err;
+
+	for (;;) {
+		int status = _nfs4_proc_setlk(state, cmd, request);
+
+		err = nfs4_handle_exception(NFS_SERVER(state->inode),
+				status, &exception);
+		if (!exception.retry)
+			break;
+	}
+	return err;
+}
+
+/*
+ * The ->lock operation of nfs_v4_clientops: dispatch a byte-range lock
+ * request to the GETLK, unlock or SETLK/SETLKW implementation.
+ *
+ * Returns -EINVAL for negative offsets or an unsupported command.
+ * For a blocking request (not IS_SETLK) that the server denies with
+ * -EAGAIN, the request is retried after an exponentially growing sleep
+ * until it succeeds, fails differently, or a signal is pending, in which
+ * case -ERESTARTSYS is returned.
+ */
+static int
+nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
+{
+ struct nfs_open_context *ctx;
+ struct nfs4_state *state;
+ unsigned long timeout = NFS4_LOCK_MINTIMEOUT;
+ int status;
+
+ /* verify open state */
+ ctx = (struct nfs_open_context *)filp->private_data;
+ state = ctx->state;
+
+ if (request->fl_start < 0 || request->fl_end < 0)
+ return -EINVAL;
+
+ if (IS_GETLK(cmd))
+ return nfs4_proc_getlk(state, F_GETLK, request);
+
+ if (!(IS_SETLK(cmd) || IS_SETLKW(cmd)))
+ return -EINVAL;
+
+ if (request->fl_type == F_UNLCK)
+ return nfs4_proc_unlck(state, cmd, request);
+
+ do {
+ status = nfs4_proc_setlk(state, cmd, request);
+ if ((status != -EAGAIN) || IS_SETLK(cmd))
+ break;
+ /* Denied blocking request: sleep, then double the timeout */
+ timeout = nfs4_set_lock_task_retry(timeout);
+ /* status is always negative here, so the loop only ends
+ * through one of the breaks */
+ status = -ERESTARTSYS;
+ if (signalled())
+ break;
+ } while(status < 0);
+
+ return status;
+}
+
+/*
+ * Recovery operations that use the "reclaim" variants of open and lock
+ * (nfs4_lock_reclaim() sets the reclaim flag on the LOCK request).
+ */
+struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
+ .recover_open = nfs4_open_reclaim,
+ .recover_lock = nfs4_lock_reclaim,
+};
+
+/*
+ * Recovery operations that use the "expired" variants of open and lock
+ * (nfs4_lock_expired() re-requests locks without the reclaim flag).
+ */
+struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
+ .recover_open = nfs4_open_expired,
+ .recover_lock = nfs4_lock_expired,
+};
+
+/*
+ * Operations table binding the generic NFS client entry points to their
+ * NFSv4 implementations. Note that .remove and .rmdir share
+ * nfs4_proc_remove.
+ */
+struct nfs_rpc_ops nfs_v4_clientops = {
+ .version = 4, /* protocol version */
+ .dentry_ops = &nfs4_dentry_operations,
+ .dir_inode_ops = &nfs4_dir_inode_operations,
+ .getroot = nfs4_proc_get_root,
+ .getattr = nfs4_proc_getattr,
+ .setattr = nfs4_proc_setattr,
+ .lookup = nfs4_proc_lookup,
+ .access = nfs4_proc_access,
+ .readlink = nfs4_proc_readlink,
+ .read = nfs4_proc_read,
+ .write = nfs4_proc_write,
+ .commit = nfs4_proc_commit,
+ .create = nfs4_proc_create,
+ .remove = nfs4_proc_remove,
+ .unlink_setup = nfs4_proc_unlink_setup,
+ .unlink_done = nfs4_proc_unlink_done,
+ .rename = nfs4_proc_rename,
+ .link = nfs4_proc_link,
+ .symlink = nfs4_proc_symlink,
+ .mkdir = nfs4_proc_mkdir,
+ .rmdir = nfs4_proc_remove,
+ .readdir = nfs4_proc_readdir,
+ .mknod = nfs4_proc_mknod,
+ .statfs = nfs4_proc_statfs,
+ .fsinfo = nfs4_proc_fsinfo,
+ .pathconf = nfs4_proc_pathconf,
+ .decode_dirent = nfs4_decode_dirent,
+ .read_setup = nfs4_proc_read_setup,
+ .write_setup = nfs4_proc_write_setup,
+ .commit_setup = nfs4_proc_commit_setup,
+ .file_open = nfs4_proc_file_open,
+ .file_release = nfs4_proc_file_release,
+ .lock = nfs4_proc_lock,
+};
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
new file mode 100644
index 000000000000..667e06f1c647
--- /dev/null
+++ b/fs/nfs/nfs4renewd.c
@@ -0,0 +1,148 @@
+/*
+ * fs/nfs/nfs4renewd.c
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 "renew daemon", which wakes up periodically to
+ * send a RENEW, to keep state alive on the server. The daemon is implemented
+ * as delayed work (clp->cl_renewd), not a real kernel thread, so it runs in
+ * the context of the shared kernel workqueue. There is one renewd per
+ * nfs4_client.
+ *
+ * TODO: If the send queue gets backlogged (e.g., if the server goes down),
+ * we will keep filling the queue with periodic RENEW requests. We need a
+ * mechanism for ensuring that if renewd successfully sends off a request,
+ * then it only wakes up when the request is finished. Maybe use the
+ * child task framework of the RPC layer?
+ */
+
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+
+#define NFSDBG_FACILITY NFSDBG_PROC
+
+/*
+ * Work function for clp->cl_renewd; 'data' is the struct nfs4_client.
+ *
+ * If the client still has superblocks attached, queue an asynchronous
+ * RENEW once more than a third of the lease has elapsed since the last
+ * renewal, then re-arm the delayed work. The next run is scheduled about
+ * two thirds of a lease after the last renewal (or after now, if a RENEW
+ * was just queued), but never sooner than 5 seconds from now.
+ * Runs with clp->cl_sem held for reading.
+ */
+void
+nfs4_renew_state(void *data)
+{
+ struct nfs4_client *clp = (struct nfs4_client *)data;
+ long lease, timeout;
+ unsigned long last, now;
+
+ down_read(&clp->cl_sem);
+ dprintk("%s: start\n", __FUNCTION__);
+ /* Are there any active superblocks? */
+ if (list_empty(&clp->cl_superblocks))
+ goto out;
+ spin_lock(&clp->cl_lock);
+ lease = clp->cl_lease_time;
+ last = clp->cl_last_renewal;
+ now = jiffies;
+ timeout = (2 * lease) / 3 + (long)last - (long)now;
+ /* Are we close to a lease timeout? */
+ if (time_after(now, last + lease/3)) {
+ /* cl_lock is dropped around the call that queues the RENEW */
+ spin_unlock(&clp->cl_lock);
+ /* Queue an asynchronous RENEW. */
+ nfs4_proc_async_renew(clp);
+ timeout = (2 * lease) / 3;
+ spin_lock(&clp->cl_lock);
+ } else
+ dprintk("%s: failed to call renewd. Reason: lease not expired \n",
+ __FUNCTION__);
+ if (timeout < 5 * HZ) /* safeguard */
+ timeout = 5 * HZ;
+ dprintk("%s: requeueing work. Lease period = %ld\n",
+ __FUNCTION__, (timeout + HZ - 1) / HZ);
+ cancel_delayed_work(&clp->cl_renewd);
+ schedule_delayed_work(&clp->cl_renewd, timeout);
+ spin_unlock(&clp->cl_lock);
+out:
+ up_read(&clp->cl_sem);
+ dprintk("%s: done\n", __FUNCTION__);
+}
+
+/*
+ * (Re)arm the renewal work so that it runs about two thirds of a lease
+ * period after the last renewal, but no sooner than 5 seconds from now.
+ * Any pending instance of the work is cancelled first.
+ *
+ * Must be called with clp->cl_sem locked for writes
+ * (NOTE(review): not verifiable from this file - confirm against callers).
+ */
+void
+nfs4_schedule_state_renewal(struct nfs4_client *clp)
+{
+ long timeout;
+
+ spin_lock(&clp->cl_lock);
+ timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal
+ - (long)jiffies;
+ if (timeout < 5 * HZ)
+ timeout = 5 * HZ;
+ dprintk("%s: requeueing work. Lease period = %ld\n",
+ __FUNCTION__, (timeout + HZ - 1) / HZ);
+ cancel_delayed_work(&clp->cl_renewd);
+ schedule_delayed_work(&clp->cl_renewd, timeout);
+ spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Detach 'server' from its NFSv4 client ahead of shutdown: wait for any
+ * scheduled work to finish, then unlink the server from the sibling list
+ * under the client's cl_sem (held for writing). Does nothing if the
+ * server has no NFSv4 client state.
+ */
+void
+nfs4_renewd_prepare_shutdown(struct nfs_server *server)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+
+	if (clp == NULL)
+		return;
+
+	flush_scheduled_work();
+
+	down_write(&clp->cl_sem);
+	if (list_empty(&server->nfs4_siblings) == 0)
+		list_del_init(&server->nfs4_siblings);
+	up_write(&clp->cl_sem);
+}
+
+/*
+ * Stop the renewal work for a client that has no superblocks left.
+ * If superblocks are still attached, nothing is done. Otherwise the
+ * delayed work is cancelled and the workqueue flushed.
+ *
+ * This function takes clp->cl_sem for reading itself, so it must not be
+ * called with cl_sem held for writing.
+ */
+void
+nfs4_kill_renewd(struct nfs4_client *clp)
+{
+ down_read(&clp->cl_sem);
+ if (!list_empty(&clp->cl_superblocks)) {
+ up_read(&clp->cl_sem);
+ return;
+ }
+ cancel_delayed_work(&clp->cl_renewd);
+ up_read(&clp->cl_sem);
+ /* Flush after dropping cl_sem: the work function takes it too */
+ flush_scheduled_work();
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
new file mode 100644
index 000000000000..231cebce3c87
--- /dev/null
+++ b/fs/nfs/nfs4state.c
@@ -0,0 +1,932 @@
+/*
+ * fs/nfs/nfs4state.c
+ *
+ * Client-side state management for NFSv4.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Implementation of the NFSv4 state model. For the time being,
+ * this is minimal, but will be made much more complex in a
+ * subsequent patch.
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_idmap.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+
+#include "callback.h"
+#include "delegation.h"
+
+/* Upper bound on the number of idle state owners kept on clp->cl_unused */
+#define OPENOWNER_POOL_SIZE 8
+
+/* Protects nfs4_clientid_list and the final put of a client's cl_count */
+static DEFINE_SPINLOCK(state_spinlock);
+
+/* Stateid with all bytes zero (static storage, never written in this chunk) */
+nfs4_stateid zero_stateid;
+
+#if 0
+nfs4_stateid one_stateid =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+#endif
+
+/* All known nfs4_client structures, linked through cl_servers */
+static LIST_HEAD(nfs4_clientid_list);
+
+/* Work functions installed on each client by nfs4_alloc_client() */
+static void nfs4_recover_state(void *);
+extern void nfs4_renew_state(void *);
+
+/*
+ * Put the NFSv4 fields of 'server' into their initial state: no client
+ * attached and an empty sibling list entry.
+ */
+void
+init_nfsv4_state(struct nfs_server *server)
+{
+ server->nfs4_state = NULL;
+ INIT_LIST_HEAD(&server->nfs4_siblings);
+}
+
+/*
+ * Release the NFSv4 resources attached to 'server': free the mount path
+ * and drop the reference on the client structure, clearing both pointers.
+ */
+void
+destroy_nfsv4_state(struct nfs_server *server)
+{
+	if (server->mnt_path != NULL) {
+		kfree(server->mnt_path);
+		server->mnt_path = NULL;
+	}
+
+	if (server->nfs4_state == NULL)
+		return;
+	nfs4_put_client(server->nfs4_state);
+	server->nfs4_state = NULL;
+}
+
+/*
+ * nfs4_get_client(): returns an empty client structure
+ * nfs4_put_client(): drops reference to client structure
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+/*
+ * Allocate and initialize a client structure for the server at 'addr'.
+ * The callback service is brought up first and taken down again if the
+ * allocation fails. The new client starts with a reference count of one
+ * and its state set to NFS4CLNT_OK. Returns NULL on failure.
+ */
+static struct nfs4_client *
+nfs4_alloc_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp;
+
+	if (nfs_callback_up() < 0)
+		return NULL;
+
+	clp = kmalloc(sizeof(*clp), GFP_KERNEL);
+	if (clp == NULL)
+		goto out_callback_down;
+
+	memset(clp, 0, sizeof(*clp));
+	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+
+	/* Locks and reference count */
+	init_rwsem(&clp->cl_sem);
+	spin_lock_init(&clp->cl_lock);
+	atomic_set(&clp->cl_count, 1);
+
+	/* Lists */
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_state_owners);
+	INIT_LIST_HEAD(&clp->cl_unused);
+	INIT_LIST_HEAD(&clp->cl_superblocks);
+
+	/* Deferred work and wait queues */
+	INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp);
+	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	init_waitqueue_head(&clp->cl_waitq);
+	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
+
+	clp->cl_boot_time = CURRENT_TIME;
+	clp->cl_state = 1 << NFS4CLNT_OK;
+	return clp;
+
+out_callback_down:
+	nfs_callback_down();
+	return NULL;
+}
+
+/*
+ * Tear down a client structure: free every cached state owner on
+ * cl_unused, release the credential, the idmapper and the RPC client,
+ * free the structure and take the callback service down. The list of
+ * active state owners must already be empty.
+ */
+static void
+nfs4_free_client(struct nfs4_client *clp)
+{
+	struct list_head *unused = &clp->cl_unused;
+
+	for (;;) {
+		struct nfs4_state_owner *sp;
+
+		if (list_empty(unused))
+			break;
+		sp = list_entry(unused->next, struct nfs4_state_owner, so_list);
+		list_del(&sp->so_list);
+		kfree(sp);
+	}
+	BUG_ON(!list_empty(&clp->cl_state_owners));
+
+	if (clp->cl_cred)
+		put_rpccred(clp->cl_cred);
+	nfs_idmap_delete(clp);
+	if (clp->cl_rpcclient)
+		rpc_shutdown_client(clp->cl_rpcclient);
+
+	kfree(clp);
+	nfs_callback_down();
+}
+
+/*
+ * Look up the client whose address equals 'addr' on nfs4_clientid_list.
+ * A match is returned with its reference count raised; NULL if there is
+ * none. Both visible callers hold state_spinlock.
+ */
+static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
+		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) != 0)
+			continue;
+		atomic_inc(&clp->cl_count);
+		return clp;
+	}
+	return NULL;
+}
+
+/*
+ * Locked wrapper around __nfs4_find_client(): returns the client for
+ * 'addr' with an extra reference, or NULL if none is registered.
+ */
+struct nfs4_client *nfs4_find_client(struct in_addr *addr)
+{
+ struct nfs4_client *clp;
+ spin_lock(&state_spinlock);
+ clp = __nfs4_find_client(addr);
+ spin_unlock(&state_spinlock);
+ return clp;
+}
+
+/*
+ * Return the client structure for 'addr', creating and registering one
+ * if none exists. An existing client is returned with an extra
+ * reference; a new one carries the single reference set at allocation.
+ * Returns NULL if a new client was needed but could not be allocated.
+ *
+ * The allocation is done with state_spinlock dropped, so the lookup is
+ * repeated afterwards; if another task registered a client in the
+ * meantime, the spare allocation is freed.
+ */
+struct nfs4_client *
+nfs4_get_client(struct in_addr *addr)
+{
+ struct nfs4_client *clp, *new = NULL;
+
+ spin_lock(&state_spinlock);
+ for (;;) {
+ clp = __nfs4_find_client(addr);
+ if (clp != NULL)
+ break;
+ /* Second pass: nobody beat us to it, so publish our allocation */
+ clp = new;
+ if (clp != NULL) {
+ list_add(&clp->cl_servers, &nfs4_clientid_list);
+ new = NULL;
+ break;
+ }
+ /* First pass: allocate outside the spinlock, then look again */
+ spin_unlock(&state_spinlock);
+ new = nfs4_alloc_client(addr);
+ spin_lock(&state_spinlock);
+ if (new == NULL)
+ break;
+ }
+ spin_unlock(&state_spinlock);
+ /* Lost the race: discard the unused allocation */
+ if (new)
+ nfs4_free_client(new);
+ return clp;
+}
+
+/*
+ * Drop a reference to 'clp'. When the last reference goes away the
+ * client is unlinked from nfs4_clientid_list, its waiters are woken, the
+ * renew daemon is stopped and the structure is freed. No superblock may
+ * still be attached at that point.
+ */
+void
+nfs4_put_client(struct nfs4_client *clp)
+{
+ /* Takes state_spinlock only when the count drops to zero */
+ if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
+ return;
+ list_del(&clp->cl_servers);
+ spin_unlock(&state_spinlock);
+ BUG_ON(!list_empty(&clp->cl_superblocks));
+ wake_up_all(&clp->cl_waitq);
+ rpc_wake_up(&clp->cl_rpcwaitq);
+ nfs4_kill_renewd(clp);
+ nfs4_free_client(clp);
+}
+
+/*
+ * Establish the client id with the server: SETCLIENTID followed by
+ * SETCLIENTID_CONFIRM, then start lease renewal. Stops at, and returns,
+ * the first non-zero status.
+ */
+static int __nfs4_init_client(struct nfs4_client *clp)
+{
+	int status;
+
+	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport);
+	if (status != 0)
+		return status;
+
+	status = nfs4_proc_setclientid_confirm(clp);
+	if (status != 0)
+		return status;
+
+	nfs4_schedule_state_renewal(clp);
+	return status;
+}
+
+/*
+ * Public wrapper around __nfs4_init_client() that passes the resulting
+ * status through nfs4_map_errors() before returning it.
+ */
+int nfs4_init_client(struct nfs4_client *clp)
+{
+ return nfs4_map_errors(__nfs4_init_client(clp));
+}
+
+/*
+ * Hand out the next owner id for this client (post-increment of
+ * clp->cl_lockowner_id). No locking is done here; the callers visible in
+ * this file hold clp->cl_lock around the call.
+ */
+u32
+nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+{
+ return clp->cl_lockowner_id ++;
+}
+
+/*
+ * Recycle a cached state owner, if any: take the first entry of
+ * clp->cl_unused, give it a reference and the new credential, and move
+ * it to the list of active state owners. Returns NULL when the cache is
+ * empty.
+ */
+static struct nfs4_state_owner *
+nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+	struct nfs4_state_owner *sp;
+
+	if (list_empty(&clp->cl_unused))
+		return NULL;
+
+	sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
+	atomic_inc(&sp->so_count);
+	sp->so_cred = cred;
+	list_move(&sp->so_list, &clp->cl_state_owners);
+	clp->cl_nunused--;
+	return sp;
+}
+
+/*
+ * Search the active state owners of 'clp' for one using 'cred'.
+ * A match gets an extra reference and is moved to the head of the list.
+ * Returns NULL if no owner uses this credential.
+ */
+static struct nfs4_state_owner *
+nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+	struct nfs4_state_owner *sp;
+
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		if (sp->so_cred == cred) {
+			atomic_inc(&sp->so_count);
+			/* Move to the head of the list */
+			list_move(&sp->so_list, &clp->cl_state_owners);
+			return sp;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * nfs4_alloc_state_owner(): allocate a fresh state_owner for the OPEN or
+ * CREATE path. Only the semaphore, the seqid, the two list heads and the
+ * reference count (set to one) are initialized here; the remaining fields
+ * are left for the caller. Returns NULL if the allocation fails.
+ */
+static struct nfs4_state_owner *
+nfs4_alloc_state_owner(void)
+{
+	struct nfs4_state_owner *sp = kmalloc(sizeof(*sp),GFP_KERNEL);
+
+	if (sp != NULL) {
+		init_MUTEX(&sp->so_sema);
+		sp->so_seqid = 0; /* arbitrary */
+		INIT_LIST_HEAD(&sp->so_states);
+		INIT_LIST_HEAD(&sp->so_delegations);
+		atomic_set(&sp->so_count, 1);
+	}
+	return sp;
+}
+
+/*
+ * Unlink 'sp' from whichever client list it is on, under clp->cl_lock.
+ * The entry is re-initialized, so list_empty(&sp->so_list) is true
+ * afterwards; nfs4_put_state_owner() frees such an owner instead of
+ * caching it.
+ */
+void
+nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+{
+ struct nfs4_client *clp = sp->so_client;
+ spin_lock(&clp->cl_lock);
+ list_del_init(&sp->so_list);
+ spin_unlock(&clp->cl_lock);
+}
+
+/*
+ * Find or create the state owner for 'cred' on the client of 'server'.
+ *
+ * In order of preference: an active owner already using 'cred', a cached
+ * owner recycled from cl_unused, or a newly allocated one. The returned
+ * owner carries a reference that the caller must drop with
+ * nfs4_put_state_owner(). Returns NULL only if no owner exists and the
+ * allocation failed.
+ *
+ * Each live owner holds exactly one reference on its credential, which
+ * nfs4_put_state_owner() releases on the final put. The reference taken
+ * on entry is therefore kept only when an owner is recycled or created.
+ *
+ * Note: must be called with clp->cl_sem held in order to prevent races
+ * with reboot recovery!
+ */
+struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
+{
+	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs4_state_owner *sp, *new;
+	int found_active;
+
+	/* Reference to be stored in so_cred of a recycled or new owner */
+	get_rpccred(cred);
+	/* Allocate up front: kmalloc may sleep, so not under cl_lock */
+	new = nfs4_alloc_state_owner();
+	spin_lock(&clp->cl_lock);
+	sp = nfs4_find_state_owner(clp, cred);
+	found_active = (sp != NULL);
+	if (sp == NULL)
+		sp = nfs4_client_grab_unused(clp, cred);
+	if (sp == NULL && new != NULL) {
+		list_add(&new->so_list, &clp->cl_state_owners);
+		new->so_client = clp;
+		new->so_id = nfs4_alloc_lockowner_id(clp);
+		new->so_cred = cred;
+		sp = new;
+		new = NULL;
+	}
+	spin_unlock(&clp->cl_lock);
+	kfree(new);
+	/*
+	 * An owner found on the active list already owns a reference to
+	 * the credential, so ours would be leaked; the same holds when no
+	 * owner could be obtained at all.
+	 */
+	if (found_active || sp == NULL)
+		put_rpccred(cred);
+	return sp;
+}
+
+/*
+ * Drop a reference to a state owner. On the final put the owner's
+ * credential is released and the owner is either cached on
+ * clp->cl_unused for reuse or, if the cache already holds
+ * OPENOWNER_POOL_SIZE entries or the owner has been unlinked, freed.
+ *
+ * Must be called with clp->cl_sem held in order to avoid races
+ * with state recovery...
+ */
+void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+{
+ struct nfs4_client *clp = sp->so_client;
+ struct rpc_cred *cred = sp->so_cred;
+
+ /* Takes cl_lock only when the count drops to zero */
+ if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+ return;
+ if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
+ goto out_free;
+ /* An empty so_list means the owner is no longer on any client list */
+ if (list_empty(&sp->so_list))
+ goto out_free;
+ list_move(&sp->so_list, &clp->cl_unused);
+ clp->cl_nunused++;
+ spin_unlock(&clp->cl_lock);
+ put_rpccred(cred);
+ /* Only the local variable is cleared; sp->so_cred keeps the old pointer */
+ cred = NULL;
+ return;
+out_free:
+ list_del(&sp->so_list);
+ spin_unlock(&clp->cl_lock);
+ put_rpccred(cred);
+ kfree(sp);
+}
+
+/*
+ * Allocate an open state with no readers or writers, a zeroed stateid,
+ * an empty list of lock states and a reference count of one. The owner,
+ * inode and list linkage are left for the caller to fill in.
+ * Returns NULL if the allocation fails.
+ */
+static struct nfs4_state *
+nfs4_alloc_open_state(void)
+{
+	struct nfs4_state *state = kmalloc(sizeof(*state), GFP_KERNEL);
+
+	if (state == NULL)
+		return NULL;
+
+	/* Open mode bookkeeping */
+	state->state = 0;
+	state->flags = 0;
+	state->nreaders = 0;
+	state->nwriters = 0;
+	memset(state->stateid.data, 0, sizeof(state->stateid.data));
+
+	/* Reference count, lock list and locks */
+	atomic_set(&state->count, 1);
+	INIT_LIST_HEAD(&state->lock_states);
+	init_MUTEX(&state->lock_sema);
+	rwlock_init(&state->state_lock);
+	return state;
+}
+
+/*
+ * Find an open state on 'inode' that belongs to 'cred' and already covers
+ * the requested open mode (FMODE_READ and/or FMODE_WRITE): each requested
+ * mode needs a non-zero reader/writer count and must be set in
+ * state->state. A match gets an extra reference and its reader/writer
+ * counts are bumped for the requested modes. Returns NULL if no state
+ * qualifies. The visible caller holds inode->i_lock.
+ */
+static struct nfs4_state *
+__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs4_state *state;
+
+ mode &= (FMODE_READ|FMODE_WRITE);
+ list_for_each_entry(state, &nfsi->open_states, inode_states) {
+ if (state->owner->so_cred != cred)
+ continue;
+ if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
+ continue;
+ if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
+ continue;
+ if ((state->state & mode) != mode)
+ continue;
+ atomic_inc(&state->count);
+ if (mode & FMODE_READ)
+ state->nreaders++;
+ if (mode & FMODE_WRITE)
+ state->nwriters++;
+ return state;
+ }
+ return NULL;
+}
+
+/*
+ * Find the open state on 'inode' that belongs to 'owner'. States with
+ * neither readers nor writers are on their way out and are skipped.
+ * A match is returned with an extra reference; NULL if there is none.
+ */
+static struct nfs4_state *
+__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs4_state *state;
+
+	list_for_each_entry(state, &nfsi->open_states, inode_states) {
+		int in_use = (state->nreaders != 0) || (state->nwriters != 0);
+
+		/* Is this in the process of being freed? */
+		if (!in_use)
+			continue;
+		if (state->owner != owner)
+			continue;
+		atomic_inc(&state->count);
+		return state;
+	}
+	return NULL;
+}
+
+/*
+ * Locked wrapper around __nfs4_find_state(): performs the lookup under
+ * inode->i_lock and returns the referenced state, or NULL.
+ */
+struct nfs4_state *
+nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
+{
+ struct nfs4_state *state;
+
+ spin_lock(&inode->i_lock);
+ state = __nfs4_find_state(inode, cred, mode);
+ spin_unlock(&inode->i_lock);
+ return state;
+}
+
+/* Release the memory of an open state obtained from nfs4_alloc_open_state() */
+static void
+nfs4_free_open_state(struct nfs4_state *state)
+{
+ kfree(state);
+}
+
+/*
+ * Return the open state of 'owner' on 'inode', creating one if needed.
+ * An existing state is returned with an extra reference. A new state is
+ * linked to both the owner and the inode, takes a reference on the owner
+ * and pins the inode with igrab(). Returns NULL if no state exists and
+ * the allocation failed.
+ *
+ * The allocation is done outside inode->i_lock, so the lookup is
+ * repeated once the lock is retaken.
+ */
+struct nfs4_state *
+nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
+{
+ struct nfs4_state *state, *new;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ spin_lock(&inode->i_lock);
+ state = __nfs4_find_state_byowner(inode, owner);
+ spin_unlock(&inode->i_lock);
+ if (state)
+ goto out;
+ new = nfs4_alloc_open_state();
+ spin_lock(&inode->i_lock);
+ state = __nfs4_find_state_byowner(inode, owner);
+ if (state == NULL && new != NULL) {
+ state = new;
+ /* Caller *must* be holding owner->so_sema */
+ /* Note: The reclaim code dictates that we add stateless
+ * and read-only stateids to the end of the list */
+ list_add_tail(&state->open_states, &owner->so_states);
+ state->owner = owner;
+ atomic_inc(&owner->so_count);
+ list_add(&state->inode_states, &nfsi->open_states);
+ state->inode = igrab(inode);
+ spin_unlock(&inode->i_lock);
+ } else {
+ spin_unlock(&inode->i_lock);
+ /* Either someone else created the state, or nothing to free */
+ if (new)
+ nfs4_free_open_state(new);
+ }
+out:
+ return state;
+}
+
+/*
+ * Drop a reference to an open state. On the final put the state is
+ * unlinked from the inode and from its owner, the inode reference is
+ * released, the state is freed and the owner reference it held is
+ * dropped. state->state must be zero by then.
+ *
+ * Beware! Caller must be holding exactly one
+ * reference to clp->cl_sem and owner->so_sema!
+ */
+void nfs4_put_open_state(struct nfs4_state *state)
+{
+ struct inode *inode = state->inode;
+ struct nfs4_state_owner *owner = state->owner;
+
+ /* Takes inode->i_lock only when the count drops to zero */
+ if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
+ return;
+ if (!list_empty(&state->inode_states))
+ list_del(&state->inode_states);
+ spin_unlock(&inode->i_lock);
+ /* Done outside i_lock (presumably covered by owner->so_sema - confirm) */
+ list_del(&state->open_states);
+ iput(inode);
+ BUG_ON (state->state != 0);
+ nfs4_free_open_state(state);
+ nfs4_put_state_owner(owner);
+}
+
+/*
+ * Give up one open of 'state' for the modes in 'mode' (FMODE_READ and/or
+ * FMODE_WRITE), and tell the server through nfs4_do_close() if the set
+ * of modes still in use has changed.
+ *
+ * Beware! Caller must be holding neither clp->cl_sem nor
+ * owner->so_sema: both are taken here.
+ */
+void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+{
+ struct inode *inode = state->inode;
+ struct nfs4_state_owner *owner = state->owner;
+ struct nfs4_client *clp = owner->so_client;
+ int newstate;
+
+ /* Keep the owner alive across the final nfs4_put_open_state() */
+ atomic_inc(&owner->so_count);
+ down_read(&clp->cl_sem);
+ down(&owner->so_sema);
+ /* Protect against nfs4_find_state() */
+ spin_lock(&inode->i_lock);
+ if (mode & FMODE_READ)
+ state->nreaders--;
+ if (mode & FMODE_WRITE)
+ state->nwriters--;
+ if (state->nwriters == 0) {
+ if (state->nreaders == 0)
+ list_del_init(&state->inode_states);
+ /* See reclaim code */
+ list_move_tail(&state->open_states, &owner->so_states);
+ }
+ spin_unlock(&inode->i_lock);
+ newstate = 0;
+ if (state->state != 0) {
+ if (state->nreaders)
+ newstate |= FMODE_READ;
+ if (state->nwriters)
+ newstate |= FMODE_WRITE;
+ if (state->state == newstate)
+ goto out;
+ /* NOTE(review): on -EINPROGRESS we return with cl_sem, so_sema
+ * and both references still held; presumably the asynchronous
+ * close completion releases them - confirm in nfs4_do_close() */
+ if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
+ return;
+ }
+out:
+ nfs4_put_open_state(state);
+ up(&owner->so_sema);
+ nfs4_put_state_owner(owner);
+ up_read(&clp->cl_sem);
+}
+
+/*
+ * Walk state->lock_states looking for the lock_state that belongs to
+ * 'fl_owner'. A match is returned with its reference count raised;
+ * NULL if the owner has no lock_state on this open state.
+ */
+static struct nfs4_lock_state *
+__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	struct nfs4_lock_state *lsp;
+
+	list_for_each_entry(lsp, &state->lock_states, ls_locks) {
+		if (lsp->ls_owner == fl_owner) {
+			atomic_inc(&lsp->ls_count);
+			return lsp;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Locked wrapper around __nfs4_find_lock_state(): performs the lookup
+ * with state->state_lock held for reading. The returned lock_state (if
+ * any) carries an extra reference.
+ */
+struct nfs4_lock_state *
+nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+ struct nfs4_lock_state *lsp;
+ read_lock(&state->state_lock);
+ lsp = __nfs4_find_lock_state(state, fl_owner);
+ read_unlock(&state->state_lock);
+ return lsp;
+}
+
+/*
+ * Allocate a new, uninitialized lock_state for 'fl_owner': flags clear,
+ * zeroed stateid, reference count of one and a fresh lockowner id taken
+ * from the client under cl_lock. The structure is not linked into
+ * state->lock_states here. Returns NULL if the allocation fails.
+ *
+ * The caller must be holding state->lock_sema
+ */
+static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
+{
+ struct nfs4_lock_state *lsp;
+ struct nfs4_client *clp = state->owner->so_client;
+
+ lsp = kmalloc(sizeof(*lsp), GFP_KERNEL);
+ if (lsp == NULL)
+ return NULL;
+ lsp->ls_flags = 0;
+ lsp->ls_seqid = 0; /* arbitrary */
+ /* Placeholder; replaced by the real id below */
+ lsp->ls_id = -1;
+ memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
+ atomic_set(&lsp->ls_count, 1);
+ lsp->ls_owner = fl_owner;
+ INIT_LIST_HEAD(&lsp->ls_locks);
+ spin_lock(&clp->cl_lock);
+ lsp->ls_id = nfs4_alloc_lockowner_id(clp);
+ spin_unlock(&clp->cl_lock);
+ return lsp;
+}
+
+/*
+ * Look up the lock_state belonging to 'owner' on this open state; if
+ * none is found, hand back a freshly allocated, uninitialized one
+ * (NULL if that allocation fails).
+ *
+ * The caller must be holding state->lock_sema and clp->cl_sem
+ */
+struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
+{
+	struct nfs4_lock_state *lsp = nfs4_find_lock_state(state, owner);
+
+	if (lsp != NULL)
+		return lsp;
+	return nfs4_alloc_lock_state(state, owner);
+}
+
+/*
+ * Byte-range lock aware helper that picks the stateid for a read/write
+ * request: the lock stateid of fl_owner when LK_STATE_IN_USE is set and
+ * that owner has a lock_state, otherwise the open stateid.
+ */
+void
+nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
+{
+	struct nfs4_lock_state *lsp = NULL;
+
+	if (test_bit(LK_STATE_IN_USE, &state->flags))
+		lsp = nfs4_find_lock_state(state, fl_owner);
+	if (lsp == NULL) {
+		/* No lock state for this owner: use the open stateid */
+		memcpy(dst, &state->stateid, sizeof(*dst));
+		return;
+	}
+	memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+	/* Drop the reference taken by the lookup */
+	nfs4_put_lock_state(lsp);
+}
+
+/*
+ * Bump the lock seqid when the operation succeeded or failed with a
+ * seqid-mutating error.
+ *
+ * Called with state->lock_sema and clp->cl_sem held.
+ */
+void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+{
+	if (status != NFS_OK && !seqid_mutating_err(-status))
+		return;
+	lsp->ls_seqid++;
+}
+
+/*
+ * Check whether the unlock request (type FL_UNLK) affects the lock fl.
+ *
+ * fl and request must have the same posix owner
+ *
+ * return:
+ *   0 -> fl is not consumed by request
+ *   1 -> fl is consumed by request (its whole range lies inside request's)
+ */
+static int
+nfs4_check_unlock(struct file_lock *fl, struct file_lock *request)
+{
+	int starts_inside = fl->fl_start >= request->fl_start;
+	int ends_inside = fl->fl_end <= request->fl_end;
+
+	return starts_inside && ends_inside;
+}
+
+/*
+ * Post an initialized lock_state on the state->lock_states list.
+ *
+ * Does nothing if lsp is already linked.  Otherwise takes an extra
+ * reference on it, adds it to the list and sets LK_STATE_IN_USE, all
+ * list changes being made under the state_lock write lock.
+ */
+void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+{
+	if (list_empty(&lsp->ls_locks)) {
+		atomic_inc(&lsp->ls_count);
+		write_lock(&state->state_lock);
+		list_add(&lsp->ls_locks, &state->lock_states);
+		set_bit(LK_STATE_IN_USE, &state->flags);
+		write_unlock(&state->state_lock);
+	}
+}
+
+/*
+ * Decide whether to 'reap' (unlink and release) a lock_state on unlock:
+ * 1) search i_flock for POSIX file_locks whose owner is lsp->ls_owner.
+ * 2) determine whether the unlock request consumes every lock found.
+ * if so, reap: remove lsp from state->lock_states, clear
+ * LK_STATE_IN_USE if that list is now empty, and drop a reference.
+ *
+ * else, don't reap.
+ *
+ */
+void
+nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp)
+{
+ struct inode *inode = state->inode;
+ struct file_lock *fl;
+
+ for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
+ /* Only POSIX locks held by this lock owner are of interest */
+ if (!(fl->fl_flags & FL_POSIX))
+ continue;
+ if (fl->fl_owner != lsp->ls_owner)
+ continue;
+ /* Exit if we find at least one lock which is not consumed */
+ if (nfs4_check_unlock(fl,request) == 0)
+ return;
+ }
+
+ /* Every matching lock is consumed: unlink the lock_state */
+ write_lock(&state->state_lock);
+ list_del_init(&lsp->ls_locks);
+ if (list_empty(&state->lock_states))
+ clear_bit(LK_STATE_IN_USE, &state->flags);
+ write_unlock(&state->state_lock);
+ nfs4_put_lock_state(lsp);
+}
+
+/*
+ * Drop one reference on a lock_state.  When the last reference goes
+ * away the structure is freed; it must no longer be linked on any
+ * ls_locks list at that point (checked with BUG_ON).
+ */
+void
+nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+{
+	if (atomic_dec_and_test(&lsp->ls_count)) {
+		BUG_ON (!list_empty(&lsp->ls_locks));
+		kfree(lsp);
+	}
+}
+
+/*
+* Called with sp->so_sema and clp->cl_sem held.
+*
+* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
+* failed with a seqid incrementing error -
+* see comments nfs_fs.h:seqid_mutating_err()
+*
+* A status of -NFS4ERR_BAD_SEQID additionally drops the state owner via
+* nfs4_drop_state_owner().
+*/
+void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
+{
+ /* status is a negative errno-style value; the helper takes it negated */
+ if (status == NFS_OK || seqid_mutating_err(-status))
+ sp->so_seqid++;
+ /* If the server returns BAD_SEQID, unhash state_owner here */
+ if (status == -NFS4ERR_BAD_SEQID)
+ nfs4_drop_state_owner(sp);
+}
+
+static int reclaimer(void *);
+/*
+ * Arguments handed from nfs4_recover_state() to the reclaimer() thread.
+ */
+struct reclaimer_args {
+ struct nfs4_client *clp; /* client whose state is to be recovered */
+ struct completion complete; /* completed by reclaimer() once it holds its own reference on clp */
+};
+
+/*
+ * State recovery routine
+ *
+ * 'data' is the struct nfs4_client to recover.  Starts a kernel thread
+ * running reclaimer() and waits until that thread signals args.complete.
+ * The arguments live on this function's stack, so the wait keeps them
+ * valid while the thread reads them.
+ *
+ * If the thread cannot be started, NFS4CLNT_OK is set again and the
+ * waiters on cl_waitq and cl_rpcwaitq are woken.
+ */
+void
+nfs4_recover_state(void *data)
+{
+ struct nfs4_client *clp = (struct nfs4_client *)data;
+ struct reclaimer_args args = {
+ .clp = clp,
+ };
+ might_sleep();
+
+ init_completion(&args.complete);
+
+ if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0)
+ goto out_failed_clear;
+ /* reclaimer() completes this once it has read args */
+ wait_for_completion(&args.complete);
+ return;
+out_failed_clear:
+ set_bit(NFS4CLNT_OK, &clp->cl_state);
+ wake_up_all(&clp->cl_waitq);
+ rpc_wake_up(&clp->cl_rpcwaitq);
+}
+
+/*
+ * Schedule a state recovery attempt
+ *
+ * Only the caller that actually clears NFS4CLNT_OK queues the
+ * cl_recoverd work item.  A NULL clp is ignored.
+ */
+void
+nfs4_schedule_state_recovery(struct nfs4_client *clp)
+{
+	if (clp != NULL && test_and_clear_bit(NFS4CLNT_OK, &clp->cl_state))
+		schedule_work(&clp->cl_recoverd);
+}
+
+/*
+ * Ask ops->recover_lock() to recover every POSIX lock on the inode that
+ * was taken through an open context using 'state'.
+ *
+ * Returns 0 unless a lock recovery fails with -NFS4ERR_STALE_CLIENTID,
+ * in which case that status is returned at once.  All other failures
+ * are skipped (unexpected ones are logged first).
+ */
+static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state)
+{
+	struct file_lock *fl = state->inode->i_flock;
+	int status;
+
+	for (; fl != NULL; fl = fl->fl_next) {
+		struct nfs_open_context *ctx;
+
+		if (!(fl->fl_flags & FL_POSIX))
+			continue;
+		ctx = (struct nfs_open_context *)fl->fl_file->private_data;
+		if (ctx->state != state)
+			continue;
+		status = ops->recover_lock(state, fl);
+		if (status >= 0)
+			continue;
+		switch (status) {
+		default:
+			printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
+					__FUNCTION__, status);
+			/* fall through */
+		case -NFS4ERR_EXPIRED:
+		case -NFS4ERR_NO_GRACE:
+		case -NFS4ERR_RECLAIM_BAD:
+		case -NFS4ERR_RECLAIM_CONFLICT:
+			/* kill_proc(fl->fl_owner, SIGLOST, 1); */
+			break;
+		case -NFS4ERR_STALE_CLIENTID:
+			return status;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Recover every open state of state owner 'sp' through ops->recover_open(),
+ * then the byte-range locks of each state that was recovered.
+ *
+ * Returns 0 on completion.  A negative status is returned as soon as
+ * lock recovery fails, or when open recovery fails with
+ * -NFS4ERR_EXPIRED, -NFS4ERR_NO_GRACE or -NFS4ERR_STALE_CLIENTID.
+ * Any other open recovery failure marks that state as closed
+ * (zero stateid, state->state = 0) and moves on to the next one.
+ */
+static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp)
+{
+ struct nfs4_state *state;
+ struct nfs4_lock_state *lock;
+ int status = 0;
+
+ /* Note: we rely on the sp->so_states list being ordered
+ * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
+ * states first.
+ * This is needed to ensure that the server won't give us any
+ * read delegations that we have to return if, say, we are
+ * recovering after a network partition or a reboot from a
+ * server that doesn't support a grace period.
+ */
+ list_for_each_entry(state, &sp->so_states, open_states) {
+ /* state == 0 marks a file that is already 'closed' */
+ if (state->state == 0)
+ continue;
+ status = ops->recover_open(sp, state);
+ /* Clear the flag on every lock_state; it is checked again below */
+ list_for_each_entry(lock, &state->lock_states, ls_locks)
+ lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
+ if (status >= 0) {
+ status = nfs4_reclaim_locks(ops, state);
+ if (status < 0)
+ goto out_err;
+ /* Report lock_states that were not re-initialized */
+ list_for_each_entry(lock, &state->lock_states, ls_locks) {
+ if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
+ printk("%s: Lock reclaim failed!\n",
+ __FUNCTION__);
+ }
+ continue;
+ }
+ switch (status) {
+ default:
+ printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
+ __FUNCTION__, status);
+ /* fall through */
+ case -ENOENT:
+ case -NFS4ERR_RECLAIM_BAD:
+ case -NFS4ERR_RECLAIM_CONFLICT:
+ /*
+ * Open state on this file cannot be recovered
+ * All we can do is revert to using the zero stateid.
+ */
+ memset(state->stateid.data, 0,
+ sizeof(state->stateid.data));
+ /* Mark the file as being 'closed' */
+ state->state = 0;
+ break;
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_NO_GRACE:
+ case -NFS4ERR_STALE_CLIENTID:
+ goto out_err;
+ }
+ }
+ return 0;
+out_err:
+ return status;
+}
+
+/*
+ * Body of the state recovery thread started by nfs4_recover_state().
+ *
+ * 'ptr' points at a struct reclaimer_args.  The thread takes its own
+ * reference on the client and then signals args->complete; args is not
+ * touched after that point.
+ *
+ * With the big kernel lock and clp->cl_sem (write) held it probes the
+ * server with nfs4_proc_renew(), picks a set of recovery ops from the
+ * result, re-initializes the client and reclaims the open state of
+ * every state owner.  On exit NFS4CLNT_OK is set again and waiters are
+ * woken.  Always returns 0.
+ */
+static int reclaimer(void *ptr)
+{
+ struct reclaimer_args *args = (struct reclaimer_args *)ptr;
+ struct nfs4_client *clp = args->clp;
+ struct nfs4_state_owner *sp;
+ struct nfs4_state_recovery_ops *ops;
+ int status = 0;
+
+ daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr));
+ allow_signal(SIGKILL);
+
+ /* Pin the client before letting nfs4_recover_state() return */
+ atomic_inc(&clp->cl_count);
+ complete(&args->complete);
+
+ /* Ensure exclusive access to NFSv4 state */
+ lock_kernel();
+ down_write(&clp->cl_sem);
+ /* Are there any NFS mounts out there? */
+ if (list_empty(&clp->cl_superblocks))
+ goto out;
+restart_loop:
+ status = nfs4_proc_renew(clp);
+ switch (status) {
+ case 0:
+ case -NFS4ERR_CB_PATH_DOWN:
+ /* No state to reclaim; CB_PATH_DOWN is handled after "out" */
+ goto out;
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_LEASE_MOVED:
+ ops = &nfs4_reboot_recovery_ops;
+ break;
+ default:
+ ops = &nfs4_network_partition_recovery_ops;
+ };
+ status = __nfs4_init_client(clp);
+ if (status)
+ goto out_error;
+ /* Mark all delegations for reclaim */
+ nfs_delegation_mark_reclaim(clp);
+ /* Note: list is protected by exclusive lock on cl->cl_sem */
+ list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+ status = nfs4_reclaim_open_state(ops, sp);
+ if (status < 0) {
+ /* Retry this owner once with the network partition ops */
+ if (status == -NFS4ERR_NO_GRACE) {
+ ops = &nfs4_network_partition_recovery_ops;
+ status = nfs4_reclaim_open_state(ops, sp);
+ }
+ /* Start over from the RENEW probe */
+ if (status == -NFS4ERR_STALE_CLIENTID)
+ goto restart_loop;
+ if (status == -NFS4ERR_EXPIRED)
+ goto restart_loop;
+ }
+ }
+ nfs_delegation_reap_unclaimed(clp);
+out:
+ set_bit(NFS4CLNT_OK, &clp->cl_state);
+ up_write(&clp->cl_sem);
+ unlock_kernel();
+ wake_up_all(&clp->cl_waitq);
+ rpc_wake_up(&clp->cl_rpcwaitq);
+ if (status == -NFS4ERR_CB_PATH_DOWN)
+ nfs_handle_cb_pathdown(clp);
+ /* Drop the reference taken at thread start */
+ nfs4_put_client(clp);
+ return 0;
+out_error:
+ printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
+ NIPQUAD(clp->cl_addr.s_addr), -status);
+ goto out;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
new file mode 100644
index 000000000000..5f4de05763c9
--- /dev/null
+++ b/fs/nfs/nfs4xdr.c
@@ -0,0 +1,4034 @@
+/*
+ * fs/nfs/nfs4xdr.c
+ *
+ * Client-side XDR for NFSv4.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_idmap.h>
+
+#define NFSDBG_FACILITY NFSDBG_XDR
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO EIO
+
+static int nfs_stat_to_errno(int);
+
+/*
+ * NFSv4 COMPOUND tags are only wanted for debugging purposes
+ *
+ * NFS4_MAXTAGLEN is the largest tag length, in bytes, that
+ * encode_compound_hdr() accepts; it BUGs on anything longer.
+ */
+#ifdef DEBUG
+#define NFS4_MAXTAGLEN 20
+#else
+#define NFS4_MAXTAGLEN 0
+#endif
+
+/*
+ * Upper bounds on the encoded size of individual operations.
+ * The values are counted in 32-bit XDR words: byte quantities such as
+ * NFS4_FHSIZE or NFS4_MAXNAMLEN are converted with ">> 2".
+ */
+
+/* lock,open owner id:
+ * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2)
+ */
+#define owner_id_maxsz (1 + 1)
+#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
+#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
+#define op_encode_hdr_maxsz (1)
+#define op_decode_hdr_maxsz (2)
+#define encode_putfh_maxsz (op_encode_hdr_maxsz + 1 + \
+ (NFS4_FHSIZE >> 2))
+#define decode_putfh_maxsz (op_decode_hdr_maxsz)
+#define encode_putrootfh_maxsz (op_encode_hdr_maxsz)
+#define decode_putrootfh_maxsz (op_decode_hdr_maxsz)
+#define encode_getfh_maxsz (op_encode_hdr_maxsz)
+#define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \
+ ((3+NFS4_FHSIZE) >> 2))
+#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3)
+/* One length word plus the name/path bytes rounded up to whole words */
+#define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz)
+#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \
+ nfs4_fattr_bitmap_maxsz)
+#define encode_savefh_maxsz (op_encode_hdr_maxsz)
+#define decode_savefh_maxsz (op_decode_hdr_maxsz)
+#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2)
+#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11)
+#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
+#define decode_renew_maxsz (op_decode_hdr_maxsz)
+#define encode_setclientid_maxsz \
+ (op_encode_hdr_maxsz + \
+ 4 /*server->ip_addr*/ + \
+ 1 /*Netid*/ + \
+ 6 /*uaddr*/ + \
+ 6 + (NFS4_VERIFIER_SIZE >> 2))
+#define decode_setclientid_maxsz \
+ (op_decode_hdr_maxsz + \
+ 2 + \
+ 1024) /* large value for CLID_INUSE */
+#define encode_setclientid_confirm_maxsz \
+ (op_encode_hdr_maxsz + \
+ 3 + (NFS4_VERIFIER_SIZE >> 2))
+#define decode_setclientid_confirm_maxsz \
+ (op_decode_hdr_maxsz)
+/*
+ * LOOKUP carries a single component name (see encode_lookup()), so its
+ * argument is bounded by the maximum name size, not by the filehandle
+ * size.  nfs4_name_maxsz already includes the length word.
+ */
+#define encode_lookup_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
+/*
+ * Bounds for the directory-modifying operations, in 32-bit words.
+ * NOTE(review): the bare 5 and 8 constants in the decode sizes are
+ * presumably the change_info (and, for CREATE, result bitmap) words of
+ * the reply - confirm against the decoders.
+ */
+#define encode_remove_maxsz (op_encode_hdr_maxsz + \
+ nfs4_name_maxsz)
+#define encode_rename_maxsz (op_encode_hdr_maxsz + \
+ 2 * nfs4_name_maxsz)
+#define decode_rename_maxsz (op_decode_hdr_maxsz + 5 + 5)
+#define encode_link_maxsz (op_encode_hdr_maxsz + \
+ nfs4_name_maxsz)
+#define decode_link_maxsz (op_decode_hdr_maxsz + 5)
+#define encode_symlink_maxsz (op_encode_hdr_maxsz + \
+ 1 + nfs4_name_maxsz + \
+ nfs4_path_maxsz + \
+ nfs4_fattr_bitmap_maxsz)
+#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8)
+#define encode_create_maxsz (op_encode_hdr_maxsz + \
+ 2 + nfs4_name_maxsz + \
+ nfs4_fattr_bitmap_maxsz)
+#define decode_create_maxsz (op_decode_hdr_maxsz + 8)
+#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
+#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
+/* Fixed fallback sizes, not derived from the per-operation bounds */
+#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */
+#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */
+/*
+ * Per-procedure size estimates, each the sum of a compound header and
+ * the operations the procedure sends (NFS4_enc_*) or expects back
+ * (NFS4_dec_*).  Like the *_maxsz values they are built from, they are
+ * counted in 32-bit words.
+ */
+#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 7)
+#define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 2)
+#define NFS4_enc_readlink_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz)
+#define NFS4_dec_readlink_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz)
+#define NFS4_enc_readdir_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 9)
+#define NFS4_dec_readdir_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 2)
+#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 8)
+#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 4)
+#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 3)
+#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 2)
+#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + \
+ 13 + 3 + 2 + 64 + \
+ encode_getattr_maxsz + \
+ encode_getfh_maxsz)
+#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
+ decode_getattr_maxsz + \
+ decode_getfh_maxsz)
+#define NFS4_enc_open_confirm_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 5)
+#define NFS4_dec_open_confirm_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 4)
+#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + \
+ 11)
+#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + \
+ 4 + 5 + 2 + 3)
+#define NFS4_enc_open_downgrade_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 7)
+#define NFS4_dec_open_downgrade_sz \
+ (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 4)
+#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 5)
+#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 4)
+#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 4 + \
+ nfs4_fattr_bitmap_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 3)
+#define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_fsinfo_maxsz)
+#define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_fsinfo_maxsz)
+#define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \
+ encode_renew_maxsz)
+#define NFS4_dec_renew_sz (compound_decode_hdr_maxsz + \
+ decode_renew_maxsz)
+#define NFS4_enc_setclientid_sz (compound_encode_hdr_maxsz + \
+ encode_setclientid_maxsz)
+#define NFS4_dec_setclientid_sz (compound_decode_hdr_maxsz + \
+ decode_setclientid_maxsz)
+#define NFS4_enc_setclientid_confirm_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_setclientid_confirm_maxsz + \
+ encode_putrootfh_maxsz + \
+ encode_fsinfo_maxsz)
+#define NFS4_dec_setclientid_confirm_sz \
+ (compound_decode_hdr_maxsz + \
+ decode_setclientid_confirm_maxsz + \
+ decode_putrootfh_maxsz + \
+ decode_fsinfo_maxsz)
+/*
+ * Size estimates for the byte-range lock procedures (LOCK, LOCKT,
+ * LOCKU), in 32-bit words.  Each one also budgets for a GETATTR.
+ * LOCKT reuses the LOCK reply size.
+ */
+#define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getattr_maxsz + \
+ op_encode_hdr_maxsz + \
+ 1 + 1 + 2 + 2 + \
+ 1 + 4 + 1 + 2 + \
+ owner_id_maxsz)
+#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getattr_maxsz + \
+ op_decode_hdr_maxsz + \
+ 2 + 2 + 1 + 2 + \
+ owner_id_maxsz)
+#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getattr_maxsz + \
+ op_encode_hdr_maxsz + \
+ 1 + 2 + 2 + 2 + \
+ owner_id_maxsz)
+#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz)
+#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getattr_maxsz + \
+ op_encode_hdr_maxsz + \
+ 1 + 1 + 4 + 2 + 2)
+#define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getattr_maxsz + \
+ op_decode_hdr_maxsz + 4)
+/*
+ * Size estimates, in 32-bit words, for the remaining procedures:
+ * access, getattr, lookup, remove, rename, link, symlink, create,
+ * pathconf, statfs, server_caps and delegreturn.
+ */
+#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ op_encode_hdr_maxsz + 1)
+#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 2)
+#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getattr_maxsz)
+#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_lookup_maxsz + \
+ encode_getattr_maxsz + \
+ encode_getfh_maxsz)
+#define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + \
+ decode_getattr_maxsz + \
+ decode_getfh_maxsz)
+#define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
+ encode_putrootfh_maxsz + \
+ encode_getattr_maxsz + \
+ encode_getfh_maxsz)
+#define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \
+ decode_putrootfh_maxsz + \
+ decode_getattr_maxsz + \
+ decode_getfh_maxsz)
+#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_remove_maxsz)
+#define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 5)
+#define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_savefh_maxsz + \
+ encode_putfh_maxsz + \
+ encode_rename_maxsz)
+#define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_savefh_maxsz + \
+ decode_putfh_maxsz + \
+ decode_rename_maxsz)
+#define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_savefh_maxsz + \
+ encode_putfh_maxsz + \
+ encode_link_maxsz)
+#define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_savefh_maxsz + \
+ decode_putfh_maxsz + \
+ decode_link_maxsz)
+#define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_symlink_maxsz + \
+ encode_getattr_maxsz + \
+ encode_getfh_maxsz)
+#define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_symlink_maxsz + \
+ decode_getattr_maxsz + \
+ decode_getfh_maxsz)
+#define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_create_maxsz + \
+ encode_getattr_maxsz + \
+ encode_getfh_maxsz)
+#define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_create_maxsz + \
+ decode_getattr_maxsz + \
+ decode_getfh_maxsz)
+#define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_pathconf_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getattr_maxsz)
+#define NFS4_enc_statfs_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_statfs_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ op_decode_hdr_maxsz + 12)
+#define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \
+ decode_getattr_maxsz)
+/*
+ * NOTE(review): the request side budgets for a PUTFH but the reply side
+ * has no decode_putfh_maxsz term - confirm against the delegreturn
+ * decoder whether the reply estimate is two words short.
+ */
+#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_delegreturn_maxsz)
+#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
+ decode_delegreturn_maxsz)
+
+/*
+ * Table pairing a file-type mode bit pattern (S_IF*) with the matching
+ * NF* file type constant.
+ * NOTE(review): presumably indexed by the file type value read off the
+ * wire, with the 0/NFNON rows covering values that have no local
+ * equivalent - confirm against the code that uses the table.
+ */
+static struct {
+ unsigned int mode;
+ unsigned int nfs2type;
+} nfs_type2fmt[] = {
+ { 0, NFNON },
+ { S_IFREG, NFREG },
+ { S_IFDIR, NFDIR },
+ { S_IFBLK, NFBLK },
+ { S_IFCHR, NFCHR },
+ { S_IFLNK, NFLNK },
+ { S_IFSOCK, NFSOCK },
+ { S_IFIFO, NFFIFO },
+ { 0, NFNON },
+ { 0, NFNON },
+};
+
+/*
+ * In-memory form of a COMPOUND header.  encode_compound_hdr() writes
+ * taglen, tag and nops; status is not used on the encode side.
+ */
+struct compound_hdr {
+ int32_t status;
+ uint32_t nops; /* number of operations in the compound */
+ uint32_t taglen; /* length of tag in bytes */
+ char * tag;
+};
+
+/*
+ * START OF "GENERIC" ENCODE ROUTINES.
+ * These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ *
+ * All of the macros below operate on a local variable named "p" (the
+ * current write position) in the calling function; RESERVE_SPACE also
+ * expects a local "xdr" stream pointer.
+ */
+/* Store one 32-bit value in network byte order and advance p */
+#define WRITE32(n) *p++ = htonl(n)
+/* Store a 64-bit value as two 32-bit words, most significant word first */
+#define WRITE64(n) do { \
+ *p++ = htonl((uint32_t)((n) >> 32)); \
+ *p++ = htonl((uint32_t)(n)); \
+} while (0)
+/* Copy nbytes of opaque data and advance p to what xdr_encode_opaque_fixed() returns */
+#define WRITEMEM(ptr,nbytes) do { \
+ p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
+} while (0)
+
+/* Reserve nbytes in the stream and point p at them; logs and BUGs on failure */
+#define RESERVE_SPACE(nbytes) do { \
+ p = xdr_reserve_space(xdr, nbytes); \
+ if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
+ BUG_ON(!p); \
+} while (0)
+
+/*
+ * Append a variable-length opaque/string to the stream: a 4-byte length
+ * followed by len bytes of str.  BUGs if the space cannot be reserved.
+ */
+static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
+{
+	uint32_t *buf = xdr_reserve_space(xdr, 4 + len);
+
+	BUG_ON(buf == NULL);
+	xdr_encode_opaque(buf, str, len);
+}
+
+/*
+ * Encode the COMPOUND header: tag length, tag bytes, minor version and
+ * the number of operations that follow.  The tag must not exceed
+ * NFS4_MAXTAGLEN bytes.  Always returns 0.
+ */
+static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
+{
+ uint32_t *p;
+
+ dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
+ BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
+ /* 12 = taglen + minor version + nops; the tag is padded to whole words */
+ RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2));
+ WRITE32(hdr->taglen);
+ WRITEMEM(hdr->tag, hdr->taglen);
+ WRITE32(NFS4_MINOR_VERSION);
+ WRITE32(hdr->nops);
+ return 0;
+}
+
+/*
+ * Append the NFS4_VERIFIER_SIZE bytes of a verifier to the stream as
+ * fixed-length opaque data.  BUGs if the space cannot be reserved.
+ */
+static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
+{
+	uint32_t *buf = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+
+	BUG_ON(buf == NULL);
+	xdr_encode_opaque_fixed(buf, verf->data, NFS4_VERIFIER_SIZE);
+}
+
+/*
+ * Encode the attribute bitmap and attribute values selected by
+ * iap->ia_valid.
+ *
+ * Layout written to the stream:
+ *   bitmap length (2), bitmap word 0, bitmap word 1, attribute data
+ *   length, then the attribute values in the order size, mode, owner,
+ *   owner_group, time_access_set, time_modify_set.
+ *
+ * uids and gids are translated to names with nfs_map_uid_to_name() and
+ * nfs_map_gid_to_group(); if a translation fails a warning is logged
+ * and "nobody" is sent instead.
+ *
+ * Always returns 0.  A mismatch between the space reserved and the
+ * space actually written triggers BUG().
+ */
+static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
+{
+	char owner_name[IDMAP_NAMESZ];
+	char owner_group[IDMAP_NAMESZ];
+	int owner_namelen = 0;
+	int owner_grouplen = 0;
+	uint32_t *p;
+	uint32_t *q;
+	int len;
+	uint32_t bmval0 = 0;
+	uint32_t bmval1 = 0;
+
+	/*
+	 * We reserve enough space to write the entire attribute buffer at
+	 * once.  The fixed part is 16 bytes: 4 (bitmap length) + 8 (two
+	 * bitmap words) + 4 (attribute data length).  Each selected
+	 * attribute then adds its own encoded size.
+	 */
+	len = 16;
+
+	if (iap->ia_valid & ATTR_SIZE)
+		len += 8;
+	if (iap->ia_valid & ATTR_MODE)
+		len += 4;
+	if (iap->ia_valid & ATTR_UID) {
+		owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name);
+		if (owner_namelen < 0) {
+			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
+			       iap->ia_uid);
+			/* XXX */
+			strcpy(owner_name, "nobody");
+			owner_namelen = sizeof("nobody") - 1;
+		}
+		/* length word + name padded to a whole number of words */
+		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group);
+		if (owner_grouplen < 0) {
+			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
+			       iap->ia_gid);
+			strcpy(owner_group, "nobody");
+			owner_grouplen = sizeof("nobody") - 1;
+		}
+		len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
+	}
+	/* Explicit time: selector + 64-bit seconds + nseconds = 16 bytes */
+	if (iap->ia_valid & ATTR_ATIME_SET)
+		len += 16;
+	else if (iap->ia_valid & ATTR_ATIME)
+		len += 4;
+	if (iap->ia_valid & ATTR_MTIME_SET)
+		len += 16;
+	else if (iap->ia_valid & ATTR_MTIME)
+		len += 4;
+	RESERVE_SPACE(len);
+
+	/*
+	 * We write the bitmap length now, but leave the bitmap and the
+	 * attribute buffer length to be backfilled at the end of this
+	 * routine: q remembers where they go, p skips over them.
+	 */
+	WRITE32(2);
+	q = p;
+	p += 3;
+
+	if (iap->ia_valid & ATTR_SIZE) {
+		bmval0 |= FATTR4_WORD0_SIZE;
+		WRITE64(iap->ia_size);
+	}
+	if (iap->ia_valid & ATTR_MODE) {
+		bmval1 |= FATTR4_WORD1_MODE;
+		WRITE32(iap->ia_mode);
+	}
+	if (iap->ia_valid & ATTR_UID) {
+		bmval1 |= FATTR4_WORD1_OWNER;
+		WRITE32(owner_namelen);
+		WRITEMEM(owner_name, owner_namelen);
+	}
+	if (iap->ia_valid & ATTR_GID) {
+		bmval1 |= FATTR4_WORD1_OWNER_GROUP;
+		WRITE32(owner_grouplen);
+		WRITEMEM(owner_group, owner_grouplen);
+	}
+	if (iap->ia_valid & ATTR_ATIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);
+		/* The access time must come from ia_atime, not ia_mtime */
+		WRITE32(iap->ia_atime.tv_sec);
+		WRITE32(iap->ia_atime.tv_nsec);
+	}
+	else if (iap->ia_valid & ATTR_ATIME) {
+		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+	if (iap->ia_valid & ATTR_MTIME_SET) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_CLIENT_TIME);
+		WRITE32(0);
+		WRITE32(iap->ia_mtime.tv_sec);
+		WRITE32(iap->ia_mtime.tv_nsec);
+	}
+	else if (iap->ia_valid & ATTR_MTIME) {
+		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
+		WRITE32(NFS4_SET_TO_SERVER_TIME);
+	}
+
+	/*
+	 * Now we backfill the bitmap and the attribute buffer length.
+	 * q sits 4 bytes (the bitmap length word) into the reserved area,
+	 * hence the "+ 4" in the consistency check.
+	 */
+	if (len != ((char *)p - (char *)q) + 4) {
+		printk ("encode_attr: Attr length calculation error! %u != %Zu\n",
+				len, ((char *)p - (char *)q) + 4);
+		BUG();
+	}
+	/* Attribute data length excludes the two bitmap words and itself */
+	len = (char *)p - (char *)q - 12;
+	*q++ = htonl(bmval0);
+	*q++ = htonl(bmval1);
+	*q++ = htonl(len);
+
+	return 0;
+}
+
+/* Encode an ACCESS operation: opcode followed by the access mask. Returns 0. */
+static int encode_access(struct xdr_stream *xdr, u32 access)
+{
+ uint32_t *p;
+
+ RESERVE_SPACE(8);
+ WRITE32(OP_ACCESS);
+ WRITE32(access);
+
+ return 0;
+}
+
+/* Encode a CLOSE operation: opcode, seqid, then the stateid bytes. Returns 0. */
+static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
+{
+ uint32_t *p;
+
+ /* 8 = opcode + seqid */
+ RESERVE_SPACE(8+sizeof(arg->stateid.data));
+ WRITE32(OP_CLOSE);
+ WRITE32(arg->seqid);
+ WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+
+ return 0;
+}
+
+/* Encode a COMMIT operation: opcode, 64-bit offset, 32-bit count. Returns 0. */
+static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args)
+{
+ uint32_t *p;
+
+ /* 16 = opcode (4) + offset (8) + count (4) */
+ RESERVE_SPACE(16);
+ WRITE32(OP_COMMIT);
+ WRITE64(args->offset);
+ WRITE32(args->count);
+
+ return 0;
+}
+
+/*
+ * Encode a CREATE operation: opcode, file type, type-specific data
+ * (link target for NF4LNK, two specdata words for NF4BLK/NF4CHR, nothing
+ * for other types), the new object's name and finally its attributes.
+ * Returns whatever encode_attrs() returns.
+ */
+static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create)
+{
+ uint32_t *p;
+
+ RESERVE_SPACE(8);
+ WRITE32(OP_CREATE);
+ WRITE32(create->ftype);
+
+ switch (create->ftype) {
+ case NF4LNK:
+ /* symlink target: length word followed by the path bytes */
+ RESERVE_SPACE(4 + create->u.symlink->len);
+ WRITE32(create->u.symlink->len);
+ WRITEMEM(create->u.symlink->name, create->u.symlink->len);
+ break;
+
+ case NF4BLK: case NF4CHR:
+ RESERVE_SPACE(8);
+ WRITE32(create->u.device.specdata1);
+ WRITE32(create->u.device.specdata2);
+ break;
+
+ default:
+ break;
+ }
+
+ /* name of the object to create: length word followed by the bytes */
+ RESERVE_SPACE(4 + create->name->len);
+ WRITE32(create->name->len);
+ WRITEMEM(create->name->name, create->name->len);
+
+ return encode_attrs(xdr, create->attrs, create->server);
+}
+
+/* Encode a GETATTR operation with a one-word attribute bitmap. Returns 0. */
+static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap)
+{
+ uint32_t *p;
+
+ RESERVE_SPACE(12);
+ WRITE32(OP_GETATTR);
+ WRITE32(1); /* bitmap length in words */
+ WRITE32(bitmap);
+ return 0;
+}
+
+/* Encode a GETATTR operation with a two-word attribute bitmap. Returns 0. */
+static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1)
+{
+ uint32_t *p;
+
+ RESERVE_SPACE(16);
+ WRITE32(OP_GETATTR);
+ WRITE32(2); /* bitmap length in words */
+ WRITE32(bm0);
+ WRITE32(bm1);
+ return 0;
+}
+
+/*
+ * Encode a GETATTR for the attributes in nfs4_fattr_bitmap, restricted
+ * to those also set in the caller's two-word bitmask.
+ */
+static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask)
+{
+	extern u32 nfs4_fattr_bitmap[];
+	uint32_t word0 = bitmask[0] & nfs4_fattr_bitmap[0];
+	uint32_t word1 = bitmask[1] & nfs4_fattr_bitmap[1];
+
+	return encode_getattr_two(xdr, word0, word1);
+}
+
+/*
+ * Encode a GETATTR for the attributes in nfs4_fsinfo_bitmap, restricted
+ * to those also set in the caller's two-word bitmask.
+ */
+static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask)
+{
+	extern u32 nfs4_fsinfo_bitmap[];
+	uint32_t word0 = bitmask[0] & nfs4_fsinfo_bitmap[0];
+	uint32_t word1 = bitmask[1] & nfs4_fsinfo_bitmap[1];
+
+	return encode_getattr_two(xdr, word0, word1);
+}
+
+/* Encode a GETFH operation, which consists of the opcode only. Returns 0. */
+static int encode_getfh(struct xdr_stream *xdr)
+{
+ uint32_t *p;
+
+ RESERVE_SPACE(4);
+ WRITE32(OP_GETFH);
+
+ return 0;
+}
+
+/* Encode a LINK operation: opcode, name length, name bytes. Returns 0. */
+static int encode_link(struct xdr_stream *xdr, const struct qstr *name)
+{
+ uint32_t *p;
+
+ /* 8 = opcode + length word */
+ RESERVE_SPACE(8 + name->len);
+ WRITE32(OP_LINK);
+ WRITE32(name->len);
+ WRITEMEM(name->name, name->len);
+
+ return 0;
+}
+
+/*
+ * Encode a LOCK operation.  Returns 0.
+ *
+ * Sizes in bytes:
+ * opcode,type,reclaim,offset,length,new_lock_owner = 32
+ * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
+ * (the 40 includes the 4-byte length word that precedes lock_owner.id)
+ * stateid,seqid for an existing lock owner = 20
+ */
+static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+{
+ uint32_t *p;
+ struct nfs_lock_opargs *opargs = arg->u.lock;
+
+ RESERVE_SPACE(32);
+ WRITE32(OP_LOCK);
+ WRITE32(arg->type);
+ WRITE32(opargs->reclaim);
+ WRITE64(arg->offset);
+ WRITE64(arg->length);
+ WRITE32(opargs->new_lock_owner);
+ if (opargs->new_lock_owner){
+ /* New lock owner: send the open stateid plus the new owner */
+ struct nfs_open_to_lock *ol = opargs->u.open_lock;
+
+ RESERVE_SPACE(40);
+ WRITE32(ol->open_seqid);
+ WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid));
+ WRITE32(ol->lock_seqid);
+ WRITE64(ol->lock_owner.clientid);
+ WRITE32(4); /* length in bytes of the owner id that follows */
+ WRITE32(ol->lock_owner.id);
+ }
+ else {
+ /* Existing lock owner: send its lock stateid and seqid */
+ struct nfs_exist_lock *el = opargs->u.exist_lock;
+
+ RESERVE_SPACE(20);
+ WRITEMEM(&el->stateid, sizeof(el->stateid));
+ WRITE32(el->seqid);
+ }
+
+ return 0;
+}
+
+/*
+ * Encode a LOCKT operation: opcode, type, offset, length, then the lock
+ * owner (clientid, 4-byte id length, id).  Returns 0.
+ */
+static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+{
+ uint32_t *p;
+ struct nfs_lowner *opargs = arg->u.lockt;
+
+ /* 40 = 4 + 4 + 8 + 8 + 8 + 4 + 4 */
+ RESERVE_SPACE(40);
+ WRITE32(OP_LOCKT);
+ WRITE32(arg->type);
+ WRITE64(arg->offset);
+ WRITE64(arg->length);
+ WRITE64(opargs->clientid);
+ WRITE32(4); /* length in bytes of the owner id that follows */
+ WRITE32(opargs->id);
+
+ return 0;
+}
+
+/*
+ * Emit an OP_LOCKU operation (44 bytes):
+ *   opcode, type, seqid, stateid, offset(8), length(8)
+ * Always returns 0.
+ */
+static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+{
+	struct nfs_locku_opargs *opargs = arg->u.locku;
+	uint32_t *p;
+
+	RESERVE_SPACE(44);
+	WRITE32(OP_LOCKU);
+	WRITE32(arg->type);
+	WRITE32(opargs->seqid);
+	WRITEMEM(&opargs->stateid, sizeof(opargs->stateid));
+	WRITE64(arg->offset);
+	WRITE64(arg->length);
+	return 0;
+}
+
+/*
+ * Emit OP_LOOKUP followed by the component @name as a length-prefixed
+ * byte string.  Always returns 0.
+ */
+static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
+{
+	uint32_t *p;
+	int namelen = name->len;
+
+	/* opcode 4 + length 4 + name bytes */
+	RESERVE_SPACE(8 + namelen);
+	WRITE32(OP_LOOKUP);
+	WRITE32(namelen);
+	WRITEMEM(name->name, namelen);
+	return 0;
+}
+
+/*
+ * Emit the share_access / share_deny pair (8 bytes).
+ *
+ * share_access is derived from the FMODE_READ/FMODE_WRITE bits of
+ * @open_flags; any other combination (i.e. neither bit set) hits BUG().
+ * share_deny is always written as 0.
+ */
+static void encode_share_access(struct xdr_stream *xdr, int open_flags)
+{
+	const int mode = open_flags & (FMODE_READ|FMODE_WRITE);
+	uint32_t *p;
+
+	RESERVE_SPACE(8);
+	if (mode == FMODE_READ) {
+		WRITE32(NFS4_SHARE_ACCESS_READ);
+	} else if (mode == FMODE_WRITE) {
+		WRITE32(NFS4_SHARE_ACCESS_WRITE);
+	} else if (mode == (FMODE_READ|FMODE_WRITE)) {
+		WRITE32(NFS4_SHARE_ACCESS_BOTH);
+	} else {
+		BUG();
+	}
+	WRITE32(0);		/* for linux, share_deny = 0 always */
+}
+
+/*
+ * Emit the leading part of an OPEN operation, 32 bytes in three steps:
+ *   opcode 4, seqid 4                      (reserved here)
+ *   share_access 4, share_deny 4           (encode_share_access())
+ *   clientid 8, ownerlen 4, owner 4        (reserved here)
+ */
+static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(8);
+	WRITE32(OP_OPEN);
+	WRITE32(arg->seqid);
+
+	encode_share_access(xdr, arg->open_flags);
+
+	/* open owner: clientid followed by a 4-byte opaque id */
+	RESERVE_SPACE(16);
+	WRITE64(arg->clientid);
+	WRITE32(4);
+	WRITE32(arg->id);
+}
+
+/*
+ * Emit the create mode of an OPEN that creates the file.
+ *
+ * Without O_EXCL: NFS4_CREATE_UNCHECKED followed by the initial attributes.
+ * With O_EXCL:    NFS4_CREATE_EXCLUSIVE followed by the create verifier.
+ *
+ * NOTE(review): the results of encode_attrs()/encode_nfs4_verifier() are
+ * discarded because this helper returns void; encode_setattr() does check
+ * encode_attrs() for failure.  Worth confirming this is intentional.
+ */
+static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	if ((arg->open_flags & O_EXCL) == 0) {
+		WRITE32(NFS4_CREATE_UNCHECKED);
+		encode_attrs(xdr, arg->u.attrs, arg->server);
+	} else {
+		WRITE32(NFS4_CREATE_EXCLUSIVE);
+		encode_nfs4_verifier(xdr, &arg->u.verifier);
+	}
+}
+
+/*
+ * Emit the open type: NFS4_OPEN_NOCREATE when O_CREAT is clear, otherwise
+ * NFS4_OPEN_CREATE followed by the create mode.  Creating is only allowed
+ * for NFS4_OPEN_CLAIM_NULL; anything else trips BUG_ON().
+ */
+static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	if ((arg->open_flags & O_CREAT) == 0) {
+		WRITE32(NFS4_OPEN_NOCREATE);
+		return;
+	}
+
+	BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
+	WRITE32(NFS4_OPEN_CREATE);
+	encode_createmode(xdr, arg);
+}
+
+/*
+ * Emit the delegation type word.  @delegation_type is expressed in FMODE_*
+ * bits: 0 -> NONE, FMODE_READ -> READ, FMODE_READ|FMODE_WRITE -> WRITE.
+ * Any other value hits BUG().
+ */
+static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	if (delegation_type == 0) {
+		WRITE32(NFS4_OPEN_DELEGATE_NONE);
+	} else if (delegation_type == FMODE_READ) {
+		WRITE32(NFS4_OPEN_DELEGATE_READ);
+	} else if (delegation_type == (FMODE_WRITE|FMODE_READ)) {
+		WRITE32(NFS4_OPEN_DELEGATE_WRITE);
+	} else {
+		BUG();
+	}
+}
+
+/*
+ * Emit an NFS4_OPEN_CLAIM_NULL claim: the claim type word followed by the
+ * file name, written by encode_string().
+ */
+static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *name)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(NFS4_OPEN_CLAIM_NULL);
+
+	encode_string(xdr, name->len, name->name);
+}
+
+/*
+ * Emit an NFS4_OPEN_CLAIM_PREVIOUS claim: the claim type word followed by
+ * the delegation type (@type, in FMODE_* bits).
+ */
+static inline void encode_claim_previous(struct xdr_stream *xdr, int type)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(NFS4_OPEN_CLAIM_PREVIOUS);
+
+	encode_delegation_type(xdr, type);
+}
+
+/*
+ * Emit an NFS4_OPEN_CLAIM_DELEGATE_CUR claim: the claim type word, the
+ * delegation @stateid, then the file name written by encode_string().
+ */
+static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struct qstr *name, const nfs4_stateid *stateid)
+{
+	uint32_t *p;
+
+	/* claim type 4 + raw stateid bytes */
+	RESERVE_SPACE(4+sizeof(stateid->data));
+	WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+	WRITEMEM(stateid->data, sizeof(stateid->data));
+
+	encode_string(xdr, name->len, name->name);
+}
+
+/*
+ * Emit a complete OPEN operation: header, open type, then the claim
+ * selected by arg->claim.  An unrecognised claim type hits BUG().
+ * Always returns 0.
+ */
+static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg)
+{
+	encode_openhdr(xdr, arg);
+	encode_opentype(xdr, arg);
+
+	if (arg->claim == NFS4_OPEN_CLAIM_NULL)
+		encode_claim_null(xdr, arg->name);
+	else if (arg->claim == NFS4_OPEN_CLAIM_PREVIOUS)
+		encode_claim_previous(xdr, arg->u.delegation_type);
+	else if (arg->claim == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+		encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation);
+	else
+		BUG();
+
+	return 0;
+}
+
+/*
+ * Emit OP_OPEN_CONFIRM: opcode, open stateid, seqid.  Always returns 0.
+ */
+static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg)
+{
+	uint32_t *p;
+
+	/* opcode 4 + seqid 4 + raw stateid bytes */
+	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	WRITE32(OP_OPEN_CONFIRM);
+	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITE32(arg->seqid);
+	return 0;
+}
+
+/*
+ * Emit OP_OPEN_DOWNGRADE: opcode, stateid, seqid, then the new
+ * share_access/share_deny pair derived from arg->open_flags.
+ * Always returns 0.
+ */
+static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
+{
+	uint32_t *p;
+
+	/* opcode 4 + seqid 4 + raw stateid bytes */
+	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	WRITE32(OP_OPEN_DOWNGRADE);
+	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITE32(arg->seqid);
+
+	encode_share_access(xdr, arg->open_flags);
+
+	return 0;
+}
+
+/*
+ * Emit OP_PUTFH followed by the file handle @fh as a length-prefixed
+ * opaque (fh->size bytes of fh->data).  Always returns 0.
+ */
+static int
+encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh)
+{
+	uint32_t *p;
+	int fhlen = fh->size;
+
+	/* opcode 4 + length 4 + handle bytes */
+	RESERVE_SPACE(8 + fhlen);
+	WRITE32(OP_PUTFH);
+	WRITE32(fhlen);
+	WRITEMEM(fh->data, fhlen);
+	return 0;
+}
+
+/*
+ * Emit a bare OP_PUTROOTFH opcode (4 bytes, no arguments).
+ * Always returns 0.
+ */
+static int encode_putrootfh(struct xdr_stream *xdr)
+{
+	uint32_t *p;	/* write cursor used by RESERVE_SPACE/WRITE32 */
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_PUTROOTFH);
+	return 0;
+}
+
+/*
+ * Emit the 16-byte stateid to use for I/O on behalf of @ctx.
+ *
+ * If the open context has no state attached, zero_stateid is sent.
+ * Otherwise the stateid is obtained from nfs4_copy_stateid() for the
+ * context's state and lock owner.
+ */
+static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
+{
+	extern nfs4_stateid zero_stateid;
+	nfs4_stateid stateid;
+	uint32_t *p;
+
+	RESERVE_SPACE(16);
+	if (ctx->state == NULL) {
+		WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
+		return;
+	}
+
+	nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+	WRITEMEM(stateid.data, sizeof(stateid.data));
+}
+
+/*
+ * Emit OP_READ: opcode, stateid (via encode_stateid()), offset(8), count.
+ * Always returns 0.
+ */
+static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_READ);
+
+	encode_stateid(xdr, args->context);
+
+	/* offset 8 + count 4 */
+	RESERVE_SPACE(12);
+	WRITE64(args->offset);
+	WRITE32(args->count);
+	return 0;
+}
+
+/*
+ * Emit OP_READDIR and point the receive buffer at the caller's pages.
+ *
+ * Wire layout written here:
+ *   opcode, cookie(8), cookie verifier, dircount, maxcount,
+ *   bitmap length (2), bitmap word 0, bitmap word 1
+ *
+ * Exactly one attribute is requested per entry: MOUNTED_ON_FILEID when the
+ * caller's bitmask has that bit set, otherwise FILEID.
+ * Always returns 0.
+ */
+static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	uint32_t attrs[2] = { FATTR4_WORD0_FILEID, 0 };
+	int replen;
+	uint32_t *p;
+
+	if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+		attrs[0] = 0;
+		attrs[1] = FATTR4_WORD1_MOUNTED_ON_FILEID;
+	}
+
+	RESERVE_SPACE(32+sizeof(nfs4_verifier));
+	WRITE32(OP_READDIR);
+	WRITE64(readdir->cookie);
+	WRITEMEM(readdir->verifier.data, sizeof(readdir->verifier.data));
+	WRITE32(readdir->count >> 1);  /* We're not doing readdirplus */
+	WRITE32(readdir->count);
+	WRITE32(2);
+	WRITE32(attrs[0]);
+	WRITE32(attrs[1]);
+
+	/* set up reply kvec
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READDIR + status + verifier(2)  = 9
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages,
+			 readdir->pgbase, readdir->count);
+
+	return 0;
+}
+
+/*
+ * Emit OP_READLINK (opcode only) and point the receive buffer at the
+ * caller's pages so the link text lands there.  Always returns 0.
+ */
+static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	unsigned int replen;
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_READLINK);
+
+	/*
+	 * Reply words that precede the link text:
+	 *    toplevel_status + taglen + rescount + OP_PUTFH + status
+	 *      + OP_READLINK + status + string length = 8
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + 8) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->pages,
+			 readlink->pgbase, readlink->pglen);
+
+	return 0;
+}
+
+/*
+ * Emit OP_REMOVE followed by @name as a length-prefixed byte string.
+ * Space reserved: opcode 4 + length 4 + name->len.  Always returns 0.
+ */
+static int encode_remove(struct xdr_stream *xdr, const struct qstr *name)
+{
+	uint32_t *p;	/* write cursor used by the encode macros */
+
+	RESERVE_SPACE(8 + name->len);
+	WRITE32(OP_REMOVE);
+	/* name of the entry to remove: length word, then the bytes */
+	WRITE32(name->len);
+	WRITEMEM(name->name, name->len);
+	return 0;
+}
+
+/*
+ * Emit OP_RENAME followed by the old and the new name, each as a
+ * length-prefixed byte string.  Always returns 0.
+ */
+static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname)
+{
+	uint32_t *p;
+
+	/* opcode 4 + old name length 4 + old name bytes */
+	RESERVE_SPACE(8 + oldname->len);
+	WRITE32(OP_RENAME);
+	WRITE32(oldname->len);
+	WRITEMEM(oldname->name, oldname->len);
+
+	/* new name length 4 + new name bytes */
+	RESERVE_SPACE(4 + newname->len);
+	WRITE32(newname->len);
+	WRITEMEM(newname->name, newname->len);
+	return 0;
+}
+
+/*
+ * Emit OP_RENEW followed by the 8-byte clientid taken from
+ * @client_stateid->cl_clientid.  Always returns 0.
+ */
+static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid)
+{
+	uint32_t *p;
+
+	/* opcode 4 + clientid 8 */
+	RESERVE_SPACE(12);
+	WRITE32(OP_RENEW);
+	WRITE64(client_stateid->cl_clientid);
+	return 0;
+}
+
+/*
+ * Emit a bare OP_SAVEFH opcode (4 bytes, no arguments).  Always returns 0.
+ */
+static int
+encode_savefh(struct xdr_stream *xdr)
+{
+	uint32_t *p;	/* write cursor used by RESERVE_SPACE/WRITE32 */
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_SAVEFH);
+	return 0;
+}
+
+/*
+ * Emit OP_SETATTR: opcode, stateid, then the attributes in arg->iap as
+ * written by encode_attrs().
+ *
+ * Returns whatever encode_attrs() returns (0 on success).
+ */
+static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server)
+{
+	uint32_t *p;
+
+	/* opcode 4 + raw stateid bytes */
+	RESERVE_SPACE(4+sizeof(arg->stateid.data));
+	WRITE32(OP_SETATTR);
+	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+
+	return encode_attrs(xdr, arg->iap, server);
+}
+
+/*
+ * Emit OP_SETCLIENTID.  Fields are written in this order:
+ *   opcode, verifier, client name, callback program,
+ *   callback netid, callback universal address, callback ident
+ * Always returns 0.
+ */
+static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid)
+{
+	uint32_t *p;
+
+	/* opcode 4 + raw verifier bytes */
+	RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data));
+	WRITE32(OP_SETCLIENTID);
+	WRITEMEM(setclientid->sc_verifier->data, sizeof(setclientid->sc_verifier->data));
+
+	/* client identity */
+	encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
+
+	/* callback description */
+	RESERVE_SPACE(4);
+	WRITE32(setclientid->sc_prog);
+	encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
+	encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
+
+	RESERVE_SPACE(4);
+	WRITE32(setclientid->sc_cb_ident);
+	return 0;
+}
+
+/*
+ * Emit OP_SETCLIENTID_CONFIRM: opcode, clientid(8), confirm verifier,
+ * both taken from @client_state.  Always returns 0.
+ */
+static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state)
+{
+	uint32_t *p;
+
+	/* opcode 4 + clientid 8 + raw verifier bytes */
+	RESERVE_SPACE(12 + sizeof(client_state->cl_confirm.data));
+	WRITE32(OP_SETCLIENTID_CONFIRM);
+	WRITE64(client_state->cl_clientid);
+	WRITEMEM(client_state->cl_confirm.data, sizeof(client_state->cl_confirm.data));
+	return 0;
+}
+
+/*
+ * Emit OP_WRITE: opcode, stateid (via encode_stateid()), offset(8),
+ * stable flag, byte count; then hand the data pages to xdr_write_pages().
+ * Always returns 0.
+ */
+static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_WRITE);
+
+	encode_stateid(xdr, args->context);
+
+	/* offset 8 + stable 4 + count 4 */
+	RESERVE_SPACE(16);
+	WRITE64(args->offset);
+	WRITE32(args->stable);
+	WRITE32(args->count);
+
+	/* payload */
+	xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
+	return 0;
+}
+
+/*
+ * Emit OP_DELEGRETURN followed by the delegation @stateid
+ * (opcode 4 + stateid 16 = 20 bytes).  Always returns 0.
+ */
+static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(20);
+	WRITE32(OP_DELEGRETURN);
+	WRITEMEM(stateid->data, sizeof(stateid->data));
+
+	return 0;
+}
+/*
+ * END OF "GENERIC" ENCODE ROUTINES.
+ */
+
+/*
+ * Encode an ACCESS request: compound of PUTFH(args->fh) + ACCESS.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, const struct nfs4_accessargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status != 0)
+		return status;
+	return encode_access(&xdr, args->access);
+}
+
+/*
+ * Encode a LOOKUP request: compound of
+ * PUTFH(args->dir_fh) + LOOKUP(args->name) + GETFH + GETATTR.
+ * Encoding stops at the first encoder that returns non-zero.
+ */
+static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->dir_fh);
+	if (status == 0)
+		status = encode_lookup(&xdr, args->name);
+	if (status == 0)
+		status = encode_getfh(&xdr);
+	if (status == 0)
+		status = encode_getfattr(&xdr, args->bitmask);
+	return status;
+}
+
+/*
+ * Encode a LOOKUP_ROOT request: compound of PUTROOTFH + GETFH + GETATTR.
+ * Encoding stops at the first encoder that returns non-zero.
+ */
+static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, uint32_t *p, const struct nfs4_lookup_root_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 3,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putrootfh(&xdr);
+	if (status != 0)
+		return status;
+	status = encode_getfh(&xdr);
+	if (status != 0)
+		return status;
+	return encode_getfattr(&xdr, args->bitmask);
+}
+
+/*
+ * Encode a REMOVE request: compound of PUTFH(args->fh) + REMOVE(args->name).
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct nfs4_remove_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status != 0)
+		return status;
+	return encode_remove(&xdr, args->name);
+}
+
+/*
+ * Encode a RENAME request: compound of
+ * PUTFH(args->old_dir) + SAVEFH + PUTFH(args->new_dir) + RENAME.
+ * Encoding stops at the first encoder that returns non-zero.
+ */
+static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct nfs4_rename_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->old_dir);
+	if (status == 0)
+		status = encode_savefh(&xdr);
+	if (status == 0)
+		status = encode_putfh(&xdr, args->new_dir);
+	if (status == 0)
+		status = encode_rename(&xdr, args->old_name, args->new_name);
+	return status;
+}
+
+/*
+ * Encode a LINK request: compound of
+ * PUTFH(args->fh) + SAVEFH + PUTFH(args->dir_fh) + LINK(args->name).
+ * Encoding stops at the first encoder that returns non-zero.
+ */
+static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs4_link_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_savefh(&xdr);
+	if (status == 0)
+		status = encode_putfh(&xdr, args->dir_fh);
+	if (status == 0)
+		status = encode_link(&xdr, args->name);
+	return status;
+}
+
+/*
+ * Encode a CREATE request: compound of
+ * PUTFH(args->dir_fh) + CREATE + GETFH + GETATTR.
+ * Encoding stops at the first encoder that returns non-zero.
+ */
+static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->dir_fh);
+	if (status == 0)
+		status = encode_create(&xdr, args);
+	if (status == 0)
+		status = encode_getfh(&xdr);
+	if (status == 0)
+		status = encode_getfattr(&xdr, args->bitmask);
+	return status;
+}
+
+/*
+ * Encode a SYMLINK request.  This is encoded exactly like a CREATE
+ * request, so it simply forwards to nfs4_xdr_enc_create().
+ */
+static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_create_arg *args)
+{
+	int status = nfs4_xdr_enc_create(req, p, args);
+
+	return status;
+}
+
+/*
+ * Encode a GETATTR request: compound of PUTFH(args->fh) + GETATTR.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, const struct nfs4_getattr_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status != 0)
+		return status;
+	return encode_getfattr(&xdr, args->bitmask);
+}
+
+/*
+ * Encode a CLOSE request: compound of PUTFH(args->fh) + CLOSE.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_close(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode an OPEN request: compound of
+ * PUTFH(args->fh) + OPEN + GETFH + GETATTR.
+ * Encoding stops at the first encoder that returns non-zero.
+ */
+static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 4,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_open(&xdr, args);
+	if (status == 0)
+		status = encode_getfh(&xdr);
+	if (status == 0)
+		status = encode_getfattr(&xdr, args->bitmask);
+	return status;
+}
+
+/*
+ * Encode an OPEN_CONFIRM request: compound of PUTFH(args->fh) + OPEN_CONFIRM.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_open_confirm(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode an OPEN request with no attributes: compound of
+ * PUTFH(args->fh) + OPEN only (no GETFH/GETATTR follow-up).
+ */
+static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_open(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode an OPEN_DOWNGRADE request: compound of
+ * PUTFH(args->fh) + OPEN_DOWNGRADE.
+ */
+static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_open_downgrade(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode a LOCK request: compound of PUTFH(args->fh) + LOCK.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_lock(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode a LOCKT request: compound of PUTFH(args->fh) + LOCKT.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_lockt(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode a LOCKU request: compound of PUTFH(args->fh) + LOCKU.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_locku(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode a READLINK request: compound of PUTFH(args->fh) + READLINK.
+ * encode_readlink() also sets up the receive pages on @req.
+ */
+static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readlink *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_readlink(&xdr, args, req);
+	return status;
+}
+
+/*
+ * Encode a READDIR request: compound of PUTFH(args->fh) + READDIR.
+ * encode_readdir() also sets up the receive pages on @req.
+ */
+static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, uint32_t *p, const struct nfs4_readdir_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_readdir(&xdr, args, req);
+	return status;
+}
+
+/*
+ * Encode a READ request: compound of PUTFH(args->fh) + READ, then point
+ * the receive buffer at the caller's pages.  The receive pages are only
+ * set up when both operations encoded successfully.
+ */
+static int nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args)
+{
+	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int replen, status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_read(&xdr, args);
+	if (status != 0)
+		return status;
+
+	/*
+	 * Reply words that precede the data (counted by NFS4_dec_read_sz):
+	 *    toplevel status + taglen=0 + rescount + OP_PUTFH + status
+	 *       + OP_READ + status + eof + datalen = 9
+	 */
+	replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
+	xdr_inline_pages(&req->rq_rcv_buf, replen,
+			 args->pages, args->pgbase, args->count);
+	return status;
+}
+
+/*
+ * Encode a SETATTR request: compound of
+ * PUTFH(args->fh) + SETATTR + GETATTR.
+ * Encoding stops at the first encoder that returns non-zero.
+ */
+static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, uint32_t *p, struct nfs_setattrargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 3,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_setattr(&xdr, args, args->server);
+	if (status == 0)
+		status = encode_getfattr(&xdr, args->bitmask);
+	return status;
+}
+
+/*
+ * Encode a WRITE request: compound of PUTFH(args->fh) + WRITE.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_write(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode a COMMIT request: compound of PUTFH(args->fh) + COMMIT.
+ * Returns the first non-zero status from the encoders, else 0.
+ */
+static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status == 0)
+		status = encode_commit(&xdr, args);
+	return status;
+}
+
+/*
+ * Encode an FSINFO request: compound of PUTFH(args->fh) + a GETATTR
+ * restricted to the fsinfo attributes (see encode_fsinfo()).
+ */
+static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fsinfo_arg *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		return status;
+	return encode_fsinfo(&xdr, args->bitmask);
+}
+
+/*
+ * Encode a PATHCONF request: compound of PUTFH(args->fh) + a one-word
+ * GETATTR for args->bitmask[0] filtered through nfs4_pathconf_bitmap[0].
+ */
+static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args)
+{
+	extern u32 nfs4_pathconf_bitmap[2];
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		return status;
+	return encode_getattr_one(&xdr,
+			args->bitmask[0] & nfs4_pathconf_bitmap[0]);
+}
+
+/*
+ * Encode a STATFS request: compound of PUTFH(args->fh) + a two-word
+ * GETATTR for args->bitmask filtered through nfs4_statfs_bitmap[].
+ */
+static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args)
+{
+	extern u32 nfs4_statfs_bitmap[];
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fh);
+	if (status != 0)
+		return status;
+	return encode_getattr_two(&xdr,
+			args->bitmask[0] & nfs4_statfs_bitmap[0],
+			args->bitmask[1] & nfs4_statfs_bitmap[1]);
+}
+
+/*
+ * Encode a server capabilities (GETATTR_BITMAP) request: compound of
+ * PUTFH(fhandle) + a one-word GETATTR asking for SUPPORTED_ATTRS,
+ * LINK_SUPPORT, SYMLINK_SUPPORT and ACLSUPPORT.
+ */
+static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const struct nfs_fh *fhandle)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, fhandle);
+	if (status != 0)
+		return status;
+	return encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+			FATTR4_WORD0_LINK_SUPPORT|
+			FATTR4_WORD0_SYMLINK_SUPPORT|
+			FATTR4_WORD0_ACLSUPPORT);
+}
+
+/*
+ * Encode a RENEW request: a single-operation compound carrying RENEW
+ * for @clp.
+ */
+static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 1,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_renew(&xdr, clp);
+	return status;
+}
+
+/*
+ * Encode a SETCLIENTID request: a single-operation compound carrying
+ * SETCLIENTID built from @sc.
+ */
+static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nfs4_setclientid *sc)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 1,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+	status = encode_setclientid(&xdr, sc);
+	return status;
+}
+
+/*
+ * Encode a SETCLIENTID_CONFIRM request: compound of
+ * SETCLIENTID_CONFIRM + PUTROOTFH + a GETATTR for the lease time.
+ * Encoding stops at the first encoder that returns non-zero.
+ */
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 3,
+	};
+	/* only the lease time is requested; encode_fsinfo() filters further */
+	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_setclientid_confirm(&xdr, clp);
+	if (status)
+		return status;
+	status = encode_putrootfh(&xdr);
+	if (status)
+		return status;
+	return encode_fsinfo(&xdr, lease_bitmap);
+}
+
+/*
+ * Encode a DELEGRETURN request: compound of
+ * PUTFH(args->fhandle) + DELEGRETURN(args->stateid).
+ */
+static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const struct nfs4_delegreturnargs *args)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr = {
+		.nops = 2,
+	};
+	int status;
+
+	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+	encode_compound_hdr(&xdr, &hdr);
+
+	status = encode_putfh(&xdr, args->fhandle);
+	if (status != 0)
+		return status;
+	return encode_delegreturn(&xdr, args->stateid);
+}
+
+/*
+ * START OF "GENERIC" DECODE ROUTINES.
+ * These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+/*
+ * Decode helpers.  Each of these expects a local "uint32_t *p" read cursor
+ * to be in scope at the point of use, and advances it past what was read.
+ */
+
+/* Read one 32-bit word in network byte order into x. */
+#define READ32(x)         (x) = ntohl(*p++)
+
+/* Read a 64-bit value stored as two 32-bit words, high word first. */
+#define READ64(x)         do {			\
+	(x) = (u64)ntohl(*p++) << 32;		\
+	(x) |= ntohl(*p++);			\
+} while (0)
+
+/*
+ * Read a timestamp into x: the first word is skipped, the next two words
+ * become tv_sec and tv_nsec.  The argument is parenthesised as a whole so
+ * that expressions such as *ptr work as expected.
+ */
+#define READTIME(x)       do {			\
+	p++;					\
+	(x).tv_sec = ntohl(*p++);		\
+	(x).tv_nsec = ntohl(*p++);		\
+} while (0)
+
+/* Copy nbytes raw bytes to x and skip XDR_QUADLEN(nbytes) words. */
+#define COPYMEM(x,nbytes) do {			\
+	memcpy((x), p, nbytes);			\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+
+/*
+ * Make the next nbytes of the reply available through the local cursor
+ * "p" (a local "xdr" stream pointer must be in scope as well).
+ *
+ * If xdr_inline_decode() fails, a warning is logged and the ENCLOSING
+ * FUNCTION returns -EIO, so this macro may only be used in functions
+ * returning int.  The message is newline-terminated so it is not merged
+ * with whatever is logged next.
+ */
+#define READ_BUF(nbytes)  do { \
+	p = xdr_inline_decode(xdr, nbytes); \
+	if (!p) { \
+		printk(KERN_WARNING "%s: reply buffer overflowed in line %d.\n", \
+			       	__FUNCTION__, __LINE__); \
+		return -EIO; \
+	} \
+} while (0)
+
+/*
+ * Decode a length-prefixed opaque without copying it.
+ *
+ * On success *len holds the byte count and *string points directly at the
+ * data inside the stream's buffer.  Returns 0, or -EIO (via READ_BUF) if
+ * the reply is too short.
+ */
+static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string)
+{
+	uint32_t *p;
+
+	/* length word */
+	READ_BUF(4);
+	READ32(*len);
+
+	/* payload stays in place; hand back a pointer to it */
+	READ_BUF(*len);
+	*string = (char *)p;
+
+	return 0;
+}
+
+/*
+ * Decode the header of a COMPOUND reply into @hdr: status, tag length,
+ * a pointer to the tag bytes (left in the stream buffer) and the number
+ * of operation results that follow.
+ *
+ * Returns 0, or -EIO (via READ_BUF) if the reply is too short.
+ *
+ * NOTE(review): taglen is taken from the reply as-is.  If it is a 32-bit
+ * unsigned field, "taglen + 4" can wrap for very large values, after which
+ * the cursor would be advanced past the bytes READ_BUF() made available.
+ * Confirm the field type and consider bounding taglen.
+ */
+static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
+{
+	uint32_t *p;
+
+	/* status + tag length */
+	READ_BUF(8);
+	READ32(hdr->status);
+	READ32(hdr->taglen);
+
+	/* tag bytes + operation count */
+	READ_BUF(hdr->taglen + 4);
+	hdr->tag = (char *)p;
+	p += XDR_QUADLEN(hdr->taglen);
+	READ32(hdr->nops);
+
+	return 0;
+}
+
+/*
+ * Decode the (opcode, status) pair that starts every operation result.
+ *
+ * Returns
+ *   0      the opcode matches @expected and the status is NFS_OK
+ *   -EIO   the reply is too short, or the opcode does not match
+ *   other  the negated result of nfs_stat_to_errno() for a failing status
+ */
+static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+	uint32_t *p;
+	uint32_t opnum;
+	int32_t nfserr;
+
+	READ_BUF(8);
+
+	READ32(opnum);
+	if (opnum != expected) {
+		printk(KERN_NOTICE
+				"nfs4_decode_op_hdr: Server returned operation"
+				" %d but we issued a request for %d\n",
+				opnum, expected);
+		return -EIO;
+	}
+
+	READ32(nfserr);
+	if (nfserr == NFS_OK)
+		return 0;
+	return -nfs_stat_to_errno(nfserr);
+}
+
+/*
+ * Dummy routine: steps over one ACE without storing anything.
+ * 12 bytes of fixed fields are consumed, then one length-prefixed opaque.
+ * @ace and @clp are unused.
+ */
+static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
+{
+	uint32_t *p;
+	uint32_t wholen;	/* decoded but not used */
+	char *who;		/* decoded but not used */
+
+	READ_BUF(12);
+	return decode_opaque_inline(xdr, &wholen, &who);
+}
+
+/*
+ * Decode an attribute bitmap: a word count followed by that many words.
+ *
+ * Only the first two words are kept in @bitmap; missing words are left
+ * as 0 and extra words are skipped.
+ *
+ * Returns 0 on success, -EIO if the reply is too short or the advertised
+ * word count is so large that its byte size does not fit in 32 bits.
+ */
+static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+	uint32_t bmlen, *p;
+
+	READ_BUF(4);
+	READ32(bmlen);
+
+	bitmap[0] = bitmap[1] = 0;
+	/*
+	 * (bmlen << 2) is evaluated in 32 bits.  Without this check a huge
+	 * count would wrap to a small byte size, and the reads below would
+	 * touch words that READ_BUF() never made available.
+	 */
+	if (unlikely(bmlen > 0x3fffffffU))
+		return -EIO;
+	READ_BUF((bmlen << 2));
+	if (bmlen > 0) {
+		READ32(bitmap[0]);
+		if (bmlen > 1)
+			READ32(bitmap[1]);
+	}
+	return 0;
+}
+
+/*
+ * Decode the byte length of the attribute data that follows, and record
+ * in *savep the stream position right after the length word.
+ * Returns 0, or -EIO (via READ_BUF) if the reply is too short.
+ */
+static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, uint32_t **savep)
+{
+	uint32_t *p;
+
+	READ_BUF(4);
+	READ32(*attrlen);
+
+	*savep = xdr->p;
+
+	return 0;
+}
+
+/*
+ * Decode the SUPPORTED_ATTRS attribute into @bitmask (two words).
+ *
+ * If the attribute is present in @bitmap it is decoded and its bit is
+ * cleared; otherwise @bitmask is zeroed.
+ *
+ * Returns 0 on success, or the error from decode_attr_bitmap() when the
+ * nested bitmap cannot be decoded.  In that case the attribute bit is left
+ * set and @bitmask must not be relied upon.
+ */
+static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
+{
+	if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) {
+		int status = decode_attr_bitmap(xdr, bitmask);
+
+		/* do not report success on a truncated reply */
+		if (unlikely(status != 0))
+			return status;
+		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
+	} else
+		bitmask[0] = bitmask[1] = 0;
+	dprintk("%s: bitmask=0x%x%x\n", __FUNCTION__, bitmask[0], bitmask[1]);
+	return 0;
+}
+
+/*
+ * Decode the TYPE attribute into *type (0 if the attribute is absent).
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_TYPE are still set in
+ * bitmap[0], if the reply is too short, or if the decoded value lies
+ * outside NF4REG..NF4NAMEDATTR.  On success the TYPE bit is cleared.
+ */
+static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type)
+{
+	uint32_t *p;
+
+	*type = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_TYPE - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_TYPE) == 0))
+		goto out;
+
+	READ_BUF(4);
+	READ32(*type);
+	if (*type < NF4REG || *type > NF4NAMEDATTR) {
+		dprintk("%s: bad type %d\n", __FUNCTION__, *type);
+		return -EIO;
+	}
+	bitmap[0] &= ~FATTR4_WORD0_TYPE;
+out:
+	dprintk("%s: type=0%o\n", __FUNCTION__, nfs_type2fmt[*type].nfs2type);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit CHANGE attribute into *change (0 if absent).
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_CHANGE are still set in
+ * bitmap[0] or if the reply is too short.  On success the CHANGE bit is
+ * cleared.
+ */
+static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
+{
+	uint32_t *p;
+
+	*change = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_CHANGE) == 0))
+		goto out;
+
+	READ_BUF(8);
+	READ64(*change);
+	bitmap[0] &= ~FATTR4_WORD0_CHANGE;
+out:
+	dprintk("%s: change attribute=%Lu\n", __FUNCTION__,
+			(unsigned long long)*change);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit SIZE attribute into *size (0 if absent).
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_SIZE are still set in
+ * bitmap[0] or if the reply is too short.  On success the SIZE bit is
+ * cleared.
+ */
+static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
+{
+	uint32_t *p;
+
+	*size = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_SIZE - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_SIZE) == 0))
+		goto out;
+
+	READ_BUF(8);
+	READ64(*size);
+	bitmap[0] &= ~FATTR4_WORD0_SIZE;
+out:
+	dprintk("%s: file size=%Lu\n", __FUNCTION__, (unsigned long long)*size);
+	return 0;
+}
+
+/*
+ * Decode the LINK_SUPPORT attribute into *res (0 if absent).
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_LINK_SUPPORT are still set in
+ * bitmap[0] or if the reply is too short.  On success the attribute bit
+ * is cleared.
+ */
+static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_LINK_SUPPORT) == 0))
+		goto out;
+
+	READ_BUF(4);
+	READ32(*res);
+	bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
+out:
+	dprintk("%s: link support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true");
+	return 0;
+}
+
+/*
+ * Decode the SYMLINK_SUPPORT attribute into *res (0 if absent).
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_SYMLINK_SUPPORT are still set
+ * in bitmap[0] or if the reply is too short.  On success the attribute
+ * bit is cleared.
+ */
+static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT) == 0))
+		goto out;
+
+	READ_BUF(4);
+	READ32(*res);
+	bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
+out:
+	dprintk("%s: symlink support=%s\n", __FUNCTION__, *res == 0 ? "false" : "true");
+	return 0;
+}
+
+/*
+ * Decode the FSID attribute (two 64-bit values, major then minor) into
+ * @fsid.  Both fields are 0 if the attribute is absent.
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_FSID are still set in
+ * bitmap[0] or if the reply is too short.  On success the FSID bit is
+ * cleared.
+ */
+static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid)
+{
+	uint32_t *p;
+
+	fsid->major = 0;
+	fsid->minor = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_FSID - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_FSID) == 0))
+		goto out;
+
+	READ_BUF(16);
+	READ64(fsid->major);
+	READ64(fsid->minor);
+	bitmap[0] &= ~FATTR4_WORD0_FSID;
+out:
+	dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __FUNCTION__,
+			(unsigned long long)fsid->major,
+			(unsigned long long)fsid->minor);
+	return 0;
+}
+
+/*
+ * Decode the LEASE_TIME attribute into *res.  If the attribute is absent
+ * *res is left at the default of 60.
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_LEASE_TIME are still set in
+ * bitmap[0] or if the reply is too short.  On success the attribute bit
+ * is cleared.
+ */
+static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+
+	*res = 60;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
+		READ_BUF(4);
+		READ32(*res);
+		bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
+	}
+	/* label used to read "file size", copied from decode_attr_size() */
+	dprintk("%s: lease time=%u\n", __FUNCTION__, (unsigned int)*res);
+	return 0;
+}
+
+/*
+ * Decode the ACLSUPPORT attribute into *res.  If the attribute is absent
+ * *res is left at ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL.
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_ACLSUPPORT are still set in
+ * bitmap[0] or if the reply is too short.  On success the attribute bit
+ * is cleared.
+ */
+static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint32_t *p;
+
+	*res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_ACLSUPPORT) == 0))
+		goto out;
+
+	READ_BUF(4);
+	READ32(*res);
+	bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
+out:
+	dprintk("%s: ACLs supported=%u\n", __FUNCTION__, (unsigned int)*res);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit FILEID attribute into *fileid (0 if absent).
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_FILEID are still set in
+ * bitmap[0] or if the reply is too short.  On success the FILEID bit is
+ * cleared.
+ */
+static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
+{
+	uint32_t *p;
+
+	*fileid = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_FILEID - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_FILEID) == 0))
+		goto out;
+
+	READ_BUF(8);
+	READ64(*fileid);
+	bitmap[0] &= ~FATTR4_WORD0_FILEID;
+out:
+	dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit FILES_AVAIL attribute into *res (0 if absent).
+ *
+ * Returns -EIO if bits below FATTR4_WORD0_FILES_AVAIL are still set in
+ * bitmap[0] or if the reply is too short, 0 otherwise.  On success the
+ * attribute bit is cleared.
+ */
+static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_FILES_AVAIL) == 0))
+		goto out;
+
+	READ_BUF(8);
+	READ64(*res);
+	bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
+out:
+	dprintk("%s: files avail=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit files_free attribute.
+ *
+ * *res is preset to 0 and is only overwritten when the server returned
+ * the attribute.  Returns -EIO when bitmap[0] still has any bit covered
+ * by (FATTR4_WORD0_FILES_FREE - 1U) set, 0 otherwise.
+ */
+static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_FILES_FREE) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(*res);
+	bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
+out:
+	dprintk("%s: files free=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit files_total attribute.
+ *
+ * *res is preset to 0 and is only overwritten when the server returned
+ * the attribute.  Returns -EIO when bitmap[0] still has any bit covered
+ * by (FATTR4_WORD0_FILES_TOTAL - 1U) set, 0 otherwise.
+ */
+static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_FILES_TOTAL) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(*res);
+	bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
+out:
+	dprintk("%s: files total=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit maxfilesize attribute.
+ *
+ * *res is preset to 0 and is only overwritten when the server returned
+ * the attribute.  Returns -EIO when bitmap[0] still has any bit covered
+ * by (FATTR4_WORD0_MAXFILESIZE - 1U) set, 0 otherwise.
+ */
+static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_MAXFILESIZE) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(*res);
+	bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
+out:
+	dprintk("%s: maxfilesize=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return 0;
+}
+
+/*
+ * Decode the 32-bit maxlink attribute.
+ *
+ * *maxlink is preset to 1 and is only overwritten when the server
+ * returned the attribute.  Returns -EIO when bitmap[0] still has any bit
+ * covered by (FATTR4_WORD0_MAXLINK - 1U) set, 0 otherwise.
+ */
+static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
+{
+	uint32_t *p;
+
+	*maxlink = 1;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_MAXLINK) == 0))
+		goto out;
+	READ_BUF(4);
+	READ32(*maxlink);
+	bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
+out:
+	dprintk("%s: maxlink=%u\n", __FUNCTION__, *maxlink);
+	return 0;
+}
+
+/*
+ * Decode the 32-bit maxname attribute.
+ *
+ * *maxname is preset to 1024 and is only overwritten when the server
+ * returned the attribute.  Returns -EIO when bitmap[0] still has any bit
+ * covered by (FATTR4_WORD0_MAXNAME - 1U) set, 0 otherwise.
+ */
+static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
+{
+	uint32_t *p;
+
+	*maxname = 1024;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_MAXNAME) == 0))
+		goto out;
+	READ_BUF(4);
+	READ32(*maxname);
+	bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
+out:
+	dprintk("%s: maxname=%u\n", __FUNCTION__, *maxname);
+	return 0;
+}
+
+/*
+ * Decode the maxread attribute.
+ *
+ * The value is read as 64 bits, clamped to 0x7FFFFFFF, and stored in the
+ * 32-bit *res.  *res is preset to 1024 and keeps that value when the
+ * server did not return the attribute.  Returns -EIO when bitmap[0]
+ * still has any bit covered by (FATTR4_WORD0_MAXREAD - 1U) set,
+ * 0 otherwise.
+ */
+static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint64_t wire_max;
+	uint32_t *p;
+
+	*res = 1024;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_MAXREAD - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_MAXREAD) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(wire_max);
+	*res = (wire_max > 0x7FFFFFFF) ? 0x7FFFFFFF : (uint32_t)wire_max;
+	bitmap[0] &= ~FATTR4_WORD0_MAXREAD;
+out:
+	dprintk("%s: maxread=%lu\n", __FUNCTION__, (unsigned long)*res);
+	return 0;
+}
+
+/*
+ * Decode the maxwrite attribute.
+ *
+ * The value is read as 64 bits, clamped to 0x7FFFFFFF, and stored in the
+ * 32-bit *res.  *res is preset to 1024 and keeps that value when the
+ * server did not return the attribute.  Returns -EIO when bitmap[0]
+ * still has any bit covered by (FATTR4_WORD0_MAXWRITE - 1U) set,
+ * 0 otherwise.
+ */
+static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	uint64_t wire_max;
+	uint32_t *p;
+
+	*res = 1024;
+	if (unlikely((bitmap[0] & (FATTR4_WORD0_MAXWRITE - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[0] & FATTR4_WORD0_MAXWRITE) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(wire_max);
+	*res = (wire_max > 0x7FFFFFFF) ? 0x7FFFFFFF : (uint32_t)wire_max;
+	bitmap[0] &= ~FATTR4_WORD0_MAXWRITE;
+out:
+	dprintk("%s: maxwrite=%lu\n", __FUNCTION__, (unsigned long)*res);
+	return 0;
+}
+
+/*
+ * Decode the mode attribute (word 1 of the attribute bitmap).
+ *
+ * The S_IFMT bits of the received value are masked off before it is
+ * stored.  *mode is preset to 0 and keeps that value when the server did
+ * not return the attribute.  Returns -EIO when bitmap[1] still has any
+ * bit covered by (FATTR4_WORD1_MODE - 1U) set, 0 otherwise.
+ */
+static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode)
+{
+	uint32_t *p;
+
+	*mode = 0;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_MODE - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_MODE) == 0))
+		goto out;
+	READ_BUF(4);
+	READ32(*mode);
+	*mode &= ~S_IFMT;
+	bitmap[1] &= ~FATTR4_WORD1_MODE;
+out:
+	dprintk("%s: file mode=0%o\n", __FUNCTION__, (unsigned int)*mode);
+	return 0;
+}
+
+/*
+ * Decode the numlinks attribute.
+ *
+ * *nlink is preset to 1 and is only overwritten when the server returned
+ * the attribute.  Returns -EIO when bitmap[1] still has any bit covered
+ * by (FATTR4_WORD1_NUMLINKS - 1U) set, 0 otherwise.
+ */
+static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
+{
+	uint32_t *p;
+
+	*nlink = 1;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_NUMLINKS) == 0))
+		goto out;
+	READ_BUF(4);
+	READ32(*nlink);
+	bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
+out:
+	dprintk("%s: nlink=%u\n", __FUNCTION__, (unsigned int)*nlink);
+	return 0;
+}
+
+/*
+ * Decode the owner attribute and map the owner name to a uid.
+ *
+ * The name is passed to nfs_map_name_to_uid() when its length is below
+ * XDR_MAX_NETOBJ; longer names only produce a warning.  A mapping failure
+ * is logged via dprintk and is not treated as an error.  *uid is preset
+ * to -2.  Returns -EIO when bitmap[1] still has any bit covered by
+ * (FATTR4_WORD1_OWNER - 1U) set, 0 otherwise.
+ */
+static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid)
+{
+	uint32_t *p;
+	uint32_t len;
+
+	*uid = -2;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_OWNER - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_OWNER) == 0))
+		goto out;
+	READ_BUF(4);
+	READ32(len);
+	READ_BUF(len);
+	if (len >= XDR_MAX_NETOBJ) {
+		printk(KERN_WARNING "%s: name too long (%u)!\n",
+				__FUNCTION__, len);
+	} else if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0) {
+		dprintk("%s: nfs_map_name_to_uid failed!\n",
+				__FUNCTION__);
+	}
+	bitmap[1] &= ~FATTR4_WORD1_OWNER;
+out:
+	dprintk("%s: uid=%d\n", __FUNCTION__, (int)*uid);
+	return 0;
+}
+
+/*
+ * Decode the owner_group attribute and map the group name to a gid.
+ *
+ * The name is passed to nfs_map_group_to_gid() when its length is below
+ * XDR_MAX_NETOBJ; longer names only produce a warning.  A mapping failure
+ * is logged via dprintk and is not treated as an error.  *gid is preset
+ * to -2.  Returns -EIO when bitmap[1] still has any bit covered by
+ * (FATTR4_WORD1_OWNER_GROUP - 1U) set, 0 otherwise.
+ */
+static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid)
+{
+	uint32_t *p;
+	uint32_t len;
+
+	*gid = -2;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_OWNER_GROUP) == 0))
+		goto out;
+	READ_BUF(4);
+	READ32(len);
+	READ_BUF(len);
+	if (len >= XDR_MAX_NETOBJ) {
+		printk(KERN_WARNING "%s: name too long (%u)!\n",
+				__FUNCTION__, len);
+	} else if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0) {
+		dprintk("%s: nfs_map_group_to_gid failed!\n",
+				__FUNCTION__);
+	}
+	bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
+out:
+	dprintk("%s: gid=%d\n", __FUNCTION__, (int)*gid);
+	return 0;
+}
+
+/*
+ * Decode the rawdev attribute (two 32-bit values: major, minor).
+ *
+ * *rdev is preset to MKDEV(0,0).  It is only overwritten when the pair
+ * survives a round trip through MKDEV()/MAJOR()/MINOR() unchanged.
+ * Returns -EIO when bitmap[1] still has any bit covered by
+ * (FATTR4_WORD1_RAWDEV - 1U) set, 0 otherwise.
+ */
+static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
+{
+	uint32_t major = 0;
+	uint32_t minor = 0;
+	uint32_t *p;
+	dev_t dev;
+
+	*rdev = MKDEV(0,0);
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_RAWDEV) == 0))
+		goto out;
+	READ_BUF(4 + 4);
+	READ32(major);
+	READ32(minor);
+	dev = MKDEV(major, minor);
+	/* Only accept numbers that dev_t can represent without loss. */
+	if (MAJOR(dev) == major && MINOR(dev) == minor)
+		*rdev = dev;
+	bitmap[1] &= ~FATTR4_WORD1_RAWDEV;
+out:
+	dprintk("%s: rdev=(0x%x:0x%x)\n", __FUNCTION__, major, minor);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit space_avail attribute.
+ *
+ * *res is preset to 0 and is only overwritten when the server returned
+ * the attribute.  Returns -EIO when bitmap[1] still has any bit covered
+ * by (FATTR4_WORD1_SPACE_AVAIL - 1U) set, 0 otherwise.
+ */
+static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_SPACE_AVAIL) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(*res);
+	bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
+out:
+	dprintk("%s: space avail=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit space_free attribute.
+ *
+ * *res is preset to 0 and is only overwritten when the server returned
+ * the attribute.  Returns -EIO when bitmap[1] still has any bit covered
+ * by (FATTR4_WORD1_SPACE_FREE - 1U) set, 0 otherwise.
+ */
+static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_SPACE_FREE) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(*res);
+	bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
+out:
+	dprintk("%s: space free=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit space_total attribute.
+ *
+ * *res is preset to 0 and is only overwritten when the server returned
+ * the attribute.  Returns -EIO when bitmap[1] still has any bit covered
+ * by (FATTR4_WORD1_SPACE_TOTAL - 1U) set, 0 otherwise.
+ */
+static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
+{
+	uint32_t *p;
+
+	*res = 0;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_SPACE_TOTAL) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(*res);
+	bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
+out:
+	dprintk("%s: space total=%Lu\n", __FUNCTION__, (unsigned long long)*res);
+	return 0;
+}
+
+/*
+ * Decode the 64-bit space_used attribute.
+ *
+ * *used is preset to 0 and is only overwritten when the server returned
+ * the attribute.  Returns -EIO when bitmap[1] still has any bit covered
+ * by (FATTR4_WORD1_SPACE_USED - 1U) set, 0 otherwise.
+ */
+static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
+{
+	uint32_t *p;
+
+	*used = 0;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_SPACE_USED) == 0))
+		goto out;
+	READ_BUF(8);
+	READ64(*used);
+	bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
+out:
+	dprintk("%s: space used=%Lu\n", __FUNCTION__,
+		(unsigned long long)*used);
+	return 0;
+}
+
+/*
+ * Decode one timestamp: a 64-bit seconds value followed by a 32-bit
+ * nanoseconds value (12 bytes in total), stored into @time.
+ * Returns 0 once both fields have been read.
+ */
+static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
+{
+	uint64_t seconds;
+	uint32_t nseconds;
+	uint32_t *p;
+
+	READ_BUF(8 + 4);
+	READ64(seconds);
+	READ32(nseconds);
+	time->tv_nsec = (long)nseconds;
+	time->tv_sec = (time_t)seconds;
+	return 0;
+}
+
+/*
+ * Decode the time_access attribute into @time.
+ *
+ * @time is zeroed first.  Returns -EIO when bitmap[1] still has any bit
+ * covered by (FATTR4_WORD1_TIME_ACCESS - 1U) set; otherwise returns the
+ * result of decode_attr_time(), or 0 when the attribute is absent.  The
+ * attribute bit is cleared even if decode_attr_time() fails.
+ */
+static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+{
+	int status = 0;
+
+	time->tv_nsec = 0;
+	time->tv_sec = 0;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_TIME_ACCESS - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_TIME_ACCESS) == 0))
+		goto out;
+	status = decode_attr_time(xdr, time);
+	bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+out:
+	dprintk("%s: atime=%ld\n", __FUNCTION__, (long)time->tv_sec);
+	return status;
+}
+
+/*
+ * Decode the time_metadata attribute into @time.
+ *
+ * @time is zeroed first.  Returns -EIO when bitmap[1] still has any bit
+ * covered by (FATTR4_WORD1_TIME_METADATA - 1U) set; otherwise returns the
+ * result of decode_attr_time(), or 0 when the attribute is absent.  The
+ * attribute bit is cleared even if decode_attr_time() fails.
+ */
+static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+{
+	int status = 0;
+
+	time->tv_nsec = 0;
+	time->tv_sec = 0;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_TIME_METADATA - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_TIME_METADATA) == 0))
+		goto out;
+	status = decode_attr_time(xdr, time);
+	bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA;
+out:
+	dprintk("%s: ctime=%ld\n", __FUNCTION__, (long)time->tv_sec);
+	return status;
+}
+
+/*
+ * Decode the time_modify attribute into @time.
+ *
+ * @time is zeroed first.  Returns -EIO when bitmap[1] still has any bit
+ * covered by (FATTR4_WORD1_TIME_MODIFY - 1U) set; otherwise returns the
+ * result of decode_attr_time(), or 0 when the attribute is absent.  The
+ * attribute bit is cleared even if decode_attr_time() fails.
+ */
+static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
+{
+	int status = 0;
+
+	time->tv_nsec = 0;
+	time->tv_sec = 0;
+	if (unlikely((bitmap[1] & (FATTR4_WORD1_TIME_MODIFY - 1U)) != 0))
+		return -EIO;
+	if (unlikely((bitmap[1] & FATTR4_WORD1_TIME_MODIFY) == 0))
+		goto out;
+	status = decode_attr_time(xdr, time);
+	bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY;
+out:
+	dprintk("%s: mtime=%ld\n", __FUNCTION__, (long)time->tv_sec);
+	return status;
+}
+
+/*
+ * Check that the number of 32-bit words consumed since @savep equals the
+ * attribute length announced by the server (@attrlen, in bytes, rounded
+ * by XDR_QUADLEN).  Logs a warning and returns -EIO on mismatch,
+ * 0 otherwise.
+ */
+static int verify_attr_len(struct xdr_stream *xdr, uint32_t *savep, uint32_t attrlen)
+{
+	unsigned int announced = XDR_QUADLEN(attrlen);
+	unsigned int consumed = xdr->p - savep;
+
+	if (likely(announced == consumed))
+		return 0;
+
+	printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n",
+			__FUNCTION__,
+			announced << 2,
+			(announced < consumed) ? '<' : '>',
+			consumed << 2);
+	return -EIO;
+}
+
+/*
+ * Decode a change_info structure: a 32-bit "atomic" flag followed by the
+ * 64-bit "before" and "after" values (20 bytes in total).
+ * Returns 0 once all three fields have been read.
+ */
+static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+{
+	uint32_t *p;
+
+	READ_BUF(4 + 8 + 8);
+	READ32(cinfo->atomic);
+	READ64(cinfo->before);
+	READ64(cinfo->after);
+
+	return 0;
+}
+
+/*
+ * Decode the result of an ACCESS operation: the operation header, then
+ * two 32-bit words stored into access->supported and access->access.
+ * Returns the decode_op_hdr() status if that is non-zero, else 0.
+ */
+static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
+{
+	uint32_t supported_mask, access_mask;
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_ACCESS);
+
+	if (status != 0)
+		return status;
+	READ_BUF(4 + 4);
+	READ32(supported_mask);
+	READ32(access_mask);
+	access->access = access_mask;
+	access->supported = supported_mask;
+	return 0;
+}
+
+/*
+ * Decode the result of a CLOSE operation: the operation header followed
+ * by a stateid, which is copied into res->stateid.data.
+ * Returns the decode_op_hdr() status.
+ */
+static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+{
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_CLOSE);
+
+	if (status == 0) {
+		READ_BUF(sizeof(res->stateid.data));
+		COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+	}
+	return status;
+}
+
+/*
+ * Decode the result of a COMMIT operation: the operation header followed
+ * by an 8-byte verifier, which is copied into res->verf->verifier.
+ * Returns the decode_op_hdr() status.
+ */
+static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
+{
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_COMMIT);
+
+	if (status == 0) {
+		READ_BUF(8);
+		COPYMEM(res->verf->verifier, 8);
+	}
+	return status;
+}
+
+/*
+ * Decode the result of a CREATE operation: the operation header, the
+ * directory change_info (stored in @cinfo), and an attribute bitmap that
+ * is skipped over without being inspected.
+ * Returns the first non-zero status from the helpers, else 0.
+ */
+static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+{
+	uint32_t bitmap_words;
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_CREATE);
+	if (status != 0)
+		return status;
+	status = decode_change_info(xdr, cinfo);
+	if (status != 0)
+		return status;
+	READ_BUF(4);
+	READ32(bitmap_words);
+	/* Step over the bitmap: bitmap_words 32-bit words. */
+	READ_BUF(bitmap_words << 2);
+	return 0;
+}
+
+/*
+ * Decode a GETATTR reply carrying the server capability attributes:
+ * supported attributes, link support, symlink support and ACL support,
+ * in that order, followed by a check of the announced attribute length.
+ * Any non-zero status stops decoding, is logged, and is returned.
+ */
+static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
+{
+	uint32_t bitmap[2] = {0};
+	uint32_t attrlen;
+	uint32_t *savep;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_GETATTR);
+	if (status == 0)
+		status = decode_attr_bitmap(xdr, bitmap);
+	if (status == 0)
+		status = decode_attr_length(xdr, &attrlen, &savep);
+	if (status == 0)
+		status = decode_attr_supported(xdr, bitmap, res->attr_bitmask);
+	if (status == 0)
+		status = decode_attr_link_support(xdr, bitmap, &res->has_links);
+	if (status == 0)
+		status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks);
+	if (status == 0)
+		status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask);
+	if (status == 0)
+		status = verify_attr_len(xdr, savep, attrlen);
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+/*
+ * Decode a GETATTR reply carrying filesystem statistics into @fsstat:
+ * files avail/free/total, then space avail/free/total, in that order,
+ * followed by a check of the announced attribute length.
+ * Any non-zero status stops decoding, is logged, and is returned.
+ */
+static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
+{
+	uint32_t bitmap[2] = {0};
+	uint32_t attrlen;
+	uint32_t *savep;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_GETATTR);
+	if (status == 0)
+		status = decode_attr_bitmap(xdr, bitmap);
+	if (status == 0)
+		status = decode_attr_length(xdr, &attrlen, &savep);
+
+	if (status == 0)
+		status = decode_attr_files_avail(xdr, bitmap, &fsstat->afiles);
+	if (status == 0)
+		status = decode_attr_files_free(xdr, bitmap, &fsstat->ffiles);
+	if (status == 0)
+		status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles);
+	if (status == 0)
+		status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes);
+	if (status == 0)
+		status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes);
+	if (status == 0)
+		status = decode_attr_space_total(xdr, bitmap, &fsstat->tbytes);
+
+	if (status == 0)
+		status = verify_attr_len(xdr, savep, attrlen);
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+/*
+ * Decode a GETATTR reply carrying the pathconf attributes into
+ * @pathconf: maxlink then maxname, followed by a check of the announced
+ * attribute length.
+ * Any non-zero status stops decoding, is logged, and is returned.
+ */
+static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
+{
+	uint32_t bitmap[2] = {0};
+	uint32_t attrlen;
+	uint32_t *savep;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_GETATTR);
+	if (status == 0)
+		status = decode_attr_bitmap(xdr, bitmap);
+	if (status == 0)
+		status = decode_attr_length(xdr, &attrlen, &savep);
+
+	if (status == 0)
+		status = decode_attr_maxlink(xdr, bitmap, &pathconf->max_link);
+	if (status == 0)
+		status = decode_attr_maxname(xdr, bitmap, &pathconf->max_namelen);
+
+	if (status == 0)
+		status = verify_attr_len(xdr, savep, attrlen);
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+/*
+ * Decode a GETATTR reply into @fattr.
+ *
+ * The returned attribute bitmap is saved in fattr->bitmap before the
+ * individual attributes are consumed.  The attributes are then decoded in
+ * a fixed order: type, change, size, fsid, fileid, mode, nlink, owner,
+ * group, rdev, space_used, atime, ctime, mtime.  The file-type bits
+ * looked up in nfs_type2fmt[] are OR-ed into fattr->mode.  fattr->valid
+ * and fattr->timestamp are only set once the attribute length has been
+ * verified.
+ *
+ * Any non-zero status stops decoding, is logged, and is returned.
+ */
+static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server)
+{
+	uint32_t bitmap[2] = {0};
+	uint32_t attrlen, type;
+	uint32_t *savep;
+	int fmode = 0;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_GETATTR);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_bitmap(xdr, bitmap);
+	if (status != 0)
+		goto xdr_error;
+
+	/* Keep a copy: the decoders below clear bits as they go. */
+	fattr->bitmap[0] = bitmap[0];
+	fattr->bitmap[1] = bitmap[1];
+
+	status = decode_attr_length(xdr, &attrlen, &savep);
+	if (status != 0)
+		goto xdr_error;
+
+	status = decode_attr_type(xdr, bitmap, &type);
+	if (status != 0)
+		goto xdr_error;
+	fmode = nfs_type2fmt[type].mode;
+	fattr->type = nfs_type2fmt[type].nfs2type;
+
+	status = decode_attr_change(xdr, bitmap, &fattr->change_attr);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_size(xdr, bitmap, &fattr->size);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_mode(xdr, bitmap, &fattr->mode);
+	if (status != 0)
+		goto xdr_error;
+	fattr->mode |= fmode;
+	status = decode_attr_nlink(xdr, bitmap, &fattr->nlink);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_rdev(xdr, bitmap, &fattr->rdev);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_time_access(xdr, bitmap, &fattr->atime);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime);
+	if (status != 0)
+		goto xdr_error;
+	status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime);
+	if (status != 0)
+		goto xdr_error;
+	status = verify_attr_len(xdr, savep, attrlen);
+	if (status != 0)
+		goto xdr_error;
+
+	fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
+	fattr->timestamp = jiffies;
+	return 0;
+xdr_error:
+	printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+
+/*
+ * Decode a GETATTR reply carrying the fsinfo attributes into @fsinfo:
+ * lease_time, maxfilesize, maxread and maxwrite, in that order, followed
+ * by a check of the announced attribute length.
+ *
+ * rtmult/wtmult are set to a fixed 512; rtpref and dtpref mirror the
+ * decoded rtmax, and wtpref mirrors wtmax.
+ * Any non-zero status stops decoding, is logged, and is returned.
+ */
+static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
+{
+	uint32_t *savep;
+	/* Zero-initialised like the bitmap in the sibling GETATTR decoders. */
+	uint32_t attrlen,
+		 bitmap[2] = {0};
+	int status;
+
+	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+		goto xdr_error;
+
+	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
+
+	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
+		goto xdr_error;
+	if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
+		goto xdr_error;
+	fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax;
+	if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
+		goto xdr_error;
+	fsinfo->wtpref = fsinfo->wtmax;
+
+	status = verify_attr_len(xdr, savep, attrlen);
+xdr_error:
+	if (status != 0)
+		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	return status;
+}
+
+/*
+ * Decode the result of a GETFH operation into @fh.
+ *
+ * @fh is zeroed before it is filled in, so that handles can be compared.
+ * Returns the decode_op_hdr() status if that is non-zero, -EIO when the
+ * announced handle length exceeds NFS4_FHSIZE, and 0 on success.
+ */
+static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	uint32_t fhlen;
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_GETFH);
+
+	if (status != 0)
+		return status;
+
+	/* Clear the whole handle first to allow comparisons. */
+	memset(fh, 0, sizeof(*fh));
+
+	READ_BUF(4);
+	READ32(fhlen);
+	if (fhlen > NFS4_FHSIZE)
+		return -EIO;
+	fh->size = fhlen;
+	READ_BUF(fhlen);
+	COPYMEM(fh->data, fhlen);
+	return 0;
+}
+
+static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_LINK);
+ if (status)
+ return status;
+ return decode_change_info(xdr, cinfo);
+}
+
+/*
+ * Decode the description of a conflicting lock: offset, length, type,
+ * owner clientid and the length of the owner name (32 bytes in total),
+ * followed by the owner name itself.
+ *
+ * We create the owner, so we know a proper owner.id length is 4; names of
+ * any other length are stepped over without being stored.
+ * Always returns -NFS4ERR_DENIED once the data has been read.
+ */
+static int decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied)
+{
+	uint32_t owner_len;
+	uint32_t *p;
+
+	READ_BUF(8 + 8 + 4 + 8 + 4);
+	READ64(denied->offset);
+	READ64(denied->length);
+	READ32(denied->type);
+	READ64(denied->owner.clientid);
+	READ32(owner_len);
+	READ_BUF(owner_len);
+	if (owner_len == 4) {
+		READ32(denied->owner.id);
+	}
+	return -NFS4ERR_DENIED;
+}
+
+/*
+ * Decode the result of a LOCK operation.
+ *
+ * On success the returned stateid is copied into res->u.stateid.  When
+ * the header status is -NFS4ERR_DENIED the conflicting lock is decoded
+ * into res->u.denied instead.  Any other status is returned unchanged.
+ */
+static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
+{
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_LOCK);
+
+	if (status == -NFS4ERR_DENIED)
+		return decode_lock_denied(xdr, &res->u.denied);
+	if (status != 0)
+		return status;
+	READ_BUF(sizeof(nfs4_stateid));
+	COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+	return 0;
+}
+
+/*
+ * Decode the result of a LOCKT operation.  When the header status is
+ * -NFS4ERR_DENIED the conflicting lock is decoded into res->u.denied;
+ * any other status is returned unchanged.
+ */
+static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res)
+{
+	int status = decode_op_hdr(xdr, OP_LOCKT);
+
+	if (status != -NFS4ERR_DENIED)
+		return status;
+	return decode_lock_denied(xdr, &res->u.denied);
+}
+
+/*
+ * Decode the result of a LOCKU operation: on success the returned
+ * stateid is copied into res->u.stateid.
+ * Returns the decode_op_hdr() status.
+ */
+static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
+{
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_LOCKU);
+	if (status != 0)
+		return status;
+	READ_BUF(sizeof(nfs4_stateid));
+	COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+	return 0;
+}
+
+/* Decode the LOOKUP operation header; nothing else is read here. */
+static int decode_lookup(struct xdr_stream *xdr)
+{
+	int status = decode_op_hdr(xdr, OP_LOOKUP);
+
+	return status;
+}
+
+/*
+ * This is too sick!
+ *
+ * Decode a space limit: a 32-bit limit type followed by 8 bytes whose
+ * meaning depends on it.  Type 1 carries a 64-bit size; type 2 carries a
+ * block count and a block size, whose product is stored.
+ * NOTE(review): for any other limit type *maxsize is left untouched and
+ * 0 is still returned - confirm that callers can cope with that.
+ */
+static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
+{
+	uint32_t limit_type;
+	uint32_t nblocks, blocksize;
+	uint32_t *p;
+
+	READ_BUF(4 + 8);
+	READ32(limit_type);
+	if (limit_type == 1) {
+		READ64(*maxsize);
+	} else if (limit_type == 2) {
+		READ32(nblocks);
+		READ32(blocksize);
+		*maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
+	}
+	return 0;
+}
+
+/*
+ * Decode the delegation part of an OPEN reply into @res.
+ *
+ * With NFS4_OPEN_DELEGATE_NONE, res->delegation_type is set to 0 and
+ * nothing more is read.  Otherwise the delegation stateid and the recall
+ * flag are read (20 bytes), the delegation type is translated to FMODE_*
+ * bits, a write delegation additionally carries a space limit, and the
+ * trailing ACE is handed to decode_ace() with a NULL destination.
+ * Returns -EIO if the space limit cannot be decoded, else the
+ * decode_ace() status.
+ */
+static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
+{
+	uint32_t deleg_type;
+	uint32_t *p;
+
+	READ_BUF(4);
+	READ32(deleg_type);
+	if (deleg_type == NFS4_OPEN_DELEGATE_NONE) {
+		res->delegation_type = 0;
+		return 0;
+	}
+	READ_BUF(20);
+	COPYMEM(res->delegation.data, sizeof(res->delegation.data));
+	READ32(res->do_recall);
+	if (deleg_type == NFS4_OPEN_DELEGATE_READ) {
+		res->delegation_type = FMODE_READ;
+	} else if (deleg_type == NFS4_OPEN_DELEGATE_WRITE) {
+		res->delegation_type = FMODE_WRITE|FMODE_READ;
+		if (decode_space_limit(xdr, &res->maxsize) < 0)
+			return -EIO;
+	}
+	return decode_ace(xdr, NULL, res->server->nfs4_state);
+}
+
+/*
+ * Decode the result of an OPEN operation into @res: the operation
+ * header, the open stateid, the directory change_info, the result flags,
+ * an attribute bitmap (skipped) and finally the delegation.
+ *
+ * Returns the decode_op_hdr() or decode_change_info() status if either
+ * is non-zero, -EIO when the bitmap is longer than 10 words, and
+ * otherwise the decode_delegation() status.
+ */
+static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
+{
+	uint32_t *p;
+	uint32_t bmlen;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_OPEN);
+	if (status)
+		return status;
+	READ_BUF(sizeof(res->stateid.data));
+	COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+
+	/* Do not carry on with a change_info that could not be decoded. */
+	status = decode_change_info(xdr, &res->cinfo);
+	if (status)
+		return status;
+
+	READ_BUF(8);
+	READ32(res->rflags);
+	READ32(bmlen);
+	if (bmlen > 10)
+		goto xdr_error;
+
+	/* Step over the bitmap; its contents are not used. */
+	READ_BUF(bmlen << 2);
+	return decode_delegation(xdr, res);
+xdr_error:
+	printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__);
+	return -EIO;
+}
+
+/*
+ * Decode the result of an OPEN_CONFIRM operation: the operation header
+ * followed by a stateid, which is copied into res->stateid.data.
+ * Returns the decode_op_hdr() status.
+ */
+static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
+{
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
+
+	if (status == 0) {
+		READ_BUF(sizeof(res->stateid.data));
+		COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+	}
+	return status;
+}
+
+/*
+ * Decode the result of an OPEN_DOWNGRADE operation: the operation header
+ * followed by a stateid, which is copied into res->stateid.data.
+ * Returns the decode_op_hdr() status.
+ */
+static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
+{
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
+
+	if (status == 0) {
+		READ_BUF(sizeof(res->stateid.data));
+		COPYMEM(res->stateid.data, sizeof(res->stateid.data));
+	}
+	return status;
+}
+
+/* Decode the PUTFH operation header; nothing else is read here. */
+static int decode_putfh(struct xdr_stream *xdr)
+{
+	int status = decode_op_hdr(xdr, OP_PUTFH);
+
+	return status;
+}
+
+/* Decode the PUTROOTFH operation header; nothing else is read here. */
+static int decode_putrootfh(struct xdr_stream *xdr)
+{
+	int status = decode_op_hdr(xdr, OP_PUTROOTFH);
+
+	return status;
+}
+
+/*
+ * Decode the result of a READ operation into @res.
+ *
+ * After the operation header come the eof flag and the byte count.  The
+ * count is clamped to what was actually received beyond the header (and
+ * eof is then cleared) before xdr_read_pages() is called for the data.
+ * Returns the decode_op_hdr() status if that is non-zero, else 0.
+ */
+static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
+{
+	struct kvec *head = req->rq_rcv_buf.head;
+	uint32_t eof, count;
+	uint32_t hdrlen, recvd;
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_READ);
+	if (status != 0)
+		return status;
+	READ_BUF(4 + 4);
+	READ32(eof);
+	READ32(count);
+	/* Bytes of the reply that precede the read data. */
+	hdrlen = (u8 *) p - (u8 *) head->iov_base;
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (count > recvd) {
+		printk(KERN_WARNING "NFS: server cheating in read reply: "
+				"count %u > recvd %u\n", count, recvd);
+		eof = 0;
+		count = recvd;
+	}
+	xdr_read_pages(xdr, count);
+	res->count = count;
+	res->eof = eof;
+	return 0;
+}
+
+/*
+ * Decode the result of a READDIR operation.
+ *
+ * The 8-byte verifier is copied into readdir->verifier.data; the entry
+ * list itself is left in the first receive page and only sanity-checked
+ * here.  Each entry is walked as: a non-zero marker word, a two-word
+ * cookie, the name length and name, the bitmap length and bitmap, and the
+ * attribute length and attributes.
+ *
+ * If an entry runs past the received data, the two words at the start of
+ * that entry are zeroed, which cuts the list at the last complete entry.
+ * If not even one entry was complete, the second word is set to 1
+ * (presumably an end-of-directory flag - confirm against the consumer).
+ *
+ * Returns the decode_op_hdr() status if that is non-zero,
+ * -errno_NFSERR_IO for a name longer than NFS4_MAXNAMLEN, else 0.
+ *
+ * NOTE(review): the loop condition reads *p before p is compared with
+ * end, and the bitmap/attribute lengths taken from the wire are added to
+ * p without an upper bound - worth confirming this cannot run off the page.
+ */
+static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
+{
+ struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+ struct page *page = *rcvbuf->pages;
+ struct kvec *iov = rcvbuf->head;
+ unsigned int nr, pglen = rcvbuf->page_len;
+ uint32_t *end, *entry, *p, *kaddr;
+ uint32_t len, attrlen;
+ int hdrlen, recvd, status;
+
+ status = decode_op_hdr(xdr, OP_READDIR);
+ if (status)
+ return status;
+ READ_BUF(8);
+ COPYMEM(readdir->verifier.data, 8);
+
+ /* Clamp the page data length to what was received after the header. */
+ hdrlen = (char *) p - (char *) iov->iov_base;
+ recvd = rcvbuf->len - hdrlen;
+ if (pglen > recvd)
+ pglen = recvd;
+ xdr_read_pages(xdr, pglen);
+
+ BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
+ kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
+ end = (uint32_t *) ((char *)p + pglen + readdir->pgbase);
+ /* entry tracks the start of the entry currently being checked. */
+ entry = p;
+ for (nr = 0; *p++; nr++) {
+ if (p + 3 > end)
+ goto short_pkt;
+ p += 2; /* cookie */
+ len = ntohl(*p++); /* filename length */
+ if (len > NFS4_MAXNAMLEN) {
+ printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
+ goto err_unmap;
+ }
+ p += XDR_QUADLEN(len);
+ if (p + 1 > end)
+ goto short_pkt;
+ len = ntohl(*p++); /* bitmap length */
+ p += len;
+ if (p + 1 > end)
+ goto short_pkt;
+ attrlen = XDR_QUADLEN(ntohl(*p++));
+ p += attrlen; /* attributes */
+ if (p + 2 > end)
+ goto short_pkt;
+ entry = p;
+ }
+ /* No entries at all: the two words at entry must be 0 and non-zero. */
+ if (!nr && (entry[0] != 0 || entry[1] == 0))
+ goto short_pkt;
+out:
+ kunmap_atomic(kaddr, KM_USER0);
+ return 0;
+short_pkt:
+ /* Cut the list off at the start of the incomplete entry. */
+ entry[0] = entry[1] = 0;
+ /* truncate listing ? */
+ if (!nr) {
+ printk(KERN_NOTICE "NFS: readdir reply truncated!\n");
+ entry[1] = 1;
+ }
+ goto out;
+err_unmap:
+ kunmap_atomic(kaddr, KM_USER0);
+ return -errno_NFSERR_IO;
+}
+
+/*
+ * Decode the result of a READLINK operation.
+ *
+ * The link text itself lands in the receive pages; this routine checks
+ * the announced length and null-terminates the text in place.
+ * Returns the decode_op_hdr() status if that is non-zero, -ENAMETOOLONG
+ * when the length is not positive or does not fit below
+ * rcvbuf->page_len, -EIO when fewer bytes were received than announced,
+ * and 0 on success.
+ */
+static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
+{
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *head = rcvbuf->head;
+	int len, hdrlen, recvd;
+	char *text;
+	uint32_t *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_READLINK);
+	if (status != 0)
+		return status;
+
+	/* Convert length of symlink */
+	READ_BUF(4);
+	READ32(len);
+	if (len >= rcvbuf->page_len || len <= 0) {
+		dprintk(KERN_WARNING "nfs: server returned giant symlink!\n");
+		return -ENAMETOOLONG;
+	}
+	hdrlen = (char *) xdr->p - (char *) head->iov_base;
+	recvd = rcvbuf->len - hdrlen;
+	if (recvd < len) {
+		printk(KERN_WARNING "NFS: server cheating in readlink reply: "
+				"count %u > recvd %u\n", len, recvd);
+		return -EIO;
+	}
+	xdr_read_pages(xdr, len);
+	/*
+	 * The XDR encode routine has set things up so that
+	 * the link text will be copied directly into the
+	 * buffer.  We just have to do overflow-checking,
+	 * and null-terminate the text (the VFS expects
+	 * null-termination).
+	 */
+	text = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
+	text[len+rcvbuf->page_base] = '\0';
+	kunmap_atomic(text, KM_USER0);
+	return 0;
+}
+
+/*
+ * Decode the result of a REMOVE operation: the operation header followed
+ * by the directory change_info, stored in @cinfo.
+ * Returns the first non-zero status from the helpers, else 0.
+ */
+static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+{
+	int status = decode_op_hdr(xdr, OP_REMOVE);
+
+	if (status != 0)
+		return status;
+	return decode_change_info(xdr, cinfo);
+}
+
+/*
+ * Decode the result of a RENAME operation: the operation header followed
+ * by two change_info structures, stored in @old_cinfo and @new_cinfo in
+ * that order.
+ * Returns the first non-zero status from the helpers, else 0.
+ */
+static int decode_rename(struct xdr_stream *xdr, struct nfs4_change_info *old_cinfo,
+	struct nfs4_change_info *new_cinfo)
+{
+	int status = decode_op_hdr(xdr, OP_RENAME);
+
+	if (status != 0)
+		return status;
+	status = decode_change_info(xdr, old_cinfo);
+	if (status != 0)
+		return status;
+	return decode_change_info(xdr, new_cinfo);
+}
+
+/* Decode the RENEW operation header; nothing else is read here. */
+static int decode_renew(struct xdr_stream *xdr)
+{
+	int status = decode_op_hdr(xdr, OP_RENEW);
+
+	return status;
+}
+
+/* Decode the SAVEFH operation header; nothing else is read here. */
+static int decode_savefh(struct xdr_stream *xdr)
+{
+	int status = decode_op_hdr(xdr, OP_SAVEFH);
+
+	return status;
+}
+
+/*
+ * Decode the result of a SETATTR operation: the operation header
+ * followed by an attribute bitmap, which is skipped without being
+ * inspected.  @res is not written by this routine.
+ * Returns the decode_op_hdr() status if that is non-zero, else 0.
+ */
+static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
+{
+	uint32_t bitmap_words;
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_SETATTR);
+
+	if (status != 0)
+		return status;
+	READ_BUF(4);
+	READ32(bitmap_words);
+	/* Step over the bitmap: bitmap_words 32-bit words. */
+	READ_BUF(bitmap_words << 2);
+	return 0;
+}
+
+/*
+ * Decode the result of a SETCLIENTID operation into @clp.
+ *
+ * The operation number and status are read directly.  On NFS_OK the
+ * client id and the confirmation verifier are stored in @clp.  On
+ * NFSERR_CLID_INUSE the netid and uaddr strings are skipped and
+ * -NFSERR_CLID_INUSE is returned.  Any other status is converted with
+ * nfs_stat_to_errno() and negated.  Returns -EIO when the reply is for
+ * a different operation.
+ */
+static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
+{
+	uint32_t opnum, len;
+	int32_t nfserr;
+	uint32_t *p;
+
+	READ_BUF(4 + 4);
+	READ32(opnum);
+	if (opnum != OP_SETCLIENTID) {
+		printk(KERN_NOTICE
+				"nfs4_decode_setclientid: Server returned operation"
+				" %d\n", opnum);
+		return -EIO;
+	}
+	READ32(nfserr);
+	if (nfserr == NFS_OK) {
+		READ_BUF(8 + sizeof(clp->cl_confirm.data));
+		READ64(clp->cl_clientid);
+		COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data));
+		return 0;
+	}
+	if (nfserr != NFSERR_CLID_INUSE)
+		return -nfs_stat_to_errno(nfserr);
+
+	/* skip netid string */
+	READ_BUF(4);
+	READ32(len);
+	READ_BUF(len);
+
+	/* skip uaddr string */
+	READ_BUF(4);
+	READ32(len);
+	READ_BUF(len);
+	return -NFSERR_CLID_INUSE;
+}
+
+/* Decode the SETCLIENTID_CONFIRM operation header; nothing else is read here. */
+static int decode_setclientid_confirm(struct xdr_stream *xdr)
+{
+	int status = decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
+
+	return status;
+}
+
+/*
+ * Decode the result of a WRITE operation into @res: the operation
+ * header, the byte count, the "committed" value and an 8-byte verifier
+ * (16 bytes after the header).
+ * Returns the decode_op_hdr() status if that is non-zero, else 0.
+ */
+static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
+{
+	uint32_t *p;
+	int status = decode_op_hdr(xdr, OP_WRITE);
+
+	if (status != 0)
+		return status;
+
+	READ_BUF(4 + 4 + 8);
+	READ32(res->count);
+	READ32(res->verf->committed);
+	COPYMEM(res->verf->verifier, 8);
+	return 0;
+}
+
+/* Decode the DELEGRETURN operation header; nothing else is read here. */
+static int decode_delegreturn(struct xdr_stream *xdr)
+{
+	int status = decode_op_hdr(xdr, OP_DELEGRETURN);
+
+	return status;
+}
+
+/*
+ * Decode OPEN_DOWNGRADE response
+ *
+ * The compound is decoded as: compound header, PUTFH, OPEN_DOWNGRADE.
+ * Decoding stops at the first non-zero status, which is returned.
+ */
+static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_open_downgrade(&xdr, res);
+	return status;
+}
+
+/*
+ * END OF "GENERIC" DECODE ROUTINES.
+ */
+
+/*
+ * Decode ACCESS response
+ *
+ * The compound is decoded as: compound header, PUTFH, ACCESS.
+ * Decoding stops at the first non-zero status, which is returned.
+ */
+static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_accessres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status != 0)
+		return status;
+	status = decode_putfh(&xdr);
+	if (status != 0)
+		return status;
+	return decode_access(&xdr, res);
+}
+
+/*
+ * Decode LOOKUP response
+ *
+ * The compound is decoded as: compound header, PUTFH, LOOKUP, GETFH
+ * (into res->fh), GETATTR (into res->fattr).
+ * Decoding stops at the first non-zero status, which is returned.
+ */
+static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_lookup(&xdr);
+	if (status == 0)
+		status = decode_getfh(&xdr, res->fh);
+	if (status == 0)
+		status = decode_getfattr(&xdr, res->fattr, res->server);
+	return status;
+}
+
+/*
+ * Decode LOOKUP_ROOT response
+ *
+ * The compound is decoded as: compound header, PUTROOTFH, GETFH (into
+ * res->fh), GETATTR (into res->fattr).
+ * Decoding stops at the first non-zero status, which is returned.
+ */
+static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookup_res *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putrootfh(&xdr);
+	if (status == 0)
+		status = decode_getfh(&xdr, res->fh);
+	if (status == 0)
+		status = decode_getfattr(&xdr, res->fattr, res->server);
+	return status;
+}
+
+/*
+ * Decode REMOVE response
+ *
+ * The compound is decoded as: compound header, PUTFH, REMOVE (change
+ * info stored in @cinfo).
+ * Decoding stops at the first non-zero status, which is returned.
+ */
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status != 0)
+		return status;
+	status = decode_putfh(&xdr);
+	if (status != 0)
+		return status;
+	return decode_remove(&xdr, cinfo);
+}
+
+/*
+ * Decode RENAME response
+ *
+ * The compound is decoded as: compound header, PUTFH, SAVEFH, PUTFH,
+ * RENAME (change info stored in res->old_cinfo and res->new_cinfo).
+ * Decoding stops at the first non-zero status, which is returned.
+ */
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_rename_res *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_savefh(&xdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo);
+	return status;
+}
+
+/*
+ * Decode LINK response
+ *
+ * The compound is decoded as: compound header, PUTFH, SAVEFH, PUTFH,
+ * LINK (change info stored in @cinfo).
+ * Decoding stops at the first non-zero status, which is returned.
+ */
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_savefh(&xdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_link(&xdr, cinfo);
+	return status;
+}
+
+/*
+ * Decode CREATE response
+ *
+ * The compound is decoded as: compound header, PUTFH, CREATE (change
+ * info stored in res->dir_cinfo), GETFH (into res->fh), GETATTR (into
+ * res->fattr).  Decoding stops at the first non-zero status, which is
+ * returned - except that a DELAY error on the trailing GETATTR is
+ * deliberately turned into success.
+ */
+static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
+{
+	struct xdr_stream xdr;
+	struct compound_hdr hdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		goto out;
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
+		goto out;
+	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+		goto out;
+	status = decode_getfattr(&xdr, res->fattr, res->server);
+	/*
+	 * The decoders hand back negated error codes (compare the
+	 * -NFS4ERR_DENIED test in decode_lock), so the comparison has to
+	 * use the negated value as well.
+	 */
+	if (status == -NFS4ERR_DELAY)
+		status = 0;
+out:
+	return status;
+}
+
+/*
+ * Decode SYMLINK response
+ *
+ * Simply forwards to nfs4_xdr_dec_create() with the same arguments and
+ * returns whatever it returns.
+ */
+static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
+{
+ return nfs4_xdr_dec_create(rqstp, p, res);
+}
+
+/*
+ * Decode GETATTR response
+ *
+ * The reply is walked as: compound header, PUTFH, GETATTR.  The first
+ * non-zero status is returned; otherwise the result of decode_getfattr()
+ * (given res->fattr and res->server) is returned unchanged.
+ */
+static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_getfattr(&xdr, res->fattr, res->server);
+	return status;
+}
+
+
+/*
+ * Decode CLOSE response
+ *
+ * The reply is walked as: compound header, PUTFH, CLOSE.  Returns the
+ * first non-zero status, or the result of decode_close().
+ */
+static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_close(&xdr, res);
+	return status;
+}
+
+/*
+ * Decode OPEN response
+ *
+ * The reply is walked as: compound header, PUTFH, OPEN, GETFH, GETATTR.
+ * A failure in any step before the attribute decode is returned as is.
+ * An NFS4ERR_DELAY result from the final attribute decode is reported
+ * as 0.
+ */
+static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		return status;
+	if ((status = decode_putfh(&xdr)) != 0)
+		return status;
+	if ((status = decode_open(&xdr, res)) != 0)
+		return status;
+	if ((status = decode_getfh(&xdr, &res->fh)) != 0)
+		return status;
+	status = decode_getfattr(&xdr, res->f_attr, res->server);
+	return (status == NFS4ERR_DELAY) ? 0 : status;
+}
+
+/*
+ * Decode OPEN_CONFIRM response
+ *
+ * The reply is walked as: compound header, PUTFH, OPEN_CONFIRM.  Returns
+ * the first non-zero status, or the result of decode_open_confirm().
+ */
+static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_open_confirmres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_open_confirm(&xdr, res);
+	return status;
+}
+
+/*
+ * Decode OPEN_NOATTR response
+ *
+ * Same as the OPEN decoder up to and including decode_open(), but no
+ * file handle or attributes are decoded afterwards.  Returns the first
+ * non-zero status, or the result of decode_open().
+ */
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_open(&xdr, res);
+	return status;
+}
+
+/*
+ * Decode SETATTR response
+ *
+ * The reply is walked as: compound header, PUTFH, SETATTR, GETATTR.
+ * A failure before the attribute decode is returned as is.  An
+ * NFS4ERR_DELAY result from the final attribute decode is reported as 0.
+ */
+static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_setattrres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		return status;
+	if ((status = decode_putfh(&xdr)) != 0)
+		return status;
+	if ((status = decode_setattr(&xdr, res)) != 0)
+		return status;
+	status = decode_getfattr(&xdr, res->fattr, res->server);
+	return (status == NFS4ERR_DELAY) ? 0 : status;
+}
+
+/*
+ * Decode LOCK response
+ *
+ * The reply is walked as: compound header, PUTFH, LOCK.  Returns the
+ * first non-zero status, or the result of decode_lock().
+ */
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_lock(&xdr, res);
+	return status;
+}
+
+/*
+ * Decode LOCKT response
+ *
+ * The reply is walked as: compound header, PUTFH, LOCKT.  Returns the
+ * first non-zero status, or the result of decode_lockt().
+ */
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_lockt(&xdr, res);
+	return status;
+}
+
+/*
+ * Decode LOCKU response
+ *
+ * The reply is walked as: compound header, PUTFH, LOCKU.  Returns the
+ * first non-zero status, or the result of decode_locku().
+ */
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_locku(&xdr, res);
+	return status;
+}
+
+/*
+ * Decode READLINK response
+ *
+ * The reply is walked as: compound header, PUTFH, READLINK.  The res
+ * argument is not used here; decode_readlink() is given the request
+ * itself.  Returns the first non-zero status, or the result of
+ * decode_readlink().
+ */
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, uint32_t *p, void *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_readlink(&xdr, rqstp);
+	return status;
+}
+
+/*
+ * Decode READDIR response
+ *
+ * The reply is walked as: compound header, PUTFH, READDIR.  Returns the
+ * first non-zero status, or the result of decode_readdir().
+ */
+static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_readdir_res *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_readdir(&xdr, rqstp, res);
+	return status;
+}
+
+/*
+ * Decode READ response
+ *
+ * The reply is walked as: compound header, PUTFH, READ.  A non-zero
+ * status from any step is returned as is.  When every step reports 0
+ * the return value is res->count instead.
+ */
+static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_readres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		return status;
+	if ((status = decode_putfh(&xdr)) != 0)
+		return status;
+	if ((status = decode_read(&xdr, rqstp, res)) != 0)
+		return status;
+	status = res->count;
+	return status;
+}
+
+/*
+ * Decode WRITE response
+ *
+ * The reply is walked as: compound header, PUTFH, WRITE.  A non-zero
+ * status from any step is returned as is.  When every step reports 0
+ * the return value is res->count instead.
+ */
+static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
+		return status;
+	if ((status = decode_putfh(&xdr)) != 0)
+		return status;
+	if ((status = decode_write(&xdr, res)) != 0)
+		return status;
+	status = res->count;
+	return status;
+}
+
+/*
+ * Decode COMMIT response
+ *
+ * The reply is walked as: compound header, PUTFH, COMMIT.  Returns the
+ * first non-zero status, or the result of decode_commit().
+ */
+static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_writeres *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_commit(&xdr, res);
+	return status;
+}
+
+/*
+ * Decode FSINFO response
+ *
+ * The reply is walked as: compound header, PUTFH, FSINFO.  If a step
+ * reports a non-zero status it is returned directly.  Otherwise the
+ * compound status stored in hdr.status is translated with
+ * nfs_stat_to_errno() and returned negated.
+ */
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_fsinfo(&xdr, fsinfo);
+	if (status)
+		goto out;
+	status = -nfs_stat_to_errno(hdr.status);
+out:
+	return status;
+}
+
+/*
+ * Decode PATHCONF response
+ *
+ * The reply is walked as: compound header, PUTFH, PATHCONF.  Returns the
+ * first non-zero status, or the result of decode_pathconf().
+ */
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_pathconf *pathconf)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_pathconf(&xdr, pathconf);
+out:
+	return status;
+}
+
+/*
+ * Decode STATFS response
+ *
+ * The reply is walked as: compound header, PUTFH, STATFS.  Returns the
+ * first non-zero status, or the result of decode_statfs().
+ */
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fsstat *fsstat)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_statfs(&xdr, fsstat);
+out:
+	return status;
+}
+
+/*
+ * Decode GETATTR_BITMAP (server capabilities) response
+ *
+ * The reply is walked as: compound header, PUTFH, then
+ * decode_server_caps().  Returns the first non-zero status, or the
+ * result of decode_server_caps().
+ */
+static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, uint32_t *p, struct nfs4_server_caps_res *res)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_putfh(&xdr);
+	if (status == 0)
+		status = decode_server_caps(&xdr, res);
+	return status;
+}
+
+/*
+ * Decode RENEW response
+ *
+ * Only the compound header and the RENEW result are decoded; the dummy
+ * argument is not used.  Returns the first non-zero status, or the
+ * result of decode_renew().
+ */
+static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		return status;
+	return decode_renew(&xdr);
+}
+
+/*
+ * Decode SETCLIENTID response
+ *
+ * Decodes the compound header and then the SETCLIENTID result (clp is
+ * handed to decode_setclientid()).  A non-zero status from either step
+ * is returned directly.  Otherwise hdr.status is translated with
+ * nfs_stat_to_errno() and returned negated.
+ */
+static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
+		struct nfs4_client *clp)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_setclientid(&xdr, clp);
+	if (status)
+		goto out;
+	status = -nfs_stat_to_errno(hdr.status);
+out:
+	return status;
+}
+
+/*
+ * Decode SETCLIENTID_CONFIRM response
+ *
+ * The reply is walked as: compound header, SETCLIENTID_CONFIRM,
+ * PUTROOTFH, FSINFO.  A non-zero status from any step is returned
+ * directly.  Otherwise hdr.status is translated with nfs_stat_to_errno()
+ * and returned negated.
+ */
+static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_setclientid_confirm(&xdr);
+	if (status)
+		goto out;
+	status = decode_putrootfh(&xdr);
+	if (status)
+		goto out;
+	status = decode_fsinfo(&xdr, fsinfo);
+	if (status)
+		goto out;
+	status = -nfs_stat_to_errno(hdr.status);
+out:
+	return status;
+}
+
+/*
+ * Decode DELEGRETURN response
+ *
+ * The reply is walked as: compound header, PUTFH, DELEGRETURN.  The
+ * dummy argument is not used.  Returns the first non-zero status, or
+ * the result of decode_delegreturn().
+ */
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
+{
+	struct compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	status = decode_compound_hdr(&xdr, &hdr);
+	if (status != 0)
+		return status;
+	status = decode_putfh(&xdr);
+	if (status != 0)
+		return status;
+	return decode_delegreturn(&xdr);
+}
+
+/*
+ * Decode one directory entry starting at @p into @entry.
+ *
+ * Fills in entry->prev_cookie, ->cookie, ->len, ->name (which points into
+ * the buffer at @p, it is not copied), ->ino and ->eof.  The @plus
+ * argument is not referenced by this function.
+ *
+ * Returns the position just past the decoded entry, or an ERR_PTR():
+ * -EAGAIN when the first two words are both zero, -EBADCOOKIE (with
+ * entry->eof set) when the first word is zero and the second is not.
+ *
+ * NOTE(review): no buffer bounds are checked here; presumably the caller
+ * has validated the reply before entries are walked - confirm.
+ */
+uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus)
+{
+ uint32_t bitmap[2] = {0};
+ uint32_t len;
+
+ /* A zero leading word means there is no entry to decode here. */
+ if (!*p++) {
+ if (!*p)
+ return ERR_PTR(-EAGAIN);
+ entry->eof = 1;
+ return ERR_PTR(-EBADCOOKIE);
+ }
+
+ /* Remember the previous cookie before overwriting it with the new one. */
+ entry->prev_cookie = entry->cookie;
+ p = xdr_decode_hyper(p, &entry->cookie);
+ entry->len = ntohl(*p++);
+ entry->name = (const char *) p;
+ p += XDR_QUADLEN(entry->len);
+
+ /*
+ * In case the server doesn't return an inode number,
+ * we fake one here. (We don't use inode number 0,
+ * since glibc seems to choke on it...)
+ */
+ entry->ino = 1;
+
+ /* Keep at most the first two bitmap words; any further words are skipped. */
+ len = ntohl(*p++); /* bitmap length */
+ if (len-- > 0) {
+ bitmap[0] = ntohl(*p++);
+ if (len-- > 0) {
+ bitmap[1] = ntohl(*p++);
+ p += len;
+ }
+ }
+ len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */
+ if (len > 0) {
+ /*
+ * The inode number is only taken from the attribute data when the
+ * bitmap is exactly "mounted-on fileid" alone, or when word 0 is
+ * exactly FATTR4_WORD0_FILEID (word 1 is not examined in that case).
+ */
+ if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID)
+ xdr_decode_hyper(p, &entry->ino);
+ else if (bitmap[0] == FATTR4_WORD0_FILEID)
+ xdr_decode_hyper(p, &entry->ino);
+ p += len;
+ }
+
+ /* eof: the next word is zero and the word after it is non-zero. */
+ entry->eof = !p[0] && p[1];
+ return p;
+}
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ *
+ * Each row pairs an NFS4 status with a positive local errno value.  The
+ * table is terminated by the row whose stat is -1; nfs_stat_to_errno()
+ * stops scanning there, so that row's errno is never returned by the
+ * lookup loop.
+ */
+static struct {
+ int stat;
+ int errno;
+} nfs_errtbl[] = {
+ { NFS4_OK, 0 },
+ { NFS4ERR_PERM, EPERM },
+ { NFS4ERR_NOENT, ENOENT },
+ { NFS4ERR_IO, errno_NFSERR_IO },
+ { NFS4ERR_NXIO, ENXIO },
+ { NFS4ERR_ACCESS, EACCES },
+ { NFS4ERR_EXIST, EEXIST },
+ { NFS4ERR_XDEV, EXDEV },
+ { NFS4ERR_NOTDIR, ENOTDIR },
+ { NFS4ERR_ISDIR, EISDIR },
+ { NFS4ERR_INVAL, EINVAL },
+ { NFS4ERR_FBIG, EFBIG },
+ { NFS4ERR_NOSPC, ENOSPC },
+ { NFS4ERR_ROFS, EROFS },
+ { NFS4ERR_MLINK, EMLINK },
+ { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
+ { NFS4ERR_NOTEMPTY, ENOTEMPTY },
+ { NFS4ERR_DQUOT, EDQUOT },
+ { NFS4ERR_STALE, ESTALE },
+ { NFS4ERR_BADHANDLE, EBADHANDLE },
+ { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
+ { NFS4ERR_NOTSUPP, ENOTSUPP },
+ { NFS4ERR_TOOSMALL, ETOOSMALL },
+ { NFS4ERR_SERVERFAULT, ESERVERFAULT },
+ { NFS4ERR_BADTYPE, EBADTYPE },
+ { NFS4ERR_LOCKED, EAGAIN },
+ { NFS4ERR_RESOURCE, EREMOTEIO },
+ { NFS4ERR_SYMLINK, ELOOP },
+ { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
+ { NFS4ERR_DEADLOCK, EDEADLK },
+ { NFS4ERR_WRONGSEC, EPERM }, /* FIXME: this needs
+ * to be handled by a
+ * middle-layer.
+ */
+ { -1, EIO }
+};
+
+/*
+ * Convert an NFS4 status code to a local (positive) error number.
+ *
+ * A status listed in nfs_errtbl maps to the errno recorded there.  An
+ * unlisted status in the range 10001..10100 is passed through unchanged
+ * so that the recovery routines can handle it; those values should not
+ * conflict with native Linux error codes.  Anything else yields
+ * ESERVERFAULT.
+ */
+static int
+nfs_stat_to_errno(int stat)
+{
+	int idx = 0;
+
+	while (nfs_errtbl[idx].stat != -1) {
+		if (nfs_errtbl[idx].stat == stat)
+			return nfs_errtbl[idx].errno;
+		idx++;
+	}
+
+	/* Untranslated NFSv4 codes are handed back as they are. */
+	if (stat > 10000 && stat <= 10100)
+		return stat;
+
+	/* The server is looney tunes. */
+	return ESERVERFAULT;
+}
+
+/*
+ * MAX evaluates each argument more than once; only pass expressions
+ * without side effects.
+ */
+#ifndef MAX
+# define MAX(a, b) (((a) > (b))? (a) : (b))
+#endif
+
+/*
+ * Build one nfs4_procedures[] entry, placed at index NFSPROC4_CLNT_<proc>.
+ * argtype/restype are pasted onto "nfs4_xdr_" to name the encode/decode
+ * routines and onto "NFS4_" ... "_sz" to name their size constants.  The
+ * larger of the two sizes is shifted left by 2 to give p_bufsiz.  Every
+ * entry uses NFSPROC4_COMPOUND as its procedure number.
+ */
+#define PROC(proc, argtype, restype) \
+[NFSPROC4_CLNT_##proc] = { \
+ .p_proc = NFSPROC4_COMPOUND, \
+ .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
+ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
+ .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+ }
+
+/*
+ * Procedure table for the NFSv4 client.  Entries are positioned by the
+ * designated initialisers produced by PROC(), so the order of the lines
+ * below does not determine the array index.
+ */
+struct rpc_procinfo nfs4_procedures[] = {
+ PROC(READ, enc_read, dec_read),
+ PROC(WRITE, enc_write, dec_write),
+ PROC(COMMIT, enc_commit, dec_commit),
+ PROC(OPEN, enc_open, dec_open),
+ PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
+ PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr),
+ PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
+ PROC(CLOSE, enc_close, dec_close),
+ PROC(SETATTR, enc_setattr, dec_setattr),
+ PROC(FSINFO, enc_fsinfo, dec_fsinfo),
+ PROC(RENEW, enc_renew, dec_renew),
+ PROC(SETCLIENTID, enc_setclientid, dec_setclientid),
+ PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm),
+ PROC(LOCK, enc_lock, dec_lock),
+ PROC(LOCKT, enc_lockt, dec_lockt),
+ PROC(LOCKU, enc_locku, dec_locku),
+ PROC(ACCESS, enc_access, dec_access),
+ PROC(GETATTR, enc_getattr, dec_getattr),
+ PROC(LOOKUP, enc_lookup, dec_lookup),
+ PROC(LOOKUP_ROOT, enc_lookup_root, dec_lookup_root),
+ PROC(REMOVE, enc_remove, dec_remove),
+ PROC(RENAME, enc_rename, dec_rename),
+ PROC(LINK, enc_link, dec_link),
+ PROC(SYMLINK, enc_symlink, dec_symlink),
+ PROC(CREATE, enc_create, dec_create),
+ PROC(PATHCONF, enc_pathconf, dec_pathconf),
+ PROC(STATFS, enc_statfs, dec_statfs),
+ PROC(READLINK, enc_readlink, dec_readlink),
+ PROC(READDIR, enc_readdir, dec_readdir),
+ PROC(SERVER_CAPS, enc_server_caps, dec_server_caps),
+ PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn),
+};
+
+/*
+ * RPC version descriptor for NFS version 4.  nrprocs is the element
+ * count of nfs4_procedures[], computed at compile time.
+ */
+struct rpc_version nfs_version4 = {
+ .number = 4,
+ .nrprocs = sizeof(nfs4_procedures)/sizeof(nfs4_procedures[0]),
+ .procs = nfs4_procedures
+};
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
new file mode 100644
index 000000000000..fd5bc596fe8a
--- /dev/null
+++ b/fs/nfs/nfsroot.c
@@ -0,0 +1,513 @@
+/*
+ * $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $
+ *
+ * Copyright (C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de>
+ *
+ * Allow an NFS filesystem to be mounted as root. The way this works is:
+ * (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
+ * (2) Handle RPC negotiation with the system which replied to RARP or
+ * was reported as a boot server by BOOTP or manually.
+ * (3) The actual mounting is done later, when init() is running.
+ *
+ *
+ * Changes:
+ *
+ * Alan Cox : Removed get_address name clash with FPU.
+ * Alan Cox : Reformatted a bit.
+ * Gero Kuhlmann : Code cleanup
+ * Michael Rausch : Fixed recognition of an incoming RARP answer.
+ * Martin Mares : (2.0) Auto-configuration via BOOTP supported.
+ * Martin Mares : Manual selection of interface & BOOTP/RARP.
+ * Martin Mares : Using network routes instead of host routes,
+ * allowing the default configuration to be used
+ * for normal operation of the host.
+ * Martin Mares : Randomized timer with exponential backoff
+ * installed to minimize network congestion.
+ * Martin Mares : Code cleanup.
+ * Martin Mares : (2.1) BOOTP and RARP made configuration options.
+ * Martin Mares : Server hostname generation fixed.
+ * Gerd Knorr : Fixed wired inode handling
+ * Martin Mares : (2.2) "0.0.0.0" addresses from command line ignored.
+ * Martin Mares : RARP replies not tested for server address.
+ * Gero Kuhlmann : (2.3) Some bug fixes and code cleanup again (please
+ * send me your new patches _before_ bothering
+ * Linus so that I don't always have to clean up
+ * _afterwards_ - thanks)
+ * Gero Kuhlmann : Last changes of Martin Mares undone.
+ * Gero Kuhlmann : RARP replies are tested for specified server
+ * again. However, it's now possible to have
+ * different RARP and NFS servers.
+ * Gero Kuhlmann : "0.0.0.0" addresses from command line are
+ * now mapped to INADDR_NONE.
+ * Gero Kuhlmann : Fixed a bug which prevented BOOTP path name
+ * from being used (thanks to Leo Spiekman)
+ * Andy Walker : Allow to specify the NFS server in nfs_root
+ * without giving a path name
+ * Swen Thümmler : Allow to specify the NFS options in nfs_root
+ * without giving a path name. Fix BOOTP request
+ * for domainname (domainname is NIS domain, not
+ * DNS domain!). Skip dummy devices for BOOTP.
+ * Jacek Zapala : Fixed a bug which prevented server-ip address
+ * from nfsroot parameter from being used.
+ * Olaf Kirch : Adapted to new NFS code.
+ * Jakub Jelinek : Free used code segment.
+ * Marko Kohtala : Fixed some bugs.
+ * Martin Mares : Debug message cleanup
+ * Martin Mares : Changed to use the new generic IP layer autoconfig
+ * code. BOOTP and RARP moved there.
+ * Martin Mares : Default path now contains host name instead of
+ * host IP address (but host name defaults to IP
+ * address anyway).
+ * Martin Mares : Use root_server_addr appropriately during setup.
+ * Martin Mares : Rewrote parameter parsing, now hopefully giving
+ * correct overriding.
+ * Trond Myklebust : Add in preliminary support for NFSv3 and TCP.
+ * Fix bug in root_nfs_addr(). nfs_data.namlen
+ * is NOT for the length of the hostname.
+ * Hua Qin : Support for mounting root file system via
+ * NFS over TCP.
+ * Fabian Frederick: Option parser rebuilt (using parser lib)
+*/
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/in.h>
+#include <linux/major.h>
+#include <linux/utsname.h>
+#include <linux/inet.h>
+#include <linux/root_dev.h>
+#include <net/ipconfig.h>
+#include <linux/parser.h>
+
+/* Define this to allow debugging output */
+#undef NFSROOT_DEBUG
+#define NFSDBG_FACILITY NFSDBG_ROOT
+
+/*
+ * Default path we try to mount.  It is used as a printf-style format:
+ * root_nfs_name() substitutes system_utsname.nodename for "%s", and
+ * nfs_root_setup() substitutes the nfsroot= argument when that argument
+ * does not start with '/', ',' or a digit.
+ */
+#define NFS_ROOT "/tftpboot/%s"
+
+/* Parameters passed from the kernel command line */
+static char nfs_root_name[256] __initdata = "";
+
+/* Address of NFS server */
+static __u32 servaddr __initdata = 0;
+
+/* Name of directory to mount */
+static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, };
+
+/*
+ * NFS-related data.  root_nfs_name() resets nfs_port to -1; while it is
+ * still negative, root_nfs_ports() fills it in with htons() of the port
+ * it looked up.  mount_port is always set by root_nfs_ports(), also via
+ * htons().
+ */
+static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
+static int nfs_port __initdata = 0; /* Port to connect to for NFS */
+static int mount_port __initdata = 0; /* Mount daemon port number */
+
+
+/***************************************************************************
+
+ Parsing of options
+
+ ***************************************************************************/
+
+/*
+ * Option tokens.  The order matters: root_nfs_parse() treats every
+ * token that compares below Opt_soft as taking an integer argument, so
+ * new integer options must be added before Opt_soft and new flag
+ * options after it.
+ */
+enum {
+ /* Options that take integer arguments */
+ Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
+ Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
+ /* Options that take no arguments */
+ Opt_soft, Opt_hard, Opt_intr,
+ Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac,
+ Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
+ /* Error token */
+ Opt_err
+};
+
+/*
+ * Pattern table handed to match_token() by root_nfs_parse().  Several
+ * spellings map to the same token (e.g. "nfsvers=3" and "v3"); the
+ * {Opt_err, NULL} row ends the table.
+ */
+static match_table_t __initdata tokens = {
+ {Opt_port, "port=%u"},
+ {Opt_rsize, "rsize=%u"},
+ {Opt_wsize, "wsize=%u"},
+ {Opt_timeo, "timeo=%u"},
+ {Opt_retrans, "retrans=%u"},
+ {Opt_acregmin, "acregmin=%u"},
+ {Opt_acregmax, "acregmax=%u"},
+ {Opt_acdirmin, "acdirmin=%u"},
+ {Opt_acdirmax, "acdirmax=%u"},
+ {Opt_soft, "soft"},
+ {Opt_hard, "hard"},
+ {Opt_intr, "intr"},
+ {Opt_nointr, "nointr"},
+ {Opt_posix, "posix"},
+ {Opt_noposix, "noposix"},
+ {Opt_cto, "cto"},
+ {Opt_nocto, "nocto"},
+ {Opt_ac, "ac"},
+ {Opt_noac, "noac"},
+ {Opt_lock, "lock"},
+ {Opt_nolock, "nolock"},
+ {Opt_v2, "nfsvers=2"},
+ {Opt_v2, "v2"},
+ {Opt_v3, "nfsvers=3"},
+ {Opt_v3, "v3"},
+ {Opt_udp, "proto=udp"},
+ {Opt_udp, "udp"},
+ {Opt_tcp, "proto=tcp"},
+ {Opt_tcp, "tcp"},
+ {Opt_err, NULL}
+
+};
+
+/*
+ * Parse option string.
+ *
+ * @name: string of the form "<path>[,<option>...]".  It is split in
+ *        place by strsep(), so the caller's buffer is modified.  A NULL
+ *        pointer is accepted and means there is nothing to parse.
+ * @buf:  receives the path component (bounded by NFS_MAXPATHLEN) unless
+ *        that component is empty or the literal "default".
+ *
+ * Recognised options update nfs_port and nfs_data.  Returns 1 on success
+ * and 0 as soon as an unknown option or a malformed integer is met;
+ * options handled before that point stay applied.
+ */
+
+static int __init root_nfs_parse(char *name, char *buf)
+{
+
+	char *p;
+	substring_t args[MAX_OPT_ARGS];
+	int option = 0;
+
+	if (!name)
+		return 1;
+
+	/* Set the NFS remote path */
+	p = strsep(&name, ",");
+	if (p[0] != '\0' && strcmp(p, "default") != 0)
+		strlcpy(buf, p, NFS_MAXPATHLEN);
+
+	while ((p = strsep (&name, ",")) != NULL) {
+		int token;
+		if (!*p)
+			continue;
+		token = match_token(p, tokens, args);
+
+		/* %u tokens only. Beware if you add new tokens! */
+		if (token < Opt_soft && match_int(&args[0], &option))
+			return 0;
+		switch (token) {
+			case Opt_port:
+				/*
+				 * nfs_port is copied straight into sin_port by
+				 * set_sockaddr(), and root_nfs_ports() stores it
+				 * with htons().  Store a user-supplied port in
+				 * the same (network) byte order, otherwise it is
+				 * byte-swapped on little-endian machines.
+				 */
+				nfs_port = htons(option);
+				break;
+			case Opt_rsize:
+				nfs_data.rsize = option;
+				break;
+			case Opt_wsize:
+				nfs_data.wsize = option;
+				break;
+			case Opt_timeo:
+				nfs_data.timeo = option;
+				break;
+			case Opt_retrans:
+				nfs_data.retrans = option;
+				break;
+			case Opt_acregmin:
+				nfs_data.acregmin = option;
+				break;
+			case Opt_acregmax:
+				nfs_data.acregmax = option;
+				break;
+			case Opt_acdirmin:
+				nfs_data.acdirmin = option;
+				break;
+			case Opt_acdirmax:
+				nfs_data.acdirmax = option;
+				break;
+			case Opt_soft:
+				nfs_data.flags |= NFS_MOUNT_SOFT;
+				break;
+			case Opt_hard:
+				nfs_data.flags &= ~NFS_MOUNT_SOFT;
+				break;
+			case Opt_intr:
+				nfs_data.flags |= NFS_MOUNT_INTR;
+				break;
+			case Opt_nointr:
+				nfs_data.flags &= ~NFS_MOUNT_INTR;
+				break;
+			case Opt_posix:
+				nfs_data.flags |= NFS_MOUNT_POSIX;
+				break;
+			case Opt_noposix:
+				nfs_data.flags &= ~NFS_MOUNT_POSIX;
+				break;
+			case Opt_cto:
+				nfs_data.flags &= ~NFS_MOUNT_NOCTO;
+				break;
+			case Opt_nocto:
+				nfs_data.flags |= NFS_MOUNT_NOCTO;
+				break;
+			case Opt_ac:
+				nfs_data.flags &= ~NFS_MOUNT_NOAC;
+				break;
+			case Opt_noac:
+				nfs_data.flags |= NFS_MOUNT_NOAC;
+				break;
+			case Opt_lock:
+				nfs_data.flags &= ~NFS_MOUNT_NONLM;
+				break;
+			case Opt_nolock:
+				nfs_data.flags |= NFS_MOUNT_NONLM;
+				break;
+			case Opt_v2:
+				nfs_data.flags &= ~NFS_MOUNT_VER3;
+				break;
+			case Opt_v3:
+				nfs_data.flags |= NFS_MOUNT_VER3;
+				break;
+			case Opt_udp:
+				nfs_data.flags &= ~NFS_MOUNT_TCP;
+				break;
+			case Opt_tcp:
+				nfs_data.flags |= NFS_MOUNT_TCP;
+				break;
+			default :
+				/* Unknown option: stop and report failure. */
+				return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Prepare the NFS data structure and parse all options.
+ *
+ * Resets nfs_data and nfs_port to their defaults, applies the options
+ * from root_server_path and then those from @name (so @name wins), and
+ * finally expands the resulting path template into nfs_path with
+ * system_utsname.nodename as its argument.
+ *
+ * The return values of root_nfs_parse() are not checked, as before.
+ *
+ * Returns 1 on success and -1 when the expanded path does not fit.
+ */
+static int __init root_nfs_name(char *name)
+{
+	static char buf[NFS_MAXPATHLEN] __initdata;
+	char *cp;
+	int len;
+
+	/* Set some default values */
+	memset(&nfs_data, 0, sizeof(nfs_data));
+	nfs_port          = -1;
+	nfs_data.version  = NFS_MOUNT_VERSION;
+	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
+	nfs_data.rsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	nfs_data.wsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	nfs_data.acregmin = 3;
+	nfs_data.acregmax = 60;
+	nfs_data.acdirmin = 30;
+	nfs_data.acdirmax = 60;
+	strcpy(buf, NFS_ROOT);
+
+	/* Process options received from the remote server */
+	root_nfs_parse(root_server_path, buf);
+
+	/* Override them by options set on kernel command-line */
+	root_nfs_parse(name, buf);
+
+	cp = system_utsname.nodename;
+	if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
+		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
+		return -1;
+	}
+
+	/*
+	 * buf is used as a format string and may come from the server or
+	 * the command line.  The length test above only accounts for a
+	 * plain "%s", so bound the write explicitly: a stray conversion
+	 * (e.g. one with a field width) must not run past nfs_path.
+	 */
+	len = snprintf(nfs_path, sizeof(nfs_path), buf, cp);
+	if (len < 0 || len >= (int) sizeof(nfs_path)) {
+		printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
+		return -1;
+	}
+
+	return 1;
+}
+
+
+/*
+ * Get NFS server address.
+ *
+ * Copies root_server_addr into servaddr.  If it is INADDR_NONE an error
+ * is logged and -1 returned; otherwise the dotted-quad form of the
+ * address is written into nfs_data.hostname and 0 is returned.
+ */
+static int __init root_nfs_addr(void)
+{
+	servaddr = root_server_addr;
+	if (servaddr != INADDR_NONE) {
+		snprintf(nfs_data.hostname, sizeof(nfs_data.hostname),
+			 "%u.%u.%u.%u", NIPQUAD(servaddr));
+		return 0;
+	}
+
+	printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
+	return -1;
+}
+
+/*
+ * Tell the user what's going on.
+ *
+ * Only built when NFSROOT_DEBUG is defined.  Logs the path, server name,
+ * transfer and attribute-cache settings, port numbers and flags.
+ */
+#ifdef NFSROOT_DEBUG
+static void __init root_nfs_print(void)
+{
+	const struct nfs_mount_data *d = &nfs_data;
+
+	printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
+		nfs_path, d->hostname);
+	printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
+		d->rsize, d->wsize, d->timeo, d->retrans);
+	printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
+		d->acregmin, d->acregmax,
+		d->acdirmin, d->acdirmax);
+	printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n",
+		nfs_port, mount_port, d->flags);
+}
+#endif
+
+
+/*
+ * Prepare path, options and server address for the root mount.
+ * Returns 0 on success, -1 if either step fails.
+ */
+static int __init root_nfs_init(void)
+{
+#ifdef NFSROOT_DEBUG
+	nfs_debug |= NFSDBG_ROOT;
+#endif
+
+	/*
+	 * Decode the root directory path name and NFS options from
+	 * the kernel command line. This has to go here in order to
+	 * be able to use the client IP address for the remote root
+	 * directory (necessary for pure RARP booting).
+	 */
+	if (root_nfs_name(nfs_root_name) < 0)
+		return -1;
+	if (root_nfs_addr() < 0)
+		return -1;
+
+#ifdef NFSROOT_DEBUG
+	root_nfs_print();
+#endif
+
+	return 0;
+}
+
+
+/*
+ * Parse NFS server and directory information passed on the kernel
+ * command line.
+ *
+ * An argument starting with '/', ',' or a digit is copied as is.  Any
+ * other argument is inserted into the NFS_ROOT template, after being
+ * cut short in place if the result would not fit nfs_root_name.  The
+ * server address is then extracted with root_nfs_parse_addr().
+ * Always returns 1.
+ */
+static int __init nfs_root_setup(char *line)
+{
+	const char first = line[0];
+
+	ROOT_DEV = Root_NFS;
+	if (first != '/' && first != ',' && (first < '0' || first > '9')) {
+		int needed = strlen(line) + sizeof(NFS_ROOT) - 1;
+
+		if (needed >= sizeof(nfs_root_name))
+			line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0';
+		sprintf(nfs_root_name, NFS_ROOT, line);
+	} else {
+		strlcpy(nfs_root_name, line, sizeof(nfs_root_name));
+	}
+	root_server_addr = root_nfs_parse_addr(nfs_root_name);
+	return 1;
+}
+
+__setup("nfsroot=", nfs_root_setup);
+
+/***************************************************************************
+
+ Routines to actually mount the root directory
+
+ ***************************************************************************/
+
+/*
+ * Construct sockaddr_in from address and port number.
+ *
+ * @addr and @port are stored as given, with no byte-order conversion.
+ * The structure is cleared first so that its padding (sin_zero) never
+ * carries stale stack contents when callers pass an automatic variable.
+ */
+static inline void
+set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port)
+{
+	memset(sin, 0, sizeof(*sin));
+	sin->sin_family = AF_INET;
+	sin->sin_addr.s_addr = addr;
+	sin->sin_port = port;
+}
+
+/*
+ * Query server portmapper for the port of a daemon program.
+ *
+ * Logs the lookup, then asks rpc_getport_external() with port 0 set in
+ * the server address and returns its result unchanged.
+ */
+static int __init root_nfs_getport(int program, int version, int proto)
+{
+	struct sockaddr_in sin;
+	int port;
+
+	printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
+		program, version, NIPQUAD(servaddr));
+	set_sockaddr(&sin, servaddr, 0);
+	port = rpc_getport_external(&sin, program, version, proto);
+	return port;
+}
+
+
+/*
+ * Use portmapper to find mountd and nfsd port numbers if not overridden
+ * by the user. Use defaults if portmapper is not available.
+ * XXX: Is there any nfs server with no portmapper?
+ *
+ * The nfsd port is only looked up while nfs_port is negative; the mountd
+ * port is always looked up.  Both results are stored via htons().
+ * Always returns 0.
+ */
+static int __init root_nfs_ports(void)
+{
+	const int use_v3 = (nfs_data.flags & NFS_MOUNT_VER3) != 0;
+	const int nfsd_ver = use_v3 ? NFS3_VERSION : NFS2_VERSION;
+	const int mountd_ver = use_v3 ? NFS_MNT3_VERSION : NFS_MNT_VERSION;
+	int proto = IPPROTO_UDP;
+	int port;
+
+	if (nfs_data.flags & NFS_MOUNT_TCP)
+		proto = IPPROTO_TCP;
+
+	if (nfs_port < 0) {
+		port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto);
+		if (port < 0) {
+			printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
+					"number from server, using default\n");
+			port = NFS_PORT;
+		}
+		nfs_port = htons(port);
+		dprintk("Root-NFS: Portmapper on server returned %d "
+			"as nfsd port\n", port);
+	}
+
+	port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto);
+	if (port < 0) {
+		printk(KERN_ERR "Root-NFS: Unable to get mountd port "
+				"number from server, using default\n");
+		port = NFS_MNT_PORT;
+	}
+	mount_port = htons(port);
+	dprintk("Root-NFS: mountd port is %d\n", port);
+
+	return 0;
+}
+
+
+/*
+ * Get a file handle from the server for the directory which is to be
+ * mounted.
+ *
+ * Calls nfsroot_mount() for nfs_path using the mount protocol version
+ * and transport selected by nfs_data.flags.  On a negative status the
+ * error is logged; otherwise the handle is copied into nfs_data.root.
+ * The status from nfsroot_mount() is returned in both cases.
+ */
+static int __init root_nfs_get_handle(void)
+{
+	struct sockaddr_in sin;
+	struct nfs_fh fh;
+	int protocol = IPPROTO_UDP;
+	int version = NFS_MNT_VERSION;
+	int status;
+
+	if (nfs_data.flags & NFS_MOUNT_TCP)
+		protocol = IPPROTO_TCP;
+	if (nfs_data.flags & NFS_MOUNT_VER3)
+		version = NFS_MNT3_VERSION;
+
+	set_sockaddr(&sin, servaddr, mount_port);
+	status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
+	if (status < 0) {
+		printk(KERN_ERR "Root-NFS: Server returned error %d "
+				"while mounting %s\n", status, nfs_path);
+		return status;
+	}
+
+	nfs_data.root.size = fh.size;
+	memcpy(nfs_data.root.data, fh.data, fh.size);
+	return status;
+}
+
+/*
+ * Get the NFS port numbers and file handle, and return the prepared 'data'
+ * argument for mount() if everything went OK. Return NULL otherwise.
+ *
+ * The three setup steps run in order and the first failure stops the
+ * sequence.
+ */
+void * __init nfs_root_data(void)
+{
+	if (root_nfs_init() < 0)
+		return NULL;
+	if (root_nfs_ports() < 0)
+		return NULL;
+	if (root_nfs_get_handle() < 0)
+		return NULL;
+
+	set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port);
+	return (void*)&nfs_data;
+}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
new file mode 100644
index 000000000000..4f1ba723848d
--- /dev/null
+++ b/fs/nfs/pagelist.c
@@ -0,0 +1,309 @@
+/*
+ * linux/fs/nfs/pagelist.c
+ *
+ * A set of helper functions for managing NFS read and write requests.
+ * The main purpose of these routines is to provide support for the
+ * coalescing of several requests into a single RPC call.
+ *
+ * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_page.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+
+#define NFS_PARANOIA 1
+
+static kmem_cache_t *nfs_page_cachep;
+
+/*
+ * Allocate one nfs_page from the nfs_page slab cache, zero it and
+ * initialise its wb_list head. Returns NULL if the allocation fails.
+ */
+static inline struct nfs_page *
+nfs_page_alloc(void)
+{
+	struct nfs_page *req = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+
+	if (req == NULL)
+		return NULL;
+
+	memset(req, 0, sizeof(*req));
+	INIT_LIST_HEAD(&req->wb_list);
+	return req;
+}
+
+/* Hand an nfs_page back to the nfs_page slab cache. */
+static inline void
+nfs_page_free(struct nfs_page *p)
+{
+ kmem_cache_free(nfs_page_cachep, p);
+}
+
+/**
+ * nfs_create_request - Create an NFS read/write request.
+ * @ctx: open context to attach to the request
+ * @inode: inode to which the request is attached
+ * @page: page to read into or write from
+ * @offset: starting offset within the page
+ * @count: number of bytes to read/write
+ *
+ * The page must be locked by the caller. This makes sure we never
+ * create two different requests for the same page, and avoids
+ * a possible deadlock when we reach the hard limit on the number
+ * of dirty pages.
+ * User should ensure it is safe to sleep in this function.
+ *
+ * Returns the new request with wb_count set to 1, holding a reference
+ * on @page and on @ctx, or ERR_PTR(-ERESTARTSYS) if allocation kept
+ * failing and a signal arrived on a mount with NFS_MOUNT_INTR set.
+ */
+struct nfs_page *
+nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+ struct page *page,
+ unsigned int offset, unsigned int count)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_page *req;
+
+ /* Deal with hard limits: retry the allocation until it succeeds. */
+ for (;;) {
+ /* try to allocate the request struct */
+ req = nfs_page_alloc();
+ if (req != NULL)
+ break;
+
+ /* Allocation failed. Give up if a signal is pending and the
+ * mount is interruptible; otherwise yield the CPU and retry.
+ */
+ if (signalled() && (server->flags & NFS_MOUNT_INTR))
+ return ERR_PTR(-ERESTARTSYS);
+ yield();
+ }
+
+ /* Initialize the request struct. Initially, we assume a
+ * long write-back delay. This will be adjusted in
+ * update_nfs_request below if the region is not locked. */
+ req->wb_page = page;
+ atomic_set(&req->wb_complete, 0);
+ req->wb_index = page->index;
+ page_cache_get(page);
+ req->wb_offset = offset;
+ req->wb_pgbase = offset;
+ req->wb_bytes = count;
+ atomic_set(&req->wb_count, 1);
+ req->wb_context = get_nfs_open_context(ctx);
+
+ return req;
+}
+
+/**
+ * nfs_unlock_request - Unlock request and wake up sleepers.
+ * @req: request to unlock; NFS_WBACK_BUSY(req) must hold on entry
+ *
+ * Clears PG_BUSY in the request flags, wakes everything sleeping on the
+ * open context's waitq and then drops one reference on the request via
+ * nfs_release_request(). Unlocking a request that is not busy is a BUG().
+ */
+void nfs_unlock_request(struct nfs_page *req)
+{
+ if (!NFS_WBACK_BUSY(req)) {
+ printk(KERN_ERR "NFS: Invalid unlock attempted\n");
+ BUG();
+ }
+ /* Memory barriers on both sides of the flag clear, before the wake-up. */
+ smp_mb__before_clear_bit();
+ clear_bit(PG_BUSY, &req->wb_flags);
+ smp_mb__after_clear_bit();
+ wake_up_all(&req->wb_context->waitq);
+ /* May drop the last reference: req must not be used after this. */
+ nfs_release_request(req);
+}
+
+/**
+ * nfs_clear_request - Free up all resources allocated to the request
+ * @req: request whose page reference should be dropped
+ *
+ * Drops the page cache reference held through @req->wb_page, if any, and
+ * clears the pointer so that a second call does nothing.
+ */
+void nfs_clear_request(struct nfs_page *req)
+{
+	struct page *page = req->wb_page;
+
+	if (page == NULL)
+		return;
+
+	page_cache_release(page);
+	req->wb_page = NULL;
+}
+
+
+/**
+ * nfs_release_request - Release the count on an NFS read/write request
+ * @req: request to release
+ *
+ * Decrements wb_count. When it reaches zero the page reference and the
+ * open context reference are dropped and the request itself is freed,
+ * so the caller must not touch @req afterwards.
+ *
+ * Note: Should never be called with the spinlock held!
+ */
+void
+nfs_release_request(struct nfs_page *req)
+{
+ if (!atomic_dec_and_test(&req->wb_count))
+ return;
+
+#ifdef NFS_PARANOIA
+ /* The last reference must not go away while the request is still
+ * on a list or still marked busy. */
+ BUG_ON (!list_empty(&req->wb_list));
+ BUG_ON (NFS_WBACK_BUSY(req));
+#endif
+
+ /* Release the page and the open context, then free the request */
+ nfs_clear_request(req);
+ put_nfs_open_context(req->wb_context);
+ nfs_page_free(req);
+}
+
+/**
+ * nfs_list_add_request - Insert a request into a sorted list
+ * @req: request
+ * @head: head of list into which to insert the request.
+ *
+ * Note that the wb_list is sorted by page index in order to facilitate
+ * coalescing of requests.
+ * We use an insertion sort that is optimized for the case of appended
+ * writes.
+ *
+ * @req must not already be on a list (checked under NFS_PARANOIA).
+ * On return @req->wb_list_head records @head.
+ */
+void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+{
+ struct list_head *pos;
+
+#ifdef NFS_PARANOIA
+ if (!list_empty(&req->wb_list)) {
+ printk(KERN_ERR "NFS: Add to list failed!\n");
+ BUG();
+ }
+#endif
+ /* Walk backwards from the tail and stop at the first entry with a
+ * smaller page index. If there is none, pos ends up equal to head. */
+ list_for_each_prev(pos, head) {
+ struct nfs_page *p = nfs_list_entry(pos);
+ if (p->wb_index < req->wb_index)
+ break;
+ }
+ /* Insert right after pos (at the front of the list when pos == head). */
+ list_add(&req->wb_list, pos);
+ req->wb_list_head = head;
+}
+
+/**
+ * nfs_wait_on_request - Wait for a request to complete.
+ * @req: request to wait upon.
+ *
+ * Interruptible by signals only if mounted with intr flag.
+ * The user is responsible for holding a count on the request.
+ *
+ * Returns 0 at once if the request is not busy; otherwise returns the
+ * result of nfs_wait_event() on the open context's waitq.
+ */
+int
+nfs_wait_on_request(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_context->dentry->d_inode;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+
+ /* Fast path: nothing to wait for. */
+ if (!NFS_WBACK_BUSY(req))
+ return 0;
+ return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req));
+}
+
+/**
+ * nfs_coalesce_requests - Split coalesced requests out from a list.
+ * @head: source list
+ * @dst: destination list
+ * @nmax: maximum number of requests to coalesce
+ *
+ * Moves a maximum of 'nmax' elements from one list to another.
+ * The elements are checked to ensure that they form a contiguous set
+ * of pages, and that the RPC credentials are the same.
+ *
+ * Returns the number of requests moved to @dst. The first request on
+ * @head is always moved when @head is not empty.
+ */
+int
+nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
+ unsigned int nmax)
+{
+ struct nfs_page *req = NULL;
+ unsigned int npages = 0;
+
+ while (!list_empty(head)) {
+ struct nfs_page *prev = req;
+
+ req = nfs_list_entry(head->next);
+ if (prev) {
+ /* Stop unless this request uses the same credential, lock
+ * owner and state as the previous one, sits on the very
+ * next page, and starts at the beginning of that page. */
+ if (req->wb_context->cred != prev->wb_context->cred)
+ break;
+ if (req->wb_context->lockowner != prev->wb_context->lockowner)
+ break;
+ if (req->wb_context->state != prev->wb_context->state)
+ break;
+ if (req->wb_index != (prev->wb_index + 1))
+ break;
+
+ if (req->wb_pgbase != 0)
+ break;
+ }
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ npages++;
+ /* A request that does not run to the end of its page cannot be
+ * followed contiguously by another one. */
+ if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE)
+ break;
+ if (npages >= nmax)
+ break;
+ }
+ return npages;
+}
+
+/**
+ * nfs_scan_list - Scan a list for matching requests
+ * @head: One of the NFS inode request lists
+ * @dst: Destination list
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+ * Moves elements from one of the inode request lists.
+ * If the number of requests is set to 0, the entire address_space
+ * starting at index idx_start, is scanned.
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ * You must be holding the inode's req_lock when calling this function
+ *
+ * Requests that cannot be locked with nfs_lock_request() are left on
+ * @head. Returns the number of requests moved to @dst.
+ */
+int
+nfs_scan_list(struct list_head *head, struct list_head *dst,
+ unsigned long idx_start, unsigned int npages)
+{
+ struct list_head *pos, *tmp;
+ struct nfs_page *req;
+ unsigned long idx_end;
+ int res;
+
+ res = 0;
+ /* npages == 0 means "no upper bound". */
+ if (npages == 0)
+ idx_end = ~0;
+ else
+ idx_end = idx_start + npages - 1;
+
+ list_for_each_safe(pos, tmp, head) {
+
+ req = nfs_list_entry(pos);
+
+ if (req->wb_index < idx_start)
+ continue;
+ /* Stopping here assumes the list is sorted by page index. */
+ if (req->wb_index > idx_end)
+ break;
+
+ if (!nfs_lock_request(req))
+ continue;
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ res++;
+ }
+ return res;
+}
+
+/*
+ * Create the "nfs_page" slab cache used for struct nfs_page.
+ * Returns 0 on success or -ENOMEM if the cache could not be created.
+ */
+int nfs_init_nfspagecache(void)
+{
+	nfs_page_cachep = kmem_cache_create("nfs_page",
+					    sizeof(struct nfs_page),
+					    0, SLAB_HWCACHE_ALIGN,
+					    NULL, NULL);
+
+	return (nfs_page_cachep != NULL) ? 0 : -ENOMEM;
+}
+
+/*
+ * Destroy the nfs_page slab cache. A non-zero result from
+ * kmem_cache_destroy() is only reported with a log message.
+ */
+void nfs_destroy_nfspagecache(void)
+{
+ if (kmem_cache_destroy(nfs_page_cachep))
+ printk(KERN_INFO "nfs_page: not all structures were freed\n");
+}
+
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
new file mode 100644
index 000000000000..d31b4d6e5a5e
--- /dev/null
+++ b/fs/nfs/proc.c
@@ -0,0 +1,655 @@
+/*
+ * linux/fs/nfs/proc.c
+ *
+ * Copyright (C) 1992, 1993, 1994 Rick Sladkey
+ *
+ * OS-independent nfs remote procedure call functions
+ *
+ * Tuned by Alan Cox <A.Cox@swansea.ac.uk> for >3K buffers
+ * so at last we can have decent(ish) throughput off a
+ * Sun server.
+ *
+ * Coding optimized and cleaned up by Florian La Roche.
+ * Note: Error returns are optimized for NFS_OK, which isn't translated via
+ * nfs_stat_to_errno(), but happens to be already the right return code.
+ *
+ * Also, the code currently doesn't check the size of the packet, when
+ * it decodes the packet.
+ *
+ * Feel free to fix it and mail me the diffs if it worries you.
+ *
+ * Completely rewritten to support the new RPC call interface;
+ * rewrote and moved the entire XDR stuff to xdr.c
+ * --Olaf Kirch June 1996
+ *
+ * The code below initializes all auto variables explicitly, otherwise
+ * it will fail to work as a module (gcc generates a memset call for an
+ * incomplete struct).
+ */
+
+#include <linux/types.h>
+#include <linux/param.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+
+#define NFSDBG_FACILITY NFSDBG_PROC
+
+extern struct rpc_procinfo nfs_procedures[];
+
+/*
+ * Bare-bones access to getattr: this is for nfs_read_super.
+ *
+ * Issues GETATTR and then STATFS for @fhandle on server->client_sys.
+ * The attributes land in info->fattr and the transfer size hints in
+ * @info are derived from the STATFS reply. Returns 0 on success or the
+ * non-zero status of the first RPC call that failed.
+ */
+static int
+nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsinfo *info)
+{
+ struct nfs_fattr *fattr = info->fattr;
+ struct nfs2_fsstat fsinfo;
+ int status;
+
+ dprintk("%s: call getattr\n", __FUNCTION__);
+ fattr->valid = 0;
+ status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+ dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
+ if (status)
+ return status;
+ dprintk("%s: call statfs\n", __FUNCTION__);
+ status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+ dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
+ if (status)
+ return status;
+ /* Maximum sizes are fixed at NFS_MAXDATA; preferred sizes and
+ * multiples come from the server's tsize and bsize. */
+ info->rtmax = NFS_MAXDATA;
+ info->rtpref = fsinfo.tsize;
+ info->rtmult = fsinfo.bsize;
+ info->wtmax = NFS_MAXDATA;
+ info->wtpref = fsinfo.tsize;
+ info->wtmult = fsinfo.bsize;
+ info->dtpref = fsinfo.tsize;
+ info->maxfilesize = 0x7FFFFFFF;
+ info->lease_time = 0;
+ return 0;
+}
+
+/*
+ * One function for each procedure in the NFS protocol.
+ */
+/*
+ * GETATTR: fetch the attributes of @fhandle into @fattr.
+ * @fattr is marked invalid before the call. Returns the rpc_call() status.
+ */
+static int
+nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
+{
+ int status;
+
+ dprintk("NFS call getattr\n");
+ fattr->valid = 0;
+ status = rpc_call(server->client, NFSPROC_GETATTR,
+ fhandle, fattr, 0);
+ dprintk("NFS reply getattr: %d\n", status);
+ return status;
+}
+
+/*
+ * SETATTR: apply @sattr to the inode behind @dentry.
+ * The reply attributes are stored in @fattr, which is marked invalid
+ * before the call. Returns the rpc_call() status.
+ */
+static int
+nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
+ struct iattr *sattr)
+{
+ struct inode *inode = dentry->d_inode;
+ struct nfs_sattrargs arg = {
+ .fh = NFS_FH(inode),
+ .sattr = sattr
+ };
+ int status;
+
+ dprintk("NFS call setattr\n");
+ fattr->valid = 0;
+ status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+ dprintk("NFS reply setattr: %d\n", status);
+ return status;
+}
+
+/*
+ * LOOKUP: look up @name in directory @dir.
+ * The reply is decoded into @fhandle and @fattr; @fattr is marked
+ * invalid before the call. Returns the rpc_call() status.
+ */
+static int
+nfs_proc_lookup(struct inode *dir, struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+ struct nfs_diropargs arg = {
+ .fh = NFS_FH(dir),
+ .name = name->name,
+ .len = name->len
+ };
+ struct nfs_diropok res = {
+ .fh = fhandle,
+ .fattr = fattr
+ };
+ int status;
+
+ dprintk("NFS call lookup %s\n", name->name);
+ fattr->valid = 0;
+ status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+ dprintk("NFS reply lookup: %d\n", status);
+ return status;
+}
+
+/*
+ * READLINK: read the target of symlink @inode into @page, using @pglen
+ * bytes starting at @pgbase. No result structure is passed to the RPC
+ * call. Returns the rpc_call() status.
+ */
+static int nfs_proc_readlink(struct inode *inode, struct page *page,
+ unsigned int pgbase, unsigned int pglen)
+{
+ struct nfs_readlinkargs args = {
+ .fh = NFS_FH(inode),
+ .pgbase = pgbase,
+ .pglen = pglen,
+ .pages = &page
+ };
+ int status;
+
+ dprintk("NFS call readlink\n");
+ status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+ dprintk("NFS reply readlink: %d\n", status);
+ return status;
+}
+
+/*
+ * Synchronous READ described by @rdata (arguments, result, credential
+ * and RPC flags). On a non-negative status the inode is refreshed from
+ * the returned attributes and res.eof is set when the request reaches
+ * or passes the file size in those attributes. Returns the
+ * rpc_call_sync() status.
+ */
+static int nfs_proc_read(struct nfs_read_data *rdata)
+{
+ int flags = rdata->flags;
+ struct inode * inode = rdata->inode;
+ struct nfs_fattr * fattr = rdata->res.fattr;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_READ],
+ .rpc_argp = &rdata->args,
+ .rpc_resp = &rdata->res,
+ .rpc_cred = rdata->cred,
+ };
+ int status;
+
+ dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
+ (long long) rdata->args.offset);
+ fattr->valid = 0;
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+ if (status >= 0) {
+ nfs_refresh_inode(inode, fattr);
+ /* Emulate the eof flag, which isn't normally needed in NFSv2
+ * as it is guaranteed to always return the file attributes
+ */
+ if (rdata->args.offset + rdata->args.count >= fattr->size)
+ rdata->res.eof = 1;
+ }
+ dprintk("NFS reply read: %d\n", status);
+ return status;
+}
+
+/*
+ * Synchronous WRITE described by @wdata. On a non-negative status the
+ * inode is refreshed from the returned attributes, res.count is set to
+ * the full requested count and the write is recorded as NFS_FILE_SYNC.
+ * Returns the negative status on failure, otherwise res.count.
+ */
+static int nfs_proc_write(struct nfs_write_data *wdata)
+{
+ int flags = wdata->flags;
+ struct inode * inode = wdata->inode;
+ struct nfs_fattr * fattr = wdata->res.fattr;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_WRITE],
+ .rpc_argp = &wdata->args,
+ .rpc_resp = &wdata->res,
+ .rpc_cred = wdata->cred,
+ };
+ int status;
+
+ dprintk("NFS call write %d @ %Ld\n", wdata->args.count,
+ (long long) wdata->args.offset);
+ fattr->valid = 0;
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+ if (status >= 0) {
+ nfs_refresh_inode(inode, fattr);
+ wdata->res.count = wdata->args.count;
+ wdata->verf.committed = NFS_FILE_SYNC;
+ }
+ dprintk("NFS reply write: %d\n", status);
+ return status < 0? status : wdata->res.count;
+}
+
+/*
+ * CREATE: create the file named by @dentry in directory @dir with the
+ * attributes in @sattr. If the RPC call returns 0, the dentry is
+ * instantiated from the returned file handle and attributes. @flags is
+ * not used by this implementation. Returns the RPC status, or the
+ * nfs_instantiate() result after a successful call.
+ */
+static int
+nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ int flags)
+{
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr;
+ struct nfs_createargs arg = {
+ .fh = NFS_FH(dir),
+ .name = dentry->d_name.name,
+ .len = dentry->d_name.len,
+ .sattr = sattr
+ };
+ struct nfs_diropok res = {
+ .fh = &fhandle,
+ .fattr = &fattr
+ };
+ int status;
+
+ fattr.valid = 0;
+ dprintk("NFS call create %s\n", dentry->d_name.name);
+ status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ if (status == 0)
+ status = nfs_instantiate(dentry, &fhandle, &fattr);
+ dprintk("NFS reply create: %d\n", status);
+ return status;
+}
+
+/*
+ * In NFSv2, mknod is grafted onto the create call.
+ *
+ * Note that @sattr is modified in place: the mode and the ATTR_SIZE bit
+ * may be rewritten, and for character/block devices ia_size is used to
+ * carry the encoded device number. Returns the RPC status, or the
+ * nfs_instantiate() result after a successful call.
+ */
+static int
+nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ dev_t rdev)
+{
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr;
+ struct nfs_createargs arg = {
+ .fh = NFS_FH(dir),
+ .name = dentry->d_name.name,
+ .len = dentry->d_name.len,
+ .sattr = sattr
+ };
+ struct nfs_diropok res = {
+ .fh = &fhandle,
+ .fattr = &fattr
+ };
+ int status, mode;
+
+ dprintk("NFS call mknod %s\n", dentry->d_name.name);
+
+ mode = sattr->ia_mode;
+ if (S_ISFIFO(mode)) {
+ /* First attempt for a FIFO: ask for a character device with no
+ * size attribute. */
+ sattr->ia_mode = (mode & ~S_IFMT) | S_IFCHR;
+ sattr->ia_valid &= ~ATTR_SIZE;
+ } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
+ sattr->ia_valid |= ATTR_SIZE;
+ sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
+ }
+
+ fattr.valid = 0;
+ status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+
+ /* The server rejected the FIFO-as-chrdev form: retry once with the
+ * original FIFO mode. */
+ if (status == -EINVAL && S_ISFIFO(mode)) {
+ sattr->ia_mode = mode;
+ fattr.valid = 0;
+ status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+ }
+ if (status == 0)
+ status = nfs_instantiate(dentry, &fhandle, &fattr);
+ dprintk("NFS reply mknod: %d\n", status);
+ return status;
+}
+
+/*
+ * Synchronous REMOVE of @name from directory @dir. The message carries
+ * no result structure and no explicit credential (rpc_cred is NULL).
+ * Returns the rpc_call_sync() status.
+ */
+static int
+nfs_proc_remove(struct inode *dir, struct qstr *name)
+{
+ struct nfs_diropargs arg = {
+ .fh = NFS_FH(dir),
+ .name = name->name,
+ .len = name->len
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
+ .rpc_argp = &arg,
+ .rpc_resp = NULL,
+ .rpc_cred = NULL
+ };
+ int status;
+
+ dprintk("NFS call remove %s\n", name->name);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+ dprintk("NFS reply remove: %d\n", status);
+ return status;
+}
+
+/*
+ * Fill in @msg for a REMOVE of @name from directory @dir.
+ *
+ * The argument structure is allocated with kmalloc() and attached to
+ * msg->rpc_argp; nfs_proc_unlink_done() frees it again.
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int
+nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+{
+	struct nfs_diropargs *args = kmalloc(sizeof(*args), GFP_KERNEL);
+
+	if (args == NULL)
+		return -ENOMEM;
+
+	args->fh = NFS_FH(dir->d_inode);
+	args->name = name->name;
+	args->len = name->len;
+
+	msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
+	msg->rpc_argp = args;
+	return 0;
+}
+
+/*
+ * Completion hook for an unlink RPC: free the argument structure that
+ * nfs_proc_unlink_setup() attached to the task's message.
+ * @dir is not used. Always returns 0.
+ */
+static int
+nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+{
+	/* kfree() ignores a NULL pointer, so no guard is needed. */
+	kfree(task->tk_msg.rpc_argp);
+	return 0;
+}
+
+/*
+ * RENAME: rename @old_name in @old_dir to @new_name in @new_dir.
+ * The call is sent through the RPC client of @old_dir.
+ * Returns the rpc_call() status.
+ */
+static int
+nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
+ struct inode *new_dir, struct qstr *new_name)
+{
+ struct nfs_renameargs arg = {
+ .fromfh = NFS_FH(old_dir),
+ .fromname = old_name->name,
+ .fromlen = old_name->len,
+ .tofh = NFS_FH(new_dir),
+ .toname = new_name->name,
+ .tolen = new_name->len
+ };
+ int status;
+
+ dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
+ status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+ dprintk("NFS reply rename: %d\n", status);
+ return status;
+}
+
+/*
+ * LINK: create a hard link called @name in directory @dir that refers
+ * to @inode. Returns the rpc_call() status.
+ */
+static int
+nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+{
+ struct nfs_linkargs arg = {
+ .fromfh = NFS_FH(inode),
+ .tofh = NFS_FH(dir),
+ .toname = name->name,
+ .tolen = name->len
+ };
+ int status;
+
+ dprintk("NFS call link %s\n", name->name);
+ status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+ dprintk("NFS reply link: %d\n", status);
+ return status;
+}
+
+/*
+ * SYMLINK: create symlink @name in directory @dir pointing at @path.
+ *
+ * Returns -ENAMETOOLONG without contacting the server if @path is
+ * longer than NFS2_MAXPATHLEN. The call passes no result structure, so
+ * @fhandle is left with size 0 and @fattr is left marked invalid.
+ * Otherwise returns the rpc_call() status.
+ */
+static int
+nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
+ struct iattr *sattr, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
+{
+ struct nfs_symlinkargs arg = {
+ .fromfh = NFS_FH(dir),
+ .fromname = name->name,
+ .fromlen = name->len,
+ .topath = path->name,
+ .tolen = path->len,
+ .sattr = sattr
+ };
+ int status;
+
+ if (path->len > NFS2_MAXPATHLEN)
+ return -ENAMETOOLONG;
+ dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
+ fattr->valid = 0;
+ fhandle->size = 0;
+ status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+ dprintk("NFS reply symlink: %d\n", status);
+ return status;
+}
+
+/*
+ * MKDIR: create the directory named by @dentry in @dir with the
+ * attributes in @sattr. If the RPC call returns 0, the dentry is
+ * instantiated from the returned file handle and attributes.
+ * Returns the RPC status, or the nfs_instantiate() result after a
+ * successful call.
+ */
+static int
+nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+{
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr;
+ struct nfs_createargs arg = {
+ .fh = NFS_FH(dir),
+ .name = dentry->d_name.name,
+ .len = dentry->d_name.len,
+ .sattr = sattr
+ };
+ struct nfs_diropok res = {
+ .fh = &fhandle,
+ .fattr = &fattr
+ };
+ int status;
+
+ dprintk("NFS call mkdir %s\n", dentry->d_name.name);
+ fattr.valid = 0;
+ status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+ if (status == 0)
+ status = nfs_instantiate(dentry, &fhandle, &fattr);
+ dprintk("NFS reply mkdir: %d\n", status);
+ return status;
+}
+
+/*
+ * RMDIR: remove the directory @name from @dir.
+ * Returns the rpc_call() status.
+ */
+static int
+nfs_proc_rmdir(struct inode *dir, struct qstr *name)
+{
+ struct nfs_diropargs arg = {
+ .fh = NFS_FH(dir),
+ .name = name->name,
+ .len = name->len
+ };
+ int status;
+
+ dprintk("NFS call rmdir %s\n", name->name);
+ status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+ dprintk("NFS reply rmdir: %d\n", status);
+ return status;
+}
+
+/*
+ * The READDIR implementation is somewhat hackish - we pass a temporary
+ * buffer to the encode function, which installs it in the
+ * receive iovec. The decode function just parses the reply to make
+ * sure it is syntactically correct; the entries themselves are decoded
+ * from nfs_readdir by calling the decode_entry function directly.
+ */
+/*
+ * Synchronous READDIR of the directory behind @dentry, starting at
+ * @cookie and asking for up to @count bytes into @page, using @cred.
+ * @plus is not used by this implementation. The call is made with the
+ * big kernel lock held. Returns the rpc_call_sync() status.
+ */
+static int
+nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
+		 u64 cookie, struct page *page, unsigned int count, int plus)
+{
+	struct inode		*dir = dentry->d_inode;
+	struct nfs_readdirargs	arg = {
+		.fh		= NFS_FH(dir),
+		.cookie		= cookie,
+		.count		= count,
+		.pages		= &page
+	};
+	struct rpc_message	msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= NULL,
+		.rpc_cred	= cred
+	};
+	int			status;
+
+	lock_kernel();
+
+	/* The cookie is printed as unsigned: %d would show values above
+	 * INT_MAX as negative numbers. */
+	dprintk("NFS call readdir %u\n", (unsigned int)cookie);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+	dprintk("NFS reply readdir: %d\n", status);
+	unlock_kernel();
+	return status;
+}
+
+/*
+ * STATFS: fill @stat from the server's filesystem statistics.
+ *
+ * Byte totals are the block counts from the reply multiplied by the
+ * reported block size; the file counts are always set to 0.
+ * Returns 0 on success or the non-zero rpc_call() status, in which case
+ * the totals in @stat are left untouched.
+ */
+static int
+nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsstat *stat)
+{
+	struct nfs2_fsstat reply;
+	int err;
+
+	dprintk("NFS call statfs\n");
+	stat->fattr->valid = 0;
+	err = rpc_call(server->client, NFSPROC_STATFS, fhandle, &reply, 0);
+	dprintk("NFS reply statfs: %d\n", err);
+	if (err)
+		return err;
+
+	stat->tbytes = (u64)reply.blocks * reply.bsize;
+	stat->fbytes = (u64)reply.bfree * reply.bsize;
+	stat->abytes = (u64)reply.bavail * reply.bsize;
+	stat->tfiles = 0;
+	stat->ffiles = 0;
+	stat->afiles = 0;
+	return 0;
+}
+
+/*
+ * FSINFO emulation: derive the transfer size hints in @info from a
+ * STATFS call.
+ *
+ * Maximum sizes are fixed at NFS_MAXDATA; preferred sizes and multiples
+ * come from the tsize and bsize fields of the reply.
+ * Returns 0 on success or the non-zero rpc_call() status, in which case
+ * the size fields of @info are left untouched.
+ */
+static int
+nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+		struct nfs_fsinfo *info)
+{
+	struct nfs2_fsstat reply;
+	int err;
+
+	dprintk("NFS call fsinfo\n");
+	info->fattr->valid = 0;
+	err = rpc_call(server->client, NFSPROC_STATFS, fhandle, &reply, 0);
+	dprintk("NFS reply fsinfo: %d\n", err);
+	if (err)
+		return err;
+
+	info->rtmax = NFS_MAXDATA;
+	info->rtpref = reply.tsize;
+	info->rtmult = reply.bsize;
+	info->wtmax = NFS_MAXDATA;
+	info->wtpref = reply.tsize;
+	info->wtmult = reply.bsize;
+	info->dtpref = reply.tsize;
+	info->maxfilesize = 0x7FFFFFFF;
+	info->lease_time = 0;
+	return 0;
+}
+
+/*
+ * PATHCONF emulation: no RPC call is made. max_link is reported as 0
+ * and max_namelen as NFS2_MAXNAMLEN; @server and @fhandle are unused.
+ * Always returns 0.
+ */
+static int
+nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_pathconf *info)
+{
+ info->max_link = 0;
+ info->max_namelen = NFS2_MAXNAMLEN;
+ return 0;
+}
+
+extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
+
+/*
+ * Completion callback installed by nfs_proc_read_setup() for READ tasks.
+ * On a non-negative task status the inode is refreshed and the eof flag
+ * is emulated; nfs_readpage_result() is then called in every case.
+ */
+static void
+nfs_read_done(struct rpc_task *task)
+{
+ struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+
+ if (task->tk_status >= 0) {
+ nfs_refresh_inode(data->inode, data->res.fattr);
+ /* Emulate the eof flag, which isn't normally needed in NFSv2
+ * as it is guaranteed to always return the file attributes
+ */
+ if (data->args.offset + data->args.count >= data->res.fattr->size)
+ data->res.eof = 1;
+ }
+ nfs_readpage_result(task);
+}
+
+/*
+ * Prepare data->task as an asynchronous READ: the task is initialised
+ * with nfs_read_done() as its callback and set up with a message built
+ * from data->args, data->res and data->cred.
+ */
+static void
+nfs_proc_read_setup(struct nfs_read_data *data)
+{
+ struct rpc_task *task = &data->task;
+ struct inode *inode = data->inode;
+ int flags;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_READ],
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = data->cred,
+ };
+
+ /* N.B. Do we need to test? Never called for swapfile inode */
+ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+
+ /* Finalize the task. */
+ rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags);
+ /* NOTE(review): msg lives on this stack frame; presumably
+ * rpc_call_setup() copies what it needs into the task - confirm. */
+ rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * Completion callback installed by nfs_proc_write_setup() for WRITE
+ * tasks. On a non-negative task status the inode is refreshed from the
+ * returned attributes; nfs_writeback_done() is then called in every case.
+ */
+static void
+nfs_write_done(struct rpc_task *task)
+{
+ struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+
+ if (task->tk_status >= 0)
+ nfs_refresh_inode(data->inode, data->res.fattr);
+ nfs_writeback_done(task);
+}
+
+/*
+ * Prepare data->task as a WRITE: the task is initialised with
+ * nfs_write_done() as its callback and set up with a message built from
+ * data->args, data->res and data->cred. The task is asynchronous unless
+ * FLUSH_SYNC is set in @how.
+ */
+static void
+nfs_proc_write_setup(struct nfs_write_data *data, int how)
+{
+ struct rpc_task *task = &data->task;
+ struct inode *inode = data->inode;
+ int flags;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs_procedures[NFSPROC_WRITE],
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = data->cred,
+ };
+
+ /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
+ data->args.stable = NFS_FILE_SYNC;
+
+ /* Set the initial flags for the task. */
+ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+
+ /* Finalize the task. */
+ rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags);
+ /* NOTE(review): msg lives on this stack frame; presumably
+ * rpc_call_setup() copies what it needs into the task - confirm. */
+ rpc_call_setup(task, &msg, 0);
+}
+
+/*
+ * Commit setup hook for the v2 operations table. It must never be
+ * reached: calling it is a BUG(). (Writes are always set up as
+ * NFS_FILE_SYNC in nfs_proc_write_setup().)
+ */
+static void
+nfs_proc_commit_setup(struct nfs_write_data *data, int how)
+{
+ BUG();
+}
+
+/*
+ * File locking hook: hands the request for the file's inode to
+ * nlmclnt_proc() and returns its result.
+ */
+static int
+nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+ return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
+}
+
+
+/*
+ * Operations table for protocol version 2, wiring the nfs_proc_*
+ * functions of this file into struct nfs_rpc_ops. The access and commit
+ * entries are NULL, and commit_setup points at a function that BUG()s.
+ */
+struct nfs_rpc_ops nfs_v2_clientops = {
+ .version = 2, /* protocol version */
+ .dentry_ops = &nfs_dentry_operations,
+ .dir_inode_ops = &nfs_dir_inode_operations,
+ .getroot = nfs_proc_get_root,
+ .getattr = nfs_proc_getattr,
+ .setattr = nfs_proc_setattr,
+ .lookup = nfs_proc_lookup,
+ .access = NULL, /* access */
+ .readlink = nfs_proc_readlink,
+ .read = nfs_proc_read,
+ .write = nfs_proc_write,
+ .commit = NULL, /* commit */
+ .create = nfs_proc_create,
+ .remove = nfs_proc_remove,
+ .unlink_setup = nfs_proc_unlink_setup,
+ .unlink_done = nfs_proc_unlink_done,
+ .rename = nfs_proc_rename,
+ .link = nfs_proc_link,
+ .symlink = nfs_proc_symlink,
+ .mkdir = nfs_proc_mkdir,
+ .rmdir = nfs_proc_rmdir,
+ .readdir = nfs_proc_readdir,
+ .mknod = nfs_proc_mknod,
+ .statfs = nfs_proc_statfs,
+ .fsinfo = nfs_proc_fsinfo,
+ .pathconf = nfs_proc_pathconf,
+ .decode_dirent = nfs_decode_dirent,
+ .read_setup = nfs_proc_read_setup,
+ .write_setup = nfs_proc_write_setup,
+ .commit_setup = nfs_proc_commit_setup,
+ .file_open = nfs_open,
+ .file_release = nfs_release,
+ .lock = nfs_proc_lock,
+};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
new file mode 100644
index 000000000000..a0042fb58634
--- /dev/null
+++ b/fs/nfs/read.c
@@ -0,0 +1,618 @@
+/*
+ * linux/fs/nfs/read.c
+ *
+ * Block I/O for NFS
+ *
+ * Partial copy of Linus' read cache modifications to fs/nfs/file.c
+ * modified for async RPC by okir@monad.swb.de
+ *
+ * We do an ugly hack here in order to return proper error codes to the
+ * user program when a read request failed: since generic_file_read
+ * only checks the return value of inode->i_op->readpage() which is always 0
+ * for async RPC, we set the error bit of the page to 1 when an error occurs,
+ * and make nfs_readpage transmit requests synchronously when encountering this.
+ * This is only a small problem, though, since we now retry all operations
+ * within the RPC code when root squashing is suspected.
+ */
+
+#include <linux/config.h>
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+
+#include <asm/system.h>
+
+#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+
+static int nfs_pagein_one(struct list_head *, struct inode *);
+static void nfs_readpage_result_partial(struct nfs_read_data *, int);
+static void nfs_readpage_result_full(struct nfs_read_data *, int);
+
+static kmem_cache_t *nfs_rdata_cachep;
+mempool_t *nfs_rdata_mempool;
+
+#define MIN_POOL_READ (32)
+
+/*
+ * Task release callback (installed as tk_release by nfs_read_rpcsetup()):
+ * frees the nfs_read_data stored in the task's tk_calldata.
+ */
+void nfs_readdata_release(struct rpc_task *task)
+{
+ struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
+ nfs_readdata_free(data);
+}
+
+/*
+ * Number of bytes of @page that lie inside the file, judged by the
+ * inode's current i_size: 0 for an empty file or a page wholly beyond
+ * the end of the file, PAGE_CACHE_SIZE for any page before the last
+ * one, and the used part of the page for the last page itself.
+ */
+static
+unsigned int nfs_page_length(struct inode *inode, struct page *page)
+{
+	loff_t size = i_size_read(inode);
+	loff_t last_byte;
+	unsigned long last_idx;
+
+	if (size <= 0)
+		return 0;
+
+	last_byte = size - 1;
+	last_idx = last_byte >> PAGE_CACHE_SHIFT;
+
+	if (page->index == last_idx)
+		return 1 + (last_byte & (PAGE_CACHE_SIZE - 1));
+	if (page->index > last_idx)
+		return 0;
+	return PAGE_CACHE_SIZE;
+}
+
+/*
+ * Complete a read without going to the server: zero the whole page,
+ * mark it up to date and unlock it. Always returns 0.
+ */
+static
+int nfs_return_empty_page(struct page *page)
+{
+ memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+}
+
+/*
+ * Read a page synchronously.
+ *
+ * Fills @page with a series of reads of at most rsize bytes each through
+ * the protocol's ->read() operation. The loop ends when the page is
+ * full, when eof is reported, or when a read returns 0 bytes; whatever
+ * is left of the page is then zeroed and the page marked up to date.
+ *
+ * The page is unlocked on every path except the early -ENOMEM return.
+ * Returns 0 on success, -ENOMEM if no nfs_read_data could be allocated,
+ * or the negative read error (-EISDIR is reported as -EINVAL).
+ */
+static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
+ struct page *page)
+{
+ unsigned int rsize = NFS_SERVER(inode)->rsize;
+ unsigned int count = PAGE_CACHE_SIZE;
+ int result;
+ struct nfs_read_data *rdata;
+
+ rdata = nfs_readdata_alloc();
+ if (!rdata)
+ return -ENOMEM;
+
+ memset(rdata, 0, sizeof(*rdata));
+ rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+ rdata->cred = ctx->cred;
+ rdata->inode = inode;
+ INIT_LIST_HEAD(&rdata->pages);
+ rdata->args.fh = NFS_FH(inode);
+ rdata->args.context = ctx;
+ rdata->args.pages = &page;
+ rdata->args.pgbase = 0UL;
+ rdata->args.count = rsize;
+ rdata->res.fattr = &rdata->fattr;
+
+ dprintk("NFS: nfs_readpage_sync(%p)\n", page);
+
+ /*
+ * This works now because the socket layer never tries to DMA
+ * into this buffer directly.
+ */
+ do {
+ /* Never ask for more than what is left of the page. */
+ if (count < rsize)
+ rdata->args.count = count;
+ rdata->res.count = rdata->args.count;
+ rdata->args.offset = page_offset(page) + rdata->args.pgbase;
+
+ dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
+ NFS_SERVER(inode)->hostname,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ (unsigned long long)rdata->args.pgbase,
+ rdata->args.count);
+
+ lock_kernel();
+ result = NFS_PROTO(inode)->read(rdata);
+ unlock_kernel();
+
+ /*
+ * Even if we had a partial success we can't mark the page
+ * cache valid.
+ */
+ if (result < 0) {
+ if (result == -EISDIR)
+ result = -EINVAL;
+ goto io_error;
+ }
+ /* result is the number of bytes read: advance within the page. */
+ count -= result;
+ rdata->args.pgbase += result;
+ /* Note: result == 0 should only happen if we're caching
+ * a write that extends the file and punches a hole.
+ */
+ if (rdata->res.eof != 0 || result == 0)
+ break;
+ } while (count);
+ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+
+ /* Zero the part of the page that was not filled by the reads. */
+ if (count)
+ memclear_highpage_flush(page, rdata->args.pgbase, count);
+ SetPageUptodate(page);
+ if (PageError(page))
+ ClearPageError(page);
+ result = 0;
+
+io_error:
+ unlock_page(page);
+ nfs_readdata_free(rdata);
+ return result;
+}
+
+/*
+ * Start an asynchronous read of @page.
+ *
+ * A page with no bytes inside the file (nfs_page_length() == 0) is
+ * zero-filled and completed at once. Otherwise a request covering the
+ * in-file part of the page is created, the rest of the page is zeroed,
+ * and the request is handed to nfs_pagein_one() on a one-element list.
+ *
+ * Returns 0, or the error from nfs_create_request() (in which case the
+ * page is unlocked here). The nfs_pagein_one() result is not returned.
+ */
+static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
+ struct page *page)
+{
+ LIST_HEAD(one_request);
+ struct nfs_page *new;
+ unsigned int len;
+
+ len = nfs_page_length(inode, page);
+ if (len == 0)
+ return nfs_return_empty_page(page);
+ new = nfs_create_request(ctx, inode, page, 0, len);
+ if (IS_ERR(new)) {
+ unlock_page(page);
+ return PTR_ERR(new);
+ }
+ /* Zero the tail of the page that lies beyond the end of the file. */
+ if (len < PAGE_CACHE_SIZE)
+ memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
+
+ nfs_lock_request(new);
+ nfs_list_add_request(new, &one_request);
+ nfs_pagein_one(&one_request, inode);
+ return 0;
+}
+
+/*
+ * Finish with a read request: unlock its page, drop the page reference
+ * and release the request.
+ *
+ * The debug message is emitted before the request is torn down.
+ * nfs_unlock_request() itself calls nfs_release_request(), so together
+ * with the explicit release below the last reference may be dropped,
+ * which frees @req and puts its open context. @req must therefore not
+ * be dereferenced after these calls.
+ */
+static void nfs_readpage_release(struct nfs_page *req)
+{
+	unlock_page(req->wb_page);
+
+	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
+			req->wb_context->dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_bytes,
+			(long long)req_offset(req));
+
+	nfs_clear_request(req);
+	nfs_release_request(req);
+	nfs_unlock_request(req);
+}
+
+/*
+ * Set up the NFS read request struct
+ *
+ * Fills @data for a read of @count bytes starting @offset bytes into
+ * the region covered by @req, lets the protocol's ->read_setup() prepare
+ * the RPC task, and then installs the task cookie, call data and
+ * release callback. data->args.pages is pointed at data->pagevec, which
+ * this function does not fill in.
+ */
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ unsigned int count, unsigned int offset)
+{
+ struct inode *inode;
+
+ data->req = req;
+ data->inode = inode = req->wb_context->dentry->d_inode;
+ data->cred = req->wb_context->cred;
+
+ data->args.fh = NFS_FH(inode);
+ data->args.offset = req_offset(req) + offset;
+ data->args.pgbase = req->wb_pgbase + offset;
+ data->args.pages = data->pagevec;
+ data->args.count = count;
+ data->args.context = req->wb_context;
+
+ data->res.fattr = &data->fattr;
+ data->res.count = count;
+ data->res.eof = 0;
+
+ NFS_PROTO(inode)->read_setup(data);
+
+ /* These task fields are set after ->read_setup() has run. */
+ data->task.tk_cookie = (unsigned long)inode;
+ data->task.tk_calldata = data;
+ /* Release requests */
+ data->task.tk_release = nfs_readdata_release;
+
+ dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ count,
+ (unsigned long long)data->args.offset);
+}
+
+/*
+ * Fail every request on @head: each one is taken off the list, its page
+ * is flagged with an error, and the request is released through
+ * nfs_readpage_release(). @head is empty on return.
+ */
+static void
+nfs_async_read_error(struct list_head *head)
+{
+	for (;;) {
+		struct nfs_page *failed;
+
+		if (list_empty(head))
+			break;
+
+		failed = nfs_list_entry(head->next);
+		nfs_list_remove_request(failed);
+		SetPageError(failed->wb_page);
+		nfs_readpage_release(failed);
+	}
+}
+
+/*
+ * Start an async read operation
+ *
+ * Runs the prepared RPC task in @data with the RPC client's signal mask
+ * applied and the big kernel lock held; both are undone afterwards.
+ */
+static void nfs_execute_read(struct nfs_read_data *data)
+{
+ struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
+ sigset_t oldset;
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ lock_kernel();
+ rpc_execute(&data->task);
+ unlock_kernel();
+ rpc_clnt_sigunmask(clnt, &oldset);
+}
+
+/*
+ * Generate multiple requests to fill a single page.
+ *
+ * Called from nfs_pagein_one() when the server's rsize is smaller than a
+ * page. The request at the front of @head is split into reads of at most
+ * rsize bytes, each with its own nfs_read_data. req->wb_complete is set to
+ * the number of reads issued; nfs_readpage_result_partial() completes the
+ * page when that counter reaches zero.
+ *
+ * We optimize to reduce the number of read operations on the wire. If we
+ * detect that we're reading a page, or an area of a page, that is past the
+ * end of file, we do not generate NFS read operations but just clear the
+ * parts of the page that would have come back zero from the server anyway.
+ *
+ * We rely on the cached value of i_size to make this determination; another
+ * client can fill pages on the server past our cached end-of-file, but we
+ * won't see the new data until our attribute cache is updated. This is more
+ * or less conventional NFS client behavior.
+ *
+ * Returns 0 once all reads have been started. If a read descriptor cannot
+ * be allocated, nothing is sent: the descriptors already allocated are
+ * freed, the page is flagged with PG_error, the request is released and
+ * -ENOMEM is returned.
+ */
+static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
+{
+ struct nfs_page *req = nfs_list_entry(head->next);
+ struct page *page = req->wb_page;
+ struct nfs_read_data *data;
+ unsigned int rsize = NFS_SERVER(inode)->rsize;
+ unsigned int nbytes, offset;
+ int requests = 0;
+ LIST_HEAD(list);
+
+ nfs_list_remove_request(req);
+
+ /* Phase 1: allocate one descriptor per rsize-sized piece, up front,
+  * so that an allocation failure is seen before any RPC is started.
+  */
+ nbytes = req->wb_bytes;
+ for(;;) {
+ data = nfs_readdata_alloc();
+ if (!data)
+ goto out_bad;
+ INIT_LIST_HEAD(&data->pages);
+ list_add(&data->pages, &list);
+ requests++;
+ if (nbytes <= rsize)
+ break;
+ nbytes -= rsize;
+ }
+ /* Number of replies that must arrive before the page is complete */
+ atomic_set(&req->wb_complete, requests);
+
+ /* Phase 2: set up and start one read per descriptor */
+ ClearPageError(page);
+ offset = 0;
+ nbytes = req->wb_bytes;
+ do {
+ data = list_entry(list.next, struct nfs_read_data, pages);
+ list_del_init(&data->pages);
+
+ data->pagevec[0] = page;
+ data->complete = nfs_readpage_result_partial;
+
+ if (nbytes > rsize) {
+ nfs_read_rpcsetup(req, data, rsize, offset);
+ offset += rsize;
+ nbytes -= rsize;
+ } else {
+ /* Last (possibly short) piece */
+ nfs_read_rpcsetup(req, data, nbytes, offset);
+ nbytes = 0;
+ }
+ nfs_execute_read(data);
+ } while (nbytes != 0);
+
+ return 0;
+
+out_bad:
+ /* Free whatever descriptors were allocated before the failure */
+ while (!list_empty(&list)) {
+ data = list_entry(list.next, struct nfs_read_data, pages);
+ list_del(&data->pages);
+ nfs_readdata_free(data);
+ }
+ SetPageError(page);
+ nfs_readpage_release(req);
+ return -ENOMEM;
+}
+
+/*
+ * Issue a single READ covering every request queued on @head.
+ *
+ * If the server's rsize is smaller than a page, the work is handed to
+ * nfs_pagein_multi() instead. Otherwise all requests are moved from @head
+ * onto data->pages, their pages are collected in data->pagevec, and one
+ * read for the sum of their byte counts is started; completion is handled
+ * by nfs_readpage_result_full().
+ *
+ * Returns 0 on success. If no read descriptor can be allocated, every
+ * request on @head is failed via nfs_async_read_error() and -ENOMEM is
+ * returned.
+ */
+static int nfs_pagein_one(struct list_head *head, struct inode *inode)
+{
+ struct nfs_page *req;
+ struct page **pages;
+ struct nfs_read_data *data;
+ unsigned int count;
+
+ if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
+ return nfs_pagein_multi(head, inode);
+
+ data = nfs_readdata_alloc();
+ if (!data)
+ goto out_bad;
+
+ INIT_LIST_HEAD(&data->pages);
+ pages = data->pagevec;
+ count = 0;
+ /* Transfer each request to the descriptor and record its page */
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &data->pages);
+ ClearPageError(req->wb_page);
+ *pages++ = req->wb_page;
+ count += req->wb_bytes;
+ }
+ /* The first request on the list supplies the file offset and context */
+ req = nfs_list_entry(data->pages.next);
+
+ data->complete = nfs_readpage_result_full;
+ nfs_read_rpcsetup(req, data, count, 0);
+
+ nfs_execute_read(data);
+ return 0;
+out_bad:
+ nfs_async_read_error(head);
+ return -ENOMEM;
+}
+
+/*
+ * Send out every request on @head, one coalesced batch at a time.
+ *
+ * Each pass asks nfs_coalesce_requests() to move a batch onto a private
+ * list (passing @rpages through to it) and hands that batch to
+ * nfs_pagein_one(). Returns the accumulated value reported by
+ * nfs_coalesce_requests(), or, if a batch fails, the negative error after
+ * failing whatever is still queued on @head.
+ */
+static int
+nfs_pagein_list(struct list_head *head, int rpages)
+{
+	LIST_HEAD(batch);
+	unsigned int total = 0;
+
+	while (!list_empty(head)) {
+		struct nfs_page *first;
+		int status;
+
+		total += nfs_coalesce_requests(head, &batch, rpages);
+		first = nfs_list_entry(batch.next);
+		status = nfs_pagein_one(&batch, first->wb_context->dentry->d_inode);
+		if (status < 0) {
+			nfs_async_read_error(head);
+			return status;
+		}
+	}
+	return total;
+}
+
+/*
+ * Completion handler for one of several reads that together fill a page
+ * (see nfs_pagein_multi()).
+ *
+ * A failed read flags the page with PG_error; a short successful read has
+ * the missing tail of its range zeroed. Whichever reply brings
+ * req->wb_complete to zero finishes the page: it is marked uptodate
+ * unless PG_error is set, and the request is released.
+ */
+static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+{
+	struct nfs_page *req = data->req;
+	struct page *page = req->wb_page;
+
+	if (status < 0) {
+		SetPageError(page);
+	} else {
+		unsigned int wanted = data->args.count;
+		unsigned int got = data->res.count;
+
+		if (got < wanted)
+			memclear_highpage_flush(page,
+						data->args.pgbase + got,
+						wanted - got);
+	}
+
+	/* Other pieces of this page are still outstanding */
+	if (!atomic_dec_and_test(&req->wb_complete))
+		return;
+
+	if (!PageError(page))
+		SetPageUptodate(page);
+	nfs_readpage_release(req);
+}
+
+/*
+ * Completion handler for a read that covers one or more whole requests
+ * (see nfs_pagein_one()).
+ *
+ * Walks the requests attached to @data in list order, using
+ * data->res.count as the number of bytes still to be accounted for. On
+ * success each page is marked uptodate; the first request for which fewer
+ * than PAGE_CACHE_SIZE bytes remain has the unread tail of its range
+ * zeroed, and the remaining count drops to 0 for the requests after it.
+ * On failure every page is flagged with PG_error. Each request is released
+ * as it is processed.
+ */
+static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+{
+ unsigned int count = data->res.count;
+
+ while (!list_empty(&data->pages)) {
+ struct nfs_page *req = nfs_list_entry(data->pages.next);
+ struct page *page = req->wb_page;
+ nfs_list_remove_request(req);
+
+ if (status >= 0) {
+ if (count < PAGE_CACHE_SIZE) {
+ /* The data ran out inside this page: zero the rest */
+ if (count < req->wb_bytes)
+ memclear_highpage_flush(page,
+ req->wb_pgbase + count,
+ req->wb_bytes - count);
+ count = 0;
+ } else
+ count -= PAGE_CACHE_SIZE;
+ SetPageUptodate(page);
+ } else
+ SetPageError(page);
+ nfs_readpage_release(req);
+ }
+}
+
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ *
+ * A short read that did not hit end-of-file is retried from where the
+ * server stopped, as long as the server returned at least one byte. A
+ * short read with no data at all is turned into -EIO; that status is
+ * stored in the task and is also the status handed to the completion
+ * handler, so the affected pages are not treated as a successful read.
+ */
+void nfs_readpage_result(struct rpc_task *task)
+{
+	struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
+	struct nfs_readargs *argp = &data->args;
+	struct nfs_readres *resp = &data->res;
+	int status = task->tk_status;
+
+	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
+		task->tk_pid, status);
+
+	/* Is this a short read? */
+	if (status >= 0 && resp->count < argp->count && !resp->eof) {
+		/* Has the server at least made some progress? */
+		if (resp->count != 0) {
+			/* Yes, so retry the read at the end of the data */
+			argp->offset += resp->count;
+			argp->pgbase += resp->count;
+			argp->count -= resp->count;
+			rpc_restart_call(task);
+			return;
+		}
+		/* No progress at all: report an I/O error to everyone */
+		status = -EIO;
+		task->tk_status = status;
+	}
+	NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
+	data->complete(data, status);
+}
+
+/*
+ * Read a page over NFS.
+ *
+ * Pending writes to the page are flushed first. The read is then started
+ * asynchronously with nfs_readpage_async(), unless the inode is marked
+ * IS_SYNC, in which case nfs_readpage_sync() is used.
+ *
+ * @file may be NULL; an open context with read access is then looked up
+ * on the inode, and -EBADF is returned if there is none.
+ *
+ * If the function fails before the read has been handed off (the flush
+ * failed or no open context was found), the page is unlocked here before
+ * returning the error.
+ */
+int nfs_readpage(struct file *file, struct page *page)
+{
+	struct nfs_open_context *ctx;
+	struct inode *inode = page->mapping->host;
+	int error;
+
+	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
+		page, PAGE_CACHE_SIZE, page->index);
+	/*
+	 * Try to flush any pending writes to the file..
+	 *
+	 * NOTE! Because we own the page lock, there cannot
+	 * be any new pending writes generated at this point
+	 * for this page (other pages can be written to).
+	 */
+	error = nfs_wb_page(inode, page);
+	if (error)
+		goto out_error;
+
+	if (file == NULL) {
+		ctx = nfs_find_open_context(inode, FMODE_READ);
+		if (ctx == NULL) {
+			/* Do not leave the page locked behind us */
+			error = -EBADF;
+			goto out_error;
+		}
+	} else
+		ctx = get_nfs_open_context((struct nfs_open_context *)
+				file->private_data);
+	if (!IS_SYNC(inode)) {
+		error = nfs_readpage_async(ctx, inode, page);
+		goto out;
+	}
+
+	error = nfs_readpage_sync(ctx, inode, page);
+	if (error < 0 && IS_SWAPFILE(inode))
+		printk("Aiee.. nfs swap-in of page failed!\n");
+out:
+	put_nfs_open_context(ctx);
+	return error;
+
+out_error:
+	unlock_page(page);
+	return error;
+}
+
+/*
+ * State passed from nfs_readpages() to readpage_async_filler() through
+ * the opaque data pointer of read_cache_pages().
+ */
+struct nfs_readdesc {
+ struct list_head *head; /* list that collects the new read requests */
+ struct nfs_open_context *ctx; /* open context used to create each request */
+};
+
+/*
+ * read_cache_pages() callback: queue a read request for @page.
+ *
+ * @data is the struct nfs_readdesc set up by nfs_readpages(). Pending
+ * writes to the page are flushed first; if that fails the page is unlocked
+ * and the error returned, as nfs_readpage() does. A page with nothing to
+ * read is handed to nfs_return_empty_page(). Otherwise a request covering
+ * the readable part of the page is created, the rest of the page is
+ * zeroed, and the locked request is appended to desc->head.
+ *
+ * Returns 0 on success or a negative errno; on request-creation failure
+ * the page is also flagged with PG_error.
+ */
+static int
+readpage_async_filler(void *data, struct page *page)
+{
+	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
+	struct inode *inode = page->mapping->host;
+	struct nfs_page *new;
+	unsigned int len;
+	int error;
+
+	error = nfs_wb_page(inode, page);
+	if (error) {
+		unlock_page(page);
+		return error;
+	}
+	len = nfs_page_length(inode, page);
+	if (len == 0)
+		return nfs_return_empty_page(page);
+	new = nfs_create_request(desc->ctx, inode, page, 0, len);
+	if (IS_ERR(new)) {
+		SetPageError(page);
+		unlock_page(page);
+		return PTR_ERR(new);
+	}
+	/* Zero the part of the page that the read will not fill */
+	if (len < PAGE_CACHE_SIZE)
+		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
+	nfs_lock_request(new);
+	nfs_list_add_request(new, desc->head);
+	return 0;
+}
+
+/*
+ * Read a batch of pages (readahead entry point).
+ *
+ * read_cache_pages() runs readpage_async_filler() over @pages, which
+ * collects read requests on a local list; that list is then sent out with
+ * nfs_pagein_list() using the server's rpages value.
+ *
+ * @filp may be NULL, in which case an open context with read access is
+ * looked up on the inode (-EBADF if none is found).
+ *
+ * Returns the result of read_cache_pages() if it is non-zero, otherwise
+ * the result of nfs_pagein_list() when any requests were queued.
+ */
+int nfs_readpages(struct file *filp, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ LIST_HEAD(head);
+ struct nfs_readdesc desc = {
+ .head = &head,
+ };
+ struct inode *inode = mapping->host;
+ struct nfs_server *server = NFS_SERVER(inode);
+ int ret;
+
+ dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ nr_pages);
+
+ if (filp == NULL) {
+ desc.ctx = nfs_find_open_context(inode, FMODE_READ);
+ if (desc.ctx == NULL)
+ return -EBADF;
+ } else
+ desc.ctx = get_nfs_open_context((struct nfs_open_context *)
+ filp->private_data);
+ ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+ /* Send whatever was queued, even if the filler stopped early */
+ if (!list_empty(&head)) {
+ int err = nfs_pagein_list(&head, server->rpages);
+ if (!ret)
+ ret = err;
+ }
+ put_nfs_open_context(desc.ctx);
+ return ret;
+}
+
+/*
+ * Create the slab cache and the mempool used for nfs_read_data.
+ *
+ * Returns 0 on success or -ENOMEM. If the mempool cannot be created, the
+ * slab cache created just before it is destroyed again so that a failed
+ * initialisation leaves nothing behind.
+ */
+int nfs_init_readpagecache(void)
+{
+	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
+					     sizeof(struct nfs_read_data),
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL, NULL);
+	if (nfs_rdata_cachep == NULL)
+		return -ENOMEM;
+
+	nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
+					   mempool_alloc_slab,
+					   mempool_free_slab,
+					   nfs_rdata_cachep);
+	if (nfs_rdata_mempool == NULL) {
+		/* Undo the cache creation instead of leaking it */
+		if (kmem_cache_destroy(nfs_rdata_cachep))
+			printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
+		nfs_rdata_cachep = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down the nfs_read_data mempool and its slab cache, logging a
+ * message if the cache could not be destroyed cleanly.
+ */
+void nfs_destroy_readpagecache(void)
+{
+	int busy;
+
+	mempool_destroy(nfs_rdata_mempool);
+	busy = kmem_cache_destroy(nfs_rdata_cachep);
+	if (busy != 0)
+		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
+}
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
new file mode 100644
index 000000000000..35f106599144
--- /dev/null
+++ b/fs/nfs/symlink.c
@@ -0,0 +1,117 @@
+/*
+ * linux/fs/nfs/symlink.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ *
+ * Optimization changes Copyright (C) 1994 Florian La Roche
+ *
+ * Jun 7 1999, cache symlink lookups in the page cache. -DaveM
+ *
+ * nfs symlink handling code
+ */
+
+#define NFS_NEED_XDR_TYPES
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs_fs.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/namei.h>
+
+/* Symlink caching in the page cache is even more simplistic
+ * and straight-forward than readdir caching.
+ *
+ * At the beginning of the page we store pointer to struct page in question,
+ * simplifying nfs_put_link() (if inode got invalidated we can't find the page
+ * to be freed via pagecache lookup).
+ * The NUL-terminated string follows immediately thereafter.
+ */
+
+/* Layout of a cached symlink page (see the comment above). */
+struct nfs_symlink {
+ struct page *page; /* back-pointer to the page holding this structure */
+ char body[0]; /* link text, filled in by the readlink call */
+};
+
+/*
+ * read_cache_page() filler for symlinks.
+ *
+ * Asks the protocol's readlink operation to store the link text in @page,
+ * starting right after the struct nfs_symlink header. The page is marked
+ * uptodate on success or flagged with PG_error on failure, and is unlocked
+ * in both cases. Returns 0 on success and -EIO on any readlink failure.
+ */
+static int nfs_symlink_filler(struct inode *inode, struct page *page)
+{
+	const unsigned int pgbase = offsetof(struct nfs_symlink, body);
+	const unsigned int pglen = PAGE_SIZE - pgbase;
+	int status;
+	int ret = 0;
+
+	lock_kernel();
+	status = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen);
+	unlock_kernel();
+
+	if (status < 0) {
+		SetPageError(page);
+		ret = -EIO;
+	} else {
+		SetPageUptodate(page);
+	}
+	unlock_page(page);
+	return ret;
+}
+
+/*
+ * ->follow_link: hand the symlink text to the path walker.
+ *
+ * The inode is revalidated, then page 0 of the inode's mapping is read
+ * through nfs_symlink_filler(). On success the page is kmap()ed, its own
+ * address is stored in the nfs_symlink header and the body is passed to
+ * nd_set_link(); the mapping and page reference are dropped later by
+ * nfs_put_link().
+ *
+ * Always returns 0: failures are reported by storing an ERR_PTR() value
+ * with nd_set_link() instead.
+ */
+static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct inode *inode = dentry->d_inode;
+ struct page *page;
+ struct nfs_symlink *p;
+ /* ERR_PTR(0) is NULL, so err is only non-NULL if revalidation failed */
+ void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode));
+ if (err)
+ goto read_failed;
+ page = read_cache_page(&inode->i_data, 0,
+ (filler_t *)nfs_symlink_filler, inode);
+ if (IS_ERR(page)) {
+ err = page;
+ goto read_failed;
+ }
+ if (!PageUptodate(page)) {
+ err = ERR_PTR(-EIO);
+ goto getlink_read_error;
+ }
+ p = kmap(page);
+ /* Remember the page so nfs_put_link() can find it from the string */
+ p->page = page;
+ nd_set_link(nd, p->body);
+ return 0;
+
+getlink_read_error:
+ page_cache_release(page);
+read_failed:
+ nd_set_link(nd, err);
+ return 0;
+}
+
+/*
+ * ->put_link: undo nfs_follow_link().
+ *
+ * If the stored link is a real string (not an error pointer), recover the
+ * enclosing struct nfs_symlink from it, then unmap and release the page
+ * recorded in its header.
+ */
+static void nfs_put_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *link = nd_get_link(nd);
+	struct nfs_symlink *sym;
+	struct page *page;
+
+	if (IS_ERR(link))
+		return;
+
+	sym = container_of(link, struct nfs_symlink, body[0]);
+	page = sym->page;
+
+	kunmap(page);
+	page_cache_release(page);
+}
+
+/*
+ * symlinks can't do much...
+ *
+ * Inode operations for NFS symlinks: readlink goes through the generic
+ * helper, link traversal uses the page-cache based follow/put pair above,
+ * and attribute handling uses the common NFS getattr/setattr.
+ */
+struct inode_operations nfs_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = nfs_follow_link,
+ .put_link = nfs_put_link,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+};
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
new file mode 100644
index 000000000000..f732541a3332
--- /dev/null
+++ b/fs/nfs/unlink.c
@@ -0,0 +1,227 @@
+/*
+ * linux/fs/nfs/unlink.c
+ *
+ * nfs sillydelete handling
+ *
+ * NOTE: we rely on holding the BKL for list manipulation protection.
+ */
+
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dcache.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+
+
+/* Descriptor for one pending asynchronous unlink (sillydelete). */
+struct nfs_unlinkdata {
+ struct nfs_unlinkdata *next; /* next entry on the nfs_deletes list */
+ struct dentry *dir, *dentry; /* referenced parent dir; dentry to remove */
+ struct qstr name; /* private copy of the name, set by nfs_copy_dname() */
+ struct rpc_task task; /* embedded RPC task that performs the unlink */
+ struct rpc_cred *cred; /* credential looked up in nfs_async_unlink() */
+ unsigned int count; /* reference count, see nfs_put_unlinkdata() */
+};
+
+/* Head of the singly linked list of pending sillydeletes */
+static struct nfs_unlinkdata *nfs_deletes;
+/* Wait queue on which unlink tasks sleep until nfs_complete_unlink() */
+static RPC_WAITQ(nfs_delete_queue, "nfs_delete_queue");
+
+/**
+ * nfs_detach_unlinkdata - Remove asynchronous unlink from global list
+ * @data: pointer to descriptor
+ *
+ * Unlinks @data from the nfs_deletes list; does nothing if it is not on
+ * the list.
+ */
+static inline void
+nfs_detach_unlinkdata(struct nfs_unlinkdata *data)
+{
+	struct nfs_unlinkdata **link = &nfs_deletes;
+
+	while (*link != NULL) {
+		if (*link == data) {
+			*link = data->next;
+			return;
+		}
+		link = &(*link)->next;
+	}
+}
+
+/**
+ * nfs_put_unlinkdata - release data from a sillydelete operation.
+ * @data: pointer to unlink structure.
+ *
+ * Drops one reference. When the last one goes away, the descriptor is
+ * taken off the global list and freed together with its copied name.
+ */
+static void
+nfs_put_unlinkdata(struct nfs_unlinkdata *data)
+{
+	if (--data->count != 0)
+		return;
+
+	nfs_detach_unlinkdata(data);
+	if (data->name.name != NULL)
+		kfree(data->name.name);
+	kfree(data);
+}
+
+/* Round len + 1 up to a multiple of 16: room for the name and a NUL */
+#define NAME_ALLOC_LEN(len)	(((len) + 16) & ~15)
+/**
+ * nfs_copy_dname - copy dentry name to data structure
+ * @dentry: pointer to dentry
+ * @data: nfs_unlinkdata
+ *
+ * Stores a NUL-terminated private copy of the dentry's name in
+ * @data->name, unless a name has already been recorded there. If the
+ * allocation fails, @data->name is left untouched.
+ */
+static inline void
+nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
+{
+	char *str;
+	int len = dentry->d_name.len;
+
+	str = kmalloc(NAME_ALLOC_LEN(len), GFP_KERNEL);
+	if (!str)
+		return;
+	memcpy(str, dentry->d_name.name, len);
+	/* kmalloc() memory is not zeroed: terminate the copy explicitly */
+	str[len] = '\0';
+	/*
+	 * The "already set" test is made only after the allocation, which
+	 * is done with GFP_KERNEL and so may sleep.
+	 */
+	if (!data->name.len) {
+		data->name.len = len;
+		data->name.name = str;
+	} else
+		kfree(str);
+}
+
+/**
+ * nfs_async_unlink_init - Initialize the RPC info
+ * @task: rpc_task of the sillydelete
+ *
+ * We delay initializing RPC info until after the call to dentry_iput()
+ * in order to minimize races against rename().
+ *
+ * Installed as the task's tk_action by nfs_async_unlink(). If no name has
+ * been recorded for the unlink, or the protocol's unlink_setup fails, the
+ * task exits with that status (-ENOENT for the missing name).
+ */
+static void
+nfs_async_unlink_init(struct rpc_task *task)
+{
+ struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata;
+ struct dentry *dir = data->dir;
+ struct rpc_message msg = {
+ .rpc_cred = data->cred,
+ };
+ int status = -ENOENT;
+
+ /* nfs_copy_dname() never stored a name: nothing to unlink */
+ if (!data->name.len)
+ goto out_err;
+
+ status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name);
+ if (status < 0)
+ goto out_err;
+ /* Paired with nfs_end_data_update() in nfs_async_unlink_done() */
+ nfs_begin_data_update(dir->d_inode);
+ rpc_call_setup(task, &msg, 0);
+ return;
+ out_err:
+ rpc_exit(task, status);
+}
+
+/**
+ * nfs_async_unlink_done - Sillydelete post-processing
+ * @task: rpc_task of the sillydelete
+ *
+ * Do the directory attribute update.
+ *
+ * Ends the data update on the directory and calls the protocol's
+ * unlink_done hook. If that hook returns non-zero the function returns
+ * without releasing anything; otherwise the credential and the directory
+ * reference taken in nfs_async_unlink() are dropped.
+ */
+static void
+nfs_async_unlink_done(struct rpc_task *task)
+{
+ struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata;
+ struct dentry *dir = data->dir;
+ struct inode *dir_i;
+
+ if (!dir)
+ return;
+ dir_i = dir->d_inode;
+ nfs_end_data_update(dir_i);
+ /* NOTE(review): presumably non-zero means the call is being retried - confirm */
+ if (NFS_PROTO(dir_i)->unlink_done(dir, task))
+ return;
+ put_rpccred(data->cred);
+ data->cred = NULL;
+ dput(dir);
+}
+
+/**
+ * nfs_async_unlink_release - Release the sillydelete data.
+ * @task: rpc_task of the sillydelete
+ *
+ * The rpc_task is embedded in the nfs_unlinkdata, so the descriptor is
+ * released from the task's 'tk_release' callback: dropping it here frees
+ * the rpc_task along with it.
+ */
+static void
+nfs_async_unlink_release(struct rpc_task *task)
+{
+	nfs_put_unlinkdata((struct nfs_unlinkdata *)task->tk_calldata);
+}
+
+/**
+ * nfs_async_unlink - asynchronous unlinking of a file
+ * @dentry: dentry to unlink
+ *
+ * Allocates a descriptor for the unlink, puts it on the nfs_deletes list,
+ * sets up its embedded RPC task and leaves that task sleeping on
+ * nfs_delete_queue until nfs_complete_unlink() wakes it. The dentry is
+ * flagged DCACHE_NFSFS_RENAMED.
+ *
+ * Returns 0 on success, -ENOMEM if the descriptor cannot be allocated, or
+ * the error from the credential lookup.
+ */
+int
+nfs_async_unlink(struct dentry *dentry)
+{
+ struct dentry *dir = dentry->d_parent;
+ struct nfs_unlinkdata *data;
+ struct rpc_task *task;
+ struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
+ int status = -ENOMEM;
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out;
+ memset(data, 0, sizeof(*data));
+
+ data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
+ if (IS_ERR(data->cred)) {
+ status = PTR_ERR(data->cred);
+ goto out_free;
+ }
+ /* Hold the parent directory; dropped in nfs_async_unlink_done() */
+ data->dir = dget(dir);
+ data->dentry = dentry;
+
+ /* Push onto the global list; the initial reference is the task's */
+ data->next = nfs_deletes;
+ nfs_deletes = data;
+ data->count = 1;
+
+ task = &data->task;
+ rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC);
+ task->tk_calldata = data;
+ task->tk_action = nfs_async_unlink_init;
+ task->tk_release = nfs_async_unlink_release;
+
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_NFSFS_RENAMED;
+ spin_unlock(&dentry->d_lock);
+
+ /* Park the task until nfs_complete_unlink() wakes it up */
+ rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL);
+ status = 0;
+ out:
+ return status;
+out_free:
+ kfree(data);
+ return status;
+}
+
+/**
+ * nfs_complete_unlink - Initialize completion of the sillydelete
+ * @dentry: dentry to delete
+ *
+ * Since we're most likely to be called by dentry_iput(), we
+ * only use the dentry to find the sillydelete. We then copy the name
+ * into the qstr.
+ *
+ * Does nothing if no pending sillydelete matches @dentry. Otherwise the
+ * descriptor is pinned with an extra reference while the name is copied,
+ * the DCACHE_NFSFS_RENAMED flag is cleared and the sleeping task is woken.
+ */
+void
+nfs_complete_unlink(struct dentry *dentry)
+{
+	struct nfs_unlinkdata *data = nfs_deletes;
+
+	while (data != NULL && data->dentry != dentry)
+		data = data->next;
+	if (data == NULL)
+		return;
+
+	data->count++;
+	nfs_copy_dname(dentry, data);
+
+	spin_lock(&dentry->d_lock);
+	dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+	spin_unlock(&dentry->d_lock);
+
+	rpc_wake_up_task(&data->task);
+	nfs_put_unlinkdata(data);
+}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
new file mode 100644
index 000000000000..6f7a4af3bc46
--- /dev/null
+++ b/fs/nfs/write.c
@@ -0,0 +1,1431 @@
+/*
+ * linux/fs/nfs/write.c
+ *
+ * Writing file data over NFS.
+ *
+ * We do it like this: When a (user) process wishes to write data to an
+ * NFS file, a write request is allocated that contains the RPC task data
+ * plus some info on the page to be written, and added to the inode's
+ * write chain. If the process writes past the end of the page, an async
+ * RPC call to write the page is scheduled immediately; otherwise, the call
+ * is delayed for a few seconds.
+ *
+ * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
+ *
+ * Write requests are kept on the inode's writeback list. Each entry in
+ * that list references the page (portion) to be written. When the
+ * cache timeout has expired, the RPC task is woken up, and tries to
+ * lock the page. As soon as it manages to do so, the request is moved
+ * from the writeback list to the writelock list.
+ *
+ * Note: we must make sure never to confuse the inode passed in the
+ * write_page request with the one in page->inode. As far as I understand
+ * it, these are different when doing a swap-out.
+ *
+ * To understand everything that goes on here and in the NFS read code,
+ * one should be aware that a page is locked in exactly one of the following
+ * cases:
+ *
+ * - A write request is in progress.
+ * - A user process is in generic_file_write/nfs_update_page
+ * - A user process is in generic_file_read
+ *
+ * Also note that because of the way pages are invalidated in
+ * nfs_revalidate_inode, the following assertions hold:
+ *
+ * - If a page is dirty, there will be no read requests (a page will
+ * not be re-read unless invalidated by nfs_revalidate_inode).
+ * - If the page is not uptodate, there will be no pending write
+ * requests, and no process will be in nfs_update_page.
+ *
+ * FIXME: Interaction with the vmscan routines is not optimal yet.
+ * Either vmscan must be made nfs-savvy, or we need a different page
+ * reclaim concept that supports something like FS-independent
+ * buffer_heads with a b_ops-> field.
+ *
+ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs_page.h>
+#include <asm/uaccess.h>
+#include <linux/smp_lock.h>
+
+#include "delegation.h"
+
+#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+
+#define MIN_POOL_WRITE (32)
+#define MIN_POOL_COMMIT (4)
+
+/*
+ * Local function declarations
+ */
+static struct nfs_page * nfs_update_request(struct nfs_open_context*,
+ struct inode *,
+ struct page *,
+ unsigned int, unsigned int);
+static void nfs_writeback_done_partial(struct nfs_write_data *, int);
+static void nfs_writeback_done_full(struct nfs_write_data *, int);
+static int nfs_wait_on_write_congestion(struct address_space *, int);
+static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
+static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
+ unsigned int npages, int how);
+
+static kmem_cache_t *nfs_wdata_cachep;
+mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_commit_mempool;
+
+static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
+
+/*
+ * Allocate a zeroed nfs_write_data from the commit mempool, with its page
+ * list initialised. Returns NULL if the allocation fails.
+ */
+static inline struct nfs_write_data *nfs_commit_alloc(void)
+{
+	struct nfs_write_data *wdata;
+
+	wdata = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+	if (wdata == NULL)
+		return NULL;
+
+	memset(wdata, 0, sizeof(*wdata));
+	INIT_LIST_HEAD(&wdata->pages);
+	return wdata;
+}
+
+/* Return a descriptor obtained from nfs_commit_alloc() to the commit mempool. */
+static inline void nfs_commit_free(struct nfs_write_data *p)
+{
+ mempool_free(p, nfs_commit_mempool);
+}
+
+/* RPC release callback: free the nfs_write_data attached to @task. */
+static void nfs_writedata_release(struct rpc_task *task)
+{
+	nfs_writedata_free((struct nfs_write_data *)task->tk_calldata);
+}
+
+/* Adjust the file length if we're writing beyond the end
+ *
+ * @page:   page that was written to
+ * @offset: start of the written range within the page
+ * @count:  number of bytes written
+ *
+ * i_size is only ever increased here, and only when the written range
+ * ends past the current i_size.
+ */
+static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
+{
+ struct inode *inode = page->mapping->host;
+ loff_t end, i_size = i_size_read(inode);
+ /* Index of the page holding the last byte of the file */
+ unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+
+ /* A page before the last one cannot extend the file */
+ if (i_size > 0 && page->index < end_index)
+ return;
+ end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
+ if (i_size >= end)
+ return;
+ i_size_write(inode, end);
+}
+
+/* We can set the PG_uptodate flag if we see that a write request
+ * covers the full page.
+ *
+ * @page:  page that was written to
+ * @base:  start of the written range within the page
+ * @count: length of the written range
+ *
+ * A write starting at offset 0 also qualifies if the page is the last page
+ * of the file and the write covers all of the file's data in that page;
+ * the remainder of the page is then zeroed.
+ */
+static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
+{
+ loff_t end_offs;
+
+ if (PageUptodate(page))
+ return;
+ /* Only writes that start at the beginning of the page can qualify */
+ if (base != 0)
+ return;
+ if (count == PAGE_CACHE_SIZE) {
+ SetPageUptodate(page);
+ return;
+ }
+
+ /* Offset of the last byte of the file; negative for an empty file */
+ end_offs = i_size_read(page->mapping->host) - 1;
+ if (end_offs < 0)
+ return;
+ /* Is this the last page? */
+ if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
+ return;
+ /* This is the last page: set PG_uptodate if we cover the entire
+ * extent of the data, then zero the rest of the page.
+ */
+ if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) {
+ memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
+ SetPageUptodate(page);
+ }
+}
+
+/*
+ * Write a page synchronously.
+ * Offset is the data offset within the page.
+ *
+ * The range is written with NFS_FILE_SYNC stability in pieces of at most
+ * wsize bytes. On success the file length and PG_uptodate are updated and
+ * a stale PG_error is cleared. On an I/O error the page is marked not
+ * uptodate.
+ *
+ * Returns the number of bytes written if any were written, otherwise the
+ * result of the last write call (a negative errno on failure), or -ENOMEM
+ * if no write descriptor could be allocated.
+ */
+static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
+ struct page *page, unsigned int offset, unsigned int count,
+ int how)
+{
+ unsigned int wsize = NFS_SERVER(inode)->wsize;
+ int result, written = 0;
+ struct nfs_write_data *wdata;
+
+ wdata = nfs_writedata_alloc();
+ if (!wdata)
+ return -ENOMEM;
+
+ wdata->flags = how;
+ wdata->cred = ctx->cred;
+ wdata->inode = inode;
+ wdata->args.fh = NFS_FH(inode);
+ wdata->args.context = ctx;
+ wdata->args.pages = &page;
+ wdata->args.stable = NFS_FILE_SYNC;
+ wdata->args.pgbase = offset;
+ wdata->args.count = wsize;
+ wdata->res.fattr = &wdata->fattr;
+ wdata->res.verf = &wdata->verf;
+
+ dprintk("NFS: nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ count, (long long)(page_offset(page) + offset));
+
+ nfs_begin_data_update(inode);
+ /* NOTE(review): a write that returns 0 leaves count unchanged, so this
+  * loop relies on ->write() either making progress or failing - confirm.
+  */
+ do {
+ /* Shrink the final piece to what is left */
+ if (count < wsize)
+ wdata->args.count = count;
+ wdata->args.offset = page_offset(page) + wdata->args.pgbase;
+
+ result = NFS_PROTO(inode)->write(wdata);
+
+ if (result < 0) {
+ /* Must mark the page invalid after I/O error */
+ ClearPageUptodate(page);
+ goto io_error;
+ }
+ if (result < wdata->args.count)
+ printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n",
+ wdata->args.count, result);
+
+ wdata->args.offset += result;
+ wdata->args.pgbase += result;
+ written += result;
+ count -= result;
+ } while (count);
+ /* Update file length */
+ nfs_grow_file(page, offset, written);
+ /* Set the PG_uptodate flag? */
+ nfs_mark_uptodate(page, offset, written);
+
+ if (PageError(page))
+ ClearPageError(page);
+
+io_error:
+ nfs_end_data_update_defer(inode);
+ nfs_writedata_free(wdata);
+ return written ? written : result;
+}
+
+/*
+ * Queue an asynchronous write of @count bytes at @offset within @page.
+ *
+ * Creates or updates the write request for the page, then updates the
+ * file length and PG_uptodate and unlocks the request. Returns 0 on
+ * success or the error from nfs_update_request().
+ */
+static int nfs_writepage_async(struct nfs_open_context *ctx,
+		struct inode *inode, struct page *page,
+		unsigned int offset, unsigned int count)
+{
+	struct nfs_page *req = nfs_update_request(ctx, inode, page, offset, count);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* Update file length */
+	nfs_grow_file(page, offset, count);
+	/* Set the PG_uptodate flag? */
+	nfs_mark_uptodate(page, offset, count);
+	nfs_unlock_request(req);
+	return 0;
+}
+
+/*
+ * Map a writeback_control to flush flags: reclaim takes precedence and
+ * gets FLUSH_HIGHPRI, kupdate gets FLUSH_LOWPRI, anything else gets 0.
+ */
+static int wb_priority(struct writeback_control *wbc)
+{
+	if (wbc->for_reclaim)
+		return FLUSH_HIGHPRI;
+	return wbc->for_kupdate ? FLUSH_LOWPRI : 0;
+}
+
+/*
+ * Write an mmapped page to the server.
+ *
+ * Only the part of the page that lies within i_size is written; a page
+ * entirely beyond i_size is skipped with a return value of 0. The write is
+ * asynchronous when the inode is not IS_SYNC and a reference to the inode
+ * could be taken, and synchronous otherwise. The page is unlocked before
+ * returning.
+ *
+ * Returns 0 on success, -EBADF if no open context with write access
+ * exists, or the error from the write path.
+ */
+int nfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct nfs_open_context *ctx;
+ struct inode *inode = page->mapping->host;
+ unsigned long end_index;
+ unsigned offset = PAGE_CACHE_SIZE;
+ loff_t i_size = i_size_read(inode);
+ int inode_referenced = 0;
+ int priority = wb_priority(wbc);
+ int err;
+
+ /*
+ * Note: We need to ensure that we have a reference to the inode
+ * if we are to do asynchronous writes. If not, waiting
+ * in nfs_wait_on_request() may deadlock with clear_inode().
+ *
+ * If igrab() fails here, then it is in any case safe to
+ * call nfs_wb_page(), since there will be no pending writes.
+ */
+ if (igrab(inode) != 0)
+ inode_referenced = 1;
+ end_index = i_size >> PAGE_CACHE_SHIFT;
+
+ /* Ensure we've flushed out any previous writes */
+ nfs_wb_page_priority(inode, page, priority);
+
+ /* easy case */
+ if (page->index < end_index)
+ goto do_it;
+ /* things got complicated... */
+ /* Number of valid bytes in the page that contains i_size */
+ offset = i_size & (PAGE_CACHE_SIZE-1);
+
+ /* OK, are we completely out? */
+ err = 0; /* potential race with truncate - ignore */
+ if (page->index >= end_index+1 || !offset)
+ goto out;
+do_it:
+ ctx = nfs_find_open_context(inode, FMODE_WRITE);
+ if (ctx == NULL) {
+ err = -EBADF;
+ goto out;
+ }
+ lock_kernel();
+ if (!IS_SYNC(inode) && inode_referenced) {
+ err = nfs_writepage_async(ctx, inode, page, 0, offset);
+ if (err >= 0) {
+ err = 0;
+ /* Under memory reclaim, push the data out right away */
+ if (wbc->for_reclaim)
+ nfs_flush_inode(inode, 0, 0, FLUSH_STABLE);
+ }
+ } else {
+ err = nfs_writepage_sync(ctx, inode, page, 0,
+ offset, priority);
+ if (err >= 0) {
+ /* Fewer bytes written than requested: keep the page dirty */
+ if (err != offset)
+ redirty_page_for_writepage(wbc, page);
+ err = 0;
+ }
+ }
+ unlock_kernel();
+ put_nfs_open_context(ctx);
+out:
+ unlock_page(page);
+ if (inode_referenced)
+ iput(inode);
+ return err;
+}
+
+/*
+ * Note: causes nfs_update_request() to block on the assumption
+ * that the writeback is generated due to memory pressure.
+ *
+ * Runs generic_writepages(), then flushes and commits the inode's requests
+ * while holding the BDI_write_congested bit of the backing device. The
+ * number of requests flushed and committed is subtracted from
+ * wbc->nr_to_write. A non-blocking caller that finds the bit already set
+ * returns 0 without flushing.
+ */
+int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+ struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct inode *inode = mapping->host;
+ int err;
+
+ err = generic_writepages(mapping, wbc);
+ if (err)
+ return err;
+ /* Take the congestion bit, waiting for its current owner if allowed */
+ while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) {
+ if (wbc->nonblocking)
+ return 0;
+ nfs_wait_on_write_congestion(mapping, 0);
+ }
+ err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
+ if (err < 0)
+ goto out;
+ wbc->nr_to_write -= err;
+ /* For data-integrity syncs, wait for the writes before committing */
+ if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
+ err = nfs_wait_on_requests(inode, 0, 0);
+ if (err < 0)
+ goto out;
+ }
+ err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc));
+ if (err > 0) {
+ wbc->nr_to_write -= err;
+ err = 0;
+ }
+out:
+ /* Release the congestion bit and wake anyone waiting for it */
+ clear_bit(BDI_write_congested, &bdi->state);
+ wake_up_all(&nfs_write_congestion);
+ return err;
+}
+
+/*
+ * Insert a write request into an inode
+ *
+ * The request is stored in the inode's radix tree under req->wb_index and
+ * gains a reference. The first request added to an inode also takes an
+ * inode reference and starts a data update; both are undone by
+ * nfs_inode_remove_request() when the last request goes away.
+ *
+ * Returns 0 on success or the error from radix_tree_insert(). An -EEXIST
+ * result is treated as a bug.
+ *
+ * NOTE(review): no locking is done here, while the remove and lookup
+ * paths take nfsi->req_lock - presumably the caller holds it; confirm.
+ */
+static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ int error;
+
+ error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
+ BUG_ON(error == -EEXIST);
+ if (error)
+ return error;
+ if (!nfsi->npages) {
+ igrab(inode);
+ nfs_begin_data_update(inode);
+ if (nfs_have_delegation(inode, FMODE_WRITE))
+ nfsi->change_attr++;
+ }
+ nfsi->npages++;
+ atomic_inc(&req->wb_count);
+ return 0;
+}
+
+/*
+ * Remove a write request from an inode
+ *
+ * The request must be locked (NFS_WBACK_BUSY). It is deleted from the
+ * inode's radix tree under nfsi->req_lock. When it was the inode's last
+ * request, the data update is ended and the inode reference taken by
+ * nfs_inode_add_request() is dropped, after the lock has been released.
+ * Finally the request is cleared and one reference to it is released.
+ */
+static void nfs_inode_remove_request(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_context->dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ BUG_ON (!NFS_WBACK_BUSY(req));
+
+ spin_lock(&nfsi->req_lock);
+ radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
+ nfsi->npages--;
+ if (!nfsi->npages) {
+ spin_unlock(&nfsi->req_lock);
+ nfs_end_data_update_defer(inode);
+ iput(inode);
+ } else
+ spin_unlock(&nfsi->req_lock);
+ nfs_clear_request(req);
+ nfs_release_request(req);
+}
+
+/*
+ * Find a request
+ *
+ * Looks up the request stored for page @index in the inode's radix tree.
+ * A request that is found gains a reference before it is returned;
+ * NULL is returned if there is none. No locking is done here.
+ */
+static inline struct nfs_page *
+_nfs_find_request(struct inode *inode, unsigned long index)
+{
+	struct nfs_page *found;
+
+	found = (struct nfs_page*)radix_tree_lookup(&NFS_I(inode)->nfs_page_tree, index);
+	if (found != NULL)
+		atomic_inc(&found->wb_count);
+	return found;
+}
+
+/*
+ * Locked wrapper around _nfs_find_request(): performs the lookup with the
+ * inode's req_lock held.
+ */
+static struct nfs_page *
+nfs_find_request(struct inode *inode, unsigned long index)
+{
+	spinlock_t *lock = &NFS_I(inode)->req_lock;
+	struct nfs_page *found;
+
+	spin_lock(lock);
+	found = _nfs_find_request(inode, index);
+	spin_unlock(lock);
+	return found;
+}
+
+/*
+ * Add a request to the inode's dirty list.
+ *
+ * The list insertion and the ndirty counter are updated under
+ * nfsi->req_lock; the global nr_dirty page counter is incremented and the
+ * inode is marked dirty afterwards.
+ */
+static void
+nfs_mark_request_dirty(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_context->dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ spin_lock(&nfsi->req_lock);
+ nfs_list_add_request(req, &nfsi->dirty);
+ nfsi->ndirty++;
+ spin_unlock(&nfsi->req_lock);
+ inc_page_state(nr_dirty);
+ mark_inode_dirty(inode);
+}
+
+/*
+ * Check if a request is dirty
+ *
+ * A request counts as dirty when it is on a list and that list is its
+ * inode's dirty list. Returns 1 if so, 0 otherwise.
+ */
+static inline int
+nfs_dirty_request(struct nfs_page *req)
+{
+	struct inode *inode = req->wb_context->dentry->d_inode;
+
+	if (list_empty(&req->wb_list))
+		return 0;
+	return req->wb_list_head == &NFS_I(inode)->dirty;
+}
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+/*
+ * Add a request to the inode's commit list.
+ *
+ * The list insertion and the ncommit counter are updated under
+ * nfsi->req_lock; the global nr_unstable page counter is incremented and
+ * the inode is marked dirty afterwards.
+ */
+static void
+nfs_mark_request_commit(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_context->dentry->d_inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ spin_lock(&nfsi->req_lock);
+ nfs_list_add_request(req, &nfsi->commit);
+ nfsi->ncommit++;
+ spin_unlock(&nfsi->req_lock);
+ inc_page_state(nr_unstable);
+ mark_inode_dirty(inode);
+}
+#endif
+
+/*
+ * Wait for a request to complete.
+ *
+ * Interruptible by signals only if mounted with intr flag.
+ *
+ * Walks the inode's requests with page index in
+ * [idx_start, idx_start + npages - 1] (no upper bound when npages is 0)
+ * and waits for each one that is currently busy.
+ *
+ * Returns the number of requests waited on, or the negative error from
+ * nfs_wait_on_request().
+ */
+static int
+nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_page *req;
+ unsigned long idx_end, next;
+ unsigned int res = 0;
+ int error;
+
+ if (npages == 0)
+ idx_end = ~0;
+ else
+ idx_end = idx_start + npages - 1;
+
+ spin_lock(&nfsi->req_lock);
+ next = idx_start;
+ /* Fetch one request at a time, in index order, starting at 'next' */
+ while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) {
+ if (req->wb_index > idx_end)
+ break;
+
+ next = req->wb_index + 1;
+ if (!NFS_WBACK_BUSY(req))
+ continue;
+
+ /* Pin the request, then drop the lock around the wait */
+ atomic_inc(&req->wb_count);
+ spin_unlock(&nfsi->req_lock);
+ error = nfs_wait_on_request(req);
+ nfs_release_request(req);
+ if (error < 0)
+ return error;
+ spin_lock(&nfsi->req_lock);
+ res++;
+ }
+ spin_unlock(&nfsi->req_lock);
+ return res;
+}
+
+/*
+ * nfs_scan_dirty - Collect dirty requests of an inode
+ * @inode: NFS inode to scan
+ * @dst: list that receives the requests
+ * @idx_start: lowest page->index to consider
+ * @npages: together with idx_start, bounds the range that is scanned
+ *
+ * Hands the range to nfs_scan_list() to move requests off the inode's
+ * dirty list, then subtracts the number moved from both nfsi->ndirty
+ * and the nr_dirty page-state counter.  No attempt is made to check that
+ * the requests moved form a contiguous set.
+ *
+ * nfs_flush_inode() calls this with nfsi->req_lock held.
+ *
+ * Returns the value reported by nfs_scan_list().
+ */
+static int
+nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_inode *ni = NFS_I(inode);
+	int moved = nfs_scan_list(&ni->dirty, dst, idx_start, npages);
+
+	ni->ndirty -= moved;
+	sub_page_state(nr_dirty, moved);
+
+	/* The counter must read zero exactly when the list is empty */
+	if (list_empty(&ni->dirty) != (ni->ndirty == 0))
+		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+	return moved;
+}
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+/*
+ * nfs_scan_commit - Collect commit requests of an inode
+ * @inode: NFS inode to scan
+ * @dst: list that receives the requests
+ * @idx_start: lowest page->index to consider
+ * @npages: together with idx_start, bounds the range that is scanned
+ *
+ * Hands the range to nfs_scan_list() to move requests off the inode's
+ * 'commit' list and subtracts the number moved from nfsi->ncommit.  No
+ * attempt is made to check that the requests moved form a contiguous
+ * set.
+ *
+ * nfs_commit_inode() calls this with nfsi->req_lock held.
+ *
+ * Returns the value reported by nfs_scan_list().
+ */
+static int
+nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+	struct nfs_inode *ni = NFS_I(inode);
+	int moved = nfs_scan_list(&ni->commit, dst, idx_start, npages);
+
+	ni->ncommit -= moved;
+
+	/* The counter must read zero exactly when the list is empty */
+	if (list_empty(&ni->commit) != (ni->ncommit == 0))
+		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+	return moved;
+}
+#endif
+
+/*
+ * Sleep (at most once) on nfs_write_congestion while the mapping's
+ * backing device reports write congestion.
+ *
+ * @mapping: address space whose backing_dev_info is checked
+ * @intr: non-zero to sleep interruptibly, with the RPC client's signal
+ *        mask applied around the wait
+ *
+ * Returns 0, or -ERESTARTSYS if @intr is set and a signal is already
+ * pending while the device is still congested.
+ */
+static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
+{
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	DEFINE_WAIT(wait);
+	int ret = 0;
+
+	might_sleep();
+
+	/* Fast path: nothing to wait for */
+	if (!bdi_write_congested(bdi))
+		return 0;
+
+	if (!intr) {
+		prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
+		if (bdi_write_congested(bdi))
+			schedule();
+	} else {
+		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
+		sigset_t oldset;
+
+		rpc_clnt_sigmask(clnt, &oldset);
+		prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
+		/* Re-check after queueing ourselves on the wait queue */
+		if (bdi_write_congested(bdi)) {
+			if (!signalled())
+				schedule();
+			else
+				ret = -ERESTARTSYS;
+		}
+		rpc_clnt_sigunmask(clnt, &oldset);
+	}
+	finish_wait(&nfs_write_congestion, &wait);
+	return ret;
+}
+
+
+/*
+ * Try to update any existing write request, or create one if there is none.
+ * In order to match, the request's credentials must match those of
+ * the calling process.
+ *
+ * @ctx: open context the write is issued under
+ * @inode: inode the page belongs to
+ * @page: page being written
+ * @offset: start of the written region within the page
+ * @bytes: length of the written region
+ *
+ * Returns a locked request covering the region.  On failure an ERR_PTR
+ * is returned:
+ *   -ERESTARTSYS  the write-congestion wait was interrupted
+ *   -EBUSY        a conflicting request exists for the page; the caller
+ *                 is expected to flush it and retry
+ *   other         whatever nfs_wait_on_request(), nfs_create_request()
+ *                 or nfs_inode_add_request() reported
+ *
+ * Note: Should always be called with the Page Lock held!
+ */
+static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
+		struct inode *inode, struct page *page,
+		unsigned int offset, unsigned int bytes)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_page *req, *new = NULL;
+	unsigned long rqend, end;
+
+	end = offset + bytes;
+
+	if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
+		return ERR_PTR(-ERESTARTSYS);
+	for (;;) {
+		/* Loop over all inode entries and see if we find
+		 * A request for the page we wish to update
+		 */
+		spin_lock(&nfsi->req_lock);
+		req = _nfs_find_request(inode, page->index);
+		if (req) {
+			if (!nfs_lock_request_dontget(req)) {
+				int error;
+				spin_unlock(&nfsi->req_lock);
+				error = nfs_wait_on_request(req);
+				nfs_release_request(req);
+				if (error < 0) {
+					/* Do not leak a request that was
+					 * preallocated on an earlier pass. */
+					if (new)
+						nfs_release_request(new);
+					return ERR_PTR(error);
+				}
+				continue;
+			}
+			spin_unlock(&nfsi->req_lock);
+			/* An existing request wins; drop the spare one */
+			if (new)
+				nfs_release_request(new);
+			break;
+		}
+
+		if (new) {
+			int error;
+			nfs_lock_request_dontget(new);
+			error = nfs_inode_add_request(inode, new);
+			if (error) {
+				spin_unlock(&nfsi->req_lock);
+				nfs_unlock_request(new);
+				return ERR_PTR(error);
+			}
+			spin_unlock(&nfsi->req_lock);
+			nfs_mark_request_dirty(new);
+			return new;
+		}
+		spin_unlock(&nfsi->req_lock);
+
+		/* Allocate outside the spinlock, then re-run the lookup */
+		new = nfs_create_request(ctx, inode, page, offset, bytes);
+		if (IS_ERR(new))
+			return new;
+	}
+
+	/* We have a request for our page.
+	 * If the creds don't match, or the
+	 * page addresses don't match,
+	 * tell the caller to wait on the conflicting
+	 * request.
+	 */
+	rqend = req->wb_offset + req->wb_bytes;
+	if (req->wb_context != ctx
+	    || req->wb_page != page
+	    || !nfs_dirty_request(req)
+	    || offset > rqend || end < req->wb_offset) {
+		nfs_unlock_request(req);
+		return ERR_PTR(-EBUSY);
+	}
+
+	/* Okay, the request matches.  Update the region */
+	if (offset < req->wb_offset) {
+		req->wb_offset = offset;
+		req->wb_pgbase = offset;
+		req->wb_bytes = rqend - req->wb_offset;
+	}
+
+	if (end > rqend)
+		req->wb_bytes = end - req->wb_offset;
+
+	return req;
+}
+
+/*
+ * Flush a pending request on @page that is incompatible with a write
+ * through @file.
+ *
+ * If a request exists for the page and it belongs to another open
+ * context, it is written back before anything is copied into the page.
+ * This is done due to the lack of an ACCESS-type call in NFSv2.  The
+ * same happens when the request refers to a different (dropped) page.
+ *
+ * Returns 0, or the negative status reported by nfs_wb_page().
+ */
+int nfs_flush_incompatible(struct file *file, struct page *page)
+{
+	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+	struct inode *inode = page->mapping->host;
+	struct nfs_page *req;
+	int status = 0;
+
+	req = nfs_find_request(inode, page->index);
+	if (req == NULL)
+		return 0;
+
+	if (ctx != req->wb_context || req->wb_page != page)
+		status = nfs_wb_page(inode, page);
+	nfs_release_request(req);
+
+	/* Positive results from nfs_wb_page() are not passed on */
+	if (status < 0)
+		return status;
+	return 0;
+}
+
+/*
+ * Update and possibly write a cached page of an NFS file.
+ *
+ * @file: open file; its private_data is used as the nfs_open_context
+ * @page: page cache page being updated
+ * @offset: start of the modified region within the page
+ * @count: length of the modified region in bytes
+ *
+ * For IS_SYNC inodes the region is written at once through
+ * nfs_writepage_sync() and that call's status decides the result.
+ * Otherwise a request covering the region is found or created via
+ * nfs_update_request(); when that reports -EBUSY the page is flushed
+ * with nfs_wb_page() and the update is retried.
+ *
+ * Returns 0 on success or a negative error.  On the non-sync path a
+ * negative result also clears the page's uptodate flag.
+ *
+ * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
+ * things with a page scheduled for an RPC call (e.g. invalidate it).
+ */
+int nfs_updatepage(struct file *file, struct page *page,
+ unsigned int offset, unsigned int count)
+{
+ struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = page->mapping->host;
+ struct nfs_page *req;
+ int status = 0;
+
+ dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name,
+ count, (long long)(page_offset(page) +offset));
+
+ if (IS_SYNC(inode)) {
+ status = nfs_writepage_sync(ctx, inode, page, offset, count, 0);
+ if (status > 0) {
+ if (offset == 0 && status == PAGE_CACHE_SIZE) /* the whole page was written */
+ SetPageUptodate(page);
+ return 0;
+ }
+ return status;
+ }
+
+ /* If we're not using byte range locks, and we know the page
+ * is entirely in cache, it may be more efficient to avoid
+ * fragmenting write requests.
+ */
+ if (PageUptodate(page) && inode->i_flock == NULL) {
+ loff_t end_offs = i_size_read(inode) - 1; /* offset of the last byte; -1 for an empty file */
+ unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
+
+ count += offset; /* extend the region back to the start of the page */
+ offset = 0;
+ if (unlikely(end_offs < 0)) {
+ /* Do nothing */
+ } else if (page->index == end_index) {
+ unsigned int pglen;
+ pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1; /* bytes of this page below i_size */
+ if (count < pglen)
+ count = pglen;
+ } else if (page->index < end_index)
+ count = PAGE_CACHE_SIZE;
+ }
+
+ /*
+ * Try to find an NFS request corresponding to this page
+ * and update it.
+ * If the existing request cannot be updated, we must flush
+ * it out now.
+ */
+ do {
+ req = nfs_update_request(ctx, inode, page, offset, count);
+ status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
+ if (status != -EBUSY)
+ break;
+ /* Request could not be updated. Flush it out and try again */
+ status = nfs_wb_page(inode, page);
+ } while (status >= 0);
+ if (status < 0)
+ goto done;
+
+ status = 0;
+
+ /* Update file length */
+ nfs_grow_file(page, offset, count);
+ /* Set the PG_uptodate flag? */
+ nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ nfs_unlock_request(req); /* req was returned locked by nfs_update_request() */
+done:
+ dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
+ status, (long long)i_size_read(inode));
+ if (status < 0)
+ ClearPageUptodate(page);
+ return status;
+}
+
+/*
+ * Finish writeback of a request's page and decide the request's fate.
+ *
+ * With NFSv3/v4 support built in, a request whose page carries no error
+ * is put back on the dirty list when it needs rescheduling, or on the
+ * commit list when it needs a COMMIT; every other request is removed
+ * from the inode.  The commit and reschedule marks are cleared in all
+ * cases.  Without v3/v4 support the request is always removed.
+ *
+ * The request is unlocked last.
+ */
+static void nfs_writepage_release(struct nfs_page *req)
+{
+	end_page_writeback(req->wb_page);
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+	if (PageError(req->wb_page))
+		nfs_inode_remove_request(req);
+	else if (NFS_NEED_RESCHED(req))
+		nfs_mark_request_dirty(req);
+	else if (NFS_NEED_COMMIT(req))
+		nfs_mark_request_commit(req);
+	else
+		nfs_inode_remove_request(req);
+
+	nfs_clear_commit(req);
+	nfs_clear_reschedule(req);
+#else
+	nfs_inode_remove_request(req);
+#endif
+	nfs_unlock_request(req);
+}
+
+/*
+ * Translate the FLUSH_HIGHPRI/FLUSH_LOWPRI bits of @how into an RPC
+ * task priority.  Any other combination of the two bits (neither set,
+ * or both set) yields RPC_PRIORITY_NORMAL.
+ */
+static inline int flush_task_priority(int how)
+{
+	int prio = RPC_PRIORITY_NORMAL;
+
+	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
+	case FLUSH_HIGHPRI:
+		prio = RPC_PRIORITY_HIGH;
+		break;
+	case FLUSH_LOWPRI:
+		prio = RPC_PRIORITY_LOW;
+		break;
+	default:
+		break;
+	}
+	return prio;
+}
+
+/*
+ * Fill in the argument and result storage of a WRITE call.
+ *
+ * @req: request the data is taken from
+ * @data: write descriptor to initialise
+ * @count: number of bytes to write
+ * @offset: byte offset relative to the start of @req
+ * @how: flush flags, handed to the protocol's write_setup() and used to
+ *       pick the task priority
+ *
+ * The task fields are assigned only after write_setup() has run.
+ */
+static void nfs_write_rpcsetup(struct nfs_page *req,
+		struct nfs_write_data *data,
+		unsigned int count, unsigned int offset,
+		int how)
+{
+	struct nfs_open_context *ctx = req->wb_context;
+	struct inode *inode = ctx->dentry->d_inode;
+	struct rpc_task *task = &data->task;
+
+	/* Set up the RPC argument and reply structs
+	 * NB: take care not to mess about with data->commit et al. */
+
+	data->req = req;
+	data->inode = inode;
+	data->cred = ctx->cred;
+
+	data->args.fh = NFS_FH(inode);
+	data->args.offset = req_offset(req) + offset;
+	data->args.pgbase = req->wb_pgbase + offset;
+	data->args.pages = data->pagevec;
+	data->args.count = count;
+	data->args.context = ctx;
+
+	data->res.fattr = &data->fattr;
+	data->res.count = count;
+	data->res.verf = &data->verf;
+
+	NFS_PROTO(inode)->write_setup(data, how);
+
+	task->tk_priority = flush_task_priority(how);
+	task->tk_cookie = (unsigned long)inode;
+	task->tk_calldata = data;
+	/* Release requests */
+	task->tk_release = nfs_writedata_release;
+
+	dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+		task->tk_pid,
+		inode->i_sb->s_id,
+		(long long)NFS_FILEID(inode),
+		count,
+		(unsigned long long)data->args.offset);
+}
+
+/*
+ * Run the RPC task embedded in @data.
+ *
+ * The call is made under the big kernel lock, with the RPC client's
+ * signal mask applied for its duration and restored afterwards.
+ */
+static void nfs_execute_write(struct nfs_write_data *data)
+{
+	struct rpc_clnt *client = NFS_CLIENT(data->inode);
+	sigset_t saved_mask;
+
+	rpc_clnt_sigmask(client, &saved_mask);
+
+	lock_kernel();
+	rpc_execute(&data->task);
+	unlock_kernel();
+
+	rpc_clnt_sigunmask(client, &saved_mask);
+}
+
+/*
+ * Generate multiple small requests to write out a single
+ * contiguous dirty area on one page.
+ *
+ * @head: list whose first request is taken and written
+ * @inode: inode being written; its server's wsize sets the chunk size
+ * @how: flush flags passed on to nfs_write_rpcsetup()
+ *
+ * One nfs_write_data is allocated per wsize-sized chunk before any RPC
+ * is started, and req->wb_complete is set to the number of chunks.
+ *
+ * Returns 0 once all chunks have been handed to nfs_execute_write().
+ * If an allocation fails, everything allocated so far is freed, the
+ * request is marked dirty again and unlocked, and -ENOMEM is returned.
+ */
+static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+{
+ struct nfs_page *req = nfs_list_entry(head->next);
+ struct page *page = req->wb_page;
+ struct nfs_write_data *data;
+ unsigned int wsize = NFS_SERVER(inode)->wsize;
+ unsigned int nbytes, offset;
+ int requests = 0;
+ LIST_HEAD(list);
+
+ nfs_list_remove_request(req);
+
+ nbytes = req->wb_bytes;
+ for (;;) { /* allocate one descriptor per wsize chunk */
+ data = nfs_writedata_alloc();
+ if (!data)
+ goto out_bad;
+ list_add(&data->pages, &list);
+ requests++;
+ if (nbytes <= wsize)
+ break;
+ nbytes -= wsize;
+ }
+ atomic_set(&req->wb_complete, requests); /* counted down in nfs_writeback_done_partial() */
+
+ ClearPageError(page);
+ SetPageWriteback(page);
+ offset = 0;
+ nbytes = req->wb_bytes;
+ do {
+ data = list_entry(list.next, struct nfs_write_data, pages);
+ list_del_init(&data->pages);
+
+ data->pagevec[0] = page;
+ data->complete = nfs_writeback_done_partial;
+
+ if (nbytes > wsize) {
+ nfs_write_rpcsetup(req, data, wsize, offset, how);
+ offset += wsize;
+ nbytes -= wsize;
+ } else {
+ nfs_write_rpcsetup(req, data, nbytes, offset, how); /* final chunk */
+ nbytes = 0;
+ }
+ nfs_execute_write(data);
+ } while (nbytes != 0);
+
+ return 0;
+
+out_bad:
+ while (!list_empty(&list)) {
+ data = list_entry(list.next, struct nfs_write_data, pages);
+ list_del(&data->pages);
+ nfs_writedata_free(data);
+ }
+ nfs_mark_request_dirty(req);
+ nfs_unlock_request(req);
+ return -ENOMEM;
+}
+
+/*
+ * Create an RPC task for the given write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * @head: list of requests to send in a single WRITE call
+ * @inode: inode the requests belong to
+ * @how: flush flags passed on to nfs_write_rpcsetup()
+ *
+ * When the server's wsize is smaller than PAGE_CACHE_SIZE the work is
+ * delegated to nfs_flush_multi().  Otherwise every request on @head is
+ * moved onto data->pages, its page is put under writeback, and a single
+ * call covering the summed byte count is started.
+ *
+ * Returns 0 on success.  If the write descriptor cannot be allocated,
+ * every request on @head is marked dirty again and unlocked, and
+ * -ENOMEM is returned.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
+ */
+static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+{
+ struct nfs_page *req;
+ struct page **pages;
+ struct nfs_write_data *data;
+ unsigned int count;
+
+ if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
+ return nfs_flush_multi(head, inode, how);
+
+ data = nfs_writedata_alloc();
+ if (!data)
+ goto out_bad;
+
+ pages = data->pagevec;
+ count = 0;
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &data->pages);
+ ClearPageError(req->wb_page);
+ SetPageWriteback(req->wb_page);
+ *pages++ = req->wb_page;
+ count += req->wb_bytes;
+ }
+ req = nfs_list_entry(data->pages.next); /* first request supplies offset, context and cred */
+
+ data->complete = nfs_writeback_done_full;
+ /* Set up the argument struct */
+ nfs_write_rpcsetup(req, data, count, 0, how);
+
+ nfs_execute_write(data);
+ return 0;
+ out_bad:
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next); /* NOTE(review): shadows the outer 'req' */
+ nfs_list_remove_request(req);
+ nfs_mark_request_dirty(req);
+ nfs_unlock_request(req);
+ }
+ return -ENOMEM;
+}
+
+/*
+ * Write out every request on @head.
+ *
+ * @head: list of requests to flush
+ * @wpages: limit handed to nfs_coalesce_requests()
+ * @how: flush flags passed on to nfs_flush_one()
+ *
+ * Requests are coalesced into batches and each batch is handed to
+ * nfs_flush_one().  Returns the accumulated result of
+ * nfs_coalesce_requests() on success.  On the first failure, the
+ * requests still left on @head are marked dirty again and unlocked, and
+ * the error is returned.
+ */
+static int
+nfs_flush_list(struct list_head *head, int wpages, int how)
+{
+	LIST_HEAD(one_request);
+	struct nfs_page *req;
+	unsigned int pages = 0;
+	int error;
+
+	while (!list_empty(head)) {
+		pages += nfs_coalesce_requests(head, &one_request, wpages);
+		req = nfs_list_entry(one_request.next);
+		error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+		if (error < 0)
+			goto out_redirty;
+	}
+	return pages;
+
+out_redirty:
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_mark_request_dirty(req);
+		nfs_unlock_request(req);
+	}
+	return error;
+}
+
+/*
+ * Handle a write reply that flushed part of a page.
+ *
+ * @data: write descriptor of the chunk that completed
+ * @status: result of the WRITE call; negative on error
+ *
+ * On error the page loses its uptodate flag, gets the error flag, and
+ * the status is stored in the request's open context.  On success with
+ * NFSv3/v4 built in, a reply that is less than NFS_FILE_SYNC either
+ * records the verifier and defers a commit (first such reply for the
+ * request) or, if the verifier differs from the recorded one, defers a
+ * reschedule of the request.
+ *
+ * The chunk that brings req->wb_complete to zero releases the request
+ * through nfs_writepage_release().
+ */
+static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+{
+ struct nfs_page *req = data->req;
+ struct page *page = req->wb_page;
+
+ dprintk("NFS: write (%s/%Ld %d@%Ld)",
+ req->wb_context->dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+
+ if (status < 0) {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ req->wb_context->error = status;
+ dprintk(", error = %d\n", status);
+ } else {
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ if (data->verf.committed < NFS_FILE_SYNC) {
+ if (!NFS_NEED_COMMIT(req)) {
+ nfs_defer_commit(req);
+ memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
+ dprintk(" defer commit\n");
+ } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
+ nfs_defer_reschedule(req);
+ dprintk(" server reboot detected\n");
+ }
+ } else /* binds to the dprintk() that follows the #endif */
+#endif
+ dprintk(" OK\n");
+ }
+
+ if (atomic_dec_and_test(&req->wb_complete))
+ nfs_writepage_release(req);
+}
+
+/*
+ * Handle a write reply that flushes a whole page.
+ *
+ * @data: write descriptor whose data->pages list holds the requests
+ * @status: result of the WRITE call; negative on error
+ *
+ * Every request is taken off data->pages, its page's writeback is
+ * ended, and the request is unlocked.  On error the page is flagged and
+ * the request removed from the inode.  On success with NFSv3/v4 built
+ * in, the request is removed unless the write was sent NFS_UNSTABLE and
+ * the reply was not NFS_FILE_SYNC; in that case the verifier is recorded
+ * and the request is queued for commit.
+ *
+ * FIXME: There is an inherent race with invalidate_inode_pages and
+ * writebacks since the page->count is kept > 1 for as long
+ * as the page has a write request pending.
+ */
+static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+{
+ struct nfs_page *req;
+ struct page *page;
+
+ /* Update attributes as result of writeback. */
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+ page = req->wb_page;
+
+ dprintk("NFS: write (%s/%Ld %d@%Ld)",
+ req->wb_context->dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+
+ if (status < 0) {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ req->wb_context->error = status;
+ end_page_writeback(page);
+ nfs_inode_remove_request(req);
+ dprintk(", error = %d\n", status);
+ goto next;
+ }
+ end_page_writeback(page);
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+ }
+ memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); /* compared again in nfs_commit_done() */
+ nfs_mark_request_commit(req);
+ dprintk(" marked for commit\n");
+#else
+ nfs_inode_remove_request(req);
+#endif
+ next:
+ nfs_unlock_request(req);
+ }
+}
+
+/*
+ * This function is called when the WRITE call is complete.
+ *
+ * @task: the RPC task; its tk_calldata is the nfs_write_data
+ *
+ * Short writes are handled here.  If the server wrote some but not all
+ * of the data, the call is restarted: for a reply that was not
+ * NFS_UNSTABLE only the remainder is resent, otherwise the same
+ * arguments are resent as an NFS_FILE_SYNC write.  If the server wrote
+ * nothing, a warning is printed (no more often than every 300 * HZ
+ * jiffies) and the task status becomes -EIO.
+ *
+ * Unless the call was restarted, data->complete() is invoked last with
+ * the final task status.
+ */
+void nfs_writeback_done(struct rpc_task *task)
+{
+ struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+ struct nfs_writeargs *argp = &data->args;
+ struct nfs_writeres *resp = &data->res;
+
+ dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
+ task->tk_pid, task->tk_status);
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+ /* We tried a write call, but the server did not
+ * commit data to stable storage even though we
+ * requested it.
+ * Note: There is a known bug in Tru64 < 5.0 in which
+ * the server reports NFS_DATA_SYNC, but performs
+ * NFS_FILE_SYNC. We therefore implement this checking
+ * as a dprintk() in order to avoid filling syslog.
+ */
+ static unsigned long complain; /* earliest jiffies value for the next message */
+
+ if (time_before(complain, jiffies)) {
+ dprintk("NFS: faulty NFS server %s:"
+ " (committed = %d) != (stable = %d)\n",
+ NFS_SERVER(data->inode)->hostname,
+ resp->verf->committed, argp->stable);
+ complain = jiffies + 300 * HZ;
+ }
+ }
+#endif
+ /* Is this a short write? */
+ if (task->tk_status >= 0 && resp->count < argp->count) {
+ static unsigned long complain; /* earliest jiffies value for the next warning */
+
+ /* Has the server at least made some progress? */
+ if (resp->count != 0) {
+ /* Was this an NFSv2 write or an NFSv3 stable write? */
+ if (resp->verf->committed != NFS_UNSTABLE) {
+ /* Resend from where the server left off */
+ argp->offset += resp->count;
+ argp->pgbase += resp->count;
+ argp->count -= resp->count;
+ } else {
+ /* Resend as a stable write in order to avoid
+ * headaches in the case of a server crash.
+ */
+ argp->stable = NFS_FILE_SYNC;
+ }
+ rpc_restart_call(task);
+ return; /* data->complete() is not called for a restarted call */
+ }
+ if (time_before(complain, jiffies)) {
+ printk(KERN_WARNING
+ "NFS: Server wrote zero bytes, expected %u.\n",
+ argp->count);
+ complain = jiffies + 300 * HZ;
+ }
+ /* Can't do anything about it except throw an error. */
+ task->tk_status = -EIO;
+ }
+
+ /*
+ * Process the nfs_page list
+ */
+ data->complete(data, task->tk_status);
+}
+
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+/*
+ * Release callback of a COMMIT task (installed as tk_release by
+ * nfs_commit_rpcsetup()): hands the task's calldata, the commit
+ * descriptor, to nfs_commit_free().
+ */
+static void nfs_commit_release(struct rpc_task *task)
+{
+	nfs_commit_free((struct nfs_write_data *)task->tk_calldata);
+}
+
+/*
+ * Set up the argument/result storage required for the RPC call.
+ *
+ * @head: list of requests to commit; it is spliced onto data->pages and
+ *        left empty
+ * @data: commit descriptor to initialise
+ * @how: flush flags, handed to the protocol's commit_setup() and used to
+ *       pick the task priority
+ *
+ * The byte range of the COMMIT runs from the start of the first request
+ * to the end of the last one.  A length of 0 is sent instead when the
+ * range reaches i_size or the length is negative or does not fit in 31
+ * bits.
+ */
+static void nfs_commit_rpcsetup(struct list_head *head,
+ struct nfs_write_data *data, int how)
+{
+ struct rpc_task *task = &data->task;
+ struct nfs_page *first, *last;
+ struct inode *inode;
+ loff_t start, end, len;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ list_splice_init(head, &data->pages);
+ first = nfs_list_entry(data->pages.next);
+ last = nfs_list_entry(data->pages.prev);
+ inode = first->wb_context->dentry->d_inode;
+
+ /*
+ * Determine the offset range of requests in the COMMIT call.
+ * We rely on the fact that data->pages is an ordered list...
+ */
+ start = req_offset(first);
+ end = req_offset(last) + last->wb_bytes;
+ len = end - start;
+ /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
+ if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1))
+ len = 0;
+
+ data->inode = inode;
+ data->cred = first->wb_context->cred; /* credentials of the first request are used for the call */
+
+ data->args.fh = NFS_FH(data->inode);
+ data->args.offset = start;
+ data->args.count = len;
+ data->res.count = len;
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
+
+ NFS_PROTO(inode)->commit_setup(data, how);
+
+ data->task.tk_priority = flush_task_priority(how);
+ data->task.tk_cookie = (unsigned long)inode;
+ data->task.tk_calldata = data;
+ /* Release requests */
+ data->task.tk_release = nfs_commit_release;
+
+ dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
+}
+
+/*
+ * Send a COMMIT for the requests on @head.
+ *
+ * @head: list of requests awaiting commit
+ * @how: flush flags passed on to nfs_commit_rpcsetup()
+ *
+ * Returns 0 once the call has been handed to nfs_execute_write().  If
+ * no commit descriptor can be allocated, every request is put back on
+ * the inode's commit list and unlocked, and -ENOMEM is returned.
+ */
+static int
+nfs_commit_list(struct list_head *head, int how)
+{
+	struct nfs_write_data *data = nfs_commit_alloc();
+	struct nfs_page *req;
+
+	if (data != NULL) {
+		/* Set up the argument struct */
+		nfs_commit_rpcsetup(head, data, how);
+
+		nfs_execute_write(data);
+		return 0;
+	}
+
+	/* Allocation failed: requeue everything for a later attempt */
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_mark_request_commit(req);
+		nfs_unlock_request(req);
+	}
+	return -ENOMEM;
+}
+
+/*
+ * COMMIT call returned
+ *
+ * @task: the RPC task; its tk_calldata is the commit descriptor
+ *
+ * Every request on data->pages is taken off the list and unlocked.  On
+ * error the status is stored in the request's open context and the
+ * request is removed from the inode.  On success the request is removed
+ * when its stored verifier matches the one returned by the COMMIT;
+ * otherwise it is marked dirty so that it is written again.
+ *
+ * The nr_unstable page-state counter is reduced by the number of
+ * requests processed.
+ */
+void
+nfs_commit_done(struct rpc_task *task)
+{
+ struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
+ struct nfs_page *req;
+ int res = 0;
+
+ dprintk("NFS: %4d nfs_commit_done (status %d)\n",
+ task->tk_pid, task->tk_status);
+
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+
+ dprintk("NFS: commit (%s/%Ld %d@%Ld)",
+ req->wb_context->dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+ if (task->tk_status < 0) {
+ req->wb_context->error = task->tk_status;
+ nfs_inode_remove_request(req);
+ dprintk(", error = %d\n", task->tk_status);
+ goto next;
+ }
+
+ /* Okay, COMMIT succeeded, apparently. Check the verifier
+ * returned by the server against all stored verfs. */
+ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+ /* We have a match */
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+ }
+ /* We have a mismatch. Write the page again */
+ dprintk(" mismatch\n");
+ nfs_mark_request_dirty(req);
+ next:
+ nfs_unlock_request(req);
+ res++; /* counted on every path, including errors */
+ }
+ sub_page_state(nr_unstable,res);
+}
+#endif
+
+/*
+ * Start writeback of the dirty requests of an inode within a page range.
+ *
+ * @inode: inode to flush
+ * @idx_start: lowest page index to consider
+ * @npages: together with idx_start, bounds the range
+ * @how: flush flags passed on to nfs_flush_list()
+ *
+ * The dirty requests are collected under nfsi->req_lock and flushed
+ * after the lock has been dropped.  Returns the number of requests
+ * collected, or the negative error from nfs_flush_list().
+ */
+static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
+		unsigned int npages, int how)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	LIST_HEAD(head);
+	int res;
+	int error;
+
+	spin_lock(&nfsi->req_lock);
+	res = nfs_scan_dirty(inode, &head, idx_start, npages);
+	spin_unlock(&nfsi->req_lock);
+
+	/* Nothing was dirty in the range */
+	if (!res)
+		return res;
+
+	error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
+	if (error < 0)
+		return error;
+	return res;
+}
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+/*
+ * Send a COMMIT for the uncommitted requests of an inode.
+ *
+ * @inode: inode to commit
+ * @idx_start: lowest page index to consider
+ * @npages: together with idx_start, bounds the range
+ * @how: flush flags passed on to nfs_commit_list()
+ *
+ * If the range holds at least one request on the commit list, a second
+ * scan with idx_start 0 and npages 0 adds further commit requests of the
+ * inode to the same call.  Both scans run under nfsi->req_lock.
+ *
+ * Returns the number of requests collected, or the negative error from
+ * nfs_commit_list().
+ */
+int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
+		unsigned int npages, int how)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	LIST_HEAD(head);
+	int res;
+	int error;
+
+	spin_lock(&nfsi->req_lock);
+	res = nfs_scan_commit(inode, &head, idx_start, npages);
+	if (!res) {
+		spin_unlock(&nfsi->req_lock);
+		return res;
+	}
+	res += nfs_scan_commit(inode, &head, 0, 0);
+	spin_unlock(&nfsi->req_lock);
+
+	error = nfs_commit_list(&head, how);
+	if (error < 0)
+		return error;
+	return res;
+}
+#endif
+
+/*
+ * Flush (and, with NFSv3/v4 built in, commit) the requests of an inode
+ * within a page range.
+ *
+ * @inode: inode to sync
+ * @idx_start: lowest page index to consider
+ * @npages: together with idx_start, bounds the range
+ * @how: flush flags; FLUSH_WAIT additionally waits on busy requests and
+ *       is stripped before the flags are passed on
+ *
+ * Each pass runs wait, flush and commit in that order, stopping at the
+ * first step that returns non-zero.  Passes repeat for as long as a
+ * step reports a positive count.  Returns 0 once a full pass found
+ * nothing to do, or the first negative error.
+ */
+int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
+		unsigned int npages, int how)
+{
+	int wait = how & FLUSH_WAIT;
+	int error;
+
+	how &= ~FLUSH_WAIT;
+
+	for (;;) {
+		error = wait ? nfs_wait_on_requests(inode, idx_start, npages) : 0;
+		if (error == 0)
+			error = nfs_flush_inode(inode, idx_start, npages, how);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+		if (error == 0)
+			error = nfs_commit_inode(inode, idx_start, npages, how);
+#endif
+		if (error <= 0)
+			break;
+	}
+	return error;
+}
+
+/*
+ * Create the "nfs_write_data" slab cache and the two mempools (write
+ * and commit) that allocate from it.
+ *
+ * Returns 0 on success or -ENOMEM.  On failure everything created so
+ * far is torn down again and the corresponding pointers are reset to
+ * NULL, so a failed call leaves nothing allocated.
+ */
+int nfs_init_writepagecache(void)
+{
+	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
+					     sizeof(struct nfs_write_data),
+					     0, SLAB_HWCACHE_ALIGN,
+					     NULL, NULL);
+	if (nfs_wdata_cachep == NULL)
+		return -ENOMEM;
+
+	nfs_wdata_mempool = mempool_create(MIN_POOL_WRITE,
+					   mempool_alloc_slab,
+					   mempool_free_slab,
+					   nfs_wdata_cachep);
+	if (nfs_wdata_mempool == NULL)
+		goto out_destroy_cache;
+
+	nfs_commit_mempool = mempool_create(MIN_POOL_COMMIT,
+					    mempool_alloc_slab,
+					    mempool_free_slab,
+					    nfs_wdata_cachep);
+	if (nfs_commit_mempool == NULL)
+		goto out_destroy_wdata_pool;
+
+	return 0;
+
+out_destroy_wdata_pool:
+	/* The pool must go before the cache it allocates from */
+	mempool_destroy(nfs_wdata_mempool);
+	nfs_wdata_mempool = NULL;
+out_destroy_cache:
+	kmem_cache_destroy(nfs_wdata_cachep);
+	nfs_wdata_cachep = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Tear down the commit and write mempools, then the slab cache they
+ * allocate from.  A message is logged if destroying the cache reports
+ * a non-zero result.
+ */
+void nfs_destroy_writepagecache(void)
+{
+	int busy;
+
+	mempool_destroy(nfs_commit_mempool);
+	mempool_destroy(nfs_wdata_mempool);
+
+	busy = kmem_cache_destroy(nfs_wdata_cachep);
+	if (busy)
+		printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
+}
+
OpenPOWER on IntegriCloud