diff options
Diffstat (limited to 'fs/nfsd')
-rw-r--r-- | fs/nfsd/Kconfig | 4 | ||||
-rw-r--r-- | fs/nfsd/cache.h | 1 | ||||
-rw-r--r-- | fs/nfsd/export.c | 1 | ||||
-rw-r--r-- | fs/nfsd/nfs3proc.c | 13 | ||||
-rw-r--r-- | fs/nfsd/nfs4callback.c | 144 | ||||
-rw-r--r-- | fs/nfsd/nfs4idmap.c | 20 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 49 | ||||
-rw-r--r-- | fs/nfsd/nfs4recover.c | 205 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 209 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 92 | ||||
-rw-r--r-- | fs/nfsd/nfscache.c | 214 | ||||
-rw-r--r-- | fs/nfsd/nfsctl.c | 45 | ||||
-rw-r--r-- | fs/nfsd/nfsd.h | 2 | ||||
-rw-r--r-- | fs/nfsd/nfsfh.c | 6 | ||||
-rw-r--r-- | fs/nfsd/state.h | 32 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 37 | ||||
-rw-r--r-- | fs/nfsd/xdr4.h | 14 |
17 files changed, 731 insertions, 357 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f994e750e0d1..73395156bdb4 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -71,6 +71,7 @@ config NFSD_V4 select FS_POSIX_ACL select SUNRPC_GSS select CRYPTO + select GRACE_PERIOD help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). @@ -94,9 +95,6 @@ config NFSD_V4_SECURITY_LABEL If you do not wish to enable fine-grained security labels SELinux or Smack policies on NFSv4 files, say N. - WARNING: there is still a chance of backwards-incompatible protocol changes. - For now we recommend "Y" only for developers and testers. - config NFSD_FAULT_INJECTION bool "NFS server manual fault injection" depends on NFSD_V4 && DEBUG_KERNEL diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index b582f9ab6b2a..dd96a3830004 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -18,7 +18,6 @@ * is much larger than a sockaddr_in6. */ struct svc_cacherep { - struct hlist_node c_hash; struct list_head c_lru; unsigned char c_state, /* unused, inprog, done */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 72ffd7cce3c3..30a739d896ff 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1145,6 +1145,7 @@ static struct flags { { NFSEXP_ALLSQUASH, {"all_squash", ""}}, { NFSEXP_ASYNC, {"async", "sync"}}, { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, + { NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}}, { NFSEXP_NOHIDE, {"nohide", ""}}, { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index fa2525b2e9d7..12f2aab4f614 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -223,11 +223,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, newfhp = fh_init(&resp->fh, NFS3_FHSIZE); attr = &argp->attrs; - /* Get the directory inode */ - nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE); - if (nfserr) - RETURN_STATUS(nfserr); - /* Unfudge the mode bits */ attr->ia_mode &= ~S_IFMT; if (!(attr->ia_valid & ATTR_MODE)) { @@ -471,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, resp->buflen = resp->count; resp->rqstp = rqstp; offset = argp->cookie; + + nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP); + if (nfserr) + RETURN_STATUS(nfserr); + + if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS) + RETURN_STATUS(nfserr_notsupp); + nfserr = nfsd_readdir(rqstp, &resp->fh, &offset, &resp->common, diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e0be57b0f79b..ed2b1151b171 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -49,12 +49,6 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); /* Index of predefined Linux callback client operations */ -enum { - NFSPROC4_CLNT_CB_NULL = 0, - NFSPROC4_CLNT_CB_RECALL, - NFSPROC4_CLNT_CB_SEQUENCE, -}; - struct nfs4_cb_compound_hdr { /* args */ u32 ident; /* minorversion 0 only */ @@ -494,7 +488,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, const struct nfsd4_callback *cb) { - const struct nfs4_delegation *args = cb->cb_op; + const struct nfs4_delegation *dp = cb_to_delegation(cb); struct nfs4_cb_compound_hdr hdr = { .ident = cb->cb_clp->cl_cb_ident, .minorversion = cb->cb_minorversion, @@ -502,7 +496,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, encode_cb_compound4args(xdr, &hdr); encode_cb_sequence4args(xdr, cb, &hdr); - encode_cb_recall4args(xdr, args, &hdr); + encode_cb_recall4args(xdr, dp, &hdr); encode_cb_nops(&hdr); } @@ -746,27 +740,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { static struct workqueue_struct *callback_wq; -static void run_nfsd4_cb(struct nfsd4_callback *cb) -{ - queue_work(callback_wq, &cb->cb_work); -} - -static void do_probe_callback(struct nfs4_client *clp) -{ - struct nfsd4_callback *cb = &clp->cl_cb_null; - - cb->cb_op = NULL; - cb->cb_clp = clp; - - cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; - cb->cb_msg.rpc_argp = NULL; - cb->cb_msg.rpc_resp = NULL; - - cb->cb_ops = &nfsd4_cb_probe_ops; - - run_nfsd4_cb(cb); -} - /* * Poke the callback thread to process any updates to the callback * parameters, and send a null probe. @@ -775,7 +748,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp) { clp->cl_cb_state = NFSD4_CB_UNKNOWN; set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); - do_probe_callback(clp); + nfsd4_run_cb(&clp->cl_cb_null); } void nfsd4_probe_callback_sync(struct nfs4_client *clp) @@ -847,23 +820,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) rpc_wake_up_next(&clp->cl_cb_waitq); dprintk("%s: freed slot, new seqid=%d\n", __func__, clp->cl_cb_session->se_cb_seq_nr); - - /* We're done looking into the sequence information */ - task->tk_msg.rpc_resp = NULL; } -} - - -static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) -{ - struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = cb->cb_clp; - struct rpc_clnt *current_rpc_client = clp->cl_cb_client; - - nfsd4_cb_done(task, calldata); - if (current_rpc_client != task->tk_client) { + if (clp->cl_cb_client != task->tk_client) { /* We're shutting down or changing cl_cb_client; leave * it to nfsd4_process_cb_update to restart the call if * necessary. */ @@ -872,47 +831,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) if (cb->cb_done) return; - switch (task->tk_status) { + + switch (cb->cb_ops->done(cb, task)) { case 0: - cb->cb_done = true; + task->tk_status = 0; + rpc_restart_call_prepare(task); return; - case -EBADHANDLE: - case -NFS4ERR_BAD_STATEID: - /* Race: client probably got cb_recall - * before open reply granting delegation */ + case 1: break; - default: + case -1: /* Network partition? */ nfsd4_mark_cb_down(clp, task->tk_status); + break; + default: + BUG(); } - if (dp->dl_retries--) { - rpc_delay(task, 2*HZ); - task->tk_status = 0; - rpc_restart_call_prepare(task); - return; - } - nfsd4_mark_cb_down(clp, task->tk_status); cb->cb_done = true; } -static void nfsd4_cb_recall_release(void *calldata) +static void nfsd4_cb_release(void *calldata) { struct nfsd4_callback *cb = calldata; struct nfs4_client *clp = cb->cb_clp; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); if (cb->cb_done) { spin_lock(&clp->cl_lock); list_del(&cb->cb_per_client); spin_unlock(&clp->cl_lock); - nfs4_put_stid(&dp->dl_stid); + + cb->cb_ops->release(cb); } } -static const struct rpc_call_ops nfsd4_cb_recall_ops = { +static const struct rpc_call_ops nfsd4_cb_ops = { .rpc_call_prepare = nfsd4_cb_prepare, - .rpc_call_done = nfsd4_cb_recall_done, - .rpc_release = nfsd4_cb_recall_release, + .rpc_call_done = nfsd4_cb_done, + .rpc_release = nfsd4_cb_release, }; int nfsd4_create_callback_queue(void) @@ -937,16 +891,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) * instead, nfsd4_run_cb_null() will detect the killed * client, destroy the rpc client, and stop: */ - do_probe_callback(clp); + nfsd4_run_cb(&clp->cl_cb_null); flush_workqueue(callback_wq); } -static void nfsd4_release_cb(struct nfsd4_callback *cb) -{ - if (cb->cb_ops->rpc_release) - cb->cb_ops->rpc_release(cb); -} - /* requires cl_lock: */ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) { @@ -1009,63 +957,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) } /* Yay, the callback channel's back! Restart any callbacks: */ list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) - run_nfsd4_cb(cb); + queue_work(callback_wq, &cb->cb_work); } static void -nfsd4_run_callback_rpc(struct nfsd4_callback *cb) +nfsd4_run_cb_work(struct work_struct *work) { + struct nfsd4_callback *cb = + container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; + if (cb->cb_ops && cb->cb_ops->prepare) + cb->cb_ops->prepare(cb); + if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) nfsd4_process_cb_update(cb); clnt = clp->cl_cb_client; if (!clnt) { /* Callback channel broken, or client killed; give up: */ - nfsd4_release_cb(cb); + if (cb->cb_ops && cb->cb_ops->release) + cb->cb_ops->release(cb); return; } cb->cb_msg.rpc_cred = clp->cl_cb_cred; rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, - cb->cb_ops, cb); + cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); } -void -nfsd4_run_cb_null(struct work_struct *w) +void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, + struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op) { - struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, - cb_work); - nfsd4_run_callback_rpc(cb); -} - -void -nfsd4_run_cb_recall(struct work_struct *w) -{ - struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, - cb_work); - - nfsd4_prepare_cb_recall(cb->cb_op); - nfsd4_run_callback_rpc(cb); -} - -void nfsd4_cb_recall(struct nfs4_delegation *dp) -{ - struct nfsd4_callback *cb = &dp->dl_recall; - struct nfs4_client *clp = dp->dl_stid.sc_client; - - dp->dl_retries = 1; - cb->cb_op = dp; cb->cb_clp = clp; - cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; - - cb->cb_ops = &nfsd4_cb_recall_ops; - + cb->cb_ops = ops; + INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); INIT_LIST_HEAD(&cb->cb_per_client); cb->cb_done = true; +} - run_nfsd4_cb(&dp->dl_recall); +void nfsd4_run_cb(struct nfsd4_callback *cb) +{ + queue_work(callback_wq, &cb->cb_work); } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index a0ab0a847d69..e1b3d3d472da 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -215,7 +215,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) memset(&ent, 0, sizeof(ent)); /* Authentication name */ - if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + len = qword_get(&buf, buf1, PAGE_SIZE); + if (len <= 0 || len >= IDMAP_NAMESZ) goto out; memcpy(ent.authname, buf1, sizeof(ent.authname)); @@ -245,12 +246,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) /* Name */ error = -EINVAL; len = qword_get(&buf, buf1, PAGE_SIZE); - if (len < 0) + if (len < 0 || len >= IDMAP_NAMESZ) goto out; if (len == 0) set_bit(CACHE_NEGATIVE, &ent.h.flags); - else if (len >= IDMAP_NAMESZ) - goto out; else memcpy(ent.name, buf1, sizeof(ent.name)); error = -ENOMEM; @@ -259,15 +258,12 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) goto out; cache_put(&res->h, cd); - error = 0; out: kfree(buf1); - return error; } - static struct ent * idtoname_lookup(struct cache_detail *cd, struct ent *item) { @@ -368,7 +364,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) { struct ent ent, *res; char *buf1; - int error = -EINVAL; + int len, error = -EINVAL; if (buf[buflen - 1] != '\n') return (-EINVAL); @@ -381,7 +377,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) memset(&ent, 0, sizeof(ent)); /* Authentication name */ - if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + len = qword_get(&buf, buf1, PAGE_SIZE); + if (len <= 0 || len >= IDMAP_NAMESZ) goto out; memcpy(ent.authname, buf1, sizeof(ent.authname)); @@ -392,8 +389,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; /* Name */ - error = qword_get(&buf, buf1, PAGE_SIZE); - if (error <= 0 || error >= IDMAP_NAMESZ) + len = qword_get(&buf, buf1, PAGE_SIZE); + if (len <= 0 || len >= IDMAP_NAMESZ) goto out; memcpy(ent.name, buf1, sizeof(ent.name)); @@ -421,7 +418,6 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) error = 0; out: kfree(buf1); - return (error); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5e0dc528a0e8..cdeb3cfd6f32 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } +static __be32 +nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_seek *seek) +{ + int whence; + __be32 status; + struct file *file; + + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, + &seek->seek_stateid, + RD_STATE, &file); + if (status) { + dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); + return status; + } + + switch (seek->seek_whence) { + case NFS4_CONTENT_DATA: + whence = SEEK_DATA; + break; + case NFS4_CONTENT_HOLE: + whence = SEEK_HOLE; + break; + default: + status = nfserr_union_notsupp; + goto out; + } + + /* + * Note: This call does change file->f_pos, but nothing in NFSD + * should ever file->f_pos. + */ + seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence); + if (seek->seek_pos < 0) + status = nfserrno(seek->seek_pos); + else if (seek->seek_pos >= i_size_read(file_inode(file))) + seek->seek_eof = true; + +out: + fput(file); + return status; +} + /* This routine never returns NFS_OK! If there are no other errors, it * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the * attributes matched. VERIFY is implemented by mapping NFSERR_SAME @@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, + + /* NFSv4.2 operations */ + [OP_SEEK] = { + .op_func = (nfsd4op_func)nfsd4_seek, + .op_name = "OP_SEEK", + }, }; int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 8f1af78ebb67..a25490ae6c62 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops { void (*create)(struct nfs4_client *); void (*remove)(struct nfs4_client *); int (*check)(struct nfs4_client *); - void (*grace_done)(struct nfsd_net *, time_t); + void (*grace_done)(struct nfsd_net *); }; /* Globals */ @@ -188,7 +188,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) status = mnt_want_write_file(nn->rec_file); if (status) - return; + goto out_creds; dir = nn->rec_file->f_path.dentry; /* lock the parent */ @@ -228,6 +228,7 @@ out_unlock: user_recovery_dirname); } mnt_drop_write_file(nn->rec_file); +out_creds: nfs4_reset_creds(original_cred); } @@ -392,7 +393,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) } static void -nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) +nfsd4_recdir_purge_old(struct nfsd_net *nn) { int status; @@ -479,6 +480,16 @@ nfsd4_init_recdir(struct net *net) return status; } +static void +nfsd4_shutdown_recdir(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (!nn->rec_file) + return; + fput(nn->rec_file); + nn->rec_file = NULL; +} static int nfs4_legacy_state_init(struct net *net) @@ -512,10 +523,13 @@ nfsd4_load_reboot_recovery_data(struct net *net) int status; status = nfsd4_init_recdir(net); - if (!status) - status = nfsd4_recdir_load(net); if (status) - printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); + return status; + + status = nfsd4_recdir_load(net); + if (status) + nfsd4_shutdown_recdir(net); + return status; } @@ -546,21 +560,12 @@ err: } static void -nfsd4_shutdown_recdir(struct nfsd_net *nn) -{ - if (!nn->rec_file) - return; - fput(nn->rec_file); - nn->rec_file = NULL; -} - -static void nfsd4_legacy_tracking_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs4_release_reclaim(nn); - nfsd4_shutdown_recdir(nn); + nfsd4_shutdown_recdir(net); nfs4_legacy_state_shutdown(net); } @@ -1015,7 +1020,7 @@ nfsd4_cld_check(struct nfs4_client *clp) } static void -nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) +nfsd4_cld_grace_done(struct nfsd_net *nn) { int ret; struct cld_upcall *cup; @@ -1028,7 +1033,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) } cup->cu_msg.cm_cmd = Cld_GraceDone; - cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; + cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time; ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); if (!ret) ret = cup->cu_msg.cm_status; @@ -1061,6 +1066,8 @@ MODULE_PARM_DESC(cltrack_legacy_disable, #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" +#define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION=" +#define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START=" static char * nfsd4_cltrack_legacy_topdir(void) @@ -1125,10 +1132,60 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) return result; } +static char * +nfsd4_cltrack_client_has_session(struct nfs4_client *clp) +{ + int copied; + size_t len; + char *result; + + /* prefix + Y/N character + terminating NULL */ + len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c", + clp->cl_minorversion ? 'Y' : 'N'); + if (copied >= len) { + /* just return nothing if output was truncated */ + kfree(result); + return NULL; + } + + return result; +} + +static char * +nfsd4_cltrack_grace_start(time_t grace_start) +{ + int copied; + size_t len; + char *result; + + /* prefix + max width of int64_t string + terminating NULL */ + len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld", + grace_start); + if (copied >= len) { + /* just return nothing if output was truncated */ + kfree(result); + return NULL; + } + + return result; +} + static int -nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) +nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1) { - char *envp[2]; + char *envp[3]; char *argv[4]; int ret; @@ -1139,10 +1196,12 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) dprintk("%s: cmd: %s\n", __func__, cmd); dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); - dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); + dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)"); + dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)"); - envp[0] = legacy; - envp[1] = NULL; + envp[0] = env0; + envp[1] = env1; + envp[2] = NULL; argv[0] = (char *)cltrack_prog; argv[1] = cmd; @@ -1186,28 +1245,78 @@ bin_to_hex_dup(const unsigned char *src, int srclen) } static int -nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) +nfsd4_umh_cltrack_init(struct net *net) { + int ret; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time); + /* XXX: The usermode helper s not working in container yet. */ if (net != &init_net) { WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " "tracking in a container!\n"); return -EINVAL; } - return nfsd4_umh_cltrack_upcall("init", NULL, NULL); + + ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); + kfree(grace_start); + return ret; +} + +static void +nfsd4_cltrack_upcall_lock(struct nfs4_client *clp) +{ + wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK, + TASK_UNINTERRUPTIBLE); +} + +static void +nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp) +{ + smp_mb__before_atomic(); + clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); + smp_mb__after_atomic(); + wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK); } static void nfsd4_umh_cltrack_create(struct nfs4_client *clp) { - char *hexid; + char *hexid, *has_session, *grace_start; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + /* + * With v4.0 clients, there's little difference in outcome between a + * create and check operation, and we can end up calling into this + * function multiple times per client (once for each openowner). So, + * for v4.0 clients skip upcalling once the client has been recorded + * on stable storage. + * + * For v4.1+ clients, the outcome of the two operations is different, + * so we must ensure that we upcall for the create operation. v4.1+ + * clients call this on RECLAIM_COMPLETE though, so we should only end + * up doing a single create upcall per client. + */ + if (clp->cl_minorversion == 0 && + test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); if (!hexid) { dprintk("%s: can't allocate memory for upcall!\n", __func__); return; } - nfsd4_umh_cltrack_upcall("create", hexid, NULL); + + has_session = nfsd4_cltrack_client_has_session(clp); + grace_start = nfsd4_cltrack_grace_start(nn->boot_time); + + nfsd4_cltrack_upcall_lock(clp); + if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start)) + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + nfsd4_cltrack_upcall_unlock(clp); + + kfree(has_session); + kfree(grace_start); kfree(hexid); } @@ -1216,12 +1325,21 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp) { char *hexid; + if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); if (!hexid) { dprintk("%s: can't allocate memory for upcall!\n", __func__); return; } - nfsd4_umh_cltrack_upcall("remove", hexid, NULL); + + nfsd4_cltrack_upcall_lock(clp); + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) && + nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0) + clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + nfsd4_cltrack_upcall_unlock(clp); + kfree(hexid); } @@ -1229,30 +1347,45 @@ static int nfsd4_umh_cltrack_check(struct nfs4_client *clp) { int ret; - char *hexid, *legacy; + char *hexid, *has_session, *legacy; + + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); if (!hexid) { dprintk("%s: can't allocate memory for upcall!\n", __func__); return -ENOMEM; } + + has_session = nfsd4_cltrack_client_has_session(clp); legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); - ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); + + nfsd4_cltrack_upcall_lock(clp); + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) { + ret = 0; + } else { + ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy); + if (ret == 0) + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + nfsd4_cltrack_upcall_unlock(clp); + kfree(has_session); kfree(legacy); kfree(hexid); + return ret; } static void -nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, - time_t boot_time) +nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) { char *legacy; char timestr[22]; /* FIXME: better way to determine max size? */ - sprintf(timestr, "%ld", boot_time); + sprintf(timestr, "%ld", nn->boot_time); legacy = nfsd4_cltrack_legacy_topdir(); - nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); + nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL); kfree(legacy); } @@ -1355,10 +1488,10 @@ nfsd4_client_record_check(struct nfs4_client *clp) } void -nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) +nfsd4_record_grace_done(struct nfsd_net *nn) { if (nn->client_tracking_ops) - nn->client_tracking_ops->grace_done(nn, boot_time); + nn->client_tracking_ops->grace_done(nn); } static int diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2e80a59e7e91..e9c3afe4b5d3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -96,6 +96,8 @@ static struct kmem_cache *deleg_slab; static void free_session(struct nfsd4_session *); +static struct nfsd4_callback_ops nfsd4_cb_recall_ops; + static bool is_session_dead(struct nfsd4_session *ses) { return ses->se_flags & NFS4_SESSION_DEAD; @@ -216,6 +218,13 @@ static void nfsd4_put_session(struct nfsd4_session *ses) spin_unlock(&nn->client_lock); } +static inline struct nfs4_stateowner * +nfs4_get_stateowner(struct nfs4_stateowner *sop) +{ + atomic_inc(&sop->so_count); + return sop; +} + static int same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) { @@ -235,10 +244,8 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, so_strhash) { if (!so->so_is_open_owner) continue; - if (same_owner_str(so, &open->op_owner)) { - atomic_inc(&so->so_count); - return openowner(so); - } + if (same_owner_str(so, &open->op_owner)) + return openowner(nfs4_get_stateowner(so)); } return NULL; } @@ -645,7 +652,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); dp->dl_type = NFS4_OPEN_DELEGATE_READ; - INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); + dp->dl_retries = 1; + nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, + &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); return dp; out_dec: atomic_long_dec(&num_delegations); @@ -673,15 +682,16 @@ nfs4_put_stid(struct nfs4_stid *s) static void nfs4_put_deleg_lease(struct nfs4_file *fp) { - lockdep_assert_held(&state_lock); + struct file *filp = NULL; - if (!fp->fi_lease) - return; - if (atomic_dec_and_test(&fp->fi_delegees)) { - vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); - fp->fi_lease = NULL; - fput(fp->fi_deleg_file); - fp->fi_deleg_file = NULL; + spin_lock(&fp->fi_lock); + if (fp->fi_deleg_file && atomic_dec_and_test(&fp->fi_delegees)) + swap(filp, fp->fi_deleg_file); + spin_unlock(&fp->fi_lock); + + if (filp) { + vfs_setlease(filp, F_UNLCK, NULL, NULL); + fput(filp); } } @@ -717,8 +727,6 @@ unhash_delegation_locked(struct nfs4_delegation *dp) list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_perfile); spin_unlock(&fp->fi_lock); - if (fp) - nfs4_put_deleg_lease(fp); } static void destroy_delegation(struct nfs4_delegation *dp) @@ -726,6 +734,7 @@ static void destroy_delegation(struct nfs4_delegation *dp) spin_lock(&state_lock); unhash_delegation_locked(dp); spin_unlock(&state_lock); + nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } @@ -735,6 +744,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); + nfs4_put_deleg_lease(dp->dl_stid.sc_file); + if (clp->cl_minorversion == 0) nfs4_put_stid(&dp->dl_stid); else { @@ -1635,6 +1646,7 @@ __destroy_client(struct nfs4_client *clp) while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); + nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } while (!list_empty(&clp->cl_revoked)) { @@ -1644,7 +1656,7 @@ __destroy_client(struct nfs4_client *clp) } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); - atomic_inc(&oo->oo_owner.so_count); + nfs4_get_stateowner(&oo->oo_owner); release_openowner(oo); } nfsd4_shutdown_callback(clp); @@ -1862,7 +1874,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } - INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); + nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); @@ -3056,8 +3068,8 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh) INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); fh_copy_shallow(&fp->fi_fhandle, fh); + fp->fi_deleg_file = NULL; fp->fi_had_conflict = false; - fp->fi_lease = NULL; fp->fi_share_deny = 0; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); @@ -3125,8 +3137,7 @@ static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, { if (!nfsd4_has_session(cstate)) { mutex_lock(&so->so_replay.rp_mutex); - cstate->replay_owner = so; - atomic_inc(&so->so_count); + cstate->replay_owner = nfs4_get_stateowner(so); } } @@ -3225,8 +3236,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); - stp->st_stateowner = &oo->oo_owner; - atomic_inc(&stp->st_stateowner->so_count); + stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; @@ -3349,8 +3359,9 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) return ret; } -void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) +static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) { + struct nfs4_delegation *dp = cb_to_delegation(cb); struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, nfsd_net_id); @@ -3371,6 +3382,43 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) spin_unlock(&state_lock); } +static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, + struct rpc_task *task) +{ + struct nfs4_delegation *dp = cb_to_delegation(cb); + + switch (task->tk_status) { + case 0: + return 1; + case -EBADHANDLE: + case -NFS4ERR_BAD_STATEID: + /* + * Race: client probably got cb_recall before open reply + * granting delegation. + */ + if (dp->dl_retries--) { + rpc_delay(task, 2 * HZ); + return 0; + } + /*FALLTHRU*/ + default: + return -1; + } +} + +static void nfsd4_cb_recall_release(struct nfsd4_callback *cb) +{ + struct nfs4_delegation *dp = cb_to_delegation(cb); + + nfs4_put_stid(&dp->dl_stid); +} + +static struct nfsd4_callback_ops nfsd4_cb_recall_ops = { + .prepare = nfsd4_cb_recall_prepare, + .done = nfsd4_cb_recall_done, + .release = nfsd4_cb_recall_release, +}; + static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { /* @@ -3381,22 +3429,24 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * it's safe to take a reference. */ atomic_inc(&dp->dl_stid.sc_count); - nfsd4_cb_recall(dp); + nfsd4_run_cb(&dp->dl_recall); } /* Called from break_lease() with i_lock held. */ -static void nfsd_break_deleg_cb(struct file_lock *fl) +static bool +nfsd_break_deleg_cb(struct file_lock *fl) { + bool ret = false; struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; struct nfs4_delegation *dp; if (!fp) { WARN(1, "(%p)->fl_owner NULL\n", fl); - return; + return ret; } if (fp->fi_had_conflict) { WARN(1, "duplicate break on %p\n", fp); - return; + return ret; } /* * We don't want the locks code to timeout the lease for us; @@ -3408,24 +3458,23 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; /* - * If there are no delegations on the list, then we can't count on this - * lease ever being cleaned up. Set the fl_break_time to jiffies so that - * time_out_leases will do it ASAP. The fact that fi_had_conflict is now - * true should keep any new delegations from being hashed. + * If there are no delegations on the list, then return true + * so that the lease code will go ahead and delete it. */ if (list_empty(&fp->fi_delegations)) - fl->fl_break_time = jiffies; + ret = true; else list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) nfsd_break_one_deleg(dp); spin_unlock(&fp->fi_lock); + return ret; } -static -int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) +static int +nfsd_change_deleg_cb(struct file_lock **onlist, int arg, struct list_head *dispose) { if (arg & F_UNLCK) - return lease_modify(onlist, arg); + return lease_modify(onlist, arg, dispose); else return -EAGAIN; } @@ -3759,7 +3808,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) fl = locks_alloc_lock(); if (!fl) return NULL; - locks_init_lock(fl); fl->fl_lmops = &nfsd_lease_mng_ops; fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; @@ -3772,7 +3820,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; - struct file_lock *fl; + struct file_lock *fl, *ret; struct file *filp; int status = 0; @@ -3786,11 +3834,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp) return -EBADF; } fl->fl_file = filp; - status = vfs_setlease(filp, fl->fl_type, &fl); - if (status) { + ret = fl; + status = vfs_setlease(filp, fl->fl_type, &fl, NULL); + if (fl) locks_free_lock(fl); + if (status) goto out_fput; - } spin_lock(&state_lock); spin_lock(&fp->fi_lock); /* Did the lease get broken before we took the lock? */ @@ -3798,13 +3847,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp) if (fp->fi_had_conflict) goto out_unlock; /* Race breaker */ - if (fp->fi_lease) { + if (fp->fi_deleg_file) { status = 0; atomic_inc(&fp->fi_delegees); hash_delegation_locked(dp, fp); goto out_unlock; } - fp->fi_lease = fl; fp->fi_deleg_file = filp; atomic_set(&fp->fi_delegees, 1); hash_delegation_locked(dp, fp); @@ -3837,7 +3885,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, spin_lock(&state_lock); spin_lock(&fp->fi_lock); dp->dl_stid.sc_file = fp; - if (!fp->fi_lease) { + if (!fp->fi_deleg_file) { spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); status = nfs4_setlease(dp); @@ -4107,7 +4155,7 @@ out: return status; } -static void +void nfsd4_end_grace(struct nfsd_net *nn) { /* do nothing if grace period already ended */ @@ -4116,14 +4164,28 @@ nfsd4_end_grace(struct nfsd_net *nn) dprintk("NFSD: end of grace period\n"); nn->grace_ended = true; - nfsd4_record_grace_done(nn, nn->boot_time); + /* + * If the server goes down again right now, an NFSv4 + * client will still be allowed to reclaim after it comes back up, + * even if it hasn't yet had a chance to reclaim state this time. + * + */ + nfsd4_record_grace_done(nn); + /* + * At this point, NFSv4 clients can still reclaim. But if the + * server crashes, any that have not yet reclaimed will be out + * of luck on the next boot. + * + * (NFSv4.1+ clients are considered to have reclaimed once they + * call RECLAIM_COMPLETE. NFSv4.0 clients are considered to + * have reclaimed after their first OPEN.) + */ locks_end_grace(&nn->nfsd4_manager); /* - * Now that every NFSv4 client has had the chance to recover and - * to see the (possibly new, possibly shorter) lease time, we - * can safely set the next grace time to the current lease time: + * At this point, and once lockd and/or any other containers + * exit their grace period, further reclaims will fail and + * regular locking can resume. */ - nn->nfsd4_grace = nn->nfsd4_lease; } static time_t @@ -4867,9 +4929,25 @@ nfs4_transform_lock_offset(struct file_lock *lock) lock->fl_end = OFFSET_MAX; } -/* Hack!: For now, we're defining this just so we can use a pointer to it - * as a unique cookie to identify our (NFSv4's) posix locks. */ +static void nfsd4_fl_get_owner(struct file_lock *dst, struct file_lock *src) +{ + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)src->fl_owner; + dst->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lo->lo_owner)); +} + +static void nfsd4_fl_put_owner(struct file_lock *fl) +{ + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner; + + if (lo) { + nfs4_put_stateowner(&lo->lo_owner); + fl->fl_owner = NULL; + } +} + static const struct lock_manager_operations nfsd_posix_mng_ops = { + .lm_get_owner = nfsd4_fl_get_owner, + .lm_put_owner = nfsd4_fl_put_owner, }; static inline void @@ -4915,10 +4993,8 @@ find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, so_strhash) { if (so->so_is_open_owner) continue; - if (!same_owner_str(so, owner)) - continue; - atomic_inc(&so->so_count); - return lockowner(so); + if (same_owner_str(so, owner)) + return lockowner(nfs4_get_stateowner(so)); } return NULL; } @@ -4997,8 +5073,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; - stp->st_stateowner = &lo->lo_owner; - atomic_inc(&lo->lo_owner.so_count); + stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; stp->st_stid.sc_free = nfs4_free_lock_stateid; @@ -5210,7 +5285,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } fp = lock_stp->st_stid.sc_file; - locks_init_lock(file_lock); switch (lock->lk_type) { case NFS4_READ_LT: case NFS4_READW_LT: @@ -5238,7 +5312,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_openmode; goto out; } - file_lock->fl_owner = (fl_owner_t)lock_sop; + + file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); file_lock->fl_pid = current->tgid; file_lock->fl_file = filp; file_lock->fl_flags = FL_POSIX; @@ -5354,7 +5429,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_jukebox; goto out; } - locks_init_lock(file_lock); + switch (lockt->lt_type) { case NFS4_READ_LT: case NFS4_READW_LT: @@ -5432,9 +5507,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_jukebox; goto fput; } - locks_init_lock(file_lock); + file_lock->fl_type = F_UNLCK; - file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); + file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner)); file_lock->fl_pid = current->tgid; file_lock->fl_file = filp; file_lock->fl_flags = FL_POSIX; @@ -5541,7 +5616,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, } } - atomic_inc(&sop->so_count); + nfs4_get_stateowner(sop); break; } spin_unlock(&clp->cl_lock); @@ -5645,6 +5720,9 @@ nfs4_check_open_reclaim(clientid_t *clid, if (status) return nfserr_reclaim_bad; + if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags)) + return nfserr_no_grace; + if (nfsd4_client_record_check(cstate->clp)) return nfserr_reclaim_bad; @@ -6342,10 +6420,10 @@ nfs4_state_start_net(struct net *net) ret = nfs4_state_create_net(net); if (ret) return ret; - nfsd4_client_tracking_init(net); nn->boot_time = get_seconds(); - locks_start_grace(net, &nn->nfsd4_manager); nn->grace_ended = false; + locks_start_grace(net, &nn->nfsd4_manager); + nfsd4_client_tracking_init(net); printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", nn->nfsd4_grace, net); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); @@ -6402,6 +6480,7 @@ nfs4_state_shutdown_net(struct net *net) list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); + nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f9821ce6658a..eeea7a90eb87 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -31,13 +31,6 @@ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * TODO: Neil Brown made the following observation: We currently - * initially reserve NFSD_BUFSIZE space on the transmit queue and - * never release any of that until the request is complete. - * It would be good to calculate a new maximum response size while - * decoding the COMPOUND, and call svc_reserve with this number - * at the end of nfs4svc_decode_compoundargs. */ #include <linux/slab.h> @@ -1521,6 +1514,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str } static __be32 +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) +{ + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &seek->seek_stateid); + if (status) + return status; + + READ_BUF(8 + 4); + p = xdr_decode_hyper(p, &seek->seek_offset); + seek->seek_whence = be32_to_cpup(p); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) { return nfs_ok; @@ -1593,6 +1602,20 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, + + /* new operations for NFSv4.2 */ + [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, + [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, }; static inline bool @@ -1670,6 +1693,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) readbytes += nfsd4_max_reply(argp->rqstp, op); } else max_reply += nfsd4_max_reply(argp->rqstp, op); + /* + * OP_LOCK may return a conflicting lock. (Special case + * because it will just skip encoding this if it runs + * out of xdr buffer space, and it is the only operation + * that behaves this way.) + */ + if (op->opnum == OP_LOCK) + max_reply += NFS4_OPAQUE_LIMIT; if (op->status) { argp->opcnt = i+1; @@ -2657,6 +2688,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, struct xdr_stream *xdr = cd->xdr; int start_offset = xdr->buf->len; int cookie_offset; + u32 name_and_cookie; int entry_bytes; __be32 nfserr = nfserr_toosmall; __be64 wire_offset; @@ -2718,7 +2750,14 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, cd->rd_maxcount -= entry_bytes; if (!cd->rd_dircount) goto fail; - cd->rd_dircount--; + /* + * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so + * let's always let through the first entry, at least: + */ + name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; + if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) + goto fail; + cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; @@ -3096,7 +3135,8 @@ static __be32 nfsd4_encode_splice_read( buf->page_len = maxcount; buf->len += maxcount; - xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; + xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) + / PAGE_SIZE; /* Use rest of head for padding and remaining ops: */ buf->tail[0].iov_base = xdr->p; @@ -3321,6 +3361,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } maxcount = min_t(int, maxcount-16, bytes_left); + /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */ + if (!readdir->rd_dircount) + readdir->rd_dircount = INT_MAX; + readdir->xdr = xdr; readdir->rd_maxcount = maxcount; readdir->common.err = 0; @@ -3751,6 +3795,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 +nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_seek *seek) +{ + __be32 *p; + + if (nfserr) + return nfserr; + + p = xdr_reserve_space(&resp->xdr, 4 + 8); + *p++ = cpu_to_be32(seek->seek_eof); + p = xdr_encode_hyper(p, seek->seek_pos); + + return nfserr; +} + +static __be32 nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) { return nfserr; @@ -3822,6 +3882,20 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, + + /* NFSv4.2 operations */ + [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop, + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, + [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, }; /* diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index ff9567633245..122f69185ef5 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -27,8 +27,12 @@ */ #define TARGET_BUCKET_SIZE 64 -static struct hlist_head * cache_hash; -static struct list_head lru_head; +struct nfsd_drc_bucket { + struct list_head lru_head; + spinlock_t cache_lock; +}; + +static struct nfsd_drc_bucket *drc_hashtbl; static struct kmem_cache *drc_slab; /* max number of entries allowed in the cache */ @@ -36,6 +40,7 @@ static unsigned int max_drc_entries; /* number of significant bits in the hash value */ static unsigned int maskbits; +static unsigned int drc_hashsize; /* * Stats and other tracking of on the duplicate reply cache. All of these and @@ -43,7 +48,7 @@ static unsigned int maskbits; */ /* total number of entries */ -static unsigned int num_drc_entries; +static atomic_t num_drc_entries; /* cache misses due only to checksum comparison failures */ static unsigned int payload_misses; @@ -75,7 +80,6 @@ static struct shrinker nfsd_reply_cache_shrinker = { * A cache entry is "single use" if c_state == RC_INPROG * Otherwise, it when accessing _prev or _next, the lock must be held. */ -static DEFINE_SPINLOCK(cache_lock); static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); /* @@ -116,6 +120,12 @@ nfsd_hashsize(unsigned int limit) return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); } +static u32 +nfsd_cache_hash(__be32 xid) +{ + return hash_32(be32_to_cpu(xid), maskbits); +} + static struct svc_cacherep * nfsd_reply_cache_alloc(void) { @@ -126,7 +136,6 @@ nfsd_reply_cache_alloc(void) rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; INIT_LIST_HEAD(&rp->c_lru); - INIT_HLIST_NODE(&rp->c_hash); } return rp; } @@ -138,29 +147,27 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } - if (!hlist_unhashed(&rp->c_hash)) - hlist_del(&rp->c_hash); list_del(&rp->c_lru); - --num_drc_entries; + atomic_dec(&num_drc_entries); drc_mem_usage -= sizeof(*rp); kmem_cache_free(drc_slab, rp); } static void -nfsd_reply_cache_free(struct svc_cacherep *rp) +nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); nfsd_reply_cache_free_locked(rp); - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); } int nfsd_reply_cache_init(void) { unsigned int hashsize; + unsigned int i; - INIT_LIST_HEAD(&lru_head); max_drc_entries = nfsd_cache_size_limit(); - num_drc_entries = 0; + atomic_set(&num_drc_entries, 0); hashsize = nfsd_hashsize(max_drc_entries); maskbits = ilog2(hashsize); @@ -170,9 +177,14 @@ int nfsd_reply_cache_init(void) if (!drc_slab) goto out_nomem; - cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); - if (!cache_hash) + drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); + if (!drc_hashtbl) goto out_nomem; + for (i = 0; i < hashsize; i++) { + INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); + spin_lock_init(&drc_hashtbl[i].cache_lock); + } + drc_hashsize = hashsize; return 0; out_nomem: @@ -184,17 +196,22 @@ out_nomem: void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unsigned int i; unregister_shrinker(&nfsd_reply_cache_shrinker); cancel_delayed_work_sync(&cache_cleaner); - while (!list_empty(&lru_head)) { - rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - nfsd_reply_cache_free_locked(rp); + for (i = 0; i < drc_hashsize; i++) { + struct list_head *head = &drc_hashtbl[i].lru_head; + while (!list_empty(head)) { + rp = list_first_entry(head, struct svc_cacherep, c_lru); + nfsd_reply_cache_free_locked(rp); + } } - kfree (cache_hash); - cache_hash = NULL; + kfree (drc_hashtbl); + drc_hashtbl = NULL; + drc_hashsize = 0; if (drc_slab) { kmem_cache_destroy(drc_slab); @@ -207,61 +224,63 @@ void nfsd_reply_cache_shutdown(void) * not already scheduled. */ static void -lru_put_end(struct svc_cacherep *rp) +lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { rp->c_timestamp = jiffies; - list_move_tail(&rp->c_lru, &lru_head); + list_move_tail(&rp->c_lru, &b->lru_head); schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } -/* - * Move a cache entry from one hash list to another - */ -static void -hash_refile(struct svc_cacherep *rp) -{ - hlist_del_init(&rp->c_hash); - /* - * No point in byte swapping c_xid since we're just using it to pick - * a hash bucket. - */ - hlist_add_head(&rp->c_hash, cache_hash + - hash_32((__force u32)rp->c_xid, maskbits)); -} - -/* - * Walk the LRU list and prune off entries that are older than RC_EXPIRE. - * Also prune the oldest ones when the total exceeds the max number of entries. - */ static long -prune_cache_entries(void) +prune_bucket(struct nfsd_drc_bucket *b) { struct svc_cacherep *rp, *tmp; long freed = 0; - list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) { /* * Don't free entries attached to calls that are still * in-progress, but do keep scanning the list. */ if (rp->c_state == RC_INPROG) continue; - if (num_drc_entries <= max_drc_entries && + if (atomic_read(&num_drc_entries) <= max_drc_entries && time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(rp); freed++; } + return freed; +} + +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static long +prune_cache_entries(void) +{ + unsigned int i; + long freed = 0; + bool cancel = true; + + for (i = 0; i < drc_hashsize; i++) { + struct nfsd_drc_bucket *b = &drc_hashtbl[i]; + + if (list_empty(&b->lru_head)) + continue; + spin_lock(&b->cache_lock); + freed += prune_bucket(b); + if (!list_empty(&b->lru_head)) + cancel = false; + spin_unlock(&b->cache_lock); + } /* - * Conditionally rearm the job. If we cleaned out the list, then - * cancel any pending run (since there won't be any work to do). - * Otherwise, we rearm the job or modify the existing one to run in - * RC_EXPIRE since we just ran the pruner. + * Conditionally rearm the job to run in RC_EXPIRE since we just + * ran the pruner. */ - if (list_empty(&lru_head)) - cancel_delayed_work(&cache_cleaner); - else + if (!cancel) mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); return freed; } @@ -269,32 +288,19 @@ prune_cache_entries(void) static void cache_cleaner_func(struct work_struct *unused) { - spin_lock(&cache_lock); prune_cache_entries(); - spin_unlock(&cache_lock); } static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - unsigned long num; - - spin_lock(&cache_lock); - num = num_drc_entries; - spin_unlock(&cache_lock); - - return num; + return atomic_read(&num_drc_entries); } static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - unsigned long freed; - - spin_lock(&cache_lock); - freed = prune_cache_entries(); - spin_unlock(&cache_lock); - return freed; + return prune_cache_entries(); } /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes @@ -332,20 +338,24 @@ nfsd_cache_csum(struct svc_rqst *rqstp) static bool nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) { - /* Check RPC header info first */ - if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || - rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || - rqstp->rq_arg.len != rp->c_len || - !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || - rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + /* Check RPC XID first */ + if (rqstp->rq_xid != rp->c_xid) return false; - /* compare checksum of NFS data */ if (csum != rp->c_csum) { ++payload_misses; return false; } + /* Other discriminators */ + if (rqstp->rq_proc != rp->c_proc || + rqstp->rq_prot != rp->c_prot || + rqstp->rq_vers != rp->c_vers || + rqstp->rq_arg.len != rp->c_len || + !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || + rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + return false; + return true; } @@ -355,18 +365,14 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) * NULL on failure. */ static struct svc_cacherep * -nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp, + __wsum csum) { struct svc_cacherep *rp, *ret = NULL; - struct hlist_head *rh; + struct list_head *rh = &b->lru_head; unsigned int entries = 0; - /* - * No point in byte swapping rq_xid since we're just using it to pick - * a hash bucket. - */ - rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)]; - hlist_for_each_entry(rp, rh, c_hash) { + list_for_each_entry(rp, rh, c_lru) { ++entries; if (nfsd_cache_match(rqstp, csum, rp)) { ret = rp; @@ -377,11 +383,12 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) /* tally hash chain length stats */ if (entries > longest_chain) { longest_chain = entries; - longest_chain_cachesize = num_drc_entries; + longest_chain_cachesize = atomic_read(&num_drc_entries); } else if (entries == longest_chain) { /* prefer to keep the smallest cachesize possible here */ - longest_chain_cachesize = min(longest_chain_cachesize, - num_drc_entries); + longest_chain_cachesize = min_t(unsigned int, + longest_chain_cachesize, + atomic_read(&num_drc_entries)); } return ret; @@ -403,6 +410,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) vers = rqstp->rq_vers, proc = rqstp->rq_proc; __wsum csum; + u32 hash = nfsd_cache_hash(xid); + struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; unsigned long age; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -420,16 +429,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * preallocate an entry. */ rp = nfsd_reply_cache_alloc(); - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); if (likely(rp)) { - ++num_drc_entries; + atomic_inc(&num_drc_entries); drc_mem_usage += sizeof(*rp); } /* go ahead and prune the cache */ - prune_cache_entries(); + prune_bucket(b); - found = nfsd_cache_search(rqstp, csum); + found = nfsd_cache_search(b, rqstp, csum); if (found) { if (likely(rp)) nfsd_reply_cache_free_locked(rp); @@ -454,8 +463,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rp->c_len = rqstp->rq_arg.len; rp->c_csum = csum; - hash_refile(rp); - lru_put_end(rp); + lru_put_end(b, rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { @@ -465,14 +473,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) } rp->c_type = RC_NOCACHE; out: - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); return rtn; found_entry: nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - lru_put_end(rp); + lru_put_end(b, rp); rtn = RC_DROPIT; /* Request being processed or excessive rexmits */ @@ -527,18 +535,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; + u32 hash; + struct nfsd_drc_bucket *b; int len; size_t bufsize = 0; if (!rp) return; + hash = nfsd_cache_hash(rp->c_xid); + b = &drc_hashtbl[hash]; + len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } @@ -553,23 +566,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) bufsize = len << 2; cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } cachv->iov_len = bufsize; memcpy(cachv->iov_base, statp, bufsize); break; case RC_NOCACHE: - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); drc_mem_usage += bufsize; - lru_put_end(rp); + lru_put_end(b, rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); return; } @@ -600,9 +613,9 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) */ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - spin_lock(&cache_lock); seq_printf(m, "max entries: %u\n", max_drc_entries); - seq_printf(m, "num entries: %u\n", num_drc_entries); + seq_printf(m, "num entries: %u\n", + atomic_read(&num_drc_entries)); seq_printf(m, "hash buckets: %u\n", 1 << maskbits); seq_printf(m, "mem usage: %u\n", drc_mem_usage); seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); @@ -611,7 +624,6 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "payload misses: %u\n", payload_misses); seq_printf(m, "longest chain len: %u\n", longest_chain); seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); - spin_unlock(&cache_lock); return 0; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4e042105fb6e..ca73ca79a0ee 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -49,6 +49,7 @@ enum { NFSD_Leasetime, NFSD_Gracetime, NFSD_RecoveryDir, + NFSD_V4EndGrace, #endif }; @@ -68,6 +69,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size); static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); +static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); #endif static ssize_t (*write_op[])(struct file *, char *, size_t) = { @@ -84,6 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, [NFSD_RecoveryDir] = write_recoverydir, + [NFSD_V4EndGrace] = write_v4_end_grace, #endif }; @@ -1077,6 +1080,47 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) return rv; } +/** + * write_v4_end_grace - release grace period for nfsd's v4.x lock manager + * + * Input: + * buf: ignored + * size: zero + * OR + * + * Input: + * buf: any value + * size: non-zero length of C string in @buf + * Output: + * passed-in buffer filled with "Y" or "N" with a newline + * and NULL-terminated C string. This indicates whether + * the grace period has ended in the current net + * namespace. Return code is the size in bytes of the + * string. Writing a string that starts with 'Y', 'y', or + * '1' to the file will end the grace period for nfsd's v4 + * lock manager. + */ +static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) +{ + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (size > 0) { + switch(buf[0]) { + case 'Y': + case 'y': + case '1': + nfsd4_end_grace(nn); + break; + default: + return -EINVAL; + } + } + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n", + nn->grace_ended ? 'Y' : 'N'); +} + #endif /*----------------------------------------------------------------------------*/ @@ -1110,6 +1154,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, #endif /* last one */ {""} }; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 847daf37e566..747f3b95bd11 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) -#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) +#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP) #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index e883a5868be6..88026fc6a981 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -209,8 +209,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) * fix that case easily. */ struct cred *new = prepare_creds(); - if (!new) - return nfserrno(-ENOMEM); + if (!new) { + error = nfserrno(-ENOMEM); + goto out; + } new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4a89e00d7461..2712042a66b1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -62,16 +62,21 @@ typedef struct { (s)->si_generation struct nfsd4_callback { - void *cb_op; struct nfs4_client *cb_clp; struct list_head cb_per_client; u32 cb_minorversion; struct rpc_message cb_msg; - const struct rpc_call_ops *cb_ops; + struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; bool cb_done; }; +struct nfsd4_callback_ops { + void (*prepare)(struct nfsd4_callback *); + int (*done)(struct nfsd4_callback *, struct rpc_task *); + void (*release)(struct nfsd4_callback *); +}; + /* * A core object that represents a "common" stateid. These are generally * embedded within the different (more specific) stateid objects and contain @@ -127,6 +132,9 @@ struct nfs4_delegation { struct nfsd4_callback dl_recall; }; +#define cb_to_delegation(cb) \ + container_of(cb, struct nfs4_delegation, dl_recall) + /* client delegation callback info */ struct nfs4_cb_conn { /* SETCLIENTID info */ @@ -306,6 +314,7 @@ struct nfs4_client { #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ #define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ +#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) unsigned long cl_flags; @@ -477,7 +486,6 @@ struct nfs4_file { atomic_t fi_access[2]; u32 fi_share_deny; struct file *fi_deleg_file; - struct file_lock *fi_lease; atomic_t fi_delegees; struct knfsd_fh fi_fhandle; bool fi_had_conflict; @@ -517,6 +525,13 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) #define RD_STATE 0x00000010 #define WR_STATE 0x00000020 +enum nfsd4_cb_op { + NFSPROC4_CLNT_CB_NULL = 0, + NFSPROC4_CLNT_CB_RECALL, + NFSPROC4_CLNT_CB_SEQUENCE, +}; + + struct nfsd4_compound_state; struct nfsd_net; @@ -531,12 +546,12 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); -void nfsd4_run_cb_null(struct work_struct *w); -void nfsd4_run_cb_recall(struct work_struct *w); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); -extern void nfsd4_cb_recall(struct nfs4_delegation *dp); +extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, + struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); +extern void nfsd4_run_cb(struct nfsd4_callback *cb); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); @@ -545,13 +560,16 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); +/* grace period management */ +void nfsd4_end_grace(struct nfsd_net *nn); + /* nfs4recover operations */ extern int nfsd4_client_tracking_init(struct net *net); extern void nfsd4_client_tracking_exit(struct net *net); extern void nfsd4_client_record_create(struct nfs4_client *clp); extern void nfsd4_client_record_remove(struct nfs4_client *clp); extern int nfsd4_client_record_check(struct nfs4_client *clp); -extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); +extern void nfsd4_record_grace_done(struct nfsd_net *nn); /* nfs fault injection functions */ #ifdef CONFIG_NFSD_FAULT_INJECTION diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f501a9b5c9df..989129e2d6ea 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -445,6 +445,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, if (err) goto out; size_change = 1; + + /* + * RFC5661, Section 18.30.4: + * Changing the size of a file with SETATTR indirectly + * changes the time_modify and change attributes. + * + * (and similar for the older RFCs) + */ + if (iap->ia_size != i_size_read(inode)) + iap->ia_valid |= ATTR_MTIME; } iap->ia_valid |= ATTR_CTIME; @@ -649,6 +659,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, { struct path path; struct inode *inode; + struct file *file; int flags = O_RDONLY|O_LARGEFILE; __be32 err; int host_err = 0; @@ -703,19 +714,25 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, else flags = O_WRONLY|O_LARGEFILE; } - *filp = dentry_open(&path, flags, current_cred()); - if (IS_ERR(*filp)) { - host_err = PTR_ERR(*filp); - *filp = NULL; - } else { - host_err = ima_file_check(*filp, may_flags); - if (may_flags & NFSD_MAY_64BIT_COOKIE) - (*filp)->f_mode |= FMODE_64BITHASH; - else - (*filp)->f_mode |= FMODE_32BITHASH; + file = dentry_open(&path, flags, current_cred()); + if (IS_ERR(file)) { + host_err = PTR_ERR(file); + goto out_nfserr; } + host_err = ima_file_check(file, may_flags, 0); + if (host_err) { + nfsd_close(file); + goto out_nfserr; + } + + if (may_flags & NFSD_MAY_64BIT_COOKIE) + file->f_mode |= FMODE_64BITHASH; + else + file->f_mode |= FMODE_32BITHASH; + + *filp = file; out_nfserr: err = nfserrno(host_err); out: diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 465e7799742a..5720e9457f33 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete { u32 rca_one_fs; }; +struct nfsd4_seek { + /* request */ + stateid_t seek_stateid; + loff_t seek_offset; + u32 seek_whence; + + /* response */ + u32 seek_eof; + loff_t seek_pos; +}; + struct nfsd4_op { int opnum; __be32 status; @@ -473,6 +484,9 @@ struct nfsd4_op { struct nfsd4_reclaim_complete reclaim_complete; struct nfsd4_test_stateid test_stateid; struct nfsd4_free_stateid free_stateid; + + /* NFSv4.2 */ + struct nfsd4_seek seek; } u; struct nfs4_replay * replay; }; |