From 4a82fd7c4e78a1b7a224f9ae8bb7e1fd95f670e0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 15 Nov 2013 16:36:16 -0500 Subject: NFSv4 wait on recovery for async session errors When the state manager is processing the NFS4CLNT_DELEGRETURN flag, session draining is off, but DELEGRETURN can still get a session error. The async handler calls nfs4_schedule_session_recovery returns -EAGAIN, and the DELEGRETURN done then restarts the RPC task in the prepare state. With the state manager still processing the NFS4CLNT_DELEGRETURN flag with session draining off, these DELEGRETURNs will cycle with errors filling up the session slots. This prevents OPEN reclaims (from nfs_delegation_claim_opens) required by the NFS4CLNT_DELEGRETURN state manager processing from completing, hanging the state manager in the __rpc_wait_for_completion_task in nfs4_run_open_task as seen in this kernel thread dump: kernel: 4.12.32.53-ma D 0000000000000000 0 3393 2 0x00000000 kernel: ffff88013995fb60 0000000000000046 ffff880138cc5400 ffff88013a9df140 kernel: ffff8800000265c0 ffffffff8116eef0 ffff88013fc10080 0000000300000001 kernel: ffff88013a4ad058 ffff88013995ffd8 000000000000fbc8 ffff88013a4ad058 kernel: Call Trace: kernel: [] ? cache_alloc_refill+0x1c0/0x240 kernel: [] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc] kernel: [] rpc_wait_bit_killable+0x42/0xa0 [sunrpc] kernel: [] __wait_on_bit+0x5f/0x90 kernel: [] ? rpc_wait_bit_killable+0x0/0xa0 [sunrpc] kernel: [] out_of_line_wait_on_bit+0x78/0x90 kernel: [] ? wake_bit_function+0x0/0x50 kernel: [] __rpc_wait_for_completion_task+0x2d/0x30 [sunrpc] kernel: [] nfs4_run_open_task+0x11c/0x160 [nfs] kernel: [] nfs4_open_recover_helper+0x87/0x120 [nfs] kernel: [] nfs4_open_recover+0xc6/0x150 [nfs] kernel: [] ? nfs4_open_recoverdata_alloc+0x2f/0x60 [nfs] kernel: [] nfs4_open_delegation_recall+0x6a/0xa0 [nfs] kernel: [] nfs_end_delegation_return+0x120/0x2e0 [nfs] kernel: [] ? queue_work+0x1f/0x30 kernel: [] nfs_client_return_marked_delegations+0xd7/0x110 [nfs] kernel: [] nfs4_run_state_manager+0x548/0x620 [nfs] kernel: [] ? nfs4_run_state_manager+0x0/0x620 [nfs] kernel: [] kthread+0x96/0xa0 kernel: [] child_rip+0xa/0x20 kernel: [] ? kthread+0x0/0xa0 kernel: [] ? child_rip+0x0/0x20 The state manager can not therefore process the DELEGRETURN session errors. Change the async handler to wait for recovery on session errors. Signed-off-by: Andy Adamson Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5ab33c0792df..1f4edfbb4a70 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4803,7 +4803,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - goto restart_call; + goto wait_on_recovery; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); -- cgit v1.2.1 From c97cf606e43b85a6cf158b810375dd77312024db Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Nov 2013 16:34:14 -0500 Subject: NFSv4: Update list of irrecoverable errors on DELEGRETURN If the DELEGRETURN errors out with something like NFS4ERR_BAD_STATEID then there is no recovery possible. Just quit without returning an error. Also, note that the client must not assume that the NFSv4 lease has been renewed when it sees an error on DELEGRETURN. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1f4edfbb4a70..ca36d0d50b7d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4988,11 +4988,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); switch (task->tk_status) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: case 0: renew_lease(data->res.server, data->timestamp); break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + task->tk_status = 0; + break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == -EAGAIN) { -- cgit v1.2.1 From 69794ad70cd3b600319f175fc0cbe7abd510791f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Nov 2013 12:57:19 -0500 Subject: NFSv4: close needs to handle NFS4ERR_ADMIN_REVOKED Also ensure that we zero out the stateid mode when exiting Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ca36d0d50b7d..f01e2aa53210 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2518,9 +2518,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) calldata->roc_barrier); nfs_set_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); - nfs4_close_clear_stateid_flags(state, - calldata->arg.fmode); break; + case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: @@ -2528,9 +2527,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0) break; default: - if (nfs4_async_handle_error(task, server, state) == -EAGAIN) + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { rpc_restart_call_prepare(task); + goto out_release; + } } + nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); +out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); -- cgit v1.2.1 From f22e5edd2244609aed3906207a62223e7707a34d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Dec 2013 12:09:45 -0500 Subject: NFSv4.1: Prevent a 3-way deadlock between layoutreturn, open and state recovery Andy Adamson reports: The state manager is recovering expired state and recovery OPENs are being processed. If kswapd is pruning inodes at the same time, a deadlock can occur when kswapd calls evict_inode on an NFSv4.1 inode with a layout, and the resultant layoutreturn gets an error that the state mangager is to handle, causing the layoutreturn to wait on the (NFS client) cl_rpcwaitq. At the same time an open is waiting for the inode deletion to complete in __wait_on_freeing_inode. If the open is either the open called by the state manager, or an open from the same open owner that is holding the NFSv4 sequence id which causes the OPEN from the state manager to wait for the sequence id on the Seqid_waitqueue, then the state is deadlocked with kswapd. The fix is simply to have layoutreturn ignore all errors except NFS4ERR_DELAY. We already know that layouts are dropped on all server reboots, and that it has to be coded to deal with the "forgetful client model" that doesn't send layoutreturns. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1385402270-14284-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/nfs/nfs4proc.c') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f01e2aa53210..e040359983ce 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7599,7 +7599,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; server = NFS_SERVER(lrp->args.inode); - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + switch (task->tk_status) { + default: + task->tk_status = 0; + case 0: + break; + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) + break; rpc_restart_call_prepare(task); return; } -- cgit v1.2.1