From eec70897d81bb2913e22c6792ea2739faf59c3e1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 20 Feb 2014 00:52:01 +0000 Subject: bio-integrity: Drop bio_integrity_verify BUG_ON in post bip->bip_iter world Given that bip->bip_iter.bi_size is decremented after bio_advance() -> bio_integrity_advance() is called, the BUG_ON() in bio_integrity_verify() ends up tripping in v3.14-rc1 code with the advent of immutable biovecs in: commit d57a5f7c6605f15f3b5134837e68b448a7cea88e Author: Kent Overstreet Date: Sat Nov 23 17:20:16 2013 -0800 bio-integrity: Convert to bvec_iter Given that there is no easy way to ascertain the original bi_size value, go ahead and drop this BUG_ON(). Reported-by: Sagi Grimberg Reported-by: Akinobu Mita Acked-by: Martin K. Petersen Cc: Kent Overstreet Signed-off-by: Nicholas Bellinger Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 0129b78a6908..4f70f383132c 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -458,11 +458,10 @@ static int bio_integrity_verify(struct bio *bio) struct blk_integrity_exchg bix; struct bio_vec *bv; sector_t sector = bio->bi_integrity->bip_iter.bi_sector; - unsigned int sectors, total, ret; + unsigned int sectors, ret = 0; void *prot_buf = bio->bi_integrity->bip_buf; int i; - ret = total = 0; bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; @@ -484,8 +483,6 @@ static int bio_integrity_verify(struct bio *bio) sectors = bv->bv_len / bi->sector_size; sector += sectors; prot_buf += sectors * bi->tuple_size; - total += sectors * bi->tuple_size; - BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); kunmap_atomic(kaddr); } -- cgit v1.2.1 From b7e63a1079b266866a732cf699d8c4d61391bbda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Feb 2014 11:19:14 -0800 Subject: NFSv4: Fix another nfs4_sequence corruptor 
nfs4_release_lockowner needs to set the rpc_message reply to point to the nfs4_sequence_res in order to avoid another Oopsable situation in nfs41_assign_slot. Fixes: fbd4bfd1d9d21 (NFS: Add nfs4_sequence calls for RELEASE_LOCKOWNER) Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2da6a698b8f7..44e088dc357c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5828,8 +5828,7 @@ struct nfs_release_lockowner_data { struct nfs4_lock_state *lsp; struct nfs_server *server; struct nfs_release_lockowner_args args; - struct nfs4_sequence_args seq_args; - struct nfs4_sequence_res seq_res; + struct nfs_release_lockowner_res res; unsigned long timestamp; }; @@ -5837,7 +5836,7 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata { struct nfs_release_lockowner_data *data = calldata; nfs40_setup_sequence(data->server, - &data->seq_args, &data->seq_res, task); + &data->args.seq_args, &data->res.seq_res, task); data->timestamp = jiffies; } @@ -5846,7 +5845,7 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) struct nfs_release_lockowner_data *data = calldata; struct nfs_server *server = data->server; - nfs40_sequence_done(task, &data->seq_res); + nfs40_sequence_done(task, &data->res.seq_res); switch (task->tk_status) { case 0: @@ -5887,7 +5886,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) return -ENOMEM; - nfs4_init_sequence(&data->seq_args, &data->seq_res, 0); data->lsp = lsp; data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; @@ -5895,6 +5893,8 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st data->args.lock_owner.s_dev = server->s_dev; msg.rpc_argp = &data->args; + 
msg.rpc_resp = &data->res; + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); return 0; } -- cgit v1.2.1 From 755a48a7a4eb05b9c8424e3017d947b2961a60e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Mar 2014 22:03:12 -0500 Subject: NFS: Fix a delegation callback race The clean-up in commit 36281caa839f ended up removing a NULL pointer check that is needed in order to prevent an Oops in nfs_async_inode_return_delegation(). Reported-by: "Yan, Zheng" Link: http://lkml.kernel.org/r/5313E9F6.2020405@intel.com Fixes: 36281caa839f (NFSv4: Further clean-ups of delegation stateid validation) Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ef792f29f831..5d8ccecf5f5c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -659,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL) + goto out_enoent; - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { - rcu_read_unlock(); - return -ENOENT; - } + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); return 0; +out_enoent: + rcu_read_unlock(); + return -ENOENT; } static struct inode * -- cgit v1.2.1 From 668f9abbd4334e6c29fa8acd71635c4f9101caa7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 3 Mar 2014 15:38:18 -0800 Subject: mm: close PageTail race Commit bf6bddf1924e ("mm: introduce compaction and migration for ballooned pages") introduces page_count(page) into memory compaction which dereferences page->first_page if PageTail(page). 
This results in a very rare NULL pointer dereference on the aforementioned page_count(page). Indeed, anything that does compound_head(), including page_count() is susceptible to racing with prep_compound_page() and seeing a NULL or dangling page->first_page pointer. This patch uses Andrea's implementation of compound_trans_head() that deals with such a race and makes it the default compound_head() implementation. This includes a read memory barrier that ensures that if PageTail(head) is true that we return a head page that is neither NULL nor dangling. The patch then adds a store memory barrier to prep_compound_page() to ensure page->first_page is set. This is the safest way to ensure we see the head page that we are expecting, PageTail(page) is already in the unlikely() path and the memory barriers are unfortunately required. Hugetlbfs is the exception, we don't enforce a store memory barrier during init since no race is possible. Signed-off-by: David Rientjes Cc: Holger Kiehl Cc: Christoph Lameter Cc: Rafael Aquini Cc: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Rik van Riel Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/page.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/page.c b/fs/proc/page.c index 02174a610315..e647c55275d9 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -121,9 +121,8 @@ u64 stable_page_flags(struct page *page) * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon * to make sure a given page is a thp, not a non-huge compound page. 
*/ - else if (PageTransCompound(page) && - (PageLRU(compound_trans_head(page)) || - PageAnon(compound_trans_head(page)))) + else if (PageTransCompound(page) && (PageLRU(compound_head(page)) || + PageAnon(compound_head(page)))) u |= 1 << KPF_THP; /* -- cgit v1.2.1 From 15c34a760630ca2c803848fba90ca0646a9907dd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 Mar 2014 15:38:32 -0800 Subject: ocfs2: fix quota file corruption Global quota files are accessed from different nodes. Thus we cannot cache offset of quota structure in the quota file after we drop our node reference count to it because after that moment quota structure may be freed and reallocated elsewhere by a different node resulting in corruption of quota file. Fix the problem by clearing dq_off when we are releasing dquot structure. We also remove the DQ_READ_B handling because it is useless - DQ_ACTIVE_B is set iff DQ_READ_B is set. Signed-off-by: Jan Kara Cc: Goldwyn Rodrigues Cc: Joel Becker Reviewed-by: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/quota_global.c | 27 +++++++++++++++++---------- fs/ocfs2/quota_local.c | 4 ---- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index aaa50611ec66..d7b5108789e2 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) */ if (status < 0) mlog_errno(status); + /* + * Clear dq_off so that we search for the structure in quota file next + * time we acquire it. The structure might be deleted and reallocated + * elsewhere by another node while our dquot structure is on freelist. 
+ */ + dquot->dq_off = 0; clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_trans: ocfs2_commit_trans(osb, handle); @@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) status = ocfs2_lock_global_qf(info, 1); if (status < 0) goto out; - if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { - status = ocfs2_qinfo_lock(info, 0); - if (status < 0) - goto out_dq; - status = qtree_read_dquot(&info->dqi_gi, dquot); - ocfs2_qinfo_unlock(info, 0); - if (status < 0) - goto out_dq; - } - set_bit(DQ_READ_B, &dquot->dq_flags); + status = ocfs2_qinfo_lock(info, 0); + if (status < 0) + goto out_dq; + /* + * We always want to read dquot structure from disk because we don't + * know what happened with it while it was on freelist. + */ + status = qtree_read_dquot(&info->dqi_gi, dquot); + ocfs2_qinfo_unlock(info, 0); + if (status < 0) + goto out_dq; OCFS2_DQUOT(dquot)->dq_use_count++; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2e4344be3b96..2001862bf2b1 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); out: - /* Clear the read bit so that next time someone uses this - * dquot he reads fresh info from disk and allocates local - * dquot structure */ - clear_bit(DQ_READ_B, &dquot->dq_flags); return status; } -- cgit v1.2.1 From bd2c00353286d63542a8a0896a8c747f7c880edd Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Mon, 3 Mar 2014 15:38:35 -0800 Subject: hfsplus: fix remount issue Current implementation of HFS+ driver has small issue with remount option. Namely, for example, you are unable to remount from RO mode into RW mode by means of command "mount -o remount,rw /dev/loop0 /mnt/hfsplus". 
Trying to execute sequence of commands results in an error message: mount /dev/loop0 /mnt/hfsplus mount -o remount,ro /dev/loop0 /mnt/hfsplus mount -o remount,rw /dev/loop0 /mnt/hfsplus mount: you must specify the filesystem type mount -t hfsplus -o remount,rw /dev/loop0 /mnt/hfsplus mount: /mnt/hfsplus not mounted or bad option The reason of such issue is failure of mount syscall: mount("/dev/loop0", "/mnt/hfsplus", 0x2282a60, MS_MGC_VAL|MS_REMOUNT, NULL) = -1 EINVAL (Invalid argument) Namely, hfsplus_parse_options_remount() method receives empty "input" argument and returns false in such case. As a result, hfsplus_remount() returns -EINVAL error code. This patch fixes the issue by means of return true for the case of empty "input" argument in hfsplus_parse_options_remount() method. Signed-off-by: Vyacheslav Dubeyko Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 968eab5bc1f5..68537e8b7a09 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -75,7 +75,7 @@ int hfsplus_parse_options_remount(char *input, int *force) int token; if (!input) - return 0; + return 1; while ((p = strsep(&input, ",")) != NULL) { if (!*p) -- cgit v1.2.1 From e1253be0ece1a95a02c7f5843194877471af8179 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Mar 2014 08:44:23 -0500 Subject: NFSv4: nfs4_stateid_is_current should return 'true' for an invalid stateid When nfs4_set_rw_stateid() fails by returning EIO to indicate that the stateid is completely invalid, then it makes no sense to have it trigger a retry of the READ or WRITE operation. Instead, we should just have it fall through and attempt a recovery. This fixes an infinite loop in which the client keeps replaying the same bad stateid back to the server. 
Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 44e088dc357c..4ae8141452c9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4011,8 +4011,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid, { nfs4_stateid current_stateid; - if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode)) - return false; + /* If the current stateid represents a lost lock, then exit */ + if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO) + return true; return nfs4_stateid_match(stateid, &current_stateid); } -- cgit v1.2.1 From 927864cd92aaad1d6285e3bb16e503caf3d6e27e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Mar 2014 13:12:03 -0500 Subject: NFSv4: Fix the return value of nfs4_select_rw_stateid In commit 5521abfdcf4d6 (NFSv4: Resend the READ/WRITE RPC call if a stateid change causes an error), we overloaded the return value of nfs4_select_rw_stateid() to cause it to return -EWOULDBLOCK if an RPC call is outstanding that would cause the NFSv4 lock or open stateid to change. That is all redundant when we actually copy the stateid used in the read/write RPC call that failed, and check that against the current stateid. It is doubly so, when we consider that in the NFSv4.1 case, we also set the stateid's seqid to the special value '0', which means 'match the current valid stateid'. 
Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e1a47217c05e..0deb32105ccf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -974,9 +974,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); ret = 0; - smp_rmb(); - if (!list_empty(&lsp->ls_seqid.list)) - ret = -EWOULDBLOCK; } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); @@ -984,10 +981,9 @@ out: return ret; } -static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { const nfs4_stateid *src; - int ret; int seq; do { @@ -996,12 +992,7 @@ static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) if (test_bit(NFS_OPEN_STATE, &state->flags)) src = &state->open_stateid; nfs4_stateid_copy(dst, src); - ret = 0; - smp_rmb(); - if (!list_empty(&state->owner->so_seqid.list)) - ret = -EWOULDBLOCK; } while (read_seqretry(&state->seqlock, seq)); - return ret; } /* @@ -1026,7 +1017,8 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, * choose to use. 
*/ goto out; - ret = nfs4_copy_open_stateid(dst, state); + nfs4_copy_open_stateid(dst, state); + ret = 0; out: if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) dst->seqid = 0; -- cgit v1.2.1 From 869a9d375dca601dde1dee1344f3d1d665505f19 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 4 Mar 2014 12:31:09 -0500 Subject: NFSv4.1 Fail data server I/O if stateid represents a lost lock Signed-off-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 12c8132ad408..b9a35c05b60f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -324,8 +324,9 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) &rdata->res.seq_res, task)) return; - nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ); + if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -435,8 +436,9 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) &wdata->res.seq_res, task)) return; - nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE); + if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) -- cgit v1.2.1 From 0418dae1056d6091e9527b7158a3763f7aa92353 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Mar 2014 13:48:16 -0500 Subject: NFSv4: Fail the truncate() if the lock/open stateid is invalid 
If the open stateid could not be recovered, or the file locks were lost, then we should fail the truncate() operation altogether. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4ae8141452c9..450bfedbe2f4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2398,13 +2398,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { /* Use that stateid */ - } else if (truncate && state != NULL && nfs4_valid_open_stateid(state)) { + } else if (truncate && state != NULL) { struct nfs_lockowner lockowner = { .l_owner = current->files, .l_pid = current->tgid, }; - nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - &lockowner); + if (!nfs4_valid_open_stateid(state)) + return -EBADF; + if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, + &lockowner) == -EIO) + return -EBADF; } else nfs4_stateid_copy(&arg.stateid, &zero_stateid); -- cgit v1.2.1